├── swagger.png
├── static
│   ├── pkc.ttf
│   └── favicon.ico
├── utils
│   ├── api_tags.py
│   ├── __init__.py
│   ├── models.py
│   ├── tools.py
│   ├── log.py
│   └── config.py
├── start.sh
├── requirements.txt
├── config.ini
├── docker-compose.yaml
├── pkcWordcloud.py
├── Dockerfile
├── README.md
├── main.py
└── pkcDouYinVideo.py
/swagger.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/curtinlv/PKC-API/HEAD/swagger.png
--------------------------------------------------------------------------------
/static/pkc.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/curtinlv/PKC-API/HEAD/static/pkc.ttf
--------------------------------------------------------------------------------
/static/favicon.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/curtinlv/PKC-API/HEAD/static/favicon.ico
--------------------------------------------------------------------------------
/utils/api_tags.py:
--------------------------------------------------------------------------------
1 | from enum import Enum
2 |
class ApiTags(str, Enum):
    """
    Tags used to group API endpoints.

    Members are also plain ``str`` values because of the ``str`` mix-in.
    """
    PKC = "PKC"
--------------------------------------------------------------------------------
/start.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | # 设置 Chromium 参数并启动 Xvfb
3 | Xvfb :99 -screen 0 1024x768x24 -ac +extension GLX +render -noreset &
4 | # 解决 /dev/shm 太小的问题
5 | export DISPLAY=:99
6 | exec python main.py
7 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | colorlog==6.9.0
2 | quart==0.18.4
3 | quart_schema==0.17.1
4 | Requests==2.26.0
5 | requests_html==0.10.0
6 | wordcloud==1.9.4
7 | werkzeug==2.2.3
8 | lxml_html_clean==0.4.1
9 | psutil == 5.8.0
--------------------------------------------------------------------------------
/utils/__init__.py:
--------------------------------------------------------------------------------
1 | from .api_tags import ApiTags
2 | from .models import wordcloudTodo, dyQuery, dyResp, ApiErrorResponse, ApiSuccessResponse
3 | from .config import config
4 | from .tools import generate_random_name, kill_chromium_if_long_running
5 | from .log import log
6 |
7 |
8 |
--------------------------------------------------------------------------------
/config.ini:
--------------------------------------------------------------------------------
1 | ## 主配置,改动需要重启服务生效
2 | [main]
3 | # 默认端口
4 | port = 80
5 | # 接口密钥,如未配置则每次启动随机生成一个
6 | apiKey =
7 | # 禁用的接口,将需要禁用的接口路径填入下面,多个用,分隔。如禁用 抖音视频解析接口,填写 /getDouyinVideo,/getDouyinVideoUrl
8 | disableInterfaces =
9 |
10 | ###### 抖音解析接口配置 ######
11 | [DouYin]
12 | # 解析等待时间,时间越多解析越慢但成功率越大
13 | sleepNum = 10.0
--------------------------------------------------------------------------------
/docker-compose.yaml:
--------------------------------------------------------------------------------
1 | version: '3'
2 | services:
3 | pkc-api:
4 | image: curtinlv/pkc-api:latest
5 | container_name: pkc-api
6 | ports:
7 | - "80:80"
8 | environment:
9 | - apiKey= #接口密钥,如未配置则每次启动随机生成一个
10 | - disableInterfaces= #禁用的接口,将需要禁用的接口路径填入下面,多个用,分隔。如禁用 抖音视频解析接口,填写 /getDouyinVideo,/getDouyinVideoUrl
11 | - sleepNum=5 #解析等待时间,时间越多解析越慢但成功率越大
12 | volumes:
13 | - ./config.ini:/app/config.ini #映射配置文件,需手动建立config.ini
14 | # - ./static/pkc.ttf:/app/static/pkc.ttf #词云字体
15 | restart: unless-stopped
--------------------------------------------------------------------------------
/pkcWordcloud.py:
--------------------------------------------------------------------------------
1 | from wordcloud import WordCloud
async def createWordCloud(text, width, height, background_color, max_words):
    """Build a word cloud from *text*.

    Args:
        text: Source text to analyse.
        width: Canvas width handed to ``WordCloud``.
        height: Canvas height handed to ``WordCloud``.
        background_color: Background colour handed to ``WordCloud``.
        max_words: Maximum number of words handed to ``WordCloud``.

    Returns:
        The generated ``WordCloud`` instance.

    Raises:
        ValueError: Propagated from ``WordCloud.generate``. It used to be
            swallowed and turned into ``None``, which made the route handler
            fail later on ``None.to_image()`` and left its own
            ``except ValueError`` branch unreachable.
    """
    wc_config = {
        'width': width,
        'height': height,
        'background_color': background_color,
        'max_words': max_words,
        'scale': 3,  # render at 3x for a sharper image
        'min_font_size': 10,  # minimum font size
        # 'collocations': False  # disable bigram collocations
    }
    return WordCloud(**wc_config).generate(text)
--------------------------------------------------------------------------------
/utils/models.py:
--------------------------------------------------------------------------------
1 | from dataclasses import dataclass, field
2 |
@dataclass
class wordcloudTodo:
    """Request body of the word-cloud endpoint; every field has a default."""
    text: str = 'PKC很棒,也很简单'  # text to analyse
    width: int = 600  # default width
    height: int = 600  # default height
    dpi: int = 400  # default DPI
    max_words: int = 100  # maximum number of words
    background_color: str = 'white'  # background colour, white by default
11 |
@dataclass
class dyQuery:
    """Query-string parameters accepted by the Douyin endpoints."""
    url: str  # required: no default is declared
    # ua: str = field(default=None)
16 |
@dataclass
class dyResp:
    """Success payload of the Douyin link-extraction endpoint."""
    code: int = 200
    msg: str = '成功'
    video_url: str = '视频原始链接'  # placeholder default; real responses set the extracted URL
22 |
@dataclass
class ApiErrorResponse:
    """Error payload: a numeric code plus a human-readable message."""
    code: int = 500
    msg: str = '错误信息'  # placeholder default; callers pass the actual message
27 |
@dataclass
class ApiSuccessResponse:
    """
    Default success response.

    Declares no fields.
    """
33 |
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
# Python base image that is available for multiple architectures
FROM python:3.9-slim
# Set the working directory
WORKDIR /app
# Copy the project sources; the build context must contain a PKC-API/ directory
COPY PKC-API/ .
# First install build tools and image libraries, then (without recommended
# extras) the shared libraries used by Chromium, Xvfb, CJK fonts and Chromium
# itself; the apt lists are removed at the end.
RUN apt-get update && apt-get install -y \
    wget \
    gnupg \
    build-essential \
    python3-dev \
    libjpeg-dev \
    libfreetype6-dev \
    zlib1g-dev \
    libpng-dev \
    && apt-get install -y --no-install-recommends \
    libx11-6 \
    libxcomposite1 \
    libxdamage1 \
    libxext6 \
    libxfixes3 \
    libxrandr2 \
    libxtst6 \
    libappindicator3-1 \
    libasound2 \
    libatk-bridge2.0-0 \
    libatk1.0-0 \
    libcairo2 \
    libcups2 \
    libdbus-1-3 \
    libdrm2 \
    libgbm1 \
    libgdk-pixbuf2.0-0 \
    libglib2.0-0 \
    libgtk-3-0 \
    libnspr4 \
    libnss3 \
    libpango-1.0-0 \
    libpangocairo-1.0-0 \
    libxcb1 \
    libxss1 \
    libxkbcommon0 \
    xvfb \
    fonts-wqy-microhei \
    fonts-wqy-zenhei \
    chromium \
    chromium-driver \
    && rm -rf /var/lib/apt/lists/*

# Environment for the word-cloud font, the X display and Chromium
ENV FONT_PATH=/app/static/pkc.ttf
ENV DISPLAY=:99
ENV CHROMIUM_PATH=/usr/bin/chromium
ENV PUPPETEER_SKIP_DOWNLOAD=true
ENV PUPPETEER_EXECUTABLE_PATH=/usr/bin/chromium

# Install the Python dependencies
RUN pip install --upgrade pip && \
    pip install --no-cache-dir -r requirements.txt

# Expose the service port
EXPOSE 80

# Make the start script executable
RUN chmod +x /app/start.sh

CMD ["/app/start.sh"]

# Multi-architecture build example:
# buildx build --platform linux/amd64,linux/arm64 -t curtinlv/pkc-api:latest --push .
69 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # PKC-API
2 | ### v1.2.0
3 | ### API接口名称
4 | ***
5 | - *1.词云分析*
6 | - *2.抖音视频解析*
7 | - *3. ...*
8 | ***
9 | ## Ⅰ.搭建PKC-API
10 | 环境要求:
11 | CPU: 2核心或以上
12 | 内存: 4GB或更高
13 | 其他:国内IP
14 | ### 方式一:Docker一键部署
15 | ```bash
16 | docker run -d --name pkc-api -p 80:80 curtinlv/pkc-api:latest
17 | ```
18 |
19 | ### 方式二:Docker-compose部署
20 | 建立文件`docker-compose.yaml`,文件内容如下:
21 | ```yaml
22 | version: '3'
23 | services:
24 | pkc-api:
25 | image: curtinlv/pkc-api:latest
26 | container_name: pkc-api
27 | ports:
28 | - "80:80"
29 | environment:
30 | - apiKey= #接口密钥,如未配置则每次启动随机生成一个
31 | - disableInterfaces= #禁用的接口,将需要禁用的接口路径填入下面,多个用,分隔。如禁用 抖音视频解析接口,填写 /getDouyinVideo,/getDouyinVideoUrl
32 | - sleepNum=10 #解析等待时间,时间越多解析越慢但成功率越大
33 | volumes:
34 | - ./config.ini:/app/config.ini #映射配置文件config.ini
35 | # - ./static/pkc.ttf:/app/static/pkc.ttf #词云字体
36 | restart: unless-stopped
37 | ```
38 | 启动
39 | ```bash
40 | docker-compose up -d
41 | ```
42 | ### 方式三:Python启动
43 | 版本要求:`python3.9 +`
44 | ```bash
45 | # 拉取本项目
46 | git clone https://github.com/curtinlv/PKC-API.git
47 | # 切换项目目录
48 | cd PKC-API
49 | # 安装依赖包
50 | pip install -r requirements.txt
51 | # 词云字体(可自定义)
52 | export FONT_PATH=./static/pkc.ttf
53 | # 启动
54 | python main.py
55 | # 或
56 | nohup python main.py >./log.log 2>&1 & #后台启动
57 | ```
58 | ## Ⅱ.API调试页面
59 | ```html
60 | http://ip/swagger
61 | ```
62 | 
63 |
64 | ## Ⅲ.更新日志
65 | ~~~
66 | v1.2.0
67 | 1、优化抖音解析接口
68 | 2、新增外挂配置文件 config.ini(可配置自定义端口、apiKey验证、禁用指定接口)
69 | 3、增加接口apiKey认证(默认临时生成apiKey到控制台,如需配置固定apiKey请编辑配置文件config.ini)
70 |
71 | v1.1.0
72 | 1、新增抖音解析接口
73 |
74 | v1.0.0
75 | 1、新增词云接口
76 | ~~~
--------------------------------------------------------------------------------
/utils/tools.py:
--------------------------------------------------------------------------------
1 | # !/usr/bin/env python
2 | # -*- coding:utf-8 -*-
3 | # @FileName :tools.py
4 | # @Time :2025/3/6 11:28
5 | # @Author :Curtin
6 |
7 |
8 | import random
9 | import string
10 | import psutil
11 | import time
12 | import sys
13 | from .log import log
14 |
def generate_random_name(length=8):
    """Return a random string of ASCII letters and digits.

    The same helper mints the temporary API key in ``utils/config.py``, so
    characters are drawn with ``secrets`` (OS-level CSPRNG) instead of the
    predictable ``random`` generator.

    Args:
        length: Number of characters to produce; values <= 0 yield ``''``.

    Returns:
        A string of exactly ``length`` characters from ``[A-Za-z0-9]``.
    """
    import secrets  # local import so the block does not depend on a file-level change

    alphabet = string.ascii_letters + string.digits
    return ''.join(secrets.choice(alphabet) for _ in range(length))
19 |
async def save_content_to_file(content, file_path):
    """Write *content* to *file_path* as UTF-8 text, replacing any existing file."""
    with open(file_path, 'w', encoding='utf-8') as out:
        out.write(content)
25 |
def kill_chromium_if_long_running(max_age_seconds=60):
    """Terminate processes whose name contains "chrom" and that are too old.

    Does nothing on Windows. Every process whose lower-cased name contains
    ``'chrom'`` and that has been alive for more than *max_age_seconds* is
    sent ``terminate()``.

    Args:
        max_age_seconds: Age threshold in seconds (default 60, the value that
            was previously hard-coded).
    """
    if sys.platform.startswith('win'):
        return

    now = time.time()
    for proc in psutil.process_iter(['pid', 'name', 'create_time']):
        try:
            name = proc.info.get('name')
            created = proc.info.get('create_time')
            # psutil stores None for attributes it was not allowed to read;
            # calling .lower() on that used to raise an uncaught AttributeError.
            if not name or created is None:
                continue
            if 'chrom' not in name.lower():
                continue
            if now - created > max_age_seconds:
                proc.terminate()
        except (psutil.NoSuchProcess, psutil.AccessDenied, psutil.ZombieProcess):
            # Best effort: the process already exited or is not ours to signal.
            pass
45 |
46 |
47 |
48 |
--------------------------------------------------------------------------------
/utils/log.py:
--------------------------------------------------------------------------------
1 | import logging
2 | import colorlog
3 | import time
4 | import sys
5 |
def print_header():
    """Print the start-up banner to stdout, flushing after every line.

    The banner rows are raw strings: the previous plain literals contained
    invalid escape sequences such as ``"\\ "`` and ``"\\_"``, which newer
    Python versions report as a warning. The printed text is unchanged.
    """
    rule = "=" * 50
    banner = (
        r" ____ __ __ ____ ",
        r" | _ \ | |/ / / ___| ",
        r" | |_) | | ' / | | ",
        r" | __/ | . \ | |____ ",
        r" |_| |_|\_\ |______\ ",
    )
    print("\n" + rule, flush=True)
    for row in banner:
        print(row, flush=True)
    print("\n" + rule, flush=True)
    print("Initializing PKC-API...\n", flush=True)
15 | print_header()
def typing_effect(text, delay=0.1):
    """Write *text* to stdout one character at a time, then end the line.

    After each character stdout is flushed and the function sleeps for
    *delay* seconds.
    """
    for ch in text:
        sys.stdout.write(ch)
        sys.stdout.flush()
        time.sleep(delay)
    print()  # finish with a newline
23 |
class Logger:
    """Configure the root logger with a coloured console handler.

    The root logger is process-wide, so the handler is installed only once.
    Creating a second ``Logger`` used to add a second handler and print every
    record twice.
    """

    def __init__(self, log_level=logging.DEBUG):
        """Set the root logger level and attach the coloured handler if missing.

        Args:
            log_level: Level applied to the root logger.
        """
        self.logger = logging.getLogger()
        self.logger.setLevel(log_level)

        # A handler carrying a ColoredFormatter means an earlier instance
        # already configured the root logger.
        already_configured = any(
            isinstance(handler.formatter, colorlog.ColoredFormatter)
            for handler in self.logger.handlers
        )
        if already_configured:
            return

        formatter = colorlog.ColoredFormatter(
            '%(asctime)s - %(levelname)s - %(message)s',
            datefmt='%Y-%m-%d %H:%M:%S',
            log_colors={
                'DEBUG': 'cyan',
                'INFO': 'green',
                'WARNING': 'yellow',
                'ERROR': 'red',
                'CRITICAL': 'bold_red',
            }
        )
        log_handler = colorlog.StreamHandler()
        log_handler.setFormatter(formatter)
        self.logger.addHandler(log_handler)

    def get_logger(self):
        """Return the configured logger object."""
        return self.logger
57 |
# Shared logger instance used by the other modules (level INFO).
log = Logger(log_level=logging.INFO).get_logger()

# Manual demo: run this module directly to see one line per level.
if __name__ == '__main__':
    # Create a logger with the default (DEBUG) level
    logger = Logger().get_logger()
    # Emit one record per level
    logger.debug("这是一个调试信息")  # cyan
    logger.info("这是一个普通信息")  # green
    logger.warning("这是一个警告信息")  # yellow
    logger.error("这是一个错误信息")  # red
    logger.critical("这是一个严重错误信息")  # bold red
72 |
--------------------------------------------------------------------------------
/utils/config.py:
--------------------------------------------------------------------------------
1 | # !/usr/bin/env python
2 | # -*- coding:utf-8 -*-
3 | # @FileName :config.py
4 | # @Time :2025/3/14 11:47
5 | # @Author :Curtin
6 | import os
7 | from configparser import RawConfigParser
8 | from .tools import generate_random_name
9 | from .log import log
class Config:
    """Runtime settings read from ``config.ini`` and environment variables.

    Precedence, lowest to highest: built-in defaults, ``config.ini`` in the
    project root, then non-empty environment variables
    (``disableInterfaces``, ``port``, ``apiKey``, ``sleepNum``).

    Attributes:
        NO_API_KEY_REQUIRED_ROUTES: Route rules served without an API key.
        disableInterfaces: Route rules that are switched off.
        port: TCP port the server listens on.
        apiKey: Key clients must send; generated randomly when not configured.
        sleepNum: Wait time (seconds) used by the Douyin page renderer.
    """

    def __init__(self):
        # config.ini sits next to the package directory. Taking the parent
        # directory avoids the old str.replace('utils', ''), which mangled
        # any path that contained "utils" elsewhere.
        base_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
        config_path = os.path.join(base_dir, "config.ini")

        # Routes that do not require API key verification
        self.NO_API_KEY_REQUIRED_ROUTES = ['/', '/favicon.ico', '/swagger', '/openapi.json']
        # Routes that are disabled
        self.disableInterfaces = []
        self.port = 80
        self.apiKey = ''
        # Douyin settings
        self.sleepNum = 10.0

        self._load_file(config_path)
        self._apply_environment()

        if not self.apiKey:
            tmpApiKey = generate_random_name(length=32)
            log.info(f"你的接口密钥(temp apiKey):{tmpApiKey} !!!这是临时接口密钥,如需修改请到config.ini文件配置apiKey")
            self.apiKey = tmpApiKey

    @staticmethod
    def _read_ini(path):
        """Parse *path*, accepting UTF-8 (with or without BOM) and falling back to GBK."""
        parser = RawConfigParser()
        try:
            # utf-8-sig strips a BOM transparently, so the file no longer has
            # to be rewritten on disk to get rid of it.
            parser.read(path, encoding="utf-8-sig")
        except UnicodeDecodeError:
            parser = RawConfigParser()
            parser.read(path, encoding="gbk")
        return parser

    def _load_file(self, path):
        """Apply the values from config.ini; on any problem keep what was set so far."""
        try:
            parser = self._read_ini(path)
            self.disableInterfaces = strToList(parser.get('main', 'disableInterfaces'))
            self.port = parser.getint('main', 'port')
            self.apiKey = parser.get('main', 'apiKey')
            self.sleepNum = parser.getfloat('DouYin', 'sleepNum')
        except Exception as e:
            print("参数配置有误,config.ini\nError:", e, flush=True)

    def _apply_environment(self):
        """Let non-empty environment variables override the file values.

        Empty variables are ignored: the shipped docker-compose file defines
        ``apiKey=`` and ``disableInterfaces=`` with no value, which previously
        wiped the values read from config.ini.
        """
        env = os.environ

        raw = env.get("disableInterfaces", "").strip()
        if raw:
            self.disableInterfaces = strToList(raw)

        raw = env.get("port", "").strip()
        if raw:
            try:
                self.port = int(raw)
            except ValueError:
                log.warning(f"Ignoring invalid port in environment: {raw!r}")

        raw = env.get("apiKey", "")
        if raw.strip():
            self.apiKey = raw

        raw = env.get("sleepNum", "").strip()
        if raw:
            try:
                # float, not int: config.ini also stores this value as a float.
                self.sleepNum = float(raw)
            except ValueError:
                log.warning(f"Ignoring invalid sleepNum in environment: {raw!r}")

    def getConfig(self):
        """Return this configuration object."""
        return self
62 |
def strToList(text: str, s=','):
    """Split *text* on separator *s* into trimmed, non-empty items.

    Args:
        text: String to split, e.g. ``"/a, /b"``.
        s: Separator, a comma by default.

    Returns:
        The items with surrounding whitespace removed and empty items
        dropped. An empty list is returned when *text* cannot be split
        (not a string, or an empty separator).
    """
    try:
        parts = text.split(s)
    except (AttributeError, TypeError, ValueError):
        return []
    # Trimming lets "/a, /b" match route rules; dropping empties keeps '' out
    # of the list when the setting is blank.
    return [part.strip() for part in parts if part.strip()]
# Module-level configuration object, created when this module is first imported.
config = Config().getConfig()

if __name__ == "__main__":
    print("Curtin")
74 |
--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
1 | # !/usr/bin/env python
2 | # -*- coding:utf-8 -*-
3 | # @FileName :main.py
4 | # @Time :2025/3/1 09:02
5 | # @Author :Curtin
6 |
7 | from quart import Quart, Blueprint, jsonify, send_file, redirect, request, abort
8 | from pkcWordcloud import createWordCloud
9 | from pkcDouYinVideo import getDyHtml, extract_url, downloadViden, getExtract_lonGurl, get_seconds_from_html
10 | from io import BytesIO
11 | from quart_schema import QuartSchema, tag, validate_request, validate_response, hide, validate_querystring
12 | from utils import config, log, ApiTags, wordcloudTodo, dyQuery, dyResp, ApiErrorResponse, generate_random_name
13 |
14 | #
version = "v1.2.0"
app = Quart(__name__)
# Attach quart-schema: declares an "apiKey" security scheme carried in the
# query string and serves the Swagger UI at /swagger.
QuartSchema(
    app,
    security=[{"apiKey": []}],
    security_schemes={
        "apiKey": {"type": "apiKey", "name": "apiKey", "in": "query"}
    },
    info={
        "title": "PKC-API",
        "version": version,
        "description": '开发者:Curtinlv'
    },
    convert_casing=True,
    swagger_ui_path='/swagger'
)
# app.config['SEND_FILE_MAX_AGE_DEFAULT'] = 0  # disable caching
# Intended as a 3-minute request timeout.
# NOTE(review): 'TIMEOUT' does not look like one of Quart's documented config
# keys (BODY_TIMEOUT / RESPONSE_TIMEOUT) — confirm this line has any effect.
app.config['TIMEOUT'] = 180
# Blueprint that carries the routes defined below
pkcTools = Blueprint('PKC工具', __name__)
35 |
@app.errorhandler(401)
async def handle_401_error(error):
    """Answer HTTP 401 with the JSON error payload asking for an API key."""
    payload = ApiErrorResponse(code=401, msg="请带上 API Key 验证")
    return jsonify(payload), 401
@app.errorhandler(403)
async def handle_403_error(error):
    """Answer HTTP 403 with the JSON error payload for an invalid API key."""
    payload = ApiErrorResponse(code=403, msg="无效的 API Key")
    return jsonify(payload), 403
@app.errorhandler(405)
async def handle_405_error(error):
    """Answer HTTP 405 with the JSON error payload for a disabled interface."""
    payload = ApiErrorResponse(405, msg="接口已被禁用")
    return jsonify(payload), 405
async def verify_api_key():
    """Check the ``apiKey`` query parameter of the current request.

    Aborts with 401 when the parameter is missing or empty and with 403 when
    it does not match ``config.apiKey``. The comparison uses
    ``hmac.compare_digest`` so its duration does not reveal how many leading
    characters of a guessed key were correct.
    """
    import hmac  # local import so the block does not depend on a file-level change

    api_key = request.args.get('apiKey')
    if not api_key:
        abort(401)
    # Compare bytes: compare_digest rejects non-ASCII str arguments.
    supplied = api_key.encode('utf-8')
    expected = str(config.apiKey).encode('utf-8')
    if not hmac.compare_digest(supplied, expected):
        abort(403)
51 |
@app.before_request
async def before_request():
    """Enforce API-key verification and the disabled-interface list.

    Routes listed in ``config.NO_API_KEY_REQUIRED_ROUTES`` skip the key
    check. Routes listed in ``config.disableInterfaces`` are rejected with
    405 after the key check.
    """
    rule = request.url_rule
    if rule is None:
        # No route matched this URL. Reading .rule on None used to raise
        # AttributeError and turn what should be a routing error into a 500.
        return None
    current_route = rule.rule
    if current_route not in config.NO_API_KEY_REQUIRED_ROUTES:
        await verify_api_key()
    if current_route in config.disableInterfaces:
        abort(405)
    return None
@pkcTools.route('/')
@hide
async def pkcApiIndex():
    """Redirect the site root to the Swagger UI with a permanent (301) redirect."""
    swagger_path = '/swagger'
    return redirect(swagger_path, code=301)
65 |
@pkcTools.route('/favicon.ico')
@hide
async def pkcIndexIcon():
    """Serve the favicon from ``./static/favicon.ico``."""
    return await send_file('./static/favicon.ico')
71 |
72 | ###########################【Route】###########################
73 | ########## 词云分析
# The docstring below is kept in its original wording because the schema
# extension publishes route docstrings in the generated API documentation.
@pkcTools.route('/generate_wordcloud', methods=['POST'])
@tag([ApiTags.PKC])
@validate_request(wordcloudTodo)
async def generate_wordcloud(data: wordcloudTodo):
    """
    词云生成

    Request:

    `text`:分析的字符串,必要

    `width`:默认宽度,非必要

    `height`:默认高度,非必要

    `dpi`:默认DPI,值越大越清晰,非必要

    `max_words`:最大词数,非必要

    `background_color`:背景色,默认白色,非必要

    ------

    Response:image/png
    """
    text = data.text
    if not text:
        return jsonify({"code": 400, "msg": "请提供要分析的文本内容。"}), 400

    try:
        wordcloud = await createWordCloud(text, data.width, data.height, data.background_color, data.max_words)
    except ValueError as e:
        return jsonify({"code": 400, "msg": f"文本分析失败: {str(e)}"}), 400
    if wordcloud is None:
        # createWordCloud may report failure by returning None; calling
        # .to_image() on that used to end in an unhandled AttributeError.
        return jsonify({"code": 400, "msg": "文本分析失败: 无法从文本中生成词云"}), 400

    # Render into an in-memory PNG and stream it back.
    img = BytesIO()
    image = wordcloud.to_image()
    image.save(img, format='PNG', dpi=(data.dpi, data.dpi), optimize=True, quality=95)
    img.seek(0)
    return await send_file(img, mimetype='image/png')
117 | ########## 抖音视频解析
# The docstring below is kept in its original wording because the schema
# extension publishes route docstrings in the generated API documentation.
@pkcTools.route('/getDouyinVideoUrl', methods=['GET'])
@tag([ApiTags.PKC])
@validate_querystring(dyQuery)
@validate_response(dyResp, 200)
@validate_response(ApiErrorResponse, 500)
async def getDouyinVideoUrl(query_args: dyQuery):
    """
    抖音视频链接提取


    `url`:抖音分享的链接
    Response:
    {
    "videoUrl": "视频原始链接"
    }
    """
    url = query_args.url
    if not url or 'douyin.com' not in url:
        return ApiErrorResponse(code=400, msg="请带上正确的参数:url"), 400
    try:
        newUrl = getExtract_lonGurl(url)
        if not newUrl:
            # No http(s) URL inside the parameter: answer immediately instead
            # of starting a browser that can only fail.
            return ApiErrorResponse(code=400, msg="请带上正确的参数:url"), 400
        html_content = await getDyHtml(newUrl)
        if not html_content:
            return ApiErrorResponse(code=500, msg="失败"), 500
        video_url = extract_url(html_content)
        if not video_url:
            return ApiErrorResponse(code=404, msg="视频链接提取失败!"), 404
        return dyResp(video_url=video_url)
    except Exception as e:
        # Keep a server-side trace; the client only receives the message.
        log.exception("getDouyinVideoUrl failed")
        return ApiErrorResponse(code=500, msg=str(e)), 500
154 | ########## 抖音视频解析响应视频
# The docstring below is kept in its original wording because the schema
# extension publishes route docstrings in the generated API documentation.
@pkcTools.route('/getDouyinVideo', methods=['GET'])
@tag([ApiTags.PKC])
@validate_querystring(dyQuery)
async def getDouyinVideo(query_args: dyQuery):
    """
    抖音视频提取

    `url`:抖音分享的链接

    Response:video/mp4
    """
    import asyncio  # local import so the block does not depend on a file-level change

    url = query_args.url
    if not url or 'douyin.com' not in url:
        return ApiErrorResponse(code=400, msg="请带上正确的参数:url"), 400
    try:
        newUrl = getExtract_lonGurl(url)
        if not newUrl:
            # No http(s) URL inside the parameter: answer immediately instead
            # of starting a browser that can only fail.
            return ApiErrorResponse(code=400, msg="请带上正确的参数:url"), 400
        html_content = await getDyHtml(newUrl)
        if not html_content:
            return ApiErrorResponse(code=500, msg="失败"), 500
        video_url = extract_url(html_content)
        if not video_url:
            return ApiErrorResponse(code=404, msg="视频链接提取失败!"), 404

        response = await downloadViden(video_url)
        try:
            if response.status_code > 206:
                return ApiErrorResponse(code=500, msg="视频下载失败!"), 500
            # Reading .content downloads the body; do it in a worker thread
            # so the event loop keeps serving other requests meanwhile.
            payload = await asyncio.to_thread(lambda: response.content)
        finally:
            response.close()

        # Hold the video in memory and send it as a download.
        video_data = BytesIO(payload)
        return await send_file(video_data, as_attachment=True, attachment_filename=f"{generate_random_name()}.mp4",
                               mimetype='video/mp4')
    except Exception as e:
        # Keep a server-side trace; the client only receives the message.
        log.exception("getDouyinVideo failed")
        return ApiErrorResponse(code=500, msg=str(e)), 500
# Register the blueprint without a URL prefix
app.register_blueprint(pkcTools, url_prefix='')

if __name__ == '__main__':
    print(f"版本:{version}")
    # Listen on all interfaces on the configured port
    app.run(host='0.0.0.0', port=config.port)
198 |
--------------------------------------------------------------------------------
/pkcDouYinVideo.py:
--------------------------------------------------------------------------------
1 | from requests_html import AsyncHTMLSession
2 | from pyppeteer import launch
3 | import re, sys, os
4 | import requests
5 | import asyncio
6 | import random
7 | from utils import kill_chromium_if_long_running, config, log
8 |
9 | # 动态生成一个版本号
def generate_version():
    """Return a random ``"<major>.<minor>"`` string.

    The major part is drawn from 10..20 and the minor part from 0..10, both
    inclusive, in that order.
    """
    parts = (random.randint(10, 20), random.randint(0, 10))
    return "%d.%d" % parts
14 | # 创建动态的 User-Agent
def generate_user_agent():
    """Return a Safari-on-macOS User-Agent string with a random ``Version/`` token."""
    return (
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 "
        f"(KHTML, like Gecko) Version/{generate_version()} Safari/605.1.15"
    )
# Default request headers used by the HTTP helpers in this module.
# The User-Agent is generated once, when the module is imported.
headers = {
    "Accept": "application/json, text/javascript",
    "Accept-Language": "zh-CN,zh-Hans;q=0.9",
    "Content-Type": "application/x-www-form-urlencoded",
    "User-Agent": generate_user_agent()
}
def getExtract_lonGurl(dyLink):
    """Return the first http(s) URL found in *dyLink*, or ``None`` if there is none.

    The URL runs up to the next whitespace character.
    """
    found = re.search(r'https?://[^\s/$.?#].[^\s]*', dyLink)
    return found.group(0) if found else None
async def downloadViden(url, hd=None, timeout=30):
    """Start a streamed GET for the video at *url*.

    The blocking ``requests.get`` call runs in a worker thread so it does not
    stall the event loop, and it now has a timeout so an unresponsive server
    cannot hang the request forever.

    Args:
        url: Video URL; also sent as the ``Referer`` header.
        hd: Optional mapping whose ``'User-Agent'`` entry is forwarded.
        timeout: Timeout in seconds handed to ``requests.get``.

    Returns:
        The ``requests`` response, opened with ``stream=True`` (the body has
        not been read yet).
    """
    request_headers = {
        "accept": "*/*",
        "accept-language": "zh-CN,zh;q=0.9",
        "range": "bytes=0-",
        "Referer": url,
        "Referrer-Policy": "strict-origin-when-cross-origin"
    }
    if hd:
        request_headers['User-Agent'] = hd['User-Agent']
    return await asyncio.to_thread(
        requests.get, url, headers=request_headers, stream=True, timeout=timeout
    )
async def getDyHtml(url):
    """Render *url* with the platform-specific renderer and return its result.

    Windows uses ``get_rendered_html_win``; every other platform uses
    ``get_rendered_html``.
    """
    if sys.platform.startswith('win'):
        return await get_rendered_html_win(url)
    return await get_rendered_html(url)
def getLongURL(url, hd=None, timeout=10):
    """Resolve a Douyin share link to ``https://www.douyin.com/video/<id>``.

    Args:
        url: A canonical video URL (returned unchanged), a short link, or
            share text that contains a link.
        hd: Optional mapping whose ``'User-Agent'`` entry replaces the default.
        timeout: Timeout in seconds for the redirect lookup.

    Returns:
        The canonical video URL.

    Raises:
        ValueError: If no link can be found in *url*, the server does not
            redirect, or the redirect target carries no video id. These
            cases used to surface as ``KeyError`` / ``IndexError`` or as an
            error from requesting ``None``.
    """
    if 'https://www.douyin.com/video/' in url:
        return url

    if not url.startswith('https'):
        url = getExtract_lonGurl(url)
        if not url:
            raise ValueError("no URL found in the share text")

    header = headers.copy()
    if hd:
        header['User-Agent'] = hd['User-Agent']
    # Only the redirect target is needed, so redirects are not followed.
    response = requests.get(url, headers=header, allow_redirects=False, timeout=timeout)
    location = response.headers.get('Location')
    if not location:
        raise ValueError("share link did not redirect to a video page")
    found = re.search(r'video/([^/?#]+)', location)
    if found is None:
        raise ValueError(f"no video id in redirect target: {location}")
    return "https://www.douyin.com/video/" + found.group(1)
70 |
async def get_rendered_html(url, max_retries=3, required_content="https://v3-web.douyinvod.com"):
    """Render *url* in headless Chromium until the page contains *required_content*.

    Args:
        url: Page to open.
        max_retries: Maximum number of load attempts.
        required_content: Substring that must appear in the rendered HTML.

    Returns:
        The rendered HTML, or ``None`` if no attempt produced
        *required_content*.

    The browser is closed in a ``finally`` block. Previously a failure while
    opening the page leaked the Chromium process, and a failure while closing
    it was mistaken for a failed attempt and retried.
    """
    browser_args = [
        '--no-sandbox',
        '--disable-setuid-sandbox',
        '--disable-dev-shm-usage',
        '--disable-accelerated-2d-canvas',
        '--disable-gpu',
        '--window-size=1920x1080'
    ]
    # Use the system Chromium when present, otherwise pyppeteer's default.
    browser = await launch(
        executablePath='/usr/bin/chromium' if os.path.exists('/usr/bin/chromium') else None,
        args=browser_args,
        headless=True,
        timeout=60000  # 60 second launch timeout
    )
    try:
        page = await browser.newPage()
        await page.setJavaScriptEnabled(True)
        # await page.setUserAgent(headers['User-Agent'])
        page.setDefaultNavigationTimeout(60000)  # 60 seconds
        for attempt in range(max_retries):
            try:
                await page.goto(url, {'waitUntil': 'networkidle2'})
                # Extra wait so page scripts can finish; one more second per retry.
                await page.waitFor((config.sleepNum * 1000) + (attempt * 1000))
                await auto_scroll(page)  # scroll down to trigger lazy content
                content = await page.content()
            except Exception as exc:
                log.debug(f"render attempt {attempt + 1}/{max_retries} failed: {str(exc)[:200]}")
                continue
            if required_content in content:
                return content
        return None
    finally:
        try:
            await browser.close()
        except Exception as exc:
            log.debug(f"closing the browser failed: {str(exc)[:200]}")
        kill_chromium_if_long_running()
async def auto_scroll(page, max_scrolls=30):
    """Scroll *page* to the bottom until its height stops growing.

    Args:
        page: Object providing awaitable ``evaluate(js)`` and ``waitFor(ms)``.
        max_scrolls: Upper bound on scroll steps, so a page that keeps
            loading content cannot keep this loop running forever.
    """
    last_height = await page.evaluate('document.body.scrollHeight')
    for _ in range(max_scrolls):
        await page.evaluate('window.scrollTo(0, document.body.scrollHeight);')
        # Give newly requested content a second to load.
        await page.waitFor(1000)
        new_height = await page.evaluate('document.body.scrollHeight')
        if new_height == last_height:
            break
        last_height = new_height
async def get_rendered_html_win(url, max_retries=3, required_content="https://v3-web.douyinvod.com"):
    """Render *url* through requests_html until the page contains *required_content*.

    Makes up to *max_retries* attempts, each sleeping one second longer than
    the previous one during rendering. Returns the rendered HTML, or ``None``
    when every attempt fails. The session is closed before returning.
    """
    session = AsyncHTMLSession()
    for attempt in range(max_retries):
        try:
            # Fetch the page, then run its JavaScript and wait for it to settle.
            response = await session.get(url, headers=headers)
            await response.html.arender(timeout=60, sleep=config.sleepNum+attempt, keep_page=True, scrolldown=3)
            if required_content not in response.html.html:
                continue
            html_text = response.html.html
            if session:
                await session.close()
            kill_chromium_if_long_running()
            return html_text
        except Exception as e:
            continue
    # All attempts used up without finding the marker.
    if session:
        await session.close()
    kill_chromium_if_long_running()
    return None
def extract_url(text):
    """Return the first ``https://v3-web.douyinvod.com`` URL found in a ``src="..."`` attribute.

    Args:
        text: Rendered HTML; ``None`` or an empty string is accepted.

    Returns:
        The URL with ``&amp;`` decoded to ``&``, or ``None`` when nothing
        matches. Serialised HTML escapes ampersands inside attribute values,
        so without decoding the query string of the result would be
        malformed.
    """
    if not text:
        return None
    found = re.search(r'src="(https://v3-web\.douyinvod\.com[^\s"]+)"', text)
    if found is None:
        return None
    # Decode only &amp;: a general entity decoder would also rewrite
    # parameter names that happen to start like an entity.
    return found.group(1).replace('&amp;', '&')
172 |
173 |
def get_seconds_from_html(html_str):
    """Return the first ``MM:SS`` or ``HH:MM:SS`` time in *html_str* as seconds.

    Args:
        html_str: Text to search; ``None`` or an empty string is accepted.

    Returns:
        Total seconds, or ``0`` when no such time is present.

    The previous pattern matched the first run of digits or colons anywhere
    in the text. A stray digit earlier in the markup therefore hid the real
    time, and a fragment such as ``"1:"`` crashed on ``int('')``.
    """
    if not html_str:
        return 0
    # Two or three colon-separated numbers, not glued to other digits/colons.
    found = re.search(r'(?<![\d:])(\d+):(\d+)(?::(\d+))?(?![\d:])', html_str)
    if found is None:
        return 0
    first, second, third = found.groups()
    if third is None:  # "MM:SS"
        hours, minutes, seconds = 0, int(first), int(second)
    else:  # "HH:MM:SS"
        hours, minutes, seconds = int(first), int(second), int(third)
    return hours * 3600 + minutes * 60 + seconds
198 |
199 |
async def main():
    """Manual example: render a sample page and print the extracted video URL."""
    # Usage example
    url = 'https://www.douyin.com/video/1'
    newUrl = getExtract_lonGurl(url)
    # NOTE(review): get_rendered_html_win returns None when every attempt
    # fails, and extract_url passes its argument straight to re.search.
    html_content = await get_rendered_html_win(newUrl)
    # print(html_content)

    videoUrl = extract_url(html_content)
    print(f"视频链接为:{videoUrl}")
def run_async():
    """Run ``main()`` to completion and print any error instead of raising.

    ``asyncio.run`` creates a fresh event loop and always closes it. The
    previous version only closed the loop when ``loop.is_running()`` was
    true, which is never the case after ``run_until_complete`` returns, so
    the loop was left open.
    """
    try:
        asyncio.run(main())
    except Exception as e:
        print(f"An error occurred: {e}")
def read_file(file_path):
    """Return the UTF-8 text of *file_path*.

    Failures are reported through the return value rather than raised: a
    missing file and any other error each yield a descriptive message string.
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as handle:
            return handle.read()
    except FileNotFoundError:
        return f"Error: The file at {file_path} was not found."
    except Exception as exc:
        return f"An error occurred: {str(exc)}"
228 |
229 | if __name__ == '__main__':
230 | # run_async()
231 | pass
232 |
--------------------------------------------------------------------------------