├── swagger.png ├── static ├── pkc.ttf └── favicon.ico ├── utils ├── api_tags.py ├── __init__.py ├── models.py ├── tools.py ├── log.py └── config.py ├── start.sh ├── requirements.txt ├── config.ini ├── docker-compose.yaml ├── pkcWordcloud.py ├── Dockerfile ├── README.md ├── main.py └── pkcDouYinVideo.py /swagger.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/curtinlv/PKC-API/HEAD/swagger.png -------------------------------------------------------------------------------- /static/pkc.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/curtinlv/PKC-API/HEAD/static/pkc.ttf -------------------------------------------------------------------------------- /static/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/curtinlv/PKC-API/HEAD/static/favicon.ico -------------------------------------------------------------------------------- /utils/api_tags.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | 3 | class ApiTags(str, Enum): 4 | """ 5 | Tags used to group API endpoints 6 | """ 7 | PKC = "PKC" -------------------------------------------------------------------------------- /start.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # 设置 Chromium 参数并启动 Xvfb 3 | Xvfb :99 -screen 0 1024x768x24 -ac +extension GLX +render -noreset & 4 | # 解决 /dev/shm 太小的问题 5 | export DISPLAY=:99 6 | exec python main.py 7 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | colorlog==6.9.0 2 | quart==0.18.4 3 | quart_schema==0.17.1 4 | Requests==2.26.0 5 | requests_html==0.10.0 6 | wordcloud==1.9.4 7 | werkzeug==2.2.3 8 | 
lxml_html_clean==0.4.1 9 | psutil == 5.8.0 -------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .api_tags import ApiTags 2 | from .models import wordcloudTodo, dyQuery, dyResp, ApiErrorResponse, ApiSuccessResponse 3 | from .config import config 4 | from .tools import generate_random_name, kill_chromium_if_long_running 5 | from .log import log 6 | 7 | 8 | -------------------------------------------------------------------------------- /config.ini: -------------------------------------------------------------------------------- 1 | ## 主配置,改动需要重启服务生效 2 | [main] 3 | # 默认端口 4 | port = 80 5 | # 接口密钥,如未配置则每次启动随机生成一个 6 | apiKey = 7 | # 禁用的接口,将需要禁用的接口路径填入下面,多个用,分隔。如禁用 抖音视频解析接口,填写 /getDouyinVideo,/getDouyinVideoUrl 8 | disableInterfaces = 9 | 10 | ###### 抖音解析接口配置 ###### 11 | [DouYin] 12 | # 解析等待时间,时间越多解析越慢但成功率越大 13 | sleepNum = 10.0 -------------------------------------------------------------------------------- /docker-compose.yaml: -------------------------------------------------------------------------------- 1 | version: '3' 2 | services: 3 | pkc-api: 4 | image: curtinlv/pkc-api:latest 5 | container_name: pkc-api 6 | ports: 7 | - "80:80" 8 | environment: 9 | - apiKey= #接口密钥,如未配置则每次启动随机生成一个 10 | - disableInterfaces= #禁用的接口,将需要禁用的接口路径填入下面,多个用,分隔。如禁用 抖音视频解析接口,填写 /getDouyinVideo,/getDouyinVideoUrl 11 | - sleepNum=5 #解析等待时间,时间越多解析越慢但成功率越大 12 | volumes: 13 | - ./config.ini:/app/config.ini #映射配置文件,需手动建立config.ini 14 | # - ./static/pkc.ttf:/app/static/pkc.ttf #词云字体 15 | restart: unless-stopped -------------------------------------------------------------------------------- /pkcWordcloud.py: -------------------------------------------------------------------------------- 1 | from wordcloud import WordCloud 2 | async def createWordCloud(text, width, height, background_color, max_words): 3 | # 配置词云参数 4 | wc_config = { 5 | 'width': width, 6 | 
'height': height, 7 | 'background_color': background_color, 8 | 'max_words': max_words, 9 | 'scale': 3, # 提升渲染清晰度 10 | 'min_font_size': 10 # 最新字体大小 11 | # 'collocations': False # 禁用词组组合 12 | } 13 | 14 | # 生成词云(优化字体渲染) 15 | try: 16 | wordcloud = WordCloud(**wc_config).generate(text) 17 | return wordcloud 18 | except ValueError as e: 19 | return None -------------------------------------------------------------------------------- /utils/models.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass, field 2 | 3 | @dataclass 4 | class wordcloudTodo: 5 | text: str = field(default='PKC很棒,也很简单') 6 | width: int = field(default=600) # 默认宽度 7 | height: int = field(default=600) # 默认高度 8 | dpi: int = field(default=400) # 默认DPI,值越大越清晰 9 | max_words: int = field(default=100) # 最大词数 10 | background_color: str = field(default='white') # 背景色,默认白色 11 | 12 | @dataclass 13 | class dyQuery: 14 | url: str 15 | # ua: str = field(default=None) 16 | 17 | @dataclass 18 | class dyResp: 19 | code: int = 200 20 | msg: str = '成功' 21 | video_url: str = '视频原始链接' 22 | 23 | @dataclass 24 | class ApiErrorResponse: 25 | code: int = 500 26 | msg: str = '错误信息' 27 | 28 | @dataclass 29 | class ApiSuccessResponse: 30 | """ 31 | Default success response 32 | """ 33 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | # 使用多架构兼容的 Python 基础镜像 2 | FROM python:3.9-slim 3 | # 设置工作目录 4 | WORKDIR /app 5 | COPY PKC-API/ . 
6 | RUN apt-get update && apt-get install -y \ 7 | wget \ 8 | gnupg \ 9 | build-essential \ 10 | python3-dev \ 11 | libjpeg-dev \ 12 | libfreetype6-dev \ 13 | zlib1g-dev \ 14 | libpng-dev \ 15 | && apt-get install -y --no-install-recommends \ 16 | libx11-6 \ 17 | libxcomposite1 \ 18 | libxdamage1 \ 19 | libxext6 \ 20 | libxfixes3 \ 21 | libxrandr2 \ 22 | libxtst6 \ 23 | libappindicator3-1 \ 24 | libasound2 \ 25 | libatk-bridge2.0-0 \ 26 | libatk1.0-0 \ 27 | libcairo2 \ 28 | libcups2 \ 29 | libdbus-1-3 \ 30 | libdrm2 \ 31 | libgbm1 \ 32 | libgdk-pixbuf2.0-0 \ 33 | libglib2.0-0 \ 34 | libgtk-3-0 \ 35 | libnspr4 \ 36 | libnss3 \ 37 | libpango-1.0-0 \ 38 | libpangocairo-1.0-0 \ 39 | libxcb1 \ 40 | libxss1 \ 41 | libxkbcommon0 \ 42 | xvfb \ 43 | fonts-wqy-microhei \ 44 | fonts-wqy-zenhei \ 45 | chromium \ 46 | chromium-driver \ 47 | && rm -rf /var/lib/apt/lists/* 48 | 49 | # 设置 Chromium 环境变量 50 | ENV FONT_PATH=/app/static/pkc.ttf 51 | ENV DISPLAY=:99 52 | ENV CHROMIUM_PATH=/usr/bin/chromium 53 | ENV PUPPETEER_SKIP_DOWNLOAD=true 54 | ENV PUPPETEER_EXECUTABLE_PATH=/usr/bin/chromium 55 | 56 | # 安装 Python 依赖 57 | RUN pip install --upgrade pip && \ 58 | pip install --no-cache-dir -r requirements.txt 59 | 60 | # 暴露端口 61 | EXPOSE 80 62 | 63 | # 启动脚本 64 | RUN chmod +x /app/start.sh 65 | 66 | CMD ["/app/start.sh"] 67 | 68 | # buildx build --platform linux/amd64,linux/arm64 -t curtinlv/pkc-api:latest --push . 69 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # PKC-API 2 | ### v1.2.0 3 | ### API接口名称 4 | *** 5 | - *1.词云分析* 6 | - *2.抖音视频解析* 7 | - *3. 
...* 8 | *** 9 | ## Ⅰ.搭建PKC-API 10 | 环境要求: 11 | CPU: 2核心或以上 12 | 内存: 4GB或更高 13 | 其他:国内IP 14 | ### 方式一:Docker一键部署 15 | ```bash 16 | docker run -d --name pkc-api -p 80:80 curtinlv/pkc-api:latest 17 | ``` 18 | 19 | ### 方式二:Docker-compose部署 20 | 建立文件`docker-compose.yaml`,文件内容以下: 21 | ```yaml 22 | version: '3' 23 | services: 24 | pkc-api: 25 | image: curtinlv/pkc-api:latest 26 | container_name: pkc-api 27 | ports: 28 | - "80:80" 29 | environment: 30 | - apiKey= #接口密钥,如未配置则每次启动随机生成一个 31 | - disableInterfaces= #禁用的接口,将需要禁用的接口路径填入下面,多个用,分隔。如禁用 抖音视频解析接口,填写 /getDouyinVideo,/getDouyinVideoUrl 32 | - sleepNum=10 #解析等待时间,时间越多解析越慢但成功率越大 33 | volumes: 34 | - ./config.ini:/app/config.ini #映射配置文件config.ini 35 | # - ./static/pkc.ttf:/app/static/pkc.ttf #词云字体 36 | restart: unless-stopped 37 | ``` 38 | 启动 39 | ```bash 40 | docker-compose up -d 41 | ``` 42 | ### 方式三:Python启动 43 | 版本要求:`python3.9 +` 44 | ```bash 45 | # 拉取本项目 46 | git clone https://github.com/curtinlv/PKC-API.git 47 | # 切换项目目录 48 | cd PKC-API 49 | # 安装依赖包 50 | pip install -r requirements.txt 51 | # 词云字体(可自定义) 52 | export FONT_PATH=./static/pkc.ttf 53 | # 启动 54 | python main.py 55 | # 或 56 | nohup python main.py >./log.log 2>&1 & #后台启动 57 | ```` 58 | ## Ⅱ.API调试页面 59 | ```html 60 | http://ip/swagger 61 | ``` 62 | ![swagger.png](swagger.png) 63 | 64 | ## Ⅲ.更新日志 65 | ~~~ 66 | v1.2.0 67 | 1、优化抖音解析接口 68 | 2、新增外挂配置文件 config.ini(可配置自定义端口、apiKey验证、禁用指定接口) 69 | 3、增加接口apiKey认证(默认临时生成apiKey到控制台,如需配置固定apiKey请编辑配置文件config.ini) 70 | 71 | v1.1.0 72 | 1、新增抖音解析接口 73 | 74 | v1.0.0 75 | 1、新增词云接口 76 | ~~~ -------------------------------------------------------------------------------- /utils/tools.py: -------------------------------------------------------------------------------- 1 | # !/usr/bin/env python 2 | # -*- coding:utf-8 -*- 3 | # @FileName :tools.py 4 | # @Time :2025/3/6 11:28 5 | # @Author :Curtin 6 | 7 | 8 | import random 9 | import string 10 | import psutil 11 | import time 12 | import sys 13 | from .log import log 14 | 15 | def 
generate_random_name(length=8): 16 | # 随机生成一个包含字母和数字的名称 17 | name = ''.join(random.choices(string.ascii_letters + string.digits, k=length)) 18 | return name 19 | 20 | async def save_content_to_file(content, file_path): 21 | # 打开文件并写入内容 22 | with open(file_path, 'w', encoding='utf-8') as file: 23 | file.write(content) 24 | # print(f"内容已保存到 {file_path}") 25 | 26 | def kill_chromium_if_long_running(): 27 | # 遍历系统中的所有进程 28 | if not sys.platform.startswith('win'): 29 | for proc in psutil.process_iter(['pid', 'name', 'create_time']): 30 | try: 31 | if 'chrom' in proc.info['name'].lower(): 32 | # 计算进程的运行时间 33 | create_time = proc.info['create_time'] 34 | current_time = time.time() # 当前时间 35 | run_time = current_time - create_time # 进程运行时间(秒) 36 | # 如果运行时间超过30秒,则杀掉进程 37 | if run_time > 60: 38 | # log.info(f"Process {proc.info['name']} (PID: {proc.info['pid']}) running for {run_time:.2f} seconds. Killing process.") 39 | proc.terminate() # 终止进程 40 | # proc.wait() # 等待进程终止 41 | # log.info(f"Process {proc.info['pid']} has been terminated.") 42 | except (psutil.NoSuchProcess, psutil.AccessDenied, psutil.ZombieProcess): 43 | # 捕获异常,避免权限问题或进程已结束 44 | pass 45 | 46 | 47 | 48 | -------------------------------------------------------------------------------- /utils/log.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import colorlog 3 | import time 4 | import sys 5 | 6 | def print_header(): 7 | print("\n" + "="*50, flush=True) 8 | print(" ____ __ __ ____ ", flush=True) 9 | print(" | _ \ | |/ / / ___| ", flush=True) 10 | print(" | |_) | | ' / | | ", flush=True) 11 | print(" | __/ | . 
\ | |____ ", flush=True) 12 | print(" |_| |_|\_\\ |______\ ", flush=True) 13 | print("\n" + "=" * 50, flush=True) 14 | print("Initializing PKC-API...\n", flush=True) 15 | print_header() 16 | def typing_effect(text, delay=0.1): 17 | """模拟打字效果""" 18 | for char in text: 19 | sys.stdout.write(char) 20 | sys.stdout.flush() 21 | time.sleep(delay) 22 | print() # 打印新的一行 23 | 24 | class Logger: 25 | def __init__(self, log_level=logging.DEBUG): 26 | # 创建一个日志器 27 | self.logger = logging.getLogger() 28 | 29 | # 设置日志级别 30 | self.logger.setLevel(log_level) 31 | 32 | # 创建带颜色的流处理器 33 | log_handler = colorlog.StreamHandler() 34 | 35 | # 创建带颜色的日志格式 36 | formatter = colorlog.ColoredFormatter( 37 | '%(asctime)s - %(levelname)s - %(message)s', 38 | datefmt='%Y-%m-%d %H:%M:%S', 39 | log_colors={ 40 | 'DEBUG': 'cyan', 41 | 'INFO': 'green', 42 | 'WARNING': 'yellow', 43 | 'ERROR': 'red', 44 | 'CRITICAL': 'bold_red', 45 | } 46 | ) 47 | 48 | # 设置日志格式 49 | log_handler.setFormatter(formatter) 50 | 51 | # 将处理器添加到日志器 52 | self.logger.addHandler(log_handler) 53 | 54 | def get_logger(self): 55 | """返回日志器对象""" 56 | return self.logger 57 | 58 | # 示例:如何在其他模块中使用这个带颜色的 Logger 类 59 | # 创建日志实例 60 | log = Logger(log_level=logging.INFO).get_logger() 61 | 62 | # 在需要使用日志的模块中调用 Logger 63 | if __name__ == '__main__': 64 | # 创建日志实例 65 | logger = Logger().get_logger() 66 | # 使用 logger 打印日志 67 | logger.debug("这是一个调试信息") # 蓝色 68 | logger.info("这是一个普通信息") # 绿色 69 | logger.warning("这是一个警告信息") # 黄色 70 | logger.error("这是一个错误信息") # 红色 71 | logger.critical("这是一个严重错误信息") # 粗体红色 72 | -------------------------------------------------------------------------------- /utils/config.py: -------------------------------------------------------------------------------- 1 | # !/usr/bin/env python 2 | # -*- coding:utf-8 -*- 3 | # @FileName :config.py 4 | # @Time :2025/3/14 11:47 5 | # @Author :Curtin 6 | import os 7 | from configparser import RawConfigParser 8 | from .tools import generate_random_name 9 | from .log import log 10 | 
class Config:
    """Runtime settings for PKC-API.

    Values are resolved in this order (later wins):

    1. built-in defaults,
    2. ``config.ini`` in the project root (the parent of this package),
    3. non-empty environment variables ``disableInterfaces``, ``port``,
       ``apiKey`` and ``sleepNum``.

    If no API key is configured after these steps, a temporary one is
    generated and written to the log.
    """

    def __init__(self):
        # Project root = parent directory of the package holding this module.
        # (String-replacing 'utils' in the path would also mangle any other
        # path component that happens to contain that text.)
        project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
        config_path = os.path.join(project_root, "config.ini")

        # Routes that can be called without an API key.
        self.NO_API_KEY_REQUIRED_ROUTES = ['/', '/favicon.ico', '/swagger', '/openapi.json']
        # Route paths that are switched off.
        self.disableInterfaces = []
        self.port = 80
        self.apiKey = ''
        # Douyin: seconds to wait for the page to render.
        self.sleepNum = 10.0

        try:
            self._load_config_file(config_path)
        except Exception as e:
            # Best effort: a broken or missing config.ini must not stop the
            # service; whatever was read before the failure is kept.
            print("参数配置有误,config.ini\nError:", e, flush=True)

        # Environment variables take priority over config.ini.
        self._apply_env_overrides()

        if not self.apiKey:
            tmpApiKey = generate_random_name(length=32)
            log.info(f"你的接口密钥(temp apiKey):{tmpApiKey} !!!这是临时接口密钥,如需修改请到config.ini文件配置apiKey")
            self.apiKey = tmpApiKey

    def _load_config_file(self, path):
        """Read ``config.ini`` at *path* into this object.

        The file is decoded as UTF-8 (a leading BOM is tolerated) and, if
        that fails, as GBK. The file itself is never modified.

        Raises:
            configparser.Error: a section/option is missing (this is also
                what happens when the file does not exist).
            ValueError: ``port`` or ``sleepNum`` is not numeric.
        """
        parser = RawConfigParser()
        try:
            # utf-8-sig strips a BOM if present, so no rewrite is needed.
            parser.read(path, encoding="utf-8-sig")
        except UnicodeDecodeError:
            parser = RawConfigParser()
            parser.read(path, encoding="gbk")
        self.disableInterfaces = strToList(parser.get('main', 'disableInterfaces'))
        self.port = parser.getint('main', 'port')
        self.apiKey = parser.get('main', 'apiKey')
        self.sleepNum = parser.getfloat('DouYin', 'sleepNum')

    def _apply_env_overrides(self, env=None):
        """Override settings from environment variables.

        Args:
            env: mapping to read from; defaults to ``os.environ``.

        Only non-empty values override. An empty variable (for example
        ``apiKey=`` left blank in a compose file) keeps the value that was
        read from config.ini instead of wiping it.

        Raises:
            ValueError: ``port`` / ``sleepNum`` is set but not numeric.
        """
        if env is None:
            env = os.environ

        def value_of(key):
            return (env.get(key) or "").strip()

        disabled = value_of("disableInterfaces")
        if disabled:
            self.disableInterfaces = strToList(disabled)
        port = value_of("port")
        if port:
            self.port = int(port)
        api_key = value_of("apiKey")
        if api_key:
            self.apiKey = api_key
        sleep_num = value_of("sleepNum")
        if sleep_num:
            # float, to match the type read from config.ini ("2.5" is valid).
            self.sleepNum = float(sleep_num)

    def getConfig(self):
        """Return this configuration object."""
        return self
strToList(text: str, s = ','): 64 | list = [] 65 | try: 66 | list = text.split(s) 67 | except: 68 | pass 69 | return list 70 | config = Config().getConfig() 71 | 72 | if __name__ == "__main__": 73 | print("Curtin") 74 | -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | # !/usr/bin/env python 2 | # -*- coding:utf-8 -*- 3 | # @FileName :main.py 4 | # @Time :2025/3/1 09:02 5 | # @Author :Curtin 6 | 7 | from quart import Quart, Blueprint, jsonify, send_file, redirect, request, abort 8 | from pkcWordcloud import createWordCloud 9 | from pkcDouYinVideo import getDyHtml, extract_url, downloadViden, getExtract_lonGurl, get_seconds_from_html 10 | from io import BytesIO 11 | from quart_schema import QuartSchema, tag, validate_request, validate_response, hide, validate_querystring 12 | from utils import config, log, ApiTags, wordcloudTodo, dyQuery, dyResp, ApiErrorResponse, generate_random_name 13 | 14 | # 15 | version = "v1.2.0" 16 | app = Quart(__name__) 17 | QuartSchema( 18 | app, 19 | security=[{"apiKey": []}], 20 | security_schemes={ 21 | "apiKey": {"type": "apiKey", "name": "apiKey", "in": "query"} 22 | }, 23 | info={ 24 | "title": "PKC-API", 25 | "version": version, 26 | "description": '开发者:Curtinlv' 27 | }, 28 | convert_casing=True, 29 | swagger_ui_path='/swagger' 30 | ) 31 | # app.config['SEND_FILE_MAX_AGE_DEFAULT'] = 0 # 禁止缓存 32 | app.config['TIMEOUT'] = 180 # 请求超时设置为3分钟 33 | # 创建一个蓝图 34 | pkcTools = Blueprint('PKC工具', __name__) 35 | 36 | @app.errorhandler(401) 37 | async def handle_401_error(error): 38 | return jsonify(ApiErrorResponse(code=401, msg="请带上 API Key 验证")), 401 39 | @app.errorhandler(403) 40 | async def handle_403_error(error): 41 | return jsonify(ApiErrorResponse(code=403, msg="无效的 API Key")), 403 42 | @app.errorhandler(405) 43 | async def handle_405_error(error): 44 | return jsonify(ApiErrorResponse(405, msg="接口已被禁用")), 405 45 
async def verify_api_key():
    """Abort the current request unless it carries the configured API key.

    The key is read from the ``apiKey`` query-string parameter.

    Aborts with:
        401: no ``apiKey`` parameter was supplied.
        403: the supplied key differs from ``config.apiKey``.
    """
    import hmac  # local import: only needed for the comparison below

    api_key = request.args.get('apiKey')
    if not api_key:
        abort(401)
    # Constant-time comparison so response timing does not reveal how much
    # of a guessed key is correct. Bytes are used because compare_digest
    # rejects non-ASCII str arguments.
    if not hmac.compare_digest(api_key.encode('utf-8'), str(config.apiKey).encode('utf-8')):
        abort(403)


@app.before_request
async def before_request():
    """Enforce the API key and the disabled-interface list on every request.

    Aborts with 401/403 (see ``verify_api_key``) or 405 when the matched
    route is listed in ``config.disableInterfaces``.
    """
    # url_rule is None when no route matched the URL; fall back to the raw
    # path so such requests are still subject to the key check instead of
    # failing with an AttributeError (HTTP 500).
    matched_rule = request.url_rule
    current_route = matched_rule.rule if matched_rule is not None else request.path
    if current_route not in config.NO_API_KEY_REQUIRED_ROUTES:
        await verify_api_key()
    if current_route in config.disableInterfaces:
        abort(405)


@pkcTools.route('/')
@hide
async def pkcApiIndex():
    """Redirect the site root to the Swagger UI page."""
    return redirect('/swagger', code=301)


@pkcTools.route('/favicon.ico')
@hide
async def pkcIndexIcon():
    """Serve the site icon from the static directory."""
    result = await send_file('./static/favicon.ico')
    return result
# Word-cloud endpoint.
# NOTE: the route docstring is kept in its original wording because
# quart_schema publishes it as the endpoint description on the Swagger page.
@pkcTools.route('/generate_wordcloud', methods=['POST'])
@tag([ApiTags.PKC])
@validate_request(wordcloudTodo)
async def generate_wordcloud(data: wordcloudTodo):
    """
    词云生成

    Request:

    `text`:分析的字符串,必要

    `width`:默认宽度,非必要

    `height`:默认高度,非必要

    `dpi`:默认DPI,值越大越清晰,非必要

    `max_words`:最大词数,非必要

    `background_color`:背景色,默认白色,非必要

    ------

    Response:image/png
    """
    # Reject missing or whitespace-only text up front.
    text = data.text
    if not text or not text.strip():
        return jsonify({"code": 400, "msg": "请提供要分析的文本内容。"}), 400

    try:
        wordcloud = await createWordCloud(text, data.width, data.height, data.background_color, data.max_words)
    except ValueError as e:
        return jsonify({"code": 400, "msg": f"文本分析失败: {str(e)}"}), 400
    # createWordCloud reports failure by returning None (it catches
    # ValueError itself), so that case must be handled here too; otherwise
    # the to_image() call below fails with AttributeError.
    if wordcloud is None:
        return jsonify({"code": 400, "msg": "文本分析失败: 无法从提供的文本生成词云"}), 400

    # Render to an in-memory PNG carrying the requested DPI metadata.
    img = BytesIO()
    image = wordcloud.to_image()
    image.save(img, format='PNG', dpi=(data.dpi, data.dpi), optimize=True, quality=95)
    img.seek(0)
    return await send_file(img, mimetype='image/png')

# Douyin: resolve a share link to the direct video URL.
# NOTE: the route docstring is kept in its original wording because
# quart_schema publishes it as the endpoint description on the Swagger page.
@pkcTools.route('/getDouyinVideoUrl', methods=['GET'])
@tag([ApiTags.PKC])
@validate_querystring(dyQuery)
@validate_response(dyResp, 200)
@validate_response(ApiErrorResponse, 500)
async def getDouyinVideoUrl(query_args: dyQuery):
    """
    抖音视频链接提取

    `url`:抖音分享的链接

    Response:

    {
    "videoUrl": "视频原始链接"
    }
    """
    url = query_args.url
    if not url or 'douyin.com' not in url:
        return ApiErrorResponse(code=400, msg="请带上正确的参数:url"), 400
    # The share text may wrap the link in other words; pull out the URL.
    # None means there was no http(s) link at all, which is a client error.
    # Catching it here avoids starting a browser for a request that cannot
    # succeed.
    newUrl = getExtract_lonGurl(url)
    if not newUrl:
        return ApiErrorResponse(code=400, msg="请带上正确的参数:url"), 400
    try:
        html_content = await getDyHtml(newUrl)
        if not html_content:
            return ApiErrorResponse(code=500, msg="失败"), 500
        video_url = extract_url(html_content)
        if video_url:
            return dyResp(video_url=video_url)
        return ApiErrorResponse(code=404, msg="视频链接提取失败!"), 404
    except Exception as e:
        # Route boundary: report the failure to the client as a 500.
        return ApiErrorResponse(code=500, msg=str(e)), 500

# Douyin: resolve a share link and return the video file itself.
# NOTE: the route docstring is kept in its original wording because
# quart_schema publishes it as the endpoint description on the Swagger page.
@pkcTools.route('/getDouyinVideo', methods=['GET'])
@tag([ApiTags.PKC])
@validate_querystring(dyQuery)
async def getDouyinVideo(query_args: dyQuery):
    """
    抖音视频提取

    `url`:抖音分享的链接

    Response:video/mp4
    """
    url = query_args.url
    if not url or 'douyin.com' not in url:
        return ApiErrorResponse(code=400, msg="请带上正确的参数:url"), 400
    # None means the text contained no http(s) link: a client error, and
    # not worth starting a browser for.
    newUrl = getExtract_lonGurl(url)
    if not newUrl:
        return ApiErrorResponse(code=400, msg="请带上正确的参数:url"), 400
    try:
        html_content = await getDyHtml(newUrl)
        if not html_content:
            return ApiErrorResponse(code=500, msg="失败"), 500
        video_url = extract_url(html_content)
        if not video_url:
            return ApiErrorResponse(code=404, msg="视频链接提取失败!"), 404

        # Download the remote video.
        response = await downloadViden(video_url)
        try:
            if response.status_code > 206:
                return ApiErrorResponse(code=500, msg="视频下载失败!"), 500
            # Buffer the whole video in memory.
            video_data = BytesIO(response.content)
        finally:
            # The download is requested in streaming mode, so the connection
            # is only released once the response is closed, including on the
            # error path where the body is never read.
            response.close()
        # Sent as an attachment so the client downloads it.
        return await send_file(video_data, as_attachment=True, attachment_filename=f"{generate_random_name()}.mp4",
                               mimetype='video/mp4')
    except Exception as e:
        # Route boundary: report the failure to the client as a 500.
        return ApiErrorResponse(code=500, msg=str(e)), 500


# Register the blueprint on the application.
app.register_blueprint(pkcTools, url_prefix='')

if __name__ == '__main__':
    print(f"版本:{version}")
    app.run(host='0.0.0.0', port=config.port)
def generate_user_agent():
    """Return a Safari-on-macOS User-Agent with a randomised version number.

    The version is ``<major>.<minor>`` with major in [10, 20] and minor in
    [0, 10], drawn in that order.
    """
    version = f"{random.randint(10, 20)}.{random.randint(0, 10)}"
    user_agent = f"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/{version} Safari/605.1.15"
    return user_agent


# Default request headers; the User-Agent is generated once at import time.
headers = {
    "Accept": "application/json, text/javascript",
    "Accept-Language": "zh-CN,zh-Hans;q=0.9",
    "Content-Type": "application/x-www-form-urlencoded",
    "User-Agent": generate_user_agent()
}


def getExtract_lonGurl(dyLink):
    """Return the first http(s) URL found in *dyLink*, or None.

    Args:
        dyLink: share text that may contain a link among other words.

    Returns:
        The first URL as a string, or None when there is no URL or
        *dyLink* is not a string.
    """
    if not isinstance(dyLink, str):
        return None
    found = re.search(r'https?://[^\s/$.?#].[^\s]*', dyLink)
    return found.group(0) if found else None


async def downloadViden(url, hd=None, timeout=60):
    """Start a streamed GET of the video at *url*.

    Args:
        url: direct video URL; also sent as the Referer.
        hd: optional mapping whose ``'User-Agent'`` entry is forwarded.
        timeout: seconds to wait for the connection and for each read.

    Returns:
        The ``requests`` response, opened with ``stream=True``. The caller
        reads ``.content`` and is responsible for closing it.
    """
    request_headers = {
        "accept": "*/*",
        "accept-language": "zh-CN,zh;q=0.9",
        "range": "bytes=0-",
        "Referer": url,
        "Referrer-Policy": "strict-origin-when-cross-origin"
    }
    if hd:
        request_headers['User-Agent'] = hd['User-Agent']
    # requests is blocking; run it in a worker thread so other requests on
    # the event loop are not stalled while the connection is established.
    return await asyncio.to_thread(
        requests.get, url, headers=request_headers, stream=True, timeout=timeout
    )


async def getDyHtml(url):
    """Return the rendered HTML of *url*, or None if rendering failed.

    Windows uses the requests_html renderer; every other platform drives
    Chromium through pyppeteer.
    """
    if sys.platform.startswith('win'):
        return await get_rendered_html_win(url)
    return await get_rendered_html(url)


def getLongURL(url, hd=None):
    """Resolve a Douyin short/share link to ``https://www.douyin.com/video/<id>``.

    Args:
        url: a canonical video URL (returned unchanged), a short link, or
            share text containing one.
        hd: optional mapping whose ``'User-Agent'`` entry overrides the
            default header.

    Raises:
        ValueError: the link did not redirect, or the redirect target has
            no ``video/<id>`` part.
    """
    if 'https://www.douyin.com/video/' in url:
        return url
    if not url.startswith('https'):
        url = getExtract_lonGurl(url)
    header = headers.copy()
    if hd:
        header['User-Agent'] = hd['User-Agent']
    # Only the redirect target is needed, so do not follow it.
    response = requests.get(url, headers=header, allow_redirects=False, timeout=10)
    new_url = response.headers.get('Location')
    if not new_url:
        raise ValueError(f"no redirect returned for {url!r}")
    found = re.search(r'video/([^/?#]+)', new_url)
    if not found:
        raise ValueError(f"no video id in redirect target {new_url!r}")
    return "https://www.douyin.com/video/" + found.group(1)
async def get_rendered_html(url, max_retries=3, required_content="https://v3-web.douyinvod.com"):
    """Render *url* in headless Chromium and return the page HTML.

    The page is loaded up to *max_retries* times; each attempt waits
    ``config.sleepNum`` seconds plus one extra second per earlier attempt,
    then scrolls to the bottom.

    Args:
        url: page to load.
        max_retries: maximum number of load attempts.
        required_content: text that must appear in the HTML for an attempt
            to count as successful.

    Returns:
        The HTML string, or None if no attempt produced *required_content*.
    """
    browser_args = [
        '--no-sandbox',
        '--disable-setuid-sandbox',
        '--disable-dev-shm-usage',
        '--disable-accelerated-2d-canvas',
        '--disable-gpu',
        # Chromium expects "width,height" for this switch.
        '--window-size=1920,1080'
    ]
    browser = await launch(
        # Use the system Chromium when present; otherwise pyppeteer's default.
        executablePath='/usr/bin/chromium' if os.path.exists('/usr/bin/chromium') else None,
        args=browser_args,
        headless=True,
        timeout=60000  # 60 s launch timeout
    )
    try:
        page = await browser.newPage()
        await page.setJavaScriptEnabled(True)
        page.setDefaultNavigationTimeout(60000)  # 60 s per navigation
        for attempt in range(max_retries):
            try:
                await page.goto(url, {'waitUntil': 'networkidle2'})
                # Give scripts time to finish; wait longer on each retry.
                await page.waitFor((config.sleepNum * 1000) + (attempt * 1000))
                await auto_scroll(page)
                content = await page.content()
            except Exception as e:
                log.warning(f"发生异常: {str(e)[:200]}, 重试... (Attempt {attempt + 1}/{max_retries})")
                continue
            if required_content in content:
                return content
        return None
    finally:
        # Always release the browser (this also closes its pages), whether
        # the function returns a result, gives up, or raises during setup.
        try:
            await browser.close()
        except Exception as e:
            log.warning(f"关闭浏览器失败: {str(e)[:200]}")
        kill_chromium_if_long_running()


async def auto_scroll(page, max_scrolls=30):
    """Scroll *page* to the bottom until its height stops growing.

    Args:
        page: object providing awaitable ``evaluate(js)`` and
            ``waitFor(ms)``.
        max_scrolls: upper bound on scroll rounds, so a page that keeps
            loading new content cannot keep this loop running forever.
    """
    last_height = await page.evaluate('document.body.scrollHeight')
    for _ in range(max_scrolls):
        await page.evaluate('window.scrollTo(0, document.body.scrollHeight);')
        # Wait for newly triggered content to load.
        await page.waitFor(1000)
        new_height = await page.evaluate('document.body.scrollHeight')
        if new_height == last_height:
            break
        last_height = new_height


async def get_rendered_html_win(url, max_retries=3, required_content="https://v3-web.douyinvod.com"):
    """Render *url* with requests_html and return the page HTML.

    Same contract as ``get_rendered_html``: up to *max_retries* attempts,
    each sleeping ``config.sleepNum`` seconds plus one per earlier attempt.

    Returns:
        The HTML string, or None if no attempt produced *required_content*.
    """
    session = AsyncHTMLSession()
    try:
        for attempt in range(max_retries):
            try:
                response = await session.get(url, headers=headers)
                # Execute the page's JavaScript and wait for it to settle.
                await response.html.arender(timeout=60, sleep=config.sleepNum + attempt, keep_page=True, scrolldown=3)
                html_text = response.html.html
            except Exception as e:
                log.warning(f"发生异常: {e}, 重试... (Attempt {attempt + 1}/{max_retries})")
                continue
            if required_content in html_text:
                return html_text
        return None
    finally:
        # Always release the session, on success, failure or error.
        try:
            await session.close()
        except Exception as e:
            log.warning(f"关闭浏览器失败: {str(e)[:200]}")
        kill_chromium_if_long_running()
def extract_url(text):
    """Return the first ``src="https://v3-web.douyinvod.com..."`` URL in *text*.

    Args:
        text: HTML source; None or empty yields None.

    Returns:
        The URL with HTML entities decoded (serialised HTML writes ``&`` in
        attribute values as ``&amp;``), or None when nothing matches.
    """
    from html import unescape  # local import: only needed here

    if not text:
        return None
    found = re.search(r'src="(https://v3-web\.douyinvod\.com[^\s"]+)"', text)
    if not found:
        return None
    return unescape(found.group(1))


def get_seconds_from_html(html_str):
    """Convert the first ``HH:MM:SS`` or ``MM:SS`` time in *html_str* to seconds.

    Returns 0 when the input is empty, nothing matches, the match is not in
    one of those two shapes, or a field is not a number.
    """
    if not html_str:
        return 0
    # NOTE(review): this matches the first run of digits/colons anywhere in
    # the text - confirm that is specific enough for the intended markup.
    found = re.search(r'([\d:]+)', html_str, re.DOTALL)
    if not found:
        return 0
    fields = found.group(1).split(':')
    if len(fields) not in (2, 3):
        return 0
    try:
        numbers = [int(field) for field in fields]
    except ValueError:
        # e.g. a bare ":" or "1::2" produces empty fields.
        return 0
    if len(numbers) == 2:  # "MM:SS" has no hours field
        numbers.insert(0, 0)
    hours, minutes, seconds = numbers
    return hours * 3600 + minutes * 60 + seconds


async def main():
    """Manual demo: render one video page and print the extracted link."""
    url = 'https://www.douyin.com/video/1'
    newUrl = getExtract_lonGurl(url)
    html_content = await get_rendered_html_win(newUrl)
    # extract_url returns None when rendering failed (html_content is None).
    videoUrl = extract_url(html_content)
    print(f"视频链接为:{videoUrl}")


def run_async():
    """Run ``main()`` to completion, printing any error instead of raising."""
    try:
        # asyncio.run creates a fresh event loop and always closes it.
        asyncio.run(main())
    except Exception as e:
        print(f"An error occurred: {e}")


def read_file(file_path):
    """Return the UTF-8 text of *file_path*.

    Errors are reported in the return value rather than raised: a missing
    file or any other failure yields a descriptive message string.
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as file:
            return file.read()
    except FileNotFoundError:
        return f"Error: The file at {file_path} was not found."
    except Exception as e:
        return f"An error occurred: {str(e)}"


if __name__ == '__main__':
    # run_async()
    pass