├── src
│   ├── api
│   │   ├── requirements.txt
│   │   ├── .env.example
│   │   └── app
│   │       ├── utils.py
│   │       ├── tts_handler.py
│   │       └── server.py
│   ├── setup.sh
│   ├── openai-edge-tts.sh
│   ├── startup.sh
│   └── sshx.sh
├── 免费部署微软EdgeTTS,适配openai语音api接口,一键部署教程-封面.jpg
├── Dockerfile
└── README.md

/src/api/requirements.txt:
--------------------------------------------------------------------------------
flask
gevent
python-dotenv
edge-tts
art
--------------------------------------------------------------------------------

/免费部署微软EdgeTTS,适配openai语音api接口,一键部署教程-封面.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aigem/edgeTTS-openai-api/HEAD/免费部署微软EdgeTTS,适配openai语音api接口,一键部署教程-封面.jpg
--------------------------------------------------------------------------------

/src/setup.sh:
--------------------------------------------------------------------------------
#!/bin/sh

# Add any required setup steps here
echo "Running setup..."

# Install pnpm and pm2
npm install -g pnpm pm2

echo "Setup complete"
--------------------------------------------------------------------------------

/src/api/.env.example:
--------------------------------------------------------------------------------
API_KEY=your_api_key_here
PORT=7860

DEFAULT_VOICE=en-US-AndrewNeural
DEFAULT_RESPONSE_FORMAT=mp3
DEFAULT_SPEED=1.0

DEFAULT_LANGUAGE=zh-CN

REQUIRE_API_KEY=True
--------------------------------------------------------------------------------

/src/openai-edge-tts.sh:
--------------------------------------------------------------------------------
#!/bin/sh
set -e

echo "Installing openai-edge-tts..."

# Check that the openai-edge-tts directory exists
if [ ! -d "$HOMEDIR/edgeTTS-openai-api/src/api" ]; then
    echo "openai-edge-tts directory not found"
    exit 1
fi

# Enter the openai-edge-tts directory and install dependencies
cd "$HOMEDIR/edgeTTS-openai-api/src/api"
pip install -r requirements.txt

# Create the .env file
cp .env.example .env

echo "openai-edge-tts installed"
--------------------------------------------------------------------------------

/src/startup.sh:
--------------------------------------------------------------------------------
#!/bin/sh

cat "$HOMEDIR/edgeTTS-openai-api/src/api/.env"

# Check whether OPENAI_EDGE_TTS_INSTALL is true
if [ "$OPENAI_EDGE_TTS_INSTALL" = true ]; then
    cd ${HOMEDIR}/edgeTTS-openai-api/src/api
    # Remove any existing instance first
    pm2 delete openai-edge-tts 2>/dev/null || true
    # Start a fresh instance
    pm2 start python --name openai-edge-tts -- ${HOMEDIR}/edgeTTS-openai-api/src/api/app/server.py
    pm2 save
fi

cd ${HOMEDIR}

# Tail the logs
pm2 logs --lines 50
--------------------------------------------------------------------------------
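
A note on configuration: `openai-edge-tts.sh` copies `.env.example` to `.env` as-is, so the service starts with the placeholder `API_KEY`. Below is a minimal sketch of customising the file before `startup.sh` launches the server; the key value and the `sed` edits are illustrative, not part of the repository.

```bash
# Assumes the repo was cloned to $HOMEDIR/edgeTTS-openai-api, as in the scripts above
cd "$HOMEDIR/edgeTTS-openai-api/src/api"
cp .env.example .env                                   # same step openai-edge-tts.sh performs
sed -i 's/^API_KEY=.*/API_KEY=sk-local-demo/' .env     # placeholder key, choose your own (GNU sed syntax)
sed -i 's/^PORT=.*/PORT=7860/' .env                    # keep in line with the port exposed by the Dockerfile
```
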
/src/sshx.sh:
--------------------------------------------------------------------------------
#!/bin/sh
set -e

echo "Installing sshx..."

# Download URL and target directory
DOWNLOAD_URL="https://sshx.s3.amazonaws.com/sshx-x86_64-unknown-linux-musl.tar.gz"
TARGET_DIR="$HOMEDIR/sshx"

# Create the target directory
mkdir -p "$TARGET_DIR"
chown $USER:$USER "$TARGET_DIR"

# Download the tar.gz archive
echo "Downloading sshx..."
curl -L "$DOWNLOAD_URL" -o /tmp/sshx.tar.gz

# Extract into the target directory
echo "Extracting sshx..."
tar -xzf /tmp/sshx.tar.gz -C "$TARGET_DIR"

# Clean up the temporary file
rm /tmp/sshx.tar.gz

# Verify the extraction
if [ -f "$TARGET_DIR/sshx" ]; then
    echo "sshx has been successfully downloaded and extracted to $TARGET_DIR"
    chmod +x "$TARGET_DIR/sshx"
    echo "sshx version: $($TARGET_DIR/sshx -V)"
else
    echo "Failed to extract sshx. Please check the download URL and try again."
    exit 1
fi

# Add sshx to PATH (only affects this script's environment)
export PATH="$PATH:$TARGET_DIR"

echo "sshx installed successfully"
--------------------------------------------------------------------------------

/Dockerfile:
--------------------------------------------------------------------------------
FROM nikolaik/python-nodejs:python3.10-nodejs20

# CACHEBUST build argument (bump it to invalidate the clone layer below)
ARG CACHEBUST=1

ENV USER=pn \
    HOMEDIR=/home/pn \
    PORT=7860 \
    SSHX_INSTALL=true \
    OPENAI_EDGE_TTS_INSTALL=true

RUN apt-get update && apt-get install -y --no-install-recommends \
    apt-utils \
    build-essential \
    libpq-dev \
    neofetch \
    git \
    curl \
    vim \
    && rm -rf /var/lib/apt/lists/*

WORKDIR ${HOMEDIR}

# Use CACHEBUST to force a fresh clone
RUN git clone https://github.com/aigem/edgeTTS-openai-api.git \
    && chmod +x ${HOMEDIR}/edgeTTS-openai-api/src/*.sh \
    && ls -la ${HOMEDIR}/edgeTTS-openai-api/src \
    && ls -la ${HOMEDIR}/edgeTTS-openai-api \
    && ${HOMEDIR}/edgeTTS-openai-api/src/setup.sh \
    && if [ "$SSHX_INSTALL" = true ]; then ${HOMEDIR}/edgeTTS-openai-api/src/sshx.sh; fi \
    && if [ "$OPENAI_EDGE_TTS_INSTALL" = true ]; then ${HOMEDIR}/edgeTTS-openai-api/src/openai-edge-tts.sh; fi

# Expose the API port
EXPOSE ${PORT}

# API_KEY is supplied as an environment variable at runtime
ENTRYPOINT /home/pn/edgeTTS-openai-api/src/startup.sh
--------------------------------------------------------------------------------

/src/api/app/utils.py:
--------------------------------------------------------------------------------
# utils.py

from flask import request, jsonify
from functools import wraps
import os
from dotenv import load_dotenv

load_dotenv()

def getenv_bool(name: str, default: bool = False) -> bool:
    return os.getenv(name, str(default)).lower() in ("yes", "y", "true", "1", "t")

API_KEY = os.getenv('API_KEY', 'your_api_key_here')
REQUIRE_API_KEY = getenv_bool('REQUIRE_API_KEY', True)

def require_api_key(f):
    @wraps(f)
    def decorated_function(*args, **kwargs):
        if not REQUIRE_API_KEY:
            return f(*args, **kwargs)
        auth_header = request.headers.get('Authorization')
        if not auth_header or not auth_header.startswith('Bearer '):
            return jsonify({"error": "Missing or invalid API key"}), 401
        token = auth_header.split('Bearer ')[1]
        if token != API_KEY:
            return jsonify({"error": "Invalid API key"}), 401
        return f(*args, **kwargs)
    return decorated_function

# Mapping of audio format to MIME type
AUDIO_FORMAT_MIME_TYPES = {
    "mp3": "audio/mpeg",
    "opus": "audio/ogg",
    "aac": "audio/aac",
    "flac": "audio/flac",
    "wav": "audio/wav",
    "pcm": "audio/L16"
}
--------------------------------------------------------------------------------
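
For reference, here is roughly what the `require_api_key` decorator above looks like from a client, assuming the server is listening on port 5050 with `REQUIRE_API_KEY=True` and the placeholder key from `.env.example`. This is a sketch, not part of the repository.

```bash
# No Authorization header -> HTTP 401 with {"error": "Missing or invalid API key"}
curl -s -o /dev/null -w "%{http_code}\n" -X POST http://localhost:5050/v1/models

# Matching Bearer token -> HTTP 200 with the model list
curl -s -o /dev/null -w "%{http_code}\n" -X POST http://localhost:5050/v1/models \
  -H "Authorization: Bearer your_api_key_here"
```
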
/README.md:
--------------------------------------------------------------------------------
# OpenAI-compatible Microsoft Edge TTS API

This project provides a Microsoft Edge TTS based replacement for the OpenAI TTS API. It lets you convert text to speech through API calls, with support for multiple voices and audio formats.

## Features

- OpenAI TTS API compatible interface
- Multiple voices and languages
- Adjustable speech speed
- Multiple audio output formats (mp3, opus, aac, flac, wav, pcm)
- API endpoints for listing models and available voices
- Optional API key authentication
- Hugging Face support for easy deployment

## Installation

### Method 1:
Open the Hugging Face page and click the `Duplicate this Space` button to deploy directly.

[Hugging Face deployment link](https://huggingface.co/spaces/fuliai/edgetts-api-openai)

This is the simplest method.

Video tutorial on [Bilibili](https://www.bilibili.com/video/BV1M8yiYNEkj/)
![img](https://raw.githubusercontent.com/aigem/edgeTTS-openai-api/refs/heads/main/%E5%85%8D%E8%B4%B9%E9%83%A8%E7%BD%B2%E5%BE%AE%E8%BD%AFEdgeTTS%2C%E9%80%82%E9%85%8Dopenai%E8%AF%AD%E9%9F%B3api%E6%8E%A5%E5%8F%A3%EF%BC%8C%E4%B8%80%E9%94%AE%E9%83%A8%E7%BD%B2%E6%95%99%E7%A8%8B-%E5%B0%81%E9%9D%A2.jpg)

### Method 2: Copy the Dockerfile contents, create a new Space on Hugging Face, add a Dockerfile there, paste the contents, and save; the Space will `deploy` automatically.

### Method 3: Local installation

1. Clone the repository:

```bash
git clone https://github.com/aigem/edgeTTS-openai-api.git
cd edgeTTS-openai-api/src/api
```

2. Install the dependencies:

```bash
pip install -r requirements.txt
```

3. Configure the environment variables by copying `.env.example` to `.env` and adjusting the settings as needed:

```bash
cp .env.example .env
```

4. Run `python app/server.py`. The server will listen on http://localhost:PORT (see the `.env` file for the exact port).

## API usage

- Text to speech: POST `/v1/audio/speech`
- List available models: GET/POST `/v1/models`
- List available voices: GET/POST `/v1/voices`
- List all available voices: GET/POST `/v1/voices/all`

### Examples

#### Text to speech

```bash
curl http://localhost:5050/v1/audio/speech \
  -H "Authorization: Bearer your_api_key_here" \
  -H "Content-Type: application/json" \
  -d '{
    "model": "tts-1",
    "input": "Hello, world!",
    "voice": "alloy",
    "response_format": "mp3",
    "speed": 1.0
  }' \
  --output speech.mp3
```

#### List available models

```bash
curl http://localhost:5050/v1/models \
  -H "Authorization: Bearer your_api_key_here"
```
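
#### List voices for a specific language (illustrative)

The `/v1/voices` endpoint also accepts an optional `language` (or `locale`) parameter, and voice names outside the OpenAI aliases are passed straight through to edge-tts, so a locale-specific voice such as `zh-CN-XiaoxiaoNeural` can be requested directly. A sketch, assuming the server runs on port 5050 with the placeholder API key:

```bash
# List zh-CN voices
curl "http://localhost:5050/v1/voices?language=zh-CN" \
  -H "Authorization: Bearer your_api_key_here"

# Synthesize speech with an edge-tts voice name instead of an OpenAI alias
curl http://localhost:5050/v1/audio/speech \
  -H "Authorization: Bearer your_api_key_here" \
  -H "Content-Type: application/json" \
  -d '{"input": "你好,世界!", "voice": "zh-CN-XiaoxiaoNeural"}' \
  --output speech.mp3
```
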
## Environment variables

- `API_KEY`: API key (default: 'your_api_key_here')
- `PORT`: server port (default: 5050)
- `DEFAULT_VOICE`: default voice (default: 'en-US-AndrewNeural')
- `DEFAULT_RESPONSE_FORMAT`: default audio format (default: 'mp3')
- `DEFAULT_SPEED`: default speech speed (default: 1.0)
- `DEFAULT_LANGUAGE`: default language (default: 'en-US')
- `REQUIRE_API_KEY`: whether API key authentication is required (default: true)

## TODO

- Allow setting API_KEY through Hugging Face Space environment variables
- Build a user-friendly web interface

## Contributing

Issue reports and pull requests are welcome. For major changes, please open an issue first to discuss what you would like to change.

## License

[MIT](https://choosealicense.com/licenses/mit/)

## Links

- [Video tutorials](https://www.youtube.com/@all.ai.)
- [Project GitHub repository](https://github.com/aigem/edgeTTS-openai-api)
--------------------------------------------------------------------------------

/src/api/app/tts_handler.py:
--------------------------------------------------------------------------------
# tts_handler.py

import edge_tts
import asyncio
import tempfile
import subprocess
import os

# Language default (environment variable)
DEFAULT_LANGUAGE = os.getenv('DEFAULT_LANGUAGE', 'en-US')

# OpenAI voice names mapped to edge-tts equivalents
voice_mapping = {
    'alloy': 'en-US-AvaNeural',
    'echo': 'en-US-AndrewNeural',
    'fable': 'en-GB-SoniaNeural',
    'onyx': 'en-US-EricNeural',
    'nova': 'en-US-SteffanNeural',
    'shimmer': 'en-US-EmmaNeural'
}

async def _generate_audio(text, voice, response_format, speed):
    # Determine if the voice is an OpenAI-compatible voice or a direct edge-tts voice
    edge_tts_voice = voice_mapping.get(voice, voice)  # Use mapping if in OpenAI names, otherwise use as-is

    # Generate the TTS output in mp3 format first
    temp_output_file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3")
    communicator = edge_tts.Communicate(text, edge_tts_voice)
    await communicator.save(temp_output_file.name)

    # If the requested format is mp3 and speed is 1.0, return the generated file directly
    if response_format == "mp3" and speed == 1.0:
        return temp_output_file.name

    # Convert to the requested format if not mp3 or if speed adjustment is needed
    converted_output_file = tempfile.NamedTemporaryFile(delete=False, suffix=f".{response_format}")

    # ffmpeg playback speed adjustment
    speed_filter = f"atempo={speed}" if response_format != "pcm" else f"asetrate=44100*{speed},aresample=44100"
    ffmpeg_command = [
        "ffmpeg", "-i", temp_output_file.name,
        "-filter:a", speed_filter,  # Apply speed adjustment
        "-f", response_format, "-y",
        converted_output_file.name
    ]

    try:
        subprocess.run(ffmpeg_command, check=True)
    except subprocess.CalledProcessError as e:
        raise RuntimeError(f"Error in audio conversion: {e}")

    return converted_output_file.name

def generate_speech(text, voice, response_format, speed=1.0):
    return asyncio.run(_generate_audio(text, voice, response_format, speed))

def get_models():
    return [
        {"id": "tts-1", "name": "Text-to-speech v1"},
        {"id": "tts-1-hd", "name": "Text-to-speech v1 HD"}
    ]

async def _get_voices(language=None):
    # List all voices, filter by language if specified
    all_voices = await edge_tts.list_voices()
    language = language or DEFAULT_LANGUAGE  # Use default if no language specified
    filtered_voices = [
        {"name": v['ShortName'], "gender": v['Gender'], "language": v['Locale']}
        for v in all_voices
        if language == 'all' or language is None or v['Locale'] == language
    ]
    return filtered_voices

def get_voices(language=None):
    return asyncio.run(_get_voices(language))
--------------------------------------------------------------------------------
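
When a format other than mp3 or a speed other than 1.0 is requested, `_generate_audio` above shells out to ffmpeg with an `atempo` filter (or `asetrate`/`aresample` for pcm). A rough equivalent of the command it builds for a 1.5× wav conversion is sketched below; the file paths are illustrative, and ffmpeg must be available on the PATH (it is not among the packages installed in the Dockerfile, so it may need to be added for these code paths).

```bash
# Illustrative only: tts_handler.py assembles this argument list for subprocess.run
ffmpeg -i /tmp/tts_output.mp3 \
  -filter:a "atempo=1.5" \
  -f wav -y /tmp/tts_output.wav
```
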
/src/api/app/server.py:
--------------------------------------------------------------------------------
# server.py

from flask import Flask, request, send_file, jsonify, render_template_string
from gevent.pywsgi import WSGIServer
from dotenv import load_dotenv
import os

from tts_handler import generate_speech, get_models, get_voices
from utils import require_api_key, AUDIO_FORMAT_MIME_TYPES

app = Flask(__name__)
load_dotenv()

API_KEY = os.getenv('API_KEY', 'your_api_key_here')
PORT = int(os.getenv('PORT', 5050))

DEFAULT_VOICE = os.getenv('DEFAULT_VOICE', 'en-US-AndrewNeural')
DEFAULT_RESPONSE_FORMAT = os.getenv('DEFAULT_RESPONSE_FORMAT', 'mp3')
DEFAULT_SPEED = float(os.getenv('DEFAULT_SPEED', 1.0))

# DEFAULT_MODEL = os.getenv('DEFAULT_MODEL', 'tts-1')

# A simple HTML template for the landing page.
# Note: the markup below is a minimal reconstruction; the original tags were
# stripped in this listing and only the text content survived.
HOME_TEMPLATE = """
<!DOCTYPE html>
<html>
<head>
    <meta charset="UTF-8">
    <title>OpenAI-compatible Microsoft Edge TTS API</title>
</head>
<body>
    <h1>OpenAI-compatible Microsoft Edge TTS API</h1>
    <p>This is an Edge TTS based replacement for the OpenAI TTS API.</p>
    <h2>API endpoints:</h2>
    <ul>
        <li>POST /v1/audio/speech</li>
        <li>GET/POST /v1/models</li>
        <li>GET/POST /v1/voices</li>
        <li>GET/POST /v1/voices/all</li>
    </ul>
    <h2>Usage example:</h2>
    <pre>curl http://localhost:{{ port }}/v1/audio/speech \\
  -H "Authorization: Bearer your_api_key_here" \\
  -H "Content-Type: application/json" \\
  -d '{"input": "Hello, world!", "voice": "alloy"}' \\
  --output speech.mp3</pre>
    <p><a href="https://www.bilibili.com/video/BV1M8yiYNEkj/">Video tutorial</a></p>
    <p>See the <a href="https://github.com/aigem/edgeTTS-openai-api">project README</a> for more information.</p>
</body>
</html>
"""

@app.route('/')
def home():
    return render_template_string(HOME_TEMPLATE, port=PORT)

@app.route('/v1/audio/speech', methods=['POST'])
@require_api_key
def text_to_speech():
    data = request.json
    if not data or 'input' not in data:
        return jsonify({"error": "Missing 'input' in request body"}), 400

    text = data.get('input')
    # model = data.get('model', DEFAULT_MODEL)
    voice = data.get('voice', DEFAULT_VOICE)

    response_format = data.get('response_format', DEFAULT_RESPONSE_FORMAT)
    speed = float(data.get('speed', DEFAULT_SPEED))

    mime_type = AUDIO_FORMAT_MIME_TYPES.get(response_format, "audio/mpeg")

    # Generate the audio file in the specified format with speed adjustment
    output_file_path = generate_speech(text, voice, response_format, speed)

    # Return the file with the correct MIME type
    return send_file(output_file_path, mimetype=mime_type, as_attachment=True, download_name=f"speech.{response_format}")

@app.route('/v1/models', methods=['GET', 'POST'])
@require_api_key
def list_models():
    return jsonify({"data": get_models()})

@app.route('/v1/voices', methods=['GET', 'POST'])
@require_api_key
def list_voices():
    specific_language = None

    data = request.args if request.method == 'GET' else request.json
    if data and ('language' in data or 'locale' in data):
        specific_language = data.get('language') if 'language' in data else data.get('locale')

    return jsonify({"voices": get_voices(specific_language)})

@app.route('/v1/voices/all', methods=['GET', 'POST'])
@require_api_key
def list_all_voices():
    return jsonify({"voices": get_voices('all')})

print(f" Edge TTS (Free Azure TTS) Replacement for OpenAI's TTS API")
print(f" ")
print(f" * Serving OpenAI Edge TTS")
print(f" * Server running on http://localhost:{PORT}")
print(f" * TTS Endpoint: http://localhost:{PORT}/v1/audio/speech")
print(f" ")

if __name__ == '__main__':
    http_server = WSGIServer(('0.0.0.0', PORT), app)
    http_server.serve_forever()
--------------------------------------------------------------------------------