├── README.md ├── linux └── app.py └── win └── app.py /README.md: -------------------------------------------------------------------------------- 1 | # Chutes2api 2 | ### 对你有用的话麻烦给个stars谢谢 3 | 4 | ## 支持模型 5 | - nvidia/Llama-3.1-405B-Instruct-FP8 6 | - deepseek-ai/DeepSeek-R1 7 | - Qwen/Qwen2.5-72B-Instruct 8 | - Qwen/Qwen2.5-Coder-32B-Instruc 9 | - bytedance-research/UI-TARS-72B-DPO 10 | - OpenGVLab/InternVL2_5-78B 11 | - hugging-quants/Meta-Llama-3.1-70B-Instruct-AWQ-INT4 12 | - NousResearch/Hermes-3-Llama-3.1-8B 13 | - Qwen/QVQ-72B-Preview 14 | - deepseek-ai/DeepSeek-R1-Distill-Qwen-32B 15 | - jondurbin/bagel-8b-v1.0 16 | - unsloth/QwQ-32B-Preview 17 | - Qwen/QwQ-32B-Preview 18 | - jondurbin/airoboros-34b-3.3 19 | - NovaSky-AI/Sky-T1-32B-Preview 20 | - driaforall/Dria-Agent-a-3B 21 | - NousResearch/Nous-Hermes-Llama2-13b 22 | - unsloth/Llama-3.2-1B-Instruct 23 | 24 | 25 | ## 请求路由 26 | - /v1/models 27 | - /v1/chat/completions 28 | - 如果有配置AUTH_TOKEN请求都要带key 29 | 30 | 31 | ## 请求格式 32 | 和 OpenAI 的请求格式相同,支持非流式和流式响应 33 | 34 | ## Docker部署 35 | 36 | ### Linux版本 37 | ```bash 38 | # 拉取 39 | docker pull mtxyt/chutes2api-linux:1.2 40 | 41 | # 运行(不带认证) 42 | docker run -d -p 8805:8805 mtxyt/chutes2api-linux:1.2 43 | 44 | # 运行(带认证) 45 | docker run -d -e AUTH_TOKEN=your_token -p 8805:8805 mtxyt/chutes2api-linux:1.2 46 | ``` 47 | 48 | ### Windows版本 49 | ```bash 50 | # 拉取 51 | docker pull mtxyt/chutes2api:1.5 52 | 53 | # 运行(不带认证) 54 | docker run -d -p 8805:8805 mtxyt/chutes2api:1.5 55 | 56 | # 运行(带认证) 57 | docker run -d -e AUTH_TOKEN=your_token -p 8805:8805 mtxyt/chutes2api:1.5 58 | ``` 59 | 60 | ## Token获取方式 61 | ### 准备步骤 62 | 1. 访问 [chutes.ai](https://chutes.ai) 63 | 2. 随便找一个LLM模型切换到chat聊天界面里 64 | 4. 打开开发者工具(F12),切换到网络面板 65 | 5. 随便发起一个对话 66 | 6. 
在请求中找到 cf_clearance cookie(如果没有,多对话几次回复空,就是触发盾了刷新网页过人机验证再请求就有了) 67 | ![image](https://github.com/user-attachments/assets/9e5423aa-9b4c-4c97-a737-281d3f195884) 68 | 69 | 如果你配置了 AUTH_TOKEN,完整的认证格式为:`AUTH_TOKEN|||cf_clearance` 70 | 71 | 例如: 72 | - AUTH_TOKEN=123456 73 | - cf_clearance=abcdef 74 | - 最终认证格式: `123456|||abcdef` 75 | 76 | ## 注意事项 77 | 1. cf_clearance 有时效性,过期后会自动获取新的 78 | 2. 支持配置和不配置 AUTH_TOKEN 两种方式 79 | 3. 服务启动后可以访问根路径查看服务状态 80 | 81 | ## 更多 82 | 目前提供 Linux 和 Windows 两个版本的镜像,win版本在win系统系统运行时稳定性更好。 83 | 84 | ## 声明 85 | 本项目仅供学习研究使用,请勿用于商业用途。 86 | -------------------------------------------------------------------------------- /linux/app.py: -------------------------------------------------------------------------------- 1 | from flask import Flask, request, Response, stream_with_context, jsonify 2 | import cloudscraper 3 | import json 4 | import uuid 5 | from datetime import datetime, timezone 6 | import time 7 | import os 8 | import logging 9 | 10 | # 配置日志 11 | logging.basicConfig( 12 | level=logging.INFO, 13 | format='%(asctime)s - %(levelname)s - %(message)s' 14 | ) 15 | 16 | app = Flask(__name__) 17 | 18 | # 模型映射字典 19 | MODEL_MAPPING = { 20 | "nvidia/Llama-3.1-405B-Instruct-FP8": "chutes-nvidia-llama-3-1-405b-instruct-fp8", 21 | "deepseek-ai/DeepSeek-R1": "chutes-deepseek-ai-deepseek-r1", 22 | "Qwen/Qwen2.5-72B-Instruct": "chutes-qwen-qwen2-5-72b-instruct", 23 | "Qwen/Qwen2.5-Coder-32B-Instruc": "chutes-qwen-qwen2-5-coder-32b-instruct", 24 | "bytedance-research/UI-TARS-72B-DPO": "chutes-bytedance-research-ui-tars-72b-dpo", 25 | "OpenGVLab/InternVL2_5-78B": "chutes-opengvlab-internvl2-5-78b", 26 | "hugging-quants/Meta-Llama-3.1-70B-Instruct-AWQ-INT4": "chutes-hugging-quants-meta-llama-3-1-70b-instruct-awq-int4", 27 | "NousResearch/Hermes-3-Llama-3.1-8B": "cxmplexbb-nousresearch-hermes-3-llama-3-1-8b", 28 | "Qwen/QVQ-72B-Preview": "chutes-qwen-qvq-72b-preview", 29 | "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B": 
# Module state: the most recently obtained Cloudflare clearance cookie and the
# optional access token configured via the AUTH_TOKEN environment variable.
current_cf_clearance = None
auth_token = os.getenv('AUTH_TOKEN', '')


def parse_auth_header(header):
    """Split an ``Authorization`` header of the form ``Bearer <token>`` or
    ``Bearer <token>|||<cf_clearance>`` (the composite format documented in
    the README).

    Returns a ``(token, cf_clearance)`` tuple.  ``cf_clearance`` is ``None``
    when the composite form is not used; ``(None, None)`` is returned for
    anything that is not a Bearer header.
    """
    prefix = "Bearer "
    if not header.startswith(prefix):
        return None, None
    token, sep, cf = header[len(prefix):].partition("|||")
    return token, (cf if sep else None)


def check_auth():
    """Validate the incoming request against AUTH_TOKEN.

    Fix: the README documents the composite ``AUTH_TOKEN|||cf_clearance``
    credential, but the original comparison only accepted the bare
    ``Bearer AUTH_TOKEN`` form and therefore rejected clients following the
    README.  Both forms are now accepted; when the composite form is used,
    the cf_clearance part is remembered for upstream requests.

    Returns True when no AUTH_TOKEN is configured (auth disabled).
    """
    global current_cf_clearance
    if not auth_token:
        # No token configured: the service is open.
        return True
    token, cf = parse_auth_header(request.headers.get('Authorization', ''))
    if token != auth_token:
        return False
    if cf:
        # Remember the client-supplied clearance cookie for upstream calls.
        current_cf_clearance = cf
    return True


def get_new_cf_clearance():
    """Try to obtain a fresh ``cf_clearance`` cookie from chutes.ai.

    Creates a throwaway cloudscraper session (reusing the current credentials,
    if any), performs one GET against the site, and returns the new cookie
    value — or ``None`` when the challenge was not solved or an error occurred.
    """
    try:
        temp_scraper = cloudscraper.create_scraper(
            browser={
                'browser': 'firefox',
                'platform': 'windows',
                'mobile': False
            },
            delay=10,
            allow_brotli=False,
            doubleDown=True
        )

        # Reuse whatever credentials we currently hold so this request looks
        # like the ones the main scraper sends.
        if current_cf_clearance:
            if auth_token:
                temp_scraper.headers.update({
                    "Authorization": f"Bearer {auth_token}|||{current_cf_clearance}"
                })
            else:
                temp_scraper.headers.update({
                    "Authorization": f"Bearer {current_cf_clearance}"
                })
        elif auth_token:
            temp_scraper.headers.update({
                "Authorization": f"Bearer {auth_token}"
            })

        logging.info("尝试获取新的 cf_clearance")
        response = temp_scraper.get('https://chutes.ai')

        if 'cf_clearance' in temp_scraper.cookies:
            new_cf = temp_scraper.cookies['cf_clearance']
            logging.info(f"成功获取新的 cf_clearance: {new_cf[:10]}...")
            return new_cf
        logging.warning("未能获取 cf_clearance")
        return None
    except Exception as e:
        logging.error(f"获取新的 cf_clearance 失败: {str(e)}")
        return None


def create_scraper(cf_clearance=None):
    """Build a cloudscraper session preconfigured for chutes.ai.

    Parameters:
        cf_clearance: explicit clearance cookie to use; falls back to the
            module-level ``current_cf_clearance`` when omitted.

    Returns a session with browser-like headers and, when a clearance cookie
    is available, the matching cookie and Authorization header attached.
    """
    global current_cf_clearance

    scraper = cloudscraper.create_scraper(
        browser={
            'browser': 'firefox',
            'platform': 'windows',
            'mobile': False
        },
        delay=10,
        allow_brotli=False,
        doubleDown=True
    )

    # Baseline browser-like headers expected by the upstream SSE endpoint.
    scraper.headers.update({
        "Accept": "text/event-stream",
        "Accept-Encoding": "gzip, deflate, br",
        "Accept-Language": "en-US,en;q=0.9",
        "Cache-Control": "no-cache",
        "Content-Type": "application/json",
        "Origin": "https://chutes.ai",
        "Pragma": "no-cache",
        "Referer": "https://chutes.ai/",
        "Sec-Ch-Ua": '"Chromium";v="122", "Not(A:Brand";v="24", "Microsoft Edge";v="122"',
        "Sec-Ch-Ua-Mobile": "?0",
        "Sec-Ch-Ua-Platform": '"Windows"',
        "Sec-Fetch-Dest": "empty",
        "Sec-Fetch-Mode": "cors",
        "Sec-Fetch-Site": "same-origin",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36 Edg/122.0.0.0",
        "X-Requested-With": "XMLHttpRequest"
    })

    # Attach the clearance cookie and the matching Authorization header.
    cf_value = cf_clearance or current_cf_clearance
    if cf_value:
        scraper.cookies.update({
            "cf_clearance": cf_value
        })
        if auth_token:
            scraper.headers.update({
                "Authorization": f"Bearer {auth_token}|||{cf_value}"
            })
        else:
            # No auth_token configured: send only the cf_clearance.
            scraper.headers.update({
                "Authorization": f"Bearer {cf_value}"
            })

    return scraper
def create_chutes_request(openai_request):
    """Convert an OpenAI-style request body into the Chutes chat format.

    NOTE(review): only the most recent message is forwarded upstream — the
    rest of the conversation history is dropped.  This appears to mirror what
    the chutes.ai web client sends; confirm before changing.
    """
    messages = openai_request['messages']
    message_id = str(uuid.uuid4())
    # ISO-8601 UTC timestamp truncated to millisecond precision,
    # e.g. 2024-01-01T12:00:00.000Z
    current_time = datetime.now(timezone.utc).strftime('%Y-%m-%dT%H:%M:%S.%f')[:-3] + 'Z'

    model = openai_request.get('model', 'deepseek-ai/DeepSeek-R1')
    chute_name = MODEL_MAPPING.get(model, 'chutes-deepseek-ai-deepseek-r1')

    return {
        "messages": [{
            "role": messages[-1]['role'],
            "content": messages[-1]['content'],
            "id": message_id,
            "createdOn": current_time
        }],
        "model": model,
        "chuteName": chute_name
    }


def process_chunk(chunk):
    """Extract the delta text from one parsed SSE chunk, or ``None``.

    Fix: the original used a bare ``except:``, which also swallows
    ``KeyboardInterrupt``/``SystemExit``; only the lookup errors a malformed
    chunk can actually raise are caught now.
    """
    try:
        if "choices" in chunk and chunk["choices"][0]["delta"].get("content"):
            return chunk["choices"][0]["delta"]["content"]
        return None
    except (KeyError, IndexError, TypeError, AttributeError):
        return None


def process_non_stream_response(response, model):
    """Drain an SSE response and assemble one OpenAI-style completion dict.

    Parameters:
        response: a streaming requests-style response (``iter_lines``).
        model: model name echoed back in the result.

    Returns a ``chat.completion`` dict on success, or a flask ``Response``
    with status 500 when the stream was empty or could not be processed.
    Token counts are not reported upstream, so the usage block is zeroed.
    """
    try:
        full_content = ""
        for line in response.iter_lines():
            if line:
                line = line.decode('utf-8')
                if line.startswith("data: "):
                    data = line[6:]
                    if data == "[DONE]":
                        break
                    try:
                        chunk = json.loads(data)
                        content = process_chunk(chunk)
                        if content:
                            full_content += content
                    except json.JSONDecodeError:
                        # Skip non-JSON keep-alive/garbage lines.
                        continue

        if not full_content:
            return Response("Empty response from server", status=500)

        return {
            "id": str(uuid.uuid4()),
            "object": "chat.completion",
            "created": int(time.time()),
            "model": model,
            "choices": [{
                "message": {
                    "role": "assistant",
                    "content": full_content
                },
                "finish_reason": "stop",
                "index": 0
            }],
            "usage": {
                "prompt_tokens": 0,
                "completion_tokens": 0,
                "total_tokens": 0
            }
        }
    except Exception as e:
        logging.error(f"处理非流式响应时出错: {str(e)}")
        return Response("Failed to process response", status=500)
def make_request_with_retry(openai_request, max_retries=3):
    """POST the converted chat request to chutes.ai, retrying on failure.

    On a 403 (likely an expired Cloudflare clearance) a fresh cf_clearance
    cookie is fetched and the attempt is retried immediately; other failures
    back off exponentially.  Returns the streaming requests response on
    success, or a flask ``Response`` (status 500) after all retries fail.
    """
    global current_cf_clearance

    chutes_request = create_chutes_request(openai_request)
    last_error = None

    for attempt in range(max_retries):
        try:
            scraper = create_scraper()
            logging.info(f"尝试第 {attempt + 1} 次请求")

            response = scraper.post(
                "https://chutes.ai/app/api/chat",
                json=chutes_request,
                stream=True
            )

            logging.info(f"请求状态码: {response.status_code}")

            # Success: hand the streaming response straight back.
            if response.status_code == 200:
                return response

            # 403: refresh the Cloudflare clearance and retry without waiting.
            if response.status_code == 403:
                logging.warning(f"尝试 {attempt + 1}: 获取新的 cf_clearance")
                new_cf_clearance = get_new_cf_clearance()
                if new_cf_clearance:
                    current_cf_clearance = new_cf_clearance
                    continue

            last_error = f"Status code: {response.status_code}, Response: {response.text}"
            logging.error(f"请求失败: {last_error}")

        except Exception as e:
            last_error = str(e)
            logging.error(f"尝试 {attempt + 1} 失败: {last_error}", exc_info=True)

        # Wait before the next attempt.
        if attempt < max_retries - 1:
            time.sleep(2 ** attempt)  # exponential backoff

    # All retries exhausted: report the last error to the caller.
    return Response(f"请求失败,所有重试均未成功。最后的错误: {last_error}", status=500)

@app.route('/', methods=['GET'])
def home():
    """Health-check endpoint: reports service status and whether an
    AUTH_TOKEN / cf_clearance are currently configured."""
    config_info = {
        "status": "Chutes API Service Running",
        "version": "1.0",
        "has_auth_token": bool(auth_token),
        "has_cf_clearance": bool(current_cf_clearance)
    }
    return config_info

@app.route('/v1/models', methods=['GET'])
def get_models():
    """OpenAI-compatible model listing built from MODEL_MAPPING."""
    if not check_auth():
        return Response("Unauthorized", status=401)

    models = []
    for model_id in MODEL_MAPPING.keys():
        models.append({
            "id": model_id,
            "object": "model",
            "created": int(time.time()),
            "owned_by": "chutes"
        })

    return jsonify({
        "object": "list",
        "data": models
    })

@app.route('/v1/chat/completions', methods=['POST'])
def chat():
    """OpenAI-compatible chat completion endpoint.

    Supports both non-streaming (single JSON completion) and streaming
    (SSE ``chat.completion.chunk``) responses, selected by the request's
    ``stream`` flag.
    """
    try:
        if not check_auth():
            return Response("Unauthorized", status=401)

        openai_request = request.json
        logging.info("收到新的聊天请求")
        response = make_request_with_retry(openai_request)

        # make_request_with_retry returns a flask Response on failure;
        # pass it straight through.
        if isinstance(response, Response):
            return response

        # Non-streaming: drain the SSE stream into a single completion.
        if not openai_request.get('stream', False):
            result = process_non_stream_response(response, openai_request.get('model'))
            return Response(
                json.dumps(result, ensure_ascii=False),
                status=200,
                content_type='application/json'
            ) if isinstance(result, dict) else result

        # Streaming: re-emit upstream SSE chunks in OpenAI chunk format.
        def generate():
            try:
                for line in response.iter_lines():
                    if line:
                        line = line.decode('utf-8')
                        if line.startswith("data: "):
                            data = line[6:]
                            if data == "[DONE]":
                                yield "data: [DONE]\n\n"
                                break

                            try:
                                chunk = json.loads(data)
                                content = process_chunk(chunk)
                                if content:
                                    response_chunk = {
                                        "id": str(uuid.uuid4()),
                                        "object": "chat.completion.chunk",
                                        "created": int(time.time()),
                                        "model": openai_request.get('model'),
                                        "choices": [{
                                            "delta": {
                                                "content": content
                                            },
                                            "index": 0,
                                            "finish_reason": None
                                        }]
                                    }
                                    yield f"data: {json.dumps(response_chunk, ensure_ascii=False)}\n\n"
                            except json.JSONDecodeError:
                                # Skip non-JSON keep-alive/garbage lines.
                                continue

            except Exception as e:
                logging.error(f"生成响应时出错: {str(e)}", exc_info=True)
                return

        return Response(
            stream_with_context(generate()),
            content_type='text/event-stream'
        )

    except Exception as e:
        logging.error(f"聊天接口出错: {str(e)}", exc_info=True)
        return Response(f"服务器内部错误: {str(e)}", status=500)
8805)) 369 | app.run(host='0.0.0.0', port=port, debug=False) 370 | -------------------------------------------------------------------------------- /win/app.py: -------------------------------------------------------------------------------- 1 | from flask import Flask, request, Response, stream_with_context, jsonify 2 | import cloudscraper 3 | import json 4 | import uuid 5 | from datetime import datetime, timezone 6 | import time 7 | import os 8 | 9 | app = Flask(__name__) 10 | 11 | # 模型映射字典 12 | MODEL_MAPPING = { 13 | "nvidia/Llama-3.1-405B-Instruct-FP8": "chutes-nvidia-llama-3-1-405b-instruct-fp8", 14 | "deepseek-ai/DeepSeek-R1": "chutes-deepseek-ai-deepseek-r1", 15 | "Qwen/Qwen2.5-72B-Instruct": "chutes-qwen-qwen2-5-72b-instruct", 16 | "Qwen/Qwen2.5-Coder-32B-Instruc": "chutes-qwen-qwen2-5-coder-32b-instruct", 17 | "bytedance-research/UI-TARS-72B-DPO": "chutes-bytedance-research-ui-tars-72b-dpo", 18 | "OpenGVLab/InternVL2_5-78B": "chutes-opengvlab-internvl2-5-78b", 19 | "hugging-quants/Meta-Llama-3.1-70B-Instruct-AWQ-INT4": "chutes-hugging-quants-meta-llama-3-1-70b-instruct-awq-int4", 20 | "NousResearch/Hermes-3-Llama-3.1-8B": "cxmplexbb-nousresearch-hermes-3-llama-3-1-8b", 21 | "Qwen/QVQ-72B-Preview": "chutes-qwen-qvq-72b-preview", 22 | "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B": "chutes-deepseek-ai-deepseek-r1-distill-qwen-32b", 23 | "jondurbin/bagel-8b-v1.0": "chutes-jondurbin-bagel-8b-v1-0", 24 | "unsloth/QwQ-32B-Preview": "cxmplexbb-unsloth-qwq-32b-preview", 25 | "Qwen/QwQ-32B-Preview": "chutes-qwq-32b-preview", 26 | "jondurbin/airoboros-34b-3.3": "chutes-jondurbin-airoboros-34b-3-3", 27 | "NovaSky-AI/Sky-T1-32B-Preview": "chutes-novasky-ai-sky-t1-32b-preview", 28 | "driaforall/Dria-Agent-a-3B": "chutes-driaforall-dria-agent-a-3b", 29 | "NousResearch/Nous-Hermes-Llama2-13b": "cxmplexbb-nousresearch-nous-hermes-llama2-13b", 30 | "unsloth/Llama-3.2-1B-Instruct": "chutes-unsloth-llama-3-2-1b-instruct" 31 | } 32 | 33 | def check_auth(): 34 | """检查认证""" 35 | 
def check_auth():
    """Validate the incoming request against AUTH_TOKEN.

    The token is re-read from the environment on every call.  When no token
    is configured, the endpoint is open.
    """
    auth_token = os.getenv('AUTH_TOKEN')
    if not auth_token:
        return True

    request_token = request.headers.get('Authorization', '')
    return request_token == f"Bearer {auth_token}"

def create_scraper():
    """Build a cloudscraper session preconfigured for chutes.ai.

    NOTE(review): impersonates Chrome on Linux even though this is the
    "win" build of the service — presumably intentional; confirm.
    """
    scraper = cloudscraper.create_scraper(
        browser={
            'browser': 'chrome',
            'platform': 'linux',
            'desktop': True,
            'mobile': False,
            'version': '121.0.0.0'
        }
    )

    # Browser-like request headers expected by the upstream endpoint.
    scraper.headers.update({
        "Accept": "*/*",
        "Accept-Language": "en-US,en;q=0.9",
        "Cache-Control": "no-cache",
        "Content-Type": "text/plain;charset=UTF-8",
        "Origin": "https://chutes.ai",
        "Pragma": "no-cache",
        "Referer": "https://chutes.ai/",
        "Sec-Ch-Ua": '"Not A(Brand";v="99", "Google Chrome";v="121", "Chromium";v="121"',
        "Sec-Ch-Ua-Mobile": "?0",
        "Sec-Ch-Ua-Platform": '"Linux"',
        "Sec-Fetch-Dest": "empty",
        "Sec-Fetch-Mode": "cors",
        "Sec-Fetch-Site": "same-origin",
        "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36"
    })

    return scraper


def create_chutes_request(openai_request):
    """Convert an OpenAI-style request body into the Chutes chat format.

    NOTE(review): only the most recent message is forwarded upstream — the
    rest of the conversation history is dropped.  This appears to mirror what
    the chutes.ai web client sends; confirm before changing.
    """
    messages = openai_request['messages']
    message_id = str(uuid.uuid4())
    # ISO-8601 UTC timestamp truncated to millisecond precision.
    current_time = datetime.now(timezone.utc).strftime('%Y-%m-%dT%H:%M:%S.%f')[:-3] + 'Z'

    model = openai_request.get('model', 'deepseek-ai/DeepSeek-R1')
    chute_name = MODEL_MAPPING.get(model, 'chutes-deepseek-ai-deepseek-r1')

    return {
        "messages": [{
            "role": messages[-1]['role'],
            "content": messages[-1]['content'],
            "id": message_id,
            "createdOn": current_time
        }],
        "model": model,
        "chuteName": chute_name
    }

def process_chunk(chunk):
    """Extract the delta text from one parsed SSE chunk, or ``None``.

    Fix: the original used a bare ``except:``, which also swallows
    ``KeyboardInterrupt``/``SystemExit``; only the lookup errors a malformed
    chunk can actually raise are caught now.
    """
    try:
        if "choices" in chunk and chunk["choices"][0]["delta"].get("content"):
            return chunk["choices"][0]["delta"]["content"]
        return None
    except (KeyError, IndexError, TypeError, AttributeError):
        return None
def process_non_stream_response(response, model):
    """Drain an SSE response and assemble one OpenAI-style completion dict.

    Parameters:
        response: a streaming requests-style response (``iter_lines``).
        model: model name echoed back in the result.

    Returns a ``chat.completion`` dict on success, or a flask ``Response``
    with status 500 when the stream was empty or could not be processed.
    Token counts are not reported upstream, so the usage block is zeroed.
    """
    try:
        full_content = ""
        for line in response.iter_lines():
            if line:
                line = line.decode('utf-8')
                if line.startswith("data: "):
                    data = line[6:]
                    if data == "[DONE]":
                        break
                    try:
                        chunk = json.loads(data)
                        content = process_chunk(chunk)
                        if content:
                            full_content += content
                    except json.JSONDecodeError:
                        # Skip non-JSON keep-alive/garbage lines.
                        continue

        if not full_content:
            return Response("Empty response from server", status=500)

        return {
            "id": str(uuid.uuid4()),
            "object": "chat.completion",
            "created": int(time.time()),
            "model": model,
            "choices": [{
                "message": {
                    "role": "assistant",
                    "content": full_content
                },
                "finish_reason": "stop",
                "index": 0
            }],
            "usage": {
                "prompt_tokens": 0,
                "completion_tokens": 0,
                "total_tokens": 0
            }
        }
    except Exception as e:
        print(f"Error processing non-stream response: {str(e)}")
        return Response("Failed to process response", status=500)

@app.route('/', methods=['GET'])
def home():
    """Health-check endpoint."""
    return {"status": "Chutes API Service Running", "version": "1.0"}

@app.route('/v1/models', methods=['GET'])
def get_models():
    """OpenAI-compatible model listing built from MODEL_MAPPING."""
    if not check_auth():
        return Response("Unauthorized", status=401)

    models = []
    for model_id in MODEL_MAPPING.keys():
        models.append({
            "id": model_id,
            "object": "model",
            "created": int(time.time()),
            "owned_by": "chutes"
        })

    return jsonify({
        "object": "list",
        "data": models
    })

@app.route('/v1/chat/completions', methods=['POST'])
def chat():
    """OpenAI-compatible chat completion endpoint.

    Supports both non-streaming (single JSON completion) and streaming
    (SSE ``chat.completion.chunk``) responses, selected by the request's
    ``stream`` flag.
    """
    try:
        if not check_auth():
            return Response("Unauthorized", status=401)

        openai_request = request.json
        chutes_request = create_chutes_request(openai_request)
        scraper = create_scraper()

        # Per-request headers.  NOTE(review): Content-Type is text/plain even
        # though a JSON body is sent — presumably matching the web client;
        # confirm before changing.
        headers = {
            "Accept": "*/*",
            "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8",
            "Cache-Control": "no-cache",
            "Content-Type": "text/plain;charset=UTF-8",
            "Origin": "https://chutes.ai",
            "Pragma": "no-cache",
            "Referer": "https://chutes.ai/app/chute/590d919c-8d4c-5b7b-9445-ed2cd71944a8",
            "Sec-Ch-Ua": '"Not A(Brand";v="8", "Chromium";v="132", "Microsoft Edge";v="132"',
            "Sec-Ch-Ua-Mobile": "?0",
            "Sec-Ch-Ua-Platform": '"Windows"',
            "Sec-Fetch-Dest": "empty",
            "Sec-Fetch-Mode": "cors",
            "Sec-Fetch-Site": "same-origin"
        }

        response = scraper.post(
            "https://chutes.ai/app/api/chat",
            headers=headers,
            json=chutes_request,
            stream=True
        )

        if response.status_code != 200:
            return Response(f"Chutes API error: {response.text}", status=response.status_code)

        # Non-streaming: drain the SSE stream into a single completion.
        if not openai_request.get('stream', False):
            result = process_non_stream_response(response, chutes_request["model"])
            return Response(
                json.dumps(result, ensure_ascii=False),
                status=200,
                content_type='application/json'
            ) if isinstance(result, dict) else result

        # Streaming: re-emit upstream SSE chunks in OpenAI chunk format.
        def generate():
            try:
                for line in response.iter_lines():
                    if line:
                        line = line.decode('utf-8')
                        if line.startswith("data: "):
                            data = line[6:]
                            if data == "[DONE]":
                                yield "data: [DONE]\n\n"
                                break

                            try:
                                chunk = json.loads(data)
                                content = process_chunk(chunk)
                                if content:
                                    response_chunk = {
                                        "id": str(uuid.uuid4()),
                                        "object": "chat.completion.chunk",
                                        "created": int(time.time()),
                                        "model": chutes_request["model"],
                                        "choices": [{
                                            "delta": {
                                                "content": content
                                            },
                                            "index": 0,
                                            "finish_reason": None
                                        }]
                                    }
                                    yield f"data: {json.dumps(response_chunk, ensure_ascii=False)}\n\n"
                            except json.JSONDecodeError:
                                # Skip non-JSON keep-alive/garbage lines.
                                continue

            except Exception as e:
                print(f"Error in generate: {str(e)}")
                return

        return Response(
            stream_with_context(generate()),
            content_type='text/event-stream'
        )

    except Exception as e:
        print(f"Error in chat endpoint: {str(e)}")
        return Response(f"Internal server error: {str(e)}", status=500)

if __name__ == '__main__':
    # PORT may override the default listen port; debug is off for production.
    port = int(os.getenv('PORT', 8805))
    app.run(host='0.0.0.0', port=port, debug=False)