feat: add aliyun quota fallback

This commit is contained in:
JOJO 2026-03-06 12:31:20 +08:00
parent 4be61fe76e
commit 868640b479
6 changed files with 864 additions and 369 deletions

View File

@ -1,9 +1,32 @@
import os import os
from pathlib import Path
from typing import Optional from typing import Optional
def _env(name: str, default: str = "") -> str: def _env(name: str, default: str = "") -> str:
return os.environ.get(name, default) return os.environ.get(name, default)
def _env_optional(name: str) -> Optional[str]:
value = os.environ.get(name)
if value is None:
# 回退读取 .env支持运行中更新
env_path = Path(__file__).resolve().parents[1] / ".env"
if env_path.exists():
try:
for raw_line in env_path.read_text(encoding="utf-8").splitlines():
line = raw_line.strip()
if not line or line.startswith("#") or "=" not in line:
continue
key, val = line.split("=", 1)
if key.strip() == name:
value = val.strip().strip('"').strip("'")
break
except Exception:
value = None
if value is None:
return None
value = value.strip()
return value or None
# 模型上下文窗口(单位: token # 模型上下文窗口(单位: token
CONTEXT_WINDOWS = { CONTEXT_WINDOWS = {
@ -19,6 +42,8 @@ CONTEXT_WINDOWS = {
# 默认Kimi # 默认Kimi
KIMI_BASE = _env("API_BASE_KIMI", _env("AGENT_API_BASE_URL", "https://api.moonshot.cn/v1")) KIMI_BASE = _env("API_BASE_KIMI", _env("AGENT_API_BASE_URL", "https://api.moonshot.cn/v1"))
KIMI_KEY = _env("API_KEY_KIMI", _env("AGENT_API_KEY", "")) KIMI_KEY = _env("API_KEY_KIMI", _env("AGENT_API_KEY", ""))
KIMI_BASE_OFFICIAL = _env_optional("API_BASE_KIMI_OFFICIAL")
KIMI_KEY_OFFICIAL = _env_optional("API_KEY_KIMI_OFFICIAL")
KIMI_FAST_MODEL = _env("MODEL_KIMI_FAST", _env("AGENT_MODEL_ID", "kimi-k2-0905-preview")) KIMI_FAST_MODEL = _env("MODEL_KIMI_FAST", _env("AGENT_MODEL_ID", "kimi-k2-0905-preview"))
KIMI_THINK_MODEL = _env("MODEL_KIMI_THINK", _env("AGENT_THINKING_MODEL_ID", "kimi-k2-thinking")) KIMI_THINK_MODEL = _env("MODEL_KIMI_THINK", _env("AGENT_THINKING_MODEL_ID", "kimi-k2-thinking"))
KIMI_25_MODEL = _env("MODEL_KIMI_25", "kimi-k2.5") KIMI_25_MODEL = _env("MODEL_KIMI_25", "kimi-k2.5")
@ -32,12 +57,16 @@ DEEPSEEK_THINK_MODEL = _env("MODEL_DEEPSEEK_THINK", "deepseek-reasoner")
# Qwen # Qwen
QWEN_BASE = _env("API_BASE_QWEN", "https://dashscope.aliyuncs.com/compatible-mode/v1") QWEN_BASE = _env("API_BASE_QWEN", "https://dashscope.aliyuncs.com/compatible-mode/v1")
QWEN_KEY = _env("API_KEY_QWEN", _env("DASHSCOPE_API_KEY", "")) QWEN_KEY = _env("API_KEY_QWEN", _env("DASHSCOPE_API_KEY", ""))
QWEN_BASE_OFFICIAL = _env_optional("API_BASE_QWEN_OFFICIAL")
QWEN_KEY_OFFICIAL = _env_optional("API_KEY_QWEN_OFFICIAL")
QWEN_MAX_MODEL = _env("MODEL_QWEN_MAX", "qwen3-max") QWEN_MAX_MODEL = _env("MODEL_QWEN_MAX", "qwen3-max")
QWEN_VL_MODEL = _env("MODEL_QWEN_VL", "qwen3.5-plus") QWEN_VL_MODEL = _env("MODEL_QWEN_VL", "qwen3.5-plus")
# MiniMax # MiniMax
MINIMAX_BASE = _env("API_BASE_MINIMAX", "https://api.minimaxi.com/v1") MINIMAX_BASE = _env("API_BASE_MINIMAX", "https://api.minimaxi.com/v1")
MINIMAX_KEY = _env("API_KEY_MINIMAX", "") MINIMAX_KEY = _env("API_KEY_MINIMAX", "")
MINIMAX_BASE_OFFICIAL = _env_optional("API_BASE_MINIMAX_OFFICIAL")
MINIMAX_KEY_OFFICIAL = _env_optional("API_KEY_MINIMAX_OFFICIAL")
MINIMAX_MODEL = _env("MODEL_MINIMAX", "MiniMax-M2.5") MINIMAX_MODEL = _env("MODEL_MINIMAX", "MiniMax-M2.5")
@ -78,7 +107,7 @@ MODEL_PROFILES = {
"model_id": KIMI_25_MODEL, "model_id": KIMI_25_MODEL,
"max_tokens": None, "max_tokens": None,
"context_window": CONTEXT_WINDOWS["kimi-k2.5"], "context_window": CONTEXT_WINDOWS["kimi-k2.5"],
"extra_params": {"thinking": {"type": "enabled"}} "extra_params": {"thinking": {"type": "enabled"}, "enable_thinking": True}
}, },
"supports_thinking": True, "supports_thinking": True,
"fast_only": False, "fast_only": False,
@ -204,6 +233,45 @@ def get_model_profile(key: str) -> dict:
if key not in MODEL_PROFILES: if key not in MODEL_PROFILES:
raise ValueError(f"未知模型 key: {key}") raise ValueError(f"未知模型 key: {key}")
profile = MODEL_PROFILES[key] profile = MODEL_PROFILES[key]
try:
from utils.aliyun_fallback import is_fallback_active
except Exception:
is_fallback_active = None
if is_fallback_active and is_fallback_active(key):
if key == "kimi-k2.5":
kimi_base_official = _env_optional("API_BASE_KIMI_OFFICIAL") or KIMI_BASE_OFFICIAL
kimi_key_official = _env_optional("API_KEY_KIMI_OFFICIAL") or KIMI_KEY_OFFICIAL
if kimi_base_official and kimi_key_official:
profile = dict(profile)
fast = dict(profile.get("fast") or {})
thinking = dict(profile.get("thinking") or fast)
fast.update({"base_url": kimi_base_official, "api_key": kimi_key_official})
thinking.update({"base_url": kimi_base_official, "api_key": kimi_key_official})
profile["fast"] = fast
profile["thinking"] = thinking
elif key == "qwen3-vl-plus":
qwen_base_official = _env_optional("API_BASE_QWEN_OFFICIAL") or QWEN_BASE_OFFICIAL
qwen_key_official = _env_optional("API_KEY_QWEN_OFFICIAL") or QWEN_KEY_OFFICIAL
if qwen_base_official and qwen_key_official:
profile = dict(profile)
fast = dict(profile.get("fast") or {})
thinking = dict(profile.get("thinking") or fast)
fast.update({"base_url": qwen_base_official, "api_key": qwen_key_official})
thinking.update({"base_url": qwen_base_official, "api_key": qwen_key_official})
profile["fast"] = fast
profile["thinking"] = thinking
elif key == "minimax-m2.5":
minimax_base_official = _env_optional("API_BASE_MINIMAX_OFFICIAL") or MINIMAX_BASE_OFFICIAL
minimax_key_official = _env_optional("API_KEY_MINIMAX_OFFICIAL") or MINIMAX_KEY_OFFICIAL
if minimax_base_official and minimax_key_official:
profile = dict(profile)
fast = dict(profile.get("fast") or {})
thinking = dict(profile.get("thinking") or fast)
fast.update({"base_url": minimax_base_official, "api_key": minimax_key_official})
thinking.update({"base_url": minimax_base_official, "api_key": minimax_key_official})
profile["fast"] = fast
profile["thinking"] = thinking
# 基础校验:必须有 fast 段且有 key # 基础校验:必须有 fast 段且有 key
fast = profile.get("fast") or {} fast = profile.get("fast") or {}
if not fast.get("api_key"): if not fast.get("api_key"):

View File

@ -0,0 +1,40 @@
from http.server import BaseHTTPRequestHandler, HTTPServer
import json
HOST = "0.0.0.0"
PORT = 8899
ERROR_MESSAGE = "hour allocated quota exceeded"


class Handler(BaseHTTPRequestHandler):
    """Mock endpoint that answers every POST with an HTTP 429 carrying an
    aliyun-style "quota exceeded" JSON error body."""

    def _send(self, code: int, payload: dict):
        # Serialize the payload and emit a complete, well-framed response.
        data = json.dumps(payload).encode("utf-8")
        self.send_response(code)
        self.send_header("Content-Type", "application/json; charset=utf-8")
        self.send_header("Content-Length", str(len(data)))
        self.end_headers()
        self.wfile.write(data)

    def do_POST(self):
        # Drain the request body so clients don't hit a broken pipe.
        try:
            remaining = int(self.headers.get("Content-Length", "0"))
        except ValueError:
            remaining = 0
        if remaining:
            _ = self.rfile.read(remaining)
        self._send(
            429,
            {"error": {"message": ERROR_MESSAGE, "type": "quota_exceeded"}},
        )

    def log_message(self, format, *args):
        # Silence the default per-request console logging.
        return


if __name__ == "__main__":
    server = HTTPServer((HOST, PORT), Handler)
    print(f"mock aliyun quota server running on http://{HOST}:{PORT}")
    server.serve_forever()

View File

@ -505,6 +505,11 @@ async def handle_task_with_sender(terminal: WebTerminal, workspace: UserWorkspac
context = web_terminal.build_context() context = web_terminal.build_context()
messages = web_terminal.build_messages(context, message) messages = web_terminal.build_messages(context, message)
tools = web_terminal.define_tools() tools = web_terminal.define_tools()
try:
profile = get_model_profile(getattr(web_terminal, "model_key", None) or "kimi-k2.5")
web_terminal.apply_model_profile(profile)
except Exception as exc:
debug_log(f"更新模型配置失败: {exc}")
# === 上下文预算与安全校验(避免超出模型上下文) === # === 上下文预算与安全校验(避免超出模型上下文) ===
max_context_tokens = get_model_context_window(getattr(web_terminal, "model_key", None) or "kimi-k2.5") max_context_tokens = get_model_context_window(getattr(web_terminal, "model_key", None) or "kimi-k2.5")
@ -559,6 +564,8 @@ async def handle_task_with_sender(terminal: WebTerminal, workspace: UserWorkspac
# 设置最大迭代次数API 可覆盖) # 设置最大迭代次数API 可覆盖)
max_iterations = getattr(web_terminal, "max_iterations_override", None) or MAX_ITERATIONS_PER_TASK max_iterations = getattr(web_terminal, "max_iterations_override", None) or MAX_ITERATIONS_PER_TASK
max_api_retries = 4
retry_delay_seconds = 10
pending_append = None # {"path": str, "tool_call_id": str, "buffer": str, ...} pending_append = None # {"path": str, "tool_call_id": str, "buffer": str, ...}
append_probe_buffer = "" append_probe_buffer = ""
@ -1199,6 +1206,25 @@ async def handle_task_with_sender(terminal: WebTerminal, workspace: UserWorkspac
}) })
maybe_mark_failure_from_message(web_terminal, message) maybe_mark_failure_from_message(web_terminal, message)
async def _wait_retry_delay(delay_seconds: int) -> bool:
    """Sleep for the retry interval while polling for a user stop request.

    Returns True when the user cancelled during the wait (a 'task_stopped'
    event has been sent and the stop flag cleared); False when the full
    delay elapsed without a stop request.
    """
    if delay_seconds <= 0:
        return False
    deadline = time.time() + delay_seconds
    while time.time() < deadline:
        # Poll the per-client stop flag every ~200 ms so cancellation stays responsive.
        client_stop_info = get_stop_flag(client_sid, username)
        if client_stop_info:
            # The flag may be a dict ({'stop': bool, ...}) or a bare truthy value.
            stop_requested = client_stop_info.get('stop', False) if isinstance(client_stop_info, dict) else client_stop_info
            if stop_requested:
                sender('task_stopped', {
                    'message': '命令执行被用户取消',
                    'reason': 'user_stop'
                })
                clear_stop_flag(client_sid, username)
                return True
        await asyncio.sleep(0.2)
    return False
for iteration in range(max_iterations): for iteration in range(max_iterations):
total_iterations += 1 total_iterations += 1
debug_log(f"\n--- 迭代 {iteration + 1}/{max_iterations} 开始 ---") debug_log(f"\n--- 迭代 {iteration + 1}/{max_iterations} 开始 ---")
@ -1293,6 +1319,33 @@ async def handle_task_with_sender(terminal: WebTerminal, workspace: UserWorkspac
print(f"[API] 第{iteration + 1}次调用 (总工具调用: {total_tool_calls}/{MAX_TOTAL_TOOL_CALLS})") print(f"[API] 第{iteration + 1}次调用 (总工具调用: {total_tool_calls}/{MAX_TOTAL_TOOL_CALLS})")
api_error = None
for api_attempt in range(max_api_retries + 1):
api_error = None
if api_attempt > 0:
full_response = ""
tool_calls = []
current_thinking = ""
detected_tools = {}
last_usage_payload = None
in_thinking = False
thinking_started = False
thinking_ended = False
text_started = False
text_has_content = False
text_streaming = False
text_chunk_index = 0
last_text_chunk_time = None
chunk_count = 0
reasoning_chunks = 0
content_chunks = 0
tool_chunks = 0
append_break_triggered = False
append_result = {"handled": False}
modify_break_triggered = False
modify_result = {"handled": False}
last_finish_reason = None
# 收集流式响应 # 收集流式响应
async for chunk in web_terminal.api_client.chat(messages, tools, stream=True): async for chunk in web_terminal.api_client.chat(messages, tools, stream=True):
chunk_count += 1 chunk_count += 1
@ -1315,6 +1368,10 @@ async def handle_task_with_sender(terminal: WebTerminal, workspace: UserWorkspac
clear_stop_flag(client_sid, username) clear_stop_flag(client_sid, username)
return return
if isinstance(chunk, dict) and chunk.get("error"):
api_error = chunk.get("error")
break
# 先尝试记录 usage有些平台会在最后一个 chunk 里携带 usage 但 choices 为空) # 先尝试记录 usage有些平台会在最后一个 chunk 里携带 usage 但 choices 为空)
usage_info = chunk.get("usage") usage_info = chunk.get("usage")
if usage_info: if usage_info:
@ -1687,6 +1744,58 @@ async def handle_task_with_sender(terminal: WebTerminal, workspace: UserWorkspac
else: else:
debug_log("未获取到usage字段跳过token统计更新") debug_log("未获取到usage字段跳过token统计更新")
if api_error:
error_message = ""
error_status = None
error_type = None
if isinstance(api_error, dict):
error_status = api_error.get("status_code")
error_type = api_error.get("error_type")
error_message = api_error.get("error_message") or api_error.get("error_text") or ""
if not error_message:
error_message = "API 请求失败"
# 若命中阿里云配额错误,立即写入状态并切换到官方 API
try:
from utils.aliyun_fallback import compute_disabled_until, set_disabled_until
disabled_until, reason = compute_disabled_until(error_message)
if disabled_until and reason:
set_disabled_until(getattr(web_terminal, "model_key", None) or "kimi-k2.5", disabled_until, reason)
profile = get_model_profile(getattr(web_terminal, "model_key", None) or "kimi-k2.5")
web_terminal.apply_model_profile(profile)
except Exception as exc:
debug_log(f"处理阿里云配额回退失败: {exc}")
can_retry = (
api_attempt < max_api_retries
and not full_response
and not tool_calls
and not current_thinking
and not pending_append
and not pending_modify
)
sender('error', {
'message': error_message,
'status_code': error_status,
'error_type': error_type,
'retry': bool(can_retry),
'retry_in': retry_delay_seconds if can_retry else None,
'attempt': api_attempt + 1,
'max_attempts': max_api_retries + 1
})
if can_retry:
try:
profile = get_model_profile(getattr(web_terminal, "model_key", None) or "kimi-k2.5")
web_terminal.apply_model_profile(profile)
except Exception as exc:
debug_log(f"重试前更新模型配置失败: {exc}")
cancelled = await _wait_retry_delay(retry_delay_seconds)
if cancelled:
return
continue
_cancel_pending_tools(tool_calls)
return
break
# 流结束后的处理 # 流结束后的处理
debug_log(f"\n流结束统计:") debug_log(f"\n流结束统计:")
debug_log(f" 总chunks: {chunk_count}") debug_log(f" 总chunks: {chunk_count}")

View File

@ -1042,6 +1042,10 @@ export async function initializeLegacySocket(ctx: any) {
if (!msg) { if (!msg) {
return; return;
} }
if (msg.awaitingFirstContent) {
msg.awaitingFirstContent = false;
msg.generatingLabel = '';
}
const action = { const action = {
id: data.id, id: data.id,
type: 'tool', type: 'tool',
@ -1405,7 +1409,10 @@ export async function initializeLegacySocket(ctx: any) {
const msg = data?.message || '发生未知错误'; const msg = data?.message || '发生未知错误';
const code = data?.status_code; const code = data?.status_code;
const errType = data?.error_type; const errType = data?.error_type;
ctx.addSystemMessage(`错误: ${msg}`); const shouldRetry = Boolean(data?.retry);
const retryIn = Number(data?.retry_in) || 5;
const retryAttempt = Number(data?.attempt) || 1;
const retryMax = Number(data?.max_attempts) || retryAttempt;
if (typeof ctx.uiPushToast === 'function') { if (typeof ctx.uiPushToast === 'function') {
ctx.uiPushToast({ ctx.uiPushToast({
title: code ? `API错误 ${code}` : 'API错误', title: code ? `API错误 ${code}` : 'API错误',
@ -1413,8 +1420,35 @@ export async function initializeLegacySocket(ctx: any) {
type: 'error', type: 'error',
duration: 6000 duration: 6000
}); });
if (shouldRetry) {
ctx.uiPushToast({
title: '即将重试',
message: `将在 ${retryIn} 秒后重试(第 ${retryAttempt}/${retryMax} 次)`,
type: 'info',
duration: Math.max(retryIn, 1) * 1000
});
}
}
if (shouldRetry) {
// 错误后保持停止按钮态,用户可手动停止或等待自动重试
ctx.stopRequested = false;
ctx.taskInProgress = true;
ctx.streamingMessage = true;
return;
}
// 最后一次报错:恢复输入状态并清理提示动画
const msgIndex = typeof ctx.currentMessageIndex === 'number' ? ctx.currentMessageIndex : -1;
if (msgIndex >= 0 && Array.isArray(ctx.messages)) {
const currentMessage = ctx.messages[msgIndex];
if (currentMessage && currentMessage.role === 'assistant') {
currentMessage.awaitingFirstContent = false;
currentMessage.generatingLabel = '';
}
}
if (typeof ctx.chatClearThinkingLocks === 'function') {
ctx.chatClearThinkingLocks();
} }
// 仅标记当前流结束,避免状态错乱
ctx.streamingMessage = false; ctx.streamingMessage = false;
ctx.stopRequested = false; ctx.stopRequested = false;
ctx.taskInProgress = false; ctx.taskInProgress = false;

103
utils/aliyun_fallback.py Normal file
View File

@ -0,0 +1,103 @@
import json
from datetime import datetime, timedelta, timezone
from pathlib import Path
from typing import Dict, Optional, Tuple
# Models eligible for the official-API fallback when aliyun quota is exhausted.
FALLBACK_MODELS = {"qwen3-vl-plus", "kimi-k2.5", "minimax-m2.5"}
# On-disk state: {"models": {model_key: {"disabled_until": ts, "reason": str, "updated_at": ts}}}
STATE_PATH = Path(__file__).resolve().parents[1] / "data" / "aliyun_fallback_state.json"


def _read_state() -> Dict:
    """Load the fallback state file; always return a dict with a 'models' mapping."""
    if not STATE_PATH.exists():
        return {"models": {}}
    try:
        state = json.loads(STATE_PATH.read_text(encoding="utf-8"))
    except Exception:
        # Corrupt or unreadable file: start from an empty state.
        return {"models": {}}
    if not isinstance(state, dict):
        return {"models": {}}
    if not isinstance(state.get("models"), dict):
        state["models"] = {}
    return state


def _write_state(data: Dict) -> None:
    """Persist *data* to STATE_PATH, creating parent directories as needed."""
    STATE_PATH.parent.mkdir(parents=True, exist_ok=True)
    STATE_PATH.write_text(json.dumps(data, ensure_ascii=False, indent=2), encoding="utf-8")


def get_disabled_until(model_key: str) -> Optional[float]:
    """Return the disable deadline (epoch seconds) recorded for *model_key*, or None."""
    entry = _read_state().get("models", {}).get(model_key) or {}
    ts = entry.get("disabled_until")
    if ts is None:
        return None
    try:
        return float(ts)
    except (TypeError, ValueError):
        return None


def is_fallback_active(model_key: str, now_ts: Optional[float] = None) -> bool:
    """True when *model_key* is fallback-managed and its disable window is still open."""
    if model_key not in FALLBACK_MODELS:
        return False
    if now_ts is None:
        now_ts = datetime.now(tz=timezone.utc).timestamp()
    else:
        now_ts = float(now_ts)
    deadline = get_disabled_until(model_key)
    return bool(deadline and deadline > now_ts)


def set_disabled_until(model_key: str, disabled_until_ts: float, reason: str = "") -> None:
    """Record that *model_key* is disabled until *disabled_until_ts* (epoch seconds).

    No-op for models outside FALLBACK_MODELS.
    """
    if model_key not in FALLBACK_MODELS:
        return
    state = _read_state()
    state.setdefault("models", {})[model_key] = {
        "disabled_until": float(disabled_until_ts),
        "reason": reason,
        "updated_at": datetime.now(tz=timezone.utc).timestamp(),
    }
    _write_state(state)
def _next_monday_utc8(now: datetime) -> datetime:
# Monday = 0
weekday = now.weekday()
days_ahead = (7 - weekday) % 7
if days_ahead == 0:
days_ahead = 7
target = (now + timedelta(days=days_ahead)).replace(hour=0, minute=0, second=0, microsecond=0)
return target
def _next_month_same_day_utc8(now: datetime) -> datetime:
year = now.year
month = now.month + 1
if month > 12:
month = 1
year += 1
# clamp day to last day of next month
if month == 12:
next_month = datetime(year + 1, 1, 1, tzinfo=now.tzinfo)
else:
next_month = datetime(year, month + 1, 1, tzinfo=now.tzinfo)
last_day = (next_month - timedelta(days=1)).day
day = min(now.day, last_day)
return datetime(year, month, day, 0, 0, 0, tzinfo=now.tzinfo)
def compute_disabled_until(error_text: str) -> Tuple[Optional[float], Optional[str]]:
if not error_text:
return None, None
text = str(error_text).lower()
tz8 = timezone(timedelta(hours=8))
now = datetime.now(tz=tz8)
if "hour allocated quota exceeded" in text or "每 5 小时请求额度已用完" in text:
until = now + timedelta(hours=5)
return until.astimezone(timezone.utc).timestamp(), "hour_quota"
if "week allocated quota exceeded" in text or "每周请求额度已用完" in text:
until = _next_monday_utc8(now)
return until.astimezone(timezone.utc).timestamp(), "week_quota"
if "month allocated quota exceeded" in text or "每月请求额度已用完" in text:
until = _next_month_same_day_utc8(now)
return until.astimezone(timezone.utc).timestamp(), "month_quota"
return None, None

View File

@ -6,9 +6,12 @@ import json
import asyncio import asyncio
import base64 import base64
import mimetypes import mimetypes
import os
from typing import List, Dict, Optional, AsyncGenerator, Any from typing import List, Dict, Optional, AsyncGenerator, Any
from pathlib import Path from pathlib import Path
from datetime import datetime from datetime import datetime
from pathlib import Path
from typing import Tuple
try: try:
from config import ( from config import (
API_BASE_URL, API_BASE_URL,
@ -78,6 +81,73 @@ class DeepSeekClient:
# 请求体落盘目录 # 请求体落盘目录
self.request_dump_dir = Path(__file__).resolve().parents[1] / "logs" / "api_requests" self.request_dump_dir = Path(__file__).resolve().parents[1] / "logs" / "api_requests"
self.request_dump_dir.mkdir(parents=True, exist_ok=True) self.request_dump_dir.mkdir(parents=True, exist_ok=True)
self.debug_log_path = Path(__file__).resolve().parents[1] / "logs" / "api_debug.log"
def _maybe_mark_aliyun_quota(self, error_text: str) -> None:
    """Check an API error message for aliyun quota exhaustion; on a match,
    persist a disable window for the current model and hot-swap this
    client's endpoints to the official API.

    Best-effort: silently returns when the fallback helper cannot be
    imported, the message matches no quota pattern, or no official
    base/key is configured for this model.
    """
    if not error_text or not self.model_key:
        return
    try:
        from utils.aliyun_fallback import compute_disabled_until, set_disabled_until
    except Exception:
        # Fallback module unavailable: quota handling is optional.
        return
    disabled_until, reason = compute_disabled_until(error_text)
    if disabled_until and reason:
        set_disabled_until(self.model_key, disabled_until, reason)
        # Switch to the official API immediately (only when configured).
        base_env_key = None
        key_env_key = None
        if self.model_key == "kimi-k2.5":
            base_env_key = "API_BASE_KIMI_OFFICIAL"
            key_env_key = "API_KEY_KIMI_OFFICIAL"
        elif self.model_key == "qwen3-vl-plus":
            base_env_key = "API_BASE_QWEN_OFFICIAL"
            key_env_key = "API_KEY_QWEN_OFFICIAL"
        elif self.model_key == "minimax-m2.5":
            base_env_key = "API_BASE_MINIMAX_OFFICIAL"
            key_env_key = "API_KEY_MINIMAX_OFFICIAL"
        if base_env_key and key_env_key:
            official_base = self._resolve_env_value(base_env_key)
            official_key = self._resolve_env_value(key_env_key)
            if official_base and official_key:
                # Rewrite both fast and thinking configs plus the top-level
                # fields so every request path uses the official endpoint.
                self.fast_api_config["base_url"] = official_base
                self.fast_api_config["api_key"] = official_key
                self.thinking_api_config["base_url"] = official_base
                self.thinking_api_config["api_key"] = official_key
                self.api_base_url = official_base
                self.api_key = official_key
def _debug_log(self, payload: Dict[str, Any]) -> None:
    """Append *payload* as a single JSON line (with an ISO 'ts' timestamp)
    to the debug log file at self.debug_log_path.

    Best-effort: any failure creating the directory or writing the line is
    swallowed so logging can never break an API request.
    """
    try:
        entry = {
            "ts": datetime.now().isoformat(),
            **payload
        }
        self.debug_log_path.parent.mkdir(parents=True, exist_ok=True)
        with self.debug_log_path.open("a", encoding="utf-8") as f:
            f.write(json.dumps(entry, ensure_ascii=False) + "\n")
    except Exception:
        # Deliberate best-effort: debug logging must never raise.
        pass
def _resolve_env_value(self, name: str) -> Optional[str]:
    """Resolve *name* from os.environ, falling back to the project-root
    .env file so values updated while the process runs are picked up.

    .env values are stripped of whitespace and matching single/double
    quotes; blank results are normalized to None.
    NOTE(review): duplicates the module-level _env_optional helper in the
    config module — consider sharing a single implementation.
    """
    value = os.environ.get(name)
    if value is None:
        env_path = Path(__file__).resolve().parents[1] / ".env"
        if env_path.exists():
            try:
                for raw_line in env_path.read_text(encoding="utf-8").splitlines():
                    line = raw_line.strip()
                    # Skip blanks, comments, and lines without KEY=VALUE shape.
                    if not line or line.startswith("#") or "=" not in line:
                        continue
                    key, val = line.split("=", 1)
                    if key.strip() == name:
                        value = val.strip().strip('"').strip("'")
                        break
            except Exception:
                # Unreadable .env: treat the variable as absent.
                value = None
    if value is None:
        return None
    value = value.strip()
    return value or None
def _print(self, message: str, end: str = "\n", flush: bool = False): def _print(self, message: str, end: str = "\n", flush: bool = False):
"""安全的打印函数在Web模式下不输出""" """安全的打印函数在Web模式下不输出"""
@ -568,7 +638,10 @@ class DeepSeekClient:
"error_text": error_text, "error_text": error_text,
"error_type": None, "error_type": None,
"error_message": None, "error_message": None,
"request_dump": str(dump_path) "request_dump": str(dump_path),
"base_url": api_config.get("base_url"),
"model_id": api_config.get("model_id"),
"model_key": self.model_key
} }
try: try:
parsed = json.loads(error_text) parsed = json.loads(error_text)
@ -578,7 +651,20 @@ class DeepSeekClient:
self.last_error_info["error_message"] = err.get("message") self.last_error_info["error_message"] = err.get("message")
except Exception: except Exception:
pass pass
self._print(f"{OUTPUT_FORMATS['error']} API请求失败 ({response.status_code}): {error_text}") self._maybe_mark_aliyun_quota(error_text)
self._debug_log({
"event": "http_error_stream",
"status_code": response.status_code,
"error_text": error_text,
"base_url": api_config.get("base_url"),
"model_id": api_config.get("model_id"),
"model_key": self.model_key,
"request_dump": str(dump_path)
})
self._print(
f"{OUTPUT_FORMATS['error']} API请求失败 ({response.status_code}): {error_text} "
f"(base_url={api_config.get('base_url')}, model_id={api_config.get('model_id')})"
)
self._mark_request_error(dump_path, response.status_code, error_text) self._mark_request_error(dump_path, response.status_code, error_text)
yield {"error": self.last_error_info} yield {"error": self.last_error_info}
return return
@ -607,7 +693,10 @@ class DeepSeekClient:
"error_text": error_text, "error_text": error_text,
"error_type": None, "error_type": None,
"error_message": None, "error_message": None,
"request_dump": str(dump_path) "request_dump": str(dump_path),
"base_url": api_config.get("base_url"),
"model_id": api_config.get("model_id"),
"model_key": self.model_key
} }
try: try:
parsed = response.json() parsed = response.json()
@ -617,7 +706,20 @@ class DeepSeekClient:
self.last_error_info["error_message"] = err.get("message") self.last_error_info["error_message"] = err.get("message")
except Exception: except Exception:
pass pass
self._print(f"{OUTPUT_FORMATS['error']} API请求失败 ({response.status_code}): {error_text}") self._maybe_mark_aliyun_quota(error_text)
self._debug_log({
"event": "http_error",
"status_code": response.status_code,
"error_text": error_text,
"base_url": api_config.get("base_url"),
"model_id": api_config.get("model_id"),
"model_key": self.model_key,
"request_dump": str(dump_path)
})
self._print(
f"{OUTPUT_FORMATS['error']} API请求失败 ({response.status_code}): {error_text} "
f"(base_url={api_config.get('base_url')}, model_id={api_config.get('model_id')})"
)
self._mark_request_error(dump_path, response.status_code, error_text) self._mark_request_error(dump_path, response.status_code, error_text)
yield {"error": self.last_error_info} yield {"error": self.last_error_info}
return return
@ -632,8 +734,21 @@ class DeepSeekClient:
"error_text": "connect_error", "error_text": "connect_error",
"error_type": "connection_error", "error_type": "connection_error",
"error_message": "无法连接到API服务器", "error_message": "无法连接到API服务器",
"request_dump": str(dump_path) "request_dump": str(dump_path),
"base_url": api_config.get("base_url"),
"model_id": api_config.get("model_id"),
"model_key": self.model_key
} }
self._maybe_mark_aliyun_quota(self.last_error_info.get("error_text"))
self._debug_log({
"event": "connect_error",
"status_code": None,
"error_text": "connect_error",
"base_url": api_config.get("base_url"),
"model_id": api_config.get("model_id"),
"model_key": self.model_key,
"request_dump": str(dump_path)
})
self._mark_request_error(dump_path, error_text="connect_error") self._mark_request_error(dump_path, error_text="connect_error")
yield {"error": self.last_error_info} yield {"error": self.last_error_info}
except httpx.TimeoutException: except httpx.TimeoutException:
@ -643,8 +758,21 @@ class DeepSeekClient:
"error_text": "timeout", "error_text": "timeout",
"error_type": "timeout", "error_type": "timeout",
"error_message": "API请求超时", "error_message": "API请求超时",
"request_dump": str(dump_path) "request_dump": str(dump_path),
"base_url": api_config.get("base_url"),
"model_id": api_config.get("model_id"),
"model_key": self.model_key
} }
self._maybe_mark_aliyun_quota(self.last_error_info.get("error_text"))
self._debug_log({
"event": "timeout",
"status_code": None,
"error_text": "timeout",
"base_url": api_config.get("base_url"),
"model_id": api_config.get("model_id"),
"model_key": self.model_key,
"request_dump": str(dump_path)
})
self._mark_request_error(dump_path, error_text="timeout") self._mark_request_error(dump_path, error_text="timeout")
yield {"error": self.last_error_info} yield {"error": self.last_error_info}
except Exception as e: except Exception as e:
@ -654,8 +782,21 @@ class DeepSeekClient:
"error_text": str(e), "error_text": str(e),
"error_type": "exception", "error_type": "exception",
"error_message": str(e), "error_message": str(e),
"request_dump": str(dump_path) "request_dump": str(dump_path),
"base_url": api_config.get("base_url"),
"model_id": api_config.get("model_id"),
"model_key": self.model_key
} }
self._maybe_mark_aliyun_quota(self.last_error_info.get("error_text"))
self._debug_log({
"event": "exception",
"status_code": None,
"error_text": str(e),
"base_url": api_config.get("base_url"),
"model_id": api_config.get("model_id"),
"model_key": self.model_key,
"request_dump": str(dump_path)
})
self._mark_request_error(dump_path, error_text=str(e)) self._mark_request_error(dump_path, error_text=str(e))
yield {"error": self.last_error_info} yield {"error": self.last_error_info}