feat: add aliyun quota fallback

2026-03-06 12:31:20 +08:00 · 2026-03-06 12:31:20 +08:00 · 868640b479
commit 868640b479
parent 4be61fe76e
6 changed files with 864 additions and 369 deletions
--- a/config/model_profiles.py
+++ b/config/model_profiles.py
@ -1,9 +1,32 @@
 import os
 from pathlib import Path
 from typing import Optional
 def _env(name: str, default: str = "") -> str:
    """Return the environment variable *name*, or *default* when it is unset."""
    return os.getenv(name, default)
 def _env_optional(name: str) -> Optional[str]:
    """Look up *name* in the process environment, then in the project ``.env``.

    The ``.env`` file (one directory above this module's directory) is only
    consulted when the variable is absent from ``os.environ``; it is re-read
    on every call, so edits made while the process runs are picked up.

    Returns the stripped value, or ``None`` when the variable is missing or
    blank.
    """
    raw = os.environ.get(name)
    if raw is None:
        # Fall back to the .env file (supports updates at runtime).
        dotenv = Path(__file__).resolve().parents[1] / ".env"
        if dotenv.exists():
            try:
                entries = dotenv.read_text(encoding="utf-8").splitlines()
                for entry in entries:
                    entry = entry.strip()
                    # Skip blanks, comments and lines that are not assignments.
                    if entry == "" or entry.startswith("#") or "=" not in entry:
                        continue
                    lhs, _, rhs = entry.partition("=")
                    if lhs.strip() != name:
                        continue
                    raw = rhs.strip().strip('"').strip("'")
                    break
            except Exception:
                # Best effort: an unreadable .env counts as "not configured".
                raw = None
    if raw is None:
        return None
    cleaned = raw.strip()
    return cleaned if cleaned else None
 # 模型上下文窗口（单位: token）
 CONTEXT_WINDOWS = {
@ -19,6 +42,8 @@ CONTEXT_WINDOWS = {
 # 默认（Kimi）
 KIMI_BASE = _env("API_BASE_KIMI", _env("AGENT_API_BASE_URL", "https://api.moonshot.cn/v1"))
 KIMI_KEY = _env("API_KEY_KIMI", _env("AGENT_API_KEY", ""))
 KIMI_BASE_OFFICIAL = _env_optional("API_BASE_KIMI_OFFICIAL")
 KIMI_KEY_OFFICIAL = _env_optional("API_KEY_KIMI_OFFICIAL")
 KIMI_FAST_MODEL = _env("MODEL_KIMI_FAST", _env("AGENT_MODEL_ID", "kimi-k2-0905-preview"))
 KIMI_THINK_MODEL = _env("MODEL_KIMI_THINK", _env("AGENT_THINKING_MODEL_ID", "kimi-k2-thinking"))
 KIMI_25_MODEL = _env("MODEL_KIMI_25", "kimi-k2.5")
@ -32,12 +57,16 @@ DEEPSEEK_THINK_MODEL = _env("MODEL_DEEPSEEK_THINK", "deepseek-reasoner")
 # Qwen
 QWEN_BASE = _env("API_BASE_QWEN", "https://dashscope.aliyuncs.com/compatible-mode/v1")
 QWEN_KEY = _env("API_KEY_QWEN", _env("DASHSCOPE_API_KEY", ""))
 QWEN_BASE_OFFICIAL = _env_optional("API_BASE_QWEN_OFFICIAL")
 QWEN_KEY_OFFICIAL = _env_optional("API_KEY_QWEN_OFFICIAL")
 QWEN_MAX_MODEL = _env("MODEL_QWEN_MAX", "qwen3-max")
 QWEN_VL_MODEL = _env("MODEL_QWEN_VL", "qwen3.5-plus")
 # MiniMax
 MINIMAX_BASE = _env("API_BASE_MINIMAX", "https://api.minimaxi.com/v1")
 MINIMAX_KEY = _env("API_KEY_MINIMAX", "")
 MINIMAX_BASE_OFFICIAL = _env_optional("API_BASE_MINIMAX_OFFICIAL")
 MINIMAX_KEY_OFFICIAL = _env_optional("API_KEY_MINIMAX_OFFICIAL")
 MINIMAX_MODEL = _env("MODEL_MINIMAX", "MiniMax-M2.5")
@ -78,7 +107,7 @@ MODEL_PROFILES = {
            "model_id": KIMI_25_MODEL,
            "max_tokens": None,
            "context_window": CONTEXT_WINDOWS["kimi-k2.5"],
-            "extra_params": {"thinking": {"type": "enabled"}}
+            "extra_params": {"thinking": {"type": "enabled"}, "enable_thinking": True}
        },
        "supports_thinking": True,
        "fast_only": False,
@ -204,6 +233,45 @@ def get_model_profile(key: str) -> dict:
    if key not in MODEL_PROFILES:
        raise ValueError(f"未知模型 key: {key}")
    profile = MODEL_PROFILES[key]
    try:
        from utils.aliyun_fallback import is_fallback_active
    except Exception:
        is_fallback_active = None
    if is_fallback_active and is_fallback_active(key):
        if key == "kimi-k2.5":
            kimi_base_official = _env_optional("API_BASE_KIMI_OFFICIAL") or KIMI_BASE_OFFICIAL
            kimi_key_official = _env_optional("API_KEY_KIMI_OFFICIAL") or KIMI_KEY_OFFICIAL
            if kimi_base_official and kimi_key_official:
                profile = dict(profile)
                fast = dict(profile.get("fast") or {})
                thinking = dict(profile.get("thinking") or fast)
                fast.update({"base_url": kimi_base_official, "api_key": kimi_key_official})
                thinking.update({"base_url": kimi_base_official, "api_key": kimi_key_official})
                profile["fast"] = fast
                profile["thinking"] = thinking
        elif key == "qwen3-vl-plus":
            qwen_base_official = _env_optional("API_BASE_QWEN_OFFICIAL") or QWEN_BASE_OFFICIAL
            qwen_key_official = _env_optional("API_KEY_QWEN_OFFICIAL") or QWEN_KEY_OFFICIAL
            if qwen_base_official and qwen_key_official:
                profile = dict(profile)
                fast = dict(profile.get("fast") or {})
                thinking = dict(profile.get("thinking") or fast)
                fast.update({"base_url": qwen_base_official, "api_key": qwen_key_official})
                thinking.update({"base_url": qwen_base_official, "api_key": qwen_key_official})
                profile["fast"] = fast
                profile["thinking"] = thinking
        elif key == "minimax-m2.5":
            minimax_base_official = _env_optional("API_BASE_MINIMAX_OFFICIAL") or MINIMAX_BASE_OFFICIAL
            minimax_key_official = _env_optional("API_KEY_MINIMAX_OFFICIAL") or MINIMAX_KEY_OFFICIAL
            if minimax_base_official and minimax_key_official:
                profile = dict(profile)
                fast = dict(profile.get("fast") or {})
                thinking = dict(profile.get("thinking") or fast)
                fast.update({"base_url": minimax_base_official, "api_key": minimax_key_official})
                thinking.update({"base_url": minimax_base_official, "api_key": minimax_key_official})
                profile["fast"] = fast
                profile["thinking"] = thinking
    # 基础校验：必须有 fast 段且有 key
    fast = profile.get("fast") or {}
    if not fast.get("api_key"):
--- a/scripts/mock_aliyun_quota_server.py
+++ b/scripts/mock_aliyun_quota_server.py
@ -0,0 +1,40 @@
 from http.server import BaseHTTPRequestHandler, HTTPServer
 import json
 # Address the mock server binds to; "0.0.0.0" listens on every interface.
 HOST = "0.0.0.0"
 # TCP port the mock server listens on.
 PORT = 8899
 # Error message placed in the body of every response sent by Handler.do_POST.
 ERROR_MESSAGE = "hour allocated quota exceeded"
 class Handler(BaseHTTPRequestHandler):
    """Request handler that answers every POST with a fixed 429 quota error."""

    def _send(self, code: int, payload: dict):
        """Serialize *payload* as JSON and send it with HTTP status *code*."""
        body = json.dumps(payload).encode("utf-8")
        self.send_response(code)
        self.send_header("Content-Type", "application/json; charset=utf-8")
        self.send_header("Content-Length", str(len(body)))
        self.end_headers()
        self.wfile.write(body)

    def do_POST(self):
        """Drain the request body, then reply with the quota-exceeded error."""
        # Consume the request body to avoid a broken pipe on clients.
        try:
            length = int(self.headers.get("Content-Length", "0"))
        except ValueError:
            length = 0
        # Only read a positive byte count: a negative Content-Length would
        # make read() consume until EOF, which blocks on an open connection.
        if length > 0:
            self.rfile.read(length)
        payload = {
            "error": {
                "message": ERROR_MESSAGE,
                "type": "quota_exceeded"
            }
        }
        self._send(429, payload)

    def log_message(self, format, *args):
        """Suppress the default per-request logging."""
        return
 if __name__ == "__main__":
    # The context manager closes the listening socket once serve_forever()
    # stops (for example on Ctrl-C) instead of leaving it to interpreter exit.
    with HTTPServer((HOST, PORT), Handler) as server:
        print(f"mock aliyun quota server running on http://{HOST}:{PORT}")
        server.serve_forever()
--- a/server/chat_flow.py
+++ b/server/chat_flow.py
@ -505,6 +505,11 @@ async def handle_task_with_sender(terminal: WebTerminal, workspace: UserWorkspac
    context = web_terminal.build_context()
    messages = web_terminal.build_messages(context, message)
    tools = web_terminal.define_tools()
    try:
        profile = get_model_profile(getattr(web_terminal, "model_key", None) or "kimi-k2.5")
        web_terminal.apply_model_profile(profile)
    except Exception as exc:
        debug_log(f"更新模型配置失败: {exc}")
    # === 上下文预算与安全校验（避免超出模型上下文） ===
    max_context_tokens = get_model_context_window(getattr(web_terminal, "model_key", None) or "kimi-k2.5")
@ -559,6 +564,8 @@ async def handle_task_with_sender(terminal: WebTerminal, workspace: UserWorkspac
    # 设置最大迭代次数（API 可覆盖）
    max_iterations = getattr(web_terminal, "max_iterations_override", None) or MAX_ITERATIONS_PER_TASK
    max_api_retries = 4
    retry_delay_seconds = 10
    pending_append = None  # {"path": str, "tool_call_id": str, "buffer": str, ...}
    append_probe_buffer = ""
@ -1199,6 +1206,25 @@ async def handle_task_with_sender(terminal: WebTerminal, workspace: UserWorkspac
            })
            maybe_mark_failure_from_message(web_terminal, message)
    async def _wait_retry_delay(delay_seconds: int) -> bool:
        """Wait out the retry interval while polling for a user stop request.

        Returns True when a stop request was seen during the wait (a
        'task_stopped' event is sent and the stop flag is cleared), and False
        when the delay elapsed or *delay_seconds* is not positive.
        """
        if delay_seconds <= 0:
            return False
        # Use the monotonic clock so the wait keeps its intended length even
        # if the system wall clock is adjusted while we are waiting.
        deadline = time.monotonic() + delay_seconds
        while time.monotonic() < deadline:
            client_stop_info = get_stop_flag(client_sid, username)
            if client_stop_info:
                # The flag is either a dict carrying a 'stop' entry or a bare value.
                stop_requested = client_stop_info.get('stop', False) if isinstance(client_stop_info, dict) else client_stop_info
                if stop_requested:
                    sender('task_stopped', {
                        'message': '命令执行被用户取消',
                        'reason': 'user_stop'
                    })
                    clear_stop_flag(client_sid, username)
                    return True
            await asyncio.sleep(0.2)
        return False
    for iteration in range(max_iterations):
        total_iterations += 1
        debug_log(f"\n--- 迭代 {iteration + 1}/{max_iterations} 开始 ---")
@ -1293,6 +1319,33 @@ async def handle_task_with_sender(terminal: WebTerminal, workspace: UserWorkspac
        print(f"[API] 第{iteration + 1}次调用 (总工具调用: {total_tool_calls}/{MAX_TOTAL_TOOL_CALLS})")
        api_error = None
        for api_attempt in range(max_api_retries + 1):
            api_error = None
            if api_attempt > 0:
                full_response = ""
                tool_calls = []
                current_thinking = ""
                detected_tools = {}
                last_usage_payload = None
                in_thinking = False
                thinking_started = False
                thinking_ended = False
                text_started = False
                text_has_content = False
                text_streaming = False
                text_chunk_index = 0
                last_text_chunk_time = None
                chunk_count = 0
                reasoning_chunks = 0
                content_chunks = 0
                tool_chunks = 0
                append_break_triggered = False
                append_result = {"handled": False}
                modify_break_triggered = False
                modify_result = {"handled": False}
                last_finish_reason = None
            # 收集流式响应
            async for chunk in web_terminal.api_client.chat(messages, tools, stream=True):
                chunk_count += 1
@ -1315,6 +1368,10 @@ async def handle_task_with_sender(terminal: WebTerminal, workspace: UserWorkspac
                        clear_stop_flag(client_sid, username)
                        return
                if isinstance(chunk, dict) and chunk.get("error"):
                    api_error = chunk.get("error")
                    break
                # 先尝试记录 usage（有些平台会在最后一个 chunk 里携带 usage 但 choices 为空）
                usage_info = chunk.get("usage")
                if usage_info:
@ -1687,6 +1744,58 @@ async def handle_task_with_sender(terminal: WebTerminal, workspace: UserWorkspac
            else:
                debug_log("未获取到usage字段，跳过token统计更新")
            if api_error:
                error_message = ""
                error_status = None
                error_type = None
                if isinstance(api_error, dict):
                    error_status = api_error.get("status_code")
                    error_type = api_error.get("error_type")
                    error_message = api_error.get("error_message") or api_error.get("error_text") or ""
                if not error_message:
                    error_message = "API 请求失败"
                # 若命中阿里云配额错误，立即写入状态并切换到官方 API
                try:
                    from utils.aliyun_fallback import compute_disabled_until, set_disabled_until
                    disabled_until, reason = compute_disabled_until(error_message)
                    if disabled_until and reason:
                        set_disabled_until(getattr(web_terminal, "model_key", None) or "kimi-k2.5", disabled_until, reason)
                        profile = get_model_profile(getattr(web_terminal, "model_key", None) or "kimi-k2.5")
                        web_terminal.apply_model_profile(profile)
                except Exception as exc:
                    debug_log(f"处理阿里云配额回退失败: {exc}")
                can_retry = (
                    api_attempt < max_api_retries
                    and not full_response
                    and not tool_calls
                    and not current_thinking
                    and not pending_append
                    and not pending_modify
                )
                sender('error', {
                    'message': error_message,
                    'status_code': error_status,
                    'error_type': error_type,
                    'retry': bool(can_retry),
                    'retry_in': retry_delay_seconds if can_retry else None,
                    'attempt': api_attempt + 1,
                    'max_attempts': max_api_retries + 1
                })
                if can_retry:
                    try:
                        profile = get_model_profile(getattr(web_terminal, "model_key", None) or "kimi-k2.5")
                        web_terminal.apply_model_profile(profile)
                    except Exception as exc:
                        debug_log(f"重试前更新模型配置失败: {exc}")
                    cancelled = await _wait_retry_delay(retry_delay_seconds)
                    if cancelled:
                        return
                    continue
                _cancel_pending_tools(tool_calls)
                return
            break
        # 流结束后的处理
        debug_log(f"\n流结束统计:")
        debug_log(f"  总chunks: {chunk_count}")
--- a/static/src/composables/useLegacySocket.ts
+++ b/static/src/composables/useLegacySocket.ts
@ -1042,6 +1042,10 @@ export async function initializeLegacySocket(ctx: any) {
            if (!msg) {
                return;
            }
            if (msg.awaitingFirstContent) {
                msg.awaitingFirstContent = false;
                msg.generatingLabel = '';
            }
            const action = {
                id: data.id,
                type: 'tool',
@ -1405,7 +1409,10 @@ export async function initializeLegacySocket(ctx: any) {
            const msg = data?.message || '发生未知错误';
            const code = data?.status_code;
            const errType = data?.error_type;
-            ctx.addSystemMessage(`错误: ${msg}`);
+            const shouldRetry = Boolean(data?.retry);
            const retryIn = Number(data?.retry_in) || 5;
            const retryAttempt = Number(data?.attempt) || 1;
            const retryMax = Number(data?.max_attempts) || retryAttempt;
            if (typeof ctx.uiPushToast === 'function') {
                ctx.uiPushToast({
                    title: code ? `API错误 ${code}` : 'API错误',
@ -1413,8 +1420,35 @@ export async function initializeLegacySocket(ctx: any) {
                    type: 'error',
                    duration: 6000
                });
                if (shouldRetry) {
                    ctx.uiPushToast({
                        title: '即将重试',
                        message: `将在 ${retryIn} 秒后重试（第 ${retryAttempt}/${retryMax} 次）`,
                        type: 'info',
                        duration: Math.max(retryIn, 1) * 1000
                    });
                }
            }
            if (shouldRetry) {
                // 错误后保持停止按钮态，用户可手动停止或等待自动重试
                ctx.stopRequested = false;
                ctx.taskInProgress = true;
                ctx.streamingMessage = true;
                return;
            }
            // 最后一次报错：恢复输入状态并清理提示动画
            const msgIndex = typeof ctx.currentMessageIndex === 'number' ? ctx.currentMessageIndex : -1;
            if (msgIndex >= 0 && Array.isArray(ctx.messages)) {
                const currentMessage = ctx.messages[msgIndex];
                if (currentMessage && currentMessage.role === 'assistant') {
                    currentMessage.awaitingFirstContent = false;
                    currentMessage.generatingLabel = '';
                }
            }
            if (typeof ctx.chatClearThinkingLocks === 'function') {
                ctx.chatClearThinkingLocks();
            }
            // 仅标记当前流结束，避免状态错乱
            ctx.streamingMessage = false;
            ctx.stopRequested = false;
            ctx.taskInProgress = false;
--- a/utils/aliyun_fallback.py
+++ b/utils/aliyun_fallback.py
@ -0,0 +1,103 @@
 import json
 from datetime import datetime, timedelta, timezone
 from pathlib import Path
 from typing import Dict, Optional, Tuple
 FALLBACK_MODELS = {"qwen3-vl-plus", "kimi-k2.5", "minimax-m2.5"}
 STATE_PATH = Path(__file__).resolve().parents[1] / "data" / "aliyun_fallback_state.json"
 def _read_state() -> Dict:
    """Load the fallback state file, always returning a dict with a "models" dict.

    A missing, unreadable, or non-object state file yields a fresh
    ``{"models": {}}``; a missing or malformed "models" entry is replaced by
    an empty dict.
    """
    if not STATE_PATH.exists():
        return {"models": {}}
    try:
        payload = json.loads(STATE_PATH.read_text(encoding="utf-8"))
    except Exception:
        # Unreadable or invalid JSON is treated like an absent state file.
        payload = None
    if not isinstance(payload, dict):
        return {"models": {}}
    if not isinstance(payload.get("models"), dict):
        payload["models"] = {}
    return payload
 def _write_state(data: Dict) -> None:
    """Persist *data* to the state file as pretty-printed UTF-8 JSON.

    The JSON is written to a sibling temporary file and then moved over the
    state file, so a reader (or a crash in the middle of the write) never
    sees a truncated file, which the reader would treat as empty state.
    """
    STATE_PATH.parent.mkdir(parents=True, exist_ok=True)
    serialized = json.dumps(data, ensure_ascii=False, indent=2)
    tmp_path = STATE_PATH.with_name(STATE_PATH.name + ".tmp")
    tmp_path.write_text(serialized, encoding="utf-8")
    # Path.replace overwrites the destination in a single rename step.
    tmp_path.replace(STATE_PATH)
 def get_disabled_until(model_key: str) -> Optional[float]:
    """Return the stored "disabled_until" value for *model_key* as a float.

    Returns ``None`` when the model has no entry, the entry has no
    "disabled_until" value, or the value cannot be converted to a float.
    """
    models = _read_state().get("models") or {}
    entry = models.get(model_key) or {}
    raw = entry.get("disabled_until")
    if raw is None:
        return None
    try:
        return float(raw)
    except (TypeError, ValueError):
        return None
 def is_fallback_active(model_key: str, now_ts: Optional[float] = None) -> bool:
    """Tell whether *model_key* is currently marked as disabled.

    Only models listed in FALLBACK_MODELS can be active. *now_ts* is the
    reference timestamp to compare against; it defaults to the current UTC
    time.
    """
    if model_key not in FALLBACK_MODELS:
        return False
    if now_ts is None:
        current = datetime.now(tz=timezone.utc).timestamp()
    else:
        current = float(now_ts)
    until = get_disabled_until(model_key)
    if not until:
        # No stored value (or a zero one) means nothing is disabled.
        return False
    return until > current
 def set_disabled_until(model_key: str, disabled_until_ts: float, reason: str = "") -> None:
    """Record that *model_key* is disabled until *disabled_until_ts*.

    Does nothing for models outside FALLBACK_MODELS. The stored entry holds
    the expiry timestamp, the given *reason*, and the current UTC time as
    "updated_at".
    """
    if model_key not in FALLBACK_MODELS:
        return
    state = _read_state()
    record = {
        "disabled_until": float(disabled_until_ts),
        "reason": reason,
        "updated_at": datetime.now(tz=timezone.utc).timestamp(),
    }
    state.setdefault("models", {})[model_key] = record
    _write_state(state)
 def _next_monday_utc8(now: datetime) -> datetime:
    """Return midnight of the first Monday strictly after *now*'s date.

    The result keeps *now*'s tzinfo; when *now* already falls on a Monday
    the following week's Monday is returned.
    """
    # weekday() is 0 for Monday .. 6 for Sunday, so this is always 1..7 days.
    days_ahead = 7 - now.weekday()
    shifted = now + timedelta(days=days_ahead)
    return shifted.replace(hour=0, minute=0, second=0, microsecond=0)
 def _next_month_same_day_utc8(now: datetime) -> datetime:
    """Return midnight on the same day-of-month in the month after *now*.

    When the next month is shorter, the day is clamped to that month's last
    day. The result keeps *now*'s tzinfo.
    """
    # Step to the next calendar month, rolling December over into January.
    year = now.year + now.month // 12
    month = now.month % 12 + 1
    # The first day of the month after the target, minus one day, is the
    # target month's last day.
    following = datetime(year + month // 12, month % 12 + 1, 1, tzinfo=now.tzinfo)
    last_day = (following - timedelta(days=1)).day
    return datetime(year, month, min(now.day, last_day), 0, 0, 0, tzinfo=now.tzinfo)
 def compute_disabled_until(error_text: str) -> Tuple[Optional[float], Optional[str]]:
    """Map a quota error message to ``(expiry timestamp, reason)``.

    The lower-cased message is searched for the hour, week and month quota
    markers, in that order. The expiry is computed from the current time in
    UTC+8 and returned as a POSIX timestamp. Returns ``(None, None)`` for an
    empty message or one that matches no marker.
    """
    if not error_text:
        return None, None
    haystack = str(error_text).lower()
    now = datetime.now(tz=timezone(timedelta(hours=8)))
    # (markers, expiry resolver, reason); the first matching rule wins.
    rules = (
        (
            ("hour allocated quota exceeded", "每 5 小时请求额度已用完"),
            lambda: now + timedelta(hours=5),
            "hour_quota",
        ),
        (
            ("week allocated quota exceeded", "每周请求额度已用完"),
            lambda: _next_monday_utc8(now),
            "week_quota",
        ),
        (
            ("month allocated quota exceeded", "每月请求额度已用完"),
            lambda: _next_month_same_day_utc8(now),
            "month_quota",
        ),
    )
    for markers, resolve_until, reason in rules:
        if any(marker in haystack for marker in markers):
            return resolve_until().astimezone(timezone.utc).timestamp(), reason
    return None, None
--- a/utils/api_client.py
+++ b/utils/api_client.py
@ -6,9 +6,12 @@ import json
 import asyncio
 import base64
 import mimetypes
 import os
 from typing import List, Dict, Optional, AsyncGenerator, Any
 from pathlib import Path
 from datetime import datetime
 from pathlib import Path
 from typing import Tuple
 try:
    from config import (
        API_BASE_URL,
@ -78,6 +81,73 @@ class DeepSeekClient:
        # 请求体落盘目录
        self.request_dump_dir = Path(__file__).resolve().parents[1] / "logs" / "api_requests"
        self.request_dump_dir.mkdir(parents=True, exist_ok=True)
        self.debug_log_path = Path(__file__).resolve().parents[1] / "logs" / "api_debug.log"
    def _maybe_mark_aliyun_quota(self, error_text: str) -> None:
        """Record an Aliyun quota error and switch this client to the official API.

        When *error_text* matches a quota marker, the model is marked as
        disabled in the shared fallback state. If an official base URL and key
        are configured for the model, the fast and thinking API configs (and
        the default base URL/key) are pointed at them immediately.

        This runs inside API error handling, so it is strictly best-effort:
        a failure to compute or persist the state is logged and swallowed, so
        it can never replace the API error being reported to the caller.
        """
        if not error_text or not self.model_key:
            return
        try:
            from utils.aliyun_fallback import compute_disabled_until, set_disabled_until
        except Exception:
            return
        try:
            disabled_until, reason = compute_disabled_until(error_text)
            if not (disabled_until and reason):
                return
            set_disabled_until(self.model_key, disabled_until, reason)
        except Exception as exc:
            # The state could not be recorded; keep the current endpoint.
            self._debug_log({
                "event": "aliyun_fallback_mark_failed",
                "model_key": self.model_key,
                "error": str(exc)
            })
            return
        # Switch to the official API right away (only when it is configured).
        official_env_names = {
            "kimi-k2.5": ("API_BASE_KIMI_OFFICIAL", "API_KEY_KIMI_OFFICIAL"),
            "qwen3-vl-plus": ("API_BASE_QWEN_OFFICIAL", "API_KEY_QWEN_OFFICIAL"),
            "minimax-m2.5": ("API_BASE_MINIMAX_OFFICIAL", "API_KEY_MINIMAX_OFFICIAL"),
        }.get(self.model_key)
        if official_env_names is None:
            return
        base_env_key, key_env_key = official_env_names
        official_base = self._resolve_env_value(base_env_key)
        official_key = self._resolve_env_value(key_env_key)
        if not (official_base and official_key):
            return
        self.fast_api_config["base_url"] = official_base
        self.fast_api_config["api_key"] = official_key
        self.thinking_api_config["base_url"] = official_base
        self.thinking_api_config["api_key"] = official_key
        self.api_base_url = official_base
        self.api_key = official_key
    def _debug_log(self, payload: Dict[str, Any]) -> None:
        """Append *payload* to the debug log as one timestamped JSON line.

        The log directory is created on demand. Logging is best-effort: any
        failure is swallowed so that diagnostics never break a request.
        """
        try:
            record = {"ts": datetime.now().isoformat(), **payload}
            log_file = self.debug_log_path
            log_file.parent.mkdir(parents=True, exist_ok=True)
            with log_file.open("a", encoding="utf-8") as handle:
                handle.write(json.dumps(record, ensure_ascii=False) + "\n")
        except Exception:
            pass
    def _resolve_env_value(self, name: str) -> Optional[str]:
        """Resolve *name* from the process environment, then the project ``.env``.

        The ``.env`` file (one directory above this module's directory) is
        only read when the variable is absent from ``os.environ``. Returns the
        stripped value, or ``None`` when it is missing or blank.
        """
        resolved = os.environ.get(name)
        if resolved is None:
            dotenv_file = Path(__file__).resolve().parents[1] / ".env"
            if dotenv_file.exists():
                try:
                    for text_line in dotenv_file.read_text(encoding="utf-8").splitlines():
                        stripped = text_line.strip()
                        # Only non-comment "KEY=VALUE" lines are considered.
                        is_assignment = bool(stripped) and not stripped.startswith("#") and "=" in stripped
                        if not is_assignment:
                            continue
                        var_name, _, var_value = stripped.partition("=")
                        if var_name.strip() == name:
                            resolved = var_value.strip().strip('"').strip("'")
                            break
                except Exception:
                    # Best effort: an unreadable .env counts as "not configured".
                    resolved = None
        if resolved is None:
            return None
        resolved = resolved.strip()
        return resolved if resolved else None
    def _print(self, message: str, end: str = "\n", flush: bool = False):
        """安全的打印函数，在Web模式下不输出"""
@ -568,7 +638,10 @@ class DeepSeekClient:
                                "error_text": error_text,
                                "error_type": None,
                                "error_message": None,
-                                "request_dump": str(dump_path)
+                                "request_dump": str(dump_path),
                                "base_url": api_config.get("base_url"),
                                "model_id": api_config.get("model_id"),
                                "model_key": self.model_key
                            }
                            try:
                                parsed = json.loads(error_text)
@ -578,7 +651,20 @@ class DeepSeekClient:
                                    self.last_error_info["error_message"] = err.get("message")
                            except Exception:
                                pass
-                            self._print(f"{OUTPUT_FORMATS['error']} API请求失败 ({response.status_code}): {error_text}")
+                            self._maybe_mark_aliyun_quota(error_text)
                            self._debug_log({
                                "event": "http_error_stream",
                                "status_code": response.status_code,
                                "error_text": error_text,
                                "base_url": api_config.get("base_url"),
                                "model_id": api_config.get("model_id"),
                                "model_key": self.model_key,
                                "request_dump": str(dump_path)
                            })
                            self._print(
                                f"{OUTPUT_FORMATS['error']} API请求失败 ({response.status_code}): {error_text} "
                                f"(base_url={api_config.get('base_url')}, model_id={api_config.get('model_id')})"
                            )
                            self._mark_request_error(dump_path, response.status_code, error_text)
                            yield {"error": self.last_error_info}
                            return
@ -607,7 +693,10 @@ class DeepSeekClient:
                            "error_text": error_text,
                            "error_type": None,
                            "error_message": None,
-                            "request_dump": str(dump_path)
+                            "request_dump": str(dump_path),
                            "base_url": api_config.get("base_url"),
                            "model_id": api_config.get("model_id"),
                            "model_key": self.model_key
                        }
                        try:
                            parsed = response.json()
@ -617,7 +706,20 @@ class DeepSeekClient:
                                self.last_error_info["error_message"] = err.get("message")
                        except Exception:
                            pass
-                        self._print(f"{OUTPUT_FORMATS['error']} API请求失败 ({response.status_code}): {error_text}")
+                        self._maybe_mark_aliyun_quota(error_text)
                        self._debug_log({
                            "event": "http_error",
                            "status_code": response.status_code,
                            "error_text": error_text,
                            "base_url": api_config.get("base_url"),
                            "model_id": api_config.get("model_id"),
                            "model_key": self.model_key,
                            "request_dump": str(dump_path)
                        })
                        self._print(
                            f"{OUTPUT_FORMATS['error']} API请求失败 ({response.status_code}): {error_text} "
                            f"(base_url={api_config.get('base_url')}, model_id={api_config.get('model_id')})"
                        )
                        self._mark_request_error(dump_path, response.status_code, error_text)
                        yield {"error": self.last_error_info}
                        return
@ -632,8 +734,21 @@ class DeepSeekClient:
                "error_text": "connect_error",
                "error_type": "connection_error",
                "error_message": "无法连接到API服务器",
-                "request_dump": str(dump_path)
+                "request_dump": str(dump_path),
                "base_url": api_config.get("base_url"),
                "model_id": api_config.get("model_id"),
                "model_key": self.model_key
            }
            self._maybe_mark_aliyun_quota(self.last_error_info.get("error_text"))
            self._debug_log({
                "event": "connect_error",
                "status_code": None,
                "error_text": "connect_error",
                "base_url": api_config.get("base_url"),
                "model_id": api_config.get("model_id"),
                "model_key": self.model_key,
                "request_dump": str(dump_path)
            })
            self._mark_request_error(dump_path, error_text="connect_error")
            yield {"error": self.last_error_info}
        except httpx.TimeoutException:
@ -643,8 +758,21 @@ class DeepSeekClient:
                "error_text": "timeout",
                "error_type": "timeout",
                "error_message": "API请求超时",
-                "request_dump": str(dump_path)
+                "request_dump": str(dump_path),
                "base_url": api_config.get("base_url"),
                "model_id": api_config.get("model_id"),
                "model_key": self.model_key
            }
            self._maybe_mark_aliyun_quota(self.last_error_info.get("error_text"))
            self._debug_log({
                "event": "timeout",
                "status_code": None,
                "error_text": "timeout",
                "base_url": api_config.get("base_url"),
                "model_id": api_config.get("model_id"),
                "model_key": self.model_key,
                "request_dump": str(dump_path)
            })
            self._mark_request_error(dump_path, error_text="timeout")
            yield {"error": self.last_error_info}
        except Exception as e:
@ -654,8 +782,21 @@ class DeepSeekClient:
                "error_text": str(e),
                "error_type": "exception",
                "error_message": str(e),
-                "request_dump": str(dump_path)
+                "request_dump": str(dump_path),
                "base_url": api_config.get("base_url"),
                "model_id": api_config.get("model_id"),
                "model_key": self.model_key
            }
            self._maybe_mark_aliyun_quota(self.last_error_info.get("error_text"))
            self._debug_log({
                "event": "exception",
                "status_code": None,
                "error_text": str(e),
                "base_url": api_config.get("base_url"),
                "model_id": api_config.get("model_id"),
                "model_key": self.model_key,
                "request_dump": str(dump_path)
            })
            self._mark_request_error(dump_path, error_text=str(e))
            yield {"error": self.last_error_info}