fix: improve API error diagnostics, raise model quotas, and lift default iteration/tool-call limits

2026-03-06 17:02:19 +08:00 · 2026-03-06 17:02:19 +08:00 · 877bcc2fad
commit 877bcc2fad
parent 868640b479
6 changed files with 133 additions and 36 deletions
--- a/config/limits.py
+++ b/config/limits.py
@ -13,12 +13,12 @@ CODE_EXECUTION_TIMEOUT = 60
 TERMINAL_COMMAND_TIMEOUT = 30
 SEARCH_MAX_RESULTS = 10
-# 自动修复与工具调用限制
+# 自动修复与工具调用限制（None 表示不限制）
 AUTO_FIX_TOOL_CALL = False
 AUTO_FIX_MAX_ATTEMPTS = 3
-MAX_ITERATIONS_PER_TASK = 100
+MAX_ITERATIONS_PER_TASK = None
-MAX_CONSECUTIVE_SAME_TOOL = 50
+MAX_CONSECUTIVE_SAME_TOOL = None
-MAX_TOTAL_TOOL_CALLS = 100
+MAX_TOTAL_TOOL_CALLS = None
 TOOL_CALL_COOLDOWN = 0.5
 THINKING_FAST_INTERVAL = 10
--- a/modules/usage_tracker.py
+++ b/modules/usage_tracker.py
@ -12,8 +12,8 @@ QuotaKey = Literal["fast", "thinking", "search"]
 QUOTA_DEFAULTS = {
    "default": {
-        "fast": {"limit": 50, "window_hours": 5},
+        "fast": {"limit": 200, "window_hours": 5},
-        "thinking": {"limit": 20, "window_hours": 5},
+        "thinking": {"limit": 200, "window_hours": 5},
        "search": {"limit": 20, "window_hours": 24},
    },
    "search_daily": {"limit": 20, "window_hours": 24},
--- a/server/_conversation_segment.py
+++ b/server/_conversation_segment.py
@ -695,7 +695,7 @@ async def handle_task_with_sender(terminal: WebTerminal, workspace: UserWorkspac
    last_tool_call_time = 0
    detected_tool_intent: Dict[str, str] = {}
-    # 设置最大迭代次数
+    # 设置最大迭代次数；None 表示不限制
    max_iterations = MAX_ITERATIONS_PER_TASK
    pending_append = None  # {"path": str, "tool_call_id": str, "buffer": str, ...}
@ -1337,12 +1337,16 @@ async def handle_task_with_sender(terminal: WebTerminal, workspace: UserWorkspac
            })
            maybe_mark_failure_from_message(web_terminal, message)
-    for iteration in range(max_iterations):
+    iteration = 0
    while max_iterations is None or iteration < max_iterations:
        current_iteration = iteration + 1
        iteration += 1
        total_iterations += 1
-        debug_log(f"\n--- 迭代 {iteration + 1}/{max_iterations} 开始 ---")
+        iteration_limit_label = max_iterations if max_iterations is not None else "∞"
        debug_log(f"\n--- 迭代 {current_iteration}/{iteration_limit_label} 开始 ---")
        # 检查是否超过总工具调用限制
-        if total_tool_calls >= MAX_TOTAL_TOOL_CALLS:
+        if MAX_TOTAL_TOOL_CALLS is not None and total_tool_calls >= MAX_TOTAL_TOOL_CALLS:
            debug_log(f"已达到最大工具调用次数限制 ({MAX_TOTAL_TOOL_CALLS})")
            sender('system_message', {
                'content': f'⚠️ 已达到最大工具调用次数限制 ({MAX_TOTAL_TOOL_CALLS})，任务结束。'
@ -1403,7 +1407,8 @@ async def handle_task_with_sender(terminal: WebTerminal, workspace: UserWorkspac
            })
            return
-        print(f"[API] 第{iteration + 1}次调用 (总工具调用: {total_tool_calls}/{MAX_TOTAL_TOOL_CALLS})")
+        tool_call_limit_label = MAX_TOTAL_TOOL_CALLS if MAX_TOTAL_TOOL_CALLS is not None else "∞"
        print(f"[API] 第{current_iteration}次调用 (总工具调用: {total_tool_calls}/{tool_call_limit_label})")
        # 收集流式响应
        async for chunk in web_terminal.api_client.chat(messages, tools, stream=True):
@ -1677,7 +1682,7 @@ async def handle_task_with_sender(terminal: WebTerminal, workspace: UserWorkspac
                        text_chunk_index += 1
                        log_backend_chunk(
                            conversation_id,
-                            iteration + 1,
+                            current_iteration,
                            text_chunk_index,
                            elapsed,
                            len(content),
@ -2099,7 +2104,10 @@ async def handle_task_with_sender(terminal: WebTerminal, workspace: UserWorkspac
            if tool_name == last_tool_name:
                consecutive_same_tool[tool_name] += 1
-                if consecutive_same_tool[tool_name] >= MAX_CONSECUTIVE_SAME_TOOL:
+                if (
                    MAX_CONSECUTIVE_SAME_TOOL is not None
                    and consecutive_same_tool[tool_name] >= MAX_CONSECUTIVE_SAME_TOOL
                ):
                    debug_log(f"警告: 连续调用相同工具 {tool_name} 已达 {MAX_CONSECUTIVE_SAME_TOOL} 次")
                    sender('system_message', {
                        'content': f'⚠️ 检测到重复调用 {tool_name} 工具 {MAX_CONSECUTIVE_SAME_TOOL} 次，可能存在循环。'
--- a/server/chat_flow.py
+++ b/server/chat_flow.py
@ -48,7 +48,7 @@ from core.web_terminal import WebTerminal
 from utils.tool_result_formatter import format_tool_result_for_context
 from utils.conversation_manager import ConversationManager
 from utils.api_client import DeepSeekClient
-from config.model_profiles import get_model_context_window
+from config.model_profiles import get_model_context_window, get_model_profile
 from .auth_helpers import api_login_required, resolve_admin_policy, get_current_user_record, get_current_username
 from .context import with_terminal, get_gui_manager, get_upload_guard, build_upload_error_response, ensure_conversation_loaded, reset_system_state, get_user_resources, get_or_create_usage_tracker
@ -425,7 +425,12 @@ def process_message_task(terminal: WebTerminal, message: str, images, sender, cl
        debug_log(f"任务处理错误: {e}")
        import traceback
        traceback.print_exc()
-        sender('error', {'message': str(e)})
+        sender('error', {
            'message': str(e),
            'conversation_id': getattr(getattr(terminal, "context_manager", None), "current_conversation_id", None),
            'task_id': getattr(terminal, "task_id", None) or client_sid,
            'client_sid': client_sid
        })
        sender('task_complete', {
            'total_iterations': 0,
            'total_tool_calls': 0,
@ -469,6 +474,25 @@ async def handle_task_with_sender(terminal: WebTerminal, workspace: UserWorkspac
    web_terminal = terminal
    conversation_id = getattr(web_terminal.context_manager, "current_conversation_id", None)
    videos = videos or []
    raw_sender = sender
    def sender(event_type, data):
        """为关键事件补充会话标识，便于前端定位报错归属。"""
        if not isinstance(data, dict):
            raw_sender(event_type, data)
            return
        payload = data
        if event_type in {"error", "quota_exceeded", "task_stopped", "task_complete"}:
            payload = dict(data)
            current_conv = conversation_id or getattr(web_terminal.context_manager, "current_conversation_id", None)
            if current_conv:
                payload.setdefault("conversation_id", current_conv)
            task_id = getattr(web_terminal, "task_id", None) or client_sid
            if task_id:
                payload.setdefault("task_id", task_id)
            if client_sid:
                payload.setdefault("client_sid", client_sid)
        raw_sender(event_type, payload)
    # 如果是思考模式，重置状态
    if web_terminal.thinking_mode:
@ -562,8 +586,9 @@ async def handle_task_with_sender(terminal: WebTerminal, workspace: UserWorkspac
    last_tool_call_time = 0
    detected_tool_intent: Dict[str, str] = {}
-    # 设置最大迭代次数（API 可覆盖）
+    # 设置最大迭代次数（API 可覆盖）；None 表示不限制
-    max_iterations = getattr(web_terminal, "max_iterations_override", None) or MAX_ITERATIONS_PER_TASK
+    max_iterations_override = getattr(web_terminal, "max_iterations_override", None)
    max_iterations = max_iterations_override if max_iterations_override is not None else MAX_ITERATIONS_PER_TASK
    max_api_retries = 4
    retry_delay_seconds = 10
@ -1225,12 +1250,16 @@ async def handle_task_with_sender(terminal: WebTerminal, workspace: UserWorkspac
            await asyncio.sleep(0.2)
        return False
-    for iteration in range(max_iterations):
+    iteration = 0
    while max_iterations is None or iteration < max_iterations:
        current_iteration = iteration + 1
        iteration += 1
        total_iterations += 1
-        debug_log(f"\n--- 迭代 {iteration + 1}/{max_iterations} 开始 ---")
+        iteration_limit_label = max_iterations if max_iterations is not None else "∞"
        debug_log(f"\n--- 迭代 {current_iteration}/{iteration_limit_label} 开始 ---")
        # 检查是否超过总工具调用限制
-        if total_tool_calls >= MAX_TOTAL_TOOL_CALLS:
+        if MAX_TOTAL_TOOL_CALLS is not None and total_tool_calls >= MAX_TOTAL_TOOL_CALLS:
            debug_log(f"已达到最大工具调用次数限制 ({MAX_TOTAL_TOOL_CALLS})")
            sender('system_message', {
                'content': f'⚠️ 已达到最大工具调用次数限制 ({MAX_TOTAL_TOOL_CALLS})，任务结束。'
@ -1317,7 +1346,8 @@ async def handle_task_with_sender(terminal: WebTerminal, workspace: UserWorkspac
            })
            return
-        print(f"[API] 第{iteration + 1}次调用 (总工具调用: {total_tool_calls}/{MAX_TOTAL_TOOL_CALLS})")
+        tool_call_limit_label = MAX_TOTAL_TOOL_CALLS if MAX_TOTAL_TOOL_CALLS is not None else "∞"
        print(f"[API] 第{current_iteration}次调用 (总工具调用: {total_tool_calls}/{tool_call_limit_label})")
        api_error = None
        for api_attempt in range(max_api_retries + 1):
@ -1612,7 +1642,7 @@ async def handle_task_with_sender(terminal: WebTerminal, workspace: UserWorkspac
                            text_chunk_index += 1
                            log_backend_chunk(
                                conversation_id,
-                                iteration + 1,
+                                current_iteration,
                                text_chunk_index,
                                elapsed,
                                len(content),
@ -1746,14 +1776,38 @@ async def handle_task_with_sender(terminal: WebTerminal, workspace: UserWorkspac
            if api_error:
                try:
                    debug_log(f"API错误原始数据: {json.dumps(api_error, ensure_ascii=False)}")
                except Exception:
                    debug_log(f"API错误原始数据(不可序列化): {repr(api_error)}")
                error_message = ""
                error_status = None
                error_type = None
                error_code = None
                error_text = ""
                request_dump = None
                error_base_url = None
                error_model_id = None
                if isinstance(api_error, dict):
                    error_status = api_error.get("status_code")
-                    error_type = api_error.get("error_type")
+                    error_type = api_error.get("error_type") or api_error.get("type")
-                    error_message = api_error.get("error_message") or api_error.get("error_text") or ""
+                    error_code = api_error.get("error_code") or api_error.get("code")
                    error_text = api_error.get("error_text") or ""
                    error_message = (
                        api_error.get("error_message")
                        or api_error.get("message")
                        or error_text
                        or ""
                    )
                    request_dump = api_error.get("request_dump")
                    error_base_url = api_error.get("base_url")
                    error_model_id = api_error.get("model_id")
                elif isinstance(api_error, str):
                    error_message = api_error
                if not error_message:
                    if error_status:
                        error_message = f"API 请求失败（HTTP {error_status}）"
                    else:
                        error_message = "API 请求失败"
                # 若命中阿里云配额错误，立即写入状态并切换到官方 API
                try:
@ -1777,6 +1831,11 @@ async def handle_task_with_sender(terminal: WebTerminal, workspace: UserWorkspac
                    'message': error_message,
                    'status_code': error_status,
                    'error_type': error_type,
                    'error_code': error_code,
                    'error_text': error_text,
                    'request_dump': request_dump,
                    'base_url': error_base_url,
                    'model_id': error_model_id,
                    'retry': bool(can_retry),
                    'retry_in': retry_delay_seconds if can_retry else None,
                    'attempt': api_attempt + 1,
@ -2092,7 +2151,10 @@ async def handle_task_with_sender(terminal: WebTerminal, workspace: UserWorkspac
            if tool_name == last_tool_name:
                consecutive_same_tool[tool_name] += 1
-                if consecutive_same_tool[tool_name] >= MAX_CONSECUTIVE_SAME_TOOL:
+                if (
                    MAX_CONSECUTIVE_SAME_TOOL is not None
                    and consecutive_same_tool[tool_name] >= MAX_CONSECUTIVE_SAME_TOOL
                ):
                    debug_log(f"警告: 连续调用相同工具 {tool_name} 已达 {MAX_CONSECUTIVE_SAME_TOOL} 次")
                    sender('system_message', {
                        'content': f'⚠️ 检测到重复调用 {tool_name} 工具 {MAX_CONSECUTIVE_SAME_TOOL} 次，可能存在循环。'
--- a/static/src/composables/useLegacySocket.ts
+++ b/static/src/composables/useLegacySocket.ts
@ -1409,14 +1409,28 @@ export async function initializeLegacySocket(ctx: any) {
            const msg = data?.message || '发生未知错误';
            const code = data?.status_code;
            const errType = data?.error_type;
            const errCode = data?.error_code;
            const dumpPath = data?.request_dump;
            const baseUrl = data?.base_url;
            const modelId = data?.model_id;
            const conversationId = data?.conversation_id;
            const taskId = data?.task_id;
            const shouldRetry = Boolean(data?.retry);
            const retryIn = Number(data?.retry_in) || 5;
            const retryAttempt = Number(data?.attempt) || 1;
            const retryMax = Number(data?.max_attempts) || retryAttempt;
            const detailParts = [
                dumpPath ? `请求记录: ${dumpPath}` : '',
                baseUrl ? `接口: ${baseUrl}` : '',
                modelId ? `模型: ${modelId}` : '',
                conversationId ? `对话ID: ${conversationId}` : '',
                taskId ? `任务ID: ${taskId}` : ''
            ].filter(Boolean);
            const detailText = detailParts.length ? `\n${detailParts.join('\n')}` : '';
            if (typeof ctx.uiPushToast === 'function') {
                ctx.uiPushToast({
                    title: code ? `API错误 ${code}` : 'API错误',
-                    message: errType ? `${errType}: ${msg}` : msg,
+                    message: `${errType ? `${errType}${errCode ? `(${errCode})` : ''}: ${msg}` : msg}${detailText}`,
                    type: 'error',
                    duration: 6000
                });
@ -1437,6 +1451,12 @@ export async function initializeLegacySocket(ctx: any) {
                return;
            }
            if (typeof ctx.appendSystemAction === 'function') {
                ctx.appendSystemAction(
                    `${code ? `[API ${code}] ` : '[API] '}${errType ? `${errType}${errCode ? `(${errCode})` : ''}: ` : ''}${msg}${detailText}`
                );
            }
            // 最后一次报错：恢复输入状态并清理提示动画
            const msgIndex = typeof ctx.currentMessageIndex === 'number' ? ctx.currentMessageIndex : -1;
            if (msgIndex >= 0 && Array.isArray(ctx.messages)) {
--- a/utils/api_client.py
+++ b/utils/api_client.py
@ -727,13 +727,18 @@ class DeepSeekClient:
                    self.last_error_info = None
                    yield response.json()
-        except httpx.ConnectError:
+        except httpx.ConnectError as e:
-            self._print(f"{OUTPUT_FORMATS['error']} 无法连接到API服务器，请检查网络连接")
+            connect_detail = str(e).strip() or repr(e)
            self._print(
                f"{OUTPUT_FORMATS['error']} 无法连接到API服务器，请检查网络连接"
                f"（{connect_detail}）"
            )
            self.last_error_info = {
                "status_code": None,
                "error_text": "connect_error",
                "error_type": "connection_error",
-                "error_message": "无法连接到API服务器",
+                "error_message": f"无法连接到API服务器: {connect_detail}",
                "error_detail": connect_detail,
                "request_dump": str(dump_path),
                "base_url": api_config.get("base_url"),
                "model_id": api_config.get("model_id"),
@ -744,12 +749,13 @@ class DeepSeekClient:
                "event": "connect_error",
                "status_code": None,
                "error_text": "connect_error",
                "error_detail": connect_detail,
                "base_url": api_config.get("base_url"),
                "model_id": api_config.get("model_id"),
                "model_key": self.model_key,
                "request_dump": str(dump_path)
            })
-            self._mark_request_error(dump_path, error_text="connect_error")
+            self._mark_request_error(dump_path, error_text=f"connect_error: {connect_detail}")
            yield {"error": self.last_error_info}
        except httpx.TimeoutException:
            self._print(f"{OUTPUT_FORMATS['error']} API请求超时")
@ -776,12 +782,13 @@ class DeepSeekClient:
            self._mark_request_error(dump_path, error_text="timeout")
            yield {"error": self.last_error_info}
        except Exception as e:
-            self._print(f"{OUTPUT_FORMATS['error']} API调用异常: {e}")
+            error_text = str(e).strip() or repr(e)
            self._print(f"{OUTPUT_FORMATS['error']} API调用异常: {error_text}")
            self.last_error_info = {
                "status_code": None,
-                "error_text": str(e),
+                "error_text": error_text,
                "error_type": "exception",
-                "error_message": str(e),
+                "error_message": error_text,
                "request_dump": str(dump_path),
                "base_url": api_config.get("base_url"),
                "model_id": api_config.get("model_id"),
@ -791,13 +798,13 @@ class DeepSeekClient:
            self._debug_log({
                "event": "exception",
                "status_code": None,
-                "error_text": str(e),
+                "error_text": error_text,
                "base_url": api_config.get("base_url"),
                "model_id": api_config.get("model_id"),
                "model_key": self.model_key,
                "request_dump": str(dump_path)
            })
-            self._mark_request_error(dump_path, error_text=str(e))
+            self._mark_request_error(dump_path, error_text=error_text)
            yield {"error": self.last_error_info}
    async def chat_with_tools(