from __future__ import annotations
|
||
|
||
import asyncio
|
||
import json
|
||
import time
|
||
import re
|
||
import zipfile
|
||
from collections import defaultdict, Counter, deque
|
||
from datetime import datetime, timedelta
|
||
from pathlib import Path
|
||
from typing import Any, Dict, List, Optional, Tuple
|
||
|
||
from werkzeug.utils import secure_filename
|
||
|
||
from config import (
|
||
OUTPUT_FORMATS,
|
||
AUTO_FIX_TOOL_CALL,
|
||
AUTO_FIX_MAX_ATTEMPTS,
|
||
MAX_ITERATIONS_PER_TASK,
|
||
MAX_CONSECUTIVE_SAME_TOOL,
|
||
MAX_TOTAL_TOOL_CALLS,
|
||
TOOL_CALL_COOLDOWN,
|
||
MAX_UPLOAD_SIZE,
|
||
DEFAULT_CONVERSATIONS_LIMIT,
|
||
MAX_CONVERSATIONS_LIMIT,
|
||
CONVERSATIONS_DIR,
|
||
DEFAULT_RESPONSE_MAX_TOKENS,
|
||
DEFAULT_PROJECT_PATH,
|
||
LOGS_DIR,
|
||
AGENT_VERSION,
|
||
THINKING_FAST_INTERVAL,
|
||
PROJECT_MAX_STORAGE_MB,
|
||
PROJECT_MAX_STORAGE_BYTES,
|
||
UPLOAD_SCAN_LOG_SUBDIR,
|
||
)
|
||
from modules.personalization_manager import (
|
||
load_personalization_config,
|
||
save_personalization_config,
|
||
THINKING_INTERVAL_MIN,
|
||
THINKING_INTERVAL_MAX,
|
||
)
|
||
from modules.skill_hint_manager import SkillHintManager
|
||
from modules.upload_security import UploadSecurityError
|
||
from modules.user_manager import UserWorkspace
|
||
from modules.usage_tracker import QUOTA_DEFAULTS
|
||
from modules.sub_agent_manager import TERMINAL_STATUSES
|
||
from core.web_terminal import WebTerminal
|
||
from utils.tool_result_formatter import format_tool_result_for_context
|
||
from utils.conversation_manager import ConversationManager
|
||
from config.model_profiles import get_model_context_window, get_model_profile
|
||
|
||
from .auth_helpers import api_login_required, resolve_admin_policy, get_current_user_record, get_current_username
|
||
from .context import with_terminal, get_gui_manager, get_upload_guard, build_upload_error_response, ensure_conversation_loaded, reset_system_state, get_user_resources, get_or_create_usage_tracker
|
||
from .utils_common import (
|
||
build_review_lines,
|
||
debug_log,
|
||
log_backend_chunk,
|
||
log_frontend_chunk,
|
||
log_streaming_debug_entry,
|
||
brief_log,
|
||
DEBUG_LOG_FILE,
|
||
CHUNK_BACKEND_LOG_FILE,
|
||
CHUNK_FRONTEND_LOG_FILE,
|
||
STREAMING_DEBUG_LOG_FILE,
|
||
)
|
||
from .security import rate_limited, compact_web_search_result, consume_socket_token, prune_socket_tokens, validate_csrf_request, requires_csrf_protection, get_csrf_token
|
||
from .monitor import cache_monitor_snapshot, get_cached_monitor_snapshot
|
||
from .extensions import socketio
|
||
from .state import (
|
||
MONITOR_FILE_TOOLS,
|
||
MONITOR_MEMORY_TOOLS,
|
||
MONITOR_SNAPSHOT_CHAR_LIMIT,
|
||
MONITOR_MEMORY_ENTRY_LIMIT,
|
||
RATE_LIMIT_BUCKETS,
|
||
FAILURE_TRACKERS,
|
||
pending_socket_tokens,
|
||
usage_trackers,
|
||
MONITOR_SNAPSHOT_CACHE,
|
||
MONITOR_SNAPSHOT_CACHE_LIMIT,
|
||
PROJECT_STORAGE_CACHE,
|
||
PROJECT_STORAGE_CACHE_TTL_SECONDS,
|
||
RECENT_UPLOAD_EVENT_LIMIT,
|
||
RECENT_UPLOAD_FEED_LIMIT,
|
||
THINKING_FAILURE_KEYWORDS,
|
||
TITLE_PROMPT_PATH,
|
||
get_last_active_ts,
|
||
user_manager,
|
||
container_manager,
|
||
custom_tool_registry,
|
||
user_terminals,
|
||
terminal_rooms,
|
||
connection_users,
|
||
stop_flags,
|
||
active_polling_tasks,
|
||
get_stop_flag,
|
||
set_stop_flag,
|
||
clear_stop_flag,
|
||
)
|
||
from .chat_flow_helpers import (
|
||
detect_malformed_tool_call as _detect_malformed_tool_call,
|
||
detect_tool_failure,
|
||
get_thinking_state,
|
||
mark_force_thinking as _mark_force_thinking,
|
||
mark_suppress_thinking,
|
||
apply_thinking_schedule as _apply_thinking_schedule,
|
||
update_thinking_after_call as _update_thinking_after_call,
|
||
maybe_mark_failure_from_message as _maybe_mark_failure_from_message,
|
||
generate_conversation_title_background as _generate_conversation_title_background,
|
||
)
|
||
|
||
|
||
from .chat_flow_runner_helpers import (
|
||
extract_intent_from_partial,
|
||
resolve_monitor_path,
|
||
resolve_monitor_memory,
|
||
capture_monitor_snapshot,
|
||
)
|
||
|
||
|
||
from .chat_flow_runtime import (
|
||
generate_conversation_title_background,
|
||
mark_force_thinking,
|
||
apply_thinking_schedule,
|
||
update_thinking_after_call,
|
||
maybe_mark_failure_from_message,
|
||
detect_malformed_tool_call,
|
||
)
|
||
|
||
from .chat_flow_task_support import process_sub_agent_updates
|
||
from .chat_flow_tool_loop import execute_tool_calls
|
||
from .chat_flow_stream_loop import run_streaming_attempts
|
||
|
||
async def poll_sub_agent_completion(*, web_terminal, workspace, conversation_id, client_sid, username):
    """Poll sub-agent completion status in the background and, once one
    finishes, trigger a new conversation round announcing the result.

    Keyword-only parameters:
        web_terminal: per-user terminal object; its ``sub_agent_manager``
            is polled for updates and its ``_announced_sub_agent_tasks``
            set is used to deduplicate notifications.
        workspace: the user's workspace (its id is recorded in the
            background chat task's session data).
        conversation_id: only tasks bound to this conversation are
            announced; updates for other conversations are skipped.
        client_sid: socket id used to look up the user's stop flag.
        username: socket room target (``user_<username>``).

    Returns None. The loop exits when the user requests a stop, when all
    background tasks for this conversation are finished (possibly after a
    make-up notification), or after a one-hour wall-clock timeout.
    """
    from .extensions import socketio

    manager = getattr(web_terminal, "sub_agent_manager", None)
    if not manager:
        debug_log("[SubAgent] poll_sub_agent_completion: manager 不存在")
        return
    # Lazily create the dedup set of task ids already announced to the UI.
    if not hasattr(web_terminal, "_announced_sub_agent_tasks"):
        web_terminal._announced_sub_agent_tasks = set()

    max_wait_time = 3600  # wait at most one hour
    start_wait = time.time()

    debug_log(f"[SubAgent] 开始后台轮询,conversation_id={conversation_id}, username={username}")

    # Helper used to emit socket events into the user's room; emit errors
    # are logged and swallowed so polling keeps going.
    def sender(event_type, data):
        try:
            socketio.emit(event_type, data, room=f"user_{username}")
            debug_log(f"[SubAgent] 发送事件: {event_type}")
        except Exception as e:
            debug_log(f"[SubAgent] 发送事件失败: {event_type}, 错误: {e}")

    while (time.time() - start_wait) < max_wait_time:
        debug_log(f"[SubAgent] 轮询检查...")

        # Honor the per-client stop flag (dict form carries a 'stop' key;
        # any other truthy value is treated as a stop request).
        client_stop_info = get_stop_flag(client_sid, username)
        if client_stop_info:
            stop_requested = client_stop_info.get('stop', False) if isinstance(client_stop_info, dict) else client_stop_info
            if stop_requested:
                debug_log("[SubAgent] 用户请求停止,终止轮询")
                break

        # While the main conversation is still inside its tool loop, do not
        # consume completion events yet — this avoids preempting system
        # message insertion in the main flow.
        if getattr(web_terminal, "_tool_loop_active", False):
            debug_log("[SubAgent] 主对话工具循环中,延迟后台轮询发送 user 消息")
            await asyncio.sleep(1)
            continue

        updates = manager.poll_updates()
        debug_log(f"[SubAgent] poll_updates 返回 {len(updates)} 个更新")

        for update in updates:
            agent_id = update.get("agent_id")
            summary = update.get("summary")
            result_summary = update.get("result_summary") or update.get("message", "")
            deliverables_dir = update.get("deliverables_dir", "")
            status = update.get("status")
            task_id = update.get("task_id")
            task_info = manager.tasks.get(task_id) if task_id else None
            task_conv_id = task_info.get("conversation_id") if isinstance(task_info, dict) else None
            # Skip updates that belong to a different conversation.
            if task_conv_id and task_conv_id != conversation_id:
                debug_log(f"[SubAgent] 跳过非当前对话任务: task={task_id} conv={task_conv_id} current={conversation_id}")
                continue
            # Skip updates whose task record can no longer be found.
            if task_id and task_info is None:
                debug_log(f"[SubAgent] 找不到任务详情,跳过: task={task_id}")
                continue
            # Skip terminated tasks and tasks the user was already notified about.
            if status == "terminated" or (isinstance(task_info, dict) and task_info.get("notified")):
                debug_log(f"[SubAgent] 跳过已终止/已通知任务: task={task_id} status={status}")
                continue

            debug_log(f"[SubAgent] 子智能体{agent_id}完成,状态: {status}")

            # Build the synthetic user message announcing the completion
            # (only sent when the task finished in the background).
            prefix = "这是一句系统自动发送的user消息,用于通知你子智能体已经运行完成"
            runtime_line = ""
            # Runtime may be reported under either key depending on producer.
            elapsed_seconds = update.get("runtime_seconds")
            if elapsed_seconds is None:
                elapsed_seconds = update.get("elapsed_seconds")
            if status == "completed" and isinstance(elapsed_seconds, (int, float)):
                runtime_line = f"\n\n运行了{int(round(elapsed_seconds))}秒"
            user_message = f"""{prefix}

子智能体{agent_id} ({summary}) 已完成任务。

{result_summary}
{runtime_line}

交付目录:{deliverables_dir}"""

            debug_log(f"[SubAgent] 准备发送 user_message: {user_message[:100]}...")

            has_remaining = False
            remaining_count = 0
            try:
                # Mark the task as announced/notified and persist so it is
                # not re-announced after a process restart.
                if task_id:
                    web_terminal._announced_sub_agent_tasks.add(task_id)
                    if isinstance(task_info, dict):
                        task_info["notified"] = True
                        task_info["updated_at"] = time.time()
                        try:
                            manager._save_state()
                        except Exception as exc:
                            debug_log(f"[SubAgent] 保存通知状态失败: {exc}")

                # Compute how much sub-agent work remains for this
                # conversation (the frontend uses this to clear or keep its
                # waiting indicator).
                if not hasattr(web_terminal, "_announced_sub_agent_tasks"):
                    web_terminal._announced_sub_agent_tasks = set()
                announced = web_terminal._announced_sub_agent_tasks
                running_tasks = [
                    task for task in manager.tasks.values()
                    if isinstance(task, dict)
                    and task.get("status") not in TERMINAL_STATUSES.union({"terminated"})
                    and task.get("run_in_background")
                    and task.get("conversation_id") == conversation_id
                ]
                pending_notice_tasks = [
                    task for task in manager.tasks.values()
                    if isinstance(task, dict)
                    and task.get("status") in TERMINAL_STATUSES.union({"terminated"})
                    and task.get("run_in_background")
                    and task.get("conversation_id") == conversation_id
                    and task.get("task_id") not in announced
                    and not task.get("notified")
                ]
                remaining_count = len(running_tasks) + len(pending_notice_tasks)
                has_remaining = remaining_count > 0

                # Register the follow-up turn as a background task so it can
                # be resumed after a page refresh.
                from .tasks import task_manager
                workspace_id = getattr(workspace, "workspace_id", None) or "default"
                session_data = {
                    "username": username,
                    "role": getattr(web_terminal, "user_role", "user"),
                    "is_api_user": getattr(web_terminal, "user_role", "") == "api",
                    "workspace_id": workspace_id,
                    "run_mode": getattr(web_terminal, "run_mode", None),
                    "thinking_mode": getattr(web_terminal, "thinking_mode", None),
                    "model_key": getattr(web_terminal, "model_key", None),
                }
                rec = task_manager.create_chat_task(
                    username,
                    workspace_id,
                    user_message,
                    [],
                    conversation_id,
                    model_key=session_data.get("model_key"),
                    thinking_mode=session_data.get("thinking_mode"),
                    run_mode=session_data.get("run_mode"),
                    session_data=session_data,
                )
                debug_log(f"[SubAgent] 已创建后台任务: task_id={rec.task_id}")
                sender('user_message', {
                    'message': user_message,
                    'conversation_id': conversation_id,
                    'task_id': rec.task_id,
                    'sub_agent_notice': True,
                    'has_running_sub_agents': has_remaining,
                    'remaining_count': remaining_count
                })
            except Exception as e:
                # Fallback: background-task registration failed — run the
                # follow-up turn inline instead.
                debug_log(f"[SubAgent] 创建后台任务失败,回退直接执行: {e}")
                sender('user_message', {
                    'message': user_message,
                    'conversation_id': conversation_id,
                    'sub_agent_notice': True,
                    'has_running_sub_agents': has_remaining,
                    'remaining_count': remaining_count
                })
                try:
                    task = asyncio.create_task(handle_task_with_sender(
                        terminal=web_terminal,
                        workspace=workspace,
                        message=user_message,
                        images=[],
                        sender=sender,
                        client_sid=client_sid,
                        username=username,
                        videos=[]
                    ))
                    await task
                    debug_log(f"[SubAgent] process_message_task 调用成功")
                except Exception as inner_exc:
                    debug_log(f"[SubAgent] process_message_task 失败: {inner_exc}")
                    import traceback
                    debug_log(f"[SubAgent] 错误堆栈: {traceback.format_exc()}")

            return  # only handle the first completed sub-agent per poll run

        # Check whether any background tasks are still running for this
        # conversation.
        running_tasks = [
            task for task in manager.tasks.values()
            if task.get("status") not in {"completed", "failed", "timeout", "terminated"}
            and task.get("run_in_background")
            and task.get("conversation_id") == conversation_id
        ]

        debug_log(f"[SubAgent] 当前还有 {len(running_tasks)} 个运行中的任务")

        if not running_tasks:
            debug_log("[SubAgent] 所有子智能体已完成")
            # If a task reached a terminal status before poll_updates could
            # report it (so updates came back empty), send a make-up
            # completion notification for the most recently updated one.
            completed_tasks = [
                task for task in manager.tasks.values()
                if task.get("status") in {"completed", "failed", "timeout"}
                and task.get("run_in_background")
                and task.get("conversation_id") == conversation_id
                and not task.get("notified")
            ]
            if completed_tasks:
                completed_tasks.sort(
                    key=lambda item: item.get("updated_at") or item.get("created_at") or 0,
                    reverse=True
                )
                task = completed_tasks[0]
                agent_id = task.get("agent_id")
                summary = task.get("summary") or ""
                final_result = task.get("final_result") or {}
                result_summary = (
                    final_result.get("message")
                    or final_result.get("result_summary")
                    or final_result.get("system_message")
                    or ""
                )
                deliverables_dir = final_result.get("deliverables_dir") or task.get("deliverables_dir") or ""
                status = final_result.get("status") or task.get("status")
                debug_log(f"[SubAgent] 补发完成提示: task={task.get('task_id')} status={status}")

                user_message = f"""子智能体{agent_id} ({summary}) 已完成任务。

{result_summary}

交付目录:{deliverables_dir}"""

                try:
                    # Mark as notified and persist, then announce and
                    # register a background follow-up task (same shape as
                    # the main notification path above).
                    task_id = task.get("task_id")
                    if task_id:
                        web_terminal._announced_sub_agent_tasks.add(task_id)
                    if isinstance(task, dict):
                        task["notified"] = True
                        task["updated_at"] = time.time()
                        try:
                            manager._save_state()
                        except Exception as exc:
                            debug_log(f"[SubAgent] 保存通知状态失败: {exc}")
                    sender('user_message', {
                        'message': user_message,
                        'conversation_id': conversation_id
                    })
                    from .tasks import task_manager
                    workspace_id = getattr(workspace, "workspace_id", None) or "default"
                    session_data = {
                        "username": username,
                        "role": getattr(web_terminal, "user_role", "user"),
                        "is_api_user": getattr(web_terminal, "user_role", "") == "api",
                        "workspace_id": workspace_id,
                        "run_mode": getattr(web_terminal, "run_mode", None),
                        "thinking_mode": getattr(web_terminal, "thinking_mode", None),
                        "model_key": getattr(web_terminal, "model_key", None),
                    }
                    rec = task_manager.create_chat_task(
                        username,
                        workspace_id,
                        user_message,
                        [],
                        conversation_id,
                        model_key=session_data.get("model_key"),
                        thinking_mode=session_data.get("thinking_mode"),
                        run_mode=session_data.get("run_mode"),
                        session_data=session_data,
                    )
                    debug_log(f"[SubAgent] 补发通知创建后台任务: task_id={rec.task_id}")
                except Exception as e:
                    # Fallback: execute the follow-up turn inline.
                    debug_log(f"[SubAgent] 补发通知创建后台任务失败,回退直接执行: {e}")
                    try:
                        task_handle = asyncio.create_task(handle_task_with_sender(
                            terminal=web_terminal,
                            workspace=workspace,
                            message=user_message,
                            images=[],
                            sender=sender,
                            client_sid=client_sid,
                            username=username,
                            videos=[]
                        ))
                        await task_handle
                    except Exception as inner_exc:
                        debug_log(f"[SubAgent] 补发完成提示失败: {inner_exc}")
                        import traceback
                        debug_log(f"[SubAgent] 错误堆栈: {traceback.format_exc()}")
            break

        await asyncio.sleep(5)

    debug_log("[SubAgent] 后台轮询结束")
|
||
|
||
async def handle_task_with_sender(terminal: WebTerminal, workspace: UserWorkspace, message, images, sender, client_sid, username: str, videos=None):
    """Process one user task and stream events to the client — the
    token-accounting-integrated version of the chat runner.

    Drives the full model loop: records the user message, optionally
    inserts skill hints and kicks off title generation, enforces the
    context-token budget, then iterates model calls + tool execution until
    the model stops calling tools or a limit is hit, finally emitting
    ``task_complete``.

    Parameters:
        terminal: the user's WebTerminal (model client + context manager).
        workspace: the user's workspace (personalization config source).
        message: the user's message text.
        images: image attachments passed into the conversation history.
        sender: callable ``(event_type, data)`` used to emit socket events;
            wrapped below to inject conversation/task identifiers.
        client_sid: socket id used for stop flags and event attribution.
        username: owning user's name (socket rooms, background tasks).
        videos: optional video attachments; defaults to an empty list.

    Returns None. Early-returns on context overflow, quota exhaustion, or
    a user stop signalled by the stream/tool helpers.
    """
    from .extensions import socketio

    web_terminal = terminal
    conversation_id = getattr(web_terminal.context_manager, "current_conversation_id", None)
    videos = videos or []
    raw_sender = sender

    def sender(event_type, data):
        """Wrap the raw sender: attach conversation/task identifiers to key
        events so the frontend can attribute errors to the right session."""
        if not isinstance(data, dict):
            # Non-dict payloads pass through untouched.
            raw_sender(event_type, data)
            return
        payload = dict(data)
        current_conv = conversation_id or getattr(web_terminal.context_manager, "current_conversation_id", None)

        # Add conversation_id to all events (except connection-level ones)
        # so the frontend can match them correctly.
        if current_conv and event_type not in {"connect", "disconnect", "system_ready"}:
            payload.setdefault("conversation_id", current_conv)

        # Debug trace for the key streaming events.
        if event_type in {"user_message", "ai_message_start", "text_start", "text_chunk", "tool_preparing"}:
            debug_log(f"[SENDER] 发送事件: {event_type}, conversation_id={current_conv}, data_keys={list(payload.keys())}")

        # Terminal/error events additionally carry task and client ids.
        if event_type in {"error", "quota_exceeded", "task_stopped", "task_complete"}:
            task_id = getattr(web_terminal, "task_id", None) or client_sid
            if task_id:
                payload.setdefault("task_id", task_id)
            if client_sid:
                payload.setdefault("client_sid", client_sid)

        raw_sender(event_type, payload)

    # In thinking mode, reset the per-task thinking state.
    if web_terminal.thinking_mode:
        web_terminal.api_client.start_new_task(force_deep=web_terminal.deep_thinking_mode)
        state = get_thinking_state(web_terminal)
        state["fast_streak"] = 0
        state["force_next"] = False
        state["suppress_next"] = False

    # Append the user message to the conversation history; remember whether
    # it is the very first message (drives auto title generation).
    history_len_before = len(getattr(web_terminal.context_manager, "conversation_history", []) or [])
    is_first_user_message = history_len_before == 0
    web_terminal.context_manager.add_conversation("user", message, images=images, videos=videos)

    # Skill-hint system: detect keywords and insert system messages right
    # after the user message. Best-effort — any failure is only logged.
    try:
        personal_config = load_personalization_config(workspace.data_dir)
        skill_hints_enabled = personal_config.get("skill_hints_enabled", False)

        if skill_hints_enabled and message:
            hint_manager = SkillHintManager()
            hint_manager.set_enabled(True)
            hint_messages = hint_manager.build_hint_messages(message)

            # Insert each hint into the conversation history (after the
            # user message), verifying the insertion for debugging.
            for hint_msg in hint_messages:
                debug_log(f"[Skill Hints] 插入提示消息: {hint_msg['content'][:100]}")
                web_terminal.context_manager.add_conversation(
                    "system",
                    hint_msg["content"]
                )
                last_msg = web_terminal.context_manager.conversation_history[-1]
                debug_log(f"[Skill Hints] 插入后验证 - role: {last_msg.get('role')}, content: {last_msg.get('content')[:100]}")
    except Exception as exc:
        debug_log(f"Skill hints 处理失败: {exc}")

    # Auto-generate a conversation title in the background for the first
    # user message (if enabled in personalization).
    if is_first_user_message and getattr(web_terminal, "context_manager", None):
        try:
            personal_config = load_personalization_config(workspace.data_dir)
        except Exception:
            personal_config = {}
        auto_title_enabled = personal_config.get("auto_generate_title", True)
        if auto_title_enabled:
            conv_id = getattr(web_terminal.context_manager, "current_conversation_id", None)
            socketio.start_background_task(
                generate_conversation_title_background,
                web_terminal,
                conv_id,
                message,
                username
            )

    # NOTE: input tokens are no longer computed here — they are computed
    # before each API call instead.

    # Build context, messages and tool definitions for the API call.
    context = web_terminal.build_context()
    messages = web_terminal.build_messages(context, message)
    tools = web_terminal.define_tools()
    try:
        profile = get_model_profile(getattr(web_terminal, "model_key", None) or "kimi-k2.5")
        web_terminal.apply_model_profile(profile)
    except Exception as exc:
        debug_log(f"更新模型配置失败: {exc}")

    # === Context budget and safety check (avoid exceeding the model's
    # context window) ===
    max_context_tokens = get_model_context_window(getattr(web_terminal, "model_key", None) or "kimi-k2.5")
    current_tokens = web_terminal.context_manager.get_current_context_tokens(conversation_id)
    # Sync to the underlying client early so it can shrink max_tokens.
    web_terminal.api_client.update_context_budget(current_tokens, max_context_tokens)
    if max_context_tokens:
        # Hard stop: context already at/over the model limit.
        if current_tokens >= max_context_tokens:
            err_msg = (
                f"当前对话上下文已达 {current_tokens} tokens,超过模型上限 "
                f"{max_context_tokens},请先使用压缩功能或清理对话后再试。"
            )
            debug_log(err_msg)
            web_terminal.context_manager.add_conversation("system", err_msg)
            sender('error', {
                'message': err_msg,
                'status_code': 400,
                'error_type': 'context_overflow'
            })
            return
        # Soft warning at 70% usage, sent only once per conversation.
        usage_percent = (current_tokens / max_context_tokens) * 100
        warned = web_terminal.context_manager.conversation_metadata.get("context_warning_sent", False)
        if usage_percent >= 70 and not warned:
            warn_msg = (
                f"当前对话上下文约占 {usage_percent:.1f}%({current_tokens}/{max_context_tokens}),"
                "建议使用压缩功能。"
            )
            web_terminal.context_manager.conversation_metadata["context_warning_sent"] = True
            web_terminal.context_manager.auto_save_conversation(force=True)
            sender('context_warning', {
                'title': '上下文过长',
                'message': warn_msg,
                'type': 'warning',
                'conversation_id': conversation_id
            })

    # Start a new AI message on the frontend.
    sender('ai_message_start', {})

    # Incremental-save state.
    accumulated_response = ""  # accumulated response content across iterations
    is_first_iteration = True  # whether this is the first iteration

    # Counters and limits.
    total_iterations = 0
    total_tool_calls = 0
    consecutive_same_tool = defaultdict(int)
    last_tool_name = ""
    auto_fix_attempts = 0
    last_tool_call_time = 0
    detected_tool_intent: Dict[str, str] = {}

    # Max iterations (overridable via the API); None means unlimited.
    max_iterations_override = getattr(web_terminal, "max_iterations_override", None)
    max_iterations = max_iterations_override if max_iterations_override is not None else MAX_ITERATIONS_PER_TASK
    max_api_retries = 4
    retry_delay_seconds = 10

    iteration = 0
    while max_iterations is None or iteration < max_iterations:
        current_iteration = iteration + 1
        iteration += 1
        total_iterations += 1
        iteration_limit_label = max_iterations if max_iterations is not None else "∞"
        debug_log(f"\n--- 迭代 {current_iteration}/{iteration_limit_label} 开始 ---")

        # Check the total tool-call limit.
        if MAX_TOTAL_TOOL_CALLS is not None and total_tool_calls >= MAX_TOTAL_TOOL_CALLS:
            debug_log(f"已达到最大工具调用次数限制 ({MAX_TOTAL_TOOL_CALLS})")
            sender('system_message', {
                'content': f'⚠️ 已达到最大工具调用次数限制 ({MAX_TOTAL_TOOL_CALLS}),任务结束。'
            })
            mark_force_thinking(web_terminal, reason="tool_limit")
            break

        apply_thinking_schedule(web_terminal)

        # Per-iteration stream accumulators.
        full_response = ""
        tool_calls = []
        current_thinking = ""
        detected_tools = {}
        last_usage_payload = None

        # Stream state flags.
        in_thinking = False
        thinking_started = False
        thinking_ended = False
        text_started = False
        text_has_content = False
        text_streaming = False
        text_chunk_index = 0
        last_text_chunk_time: Optional[float] = None

        # Chunk counters.
        chunk_count = 0
        reasoning_chunks = 0
        content_chunks = 0
        tool_chunks = 0
        last_finish_reason = None

        thinking_expected = web_terminal.api_client.get_current_thinking_mode()
        debug_log(f"思考模式: {thinking_expected}")
        # Quota accounting: record the call and bail out if over quota.
        quota_allowed = True
        quota_info = {}
        if hasattr(web_terminal, "record_model_call"):
            quota_allowed, quota_info = web_terminal.record_model_call(bool(thinking_expected))
        if not quota_allowed:
            quota_type = 'thinking' if thinking_expected else 'fast'
            socketio.emit('quota_notice', {
                'type': quota_type,
                'reset_at': quota_info.get('reset_at'),
                'limit': quota_info.get('limit'),
                'count': quota_info.get('count')
            }, room=f"user_{getattr(web_terminal, 'username', '')}")
            sender('quota_exceeded', {
                'type': quota_type,
                'reset_at': quota_info.get('reset_at')
            })
            sender('error', {
                'message': "配额已达到上限,暂时无法继续调用模型。",
                'quota': quota_info
            })
            return

        tool_call_limit_label = MAX_TOTAL_TOOL_CALLS if MAX_TOTAL_TOOL_CALLS is not None else "∞"
        print(f"[API] 第{current_iteration}次调用 (总工具调用: {total_tool_calls}/{tool_call_limit_label})")

        # Run the streaming attempt loop; it threads all the stream state
        # through and returns the updated values.
        stream_result = await run_streaming_attempts(
            web_terminal=web_terminal,
            messages=messages,
            tools=tools,
            sender=sender,
            client_sid=client_sid,
            username=username,
            conversation_id=conversation_id,
            current_iteration=current_iteration,
            max_api_retries=max_api_retries,
            retry_delay_seconds=retry_delay_seconds,
            detected_tool_intent=detected_tool_intent,
            full_response=full_response,
            tool_calls=tool_calls,
            current_thinking=current_thinking,
            detected_tools=detected_tools,
            last_usage_payload=last_usage_payload,
            in_thinking=in_thinking,
            thinking_started=thinking_started,
            thinking_ended=thinking_ended,
            text_started=text_started,
            text_has_content=text_has_content,
            text_streaming=text_streaming,
            text_chunk_index=text_chunk_index,
            last_text_chunk_time=last_text_chunk_time,
            chunk_count=chunk_count,
            reasoning_chunks=reasoning_chunks,
            content_chunks=content_chunks,
            tool_chunks=tool_chunks,
            last_finish_reason=last_finish_reason,
            accumulated_response=accumulated_response,
        )
        if stream_result.get("stopped"):
            return

        # Unpack the updated stream state back into locals.
        full_response = stream_result["full_response"]
        tool_calls = stream_result["tool_calls"]
        current_thinking = stream_result["current_thinking"]
        detected_tools = stream_result["detected_tools"]
        last_usage_payload = stream_result["last_usage_payload"]
        in_thinking = stream_result["in_thinking"]
        thinking_started = stream_result["thinking_started"]
        thinking_ended = stream_result["thinking_ended"]
        text_started = stream_result["text_started"]
        text_has_content = stream_result["text_has_content"]
        text_streaming = stream_result["text_streaming"]
        text_chunk_index = stream_result["text_chunk_index"]
        last_text_chunk_time = stream_result["last_text_chunk_time"]
        chunk_count = stream_result["chunk_count"]
        reasoning_chunks = stream_result["reasoning_chunks"]
        content_chunks = stream_result["content_chunks"]
        tool_chunks = stream_result["tool_chunks"]
        last_finish_reason = stream_result["last_finish_reason"]
        accumulated_response = stream_result["accumulated_response"]

        # Post-stream statistics.
        debug_log(f"\n流结束统计:")
        debug_log(f" 总chunks: {chunk_count}")
        debug_log(f" 思考chunks: {reasoning_chunks}")
        debug_log(f" 内容chunks: {content_chunks}")
        debug_log(f" 工具chunks: {tool_chunks}")
        debug_log(f" 收集到的思考: {len(current_thinking)} 字符")
        debug_log(f" 收集到的正文: {len(full_response)} 字符")
        debug_log(f" 收集到的工具: {len(tool_calls)} 个")

        # Close any still-open thinking stream.
        if in_thinking and not thinking_ended:
            sender('thinking_end', {'full_content': current_thinking})
            await asyncio.sleep(0.1)

        # Make sure the text_end event is sent.
        if text_started and text_has_content:
            debug_log(f"发送text_end事件,完整内容长度: {len(full_response)}")
            sender('text_end', {'full_content': full_response})
            await asyncio.sleep(0.1)
            text_streaming = False

        if full_response.strip():
            debug_log(f"流式文本内容长度: {len(full_response)} 字符")

        # Record thinking output for this task and advance the schedule.
        if web_terminal.api_client.last_call_used_thinking and current_thinking:
            web_terminal.api_client.current_task_thinking = current_thinking or ""
        if web_terminal.api_client.current_task_first_call:
            web_terminal.api_client.current_task_first_call = False
        update_thinking_after_call(web_terminal)

        # Detect malformed tool calls (JSON dumped as plain text) and, when
        # enabled, inject a corrective user message and retry.
        if not tool_calls and full_response and AUTO_FIX_TOOL_CALL:
            if detect_malformed_tool_call(full_response):
                auto_fix_attempts += 1

                if auto_fix_attempts <= AUTO_FIX_MAX_ATTEMPTS:
                    debug_log(f"检测到格式错误的工具调用,尝试自动修复 (尝试 {auto_fix_attempts}/{AUTO_FIX_MAX_ATTEMPTS})")

                    fix_message = "你使用了错误的格式输出工具调用。请使用正确的工具调用格式而不是直接输出JSON。根据当前进度继续执行任务。"

                    sender('system_message', {
                        'content': f'⚠️ 自动修复: {fix_message}'
                    })
                    maybe_mark_failure_from_message(web_terminal, f'⚠️ 自动修复: {fix_message}')

                    messages.append({
                        "role": "user",
                        "content": fix_message
                    })

                    await asyncio.sleep(1)
                    continue
                else:
                    debug_log(f"自动修复尝试已达上限 ({AUTO_FIX_MAX_ATTEMPTS})")
                    sender('system_message', {
                        'content': f'⌘ 工具调用格式错误,自动修复失败。请手动检查并重试。'
                    })
                    maybe_mark_failure_from_message(web_terminal, '⌘ 工具调用格式错误,自动修复失败。请手动检查并重试。')
                    break

        # Build the assistant message (used for API continuation).
        assistant_content_parts = []

        if full_response:
            assistant_content_parts.append(full_response)

        assistant_content = "\n".join(assistant_content_parts) if assistant_content_parts else ""

        # Append to the in-flight message list (for API continuation; not
        # persisted to file here).
        assistant_message = {
            "role": "assistant",
            "content": assistant_content,
            "tool_calls": tool_calls
        }
        if current_thinking:
            assistant_message["reasoning_content"] = current_thinking

        messages.append(assistant_message)
        # Persist the assistant turn to conversation history when it has
        # any content, thinking, or tool calls.
        if assistant_content or current_thinking or tool_calls:
            web_terminal.context_manager.add_conversation(
                "assistant",
                assistant_content,
                tool_calls=tool_calls if tool_calls else None,
                reasoning_content=current_thinking or None
            )

        # Reset stream flags for the next iteration (full_response was
        # already consumed for the save above).
        text_streaming = False
        text_started = False
        text_has_content = False
        full_response = ""

        if not tool_calls:
            debug_log("没有工具调用,结束迭代")
            break

        # Track consecutive calls to the same tool to detect loops.
        for tc in tool_calls:
            tool_name = tc["function"]["name"]

            if tool_name == last_tool_name:
                consecutive_same_tool[tool_name] += 1

                if (
                    MAX_CONSECUTIVE_SAME_TOOL is not None
                    and consecutive_same_tool[tool_name] >= MAX_CONSECUTIVE_SAME_TOOL
                ):
                    debug_log(f"警告: 连续调用相同工具 {tool_name} 已达 {MAX_CONSECUTIVE_SAME_TOOL} 次")
                    sender('system_message', {
                        'content': f'⚠️ 检测到重复调用 {tool_name} 工具 {MAX_CONSECUTIVE_SAME_TOOL} 次,可能存在循环。'
                    })
                    maybe_mark_failure_from_message(web_terminal, f'⚠️ 检测到重复调用 {tool_name} 工具 {MAX_CONSECUTIVE_SAME_TOOL} 次,可能存在循环。')

                    # Two calls past the threshold: announce termination.
                    # NOTE(review): this `break` only exits the for-loop over
                    # tool_calls, not the outer while — the tools still run
                    # this round; confirm that is intended.
                    if consecutive_same_tool[tool_name] >= MAX_CONSECUTIVE_SAME_TOOL + 2:
                        debug_log(f"终止: 工具 {tool_name} 调用次数过多")
                        sender('system_message', {
                            'content': f'⌘ 工具 {tool_name} 重复调用过多,任务终止。'
                        })
                        maybe_mark_failure_from_message(web_terminal, f'⌘ 工具 {tool_name} 重复调用过多,任务终止。')
                        break
            else:
                consecutive_same_tool.clear()
                consecutive_same_tool[tool_name] = 1

            last_tool_name = tool_name

        # Update totals.
        total_tool_calls += len(tool_calls)

        # Execute every requested tool call.
        tool_loop_result = await execute_tool_calls(
            web_terminal=web_terminal,
            tool_calls=tool_calls,
            sender=sender,
            messages=messages,
            client_sid=client_sid,
            username=username,
            iteration=iteration,
            conversation_id=conversation_id,
            last_tool_call_time=last_tool_call_time,
            process_sub_agent_updates=process_sub_agent_updates,
            maybe_mark_failure_from_message=maybe_mark_failure_from_message,
            mark_force_thinking=mark_force_thinking,
            get_stop_flag=get_stop_flag,
            clear_stop_flag=clear_stop_flag,
        )
        last_tool_call_time = tool_loop_result.get("last_tool_call_time", last_tool_call_time)
        if tool_loop_result.get("stopped"):
            return

        # No longer the first iteration.
        is_first_iteration = False

    # Final statistics.
    debug_log(f"\n{'='*40}")
    debug_log(f"任务完成统计:")
    debug_log(f" 总迭代次数: {total_iterations}")
    debug_log(f" 总工具调用: {total_tool_calls}")
    debug_log(f" 自动修复尝试: {auto_fix_attempts}")
    debug_log(f" 累积响应: {len(accumulated_response)} 字符")
    debug_log(f"{'='*40}\n")

    # Check for background sub-agents still running, or finished ones whose
    # completion has not yet been announced.
    manager = getattr(web_terminal, "sub_agent_manager", None)
    has_running_sub_agents = False
    if manager:
        if not hasattr(web_terminal, "_announced_sub_agent_tasks"):
            web_terminal._announced_sub_agent_tasks = set()
        running_tasks = [
            task for task in manager.tasks.values()
            if task.get("status") not in TERMINAL_STATUSES.union({"terminated"})
            and task.get("run_in_background")
            and task.get("conversation_id") == conversation_id
        ]
        pending_notice_tasks = [
            task for task in manager.tasks.values()
            if task.get("status") in TERMINAL_STATUSES.union({"terminated"})
            and task.get("run_in_background")
            and task.get("conversation_id") == conversation_id
            and task.get("task_id") not in web_terminal._announced_sub_agent_tasks
        ]

        if running_tasks or pending_notice_tasks:
            has_running_sub_agents = True
            notify_tasks = running_tasks + pending_notice_tasks
            debug_log(f"[SubAgent] 后台子智能体等待: running={len(running_tasks)} pending_notice={len(pending_notice_tasks)}")
            # Tell the frontend sub-agents are running/pending so it keeps
            # its waiting state.
            sender('sub_agent_waiting', {
                'count': len(notify_tasks),
                'tasks': [{'agent_id': t.get('agent_id'), 'summary': t.get('summary')} for t in notify_tasks]
            })

            # Launch a background task to poll for (and make up missed)
            # sub-agent completions. Runs the coroutine on a fresh event
            # loop because start_background_task uses a plain callable.
            def run_poll():
                import asyncio
                loop = asyncio.new_event_loop()
                asyncio.set_event_loop(loop)
                try:
                    loop.run_until_complete(poll_sub_agent_completion(
                        web_terminal=web_terminal,
                        workspace=workspace,
                        conversation_id=conversation_id,
                        client_sid=client_sid,
                        username=username
                    ))
                finally:
                    loop.close()

            socketio.start_background_task(run_poll)

    # Emit completion (the frontend keeps waiting if sub-agents remain).
    sender('task_complete', {
        'total_iterations': total_iterations,
        'total_tool_calls': total_tool_calls,
        'auto_fix_attempts': auto_fix_attempts,
        'has_running_sub_agents': has_running_sub_agents
    })