# agent-Specialization/server/chat_flow_task_main.py

from __future__ import annotations

import asyncio
import json
import time
import re
import zipfile
from collections import defaultdict, Counter, deque
from datetime import datetime, timedelta
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple

from werkzeug.utils import secure_filename

from config import (
    OUTPUT_FORMATS,
    AUTO_FIX_TOOL_CALL,
    AUTO_FIX_MAX_ATTEMPTS,
    MAX_ITERATIONS_PER_TASK,
    MAX_CONSECUTIVE_SAME_TOOL,
    MAX_TOTAL_TOOL_CALLS,
    TOOL_CALL_COOLDOWN,
    MAX_UPLOAD_SIZE,
    DEFAULT_CONVERSATIONS_LIMIT,
    MAX_CONVERSATIONS_LIMIT,
    CONVERSATIONS_DIR,
    DEFAULT_RESPONSE_MAX_TOKENS,
    DEFAULT_PROJECT_PATH,
    LOGS_DIR,
    AGENT_VERSION,
    THINKING_FAST_INTERVAL,
    PROJECT_MAX_STORAGE_MB,
    PROJECT_MAX_STORAGE_BYTES,
    UPLOAD_SCAN_LOG_SUBDIR,
)
from modules.personalization_manager import (
    load_personalization_config,
    save_personalization_config,
    THINKING_INTERVAL_MIN,
    THINKING_INTERVAL_MAX,
)
from modules.upload_security import UploadSecurityError
from modules.user_manager import UserWorkspace
from modules.usage_tracker import QUOTA_DEFAULTS
from core.web_terminal import WebTerminal
from utils.tool_result_formatter import format_tool_result_for_context
from utils.conversation_manager import ConversationManager
from config.model_profiles import get_model_context_window, get_model_profile

from .auth_helpers import api_login_required, resolve_admin_policy, get_current_user_record, get_current_username
from .context import with_terminal, get_gui_manager, get_upload_guard, build_upload_error_response, ensure_conversation_loaded, reset_system_state, get_user_resources, get_or_create_usage_tracker
from .utils_common import (
    build_review_lines,
    debug_log,
    log_backend_chunk,
    log_frontend_chunk,
    log_streaming_debug_entry,
    brief_log,
    DEBUG_LOG_FILE,
    CHUNK_BACKEND_LOG_FILE,
    CHUNK_FRONTEND_LOG_FILE,
    STREAMING_DEBUG_LOG_FILE,
)
from .security import rate_limited, format_tool_result_notice, compact_web_search_result, consume_socket_token, prune_socket_tokens, validate_csrf_request, requires_csrf_protection, get_csrf_token
from .monitor import cache_monitor_snapshot, get_cached_monitor_snapshot
from .extensions import socketio
from .state import (
    MONITOR_FILE_TOOLS,
    MONITOR_MEMORY_TOOLS,
    MONITOR_SNAPSHOT_CHAR_LIMIT,
    MONITOR_MEMORY_ENTRY_LIMIT,
    RATE_LIMIT_BUCKETS,
    FAILURE_TRACKERS,
    pending_socket_tokens,
    usage_trackers,
    MONITOR_SNAPSHOT_CACHE,
    MONITOR_SNAPSHOT_CACHE_LIMIT,
    PROJECT_STORAGE_CACHE,
    PROJECT_STORAGE_CACHE_TTL_SECONDS,
    RECENT_UPLOAD_EVENT_LIMIT,
    RECENT_UPLOAD_FEED_LIMIT,
    THINKING_FAILURE_KEYWORDS,
    TITLE_PROMPT_PATH,
    get_last_active_ts,
    user_manager,
    container_manager,
    custom_tool_registry,
    user_terminals,
    terminal_rooms,
    connection_users,
    stop_flags,
    get_stop_flag,
    set_stop_flag,
    clear_stop_flag,
)
from .chat_flow_helpers import (
    detect_malformed_tool_call as _detect_malformed_tool_call,
    detect_tool_failure,
    get_thinking_state,
    mark_force_thinking as _mark_force_thinking,
    mark_suppress_thinking,
    apply_thinking_schedule as _apply_thinking_schedule,
    update_thinking_after_call as _update_thinking_after_call,
    maybe_mark_failure_from_message as _maybe_mark_failure_from_message,
    generate_conversation_title_background as _generate_conversation_title_background,
)


from .chat_flow_runner_helpers import (
    extract_intent_from_partial,
    resolve_monitor_path,
    resolve_monitor_memory,
    capture_monitor_snapshot,
)


from .chat_flow_runtime import (
    generate_conversation_title_background,
    mark_force_thinking,
    apply_thinking_schedule,
    update_thinking_after_call,
    maybe_mark_failure_from_message,
    detect_malformed_tool_call,
)

from .chat_flow_pending_writes import finalize_pending_append, finalize_pending_modify
from .chat_flow_task_support import process_sub_agent_updates, wait_retry_delay, cancel_pending_tools
from .chat_flow_tool_loop import execute_tool_calls

async def handle_task_with_sender(terminal: WebTerminal, workspace: UserWorkspace, message, images, sender, client_sid, username: str, videos=None):
    """处理任务并发送消息 - 集成token统计版本"""
    web_terminal = terminal
    conversation_id = getattr(web_terminal.context_manager, "current_conversation_id", None)
    videos = videos or []
    raw_sender = sender

    def sender(event_type, data):
        """Forward an event to the raw sender, tagging key events with identifiers.

        For dict payloads of the ``error``, ``quota_exceeded``, ``task_stopped``
        and ``task_complete`` events, a shallow copy of the payload is sent with
        ``conversation_id``, ``task_id`` and ``client_sid`` filled in when they
        are available and not already present. Every other event (and any
        non-dict payload) is passed through untouched.
        """
        if isinstance(data, dict) and event_type in {"error", "quota_exceeded", "task_stopped", "task_complete"}:
            # Work on a copy so the caller's dict is never mutated.
            enriched = dict(data)
            # Prefer the id captured when the task started; otherwise read the
            # context manager's current value at send time.
            resolved_conv = conversation_id or getattr(web_terminal.context_manager, "current_conversation_id", None)
            # Fall back to the client socket id when the terminal has no task id.
            resolved_task = getattr(web_terminal, "task_id", None) or client_sid
            identifiers = (
                ("conversation_id", resolved_conv),
                ("task_id", resolved_task),
                ("client_sid", client_sid),
            )
            for field_name, field_value in identifiers:
                # setdefault keeps any value the caller supplied explicitly.
                if field_value:
                    enriched.setdefault(field_name, field_value)
            data = enriched
        raw_sender(event_type, data)

    # 如果是思考模式，重置状态
    if web_terminal.thinking_mode:
        web_terminal.api_client.start_new_task(force_deep=web_terminal.deep_thinking_mode)
        state = get_thinking_state(web_terminal)
        state["fast_streak"] = 0
        state["force_next"] = False
        state["suppress_next"] = False

    # 添加到对话历史
    history_len_before = len(getattr(web_terminal.context_manager, "conversation_history", []) or [])
    is_first_user_message = history_len_before == 0
    web_terminal.context_manager.add_conversation("user", message, images=images, videos=videos)

    if is_first_user_message and getattr(web_terminal, "context_manager", None):
        try:
            personal_config = load_personalization_config(workspace.data_dir)
        except Exception:
            personal_config = {}
        auto_title_enabled = personal_config.get("auto_generate_title", True)
        if auto_title_enabled:
            conv_id = getattr(web_terminal.context_manager, "current_conversation_id", None)
            socketio.start_background_task(
                generate_conversation_title_background,
                web_terminal,
                conv_id,
                message,
                username
            )

    # === 移除：不在这里计算输入token，改为在每次API调用前计算 ===

    # 构建上下文和消息（用于API调用）
    context = web_terminal.build_context()
    messages = web_terminal.build_messages(context, message)
    tools = web_terminal.define_tools()
    try:
        profile = get_model_profile(getattr(web_terminal, "model_key", None) or "kimi-k2.5")
        web_terminal.apply_model_profile(profile)
    except Exception as exc:
        debug_log(f"更新模型配置失败: {exc}")

    # === 上下文预算与安全校验（避免超出模型上下文） ===
    max_context_tokens = get_model_context_window(getattr(web_terminal, "model_key", None) or "kimi-k2.5")
    current_tokens = web_terminal.context_manager.get_current_context_tokens(conversation_id)
    # 提前同步给底层客户端，动态收缩 max_tokens
    web_terminal.api_client.update_context_budget(current_tokens, max_context_tokens)
    if max_context_tokens:
        if current_tokens >= max_context_tokens:
            err_msg = (
                f"当前对话上下文已达 {current_tokens} tokens，超过模型上限 "
                f"{max_context_tokens}，请先使用压缩功能或清理对话后再试。"
            )
            debug_log(err_msg)
            web_terminal.context_manager.add_conversation("system", err_msg)
            sender('error', {
                'message': err_msg,
                'status_code': 400,
                'error_type': 'context_overflow'
            })
            return
        usage_percent = (current_tokens / max_context_tokens) * 100
        warned = web_terminal.context_manager.conversation_metadata.get("context_warning_sent", False)
        if usage_percent >= 70 and not warned:
            warn_msg = (
                f"当前对话上下文约占 {usage_percent:.1f}%（{current_tokens}/{max_context_tokens}），"
                "建议使用压缩功能。"
            )
            web_terminal.context_manager.conversation_metadata["context_warning_sent"] = True
            web_terminal.context_manager.auto_save_conversation(force=True)
            sender('context_warning', {
                'title': '上下文过长',
                'message': warn_msg,
                'type': 'warning',
                'conversation_id': conversation_id
            })

    # 开始新的AI消息
    sender('ai_message_start', {})

    # 增量保存相关变量
    accumulated_response = ""   # 累积的响应内容
    is_first_iteration = True   # 是否是第一次迭代

    # 统计和限制变量
    total_iterations = 0
    total_tool_calls = 0
    consecutive_same_tool = defaultdict(int)
    last_tool_name = ""
    auto_fix_attempts = 0
    last_tool_call_time = 0
    detected_tool_intent: Dict[str, str] = {}

    # 设置最大迭代次数（API 可覆盖）；None 表示不限制
    max_iterations_override = getattr(web_terminal, "max_iterations_override", None)
    max_iterations = max_iterations_override if max_iterations_override is not None else MAX_ITERATIONS_PER_TASK
    max_api_retries = 4
    retry_delay_seconds = 10

    pending_append = None  # {"path": str, "tool_call_id": str, "buffer": str, ...}
    append_probe_buffer = ""
    pending_modify = None  # {"path": str, "tool_call_id": str, "buffer": str, ...}
    modify_probe_buffer = ""

    iteration = 0
    while max_iterations is None or iteration < max_iterations:
        current_iteration = iteration + 1
        iteration += 1
        total_iterations += 1
        iteration_limit_label = max_iterations if max_iterations is not None else "∞"
        debug_log(f"\n--- 迭代 {current_iteration}/{iteration_limit_label} 开始 ---")

        # 检查是否超过总工具调用限制
        if MAX_TOTAL_TOOL_CALLS is not None and total_tool_calls >= MAX_TOTAL_TOOL_CALLS:
            debug_log(f"已达到最大工具调用次数限制 ({MAX_TOTAL_TOOL_CALLS})")
            sender('system_message', {
                'content': f'⚠️ 已达到最大工具调用次数限制 ({MAX_TOTAL_TOOL_CALLS})，任务结束。'
            })
            mark_force_thinking(web_terminal, reason="tool_limit")
            break

        apply_thinking_schedule(web_terminal)

        full_response = ""
        tool_calls = []
        current_thinking = ""
        detected_tools = {}
        last_usage_payload = None

        # 状态标志
        in_thinking = False
        thinking_started = False
        thinking_ended = False
        text_started = False
        text_has_content = False
        text_streaming = False
        text_chunk_index = 0
        last_text_chunk_time: Optional[float] = None

        # 计数器
        chunk_count = 0
        reasoning_chunks = 0
        content_chunks = 0
        tool_chunks = 0
        append_break_triggered = False
        append_result = {"handled": False}
        modify_break_triggered = False
        modify_result = {"handled": False}
        last_finish_reason = None

        thinking_expected = web_terminal.api_client.get_current_thinking_mode()
        debug_log(f"思考模式: {thinking_expected}")
        quota_allowed = True
        quota_info = {}
        if hasattr(web_terminal, "record_model_call"):
            quota_allowed, quota_info = web_terminal.record_model_call(bool(thinking_expected))
        if not quota_allowed:
            quota_type = 'thinking' if thinking_expected else 'fast'
            socketio.emit('quota_notice', {
                'type': quota_type,
                'reset_at': quota_info.get('reset_at'),
                'limit': quota_info.get('limit'),
                'count': quota_info.get('count')
            }, room=f"user_{getattr(web_terminal, 'username', '')}")
            sender('quota_exceeded', {
                'type': quota_type,
                'reset_at': quota_info.get('reset_at')
            })
            sender('error', {
                'message': "配额已达到上限，暂时无法继续调用模型。",
                'quota': quota_info
            })
            return

        tool_call_limit_label = MAX_TOTAL_TOOL_CALLS if MAX_TOTAL_TOOL_CALLS is not None else "∞"
        print(f"[API] 第{current_iteration}次调用 (总工具调用: {total_tool_calls}/{tool_call_limit_label})")

        api_error = None
        for api_attempt in range(max_api_retries + 1):
            api_error = None
            if api_attempt > 0:
                full_response = ""
                tool_calls = []
                current_thinking = ""
                detected_tools = {}
                last_usage_payload = None
                in_thinking = False
                thinking_started = False
                thinking_ended = False
                text_started = False
                text_has_content = False
                text_streaming = False
                text_chunk_index = 0
                last_text_chunk_time = None
                chunk_count = 0
                reasoning_chunks = 0
                content_chunks = 0
                tool_chunks = 0
                append_break_triggered = False
                append_result = {"handled": False}
                modify_break_triggered = False
                modify_result = {"handled": False}
                last_finish_reason = None

            # 收集流式响应
            async for chunk in web_terminal.api_client.chat(messages, tools, stream=True):
                chunk_count += 1

                # 检查停止标志
                client_stop_info = get_stop_flag(client_sid, username)
                if client_stop_info:
                    stop_requested = client_stop_info.get('stop', False) if isinstance(client_stop_info, dict) else client_stop_info
                    if stop_requested:
                        debug_log(f"检测到停止请求，中断流处理")
                        if pending_append:
                            append_result, pending_append, append_probe_buffer = await finalize_pending_append(pending_append=pending_append, append_probe_buffer=append_probe_buffer, response_text=full_response, stream_completed=False, finish_reason="user_stop", web_terminal=web_terminal, sender=sender, debug_log=debug_log)
                        if pending_modify:
                            modify_result, pending_modify, modify_probe_buffer = await finalize_pending_modify(pending_modify=pending_modify, modify_probe_buffer=modify_probe_buffer, response_text=full_response, stream_completed=False, finish_reason="user_stop", web_terminal=web_terminal, sender=sender, debug_log=debug_log)
                        cancel_pending_tools(tool_calls_list=tool_calls, sender=sender, messages=messages)
                        sender('task_stopped', {
                            'message': '命令执行被用户取消',
                            'reason': 'user_stop'
                        })
                        clear_stop_flag(client_sid, username)
                        return

                if isinstance(chunk, dict) and chunk.get("error"):
                    api_error = chunk.get("error")
                    break

                # 先尝试记录 usage（有些平台会在最后一个 chunk 里携带 usage 但 choices 为空）
                usage_info = chunk.get("usage")
                if usage_info:
                    last_usage_payload = usage_info

                if "choices" not in chunk:
                    debug_log(f"Chunk {chunk_count}: 无choices字段")
                    continue
                if not chunk.get("choices"):
                    debug_log(f"Chunk {chunk_count}: choices为空列表")
                    continue
                choice = chunk["choices"][0]
                if not usage_info and isinstance(choice, dict) and choice.get("usage"):
                    # 兼容部分供应商将 usage 放在 choice 内的格式（例如部分 Kimi/Qwen 返回）
                    last_usage_payload = choice.get("usage")
                delta = choice.get("delta", {})
                finish_reason = choice.get("finish_reason")
                if finish_reason:
                    last_finish_reason = finish_reason

                # 处理思考内容（兼容 reasoning_content / reasoning_details）
                reasoning_content = ""
                if "reasoning_content" in delta:
                    reasoning_content = delta.get("reasoning_content") or ""
                elif "reasoning_details" in delta:
                    details = delta.get("reasoning_details")
                    if isinstance(details, list):
                        parts = []
                        for item in details:
                            if isinstance(item, dict):
                                text = item.get("text")
                                if text:
                                    parts.append(text)
                        if parts:
                            reasoning_content = "".join(parts)
                if reasoning_content:
                    reasoning_chunks += 1
                    debug_log(f"  思考内容 #{reasoning_chunks}: {len(reasoning_content)} 字符")

                    if not thinking_started:
                        in_thinking = True
                        thinking_started = True
                        sender('thinking_start', {})
                        await asyncio.sleep(0.05)

                    current_thinking += reasoning_content
                    sender('thinking_chunk', {'content': reasoning_content})

                # 处理正常内容
                if "content" in delta:
                    content = delta["content"]
                    if content:
                        content_chunks += 1
                        debug_log(f"  正式内容 #{content_chunks}: {repr(content[:100] if content else 'None')}")

                        if in_thinking and not thinking_ended:
                            in_thinking = False
                            thinking_ended = True
                            sender('thinking_end', {'full_content': current_thinking})
                            await asyncio.sleep(0.1)


                        expecting_modify = bool(pending_modify) or bool(getattr(web_terminal, "pending_modify_request", None))
                        expecting_append = bool(pending_append) or bool(getattr(web_terminal, "pending_append_request", None))

                        if pending_modify:
                            if not pending_modify.get("start_seen"):
                                probe_buffer = pending_modify.get("probe_buffer", "") + content
                                if len(probe_buffer) > 10000:
                                    probe_buffer = probe_buffer[-10000:]
                                marker = pending_modify.get("start_marker")
                                marker_index = probe_buffer.find(marker)
                                if marker_index == -1:
                                    pending_modify["probe_buffer"] = probe_buffer
                                    continue
                                after_marker = marker_index + len(marker)
                                remainder = probe_buffer[after_marker:]
                                pending_modify["buffer"] = remainder
                                pending_modify["raw_buffer"] = marker + remainder
                                pending_modify["start_seen"] = True
                                pending_modify["detected_blocks"] = set()
                                pending_modify["probe_buffer"] = ""
                                if pending_modify.get("display_id"):
                                    sender('update_action', {
                                        'id': pending_modify["display_id"],
                                        'status': 'running',
                                        'preparing_id': pending_modify.get("tool_call_id"),
                                        'message': f"正在修改 {pending_modify['path']}..."
                                    })
                            else:
                                pending_modify["buffer"] += content
                                pending_modify["raw_buffer"] += content

                            if pending_modify.get("start_seen"):
                                block_text = pending_modify["buffer"]
                                for match in re.finditer(r"\[replace:(\d+)\]", block_text):
                                    try:
                                        block_index = int(match.group(1))
                                    except ValueError:
                                        continue
                                    detected_blocks = pending_modify.setdefault("detected_blocks", set())
                                    if block_index not in detected_blocks:
                                        detected_blocks.add(block_index)
                                        if pending_modify.get("display_id"):
                                            sender('update_action', {
                                                'id': pending_modify["display_id"],
                                                'status': 'running',
                                                'preparing_id': pending_modify.get("tool_call_id"),
                                                'message': f"正在对 {pending_modify['path']} 进行第 {block_index} 处修改..."
                                            })

                            if pending_modify.get("start_seen"):
                                end_pos = pending_modify["buffer"].find(pending_modify["end_marker"])
                                if end_pos != -1:
                                    pending_modify["end_index"] = end_pos
                                    modify_break_triggered = True
                                    debug_log("检测到<<<END_MODIFY>>>，即将终止流式输出并应用修改")
                                    break
                            continue
                        elif expecting_modify:
                            modify_probe_buffer += content
                            if len(modify_probe_buffer) > 10000:
                                modify_probe_buffer = modify_probe_buffer[-10000:]

                            marker_match = re.search(r"<<<MODIFY:\s*([\s\S]*?)>>>", modify_probe_buffer)
                            if marker_match:
                                detected_raw_path = marker_match.group(1)
                                detected_path = detected_raw_path.strip()
                                marker_full = marker_match.group(0)
                                after_marker_index = modify_probe_buffer.find(marker_full) + len(marker_full)
                                remainder = modify_probe_buffer[after_marker_index:]
                                modify_probe_buffer = ""

                                if not detected_path:
                                    debug_log("检测到 MODIFY 起始标记但路径为空，忽略。")
                                    continue

                                pending_modify = {
                                    "path": detected_path,
                                    "tool_call_id": None,
                                    "buffer": remainder,
                                    "raw_buffer": marker_full + remainder,
                                    "start_marker": marker_full,
                                    "end_marker": "<<<END_MODIFY>>>",
                                    "start_seen": True,
                                    "end_index": None,
                                    "display_id": None,
                                    "detected_blocks": set()
                                }
                                if hasattr(web_terminal, "pending_modify_request"):
                                    web_terminal.pending_modify_request = {"path": detected_path}
                                debug_log(f"直接检测到modify起始标记，构建修改缓冲: {detected_path}")

                                end_pos = pending_modify["buffer"].find(pending_modify["end_marker"])
                                if end_pos != -1:
                                    pending_modify["end_index"] = end_pos
                                    modify_break_triggered = True
                                    debug_log("检测到<<<END_MODIFY>>>，即将终止流式输出并应用修改")
                                    break
                            continue

                        if pending_append:
                            pending_append["buffer"] += content

                            if pending_append.get("content_start") is None:
                                marker_index = pending_append["buffer"].find(pending_append["start_marker"])
                                if marker_index != -1:
                                    pending_append["content_start"] = marker_index + len(pending_append["start_marker"])
                                    debug_log(f"检测到追加起始标识: {pending_append['start_marker']}")

                            if pending_append.get("content_start") is not None:
                                end_index = pending_append["buffer"].find(
                                    pending_append["end_marker"],
                                    pending_append["content_start"]
                                )
                                if end_index != -1:
                                    pending_append["end_index"] = end_index
                                    append_break_triggered = True
                                    debug_log("检测到<<<END_APPEND>>>，即将终止流式输出并写入文件")
                                    break

                            # 继续累积追加内容
                            continue
                        elif expecting_append:
                            append_probe_buffer += content
                            # 限制缓冲区大小防止过长
                            if len(append_probe_buffer) > 10000:
                                append_probe_buffer = append_probe_buffer[-10000:]

                            marker_match = re.search(r"<<<APPEND:\s*([\s\S]*?)>>>", append_probe_buffer)
                            if marker_match:
                                detected_raw_path = marker_match.group(1)
                                detected_path = detected_raw_path.strip()
                                if not detected_path:
                                    append_probe_buffer = append_probe_buffer[marker_match.end():]
                                    continue
                                marker_full = marker_match.group(0)
                                after_marker_index = append_probe_buffer.find(marker_full) + len(marker_full)
                                remainder = append_probe_buffer[after_marker_index:]
                                append_probe_buffer = ""
                                pending_append = {
                                    "path": detected_path,
                                    "tool_call_id": None,
                                    "buffer": remainder,
                                    "start_marker": marker_full,
                                    "end_marker": "<<<END_APPEND>>>",
                                    "content_start": 0,
                                    "end_index": None,
                                    "display_id": None
                                }
                                if hasattr(web_terminal, "pending_append_request"):
                                    web_terminal.pending_append_request = {"path": detected_path}
                                debug_log(f"直接检测到append起始标记，构建追加缓冲: {detected_path}")
                                # 检查是否立即包含结束标记
                                if pending_append["buffer"]:
                                    end_index = pending_append["buffer"].find(pending_append["end_marker"], pending_append["content_start"])
                                    if end_index != -1:
                                        pending_append["end_index"] = end_index
                                        append_break_triggered = True
                                        debug_log("检测到<<<END_APPEND>>>，即将终止流式输出并写入文件")
                                        break
                                continue

                        if not text_started:
                            text_started = True
                            text_streaming = True
                            sender('text_start', {})
                            brief_log("模型输出了内容")
                            await asyncio.sleep(0.05)

                        if not pending_append:
                            full_response += content
                            accumulated_response += content
                            text_has_content = True
                            emit_time = time.time()
                            elapsed = 0.0 if last_text_chunk_time is None else emit_time - last_text_chunk_time
                            last_text_chunk_time = emit_time
                            text_chunk_index += 1
                            log_backend_chunk(
                                conversation_id,
                                current_iteration,
                                text_chunk_index,
                                elapsed,
                                len(content),
                                content[:32]
                            )
                            sender('text_chunk', {
                                'content': content,
                                'index': text_chunk_index,
                                'elapsed': elapsed
                            })

                # 收集工具调用 - 实时发送准备状态
                if "tool_calls" in delta:
                    tool_chunks += 1
                    for tc in delta["tool_calls"]:
                        found = False
                        for existing in tool_calls:
                            if existing.get("index") == tc.get("index"):
                                if "function" in tc and "arguments" in tc["function"]:
                                    arg_chunk = tc["function"]["arguments"]
                                    existing_fn = existing.get("function", {})
                                    existing_args = existing_fn.get("arguments", "")
                                    existing_fn["arguments"] = (existing_args or "") + arg_chunk
                                    existing["function"] = existing_fn

                                    combined_args = existing_fn.get("arguments", "")
                                    tool_id = existing.get("id") or tc.get("id")
                                    tool_name = (
                                        existing_fn.get("name")
                                        or tc.get("function", {}).get("name", "")
                                    )
                                    intent_value = extract_intent_from_partial(combined_args)
                                    if (
                                        intent_value
                                        and tool_id
                                        and detected_tool_intent.get(tool_id) != intent_value
                                    ):
                                        detected_tool_intent[tool_id] = intent_value
                                        brief_log(f"[intent] 增量提取 {tool_name}: {intent_value}")
                                        sender('tool_intent', {
                                            'id': tool_id,
                                            'name': tool_name,
                                            'intent': intent_value,
                                            'conversation_id': conversation_id
                                        })
                                        debug_log(f"    发送工具意图: {tool_name} -> {intent_value}")
                                        await asyncio.sleep(0.01)
                                found = True
                                break

                        if not found and tc.get("id"):
                            tool_id = tc["id"]
                            tool_name = tc.get("function", {}).get("name", "")
                            arguments_str = tc.get("function", {}).get("arguments", "") or ""

                            # 新工具检测到，立即发送准备事件
                            if tool_id not in detected_tools and tool_name:
                                detected_tools[tool_id] = tool_name

                                # 尝试提前提取 intent
                                intent_value = None
                                if arguments_str:
                                    intent_value = extract_intent_from_partial(arguments_str)
                                    if intent_value:
                                        detected_tool_intent[tool_id] = intent_value
                                        brief_log(f"[intent] 预提取 {tool_name}: {intent_value}")

                                # 立即发送工具准备中事件
                                brief_log(f"[tool] 准备调用 {tool_name} (id={tool_id}) intent={intent_value or '-'}")
                                sender('tool_preparing', {
                                    'id': tool_id,
                                    'name': tool_name,
                                    'message': f'准备调用 {tool_name}...',
                                    'intent': intent_value,
                                    'conversation_id': conversation_id
                                })
                                debug_log(f"    发送工具准备事件: {tool_name}")
                                await asyncio.sleep(0.1)

                            tool_calls.append({
                                "id": tool_id,
                                "index": tc.get("index"),
                                "type": "function",
                                "function": {
                                    "name": tool_name,
                                    "arguments": arguments_str
                                }
                            })
                            # 尝试从增量参数中抽取 intent，并单独推送
                            if tool_id and arguments_str:
                                intent_value = extract_intent_from_partial(arguments_str)
                                if intent_value and detected_tool_intent.get(tool_id) != intent_value:
                                    detected_tool_intent[tool_id] = intent_value
                                    sender('tool_intent', {
                                        'id': tool_id,
                                        'name': tool_name,
                                        'intent': intent_value,
                                        'conversation_id': conversation_id
                                    })
                                    debug_log(f"    发送工具意图: {tool_name} -> {intent_value}")
                                    await asyncio.sleep(0.01)
                            debug_log(f"    新工具: {tool_name}")

            # 检查是否被停止
            client_stop_info = get_stop_flag(client_sid, username)
            if client_stop_info:
                stop_requested = client_stop_info.get('stop', False) if isinstance(client_stop_info, dict) else client_stop_info
                if stop_requested:
                    debug_log("任务在流处理完成后检测到停止状态")
                    sender('task_stopped', {
                        'message': '命令执行被用户取消',
                        'reason': 'user_stop'
                    })
                    cancel_pending_tools(tool_calls_list=tool_calls, sender=sender, messages=messages)
                    clear_stop_flag(client_sid, username)
                    return

            # === API响应完成后只计算输出token ===
            if last_usage_payload:
                try:
                    web_terminal.context_manager.apply_usage_statistics(last_usage_payload)
                    debug_log(
                        f"Usage统计: prompt={last_usage_payload.get('prompt_tokens', 0)}, "
                        f"completion={last_usage_payload.get('completion_tokens', 0)}, "
                        f"total={last_usage_payload.get('total_tokens', 0)}"
                    )
                except Exception as e:
                    debug_log(f"Usage统计更新失败: {e}")
            else:
                debug_log("未获取到usage字段，跳过token统计更新")


            if api_error:
                try:
                    debug_log(f"API错误原始数据: {json.dumps(api_error, ensure_ascii=False)}")
                except Exception:
                    debug_log(f"API错误原始数据(不可序列化): {repr(api_error)}")
                error_message = ""
                error_status = None
                error_type = None
                error_code = None
                error_text = ""
                request_dump = None
                error_base_url = None
                error_model_id = None
                if isinstance(api_error, dict):
                    error_status = api_error.get("status_code")
                    error_type = api_error.get("error_type") or api_error.get("type")
                    error_code = api_error.get("error_code") or api_error.get("code")
                    error_text = api_error.get("error_text") or ""
                    error_message = (
                        api_error.get("error_message")
                        or api_error.get("message")
                        or error_text
                        or ""
                    )
                    request_dump = api_error.get("request_dump")
                    error_base_url = api_error.get("base_url")
                    error_model_id = api_error.get("model_id")
                elif isinstance(api_error, str):
                    error_message = api_error
                if not error_message:
                    if error_status:
                        error_message = f"API 请求失败（HTTP {error_status}）"
                    else:
                        error_message = "API 请求失败"
                # 若命中阿里云配额错误，立即写入状态并切换到官方 API
                try:
                    from utils.aliyun_fallback import compute_disabled_until, set_disabled_until
                    disabled_until, reason = compute_disabled_until(error_message)
                    if disabled_until and reason:
                        set_disabled_until(getattr(web_terminal, "model_key", None) or "kimi-k2.5", disabled_until, reason)
                        profile = get_model_profile(getattr(web_terminal, "model_key", None) or "kimi-k2.5")
                        web_terminal.apply_model_profile(profile)
                except Exception as exc:
                    debug_log(f"处理阿里云配额回退失败: {exc}")
                can_retry = (
                    api_attempt < max_api_retries
                    and not full_response
                    and not tool_calls
                    and not current_thinking
                    and not pending_append
                    and not pending_modify
                )
                sender('error', {
                    'message': error_message,
                    'status_code': error_status,
                    'error_type': error_type,
                    'error_code': error_code,
                    'error_text': error_text,
                    'request_dump': request_dump,
                    'base_url': error_base_url,
                    'model_id': error_model_id,
                    'retry': bool(can_retry),
                    'retry_in': retry_delay_seconds if can_retry else None,
                    'attempt': api_attempt + 1,
                    'max_attempts': max_api_retries + 1
                })
                if can_retry:
                    try:
                        profile = get_model_profile(getattr(web_terminal, "model_key", None) or "kimi-k2.5")
                        web_terminal.apply_model_profile(profile)
                    except Exception as exc:
                        debug_log(f"重试前更新模型配置失败: {exc}")
                    cancelled = await wait_retry_delay(delay_seconds=retry_delay_seconds, client_sid=client_sid, username=username, sender=sender, get_stop_flag=get_stop_flag, clear_stop_flag=clear_stop_flag)
                    if cancelled:
                        return
                    continue
                cancel_pending_tools(tool_calls_list=tool_calls, sender=sender, messages=messages)
                return
            break

        # 流结束后的处理
        debug_log(f"\n流结束统计:")
        debug_log(f"  总chunks: {chunk_count}")
        debug_log(f"  思考chunks: {reasoning_chunks}")
        debug_log(f"  内容chunks: {content_chunks}")
        debug_log(f"  工具chunks: {tool_chunks}")
        debug_log(f"  收集到的思考: {len(current_thinking)} 字符")
        debug_log(f"  收集到的正文: {len(full_response)} 字符")
        debug_log(f"  收集到的工具: {len(tool_calls)} 个")

        if not append_result["handled"] and pending_append:
            append_result, pending_append, append_probe_buffer = await finalize_pending_append(pending_append=pending_append, append_probe_buffer=append_probe_buffer, response_text=full_response, stream_completed=True, finish_reason=last_finish_reason, web_terminal=web_terminal, sender=sender, debug_log=debug_log)
        if not modify_result["handled"] and pending_modify:
            modify_result, pending_modify, modify_probe_buffer = await finalize_pending_modify(pending_modify=pending_modify, modify_probe_buffer=modify_probe_buffer, response_text=full_response, stream_completed=True, finish_reason=last_finish_reason, web_terminal=web_terminal, sender=sender, debug_log=debug_log)

        # 结束未完成的流
        if in_thinking and not thinking_ended:
            sender('thinking_end', {'full_content': current_thinking})
            await asyncio.sleep(0.1)


        # 确保text_end事件被发送
        if text_started and text_has_content and not append_result["handled"] and not modify_result["handled"]:
            debug_log(f"发送text_end事件，完整内容长度: {len(full_response)}")
            sender('text_end', {'full_content': full_response})
            await asyncio.sleep(0.1)
            text_streaming = False

            if full_response.strip():
                debug_log(f"流式文本内容长度: {len(full_response)} 字符")

        if append_result["handled"]:
            append_metadata = append_result.get("assistant_metadata")
            append_content_text = append_result.get("assistant_content")
            if append_content_text:
                web_terminal.context_manager.add_conversation(
                    "assistant",
                    append_content_text,
                    metadata=append_metadata
                )
                debug_log("💾 增量保存：追加正文快照")

                payload_info = append_metadata.get("append_payload") if append_metadata else {}
                sender('append_payload', {
                    'path': payload_info.get("path") or append_result.get("path"),
                    'forced': payload_info.get("forced", False),
                    'lines': payload_info.get("lines"),
                    'bytes': payload_info.get("bytes"),
                    'tool_call_id': payload_info.get("tool_call_id") or append_result.get("tool_call_id"),
                    'success': payload_info.get("success", append_result.get("success", False)),
                    'conversation_id': conversation_id
                })

            if append_result["tool_content"]:
                tool_call_id = append_result.get("tool_call_id") or f"append_{int(time.time() * 1000)}"
                system_notice = format_tool_result_notice("append_to_file", tool_call_id, append_result["tool_content"])
                web_terminal.context_manager.add_conversation("system", system_notice)
                append_result["tool_call_id"] = tool_call_id
                debug_log("💾 增量保存：append_to_file 工具结果（system 通知）")

            finish_reason = append_result.get("finish_reason")
            path_for_prompt = append_result.get("path")
            need_follow_prompt = (
                finish_reason == "length" or
                append_result.get("forced") or
                not append_result.get("success")
            )

            if need_follow_prompt and path_for_prompt:
                prompt_lines = [
                    f"append_to_file 在处理 {path_for_prompt} 时未完成，需要重新发起写入。"
                ]
                if finish_reason == "length":
                    prompt_lines.append(
                        "上一次输出达到系统单次输出上限，已写入的内容已保存。"
                    )
                if append_result.get("forced"):
                    prompt_lines.append(
                        "收到的内容缺少 <<<END_APPEND>>> 标记，系统依据流式结束位置落盘。"
                    )
                if not append_result.get("success"):
                    prompt_lines.append("系统未能识别有效的追加标记。")
                prompt_lines.append(
                    "请再次调用 append_to_file 工具获取新的写入窗口，并在工具调用的输出中遵循以下格式："
                )
                prompt_lines.append(f"<<<APPEND:{path_for_prompt}>>>")
                prompt_lines.append("...填写剩余正文，如内容已完成可留空...")
                prompt_lines.append("<<<END_APPEND>>>")
                prompt_lines.append("不要在普通回复中粘贴上述标记，必须通过 append_to_file 工具发送。")
                follow_prompt = "\n".join(prompt_lines)
                messages.append({
                    "role": "system",
                    "content": follow_prompt
                })
                web_terminal.context_manager.add_conversation("system", follow_prompt)
                debug_log("已注入追加任务提示")

        if append_result["handled"] and append_result.get("forced") and append_result.get("success"):
            mark_force_thinking(web_terminal, reason="append_forced_finish")
        if append_result["handled"] and not append_result.get("success"):
            sender('system_message', {
                'content': f'⚠️ 追加写入失败：{append_result.get("error")}'
            })
            maybe_mark_failure_from_message(web_terminal, f'⚠️ 追加写入失败：{append_result.get("error")}')
            mark_force_thinking(web_terminal, reason="append_failed")

        if modify_result["handled"]:
            modify_metadata = modify_result.get("assistant_metadata")
            modify_content_text = modify_result.get("assistant_content")
            if modify_content_text:
                web_terminal.context_manager.add_conversation(
                    "assistant",
                    modify_content_text,
                    metadata=modify_metadata
                )
                debug_log("💾 增量保存：修改正文快照")

                payload_info = modify_metadata.get("modify_payload") if modify_metadata else {}
                sender('modify_payload', {
                    'path': payload_info.get("path") or modify_result.get("path"),
                    'total': payload_info.get("total_blocks") or modify_result.get("total_blocks"),
                    'completed': payload_info.get("completed") or modify_result.get("completed_blocks"),
                    'failed': payload_info.get("failed") or modify_result.get("failed_blocks"),
                    'forced': payload_info.get("forced", modify_result.get("forced", False)),
                    'success': modify_result.get("success", False),
                    'conversation_id': conversation_id
                })

            if modify_result["tool_content"]:
                tool_call_id = modify_result.get("tool_call_id") or f"modify_{int(time.time() * 1000)}"
                system_notice = format_tool_result_notice("modify_file", tool_call_id, modify_result["tool_content"])
                web_terminal.context_manager.add_conversation("system", system_notice)
                modify_result["tool_call_id"] = tool_call_id
                debug_log("💾 增量保存：modify_file 工具结果（system 通知）")

            path_for_prompt = modify_result.get("path")
            failed_blocks = modify_result.get("failed_blocks") or []
            need_follow_prompt = modify_result.get("forced") or bool(failed_blocks)

            if need_follow_prompt and path_for_prompt:
                prompt_lines = [
                    f"modify_file 在处理 {path_for_prompt} 时未完成，需要重新发起补丁。"
                ]
                if modify_result.get("forced"):
                    prompt_lines.append(
                        "刚才的内容缺少 <<<END_MODIFY>>> 标记，系统仅应用了已识别的部分。"
                    )
                if failed_blocks:
                    failed_text = "、".join(str(idx) for idx in failed_blocks)
                    prompt_lines.append(f"以下补丁未成功：第 {failed_text} 处。")
                prompt_lines.append(
                    "请再次调用 modify_file 工具，并在新的工具调用中按以下模板提供完整补丁："
                )
                prompt_lines.append(f"<<<MODIFY:{path_for_prompt}>>>")
                prompt_lines.append("[replace:序号]")
                prompt_lines.append("<<OLD>>")
                prompt_lines.append("...原文（必须逐字匹配，包含全部缩进、空格和换行）...")
                prompt_lines.append("<<END>>")
                prompt_lines.append("<<NEW>>")
                prompt_lines.append("...新内容，可留空表示清空，注意保持结构完整...")
                prompt_lines.append("<<END>>")
                prompt_lines.append("[/replace]")
                prompt_lines.append("<<<END_MODIFY>>>")
                prompt_lines.append("请勿在普通回复中直接粘贴补丁，必须通过 modify_file 工具发送。")
                follow_prompt = "\n".join(prompt_lines)
                messages.append({
                    "role": "system",
                    "content": follow_prompt
                })
                web_terminal.context_manager.add_conversation("system", follow_prompt)
                debug_log("已注入修改任务提示")

            if modify_result["handled"] and modify_result.get("failed_blocks"):
                mark_force_thinking(web_terminal, reason="modify_partial_failure")
            if modify_result["handled"] and modify_result.get("forced") and modify_result.get("success"):
                mark_force_thinking(web_terminal, reason="modify_forced_finish")
            if modify_result["handled"] and not modify_result.get("success"):
                error_message = modify_result.get("summary_message") or modify_result.get("error") or "修改操作未成功，请根据提示重新执行。"
                sender('system_message', {
                    'content': f'⚠️ 修改操作存在未完成的内容：{error_message}'
                })
                maybe_mark_failure_from_message(web_terminal, f'⚠️ 修改操作存在未完成的内容：{error_message}')
                mark_force_thinking(web_terminal, reason="modify_failed")

        if web_terminal.api_client.last_call_used_thinking and current_thinking:
            web_terminal.api_client.current_task_thinking = current_thinking or ""
        if web_terminal.api_client.current_task_first_call:
            web_terminal.api_client.current_task_first_call = False
        update_thinking_after_call(web_terminal)

        # 检测是否有格式错误的工具调用
        if not tool_calls and full_response and AUTO_FIX_TOOL_CALL and not append_result["handled"] and not modify_result["handled"]:
            if detect_malformed_tool_call(full_response):
                auto_fix_attempts += 1

                if auto_fix_attempts <= AUTO_FIX_MAX_ATTEMPTS:
                    debug_log(f"检测到格式错误的工具调用，尝试自动修复 (尝试 {auto_fix_attempts}/{AUTO_FIX_MAX_ATTEMPTS})")

                    fix_message = "你使用了错误的格式输出工具调用。请使用正确的工具调用格式而不是直接输出JSON。根据当前进度继续执行任务。"

                    sender('system_message', {
                        'content': f'⚠️ 自动修复: {fix_message}'
                    })
                    maybe_mark_failure_from_message(web_terminal, f'⚠️ 自动修复: {fix_message}')

                    messages.append({
                        "role": "user",
                        "content": fix_message
                    })

                    await asyncio.sleep(1)
                    continue
                else:
                    debug_log(f"自动修复尝试已达上限 ({AUTO_FIX_MAX_ATTEMPTS})")
                    sender('system_message', {
                        'content': f'⌘ 工具调用格式错误，自动修复失败。请手动检查并重试。'
                    })
                    maybe_mark_failure_from_message(web_terminal, '⌘ 工具调用格式错误，自动修复失败。请手动检查并重试。')
                    break

        # 构建助手消息（用于API继续对话）
        assistant_content_parts = []

        if full_response:
            assistant_content_parts.append(full_response)
        elif append_result["handled"] and append_result["assistant_content"]:
            assistant_content_parts.append(append_result["assistant_content"])
        elif modify_result["handled"] and modify_result.get("assistant_content"):
            assistant_content_parts.append(modify_result["assistant_content"])

        assistant_content = "\n".join(assistant_content_parts) if assistant_content_parts else ""

        # 添加到消息历史（用于API继续对话，不保存到文件）
        assistant_message = {
            "role": "assistant",
            "content": assistant_content,
            "tool_calls": tool_calls
        }
        if current_thinking:
            assistant_message["reasoning_content"] = current_thinking

        messages.append(assistant_message)
        if assistant_content or current_thinking or tool_calls:
            web_terminal.context_manager.add_conversation(
                "assistant",
                assistant_content,
                tool_calls=tool_calls if tool_calls else None,
                reasoning_content=current_thinking or None
            )

        # 为下一轮迭代重置流状态标志，但保留 full_response 供上面保存使用
        text_streaming = False
        text_started = False
        text_has_content = False
        full_response = ""

        if append_result["handled"] and append_result.get("tool_content"):
            tool_call_id = append_result.get("tool_call_id") or f"append_{int(time.time() * 1000)}"
            system_notice = format_tool_result_notice("append_to_file", tool_call_id, append_result["tool_content"])
            messages.append({
                "role": "system",
                "content": system_notice
            })
            append_result["tool_call_id"] = tool_call_id
            debug_log("已将 append_to_file 工具结果以 system 形式追加到对话上下文")
        if modify_result["handled"] and modify_result.get("tool_content"):
            tool_call_id = modify_result.get("tool_call_id") or f"modify_{int(time.time() * 1000)}"
            system_notice = format_tool_result_notice("modify_file", tool_call_id, modify_result["tool_content"])
            messages.append({
                "role": "system",
                "content": system_notice
            })
            modify_result["tool_call_id"] = tool_call_id
            debug_log("已将 modify_file 工具结果以 system 形式追加到对话上下文")

        force_continue = append_result["handled"] or modify_result["handled"]
        if force_continue:
            if append_result["handled"]:
                debug_log("append_to_file 已处理，继续下一轮以让模型返回确认回复")
            elif modify_result["handled"]:
                debug_log("modify_file 已处理，继续下一轮以让模型返回确认回复")
            else:
                debug_log("补丁处理完成，继续下一轮以获取模型回复")
            continue

        if not tool_calls:
            debug_log("没有工具调用，结束迭代")
            break

        # 检查连续相同工具调用
        for tc in tool_calls:
            tool_name = tc["function"]["name"]

            if tool_name == last_tool_name:
                consecutive_same_tool[tool_name] += 1

                if (
                    MAX_CONSECUTIVE_SAME_TOOL is not None
                    and consecutive_same_tool[tool_name] >= MAX_CONSECUTIVE_SAME_TOOL
                ):
                    debug_log(f"警告: 连续调用相同工具 {tool_name} 已达 {MAX_CONSECUTIVE_SAME_TOOL} 次")
                    sender('system_message', {
                        'content': f'⚠️ 检测到重复调用 {tool_name} 工具 {MAX_CONSECUTIVE_SAME_TOOL} 次，可能存在循环。'
                    })
                    maybe_mark_failure_from_message(web_terminal, f'⚠️ 检测到重复调用 {tool_name} 工具 {MAX_CONSECUTIVE_SAME_TOOL} 次，可能存在循环。')

                    if consecutive_same_tool[tool_name] >= MAX_CONSECUTIVE_SAME_TOOL + 2:
                        debug_log(f"终止: 工具 {tool_name} 调用次数过多")
                        sender('system_message', {
                            'content': f'⌘ 工具 {tool_name} 重复调用过多，任务终止。'
                        })
                        maybe_mark_failure_from_message(web_terminal, f'⌘ 工具 {tool_name} 重复调用过多，任务终止。')
                        break
            else:
                consecutive_same_tool.clear()
                consecutive_same_tool[tool_name] = 1

            last_tool_name = tool_name

        # 更新统计
        total_tool_calls += len(tool_calls)

        # 执行每个工具
        tool_loop_result = await execute_tool_calls(
            web_terminal=web_terminal,
            tool_calls=tool_calls,
            sender=sender,
            messages=messages,
            client_sid=client_sid,
            username=username,
            iteration=iteration,
            conversation_id=conversation_id,
            last_tool_call_time=last_tool_call_time,
            process_sub_agent_updates=process_sub_agent_updates,
            maybe_mark_failure_from_message=maybe_mark_failure_from_message,
            mark_force_thinking=mark_force_thinking,
            get_stop_flag=get_stop_flag,
            clear_stop_flag=clear_stop_flag,
        )
        last_tool_call_time = tool_loop_result.get("last_tool_call_time", last_tool_call_time)
        if tool_loop_result.get("stopped"):
            return

        # 标记不再是第一次迭代
        is_first_iteration = False


    # 最终统计
    debug_log(f"\n{'='*40}")
    debug_log(f"任务完成统计:")
    debug_log(f"  总迭代次数: {total_iterations}")
    debug_log(f"  总工具调用: {total_tool_calls}")
    debug_log(f"  自动修复尝试: {auto_fix_attempts}")
    debug_log(f"  累积响应: {len(accumulated_response)} 字符")
    debug_log(f"{'='*40}\n")

    # 发送完成事件
    sender('task_complete', {
        'total_iterations': total_iterations,
        'total_tool_calls': total_tool_calls,
        'auto_fix_attempts': auto_fix_attempts
    })