from __future__ import annotations
|
||
|
||
import asyncio
|
||
import json
|
||
import time
|
||
import re
|
||
import zipfile
|
||
from collections import defaultdict, Counter, deque
|
||
from datetime import datetime, timedelta
|
||
from pathlib import Path
|
||
from typing import Any, Dict, List, Optional, Tuple
|
||
|
||
from werkzeug.utils import secure_filename
|
||
|
||
from config import (
|
||
OUTPUT_FORMATS,
|
||
AUTO_FIX_TOOL_CALL,
|
||
AUTO_FIX_MAX_ATTEMPTS,
|
||
MAX_ITERATIONS_PER_TASK,
|
||
MAX_CONSECUTIVE_SAME_TOOL,
|
||
MAX_TOTAL_TOOL_CALLS,
|
||
TOOL_CALL_COOLDOWN,
|
||
MAX_UPLOAD_SIZE,
|
||
DEFAULT_CONVERSATIONS_LIMIT,
|
||
MAX_CONVERSATIONS_LIMIT,
|
||
CONVERSATIONS_DIR,
|
||
DEFAULT_RESPONSE_MAX_TOKENS,
|
||
DEFAULT_PROJECT_PATH,
|
||
LOGS_DIR,
|
||
AGENT_VERSION,
|
||
THINKING_FAST_INTERVAL,
|
||
PROJECT_MAX_STORAGE_MB,
|
||
PROJECT_MAX_STORAGE_BYTES,
|
||
UPLOAD_SCAN_LOG_SUBDIR,
|
||
)
|
||
from modules.personalization_manager import (
|
||
load_personalization_config,
|
||
save_personalization_config,
|
||
THINKING_INTERVAL_MIN,
|
||
THINKING_INTERVAL_MAX,
|
||
)
|
||
from modules.skill_hint_manager import SkillHintManager
|
||
from modules.upload_security import UploadSecurityError
|
||
from modules.user_manager import UserWorkspace
|
||
from modules.usage_tracker import QUOTA_DEFAULTS
|
||
from modules.sub_agent_manager import TERMINAL_STATUSES
|
||
from core.web_terminal import WebTerminal
|
||
from utils.tool_result_formatter import format_tool_result_for_context
|
||
from utils.conversation_manager import ConversationManager
|
||
from config.model_profiles import get_model_context_window, get_model_profile
|
||
|
||
from .auth_helpers import api_login_required, resolve_admin_policy, get_current_user_record, get_current_username
|
||
from .context import with_terminal, get_gui_manager, get_upload_guard, build_upload_error_response, ensure_conversation_loaded, reset_system_state, get_user_resources, get_or_create_usage_tracker
|
||
from .utils_common import (
|
||
build_review_lines,
|
||
debug_log,
|
||
log_backend_chunk,
|
||
log_frontend_chunk,
|
||
log_streaming_debug_entry,
|
||
brief_log,
|
||
DEBUG_LOG_FILE,
|
||
CHUNK_BACKEND_LOG_FILE,
|
||
CHUNK_FRONTEND_LOG_FILE,
|
||
STREAMING_DEBUG_LOG_FILE,
|
||
)
|
||
from .security import rate_limited, compact_web_search_result, consume_socket_token, prune_socket_tokens, validate_csrf_request, requires_csrf_protection, get_csrf_token
|
||
from .monitor import cache_monitor_snapshot, get_cached_monitor_snapshot
|
||
from .extensions import socketio
|
||
from .state import (
|
||
MONITOR_FILE_TOOLS,
|
||
MONITOR_MEMORY_TOOLS,
|
||
MONITOR_SNAPSHOT_CHAR_LIMIT,
|
||
MONITOR_MEMORY_ENTRY_LIMIT,
|
||
RATE_LIMIT_BUCKETS,
|
||
FAILURE_TRACKERS,
|
||
pending_socket_tokens,
|
||
usage_trackers,
|
||
MONITOR_SNAPSHOT_CACHE,
|
||
MONITOR_SNAPSHOT_CACHE_LIMIT,
|
||
PROJECT_STORAGE_CACHE,
|
||
PROJECT_STORAGE_CACHE_TTL_SECONDS,
|
||
RECENT_UPLOAD_EVENT_LIMIT,
|
||
RECENT_UPLOAD_FEED_LIMIT,
|
||
THINKING_FAILURE_KEYWORDS,
|
||
TITLE_PROMPT_PATH,
|
||
get_last_active_ts,
|
||
user_manager,
|
||
container_manager,
|
||
custom_tool_registry,
|
||
user_terminals,
|
||
terminal_rooms,
|
||
connection_users,
|
||
stop_flags,
|
||
active_polling_tasks,
|
||
get_stop_flag,
|
||
set_stop_flag,
|
||
clear_stop_flag,
|
||
)
|
||
from .chat_flow_helpers import (
|
||
detect_malformed_tool_call as _detect_malformed_tool_call,
|
||
detect_tool_failure,
|
||
get_thinking_state,
|
||
mark_force_thinking as _mark_force_thinking,
|
||
mark_suppress_thinking,
|
||
apply_thinking_schedule as _apply_thinking_schedule,
|
||
update_thinking_after_call as _update_thinking_after_call,
|
||
maybe_mark_failure_from_message as _maybe_mark_failure_from_message,
|
||
generate_conversation_title_background as _generate_conversation_title_background,
|
||
)
|
||
|
||
|
||
from .chat_flow_runner_helpers import (
|
||
extract_intent_from_partial,
|
||
resolve_monitor_path,
|
||
resolve_monitor_memory,
|
||
capture_monitor_snapshot,
|
||
)
|
||
|
||
|
||
from .chat_flow_runtime import (
|
||
generate_conversation_title_background,
|
||
mark_force_thinking,
|
||
apply_thinking_schedule,
|
||
update_thinking_after_call,
|
||
maybe_mark_failure_from_message,
|
||
detect_malformed_tool_call,
|
||
)
|
||
|
||
from .chat_flow_task_support import process_sub_agent_updates
|
||
from .chat_flow_tool_loop import execute_tool_calls
|
||
from .chat_flow_stream_loop import run_streaming_attempts
|
||
|
||
async def poll_sub_agent_completion(*, web_terminal, workspace, conversation_id, client_sid, username):
    """Poll sub-agent completion status in the background and, once one
    finishes, trigger a new conversation round announcing the result.

    Keyword-only parameters:
        web_terminal: per-user terminal object; its ``sub_agent_manager``
            is polled for updates and its ``_announced_sub_agent_tasks``
            set is used to deduplicate notifications.
        workspace: the user's workspace (its id is recorded in the
            background chat task's session data).
        conversation_id: only tasks bound to this conversation are
            announced; updates for other conversations are skipped.
        client_sid: socket id used to look up the user's stop flag.
        username: socket room target (``user_<username>``).

    Returns None. The loop exits when the user requests a stop, when all
    background tasks for this conversation are finished (possibly after a
    make-up notification), or after a one-hour wall-clock timeout.
    """
    from .extensions import socketio

    manager = getattr(web_terminal, "sub_agent_manager", None)
    if not manager:
        debug_log("[SubAgent] poll_sub_agent_completion: manager 不存在")
        return
    # Lazily create the dedup set of task ids already announced to the UI.
    if not hasattr(web_terminal, "_announced_sub_agent_tasks"):
        web_terminal._announced_sub_agent_tasks = set()

    max_wait_time = 3600  # wait at most one hour
    start_wait = time.time()

    debug_log(f"[SubAgent] 开始后台轮询,conversation_id={conversation_id}, username={username}")

    # Helper used to emit socket events into the user's room; emit errors
    # are logged and swallowed so polling keeps going.
    def sender(event_type, data):
        try:
            socketio.emit(event_type, data, room=f"user_{username}")
            debug_log(f"[SubAgent] 发送事件: {event_type}")
        except Exception as e:
            debug_log(f"[SubAgent] 发送事件失败: {event_type}, 错误: {e}")

    while (time.time() - start_wait) < max_wait_time:
        debug_log(f"[SubAgent] 轮询检查...")

        # Honor the per-client stop flag (dict form carries a 'stop' key;
        # any other truthy value is treated as a stop request).
        client_stop_info = get_stop_flag(client_sid, username)
        if client_stop_info:
            stop_requested = client_stop_info.get('stop', False) if isinstance(client_stop_info, dict) else client_stop_info
            if stop_requested:
                debug_log("[SubAgent] 用户请求停止,终止轮询")
                break

        # While the main conversation is still inside its tool loop, do not
        # consume completion events yet — this avoids preempting system
        # message insertion in the main flow.
        if getattr(web_terminal, "_tool_loop_active", False):
            debug_log("[SubAgent] 主对话工具循环中,延迟后台轮询发送 user 消息")
            await asyncio.sleep(1)
            continue

        updates = manager.poll_updates()
        debug_log(f"[SubAgent] poll_updates 返回 {len(updates)} 个更新")

        for update in updates:
            agent_id = update.get("agent_id")
            summary = update.get("summary")
            result_summary = update.get("result_summary") or update.get("message", "")
            deliverables_dir = update.get("deliverables_dir", "")
            status = update.get("status")
            task_id = update.get("task_id")
            task_info = manager.tasks.get(task_id) if task_id else None
            task_conv_id = task_info.get("conversation_id") if isinstance(task_info, dict) else None
            # Skip updates that belong to a different conversation.
            if task_conv_id and task_conv_id != conversation_id:
                debug_log(f"[SubAgent] 跳过非当前对话任务: task={task_id} conv={task_conv_id} current={conversation_id}")
                continue
            # Skip updates whose task record can no longer be found.
            if task_id and task_info is None:
                debug_log(f"[SubAgent] 找不到任务详情,跳过: task={task_id}")
                continue
            # Skip terminated tasks and tasks the user was already notified about.
            if status == "terminated" or (isinstance(task_info, dict) and task_info.get("notified")):
                debug_log(f"[SubAgent] 跳过已终止/已通知任务: task={task_id} status={status}")
                continue

            debug_log(f"[SubAgent] 子智能体{agent_id}完成,状态: {status}")

            # Build the synthetic user message announcing the completion
            # (only sent when the task finished in the background).
            prefix = "这是一句系统自动发送的user消息,用于通知你子智能体已经运行完成"
            runtime_line = ""
            # Runtime may be reported under either key depending on producer.
            elapsed_seconds = update.get("runtime_seconds")
            if elapsed_seconds is None:
                elapsed_seconds = update.get("elapsed_seconds")
            if status == "completed" and isinstance(elapsed_seconds, (int, float)):
                runtime_line = f"\n\n运行了{int(round(elapsed_seconds))}秒"
            user_message = f"""{prefix}

子智能体{agent_id} ({summary}) 已完成任务。

{result_summary}
{runtime_line}

交付目录:{deliverables_dir}"""

            debug_log(f"[SubAgent] 准备发送 user_message: {user_message[:100]}...")

            has_remaining = False
            remaining_count = 0
            try:
                # Mark the task as announced/notified and persist so it is
                # not re-announced after a process restart.
                if task_id:
                    web_terminal._announced_sub_agent_tasks.add(task_id)
                    if isinstance(task_info, dict):
                        task_info["notified"] = True
                        task_info["updated_at"] = time.time()
                        try:
                            manager._save_state()
                        except Exception as exc:
                            debug_log(f"[SubAgent] 保存通知状态失败: {exc}")

                # Compute how much sub-agent work remains for this
                # conversation (the frontend uses this to clear or keep its
                # waiting indicator).
                if not hasattr(web_terminal, "_announced_sub_agent_tasks"):
                    web_terminal._announced_sub_agent_tasks = set()
                announced = web_terminal._announced_sub_agent_tasks
                running_tasks = [
                    task for task in manager.tasks.values()
                    if isinstance(task, dict)
                    and task.get("status") not in TERMINAL_STATUSES.union({"terminated"})
                    and task.get("run_in_background")
                    and task.get("conversation_id") == conversation_id
                ]
                pending_notice_tasks = [
                    task for task in manager.tasks.values()
                    if isinstance(task, dict)
                    and task.get("status") in TERMINAL_STATUSES.union({"terminated"})
                    and task.get("run_in_background")
                    and task.get("conversation_id") == conversation_id
                    and task.get("task_id") not in announced
                    and not task.get("notified")
                ]
                remaining_count = len(running_tasks) + len(pending_notice_tasks)
                has_remaining = remaining_count > 0

                # Register the follow-up turn as a background task so it can
                # be resumed after a page refresh.
                from .tasks import task_manager
                workspace_id = getattr(workspace, "workspace_id", None) or "default"
                session_data = {
                    "username": username,
                    "role": getattr(web_terminal, "user_role", "user"),
                    "is_api_user": getattr(web_terminal, "user_role", "") == "api",
                    "workspace_id": workspace_id,
                    "run_mode": getattr(web_terminal, "run_mode", None),
                    "thinking_mode": getattr(web_terminal, "thinking_mode", None),
                    "model_key": getattr(web_terminal, "model_key", None),
                }
                rec = task_manager.create_chat_task(
                    username,
                    workspace_id,
                    user_message,
                    [],
                    conversation_id,
                    model_key=session_data.get("model_key"),
                    thinking_mode=session_data.get("thinking_mode"),
                    run_mode=session_data.get("run_mode"),
                    session_data=session_data,
                )
                debug_log(f"[SubAgent] 已创建后台任务: task_id={rec.task_id}")
                sender('user_message', {
                    'message': user_message,
                    'conversation_id': conversation_id,
                    'task_id': rec.task_id,
                    'sub_agent_notice': True,
                    'has_running_sub_agents': has_remaining,
                    'remaining_count': remaining_count
                })
            except Exception as e:
                # Fallback: background-task registration failed — run the
                # follow-up turn inline instead.
                debug_log(f"[SubAgent] 创建后台任务失败,回退直接执行: {e}")
                sender('user_message', {
                    'message': user_message,
                    'conversation_id': conversation_id,
                    'sub_agent_notice': True,
                    'has_running_sub_agents': has_remaining,
                    'remaining_count': remaining_count
                })
                try:
                    task = asyncio.create_task(handle_task_with_sender(
                        terminal=web_terminal,
                        workspace=workspace,
                        message=user_message,
                        images=[],
                        sender=sender,
                        client_sid=client_sid,
                        username=username,
                        videos=[]
                    ))
                    await task
                    debug_log(f"[SubAgent] process_message_task 调用成功")
                except Exception as inner_exc:
                    debug_log(f"[SubAgent] process_message_task 失败: {inner_exc}")
                    import traceback
                    debug_log(f"[SubAgent] 错误堆栈: {traceback.format_exc()}")

            return  # only handle the first completed sub-agent per poll run

        # Check whether any background tasks are still running for this
        # conversation.
        running_tasks = [
            task for task in manager.tasks.values()
            if task.get("status") not in {"completed", "failed", "timeout", "terminated"}
            and task.get("run_in_background")
            and task.get("conversation_id") == conversation_id
        ]

        debug_log(f"[SubAgent] 当前还有 {len(running_tasks)} 个运行中的任务")

        if not running_tasks:
            debug_log("[SubAgent] 所有子智能体已完成")
            # If a task reached a terminal status before poll_updates could
            # report it (so updates came back empty), send a make-up
            # completion notification for the most recently updated one.
            completed_tasks = [
                task for task in manager.tasks.values()
                if task.get("status") in {"completed", "failed", "timeout"}
                and task.get("run_in_background")
                and task.get("conversation_id") == conversation_id
                and not task.get("notified")
            ]
            if completed_tasks:
                completed_tasks.sort(
                    key=lambda item: item.get("updated_at") or item.get("created_at") or 0,
                    reverse=True
                )
                task = completed_tasks[0]
                agent_id = task.get("agent_id")
                summary = task.get("summary") or ""
                final_result = task.get("final_result") or {}
                result_summary = (
                    final_result.get("message")
                    or final_result.get("result_summary")
                    or final_result.get("system_message")
                    or ""
                )
                deliverables_dir = final_result.get("deliverables_dir") or task.get("deliverables_dir") or ""
                status = final_result.get("status") or task.get("status")
                debug_log(f"[SubAgent] 补发完成提示: task={task.get('task_id')} status={status}")

                user_message = f"""子智能体{agent_id} ({summary}) 已完成任务。

{result_summary}

交付目录:{deliverables_dir}"""

                try:
                    # Mark as notified and persist, then announce and
                    # register a background follow-up task (same shape as
                    # the main notification path above).
                    task_id = task.get("task_id")
                    if task_id:
                        web_terminal._announced_sub_agent_tasks.add(task_id)
                    if isinstance(task, dict):
                        task["notified"] = True
                        task["updated_at"] = time.time()
                        try:
                            manager._save_state()
                        except Exception as exc:
                            debug_log(f"[SubAgent] 保存通知状态失败: {exc}")
                    sender('user_message', {
                        'message': user_message,
                        'conversation_id': conversation_id
                    })
                    from .tasks import task_manager
                    workspace_id = getattr(workspace, "workspace_id", None) or "default"
                    session_data = {
                        "username": username,
                        "role": getattr(web_terminal, "user_role", "user"),
                        "is_api_user": getattr(web_terminal, "user_role", "") == "api",
                        "workspace_id": workspace_id,
                        "run_mode": getattr(web_terminal, "run_mode", None),
                        "thinking_mode": getattr(web_terminal, "thinking_mode", None),
                        "model_key": getattr(web_terminal, "model_key", None),
                    }
                    rec = task_manager.create_chat_task(
                        username,
                        workspace_id,
                        user_message,
                        [],
                        conversation_id,
                        model_key=session_data.get("model_key"),
                        thinking_mode=session_data.get("thinking_mode"),
                        run_mode=session_data.get("run_mode"),
                        session_data=session_data,
                    )
                    debug_log(f"[SubAgent] 补发通知创建后台任务: task_id={rec.task_id}")
                except Exception as e:
                    # Fallback: execute the follow-up turn inline.
                    debug_log(f"[SubAgent] 补发通知创建后台任务失败,回退直接执行: {e}")
                    try:
                        task_handle = asyncio.create_task(handle_task_with_sender(
                            terminal=web_terminal,
                            workspace=workspace,
                            message=user_message,
                            images=[],
                            sender=sender,
                            client_sid=client_sid,
                            username=username,
                            videos=[]
                        ))
                        await task_handle
                    except Exception as inner_exc:
                        debug_log(f"[SubAgent] 补发完成提示失败: {inner_exc}")
                        import traceback
                        debug_log(f"[SubAgent] 错误堆栈: {traceback.format_exc()}")
            break

        await asyncio.sleep(5)

    debug_log("[SubAgent] 后台轮询结束")
|
||
|
||
async def handle_task_with_sender(terminal: WebTerminal, workspace: UserWorkspace, message, images, sender, client_sid, username: str, videos=None):
    """Process one user task and stream events to the client — the
    token-accounting-integrated version of the chat runner.

    Drives the full model loop: records the user message, optionally
    inserts skill hints and kicks off title generation, enforces the
    context-token budget, then iterates model calls + tool execution until
    the model stops calling tools or a limit is hit, finally emitting
    ``task_complete``.

    Parameters:
        terminal: the user's WebTerminal (model client + context manager).
        workspace: the user's workspace (personalization config source).
        message: the user's message text.
        images: image attachments passed into the conversation history.
        sender: callable ``(event_type, data)`` used to emit socket events;
            wrapped below to inject conversation/task identifiers.
        client_sid: socket id used for stop flags and event attribution.
        username: owning user's name (socket rooms, background tasks).
        videos: optional video attachments; defaults to an empty list.

    Returns None. Early-returns on context overflow, quota exhaustion, or
    a user stop signalled by the stream/tool helpers.
    """
    from .extensions import socketio

    web_terminal = terminal
    conversation_id = getattr(web_terminal.context_manager, "current_conversation_id", None)
    videos = videos or []
    raw_sender = sender

    def sender(event_type, data):
        """Wrap the raw sender: attach conversation/task identifiers to key
        events so the frontend can attribute errors to the right session."""
        if not isinstance(data, dict):
            # Non-dict payloads pass through untouched.
            raw_sender(event_type, data)
            return
        payload = dict(data)
        current_conv = conversation_id or getattr(web_terminal.context_manager, "current_conversation_id", None)

        # Add conversation_id to all events (except connection-level ones)
        # so the frontend can match them correctly.
        if current_conv and event_type not in {"connect", "disconnect", "system_ready"}:
            payload.setdefault("conversation_id", current_conv)

        # Debug trace for the key streaming events.
        if event_type in {"user_message", "ai_message_start", "text_start", "text_chunk", "tool_preparing"}:
            debug_log(f"[SENDER] 发送事件: {event_type}, conversation_id={current_conv}, data_keys={list(payload.keys())}")

        # Terminal/error events additionally carry task and client ids.
        if event_type in {"error", "quota_exceeded", "task_stopped", "task_complete"}:
            task_id = getattr(web_terminal, "task_id", None) or client_sid
            if task_id:
                payload.setdefault("task_id", task_id)
            if client_sid:
                payload.setdefault("client_sid", client_sid)

        raw_sender(event_type, payload)

    # In thinking mode, reset the per-task thinking state.
    if web_terminal.thinking_mode:
        web_terminal.api_client.start_new_task(force_deep=web_terminal.deep_thinking_mode)
        state = get_thinking_state(web_terminal)
        state["fast_streak"] = 0
        state["force_next"] = False
        state["suppress_next"] = False

    # Append the user message to the conversation history; remember whether
    # it is the very first message (drives auto title generation).
    history_len_before = len(getattr(web_terminal.context_manager, "conversation_history", []) or [])
    is_first_user_message = history_len_before == 0
    web_terminal.context_manager.add_conversation("user", message, images=images, videos=videos)

    # Skill-hint system: detect keywords and insert system messages right
    # after the user message. Best-effort — any failure is only logged.
    try:
        personal_config = load_personalization_config(workspace.data_dir)
        skill_hints_enabled = personal_config.get("skill_hints_enabled", False)

        if skill_hints_enabled and message:
            hint_manager = SkillHintManager()
            hint_manager.set_enabled(True)
            hint_messages = hint_manager.build_hint_messages(message)

            # Insert each hint into the conversation history (after the
            # user message), verifying the insertion for debugging.
            for hint_msg in hint_messages:
                debug_log(f"[Skill Hints] 插入提示消息: {hint_msg['content'][:100]}")
                web_terminal.context_manager.add_conversation(
                    "system",
                    hint_msg["content"]
                )
                last_msg = web_terminal.context_manager.conversation_history[-1]
                debug_log(f"[Skill Hints] 插入后验证 - role: {last_msg.get('role')}, content: {last_msg.get('content')[:100]}")
    except Exception as exc:
        debug_log(f"Skill hints 处理失败: {exc}")

    # Auto-generate a conversation title in the background for the first
    # user message (if enabled in personalization).
    if is_first_user_message and getattr(web_terminal, "context_manager", None):
        try:
            personal_config = load_personalization_config(workspace.data_dir)
        except Exception:
            personal_config = {}
        auto_title_enabled = personal_config.get("auto_generate_title", True)
        if auto_title_enabled:
            conv_id = getattr(web_terminal.context_manager, "current_conversation_id", None)
            socketio.start_background_task(
                generate_conversation_title_background,
                web_terminal,
                conv_id,
                message,
                username
            )

    # NOTE: input tokens are no longer computed here — they are computed
    # before each API call instead.

    # Build context, messages and tool definitions for the API call.
    context = web_terminal.build_context()
    messages = web_terminal.build_messages(context, message)
    tools = web_terminal.define_tools()
    try:
        profile = get_model_profile(getattr(web_terminal, "model_key", None) or "kimi-k2.5")
        web_terminal.apply_model_profile(profile)
    except Exception as exc:
        debug_log(f"更新模型配置失败: {exc}")

    # === Context budget and safety check (avoid exceeding the model's
    # context window) ===
    max_context_tokens = get_model_context_window(getattr(web_terminal, "model_key", None) or "kimi-k2.5")
    current_tokens = web_terminal.context_manager.get_current_context_tokens(conversation_id)
    # Sync to the underlying client early so it can shrink max_tokens.
    web_terminal.api_client.update_context_budget(current_tokens, max_context_tokens)
    if max_context_tokens:
        # Hard stop: context already at/over the model limit.
        if current_tokens >= max_context_tokens:
            err_msg = (
                f"当前对话上下文已达 {current_tokens} tokens,超过模型上限 "
                f"{max_context_tokens},请先使用压缩功能或清理对话后再试。"
            )
            debug_log(err_msg)
            web_terminal.context_manager.add_conversation("system", err_msg)
            sender('error', {
                'message': err_msg,
                'status_code': 400,
                'error_type': 'context_overflow'
            })
            return
        # Soft warning at 70% usage, sent only once per conversation.
        usage_percent = (current_tokens / max_context_tokens) * 100
        warned = web_terminal.context_manager.conversation_metadata.get("context_warning_sent", False)
        if usage_percent >= 70 and not warned:
            warn_msg = (
                f"当前对话上下文约占 {usage_percent:.1f}%({current_tokens}/{max_context_tokens}),"
                "建议使用压缩功能。"
            )
            web_terminal.context_manager.conversation_metadata["context_warning_sent"] = True
            web_terminal.context_manager.auto_save_conversation(force=True)
            sender('context_warning', {
                'title': '上下文过长',
                'message': warn_msg,
                'type': 'warning',
                'conversation_id': conversation_id
            })

    # Start a new AI message on the frontend.
    sender('ai_message_start', {})

    # Incremental-save state.
    accumulated_response = ""  # accumulated response content across iterations
    is_first_iteration = True  # whether this is the first iteration

    # Counters and limits.
    total_iterations = 0
    total_tool_calls = 0
    consecutive_same_tool = defaultdict(int)
    last_tool_name = ""
    auto_fix_attempts = 0
    last_tool_call_time = 0
    detected_tool_intent: Dict[str, str] = {}

    # Max iterations (overridable via the API); None means unlimited.
    max_iterations_override = getattr(web_terminal, "max_iterations_override", None)
    max_iterations = max_iterations_override if max_iterations_override is not None else MAX_ITERATIONS_PER_TASK
    max_api_retries = 4
    retry_delay_seconds = 10

    iteration = 0
    while max_iterations is None or iteration < max_iterations:
        current_iteration = iteration + 1
        iteration += 1
        total_iterations += 1
        iteration_limit_label = max_iterations if max_iterations is not None else "∞"
        debug_log(f"\n--- 迭代 {current_iteration}/{iteration_limit_label} 开始 ---")

        # Check the total tool-call limit.
        if MAX_TOTAL_TOOL_CALLS is not None and total_tool_calls >= MAX_TOTAL_TOOL_CALLS:
            debug_log(f"已达到最大工具调用次数限制 ({MAX_TOTAL_TOOL_CALLS})")
            sender('system_message', {
                'content': f'⚠️ 已达到最大工具调用次数限制 ({MAX_TOTAL_TOOL_CALLS}),任务结束。'
            })
            mark_force_thinking(web_terminal, reason="tool_limit")
            break

        apply_thinking_schedule(web_terminal)

        # Per-iteration stream accumulators.
        full_response = ""
        tool_calls = []
        current_thinking = ""
        detected_tools = {}
        last_usage_payload = None

        # Stream state flags.
        in_thinking = False
        thinking_started = False
        thinking_ended = False
        text_started = False
        text_has_content = False
        text_streaming = False
        text_chunk_index = 0
        last_text_chunk_time: Optional[float] = None

        # Chunk counters.
        chunk_count = 0
        reasoning_chunks = 0
        content_chunks = 0
        tool_chunks = 0
        last_finish_reason = None

        thinking_expected = web_terminal.api_client.get_current_thinking_mode()
        debug_log(f"思考模式: {thinking_expected}")
        # Quota accounting: record the call and bail out if over quota.
        quota_allowed = True
        quota_info = {}
        if hasattr(web_terminal, "record_model_call"):
            quota_allowed, quota_info = web_terminal.record_model_call(bool(thinking_expected))
        if not quota_allowed:
            quota_type = 'thinking' if thinking_expected else 'fast'
            socketio.emit('quota_notice', {
                'type': quota_type,
                'reset_at': quota_info.get('reset_at'),
                'limit': quota_info.get('limit'),
                'count': quota_info.get('count')
            }, room=f"user_{getattr(web_terminal, 'username', '')}")
            sender('quota_exceeded', {
                'type': quota_type,
                'reset_at': quota_info.get('reset_at')
            })
            sender('error', {
                'message': "配额已达到上限,暂时无法继续调用模型。",
                'quota': quota_info
            })
            return

        tool_call_limit_label = MAX_TOTAL_TOOL_CALLS if MAX_TOTAL_TOOL_CALLS is not None else "∞"
        print(f"[API] 第{current_iteration}次调用 (总工具调用: {total_tool_calls}/{tool_call_limit_label})")

        # Run the streaming attempt loop; it threads all the stream state
        # through and returns the updated values.
        stream_result = await run_streaming_attempts(
            web_terminal=web_terminal,
            messages=messages,
            tools=tools,
            sender=sender,
            client_sid=client_sid,
            username=username,
            conversation_id=conversation_id,
            current_iteration=current_iteration,
            max_api_retries=max_api_retries,
            retry_delay_seconds=retry_delay_seconds,
            detected_tool_intent=detected_tool_intent,
            full_response=full_response,
            tool_calls=tool_calls,
            current_thinking=current_thinking,
            detected_tools=detected_tools,
            last_usage_payload=last_usage_payload,
            in_thinking=in_thinking,
            thinking_started=thinking_started,
            thinking_ended=thinking_ended,
            text_started=text_started,
            text_has_content=text_has_content,
            text_streaming=text_streaming,
            text_chunk_index=text_chunk_index,
            last_text_chunk_time=last_text_chunk_time,
            chunk_count=chunk_count,
            reasoning_chunks=reasoning_chunks,
            content_chunks=content_chunks,
            tool_chunks=tool_chunks,
            last_finish_reason=last_finish_reason,
            accumulated_response=accumulated_response,
        )
        if stream_result.get("stopped"):
            return

        # Unpack the updated stream state back into locals.
        full_response = stream_result["full_response"]
        tool_calls = stream_result["tool_calls"]
        current_thinking = stream_result["current_thinking"]
        detected_tools = stream_result["detected_tools"]
        last_usage_payload = stream_result["last_usage_payload"]
        in_thinking = stream_result["in_thinking"]
        thinking_started = stream_result["thinking_started"]
        thinking_ended = stream_result["thinking_ended"]
        text_started = stream_result["text_started"]
        text_has_content = stream_result["text_has_content"]
        text_streaming = stream_result["text_streaming"]
        text_chunk_index = stream_result["text_chunk_index"]
        last_text_chunk_time = stream_result["last_text_chunk_time"]
        chunk_count = stream_result["chunk_count"]
        reasoning_chunks = stream_result["reasoning_chunks"]
        content_chunks = stream_result["content_chunks"]
        tool_chunks = stream_result["tool_chunks"]
        last_finish_reason = stream_result["last_finish_reason"]
        accumulated_response = stream_result["accumulated_response"]

        # Post-stream statistics.
        debug_log(f"\n流结束统计:")
        debug_log(f" 总chunks: {chunk_count}")
        debug_log(f" 思考chunks: {reasoning_chunks}")
        debug_log(f" 内容chunks: {content_chunks}")
        debug_log(f" 工具chunks: {tool_chunks}")
        debug_log(f" 收集到的思考: {len(current_thinking)} 字符")
        debug_log(f" 收集到的正文: {len(full_response)} 字符")
        debug_log(f" 收集到的工具: {len(tool_calls)} 个")

        # Close any still-open thinking stream.
        if in_thinking and not thinking_ended:
            sender('thinking_end', {'full_content': current_thinking})
            await asyncio.sleep(0.1)

        # Make sure the text_end event is sent.
        if text_started and text_has_content:
            debug_log(f"发送text_end事件,完整内容长度: {len(full_response)}")
            sender('text_end', {'full_content': full_response})
            await asyncio.sleep(0.1)
            text_streaming = False

        if full_response.strip():
            debug_log(f"流式文本内容长度: {len(full_response)} 字符")

        # Record thinking output for this task and advance the schedule.
        if web_terminal.api_client.last_call_used_thinking and current_thinking:
            web_terminal.api_client.current_task_thinking = current_thinking or ""
        if web_terminal.api_client.current_task_first_call:
            web_terminal.api_client.current_task_first_call = False
        update_thinking_after_call(web_terminal)

        # Detect malformed tool calls (JSON dumped as plain text) and, when
        # enabled, inject a corrective user message and retry.
        if not tool_calls and full_response and AUTO_FIX_TOOL_CALL:
            if detect_malformed_tool_call(full_response):
                auto_fix_attempts += 1

                if auto_fix_attempts <= AUTO_FIX_MAX_ATTEMPTS:
                    debug_log(f"检测到格式错误的工具调用,尝试自动修复 (尝试 {auto_fix_attempts}/{AUTO_FIX_MAX_ATTEMPTS})")

                    fix_message = "你使用了错误的格式输出工具调用。请使用正确的工具调用格式而不是直接输出JSON。根据当前进度继续执行任务。"

                    sender('system_message', {
                        'content': f'⚠️ 自动修复: {fix_message}'
                    })
                    maybe_mark_failure_from_message(web_terminal, f'⚠️ 自动修复: {fix_message}')

                    messages.append({
                        "role": "user",
                        "content": fix_message
                    })

                    await asyncio.sleep(1)
                    continue
                else:
                    debug_log(f"自动修复尝试已达上限 ({AUTO_FIX_MAX_ATTEMPTS})")
                    sender('system_message', {
                        'content': f'⌘ 工具调用格式错误,自动修复失败。请手动检查并重试。'
                    })
                    maybe_mark_failure_from_message(web_terminal, '⌘ 工具调用格式错误,自动修复失败。请手动检查并重试。')
                    break

        # Build the assistant message (used for API continuation).
        assistant_content_parts = []

        if full_response:
            assistant_content_parts.append(full_response)

        assistant_content = "\n".join(assistant_content_parts) if assistant_content_parts else ""

        # Append to the in-flight message list (for API continuation; not
        # persisted to file here).
        assistant_message = {
            "role": "assistant",
            "content": assistant_content,
            "tool_calls": tool_calls
        }
        if current_thinking:
            assistant_message["reasoning_content"] = current_thinking

        messages.append(assistant_message)
        # Persist the assistant turn to conversation history when it has
        # any content, thinking, or tool calls.
        if assistant_content or current_thinking or tool_calls:
            web_terminal.context_manager.add_conversation(
                "assistant",
                assistant_content,
                tool_calls=tool_calls if tool_calls else None,
                reasoning_content=current_thinking or None
            )

        # Reset stream flags for the next iteration (full_response was
        # already consumed for the save above).
        text_streaming = False
        text_started = False
        text_has_content = False
        full_response = ""

        if not tool_calls:
            debug_log("没有工具调用,结束迭代")
            break

        # Track consecutive calls to the same tool to detect loops.
        for tc in tool_calls:
            tool_name = tc["function"]["name"]

            if tool_name == last_tool_name:
                consecutive_same_tool[tool_name] += 1

                if (
                    MAX_CONSECUTIVE_SAME_TOOL is not None
                    and consecutive_same_tool[tool_name] >= MAX_CONSECUTIVE_SAME_TOOL
                ):
                    debug_log(f"警告: 连续调用相同工具 {tool_name} 已达 {MAX_CONSECUTIVE_SAME_TOOL} 次")
                    sender('system_message', {
                        'content': f'⚠️ 检测到重复调用 {tool_name} 工具 {MAX_CONSECUTIVE_SAME_TOOL} 次,可能存在循环。'
                    })
                    maybe_mark_failure_from_message(web_terminal, f'⚠️ 检测到重复调用 {tool_name} 工具 {MAX_CONSECUTIVE_SAME_TOOL} 次,可能存在循环。')

                    # Two calls past the threshold: announce termination.
                    # NOTE(review): this `break` only exits the for-loop over
                    # tool_calls, not the outer while — the tools still run
                    # this round; confirm that is intended.
                    if consecutive_same_tool[tool_name] >= MAX_CONSECUTIVE_SAME_TOOL + 2:
                        debug_log(f"终止: 工具 {tool_name} 调用次数过多")
                        sender('system_message', {
                            'content': f'⌘ 工具 {tool_name} 重复调用过多,任务终止。'
                        })
                        maybe_mark_failure_from_message(web_terminal, f'⌘ 工具 {tool_name} 重复调用过多,任务终止。')
                        break
            else:
                consecutive_same_tool.clear()
                consecutive_same_tool[tool_name] = 1

            last_tool_name = tool_name

        # Update totals.
        total_tool_calls += len(tool_calls)

        # Execute every requested tool call.
        tool_loop_result = await execute_tool_calls(
            web_terminal=web_terminal,
            tool_calls=tool_calls,
            sender=sender,
            messages=messages,
            client_sid=client_sid,
            username=username,
            iteration=iteration,
            conversation_id=conversation_id,
            last_tool_call_time=last_tool_call_time,
            process_sub_agent_updates=process_sub_agent_updates,
            maybe_mark_failure_from_message=maybe_mark_failure_from_message,
            mark_force_thinking=mark_force_thinking,
            get_stop_flag=get_stop_flag,
            clear_stop_flag=clear_stop_flag,
        )
        last_tool_call_time = tool_loop_result.get("last_tool_call_time", last_tool_call_time)
        if tool_loop_result.get("stopped"):
            return

        # No longer the first iteration.
        is_first_iteration = False

    # Final statistics.
    debug_log(f"\n{'='*40}")
    debug_log(f"任务完成统计:")
    debug_log(f" 总迭代次数: {total_iterations}")
    debug_log(f" 总工具调用: {total_tool_calls}")
    debug_log(f" 自动修复尝试: {auto_fix_attempts}")
    debug_log(f" 累积响应: {len(accumulated_response)} 字符")
    debug_log(f"{'='*40}\n")

    # Check for background sub-agents still running, or finished ones whose
    # completion has not yet been announced.
    manager = getattr(web_terminal, "sub_agent_manager", None)
    has_running_sub_agents = False
    if manager:
        if not hasattr(web_terminal, "_announced_sub_agent_tasks"):
            web_terminal._announced_sub_agent_tasks = set()
        running_tasks = [
            task for task in manager.tasks.values()
            if task.get("status") not in TERMINAL_STATUSES.union({"terminated"})
            and task.get("run_in_background")
            and task.get("conversation_id") == conversation_id
        ]
        pending_notice_tasks = [
            task for task in manager.tasks.values()
            if task.get("status") in TERMINAL_STATUSES.union({"terminated"})
            and task.get("run_in_background")
            and task.get("conversation_id") == conversation_id
            and task.get("task_id") not in web_terminal._announced_sub_agent_tasks
        ]

        if running_tasks or pending_notice_tasks:
            has_running_sub_agents = True
            notify_tasks = running_tasks + pending_notice_tasks
            debug_log(f"[SubAgent] 后台子智能体等待: running={len(running_tasks)} pending_notice={len(pending_notice_tasks)}")
            # Tell the frontend sub-agents are running/pending so it keeps
            # its waiting state.
            sender('sub_agent_waiting', {
                'count': len(notify_tasks),
                'tasks': [{'agent_id': t.get('agent_id'), 'summary': t.get('summary')} for t in notify_tasks]
            })

            # Launch a background task to poll for (and make up missed)
            # sub-agent completions. Runs the coroutine on a fresh event
            # loop because start_background_task uses a plain callable.
            def run_poll():
                import asyncio
                loop = asyncio.new_event_loop()
                asyncio.set_event_loop(loop)
                try:
                    loop.run_until_complete(poll_sub_agent_completion(
                        web_terminal=web_terminal,
                        workspace=workspace,
                        conversation_id=conversation_id,
                        client_sid=client_sid,
                        username=username
                    ))
                finally:
                    loop.close()

            socketio.start_background_task(run_poll)

    # Emit completion (the frontend keeps waiting if sub-agents remain).
    sender('task_complete', {
        'total_iterations': total_iterations,
        'total_tool_calls': total_tool_calls,
        'auto_fix_attempts': auto_fix_attempts,
        'has_running_sub_agents': has_running_sub_agents
    })