feat: improve streaming buffering and tooling

2025-11-14 22:17:55 +08:00 · 2025-11-14 22:17:55 +08:00 · 2e42a970e7
commit 2e42a970e7
parent caaee38967
5 changed files with 480 additions and 26 deletions
--- a/config/api.py
+++ b/config/api.py
@ -17,3 +17,9 @@ __all__ = [
    "TAVILY_API_KEY",
    "DEFAULT_RESPONSE_MAX_TOKENS",
 ]
 '''
 API_BASE_URL = "https://api.moonshot.cn/v1",
 API_KEY = "sk-xW0xjfQM6Mp9ZCWMLlnHiRJcpEOIZPTkXcN0dQ15xpZSuw2y",
 MODEL_ID = "kimi-k2-0905-preview"
 '''
--- a/data/conversations/conv_tool_system_fixture.json
+++ b/data/conversations/conv_tool_system_fixture.json
@ -0,0 +1,66 @@
 {
  "id": "conv_tool_system_fixture",
  "title": "工具结果以 system 回传示例",
  "created_at": "2025-11-14T21:08:00",
  "updated_at": "2025-11-14T21:08:00",
  "messages": [
    {
      "role": "user",
      "content": "我正在测试添加文件工具，请在这个文件中随便添加一些内容，不需要阅读\n/users/jojo/desktop/agents/正在修复中/agent_kimi_源码备份_标记有bug版本/project/test_add_file.txt\n"
    },
    {
      "role": "assistant",
      "content": "我来帮你在文件中添加一些内容。"
    },
    {
      "role": "assistant",
      "content": "",
      "tool_calls": [
        {
          "id": "append_to_file:0",
          "index": 0,
          "type": "function",
          "function": {
            "name": "append_to_file",
            "arguments": "{\"path\": \"/users/jojo/desktop/agents/正在修复中/agent_kimi_源码备份_标记有bug版本/project/test_add_file.txt\", \"reason\": \"测试添加文件工具，添加一些随机内容\"}"
          }
        }
      ]
    },
    {
      "role": "tool",
      "content": "{\"success\": false, \"error\": \"路径必须在项目文件夹内\"}",
      "tool_call_id": "append_to_file:0",
      "name": "append_to_file"
    },
    {
      "role": "assistant",
      "content": "",
      "tool_calls": [
        {
          "id": "append_to_file:1",
          "index": 0,
          "type": "function",
          "function": {
            "name": "append_to_file",
            "arguments": "{\"path\": \"test_add_file.txt\", \"reason\": \"测试添加文件工具，添加一些随机内容\"}"
          }
        }
      ]
    },
    {
      "role": "tool",
      "content": "{\"success\": true, \"awaiting_content\": true, \"path\": \"test_add_file.txt\", \"message\": \"\\n请按照以下格式输出需要追加到文件的完整内容，禁止输出任何解释性文字：\\n<<<APPEND:test_add_file.txt>>>\\n(在此行之后紧接着写入要追加的全部内容，可包含多行代码)\\n<<<END_APPEND>>>\\n若追加完成后仍需继续写入，请再次调用 append_to_file。\"}",
      "tool_call_id": "append_to_file:1",
      "name": "append_to_file"
    },
    {
      "role": "assistant",
      "content": "<<<APPEND:test_add_file.txt>>>\n这是一个测试文件，用于测试添加文件工具的功能。\n这里添加了一些随机的内容，包括：\n- 普通文本内容\n- 列表项\n- 时间戳：2025-10-09 16:13:00\n\n测试完成，文件添加功能正常工作。\n\n<<<END_APPEND>>>"
    },
    {
      "role": "system",
      "content": "[工具结果] append_to_file (tool_call_id=append_to_file:1)\n{\"success\": true, \"path\": \"test_add_file.txt\", \"lines\": 7, \"bytes\": 236, \"forced\": false, \"message\": \"已向 test_add_file.txt 追加 7 行（236 字节）\", \"finish_reason\": null, \"appended_content\": \"这是一个测试文件，用于测试添加文件工具的功能。\\n这里添加了一些随机的内容，包括：\\n- 普通文本内容\\n- 列表项\\n- 时间戳：2025-10-09 16:13:00\\n\\n测试完成，文件添加功能正常工作。\\n\"}"
    }
  ]
 }
--- a/scripts/api_tool_role_experiment.py
+++ b/scripts/api_tool_role_experiment.py
@ -0,0 +1,196 @@
 #!/usr/bin/env python3
 """
 使用现有“文件追加/修改”对话上下文对不同模型服务发起一次 Chat Completions 请求，
 用于复现“单次工具调用对应多个 tool 消息”在不同 API 上的兼容性差异。
 """
 import argparse
 import json
 from datetime import datetime
 from pathlib import Path
 from typing import Any, Dict, List, Tuple
 import httpx
 DEFAULT_CONVERSATION = Path("data/conversations/conv_20251009_161243_189.json")
 DEFAULT_OUTPUT_DIR = Path("logs/api_experiment")
 def convert_messages(raw_messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """将存档中的消息转换为 OpenAI Chat Completions 兼容格式。"""
    converted: List[Dict[str, Any]] = []
    for msg in raw_messages:
        role = msg.get("role")
        if not role:
            continue
        entry: Dict[str, Any] = {
            "role": role,
            "content": msg.get("content", "") or ""
        }
        if role == "tool":
            entry["tool_call_id"] = msg.get("tool_call_id")
            if msg.get("name"):
                entry["name"] = msg["name"]
        if msg.get("tool_calls"):
            entry["tool_calls"] = msg["tool_calls"]
        converted.append(entry)
    return converted
 def load_conversation_messages(path: Path) -> List[Dict[str, Any]]:
    """读取对话文件并返回 messages 列表。"""
    data = json.loads(path.read_text(encoding="utf-8"))
    raw_messages = data.get("messages")
    if not isinstance(raw_messages, list):
        raise ValueError(f"{path} 中缺少 messages 数据")
    return convert_messages(raw_messages)
 def minimal_tool_definitions() -> List[Dict[str, Any]]:
    """返回涵盖 append/modify 的最小工具定义集合。"""
    return [
        {
            "type": "function",
            "function": {
                "name": "append_to_file",
                "description": (
                    "准备向文件追加大段内容。调用后系统会发放 <<<APPEND:path>>>…<<<END_APPEND>>> "
                    "格式的写入窗口，AI 必须在窗口内一次性输出需要追加的全部内容。"
                ),
                "parameters": {
                    "type": "object",
                    "properties": {
                        "path": {"type": "string", "description": "目标文件的相对路径"},
                        "reason": {"type": "string", "description": "为什么需要追加（可选）"}
                    },
                    "required": ["path"]
                }
            }
        },
        {
            "type": "function",
            "function": {
                "name": "modify_file",
                "description": (
                    "准备替换文件中的指定内容。模型必须在 <<<MODIFY:path>>>…<<<END_MODIFY>>> "
                    "结构内输出若干 [replace:n] 补丁块，每块包含 <<OLD>> 原文 和 <<NEW>> 新内容。"
                ),
                "parameters": {
                    "type": "object",
                    "properties": {
                        "path": {"type": "string", "description": "目标文件的相对路径"}
                    },
                    "required": ["path"]
                }
            }
        }
    ]
 def send_request(
    api_base: str,
    api_key: str,
    model_id: str,
    messages: List[Dict[str, Any]],
    tools: List[Dict[str, Any]],
    timeout: float = 60.0
 ) -> Tuple[int, Dict[str, Any], str]:
    """向指定 API 发送一次非流式请求，返回状态码、JSON/空字典、原始文本。"""
    url = api_base.rstrip("/") + "/chat/completions"
    payload = {
        "model": model_id,
        "messages": messages,
        "tools": tools,
        "tool_choice": "auto",
        "stream": False
    }
    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json"
    }
    with httpx.Client(timeout=timeout) as client:
        response = client.post(url, json=payload, headers=headers)
    text = response.text
    try:
        data = response.json()
    except ValueError:
        data = {}
    return response.status_code, data, text
 def dump_result(
    output_dir: Path,
    label: str,
    payload: Dict[str, Any],
    status_code: int,
    json_body: Dict[str, Any],
    raw_text: str
 ) -> Path:
    """将实验结果落盘，便于后续分析。"""
    output_dir.mkdir(parents=True, exist_ok=True)
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    filename = output_dir / f"{label}_{timestamp}.json"
    record = {
        "label": label,
        "status_code": status_code,
        "request_payload": payload,
        "response_json": json_body,
        "response_text": raw_text
    }
    filename.write_text(json.dumps(record, ensure_ascii=False, indent=2), encoding="utf-8")
    return filename
 def main() -> None:
    parser = argparse.ArgumentParser(description="对比不同 API 对工具消息结构的兼容性。")
    parser.add_argument("--conversation-file", type=Path, default=DEFAULT_CONVERSATION,
                        help="使用的对话存档 JSON 文件路径")
    parser.add_argument("--api-base", required=True, help="API 基础地址，如 https://api.example.com/v1")
    parser.add_argument("--api-key", required=True, help="API Key")
    parser.add_argument("--model-id", required=True, help="模型 ID")
    parser.add_argument("--label", required=True, help="本次实验标签，用于输出文件命名")
    parser.add_argument("--output-dir", type=Path, default=DEFAULT_OUTPUT_DIR,
                        help="实验结果输出目录")
    parser.add_argument("--timeout", type=float, default=60.0, help="HTTP 请求超时时间（秒）")
    args = parser.parse_args()
    messages = load_conversation_messages(args.conversation_file)
    tools = minimal_tool_definitions()
    payload = {
        "model": args.model_id,
        "messages": messages,
        "tools": tools,
        "tool_choice": "auto",
        "stream": False
    }
    print(f"📨 发送消息数: {len(messages)}，工具定义数: {len(tools)}")
    print(f"➡️  目标: {args.api_base} / {args.model_id} (label={args.label})")
    status_code, json_body, raw_text = send_request(
        api_base=args.api_base,
        api_key=args.api_key,
        model_id=args.model_id,
        messages=messages,
        tools=tools,
        timeout=args.timeout
    )
    output_path = dump_result(
        output_dir=args.output_dir,
        label=args.label,
        payload=payload,
        status_code=status_code,
        json_body=json_body,
        raw_text=raw_text
    )
    print(f"✅ HTTP {status_code}，结果已保存: {output_path}")
    if status_code >= 400:
        print("⚠️  响应出现错误，请查看 response_json/response_text 获取详细信息。")
 if __name__ == "__main__":
    main()
--- a/scripts/stream_chunk_probe.py
+++ b/scripts/stream_chunk_probe.py
@ -0,0 +1,120 @@
 #!/usr/bin/env python3
 """
 对指定模型服务发起一次流式请求，并记录每个 data chunk 的输出长度与时间间隔。
 Usage:
    python3 scripts/stream_chunk_probe.py \
        --api-base https://api.moonshot.cn/v1 \
        --api-key sk-xxx \
        --model-id kimi-k2-0905-preview \
        --prompt "帮我写一个Python脚本..."
 """
 import argparse
 import asyncio
 import json
 import time
 from pathlib import Path
 from typing import Any, Dict, List
 import httpx
 def build_messages(prompt: str) -> List[Dict[str, str]]:
    """构建最小化消息列表。"""
    system_prompt = "你是一个友好的中文助手。请在回答时输出足够长的内容以便观察流式分片。"
    return [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": prompt},
    ]
 async def stream_once(api_base: str, api_key: str, model_id: str, prompt: str, timeout: float, max_chunks: int = 0) -> None:
    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json"
    }
    payload = {
        "model": model_id,
        "messages": build_messages(prompt),
        "stream": True
    }
    url = api_base.rstrip("/") + "/chat/completions"
    print(f"➡️  发起流式请求: {url} ({model_id})")
    start_time = time.time()
    last_time = start_time
    chunk_index = 0
    total_tokens = 0
    async with httpx.AsyncClient(timeout=timeout) as client:
        async with client.stream("POST", url, json=payload, headers=headers) as response:
            print(f"HTTP {response.status_code}")
            async for raw_line in response.aiter_lines():
                if not raw_line:
                    continue
                if not raw_line.startswith("data:"):
                    continue
                data_part = raw_line[5:].strip()
                if data_part == "[DONE]":
                    break
                chunk_index += 1
                now = time.time()
                delta = now - last_time
                last_time = now
                try:
                    data = json.loads(data_part)
                except json.JSONDecodeError:
                    print(f"[{chunk_index:03d}] Δ{delta:.3f}s | 非JSON: {data_part[:80]}")
                    continue
                delta_obj = data.get("choices", [{}])[0].get("delta", {})
                text_piece = delta_obj.get("content") or ""
                total_tokens += len(text_piece)
                reasoning = delta_obj.get("reasoning_content")
                has_tool = bool(delta_obj.get("tool_calls"))
                summary = []
                if text_piece:
                    summary.append(f"text {len(text_piece)} chars")
                if reasoning:
                    summary.append(f"think {len(reasoning)} chars")
                if has_tool:
                    summary.append("tool_calls")
                if not summary:
                    summary.append("no-content")
                summary_text = ", ".join(summary)
                print(f"[{chunk_index:03d}] Δ{delta:.3f}s | {summary_text}")
                if max_chunks and chunk_index >= max_chunks:
                    print(f"⚠️  已达到 max_chunks={max_chunks}，提前停止流式读取。")
                    break
    total_time = last_time - start_time
    print(f"✅ 流结束，共 {chunk_index} 个 chunk，用时 {total_time:.2f}s，累计正文字符 {total_tokens}")
 def main() -> None:
    parser = argparse.ArgumentParser(description="采集流式输出 chunk 间隔。")
    parser.add_argument("--api-base", required=True, help="API 基础地址，例如 https://api.moonshot.cn/v1")
    parser.add_argument("--api-key", required=True, help="API Key")
    parser.add_argument("--model-id", required=True, help="模型 ID")
    parser.add_argument("--prompt", default="请用中文详细说明流式输出测试，输出足够多的文字。", help="测试用 prompt")
    parser.add_argument("--timeout", type=float, default=120.0, help="HTTP 超时时间（秒）")
    parser.add_argument("--max-chunks", type=int, default=0, help="可选，限制最多采集的 chunk 数")
    args = parser.parse_args()
    asyncio.run(stream_once(
        api_base=args.api_base,
        api_key=args.api_key,
        model_id=args.model_id,
        prompt=args.prompt,
        timeout=args.timeout,
        max_chunks=args.max_chunks
    ))
 if __name__ == "__main__":
    main()
--- a/web_server.py
+++ b/web_server.py
@ -18,7 +18,7 @@ from functools import wraps
 from datetime import timedelta
 import time
 from datetime import datetime
-from collections import defaultdict
+from collections import defaultdict, deque
 from werkzeug.utils import secure_filename
 from werkzeug.routing import BaseConverter
@ -150,6 +150,17 @@ def sanitize_filename_preserve_unicode(filename: str) -> str:
    # Windows/Unix 通用文件名长度安全上限
    return cleaned[:255]
 def format_tool_result_notice(tool_name: str, tool_call_id: Optional[str], content: str) -> str:
    """将工具执行结果转为系统消息文本，方便在对话中回传。"""
    header = f"[工具结果] {tool_name}"
    if tool_call_id:
        header += f" (tool_call_id={tool_call_id})"
    body = (content or "").strip()
    if not body:
        body = "（无附加输出）"
    return f"{header}\n{body}"
 # 创建调试日志文件
 DEBUG_LOG_FILE = Path(LOGS_DIR).expanduser().resolve() / "debug_stream.log"
 UPLOAD_FOLDER_NAME = "user_upload"
@ -2341,6 +2352,64 @@ async def handle_task_with_sender(terminal: WebTerminal, message, sender, client
        thinking_ended = False
        text_started = False
        text_has_content = False
        TEXT_BUFFER_MAX_CHARS = 1
        TEXT_BUFFER_MAX_INTERVAL = 0.0
        TEXT_BUFFER_FLUSH_CHARS = 1
        text_chunk_buffer: deque[str] = deque()
        text_chunk_buffer_size = 0
        last_text_flush_time = time.time()
        TEXT_BUFFER_CHAR_DELAY = 0.02
        def _drain_text_buffer(force: bool = False) -> bool:
            nonlocal text_chunk_buffer, text_chunk_buffer_size, last_text_flush_time
            if not text_chunk_buffer:
                return False
            drain_all = force or TEXT_BUFFER_MAX_INTERVAL == 0.0
            sent = False
            while text_chunk_buffer:
                now = time.time()
                should_flush = (
                    force
                    or text_chunk_buffer_size >= TEXT_BUFFER_MAX_CHARS
                    or TEXT_BUFFER_MAX_INTERVAL == 0.0
                    or (TEXT_BUFFER_MAX_INTERVAL > 0 and (now - last_text_flush_time) >= TEXT_BUFFER_MAX_INTERVAL)
                )
                if not should_flush:
                    break
                batch_size = text_chunk_buffer_size if drain_all else max(1, min(text_chunk_buffer_size, TEXT_BUFFER_FLUSH_CHARS or 1))
                pieces: List[str] = []
                remaining = batch_size
                while text_chunk_buffer and remaining > 0:
                    chunk = text_chunk_buffer.popleft()
                    chunk_len = len(chunk)
                    if chunk_len <= remaining:
                        pieces.append(chunk)
                        remaining -= chunk_len
                    else:
                        pieces.append(chunk[:remaining])
                        text_chunk_buffer.appendleft(chunk[remaining:])
                        chunk_len = remaining
                        remaining = 0
                    text_chunk_buffer_size -= chunk_len
                if not pieces:
                    break
                sender('text_chunk', {'content': "".join(pieces)})
                last_text_flush_time = now
                sent = True
                if not drain_all:
                    break
            return sent
        async def flush_text_buffer(force: bool = False):
            sent = _drain_text_buffer(force)
            if sent and not force and TEXT_BUFFER_CHAR_DELAY > 0:
                await asyncio.sleep(TEXT_BUFFER_CHAR_DELAY)
        text_streaming = False
        # 计数器
@ -2625,7 +2694,10 @@ async def handle_task_with_sender(terminal: WebTerminal, message, sender, client
                        full_response += content
                        accumulated_response += content
                        text_has_content = True
-                        sender('text_chunk', {'content': content})
+                        for ch in content:
                            text_chunk_buffer.append(ch)
                            text_chunk_buffer_size += 1
                            await flush_text_buffer()
            # 收集工具调用 - 实时发送准备状态
            if "tool_calls" in delta:
@ -2698,6 +2770,7 @@ async def handle_task_with_sender(terminal: WebTerminal, message, sender, client
            debug_log(f"输出token统计失败: {e}")
        # 流结束后的处理
        await flush_text_buffer(force=True)
        debug_log(f"\n流结束统计:")
        debug_log(f"  总chunks: {chunk_count}")
        debug_log(f"  思考chunks: {reasoning_chunks}")
@ -2726,6 +2799,7 @@ async def handle_task_with_sender(terminal: WebTerminal, message, sender, client
        # 确保text_end事件被发送
        if text_started and text_has_content and not append_result["handled"] and not modify_result["handled"]:
            await flush_text_buffer(force=True)
            debug_log(f"发送text_end事件，完整内容长度: {len(full_response)}")
            sender('text_end', {'full_content': full_response})
            await asyncio.sleep(0.1)
@ -2759,14 +2833,10 @@ async def handle_task_with_sender(terminal: WebTerminal, message, sender, client
            if append_result["tool_content"]:
                tool_call_id = append_result.get("tool_call_id") or f"append_{int(time.time() * 1000)}"
-                web_terminal.context_manager.add_conversation(
+                system_notice = format_tool_result_notice("append_to_file", tool_call_id, append_result["tool_content"])
-                    "tool",
+                web_terminal.context_manager.add_conversation("system", system_notice)
                    append_result["tool_content"],
                    tool_call_id=tool_call_id,
                    name="append_to_file"
                )
                append_result["tool_call_id"] = tool_call_id
-                debug_log("💾 增量保存：append_to_file 工具结果")
+                debug_log("💾 增量保存：append_to_file 工具结果（system 通知）")
            finish_reason = append_result.get("finish_reason")
            path_for_prompt = append_result.get("path")
@ -2839,14 +2909,10 @@ async def handle_task_with_sender(terminal: WebTerminal, message, sender, client
            if modify_result["tool_content"]:
                tool_call_id = modify_result.get("tool_call_id") or f"modify_{int(time.time() * 1000)}"
-                web_terminal.context_manager.add_conversation(
+                system_notice = format_tool_result_notice("modify_file", tool_call_id, modify_result["tool_content"])
-                    "tool",
+                web_terminal.context_manager.add_conversation("system", system_notice)
                    modify_result["tool_content"],
                    tool_call_id=tool_call_id,
                    name="modify_file"
                )
                modify_result["tool_call_id"] = tool_call_id
-                debug_log("💾 增量保存：modify_file 工具结果")
+                debug_log("💾 增量保存：modify_file 工具结果（system 通知）")
            path_for_prompt = modify_result.get("path")
            failed_blocks = modify_result.get("failed_blocks") or []
@ -2955,22 +3021,22 @@ async def handle_task_with_sender(terminal: WebTerminal, message, sender, client
        if append_result["handled"] and append_result.get("tool_content"):
            tool_call_id = append_result.get("tool_call_id") or f"append_{int(time.time() * 1000)}"
            system_notice = format_tool_result_notice("append_to_file", tool_call_id, append_result["tool_content"])
            messages.append({
-                "role": "tool",
+                "role": "system",
-                "tool_call_id": tool_call_id,
+                "content": system_notice
                "name": "append_to_file",
                "content": append_result["tool_content"]
            })
-            debug_log("已将 append_to_file 工具结果追加到对话上下文")
+            append_result["tool_call_id"] = tool_call_id
            debug_log("已将 append_to_file 工具结果以 system 形式追加到对话上下文")
        if modify_result["handled"] and modify_result.get("tool_content"):
            tool_call_id = modify_result.get("tool_call_id") or f"modify_{int(time.time() * 1000)}"
            system_notice = format_tool_result_notice("modify_file", tool_call_id, modify_result["tool_content"])
            messages.append({
-                "role": "tool",
+                "role": "system",
-                "tool_call_id": tool_call_id,
+                "content": system_notice
                "name": "modify_file",
                "content": modify_result["tool_content"]
            })
-            debug_log("已将 modify_file 工具结果追加到对话上下文")
+            modify_result["tool_call_id"] = tool_call_id
            debug_log("已将 modify_file 工具结果以 system 形式追加到对话上下文")
        force_continue = append_result["handled"] or modify_result["handled"]
        if force_continue: