feat(web): add reasoning mode toggle

This commit is contained in:
JOJO 2025-11-18 10:12:16 +08:00
parent e1704f87ea
commit f7ce0559b7
9 changed files with 235 additions and 112 deletions

View File

@ -4,6 +4,11 @@ API_BASE_URL = "https://ark.cn-beijing.volces.com/api/v3"
API_KEY = "3e96a682-919d-45c1-acb2-53bc4e9660d3"
MODEL_ID = "kimi-k2-250905"
# 推理模型配置(智能思考模式使用)
THINKING_API_BASE_URL = "https://ark.cn-beijing.volces.com/api/v3"
THINKING_API_KEY = "3e96a682-919d-45c1-acb2-53bc4e9660d3"
THINKING_MODEL_ID = "kimi-k2-250905"
# Tavily 搜索
TAVILY_API_KEY = "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"
@ -16,10 +21,13 @@ __all__ = [
"MODEL_ID",
"TAVILY_API_KEY",
"DEFAULT_RESPONSE_MAX_TOKENS",
"THINKING_API_BASE_URL",
"THINKING_API_KEY",
"THINKING_MODEL_ID",
]
'''
API_BASE_URL = "https://api.moonshot.cn/v1",
API_BASE_URL = "https://api.moonshot.cn/v1"
API_KEY = "sk-xW0xjfQM6Mp9ZCWMLlnHiRJcpEOIZPTkXcN0dQ15xpZSuw2y",
MODEL_ID = "kimi-k2-0905-preview"
'''

View File

@ -2066,6 +2066,11 @@ class MainTerminal:
if sub_agent_prompt:
messages.append({"role": "system", "content": sub_agent_prompt})
if self.thinking_mode:
thinking_prompt = self.load_prompt("thinking_mode_guidelines").strip()
if thinking_prompt:
messages.append({"role": "system", "content": thinking_prompt})
# 添加对话历史保留完整结构包括tool_calls和tool消息
for conv in context["conversation"]:
metadata = conv.get("metadata") or {}

View File

@ -284,13 +284,7 @@ class WebTerminal(MainTerminal):
def get_thinking_mode_status(self) -> str:
"""获取思考模式状态描述"""
if not self.thinking_mode:
return "快速模式"
else:
if self.api_client.current_task_first_call:
return "思考模式(等待新任务)"
else:
return "思考模式(任务进行中)"
return "思考模式" if self.thinking_mode else "快速模式"
def get_focused_files_info(self) -> Dict:
"""获取聚焦文件信息用于WebSocket更新- 使用与 /api/focused 一致的格式"""

View File

@ -111,6 +111,7 @@ async function bootstrapApp() {
projectPath: '',
agentVersion: '',
thinkingMode: '未知',
nextThinkingMode: null,
// 消息相关
messages: [],
@ -161,6 +162,7 @@ async function bootstrapApp() {
// 对话历史侧边栏
sidebarCollapsed: true, // 默认收起对话侧边栏
panelMode: 'files', // files | todo | subAgents
panelMenuOpen: false,
subAgents: [],
subAgentPollTimer: null,
conversations: [],
@ -253,6 +255,7 @@ async function bootstrapApp() {
document.addEventListener('click', this.handleClickOutsideSettings);
document.addEventListener('click', this.handleClickOutsideToolMenu);
document.addEventListener('click', this.handleClickOutsidePanelMenu);
window.addEventListener('popstate', this.handlePopState);
this.onDocumentClick = (event) => {
@ -292,6 +295,7 @@ async function bootstrapApp() {
beforeUnmount() {
document.removeEventListener('click', this.handleClickOutsideSettings);
document.removeEventListener('click', this.handleClickOutsideToolMenu);
document.removeEventListener('click', this.handleClickOutsidePanelMenu);
window.removeEventListener('popstate', this.handlePopState);
if (this.onDocumentClick) {
document.removeEventListener('click', this.onDocumentClick);
@ -590,6 +594,7 @@ async function bootstrapApp() {
this.projectPath = data.project_path || '';
this.agentVersion = data.version || this.agentVersion;
this.thinkingMode = data.thinking_mode || '未知';
this.nextThinkingMode = null;
console.log('系统就绪:', data);
// 系统就绪后立即加载对话列表
@ -1318,7 +1323,13 @@ async function bootstrapApp() {
const statusData = await statusResponse.json();
this.projectPath = statusData.project_path || '';
this.agentVersion = statusData.version || this.agentVersion;
this.thinkingMode = statusData.thinking_mode || '未知';
if (statusData.thinking_mode) {
this.thinkingMode = statusData.thinking_mode.label || '未知';
this.nextThinkingMode = statusData.thinking_mode.next ?? null;
} else {
this.thinkingMode = '未知';
this.nextThinkingMode = null;
}
// 获取当前对话信息
const statusConversationId = statusData.conversation && statusData.conversation.current_id;
@ -1629,30 +1640,34 @@ async function bootstrapApp() {
};
}
// 处理思考内容 - 支持多种格式
const content = message.content || '';
let reasoningText = (message.reasoning_content || '').trim();
if (!reasoningText) {
const thinkPatterns = [
/<think>([\s\S]*?)<\/think>/g,
/<thinking>([\s\S]*?)<\/thinking>/g
];
let allThinkingContent = '';
let extracted = '';
for (const pattern of thinkPatterns) {
let match;
while ((match = pattern.exec(content)) !== null) {
allThinkingContent += match[1].trim() + '\n';
extracted += (match[1] || '').trim() + '\n';
}
}
reasoningText = extracted.trim();
}
if (allThinkingContent) {
if (reasoningText) {
currentAssistantMessage.actions.push({
id: `history-think-${Date.now()}-${Math.random()}`,
type: 'thinking',
content: allThinkingContent.trim(),
content: reasoningText,
streaming: false,
timestamp: Date.now()
});
console.log('添加思考内容:', allThinkingContent.substring(0, 50) + '...');
console.log('添加思考内容:', reasoningText.substring(0, 50) + '...');
}
// 处理普通文本内容(移除思考标签后的内容)
@ -1660,10 +1675,15 @@ async function bootstrapApp() {
const appendPayloadMeta = metadata.append_payload;
const modifyPayloadMeta = metadata.modify_payload;
let textContent = content
let textContent = content;
if (!message.reasoning_content) {
textContent = textContent
.replace(/<think>[\s\S]*?<\/think>/g, '')
.replace(/<thinking>[\s\S]*?<\/thinking>/g, '')
.trim();
} else {
textContent = textContent.trim();
}
if (appendPayloadMeta) {
currentAssistantMessage.actions.push({
@ -2081,6 +2101,24 @@ async function bootstrapApp() {
}
},
togglePanelMenu() {
this.panelMenuOpen = !this.panelMenuOpen;
},
selectPanelMode(mode) {
if (this.panelMode === mode) {
this.panelMenuOpen = false;
return;
}
this.panelMode = mode;
this.panelMenuOpen = false;
if (mode === 'todo') {
this.fetchTodoList();
} else if (mode === 'subAgents') {
this.fetchSubAgents();
}
},
formatTaskStatus(task) {
if (!task) {
return '';
@ -2114,10 +2152,10 @@ async function bootstrapApp() {
return;
}
const { protocol, hostname } = window.location;
const base = `${protocol}//${hostname}:8092`;
const parentConv = agent.conversation_id || this.currentConversationId || '';
const convSegment = this.stripConversationPrefix(parentConv);
const agentLabel = agent.agent_id ? `sub_agent${agent.agent_id}` : agent.task_id;
const base = `${protocol}//${hostname}:8092`;
const pathSuffix = convSegment
? `/${convSegment}+${agentLabel}`
: `/sub_agent/${agent.task_id}`;
@ -2332,6 +2370,17 @@ async function bootstrapApp() {
}
},
handleClickOutsidePanelMenu(event) {
if (!this.panelMenuOpen) {
return;
}
const wrapper = this.$refs.panelMenuWrapper;
if (wrapper && wrapper.contains(event.target)) {
return;
}
this.panelMenuOpen = false;
},
applyToolSettingsSnapshot(categories) {
if (!Array.isArray(categories)) {
return;
@ -2492,7 +2541,8 @@ async function bootstrapApp() {
'todo_finish': '🏁',
'todo_finish_confirm': '❗',
'create_sub_agent': '🤖',
'wait_sub_agent': '⏳'
'wait_sub_agent': '⏳',
'close_sub_agent': '🛑'
};
return icons[toolName] || '⚙️';
},

View File

@ -128,13 +128,29 @@
<!-- 左侧文件树 -->
<aside class="sidebar left-sidebar" :style="{ width: leftWidth + 'px' }">
<div class="sidebar-header">
<div class="panel-menu-wrapper" ref="panelMenuWrapper">
<button class="sidebar-view-toggle"
@click="cycleSidebarPanel"
:title="panelMode === 'files' ? '查看待办列表' : (panelMode === 'todo' ? '查看子智能体' : '查看项目文件')">
<span v-if="panelMode === 'files'">{{ todoEmoji }}</span>
<span v-else-if="panelMode === 'todo'">🤖</span>
<span v-else>{{ fileEmoji }}</span>
@click.stop="togglePanelMenu"
title="切换侧边栏">
</button>
<transition name="fade">
<div class="panel-menu" v-if="panelMenuOpen">
<button type="button"
:class="{ active: panelMode === 'files' }"
@click.stop="selectPanelMode('files')"
title="项目文件">📁</button>
<button type="button"
:class="{ active: panelMode === 'todo' }"
@click.stop="selectPanelMode('todo')"
title="待办列表">{{ todoEmoji }}</button>
<button type="button"
:class="{ active: panelMode === 'subAgents' }"
@click.stop="selectPanelMode('subAgents')"
title="子智能体">🤖</button>
</div>
</transition>
</div>
<button class="sidebar-manage-btn"
@click="openGuiFileManager"
title="打开桌面式文件管理器">
@ -525,10 +541,10 @@
{{ compressing ? '压缩中...' : '压缩' }}
</button>
<button type="button"
class="menu-btn clear-entry"
@click="clearChat"
class="menu-btn mode-entry"
@click="toggleNextThinkingMode"
:disabled="streamingMessage || !isConnected">
清除
{{ nextThinkingMode ? '下一次: 思考模式' : '下一次: 快速模式' }}
</button>
</div>
</transition>

View File

@ -492,6 +492,49 @@ body {
background: rgba(255, 255, 255, 0.95);
}
/* Anchor container so the panel picker popover positions relative to its toggle button. */
.panel-menu-wrapper {
position: relative;
display: inline-flex;
align-items: center;
}
/* Floating popover listing the available sidebar panels (files / todo / sub-agents). */
.panel-menu {
position: absolute;
left: calc(100% + 8px);
top: 0;
display: flex;
gap: 6px;
background: rgba(255, 255, 255, 0.95);
border: 1px solid rgba(118, 103, 84, 0.2);
border-radius: 8px;
padding: 6px 8px;
box-shadow: 0 6px 18px rgba(61, 57, 41, 0.12);
z-index: 20;
}
/* Emoji-only panel buttons: strip native chrome, keep a small hit area. */
.panel-menu button {
border: none;
background: transparent;
font-size: 18px;
cursor: pointer;
padding: 4px 6px;
border-radius: 6px;
}
/* Highlight for the currently selected panel. */
.panel-menu button.active {
background: rgba(108, 92, 231, 0.1);
}
/* Fade transition used by the <transition name="fade"> wrapper around the popover. */
.fade-enter-active,
.fade-leave-active {
transition: opacity 0.15s ease;
}
.fade-enter-from,
.fade-leave-to {
opacity: 0;
}
.sidebar.right-sidebar.collapsed {
width: 0 !important;
min-width: 0 !important;

View File

@ -6,26 +6,47 @@ import json
import asyncio
from typing import List, Dict, Optional, AsyncGenerator
try:
from config import API_BASE_URL, API_KEY, MODEL_ID, OUTPUT_FORMATS, DEFAULT_RESPONSE_MAX_TOKENS
from config import (
API_BASE_URL,
API_KEY,
MODEL_ID,
OUTPUT_FORMATS,
DEFAULT_RESPONSE_MAX_TOKENS,
THINKING_API_BASE_URL,
THINKING_API_KEY,
THINKING_MODEL_ID
)
except ImportError:
import sys
from pathlib import Path
project_root = Path(__file__).resolve().parents[1]
if str(project_root) not in sys.path:
sys.path.insert(0, str(project_root))
from config import API_BASE_URL, API_KEY, MODEL_ID, OUTPUT_FORMATS, DEFAULT_RESPONSE_MAX_TOKENS
from config import (
API_BASE_URL,
API_KEY,
MODEL_ID,
OUTPUT_FORMATS,
DEFAULT_RESPONSE_MAX_TOKENS,
THINKING_API_BASE_URL,
THINKING_API_KEY,
THINKING_MODEL_ID
)
class DeepSeekClient:
def __init__(self, thinking_mode: bool = True, web_mode: bool = False):
self.api_base_url = API_BASE_URL
self.api_key = API_KEY
self.model_id = MODEL_ID
self.fast_api_config = {
"base_url": API_BASE_URL,
"api_key": API_KEY,
"model_id": MODEL_ID
}
self.thinking_api_config = {
"base_url": THINKING_API_BASE_URL or API_BASE_URL,
"api_key": THINKING_API_KEY or API_KEY,
"model_id": THINKING_MODEL_ID or MODEL_ID
}
self.thinking_mode = thinking_mode # True=智能思考模式, False=快速模式
self.web_mode = web_mode # Web模式标志用于禁用print输出
self.headers = {
"Authorization": f"Bearer {self.api_key}",
"Content-Type": "application/json"
}
# 每个任务的独立状态
self.current_task_first_call = True # 当前任务是否是第一次调用
self.current_task_thinking = "" # 当前任务的思考内容
@ -103,6 +124,24 @@ class DeepSeekClient:
self.current_task_first_call = True
self.current_task_thinking = ""
def _build_headers(self, api_key: str) -> Dict[str, str]:
return {
"Authorization": f"Bearer {api_key}",
"Content-Type": "application/json"
}
def _select_api_config(self, use_thinking: bool) -> Dict[str, str]:
"""
根据当前模式选择API配置确保缺失字段回退到默认模型
"""
config = self.thinking_api_config if use_thinking else self.fast_api_config
fallback = self.fast_api_config
return {
"base_url": config.get("base_url") or fallback["base_url"],
"api_key": config.get("api_key") or fallback["api_key"],
"model_id": config.get("model_id") or fallback["model_id"]
}
def get_current_thinking_mode(self) -> bool:
"""获取当前应该使用的思考模式"""
if not self.thinking_mode:
@ -203,6 +242,8 @@ class DeepSeekClient:
# 决定是否使用思考模式
current_thinking_mode = self.get_current_thinking_mode()
api_config = self._select_api_config(current_thinking_mode)
headers = self._build_headers(api_config["api_key"])
# 如果是思考模式且不是当前任务的第一次,显示提示
if self.thinking_mode and not self.current_task_first_call:
@ -216,12 +257,13 @@ class DeepSeekClient:
max_tokens = 4096
payload = {
"model": self.model_id,
"model": api_config["model_id"],
"messages": messages,
"stream": stream,
"thinking": {"type": "enabled" if current_thinking_mode else "disabled"},
"max_tokens": max_tokens
}
if current_thinking_mode:
payload["thinking"] = {"type": "enabled"}
if tools:
payload["tools"] = tools
@ -232,9 +274,9 @@ class DeepSeekClient:
if stream:
async with client.stream(
"POST",
f"{self.api_base_url}/chat/completions",
f"{api_config['base_url']}/chat/completions",
json=payload,
headers=self.headers
headers=headers
) as response:
# 检查响应状态
if response.status_code != 200:
@ -255,9 +297,9 @@ class DeepSeekClient:
continue
else:
response = await client.post(
f"{self.api_base_url}/chat/completions",
f"{api_config['base_url']}/chat/completions",
json=payload,
headers=self.headers
headers=headers
)
if response.status_code != 200:
error_text = response.text
@ -294,22 +336,6 @@ class DeepSeekClient:
iteration = 0
all_tool_results = [] # 记录所有工具调用结果
# 如果是思考模式且不是当前任务的第一次调用,注入本次任务的思考
# 注意:这里重置的是当前任务的第一次调用标志,确保新用户请求重新思考
# 只有在同一个任务的多轮迭代中才应该注入
# 对于新的用户请求,应该重新开始思考,而不是使用之前的思考内容
# 只有在当前任务有思考内容且不是第一次调用时才注入
if (self.thinking_mode and
not self.current_task_first_call and
self.current_task_thinking and
iteration == 0): # 只在第一次迭代时注入,避免多次注入
# 在messages末尾添加一个系统消息包含本次任务的思考
thinking_context = f"\n=== 📋 本次任务的思考 ===\n{self.current_task_thinking}\n=== 思考结束 ===\n提示:这是本次任务的初始思考,你可以基于此继续处理。"
messages.append({
"role": "system",
"content": thinking_context
})
while iteration < max_iterations:
iteration += 1
@ -409,13 +435,13 @@ class DeepSeekClient:
# 构建助手消息 - 始终包含所有收集到的内容
assistant_content_parts = []
# 添加思考内容(如果有)
if current_thinking:
assistant_content_parts.append(f"<think>\n{current_thinking}\n</think>")
# 添加正式回复内容(如果有)
if full_response:
assistant_content_parts.append(full_response)
elif append_result["handled"] and append_result["assistant_content"]:
assistant_content_parts.append(append_result["assistant_content"])
elif modify_result["handled"] and modify_result.get("assistant_content"):
assistant_content_parts.append(modify_result["assistant_content"])
# 添加工具调用说明
if tool_calls:
@ -556,14 +582,6 @@ class DeepSeekClient:
# 获取当前是否应该显示思考
should_show_thinking = self.get_current_thinking_mode()
# 如果是思考模式且不是当前任务的第一次调用,注入本次任务的思考
if self.thinking_mode and not self.current_task_first_call and self.current_task_thinking:
thinking_context = f"\n=== 📋 本次任务的思考 ===\n{self.current_task_thinking}\n=== 思考结束 ===\n"
messages.append({
"role": "system",
"content": thinking_context
})
try:
async for chunk in self.chat(messages, tools=None, stream=True):
if "choices" not in chunk:

View File

@ -648,7 +648,8 @@ class ContextManager:
tool_calls: Optional[List[Dict]] = None,
tool_call_id: Optional[str] = None,
name: Optional[str] = None,
metadata: Optional[Dict[str, Any]] = None
metadata: Optional[Dict[str, Any]] = None,
reasoning_content: Optional[str] = None
):
"""添加对话记录(改进版:集成自动保存 + 智能token统计"""
message = {
@ -660,6 +661,9 @@ class ContextManager:
if metadata:
message["metadata"] = metadata
if reasoning_content:
message["reasoning_content"] = reasoning_content
# 如果是assistant消息且有工具调用保存完整格式
if role == "assistant" and tool_calls:
# 确保工具调用格式完整

View File

@ -1807,7 +1807,6 @@ async def handle_task_with_sender(terminal: WebTerminal, message, sender, client
sender('ai_message_start', {})
# 增量保存相关变量
has_saved_thinking = False # 是否已保存思考内容
accumulated_response = "" # 累积的响应内容
is_first_iteration = True # 是否是第一次迭代
@ -2618,12 +2617,6 @@ async def handle_task_with_sender(terminal: WebTerminal, message, sender, client
sender('thinking_end', {'full_content': current_thinking})
await asyncio.sleep(0.1)
# ===== 增量保存:保存思考内容 =====
if current_thinking and not has_saved_thinking and is_first_iteration:
thinking_content = f"<think>\n{current_thinking}\n</think>"
web_terminal.context_manager.add_conversation("assistant", thinking_content)
has_saved_thinking = True
debug_log(f"💾 增量保存:思考内容 ({len(current_thinking)} 字符)")
expecting_modify = bool(pending_modify) or bool(getattr(web_terminal, "pending_modify_request", None))
expecting_append = bool(pending_append) or bool(getattr(web_terminal, "pending_append_request", None))
@ -2851,12 +2844,7 @@ async def handle_task_with_sender(terminal: WebTerminal, message, sender, client
# === API响应完成后只计算输出token ===
try:
# 计算AI输出的token包括thinking、文本内容、工具调用
ai_output_content = ""
if current_thinking:
ai_output_content += f"<think>\n{current_thinking}\n</think>\n"
if full_response:
ai_output_content += full_response
ai_output_content = full_response or append_result.get("assistant_content") or modify_result.get("assistant_content") or ""
if tool_calls:
ai_output_content += json.dumps(tool_calls, ensure_ascii=False)
@ -2892,12 +2880,6 @@ async def handle_task_with_sender(terminal: WebTerminal, message, sender, client
sender('thinking_end', {'full_content': current_thinking})
await asyncio.sleep(0.1)
# 保存思考内容
if current_thinking and not has_saved_thinking and is_first_iteration:
thinking_content = f"<think>\n{current_thinking}\n</think>"
web_terminal.context_manager.add_conversation("assistant", thinking_content)
has_saved_thinking = True
debug_log(f"💾 增量保存:延迟思考内容 ({len(current_thinking)} 字符)")
# 确保text_end事件被发送
if text_started and text_has_content and not append_result["handled"] and not modify_result["handled"]:
@ -2907,10 +2889,8 @@ async def handle_task_with_sender(terminal: WebTerminal, message, sender, client
await asyncio.sleep(0.1)
text_streaming = False
# ===== 增量保存:保存当前轮次的文本内容 =====
if full_response.strip():
web_terminal.context_manager.add_conversation("assistant", full_response)
debug_log(f"💾 增量保存:文本内容 ({len(full_response)} 字符)")
debug_log(f"流式文本内容长度: {len(full_response)} 字符")
if append_result["handled"]:
append_metadata = append_result.get("assistant_metadata")
@ -3066,8 +3046,8 @@ async def handle_task_with_sender(terminal: WebTerminal, message, sender, client
# 保存思考内容(如果这是第一次迭代且有思考)
if web_terminal.thinking_mode and web_terminal.api_client.current_task_first_call and current_thinking:
web_terminal.api_client.current_task_thinking = current_thinking
if web_terminal.thinking_mode and web_terminal.api_client.current_task_first_call:
web_terminal.api_client.current_task_thinking = current_thinking or ""
web_terminal.api_client.current_task_first_call = False
# 检测是否有格式错误的工具调用
@ -3101,9 +3081,6 @@ async def handle_task_with_sender(terminal: WebTerminal, message, sender, client
# 构建助手消息用于API继续对话
assistant_content_parts = []
if current_thinking:
assistant_content_parts.append(f"<think>\n{current_thinking}\n</think>")
if full_response:
assistant_content_parts.append(full_response)
elif append_result["handled"] and append_result["assistant_content"]:
@ -3122,6 +3099,14 @@ async def handle_task_with_sender(terminal: WebTerminal, message, sender, client
messages.append(assistant_message)
if assistant_content or current_thinking:
web_terminal.context_manager.add_conversation(
"assistant",
assistant_content,
tool_calls=tool_calls if tool_calls else None,
reasoning_content=current_thinking or None
)
if append_result["handled"] and append_result.get("tool_content"):
tool_call_id = append_result.get("tool_call_id") or f"append_{int(time.time() * 1000)}"
system_notice = format_tool_result_notice("append_to_file", tool_call_id, append_result["tool_content"])