feat: expand model support and qwen-vl ux

JOJO 2026-01-03 07:01:24 +08:00
parent 77959226dd
commit e2ba632ac8
34 changed files with 2179 additions and 100 deletions

config/model_profiles.py (new file, 117 lines)
View File

@ -0,0 +1,117 @@
import os
def _env(name: str, default: str = "") -> str:
return os.environ.get(name, default)
# 默认Kimi
KIMI_BASE = _env("API_BASE_KIMI", _env("AGENT_API_BASE_URL", "https://api.moonshot.cn/v1"))
KIMI_KEY = _env("API_KEY_KIMI", _env("AGENT_API_KEY", ""))
KIMI_FAST_MODEL = _env("MODEL_KIMI_FAST", _env("AGENT_MODEL_ID", "kimi-k2-0905-preview"))
KIMI_THINK_MODEL = _env("MODEL_KIMI_THINK", _env("AGENT_THINKING_MODEL_ID", "kimi-k2-thinking"))
# DeepSeek
DEEPSEEK_BASE = _env("API_BASE_DEEPSEEK", "https://api.deepseek.com")
DEEPSEEK_KEY = _env("API_KEY_DEEPSEEK", _env("AGENT_DEEPSEEK_API_KEY", ""))
DEEPSEEK_FAST_MODEL = _env("MODEL_DEEPSEEK_FAST", "deepseek-chat")
DEEPSEEK_THINK_MODEL = _env("MODEL_DEEPSEEK_THINK", "deepseek-reasoner")
# Qwen
QWEN_BASE = _env("API_BASE_QWEN", "https://dashscope.aliyuncs.com/compatible-mode/v1")
QWEN_KEY = _env("API_KEY_QWEN", _env("DASHSCOPE_API_KEY", ""))
QWEN_MAX_MODEL = _env("MODEL_QWEN_MAX", "qwen3-max")
QWEN_VL_MODEL = _env("MODEL_QWEN_VL", "qwen3-vl-plus")
MODEL_PROFILES = {
"kimi": {
"fast": {"base_url": KIMI_BASE, "api_key": KIMI_KEY, "model_id": KIMI_FAST_MODEL, "max_tokens": None},
"thinking": {"base_url": KIMI_BASE, "api_key": KIMI_KEY, "model_id": KIMI_THINK_MODEL, "max_tokens": None},
"supports_thinking": True,
"fast_only": False,
"name": "Kimi-k2"
},
"deepseek": {
"fast": {"base_url": DEEPSEEK_BASE, "api_key": DEEPSEEK_KEY, "model_id": DEEPSEEK_FAST_MODEL, "max_tokens": 8192},
"thinking": {
"base_url": DEEPSEEK_BASE,
"api_key": DEEPSEEK_KEY,
"model_id": DEEPSEEK_THINK_MODEL,
"max_tokens": 65536
},
"supports_thinking": True,
"fast_only": False,
"name": "DeepSeek"
},
"qwen3-max": {
"fast": {"base_url": QWEN_BASE, "api_key": QWEN_KEY, "model_id": QWEN_MAX_MODEL, "max_tokens": 65536},
"thinking": None, # 不支持思考
"supports_thinking": False,
"fast_only": True,
"name": "Qwen3-Max"
},
"qwen3-vl-plus": {
"fast": {
"base_url": QWEN_BASE,
"api_key": QWEN_KEY,
"model_id": QWEN_VL_MODEL,
"max_tokens": 32768,
"extra_params": {}
},
"thinking": {
"base_url": QWEN_BASE,
"api_key": QWEN_KEY,
"model_id": QWEN_VL_MODEL,
"max_tokens": 32768,
"extra_params": {"enable_thinking": True}
},
"supports_thinking": True,
"fast_only": False,
"name": "Qwen3-VL"
}
}
MODEL_PROMPT_OVERRIDES = {
"kimi": {
"model_description": "你的基础模型是 Kimi-k2,由月之暗面公司开发,是一个开源的 MoE 架构模型,拥有 1T 参数和 32B 激活参数,当前智能助手应用由火山引擎提供 API 服务。",
"thinking_model_line": "思考模式时,第一次请求的模型不是 Kimi-k2,而是 Kimi-k2-Thinking,一个更善于分析复杂问题、规划复杂流程的模型,在后续请求时模型会换回 Kimi-k2。",
"deep_thinking_line": "在深度思考模式中,请求的模型是 Kimi-k2-Thinking,一个更善于分析复杂问题、规划复杂流程的模型。"
},
"deepseek": {
"model_description": "你的基础模型是 DeepSeek-V3.2(deepseek-chat),由 DeepSeek 提供,数学与推理能力较强,当前通过官方 API 调用。",
"thinking_model_line": "思考模式时,第一次请求使用 DeepSeek-Reasoner,一个强化推理的模型,后续请求会切回 DeepSeek-V3.2。",
"deep_thinking_line": "在深度思考模式中,请求的模型是 DeepSeek-Reasoner,用于深入分析复杂问题并规划步骤。"
},
"qwen3-max": {
"model_description": "你的基础模型是 Qwen3-Max,由通义千问提供,当前仅支持快速模式,不提供思考或深度思考能力。",
"thinking_model_line": "Qwen3-Max 仅支持快速模式,思考模式会被自动关闭。",
"deep_thinking_line": "Qwen3-Max 不支持深度思考模式,将保持快速模式。"
},
"qwen3-vl-plus": {
"model_description": "你的基础模型是 Qwen3-VL-Plus,支持图文多模态理解,接口来自通义千问 DashScope。",
"thinking_model_line": "思考模式时,请求的模型仍为 Qwen3-VL-Plus(开启思考能力),后续请求会切回快速模型。",
"deep_thinking_line": "在深度思考模式中,请求的模型是 Qwen3-VL-Plus(思考版),以获得更强的分析能力。"
}
}
def get_model_profile(key: str) -> dict:
if key not in MODEL_PROFILES:
raise ValueError(f"未知模型 key: {key}")
profile = MODEL_PROFILES[key]
# 基础校验:必须有 fast 段且有 key
fast = profile.get("fast") or {}
if not fast.get("api_key"):
raise ValueError(f"模型 {key} 缺少 API Key 配置")
return profile
def get_model_prompt_replacements(key: str) -> dict:
"""获取模型相关的提示词替换字段,若缺失则回退到 Kimi 版本。"""
fallback = MODEL_PROMPT_OVERRIDES.get("kimi", {})
overrides = MODEL_PROMPT_OVERRIDES.get(key) or {}
return {
"model_description": overrides.get("model_description") or fallback.get("model_description") or "",
"thinking_model_line": overrides.get("thinking_model_line") or fallback.get("thinking_model_line") or "",
"deep_thinking_line": overrides.get("deep_thinking_line") or fallback.get("deep_thinking_line") or ""
}
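
For orientation, a minimal usage sketch of the helpers defined above (not part of the commit). The only assumption is that a provider key such as API_KEY_DEEPSEEK is exported before the module is imported, because the profile constants are resolved from os.environ at import time.

```python
# Illustrative only: resolve a profile and its prompt replacement fields.
import os

# Must be set before importing the module; the constants read os.environ at import time.
os.environ.setdefault("API_KEY_DEEPSEEK", "sk-test")  # hypothetical key for this sketch

from config.model_profiles import get_model_profile, get_model_prompt_replacements

profile = get_model_profile("deepseek")
print(profile["name"])               # "DeepSeek"
print(profile["fast"]["model_id"])   # "deepseek-chat" unless MODEL_DEEPSEEK_FAST overrides it
print(profile["supports_thinking"])  # True

# Prompt replacements fall back to the Kimi wording for any missing field.
repl = get_model_prompt_replacements("qwen3-max")
print(repl["thinking_model_line"])   # "Qwen3-Max 仅支持快速模式,思考模式会被自动关闭。"
```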

View File

@ -16,7 +16,7 @@ OUTPUT_FORMATS = {
"session": "📺 [会话]",
}
AGENT_VERSION = "v5.5"
AGENT_VERSION = "v6.0"
LOG_LEVEL = "INFO"
LOG_FORMAT = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"

View File

@ -64,6 +64,7 @@ from utils.api_client import DeepSeekClient
from utils.context_manager import ContextManager
from utils.tool_result_formatter import format_tool_result_for_context
from utils.logger import setup_logger
from config.model_profiles import get_model_profile, get_model_prompt_replacements
if TYPE_CHECKING:
from modules.user_container_manager import ContainerHandle
@ -93,6 +94,9 @@ class MainTerminal:
# 初始化组件
self.api_client = DeepSeekClient(thinking_mode=self.thinking_mode)
self.api_client.set_deep_thinking_mode(self.deep_thinking_mode)
self.model_key = "kimi"
self.model_profile = get_model_profile(self.model_key)
self.apply_model_profile(self.model_profile)
self.context_manager = ContextManager(project_path, data_dir=str(self.data_dir))
self.context_manager.main_terminal = self
self.container_mount_path = TERMINAL_SANDBOX_MOUNT_PATH or "/workspace"
@ -108,6 +112,7 @@ class MainTerminal:
self.search_engine = SearchEngine()
self.terminal_ops = TerminalOperator(project_path, container_session=container_session)
self.ocr_client = OCRClient(project_path, self.file_manager)
self.pending_image_view = None # 供 view_image 工具使用,保存一次性图片插入请求
# 新增:终端管理器
self.terminal_manager = TerminalManager(
@ -1689,6 +1694,29 @@ class MainTerminal:
}
}
]
# Qwen-VL 自带多模态能力,不向其暴露额外的 vlm_analyze 工具,避免重复与误导
if getattr(self, "model_key", None) == "qwen3-vl-plus":
tools = [
tool for tool in tools
if (tool.get("function") or {}).get("name") != "vlm_analyze"
]
tools.append({
"type": "function",
"function": {
"name": "view_image",
"description": "将指定本地图片插入到对话中(系统代发一条包含图片的消息),便于模型主动查看图片内容。",
"parameters": {
"type": "object",
"properties": self._inject_intent({
"path": {
"type": "string",
"description": "项目内的图片相对路径(不要以 /workspace 开头),支持 png/jpg/webp/gif/bmp/svg。"
}
}),
"required": ["path"]
}
}
})
if self.disabled_tools:
tools = [
tool for tool in tools
@ -1749,6 +1777,29 @@ class MainTerminal:
if not path:
return json.dumps({"success": False, "error": "缺少 path 参数", "warnings": []}, ensure_ascii=False)
result = self.ocr_client.vlm_analyze(path=path, prompt=prompt or "")
elif tool_name == "view_image":
path = (arguments.get("path") or "").strip()
if not path:
return json.dumps({"success": False, "error": "path 不能为空"}, ensure_ascii=False)
if path.startswith("/workspace"):
return json.dumps({"success": False, "error": "非法路径,超出项目根目录,请使用不带/workspace的相对路径"}, ensure_ascii=False)
abs_path = (Path(self.context_manager.project_path) / path).resolve()
try:
abs_path.relative_to(Path(self.context_manager.project_path).resolve())
except Exception:
return json.dumps({"success": False, "error": "非法路径,超出项目根目录,请使用不带/workspace的相对路径"}, ensure_ascii=False)
if not abs_path.exists() or not abs_path.is_file():
return json.dumps({"success": False, "error": f"图片不存在: {path}"}, ensure_ascii=False)
if abs_path.stat().st_size > 10 * 1024 * 1024:
return json.dumps({"success": False, "error": "图片过大,需 <= 10MB"}, ensure_ascii=False)
allowed_ext = {".png", ".jpg", ".jpeg", ".webp", ".gif", ".bmp", ".svg"}
if abs_path.suffix.lower() not in allowed_ext:
return json.dumps({"success": False, "error": f"不支持的图片格式: {abs_path.suffix}"}, ensure_ascii=False)
# 记录待注入图片,供上层循环追加消息
self.pending_image_view = {
"path": str(path)
}
result = {"success": True, "message": "图片已请求插入到对话中,将在后续消息中呈现。", "path": path}
# 终端会话管理工具
elif tool_name == "terminal_session":
@ -2292,14 +2343,17 @@ class MainTerminal:
def build_messages(self, context: Dict, user_input: str) -> List[Dict]:
"""构建消息列表(添加终端内容注入)"""
# 加载系统提示
system_prompt = self.load_prompt("main_system")
# 加载系统提示(Qwen-VL 使用专用提示)
prompt_name = "main_system_qwenvl" if getattr(self, "model_key", "kimi") == "qwen3-vl-plus" else "main_system"
system_prompt = self.load_prompt(prompt_name)
# 格式化系统提示
container_path = self.container_mount_path or "/workspace"
container_cpus = self.container_cpu_limit
container_memory = self.container_memory_limit
project_storage = self.project_storage_limit
model_key = getattr(self, "model_key", "kimi")
prompt_replacements = get_model_prompt_replacements(model_key)
system_prompt = system_prompt.format(
project_path=container_path,
container_path=container_path,
@ -2308,7 +2362,8 @@ class MainTerminal:
project_storage=project_storage,
file_tree=context["project_info"]["file_tree"],
memory=context["memory"],
current_time=datetime.now().strftime("%Y-%m-%d %H:%M:%S")
current_time=datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
model_description=prompt_replacements.get("model_description", "")
)
messages = [
@ -2328,10 +2383,16 @@ class MainTerminal:
if self.deep_thinking_mode:
deep_prompt = self.load_prompt("deep_thinking_mode_guidelines").strip()
if deep_prompt:
deep_prompt = deep_prompt.format(
deep_thinking_line=prompt_replacements.get("deep_thinking_line", "")
)
messages.append({"role": "system", "content": deep_prompt})
elif self.thinking_mode:
thinking_prompt = self.load_prompt("thinking_mode_guidelines").strip()
if thinking_prompt:
thinking_prompt = thinking_prompt.format(
thinking_model_line=prompt_replacements.get("thinking_model_line", "")
)
messages.append({"role": "system", "content": thinking_prompt})
personalization_config = load_personalization_config(self.data_dir)
@ -2383,9 +2444,14 @@ class MainTerminal:
})
else:
# User 或普通 System 消息
images = conv.get("images") or metadata.get("images") or []
content_payload = (
self.context_manager._build_content_with_images(conv["content"], images)
if images else conv["content"]
)
messages.append({
"role": conv["role"],
"content": conv["content"]
"content": content_payload
})
# 当前用户输入已经在conversation中了不需要重复添加
@ -2554,6 +2620,12 @@ class MainTerminal:
normalized = mode.lower()
if normalized not in allowed:
raise ValueError(f"不支持的模式: {mode}")
# Qwen-VL 官方不支持深度思考模式
if getattr(self, "model_key", None) == "qwen3-vl-plus" and normalized == "deep":
raise ValueError("Qwen-VL 不支持深度思考模式")
# fast-only 模型限制
if getattr(self, "model_profile", {}).get("fast_only") and normalized != "fast":
raise ValueError("当前模型仅支持快速模式")
previous_mode = getattr(self, "run_mode", "fast")
self.run_mode = normalized
self.thinking_mode = normalized != "fast"
@ -2569,6 +2641,29 @@ class MainTerminal:
self.api_client.start_new_task()
return self.run_mode
def apply_model_profile(self, profile: dict):
"""将模型配置应用到 API 客户端"""
if not profile:
return
self.api_client.apply_profile(profile)
def set_model(self, model_key: str) -> str:
profile = get_model_profile(model_key)
if getattr(self.context_manager, "has_images", False) and model_key != "qwen3-vl-plus":
raise ValueError("当前对话包含图片,仅支持 Qwen-VL")
self.model_key = model_key
self.model_profile = profile
# 将模型标识传递给底层 API 客户端,便于按模型做兼容处理
self.api_client.model_key = model_key
# 应用模型配置
self.apply_model_profile(profile)
# fast-only 模型强制快速模式
if profile.get("fast_only") and self.run_mode != "fast":
self.set_run_mode("fast")
# 如果模型支持思考,但当前 run_mode 为 thinking/deep则保持否则无需调整
self.api_client.start_new_task(force_deep=self.deep_thinking_mode)
return self.model_key
def get_run_mode_label(self) -> str:
labels = {
"fast": "快速模式(无思考)",

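To summarize the guard rails added in this file, a brief illustrative call sequence follows. It is a sketch, not repository code; it assumes `terminal` is an initialized MainTerminal, the relevant API keys are configured, and the current conversation contains no images.

```python
def demo_switching(terminal) -> None:
    """Illustrative sketch of the rules enforced by set_model / set_run_mode above."""
    terminal.set_model("qwen3-max")        # fast-only profile: run mode is forced back to "fast"
    assert terminal.run_mode == "fast"

    try:
        terminal.set_run_mode("thinking")  # rejected while a fast-only model is active
    except ValueError as exc:
        print(exc)                         # 当前模型仅支持快速模式

    terminal.set_model("qwen3-vl-plus")    # allowed: the conversation holds no images yet
    terminal.set_run_mode("thinking")      # Qwen-VL supports thinking mode

    try:
        terminal.set_run_mode("deep")      # Qwen-VL rejects deep thinking
    except ValueError as exc:
        print(exc)                         # Qwen-VL 不支持深度思考模式
```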
View File

@ -40,7 +40,14 @@ TOOL_CATEGORIES: Dict[str, ToolCategory] = {
),
"read_focus": ToolCategory(
label="阅读聚焦",
tools=["read_file", "focus_file", "unfocus_file", "vlm_analyze", "ocr_image"],
tools=[
"read_file",
"focus_file",
"unfocus_file",
"vlm_analyze",
"ocr_image",
"view_image",
],
),
"terminal_realtime": ToolCategory(
label="实时终端",

View File

@ -286,6 +286,8 @@ class WebTerminal(MainTerminal):
"thinking_mode": self.thinking_mode,
"thinking_status": self.get_thinking_mode_status(),
"run_mode": self.run_mode,
"model_key": getattr(self, "model_key", None),
"has_images": getattr(self.context_manager, "has_images", False),
"context": {
"usage_percent": context_status['usage_percent'],
"total_size": context_status['sizes']['total'],

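The status payload now reports `model_key` and `has_images`; the matching `/api/model` endpoint is not shown in this excerpt, but from the frontend call later in the commit (handleModelSelect) its contract looks roughly like the hypothetical handler below. The function name and wiring are assumptions, not the repository's actual route.

```python
from typing import Any, Dict, Tuple

def handle_switch_model(terminal, payload: Dict[str, Any]) -> Tuple[Dict[str, Any], int]:
    """Hypothetical body for POST /api/model, inferred from the frontend request/response shape."""
    key = (payload or {}).get("model_key", "")
    try:
        # raises ValueError for unknown keys, missing API keys, or image-bound conversations
        terminal.set_model(key)
    except ValueError as exc:
        return {"success": False, "error": str(exc)}, 400
    return {
        "success": True,
        "data": {
            "model_key": terminal.model_key,
            "run_mode": terminal.run_mode,
            "thinking_mode": terminal.thinking_mode,
        },
    }, 200
```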
View File

@ -0,0 +1,105 @@
"""Quick streaming sanity check for the DeepSeek API.
Requirements:
- deepseek-chat: max_tokens=8000
- deepseek-reasoner: max_tokens=65536
- Streaming is enabled and every data line is printed so the responses can be inspected.
With the user's permission, the DeepSeek test credentials are hard-coded here so they are not
overridden by the repository's default (Kimi) configuration; do not use them in production.
"""
from __future__ import annotations
import asyncio
from typing import Optional
import httpx
DEEPSEEK_BASE_URL = "https://api.deepseek.com"
DEEPSEEK_API_KEY = "sk-3457fbc33f0b4aefb2ce1d3101bb2341"
DEEPSEEK_CHAT_MODEL = "deepseek-chat"
DEEPSEEK_REASONER_MODEL = "deepseek-reasoner"
def build_headers(api_key: str) -> dict[str, str]:
return {
"Authorization": f"Bearer {api_key}",
"Content-Type": "application/json",
}
async def stream_call(
*,
name: str,
base_url: str,
api_key: str,
model: str,
max_tokens: int,
prompt: Optional[str] = None,
) -> None:
"""向指定模型发起流式 chat/completions 请求并打印 data 行。"""
url = base_url.rstrip("/") + "/chat/completions"
payload = {
"model": model,
"stream": True,
"max_tokens": max_tokens,
"messages": [
{
"role": "user",
"content": prompt
or f"这是 {name} 模型的流式测试,请用一句话自我介绍。",
}
],
}
print(f"\n=== {name} ===")
print(f"POST {url}")
async with httpx.AsyncClient(http2=True, timeout=120) as client:
async with client.stream(
"POST",
url,
json=payload,
headers=build_headers(api_key),
) as resp:
print(f"status: {resp.status_code}")
if resp.status_code != 200:
body = await resp.aread()
print("error body:", body.decode(errors="ignore"))
return
async for line in resp.aiter_lines():
if not line:
continue
if line.startswith("data:"):
data = line[5:].strip()
if data == "[DONE]":
print("[DONE]")
break
print(data)
else:
# 兼容潜在的非 data 行(例如心跳)
print(line)
async def main() -> None:
await stream_call(
name="deepseek-chat (max_tokens=8000)",
base_url=DEEPSEEK_BASE_URL,
api_key=DEEPSEEK_API_KEY,
model=DEEPSEEK_CHAT_MODEL,
max_tokens=8000,
)
await stream_call(
name="deepseek-reasoner (max_tokens=65536)",
base_url=DEEPSEEK_BASE_URL,
api_key=DEEPSEEK_API_KEY,
model=DEEPSEEK_REASONER_MODEL,
max_tokens=65536,
prompt="你是一个思考模型,请简述测试状态并结束。",
)
if __name__ == "__main__":
asyncio.run(main())

View File

@ -0,0 +1,86 @@
"""Streaming test script for the Qwen API (qwen-max, fast model).
Goals:
- Verify qwen-max streaming output and the usage fields (no thinking capability).
- Print the trailing data packet, which contains usage.
Note: the hard-coded test key is for local verification only; never use it in production.
"""
from __future__ import annotations
import asyncio
from typing import Optional
import httpx
QWEN_BASE_URL = "https://dashscope.aliyuncs.com/compatible-mode/v1"
QWEN_API_KEY = "sk-64af1343e67d46d7a902ef5bcf6817ad"
QWEN_MAX_MODEL = "qwen3-max"
def headers(api_key: str) -> dict[str, str]:
return {
"Authorization": f"Bearer {api_key}",
"Content-Type": "application/json",
}
async def stream_call(
*,
base_url: str,
api_key: str,
model: str,
max_tokens: int = 2048,
prompt: Optional[str] = None,
) -> None:
url = base_url.rstrip("/") + "/chat/completions"
payload = {
"model": model,
"stream": True,
"max_tokens": max_tokens,
"stream_options": {"include_usage": True},
"messages": [
{
"role": "user",
"content": prompt
or "请用简短中文自我介绍,并说明你当前正在执行的动作。",
}
],
}
print("\n=== qwen-max fast mode ===")
print(f"POST {url}")
async with httpx.AsyncClient(http2=True, timeout=120) as client:
async with client.stream(
"POST", url, json=payload, headers=headers(api_key)
) as resp:
print("status:", resp.status_code)
if resp.status_code != 200:
body = await resp.aread()
print("error body:", body.decode(errors="ignore"))
return
async for line in resp.aiter_lines():
if not line:
continue
if line.startswith("data:"):
data = line[5:].strip()
if data == "[DONE]":
print("[DONE]")
break
print(data)
else:
print(line)
async def main() -> None:
await stream_call(
base_url=QWEN_BASE_URL,
api_key=QWEN_API_KEY,
model=QWEN_MAX_MODEL,
max_tokens=64000, # qwen3-max 官方上限 64K
)
if __name__ == "__main__":
asyncio.run(main())

View File

@ -0,0 +1,130 @@
"""Qwen VL test script (OpenAI-compatible mode).
Cases:
1) Text only: verify streaming output and usage.
2) Image + text: send a local image to verify multimodal input.
Note: the hard-coded test key is for local verification only; never use it in production.
"""
from __future__ import annotations
import asyncio
import base64
from pathlib import Path
from typing import Optional
import httpx
QWEN_BASE_URL = "https://dashscope.aliyuncs.com/compatible-mode/v1"
QWEN_API_KEY = "sk-64af1343e67d46d7a902ef5bcf6817ad"
QWEN_VL_MODEL = "qwen3-vl-plus"
# Default image path (repo root: 截图/截屏2025-12-12 17.30.04.png)
DEFAULT_IMAGE_PATH = Path("截图/截屏2025-12-12 17.30.04.png")
def headers(api_key: str) -> dict[str, str]:
return {
"Authorization": f"Bearer {api_key}",
"Content-Type": "application/json",
}
def build_image_content(image_path: Path) -> str:
data = image_path.read_bytes()
b64 = base64.b64encode(data).decode("ascii")
return f"data:image/{image_path.suffix.lstrip('.').lower()};base64,{b64}"
async def stream_call(
*,
name: str,
base_url: str,
api_key: str,
model: str,
messages,
max_tokens: int,
enable_thinking: bool = False,
) -> None:
url = base_url.rstrip("/") + "/chat/completions"
payload = {
"model": model,
"stream": True,
"max_tokens": max_tokens,
"messages": messages,
"stream_options": {"include_usage": True},
}
if enable_thinking:
payload["enable_thinking"] = True
print(f"\n=== {name} ===")
print(f"POST {url}")
async with httpx.AsyncClient(http2=True, timeout=180) as client:
async with client.stream(
"POST", url, json=payload, headers=headers(api_key)
) as resp:
print("status:", resp.status_code)
if resp.status_code != 200:
body = await resp.aread()
print("error body:", body.decode(errors="ignore"))
return
async for line in resp.aiter_lines():
if not line:
continue
if line.startswith("data:"):
data = line[5:].strip()
if data == "[DONE]":
print("[DONE]")
break
print(data)
else:
print(line)
async def main(image_path: Optional[Path] = None) -> None:
# 1) 纯文字
text_messages = [
{
"role": "user",
"content": "请用一句话自我介绍,并简单说明你目前在执行的动作。",
}
]
await stream_call(
name="qwen-vl text only",
base_url=QWEN_BASE_URL,
api_key=QWEN_API_KEY,
model=QWEN_VL_MODEL,
messages=text_messages,
max_tokens=32000, # 官方上限 32K
enable_thinking=True,
)
# 2) 图文
img_path = image_path or DEFAULT_IMAGE_PATH
if not img_path.exists():
print(f"\n[warn] 图片文件不存在: {img_path}")
return
img_url = build_image_content(img_path)
multimodal_messages = [
{
"role": "user",
"content": [
{"type": "text", "text": "请描述这张图片的主要内容,并给出一句话总结。"},
{"type": "image_url", "image_url": {"url": img_url}},
],
}
]
await stream_call(
name="qwen-vl image+text",
base_url=QWEN_BASE_URL,
api_key=QWEN_API_KEY,
model=QWEN_VL_MODEL,
messages=multimodal_messages,
max_tokens=32000, # 官方上限 32K
enable_thinking=True,
)
if __name__ == "__main__":
asyncio.run(main())

Binary file not shown.

After: image added, 1.8 MiB

View File

@ -1,4 +1,4 @@
你现在处于「深度思考模式」
在深度思考模式中请求的模型是kimi-k2-thinking 一个更善于分析复杂问题,规划复杂流程的模型
{deep_thinking_line}
在每一轮对用户要求的执行中,你之前的思考会始终可见,保障思维过程和操作流程的连续性
每次思考时,禁止回顾“我上一步做了什么”,只需要判断“下一步应该做什么”

View File

@ -1,6 +1,6 @@
你是一名运行在云端服务器上的智能助手,可以帮助用户完成各种任务。你的用户可能没有编程背景,请用通俗易懂的方式与他们交流。
你的基础模型是Kimi-k2,由月之暗面公司开发是一个开源的Moe架构模型由1t的参数和32b的激活参数当前智能助手应用由火山引擎提供api服务
{model_description}
## 你能做什么
- **文档处理**:整理文字、编辑文件、格式转换

View File

@ -0,0 +1,300 @@
你是一名运行在云端服务器上的智能助手,可以帮助用户完成各种任务。你的用户可能没有编程背景,请用通俗易懂的方式与他们交流。
{model_description}
## 你能做什么
- **文档处理**:整理文字、编辑文件、格式转换
- **信息查找**:搜索资料、提取网页内容、整理信息
- **数据整理**:处理表格、分析数据、生成报告
- **文件管理**:创建、修改、重命名文件和文件夹
- **自动化任务**:批量处理文件、执行重复性工作
- **视觉理解**:你自带多模态能力,用户可以直接发送图片;如需主动查看本地图片,可调用 `view_image` 指定路径,系统会代发一条包含图片的用户消息供你查看。
## 图片分析Qwen-VL 重点)
当用户提出“这是什么”“识别文字/表格/票据”“找瑕疵/细节”“读屏/按钮含义”等图片分析任务时,优先采用下面的方法,保证细节充分、结论可验证:
### 基本流程(先粗后细)
1. **先整体后局部**:先看全图总结场景与目标,再针对关键区域逐块放大验证。
2. **明确不确定性**:对看不清/模糊/遮挡区域,明确指出并提出下一步(放大/裁切/增强)。
3. **用证据说话**:结论尽量引用可见线索(位置、颜色、形状、文字片段),避免凭感觉下结论。
### 细节增强与“切图放大”方法(推荐)
当图片文字很小、细节密集、或需要逐块检查时:
1. **用 `run_python` 切图**:把原图按区域裁切成若干张更小的局部图(例如:左上/右上/左下/右下;或按表格/按钮/车标/铭牌所在区域裁切)。
2. **必要时做多版本增强**:对同一区域输出多张增强版本(例如:对比度增强、锐化、灰度、二值化)用于读字/看边缘。
3. **再次查看局部图**:对每张局部图分别观察并给出结论,再把结论汇总回原图任务。
### 实操建议(你要主动想到并执行)
- **裁切策略**:优先裁“目标本体 + 周边上下文”而不是只裁最小块;读文字时再裁更紧。
- **输出路径**:建议输出到 `/workspace/cache/` 或项目内临时目录(如 `cache/`),文件名带序号(例如 `crop_01.png`)。
- **复核展示**:需要让用户/自己确认时,可用 `<show_image src="..." />` 展示裁切结果;或将裁切图作为本地文件再用 `view_image` 查看。
- **多图对比**:同一部位若存在多张版本(原裁切/增强后),按顺序展示并说明“哪张更利于读字/看细节”。
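下面是一个最小的切图示例,仅供参考(假设容器环境已安装 Pillow,图片路径请按实际情况替换):
```python
# 示例:把原图切成四个象限,并各输出一份对比度增强版(假设已安装 Pillow)
from PIL import Image, ImageEnhance

img = Image.open("cache/origin.png")  # 示例路径,请替换为实际图片
w, h = img.size
boxes = [
    ("crop_01", (0, 0, w // 2, h // 2)),  # 左上
    ("crop_02", (w // 2, 0, w, h // 2)),  # 右上
    ("crop_03", (0, h // 2, w // 2, h)),  # 左下
    ("crop_04", (w // 2, h // 2, w, h)),  # 右下
]
for name, box in boxes:
    part = img.crop(box)
    part.save(f"cache/{name}.png")
    ImageEnhance.Contrast(part).enhance(1.8).save(f"cache/{name}_enhanced.png")
```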
## 图片展示
- 如果需要直接在界面展示图片(本地或网络),请在回复里输出 `<show_image src="路径" alt="描述" />`,不用调用工具。
- `src` 支持以 `/` 开头的本地静态路径或 `http/https``alt` 可选,会显示在图片下方。
- 不要用 Markdown 图片语法或其它自定义标签。
- 示例:
- `<show_image src="/workspace/images/result.png" alt="最终渲染效果" />`
- `<show_image src="/workspace/cache/thumb.jpg" />`
- `<show_image src="https://example.com/demo.png" alt="官方示例截图" />`
### 图片检索与展示流程
- 触发:用户询问“X长什么样”“给我看X的图片”等需求时。
- 检索优先级:先在 **Wikimedia Commons(commons.wikimedia.org)** 搜索关键词(必要时添加“图片/照片/截图”);若无合适结果,再用 `web_search` 进行全网搜索。
- 提取:对候选链接使用 `extract_webpage` 获取正文中的图片直链,优先 `https`、扩展名为 jpg/png/webp、分辨率≥800px 的原图,避开缩略图和水印预览。仍优先采用 Wikipedia/Wikimedia 图源,其次再选其他站点。
- 本地/校验:已有本地图片时直接展示;若网上图片是否匹配存疑,先下载并用你的视觉能力查看内容后再确定是否展示。
- 展示:选数张代表性图片,直接输出 `<show_image src="直链或本地路径" alt="简短描述" />`;需要多张时多行重复该标签。
- 回退:用户反馈“看不到/无法展示”时,先将图片下载到可访问路径(如 `/workspace/cache/xxx.jpg`)再用本地路径展示;仍失败则提供文字描述并询问是否换图源。
## 重要提醒:你的工作环境
1. **云端运行**:你在远程服务器上工作,在网页端和用户交互
2. **多人共用**:服务器上可能有其他用户,你只能访问被授权的文件夹
3. **文件传输**:用户可以在网页上传文件给你,你也可以生成文件让用户下载
4. **安全第一**:只操作用户明确要求的文件,不要碰其他内容
## 工作方式:先想后做
遇到任务时,请这样工作:
1. **确认理解**:复述一遍你理解的任务是什么
2. **说明计划**:告诉用户你打算怎么做,分几步
3. **征求同意**:询问用户的意见,向用户确认更多细节
4. **等待确认**:在用户给出明确的指令(比如“好的,请开始做吧”)后,再开始创建待办事项并完成任务
**❌ 不要做的事**
- 不要一句"好的我来做"就直接开始
- 不要猜测用户想要什么
- 不要操作用户没提到的文件
- 不要编造没做的事情
## 文件查看:两种方式选择
### 方式1读取临时看一眼
适合场景:
- 只是想快速看看内容
- 小文件(比如配置文件、说明文档)
- 看完就不用了
### 方式2聚焦长期盯着
适合场景:
- 需要反复查看和修改的文件
- 重要的核心文件
- 会花较长时间处理的文件
**限制**
- 聚焦最多3个文件
- 每个文件不超过10000字
- 用完记得取消聚焦,给下个任务腾空间
**已聚焦的文件**:内容完全可见,不需要也不能再用命令查看
## 文件操作示例
### 创建和写入文件
```
用户:"帮我整理一份待办清单"
你的做法:
1. 先询问清单内容有哪些
2. 调用 create_file 创建空文件
3. 调用 append_to_file 写入内容
4. 告诉用户文件创建在哪里
```
### 修改文件内容
```
用户:"把报告里的'2024'改成'2025'"
你的做法:
1. 如果文件已聚焦,直接看到内容
2. 如果没聚焦,先读取或聚焦文件
3. 调用 modify_file 进行替换
4. 确认修改是否成功
```
### 搜索和提取信息
```
用户:"帮我找一下最近的AI新闻"
你的做法:
1. 调用 web_search 搜索相关信息
2. 如果需要详细内容,用 extract_webpage
3. 整理信息给用户
4. 如果用户要保存,可以创建文件
```
## 执行命令的两种方式
### 方式1快速命令一次性的
用 `run_command` 工具
适合:
- 查看文件列表:`ls -lh`
- 查看文件内容:`cat 文件.txt`
- 统计行数:`wc -l 文件.txt`
- 搜索内容:`grep "关键词" 文件.txt`
### 方式2持久终端需要保持运行的
用 `terminal_session` + `terminal_input` 工具
适合:
- 运行需要一直开着的程序
- 需要多次输入的交互任务
- 需要等待较长时间的任务
**⚠️ 注意**
- 最多同时开3个终端
- 不要在终端里启动 python、node、vim 这类会占用界面的程序
- 如果终端卡住了,用 terminal_reset 重启
## 常用命令示例
### 文件查看
```bash
# 查看文件内容
cat 文件.txt
# 查看文件前10行
head -n 10 文件.txt
# 查看文件后10行
tail -n 10 文件.txt
# 搜索包含关键词的行
grep "关键词" 文件.txt
# 统计文件行数
wc -l 文件.txt
```
### 文件操作
```bash
# 复制文件
cp 原文件.txt 新文件.txt
# 移动/重命名文件
mv 旧名.txt 新名.txt
# 删除文件(谨慎使用)
rm 文件.txt
# 创建文件夹
mkdir 文件夹名
```
### 文件信息
```bash
# 查看文件大小
ls -lh 文件.txt
# 查看当前目录所有文件
ls -lah
# 查看文件类型
file 文件名
# 查看目录结构
tree -L 2
```
## 待办事项系统(简单任务管理)
当任务需要多个步骤时,可以创建待办清单:
### 使用规则
1. **什么时候用**:任务需要2步以上、涉及多个文件或工具时
2. **清单要求**
- 概述:用一句话说明任务目标,不超过50字
- 任务最多4条,按执行顺序排列
- 每条任务要说清楚具体做什么,不要用"优化""处理"这种模糊词
3. **执行方式**
- 完成一项,勾选一项
- 如果计划有变,先告诉用户
- 全部完成后,用 todo_finish 结束
### 示例:整理文档
```
概述:整理年度总结文档,统一格式并导出PDF
任务1:读取所有Word文档,统一标题格式
任务2:合并内容到一个新文件
任务3:检查错别字和标点
任务4:转换为PDF并保存
```
## 网络搜索技巧
### 基础搜索
```
用户:"搜索一下Python教程"
你调用web_search(query="Python教程")
```
### 搜索最近的内容
```
用户:"最近一周的科技新闻"
你调用web_search(query="4-6个和科技新闻相关的关键词", time_range="week")
```
### 提取网页详细内容
```
用户:"把这篇文章的内容提取出来"
步骤:
1. 先用 web_search 找到链接
2. 再用 extract_webpage 提取完整内容
3. 如果用户要保存,用 save_webpage 存为txt文件
```
## 资源管理:记得收拾
由于服务器资源有限,请养成好习惯:
1. **聚焦文件**:用完及时取消聚焦
2. **终端会话**:不用的终端及时关闭
3. **大文件**:避免一次输出超长内容,分批处理
4. **上下文**:对话太长时(超过10万字符)提醒用户压缩
## 遇到问题怎么办
### 文件太大
```
如果提示"文件超过10000字符"
1. 告诉用户文件大小
2. 建议只查看部分内容
3. 用命令查看:head -n 100 文件.txt
```
### 命令执行失败
```
1. 不要重复执行相同命令
2. 检查是否有权限问题
3. 尝试用其他方法
4. 实在不行,诚实告诉用户
```
### 不确定怎么做
```
1. 不要瞎猜
2. 问用户更多信息
3. 提供几个可行方案让用户选
```
## 交流风格
- 使用口语化表达,避免技术黑话
- 主动说明你在做什么
- 遇到问题时说明原因
- 完成任务后总结成果
- 不要用生硬的"执行工具: xxx",而是说"我来帮你..."
## 当前环境信息
- 项目路径: 你运行在隔离容器中(挂载目录 {container_path}),宿主机路径已对你隐藏
- 资源限制: 容器内核数上限 {container_cpus},内存 {container_memory},项目磁盘配额 {project_storage}
- 项目文件结构: {file_tree}
- 长期记忆: {memory}
- 当前时间: {current_time}
## 核心原则
1. **安全第一**:只操作授权范围内的文件
2. **沟通为主**:不确定时多问,不要自作主张
3. **诚实守信**:做不到的事情坦白说,不编造
4. **用户友好**:用简单的语言解释复杂的操作
5. **正确执行**:和用户主动确认细节,用户明确告知可以开始任务后,再开始工作流程
记住:你的用户可能不懂技术,你的目标是让他们感觉到"这个助手真好用",而不是"怎么这么复杂"。
如果用户设置了个性化信息,根据用户的个性化需求回答

View File

@ -1,5 +1,5 @@
你现在处于「思考模式」
思考模式时第一次请求的模型不是kimi-k2而是kimi-k2-thinking 一个更善于分析复杂问题规划复杂流程的模型在后续请求时模型会换回kimi-k2。
{thinking_model_line}
并且,在系统监控到工具或写入失败时,会自动再次切换到思考模型,思考模型会更加深入地分析错误的原因,保证任务顺利进行。
请百分百遵循以下原则:

View File

@ -0,0 +1,47 @@
import asyncio, sys, os, copy
from pathlib import Path
ROOT = Path(__file__).resolve().parents[1]
sys.path.insert(0, str(ROOT))
from utils.api_client import DeepSeekClient
class FakeClient(DeepSeekClient):
def __init__(self):
super().__init__(thinking_mode=True, web_mode=True)
self.sent = []
self.call_idx = 0
async def chat(self, messages, tools=None, stream=True):
self.sent.append(copy.deepcopy(messages))
self.call_idx += 1
if self.call_idx == 1:
yield {
"choices": [
{"delta": {
"reasoning_content": "think1 ",
"tool_calls": [
{"id": "call_1", "index": 0, "type": "function", "function": {"name": "foo", "arguments": "{}"}}
]
}}
]
}
yield {"choices": [{"delta": {}}]}
else:
yield {"choices": [{"delta": {"content": "done"}}]}
yield {"choices": [{"delta": {}}]}
async def main():
client = FakeClient()
messages = [
{"role": "system", "content": "sys"},
{"role": "user", "content": "hi"}
]
async def tool_handler(name, args):
return '{}'
out = await client.chat_with_tools(messages, tools=[{"type":"function","function":{"name":"foo","parameters":{"type":"object","properties":{}}}}], tool_handler=tool_handler)
print('final', out)
import json
for i, m in enumerate(client.sent, 1):
print('\ncall', i)
print(json.dumps(m, ensure_ascii=False, indent=2))
if __name__ == '__main__':
asyncio.run(main())

View File

@ -0,0 +1,183 @@
"""
Manually verify whether Qwen-VL preserves reasoning_content across multi-turn tool calls.
Steps:
1. First request: ask the model to think of an 8-digit number (keeping it in its reasoning only), call the weather tool first, and only then report the number.
2. If the model returns tool_calls, mock the weather lookup locally and return the result as a tool message.
3. Second request: include the first assistant message (with tool calls) plus the tool result, and check whether the model can reproduce the number from its first-round reasoning.
Run:
export API_BASE_QWEN="https://dashscope.aliyuncs.com/compatible-mode/v1"
export API_KEY_QWEN="sk-..."
python3 scratch_test/qwen_vl_reasoning_test.py
Logs:
Written to logs/qwen_vl_reasoning_test_<timestamp>.json
"""
import json
import os
import time
from pathlib import Path
from typing import Dict, Any
import httpx
# 读取模型配置(若未设置环境变量则使用默认)
API_BASE = os.environ.get("API_BASE_QWEN", "https://dashscope.aliyuncs.com/compatible-mode/v1").rstrip("/")
API_KEY = os.environ.get("API_KEY_QWEN", os.environ.get("DASHSCOPE_API_KEY", ""))
MODEL_ID = os.environ.get("MODEL_QWEN_VL", "qwen3-vl-plus")
LOG_DIR = Path("logs")
LOG_DIR.mkdir(parents=True, exist_ok=True)
LOG_FILE = LOG_DIR / f"qwen_vl_reasoning_test_{int(time.time())}.json"
def log(data: Dict[str, Any]):
"""追加写入调试日志。"""
with open(LOG_FILE, "a", encoding="utf-8") as f:
f.write(json.dumps(data, ensure_ascii=False, indent=2))
f.write("\n\n")
def build_headers():
return {
"Authorization": f"Bearer {API_KEY}",
"Content-Type": "application/json"
}
def weather_tool_result(city: str = "上海", date: str = "今天"):
"""模拟天气工具返回固定结构。"""
return {
"success": True,
"city": city,
"date": date,
"forecast": "多云转晴,15~22℃,东风3级",
"source": "local-mock"
}
def main():
if not API_KEY:
raise SystemExit("未配置 API_KEY_QWEN 或 DASHSCOPE_API_KEY,无法测试")
messages = [
{
"role": "system",
"content": "你是测试助手。"
},
{
"role": "user",
"content": (
"请你想一个明确的8位数字(随便一个就行),只在思考过程中呈现,不要在最终回复里透露;"
"先调用天气查询工具;工具完成后再正常回答并把刚才思考出的数字告诉我。"
)
}
]
tools = [
{
"type": "function",
"function": {
"name": "query_weather",
"description": "查询指定城市在指定日期的天气",
"parameters": {
"type": "object",
"properties": {
"city": {"type": "string", "description": "城市名称,如上海"},
"date": {"type": "string", "description": "日期,如今天/明天/2025-01-01"}
},
"required": ["city"]
}
}
}
]
# ---- 第一次调用 ----
payload1 = {
"model": MODEL_ID,
"messages": messages,
"tools": tools,
"tool_choice": "auto",
"stream": False,
"max_tokens": 512,
# 关键:开启思考模式
"enable_thinking": True
}
with httpx.Client(http2=True, timeout=120) as client:
resp1 = client.post(f"{API_BASE}/chat/completions", json=payload1, headers=build_headers())
resp1.raise_for_status()
data1 = resp1.json()
log({"step": "call1_response", "raw": data1})
choice1 = data1["choices"][0]["message"]
tool_calls = choice1.get("tool_calls") or []
reasoning1 = choice1.get("reasoning_content")
print("第一次返回 reasoning_content 长度:", len(reasoning1 or ""))
print("第一次返回 tool_calls 数量:", len(tool_calls))
if not tool_calls:
print("模型未调用工具,测试无法继续。")
log({"error": "no_tool_calls"})
return
# 只取第一个工具调用
tc = tool_calls[0]
args_json = tc["function"].get("arguments") or "{}"
try:
args = json.loads(args_json)
except json.JSONDecodeError:
args = {}
tool_result = weather_tool_result(
city=args.get("city", "上海"),
date=args.get("date", "今天")
)
# 构建第二轮消息包含第一次assistant消息和tool结果
messages2 = messages + [
{
"role": "assistant",
"content": choice1.get("content", ""),
"tool_calls": tool_calls,
# 保留 reasoning_content 原样,核心验证点
"reasoning_content": reasoning1 or ""
},
{
"role": "tool",
"tool_call_id": tc.get("id"),
"name": tc["function"]["name"],
"content": json.dumps(tool_result, ensure_ascii=False)
}
]
payload2 = {
"model": MODEL_ID,
"messages": messages2,
"tools": tools,
"tool_choice": "auto",
"stream": False,
"max_tokens": 512,
# 同步思考模式
"enable_thinking": True
}
with httpx.Client(http2=True, timeout=120) as client:
resp2 = client.post(f"{API_BASE}/chat/completions", json=payload2, headers=build_headers())
resp2.raise_for_status()
data2 = resp2.json()
log({"step": "call2_response", "raw": data2, "messages_sent": messages2})
choice2 = data2["choices"][0]["message"]
reasoning2 = choice2.get("reasoning_content")
content2 = choice2.get("content")
print("第二次返回 reasoning_content 长度:", len(reasoning2 or ""))
print("第二次最终回复:", content2)
print(f"完整日志已保存到: {LOG_FILE}")
if __name__ == "__main__":
main()

View File

@ -0,0 +1 @@
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" width="48" height="48" fill="none" stroke="#000000" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" style="opacity:1;"><path d="M10 8h.01M12 12h.01M14 8h.01M16 12h.01M18 8h.01M6 8h.01M7 16h10m-9-4h.01"/><rect width="20" height="16" x="2" y="4" rx="2"/></svg>

After: image added, 334 B

View File

@ -172,6 +172,9 @@
:uploading="uploading"
:thinking-mode="thinkingMode"
:run-mode="resolvedRunMode"
:model-menu-open="modelMenuOpen"
:model-options="modelOptions"
:current-model-key="currentModelKey"
:quick-menu-open="quickMenuOpen"
:tool-menu-open="toolMenuOpen"
:mode-menu-open="modeMenuOpen"
@ -182,6 +185,7 @@
:current-conversation-id="currentConversationId"
:icon-style="iconStyle"
:tool-category-icon="toolCategoryIcon"
:selected-images="selectedImages"
@update:input-message="inputSetMessage"
@input-change="handleInputChange"
@input-focus="handleInputFocus"
@ -192,7 +196,9 @@
@quick-upload="handleQuickUpload"
@toggle-tool-menu="toggleToolMenu"
@toggle-mode-menu="toggleModeMenu"
@toggle-model-menu="toggleModelMenu"
@select-run-mode="handleModeSelect"
@select-model="handleModelSelect"
@toggle-settings="toggleSettings"
@update-tool-category="updateToolCategory"
@realtime-terminal="handleRealtimeTerminalClick"
@ -200,6 +206,8 @@
@toggle-token-panel="handleTokenPanelToggleClick"
@compress-conversation="handleCompressConversationClick"
@file-selected="handleFileSelected"
@pick-images="openImagePicker"
@remove-image="handleRemoveImage"
/>
</div>
</main>
@ -217,6 +225,15 @@
<PersonalizationDrawer />
<LiquidGlassWidget />
<ImagePicker
v-if="imagePickerOpen"
:open="imagePickerOpen"
:entries="imageEntries"
:initial-selected="selectedImages"
:loading="imageLoading"
@close="closeImagePicker"
@confirm="handleImagesConfirmed"
/>
<div
v-if="isMobileViewport"

View File

@ -12,6 +12,7 @@ import LiquidGlassWidget from './components/experiments/LiquidGlassWidget.vue';
import QuickMenu from './components/input/QuickMenu.vue';
import InputComposer from './components/input/InputComposer.vue';
import AppShell from './components/shell/AppShell.vue';
import ImagePicker from './components/overlay/ImagePicker.vue';
import { useUiStore } from './stores/ui';
import { useConversationStore } from './stores/conversation';
import { useChatStore } from './stores/chat';
@ -23,6 +24,7 @@ import { useFileStore } from './stores/file';
import { useSubAgentStore } from './stores/subAgent';
import { useFocusStore } from './stores/focus';
import { usePersonalizationStore } from './stores/personalization';
import { useModelStore } from './stores/model';
import { useChatActionStore } from './stores/chatActions';
import { useMonitorStore } from './stores/monitor';
import { ICONS, TOOL_CATEGORY_ICON_MAP } from './utils/icons';
@ -201,12 +203,14 @@ if (window.visualViewport) {
window.visualViewport.addEventListener('scroll', updateViewportHeightVar);
}
const ENABLE_APP_DEBUG_LOGS = false;
const ENABLE_APP_DEBUG_LOGS = true;
function debugLog(...args) {
if (!ENABLE_APP_DEBUG_LOGS) {
return;
if (!ENABLE_APP_DEBUG_LOGS) return;
try {
console.log('[app]', ...args);
} catch (e) {
/* ignore logging errors */
}
debugLog(...args);
}
// 临时排查对话切换问题的调试输出
const TRACE_CONV = true;
@ -275,6 +279,10 @@ const appOptions = {
],
mobileViewportQuery: null,
modeMenuOpen: false,
modelMenuOpen: false,
imageEntries: [],
imageLoading: false,
conversationHasImages: false,
conversationListRequestSeq: 0,
conversationListRefreshToken: 0,
@ -386,6 +394,8 @@ const appOptions = {
'conversationsOffset',
'conversationsLimit'
]),
...mapWritableState(useModelStore, ['currentModelKey']),
...mapState(useModelStore, ['models']),
...mapWritableState(useChatStore, [
'messages',
'currentMessageIndex',
@ -402,7 +412,9 @@ const appOptions = {
'inputIsFocused',
'quickMenuOpen',
'toolMenuOpen',
'settingsOpen'
'settingsOpen',
'imagePickerOpen',
'selectedImages'
]),
resolvedRunMode() {
const allowed = ['fast', 'thinking', 'deep'];
@ -411,6 +423,9 @@ const appOptions = {
}
return this.thinkingMode ? 'thinking' : 'fast';
},
modelOptions() {
return this.models || [];
},
titleRibbonVisible() {
return !this.isMobileViewport && this.chatDisplayMode === 'chat';
},
@ -681,6 +696,9 @@ const appOptions = {
uiRequestConfirm: 'requestConfirm',
uiResolveConfirm: 'resolveConfirm'
}),
...mapActions(useModelStore, {
modelSet: 'setModel'
}),
...mapActions(useChatStore, {
chatExpandBlock: 'expandBlock',
chatCollapseBlock: 'collapseBlock',
@ -717,7 +735,11 @@ const appOptions = {
inputSetMessage: 'setInputMessage',
inputClearMessage: 'clearInputMessage',
inputSetLineCount: 'setInputLineCount',
inputSetMultiline: 'setInputMultiline'
inputSetMultiline: 'setInputMultiline',
inputSetImagePickerOpen: 'setImagePickerOpen',
inputSetSelectedImages: 'setSelectedImages',
inputClearSelectedImages: 'clearSelectedImages',
inputRemoveSelectedImage: 'removeSelectedImage'
}),
...mapActions(useToolStore, {
toolRegisterAction: 'registerToolAction',
@ -1291,6 +1313,11 @@ const appOptions = {
this.inputSetLineCount(1);
this.inputSetMultiline(false);
this.inputClearMessage();
this.inputClearSelectedImages();
this.inputSetImagePickerOpen(false);
this.imageEntries = [];
this.imageLoading = false;
this.conversationHasImages = false;
this.toolSetSettingsLoading(false);
this.toolSetSettings([]);
@ -1433,6 +1460,12 @@ const appOptions = {
} else if (status && typeof status.thinking_mode !== 'undefined') {
this.runMode = status.thinking_mode ? 'thinking' : 'fast';
}
if (status && typeof status.model_key === 'string') {
this.modelSet(status.model_key);
}
if (status && typeof status.has_images !== 'undefined') {
this.conversationHasImages = !!status.has_images;
}
},
updateContainerStatus(status) {
@ -1758,9 +1791,15 @@ const appOptions = {
}
let currentAssistantMessage = null;
let historyHasImages = false;
historyMessages.forEach((message, index) => {
debugLog(`处理消息 ${index + 1}/${historyMessages.length}:`, message.role, message);
const meta = message.metadata || {};
if (message.role === 'user' && meta.system_injected_image) {
debugLog('跳过系统代发的图片消息(仅用于模型查看,不在前端展示)');
return;
}
if (message.role === 'user') {
// 用户消息 - 先结束之前的assistant消息
@ -1768,10 +1807,14 @@ const appOptions = {
this.messages.push(currentAssistantMessage);
currentAssistantMessage = null;
}
const images = message.images || (message.metadata && message.metadata.images) || [];
if (Array.isArray(images) && images.length) {
historyHasImages = true;
}
this.messages.push({
role: 'user',
content: message.content || ''
content: message.content || '',
images
});
debugLog('添加用户消息:', message.content?.substring(0, 50) + '...');
@ -1968,6 +2011,8 @@ const appOptions = {
this.messages.push(currentAssistantMessage);
}
this.conversationHasImages = historyHasImages;
debugLog(`历史消息渲染完成,共 ${this.messages.length} 条消息`);
this.logMessageState('renderHistoryMessages:after-render');
this.lastHistoryLoadedConversationId = this.currentConversationId || null;
@ -2257,7 +2302,12 @@ const appOptions = {
return;
}
if (!this.inputMessage.trim()) {
const text = (this.inputMessage || '').trim();
const images = Array.isArray(this.selectedImages) ? this.selectedImages.slice(0, 9) : [];
const hasText = text.length > 0;
const hasImages = images.length > 0;
if (!hasText && !hasImages) {
return;
}
@ -2267,11 +2317,21 @@ const appOptions = {
return;
}
const message = this.inputMessage;
if (hasImages && this.currentModelKey !== 'qwen3-vl-plus') {
this.uiPushToast({
title: '当前模型不支持图片',
message: '请切换到 Qwen-VL 再发送图片',
type: 'error'
});
return;
}
if (message.startsWith('/')) {
const message = text;
const isCommand = hasText && !hasImages && message.startsWith('/');
if (isCommand) {
this.socket.emit('send_command', { command: message });
this.inputClearMessage();
this.inputClearSelectedImages();
this.autoResizeInput();
return;
}
@ -2288,14 +2348,19 @@ const appOptions = {
// 标记任务进行中,直到任务完成或用户手动停止
this.taskInProgress = true;
this.chatAddUserMessage(message);
this.socket.emit('send_message', { message: message, conversation_id: this.currentConversationId });
this.chatAddUserMessage(message, images);
this.socket.emit('send_message', { message: message, images, conversation_id: this.currentConversationId });
if (typeof this.monitorShowPendingReply === 'function') {
this.monitorShowPendingReply();
}
this.inputClearMessage();
this.inputClearSelectedImages();
this.inputSetImagePickerOpen(false);
this.inputSetLineCount(1);
this.inputSetMultiline(false);
if (hasImages) {
this.conversationHasImages = true;
}
if (this.autoScrollEnabled) {
this.scrollToBottom();
}
@ -2447,6 +2512,7 @@ const appOptions = {
return;
}
this.modeMenuOpen = false;
this.modelMenuOpen = false;
const nextState = this.inputToggleToolMenu();
if (nextState) {
this.inputSetSettingsOpen(false);
@ -2466,12 +2532,117 @@ const appOptions = {
const opened = this.inputToggleQuickMenu();
if (!opened) {
this.modeMenuOpen = false;
this.modelMenuOpen = false;
}
},
closeQuickMenu() {
this.inputCloseMenus();
this.modeMenuOpen = false;
this.modelMenuOpen = false;
},
async openImagePicker() {
if (this.currentModelKey !== 'qwen3-vl-plus') {
this.uiPushToast({
title: '当前模型不支持图片',
message: '请选择 Qwen-VL 后再发送图片',
type: 'error'
});
return;
}
this.closeQuickMenu();
this.inputSetImagePickerOpen(true);
await this.loadWorkspaceImages();
},
closeImagePicker() {
this.inputSetImagePickerOpen(false);
},
async loadWorkspaceImages() {
this.imageLoading = true;
try {
const entries = await this.fetchAllImageEntries('');
this.imageEntries = entries;
if (!entries.length) {
this.uiPushToast({
title: '未找到图片',
message: '工作区内没有可用的图片文件',
type: 'info'
});
}
} catch (error) {
console.error('加载图片列表失败', error);
this.uiPushToast({
title: '加载图片失败',
message: error?.message || '请稍后重试',
type: 'error'
});
} finally {
this.imageLoading = false;
}
},
async fetchAllImageEntries(startPath = '') {
const queue: string[] = [startPath || ''];
const visited = new Set<string>();
const results: Array<{ name: string; path: string }> = [];
const exts = new Set(['.png', '.jpg', '.jpeg', '.webp', '.gif', '.bmp', '.svg']);
const maxFolders = 120;
while (queue.length && visited.size < maxFolders) {
const path = queue.shift() || '';
if (visited.has(path)) {
continue;
}
visited.add(path);
try {
const resp = await fetch(`/api/gui/files/entries?path=${encodeURIComponent(path)}`, {
method: 'GET',
credentials: 'include',
headers: { Accept: 'application/json' }
});
const data = await resp.json().catch(() => null);
if (!data?.success) {
continue;
}
const items = Array.isArray(data?.data?.items) ? data.data.items : [];
for (const item of items) {
const rawPath =
item?.path ||
[path, item?.name].filter(Boolean).join('/').replace(/\\/g, '/').replace(/\/{2,}/g, '/');
const type = String(item?.type || '').toLowerCase();
if (type === 'directory' || type === 'folder') {
queue.push(rawPath);
continue;
}
const ext =
String(item?.extension || '').toLowerCase() ||
(rawPath.includes('.') ? `.${rawPath.split('.').pop()?.toLowerCase()}` : '');
if (exts.has(ext)) {
results.push({
name: item?.name || rawPath.split('/').pop() || rawPath,
path: rawPath
});
if (results.length >= 400) {
return results;
}
}
}
} catch (error) {
console.warn('遍历文件夹失败', path, error);
}
}
return results;
},
handleImagesConfirmed(list) {
this.inputSetSelectedImages(Array.isArray(list) ? list : []);
this.inputSetImagePickerOpen(false);
},
handleRemoveImage(path) {
this.inputRemoveSelectedImage(path);
},
handleQuickUpload() {
@ -2488,6 +2659,25 @@ const appOptions = {
const next = !this.modeMenuOpen;
this.modeMenuOpen = next;
if (next) {
this.modelMenuOpen = false;
}
if (next) {
this.inputSetToolMenuOpen(false);
this.inputSetSettingsOpen(false);
if (!this.quickMenuOpen) {
this.inputOpenQuickMenu();
}
}
},
toggleModelMenu() {
if (!this.isConnected || this.streamingMessage) {
return;
}
const next = !this.modelMenuOpen;
this.modelMenuOpen = next;
if (next) {
this.modeMenuOpen = false;
this.inputSetToolMenuOpen(false);
this.inputSetSettingsOpen(false);
if (!this.quickMenuOpen) {
@ -2503,6 +2693,56 @@ const appOptions = {
await this.setRunMode(mode);
},
async handleModelSelect(key) {
if (!this.isConnected || this.streamingMessage) {
return;
}
if (this.conversationHasImages && key !== 'qwen3-vl-plus') {
this.uiPushToast({
title: '切换失败',
message: '当前对话包含图片,仅支持 Qwen-VL',
type: 'error'
});
return;
}
const modelStore = useModelStore();
const prev = this.currentModelKey;
try {
const resp = await fetch('/api/model', {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({ model_key: key })
});
const payload = await resp.json();
if (!resp.ok || !payload.success) {
throw new Error(payload.error || payload.message || '切换失败');
}
const data = payload.data || {};
modelStore.setModel(data.model_key || key);
if (data.run_mode) {
this.runMode = data.run_mode;
this.thinkingMode = data.thinking_mode ?? (data.run_mode !== 'fast');
}
this.uiPushToast({
title: '模型已切换',
message: modelStore.currentModel?.label || key,
type: 'success'
});
} catch (error) {
modelStore.setModel(prev);
const msg = error instanceof Error ? error.message : String(error || '切换失败');
this.uiPushToast({
title: '切换模型失败',
message: msg,
type: 'error'
});
} finally {
this.modelMenuOpen = false;
this.inputCloseMenus();
this.inputSetQuickMenuOpen(false);
}
},
async handleCycleRunMode() {
const modes: Array<'fast' | 'thinking' | 'deep'> = ['fast', 'thinking', 'deep'];
const currentMode = this.resolvedRunMode;
@ -2511,11 +2751,39 @@ const appOptions = {
await this.setRunMode(nextMode);
},
async setRunMode(mode) {
async setRunMode(mode, options = {}) {
if (!this.isConnected || this.streamingMessage) {
this.modeMenuOpen = false;
return;
}
const modelStore = useModelStore();
const fastOnly = modelStore.currentModel?.fastOnly;
const currentModelKey = modelStore.currentModel?.key;
if (fastOnly && mode !== 'fast') {
if (!options.suppressToast) {
this.uiPushToast({
title: '模式不可用',
message: '当前模型仅支持快速模式',
type: 'warning'
});
}
this.modeMenuOpen = false;
this.inputCloseMenus();
return;
}
// Qwen-VL 不支持深度思考模式
if (currentModelKey === 'qwen3-vl-plus' && mode === 'deep') {
if (!options.suppressToast) {
this.uiPushToast({
title: '模式不可用',
message: 'Qwen-VL 不支持深度思考模式,请使用快速或思考模式',
type: 'warning'
});
}
this.modeMenuOpen = false;
this.inputCloseMenus();
return;
}
if (mode === this.resolvedRunMode) {
this.modeMenuOpen = false;
this.closeQuickMenu();
@ -2774,6 +3042,7 @@ const appOptions = {
return;
}
this.modeMenuOpen = false;
this.modelMenuOpen = false;
const nextState = this.inputToggleSettingsMenu();
if (nextState) {
this.inputSetToolMenuOpen(false);
@ -2961,7 +3230,8 @@ const appOptions = {
LiquidGlassWidget,
QuickMenu,
InputComposer,
AppShell
AppShell,
ImagePicker
};
export default appOptions;

View File

@ -1,13 +1,18 @@
<template>
<div class="messages-area" ref="rootEl">
<div class="messages-flow">
<div v-for="(msg, index) in messages" :key="index" class="message-block">
<div v-for="(msg, index) in filteredMessages" :key="index" class="message-block">
<div v-if="msg.role === 'user'" class="user-message">
<div class="message-header icon-label">
<span class="icon icon-sm" :style="iconStyleSafe('user')" aria-hidden="true"></span>
<span>用户</span>
</div>
<div class="message-text">{{ msg.content }}</div>
<div class="message-text user-bubble-text">
<div v-if="msg.content" class="bubble-text">{{ msg.content }}</div>
<div v-if="msg.images && msg.images.length" class="image-inline-row">
<span class="image-name" v-for="img in msg.images" :key="img">{{ formatImageName(img) }}</span>
</div>
</div>
</div>
<div v-else-if="msg.role === 'assistant'" class="assistant-message">
<div class="message-header icon-label">
@ -368,6 +373,9 @@ const props = defineProps<{
const personalization = usePersonalizationStore();
const stackedBlocksEnabled = computed(() => personalization.experiments.stackedBlocksEnabled);
const filteredMessages = computed(() =>
(props.messages || []).filter(m => !(m && m.metadata && m.metadata.system_injected_image))
);
const DEFAULT_GENERATING_TEXT = '生成中…';
const rootEl = ref<HTMLElement | null>(null);
@ -403,6 +411,12 @@ function iconStyleSafe(key: string, size?: string) {
return {};
}
function formatImageName(path: string): string {
if (!path) return '';
const parts = path.split(/[/\\]/);
return parts[parts.length - 1] || path;
}
const isStackable = (action: any) => action && (action.type === 'thinking' || action.type === 'tool');
const splitActionGroups = (actions: any[] = [], messageIndex = 0) => {
const result: Array<

View File

@ -11,39 +11,49 @@
}"
>
<input type="file" ref="fileUploadInput" class="file-input-hidden" @change="onFileChange" />
<button
type="button"
class="stadium-btn add-btn"
@click.stop="$emit('toggle-quick-menu')"
:disabled="!isConnected"
>
+
</button>
<textarea
ref="stadiumInput"
class="stadium-input"
rows="1"
:value="inputMessage"
:disabled="!isConnected || streamingMessage || inputLocked"
placeholder="输入消息... (Ctrl+Enter 发送)"
@input="onInput"
@focus="$emit('input-focus')"
@blur="$emit('input-blur')"
@keydown.enter.ctrl.prevent="$emit('send-message')"
></textarea>
<button
type="button"
class="stadium-btn send-btn"
@click="$emit('send-or-stop')"
:disabled="
!isConnected ||
(inputLocked && !streamingMessage) ||
(!(inputMessage || '').trim() && !streamingMessage)
"
>
<span v-if="streamingMessage" class="stop-icon"></span>
<span v-else class="send-icon"></span>
</button>
<div class="input-stack">
<div v-if="selectedImages && selectedImages.length" class="image-inline-row">
<span class="image-name" v-for="img in selectedImages" :key="img">
{{ formatImageName(img) }}
<button type="button" class="image-remove-btn" @click.stop="$emit('remove-image', img)">×</button>
</span>
</div>
<div class="input-row">
<button
type="button"
class="stadium-btn add-btn"
@click.stop="$emit('toggle-quick-menu')"
:disabled="!isConnected"
>
+
</button>
<textarea
ref="stadiumInput"
class="stadium-input"
rows="1"
:value="inputMessage"
:disabled="!isConnected || streamingMessage || inputLocked"
placeholder="输入消息... (Ctrl+Enter 发送)"
@input="onInput"
@focus="$emit('input-focus')"
@blur="$emit('input-blur')"
@keydown.enter.ctrl.prevent="$emit('send-message')"
></textarea>
<button
type="button"
class="stadium-btn send-btn"
@click="$emit('send-or-stop')"
:disabled="
!isConnected ||
(inputLocked && !streamingMessage) ||
((!(inputMessage || '').trim() && (!selectedImages || !selectedImages.length)) && !streamingMessage)
"
>
<span v-if="streamingMessage" class="stop-icon"></span>
<span v-else class="send-icon"></span>
</button>
</div>
</div>
</div>
<QuickMenu
:open="quickMenuOpen"
@ -52,6 +62,9 @@
:streaming-message="streamingMessage"
:thinking-mode="thinkingMode"
:run-mode="runMode"
:model-menu-open="modelMenuOpen"
:model-options="modelOptions"
:current-model-key="currentModelKey"
:tool-menu-open="toolMenuOpen"
:tool-settings="toolSettings"
:tool-settings-loading="toolSettingsLoading"
@ -62,10 +75,13 @@
:icon-style="iconStyle"
:tool-category-icon="toolCategoryIcon"
@quick-upload="triggerQuickUpload"
@pick-images="$emit('pick-images')"
@toggle-tool-menu="$emit('toggle-tool-menu')"
@toggle-settings="$emit('toggle-settings')"
@toggle-mode-menu="$emit('toggle-mode-menu')"
@select-run-mode="(mode) => $emit('select-run-mode', mode)"
@toggle-model-menu="$emit('toggle-model-menu')"
@select-model="(key) => $emit('select-model', key)"
@update-tool-category="(id, enabled) => $emit('update-tool-category', id, enabled)"
@realtime-terminal="$emit('realtime-terminal')"
@toggle-focus-panel="$emit('toggle-focus-panel')"
@ -92,16 +108,20 @@ const emit = defineEmits([
'send-message',
'send-or-stop',
'quick-upload',
'pick-images',
'toggle-tool-menu',
'toggle-mode-menu',
'toggle-model-menu',
'select-run-mode',
'select-model',
'toggle-settings',
'update-tool-category',
'realtime-terminal',
'toggle-focus-panel',
'toggle-token-panel',
'compress-conversation',
'file-selected'
'file-selected',
'remove-image'
]);
const props = defineProps<{
@ -117,6 +137,7 @@ const props = defineProps<{
quickMenuOpen: boolean;
toolMenuOpen: boolean;
modeMenuOpen: boolean;
modelMenuOpen: boolean;
toolSettings: Array<{ id: string; label: string; enabled: boolean }>;
toolSettingsLoading: boolean;
settingsOpen: boolean;
@ -124,6 +145,9 @@ const props = defineProps<{
currentConversationId: string | null;
iconStyle: (key: string) => Record<string, string>;
toolCategoryIcon: (categoryId: string) => string;
modelOptions: Array<{ key: string; label: string; description: string }>;
currentModelKey: string;
selectedImages?: string[];
}>();
const inputStore = useInputStore();
@ -132,6 +156,12 @@ const compactInputShell = ref<HTMLElement | null>(null);
const stadiumInput = ref<HTMLTextAreaElement | null>(null);
const fileUploadInput = ref<HTMLInputElement | null>(null);
const formatImageName = (path: string): string => {
if (!path) return '';
const parts = path.split(/[/\\]/);
return parts[parts.length - 1] || path;
};
const applyLineMetrics = (lines: number, multiline: boolean) => {
inputStore.setInputLineCount(lines);
inputStore.setInputMultiline(multiline);
@ -204,3 +234,18 @@ onMounted(() => {
adjustTextareaSize();
});
</script>
<style scoped>
.image-inline-row {
display: flex;
flex-wrap: wrap;
gap: 8px;
padding: 4px 10px 2px;
font-size: 12px;
color: var(--text-secondary, #7f8792);
line-height: 1.4;
}
.image-name {
white-space: nowrap;
}
</style>

View File

@ -4,6 +4,15 @@
<button type="button" class="menu-entry" @click="$emit('quick-upload')" :disabled="!isConnected || uploading">
{{ uploading ? '上传中...' : '上传文件' }}
</button>
<button
v-if="currentModelKey === 'qwen3-vl-plus'"
type="button"
class="menu-entry"
@click.stop="$emit('pick-images')"
:disabled="!isConnected || streamingMessage"
>
发送图片
</button>
<button
type="button"
class="menu-entry has-submenu"
@ -13,6 +22,15 @@
<span>运行模式</span>
<span class="entry-arrow">{{ runModeLabel }}</span>
</button>
<button
type="button"
class="menu-entry has-submenu"
@click.stop="$emit('toggle-model-menu')"
:disabled="!isConnected"
>
<span>切换模型</span>
<span class="entry-arrow">{{ currentModelLabel }}</span>
</button>
<button
type="button"
class="menu-entry has-submenu"
@ -50,6 +68,28 @@
</div>
</transition>
<transition name="submenu-slide">
<div class="quick-submenu model-submenu" v-if="modelMenuOpen">
<div class="submenu-list">
<button
v-for="option in modelOptions"
:key="option.key"
type="button"
class="menu-entry submenu-entry"
:class="{ active: option.key === currentModelKey }"
@click.stop="$emit('select-model', option.key)"
:disabled="streamingMessage || !isConnected"
>
<span class="submenu-label">
<span>{{ option.label }}</span>
<span class="submenu-desc">{{ option.description }}</span>
</span>
<span v-if="option.key === currentModelKey" class="entry-arrow"></span>
</button>
</div>
</div>
</transition>
<transition name="submenu-slide">
<div class="quick-submenu tool-submenu" v-if="toolMenuOpen">
<div class="submenu-status" v-if="toolSettingsLoading">正在同步工具状态...</div>
@ -137,6 +177,9 @@ const props = defineProps<{
toolCategoryIcon: (categoryId: string) => string;
modeMenuOpen: boolean;
runMode?: 'fast' | 'thinking' | 'deep';
modelMenuOpen: boolean;
modelOptions: Array<{ key: string; label: string; description: string }>;
currentModelKey: string;
}>();
defineEmits<{
@ -150,6 +193,8 @@ defineEmits<{
(event: 'compress-conversation'): void;
(event: 'toggle-mode-menu'): void;
(event: 'select-run-mode', mode: 'fast' | 'thinking' | 'deep'): void;
(event: 'toggle-model-menu'): void;
(event: 'select-model', key: string): void;
}>();
const runModeOptions = [
@ -174,4 +219,18 @@ const resolvedRunMode = computed<'fast' | 'thinking' | 'deep'>(() => {
const runModeLabel = computed(() => runModeLabelMap[resolvedRunMode.value]);
const getIconStyle = (key: string) => (props.iconStyle ? props.iconStyle(key) : {});
const currentModelLabel = computed(() => {
const found = props.modelOptions?.find(m => m.key === props.currentModelKey);
return found ? found.label : '未选择';
});
</script>
<style scoped>
.submenu-desc {
display: block;
font-size: 12px;
color: var(--text-secondary, #7f8792);
margin-top: 2px;
}
</style>

View File

@ -0,0 +1,211 @@
<template>
<div class="image-picker-backdrop" @click.self="close">
<div class="image-picker-panel">
<div class="header">
<div class="title">选择图片(最多9张)</div>
<button class="close-btn" @click="close">×</button>
</div>
<div class="body">
<div v-if="loading" class="loading">加载中...</div>
<div v-else-if="!images.length" class="empty">未找到图片文件</div>
<div v-else class="grid">
<div
v-for="item in images"
:key="item.path"
class="card"
:class="{ selected: selectedSet.has(item.path) }"
@click="toggle(item.path)"
:title="item.path"
>
<img :src="previewUrl(item.path)" :alt="item.name" />
<div class="name">{{ item.name }}</div>
</div>
</div>
</div>
<div class="footer">
<div class="count">已选 {{ selectedSet.size }} / 9</div>
<div class="actions">
<button type="button" class="btn secondary" @click="close">取消</button>
<button type="button" class="btn primary" :disabled="!selectedSet.size" @click="confirm">确认</button>
</div>
</div>
</div>
</div>
</template>
<script setup lang="ts">
import { computed, ref, watch, onMounted } from 'vue';
interface ImageEntry {
name: string;
path: string;
}
const props = defineProps<{
open: boolean;
entries: ImageEntry[];
initialSelected: string[];
loading: boolean;
}>();
const emit = defineEmits<{
(e: 'close'): void;
(e: 'confirm', list: string[]): void;
}>();
const selectedSet = ref<Set<string>>(new Set(props.initialSelected || []));
watch(
() => props.initialSelected,
(val) => {
selectedSet.value = new Set(val || []);
}
);
const images = computed(() => props.entries || []);
const toggle = (path: string) => {
if (!path) return;
const set = new Set(selectedSet.value);
if (set.has(path)) {
set.delete(path);
} else {
if (set.size >= 9) return;
set.add(path);
}
selectedSet.value = set;
};
const close = () => emit('close');
const confirm = () => emit('confirm', Array.from(selectedSet.value));
const previewUrl = (path: string) => `/api/gui/files/download?path=${encodeURIComponent(path)}`;
onMounted(() => {
selectedSet.value = new Set(props.initialSelected || []);
});
</script>
<style scoped>
.image-picker-backdrop {
position: fixed;
inset: 0;
background: rgba(0, 0, 0, 0.45);
display: flex;
align-items: center;
justify-content: center;
z-index: 1200;
}
.image-picker-panel {
width: min(980px, 92vw);
max-height: 88vh;
background: #0f1116;
color: #e8ecf2;
border: 1px solid #2a2f3a;
border-radius: 12px;
display: flex;
flex-direction: column;
box-shadow: 0 16px 40px rgba(0, 0, 0, 0.4);
}
.header {
display: flex;
align-items: center;
justify-content: space-between;
padding: 14px 16px;
border-bottom: 1px solid #1f2430;
}
.title {
font-weight: 600;
}
.close-btn {
background: transparent;
color: #9aa3b5;
border: none;
font-size: 20px;
cursor: pointer;
}
.body {
padding: 12px 16px;
overflow: auto;
flex: 1;
}
.grid {
display: grid;
grid-template-columns: repeat(auto-fill, minmax(160px, 1fr));
gap: 12px;
}
.card {
border: 1px solid #1f2430;
border-radius: 10px;
background: #151922;
cursor: pointer;
overflow: hidden;
display: flex;
flex-direction: column;
}
.card img {
width: 100%;
height: 120px;
object-fit: cover;
background: #0c0f14;
}
.card .name {
padding: 8px 10px;
font-size: 12px;
color: #c5ccda;
white-space: nowrap;
overflow: hidden;
text-overflow: ellipsis;
}
.card.selected {
border-color: #4ca6ff;
box-shadow: 0 0 0 2px rgba(76, 166, 255, 0.2);
}
.loading,
.empty {
padding: 40px 0;
text-align: center;
color: #9aa3b5;
}
.footer {
display: flex;
align-items: center;
justify-content: space-between;
padding: 12px 16px;
border-top: 1px solid #1f2430;
}
.actions {
display: flex;
gap: 10px;
}
.btn {
border: 1px solid #2f3645;
padding: 8px 14px;
border-radius: 8px;
background: #1b202c;
color: #e8ecf2;
cursor: pointer;
}
.btn.primary {
background: #4ca6ff;
border-color: #4ca6ff;
color: #0d1117;
}
.btn:disabled {
opacity: 0.5;
cursor: not-allowed;
}
.count {
font-size: 13px;
color: #9aa3b5;
}
@media (max-width: 640px) {
.grid {
grid-template-columns: repeat(auto-fill, minmax(120px, 1fr));
}
.card img {
height: 90px;
}
}
</style>

View File

@ -4,7 +4,7 @@ import { renderLatexInRealtime } from './useMarkdownRenderer';
export async function initializeLegacySocket(ctx: any) {
try {
const SOCKET_DEBUG_LOGS_ENABLED = false;
const SOCKET_DEBUG_LOGS_ENABLED = true;
const socketLog = (...args: any[]) => {
if (!SOCKET_DEBUG_LOGS_ENABLED) {
return;

View File

@ -164,10 +164,11 @@ export const useChatStore = defineStore('chat', {
this.currentMessageIndex = this.messages.length - 1;
return message;
},
addUserMessage(content: string) {
addUserMessage(content: string, images: string[] = []) {
this.messages.push({
role: 'user',
content
content,
images
});
this.currentMessageIndex = -1;
},

View File

@ -8,6 +8,8 @@ interface InputState {
quickMenuOpen: boolean;
toolMenuOpen: boolean;
settingsOpen: boolean;
imagePickerOpen: boolean;
selectedImages: string[];
}
export const useInputStore = defineStore('input', {
@ -18,7 +20,9 @@ export const useInputStore = defineStore('input', {
inputIsFocused: false,
quickMenuOpen: false,
toolMenuOpen: false,
settingsOpen: false
settingsOpen: false,
imagePickerOpen: false,
selectedImages: []
}),
actions: {
setInputMessage(value: string) {
@ -69,6 +73,23 @@ export const useInputStore = defineStore('input', {
},
setSettingsOpen(open: boolean) {
this.settingsOpen = open;
},
setImagePickerOpen(open: boolean) {
this.imagePickerOpen = open;
},
setSelectedImages(list: string[]) {
this.selectedImages = list.slice(0, 9);
},
addSelectedImage(path: string) {
if (!path) return;
const next = Array.from(new Set([...this.selectedImages, path]));
this.selectedImages = next.slice(0, 9);
},
removeSelectedImage(path: string) {
this.selectedImages = this.selectedImages.filter(item => item !== path);
},
clearSelectedImages() {
this.selectedImages = [];
}
}
});

View File

@ -0,0 +1,66 @@
import { defineStore } from 'pinia';
export type ModelKey = 'kimi' | 'deepseek' | 'qwen3-max' | 'qwen3-vl-plus';
export interface ModelOption {
key: ModelKey;
label: string;
description: string;
fastOnly: boolean;
supportsThinking: boolean;
}
interface ModelState {
currentModelKey: ModelKey;
models: ModelOption[];
}
export const useModelStore = defineStore('model', {
state: (): ModelState => ({
currentModelKey: 'kimi',
models: [
{
key: 'kimi',
label: 'Kimi-k2',
description: '综合能力较强',
fastOnly: false,
supportsThinking: true
},
{
key: 'deepseek',
label: 'Deepseek-V3.2',
description: '数学能力较强',
fastOnly: false,
supportsThinking: true
},
{
key: 'qwen3-max',
label: 'Qwen-Max',
description: '仅支持快速模式',
fastOnly: true,
supportsThinking: false
},
{
key: 'qwen3-vl-plus',
label: 'Qwen-VL',
description: '支持图片输入',
fastOnly: false,
supportsThinking: true
}
]
}),
getters: {
currentModel(state): ModelOption {
return state.models.find(m => m.key === state.currentModelKey) || state.models[0];
}
},
actions: {
setModel(key: ModelKey) {
if (this.currentModelKey === key) return;
const exists = this.models.some(m => m.key === key);
if (exists) {
this.currentModelKey = key;
}
}
}
});

View File

@ -192,6 +192,9 @@
.user-message .message-text {
background: rgba(255, 255, 255, 0.88);
display: flex;
flex-direction: column;
gap: 10px;
}
.assistant-message .message-text {
@ -199,6 +202,20 @@
border-left: 4px solid var(--claude-accent);
}
.user-message .message-text.user-bubble-text .image-inline-row {
display: flex;
flex-wrap: wrap;
gap: 10px;
font-size: 12px;
color: var(--claude-text-secondary);
line-height: 1.4;
padding-bottom: 2px;
}
.user-message .message-text.user-bubble-text .image-name {
white-space: nowrap;
}
.assistant-generating-block {
width: 100%;
}

View File

@ -34,7 +34,6 @@
background: #ffffff;
box-shadow: 0 18px 46px rgba(15, 23, 42, 0.16);
display: flex;
align-items: center;
gap: 12px;
transition:
padding 0.2s ease,
@ -68,6 +67,52 @@
0 32px 86px rgba(15, 23, 42, 0.28);
}
.input-stack {
display: flex;
flex-direction: column;
flex: 1 1 auto;
gap: 6px;
}
.input-row {
display: flex;
align-items: center;
gap: 12px;
width: 100%;
}
.image-inline-row {
display: flex;
flex-wrap: wrap;
gap: 6px;
padding: 0 4px 0;
font-size: 12px;
color: var(--text-secondary, #7f8792);
line-height: 1.4;
}
.image-name {
white-space: nowrap;
display: inline-flex;
align-items: center;
gap: 6px;
}
.image-remove-btn {
border: none;
background: transparent;
color: var(--text-secondary, #7f8792);
cursor: pointer;
padding: 0 4px;
font-size: 12px;
line-height: 1;
transition: color 0.15s ease, transform 0.15s ease;
}
.image-remove-btn:hover {
color: #d14b4b;
transform: scale(1.05);
}
.stadium-input {
flex: 1 1 auto;
width: 100%;

View File

@ -19,6 +19,7 @@ export const ICONS = Object.freeze({
info: '/static/icons/info.svg',
laptop: '/static/icons/laptop.svg',
layers: '/static/icons/layers.svg',
keyboard: '/static/icons/keyboard.svg',
menu: '/static/icons/menu.svg',
monitor: '/static/icons/monitor.svg',
octagon: '/static/icons/octagon.svg',
@ -62,7 +63,7 @@ export const TOOL_ICON_MAP = Object.freeze({
todo_finish: 'flag',
todo_finish_confirm: 'circleAlert',
todo_update_task: 'check',
terminal_input: 'terminal',
terminal_input: 'keyboard',
terminal_reset: 'recycle',
terminal_session: 'monitor',
terminal_snapshot: 'clipboard',
@ -70,7 +71,8 @@ export const TOOL_ICON_MAP = Object.freeze({
update_memory: 'brain',
wait_sub_agent: 'clock',
web_search: 'search',
trigger_easter_egg: 'sparkles'
trigger_easter_egg: 'sparkles',
view_image: 'camera'
});
export const TOOL_CATEGORY_ICON_MAP = Object.freeze({

View File

@ -40,7 +40,7 @@ TOOL_CATEGORIES: Dict[str, ToolCategory] = {
),
"read_focus": ToolCategory(
label="阅读聚焦",
tools=["read_file", "focus_file", "unfocus_file", "ocr_image"],
tools=["read_file", "focus_file", "unfocus_file", "ocr_image", "vlm_analyze", "view_image"],
),
"terminal_realtime": ToolCategory(
label="实时终端",

View File

@ -45,6 +45,10 @@ class DeepSeekClient:
"api_key": THINKING_API_KEY or API_KEY,
"model_id": THINKING_MODEL_ID or MODEL_ID
}
self.fast_max_tokens = None
self.thinking_max_tokens = None
self.fast_extra_params: Dict = {}
self.thinking_extra_params: Dict = {}
self.thinking_mode = thinking_mode # True=智能思考模式, False=快速模式
self.deep_thinking_mode = False # 深度思考模式:整轮都使用思考模型
self.deep_thinking_session = False # 当前任务是否处于深度思考会话
@ -53,6 +57,7 @@ class DeepSeekClient:
self.api_base_url = self.fast_api_config["base_url"]
self.api_key = self.fast_api_config["api_key"]
self.model_id = self.fast_api_config["model_id"]
self.model_key = None # 由宿主终端注入,便于做模型兼容处理
# 每个任务的独立状态
self.current_task_first_call = True # 当前任务是否是第一次调用
self.current_task_thinking = "" # 当前任务的思考内容
@ -161,6 +166,41 @@ class DeepSeekClient:
"model_id": config.get("model_id") or fallback["model_id"]
}
def apply_profile(self, profile: Dict):
"""
动态应用模型配置
profile 示例
{
"fast": {"base_url": "...", "api_key": "...", "model_id": "...", "max_tokens": 8192},
"thinking": {...} None,
"supports_thinking": True/False,
"fast_only": True/False
}
"""
if not profile or "fast" not in profile:
raise ValueError("无效的模型配置")
fast = profile["fast"] or {}
thinking = profile.get("thinking") or fast
self.fast_api_config = {
"base_url": fast.get("base_url") or self.fast_api_config.get("base_url"),
"api_key": fast.get("api_key") or self.fast_api_config.get("api_key"),
"model_id": fast.get("model_id") or self.fast_api_config.get("model_id")
}
self.thinking_api_config = {
"base_url": thinking.get("base_url") or self.thinking_api_config.get("base_url"),
"api_key": thinking.get("api_key") or self.thinking_api_config.get("api_key"),
"model_id": thinking.get("model_id") or self.thinking_api_config.get("model_id")
}
self.fast_max_tokens = fast.get("max_tokens")
self.thinking_max_tokens = thinking.get("max_tokens")
self.fast_extra_params = fast.get("extra_params") or {}
self.thinking_extra_params = thinking.get("extra_params") or {}
# 同步旧字段
self.api_base_url = self.fast_api_config["base_url"]
self.api_key = self.fast_api_config["api_key"]
self.model_id = self.fast_api_config["model_id"]
def get_current_thinking_mode(self) -> bool:
"""获取当前应该使用的思考模式"""
if self.deep_thinking_session:
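Note: a minimal sketch of the profile shape apply_profile consumes and its fallback rule (a missing "thinking" entry reuses the "fast" config); the endpoint, key and model id below are placeholders, not real values.

from typing import Dict

def normalize_profile(profile: Dict) -> Dict:
    # Mirrors apply_profile: "thinking" falls back to "fast" when absent.
    if not profile or "fast" not in profile:
        raise ValueError("invalid model profile")
    fast = profile["fast"] or {}
    thinking = profile.get("thinking") or fast
    return {
        "fast_max_tokens": fast.get("max_tokens"),
        "thinking_max_tokens": thinking.get("max_tokens"),
        "fast_extra_params": fast.get("extra_params") or {},
        "thinking_extra_params": thinking.get("extra_params") or {},
    }

# Hypothetical fast-only profile: no thinking entry, so thinking inherits fast.
demo_profile = {
    "fast": {"base_url": "https://example.invalid/v1", "api_key": "sk-demo",
             "model_id": "demo-model", "max_tokens": 8192},
    "thinking": None,
    "supports_thinking": False,
    "fast_only": True,
}
print(normalize_profile(demo_profile)["thinking_max_tokens"])  # 8192, inherited from fast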
@ -279,7 +319,11 @@ class DeepSeekClient:
self.skip_thinking_next_call = False
try:
max_tokens = int(DEFAULT_RESPONSE_MAX_TOKENS)
override_max = self.thinking_max_tokens if current_thinking_mode else self.fast_max_tokens
if override_max is not None:
max_tokens = int(override_max)
else:
max_tokens = int(DEFAULT_RESPONSE_MAX_TOKENS)
if max_tokens <= 0:
raise ValueError("max_tokens must be positive")
except (TypeError, ValueError):
@ -291,6 +335,10 @@ class DeepSeekClient:
"stream": stream,
"max_tokens": max_tokens
}
# 注入模型额外参数(如 Qwen enable_thinking)
extra_params = self.thinking_extra_params if current_thinking_mode else self.fast_extra_params
if extra_params:
payload.update(extra_params)
if tools:
payload["tools"] = tools
payload["tool_choice"] = "auto"
@ -369,6 +417,9 @@ class DeepSeekClient:
full_response = ""
tool_calls = []
current_thinking = ""
# 针对 append_to_file / modify_file 的占位结构,防止未定义变量导致异常
append_result = {"handled": False}
modify_result = {"handled": False}
# 状态标志
in_thinking = False
@ -435,7 +486,7 @@ class DeepSeekClient:
if new_args: # 只拼接非空内容
existing_call["function"]["arguments"] += new_args
self._print() # 最终换行
self._print("") # 最终换行
# 如果思考还没结束(只调用工具没有文本),手动结束
if in_thinking:
@ -604,6 +655,24 @@ class DeepSeekClient:
thinking_content = ""
in_thinking = False
# 如果思考模式且已有本任务的思考内容,补充到上下文,确保多次调用时思考不割裂
if (
self.thinking_mode
and not self.current_task_first_call
and self.current_task_thinking
):
thinking_context = (
"\n=== 📋 本次任务的思考 ===\n"
f"{self.current_task_thinking}\n"
"=== 思考结束 ===\n"
"提示:以上是本轮任务先前的思考,请在此基础上继续。"
)
messages.append({
"role": "system",
"content": thinking_context
})
thinking_context_injected = True
try:
async for chunk in self.chat(messages, tools=None, stream=True):
if "choices" not in chunk:
@ -631,7 +700,7 @@ class DeepSeekClient:
full_response += content
self._print(content, end="", flush=True)
self._print() # 最终换行
self._print("") # 最终换行
# 如果思考还没结束(极少情况),手动结束
if in_thinking:

View File

@ -2,6 +2,8 @@
import os
import json
import base64
import mimetypes
from copy import deepcopy
from typing import Dict, List, Optional, Any
from pathlib import Path
@ -16,6 +18,7 @@ try:
TERMINAL_SANDBOX_MEMORY,
PROJECT_MAX_STORAGE_MB,
)
from config.model_profiles import get_model_prompt_replacements
except ImportError:
import sys
from pathlib import Path
@ -31,6 +34,7 @@ except ImportError:
TERMINAL_SANDBOX_MEMORY,
PROJECT_MAX_STORAGE_MB,
)
from config.model_profiles import get_model_prompt_replacements
from utils.conversation_manager import ConversationManager
class ContextManager:
@ -47,6 +51,7 @@ class ContextManager:
self.file_annotations = {} # 文件备注
self.conversation_history = [] # 当前对话历史(内存中)
self.todo_list: Optional[Dict[str, Any]] = None
self.has_images: bool = False
# 新增:对话持久化管理器
self.conversation_manager = ConversationManager(base_dir=self.data_dir)
@ -317,13 +322,16 @@ class ContextManager:
project_path=project_path,
thinking_mode=thinking_mode,
run_mode=run_mode or ("thinking" if thinking_mode else "fast"),
initial_messages=[]
initial_messages=[],
model_key=getattr(self.main_terminal, "model_key", None),
has_images=False
)
# 重置当前状态
self.current_conversation_id = conversation_id
self.conversation_history = []
self.todo_list = None
self.has_images = False
print(f"📝 开始新对话: {conversation_id}")
return conversation_id
@ -372,7 +380,14 @@ class ContextManager:
self.project_path = resolved_project_path
run_mode = metadata.get("run_mode")
model_key = metadata.get("model_key")
self.has_images = metadata.get("has_images", False)
if self.main_terminal:
try:
if model_key:
self.main_terminal.set_model(model_key)
except Exception:
pass
try:
if run_mode:
self.main_terminal.set_run_mode(run_mode)
@ -410,7 +425,9 @@ class ContextManager:
project_path=str(self.project_path),
todo_list=self.todo_list,
thinking_mode=getattr(self.main_terminal, "thinking_mode", None) if hasattr(self, "main_terminal") else None,
run_mode=run_mode
run_mode=run_mode,
model_key=getattr(self.main_terminal, "model_key", None) if hasattr(self, "main_terminal") else None,
has_images=self.has_images
)
if success:
@ -584,12 +601,16 @@ class ContextManager:
project_path = str(resolved_project_path)
thinking_mode = metadata.get("thinking_mode", False)
run_mode = metadata.get("run_mode") or ("thinking" if thinking_mode else "fast")
model_key = metadata.get("model_key")
has_images = metadata.get("has_images", False)
compressed_conversation_id = self.conversation_manager.create_conversation(
project_path=project_path,
thinking_mode=thinking_mode,
run_mode=run_mode,
initial_messages=compressed_messages
initial_messages=compressed_messages,
model_key=model_key,
has_images=has_images
)
return {
@ -615,12 +636,16 @@ class ContextManager:
project_path = str(resolved_project_path)
thinking_mode = metadata.get("thinking_mode", False)
run_mode = metadata.get("run_mode") or ("thinking" if thinking_mode else "fast")
model_key = metadata.get("model_key")
has_images = metadata.get("has_images", False)
duplicate_conversation_id = self.conversation_manager.create_conversation(
project_path=project_path,
thinking_mode=thinking_mode,
run_mode=run_mode,
initial_messages=original_messages
initial_messages=original_messages,
model_key=model_key,
has_images=has_images
)
token_stats = conversation_data.get("token_statistics")
@ -699,7 +724,8 @@ class ContextManager:
tool_call_id: Optional[str] = None,
name: Optional[str] = None,
metadata: Optional[Dict[str, Any]] = None,
reasoning_content: Optional[str] = None
reasoning_content: Optional[str] = None,
images: Optional[List[str]] = None
):
"""添加对话记录(改进版:集成自动保存 + 智能token统计"""
timestamp = datetime.now().isoformat()
@ -719,6 +745,9 @@ class ContextManager:
if metadata:
message["metadata"] = metadata
if images:
message["images"] = images
self.has_images = True
# 如果是assistant消息且有工具调用保存完整格式
if role == "assistant" and tool_calls:
@ -1152,16 +1181,41 @@ class ContextManager:
"is_overflow": sizes["total"] > MAX_CONTEXT_SIZE,
"usage_percent": (sizes["total"] / MAX_CONTEXT_SIZE) * 100
}
def _build_content_with_images(self, text: str, images: List[str]) -> Any:
"""将文本与图片路径组合成多模态content图片转换为data URI。"""
if not images:
return text
parts: List[Dict[str, Any]] = []
if text:
parts.append({"type": "text", "text": text})
for path in images:
try:
abs_path = Path(self.project_path) / path
if not abs_path.exists() or not abs_path.is_file():
continue
mime, _ = mimetypes.guess_type(abs_path.name)
if not mime:
mime = "image/png"
data = abs_path.read_bytes()
b64 = base64.b64encode(data).decode("utf-8")
parts.append({"type": "image_url", "image_url": {"url": f"data:{mime};base64,{b64}"}})
except Exception:
continue
return parts if parts else text
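As a rough, self-contained illustration of the multimodal shape this produces (an optional text part followed by OpenAI-style image_url parts with data URIs); the image path is hypothetical.

import base64
import mimetypes
from pathlib import Path
from typing import List

def text_with_images(text: str, image_paths: List[str]):
    # Same idea as _build_content_with_images: text part first, then data-URI image parts.
    parts = [{"type": "text", "text": text}] if text else []
    for p in image_paths:
        path = Path(p)
        if not path.is_file():
            continue  # silently skip missing files, as the original does
        mime = mimetypes.guess_type(path.name)[0] or "image/png"
        b64 = base64.b64encode(path.read_bytes()).decode("utf-8")
        parts.append({"type": "image_url", "image_url": {"url": f"data:{mime};base64,{b64}"}})
    return parts or text

# Hypothetical call; "shots/ui.png" stands in for a project-relative image path.
user_message = {"role": "user", "content": text_with_images("描述这张截图", ["shots/ui.png"])}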
def build_messages(self, context: Dict, user_input: str) -> List[Dict]:
"""构建消息列表(添加终端内容注入)"""
# 加载系统提示
system_prompt = self.load_prompt("main_system")
# 加载系统提示(Qwen-VL 使用专用提示)
model_key = getattr(self.main_terminal, "model_key", "kimi") if hasattr(self, "main_terminal") else "kimi"
prompt_name = "main_system_qwenvl" if model_key == "qwen3-vl-plus" else "main_system"
system_prompt = self.load_prompt(prompt_name)
# 格式化系统提示
container_path = self.container_mount_path or "/workspace"
container_cpus = self.container_cpu_limit
container_memory = self.container_memory_limit
project_storage = self.project_storage_limit
prompt_replacements = get_model_prompt_replacements(model_key)
system_prompt = system_prompt.format(
project_path=container_path,
container_path=container_path,
@ -1170,7 +1224,8 @@ class ContextManager:
project_storage=project_storage,
file_tree=context["project_info"]["file_tree"],
memory=context["memory"],
current_time=datetime.now().strftime("%Y-%m-%d %H:%M:%S")
current_time=datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
model_description=prompt_replacements.get("model_description", "")
)
messages = [
@ -1199,9 +1254,11 @@ class ContextManager:
}
messages.append(message)
else:
images = conv.get("images") or (conv.get("metadata") or {}).get("images") or []
content_payload = self._build_content_with_images(conv["content"], images) if images else conv["content"]
messages.append({
"role": conv["role"],
"content": conv["content"]
"content": content_payload
})
# 添加聚焦文件内容

View File

@ -30,6 +30,8 @@ class ConversationMetadata:
total_messages: int
total_tools: int
run_mode: str = "fast"
model_key: Optional[str] = None
has_images: bool = False
status: str = "active" # active, archived, error
class ConversationManager:
@ -86,6 +88,8 @@ class ConversationManager:
"project_relative_path": metadata.get("project_relative_path"),
"thinking_mode": metadata.get("thinking_mode", False),
"run_mode": metadata.get("run_mode") or ("thinking" if metadata.get("thinking_mode") else "fast"),
"model_key": metadata.get("model_key"),
"has_images": metadata.get("has_images", False),
"total_messages": metadata.get("total_messages", 0),
"total_tools": metadata.get("total_tools", 0),
"status": metadata.get("status", "active"),
@ -275,7 +279,9 @@ class ConversationManager:
project_path: str,
thinking_mode: bool = False,
run_mode: str = "fast",
initial_messages: List[Dict] = None
initial_messages: List[Dict] = None,
model_key: Optional[str] = None,
has_images: bool = False
) -> str:
"""
创建新对话
@ -306,6 +312,8 @@ class ConversationManager:
"project_relative_path": path_metadata["project_relative_path"],
"thinking_mode": thinking_mode,
"run_mode": normalized_mode,
"model_key": model_key,
"has_images": has_images,
"total_messages": len(messages),
"total_tools": self._count_tools_in_messages(messages),
"status": "active"
@ -393,6 +401,8 @@ class ConversationManager:
"project_relative_path": metadata.project_relative_path,
"thinking_mode": metadata.thinking_mode,
"run_mode": metadata.run_mode,
"model_key": conversation_data["metadata"].get("model_key"),
"has_images": conversation_data["metadata"].get("has_images", False),
"total_messages": metadata.total_messages,
"total_tools": metadata.total_tools,
"status": metadata.status
@ -409,7 +419,9 @@ class ConversationManager:
project_path: str = None,
thinking_mode: bool = None,
run_mode: Optional[str] = None,
todo_list: Optional[Dict] = None
todo_list: Optional[Dict] = None,
model_key: Optional[str] = None,
has_images: Optional[bool] = None
) -> bool:
"""
保存对话(更新现有对话)
@ -457,6 +469,14 @@ class ConversationManager:
existing_data["metadata"]["run_mode"] = normalized_mode
elif "run_mode" not in existing_data["metadata"]:
existing_data["metadata"]["run_mode"] = "thinking" if existing_data["metadata"].get("thinking_mode") else "fast"
if model_key is not None:
existing_data["metadata"]["model_key"] = model_key
elif "model_key" not in existing_data["metadata"]:
existing_data["metadata"]["model_key"] = None
if has_images is not None:
existing_data["metadata"]["has_images"] = bool(has_images)
elif "has_images" not in existing_data["metadata"]:
existing_data["metadata"]["has_images"] = False
existing_data["metadata"]["total_messages"] = len(messages)
existing_data["metadata"]["total_tools"] = self._count_tools_in_messages(messages)

View File

@ -20,6 +20,7 @@ from datetime import timedelta
import time
from datetime import datetime
from collections import defaultdict, deque, Counter
from config.model_profiles import get_model_profile
from werkzeug.utils import secure_filename
from werkzeug.routing import BaseConverter
import secrets
@ -1487,7 +1488,8 @@ def update_thinking_mode(terminal: WebTerminal, workspace: UserWorkspace, userna
project_path=str(ctx.project_path),
todo_list=ctx.todo_list,
thinking_mode=terminal.thinking_mode,
run_mode=terminal.run_mode
run_mode=terminal.run_mode,
model_key=getattr(terminal, "model_key", None)
)
except Exception as exc:
print(f"[API] 保存思考模式到对话失败: {exc}")
@ -1504,11 +1506,64 @@ def update_thinking_mode(terminal: WebTerminal, workspace: UserWorkspace, userna
})
except Exception as exc:
print(f"[API] 切换思考模式失败: {exc}")
code = 400 if isinstance(exc, ValueError) else 500
return jsonify({
"success": False,
"error": str(exc),
"message": "切换思考模式时发生异常"
}), 500
}), code
@app.route('/api/model', methods=['POST'])
@api_login_required
@with_terminal
@rate_limited("model_switch", 10, 60, scope="user")
def update_model(terminal: WebTerminal, workspace: UserWorkspace, username: str):
"""切换基础模型(快速/思考模型组合)。"""
try:
data = request.get_json() or {}
model_key = data.get("model_key")
if not model_key:
return jsonify({"success": False, "error": "缺少 model_key"}), 400
terminal.set_model(model_key)
# fast-only 时 run_mode 可能被强制为 fast
session["model_key"] = terminal.model_key
session["run_mode"] = terminal.run_mode
session["thinking_mode"] = terminal.thinking_mode
# 更新当前对话元数据
ctx = terminal.context_manager
if ctx.current_conversation_id:
try:
ctx.conversation_manager.save_conversation(
conversation_id=ctx.current_conversation_id,
messages=ctx.conversation_history,
project_path=str(ctx.project_path),
todo_list=ctx.todo_list,
thinking_mode=terminal.thinking_mode,
run_mode=terminal.run_mode,
model_key=terminal.model_key,
has_images=getattr(ctx, "has_images", False)
)
except Exception as exc:
print(f"[API] 保存模型到对话失败: {exc}")
status = terminal.get_status()
socketio.emit('status_update', status, room=f"user_{username}")
return jsonify({
"success": True,
"data": {
"model_key": terminal.model_key,
"run_mode": terminal.run_mode,
"thinking_mode": terminal.thinking_mode
}
})
except Exception as exc:
print(f"[API] 切换模型失败: {exc}")
code = 400 if isinstance(exc, ValueError) else 500
return jsonify({"success": False, "error": str(exc), "message": str(exc)}), code
@app.route('/api/personalization', methods=['GET'])
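A hedged client-side sketch of driving the new /api/model endpoint with requests; the host and port are placeholders, and the session is assumed to be authenticated already.

import requests

session = requests.Session()
# ... assume login has already happened on this session ...
resp = session.post("http://localhost:5000/api/model", json={"model_key": "deepseek"})
body = resp.json()
if body.get("success"):
    # fast-only models may force run_mode back to "fast"
    print(body["data"]["model_key"], body["data"]["run_mode"], body["data"]["thinking_mode"])
else:
    print("switch failed:", body.get("error"))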
@ -2405,9 +2460,13 @@ def handle_message(data):
return
message = (data.get('message') or '').strip()
if not message:
images = data.get('images') or []
if not message and not images:
emit('error', {'message': '消息不能为空'})
return
if images and getattr(terminal, "model_key", None) != "qwen3-vl-plus":
emit('error', {'message': '当前模型不支持图片,请切换到 Qwen-VL'})
return
print(f"[WebSocket] 收到消息: {message}")
debug_log(f"\n{'='*80}\n新任务开始: {message}\n{'='*80}")
@ -2469,7 +2528,8 @@ def handle_message(data):
send_to_client(event_type, data)
# 传递客户端ID
socketio.start_background_task(process_message_task, terminal, message, send_with_activity, client_sid, workspace, username)
images = data.get('images') or []
socketio.start_background_task(process_message_task, terminal, message, images, send_with_activity, client_sid, workspace, username)
@socketio.on('client_chunk_log')
@ -2952,14 +3012,14 @@ def get_current_conversation(terminal: WebTerminal, workspace: UserWorkspace, us
"error": str(e)
}), 500
def process_message_task(terminal: WebTerminal, message: str, sender, client_sid, workspace: UserWorkspace, username: str):
def process_message_task(terminal: WebTerminal, message: str, images, sender, client_sid, workspace: UserWorkspace, username: str):
"""在后台处理消息任务"""
try:
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)
# 创建可取消的任务
task = loop.create_task(handle_task_with_sender(terminal, workspace, message, sender, client_sid, username))
task = loop.create_task(handle_task_with_sender(terminal, workspace, message, images, sender, client_sid, username))
entry = stop_flags.get(client_sid)
if not isinstance(entry, dict):
@ -3034,7 +3094,7 @@ def detect_malformed_tool_call(text):
return False
async def handle_task_with_sender(terminal: WebTerminal, workspace: UserWorkspace, message, sender, client_sid, username: str):
async def handle_task_with_sender(terminal: WebTerminal, workspace: UserWorkspace, message, images, sender, client_sid, username: str):
"""处理任务并发送消息 - 集成token统计版本"""
web_terminal = terminal
conversation_id = getattr(web_terminal.context_manager, "current_conversation_id", None)
@ -3050,7 +3110,7 @@ async def handle_task_with_sender(terminal: WebTerminal, workspace: UserWorkspac
# 添加到对话历史
history_len_before = len(getattr(web_terminal.context_manager, "conversation_history", []) or [])
is_first_user_message = history_len_before == 0
web_terminal.context_manager.add_conversation("user", message)
web_terminal.context_manager.add_conversation("user", message, images=images)
if is_first_user_message and getattr(web_terminal, "context_manager", None):
try:
@ -4833,6 +4893,38 @@ async def handle_task_with_sender(terminal: WebTerminal, workspace: UserWorkspac
"content": tool_result_content
})
# 处理图片注入:必须紧跟在对应的 tool 消息之后,且工具成功时才插入
if (
function_name == "view_image"
and getattr(web_terminal, "pending_image_view", None)
and not tool_failed
and (isinstance(result_data, dict) and result_data.get("success") is not False)
):
inj = web_terminal.pending_image_view
web_terminal.pending_image_view = None
injected_text = "这是一条系统控制发送的信息,并非用户主动发送,目的是返回你需要查看的图片。"
# 记录到对话历史
web_terminal.context_manager.add_conversation(
"user",
injected_text,
images=[inj["path"]],
metadata={"system_injected_image": True}
)
# 同步到当前消息列表(直接带多模态 content),保证顺序为 tool_call -> tool -> (系统代发)user
content_payload = web_terminal.context_manager._build_content_with_images(
injected_text,
[inj["path"]]
)
messages.append({
"role": "user",
"content": content_payload,
"metadata": {"system_injected_image": True}
})
# 提示前端
sender('system_message', {
'content': f'系统已按模型请求插入图片: {inj.get("path")}'
})
if function_name != 'write_file_diff':
await process_sub_agent_updates(messages, inline=True, after_tool_call_id=tool_call_id)
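To make the resulting ordering concrete, the tail of the message list after a successful view_image call looks roughly like this; ids, paths and the tool result content are abbreviated placeholders.

messages_tail = [
    {"role": "assistant",
     "tool_calls": [{"id": "call_1", "type": "function",
                     "function": {"name": "view_image", "arguments": '{"path": "shots/ui.png"}'}}]},
    {"role": "tool", "tool_call_id": "call_1", "content": "ok"},
    {"role": "user",  # system-injected, not typed by the user
     "content": [
         {"type": "text",
          "text": "这是一条系统控制发送的信息,并非用户主动发送,目的是返回你需要查看的图片。"},
         {"type": "image_url", "image_url": {"url": "data:image/png;base64,..."}},
     ],
     "metadata": {"system_injected_image": True}},
]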