feat: update model support and multimodal

2026-02-25 01:41:05 +08:00 · 2026-02-25 01:41:05 +08:00 · 08bc08b35f
commit 08bc08b35f
parent 89eeb449b5
25 changed files with 372 additions and 167 deletions
--- a/api_doc/openapi.yaml
+++ b/api_doc/openapi.yaml
@ -117,7 +117,7 @@ components:
        default_model:
          type: string
          default: kimi
-          description: kimi/deepseek/qwen3-max/qwen3-vl-plus
+          description: kimi/deepseek/qwen3-vl-plus/minimax-m2.5
      additionalProperties: true
      example:
        enabled: true
--- a/api_doc/prompts_personalization.md
+++ b/api_doc/prompts_personalization.md
@ -47,7 +47,7 @@
 | `default_run_mode` | "fast"/"thinking"/"deep"/null | null | 默认运行模式：非法值会变成 null |
 | `auto_generate_title` | bool | true | 是否自动生成对话标题 |
 | `tool_intent_enabled` | bool | true | 工具意图提示开关（属于配置结构的一部分） |
-| `default_model` | string | "kimi" | 默认模型：仅允许 `"kimi"|"deepseek"|"qwen3-max"|"qwen3-vl-plus"`，非法值回落到 `"kimi"` |
+| `default_model` | string | "kimi" | 默认模型：仅允许 `"kimi"|"deepseek"|"qwen3-vl-plus"|"minimax-m2.5"`，非法值回落到 `"kimi"` |
 ### 最小示例（启用 + 2 条注意事项）
--- a/config/model_profiles.py
+++ b/config/model_profiles.py
@ -11,6 +11,7 @@ CONTEXT_WINDOWS = {
    "kimi-k2.5": 256_000,
    "qwen3-max": 256_000,
    "qwen3-vl-plus": 256_000,
    "minimax-m2.5": 204_800,
    "deepseek": 128_000,
 }
@ -32,7 +33,12 @@ DEEPSEEK_THINK_MODEL = _env("MODEL_DEEPSEEK_THINK", "deepseek-reasoner")
 QWEN_BASE = _env("API_BASE_QWEN", "https://dashscope.aliyuncs.com/compatible-mode/v1")
 QWEN_KEY = _env("API_KEY_QWEN", _env("DASHSCOPE_API_KEY", ""))
 QWEN_MAX_MODEL = _env("MODEL_QWEN_MAX", "qwen3-max")
-QWEN_VL_MODEL = _env("MODEL_QWEN_VL", "qwen3-vl-plus")
+QWEN_VL_MODEL = _env("MODEL_QWEN_VL", "qwen3.5-plus")
 # MiniMax
 MINIMAX_BASE = _env("API_BASE_MINIMAX", "https://api.minimaxi.com/v1")
 MINIMAX_KEY = _env("API_KEY_MINIMAX", "")
 MINIMAX_MODEL = _env("MODEL_MINIMAX", "MiniMax-M2.5")
 MODEL_PROFILES = {
@ -110,7 +116,8 @@ MODEL_PROFILES = {
        "thinking": None,  # 不支持思考
        "supports_thinking": False,
        "fast_only": True,
-        "name": "Qwen3-Max"
+        "name": "Qwen3-Max",
        "hidden": True
    },
    "qwen3-vl-plus": {
        "context_window": CONTEXT_WINDOWS["qwen3-vl-plus"],
@ -132,7 +139,30 @@ MODEL_PROFILES = {
        },
        "supports_thinking": True,
        "fast_only": False,
-        "name": "Qwen3-VL"
+        "name": "Qwen3.5"
    },
    "minimax-m2.5": {
        "context_window": CONTEXT_WINDOWS["minimax-m2.5"],
        "fast": {
            "base_url": MINIMAX_BASE,
            "api_key": MINIMAX_KEY,
            "model_id": MINIMAX_MODEL,
            "max_tokens": 65536,
            "context_window": CONTEXT_WINDOWS["minimax-m2.5"],
            "extra_params": {"reasoning_split": True}
        },
        "thinking": {
            "base_url": MINIMAX_BASE,
            "api_key": MINIMAX_KEY,
            "model_id": MINIMAX_MODEL,
            "max_tokens": 65536,
            "context_window": CONTEXT_WINDOWS["minimax-m2.5"],
            "extra_params": {"reasoning_split": True}
        },
        "supports_thinking": True,
        "fast_only": False,
        "deep_only": True,
        "name": "MiniMax-M2.5"
    }
 }
@ -158,9 +188,14 @@ MODEL_PROMPT_OVERRIDES = {
        "deep_thinking_line": "Qwen3-Max 不支持深度思考模式，将保持快速模式。"
    },
    "qwen3-vl-plus": {
-        "model_description": "你的基础模型是 Qwen3-VL-Plus，支持图文多模态理解，接口来自通义千问 DashScope。",
+        "model_description": "你的基础模型是 Qwen3.5，由通义千问提供，支持图文多模态理解。",
-        "thinking_model_line": "思考模式时，请求的模型仍为 Qwen3-VL-Plus（开启思考能力），后续请求会切回快速模型。",
+        "thinking_model_line": "思考模式时仍使用 Qwen3.5，并开启思考能力。",
-        "deep_thinking_line": "在深度思考模式中，请求的模型是 Qwen3-VL-Plus（思考版），以获得更强的分析能力。"
+        "deep_thinking_line": "深度思考模式下，所有请求都将启用思考能力，以获得更强的分析表现。"
    },
    "minimax-m2.5": {
        "model_description": "你的基础模型是 MiniMax-M2.5，支持超长上下文，当前仅以深度思考模式运行。",
        "thinking_model_line": "MiniMax-M2.5 为思考模型，快速模式不会使用。",
        "deep_thinking_line": "深度思考模式下，所有请求持续输出思考过程并给出最终回答。"
    }
 }
--- a/core/main_terminal.py
+++ b/core/main_terminal.py
@ -1479,7 +1479,7 @@ class MainTerminal:
                "type": "function",
                "function": {
                    "name": "vlm_analyze",
-                    "description": "使用大参数视觉语言模型（Qwen-VL模型）理解图片：文字、物体、布局、表格等，仅支持本地路径。",
+                    "description": "使用大参数视觉语言模型（Qwen3.5）理解图片：文字、物体、布局、表格等，仅支持本地路径。",
                    "parameters": {
                        "type": "object",
                        "properties": self._inject_intent({
@ -1872,7 +1872,7 @@ class MainTerminal:
                }
            }
        ]
-        # 视觉模型（Qwen-VL / Kimi-k2.5）自带多模态能力，不再暴露 vlm_analyze，改为 view_image
+        # 视觉模型（Qwen3.5 / Kimi-k2.5）自带多模态能力，不再暴露 vlm_analyze，改为 view_image / view_video
        if getattr(self, "model_key", None) in {"qwen3-vl-plus", "kimi-k2.5"}:
            tools = [
                tool for tool in tools
@ -1895,24 +1895,23 @@ class MainTerminal:
                    }
                }
            })
-            if getattr(self, "model_key", None) == "kimi-k2.5":
+            tools.append({
-                tools.append({
+                "type": "function",
-                    "type": "function",
+                "function": {
                    "function": {
                    "name": "view_video",
                    "description": "将指定本地视频附加到工具结果中（tool 消息携带 video_url），便于模型查看视频内容。",
-                        "parameters": {
+                    "parameters": {
-                            "type": "object",
+                        "type": "object",
-                            "properties": self._inject_intent({
+                        "properties": self._inject_intent({
                            "path": {
                                "type": "string",
                                "description": "项目内的视频相对路径（不要以 /workspace 开头）；宿主机模式可用绝对路径。支持 mp4/mov/mkv/avi/webm。"
-                                }
+                            }
-                            }),
+                        }),
-                            "required": ["path"]
+                        "required": ["path"]
                        }
                    }
-                })
+                }
            })
        # 附加自定义工具（仅管理员可见）
        custom_tools = self._build_custom_tools()
        if custom_tools:
@ -2045,7 +2044,11 @@ class MainTerminal:
                if abs_path.stat().st_size > 50 * 1024 * 1024:
                    return json.dumps({"success": False, "error": "视频过大，需 <= 50MB"}, ensure_ascii=False)
                self.pending_video_view = {"path": str(path)}
-                result = {"success": True, "message": "视频已附加到工具结果中，将随 tool 返回。", "path": path}
+                result = {
                    "success": True,
                    "message": "视频已附加到工具结果中，将随 tool 返回。",
                    "path": path
                }
            # 终端会话管理工具
            elif tool_name == "terminal_session":
@ -2528,7 +2531,7 @@ class MainTerminal:
    def build_messages(self, context: Dict, user_input: str) -> List[Dict]:
        """构建消息列表（添加终端内容注入）"""
-        # 加载系统提示（Qwen-VL 使用专用提示）
+        # 加载系统提示（Qwen3.5 使用专用提示）
        prompt_name = "main_system_qwenvl" if getattr(self, "model_key", "kimi") in {"qwen3-vl-plus", "kimi-k2.5"} else "main_system"
        system_prompt = self.load_prompt(prompt_name)
@ -2810,9 +2813,9 @@ class MainTerminal:
        normalized = mode.lower()
        if normalized not in allowed:
            raise ValueError(f"不支持的模式: {mode}")
-        # Qwen-VL 官方不支持深度思考模式
+        # 仅深度思考模型限制
-        if getattr(self, "model_key", None) == "qwen3-vl-plus" and normalized == "deep":
+        if getattr(self, "model_profile", {}).get("deep_only") and normalized != "deep":
-            raise ValueError("Qwen-VL 不支持深度思考模式")
+            raise ValueError("当前模型仅支持深度思考模式")
        # fast-only 模型限制
        if getattr(self, "model_profile", {}).get("fast_only") and normalized != "fast":
            raise ValueError("当前模型仅支持快速模式")
@ -2840,9 +2843,9 @@ class MainTerminal:
    def set_model(self, model_key: str) -> str:
        profile = get_model_profile(model_key)
        if getattr(self.context_manager, "has_images", False) and model_key not in {"qwen3-vl-plus", "kimi-k2.5"}:
-            raise ValueError("当前对话包含图片，仅支持 Qwen-VL 或 Kimi-k2.5")
+            raise ValueError("当前对话包含图片，仅支持 Qwen3.5 或 Kimi-k2.5")
-        if getattr(self.context_manager, "has_videos", False) and model_key != "kimi-k2.5":
+        if getattr(self.context_manager, "has_videos", False) and model_key not in {"qwen3-vl-plus", "kimi-k2.5"}:
-            raise ValueError("当前对话包含视频，仅支持 Kimi-k2.5")
+            raise ValueError("当前对话包含视频，仅支持 Qwen3.5 或 Kimi-k2.5")
        self.model_key = model_key
        self.model_profile = profile
        # 将模型标识传递给底层 API 客户端，便于按模型做兼容处理
@ -2852,9 +2855,9 @@ class MainTerminal:
        # fast-only 模型强制快速模式
        if profile.get("fast_only") and self.run_mode != "fast":
            self.set_run_mode("fast")
-        # Qwen-VL 不支持深度思考，自动回落到思考模式
+        # 仅深度思考模型强制 deep
-        if model_key == "qwen3-vl-plus" and self.run_mode == "deep":
+        if profile.get("deep_only") and self.run_mode != "deep":
-            self.set_run_mode("thinking")
+            self.set_run_mode("deep")
        # 如果模型支持思考，但当前 run_mode 为 thinking/deep，则保持；否则无需调整
        self.api_client.start_new_task(force_deep=self.deep_thinking_mode)
        return self.model_key
--- a/modules/admin_policy_manager.py
+++ b/modules/admin_policy_manager.py
@ -17,7 +17,7 @@ from config.paths import ADMIN_POLICY_FILE
 from modules.custom_tool_registry import CustomToolRegistry, build_default_tool_category
 # 可用的模型 key（与前端、model_profiles 保持一致）
-ALLOWED_MODELS = {"kimi", "deepseek", "qwen3-max", "qwen3-vl-plus"}
+ALLOWED_MODELS = {"kimi", "deepseek", "qwen3-vl-plus", "minimax-m2.5"}
 # UI 禁用项键名，前后端统一
 UI_BLOCK_KEYS = [
--- a/modules/ocr_client.py
+++ b/modules/ocr_client.py
@ -13,7 +13,7 @@ from modules.file_manager import FileManager
 class OCRClient:
-    """封装 VLM（如 DeepSeek-OCR / Qwen-VL）调用逻辑。"""
+    """封装 VLM（如 DeepSeek-OCR / Qwen3.5）调用逻辑。"""
    def __init__(self, project_path: str, file_manager: FileManager):
        self.project_path = Path(project_path).resolve()
--- a/modules/personalization_manager.py
+++ b/modules/personalization_manager.py
@ -113,7 +113,7 @@ def sanitize_personalization_payload(
        base.update(fallback)
    data = payload or {}
    allowed_tool_categories = set(TOOL_CATEGORIES.keys())
-    allowed_models = {"kimi", "kimi-k2.5", "deepseek", "qwen3-max", "qwen3-vl-plus"}
+    allowed_models = {"kimi", "kimi-k2.5", "deepseek", "qwen3-vl-plus", "minimax-m2.5"}
    allowed_image_modes = {"original", "1080p", "720p", "540p"}
    def _resolve_short_field(key: str) -> str:
--- a/prompts/main_system.txt
+++ b/prompts/main_system.txt
@ -57,7 +57,7 @@
 ### 3.3 视觉理解
 - **非视觉模型**：`vlm_analyze` 调用 VLM 分析图片
- **视觉模型**（Qwen-VL / Kimi-k2.5）：`view_image` 直接查看图片，`view_video` 查看视频（Kimi-k2.5）
+- **视觉模型**（Qwen3.5 / Kimi-k2.5）：`view_image` 直接查看图片，`view_video` 查看视频
 ### 3.4 终端操作
--- a/prompts/main_system_qwenvl.txt
+++ b/prompts/main_system_qwenvl.txt
@ -56,7 +56,7 @@
 ### 3.3 视觉理解（重点）
-你**自带多模态能力**，用户可以直接发送图片；如需主动查看本地图片/视频，可调用 `view_image`/`view_video` 指定路径，系统会在工具结果中附带媒体（tool 消息携带 image_url/video_url）供你查看。
+你**自带多模态能力**，用户可以直接发送图片/视频；如需主动查看本地图片/视频，可调用 `view_image`/`view_video` 指定路径，系统会在工具结果中附带媒体（tool 消息携带 image_url/video_url）供你查看。
 当用户提出"这是什么""识别文字/表格/票据""找瑕疵/细节""读屏/按钮含义"等图片分析任务时，优先采用下面的方法，保证细节充分、结论可验证：
--- a/server/_conversation_segment.py
+++ b/server/_conversation_segment.py
@ -1457,21 +1457,33 @@ async def handle_task_with_sender(terminal: WebTerminal, workspace: UserWorkspac
            if finish_reason:
                last_finish_reason = finish_reason
-            # 处理思考内容
+            # 处理思考内容（兼容 reasoning_content / reasoning_details）
            reasoning_content = ""
            if "reasoning_content" in delta:
-                reasoning_content = delta["reasoning_content"]
+                reasoning_content = delta.get("reasoning_content") or ""
-                if reasoning_content:
+            elif "reasoning_details" in delta:
-                    reasoning_chunks += 1
+                details = delta.get("reasoning_details")
-                    debug_log(f"  思考内容 #{reasoning_chunks}: {len(reasoning_content)} 字符")
+                if isinstance(details, list):
                    parts = []
                    for item in details:
                        if isinstance(item, dict):
                            text = item.get("text")
                            if text:
                                parts.append(text)
                    if parts:
                        reasoning_content = "".join(parts)
            if reasoning_content:
                reasoning_chunks += 1
                debug_log(f"  思考内容 #{reasoning_chunks}: {len(reasoning_content)} 字符")
-                    if not thinking_started:
+                if not thinking_started:
-                        in_thinking = True
+                    in_thinking = True
-                        thinking_started = True
+                    thinking_started = True
-                        sender('thinking_start', {})
+                    sender('thinking_start', {})
-                        await asyncio.sleep(0.05)
+                    await asyncio.sleep(0.05)
-                    current_thinking += reasoning_content
+                current_thinking += reasoning_content
-                    sender('thinking_chunk', {'content': reasoning_content})
+                sender('thinking_chunk', {'content': reasoning_content})
            # 处理正常内容
            if "content" in delta:
@ -2424,10 +2436,11 @@ async def handle_task_with_sender(terminal: WebTerminal, workspace: UserWorkspac
                    video_path = inj.get("path") if isinstance(inj, dict) else None
                    if video_path:
                        text_part = tool_result_content if isinstance(tool_result_content, str) else ""
                        video_payload = [video_path]
                        tool_message_content = web_terminal.context_manager._build_content_with_images(
                            text_part,
                            [],
-                            [video_path]
+                            video_payload
                        )
                        tool_videos = [video_path]
                        if metadata_payload is None:
--- a/server/_socket_segment.py
+++ b/server/_socket_segment.py
@ -196,7 +196,7 @@ def handle_message(data):
        emit('error', {'message': '消息不能为空'})
        return
    if images and getattr(terminal, "model_key", None) not in {"qwen3-vl-plus", "kimi-k2.5"}:
-        emit('error', {'message': '当前模型不支持图片，请切换到 Qwen-VL 或 Kimi-k2.5'})
+        emit('error', {'message': '当前模型不支持图片，请切换到 Qwen3.5 或 Kimi-k2.5'})
        return
    print(f"[WebSocket] 收到消息: {message}")
--- a/server/api_v1.py
+++ b/server/api_v1.py
@ -677,11 +677,14 @@ def create_personalization_api():
 def list_models_api():
    """List the model profiles that may be shown to the client.

    Profiles whose ``hidden`` flag is truthy are left out. Every remaining
    profile is reduced to its key, display name and mode-capability flags,
    and the result is returned as a JSON response.
    """
    visible = [
        {
            "model_key": model_key,
            # Fall back to the profile key when no display name is set.
            "name": info.get("name", model_key),
            "supports_thinking": info.get("supports_thinking", False),
            "fast_only": info.get("fast_only", False),
            "deep_only": info.get("deep_only", False),
        }
        for model_key, info in MODEL_PROFILES.items()
        if not info.get("hidden")
    ]
    return jsonify({"success": True, "items": visible})
--- a/server/app_legacy.py
+++ b/server/app_legacy.py
@ -922,7 +922,7 @@ def get_user_resources(username: Optional[str] = None) -> Tuple[Optional[WebTerm
        terminal.admin_policy_version = policy.get("updated_at")
        # 若当前模型被禁用，则回退到第一个可用模型
        if terminal.model_key in disabled_models:
-            for candidate in ["kimi-k2.5", "kimi", "deepseek", "qwen3-vl-plus", "qwen3-max"]:
+            for candidate in ["kimi-k2.5", "kimi", "deepseek", "qwen3-vl-plus", "minimax-m2.5"]:
                if candidate not in disabled_models:
                    try:
                        terminal.set_model(candidate)
--- a/server/chat_flow.py
+++ b/server/chat_flow.py
@ -1335,21 +1335,33 @@ async def handle_task_with_sender(terminal: WebTerminal, workspace: UserWorkspac
            if finish_reason:
                last_finish_reason = finish_reason
-            # 处理思考内容
+            # 处理思考内容（兼容 reasoning_content / reasoning_details）
            reasoning_content = ""
            if "reasoning_content" in delta:
-                reasoning_content = delta["reasoning_content"]
+                reasoning_content = delta.get("reasoning_content") or ""
-                if reasoning_content:
+            elif "reasoning_details" in delta:
-                    reasoning_chunks += 1
+                details = delta.get("reasoning_details")
-                    debug_log(f"  思考内容 #{reasoning_chunks}: {len(reasoning_content)} 字符")
+                if isinstance(details, list):
                    parts = []
                    for item in details:
                        if isinstance(item, dict):
                            text = item.get("text")
                            if text:
                                parts.append(text)
                    if parts:
                        reasoning_content = "".join(parts)
            if reasoning_content:
                reasoning_chunks += 1
                debug_log(f"  思考内容 #{reasoning_chunks}: {len(reasoning_content)} 字符")
-                    if not thinking_started:
+                if not thinking_started:
-                        in_thinking = True
+                    in_thinking = True
-                        thinking_started = True
+                    thinking_started = True
-                        sender('thinking_start', {})
+                    sender('thinking_start', {})
-                        await asyncio.sleep(0.05)
+                    await asyncio.sleep(0.05)
-                    current_thinking += reasoning_content
+                current_thinking += reasoning_content
-                    sender('thinking_chunk', {'content': reasoning_content})
+                sender('thinking_chunk', {'content': reasoning_content})
            # 处理正常内容
            if "content" in delta:
@ -2335,10 +2347,11 @@ async def handle_task_with_sender(terminal: WebTerminal, workspace: UserWorkspac
                    video_path = inj.get("path") if isinstance(inj, dict) else None
                    if video_path:
                        text_part = tool_result_content if isinstance(tool_result_content, str) else ""
                        video_payload = [video_path]
                        tool_message_content = web_terminal.context_manager._build_content_with_images(
                            text_part,
                            [],
-                            [video_path]
+                            video_payload
                        )
                        tool_videos = [video_path]
                        if metadata_payload is None:
--- a/server/context.py
+++ b/server/context.py
@ -218,7 +218,7 @@ def get_user_resources(username: Optional[str] = None, workspace_id: Optional[st
            terminal.admin_policy_ui_blocks = policy.get("ui_blocks") or {}
            terminal.admin_policy_version = policy.get("updated_at")
            if terminal.model_key in disabled_models:
-                for candidate in ["kimi-k2.5", "kimi", "deepseek", "qwen3-vl-plus", "qwen3-max"]:
+                for candidate in ["kimi-k2.5", "kimi", "deepseek", "qwen3-vl-plus", "minimax-m2.5"]:
                    if candidate not in disabled_models:
                        try:
                            terminal.set_model(candidate)
--- a/server/socket_handlers.py
+++ b/server/socket_handlers.py
@ -230,10 +230,10 @@ def handle_message(data):
        emit('error', {'message': '消息不能为空'})
        return
    if images and getattr(terminal, "model_key", None) not in {"qwen3-vl-plus", "kimi-k2.5"}:
-        emit('error', {'message': '当前模型不支持图片，请切换到 Qwen-VL 或 Kimi-k2.5'})
+        emit('error', {'message': '当前模型不支持图片，请切换到 Qwen3.5 或 Kimi-k2.5'})
        return
-    if videos and getattr(terminal, "model_key", None) != "kimi-k2.5":
+    if videos and getattr(terminal, "model_key", None) not in {"qwen3-vl-plus", "kimi-k2.5"}:
-        emit('error', {'message': '当前模型不支持视频，请切换到 Kimi-k2.5'})
+        emit('error', {'message': '当前模型不支持视频，请切换到 Qwen3.5 或 Kimi-k2.5'})
        return
    if images and videos:
        emit('error', {'message': '图片和视频请分开发送'})
--- a/static/src/app.ts
+++ b/static/src/app.ts
@ -2612,16 +2612,16 @@ const appOptions = {
                if (hasImages && !['qwen3-vl-plus', 'kimi-k2.5'].includes(this.currentModelKey)) {
                    this.uiPushToast({
                        title: '当前模型不支持图片',
-                        message: '请切换到 Qwen-VL 或 Kimi-k2.5 再发送图片',
+                        message: '请切换到 Qwen3.5 或 Kimi-k2.5 再发送图片',
                        type: 'error'
                    });
                    return;
                }
-                if (hasVideos && this.currentModelKey !== 'kimi-k2.5') {
+                if (hasVideos && !['qwen3-vl-plus', 'kimi-k2.5'].includes(this.currentModelKey)) {
                    this.uiPushToast({
                        title: '当前模型不支持视频',
-                        message: '请切换到 Kimi-k2.5 后再发送视频',
+                        message: '请切换到 Qwen3.5 或 Kimi-k2.5 后再发送视频',
                        type: 'error'
                    });
                    return;
@ -2876,7 +2876,7 @@ const appOptions = {
                if (!['qwen3-vl-plus', 'kimi-k2.5'].includes(this.currentModelKey)) {
                    this.uiPushToast({
                        title: '当前模型不支持图片',
-                        message: '请选择 Qwen-VL 或 Kimi-k2.5 后再发送图片',
+                        message: '请选择 Qwen3.5 或 Kimi-k2.5 后再发送图片',
                        type: 'error'
                    });
                    return;
@ -2891,10 +2891,10 @@ const appOptions = {
            },
            async openVideoPicker() {
-                if (this.currentModelKey !== 'kimi-k2.5') {
+                if (!['qwen3-vl-plus', 'kimi-k2.5'].includes(this.currentModelKey)) {
                    this.uiPushToast({
                        title: '当前模型不支持视频',
-                        message: '请切换到 Kimi-k2.5 后再发送视频',
+                        message: '请切换到 Qwen3.5 或 Kimi-k2.5 后再发送视频',
                        type: 'error'
                    });
                    return;
@ -3163,7 +3163,7 @@ const appOptions = {
                if (this.conversationHasImages && !['qwen3-vl-plus', 'kimi-k2.5'].includes(key)) {
                    this.uiPushToast({
                        title: '切换失败',
-                        message: '当前对话包含图片，仅支持 Qwen-VL 或 Kimi-k2.5',
+                        message: '当前对话包含图片，仅支持 Qwen3.5 或 Kimi-k2.5',
                        type: 'error'
                    });
                    return;
@ -3187,16 +3187,11 @@ const appOptions = {
                        this.thinkingMode = data.thinking_mode ?? (data.run_mode !== 'fast');
                    } else {
                        // 前端兼容策略：根据模型特性自动调整运行模式
-                        if (key === 'qwen3-vl-plus') {
+                        const currentModel = modelStore.currentModel;
-                            // Qwen-VL 不支持深度思考，若当前为 deep 则回落到思考模式
+                        if (currentModel?.deepOnly) {
-                            if (this.runMode === 'deep') {
+                            this.runMode = 'deep';
-                                this.runMode = 'thinking';
+                            this.thinkingMode = true;
-                                this.thinkingMode = true;
+                        } else if (currentModel?.fastOnly) {
                            } else {
                                this.thinkingMode = this.runMode !== 'fast';
                            }
                        } else if (key === 'qwen3-max') {
                            // Qwen-Max 仅快速模式
                            this.runMode = 'fast';
                            this.thinkingMode = false;
                        } else {
@ -3244,12 +3239,12 @@ const appOptions = {
                }
                const modelStore = useModelStore();
                const fastOnly = modelStore.currentModel?.fastOnly;
-                const currentModelKey = modelStore.currentModel?.key;
+                const deepOnly = modelStore.currentModel?.deepOnly;
                if (fastOnly && mode !== 'fast') {
                    if (!options.suppressToast) {
                        this.uiPushToast({
                            title: '模式不可用',
-                            message: 'Qwen-Max只支持快速模式',
+                            message: '当前模型仅支持快速模式',
                            type: 'warning'
                        });
                    }
@ -3257,12 +3252,11 @@ const appOptions = {
                    this.inputCloseMenus();
                    return;
                }
-                // Qwen-VL 不支持深度思考模式
+                if (deepOnly && mode !== 'deep') {
                if (currentModelKey === 'qwen3-vl-plus' && mode === 'deep') {
                    if (!options.suppressToast) {
                        this.uiPushToast({
                            title: '模式不可用',
-                            message: 'Qwen-VL 不支持深度思考模式，请使用快速或思考模式',
+                            message: '当前模型仅支持深度思考模式',
                            type: 'warning'
                        });
                    }
--- a/static/src/components/input/QuickMenu.vue
+++ b/static/src/components/input/QuickMenu.vue
@ -27,7 +27,7 @@
        发送图片
      </button>
      <button
-        v-if="currentModelKey === 'kimi-k2.5'"
+        v-if="['qwen3-vl-plus', 'kimi-k2.5'].includes(currentModelKey)"
        type="button"
        class="menu-entry"
        @click.stop="$emit('pick-video')"
--- a/static/src/components/personalization/PersonalizationDrawer.vue
+++ b/static/src/components/personalization/PersonalizationDrawer.vue
@ -252,7 +252,7 @@
                          </button>
                        </div>
                        <p class="behavior-hint">
-                          Qwen-Max 仅支持快速模式；Qwen-VL 不支持深度思考模式，选择时会给出提示。
+                          MiniMax-M2.5 仅支持深度思考模式，选择时会给出提示。
                        </p>
                      </div>
                      <div class="behavior-field">
@ -720,8 +720,8 @@ const modelOptions = [
  { id: 'deepseek', label: 'DeepSeek', desc: '通用 + 思考强化', value: 'deepseek' },
  { id: 'kimi-k2.5', label: 'Kimi-k2.5', desc: '新版 Kimi，思考开关 + 图文多模态', value: 'kimi-k2.5', badge: '图文' },
  { id: 'kimi', label: 'Kimi-k2', desc: '旧版 Kimi-k2，兼顾通用对话', value: 'kimi' },
-  { id: 'qwen3-max', label: 'Qwen-Max', desc: '仅快速模式，不支持思考', value: 'qwen3-max', badge: '仅快速' },
+  { id: 'qwen3-vl-plus', label: 'Qwen3.5', desc: '图文多模态 + 深度思考', value: 'qwen3-vl-plus', badge: '图文' },
-  { id: 'qwen3-vl-plus', label: 'Qwen-VL', desc: '图文多模态，思考/快速均可', value: 'qwen3-vl-plus', badge: '图文' }
+  { id: 'minimax-m2.5', label: 'MiniMax-M2.5', desc: '仅深度思考，超长上下文', value: 'minimax-m2.5', badge: '深度思考' }
 ] as const;
 const filteredModelOptions = computed(() =>
@ -815,11 +815,8 @@ const setDefaultModel = (value: string) => {
 const checkModeModelConflict = (mode: RunModeValue, model: string | null): boolean => {
  const warnings: string[] = [];
-  if (model === 'qwen3-max' && mode && mode !== 'fast') {
+  if (model === 'minimax-m2.5' && mode && mode !== 'deep') {
-    warnings.push('Qwen-Max 仅支持快速模式，已保持原设置。');
+    warnings.push('MiniMax-M2.5 仅支持深度思考模式，已保持原设置。');
  }
  if (model === 'qwen3-vl-plus' && mode === 'deep') {
    warnings.push('Qwen-VL 不支持深度思考模式，已保持原设置。');
  }
  if (warnings.length) {
    uiStore.pushToast({
--- a/static/src/stores/model.ts
+++ b/static/src/stores/model.ts
@ -1,6 +1,6 @@
 import { defineStore } from 'pinia';
-export type ModelKey = 'kimi-k2.5' | 'kimi' | 'deepseek' | 'qwen3-max' | 'qwen3-vl-plus';
+export type ModelKey = 'kimi-k2.5' | 'kimi' | 'deepseek' | 'qwen3-vl-plus' | 'minimax-m2.5';
 export interface ModelOption {
  key: ModelKey;
@ -8,6 +8,7 @@ export interface ModelOption {
  description: string;
  fastOnly: boolean;
  supportsThinking: boolean;
  deepOnly?: boolean;
 }
 interface ModelState {
@ -40,19 +41,20 @@ export const useModelStore = defineStore('model', {
        fastOnly: false,
        supportsThinking: true
      },
      {
        key: 'qwen3-max',
        label: 'Qwen-Max',
        description: '仅支持快速模式',
        fastOnly: true,
        supportsThinking: false
      },
      {
        key: 'qwen3-vl-plus',
-        label: 'Qwen-VL',
+        label: 'Qwen3.5',
-        description: '支持图片输入',
+        description: '图文视频多模态 + 深度思考',
        fastOnly: false,
        supportsThinking: true
      },
      {
        key: 'minimax-m2.5',
        label: 'MiniMax-M2.5',
        description: '仅深度思考，超长上下文',
        fastOnly: false,
        supportsThinking: true,
        deepOnly: true
      }
    ]
  }),
--- a/static/src/stores/personalization.ts
+++ b/static/src/stores/personalization.ts
@ -397,7 +397,7 @@ export const usePersonalizationStore = defineStore('personalization', {
      this.clearFeedback();
    },
    setDefaultModel(model: string | null) {
-      const allowed = ['deepseek', 'kimi-k2.5', 'kimi', 'qwen3-max', 'qwen3-vl-plus'];
+      const allowed = ['deepseek', 'kimi-k2.5', 'kimi', 'qwen3-vl-plus', 'minimax-m2.5'];
      const target = typeof model === 'string' && allowed.includes(model) ? model : null;
      this.form = {
        ...this.form,
--- a/sub_agent/utils/api_client.py
+++ b/sub_agent/utils/api_client.py
@ -4,7 +4,7 @@
 import httpx
 import json
 import asyncio
-from typing import List, Dict, Optional, AsyncGenerator
+from typing import List, Dict, Optional, AsyncGenerator, Any
 try:
    from config import API_BASE_URL, API_KEY, MODEL_ID, OUTPUT_FORMATS, DEFAULT_RESPONSE_MAX_TOKENS
 except ImportError:
@ -98,6 +98,54 @@ class DeepSeekClient:
        return json.dumps(data, ensure_ascii=False)
    def _extract_reasoning_delta(self, delta: Dict[str, Any]) -> str:
        """统一提取思考内容，兼容 reasoning_content / reasoning_details。"""
        if not isinstance(delta, dict):
            return ""
        if "reasoning_content" in delta:
            return delta.get("reasoning_content") or ""
        details = delta.get("reasoning_details")
        if isinstance(details, list):
            parts: List[str] = []
            for item in details:
                if isinstance(item, dict):
                    text = item.get("text")
                    if text:
                        parts.append(text)
            if parts:
                return "".join(parts)
        return ""
    def _merge_system_messages(self, messages: List[Dict]) -> List[Dict]:
        """
        将多个 system 消息合并为一个。
        """
        if not messages:
            return messages
        merged_contents: List[str] = []
        new_messages: List[Dict] = []
        first_system_index: Optional[int] = None
        for msg in messages:
            if msg.get("role") == "system":
                if first_system_index is None:
                    first_system_index = len(new_messages)
                content = msg.get("content", "")
                if isinstance(content, str):
                    merged_contents.append(content)
                else:
                    merged_contents.append(json.dumps(content, ensure_ascii=False))
            else:
                new_messages.append(msg)
        if not merged_contents:
            return messages
        merged = {
            "role": "system",
            "content": "\n\n".join(c for c in merged_contents if c)
        }
        insert_at = first_system_index if first_system_index is not None else 0
        new_messages.insert(insert_at, merged)
        return new_messages
    def start_new_task(self):
        """开始新任务（重置任务级别的状态）"""
        self.current_task_first_call = True
@ -215,9 +263,11 @@ class DeepSeekClient:
        except (TypeError, ValueError):
            max_tokens = 4096
        final_messages = self._merge_system_messages(messages)
        payload = {
            "model": self.model_id,
-            "messages": messages,
+            "messages": final_messages,
            "stream": stream,
            "thinking": {"type": "enabled" if current_thinking_mode else "disabled"},
            "max_tokens": max_tokens
@ -332,9 +382,9 @@ class DeepSeekClient:
                delta = chunk["choices"][0].get("delta", {})
                # 处理思考内容（只在思考模式开启时）
-                if "reasoning_content" in delta and should_show_thinking:
+                if should_show_thinking:
-                    reasoning_content = delta["reasoning_content"]
+                    reasoning_content = self._extract_reasoning_delta(delta)
-                    if reasoning_content:  # 只处理非空内容
+                    if reasoning_content:
                        if not in_thinking:
                            self._print("💭 [正在思考]\n", end="", flush=True)
                            in_thinking = True
@ -568,9 +618,9 @@ class DeepSeekClient:
                delta = chunk["choices"][0].get("delta", {})
                # 处理思考内容
-                if "reasoning_content" in delta and should_show_thinking:
+                if should_show_thinking:
-                    reasoning_content = delta["reasoning_content"]
+                    reasoning_content = self._extract_reasoning_delta(delta)
-                    if reasoning_content:  # 只处理非空内容
+                    if reasoning_content:
                        if not in_thinking:
                            self._print("💭 [正在思考]\n", end="", flush=True)
                            in_thinking = True
--- a/sub_agent/web_server.py
+++ b/sub_agent/web_server.py
@ -3354,22 +3354,34 @@ async def handle_task_with_sender(terminal: WebTerminal, message, sender, client
            if finish_reason:
                last_finish_reason = finish_reason
-            # 处理思考内容
+            # 处理思考内容（兼容 reasoning_content / reasoning_details）
            reasoning_content = ""
            if "reasoning_content" in delta:
-                reasoning_content = delta["reasoning_content"]
+                reasoning_content = delta.get("reasoning_content") or ""
-                if reasoning_content:
+            elif "reasoning_details" in delta:
-                    reasoning_chunks += 1
+                details = delta.get("reasoning_details")
-                    debug_log(f"  思考内容 #{reasoning_chunks}: {len(reasoning_content)} 字符")
+                if isinstance(details, list):
                    parts = []
                    for item in details:
                        if isinstance(item, dict):
                            text = item.get("text")
                            if text:
                                parts.append(text)
                    if parts:
                        reasoning_content = "".join(parts)
            if reasoning_content:
                reasoning_chunks += 1
                debug_log(f"  思考内容 #{reasoning_chunks}: {len(reasoning_content)} 字符")
-                    if should_show_thinking:
+                if should_show_thinking:
-                        if not thinking_started:
+                    if not thinking_started:
-                            in_thinking = True
+                        in_thinking = True
-                            thinking_started = True
+                        thinking_started = True
-                            sender('thinking_start', {})
+                        sender('thinking_start', {})
-                            await asyncio.sleep(0.05)
+                        await asyncio.sleep(0.05)
-                        current_thinking += reasoning_content
+                    current_thinking += reasoning_content
-                        sender('thinking_chunk', {'content': reasoning_content})
+                    sender('thinking_chunk', {'content': reasoning_content})
            # 处理正常内容
            if "content" in delta:
--- a/utils/api_client.py
+++ b/utils/api_client.py
@ -128,11 +128,12 @@ class DeepSeekClient:
                match_texts.append("未找到匹配内容。")
            return "\n".join([header] + match_texts)
-    def _build_content_with_images(self, text: str, images: List[str], videos: Optional[List[str]] = None) -> Any:
+    def _build_content_with_images(self, text: str, images: List[str], videos: Optional[List[Any]] = None) -> Any:
        """将文本与图片/视频路径拼成多模态 content（用于 tool 消息）。"""
        videos = videos or []
        if not images and not videos:
            return text
        qwen_video_fps = 2
        parts: List[Dict[str, Any]] = []
        if text:
            parts.append({"type": "text", "text": text})
@ -150,8 +151,14 @@ class DeepSeekClient:
                parts.append({"type": "image_url", "image_url": {"url": f"data:{mime};base64,{b64}"}})
            except Exception:
                continue
-        for path in videos:
+        for item in videos:
            try:
                if isinstance(item, dict):
                    path = item.get("path") or ""
                else:
                    path = item
                if not path:
                    continue
                abs_path = (base_path / path).resolve()
                if not abs_path.exists() or not abs_path.is_file():
                    continue
@ -160,7 +167,13 @@ class DeepSeekClient:
                    mime = "video/mp4"
                data = abs_path.read_bytes()
                b64 = base64.b64encode(data).decode("utf-8")
-                parts.append({"type": "video_url", "video_url": {"url": f"data:{mime};base64,{b64}"}})
+                payload: Dict[str, Any] = {
                    "type": "video_url",
                    "video_url": {"url": f"data:{mime};base64,{b64}"}
                }
                if self.model_key == "qwen3-vl-plus":
                    payload["fps"] = qwen_video_fps
                parts.append(payload)
            except Exception:
                continue
        return parts if parts else text
@ -184,6 +197,55 @@ class DeepSeekClient:
        return json.dumps(data, ensure_ascii=False)
    def _extract_reasoning_delta(self, delta: Dict[str, Any]) -> str:
        """统一提取思考内容，兼容 reasoning_content / reasoning_details。"""
        if not isinstance(delta, dict):
            return ""
        if "reasoning_content" in delta:
            return delta.get("reasoning_content") or ""
        details = delta.get("reasoning_details")
        if isinstance(details, list):
            parts: List[str] = []
            for item in details:
                if isinstance(item, dict):
                    text = item.get("text")
                    if text:
                        parts.append(text)
            if parts:
                return "".join(parts)
        return ""
    def _merge_system_messages(self, messages: List[Dict]) -> List[Dict]:
        """
        将多个 system 消息合并为一个（部分模型仅支持单条 system）。
        保留原有顺序，把合并后的 system 放在第一条 system 的位置。
        """
        if not messages:
            return messages
        merged_contents: List[str] = []
        new_messages: List[Dict] = []
        first_system_index: Optional[int] = None
        for msg in messages:
            if msg.get("role") == "system":
                if first_system_index is None:
                    first_system_index = len(new_messages)
                content = msg.get("content", "")
                if isinstance(content, str):
                    merged_contents.append(content)
                else:
                    merged_contents.append(json.dumps(content, ensure_ascii=False))
            else:
                new_messages.append(msg)
        if not merged_contents:
            return messages
        merged = {
            "role": "system",
            "content": "\n\n".join(c for c in merged_contents if c)
        }
        insert_at = first_system_index if first_system_index is not None else 0
        new_messages.insert(insert_at, merged)
        return new_messages
    def set_deep_thinking_mode(self, enabled: bool):
        """配置深度思考模式（持续使用思考模型）。"""
        self.deep_thinking_mode = bool(enabled)
@ -445,16 +507,24 @@ class DeepSeekClient:
            else:
                max_tokens = min(max_tokens, available)
        lower_base_url = (api_config.get("base_url") or "").lower()
        is_minimax = self.model_key == "minimax-m2.5" or "minimax" in lower_base_url
        final_messages = self._merge_system_messages(messages)
        payload = {
            "model": api_config["model_id"],
-            "messages": messages,
+            "messages": final_messages,
            "stream": stream,
            "max_tokens": max_tokens
        }
        if is_minimax:
            payload["max_completion_tokens"] = max_tokens
        else:
            payload["max_tokens"] = max_tokens
        # 部分平台（如 Qwen、DeepSeek）需要显式请求 usage 才会在流式尾包返回
        if stream:
            should_include_usage = False
-            if self.model_key in {"qwen3-max", "qwen3-vl-plus", "deepseek"}:
+            if self.model_key in {"qwen3-max", "qwen3-vl-plus", "deepseek", "minimax-m2.5"}:
                should_include_usage = True
            # 兜底：根据 base_url 识别 openai 兼容的提供商
            if api_config["base_url"]:
@ -462,14 +532,18 @@ class DeepSeekClient:
                if any(keyword in lower_url for keyword in ["dashscope", "aliyuncs", "deepseek.com"]):
                    should_include_usage = True
            if should_include_usage:
-                payload.setdefault("stream_options", {})["include_usage"] = True
+                if is_minimax:
                    payload["include_usage"] = True
                else:
                    payload.setdefault("stream_options", {})["include_usage"] = True
        # 注入模型额外参数（如 Qwen enable_thinking）
        extra_params = self.thinking_extra_params if current_thinking_mode else self.fast_extra_params
        if extra_params:
            payload.update(extra_params)
        if tools:
            payload["tools"] = tools
-            payload["tool_choice"] = "auto"
+            if not is_minimax:
                payload["tool_choice"] = "auto"
        # 将本次请求落盘，便于出错时快速定位
        dump_path = self._dump_request_payload(payload, api_config, headers)
@ -636,15 +710,14 @@ class DeepSeekClient:
                delta = chunk["choices"][0].get("delta", {})
                # 处理思考内容
-                if "reasoning_content" in delta:
+                reasoning_content = self._extract_reasoning_delta(delta)
-                    reasoning_content = delta["reasoning_content"]
+                if reasoning_content:
-                    if reasoning_content:  # 只处理非空内容
+                    if not in_thinking:
-                        if not in_thinking:
+                        self._print("💭 [正在思考]\n", end="", flush=True)
-                            self._print("💭 [正在思考]\n", end="", flush=True)
+                        in_thinking = True
-                            in_thinking = True
+                        thinking_printed = True
-                            thinking_printed = True
+                    current_thinking += reasoning_content
-                        current_thinking += reasoning_content
+                    self._print(reasoning_content, end="", flush=True)
                        self._print(reasoning_content, end="", flush=True)
                # 处理正常内容 - 独立的if，不是elif
                if "content" in delta:
@ -910,14 +983,13 @@ class DeepSeekClient:
                delta = chunk["choices"][0].get("delta", {})
                # 处理思考内容
-                if "reasoning_content" in delta:
+                reasoning_content = self._extract_reasoning_delta(delta)
-                    reasoning_content = delta["reasoning_content"]
+                if reasoning_content:
-                    if reasoning_content:  # 只处理非空内容
+                    if not in_thinking:
-                        if not in_thinking:
+                        self._print("💭 [正在思考]\n", end="", flush=True)
-                            self._print("💭 [正在思考]\n", end="", flush=True)
+                        in_thinking = True
-                            in_thinking = True
+                    thinking_content += reasoning_content
-                        thinking_content += reasoning_content
+                    self._print(reasoning_content, end="", flush=True)
                        self._print(reasoning_content, end="", flush=True)
                # 处理正常内容 - 独立的if而不是elif
                if "content" in delta:
--- a/utils/context_manager.py
+++ b/utils/context_manager.py
@ -1414,12 +1414,14 @@ class ContextManager:
        except Exception:
            return None
-    def _build_content_with_images(self, text: str, images: List[str], videos: Optional[List[str]] = None) -> Any:
+    def _build_content_with_images(self, text: str, images: List[str], videos: Optional[List[Any]] = None) -> Any:
        """将文本与图片/视频路径组合成多模态content，图片转换为data URI，视频转换为 data URL。"""
        videos = videos or []
        if not images and not videos:
            return text
        parts: List[Dict[str, Any]] = []
        supports_video_fps = getattr(getattr(self, "main_terminal", None), "model_key", None) == "qwen3-vl-plus"
        qwen_video_fps = 2
        if text:
            parts.append({"type": "text", "text": text})
        for path in images:
@ -1438,8 +1440,14 @@ class ContextManager:
                parts.append({"type": "image_url", "image_url": {"url": data_url}})
            except Exception:
                continue
-        for path in videos:
+        for item in videos:
            try:
                if isinstance(item, dict):
                    path = item.get("path") or ""
                else:
                    path = item
                if not path:
                    continue
                abs_path = Path(self.project_path) / path
                if not abs_path.exists() or not abs_path.is_file():
                    continue
@ -1451,7 +1459,10 @@ class ContextManager:
                data = abs_path.read_bytes()
                b64 = base64.b64encode(data).decode("utf-8")
                data_url = f"data:{mime};base64,{b64}"
-                parts.append({"type": "video_url", "video_url": {"url": data_url}})
+                payload: Dict[str, Any] = {"type": "video_url", "video_url": {"url": data_url}}
                if supports_video_fps:
                    payload["fps"] = qwen_video_fps
                parts.append(payload)
            except Exception:
                continue
        return parts if parts else text
@ -1501,7 +1512,7 @@ class ContextManager:
    def build_messages(self, context: Dict, user_input: str) -> List[Dict]:
        """构建消息列表（添加终端内容注入）"""
-        # 加载系统提示（Qwen-VL 使用专用提示）
+        # 加载系统提示（Qwen3.5 使用专用提示）
        model_key = getattr(self.main_terminal, "model_key", "kimi") if hasattr(self, "main_terminal") else "kimi"
        prompt_name = "main_system_qwenvl" if model_key in {"qwen3-vl-plus", "kimi-k2.5"} else "main_system"
        system_prompt = self.load_prompt(prompt_name)