feat: update model support and multimodal

This commit is contained in:
JOJO 2026-02-25 01:41:05 +08:00
parent 89eeb449b5
commit 08bc08b35f
25 changed files with 372 additions and 167 deletions

View File

@ -117,7 +117,7 @@ components:
default_model: default_model:
type: string type: string
default: kimi default: kimi
description: kimi/deepseek/qwen3-max/qwen3-vl-plus description: kimi/deepseek/qwen3-vl-plus/minimax-m2.5
additionalProperties: true additionalProperties: true
example: example:
enabled: true enabled: true

View File

@ -47,7 +47,7 @@
| `default_run_mode` | "fast"/"thinking"/"deep"/null | null | 默认运行模式:非法值会变成 null | | `default_run_mode` | "fast"/"thinking"/"deep"/null | null | 默认运行模式:非法值会变成 null |
| `auto_generate_title` | bool | true | 是否自动生成对话标题 | | `auto_generate_title` | bool | true | 是否自动生成对话标题 |
| `tool_intent_enabled` | bool | true | 工具意图提示开关(属于配置结构的一部分) | | `tool_intent_enabled` | bool | true | 工具意图提示开关(属于配置结构的一部分) |
| `default_model` | string | "kimi" | 默认模型:仅允许 `"kimi"|"deepseek"|"qwen3-max"|"qwen3-vl-plus"`,非法值回落到 `"kimi"` | | `default_model` | string | "kimi" | 默认模型:仅允许 `"kimi"|"deepseek"|"qwen3-vl-plus"|"minimax-m2.5"`,非法值回落到 `"kimi"` |
### 最小示例(启用 + 2 条注意事项) ### 最小示例(启用 + 2 条注意事项)

View File

@ -11,6 +11,7 @@ CONTEXT_WINDOWS = {
"kimi-k2.5": 256_000, "kimi-k2.5": 256_000,
"qwen3-max": 256_000, "qwen3-max": 256_000,
"qwen3-vl-plus": 256_000, "qwen3-vl-plus": 256_000,
"minimax-m2.5": 204_800,
"deepseek": 128_000, "deepseek": 128_000,
} }
@ -32,7 +33,12 @@ DEEPSEEK_THINK_MODEL = _env("MODEL_DEEPSEEK_THINK", "deepseek-reasoner")
QWEN_BASE = _env("API_BASE_QWEN", "https://dashscope.aliyuncs.com/compatible-mode/v1") QWEN_BASE = _env("API_BASE_QWEN", "https://dashscope.aliyuncs.com/compatible-mode/v1")
QWEN_KEY = _env("API_KEY_QWEN", _env("DASHSCOPE_API_KEY", "")) QWEN_KEY = _env("API_KEY_QWEN", _env("DASHSCOPE_API_KEY", ""))
QWEN_MAX_MODEL = _env("MODEL_QWEN_MAX", "qwen3-max") QWEN_MAX_MODEL = _env("MODEL_QWEN_MAX", "qwen3-max")
QWEN_VL_MODEL = _env("MODEL_QWEN_VL", "qwen3-vl-plus") QWEN_VL_MODEL = _env("MODEL_QWEN_VL", "qwen3.5-plus")
# MiniMax
MINIMAX_BASE = _env("API_BASE_MINIMAX", "https://api.minimaxi.com/v1")
MINIMAX_KEY = _env("API_KEY_MINIMAX", "")
MINIMAX_MODEL = _env("MODEL_MINIMAX", "MiniMax-M2.5")
MODEL_PROFILES = { MODEL_PROFILES = {
@ -110,7 +116,8 @@ MODEL_PROFILES = {
"thinking": None, # 不支持思考 "thinking": None, # 不支持思考
"supports_thinking": False, "supports_thinking": False,
"fast_only": True, "fast_only": True,
"name": "Qwen3-Max" "name": "Qwen3-Max",
"hidden": True
}, },
"qwen3-vl-plus": { "qwen3-vl-plus": {
"context_window": CONTEXT_WINDOWS["qwen3-vl-plus"], "context_window": CONTEXT_WINDOWS["qwen3-vl-plus"],
@ -132,7 +139,30 @@ MODEL_PROFILES = {
}, },
"supports_thinking": True, "supports_thinking": True,
"fast_only": False, "fast_only": False,
"name": "Qwen3-VL" "name": "Qwen3.5"
},
"minimax-m2.5": {
"context_window": CONTEXT_WINDOWS["minimax-m2.5"],
"fast": {
"base_url": MINIMAX_BASE,
"api_key": MINIMAX_KEY,
"model_id": MINIMAX_MODEL,
"max_tokens": 65536,
"context_window": CONTEXT_WINDOWS["minimax-m2.5"],
"extra_params": {"reasoning_split": True}
},
"thinking": {
"base_url": MINIMAX_BASE,
"api_key": MINIMAX_KEY,
"model_id": MINIMAX_MODEL,
"max_tokens": 65536,
"context_window": CONTEXT_WINDOWS["minimax-m2.5"],
"extra_params": {"reasoning_split": True}
},
"supports_thinking": True,
"fast_only": False,
"deep_only": True,
"name": "MiniMax-M2.5"
} }
} }
@ -158,9 +188,14 @@ MODEL_PROMPT_OVERRIDES = {
"deep_thinking_line": "Qwen3-Max 不支持深度思考模式,将保持快速模式。" "deep_thinking_line": "Qwen3-Max 不支持深度思考模式,将保持快速模式。"
}, },
"qwen3-vl-plus": { "qwen3-vl-plus": {
"model_description": "你的基础模型是 Qwen3-VL-Plus支持图文多模态理解接口来自通义千问 DashScope。", "model_description": "你的基础模型是 Qwen3.5,由通义千问提供,支持图文多模态理解。",
"thinking_model_line": "思考模式时,请求的模型仍为 Qwen3-VL-Plus开启思考能力后续请求会切回快速模型。", "thinking_model_line": "思考模式时仍使用 Qwen3.5,并开启思考能力。",
"deep_thinking_line": "在深度思考模式中,请求的模型是 Qwen3-VL-Plus思考版以获得更强的分析能力。" "deep_thinking_line": "深度思考模式下,所有请求都将启用思考能力,以获得更强的分析表现。"
},
"minimax-m2.5": {
"model_description": "你的基础模型是 MiniMax-M2.5,支持超长上下文,当前仅以深度思考模式运行。",
"thinking_model_line": "MiniMax-M2.5 为思考模型,快速模式不会使用。",
"deep_thinking_line": "深度思考模式下,所有请求持续输出思考过程并给出最终回答。"
} }
} }

View File

@ -1479,7 +1479,7 @@ class MainTerminal:
"type": "function", "type": "function",
"function": { "function": {
"name": "vlm_analyze", "name": "vlm_analyze",
"description": "使用大参数视觉语言模型Qwen-VL模型)理解图片:文字、物体、布局、表格等,仅支持本地路径。", "description": "使用大参数视觉语言模型Qwen3.5)理解图片:文字、物体、布局、表格等,仅支持本地路径。",
"parameters": { "parameters": {
"type": "object", "type": "object",
"properties": self._inject_intent({ "properties": self._inject_intent({
@ -1872,7 +1872,7 @@ class MainTerminal:
} }
} }
] ]
# 视觉模型Qwen-VL / Kimi-k2.5)自带多模态能力,不再暴露 vlm_analyze改为 view_image # 视觉模型Qwen3.5 / Kimi-k2.5)自带多模态能力,不再暴露 vlm_analyze改为 view_image / view_video
if getattr(self, "model_key", None) in {"qwen3-vl-plus", "kimi-k2.5"}: if getattr(self, "model_key", None) in {"qwen3-vl-plus", "kimi-k2.5"}:
tools = [ tools = [
tool for tool in tools tool for tool in tools
@ -1895,24 +1895,23 @@ class MainTerminal:
} }
} }
}) })
if getattr(self, "model_key", None) == "kimi-k2.5": tools.append({
tools.append({ "type": "function",
"type": "function", "function": {
"function": {
"name": "view_video", "name": "view_video",
"description": "将指定本地视频附加到工具结果中tool 消息携带 video_url便于模型查看视频内容。", "description": "将指定本地视频附加到工具结果中tool 消息携带 video_url便于模型查看视频内容。",
"parameters": { "parameters": {
"type": "object", "type": "object",
"properties": self._inject_intent({ "properties": self._inject_intent({
"path": { "path": {
"type": "string", "type": "string",
"description": "项目内的视频相对路径(不要以 /workspace 开头);宿主机模式可用绝对路径。支持 mp4/mov/mkv/avi/webm。" "description": "项目内的视频相对路径(不要以 /workspace 开头);宿主机模式可用绝对路径。支持 mp4/mov/mkv/avi/webm。"
} }
}), }),
"required": ["path"] "required": ["path"]
}
} }
}) }
})
# 附加自定义工具(仅管理员可见) # 附加自定义工具(仅管理员可见)
custom_tools = self._build_custom_tools() custom_tools = self._build_custom_tools()
if custom_tools: if custom_tools:
@ -2045,7 +2044,11 @@ class MainTerminal:
if abs_path.stat().st_size > 50 * 1024 * 1024: if abs_path.stat().st_size > 50 * 1024 * 1024:
return json.dumps({"success": False, "error": "视频过大,需 <= 50MB"}, ensure_ascii=False) return json.dumps({"success": False, "error": "视频过大,需 <= 50MB"}, ensure_ascii=False)
self.pending_video_view = {"path": str(path)} self.pending_video_view = {"path": str(path)}
result = {"success": True, "message": "视频已附加到工具结果中,将随 tool 返回。", "path": path} result = {
"success": True,
"message": "视频已附加到工具结果中,将随 tool 返回。",
"path": path
}
# 终端会话管理工具 # 终端会话管理工具
elif tool_name == "terminal_session": elif tool_name == "terminal_session":
@ -2528,7 +2531,7 @@ class MainTerminal:
def build_messages(self, context: Dict, user_input: str) -> List[Dict]: def build_messages(self, context: Dict, user_input: str) -> List[Dict]:
"""构建消息列表(添加终端内容注入)""" """构建消息列表(添加终端内容注入)"""
# 加载系统提示Qwen-VL 使用专用提示) # 加载系统提示Qwen3.5 使用专用提示)
prompt_name = "main_system_qwenvl" if getattr(self, "model_key", "kimi") in {"qwen3-vl-plus", "kimi-k2.5"} else "main_system" prompt_name = "main_system_qwenvl" if getattr(self, "model_key", "kimi") in {"qwen3-vl-plus", "kimi-k2.5"} else "main_system"
system_prompt = self.load_prompt(prompt_name) system_prompt = self.load_prompt(prompt_name)
@ -2810,9 +2813,9 @@ class MainTerminal:
normalized = mode.lower() normalized = mode.lower()
if normalized not in allowed: if normalized not in allowed:
raise ValueError(f"不支持的模式: {mode}") raise ValueError(f"不支持的模式: {mode}")
# Qwen-VL 官方不支持深度思考模式 # 仅深度思考模型限制
if getattr(self, "model_key", None) == "qwen3-vl-plus" and normalized == "deep": if getattr(self, "model_profile", {}).get("deep_only") and normalized != "deep":
raise ValueError("Qwen-VL 不支持深度思考模式") raise ValueError("当前模型仅支持深度思考模式")
# fast-only 模型限制 # fast-only 模型限制
if getattr(self, "model_profile", {}).get("fast_only") and normalized != "fast": if getattr(self, "model_profile", {}).get("fast_only") and normalized != "fast":
raise ValueError("当前模型仅支持快速模式") raise ValueError("当前模型仅支持快速模式")
@ -2840,9 +2843,9 @@ class MainTerminal:
def set_model(self, model_key: str) -> str: def set_model(self, model_key: str) -> str:
profile = get_model_profile(model_key) profile = get_model_profile(model_key)
if getattr(self.context_manager, "has_images", False) and model_key not in {"qwen3-vl-plus", "kimi-k2.5"}: if getattr(self.context_manager, "has_images", False) and model_key not in {"qwen3-vl-plus", "kimi-k2.5"}:
raise ValueError("当前对话包含图片,仅支持 Qwen-VL 或 Kimi-k2.5") raise ValueError("当前对话包含图片,仅支持 Qwen3.5 或 Kimi-k2.5")
if getattr(self.context_manager, "has_videos", False) and model_key != "kimi-k2.5": if getattr(self.context_manager, "has_videos", False) and model_key not in {"qwen3-vl-plus", "kimi-k2.5"}:
raise ValueError("当前对话包含视频,仅支持 Kimi-k2.5") raise ValueError("当前对话包含视频,仅支持 Qwen3.5 或 Kimi-k2.5")
self.model_key = model_key self.model_key = model_key
self.model_profile = profile self.model_profile = profile
# 将模型标识传递给底层 API 客户端,便于按模型做兼容处理 # 将模型标识传递给底层 API 客户端,便于按模型做兼容处理
@ -2852,9 +2855,9 @@ class MainTerminal:
# fast-only 模型强制快速模式 # fast-only 模型强制快速模式
if profile.get("fast_only") and self.run_mode != "fast": if profile.get("fast_only") and self.run_mode != "fast":
self.set_run_mode("fast") self.set_run_mode("fast")
# Qwen-VL 不支持深度思考,自动回落到思考模式 # 仅深度思考模型强制 deep
if model_key == "qwen3-vl-plus" and self.run_mode == "deep": if profile.get("deep_only") and self.run_mode != "deep":
self.set_run_mode("thinking") self.set_run_mode("deep")
# 如果模型支持思考,但当前 run_mode 为 thinking/deep则保持否则无需调整 # 如果模型支持思考,但当前 run_mode 为 thinking/deep则保持否则无需调整
self.api_client.start_new_task(force_deep=self.deep_thinking_mode) self.api_client.start_new_task(force_deep=self.deep_thinking_mode)
return self.model_key return self.model_key

View File

@ -17,7 +17,7 @@ from config.paths import ADMIN_POLICY_FILE
from modules.custom_tool_registry import CustomToolRegistry, build_default_tool_category from modules.custom_tool_registry import CustomToolRegistry, build_default_tool_category
# 可用的模型 key与前端、model_profiles 保持一致) # 可用的模型 key与前端、model_profiles 保持一致)
ALLOWED_MODELS = {"kimi", "deepseek", "qwen3-max", "qwen3-vl-plus"} ALLOWED_MODELS = {"kimi", "deepseek", "qwen3-vl-plus", "minimax-m2.5"}
# UI 禁用项键名,前后端统一 # UI 禁用项键名,前后端统一
UI_BLOCK_KEYS = [ UI_BLOCK_KEYS = [

View File

@ -13,7 +13,7 @@ from modules.file_manager import FileManager
class OCRClient: class OCRClient:
"""封装 VLM如 DeepSeek-OCR / Qwen-VL)调用逻辑。""" """封装 VLM如 DeepSeek-OCR / Qwen3.5)调用逻辑。"""
def __init__(self, project_path: str, file_manager: FileManager): def __init__(self, project_path: str, file_manager: FileManager):
self.project_path = Path(project_path).resolve() self.project_path = Path(project_path).resolve()

View File

@ -113,7 +113,7 @@ def sanitize_personalization_payload(
base.update(fallback) base.update(fallback)
data = payload or {} data = payload or {}
allowed_tool_categories = set(TOOL_CATEGORIES.keys()) allowed_tool_categories = set(TOOL_CATEGORIES.keys())
allowed_models = {"kimi", "kimi-k2.5", "deepseek", "qwen3-max", "qwen3-vl-plus"} allowed_models = {"kimi", "kimi-k2.5", "deepseek", "qwen3-vl-plus", "minimax-m2.5"}
allowed_image_modes = {"original", "1080p", "720p", "540p"} allowed_image_modes = {"original", "1080p", "720p", "540p"}
def _resolve_short_field(key: str) -> str: def _resolve_short_field(key: str) -> str:

View File

@ -57,7 +57,7 @@
### 3.3 视觉理解 ### 3.3 视觉理解
- **非视觉模型**`vlm_analyze` 调用 VLM 分析图片 - **非视觉模型**`vlm_analyze` 调用 VLM 分析图片
- **视觉模型**Qwen-VL / Kimi-k2.5`view_image` 直接查看图片,`view_video` 查看视频Kimi-k2.5 - **视觉模型**Qwen3.5 / Kimi-k2.5`view_image` 直接查看图片,`view_video` 查看视频
### 3.4 终端操作 ### 3.4 终端操作

View File

@ -56,7 +56,7 @@
### 3.3 视觉理解(重点) ### 3.3 视觉理解(重点)
你**自带多模态能力**,用户可以直接发送图片;如需主动查看本地图片/视频,可调用 `view_image`/`view_video` 指定路径系统会在工具结果中附带媒体tool 消息携带 image_url/video_url供你查看。 你**自带多模态能力**,用户可以直接发送图片/视频;如需主动查看本地图片/视频,可调用 `view_image`/`view_video` 指定路径系统会在工具结果中附带媒体tool 消息携带 image_url/video_url供你查看。
当用户提出"这是什么""识别文字/表格/票据""找瑕疵/细节""读屏/按钮含义"等图片分析任务时,优先采用下面的方法,保证细节充分、结论可验证: 当用户提出"这是什么""识别文字/表格/票据""找瑕疵/细节""读屏/按钮含义"等图片分析任务时,优先采用下面的方法,保证细节充分、结论可验证:

View File

@ -1457,21 +1457,33 @@ async def handle_task_with_sender(terminal: WebTerminal, workspace: UserWorkspac
if finish_reason: if finish_reason:
last_finish_reason = finish_reason last_finish_reason = finish_reason
# 处理思考内容 # 处理思考内容(兼容 reasoning_content / reasoning_details
reasoning_content = ""
if "reasoning_content" in delta: if "reasoning_content" in delta:
reasoning_content = delta["reasoning_content"] reasoning_content = delta.get("reasoning_content") or ""
if reasoning_content: elif "reasoning_details" in delta:
reasoning_chunks += 1 details = delta.get("reasoning_details")
debug_log(f" 思考内容 #{reasoning_chunks}: {len(reasoning_content)} 字符") if isinstance(details, list):
parts = []
for item in details:
if isinstance(item, dict):
text = item.get("text")
if text:
parts.append(text)
if parts:
reasoning_content = "".join(parts)
if reasoning_content:
reasoning_chunks += 1
debug_log(f" 思考内容 #{reasoning_chunks}: {len(reasoning_content)} 字符")
if not thinking_started: if not thinking_started:
in_thinking = True in_thinking = True
thinking_started = True thinking_started = True
sender('thinking_start', {}) sender('thinking_start', {})
await asyncio.sleep(0.05) await asyncio.sleep(0.05)
current_thinking += reasoning_content current_thinking += reasoning_content
sender('thinking_chunk', {'content': reasoning_content}) sender('thinking_chunk', {'content': reasoning_content})
# 处理正常内容 # 处理正常内容
if "content" in delta: if "content" in delta:
@ -2424,10 +2436,11 @@ async def handle_task_with_sender(terminal: WebTerminal, workspace: UserWorkspac
video_path = inj.get("path") if isinstance(inj, dict) else None video_path = inj.get("path") if isinstance(inj, dict) else None
if video_path: if video_path:
text_part = tool_result_content if isinstance(tool_result_content, str) else "" text_part = tool_result_content if isinstance(tool_result_content, str) else ""
video_payload = [video_path]
tool_message_content = web_terminal.context_manager._build_content_with_images( tool_message_content = web_terminal.context_manager._build_content_with_images(
text_part, text_part,
[], [],
[video_path] video_payload
) )
tool_videos = [video_path] tool_videos = [video_path]
if metadata_payload is None: if metadata_payload is None:

View File

@ -196,7 +196,7 @@ def handle_message(data):
emit('error', {'message': '消息不能为空'}) emit('error', {'message': '消息不能为空'})
return return
if images and getattr(terminal, "model_key", None) not in {"qwen3-vl-plus", "kimi-k2.5"}: if images and getattr(terminal, "model_key", None) not in {"qwen3-vl-plus", "kimi-k2.5"}:
emit('error', {'message': '当前模型不支持图片,请切换到 Qwen-VL 或 Kimi-k2.5'}) emit('error', {'message': '当前模型不支持图片,请切换到 Qwen3.5 或 Kimi-k2.5'})
return return
print(f"[WebSocket] 收到消息: {message}") print(f"[WebSocket] 收到消息: {message}")

View File

@ -677,11 +677,14 @@ def create_personalization_api():
def list_models_api(): def list_models_api():
items = [] items = []
for key, profile in MODEL_PROFILES.items(): for key, profile in MODEL_PROFILES.items():
if profile.get("hidden"):
continue
items.append({ items.append({
"model_key": key, "model_key": key,
"name": profile.get("name", key), "name": profile.get("name", key),
"supports_thinking": profile.get("supports_thinking", False), "supports_thinking": profile.get("supports_thinking", False),
"fast_only": profile.get("fast_only", False), "fast_only": profile.get("fast_only", False),
"deep_only": profile.get("deep_only", False),
}) })
return jsonify({"success": True, "items": items}) return jsonify({"success": True, "items": items})

View File

@ -922,7 +922,7 @@ def get_user_resources(username: Optional[str] = None) -> Tuple[Optional[WebTerm
terminal.admin_policy_version = policy.get("updated_at") terminal.admin_policy_version = policy.get("updated_at")
# 若当前模型被禁用,则回退到第一个可用模型 # 若当前模型被禁用,则回退到第一个可用模型
if terminal.model_key in disabled_models: if terminal.model_key in disabled_models:
for candidate in ["kimi-k2.5", "kimi", "deepseek", "qwen3-vl-plus", "qwen3-max"]: for candidate in ["kimi-k2.5", "kimi", "deepseek", "qwen3-vl-plus", "minimax-m2.5"]:
if candidate not in disabled_models: if candidate not in disabled_models:
try: try:
terminal.set_model(candidate) terminal.set_model(candidate)

View File

@ -1335,21 +1335,33 @@ async def handle_task_with_sender(terminal: WebTerminal, workspace: UserWorkspac
if finish_reason: if finish_reason:
last_finish_reason = finish_reason last_finish_reason = finish_reason
# 处理思考内容 # 处理思考内容(兼容 reasoning_content / reasoning_details
reasoning_content = ""
if "reasoning_content" in delta: if "reasoning_content" in delta:
reasoning_content = delta["reasoning_content"] reasoning_content = delta.get("reasoning_content") or ""
if reasoning_content: elif "reasoning_details" in delta:
reasoning_chunks += 1 details = delta.get("reasoning_details")
debug_log(f" 思考内容 #{reasoning_chunks}: {len(reasoning_content)} 字符") if isinstance(details, list):
parts = []
for item in details:
if isinstance(item, dict):
text = item.get("text")
if text:
parts.append(text)
if parts:
reasoning_content = "".join(parts)
if reasoning_content:
reasoning_chunks += 1
debug_log(f" 思考内容 #{reasoning_chunks}: {len(reasoning_content)} 字符")
if not thinking_started: if not thinking_started:
in_thinking = True in_thinking = True
thinking_started = True thinking_started = True
sender('thinking_start', {}) sender('thinking_start', {})
await asyncio.sleep(0.05) await asyncio.sleep(0.05)
current_thinking += reasoning_content current_thinking += reasoning_content
sender('thinking_chunk', {'content': reasoning_content}) sender('thinking_chunk', {'content': reasoning_content})
# 处理正常内容 # 处理正常内容
if "content" in delta: if "content" in delta:
@ -2335,10 +2347,11 @@ async def handle_task_with_sender(terminal: WebTerminal, workspace: UserWorkspac
video_path = inj.get("path") if isinstance(inj, dict) else None video_path = inj.get("path") if isinstance(inj, dict) else None
if video_path: if video_path:
text_part = tool_result_content if isinstance(tool_result_content, str) else "" text_part = tool_result_content if isinstance(tool_result_content, str) else ""
video_payload = [video_path]
tool_message_content = web_terminal.context_manager._build_content_with_images( tool_message_content = web_terminal.context_manager._build_content_with_images(
text_part, text_part,
[], [],
[video_path] video_payload
) )
tool_videos = [video_path] tool_videos = [video_path]
if metadata_payload is None: if metadata_payload is None:

View File

@ -218,7 +218,7 @@ def get_user_resources(username: Optional[str] = None, workspace_id: Optional[st
terminal.admin_policy_ui_blocks = policy.get("ui_blocks") or {} terminal.admin_policy_ui_blocks = policy.get("ui_blocks") or {}
terminal.admin_policy_version = policy.get("updated_at") terminal.admin_policy_version = policy.get("updated_at")
if terminal.model_key in disabled_models: if terminal.model_key in disabled_models:
for candidate in ["kimi-k2.5", "kimi", "deepseek", "qwen3-vl-plus", "qwen3-max"]: for candidate in ["kimi-k2.5", "kimi", "deepseek", "qwen3-vl-plus", "minimax-m2.5"]:
if candidate not in disabled_models: if candidate not in disabled_models:
try: try:
terminal.set_model(candidate) terminal.set_model(candidate)

View File

@ -230,10 +230,10 @@ def handle_message(data):
emit('error', {'message': '消息不能为空'}) emit('error', {'message': '消息不能为空'})
return return
if images and getattr(terminal, "model_key", None) not in {"qwen3-vl-plus", "kimi-k2.5"}: if images and getattr(terminal, "model_key", None) not in {"qwen3-vl-plus", "kimi-k2.5"}:
emit('error', {'message': '当前模型不支持图片,请切换到 Qwen-VL 或 Kimi-k2.5'}) emit('error', {'message': '当前模型不支持图片,请切换到 Qwen3.5 或 Kimi-k2.5'})
return return
if videos and getattr(terminal, "model_key", None) != "kimi-k2.5": if videos and getattr(terminal, "model_key", None) not in {"qwen3-vl-plus", "kimi-k2.5"}:
emit('error', {'message': '当前模型不支持视频,请切换到 Kimi-k2.5'}) emit('error', {'message': '当前模型不支持视频,请切换到 Qwen3.5 或 Kimi-k2.5'})
return return
if images and videos: if images and videos:
emit('error', {'message': '图片和视频请分开发送'}) emit('error', {'message': '图片和视频请分开发送'})

View File

@ -2612,16 +2612,16 @@ const appOptions = {
if (hasImages && !['qwen3-vl-plus', 'kimi-k2.5'].includes(this.currentModelKey)) { if (hasImages && !['qwen3-vl-plus', 'kimi-k2.5'].includes(this.currentModelKey)) {
this.uiPushToast({ this.uiPushToast({
title: '当前模型不支持图片', title: '当前模型不支持图片',
message: '请切换到 Qwen-VL 或 Kimi-k2.5 再发送图片', message: '请切换到 Qwen3.5 或 Kimi-k2.5 再发送图片',
type: 'error' type: 'error'
}); });
return; return;
} }
if (hasVideos && this.currentModelKey !== 'kimi-k2.5') { if (hasVideos && !['qwen3-vl-plus', 'kimi-k2.5'].includes(this.currentModelKey)) {
this.uiPushToast({ this.uiPushToast({
title: '当前模型不支持视频', title: '当前模型不支持视频',
message: '请切换到 Kimi-k2.5 后再发送视频', message: '请切换到 Qwen3.5 或 Kimi-k2.5 后再发送视频',
type: 'error' type: 'error'
}); });
return; return;
@ -2876,7 +2876,7 @@ const appOptions = {
if (!['qwen3-vl-plus', 'kimi-k2.5'].includes(this.currentModelKey)) { if (!['qwen3-vl-plus', 'kimi-k2.5'].includes(this.currentModelKey)) {
this.uiPushToast({ this.uiPushToast({
title: '当前模型不支持图片', title: '当前模型不支持图片',
message: '请选择 Qwen-VL 或 Kimi-k2.5 后再发送图片', message: '请选择 Qwen3.5 或 Kimi-k2.5 后再发送图片',
type: 'error' type: 'error'
}); });
return; return;
@ -2891,10 +2891,10 @@ const appOptions = {
}, },
async openVideoPicker() { async openVideoPicker() {
if (this.currentModelKey !== 'kimi-k2.5') { if (!['qwen3-vl-plus', 'kimi-k2.5'].includes(this.currentModelKey)) {
this.uiPushToast({ this.uiPushToast({
title: '当前模型不支持视频', title: '当前模型不支持视频',
message: '请切换到 Kimi-k2.5 后再发送视频', message: '请切换到 Qwen3.5 或 Kimi-k2.5 后再发送视频',
type: 'error' type: 'error'
}); });
return; return;
@ -3163,7 +3163,7 @@ const appOptions = {
if (this.conversationHasImages && !['qwen3-vl-plus', 'kimi-k2.5'].includes(key)) { if (this.conversationHasImages && !['qwen3-vl-plus', 'kimi-k2.5'].includes(key)) {
this.uiPushToast({ this.uiPushToast({
title: '切换失败', title: '切换失败',
message: '当前对话包含图片,仅支持 Qwen-VL 或 Kimi-k2.5', message: '当前对话包含图片,仅支持 Qwen3.5 或 Kimi-k2.5',
type: 'error' type: 'error'
}); });
return; return;
@ -3187,16 +3187,11 @@ const appOptions = {
this.thinkingMode = data.thinking_mode ?? (data.run_mode !== 'fast'); this.thinkingMode = data.thinking_mode ?? (data.run_mode !== 'fast');
} else { } else {
// 前端兼容策略:根据模型特性自动调整运行模式 // 前端兼容策略:根据模型特性自动调整运行模式
if (key === 'qwen3-vl-plus') { const currentModel = modelStore.currentModel;
// Qwen-VL 不支持深度思考,若当前为 deep 则回落到思考模式 if (currentModel?.deepOnly) {
if (this.runMode === 'deep') { this.runMode = 'deep';
this.runMode = 'thinking'; this.thinkingMode = true;
this.thinkingMode = true; } else if (currentModel?.fastOnly) {
} else {
this.thinkingMode = this.runMode !== 'fast';
}
} else if (key === 'qwen3-max') {
// Qwen-Max 仅快速模式
this.runMode = 'fast'; this.runMode = 'fast';
this.thinkingMode = false; this.thinkingMode = false;
} else { } else {
@ -3244,12 +3239,12 @@ const appOptions = {
} }
const modelStore = useModelStore(); const modelStore = useModelStore();
const fastOnly = modelStore.currentModel?.fastOnly; const fastOnly = modelStore.currentModel?.fastOnly;
const currentModelKey = modelStore.currentModel?.key; const deepOnly = modelStore.currentModel?.deepOnly;
if (fastOnly && mode !== 'fast') { if (fastOnly && mode !== 'fast') {
if (!options.suppressToast) { if (!options.suppressToast) {
this.uiPushToast({ this.uiPushToast({
title: '模式不可用', title: '模式不可用',
message: 'Qwen-Max只支持快速模式', message: '当前模型仅支持快速模式',
type: 'warning' type: 'warning'
}); });
} }
@ -3257,12 +3252,11 @@ const appOptions = {
this.inputCloseMenus(); this.inputCloseMenus();
return; return;
} }
// Qwen-VL 不支持深度思考模式 if (deepOnly && mode !== 'deep') {
if (currentModelKey === 'qwen3-vl-plus' && mode === 'deep') {
if (!options.suppressToast) { if (!options.suppressToast) {
this.uiPushToast({ this.uiPushToast({
title: '模式不可用', title: '模式不可用',
message: 'Qwen-VL 不支持深度思考模式,请使用快速或思考模式', message: '当前模型仅支持深度思考模式',
type: 'warning' type: 'warning'
}); });
} }

View File

@ -27,7 +27,7 @@
发送图片 发送图片
</button> </button>
<button <button
v-if="currentModelKey === 'kimi-k2.5'" v-if="['qwen3-vl-plus', 'kimi-k2.5'].includes(currentModelKey)"
type="button" type="button"
class="menu-entry" class="menu-entry"
@click.stop="$emit('pick-video')" @click.stop="$emit('pick-video')"

View File

@ -252,7 +252,7 @@
</button> </button>
</div> </div>
<p class="behavior-hint"> <p class="behavior-hint">
Qwen-Max 仅支持快速模式Qwen-VL 支持深度思考模式选择时会给出提示 MiniMax-M2.5 支持深度思考模式选择时会给出提示
</p> </p>
</div> </div>
<div class="behavior-field"> <div class="behavior-field">
@ -720,8 +720,8 @@ const modelOptions = [
{ id: 'deepseek', label: 'DeepSeek', desc: '通用 + 思考强化', value: 'deepseek' }, { id: 'deepseek', label: 'DeepSeek', desc: '通用 + 思考强化', value: 'deepseek' },
{ id: 'kimi-k2.5', label: 'Kimi-k2.5', desc: '新版 Kimi思考开关 + 图文多模态', value: 'kimi-k2.5', badge: '图文' }, { id: 'kimi-k2.5', label: 'Kimi-k2.5', desc: '新版 Kimi思考开关 + 图文多模态', value: 'kimi-k2.5', badge: '图文' },
{ id: 'kimi', label: 'Kimi-k2', desc: '旧版 Kimi-k2兼顾通用对话', value: 'kimi' }, { id: 'kimi', label: 'Kimi-k2', desc: '旧版 Kimi-k2兼顾通用对话', value: 'kimi' },
{ id: 'qwen3-max', label: 'Qwen-Max', desc: '仅快速模式,不支持思考', value: 'qwen3-max', badge: '仅快速' }, { id: 'qwen3-vl-plus', label: 'Qwen3.5', desc: '图文多模态 + 深度思考', value: 'qwen3-vl-plus', badge: '图文' },
{ id: 'qwen3-vl-plus', label: 'Qwen-VL', desc: '图文多模态,思考/快速均可', value: 'qwen3-vl-plus', badge: '图文' } { id: 'minimax-m2.5', label: 'MiniMax-M2.5', desc: '仅深度思考,超长上下文', value: 'minimax-m2.5', badge: '深度思考' }
] as const; ] as const;
const filteredModelOptions = computed(() => const filteredModelOptions = computed(() =>
@ -815,11 +815,8 @@ const setDefaultModel = (value: string) => {
const checkModeModelConflict = (mode: RunModeValue, model: string | null): boolean => { const checkModeModelConflict = (mode: RunModeValue, model: string | null): boolean => {
const warnings: string[] = []; const warnings: string[] = [];
if (model === 'qwen3-max' && mode && mode !== 'fast') { if (model === 'minimax-m2.5' && mode && mode !== 'deep') {
warnings.push('Qwen-Max 仅支持快速模式,已保持原设置。'); warnings.push('MiniMax-M2.5 仅支持深度思考模式,已保持原设置。');
}
if (model === 'qwen3-vl-plus' && mode === 'deep') {
warnings.push('Qwen-VL 不支持深度思考模式,已保持原设置。');
} }
if (warnings.length) { if (warnings.length) {
uiStore.pushToast({ uiStore.pushToast({

View File

@ -1,6 +1,6 @@
import { defineStore } from 'pinia'; import { defineStore } from 'pinia';
export type ModelKey = 'kimi-k2.5' | 'kimi' | 'deepseek' | 'qwen3-max' | 'qwen3-vl-plus'; export type ModelKey = 'kimi-k2.5' | 'kimi' | 'deepseek' | 'qwen3-vl-plus' | 'minimax-m2.5';
export interface ModelOption { export interface ModelOption {
key: ModelKey; key: ModelKey;
@ -8,6 +8,7 @@ export interface ModelOption {
description: string; description: string;
fastOnly: boolean; fastOnly: boolean;
supportsThinking: boolean; supportsThinking: boolean;
deepOnly?: boolean;
} }
interface ModelState { interface ModelState {
@ -40,19 +41,20 @@ export const useModelStore = defineStore('model', {
fastOnly: false, fastOnly: false,
supportsThinking: true supportsThinking: true
}, },
{
key: 'qwen3-max',
label: 'Qwen-Max',
description: '仅支持快速模式',
fastOnly: true,
supportsThinking: false
},
{ {
key: 'qwen3-vl-plus', key: 'qwen3-vl-plus',
label: 'Qwen-VL', label: 'Qwen3.5',
description: '支持图片输入', description: '图文视频多模态 + 深度思考',
fastOnly: false, fastOnly: false,
supportsThinking: true supportsThinking: true
},
{
key: 'minimax-m2.5',
label: 'MiniMax-M2.5',
description: '仅深度思考,超长上下文',
fastOnly: false,
supportsThinking: true,
deepOnly: true
} }
] ]
}), }),

View File

@ -397,7 +397,7 @@ export const usePersonalizationStore = defineStore('personalization', {
this.clearFeedback(); this.clearFeedback();
}, },
setDefaultModel(model: string | null) { setDefaultModel(model: string | null) {
const allowed = ['deepseek', 'kimi-k2.5', 'kimi', 'qwen3-max', 'qwen3-vl-plus']; const allowed = ['deepseek', 'kimi-k2.5', 'kimi', 'qwen3-vl-plus', 'minimax-m2.5'];
const target = typeof model === 'string' && allowed.includes(model) ? model : null; const target = typeof model === 'string' && allowed.includes(model) ? model : null;
this.form = { this.form = {
...this.form, ...this.form,

View File

@ -4,7 +4,7 @@
import httpx import httpx
import json import json
import asyncio import asyncio
from typing import List, Dict, Optional, AsyncGenerator from typing import List, Dict, Optional, AsyncGenerator, Any
try: try:
from config import API_BASE_URL, API_KEY, MODEL_ID, OUTPUT_FORMATS, DEFAULT_RESPONSE_MAX_TOKENS from config import API_BASE_URL, API_KEY, MODEL_ID, OUTPUT_FORMATS, DEFAULT_RESPONSE_MAX_TOKENS
except ImportError: except ImportError:
@ -98,6 +98,54 @@ class DeepSeekClient:
return json.dumps(data, ensure_ascii=False) return json.dumps(data, ensure_ascii=False)
def _extract_reasoning_delta(self, delta: Dict[str, Any]) -> str:
"""统一提取思考内容,兼容 reasoning_content / reasoning_details。"""
if not isinstance(delta, dict):
return ""
if "reasoning_content" in delta:
return delta.get("reasoning_content") or ""
details = delta.get("reasoning_details")
if isinstance(details, list):
parts: List[str] = []
for item in details:
if isinstance(item, dict):
text = item.get("text")
if text:
parts.append(text)
if parts:
return "".join(parts)
return ""
def _merge_system_messages(self, messages: List[Dict]) -> List[Dict]:
"""
将多个 system 消息合并为一个
"""
if not messages:
return messages
merged_contents: List[str] = []
new_messages: List[Dict] = []
first_system_index: Optional[int] = None
for msg in messages:
if msg.get("role") == "system":
if first_system_index is None:
first_system_index = len(new_messages)
content = msg.get("content", "")
if isinstance(content, str):
merged_contents.append(content)
else:
merged_contents.append(json.dumps(content, ensure_ascii=False))
else:
new_messages.append(msg)
if not merged_contents:
return messages
merged = {
"role": "system",
"content": "\n\n".join(c for c in merged_contents if c)
}
insert_at = first_system_index if first_system_index is not None else 0
new_messages.insert(insert_at, merged)
return new_messages
def start_new_task(self): def start_new_task(self):
"""开始新任务(重置任务级别的状态)""" """开始新任务(重置任务级别的状态)"""
self.current_task_first_call = True self.current_task_first_call = True
@ -215,9 +263,11 @@ class DeepSeekClient:
except (TypeError, ValueError): except (TypeError, ValueError):
max_tokens = 4096 max_tokens = 4096
final_messages = self._merge_system_messages(messages)
payload = { payload = {
"model": self.model_id, "model": self.model_id,
"messages": messages, "messages": final_messages,
"stream": stream, "stream": stream,
"thinking": {"type": "enabled" if current_thinking_mode else "disabled"}, "thinking": {"type": "enabled" if current_thinking_mode else "disabled"},
"max_tokens": max_tokens "max_tokens": max_tokens
@ -332,9 +382,9 @@ class DeepSeekClient:
delta = chunk["choices"][0].get("delta", {}) delta = chunk["choices"][0].get("delta", {})
# 处理思考内容(只在思考模式开启时) # 处理思考内容(只在思考模式开启时)
if "reasoning_content" in delta and should_show_thinking: if should_show_thinking:
reasoning_content = delta["reasoning_content"] reasoning_content = self._extract_reasoning_delta(delta)
if reasoning_content: # 只处理非空内容 if reasoning_content:
if not in_thinking: if not in_thinking:
self._print("💭 [正在思考]\n", end="", flush=True) self._print("💭 [正在思考]\n", end="", flush=True)
in_thinking = True in_thinking = True
@ -568,9 +618,9 @@ class DeepSeekClient:
delta = chunk["choices"][0].get("delta", {}) delta = chunk["choices"][0].get("delta", {})
# 处理思考内容 # 处理思考内容
if "reasoning_content" in delta and should_show_thinking: if should_show_thinking:
reasoning_content = delta["reasoning_content"] reasoning_content = self._extract_reasoning_delta(delta)
if reasoning_content: # 只处理非空内容 if reasoning_content:
if not in_thinking: if not in_thinking:
self._print("💭 [正在思考]\n", end="", flush=True) self._print("💭 [正在思考]\n", end="", flush=True)
in_thinking = True in_thinking = True

View File

@ -3354,22 +3354,34 @@ async def handle_task_with_sender(terminal: WebTerminal, message, sender, client
if finish_reason: if finish_reason:
last_finish_reason = finish_reason last_finish_reason = finish_reason
# 处理思考内容 # 处理思考内容(兼容 reasoning_content / reasoning_details
reasoning_content = ""
if "reasoning_content" in delta: if "reasoning_content" in delta:
reasoning_content = delta["reasoning_content"] reasoning_content = delta.get("reasoning_content") or ""
if reasoning_content: elif "reasoning_details" in delta:
reasoning_chunks += 1 details = delta.get("reasoning_details")
debug_log(f" 思考内容 #{reasoning_chunks}: {len(reasoning_content)} 字符") if isinstance(details, list):
parts = []
for item in details:
if isinstance(item, dict):
text = item.get("text")
if text:
parts.append(text)
if parts:
reasoning_content = "".join(parts)
if reasoning_content:
reasoning_chunks += 1
debug_log(f" 思考内容 #{reasoning_chunks}: {len(reasoning_content)} 字符")
if should_show_thinking: if should_show_thinking:
if not thinking_started: if not thinking_started:
in_thinking = True in_thinking = True
thinking_started = True thinking_started = True
sender('thinking_start', {}) sender('thinking_start', {})
await asyncio.sleep(0.05) await asyncio.sleep(0.05)
current_thinking += reasoning_content current_thinking += reasoning_content
sender('thinking_chunk', {'content': reasoning_content}) sender('thinking_chunk', {'content': reasoning_content})
# 处理正常内容 # 处理正常内容
if "content" in delta: if "content" in delta:

View File

@ -128,11 +128,12 @@ class DeepSeekClient:
match_texts.append("未找到匹配内容。") match_texts.append("未找到匹配内容。")
return "\n".join([header] + match_texts) return "\n".join([header] + match_texts)
def _build_content_with_images(self, text: str, images: List[str], videos: Optional[List[str]] = None) -> Any: def _build_content_with_images(self, text: str, images: List[str], videos: Optional[List[Any]] = None) -> Any:
"""将文本与图片/视频路径拼成多模态 content用于 tool 消息)。""" """将文本与图片/视频路径拼成多模态 content用于 tool 消息)。"""
videos = videos or [] videos = videos or []
if not images and not videos: if not images and not videos:
return text return text
qwen_video_fps = 2
parts: List[Dict[str, Any]] = [] parts: List[Dict[str, Any]] = []
if text: if text:
parts.append({"type": "text", "text": text}) parts.append({"type": "text", "text": text})
@ -150,8 +151,14 @@ class DeepSeekClient:
parts.append({"type": "image_url", "image_url": {"url": f"data:{mime};base64,{b64}"}}) parts.append({"type": "image_url", "image_url": {"url": f"data:{mime};base64,{b64}"}})
except Exception: except Exception:
continue continue
for path in videos: for item in videos:
try: try:
if isinstance(item, dict):
path = item.get("path") or ""
else:
path = item
if not path:
continue
abs_path = (base_path / path).resolve() abs_path = (base_path / path).resolve()
if not abs_path.exists() or not abs_path.is_file(): if not abs_path.exists() or not abs_path.is_file():
continue continue
@ -160,7 +167,13 @@ class DeepSeekClient:
mime = "video/mp4" mime = "video/mp4"
data = abs_path.read_bytes() data = abs_path.read_bytes()
b64 = base64.b64encode(data).decode("utf-8") b64 = base64.b64encode(data).decode("utf-8")
parts.append({"type": "video_url", "video_url": {"url": f"data:{mime};base64,{b64}"}}) payload: Dict[str, Any] = {
"type": "video_url",
"video_url": {"url": f"data:{mime};base64,{b64}"}
}
if self.model_key == "qwen3-vl-plus":
payload["fps"] = qwen_video_fps
parts.append(payload)
except Exception: except Exception:
continue continue
return parts if parts else text return parts if parts else text
@ -184,6 +197,55 @@ class DeepSeekClient:
return json.dumps(data, ensure_ascii=False) return json.dumps(data, ensure_ascii=False)
def _extract_reasoning_delta(self, delta: Dict[str, Any]) -> str:
"""统一提取思考内容,兼容 reasoning_content / reasoning_details。"""
if not isinstance(delta, dict):
return ""
if "reasoning_content" in delta:
return delta.get("reasoning_content") or ""
details = delta.get("reasoning_details")
if isinstance(details, list):
parts: List[str] = []
for item in details:
if isinstance(item, dict):
text = item.get("text")
if text:
parts.append(text)
if parts:
return "".join(parts)
return ""
def _merge_system_messages(self, messages: List[Dict]) -> List[Dict]:
"""
将多个 system 消息合并为一个部分模型仅支持单条 system
保留原有顺序把合并后的 system 放在第一条 system 的位置
"""
if not messages:
return messages
merged_contents: List[str] = []
new_messages: List[Dict] = []
first_system_index: Optional[int] = None
for msg in messages:
if msg.get("role") == "system":
if first_system_index is None:
first_system_index = len(new_messages)
content = msg.get("content", "")
if isinstance(content, str):
merged_contents.append(content)
else:
merged_contents.append(json.dumps(content, ensure_ascii=False))
else:
new_messages.append(msg)
if not merged_contents:
return messages
merged = {
"role": "system",
"content": "\n\n".join(c for c in merged_contents if c)
}
insert_at = first_system_index if first_system_index is not None else 0
new_messages.insert(insert_at, merged)
return new_messages
def set_deep_thinking_mode(self, enabled: bool): def set_deep_thinking_mode(self, enabled: bool):
"""配置深度思考模式(持续使用思考模型)。""" """配置深度思考模式(持续使用思考模型)。"""
self.deep_thinking_mode = bool(enabled) self.deep_thinking_mode = bool(enabled)
@ -445,16 +507,24 @@ class DeepSeekClient:
else: else:
max_tokens = min(max_tokens, available) max_tokens = min(max_tokens, available)
lower_base_url = (api_config.get("base_url") or "").lower()
is_minimax = self.model_key == "minimax-m2.5" or "minimax" in lower_base_url
final_messages = self._merge_system_messages(messages)
payload = { payload = {
"model": api_config["model_id"], "model": api_config["model_id"],
"messages": messages, "messages": final_messages,
"stream": stream, "stream": stream,
"max_tokens": max_tokens
} }
if is_minimax:
payload["max_completion_tokens"] = max_tokens
else:
payload["max_tokens"] = max_tokens
# 部分平台(如 Qwen、DeepSeek需要显式请求 usage 才会在流式尾包返回 # 部分平台(如 Qwen、DeepSeek需要显式请求 usage 才会在流式尾包返回
if stream: if stream:
should_include_usage = False should_include_usage = False
if self.model_key in {"qwen3-max", "qwen3-vl-plus", "deepseek"}: if self.model_key in {"qwen3-max", "qwen3-vl-plus", "deepseek", "minimax-m2.5"}:
should_include_usage = True should_include_usage = True
# 兜底:根据 base_url 识别 openai 兼容的提供商 # 兜底:根据 base_url 识别 openai 兼容的提供商
if api_config["base_url"]: if api_config["base_url"]:
@ -462,14 +532,18 @@ class DeepSeekClient:
if any(keyword in lower_url for keyword in ["dashscope", "aliyuncs", "deepseek.com"]): if any(keyword in lower_url for keyword in ["dashscope", "aliyuncs", "deepseek.com"]):
should_include_usage = True should_include_usage = True
if should_include_usage: if should_include_usage:
payload.setdefault("stream_options", {})["include_usage"] = True if is_minimax:
payload["include_usage"] = True
else:
payload.setdefault("stream_options", {})["include_usage"] = True
# 注入模型额外参数(如 Qwen enable_thinking # 注入模型额外参数(如 Qwen enable_thinking
extra_params = self.thinking_extra_params if current_thinking_mode else self.fast_extra_params extra_params = self.thinking_extra_params if current_thinking_mode else self.fast_extra_params
if extra_params: if extra_params:
payload.update(extra_params) payload.update(extra_params)
if tools: if tools:
payload["tools"] = tools payload["tools"] = tools
payload["tool_choice"] = "auto" if not is_minimax:
payload["tool_choice"] = "auto"
# 将本次请求落盘,便于出错时快速定位 # 将本次请求落盘,便于出错时快速定位
dump_path = self._dump_request_payload(payload, api_config, headers) dump_path = self._dump_request_payload(payload, api_config, headers)
@ -636,15 +710,14 @@ class DeepSeekClient:
delta = chunk["choices"][0].get("delta", {}) delta = chunk["choices"][0].get("delta", {})
# 处理思考内容 # 处理思考内容
if "reasoning_content" in delta: reasoning_content = self._extract_reasoning_delta(delta)
reasoning_content = delta["reasoning_content"] if reasoning_content:
if reasoning_content: # 只处理非空内容 if not in_thinking:
if not in_thinking: self._print("💭 [正在思考]\n", end="", flush=True)
self._print("💭 [正在思考]\n", end="", flush=True) in_thinking = True
in_thinking = True thinking_printed = True
thinking_printed = True current_thinking += reasoning_content
current_thinking += reasoning_content self._print(reasoning_content, end="", flush=True)
self._print(reasoning_content, end="", flush=True)
# 处理正常内容 - 独立的if不是elif # 处理正常内容 - 独立的if不是elif
if "content" in delta: if "content" in delta:
@ -910,14 +983,13 @@ class DeepSeekClient:
delta = chunk["choices"][0].get("delta", {}) delta = chunk["choices"][0].get("delta", {})
# 处理思考内容 # 处理思考内容
if "reasoning_content" in delta: reasoning_content = self._extract_reasoning_delta(delta)
reasoning_content = delta["reasoning_content"] if reasoning_content:
if reasoning_content: # 只处理非空内容 if not in_thinking:
if not in_thinking: self._print("💭 [正在思考]\n", end="", flush=True)
self._print("💭 [正在思考]\n", end="", flush=True) in_thinking = True
in_thinking = True thinking_content += reasoning_content
thinking_content += reasoning_content self._print(reasoning_content, end="", flush=True)
self._print(reasoning_content, end="", flush=True)
# 处理正常内容 - 独立的if而不是elif # 处理正常内容 - 独立的if而不是elif
if "content" in delta: if "content" in delta:

View File

@ -1414,12 +1414,14 @@ class ContextManager:
except Exception: except Exception:
return None return None
def _build_content_with_images(self, text: str, images: List[str], videos: Optional[List[str]] = None) -> Any: def _build_content_with_images(self, text: str, images: List[str], videos: Optional[List[Any]] = None) -> Any:
"""将文本与图片/视频路径组合成多模态content图片转换为data URI视频转换为 data URL。""" """将文本与图片/视频路径组合成多模态content图片转换为data URI视频转换为 data URL。"""
videos = videos or [] videos = videos or []
if not images and not videos: if not images and not videos:
return text return text
parts: List[Dict[str, Any]] = [] parts: List[Dict[str, Any]] = []
supports_video_fps = getattr(getattr(self, "main_terminal", None), "model_key", None) == "qwen3-vl-plus"
qwen_video_fps = 2
if text: if text:
parts.append({"type": "text", "text": text}) parts.append({"type": "text", "text": text})
for path in images: for path in images:
@ -1438,8 +1440,14 @@ class ContextManager:
parts.append({"type": "image_url", "image_url": {"url": data_url}}) parts.append({"type": "image_url", "image_url": {"url": data_url}})
except Exception: except Exception:
continue continue
for path in videos: for item in videos:
try: try:
if isinstance(item, dict):
path = item.get("path") or ""
else:
path = item
if not path:
continue
abs_path = Path(self.project_path) / path abs_path = Path(self.project_path) / path
if not abs_path.exists() or not abs_path.is_file(): if not abs_path.exists() or not abs_path.is_file():
continue continue
@ -1451,7 +1459,10 @@ class ContextManager:
data = abs_path.read_bytes() data = abs_path.read_bytes()
b64 = base64.b64encode(data).decode("utf-8") b64 = base64.b64encode(data).decode("utf-8")
data_url = f"data:{mime};base64,{b64}" data_url = f"data:{mime};base64,{b64}"
parts.append({"type": "video_url", "video_url": {"url": data_url}}) payload: Dict[str, Any] = {"type": "video_url", "video_url": {"url": data_url}}
if supports_video_fps:
payload["fps"] = qwen_video_fps
parts.append(payload)
except Exception: except Exception:
continue continue
return parts if parts else text return parts if parts else text
@ -1501,7 +1512,7 @@ class ContextManager:
def build_messages(self, context: Dict, user_input: str) -> List[Dict]: def build_messages(self, context: Dict, user_input: str) -> List[Dict]:
"""构建消息列表(添加终端内容注入)""" """构建消息列表(添加终端内容注入)"""
# 加载系统提示Qwen-VL 使用专用提示) # 加载系统提示Qwen3.5 使用专用提示)
model_key = getattr(self.main_terminal, "model_key", "kimi") if hasattr(self, "main_terminal") else "kimi" model_key = getattr(self.main_terminal, "model_key", "kimi") if hasattr(self, "main_terminal") else "kimi"
prompt_name = "main_system_qwenvl" if model_key in {"qwen3-vl-plus", "kimi-k2.5"} else "main_system" prompt_name = "main_system_qwenvl" if model_key in {"qwen3-vl-plus", "kimi-k2.5"} else "main_system"
system_prompt = self.load_prompt(prompt_name) system_prompt = self.load_prompt(prompt_name)