feat: add video send/view flow and guard model constraints

This commit is contained in:
JOJO 2026-01-30 17:04:33 +08:00
parent 462b0ed6f3
commit bb91d22631
16 changed files with 619 additions and 44 deletions

View File

@ -119,6 +119,7 @@ class MainTerminal:
self.terminal_ops = TerminalOperator(project_path, container_session=container_session)
self.ocr_client = OCRClient(project_path, self.file_manager)
self.pending_image_view = None # 供 view_image 工具使用,保存一次性图片插入请求
self.pending_video_view = None # 供 view_video 工具使用,保存一次性视频插入请求
# 新增:终端管理器
self.terminal_manager = TerminalManager(
@ -1826,6 +1827,24 @@ class MainTerminal:
}
}
})
if getattr(self, "model_key", None) == "kimi-k2.5":
tools.append({
"type": "function",
"function": {
"name": "view_video",
"description": "将指定本地视频插入到对话中(系统代发一条包含视频的消息),便于模型查看视频内容。",
"parameters": {
"type": "object",
"properties": self._inject_intent({
"path": {
"type": "string",
"description": "项目内的视频相对路径(不要以 /workspace 开头),支持 mp4/mov/mkv/avi/webm。"
}
}),
"required": ["path"]
}
}
})
# 附加自定义工具(仅管理员可见)
custom_tools = self._build_custom_tools()
if custom_tools:
@ -1923,6 +1942,26 @@ class MainTerminal:
"path": str(path)
}
result = {"success": True, "message": "图片已请求插入到对话中,将在后续消息中呈现。", "path": path}
elif tool_name == "view_video":
path = (arguments.get("path") or "").strip()
if not path:
return json.dumps({"success": False, "error": "path 不能为空"}, ensure_ascii=False)
if path.startswith("/workspace"):
return json.dumps({"success": False, "error": "非法路径,超出项目根目录,请使用相对路径"}, ensure_ascii=False)
abs_path = (Path(self.context_manager.project_path) / path).resolve()
try:
abs_path.relative_to(Path(self.context_manager.project_path).resolve())
except Exception:
return json.dumps({"success": False, "error": "非法路径,超出项目根目录,请使用相对路径"}, ensure_ascii=False)
if not abs_path.exists() or not abs_path.is_file():
return json.dumps({"success": False, "error": f"视频不存在: {path}"}, ensure_ascii=False)
allowed_ext = {".mp4", ".mov", ".mkv", ".avi", ".webm"}
if abs_path.suffix.lower() not in allowed_ext:
return json.dumps({"success": False, "error": f"不支持的视频格式: {abs_path.suffix}"}, ensure_ascii=False)
if abs_path.stat().st_size > 50 * 1024 * 1024:
return json.dumps({"success": False, "error": "视频过大,需 <= 50MB"}, ensure_ascii=False)
self.pending_video_view = {"path": str(path)}
result = {"success": True, "message": "视频已请求插入到对话中,将在后续消息中呈现。", "path": path}
# 终端会话管理工具
elif tool_name == "terminal_session":
@ -2511,9 +2550,10 @@ class MainTerminal:
else:
# User 或普通 System 消息
images = conv.get("images") or metadata.get("images") or []
videos = conv.get("videos") or metadata.get("videos") or []
content_payload = (
self.context_manager._build_content_with_images(conv["content"], images)
if images else conv["content"]
self.context_manager._build_content_with_images(conv["content"], images, videos)
if (images or videos) else conv["content"]
)
messages.append({
"role": conv["role"],
@ -2686,6 +2726,8 @@ class MainTerminal:
profile = get_model_profile(model_key)
if getattr(self.context_manager, "has_images", False) and model_key not in {"qwen3-vl-plus", "kimi-k2.5"}:
raise ValueError("当前对话包含图片,仅支持 Qwen-VL 或 Kimi-k2.5")
if getattr(self.context_manager, "has_videos", False) and model_key != "kimi-k2.5":
raise ValueError("当前对话包含视频,仅支持 Kimi-k2.5")
self.model_key = model_key
self.model_profile = profile
# 将模型标识传递给底层 API 客户端,便于按模型做兼容处理

View File

@ -321,6 +321,7 @@ class WebTerminal(MainTerminal):
"run_mode": self.run_mode,
"model_key": getattr(self, "model_key", None),
"has_images": getattr(self.context_manager, "has_images", False),
"has_videos": getattr(self.context_manager, "has_videos", False),
"context": {
"usage_percent": context_status['usage_percent'],
"total_size": context_status['sizes']['total'],

View File

@ -69,7 +69,9 @@ def update_thinking_mode(terminal: WebTerminal, workspace: UserWorkspace, userna
todo_list=ctx.todo_list,
thinking_mode=terminal.thinking_mode,
run_mode=terminal.run_mode,
model_key=getattr(terminal, "model_key", None)
model_key=getattr(terminal, "model_key", None),
has_images=getattr(ctx, "has_images", False),
has_videos=getattr(ctx, "has_videos", False)
)
except Exception as exc:
print(f"[API] 保存思考模式到对话失败: {exc}")
@ -134,7 +136,8 @@ def update_model(terminal: WebTerminal, workspace: UserWorkspace, username: str)
thinking_mode=terminal.thinking_mode,
run_mode=terminal.run_mode,
model_key=terminal.model_key,
has_images=getattr(ctx, "has_images", False)
has_images=getattr(ctx, "has_images", False),
has_videos=getattr(ctx, "has_videos", False)
)
except Exception as exc:
print(f"[API] 保存模型到对话失败: {exc}")

View File

@ -380,14 +380,15 @@ def detect_tool_failure(result_data: Any) -> bool:
return False
def process_message_task(terminal: WebTerminal, message: str, images, sender, client_sid, workspace: UserWorkspace, username: str):
def process_message_task(terminal: WebTerminal, message: str, images, sender, client_sid, workspace: UserWorkspace, username: str, videos=None):
"""在后台处理消息任务"""
videos = videos or []
try:
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)
# 创建可取消的任务
task = loop.create_task(handle_task_with_sender(terminal, workspace, message, images, sender, client_sid, username))
task = loop.create_task(handle_task_with_sender(terminal, workspace, message, images, sender, client_sid, username, videos))
entry = get_stop_flag(client_sid, username)
if not isinstance(entry, dict):
@ -462,10 +463,11 @@ def detect_malformed_tool_call(text):
return False
async def handle_task_with_sender(terminal: WebTerminal, workspace: UserWorkspace, message, images, sender, client_sid, username: str):
async def handle_task_with_sender(terminal: WebTerminal, workspace: UserWorkspace, message, images, sender, client_sid, username: str, videos=None):
"""处理任务并发送消息 - 集成token统计版本"""
web_terminal = terminal
conversation_id = getattr(web_terminal.context_manager, "current_conversation_id", None)
videos = videos or []
# 如果是思考模式,重置状态
if web_terminal.thinking_mode:
@ -478,7 +480,7 @@ async def handle_task_with_sender(terminal: WebTerminal, workspace: UserWorkspac
# 添加到对话历史
history_len_before = len(getattr(web_terminal.context_manager, "conversation_history", []) or [])
is_first_user_message = history_len_before == 0
web_terminal.context_manager.add_conversation("user", message, images=images)
web_terminal.context_manager.add_conversation("user", message, images=images, videos=videos)
if is_first_user_message and getattr(web_terminal, "context_manager", None):
try:
@ -1178,6 +1180,7 @@ async def handle_task_with_sender(terminal: WebTerminal, workspace: UserWorkspac
full_response = ""
tool_calls = []
video_injections = []
current_thinking = ""
detected_tools = {}
last_usage_payload = None
@ -2281,7 +2284,7 @@ async def handle_task_with_sender(terminal: WebTerminal, workspace: UserWorkspac
"content": tool_result_content
})
# 收集图片注入请求,延后统一追加
# 收集图片/视频注入请求,延后统一追加
if (
function_name == "view_image"
and getattr(web_terminal, "pending_image_view", None)
@ -2293,6 +2296,17 @@ async def handle_task_with_sender(terminal: WebTerminal, workspace: UserWorkspac
if inj and inj.get("path"):
image_injections.append(inj["path"])
if (
function_name == "view_video"
and getattr(web_terminal, "pending_video_view", None)
and not tool_failed
and (isinstance(result_data, dict) and result_data.get("success") is not False)
):
inj = web_terminal.pending_video_view
web_terminal.pending_video_view = None
if inj and inj.get("path"):
video_injections.append(inj["path"])
if function_name not in {'write_file', 'edit_file'}:
await process_sub_agent_updates(messages, inline=True, after_tool_call_id=tool_call_id)
@ -2304,7 +2318,7 @@ async def handle_task_with_sender(terminal: WebTerminal, workspace: UserWorkspac
# 标记不再是第一次迭代
is_first_iteration = False
# 统一附加图片消息,保证所有 tool 响应先完成
# 统一附加图片/视频消息,保证所有 tool 响应先完成
if image_injections:
for img_path in image_injections:
injected_text = "这是一条系统控制发送的信息,并非用户主动发送,目的是返回你需要查看的图片。"
@ -2327,6 +2341,29 @@ async def handle_task_with_sender(terminal: WebTerminal, workspace: UserWorkspac
'content': f'系统已按模型请求插入图片: {img_path}'
})
if video_injections:
for video_path in video_injections:
injected_text = "这是一条系统控制发送的信息,并非用户主动发送,目的是返回你需要查看的视频。"
web_terminal.context_manager.add_conversation(
"user",
injected_text,
videos=[video_path],
metadata={"system_injected_video": True}
)
content_payload = web_terminal.context_manager._build_content_with_images(
injected_text,
[],
[video_path]
)
messages.append({
"role": "user",
"content": content_payload,
"metadata": {"system_injected_video": True}
})
sender('system_message', {
'content': f'系统已按模型请求插入视频: {video_path}'
})
# 最终统计
debug_log(f"\n{'='*40}")
debug_log(f"任务完成统计:")
@ -2345,7 +2382,7 @@ async def handle_task_with_sender(terminal: WebTerminal, workspace: UserWorkspac
# === 统一对外入口 ===
def start_chat_task(terminal, message: str, images: Any, sender, client_sid: str, workspace, username: str):
def start_chat_task(terminal, message: str, images: Any, sender, client_sid: str, workspace, username: str, videos: Any = None):
"""在线程模式下启动对话任务,供 Socket 事件调用。"""
return socketio.start_background_task(
process_message_task,
@ -2356,9 +2393,10 @@ def start_chat_task(terminal, message: str, images: Any, sender, client_sid: str
client_sid,
workspace,
username,
videos
)
def run_chat_task_sync(terminal, message: str, images: Any, sender, client_sid: str, workspace, username: str):
def run_chat_task_sync(terminal, message: str, images: Any, sender, client_sid: str, workspace, username: str, videos: Any = None):
    """Run the chat task synchronously (used by tests / the CLI).

    Unlike the background-task entry point, this blocks the caller until the
    task finishes. ``videos`` is an optional list of workspace-relative video
    paths; ``None`` is normalized to an empty list inside
    ``process_message_task``.
    """
    return process_message_task(terminal, message, images, sender, client_sid, workspace, username, videos)

View File

@ -217,12 +217,19 @@ def handle_message(data):
message = (data.get('message') or '').strip()
images = data.get('images') or []
if not message and not images:
videos = data.get('videos') or []
if not message and not images and not videos:
emit('error', {'message': '消息不能为空'})
return
if images and getattr(terminal, "model_key", None) not in {"qwen3-vl-plus", "kimi-k2.5"}:
emit('error', {'message': '当前模型不支持图片,请切换到 Qwen-VL 或 Kimi-k2.5'})
return
if videos and getattr(terminal, "model_key", None) != "kimi-k2.5":
emit('error', {'message': '当前模型不支持视频,请切换到 Kimi-k2.5'})
return
if images and videos:
emit('error', {'message': '图片和视频请分开发送'})
return
print(f"[WebSocket] 收到消息: {message}")
debug_log(f"\n{'='*80}\n新任务开始: {message}\n{'='*80}")
@ -285,7 +292,8 @@ def handle_message(data):
# 传递客户端ID
images = data.get('images') or []
start_chat_task(terminal, message, images, send_with_activity, client_sid, workspace, username)
videos = data.get('videos') or []
start_chat_task(terminal, message, images, send_with_activity, client_sid, workspace, username, videos)
@socketio.on('client_chunk_log')

View File

@ -251,6 +251,7 @@
:icon-style="iconStyle"
:tool-category-icon="toolCategoryIcon"
:selected-images="selectedImages"
:selected-videos="selectedVideos"
:block-upload="policyUiBlocks.block_upload"
:block-tool-toggle="policyUiBlocks.block_tool_toggle"
:block-realtime-terminal="policyUiBlocks.block_realtime_terminal"
@ -279,7 +280,9 @@
@compress-conversation="handleCompressConversationClick"
@file-selected="handleFileSelected"
@pick-images="openImagePicker"
@pick-video="openVideoPicker"
@remove-image="handleRemoveImage"
@remove-video="handleRemoveVideo"
@open-review="openReviewDialog"
/>
</div>
@ -309,6 +312,17 @@
@confirm="handleImagesConfirmed"
/>
</transition>
<transition name="overlay-fade">
<VideoPicker
v-if="videoPickerOpen"
:open="videoPickerOpen"
:entries="videoEntries"
:initial-selected="selectedVideos"
:loading="videoLoading"
@close="closeVideoPicker"
@confirm="handleVideosConfirmed"
/>
</transition>
<transition name="overlay-fade">
<ConversationReviewDialog
v-if="reviewDialogOpen"
@ -464,6 +478,7 @@
<script setup lang="ts">
import appOptions from './app';
import VirtualMonitorSurface from './components/chat/VirtualMonitorSurface.vue';
import VideoPicker from './components/overlay/VideoPicker.vue';
const mobilePanelIcon = new URL('../icons/align-left.svg', import.meta.url).href;
const mobileMenuIcons = {

View File

@ -285,7 +285,10 @@ const appOptions = {
modelMenuOpen: false,
imageEntries: [],
imageLoading: false,
videoEntries: [],
videoLoading: false,
conversationHasImages: false,
conversationHasVideos: false,
conversationListRequestSeq: 0,
conversationListRefreshToken: 0,
@ -429,7 +432,9 @@ const appOptions = {
'toolMenuOpen',
'settingsOpen',
'imagePickerOpen',
'selectedImages'
'videoPickerOpen',
'selectedImages',
'selectedVideos'
]),
resolvedRunMode() {
const allowed = ['fast', 'thinking', 'deep'];
@ -819,7 +824,11 @@ const appOptions = {
inputSetImagePickerOpen: 'setImagePickerOpen',
inputSetSelectedImages: 'setSelectedImages',
inputClearSelectedImages: 'clearSelectedImages',
inputRemoveSelectedImage: 'removeSelectedImage'
inputRemoveSelectedImage: 'removeSelectedImage',
inputSetVideoPickerOpen: 'setVideoPickerOpen',
inputSetSelectedVideos: 'setSelectedVideos',
inputClearSelectedVideos: 'clearSelectedVideos',
inputRemoveSelectedVideo: 'removeSelectedVideo'
}),
...mapActions(useToolStore, {
toolRegisterAction: 'registerToolAction',
@ -1553,6 +1562,9 @@ const appOptions = {
if (status && typeof status.has_images !== 'undefined') {
this.conversationHasImages = !!status.has_images;
}
if (status && typeof status.has_videos !== 'undefined') {
this.conversationHasVideos = !!status.has_videos;
}
},
updateContainerStatus(status) {
@ -1879,12 +1891,13 @@ const appOptions = {
let currentAssistantMessage = null;
let historyHasImages = false;
let historyHasVideos = false;
historyMessages.forEach((message, index) => {
debugLog(`处理消息 ${index + 1}/${historyMessages.length}:`, message.role, message);
const meta = message.metadata || {};
if (message.role === 'user' && meta.system_injected_image) {
debugLog('跳过系统代发的图片消息(仅用于模型查看,不在前端展示)');
if (message.role === 'user' && (meta.system_injected_image || meta.system_injected_video)) {
debugLog('跳过系统代发的图片/视频消息(仅用于模型查看,不在前端展示)');
return;
}
@ -1895,13 +1908,18 @@ const appOptions = {
currentAssistantMessage = null;
}
const images = message.images || (message.metadata && message.metadata.images) || [];
const videos = message.videos || (message.metadata && message.metadata.videos) || [];
if (Array.isArray(images) && images.length) {
historyHasImages = true;
}
if (Array.isArray(videos) && videos.length) {
historyHasVideos = true;
}
this.messages.push({
role: 'user',
content: message.content || '',
images
images,
videos
});
debugLog('添加用户消息:', message.content?.substring(0, 50) + '...');
@ -2106,6 +2124,7 @@ const appOptions = {
}
this.conversationHasImages = historyHasImages;
this.conversationHasVideos = historyHasVideos;
debugLog(`历史消息渲染完成,共 ${this.messages.length} 条消息`);
this.logMessageState('renderHistoryMessages:after-render');
@ -2410,10 +2429,12 @@ const appOptions = {
const text = (this.inputMessage || '').trim();
const images = Array.isArray(this.selectedImages) ? this.selectedImages.slice(0, 9) : [];
const videos = Array.isArray(this.selectedVideos) ? this.selectedVideos.slice(0, 1) : [];
const hasText = text.length > 0;
const hasImages = images.length > 0;
const hasVideos = videos.length > 0;
if (!hasText && !hasImages) {
if (!hasText && !hasImages && !hasVideos) {
return;
}
@ -2432,12 +2453,31 @@ const appOptions = {
return;
}
if (hasVideos && this.currentModelKey !== 'kimi-k2.5') {
this.uiPushToast({
title: '当前模型不支持视频',
message: '请切换到 Kimi-k2.5 后再发送视频',
type: 'error'
});
return;
}
if (hasVideos && hasImages) {
this.uiPushToast({
title: '请勿同时发送',
message: '视频与图片需分开发送,每条仅包含一种媒体',
type: 'warning'
});
return;
}
const message = text;
const isCommand = hasText && !hasImages && message.startsWith('/');
const isCommand = hasText && !hasImages && !hasVideos && message.startsWith('/');
if (isCommand) {
this.socket.emit('send_command', { command: message });
this.inputClearMessage();
this.inputClearSelectedImages();
this.inputClearSelectedVideos();
this.autoResizeInput();
return;
}
@ -2454,18 +2494,25 @@ const appOptions = {
// 标记任务进行中,直到任务完成或用户手动停止
this.taskInProgress = true;
this.chatAddUserMessage(message, images);
this.socket.emit('send_message', { message: message, images, conversation_id: this.currentConversationId });
this.chatAddUserMessage(message, images, videos);
this.socket.emit('send_message', { message: message, images, videos, conversation_id: this.currentConversationId });
if (typeof this.monitorShowPendingReply === 'function') {
this.monitorShowPendingReply();
}
this.inputClearMessage();
this.inputClearSelectedImages();
this.inputClearSelectedVideos();
this.inputSetImagePickerOpen(false);
this.inputSetVideoPickerOpen(false);
this.inputSetLineCount(1);
this.inputSetMultiline(false);
if (hasImages) {
this.conversationHasImages = true;
this.conversationHasVideos = false;
}
if (hasVideos) {
this.conversationHasVideos = true;
this.conversationHasImages = false;
}
if (this.autoScrollEnabled) {
this.scrollToBottom();
@ -2669,6 +2716,24 @@ const appOptions = {
this.inputSetImagePickerOpen(false);
},
// Open the workspace video picker from the quick menu.
// Gated to kimi-k2.5 — the only model wired for video input here
// (mirrors the server-side check in the send_message handler).
async openVideoPicker() {
  if (this.currentModelKey !== 'kimi-k2.5') {
    this.uiPushToast({
      title: '当前模型不支持视频',
      message: '请切换到 Kimi-k2.5 后再发送视频',
      type: 'error'
    });
    return;
  }
  this.closeQuickMenu();
  this.inputSetVideoPickerOpen(true);
  // Load entries after the overlay opens so the picker shows its loading state.
  await this.loadWorkspaceVideos();
},
// Hide the video picker overlay without touching the current selection.
closeVideoPicker() {
  this.inputSetVideoPickerOpen(false);
},
async loadWorkspaceImages() {
this.imageLoading = true;
try {
@ -2746,6 +2811,83 @@ const appOptions = {
return results;
},
// Breadth-first walk of the workspace file API collecting video files.
// Caps the traversal at `maxFolders` directories and `maxResults` entries
// so a huge workspace cannot hang the picker. Returns {name, path} pairs.
async fetchAllVideoEntries(startPath = '') {
  const queue: string[] = [startPath || ''];
  const visited = new Set<string>();
  const results: Array<{ name: string; path: string }> = [];
  const exts = new Set(['.mp4', '.mov', '.mkv', '.avi', '.webm']);
  const maxFolders = 120;
  const maxResults = 200;
  while (queue.length && visited.size < maxFolders) {
    const path = queue.shift() || '';
    if (visited.has(path)) {
      continue;
    }
    visited.add(path);
    try {
      const resp = await fetch(`/api/gui/files/entries?path=${encodeURIComponent(path)}`, {
        method: 'GET',
        credentials: 'include',
        headers: { Accept: 'application/json' }
      });
      const data = await resp.json().catch(() => null);
      if (!data?.success) {
        continue;
      }
      const items = Array.isArray(data?.data?.items) ? data.data.items : [];
      for (const item of items) {
        const rawPath =
          item?.path ||
          [path, item?.name].filter(Boolean).join('/').replace(/\\/g, '/').replace(/\/{2,}/g, '/');
        const type = String(item?.type || '').toLowerCase();
        if (type === 'directory' || type === 'folder') {
          queue.push(rawPath);
          continue;
        }
        // Normalize the extension to a leading-dot form. Previously a backend
        // `extension` reported without the dot (e.g. "mp4") was truthy, so the
        // fallback never ran and the dotted `exts` lookup silently dropped
        // valid videos.
        let ext =
          String(item?.extension || '').toLowerCase() ||
          (rawPath.includes('.') ? `.${rawPath.split('.').pop()?.toLowerCase()}` : '');
        if (ext && !ext.startsWith('.')) {
          ext = `.${ext}`;
        }
        if (exts.has(ext)) {
          results.push({
            name: item?.name || rawPath.split('/').pop() || rawPath,
            path: rawPath
          });
          if (results.length >= maxResults) {
            return results;
          }
        }
      }
    } catch (error) {
      console.warn('遍历文件夹失败', path, error);
    }
  }
  return results;
},
// Refresh the video picker's entry list; toasts on empty results or failure.
// Always clears the loading flag, even when the traversal throws.
async loadWorkspaceVideos() {
  this.videoLoading = true;
  try {
    const found = await this.fetchAllVideoEntries('');
    this.videoEntries = found;
    if (!found.length) {
      this.uiPushToast({
        title: '未找到视频',
        message: '工作区内没有可用的视频文件',
        type: 'info'
      });
    }
  } catch (error) {
    console.error('加载视频列表失败', error);
    this.uiPushToast({
      title: '加载视频失败',
      message: error?.message || '请稍后重试',
      type: 'error'
    });
  } finally {
    this.videoLoading = false;
  }
},
handleImagesConfirmed(list) {
this.inputSetSelectedImages(Array.isArray(list) ? list : []);
this.inputSetImagePickerOpen(false);
@ -2753,6 +2895,17 @@ const appOptions = {
handleRemoveImage(path) {
this.inputRemoveSelectedImage(path);
},
handleVideosConfirmed(list) {
const arr = Array.isArray(list) ? list.slice(0, 1) : [];
this.inputSetSelectedVideos(arr);
this.inputSetVideoPickerOpen(false);
if (arr.length) {
this.inputClearSelectedImages();
}
},
// Remove a single video chip from the pending-input selection.
handleRemoveVideo(path) {
  this.inputRemoveSelectedVideo(path);
},
handleQuickUpload() {
if (this.uploading || !this.isConnected) {

View File

@ -12,6 +12,9 @@
<div v-if="msg.images && msg.images.length" class="image-inline-row">
<span class="image-name" v-for="img in msg.images" :key="img">{{ formatImageName(img) }}</span>
</div>
<div v-if="msg.videos && msg.videos.length" class="image-inline-row video-inline-row">
<span class="image-name" v-for="video in msg.videos" :key="video">{{ formatImageName(video) }}</span>
</div>
</div>
</div>
<div v-else-if="msg.role === 'assistant'" class="assistant-message">

View File

@ -18,6 +18,12 @@
<button type="button" class="image-remove-btn" @click.stop="$emit('remove-image', img)">×</button>
</span>
</div>
<div v-if="selectedVideos && selectedVideos.length" class="image-inline-row video-inline-row">
<span class="image-name" v-for="video in selectedVideos" :key="video">
{{ formatImageName(video) }}
<button type="button" class="image-remove-btn" @click.stop="$emit('remove-video', video)">×</button>
</span>
</div>
<div class="input-row">
<button
type="button"
@ -46,7 +52,7 @@
:disabled="
!isConnected ||
(inputLocked && !streamingMessage) ||
((!(inputMessage || '').trim() && (!selectedImages || !selectedImages.length)) && !streamingMessage)
((!(inputMessage || '').trim() && (!selectedImages?.length && !selectedVideos?.length)) && !streamingMessage)
"
>
<span v-if="streamingMessage" class="stop-icon"></span>
@ -83,6 +89,7 @@
:block-conversation-review="blockConversationReview"
@quick-upload="triggerQuickUpload"
@pick-images="$emit('pick-images')"
@pick-video="$emit('pick-video')"
@toggle-tool-menu="$emit('toggle-tool-menu')"
@toggle-settings="$emit('toggle-settings')"
@toggle-mode-menu="$emit('toggle-mode-menu')"
@ -117,6 +124,7 @@ const emit = defineEmits([
'send-or-stop',
'quick-upload',
'pick-images',
'pick-video',
'toggle-tool-menu',
'toggle-mode-menu',
'toggle-model-menu',
@ -130,6 +138,7 @@ const emit = defineEmits([
'compress-conversation',
'file-selected',
'remove-image',
'remove-video',
'open-review'
]);
@ -157,6 +166,7 @@ const props = defineProps<{
modelOptions: Array<{ key: string; label: string; description: string; disabled?: boolean }>;
currentModelKey: string;
selectedImages?: string[];
selectedVideos?: string[];
blockUpload?: boolean;
blockToolToggle?: boolean;
blockRealtimeTerminal?: boolean;

View File

@ -26,6 +26,15 @@
>
发送图片
</button>
<button
v-if="currentModelKey === 'kimi-k2.5'"
type="button"
class="menu-entry"
@click.stop="$emit('pick-video')"
:disabled="!isConnected || streamingMessage"
>
发送视频
</button>
<button
type="button"
class="menu-entry has-submenu"
@ -166,6 +175,7 @@ defineEmits<{
(event: 'toggle-model-menu'): void;
(event: 'select-model', key: string): void;
(event: 'open-review'): void;
(event: 'pick-video'): void;
}>();
const runModeOptions = [

View File

@ -0,0 +1,231 @@
<template>
<transition name="overlay-fade">
<div v-if="open" class="image-picker-backdrop" @click.self="close">
<div class="image-picker-panel">
<div class="header">
<div class="title">选择视频(一次最多 1 个)</div>
<button class="close-btn" @click="close">×</button>
</div>
<div class="body">
<div v-if="loading" class="loading">加载中...</div>
<div v-else-if="!videos.length" class="empty">未找到视频文件</div>
<div v-else class="grid">
<div
v-for="item in videos"
:key="item.path"
class="card"
:class="{ selected: selectedSet.has(item.path) }"
@click="toggle(item.path)"
:title="item.path"
>
<div class="video-thumb">
<span class="icon icon-sm">🎞</span>
<span class="ext">{{ fileExt(item.name) }}</span>
</div>
<div class="name">{{ item.name }}</div>
</div>
</div>
</div>
<div class="footer">
<div class="count">已选 {{ selectedSet.size }} / 1</div>
<div class="actions">
<button type="button" class="btn secondary" @click="close">取消</button>
<button type="button" class="btn primary" :disabled="!selectedSet.size" @click="confirm">确认</button>
</div>
</div>
</div>
</div>
</transition>
</template>
<script setup lang="ts">
import { computed, ref, watch, onMounted } from 'vue';
interface VideoEntry {
name: string;
path: string;
}
const props = defineProps<{
open: boolean;
entries: VideoEntry[];
initialSelected: string[];
loading: boolean;
}>();
const emit = defineEmits<{
(e: 'close'): void;
(e: 'confirm', list: string[]): void;
}>();
const selectedSet = ref<Set<string>>(new Set(props.initialSelected || []));
watch(
() => props.initialSelected,
(val) => {
selectedSet.value = new Set(val || []);
}
);
const videos = computed(() => props.entries || []);
// Single-select toggle: clicking the already-selected card removes it from
// the selection; clicking any other card clears the set and selects that
// card alone (the picker allows at most one video).
const toggle = (path: string) => {
  if (!path) {
    return;
  }
  const next = new Set(selectedSet.value);
  if (next.has(path)) {
    next.delete(path);
  } else {
    next.clear();
    next.add(path);
  }
  // Reassign (rather than mutate) so Vue's ref reactivity picks up the change.
  selectedSet.value = next;
};
const close = () => emit('close');
const confirm = () => emit('confirm', Array.from(selectedSet.value));
// Lowercased extension (without the dot) for the thumbnail badge.
// Returns '' when the name is empty or contains no dot at all.
const fileExt = (name: string) => {
  if (!name) return '';
  const dot = name.lastIndexOf('.');
  return dot === -1 ? '' : name.slice(dot + 1).toLowerCase();
};
onMounted(() => {
selectedSet.value = new Set(props.initialSelected || []);
});
</script>
<style scoped>
.image-picker-backdrop {
position: fixed;
inset: 0;
background: rgba(0, 0, 0, 0.45);
display: flex;
align-items: center;
justify-content: center;
z-index: 1200;
}
.image-picker-panel {
width: min(780px, 92vw);
max-height: 88vh;
background: #0f1116;
color: #e8ecf2;
border: 1px solid #2a2f3a;
border-radius: 12px;
display: flex;
flex-direction: column;
box-shadow: 0 16px 40px rgba(0, 0, 0, 0.4);
}
.header {
display: flex;
align-items: center;
justify-content: space-between;
padding: 14px 16px;
border-bottom: 1px solid #1f2430;
}
.title {
font-weight: 600;
}
.close-btn {
background: transparent;
color: #9aa3b5;
border: none;
font-size: 20px;
cursor: pointer;
}
.body {
padding: 12px 16px;
overflow: auto;
flex: 1;
}
.grid {
display: grid;
grid-template-columns: repeat(auto-fill, minmax(180px, 1fr));
gap: 12px;
}
.card {
border: 1px solid #1f2430;
border-radius: 10px;
background: #151922;
cursor: pointer;
overflow: hidden;
display: flex;
flex-direction: column;
padding: 10px 10px 12px;
}
.video-thumb {
height: 110px;
border-radius: 8px;
background: linear-gradient(135deg, rgba(76, 166, 255, 0.12), rgba(76, 166, 255, 0.05));
border: 1px solid #243144;
display: flex;
align-items: center;
justify-content: center;
flex-direction: column;
color: #d9e6ff;
gap: 6px;
font-size: 28px;
}
.video-thumb .ext {
font-size: 12px;
color: #9fb7d8;
}
.card .name {
padding: 8px 2px 0;
font-size: 12px;
color: #c5ccda;
white-space: nowrap;
overflow: hidden;
text-overflow: ellipsis;
}
.card.selected {
border-color: #4ca6ff;
box-shadow: 0 0 0 2px rgba(76, 166, 255, 0.2);
}
.loading,
.empty {
padding: 40px 0;
text-align: center;
color: #9aa3b5;
}
.footer {
display: flex;
align-items: center;
justify-content: space-between;
padding: 12px 16px;
border-top: 1px solid #1f2430;
}
.actions {
display: flex;
gap: 10px;
}
.btn {
border: 1px solid #2f3645;
padding: 8px 14px;
border-radius: 8px;
background: #1b202c;
color: #e8ecf2;
cursor: pointer;
}
.btn.primary {
background: #4ca6ff;
border-color: #4ca6ff;
color: #0d1117;
}
.btn:disabled {
opacity: 0.5;
cursor: not-allowed;
}
.count {
font-size: 13px;
color: #9aa3b5;
}
@media (max-width: 640px) {
.grid {
grid-template-columns: repeat(auto-fill, minmax(150px, 1fr));
}
.video-thumb {
height: 90px;
}
}
</style>

View File

@ -164,11 +164,12 @@ export const useChatStore = defineStore('chat', {
this.currentMessageIndex = this.messages.length - 1;
return message;
},
addUserMessage(content: string, images: string[] = []) {
// Append a user-authored chat message, carrying any attached image/video
// paths, and reset the history navigation cursor to "not browsing".
addUserMessage(content: string, images: string[] = [], videos: string[] = []) {
  this.messages.push({ role: 'user', content, images, videos });
  this.currentMessageIndex = -1;
},

View File

@ -10,6 +10,8 @@ interface InputState {
settingsOpen: boolean;
imagePickerOpen: boolean;
selectedImages: string[];
videoPickerOpen: boolean;
selectedVideos: string[];
}
export const useInputStore = defineStore('input', {
@ -22,7 +24,9 @@ export const useInputStore = defineStore('input', {
toolMenuOpen: false,
settingsOpen: false,
imagePickerOpen: false,
selectedImages: []
selectedImages: [],
videoPickerOpen: false,
selectedVideos: []
}),
actions: {
setInputMessage(value: string) {
@ -77,6 +81,9 @@ export const useInputStore = defineStore('input', {
setImagePickerOpen(open: boolean) {
this.imagePickerOpen = open;
},
// Show or hide the video picker overlay.
setVideoPickerOpen(open: boolean) {
  this.videoPickerOpen = open;
},
setSelectedImages(list: string[]) {
this.selectedImages = list.slice(0, 9);
},
@ -90,6 +97,19 @@ export const useInputStore = defineStore('input', {
},
clearSelectedImages() {
this.selectedImages = [];
},
// Replace the pending video selection, hard-capped at one entry to match
// the single-video-per-message rule enforced on send.
setSelectedVideos(list: string[]) {
  this.selectedVideos = list.slice(0, 1);
},
// Select a single video, replacing any previous selection; no-op on
// an empty path.
addSelectedVideo(path: string) {
  if (!path) return;
  this.selectedVideos = [path];
},
// Drop one video from the selection (no-op if it is not selected).
removeSelectedVideo(path: string) {
  this.selectedVideos = this.selectedVideos.filter(item => item !== path);
},
// Reset the video selection (e.g. after sending or switching conversations).
clearSelectedVideos() {
  this.selectedVideos = [];
}
}
});

View File

@ -70,7 +70,8 @@ export const TOOL_ICON_MAP = Object.freeze({
wait_sub_agent: 'clock',
web_search: 'search',
trigger_easter_egg: 'sparkles',
view_image: 'camera'
view_image: 'camera',
view_video: 'eye'
});
export const TOOL_CATEGORY_ICON_MAP = Object.freeze({

View File

@ -53,6 +53,7 @@ class ContextManager:
self.conversation_history = [] # 当前对话历史(内存中)
self.todo_list: Optional[Dict[str, Any]] = None
self.has_images: bool = False
self.has_videos: bool = False
self.image_compression_mode: str = "original"
# 对话元数据与项目快照缓存
self.conversation_metadata: Dict[str, Any] = {}
@ -324,7 +325,8 @@ class ContextManager:
run_mode=run_mode or ("thinking" if thinking_mode else "fast"),
initial_messages=[],
model_key=getattr(self.main_terminal, "model_key", None),
has_images=False
has_images=False,
has_videos=False
)
# 重置当前状态
@ -332,6 +334,7 @@ class ContextManager:
self.conversation_history = []
self.todo_list = None
self.has_images = False
self.has_videos = False
self.conversation_metadata = {}
self.project_snapshot = None
@ -395,6 +398,7 @@ class ContextManager:
run_mode = metadata.get("run_mode")
model_key = metadata.get("model_key")
self.has_images = metadata.get("has_images", False)
self.has_videos = metadata.get("has_videos", False)
if self.main_terminal:
try:
if model_key:
@ -483,7 +487,8 @@ class ContextManager:
thinking_mode=getattr(self.main_terminal, "thinking_mode", None) if hasattr(self, "main_terminal") else None,
run_mode=run_mode,
model_key=getattr(self.main_terminal, "model_key", None) if hasattr(self, "main_terminal") else None,
has_images=self.has_images
has_images=self.has_images,
has_videos=self.has_videos
)
if success:
@ -513,7 +518,8 @@ class ContextManager:
thinking_mode=getattr(self.main_terminal, "thinking_mode", None) if hasattr(self, "main_terminal") else None,
run_mode=run_mode,
model_key=model_key,
has_images=self.has_images
has_images=self.has_images,
has_videos=self.has_videos
)
# 静默保存,不输出日志
except Exception as e:
@ -785,7 +791,8 @@ class ContextManager:
name: Optional[str] = None,
metadata: Optional[Dict[str, Any]] = None,
reasoning_content: Optional[str] = None,
images: Optional[List[str]] = None
images: Optional[List[str]] = None,
videos: Optional[List[str]] = None
):
"""添加对话记录(改进版:集成自动保存 + 智能token统计"""
timestamp = datetime.now().isoformat()
@ -808,6 +815,9 @@ class ContextManager:
if images:
message["images"] = images
self.has_images = True
if videos:
message["videos"] = videos
self.has_videos = True
# 记录当前助手回复所用模型,便于回放时查看
if role == "assistant":
@ -1300,9 +1310,10 @@ class ContextManager:
except Exception:
return None
def _build_content_with_images(self, text: str, images: List[str]) -> Any:
"""将文本与图片路径组合成多模态content图片转换为data URI支持按设置压缩。"""
if not images:
def _build_content_with_images(self, text: str, images: List[str], videos: Optional[List[str]] = None) -> Any:
"""将文本与图片/视频路径组合成多模态content图片转换为data URI视频转换为 data URL。"""
videos = videos or []
if not images and not videos:
return text
parts: List[Dict[str, Any]] = []
if text:
@ -1323,6 +1334,22 @@ class ContextManager:
parts.append({"type": "image_url", "image_url": {"url": data_url}})
except Exception:
continue
for path in videos:
try:
abs_path = Path(self.project_path) / path
if not abs_path.exists() or not abs_path.is_file():
continue
if abs_path.stat().st_size > 50 * 1024 * 1024:
continue
mime, _ = mimetypes.guess_type(abs_path.name)
if not mime:
mime = "video/mp4"
data = abs_path.read_bytes()
b64 = base64.b64encode(data).decode("utf-8")
data_url = f"data:{mime};base64,{b64}"
parts.append({"type": "video_url", "video_url": {"url": data_url}})
except Exception:
continue
return parts if parts else text
def build_messages(self, context: Dict, user_input: str) -> List[Dict]:
@ -1377,7 +1404,8 @@ class ContextManager:
messages.append(message)
else:
images = conv.get("images") or (conv.get("metadata") or {}).get("images") or []
content_payload = self._build_content_with_images(conv["content"], images) if images else conv["content"]
videos = conv.get("videos") or (conv.get("metadata") or {}).get("videos") or []
content_payload = self._build_content_with_images(conv["content"], images, videos) if (images or videos) else conv["content"]
messages.append({
"role": conv["role"],
"content": content_payload

View File

@ -32,6 +32,7 @@ class ConversationMetadata:
run_mode: str = "fast"
model_key: Optional[str] = None
has_images: bool = False
has_videos: bool = False
status: str = "active" # active, archived, error
class ConversationManager:
@ -99,6 +100,7 @@ class ConversationManager:
"run_mode": metadata.get("run_mode") or ("thinking" if metadata.get("thinking_mode") else "fast"),
"model_key": metadata.get("model_key"),
"has_images": metadata.get("has_images", False),
"has_videos": metadata.get("has_videos", False),
"total_messages": metadata.get("total_messages", 0),
"total_tools": metadata.get("total_tools", 0),
"status": metadata.get("status", "active"),
@ -314,7 +316,8 @@ class ConversationManager:
run_mode: str = "fast",
initial_messages: List[Dict] = None,
model_key: Optional[str] = None,
has_images: bool = False
has_images: bool = False,
has_videos: bool = False
) -> str:
"""
创建新对话
@ -347,6 +350,8 @@ class ConversationManager:
"run_mode": normalized_mode,
"model_key": model_key,
"has_images": has_images,
"has_videos": has_videos,
"has_videos": has_videos,
# 首次对话尚未生成文件树快照,待首次用户消息时填充
"project_file_tree": None,
"project_statistics": None,
@ -440,6 +445,7 @@ class ConversationManager:
"run_mode": metadata.run_mode,
"model_key": conversation_data["metadata"].get("model_key"),
"has_images": conversation_data["metadata"].get("has_images", False),
"has_videos": conversation_data["metadata"].get("has_videos", False),
"total_messages": metadata.total_messages,
"total_tools": metadata.total_tools,
"status": metadata.status
@ -459,6 +465,7 @@ class ConversationManager:
todo_list: Optional[Dict] = None,
model_key: Optional[str] = None,
has_images: Optional[bool] = None,
has_videos: Optional[bool] = None,
project_file_tree: Optional[str] = None,
project_statistics: Optional[Dict] = None,
project_snapshot_at: Optional[str] = None
@ -529,6 +536,10 @@ class ConversationManager:
existing_data["metadata"]["has_images"] = bool(has_images)
elif "has_images" not in existing_data["metadata"]:
existing_data["metadata"]["has_images"] = False
if has_videos is not None:
existing_data["metadata"]["has_videos"] = bool(has_videos)
elif "has_videos" not in existing_data["metadata"]:
existing_data["metadata"]["has_videos"] = False
# 文件树快照(如果有新值则更新,若已有则保持)
if project_file_tree is not None:
existing_data["metadata"]["project_file_tree"] = project_file_tree