feat: add video send/view flow and guard model constraints

2026-01-30 17:04:33 +08:00 · 2026-01-30 17:04:33 +08:00 · bb91d22631
commit bb91d22631
parent 462b0ed6f3
16 changed files with 619 additions and 44 deletions
--- a/core/main_terminal.py
+++ b/core/main_terminal.py
@ -119,6 +119,7 @@ class MainTerminal:
        self.terminal_ops = TerminalOperator(project_path, container_session=container_session)
        self.ocr_client = OCRClient(project_path, self.file_manager)
        self.pending_image_view = None  # 供 view_image 工具使用，保存一次性图片插入请求
+        self.pending_video_view = None  # 供 view_video 工具使用，保存一次性视频插入请求
        
        # 新增：终端管理器
        self.terminal_manager = TerminalManager(
@ -1826,6 +1827,24 @@ class MainTerminal:
                    }
                }
            })
+            if getattr(self, "model_key", None) == "kimi-k2.5":
+                tools.append({
+                    "type": "function",
+                    "function": {
+                        "name": "view_video",
+                        "description": "将指定本地视频插入到对话中（系统代发一条包含视频的消息），便于模型查看视频内容。",
+                        "parameters": {
+                            "type": "object",
+                            "properties": self._inject_intent({
+                                "path": {
+                                    "type": "string",
+                                    "description": "项目内的视频相对路径（不要以 /workspace 开头），支持 mp4/mov/mkv/avi/webm。"
+                                }
+                            }),
+                            "required": ["path"]
+                        }
+                    }
+                })
        # 附加自定义工具（仅管理员可见）
        custom_tools = self._build_custom_tools()
        if custom_tools:
@ -1923,6 +1942,26 @@ class MainTerminal:
                    "path": str(path)
                }
                result = {"success": True, "message": "图片已请求插入到对话中，将在后续消息中呈现。", "path": path}
+            elif tool_name == "view_video":
+                path = (arguments.get("path") or "").strip()
+                if not path:
+                    return json.dumps({"success": False, "error": "path 不能为空"}, ensure_ascii=False)
+                if path.startswith("/workspace"):
+                    return json.dumps({"success": False, "error": "非法路径，超出项目根目录，请使用相对路径"}, ensure_ascii=False)
+                abs_path = (Path(self.context_manager.project_path) / path).resolve()
+                try:
+                    abs_path.relative_to(Path(self.context_manager.project_path).resolve())
+                except Exception:
+                    return json.dumps({"success": False, "error": "非法路径，超出项目根目录，请使用相对路径"}, ensure_ascii=False)
+                if not abs_path.exists() or not abs_path.is_file():
+                    return json.dumps({"success": False, "error": f"视频不存在: {path}"}, ensure_ascii=False)
+                allowed_ext = {".mp4", ".mov", ".mkv", ".avi", ".webm"}
+                if abs_path.suffix.lower() not in allowed_ext:
+                    return json.dumps({"success": False, "error": f"不支持的视频格式: {abs_path.suffix}"}, ensure_ascii=False)
+                if abs_path.stat().st_size > 50 * 1024 * 1024:
+                    return json.dumps({"success": False, "error": "视频过大，需 <= 50MB"}, ensure_ascii=False)
+                self.pending_video_view = {"path": str(path)}
+                result = {"success": True, "message": "视频已请求插入到对话中，将在后续消息中呈现。", "path": path}
            
            # 终端会话管理工具
            elif tool_name == "terminal_session":
@ -2511,9 +2550,10 @@ class MainTerminal:
            else:
                # User 或普通 System 消息
                images = conv.get("images") or metadata.get("images") or []
+                videos = conv.get("videos") or metadata.get("videos") or []
                content_payload = (
-                    self.context_manager._build_content_with_images(conv["content"], images)
-                    if images else conv["content"]
+                    self.context_manager._build_content_with_images(conv["content"], images, videos)
+                    if (images or videos) else conv["content"]
                )
                messages.append({
                    "role": conv["role"],
@ -2686,6 +2726,8 @@ class MainTerminal:
        profile = get_model_profile(model_key)
        if getattr(self.context_manager, "has_images", False) and model_key not in {"qwen3-vl-plus", "kimi-k2.5"}:
            raise ValueError("当前对话包含图片，仅支持 Qwen-VL 或 Kimi-k2.5")
+        if getattr(self.context_manager, "has_videos", False) and model_key != "kimi-k2.5":
+            raise ValueError("当前对话包含视频，仅支持 Kimi-k2.5")
        self.model_key = model_key
        self.model_profile = profile
        # 将模型标识传递给底层 API 客户端，便于按模型做兼容处理
--- a/core/web_terminal.py
+++ b/core/web_terminal.py
@ -321,6 +321,7 @@ class WebTerminal(MainTerminal):
            "run_mode": self.run_mode,
            "model_key": getattr(self, "model_key", None),
            "has_images": getattr(self.context_manager, "has_images", False),
+            "has_videos": getattr(self.context_manager, "has_videos", False),
            "context": {
                "usage_percent": context_status['usage_percent'],
                "total_size": context_status['sizes']['total'],
--- a/server/chat.py
+++ b/server/chat.py
@ -69,7 +69,9 @@ def update_thinking_mode(terminal: WebTerminal, workspace: UserWorkspace, userna
                    todo_list=ctx.todo_list,
                    thinking_mode=terminal.thinking_mode,
                    run_mode=terminal.run_mode,
-                    model_key=getattr(terminal, "model_key", None)
+                    model_key=getattr(terminal, "model_key", None),
+                    has_images=getattr(ctx, "has_images", False),
+                    has_videos=getattr(ctx, "has_videos", False)
                )
            except Exception as exc:
                print(f"[API] 保存思考模式到对话失败: {exc}")
@ -134,7 +136,8 @@ def update_model(terminal: WebTerminal, workspace: UserWorkspace, username: str)
                    thinking_mode=terminal.thinking_mode,
                    run_mode=terminal.run_mode,
                    model_key=terminal.model_key,
-                    has_images=getattr(ctx, "has_images", False)
+                    has_images=getattr(ctx, "has_images", False),
+                    has_videos=getattr(ctx, "has_videos", False)
                )
            except Exception as exc:
                print(f"[API] 保存模型到对话失败: {exc}")
--- a/server/chat_flow.py
+++ b/server/chat_flow.py
@ -380,14 +380,15 @@ def detect_tool_failure(result_data: Any) -> bool:
    return False


-def process_message_task(terminal: WebTerminal, message: str, images, sender, client_sid, workspace: UserWorkspace, username: str):
+def process_message_task(terminal: WebTerminal, message: str, images, sender, client_sid, workspace: UserWorkspace, username: str, videos=None):
    """在后台处理消息任务"""
+    videos = videos or []
    try:
        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)
        
        # 创建可取消的任务
-        task = loop.create_task(handle_task_with_sender(terminal, workspace, message, images, sender, client_sid, username))
+        task = loop.create_task(handle_task_with_sender(terminal, workspace, message, images, sender, client_sid, username, videos))
        
        entry = get_stop_flag(client_sid, username)
        if not isinstance(entry, dict):
@ -462,10 +463,11 @@ def detect_malformed_tool_call(text):
            
    return False

-async def handle_task_with_sender(terminal: WebTerminal, workspace: UserWorkspace, message, images, sender, client_sid, username: str):
+async def handle_task_with_sender(terminal: WebTerminal, workspace: UserWorkspace, message, images, sender, client_sid, username: str, videos=None):
    """处理任务并发送消息 - 集成token统计版本"""
    web_terminal = terminal
    conversation_id = getattr(web_terminal.context_manager, "current_conversation_id", None)
+    videos = videos or []
    
    # 如果是思考模式，重置状态
    if web_terminal.thinking_mode:
@ -478,7 +480,7 @@ async def handle_task_with_sender(terminal: WebTerminal, workspace: UserWorkspac
    # 添加到对话历史
    history_len_before = len(getattr(web_terminal.context_manager, "conversation_history", []) or [])
    is_first_user_message = history_len_before == 0
-    web_terminal.context_manager.add_conversation("user", message, images=images)
+    web_terminal.context_manager.add_conversation("user", message, images=images, videos=videos)
    
    if is_first_user_message and getattr(web_terminal, "context_manager", None):
        try:
@ -1178,6 +1180,7 @@ async def handle_task_with_sender(terminal: WebTerminal, workspace: UserWorkspac

        full_response = ""
        tool_calls = []
+        video_injections = []
        current_thinking = ""
        detected_tools = {}
        last_usage_payload = None
@ -2281,17 +2284,28 @@ async def handle_task_with_sender(terminal: WebTerminal, workspace: UserWorkspac
                "content": tool_result_content
            })

-            # 收集图片注入请求，延后统一追加
+            # Collect image/video injection requests per tool call; the messages are appended later in one batch
            if (
                function_name == "view_image"
                and getattr(web_terminal, "pending_image_view", None)
                and not tool_failed
                and (isinstance(result_data, dict) and result_data.get("success") is not False)
            ):
                inj = web_terminal.pending_image_view
                web_terminal.pending_image_view = None
                if inj and inj.get("path"):
                    image_injections.append(inj["path"])
+
+            if (
+                function_name == "view_video"
+                and getattr(web_terminal, "pending_video_view", None)
+                and not tool_failed
+                and (isinstance(result_data, dict) and result_data.get("success") is not False)
+            ):
+                inj = web_terminal.pending_video_view
+                web_terminal.pending_video_view = None
+                if inj and inj.get("path"):
+                    video_injections.append(inj["path"])

            if function_name not in {'write_file', 'edit_file'}:
                await process_sub_agent_updates(messages, inline=True, after_tool_call_id=tool_call_id)
@ -2304,7 +2318,7 @@ async def handle_task_with_sender(terminal: WebTerminal, workspace: UserWorkspac
        # 标记不再是第一次迭代
        is_first_iteration = False

-        # 统一附加图片消息，保证所有 tool 响应先完成
+        # 统一附加图片/视频消息，保证所有 tool 响应先完成
        if image_injections:
            for img_path in image_injections:
                injected_text = "这是一条系统控制发送的信息，并非用户主动发送，目的是返回你需要查看的图片。"
@ -2326,6 +2340,29 @@ async def handle_task_with_sender(terminal: WebTerminal, workspace: UserWorkspac
                sender('system_message', {
                    'content': f'系统已按模型请求插入图片: {img_path}'
                })
+
+        if video_injections:
+            for video_path in video_injections:
+                injected_text = "这是一条系统控制发送的信息，并非用户主动发送，目的是返回你需要查看的视频。"
+                web_terminal.context_manager.add_conversation(
+                    "user",
+                    injected_text,
+                    videos=[video_path],
+                    metadata={"system_injected_video": True}
+                )
+                content_payload = web_terminal.context_manager._build_content_with_images(
+                    injected_text,
+                    [],
+                    [video_path]
+                )
+                messages.append({
+                    "role": "user",
+                    "content": content_payload,
+                    "metadata": {"system_injected_video": True}
+                })
+                sender('system_message', {
+                    'content': f'系统已按模型请求插入视频: {video_path}'
+                })
    
    # 最终统计
    debug_log(f"\n{'='*40}")
@ -2345,7 +2382,7 @@ async def handle_task_with_sender(terminal: WebTerminal, workspace: UserWorkspac
    

 # === 统一对外入口 ===
-def start_chat_task(terminal, message: str, images: Any, sender, client_sid: str, workspace, username: str):
+def start_chat_task(terminal, message: str, images: Any, sender, client_sid: str, workspace, username: str, videos: Any = None):
    """在线程模式下启动对话任务，供 Socket 事件调用。"""
    return socketio.start_background_task(
        process_message_task,
@ -2356,9 +2393,10 @@ def start_chat_task(terminal, message: str, images: Any, sender, client_sid: str
        client_sid,
        workspace,
        username,
+        videos
    )


-def run_chat_task_sync(terminal, message: str, images: Any, sender, client_sid: str, workspace, username: str):
+def run_chat_task_sync(terminal, message: str, images: Any, sender, client_sid: str, workspace, username: str, videos: Any = None):
    """同步执行（测试/CLI 使用）。"""
-    return process_message_task(terminal, message, images, sender, client_sid, workspace, username)
+    return process_message_task(terminal, message, images, sender, client_sid, workspace, username, videos)
--- a/server/socket_handlers.py
+++ b/server/socket_handlers.py
@ -217,12 +217,19 @@ def handle_message(data):
    
    message = (data.get('message') or '').strip()
    images = data.get('images') or []
-    if not message and not images:
+    videos = data.get('videos') or []
+    if not message and not images and not videos:
        emit('error', {'message': '消息不能为空'})
        return
    if images and getattr(terminal, "model_key", None) not in {"qwen3-vl-plus", "kimi-k2.5"}:
        emit('error', {'message': '当前模型不支持图片，请切换到 Qwen-VL 或 Kimi-k2.5'})
        return
+    if videos and getattr(terminal, "model_key", None) != "kimi-k2.5":
+        emit('error', {'message': '当前模型不支持视频，请切换到 Kimi-k2.5'})
+        return
+    if images and videos:
+        emit('error', {'message': '图片和视频请分开发送'})
+        return
    
    print(f"[WebSocket] 收到消息: {message}")
    debug_log(f"\n{'='*80}\n新任务开始: {message}\n{'='*80}")
@ -285,7 +292,8 @@ def handle_message(data):

    # 传递客户端ID
    images = data.get('images') or []
-    start_chat_task(terminal, message, images, send_with_activity, client_sid, workspace, username)
+    videos = data.get('videos') or []
+    start_chat_task(terminal, message, images, send_with_activity, client_sid, workspace, username, videos)


@socketio.on('client_chunk_log')
--- a/static/src/App.vue
+++ b/static/src/App.vue
@ -251,6 +251,7 @@
              :icon-style="iconStyle"
              :tool-category-icon="toolCategoryIcon"
              :selected-images="selectedImages"
+              :selected-videos="selectedVideos"
              :block-upload="policyUiBlocks.block_upload"
              :block-tool-toggle="policyUiBlocks.block_tool_toggle"
              :block-realtime-terminal="policyUiBlocks.block_realtime_terminal"
@ -279,7 +280,9 @@
              @compress-conversation="handleCompressConversationClick"
              @file-selected="handleFileSelected"
              @pick-images="openImagePicker"
+              @pick-video="openVideoPicker"
              @remove-image="handleRemoveImage"
+              @remove-video="handleRemoveVideo"
              @open-review="openReviewDialog"
            />
          </div>
@ -309,6 +312,17 @@
          @confirm="handleImagesConfirmed"
        />
      </transition>
+      <transition name="overlay-fade">
+        <VideoPicker
+          v-if="videoPickerOpen"
+          :open="videoPickerOpen"
+          :entries="videoEntries"
+          :initial-selected="selectedVideos"
+          :loading="videoLoading"
+          @close="closeVideoPicker"
+          @confirm="handleVideosConfirmed"
+        />
+      </transition>
      <transition name="overlay-fade">
        <ConversationReviewDialog
          v-if="reviewDialogOpen"
@ -464,6 +478,7 @@
 <script setup lang="ts">
 import appOptions from './app';
 import VirtualMonitorSurface from './components/chat/VirtualMonitorSurface.vue';
+import VideoPicker from './components/overlay/VideoPicker.vue';

 const mobilePanelIcon = new URL('../icons/align-left.svg', import.meta.url).href;
 const mobileMenuIcons = {
--- a/static/src/app.ts
+++ b/static/src/app.ts
@ -285,7 +285,10 @@ const appOptions = {
                modelMenuOpen: false,
                imageEntries: [],
                imageLoading: false,
+                videoEntries: [],
+                videoLoading: false,
                conversationHasImages: false,
+                conversationHasVideos: false,
                conversationListRequestSeq: 0,
                conversationListRefreshToken: 0,

@ -429,7 +432,9 @@ const appOptions = {
                'toolMenuOpen',
                'settingsOpen',
                'imagePickerOpen',
-                'selectedImages'
+                'videoPickerOpen',
+                'selectedImages',
+                'selectedVideos'
            ]),
            resolvedRunMode() {
                const allowed = ['fast', 'thinking', 'deep'];
@ -819,7 +824,11 @@ const appOptions = {
                inputSetImagePickerOpen: 'setImagePickerOpen',
                inputSetSelectedImages: 'setSelectedImages',
                inputClearSelectedImages: 'clearSelectedImages',
-                inputRemoveSelectedImage: 'removeSelectedImage'
+                inputRemoveSelectedImage: 'removeSelectedImage',
+                inputSetVideoPickerOpen: 'setVideoPickerOpen',
+                inputSetSelectedVideos: 'setSelectedVideos',
+                inputClearSelectedVideos: 'clearSelectedVideos',
+                inputRemoveSelectedVideo: 'removeSelectedVideo'
            }),
            ...mapActions(useToolStore, {
                toolRegisterAction: 'registerToolAction',
@ -1553,6 +1562,9 @@ const appOptions = {
                if (status && typeof status.has_images !== 'undefined') {
                    this.conversationHasImages = !!status.has_images;
                }
+                if (status && typeof status.has_videos !== 'undefined') {
+                    this.conversationHasVideos = !!status.has_videos;
+                }
            },

            updateContainerStatus(status) {
@ -1879,12 +1891,13 @@ const appOptions = {
                
                let currentAssistantMessage = null;
                let historyHasImages = false;
+                let historyHasVideos = false;
                
                historyMessages.forEach((message, index) => {
                    debugLog(`处理消息 ${index + 1}/${historyMessages.length}:`, message.role, message);
                    const meta = message.metadata || {};
-                    if (message.role === 'user' && meta.system_injected_image) {
-                        debugLog('跳过系统代发的图片消息（仅用于模型查看，不在前端展示）');
+                    if (message.role === 'user' && (meta.system_injected_image || meta.system_injected_video)) {
+                        debugLog('跳过系统代发的图片/视频消息（仅用于模型查看，不在前端展示）');
                        return;
                    }
                    
@ -1895,13 +1908,18 @@ const appOptions = {
                            currentAssistantMessage = null;
                        }
                        const images = message.images || (message.metadata && message.metadata.images) || [];
+                        const videos = message.videos || (message.metadata && message.metadata.videos) || [];
                        if (Array.isArray(images) && images.length) {
                            historyHasImages = true;
                        }
+                        if (Array.isArray(videos) && videos.length) {
+                            historyHasVideos = true;
+                        }
                        this.messages.push({
                            role: 'user',
                            content: message.content || '',
-                            images
+                            images,
+                            videos
                        });
                        debugLog('添加用户消息:', message.content?.substring(0, 50) + '...');
                        
@ -2106,6 +2124,7 @@ const appOptions = {
                }

                this.conversationHasImages = historyHasImages;
+                this.conversationHasVideos = historyHasVideos;
                
                debugLog(`历史消息渲染完成，共 ${this.messages.length} 条消息`);
                this.logMessageState('renderHistoryMessages:after-render');
@ -2410,10 +2429,12 @@ const appOptions = {

                const text = (this.inputMessage || '').trim();
                const images = Array.isArray(this.selectedImages) ? this.selectedImages.slice(0, 9) : [];
+                const videos = Array.isArray(this.selectedVideos) ? this.selectedVideos.slice(0, 1) : [];
                const hasText = text.length > 0;
                const hasImages = images.length > 0;
+                const hasVideos = videos.length > 0;

-                if (!hasText && !hasImages) {
+                if (!hasText && !hasImages && !hasVideos) {
                    return;
                }

@ -2432,12 +2453,31 @@ const appOptions = {
                    return;
                }

+                if (hasVideos && this.currentModelKey !== 'kimi-k2.5') {
+                    this.uiPushToast({
+                        title: '当前模型不支持视频',
+                        message: '请切换到 Kimi-k2.5 后再发送视频',
+                        type: 'error'
+                    });
+                    return;
+                }
+
+                if (hasVideos && hasImages) {
+                    this.uiPushToast({
+                        title: '请勿同时发送',
+                        message: '视频与图片需分开发送，每条仅包含一种媒体',
+                        type: 'warning'
+                    });
+                    return;
+                }
+
                const message = text;
-                const isCommand = hasText && !hasImages && message.startsWith('/');
+                const isCommand = hasText && !hasImages && !hasVideos && message.startsWith('/');
                if (isCommand) {
                    this.socket.emit('send_command', { command: message });
                    this.inputClearMessage();
                    this.inputClearSelectedImages();
+                    this.inputClearSelectedVideos();
                    this.autoResizeInput();
                    return;
                }
@ -2454,18 +2494,25 @@ const appOptions = {
                
                // 标记任务进行中，直到任务完成或用户手动停止
                this.taskInProgress = true;
-                this.chatAddUserMessage(message, images);
-                this.socket.emit('send_message', { message: message, images, conversation_id: this.currentConversationId });
+                this.chatAddUserMessage(message, images, videos);
+                this.socket.emit('send_message', { message: message, images, videos, conversation_id: this.currentConversationId });
                if (typeof this.monitorShowPendingReply === 'function') {
                    this.monitorShowPendingReply();
                }
                this.inputClearMessage();
                this.inputClearSelectedImages();
+                this.inputClearSelectedVideos();
                this.inputSetImagePickerOpen(false);
+                this.inputSetVideoPickerOpen(false);
                this.inputSetLineCount(1);
                this.inputSetMultiline(false);
                if (hasImages) {
                    this.conversationHasImages = true;
+                    // Leave conversationHasVideos as-is: earlier messages may still contain videos.
+                }
+                if (hasVideos) {
+                    this.conversationHasVideos = true;
+                    // Leave conversationHasImages as-is: earlier messages may still contain images.
                }
                if (this.autoScrollEnabled) {
                    this.scrollToBottom();
@ -2669,6 +2716,24 @@ const appOptions = {
                this.inputSetImagePickerOpen(false);
            },

+            async openVideoPicker() {
+                if (this.currentModelKey !== 'kimi-k2.5') {
+                    this.uiPushToast({
+                        title: '当前模型不支持视频',
+                        message: '请切换到 Kimi-k2.5 后再发送视频',
+                        type: 'error'
+                    });
+                    return;
+                }
+                this.closeQuickMenu();
+                this.inputSetVideoPickerOpen(true);
+                await this.loadWorkspaceVideos();
+            },
+
+            closeVideoPicker() {
+                this.inputSetVideoPickerOpen(false);
+            },
+
            async loadWorkspaceImages() {
                this.imageLoading = true;
                try {
@ -2746,6 +2811,83 @@ const appOptions = {
                return results;
            },

+            async fetchAllVideoEntries(startPath = '') {
+                const queue: string[] = [startPath || ''];
+                const visited = new Set<string>();
+                const results: Array<{ name: string; path: string }> = [];
+                const exts = new Set(['.mp4', '.mov', '.mkv', '.avi', '.webm']);
+                const maxFolders = 120;
+
+                while (queue.length && visited.size < maxFolders) {
+                    const path = queue.shift() || '';
+                    if (visited.has(path)) {
+                        continue;
+                    }
+                    visited.add(path);
+                    try {
+                        const resp = await fetch(`/api/gui/files/entries?path=${encodeURIComponent(path)}`, {
+                            method: 'GET',
+                            credentials: 'include',
+                            headers: { Accept: 'application/json' }
+                        });
+                        const data = await resp.json().catch(() => null);
+                        if (!data?.success) {
+                            continue;
+                        }
+                        const items = Array.isArray(data?.data?.items) ? data.data.items : [];
+                        for (const item of items) {
+                            const rawPath =
+                                item?.path ||
+                                [path, item?.name].filter(Boolean).join('/').replace(/\\/g, '/').replace(/\/{2,}/g, '/');
+                            const type = String(item?.type || '').toLowerCase();
+                            if (type === 'directory' || type === 'folder') {
+                                queue.push(rawPath);
+                                continue;
+                            }
+                            const ext =
+                                String(item?.extension || '').toLowerCase() ||
+                                (rawPath.includes('.') ? `.${rawPath.split('.').pop()?.toLowerCase()}` : '');
+                            if (exts.has(ext)) {
+                                results.push({
+                                    name: item?.name || rawPath.split('/').pop() || rawPath,
+                                    path: rawPath
+                                });
+                                if (results.length >= 200) {
+                                    return results;
+                                }
+                            }
+                        }
+                    } catch (error) {
+                        console.warn('遍历文件夹失败', path, error);
+                    }
+                }
+                return results;
+            },
+
+            async loadWorkspaceVideos() {
+                this.videoLoading = true;
+                try {
+                    const entries = await this.fetchAllVideoEntries('');
+                    this.videoEntries = entries;
+                    if (!entries.length) {
+                        this.uiPushToast({
+                            title: '未找到视频',
+                            message: '工作区内没有可用的视频文件',
+                            type: 'info'
+                        });
+                    }
+                } catch (error) {
+                    console.error('加载视频列表失败', error);
+                    this.uiPushToast({
+                        title: '加载视频失败',
+                        message: error?.message || '请稍后重试',
+                        type: 'error'
+                    });
+                } finally {
+                    this.videoLoading = false;
+                }
+            },
+
            handleImagesConfirmed(list) {
                this.inputSetSelectedImages(Array.isArray(list) ? list : []);
                this.inputSetImagePickerOpen(false);
@ -2753,6 +2895,17 @@ const appOptions = {
            handleRemoveImage(path) {
                this.inputRemoveSelectedImage(path);
            },
+            handleVideosConfirmed(list) {
+                const arr = Array.isArray(list) ? list.slice(0, 1) : [];
+                this.inputSetSelectedVideos(arr);
+                this.inputSetVideoPickerOpen(false);
+                if (arr.length) {
+                    this.inputClearSelectedImages();
+                }
+            },
+            handleRemoveVideo(path) {
+                this.inputRemoveSelectedVideo(path);
+            },

            handleQuickUpload() {
                if (this.uploading || !this.isConnected) {
--- a/static/src/components/chat/ChatArea.vue
+++ b/static/src/components/chat/ChatArea.vue
@ -12,6 +12,9 @@
            <div v-if="msg.images && msg.images.length" class="image-inline-row">
              <span class="image-name" v-for="img in msg.images" :key="img">{{ formatImageName(img) }}</span>
            </div>
+            <div v-if="msg.videos && msg.videos.length" class="image-inline-row video-inline-row">
+              <span class="image-name" v-for="video in msg.videos" :key="video">{{ formatImageName(video) }}</span>
+            </div>
          </div>
        </div>
        <div v-else-if="msg.role === 'assistant'" class="assistant-message">
--- a/static/src/components/input/InputComposer.vue
+++ b/static/src/components/input/InputComposer.vue
@ -18,6 +18,12 @@
              <button type="button" class="image-remove-btn" @click.stop="$emit('remove-image', img)">×</button>
            </span>
          </div>
+          <div v-if="selectedVideos && selectedVideos.length" class="image-inline-row video-inline-row">
+            <span class="image-name" v-for="video in selectedVideos" :key="video">
+              {{ formatImageName(video) }}
+              <button type="button" class="image-remove-btn" @click.stop="$emit('remove-video', video)">×</button>
+            </span>
+          </div>
          <div class="input-row">
            <button
              type="button"
@ -46,7 +52,7 @@
              :disabled="
                !isConnected ||
                (inputLocked && !streamingMessage) ||
-                ((!(inputMessage || '').trim() && (!selectedImages || !selectedImages.length)) && !streamingMessage)
+                ((!(inputMessage || '').trim() && (!selectedImages?.length && !selectedVideos?.length)) && !streamingMessage)
              "
            >
              <span v-if="streamingMessage" class="stop-icon"></span>
@ -83,6 +89,7 @@
        :block-conversation-review="blockConversationReview"
        @quick-upload="triggerQuickUpload"
        @pick-images="$emit('pick-images')"
+        @pick-video="$emit('pick-video')"
        @toggle-tool-menu="$emit('toggle-tool-menu')"
        @toggle-settings="$emit('toggle-settings')"
        @toggle-mode-menu="$emit('toggle-mode-menu')"
@ -117,6 +124,7 @@ const emit = defineEmits([
  'send-or-stop',
  'quick-upload',
  'pick-images',
+  'pick-video',
  'toggle-tool-menu',
  'toggle-mode-menu',
  'toggle-model-menu',
@ -130,6 +138,7 @@ const emit = defineEmits([
  'compress-conversation',
  'file-selected',
  'remove-image',
+  'remove-video',
  'open-review'
 ]);

@ -157,6 +166,7 @@ const props = defineProps<{
  modelOptions: Array<{ key: string; label: string; description: string; disabled?: boolean }>;
  currentModelKey: string;
  selectedImages?: string[];
+  selectedVideos?: string[];
  blockUpload?: boolean;
  blockToolToggle?: boolean;
  blockRealtimeTerminal?: boolean;
--- a/static/src/components/input/QuickMenu.vue
+++ b/static/src/components/input/QuickMenu.vue
@ -26,6 +26,15 @@
      >
        发送图片
      </button>
+      <button
+        v-if="currentModelKey === 'kimi-k2.5'"
+        type="button"
+        class="menu-entry"
+        @click.stop="$emit('pick-video')"
+        :disabled="!isConnected || streamingMessage"
+      >
+        发送视频
+      </button>
      <button
        type="button"
        class="menu-entry has-submenu"
@ -166,6 +175,7 @@ defineEmits<{
  (event: 'toggle-model-menu'): void;
  (event: 'select-model', key: string): void;
  (event: 'open-review'): void;
+  (event: 'pick-video'): void;
 }>();

 const runModeOptions = [
--- a/static/src/components/overlay/VideoPicker.vue
+++ b/static/src/components/overlay/VideoPicker.vue
@ -0,0 +1,238 @
+<template>
+  <transition name="overlay-fade">
+    <div v-if="open" class="image-picker-backdrop" @click.self="close">
+      <div class="image-picker-panel">
+        <div class="header">
+          <div class="title">选择视频（一次最多 1 个）</div>
+          <button type="button" class="close-btn" @click="close">×</button>
+        </div>
+        <div class="body">
+          <div v-if="loading" class="loading">加载中...</div>
+          <div v-else-if="!videos.length" class="empty">未找到视频文件</div>
+          <div v-else class="grid">
+            <div
+              v-for="item in videos"
+              :key="item.path"
+              class="card"
+              :class="{ selected: selectedSet.has(item.path) }"
+              @click="toggle(item.path)"
+              :title="item.path"
+            >
+              <div class="video-thumb">
+                <span class="icon icon-sm">🎞️</span>
+                <span class="ext">{{ fileExt(item.name) }}</span>
+              </div>
+              <div class="name">{{ item.name }}</div>
+            </div>
+          </div>
+        </div>
+        <div class="footer">
+          <div class="count">已选 {{ selectedSet.size }} / 1</div>
+          <div class="actions">
+            <button type="button" class="btn secondary" @click="close">取消</button>
+            <button type="button" class="btn primary" :disabled="!selectedSet.size" @click="confirm">确认</button>
+          </div>
+        </div>
+      </div>
+    </div>
+  </transition>
+</template>
+
+<script setup lang="ts">
+// Modal overlay that lists video entries and emits the chosen path list via `confirm`.
+import { computed, ref, watch } from 'vue';
+
+interface VideoEntry {
+  name: string;
+  path: string;
+}
+
+const props = defineProps<{
+  open: boolean;
+  entries: VideoEntry[];
+  initialSelected: string[];
+  loading: boolean;
+}>();
+
+const emit = defineEmits<{
+  (e: 'close'): void;
+  (e: 'confirm', list: string[]): void;
+}>();
+
+// Single-select picker: seed local state from at most the first pre-selected path.
+const seedSelection = () => new Set<string>((props.initialSelected || []).slice(0, 1));
+
+const selectedSet = ref<Set<string>>(seedSelection());
+
+// Follow the parent when its selection changes.
+watch(
+  () => props.initialSelected,
+  () => {
+    selectedSet.value = seedSelection();
+  }
+);
+
+// Re-seed on every open so picks abandoned via cancel/backdrop click do not
+// reappear as selected the next time the dialog is shown.
+watch(
+  () => props.open,
+  (isOpen) => {
+    if (isOpen) {
+      selectedSet.value = seedSelection();
+    }
+  }
+);
+
+const videos = computed(() => props.entries || []);
+
+// Clicking the selected card clears it; clicking any other card replaces it.
+const toggle = (path: string) => {
+  if (!path) return;
+  selectedSet.value = selectedSet.value.has(path) ? new Set<string>() : new Set<string>([path]);
+};
+
+const close = () => emit('close');
+
+const confirm = () => emit('confirm', Array.from(selectedSet.value));
+
+// Lower-cased extension shown on the placeholder thumbnail ('' when there is none).
+const fileExt = (name: string): string => {
+  const dot = name ? name.lastIndexOf('.') : -1;
+  return dot < 0 ? '' : name.slice(dot + 1).toLowerCase();
+};
+</script>
+
+<style scoped>
+.image-picker-backdrop {
+  position: fixed;
+  inset: 0;
+  background: rgba(0, 0, 0, 0.45);
+  display: flex;
+  align-items: center;
+  justify-content: center;
+  z-index: 1200;
+}
+.image-picker-panel {
+  width: min(780px, 92vw);
+  max-height: 88vh;
+  background: #0f1116;
+  color: #e8ecf2;
+  border: 1px solid #2a2f3a;
+  border-radius: 12px;
+  display: flex;
+  flex-direction: column;
+  box-shadow: 0 16px 40px rgba(0, 0, 0, 0.4);
+}
+.header {
+  display: flex;
+  align-items: center;
+  justify-content: space-between;
+  padding: 14px 16px;
+  border-bottom: 1px solid #1f2430;
+}
+.title {
+  font-weight: 600;
+}
+.close-btn {
+  background: transparent;
+  color: #9aa3b5;
+  border: none;
+  font-size: 20px;
+  cursor: pointer;
+}
+.body {
+  padding: 12px 16px;
+  overflow: auto;
+  flex: 1;
+}
+.grid {
+  display: grid;
+  grid-template-columns: repeat(auto-fill, minmax(180px, 1fr));
+  gap: 12px;
+}
+.card {
+  border: 1px solid #1f2430;
+  border-radius: 10px;
+  background: #151922;
+  cursor: pointer;
+  overflow: hidden;
+  display: flex;
+  flex-direction: column;
+  padding: 10px 10px 12px;
+}
+.video-thumb {
+  height: 110px;
+  border-radius: 8px;
+  background: linear-gradient(135deg, rgba(76, 166, 255, 0.12), rgba(76, 166, 255, 0.05));
+  border: 1px solid #243144;
+  display: flex;
+  align-items: center;
+  justify-content: center;
+  flex-direction: column;
+  color: #d9e6ff;
+  gap: 6px;
+  font-size: 28px;
+}
+.video-thumb .ext {
+  font-size: 12px;
+  color: #9fb7d8;
+}
+.card .name {
+  padding: 8px 2px 0;
+  font-size: 12px;
+  color: #c5ccda;
+  white-space: nowrap;
+  overflow: hidden;
+  text-overflow: ellipsis;
+}
+.card.selected {
+  border-color: #4ca6ff;
+  box-shadow: 0 0 0 2px rgba(76, 166, 255, 0.2);
+}
+.loading,
+.empty {
+  padding: 40px 0;
+  text-align: center;
+  color: #9aa3b5;
+}
+.footer {
+  display: flex;
+  align-items: center;
+  justify-content: space-between;
+  padding: 12px 16px;
+  border-top: 1px solid #1f2430;
+}
+.actions {
+  display: flex;
+  gap: 10px;
+}
+.btn {
+  border: 1px solid #2f3645;
+  padding: 8px 14px;
+  border-radius: 8px;
+  background: #1b202c;
+  color: #e8ecf2;
+  cursor: pointer;
+}
+.btn.primary {
+  background: #4ca6ff;
+  border-color: #4ca6ff;
+  color: #0d1117;
+}
+.btn:disabled {
+  opacity: 0.5;
+  cursor: not-allowed;
+}
+.count {
+  font-size: 13px;
+  color: #9aa3b5;
+}
+@media (max-width: 640px) {
+  .grid {
+    grid-template-columns: repeat(auto-fill, minmax(150px, 1fr));
+  }
+  .video-thumb {
+    height: 90px;
+  }
+}
+</style>
--- a/static/src/stores/chat.ts
+++ b/static/src/stores/chat.ts
@ -164,11 +164,12 @@ export const useChatStore = defineStore('chat', {
      this.currentMessageIndex = this.messages.length - 1;
      return message;
    },
-    addUserMessage(content: string, images: string[] = []) {
+    addUserMessage(content: string, images: string[] = [], videos: string[] = []) {
      this.messages.push({
        role: 'user',
        content,
-        images
+        images,
+        videos // optional attachment list; defaults to [] when the caller omits it
      });
      this.currentMessageIndex = -1;
    },
--- a/static/src/stores/input.ts
+++ b/static/src/stores/input.ts
@ -10,6 +10,8 @@ interface InputState {
  settingsOpen: boolean;
  imagePickerOpen: boolean;
  selectedImages: string[];
+  videoPickerOpen: boolean;
+  selectedVideos: string[];
 }

 export const useInputStore = defineStore('input', {
@ -22,7 +24,9 @@ export const useInputStore = defineStore('input', {
    toolMenuOpen: false,
    settingsOpen: false,
    imagePickerOpen: false,
-    selectedImages: []
+    selectedImages: [],
+    videoPickerOpen: false,
+    selectedVideos: []
  }),
  actions: {
    setInputMessage(value: string) {
@ -77,6 +81,9 @@ export const useInputStore = defineStore('input', {
    setImagePickerOpen(open: boolean) {
      this.imagePickerOpen = open;
    },
+    setVideoPickerOpen(open: boolean) {
+      this.videoPickerOpen = open;
+    },
    setSelectedImages(list: string[]) {
      this.selectedImages = list.slice(0, 9);
    },
@ -90,6 +97,19 @@ export const useInputStore = defineStore('input', {
    },
    clearSelectedImages() {
      this.selectedImages = [];
+    },
+    setSelectedVideos(list: string[]) {
+      this.selectedVideos = list.slice(0, 1); // cap the selection at one entry
+    },
+    addSelectedVideo(path: string) {
+      // A new pick replaces the previous one; empty paths are ignored.
+      if (path) this.selectedVideos = [path];
+    },
+    removeSelectedVideo(path: string) {
+      this.selectedVideos = this.selectedVideos.filter((entry) => entry !== path);
+    },
+    clearSelectedVideos() {
+      this.selectedVideos = []; // drop the pending video attachment, if any
    }
  }
 });
--- a/static/src/utils/icons.ts
+++ b/static/src/utils/icons.ts
@ -70,7 +70,8 @@ export const TOOL_ICON_MAP = Object.freeze({
  wait_sub_agent: 'clock',
  web_search: 'search',
  trigger_easter_egg: 'sparkles',
-  view_image: 'camera'
+  view_image: 'camera',
+  view_video: 'eye'
 });

 export const TOOL_CATEGORY_ICON_MAP = Object.freeze({
--- a/utils/context_manager.py
+++ b/utils/context_manager.py
@ -53,6 +53,7 @@ class ContextManager:
        self.conversation_history = []  # 当前对话历史（内存中）
        self.todo_list: Optional[Dict[str, Any]] = None
        self.has_images: bool = False
+        self.has_videos: bool = False
        self.image_compression_mode: str = "original"
        # 对话元数据与项目快照缓存
        self.conversation_metadata: Dict[str, Any] = {}
@ -324,7 +325,8 @@ class ContextManager:
            run_mode=run_mode or ("thinking" if thinking_mode else "fast"),
            initial_messages=[],
            model_key=getattr(self.main_terminal, "model_key", None),
-            has_images=False
+            has_images=False,
+            has_videos=False
        )
        
        # 重置当前状态
@ -332,6 +334,7 @@ class ContextManager:
        self.conversation_history = []
        self.todo_list = None
        self.has_images = False
+        self.has_videos = False
        self.conversation_metadata = {}
        self.project_snapshot = None
        
@ -395,6 +398,7 @@ class ContextManager:
        run_mode = metadata.get("run_mode")
        model_key = metadata.get("model_key")
        self.has_images = metadata.get("has_images", False)
+        self.has_videos = metadata.get("has_videos", False)
        if self.main_terminal:
            try:
                if model_key:
@ -483,7 +487,8 @@ class ContextManager:
                thinking_mode=getattr(self.main_terminal, "thinking_mode", None) if hasattr(self, "main_terminal") else None,
                run_mode=run_mode,
                model_key=getattr(self.main_terminal, "model_key", None) if hasattr(self, "main_terminal") else None,
-                has_images=self.has_images
+                has_images=self.has_images,
+                has_videos=self.has_videos
            )
            
            if success:
@ -513,7 +518,8 @@ class ContextManager:
                thinking_mode=getattr(self.main_terminal, "thinking_mode", None) if hasattr(self, "main_terminal") else None,
                run_mode=run_mode,
                model_key=model_key,
-                has_images=self.has_images
+                has_images=self.has_images,
+                has_videos=self.has_videos
            )
            # 静默保存，不输出日志
        except Exception as e:
@ -785,7 +791,8 @@ class ContextManager:
        name: Optional[str] = None,
        metadata: Optional[Dict[str, Any]] = None,
        reasoning_content: Optional[str] = None,
-        images: Optional[List[str]] = None
+        images: Optional[List[str]] = None,
+        videos: Optional[List[str]] = None
    ):
        """添加对话记录（改进版：集成自动保存 + 智能token统计）"""
        timestamp = datetime.now().isoformat()
@ -808,6 +815,9 @@ class ContextManager:
        if images:
            message["images"] = images
            self.has_images = True
+        if videos:
+            message["videos"] = videos
+            self.has_videos = True
        
        # 记录当前助手回复所用模型，便于回放时查看
        if role == "assistant":
@ -1300,9 +1310,10 @@ class ContextManager:
        except Exception:
            return None

-    def _build_content_with_images(self, text: str, images: List[str]) -> Any:
-        """将文本与图片路径组合成多模态content，图片转换为data URI，支持按设置压缩。"""
-        if not images:
+    def _build_content_with_images(self, text: str, images: List[str], videos: Optional[List[str]] = None) -> Any:
+        """将文本与图片/视频路径组合成多模态content，图片转换为data URI，视频转换为 data URL。"""
+        videos = videos or []
+        if not images and not videos:
            return text
        parts: List[Dict[str, Any]] = []
        if text:
@ -1323,6 +1334,22 @@ class ContextManager:
                parts.append({"type": "image_url", "image_url": {"url": data_url}})
            except Exception:
                continue
+        # NOTE(review): paths are joined to project_path unchecked; confirm callers reject absolute/".." paths.
+        for rel_path in videos:
+            try:
+                video_file = Path(self.project_path) / rel_path
+                # Skip anything that is not a regular file or is larger than 50 MiB.
+                if not video_file.is_file() or video_file.stat().st_size > 50 * 1024 * 1024:
+                    continue
+                mime = mimetypes.guess_type(video_file.name)[0] or "video/mp4"
+                encoded = base64.b64encode(video_file.read_bytes()).decode("utf-8")
+                parts.append({
+                    "type": "video_url",
+                    "video_url": {"url": f"data:{mime};base64,{encoded}"},
+                })
+            except Exception:
+                # Best-effort: a video that cannot be read is left out of the payload.
+                continue
        return parts if parts else text

    def build_messages(self, context: Dict, user_input: str) -> List[Dict]:
@ -1377,7 +1404,8 @@ class ContextManager:
                messages.append(message)
            else:
                images = conv.get("images") or (conv.get("metadata") or {}).get("images") or []
-                content_payload = self._build_content_with_images(conv["content"], images) if images else conv["content"]
+                videos = conv.get("videos") or (conv.get("metadata") or {}).get("videos") or []
+                content_payload = self._build_content_with_images(conv["content"], images, videos) if (images or videos) else conv["content"]
                messages.append({
                    "role": conv["role"],
                    "content": content_payload
--- a/utils/conversation_manager.py
+++ b/utils/conversation_manager.py
@ -32,6 +32,7 @@ class ConversationMetadata:
    run_mode: str = "fast"
    model_key: Optional[str] = None
    has_images: bool = False
+    has_videos: bool = False
    status: str = "active"  # active, archived, error

 class ConversationManager:
@ -99,6 +100,7 @@ class ConversationManager:
                "run_mode": metadata.get("run_mode") or ("thinking" if metadata.get("thinking_mode") else "fast"),
                "model_key": metadata.get("model_key"),
                "has_images": metadata.get("has_images", False),
+                "has_videos": metadata.get("has_videos", False),
                "total_messages": metadata.get("total_messages", 0),
                "total_tools": metadata.get("total_tools", 0),
                "status": metadata.get("status", "active"),
@ -314,7 +316,8 @@ class ConversationManager:
        run_mode: str = "fast",
        initial_messages: List[Dict] = None,
        model_key: Optional[str] = None,
-        has_images: bool = False
+        has_images: bool = False,
+        has_videos: bool = False
    ) -> str:
        """
        创建新对话
@ -347,6 +350,7 @ class ConversationManager:
                "run_mode": normalized_mode,
                "model_key": model_key,
                "has_images": has_images,
+                "has_videos": has_videos,
                # 首次对话尚未生成文件树快照，待首次用户消息时填充
                "project_file_tree": None,
                "project_statistics": None,
@ -440,6 +445,7 @@ class ConversationManager:
                "run_mode": metadata.run_mode,
                "model_key": conversation_data["metadata"].get("model_key"),
                "has_images": conversation_data["metadata"].get("has_images", False),
+                "has_videos": conversation_data["metadata"].get("has_videos", False),
                "total_messages": metadata.total_messages,
                "total_tools": metadata.total_tools,
                "status": metadata.status
@ -459,6 +465,7 @@ class ConversationManager:
        todo_list: Optional[Dict] = None,
        model_key: Optional[str] = None,
        has_images: Optional[bool] = None,
+        has_videos: Optional[bool] = None,
        project_file_tree: Optional[str] = None,
        project_statistics: Optional[Dict] = None,
        project_snapshot_at: Optional[str] = None
@ -529,6 +536,10 @@ class ConversationManager:
                existing_data["metadata"]["has_images"] = bool(has_images)
            elif "has_images" not in existing_data["metadata"]:
                existing_data["metadata"]["has_images"] = False
+            if has_videos is not None:
+                existing_data["metadata"]["has_videos"] = bool(has_videos)
+            elif "has_videos" not in existing_data["metadata"]:
+                existing_data["metadata"]["has_videos"] = False
            # 文件树快照（如果有新值则更新，若已有则保持）
            if project_file_tree is not None:
                existing_data["metadata"]["project_file_tree"] = project_file_tree