fix: expand workspace file access and paginate convo index

2026-01-25 16:13:32 +08:00 · 2026-01-25 16:13:32 +08:00 · d0197c38c3
commit d0197c38c3
parent f7034a3047
10 changed files with 170 additions and 63 deletions
--- a/api_doc/README.md
+++ b/api_doc/README.md
@ -26,7 +26,7 @@
 4. 发送消息/启动后台任务（工作区内）：`POST /api/v1/workspaces/{workspace_id}/messages`
 5. 轮询任务事件：`GET /api/v1/tasks/{task_id}?from=<offset>`
 6. 停止任务：`POST /api/v1/tasks/{task_id}/cancel`
-7. 文件（工作区内，仅 user_upload）：`POST /api/v1/workspaces/{workspace_id}/files/upload`、`GET /api/v1/workspaces/{workspace_id}/files`、`GET /api/v1/workspaces/{workspace_id}/files/download`
+7. 文件：上传仅限 `user_upload/`，但列目录/下载可遍历整个 `project/`：`POST /api/v1/workspaces/{workspace_id}/files/upload`、`GET /api/v1/workspaces/{workspace_id}/files`、`GET /api/v1/workspaces/{workspace_id}/files/download`
 8. Prompt 管理（用户级共享）：`GET/POST /api/v1/prompts`、`GET /api/v1/prompts/{name}`
 9. 个性化管理（用户级共享）：`GET/POST /api/v1/personalizations`、`GET /api/v1/personalizations/{name}`
 10. 模型列表与健康检查：`GET /api/v1/models`、`GET /api/v1/health`
--- a/api_doc/files.md
+++ b/api_doc/files.md
@ -1,6 +1,6 @@
-# 文件 API（工作区内，仅 user_upload）
+# 文件 API（工作区内：上传仅限 user_upload，读取可遍历 project）

-文件读写是**工作区级**的：上传/列目录/下载都发生在指定 workspace 的 `project/user_upload/` 下。
+文件读写是**工作区级**的：上传仅允许写入该 workspace 的 `project/user_upload/`；列目录/下载则可访问整个 `project/`（对应容器内 `/workspace`），兼容传入 `user_upload/` 路径。

 鉴权：所有接口均需要 `Authorization: Bearer <TOKEN>`

@ -12,7 +12,7 @@

 说明：

- 不指定“任意路径上传”，只能上传到该 workspace 的 `user_upload` 目录（以及其子目录）。
+- 仅可上传到 workspace 的 `user_upload/` 目录（及其子目录）；路径越界将被拒绝。

 请求（multipart/form-data）：

@ -21,7 +21,7 @@
 - Form：
  - `file`：文件本体（必填）
  - `filename`：可选，自定义文件名（服务端会清洗）
-  - `dir`：可选，user_upload 下子目录（如 `inputs` / `a/b`）
+  - `dir`：可选，`user_upload` 下子目录（如 `inputs` / `a/b`）

 成功响应（200）：

@ -44,7 +44,7 @@

 说明：

- `path` 是 `user_upload` 下的相对路径；不传或传空则表示根目录。
+- `path` 是 `project/` 下的相对路径；不传或传空则表示 `project` 根目录。兼容传入 `user_upload/...`。

 成功响应（200）：

@ -73,7 +73,7 @@

 说明：

- `path` 是 `user_upload` 下相对路径
+- `path` 是 `project/` 下相对路径（兼容传入 `user_upload/...`）
 - 若 `path` 是文件：直接下载该文件
 - 若 `path` 是目录：服务端打包为 zip 返回

@ -111,4 +111,3 @@ curl -L -o inputs.zip \
  -H "Authorization: Bearer <TOKEN>" \
  "https://agent.cyjai.com/api/v1/workspaces/ws1/files/download?path=inputs"
 ```
-
--- a/api_doc/openapi.yaml
+++ b/api_doc/openapi.yaml
@ -934,7 +934,7 @@ paths:

  /api/v1/workspaces/{workspace_id}/files:
    get:
-      summary: 列出 user_upload 目录内容
+      summary: 列出 project 目录内容
      security: [{ bearerAuth: [] }]
      parameters:
        - in: path
--- a/core/main_terminal.py
+++ b/core/main_terminal.py
@ -1496,7 +1496,7 @@ class MainTerminal:
 	                "type": "function",
 	                "function": {
 	                    "name": "terminal_input",
-	                    "description": "向活动终端发送命令或输入。禁止启动会占用终端界面的程序（python/node/nano/vim 等）；如遇卡死请结合 terminal_snapshot 并使用 terminal_reset 恢复。必须提供 timeout；一旦超时，当前命令**一定会被打断**且无法继续执行（需要重新运行），终端会话本身保持可用。若不确定上一条命令是否结束，先用 terminal_snapshot 确认后再继续输入。",
+	                    "description": "向活动终端发送命令或输入。禁止启动会占用终端界面的程序（python/node/nano/vim 等）；如遇卡死请结合 terminal_snapshot 并使用 terminal_reset 恢复。timeout 可填秒数（最大300，超时会强制打断命令）或填 never（不封装超时、不杀进程，可能无输出，无法仅靠快照判断是否成功，需要用 curl/ps 等主动检查）。若不确定上一条命令是否结束，先用 terminal_snapshot 确认后再继续输入。",
 	                    "parameters": {
 	                        "type": "object",
 	                        "properties": self._inject_intent({
@ -1509,8 +1509,8 @@ class MainTerminal:
                                "description": "目标终端会话名称（可选，默认使用活动终端）"
                            },
                            "timeout": {
-                                "type": "number",
-                                "description": "等待输出的最长秒数，必填，最大300"
+                                "type": ["number", "string"],
+                                "description": "等待输出的最长秒数，必填，最大300，或填 never 表示不封装超时且不中断进程"
                            }
                        }),
                        "required": ["command", "timeout"]
--- a/modules/api_user_manager.py
+++ b/modules/api_user_manager.py
@ -268,8 +268,9 @@ class ApiUserManager:
            project_path = p / "project"
            result[ws_id] = {
                "workspace_id": ws_id,
-                "project_path": str(project_path),
-                "data_dir": str(data_dir),
+                # 不暴露宿主机绝对路径，只返回相对工作区的信息
+                "project_path": "project",
+                "data_dir": "data",
                "has_conversations": (data_dir / "conversations").exists(),
            }
        return result
--- a/modules/terminal_manager.py
+++ b/modules/terminal_manager.py
@ -523,29 +523,57 @@ class TerminalManager:
        
        # 发送命令
        terminal = self.terminals[target_session]
-        if timeout is None or timeout <= 0:
-            return {
-                "success": False,
-                "error": "timeout 参数必填且需大于0",
-                "status": "error",
-                "output": "timeout 参数缺失"
-            }
-        timeout = min(timeout, 300)
+        never_timeout = False
+        if isinstance(timeout, str):
+            if timeout.lower() == "never":
+                never_timeout = True
+            else:
+                try:
+                    timeout = float(timeout)
+                except (TypeError, ValueError):
+                    return {
+                        "success": False,
+                        "error": "timeout 参数必须是数字或 'never'",
+                        "status": "error",
+                        "output": "timeout 参数无效"
+                    }

-        base_timeout = timeout
-        marker = f"__CMD_DONE__{int(time.time()*1000)}__"
+        if not never_timeout:
+            if timeout is None or timeout <= 0:
+                return {
+                    "success": False,
+                    "error": "timeout 参数必填且需大于0，或设置为 'never'",
+                    "status": "error",
+                    "output": "timeout 参数缺失"
+                }
+            timeout = min(timeout, 300)

-        wrapped_command, wait_timeout = self._build_wrapped_command(command, marker, timeout)
+            base_timeout = timeout
+            marker = f"__CMD_DONE__{int(time.time()*1000)}__"

+            wrapped_command, wait_timeout = self._build_wrapped_command(command, marker, timeout)
+
+            result = terminal.send_command(
+                wrapped_command,
+                timeout=wait_timeout,
+                timeout_cutoff=base_timeout,
+                enforce_full_timeout=True,
+                sentinel=marker,
+            )
+            result["timeout"] = base_timeout
+            result["never_timeout"] = False
+            return result
+
+        # never_timeout 分支：不包装命令，不发送结束标记，不强杀进程
        result = terminal.send_command(
-            wrapped_command,
-            timeout=wait_timeout,
-            timeout_cutoff=base_timeout,
-            enforce_full_timeout=True,
-            sentinel=marker,
+            command,
+            timeout=None,
+            timeout_cutoff=None,
+            enforce_full_timeout=False,
+            sentinel=None,
        )
-        result["timeout"] = base_timeout
-
+        result["timeout"] = "never"
+        result["never_timeout"] = True
        return result

    def _build_wrapped_command(self, command: str, marker: str, timeout: int) -> (str, int):
--- a/prompts/main_system.txt
+++ b/prompts/main_system.txt
@ -122,7 +122,7 @@
 - 如果终端卡住了，用 terminal_reset 重启

 **⏱️ 时间/超时/状态确认（硬性规则）**：
- 需要控制“命令最多跑多久”，请使用 `run_command` / `terminal_input` 的 `timeout` 参数；一旦超时，命令**一定会被打断**、无法继续执行（需要重新运行）。
+- 需要控制“命令最多跑多久”，请使用 `run_command` / `terminal_input` 的 `timeout` 参数；一旦超时，命令**一定会被打断**、无法继续执行（需要重新运行）。如需在持久终端保持后台运行且不被强制杀掉，可将 `terminal_input` 的 `timeout` 设为 `never`（不添加超时封装，也不会追加结束标记；可能无输出，快照无法判断成败，需用 curl/ps/log 等主动验证）；`run_command` 仍需设定具体秒数。
 - 禁止凭感觉判断“我觉得下载/编译应该已经完成了/还没完成”；必须使用 `terminal_snapshot` 获取终端快照来确认真实情况。
 - 若不确定某终端里**上一条命令是否已结束**，禁止在**同一终端**继续输入任何内容（可能导致终端彻底卡死）；应先用 `terminal_snapshot` 检查，或在**其他终端会话**里用 `ps/pgrep/ls` 等命令验证后再继续操作。

--- a/prompts/main_system_qwenvl.txt
+++ b/prompts/main_system_qwenvl.txt
@ -142,7 +142,7 @@
 - 如果终端卡住了，用 terminal_reset 重启

 **⏱️ 时间/超时/状态确认（硬性规则）**：
- 需要控制“命令最多跑多久”，请使用 `run_command` / `terminal_input` 的 `timeout` 参数；一旦超时，命令**一定会被打断**、无法继续执行（需要重新运行）。
+- 需要控制“命令最多跑多久”，请使用 `run_command` / `terminal_input` 的 `timeout` 参数；一旦超时，命令**一定会被打断**、无法继续执行（需要重新运行）。如需在持久终端保持后台运行且不被强制杀掉，可将 `terminal_input` 的 `timeout` 设为 `never`（不添加超时封装，也不会追加结束标记；可能无输出，快照无法判断成败，需用 curl/ps/log 等主动验证）；`run_command` 仍需设定具体秒数。
 - 禁止凭感觉判断“我觉得下载/编译应该已经完成了/还没完成”；必须使用 `terminal_snapshot` 获取终端快照来确认真实情况。
 - 若不确定某终端里**上一条命令是否已结束**，禁止在**同一终端**继续输入任何内容（可能导致终端彻底卡死）；应先用 `terminal_snapshot` 检查，或在**其他终端会话**里用 `ps/pgrep/ls` 等命令验证后再继续操作。

--- a/server/api_v1.py
+++ b/server/api_v1.py
@ -48,6 +48,16 @@ def _sanitize_workspace_id(ws_id: str) -> str:
    return ws


+def _public_workspace_info(workspace) -> Dict[str, Any]:
+    """Build the public workspace payload without exposing host absolute paths."""
+    # Paths are reported relative to the workspace directory, never as host paths.
+    payload: Dict[str, Any] = {"success": True}
+    payload["workspace_id"] = workspace.workspace_id
+    payload["project_path"] = "project"
+    payload["data_dir"] = "data"
+    return payload
+
+
@api_v1_bp.route("/workspaces", methods=["GET"])
@api_token_required
 def list_workspaces_api():
@ -68,12 +78,7 @@ def create_workspace_api():
        ws = state.api_user_manager.ensure_workspace(username, ws_id)
    except Exception as exc:
        return jsonify({"success": False, "error": str(exc)}), 500
-    return jsonify({
-        "success": True,
-        "workspace_id": ws.workspace_id,
-        "project_path": str(ws.project_path),
-        "data_dir": str(ws.data_dir),
-    })
+    return jsonify(_public_workspace_info(ws))


@api_v1_bp.route("/workspaces/<workspace_id>", methods=["GET"])
@ -127,6 +132,29 @@ def _within_uploads(workspace, rel_path: str) -> Path:
    return target


+def _within_project(workspace, rel_path: str, default_to_project_root: bool = True) -> Path:
+    """
+    Resolve a relative path inside the workspace project directory, rejecting any escape.
+
+    Args:
+        workspace: ApiUserWorkspace
+        rel_path: path relative to project/ (so "user_upload/..." still works); a "/workspace/" prefix is stripped
+        default_to_project_root: kept for compatibility; an empty rel_path always resolves to the project root
+    Raises:
+        ValueError: if the resolved target is neither the project root nor one of its descendants
+    """
+    base = Path(workspace.project_path).resolve()
+    rel = (rel_path or "").strip()
+    # Accept container-style paths such as /workspace/<...>
+    if rel.startswith("/workspace/"):
+        rel = rel.split("/workspace/", 1)[1]
+    target = (base / rel.lstrip("/")).resolve()
+    # Compare path components, not string prefixes: "<root>/project_evil" must not pass for "<root>/project".
+    if target != base and base not in target.parents:
+        raise ValueError("非法路径")
+    return target
+
+
 def _conversation_path(workspace, conv_id: str) -> Path:
    return Path(workspace.data_dir) / "conversations" / f"{conv_id}.json"

@ -449,15 +477,28 @@ def list_files_api(workspace_id: str):
        return jsonify({"success": False, "error": "系统未初始化"}), 503
    rel = request.args.get("path") or ""
    try:
-        target = _within_uploads(workspace, rel)
+        target = _within_project(workspace, rel)
        if not target.exists():
            return jsonify({"success": False, "error": "路径不存在"}), 404
        if not target.is_dir():
-            return jsonify({"success": False, "error": "路径不是文件夹"}), 400
+            stat = target.stat()
+            rel_entry = target.relative_to(workspace.project_path)
+            return jsonify({
+                "success": True,
+                "workspace_id": ws.workspace_id,
+                "items": [{
+                    "name": target.name,
+                    "is_dir": False,
+                    "size": stat.st_size,
+                    "modified_at": stat.st_mtime,
+                    "path": str(rel_entry),
+                }],
+                "base": str(rel_entry.parent) if rel_entry.parent != Path(".") else "",
+            })
        items = []
        for entry in sorted(target.iterdir(), key=lambda p: p.name):
            stat = entry.stat()
-            rel_entry = entry.relative_to(workspace.uploads_dir)
+            rel_entry = entry.relative_to(workspace.project_path)
            items.append({
                "name": entry.name,
                "is_dir": entry.is_dir(),
@ -465,7 +506,9 @@ def list_files_api(workspace_id: str):
                "modified_at": stat.st_mtime,
                "path": str(rel_entry),
            })
-        return jsonify({"success": True, "workspace_id": ws.workspace_id, "items": items, "base": str(target.relative_to(workspace.uploads_dir))})
+        base_rel = target.relative_to(workspace.project_path)
+        base_str = "" if str(base_rel) == "." else str(base_rel)
+        return jsonify({"success": True, "workspace_id": ws.workspace_id, "items": items, "base": base_str})
    except Exception as exc:
        return jsonify({"success": False, "error": str(exc)}), 400

@ -482,7 +525,7 @@ def download_file_api(workspace_id: str):
    if not rel:
        return jsonify({"success": False, "error": "缺少 path"}), 400
    try:
-        target = _within_uploads(workspace, rel)
+        target = _within_project(workspace, rel)
    except Exception as exc:
        return jsonify({"success": False, "error": str(exc)}), 400
    if not target.exists():
--- a/utils/conversation_manager.py
+++ b/utils/conversation_manager.py
@ -45,7 +45,8 @@ class ConversationManager:
        self.workspace_root = Path(__file__).resolve().parents[1]
        self._ensure_directories()
        self._index_verified = False
-        self._load_index(ensure_integrity=True)
+        # 首次加载索引仅重建最近 20 条，降低启动开销；后续按需扩展
+        self._load_index(ensure_integrity=True, max_rebuild=20)
        
    def _ensure_directories(self):
        """确保必要的目录存在"""
@ -56,17 +57,25 @@ class ConversationManager:
        if not self.index_file.exists():
            self._save_index({})
    
-    def _iter_conversation_files(self):
-        """遍历对话文件（排除索引文件）"""
-        for path in self.conversations_dir.glob("*.json"):
-            if path == self.index_file:
-                continue
-            yield path
+    def _iter_conversation_files(self, sort_by_mtime: bool = True):
+        """Return the conversation files (index file excluded), newest mtime first when requested."""
+        found = [entry for entry in self.conversations_dir.glob("*.json") if entry != self.index_file]
+        if not sort_by_mtime:
+            return found
+        return sorted(found, key=lambda entry: entry.stat().st_mtime, reverse=True)

-    def _rebuild_index_from_files(self) -> Dict:
-        """从现有对话文件重建索引"""
+    def _rebuild_index_from_files(self, max_count: Optional[int] = None) -> Dict:
+        """
+        从现有对话文件重建索引。
+
+        Args:
+            max_count: 限制重建的条目数（按文件修改时间倒序）；None 表示全量重建。
+        """
        rebuilt_index: Dict[str, Dict] = {}
-        for file_path in self._iter_conversation_files():
+        files = self._iter_conversation_files(sort_by_mtime=True)
+        if max_count is not None:
+            files = files[:max(0, int(max_count))]
+        for file_path in files:
            try:
                with open(file_path, "r", encoding="utf-8") as f:
                    raw = f.read().strip()
@ -108,8 +117,8 @@ class ConversationManager:
                return True
        return False

-    def _load_index(self, ensure_integrity: bool = False) -> Dict:
-        """加载对话索引，可选地在缺失时自动重建"""
+    def _load_index(self, ensure_integrity: bool = False, max_rebuild: Optional[int] = None) -> Dict:
+        """加载对话索引，可选地在缺失时自动重建（可限制重建条数）"""
        try:
            index: Dict = {}
            if self.index_file.exists():
@ -120,14 +129,14 @@ class ConversationManager:
                        if index:
                            if ensure_integrity and not self._index_verified:
                                if self._index_missing_conversations(index):
-                                    rebuilt = self._rebuild_index_from_files()
+                                    rebuilt = self._rebuild_index_from_files(max_count=max_rebuild)
                                    if rebuilt:
                                        self._save_index(rebuilt)
                                        index = rebuilt
                                self._index_verified = True
                            return index
                        # 索引为空但对话文件仍然存在时尝试重建
-                        rebuilt = self._rebuild_index_from_files()
+                        rebuilt = self._rebuild_index_from_files(max_count=max_rebuild)
                        if rebuilt:
                            self._save_index(rebuilt)
                            if ensure_integrity:
@ -135,7 +144,7 @@ class ConversationManager:
                            return rebuilt
                        return {}
            # 索引缺失但存在对话文件时重建
-            rebuilt = self._rebuild_index_from_files()
+            rebuilt = self._rebuild_index_from_files(max_count=max_rebuild)
            if rebuilt:
                self._save_index(rebuilt)
                if ensure_integrity:
@ -153,7 +162,7 @@ class ConversationManager:
                    print(f"🗄️ 已备份损坏的索引文件到: {backup_path.name}")
            except Exception as backup_exc:
                print(f"⚠️ 备份损坏索引文件失败: {backup_exc}")
-            rebuilt = self._rebuild_index_from_files()
+            rebuilt = self._rebuild_index_from_files(max_count=max_rebuild)
            if rebuilt:
                self._save_index(rebuilt)
                if ensure_integrity:
@ -175,6 +184,30 @@ class ConversationManager:
            except Exception:
                pass
            print(f"⌘ 保存对话索引失败: {e}")
+
+    def _ensure_index_covering(self, limit: int, offset: int) -> Dict:
+        """
+        Return an index holding at least offset+limit entries, capped by the number of conversation files on disk.
+        """
+        available = len(self._iter_conversation_files(sort_by_mtime=False))
+        # Never ask for more entries than files exist; otherwise a short list would be rebuilt on every call.
+        needed = min(max(0, int(offset) + int(limit)), available)
+        index = self._load_index()
+        if len(index) >= needed:
+            return index
+        # Extend the index to the newest `needed` files (rebuild orders files by mtime, descending).
+        rebuilt = self._rebuild_index_from_files(max_count=needed)
+        if rebuilt:
+            self._save_index(rebuilt)
+            index = rebuilt
+
+        # Still short: fall back to one full rebuild, unless the capped pass above already covered every file.
+        if len(index) < needed and needed < available:
+            rebuilt_full = self._rebuild_index_from_files(max_count=None)
+            if rebuilt_full:
+                self._save_index(rebuilt_full)
+                index = rebuilt_full
+        return index
    
    def _generate_conversation_id(self) -> str:
        """生成唯一的对话ID"""
@ -675,7 +708,10 @@ class ConversationManager:
            Dict: 包含对话列表和统计信息
        """
        try:
-            index = self._load_index()
+            # 总对话数按文件数统计，防止初始索引截断导致“没有更多”按钮消失
+            total_files = len(self._iter_conversation_files(sort_by_mtime=False))
+
+            index = self._ensure_index_covering(limit=limit, offset=offset)
            
            # 按更新时间倒序排列
            sorted_conversations = sorted(
@ -685,7 +721,7 @@ class ConversationManager:
            )
            
            # 分页
-            total = len(sorted_conversations)
+            total = max(len(sorted_conversations), total_files)
            conversations = sorted_conversations[offset:offset+limit]
            
            # 格式化结果