From dacc68f46a22ea69191e16beabdfeed2afb234aa Mon Sep 17 00:00:00 2001 From: JOJO <1498581755@qq.com> Date: Fri, 14 Nov 2025 18:33:55 +0800 Subject: [PATCH] feat: enhance read tool and config structure --- config.py | 182 ------------ config/__init__.py | 29 ++ config/api.py | 19 ++ config/auth.py | 9 + config/conversation.py | 52 ++++ config/limits.py | 64 +++++ config/memory.py | 11 + config/paths.py | 21 ++ config/security.py | 48 ++++ config/terminal.py | 23 ++ config/todo.py | 11 + config/ui.py | 29 ++ core/main_terminal.py | 531 ++++++++++++++++++++--------------- core/tool_config.py | 2 +- core/web_terminal.py | 28 +- modules/file_manager.py | 267 ++++++++++++++++-- prompts/main_system_prev.txt | 28 +- prompts/tool_prompts.txt | 12 +- static/app.js | 33 ++- utils/api_client.py | 70 ++++- utils/context_manager.py | 56 ++-- web_server.py | 67 ++++- 22 files changed, 1067 insertions(+), 525 deletions(-) delete mode 100644 config.py create mode 100644 config/__init__.py create mode 100644 config/api.py create mode 100644 config/auth.py create mode 100644 config/conversation.py create mode 100644 config/limits.py create mode 100644 config/memory.py create mode 100644 config/paths.py create mode 100644 config/security.py create mode 100644 config/terminal.py create mode 100644 config/todo.py create mode 100644 config/ui.py diff --git a/config.py b/config.py deleted file mode 100644 index 9886630..0000000 --- a/config.py +++ /dev/null @@ -1,182 +0,0 @@ -# config.py - 系统配置文件(添加了终端配置) - -# API配置 -API_BASE_URL = "https://ark.cn-beijing.volces.com/api/v3" -API_KEY = "3e96a682-919d-45c1-acb2-53bc4e9660d3" # 替换为你的API密钥 -MODEL_ID = "kimi-k2-250905" #模型ID - -#API_BASE_URL = "https://api.moonshot.cn/v1" -#API_KEY = "sk-xW0xjfQM6Mp9ZCWMLlnHiRJcpEOIZPTkXcN0dQ15xpZSuw2y" # 替换为你的API密钥 -#MODEL_ID = "kimi-k2-thinking" #模型ID - -# Tavily搜索配置 -TAVILY_API_KEY = "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" # 替换为你的Tavily API密钥 - -# 系统配置 -DEFAULT_PROJECT_PATH = "./project" # 默认项目文件夹 -MAX_CONTEXT_SIZE = 100000 # 最大上下文字符数(约100K) -MAX_FILE_SIZE = 10 * 1024 * 1024 # 最大文件大小 10MB -MAX_OPEN_FILES = 20 # 最多同时打开的文件数 -MAX_UPLOAD_SIZE = 50 * 1024 * 1024 # 单次上传最大50MB -UPLOAD_ALLOWED_EXTENSIONS = [] -AGENT_VERSION = "v1.1" # 前端显示版本号 - -# 执行配置 -CODE_EXECUTION_TIMEOUT = 60 # 代码执行超时(秒) -TERMINAL_COMMAND_TIMEOUT = 30 # 终端命令超时(秒) -SEARCH_MAX_RESULTS = 10 # 搜索最大结果数 - -# 持久化终端配置(新增) -MAX_TERMINALS = 3 # 最大同时开启的终端数量 -TERMINAL_BUFFER_SIZE = 100000 # 每个终端的最大缓冲区大小(字符) -TERMINAL_DISPLAY_SIZE = 50000 # 终端显示大小限制(字符) -TERMINAL_TIMEOUT = 300 # 终端空闲超时(秒) -TERMINAL_OUTPUT_WAIT = 5 # 等待终端输出的默认时间(秒) -TERMINAL_SNAPSHOT_DEFAULT_LINES = 50 # 终端快照默认返回的行数 -TERMINAL_SNAPSHOT_MAX_LINES = 200 # 终端快照允许的最大行数 -TERMINAL_SNAPSHOT_MAX_CHARS = 60000 # 终端快照返回的最大字符数 -TERMINAL_INPUT_MAX_CHARS = 20000 # terminal_input返回的最大字符数 - -# 在 config.py 中添加以下配置项 - -# 自动修复配置 -AUTO_FIX_TOOL_CALL = False # 是否自动修复工具调用格式错误 -AUTO_FIX_MAX_ATTEMPTS = 3 # 最大自动修复尝试次数 - -# 工具调用安全限制 -MAX_ITERATIONS_PER_TASK = 100# 单个任务最大迭代次数 -MAX_CONSECUTIVE_SAME_TOOL = 50 # 连续相同工具调用的最大次数 -MAX_TOTAL_TOOL_CALLS = 100 #单个任务最大工具调用总数 -TOOL_CALL_COOLDOWN = 0.5 # 工具调用之间的最小间隔(秒) - -# 文件路径 -PROMPTS_DIR = "./prompts" -DATA_DIR = "./data" -LOGS_DIR = "./logs" -# 多用户空间配置 -USER_SPACE_DIR = "./users" # 每个用户的工作区根目录 -USERS_DB_FILE = f"{DATA_DIR}/users.json" # 用户信息存储 -INVITE_CODES_FILE = f"{DATA_DIR}/invite_codes.json" # 邀请码存储文件 - -# 记忆文件 -MAIN_MEMORY_FILE = f"{DATA_DIR}/memory.md" -TASK_MEMORY_FILE = f"{DATA_DIR}/task_memory.md" -CONVERSATION_HISTORY_FILE = f"{DATA_DIR}/conversation_history.json" - -# 日志配置 -LOG_LEVEL = "INFO" # DEBUG, INFO, WARNING, ERROR -LOG_FORMAT = "%(asctime)s - %(name)s - %(levelname)s - %(message)s" - -# 安全配置 -FORBIDDEN_COMMANDS = [ - "rm -rf /", - "rm -rf ~", - "format", - "shutdown", - "reboot", - "kill -9", - "dd if=", -] - -FORBIDDEN_PATHS = [ - "/System", - "/usr", - "/bin", - "/sbin", - "/etc", - "/var", - "/tmp", - "/Applications", - "/Library", - "C:\\Windows", - "C:\\Program Files", - "C:\\Program Files (x86)", - "C:\\ProgramData" -] - -# 这些是绝对不允许的根路径 -FORBIDDEN_ROOT_PATHS = [ - "/", - "C:\\", - "~" -] - -# 确认操作列表 -NEED_CONFIRMATION = [ - "delete_file", - "delete_folder", - "clear_file", - "execute_terminal", - "batch_delete" -] - -# 输出格式 -OUTPUT_FORMATS = { - "thinking": "💭 [思考]", - "action": "🔧 [执行]", - "file": "📁 [文件]", - "search": "🔍 [搜索]", - "code": "💻 [代码]", - "terminal": "⚡ [终端]", - "memory": "📝 [记忆]", - "success": "✅ [成功]", - "error": "❌ [错误]", - "warning": "⚠️ [警告]", - "confirm": "❓ [确认]", - "info": "ℹ️ [信息]", - "session": "📺 [会话]" # 新增:终端会话标记 -} -# 在 config.py 文件末尾添加以下对话持久化相关配置 - -# ========================================== -# 对话持久化配置(新增) -# ========================================== - -# 对话存储配置 -CONVERSATIONS_DIR = f"{DATA_DIR}/conversations" # 对话存储目录 -CONVERSATION_INDEX_FILE = "index.json" # 对话索引文件名 -CONVERSATION_FILE_PREFIX = "conv_" # 对话文件前缀 - -# 对话管理配置 -DEFAULT_CONVERSATIONS_LIMIT = 20 # API默认返回的对话数量 -MAX_CONVERSATIONS_LIMIT = 100 # API允许的最大对话数量限制 -CONVERSATION_TITLE_MAX_LENGTH = 100 # 对话标题最大长度 -CONVERSATION_SEARCH_MAX_RESULTS = 50 # 搜索结果最大数量 - -# 对话清理策略配置 -CONVERSATION_AUTO_CLEANUP_ENABLED = False # 是否启用自动清理旧对话 -CONVERSATION_RETENTION_DAYS = 30 # 对话保留天数(如果启用自动清理) -CONVERSATION_MAX_TOTAL = 1000 # 最大对话总数(超过时清理最旧的) - -# 对话备份配置 -CONVERSATION_BACKUP_ENABLED = True # 是否启用对话备份 -CONVERSATION_BACKUP_INTERVAL_HOURS = 24 # 备份间隔(小时) -CONVERSATION_BACKUP_MAX_COUNT = 7 # 最多保留多少个备份文件 - -# 对话安全配置 -CONVERSATION_MAX_MESSAGE_SIZE = 50000 # 单条消息最大字符数 -CONVERSATION_MAX_MESSAGES_PER_CONVERSATION = 10000 # 每个对话最大消息数 -CONVERSATION_EXPORT_MAX_SIZE = 10 * 1024 * 1024 # 导出文件最大大小(10MB) - -# 对话性能配置 -CONVERSATION_LAZY_LOADING = True # 是否启用懒加载(只加载对话元数据,不加载完整消息) -CONVERSATION_CACHE_SIZE = 50 # 内存中缓存的对话数量 -CONVERSATION_INDEX_UPDATE_BATCH_SIZE = 100 # 批量更新索引的大小 - -# 工具输出字符数限制 -MAX_READ_FILE_CHARS = 30000 # read_file工具限制 -MAX_FOCUS_FILE_CHARS = 30000 # focus_file工具限制 -MAX_RUN_COMMAND_CHARS = 10000 # run_command工具限制 -MAX_EXTRACT_WEBPAGE_CHARS = 80000 # extract_webpage工具限制 - -# 待办事项配置 -TODO_MAX_TASKS = 4 -TODO_MAX_OVERVIEW_LENGTH = 999 -TODO_MAX_TASK_LENGTH = 999 - -# 登录配置 -ADMIN_USERNAME = "jojo" -ADMIN_PASSWORD_HASH = "pbkdf2:sha256:600000$FSNAVncPXW6CBtfj$b7f093f4256de9d1a16d588565d4b1e108a9c66b2901884dd118c515258d78c7" - -# 模型调用相关 -DEFAULT_RESPONSE_MAX_TOKENS = 32768 # 每次API响应的默认最大tokens,可在此调整 diff --git a/config/__init__.py b/config/__init__.py new file mode 100644 index 0000000..27bfe61 --- /dev/null +++ b/config/__init__.py @@ -0,0 +1,29 @@ +"""Config package initializer,保持对旧 `from config import ...` 的兼容。""" + +from . import api as _api +from . import paths as _paths +from . import limits as _limits +from . import terminal as _terminal +from . import conversation as _conversation +from . import security as _security +from . import ui as _ui +from . import memory as _memory +from . import todo as _todo +from . import auth as _auth + +from .api import * +from .paths import * +from .limits import * +from .terminal import * +from .conversation import * +from .security import * +from .ui import * +from .memory import * +from .todo import * +from .auth import * + +__all__ = [] +for module in (_api, _paths, _limits, _terminal, _conversation, _security, _ui, _memory, _todo, _auth): + __all__ += getattr(module, "__all__", []) + +del _api, _paths, _limits, _terminal, _conversation, _security, _ui, _memory, _todo, _auth diff --git a/config/api.py b/config/api.py new file mode 100644 index 0000000..458ae0a --- /dev/null +++ b/config/api.py @@ -0,0 +1,19 @@ +"""API 和外部服务配置。""" + +API_BASE_URL = "https://ark.cn-beijing.volces.com/api/v3" +API_KEY = "3e96a682-919d-45c1-acb2-53bc4e9660d3" +MODEL_ID = "kimi-k2-250905" + +# Tavily 搜索 +TAVILY_API_KEY = "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" + +# 默认响应 token 限制 +DEFAULT_RESPONSE_MAX_TOKENS = 32768 + +__all__ = [ + "API_BASE_URL", + "API_KEY", + "MODEL_ID", + "TAVILY_API_KEY", + "DEFAULT_RESPONSE_MAX_TOKENS", +] diff --git a/config/auth.py b/config/auth.py new file mode 100644 index 0000000..5706878 --- /dev/null +++ b/config/auth.py @@ -0,0 +1,9 @@ +"""认证与后台账户配置。""" + +ADMIN_USERNAME = "jojo" +ADMIN_PASSWORD_HASH = "pbkdf2:sha256:600000$FSNAVncPXW6CBtfj$b7f093f4256de9d1a16d588565d4b1e108a9c66b2901884dd118c515258d78c7" + +__all__ = [ + "ADMIN_USERNAME", + "ADMIN_PASSWORD_HASH", +] diff --git a/config/conversation.py b/config/conversation.py new file mode 100644 index 0000000..e9bcb76 --- /dev/null +++ b/config/conversation.py @@ -0,0 +1,52 @@ +"""对话持久化与索引配置。""" + +from .paths import DATA_DIR + +CONVERSATION_HISTORY_FILE = f"{DATA_DIR}/conversation_history.json" +CONVERSATIONS_DIR = f"{DATA_DIR}/conversations" +CONVERSATION_INDEX_FILE = "index.json" +CONVERSATION_FILE_PREFIX = "conv_" + +DEFAULT_CONVERSATIONS_LIMIT = 20 +MAX_CONVERSATIONS_LIMIT = 100 +CONVERSATION_TITLE_MAX_LENGTH = 100 +CONVERSATION_SEARCH_MAX_RESULTS = 50 + +CONVERSATION_AUTO_CLEANUP_ENABLED = False +CONVERSATION_RETENTION_DAYS = 30 +CONVERSATION_MAX_TOTAL = 1000 + +CONVERSATION_BACKUP_ENABLED = True +CONVERSATION_BACKUP_INTERVAL_HOURS = 24 +CONVERSATION_BACKUP_MAX_COUNT = 7 + +CONVERSATION_MAX_MESSAGE_SIZE = 50000 +CONVERSATION_MAX_MESSAGES_PER_CONVERSATION = 10000 +CONVERSATION_EXPORT_MAX_SIZE = 10 * 1024 * 1024 + +CONVERSATION_LAZY_LOADING = True +CONVERSATION_CACHE_SIZE = 50 +CONVERSATION_INDEX_UPDATE_BATCH_SIZE = 100 + +__all__ = [ + "CONVERSATION_HISTORY_FILE", + "CONVERSATIONS_DIR", + "CONVERSATION_INDEX_FILE", + "CONVERSATION_FILE_PREFIX", + "DEFAULT_CONVERSATIONS_LIMIT", + "MAX_CONVERSATIONS_LIMIT", + "CONVERSATION_TITLE_MAX_LENGTH", + "CONVERSATION_SEARCH_MAX_RESULTS", + "CONVERSATION_AUTO_CLEANUP_ENABLED", + "CONVERSATION_RETENTION_DAYS", + "CONVERSATION_MAX_TOTAL", + "CONVERSATION_BACKUP_ENABLED", + "CONVERSATION_BACKUP_INTERVAL_HOURS", + "CONVERSATION_BACKUP_MAX_COUNT", + "CONVERSATION_MAX_MESSAGE_SIZE", + "CONVERSATION_MAX_MESSAGES_PER_CONVERSATION", + "CONVERSATION_EXPORT_MAX_SIZE", + "CONVERSATION_LAZY_LOADING", + "CONVERSATION_CACHE_SIZE", + "CONVERSATION_INDEX_UPDATE_BATCH_SIZE", +] diff --git a/config/limits.py b/config/limits.py new file mode 100644 index 0000000..ff9dd44 --- /dev/null +++ b/config/limits.py @@ -0,0 +1,64 @@ +"""全局额度与工具限制配置。""" + +# 上下文与文件 +MAX_CONTEXT_SIZE = 100000 +MAX_FILE_SIZE = 10 * 1024 * 1024 +MAX_OPEN_FILES = 20 +MAX_UPLOAD_SIZE = 50 * 1024 * 1024 + +# 执行超时 +CODE_EXECUTION_TIMEOUT = 60 +TERMINAL_COMMAND_TIMEOUT = 30 +SEARCH_MAX_RESULTS = 10 + +# 自动修复与工具调用限制 +AUTO_FIX_TOOL_CALL = False +AUTO_FIX_MAX_ATTEMPTS = 3 +MAX_ITERATIONS_PER_TASK = 100 +MAX_CONSECUTIVE_SAME_TOOL = 50 +MAX_TOTAL_TOOL_CALLS = 100 +TOOL_CALL_COOLDOWN = 0.5 + +# 工具字符/体积限制 +MAX_READ_FILE_CHARS = 30000 +MAX_FOCUS_FILE_CHARS = 30000 +MAX_RUN_COMMAND_CHARS = 10000 +MAX_EXTRACT_WEBPAGE_CHARS = 80000 + +# read_file 子配置 +READ_TOOL_MAX_FILE_SIZE = 100 * 1024 * 1024 +READ_TOOL_DEFAULT_MAX_CHARS = MAX_READ_FILE_CHARS +READ_TOOL_DEFAULT_CONTEXT_BEFORE = 1 +READ_TOOL_DEFAULT_CONTEXT_AFTER = 1 +READ_TOOL_MAX_CONTEXT_BEFORE = 3 +READ_TOOL_MAX_CONTEXT_AFTER = 5 +READ_TOOL_DEFAULT_MAX_MATCHES = 5 +READ_TOOL_MAX_MATCHES = 50 + +__all__ = [ + "MAX_CONTEXT_SIZE", + "MAX_FILE_SIZE", + "MAX_OPEN_FILES", + "MAX_UPLOAD_SIZE", + "CODE_EXECUTION_TIMEOUT", + "TERMINAL_COMMAND_TIMEOUT", + "SEARCH_MAX_RESULTS", + "AUTO_FIX_TOOL_CALL", + "AUTO_FIX_MAX_ATTEMPTS", + "MAX_ITERATIONS_PER_TASK", + "MAX_CONSECUTIVE_SAME_TOOL", + "MAX_TOTAL_TOOL_CALLS", + "TOOL_CALL_COOLDOWN", + "MAX_READ_FILE_CHARS", + "MAX_FOCUS_FILE_CHARS", + "MAX_RUN_COMMAND_CHARS", + "MAX_EXTRACT_WEBPAGE_CHARS", + "READ_TOOL_MAX_FILE_SIZE", + "READ_TOOL_DEFAULT_MAX_CHARS", + "READ_TOOL_DEFAULT_CONTEXT_BEFORE", + "READ_TOOL_DEFAULT_CONTEXT_AFTER", + "READ_TOOL_MAX_CONTEXT_BEFORE", + "READ_TOOL_MAX_CONTEXT_AFTER", + "READ_TOOL_DEFAULT_MAX_MATCHES", + "READ_TOOL_MAX_MATCHES", +] diff --git a/config/memory.py b/config/memory.py new file mode 100644 index 0000000..e9630d6 --- /dev/null +++ b/config/memory.py @@ -0,0 +1,11 @@ +"""记忆文件配置。""" + +from .paths import DATA_DIR + +MAIN_MEMORY_FILE = f"{DATA_DIR}/memory.md" +TASK_MEMORY_FILE = f"{DATA_DIR}/task_memory.md" + +__all__ = [ + "MAIN_MEMORY_FILE", + "TASK_MEMORY_FILE", +] diff --git a/config/paths.py b/config/paths.py new file mode 100644 index 0000000..1fd835b --- /dev/null +++ b/config/paths.py @@ -0,0 +1,21 @@ +"""项目路径与目录配置。""" + +DEFAULT_PROJECT_PATH = "./project" +PROMPTS_DIR = "./prompts" +DATA_DIR = "./data" +LOGS_DIR = "./logs" + +# 多用户空间 +USER_SPACE_DIR = "./users" +USERS_DB_FILE = f"{DATA_DIR}/users.json" +INVITE_CODES_FILE = f"{DATA_DIR}/invite_codes.json" + +__all__ = [ + "DEFAULT_PROJECT_PATH", + "PROMPTS_DIR", + "DATA_DIR", + "LOGS_DIR", + "USER_SPACE_DIR", + "USERS_DB_FILE", + "INVITE_CODES_FILE", +] diff --git a/config/security.py b/config/security.py new file mode 100644 index 0000000..4af165c --- /dev/null +++ b/config/security.py @@ -0,0 +1,48 @@ +"""安全与确认策略配置。""" + +FORBIDDEN_COMMANDS = [ + "rm -rf /", + "rm -rf ~", + "format", + "shutdown", + "reboot", + "kill -9", + "dd if=", +] + +FORBIDDEN_PATHS = [ + "/System", + "/usr", + "/bin", + "/sbin", + "/etc", + "/var", + "/tmp", + "/Applications", + "/Library", + "C:\\Windows", + "C:\\Program Files", + "C:\\Program Files (x86)", + "C:\\ProgramData", +] + +FORBIDDEN_ROOT_PATHS = [ + "/", + "C:\\", + "~", +] + +NEED_CONFIRMATION = [ + "delete_file", + "delete_folder", + "clear_file", + "execute_terminal", + "batch_delete", +] + +__all__ = [ + "FORBIDDEN_COMMANDS", + "FORBIDDEN_PATHS", + "FORBIDDEN_ROOT_PATHS", + "NEED_CONFIRMATION", +] diff --git a/config/terminal.py b/config/terminal.py new file mode 100644 index 0000000..bf7eeac --- /dev/null +++ b/config/terminal.py @@ -0,0 +1,23 @@ +"""终端与会话管理配置。""" + +MAX_TERMINALS = 3 +TERMINAL_BUFFER_SIZE = 100000 +TERMINAL_DISPLAY_SIZE = 50000 +TERMINAL_TIMEOUT = 300 +TERMINAL_OUTPUT_WAIT = 5 +TERMINAL_SNAPSHOT_DEFAULT_LINES = 50 +TERMINAL_SNAPSHOT_MAX_LINES = 200 +TERMINAL_SNAPSHOT_MAX_CHARS = 60000 +TERMINAL_INPUT_MAX_CHARS = 20000 + +__all__ = [ + "MAX_TERMINALS", + "TERMINAL_BUFFER_SIZE", + "TERMINAL_DISPLAY_SIZE", + "TERMINAL_TIMEOUT", + "TERMINAL_OUTPUT_WAIT", + "TERMINAL_SNAPSHOT_DEFAULT_LINES", + "TERMINAL_SNAPSHOT_MAX_LINES", + "TERMINAL_SNAPSHOT_MAX_CHARS", + "TERMINAL_INPUT_MAX_CHARS", +] diff --git a/config/todo.py b/config/todo.py new file mode 100644 index 0000000..8cb1145 --- /dev/null +++ b/config/todo.py @@ -0,0 +1,11 @@ +"""待办事项工具配置。""" + +TODO_MAX_TASKS = 4 +TODO_MAX_OVERVIEW_LENGTH = 999 +TODO_MAX_TASK_LENGTH = 999 + +__all__ = [ + "TODO_MAX_TASKS", + "TODO_MAX_OVERVIEW_LENGTH", + "TODO_MAX_TASK_LENGTH", +] diff --git a/config/ui.py b/config/ui.py new file mode 100644 index 0000000..f48c838 --- /dev/null +++ b/config/ui.py @@ -0,0 +1,29 @@ +"""界面展示与日志配置。""" + +OUTPUT_FORMATS = { + "thinking": "💭 [思考]", + "action": "🔧 [执行]", + "file": "📁 [文件]", + "search": "🔍 [搜索]", + "code": "💻 [代码]", + "terminal": "⚡ [终端]", + "memory": "📝 [记忆]", + "success": "✅ [成功]", + "error": "❌ [错误]", + "warning": "⚠️ [警告]", + "confirm": "❓ [确认]", + "info": "ℹ️ [信息]", + "session": "📺 [会话]", +} + +AGENT_VERSION = "v1.1" + +LOG_LEVEL = "INFO" +LOG_FORMAT = "%(asctime)s - %(name)s - %(levelname)s - %(message)s" + +__all__ = [ + "OUTPUT_FORMATS", + "AGENT_VERSION", + "LOG_LEVEL", + "LOG_FORMAT", +] diff --git a/core/main_terminal.py b/core/main_terminal.py index 5904fb3..deeec24 100644 --- a/core/main_terminal.py +++ b/core/main_terminal.py @@ -10,7 +10,12 @@ from datetime import datetime try: from config import ( OUTPUT_FORMATS, DATA_DIR, PROMPTS_DIR, NEED_CONFIRMATION, - MAX_TERMINALS, TERMINAL_BUFFER_SIZE, TERMINAL_DISPLAY_SIZE + MAX_TERMINALS, TERMINAL_BUFFER_SIZE, TERMINAL_DISPLAY_SIZE, + MAX_READ_FILE_CHARS, READ_TOOL_DEFAULT_MAX_CHARS, + READ_TOOL_DEFAULT_CONTEXT_BEFORE, READ_TOOL_DEFAULT_CONTEXT_AFTER, + READ_TOOL_MAX_CONTEXT_BEFORE, READ_TOOL_MAX_CONTEXT_AFTER, + READ_TOOL_DEFAULT_MAX_MATCHES, READ_TOOL_MAX_MATCHES, + READ_TOOL_MAX_FILE_SIZE, MAX_FOCUS_FILE_CHARS ) except ImportError: import sys @@ -20,7 +25,12 @@ except ImportError: sys.path.insert(0, str(project_root)) from config import ( OUTPUT_FORMATS, DATA_DIR, PROMPTS_DIR, NEED_CONFIRMATION, - MAX_TERMINALS, TERMINAL_BUFFER_SIZE, TERMINAL_DISPLAY_SIZE + MAX_TERMINALS, TERMINAL_BUFFER_SIZE, TERMINAL_DISPLAY_SIZE, + MAX_READ_FILE_CHARS, READ_TOOL_DEFAULT_MAX_CHARS, + READ_TOOL_DEFAULT_CONTEXT_BEFORE, READ_TOOL_DEFAULT_CONTEXT_AFTER, + READ_TOOL_MAX_CONTEXT_BEFORE, READ_TOOL_MAX_CONTEXT_AFTER, + READ_TOOL_DEFAULT_MAX_MATCHES, READ_TOOL_MAX_MATCHES, + READ_TOOL_MAX_FILE_SIZE, MAX_FOCUS_FILE_CHARS ) from modules.file_manager import FileManager from modules.search_engine import SearchEngine @@ -71,8 +81,6 @@ class MainTerminal: # 聚焦文件管理 self.focused_files = {} # {path: content} 存储聚焦的文件内容 - # 新增:阅读工具使用跟踪 - self.read_file_usage_tracker = {} # {file_path: first_read_session_id} 跟踪文件的首次读取 self.current_session_id = 0 # 用于标识不同的任务会话 # 新增:追加内容状态 self.pending_append_request = None # {"path": str} @@ -129,6 +137,228 @@ class MainTerminal: ) print(f"{OUTPUT_FORMATS['info']} 新建对话: {conversation_id}") + @staticmethod + def _clamp_int(value, default, min_value=None, max_value=None): + """将输入转换为整数并限制范围。""" + if value is None: + return default + try: + num = int(value) + except (TypeError, ValueError): + return default + if min_value is not None: + num = max(min_value, num) + if max_value is not None: + num = min(max_value, num) + return num + + @staticmethod + def _parse_optional_line(value, field_name: str): + """解析可选的行号参数。""" + if value is None: + return None, None + try: + number = int(value) + except (TypeError, ValueError): + return None, f"{field_name} 必须是整数" + if number < 1: + return None, f"{field_name} 必须大于等于1" + return number, None + + @staticmethod + def _truncate_text_block(text: str, max_chars: int): + """对单段文本应用字符限制。""" + if max_chars and len(text) > max_chars: + return text[:max_chars], True, max_chars + return text, False, len(text) + + @staticmethod + def _limit_text_chunks(chunks: List[Dict], text_key: str, max_chars: int): + """对多个文本片段应用全局字符限制。""" + if max_chars is None or max_chars <= 0: + return chunks, False, sum(len(chunk.get(text_key, "") or "") for chunk in chunks) + + remaining = max_chars + limited_chunks: List[Dict] = [] + truncated = False + consumed = 0 + + for chunk in chunks: + snippet = chunk.get(text_key, "") or "" + snippet_len = len(snippet) + chunk_copy = dict(chunk) + + if remaining <= 0: + truncated = True + break + + if snippet_len > remaining: + chunk_copy[text_key] = snippet[:remaining] + chunk_copy["truncated"] = True + consumed += remaining + limited_chunks.append(chunk_copy) + truncated = True + remaining = 0 + break + + limited_chunks.append(chunk_copy) + consumed += snippet_len + remaining -= snippet_len + + return limited_chunks, truncated, consumed + + def _handle_read_tool(self, arguments: Dict) -> Dict: + """集中处理 read_file 工具的三种模式。""" + file_path = arguments.get("path") + if not file_path: + return {"success": False, "error": "缺少文件路径参数"} + + read_type = (arguments.get("type") or "read").lower() + if read_type not in {"read", "search", "extract"}: + return {"success": False, "error": f"未知的读取类型: {read_type}"} + + max_chars = self._clamp_int( + arguments.get("max_chars"), + READ_TOOL_DEFAULT_MAX_CHARS, + 1, + MAX_READ_FILE_CHARS + ) + + base_result = { + "success": True, + "type": read_type, + "path": None, + "encoding": "utf-8", + "max_chars": max_chars, + "truncated": False + } + + if read_type == "read": + start_line, error = self._parse_optional_line(arguments.get("start_line"), "start_line") + if error: + return {"success": False, "error": error} + end_line_val = arguments.get("end_line") + end_line = None + if end_line_val is not None: + end_line, error = self._parse_optional_line(end_line_val, "end_line") + if error: + return {"success": False, "error": error} + if start_line and end_line < start_line: + return {"success": False, "error": "end_line 必须大于等于 start_line"} + + read_result = self.file_manager.read_text_segment( + file_path, + start_line=start_line, + end_line=end_line, + size_limit=READ_TOOL_MAX_FILE_SIZE + ) + if not read_result.get("success"): + return read_result + + content, truncated, char_count = self._truncate_text_block(read_result["content"], max_chars) + base_result.update({ + "path": read_result["path"], + "content": content, + "line_start": read_result["line_start"], + "line_end": read_result["line_end"], + "total_lines": read_result["total_lines"], + "file_size": read_result["size"], + "char_count": char_count, + "message": f"已读取 {read_result['path']} 的内容(行 {read_result['line_start']}~{read_result['line_end']})" + }) + base_result["truncated"] = truncated + self.context_manager.load_file(read_result["path"]) + return base_result + + if read_type == "search": + query = arguments.get("query") + if not query: + return {"success": False, "error": "搜索模式需要提供 query 参数"} + + max_matches = self._clamp_int( + arguments.get("max_matches"), + READ_TOOL_DEFAULT_MAX_MATCHES, + 1, + READ_TOOL_MAX_MATCHES + ) + context_before = self._clamp_int( + arguments.get("context_before"), + READ_TOOL_DEFAULT_CONTEXT_BEFORE, + 0, + READ_TOOL_MAX_CONTEXT_BEFORE + ) + context_after = self._clamp_int( + arguments.get("context_after"), + READ_TOOL_DEFAULT_CONTEXT_AFTER, + 0, + READ_TOOL_MAX_CONTEXT_AFTER + ) + case_sensitive = bool(arguments.get("case_sensitive")) + + search_result = self.file_manager.search_text( + file_path, + query=query, + max_matches=max_matches, + context_before=context_before, + context_after=context_after, + case_sensitive=case_sensitive, + size_limit=READ_TOOL_MAX_FILE_SIZE + ) + if not search_result.get("success"): + return search_result + + matches = search_result["matches"] + limited_matches, truncated, char_count = self._limit_text_chunks(matches, "snippet", max_chars) + + base_result.update({ + "path": search_result["path"], + "file_size": search_result["size"], + "query": query, + "max_matches": max_matches, + "actual_matches": len(matches), + "returned_matches": len(limited_matches), + "context_before": context_before, + "context_after": context_after, + "case_sensitive": case_sensitive, + "matches": limited_matches, + "char_count": char_count, + "message": f"在 {search_result['path']} 中搜索 \"{query}\",返回 {len(limited_matches)} 条结果" + }) + base_result["truncated"] = truncated + return base_result + + # extract + segments = arguments.get("segments") + if not isinstance(segments, list) or not segments: + return {"success": False, "error": "extract 模式需要提供 segments 数组"} + + extract_result = self.file_manager.extract_segments( + file_path, + segments=segments, + size_limit=READ_TOOL_MAX_FILE_SIZE + ) + if not extract_result.get("success"): + return extract_result + + limited_segments, truncated, char_count = self._limit_text_chunks( + extract_result["segments"], + "content", + max_chars + ) + + base_result.update({ + "path": extract_result["path"], + "segments": limited_segments, + "file_size": extract_result["size"], + "total_lines": extract_result["total_lines"], + "segment_count": len(limited_segments), + "char_count": char_count, + "message": f"已从 {extract_result['path']} 抽取 {len(limited_segments)} 个片段" + }) + base_result["truncated"] = truncated + self.context_manager.load_file(extract_result["path"]) + return base_result + def set_tool_category_enabled(self, category: str, enabled: bool) -> None: """设置工具类别的启用状态 / Toggle tool category enablement.""" if category not in TOOL_CATEGORIES: @@ -370,8 +600,6 @@ class MainTerminal: print(f"🔍 聚焦文件") elif tool_name == "unfocus_file": print(f"❌ 取消聚焦") - elif tool_name == "confirm_read_or_focus": - print(f"📋 确认读取方式") elif tool_name == "sleep": print(f"{OUTPUT_FORMATS['info']} 等待操作") else: @@ -452,8 +680,6 @@ class MainTerminal: if self.thinking_mode: self.api_client.start_new_task() - # 重置读取工具跟踪 - self.read_file_usage_tracker.clear() self.current_session_id += 1 else: @@ -476,7 +702,6 @@ class MainTerminal: if self.thinking_mode: self.api_client.start_new_task() - self.read_file_usage_tracker.clear() self.current_session_id += 1 except Exception as e: @@ -533,9 +758,6 @@ class MainTerminal: if self.thinking_mode: thinking_status += f" ({'等待新任务' if self.api_client.current_task_first_call else '任务进行中'})" - # 新增:阅读工具使用统计 - read_files_count = len(self.read_file_usage_tracker) - # 新增:对话统计 conversation_stats = self.context_manager.get_conversation_statistics() @@ -549,7 +771,7 @@ class MainTerminal: 当前消息: {len(self.context_manager.conversation_history)} 条 聚焦文件: {len(self.focused_files)}/3 个 ({focused_size/1024:.1f}KB) 终端会话: {terminal_status['total']}/{terminal_status['max_allowed']} 个 - 已读文件: {read_files_count} 个 (本次会话ID: {self.current_session_id}) + 当前会话ID: {self.current_session_id} 项目文件: {structure['total_files']} 个 项目大小: {structure['total_size'] / 1024 / 1024:.2f} MB @@ -668,33 +890,73 @@ class MainTerminal: "type": "function", "function": { "name": "read_file", - "description": "触发读取流程的首步,适合短小的 UTF-8 文本。首次调用会提示使用 confirm_read_or_focus 再确认;非 UTF-8 或超过字符限制会直接失败,可改用 run_python 获取内容。", + "description": "读取/搜索/抽取 UTF-8 文本文件内容。通过 type 参数选择 read(阅读)、search(搜索)、extract(具体行段),支持限制返回字符数。若文件非 UTF-8 或过大,请改用 run_python。", "parameters": { "type": "object", "properties": { - "path": {"type": "string", "description": "文件路径"} - }, - "required": ["path"] - } - } - }, - { - "type": "function", - "function": { - "name": "confirm_read_or_focus", - "description": "在 read_file 提示后调用,用于确认继续一次性读取(read)或改为聚焦(focus)。read 仅适用于短小 UTF-8 文本;focus 会将文件内容持续注入上下文。", - "parameters": { - "type": "object", - "properties": { - "file_path": {"type": "string", "description": "要操作的文件路径"}, - "choice": { - "type": "string", - "enum": ["read", "focus"], - "description": "选择操作类型:read-一次性读取,focus-持续聚焦" + "path": {"type": "string", "description": "文件路径"}, + "type": { + "type": "string", + "enum": ["read", "search", "extract"], + "description": "读取模式:read=阅读、search=搜索、extract=按行抽取" }, - "reason": {"type": "string", "description": "选择原因(可选)"} + "max_chars": { + "type": "integer", + "description": "返回内容的最大字符数,默认与 config 一致" + }, + "start_line": { + "type": "integer", + "description": "[read] 可选的起始行号(1开始)" + }, + "end_line": { + "type": "integer", + "description": "[read] 可选的结束行号(>=start_line)" + }, + "query": { + "type": "string", + "description": "[search] 搜索关键词" + }, + "max_matches": { + "type": "integer", + "description": "[search] 最多返回多少条命中(默认5,最大50)" + }, + "context_before": { + "type": "integer", + "description": "[search] 命中行向上追加的行数(默认1,最大3)" + }, + "context_after": { + "type": "integer", + "description": "[search] 命中行向下追加的行数(默认1,最大5)" + }, + "case_sensitive": { + "type": "boolean", + "description": "[search] 是否区分大小写,默认 false" + }, + "segments": { + "type": "array", + "description": "[extract] 需要抽取的行区间", + "items": { + "type": "object", + "properties": { + "label": { + "type": "string", + "description": "该片段的标签(可选)" + }, + "start_line": { + "type": "integer", + "description": "起始行号(>=1)" + }, + "end_line": { + "type": "integer", + "description": "结束行号(>=start_line)" + } + }, + "required": ["start_line", "end_line"] + }, + "minItems": 1 + } }, - "required": ["file_path", "choice"] + "required": ["path", "type"] } } }, @@ -1133,181 +1395,8 @@ class MainTerminal: }, ensure_ascii=False) try: - # ===== 新增:阅读工具拦截逻辑 ===== if tool_name == "read_file": - file_path = arguments.get("path", "") - - # 检查是否是本次会话首次读取此文件 - if file_path not in self.read_file_usage_tracker: - # 记录首次读取 - self.read_file_usage_tracker[file_path] = self.current_session_id - - # 返回选择提示,要求AI使用confirm_read_or_focus工具 - return json.dumps({ - "success": False, - "requires_confirmation": True, - "message": "阅读工具只能用于阅读小文件、临时文件、不重要的文件。如果要查看核心文件、需要多次修改的文件、重要的文件,请使用聚焦功能。请确认使用阅读还是聚焦?", - "instruction": f"请使用 confirm_read_or_focus 工具来选择操作方式,文件路径: {file_path}", - "file_path": file_path - }) - - # 如果不是首次读取,检查是否是同一会话 - elif self.read_file_usage_tracker[file_path] != self.current_session_id: - # 新会话首次读取已读过的文件,也需要确认 - self.read_file_usage_tracker[file_path] = self.current_session_id - - return json.dumps({ - "success": False, - "requires_confirmation": True, - "message": f"检测到要重复读取文件 {file_path}。建议使用聚焦功能以避免频繁读取。请确认使用阅读还是聚焦?", - "instruction": f"请使用 confirm_read_or_focus 工具来选择操作方式,文件路径: {file_path}", - "file_path": file_path - }) - - # 同一会话内再次读取,直接执行读取逻辑 - result = self.file_manager.read_file(file_path) - if not result["success"]: - return json.dumps({ - "success": False, - "error": f"读取文件失败: {result.get('error', '未知错误')}" - }) - - file_content = result["content"] - char_count = len(file_content) - - if char_count > MAX_READ_FILE_CHARS: - return json.dumps({ - "success": False, - "error": f"文件过大,有{char_count}字符,请使用run_command限制字符数返回", - "char_count": char_count, - "limit": MAX_READ_FILE_CHARS - }) - - self.context_manager.load_file(result["path"]) - print(f"{OUTPUT_FORMATS['info']} 文件已加载到上下文: {result['path']}") - - return json.dumps({ - "success": True, - "action": "read", - "message": f"已使用读取方式查看文件: {file_path}", - "content": file_content, - "file_size": len(file_content), - "char_count": char_count - }) - - # ===== 新增:处理确认选择工具 ===== - elif tool_name == "confirm_read_or_focus": - file_path = arguments.get("file_path", "") - choice = arguments.get("choice", "") - reason = arguments.get("reason", "") - - if not file_path or not choice: - return json.dumps({ - "success": False, - "error": "缺少必要参数:file_path 或 choice" - }) - - if choice == "read": - # 执行读取操作 - print(f"{OUTPUT_FORMATS['info']} 用户选择:一次性读取文件 {file_path}") - if reason: - print(f"{OUTPUT_FORMATS['info']} 选择原因: {reason}") - - # 直接调用读取文件 - result = self.file_manager.read_file(file_path) - - # ✅ 先检查是否读取成功 - if not result["success"]: - return json.dumps({ - "success": False, - "error": f"读取文件失败: {result.get('error', '未知错误')}" - }) - - # 读取成功,继续处理 - file_content = result["content"] - char_count = len(file_content) - - # 字符数检查 - if char_count > MAX_READ_FILE_CHARS: - return json.dumps({ - "success": False, - "error": f"文件过大,有{char_count}字符,请使用run_command限制字符数返回", - "char_count": char_count, - "limit": MAX_READ_FILE_CHARS - }) - - # 加载到上下文管理器 - self.context_manager.load_file(result["path"]) - print(f"{OUTPUT_FORMATS['info']} 文件已加载到上下文: {result['path']}") - - # ✅ 返回完整内容 - return json.dumps({ - "success": True, - "action": "read", - "message": f"已使用读取方式查看文件: {file_path}", - "content": file_content, # ← 关键:包含完整内容 - "file_size": len(file_content), - "char_count": char_count - }) - elif choice == "focus": - # 执行聚焦操作 - print(f"{OUTPUT_FORMATS['info']} 用户选择:聚焦文件 {file_path}") - if reason: - print(f"{OUTPUT_FORMATS['info']} 选择原因: {reason}") - - # 检查是否已经聚焦 - if file_path in self.focused_files: - return json.dumps({ - "success": False, - "error": f"文件已经处于聚焦状态: {file_path}" - }) - - # 检查聚焦文件数量限制 - if len(self.focused_files) >= 3: - return json.dumps({ - "success": False, - "error": f"已达到最大聚焦文件数量(3个),当前聚焦: {list(self.focused_files.keys())}", - "suggestion": "请先使用 unfocus_file 取消部分文件的聚焦" - }) - - # 读取文件内容并聚焦 - read_result = self.file_manager.read_file(file_path) - if read_result["success"]: - # 字符数检查 - char_count = len(read_result["content"]) - if char_count > MAX_FOCUS_FILE_CHARS: - return json.dumps({ - "success": False, - "error": f"文件过大,有{char_count}字符,请使用run_command限制字符数返回", - "char_count": char_count, - "limit": MAX_FOCUS_FILE_CHARS - }) - - self.focused_files[file_path] = read_result["content"] - result = { - "success": True, - "action": "focus", - "message": f"文件已聚焦: {file_path}", - "focused_files": list(self.focused_files.keys()), - "file_size": len(read_result["content"]) - } - print(f"🔍 文件已聚焦: {file_path} ({len(read_result['content'])} 字节)") - else: - result = { - "success": False, - "action": "focus", - "error": f"读取文件失败: {read_result.get('error', '未知错误')}" - } - - return json.dumps(result) - - else: - return json.dumps({ - "success": False, - "error": f"无效的选择: {choice},只能选择 'read' 或 'focus'" - }) - - # ===== 以下是原有的工具处理逻辑 ===== + result = self._handle_read_tool(arguments) # 终端会话管理工具 elif tool_name == "terminal_session": @@ -1420,30 +1509,6 @@ class MainTerminal: "追加正文内容,或使用 modify_file 进行小范围替换。" ) - # 注意:原始的read_file处理已经移到上面的拦截逻辑中 - elif tool_name == "read_file": - result = self.file_manager.read_file(arguments["path"]) - if result["success"]: - # 字符数检查 - char_count = len(result["content"]) - if char_count > MAX_READ_FILE_CHARS: - return json.dumps({...}) - - # ✅ 先保存文件内容 - file_content = result["content"] - - # 加载到上下文管理器 - self.context_manager.load_file(result["path"]) - print(f"{OUTPUT_FORMATS['info']} 文件已加载到上下文: {result['path']}") - - # ✅ 关键:返回时必须包含content字段 - result = { - "success": True, - "message": f"已读取文件: {arguments['path']}", - "content": file_content, # ← 必须加这个! - "file_size": len(file_content), - "char_count": char_count - } elif tool_name == "delete_file": result = self.file_manager.delete_file(arguments["path"]) # 如果删除成功,同时删除备注和聚焦 diff --git a/core/tool_config.py b/core/tool_config.py index 9f0328b..af4c746 100644 --- a/core/tool_config.py +++ b/core/tool_config.py @@ -32,7 +32,7 @@ TOOL_CATEGORIES: Dict[str, ToolCategory] = { ), "read_focus": ToolCategory( label="阅读聚焦", - tools=["read_file", "focus_file", "unfocus_file", "confirm_read_or_focus"], + tools=["read_file", "focus_file", "unfocus_file"], ), "terminal_realtime": ToolCategory( label="实时终端", diff --git a/core/web_terminal.py b/core/web_terminal.py index c11dd6a..e17d6b3 100644 --- a/core/web_terminal.py +++ b/core/web_terminal.py @@ -102,7 +102,6 @@ class WebTerminal(MainTerminal): if self.thinking_mode: self.api_client.start_new_task() - self.read_file_usage_tracker.clear() self.current_session_id += 1 return { @@ -134,7 +133,6 @@ class WebTerminal(MainTerminal): if self.thinking_mode: self.api_client.start_new_task() - self.read_file_usage_tracker.clear() self.current_session_id += 1 # 获取对话信息 @@ -333,19 +331,11 @@ class WebTerminal(MainTerminal): 'detail': f'创建文件: {arguments.get("path", "未知路径")}' }) elif tool_name == "read_file": + read_type = arguments.get("type", "read") self.broadcast('tool_status', { 'tool': tool_name, 'status': 'reading', - 'detail': f'读取文件: {arguments.get("path", "未知路径")}' - }) - elif tool_name == "confirm_read_or_focus": - # 新增:确认读取或聚焦工具的广播 - choice = arguments.get("choice", "未知") - file_path = arguments.get("file_path", "未知路径") - self.broadcast('tool_status', { - 'tool': tool_name, - 'status': 'confirming', - 'detail': f'确认操作: {choice} - {file_path}' + 'detail': f'读取文件({read_type}): {arguments.get("path", "未知路径")}' }) elif tool_name == "modify_file": path = arguments.get("path", "未知路径") @@ -475,16 +465,6 @@ class WebTerminal(MainTerminal): 'error_type': 'parameter_format_error', 'suggestion': result_data.get('suggestion', '请检查参数格式') }) - elif 'requires_confirmation' in result_data: - # 特殊处理需要确认的情况(read_file拦截) - self.broadcast('tool_execution_end', { - 'tool': tool_name, - 'success': False, - 'result': result_data, - 'message': f'{tool_name}: 需要用户确认操作方式', - 'error_type': 'requires_confirmation', - 'instruction': result_data.get('instruction', '') - }) else: # 一般错误 self.broadcast('tool_execution_end', { @@ -528,7 +508,7 @@ class WebTerminal(MainTerminal): logger.error(f"广播终端更新失败: {e}") # 如果是文件操作,广播文件树更新 - if tool_name in ['create_file', 'delete_file', 'rename_file', 'create_folder', 'confirm_read_or_focus', 'save_webpage']: + if tool_name in ['create_file', 'delete_file', 'rename_file', 'create_folder', 'save_webpage']: try: structure = self.context_manager.get_project_structure() self.broadcast('file_tree_update', structure) @@ -537,7 +517,7 @@ class WebTerminal(MainTerminal): # 如果是聚焦操作,广播聚焦文件更新 - if tool_name in ['focus_file', 'unfocus_file', 'modify_file', 'confirm_read_or_focus']: + if tool_name in ['focus_file', 'unfocus_file', 'modify_file']: try: focused_files_dict = self.get_focused_files_info() self.broadcast('focused_files_update', focused_files_dict) diff --git a/modules/file_manager.py b/modules/file_manager.py index c75c506..35f6895 100644 --- a/modules/file_manager.py +++ b/modules/file_manager.py @@ -6,14 +6,26 @@ from pathlib import Path from typing import Optional, Dict, List, Tuple from datetime import datetime try: - from config import MAX_FILE_SIZE, FORBIDDEN_PATHS, FORBIDDEN_ROOT_PATHS, OUTPUT_FORMATS + from config import ( + MAX_FILE_SIZE, + FORBIDDEN_PATHS, + FORBIDDEN_ROOT_PATHS, + OUTPUT_FORMATS, + READ_TOOL_MAX_FILE_SIZE, + ) except ImportError: # 兼容全局环境中存在同名包的情况 import sys from pathlib import Path project_root = Path(__file__).resolve().parents[1] if str(project_root) not in sys.path: sys.path.insert(0, str(project_root)) - from config import MAX_FILE_SIZE, FORBIDDEN_PATHS, FORBIDDEN_ROOT_PATHS, OUTPUT_FORMATS + from config import ( + MAX_FILE_SIZE, + FORBIDDEN_PATHS, + FORBIDDEN_ROOT_PATHS, + OUTPUT_FORMATS, + READ_TOOL_MAX_FILE_SIZE, + ) # 临时禁用长度检查 DISABLE_LENGTH_CHECK = True class FileManager: @@ -203,8 +215,46 @@ class FileManager: except Exception as e: return {"success": False, "error": str(e)} + def _read_text_lines( + self, + full_path: Path, + *, + size_limit: Optional[int] = None, + encoding: str = "utf-8", + ) -> Dict: + """读取UTF-8文本并返回行列表。""" + try: + file_size = full_path.stat().st_size + except FileNotFoundError: + return {"success": False, "error": "文件不存在"} + + if size_limit and file_size > size_limit: + return { + "success": False, + "error": f"文件太大 ({file_size / 1024 / 1024:.2f}MB > {size_limit / 1024 / 1024}MB)" + } + + try: + with open(full_path, 'r', encoding=encoding) as f: + lines = f.readlines() + except UnicodeDecodeError: + return { + "success": False, + "error": "文件不是 UTF-8 文本,无法直接读取,请改用 run_python 解析。" + } + except Exception as e: + return {"success": False, "error": f"读取文件失败: {e}"} + + content = "".join(lines) + return { + "success": True, + "content": content, + "lines": lines, + "size": file_size + } + def read_file(self, path: str) -> Dict: - """读取文件内容""" + """读取文件内容(兼容旧逻辑,限制为 MAX_FILE_SIZE)。""" valid, error, full_path = self._validate_path(path) if not valid: return {"success": False, "error": error} @@ -215,27 +265,198 @@ class FileManager: if not full_path.is_file(): return {"success": False, "error": "不是文件"} - # 检查文件大小 - file_size = full_path.stat().st_size - if file_size > MAX_FILE_SIZE: - return { - "success": False, - "error": f"文件太大 ({file_size / 1024 / 1024:.2f}MB > {MAX_FILE_SIZE / 1024 / 1024}MB)" - } + result = self._read_text_lines(full_path, size_limit=MAX_FILE_SIZE) + if not result["success"]: + return result - try: - with open(full_path, 'r', encoding='utf-8') as f: - content = f.read() - - relative_path = str(full_path.relative_to(self.project_path)) - return { - "success": True, - "path": relative_path, - "content": content, - "size": file_size - } - except Exception as e: - return {"success": False, "error": str(e)} + relative_path = str(full_path.relative_to(self.project_path)) + return { + "success": True, + "path": relative_path, + "content": result["content"], + "size": result["size"] + } + + def read_text_segment( + self, + path: str, + *, + start_line: Optional[int] = None, + end_line: Optional[int] = None, + size_limit: Optional[int] = None + ) -> Dict: + """按行范围读取文本片段。""" + valid, error, full_path = self._validate_path(path) + if not valid: + return {"success": False, "error": error} + + if not full_path.exists(): + return {"success": False, "error": "文件不存在"} + + if not full_path.is_file(): + return {"success": False, "error": "不是文件"} + + result = self._read_text_lines( + full_path, + size_limit=size_limit or READ_TOOL_MAX_FILE_SIZE + ) + if not result["success"]: + return result + + lines = result["lines"] + total_lines = len(lines) + start = start_line if start_line and start_line > 0 else 1 + end = end_line if end_line and end_line >= start else total_lines + if start > total_lines: + return {"success": False, "error": "起始行超出文件长度"} + end = min(end, total_lines) + + selected_lines = lines[start - 1 : end] + content = "".join(selected_lines) + + relative_path = str(full_path.relative_to(self.project_path)) + return { + "success": True, + "path": relative_path, + "content": content, + "size": result["size"], + "line_start": start, + "line_end": end, + "total_lines": total_lines + } + + def search_text( + self, + path: str, + *, + query: str, + max_matches: int, + context_before: int, + context_after: int, + case_sensitive: bool = False, + size_limit: Optional[int] = None + ) -> Dict: + """在文件中搜索关键词,返回合并后的窗口。""" + if not query: + return {"success": False, "error": "缺少搜索关键词"} + + valid, error, full_path = self._validate_path(path) + if not valid: + return {"success": False, "error": error} + + if not full_path.exists(): + return {"success": False, "error": "文件不存在"} + + if not full_path.is_file(): + return {"success": False, "error": "不是文件"} + + result = self._read_text_lines( + full_path, + size_limit=size_limit or READ_TOOL_MAX_FILE_SIZE + ) + if not result["success"]: + return result + + lines = result["lines"] + total_lines = len(lines) + matches = [] + query_text = query if case_sensitive else query.lower() + + def contains(haystack: str) -> bool: + target = haystack if case_sensitive else haystack.lower() + return query_text in target + + for idx, line in enumerate(lines, start=1): + if contains(line): + window_start = max(1, idx - context_before) + window_end = min(total_lines, idx + context_after) + + if matches and window_start <= matches[-1]["line_end"]: + matches[-1]["line_end"] = max(matches[-1]["line_end"], window_end) + matches[-1]["hits"].append(idx) + else: + if len(matches) >= max_matches: + break + matches.append({ + "line_start": window_start, + "line_end": window_end, + "hits": [idx] + }) + + relative_path = str(full_path.relative_to(self.project_path)) + for window in matches: + snippet_lines = lines[window["line_start"] - 1 : window["line_end"]] + window["snippet"] = "".join(snippet_lines) + + return { + "success": True, + "path": relative_path, + "size": result["size"], + "total_lines": total_lines, + "matches": matches + } + + def extract_segments( + self, + path: str, + segments: List[Dict], + *, + size_limit: Optional[int] = None + ) -> Dict: + """根据多个行区间提取内容。""" + if not segments: + return {"success": False, "error": "缺少要提取的行区间"} + + valid, error, full_path = self._validate_path(path) + if not valid: + return {"success": False, "error": error} + + if not full_path.exists(): + return {"success": False, "error": "文件不存在"} + + if not full_path.is_file(): + return {"success": False, "error": "不是文件"} + + result = self._read_text_lines( + full_path, + size_limit=size_limit or READ_TOOL_MAX_FILE_SIZE + ) + if not result["success"]: + return result + + lines = result["lines"] + total_lines = len(lines) + extracted = [] + + for item in segments: + if not isinstance(item, dict): + return {"success": False, "error": "segments 数组中的每一项都必须是对象"} + start_line = item.get("start_line") + end_line = item.get("end_line") + label = item.get("label") + if start_line is None or end_line is None: + return {"success": False, "error": "所有区间都必须包含 start_line 和 end_line"} + if start_line <= 0 or end_line < start_line: + return {"success": False, "error": "行区间不合法"} + if start_line > total_lines: + return {"success": False, "error": f"区间起点 {start_line} 超出文件行数"} + end_line = min(end_line, total_lines) + snippet = "".join(lines[start_line - 1 : end_line]) + extracted.append({ + "label": label, + "line_start": start_line, + "line_end": end_line, + "content": snippet + }) + + relative_path = str(full_path.relative_to(self.project_path)) + return { + "success": True, + "path": relative_path, + "size": result["size"], + "total_lines": total_lines, + "segments": extracted + } def write_file(self, path: str, content: str, mode: str = "w") -> Dict: """ diff --git a/prompts/main_system_prev.txt b/prompts/main_system_prev.txt index b5132e1..a028cf6 100644 --- a/prompts/main_system_prev.txt +++ b/prompts/main_system_prev.txt @@ -35,25 +35,27 @@ ## 文件查看策略(重要更新) -### 智能选择:读取 vs 聚焦 -当你需要查看文件时,系统会提供选择提示。请根据以下原则做出明智选择: +### 智能选择:多模式读取 vs 聚焦 +当你需要查看文件时,优先考虑 read_file 三种模式与聚焦功能的取舍: -#### 选择读取(read_file)的场景: -- **临时查看**:只需要快速了解文件内容,不会频繁操作 -- **小文件**:配置文件、说明文档、简短脚本等 -- **一次性检查**:验证文件格式、查看示例内容等 -- **不重要文件**:日志文件、临时文件、测试数据等 +#### 使用 read_file(type=read/search/extract)的场景: +- **临时查看**:一次性浏览、验证格式或比对差异 +- **小文件/片段**:配置、示例、测试数据 +- **定位信息**:通过 `type=search` + `query` 快速检索,`context_before/context_after` 控制窗口(0 表示只保留命中行) +- **精准摘取**:用 `type=extract` + `segments[{start_line,end_line}]` 抽取多个片段 +- **返回体量控制**:始终设置合理的 `max_chars`,默认会自动按配置裁剪;超过限制会在返回中标记 `truncated=true` +- **非 UTF-8 文件**:一律改用 `run_python`(可借助 python-docx/pandas 等库)解析,read_file 会直接拒绝 -#### 选择聚焦(focus_file)的场景: +#### 使用聚焦(focus_file)的场景: - **核心文件**:主要代码文件、关键配置文件 - **频繁修改**:需要多次查看和编辑的文件 - **重要文件**:架构核心、业务逻辑、重要接口等 - **长期工作**:将要花费较多时间开发的模块 -#### 决策流程: -1. 当你调用read_file时,系统会询问你的选择意图 -2. 使用confirm_read_or_focus工具明确你的选择和理由 -3. 系统会执行相应操作并告知结果 +#### 调用建议: +1. 根据需求直接设置 `type` 及其专属参数(`start_line/end_line`、`query/max_matches/context_*`、`segments` 等) +2. 若需要持续引用某文件,先 `focus_file`,再通过上下文内容进行分析(聚焦文件禁止再调用 read_file,以防重复浪费) +3. 如果 read_file 返回 `truncated=true`,说明被 `max_chars` 裁剪,可缩小范围或改用聚焦/终端命令继续查看 ### 聚焦文件管理 - **完全可见原则**:聚焦的文件内容会完整显示在上下文中,你可以直接看到每一行内容 @@ -64,7 +66,7 @@ 用户本次的输入 - **实时更新**:文件被修改后内容自动更新,无需重新聚焦 - **合理管理**:任务开始时聚焦核心文件,完成后及时取消,为下个任务腾出空间 -- **禁止重复读取**:已聚焦的文件禁止再次使用read_file +- **禁止重复读取**:已聚焦的文件禁止再次使用 read_file,应直接使用聚焦内容或 run_command/modify_file 完成操作 ## 文件创建与追加策略(重要) - `create_file` 仅用于创建空文件(或极简骨架);所有正文必须通过 `append_to_file` 追加,禁止在创建时写入内容。 diff --git a/prompts/tool_prompts.txt b/prompts/tool_prompts.txt index b9144d2..a65c542 100644 --- a/prompts/tool_prompts.txt +++ b/prompts/tool_prompts.txt @@ -3,12 +3,12 @@ --- ### read_file -- **描述**:`用于触发读取流程的第一步,适合短小的 UTF-8 文本。首次调用不会直接返回文件内容,而是提示使用 confirm_read_or_focus 再次确认。若文件非 UTF-8 或体积超限,会直接失败;此时可改用 run_python 查看或解析。` -- **失败/限制提示**:当触发大小或编码限制时,建议:`文件可能非 UTF-8 或体量过大,请改用 run_python(可结合 pandas、python-docx 等库)或先与用户确认处理方式。` - -### confirm_read_or_focus -- **描述**:`在 read_file 触发确认后调用。choice=read 会尝试一次性返回 UTF-8 文本内容;choice=focus 将聚焦文件、持续注入上下文。若选择 read 时检测到非 UTF-8,将返回错误,可改用 run_python 查看。` -- **成功(focus)消息补充**:提示已聚焦并建议:`聚焦后再次访问请直接查看上下文,不要调用 read_file。` +- **描述**:`统一的阅读工具。通过 type 参数在 read(直接阅读)、search(全文搜索)、extract(按行抽取)之间切换,始终返回 UTF-8 文本。所有模式都会在响应前根据 max_chars 截断输出,保证不会超量。` +- **模式提示**: + - `read`:可选 `start_line`/`end_line`,适合一次性查看短片段。 + - `search`:需提供 `query`,并可设置 `max_matches`、`context_before`、`context_after`、`case_sensitive`,自动合并重复命中,以窗口形式返回。 + - `extract`:传入 `segments=[{start_line,end_line,label?},...]`,适合按多段行号提取关键信息。 +- **失败/限制提示**:若因编码或体积被拒绝,提醒:`文件不是 UTF-8 或体量过大,请改用 run_python(可结合 python-docx、pandas 等库)读取。` 若多次需要查看同一长文件,建议直接调用 `focus_file`。 ### focus_file - **描述**:`持续在上下文中展示 UTF-8 文本文件的完整内容,适合频繁查看/修改的核心文件。文件非 UTF-8 或体积超限将直接拒绝;如需了解二进制/Office 文件,请改用 run_python。` diff --git a/static/app.js b/static/app.js index 24102b0..48059d1 100644 --- a/static/app.js +++ b/static/app.js @@ -2482,7 +2482,6 @@ async function bootstrapApp() { } else if (tool.status === 'running') { const texts = { 'create_file': '正在创建文件...', - 'read_file': '正在读取文件...', 'sleep': '正在等待...', 'delete_file': '正在删除文件...', 'rename_file': '正在重命名文件...', @@ -2502,12 +2501,21 @@ async function bootstrapApp() { 'terminal_snapshot': '正在获取终端快照...', 'terminal_reset': '正在重置终端...' }; + if (tool.name === 'read_file') { + const readType = ((tool.argumentSnapshot && tool.argumentSnapshot.type) || + (tool.arguments && tool.arguments.type) || 'read').toLowerCase(); + const runningMap = { + 'read': '正在读取文件...', + 'search': '正在执行搜索...', + 'extract': '正在提取内容...' + }; + return runningMap[readType] || '正在读取文件...'; + } return texts[tool.name] || '正在执行...'; } else if (tool.status === 'completed') { // 修复:完成状态的文本 const texts = { 'create_file': '文件创建成功', - 'read_file': '文件读取完成', 'delete_file': '文件删除成功', 'sleep': '等待完成', 'rename_file': '文件重命名成功', @@ -2527,6 +2535,27 @@ async function bootstrapApp() { 'terminal_snapshot': '终端快照已返回', 'terminal_reset': '终端已重置' }; + if (tool.name === 'read_file' && tool.result && typeof tool.result === 'object') { + const readType = (tool.result.type || 'read').toLowerCase(); + if (readType === 'search') { + const query = tool.result.query ? `「${tool.result.query}」` : ''; + const count = typeof tool.result.returned_matches === 'number' + ? tool.result.returned_matches + : (tool.result.actual_matches || 0); + return `搜索${query},得到${count}个结果`; + } else if (readType === 'extract') { + const segments = Array.isArray(tool.result.segments) ? tool.result.segments : []; + const totalLines = segments.reduce((sum, seg) => { + const start = Number(seg.line_start) || 0; + const end = Number(seg.line_end) || 0; + if (!start || !end || end < start) return sum; + return sum + (end - start + 1); + }, 0); + const displayLines = totalLines || tool.result.char_count || 0; + return `提取了${displayLines}行`; + } + return '文件读取完成'; + } return texts[tool.name] || '执行完成'; } else { // 其他状态 diff --git a/utils/api_client.py b/utils/api_client.py index 3698807..4133d7e 100644 --- a/utils/api_client.py +++ b/utils/api_client.py @@ -34,6 +34,69 @@ class DeepSeekClient: """安全的打印函数,在Web模式下不输出""" if not self.web_mode: print(message, end=end, flush=flush) + + def _format_read_file_result(self, data: Dict) -> str: + """根据读取模式格式化 read_file 工具结果。""" + if not isinstance(data, dict): + return json.dumps(data, ensure_ascii=False) + if not data.get("success"): + return json.dumps(data, ensure_ascii=False) + + read_type = data.get("type", "read") + truncated_note = "(内容已截断)" if data.get("truncated") else "" + path = data.get("path", "未知路径") + max_chars = data.get("max_chars") + max_note = f"(max_chars={max_chars})" if max_chars else "" + + if read_type == "read": + line_start = data.get("line_start") + line_end = data.get("line_end") + char_count = data.get("char_count", len(data.get("content", "") or "")) + header = f"读取 {path} 行 {line_start}~{line_end},返回 {char_count} 字符 {max_note}{truncated_note}".strip() + content = data.get("content", "") + return f"{header}\n```\n{content}\n```" + + if read_type == "search": + query = data.get("query", "") + actual = data.get("actual_matches", 0) + returned = data.get("returned_matches", 0) + case_hint = "区分大小写" if data.get("case_sensitive") else "不区分大小写" + header = ( + f"在 {path} 中搜索 \"{query}\",返回 {returned}/{actual} 条结果({case_hint})" + f" {max_note}{truncated_note}" + ).strip() + match_texts = [] + for idx, match in enumerate(data.get("matches", []), 1): + match_note = "(片段截断)" if match.get("truncated") else "" + hits = match.get("hits") or [] + hit_text = ", ".join(str(h) for h in hits) if hits else "无" + label = match.get("id") or f"match_{idx}" + snippet = match.get("snippet", "") + match_texts.append( + f"[{label}] 行 {match.get('line_start')}~{match.get('line_end')} 命中行: {hit_text}{match_note}\n```\n{snippet}\n```" + ) + if not match_texts: + match_texts.append("未找到匹配内容。") + return "\n".join([header] + match_texts) + + if read_type == "extract": + segments = data.get("segments", []) + header = ( + f"从 {path} 抽取 {len(segments)} 个片段 {max_note}{truncated_note}" + ).strip() + seg_texts = [] + for idx, segment in enumerate(segments, 1): + seg_note = "(片段截断)" if segment.get("truncated") else "" + label = segment.get("label") or f"segment_{idx}" + snippet = segment.get("content", "") + seg_texts.append( + f"[{label}] 行 {segment.get('line_start')}~{segment.get('line_end')}{seg_note}\n```\n{snippet}\n```" + ) + if not seg_texts: + seg_texts.append("未提供可抽取的片段。") + return "\n".join([header] + seg_texts) + + return json.dumps(data, ensure_ascii=False) def start_new_task(self): """开始新任务(重置任务级别的状态)""" @@ -443,11 +506,8 @@ class DeepSeekClient: # 解析工具结果,提取关键信息 try: result_data = json.loads(tool_result) - # 特殊处理read_file的结果 - if function_name == "read_file" and result_data.get("success"): - file_content = result_data.get("content", "") - # 将文件内容作为明确的上下文信息 - tool_result_msg = f"文件 {result_data.get('path')} 的内容:\n```\n{file_content}\n```\n文件大小: {result_data.get('size')} 字节" + if function_name == "read_file": + tool_result_msg = self._format_read_file_result(result_data) else: tool_result_msg = tool_result except: diff --git a/utils/context_manager.py b/utils/context_manager.py index d1f2816..928ca2f 100644 --- a/utils/context_manager.py +++ b/utils/context_manager.py @@ -475,37 +475,33 @@ class ContextManager: elif role == "tool": tool_name = new_msg.get("name") raw_content = new_msg.get("content") - payload = None - - if isinstance(raw_content, dict): - payload = deepcopy(raw_content) - elif isinstance(raw_content, str): - try: - payload = json.loads(raw_content) - except Exception: - payload = None - - if isinstance(payload, dict): - updated = False - - if tool_name in {"read_file", "confirm_read_or_focus"} and payload.get("content"): - payload["content"] = append_placeholder - compressed_types.add("read_file") - updated = True - elif tool_name == "extract_webpage" and payload.get("content"): - payload["content"] = extract_placeholder - compressed_types.add("extract_webpage") - updated = True - - if updated: - new_msg["content"] = json.dumps(payload, ensure_ascii=False) + if tool_name == "read_file": + new_msg["content"] = append_placeholder + compressed_types.add("read_file") else: - if tool_name in {"read_file", "confirm_read_or_focus"}: - new_msg["content"] = append_placeholder - compressed_types.add("read_file") - elif tool_name == "extract_webpage": - new_msg["content"] = extract_placeholder - compressed_types.add("extract_webpage") + payload = None + + if isinstance(raw_content, dict): + payload = deepcopy(raw_content) + elif isinstance(raw_content, str): + try: + payload = json.loads(raw_content) + except Exception: + payload = None + + if isinstance(payload, dict): + updated = False + if tool_name == "extract_webpage" and payload.get("content"): + payload["content"] = extract_placeholder + compressed_types.add("extract_webpage") + updated = True + + if updated: + new_msg["content"] = json.dumps(payload, ensure_ascii=False) + else: + if tool_name == "extract_webpage": + new_msg["content"] = extract_placeholder + compressed_types.add("extract_webpage") compressed_messages.append(new_msg) diff --git a/web_server.py b/web_server.py index 1852d2f..0e07472 100644 --- a/web_server.py +++ b/web_server.py @@ -69,6 +69,64 @@ stop_flags: Dict[str, Dict[str, Any]] = {} DEFAULT_PORT = 8091 +def format_read_file_result(result_data: Dict) -> str: + """格式化 read_file 工具的输出,便于在Web端展示。""" + if not isinstance(result_data, dict): + return json.dumps(result_data, ensure_ascii=False) + if not result_data.get("success"): + return json.dumps(result_data, ensure_ascii=False) + + read_type = result_data.get("type", "read") + truncated_note = "(内容已截断)" if result_data.get("truncated") else "" + path = result_data.get("path", "未知路径") + max_chars = result_data.get("max_chars") + max_note = f"(max_chars={max_chars})" if max_chars else "" + + if read_type == "read": + header = f"读取 {path} 行 {result_data.get('line_start')}~{result_data.get('line_end')} {max_note}{truncated_note}".strip() + content = result_data.get("content", "") + return f"{header}\n```\n{content}\n```" + + if read_type == "search": + query = result_data.get("query", "") + actual = result_data.get("actual_matches", 0) + returned = result_data.get("returned_matches", 0) + case_hint = "区分大小写" if result_data.get("case_sensitive") else "不区分大小写" + header = ( + f"在 {path} 中搜索 \"{query}\",返回 {returned}/{actual} 条结果({case_hint}) " + f"{max_note}{truncated_note}" + ).strip() + match_texts = [] + for idx, match in enumerate(result_data.get("matches", []), 1): + match_note = "(片段截断)" if match.get("truncated") else "" + hits = match.get("hits") or [] + hit_text = ", ".join(str(h) for h in hits) if hits else "无" + label = match.get("id") or f"match_{idx}" + snippet = match.get("snippet", "") + match_texts.append( + f"[{label}] 行 {match.get('line_start')}~{match.get('line_end')} 命中行: {hit_text}{match_note}\n```\n{snippet}\n```" + ) + if not match_texts: + match_texts.append("未找到匹配内容。") + return "\n".join([header] + match_texts) + + if read_type == "extract": + segments = result_data.get("segments", []) + header = f"从 {path} 抽取 {len(segments)} 个片段 {max_note}{truncated_note}".strip() + seg_texts = [] + for idx, segment in enumerate(segments, 1): + seg_note = "(片段截断)" if segment.get("truncated") else "" + label = segment.get("label") or f"segment_{idx}" + snippet = segment.get("content", "") + seg_texts.append( + f"[{label}] 行 {segment.get('line_start')}~{segment.get('line_end')}{seg_note}\n```\n{snippet}\n```" + ) + if not seg_texts: + seg_texts.append("未提供可抽取的片段。") + return "\n".join([header] + seg_texts) + + return json.dumps(result_data, ensure_ascii=False) + def sanitize_filename_preserve_unicode(filename: str) -> str: """在保留中文等字符的同时,移除危险字符和路径成分""" @@ -230,8 +288,6 @@ def reset_system_state(terminal: Optional[WebTerminal]): debug_log(f"重置会话ID为: {terminal.current_session_id}") # 3. 清理读取文件跟踪器 - if hasattr(terminal, 'read_file_usage_tracker'): - terminal.read_file_usage_tracker.clear() debug_log("清理文件读取跟踪器") # 4. 重置Web特有的状态属性 @@ -3142,7 +3198,7 @@ async def handle_task_with_sender(terminal: WebTerminal, message, sender, client sender('update_action', update_payload) # 更新UI状态 - if function_name in ['focus_file', 'unfocus_file', 'modify_file', 'confirm_read_or_focus']: + if function_name in ['focus_file', 'unfocus_file', 'modify_file']: sender('focused_files_update', web_terminal.get_focused_files_info()) if function_name in ['create_file', 'delete_file', 'rename_file', 'create_folder']: @@ -3152,9 +3208,8 @@ async def handle_task_with_sender(terminal: WebTerminal, message, sender, client # ===== 增量保存:立即保存工具结果 ===== try: result_data = json.loads(tool_result) - if function_name == "read_file" and result_data.get("success"): - file_content = result_data.get("content", "") - tool_result_content = f"文件内容:\n```\n{file_content}\n```\n大小: {result_data.get('size')} 字节" + if function_name == "read_file": + tool_result_content = format_read_file_result(result_data) else: tool_result_content = tool_result except: