Fix multimodal token handling
This commit is contained in:
parent
934a916842
commit
aa4f323809
263
src/cli/index.js
263
src/cli/index.js
@ -18,9 +18,11 @@ const { renderBanner } = require('../ui/banner');
|
|||||||
const { buildStartLine, buildFinalLine, startToolDisplay, formatResultLines, printResultLines } = require('../ui/tool_display');
|
const { buildStartLine, buildFinalLine, startToolDisplay, formatResultLines, printResultLines } = require('../ui/tool_display');
|
||||||
const { createConversation, updateConversation } = require('../storage/conversation_store');
|
const { createConversation, updateConversation } = require('../storage/conversation_store');
|
||||||
const { applyUsage, normalizeTokenUsage } = require('../utils/token_usage');
|
const { applyUsage, normalizeTokenUsage } = require('../utils/token_usage');
|
||||||
const { gray, cyan, green, red } = require('../utils/colors');
|
const { gray, cyan, green, red, blue } = require('../utils/colors');
|
||||||
const { createIndentedWriter } = require('../ui/indented_writer');
|
const { createIndentedWriter } = require('../ui/indented_writer');
|
||||||
const { createStatusBar } = require('../ui/status_bar');
|
const { createStatusBar } = require('../ui/status_bar');
|
||||||
|
const { visibleWidth } = require('../utils/text_width');
|
||||||
|
const { readMediafileTool } = require('../tools/read_mediafile');
|
||||||
|
|
||||||
const WORKSPACE = process.cwd();
|
const WORKSPACE = process.cwd();
|
||||||
const WORKSPACE_NAME = path.basename(WORKSPACE);
|
const WORKSPACE_NAME = path.basename(WORKSPACE);
|
||||||
@ -28,6 +30,8 @@ const USERNAME = os.userInfo().username || 'user';
|
|||||||
const PROMPT = `${USERNAME}@${WORKSPACE_NAME} % `;
|
const PROMPT = `${USERNAME}@${WORKSPACE_NAME} % `;
|
||||||
const MENU_PAGE_SIZE = 6;
|
const MENU_PAGE_SIZE = 6;
|
||||||
const SLASH_COMMANDS = new Set(['/new', '/resume', '/allow', '/model', '/status', '/compact', '/config', '/help', '/exit']);
|
const SLASH_COMMANDS = new Set(['/new', '/resume', '/allow', '/model', '/status', '/compact', '/config', '/help', '/exit']);
|
||||||
|
const IMAGE_EXTS = new Set(['.png', '.jpg', '.jpeg', '.gif', '.webp', '.bmp', '.tiff', '.heic']);
|
||||||
|
const VIDEO_EXTS = new Set(['.mp4', '.mov', '.avi', '.mkv', '.webm', '.m4v']);
|
||||||
|
|
||||||
const config = ensureConfig();
|
const config = ensureConfig();
|
||||||
const state = createState(config, WORKSPACE);
|
const state = createState(config, WORKSPACE);
|
||||||
@ -57,6 +61,7 @@ let isRunning = false;
|
|||||||
let escPendingCancel = false;
|
let escPendingCancel = false;
|
||||||
let activeStreamController = null;
|
let activeStreamController = null;
|
||||||
let activeToolController = null;
|
let activeToolController = null;
|
||||||
|
let currentMedia = { tokens: [], text: '' };
|
||||||
let commandMenuActive = false;
|
let commandMenuActive = false;
|
||||||
let menuSearchTerm = '';
|
let menuSearchTerm = '';
|
||||||
let menuLastSearchTerm = '';
|
let menuLastSearchTerm = '';
|
||||||
@ -64,6 +69,8 @@ let menuJustClosedAt = 0;
|
|||||||
let menuInjectedCommand = null;
|
let menuInjectedCommand = null;
|
||||||
let menuAbortController = null;
|
let menuAbortController = null;
|
||||||
let menuJustClosedInjected = false;
|
let menuJustClosedInjected = false;
|
||||||
|
let suppressSlashMenuUntil = 0;
|
||||||
|
let pendingSlashTimer = null;
|
||||||
|
|
||||||
function printNotice(message) {
|
function printNotice(message) {
|
||||||
console.log('');
|
console.log('');
|
||||||
@ -75,6 +82,141 @@ function printNoticeInline(message) {
|
|||||||
console.log(message);
|
console.log(message);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Returns the lower-cased extension of a path, including the leading dot.
 *
 * Only the final path segment is inspected, so a dot inside a directory
 * name (e.g. `/dir.v2/file`) is not mistaken for an extension.
 *
 * @param {string} p - File path (either `/` or `\` may separate segments).
 * @returns {string} Extension such as `.png`, or `''` when there is none.
 */
function getPathExt(p) {
  const text = p == null ? '' : String(p);
  const lastSeparator = Math.max(text.lastIndexOf('/'), text.lastIndexOf('\\'));
  const baseName = text.slice(lastSeparator + 1);
  const dotIndex = baseName.lastIndexOf('.');
  if (dotIndex === -1) return '';
  return baseName.slice(dotIndex).toLowerCase();
}
|
||||||
|
|
||||||
|
/**
 * Normalises a pasted or dropped path into a plain filesystem path.
 *
 * - `file://` URLs lose their scheme (and an optional `localhost` host) and
 *   are percent-decoded.
 * - Shell-style escapes `\ ` and `\\` are unescaped.
 *
 * Percent-decoding is applied only to `file://` URLs: a plain path whose
 * file name literally contains a sequence such as `%20` must stay intact.
 *
 * @param {string} p - Raw path text; falsy values are treated as `''`.
 * @returns {string} The decoded path.
 */
function decodeEscapedPath(p) {
  let text = String(p || '');
  if (text.startsWith('file://')) {
    text = text.slice('file://'.length).replace(/^localhost(?=\/)/, '');
    try {
      text = decodeURIComponent(text);
    } catch (_) {
      // Malformed percent sequence: deliberately keep the undecoded text.
    }
  }
  return text.replace(/\\ /g, ' ').replace(/\\\\/g, '\\');
}
|
||||||
|
|
||||||
|
/**
 * Finds candidate media paths inside an input line.
 *
 * Two forms are recognised:
 *   1. single-quoted text: `'...'` (any content between the quotes);
 *   2. unquoted absolute paths starting with `/` and ending in one of the
 *      extensions from IMAGE_EXTS / VIDEO_EXTS (`\ ` is allowed as an
 *      escaped space inside the path).
 *
 * @param {string} line - The text to scan.
 * @returns {Array<{raw: string, path: string, index: number}>} Matches sorted
 *   by start index. `raw` is the exact matched text, `path` is the text to
 *   resolve (quotes stripped for the quoted form).
 */
function findMediaMatches(line) {
  const matches = [];
  const quoted = /'([^']+)'/g;
  let m;
  while ((m = quoted.exec(line)) !== null) {
    matches.push({ raw: m[0], path: m[1], index: m.index });
  }

  // The extension sets store entries with a leading dot. The pattern below
  // already contains the literal dot, so the dot is stripped here; keeping it
  // would make the regex demand two dots ("file..png") and never match.
  const extGroup = Array.from(new Set([...IMAGE_EXTS, ...VIDEO_EXTS]))
    .map((e) => e.replace(/^\./, '').replace(/[.*+?^${}()|[\]\\]/g, '\\$&'))
    .join('|');
  // Case-insensitive so "A.PNG" is found; the lookahead stops "a.pngx" from
  // being cut short to "a.png".
  const unquoted = new RegExp(`/((?:\\\\ |[^\\s])+?)\\.(${extGroup})(?![A-Za-z0-9])`, 'gi');
  while ((m = unquoted.exec(line)) !== null) {
    const raw = m[0];
    matches.push({ raw, path: raw, index: m.index });
  }

  matches.sort((a, b) => a.index - b.index);
  return matches;
}
|
||||||
|
|
||||||
|
/**
 * Replaces media paths found in `line` with placeholder tokens.
 *
 * A match is replaced only when the decoded path exists on disk, its
 * extension is in IMAGE_EXTS or VIDEO_EXTS, and the per-line limits
 * (9 images, 1 video) are not yet reached; otherwise the matched text is
 * copied through unchanged. Matches that start inside an already consumed
 * span are ignored.
 *
 * @param {string} line - The input line.
 * @returns {{line: string, tokens: Array<{token: string, path: string, type: string}>}}
 *   The rewritten line and one entry per inserted token.
 */
function applyMediaTokens(line) {
  if (!line) return { line: '', tokens: [] };
  const found = findMediaMatches(line);
  if (found.length === 0) return { line, tokens: [] };

  const pieces = [];
  const tokens = [];
  let imageCount = 0;
  let videoCount = 0;
  let consumed = 0;

  for (const { raw, path: candidate, index } of found) {
    if (index < consumed) continue;
    pieces.push(line.slice(consumed, index));
    consumed = index + raw.length;

    // Default: copy the matched text through untouched.
    let replacement = raw;
    const resolved = decodeEscapedPath(candidate);
    if (fs.existsSync(resolved)) {
      const ext = getPathExt(resolved);
      const isImage = IMAGE_EXTS.has(ext);
      const isVideo = VIDEO_EXTS.has(ext);
      const overLimit = (isImage && imageCount >= 9) || (isVideo && videoCount >= 1);
      if ((isImage || isVideo) && !overLimit) {
        replacement = isImage ? `[图片 #${imageCount + 1}]` : `[视频 #${videoCount + 1}]`;
        tokens.push({ token: replacement, path: resolved, type: isImage ? 'image' : 'video' });
        if (isImage) imageCount += 1;
        if (isVideo) videoCount += 1;
      }
    }
    pieces.push(replacement);
  }

  pieces.push(line.slice(consumed));
  return { line: pieces.join(''), tokens };
}
|
||||||
|
|
||||||
|
/**
 * Replaces the first occurrence of one known path in `line` with a token.
 *
 * The inputs are returned unchanged when the path is absent from the line,
 * does not exist on disk, has an extension outside IMAGE_EXTS / VIDEO_EXTS,
 * or when `tokens` already holds 9 images (for an image) or 1 video (for a
 * video). Token numbering continues from the entries already in `tokens`.
 *
 * @param {string} line - Current input line.
 * @param {string} rawPath - Path text exactly as it appears in `line`.
 * @param {Array<{token: string, path: string, type: string}>} tokens - Tokens so far.
 * @returns {{line: string, tokens: Array}} Updated line and a new token array,
 *   or the original `line` / `tokens` when nothing was replaced.
 */
function applySingleMediaPath(line, rawPath, tokens) {
  const unchanged = { line, tokens };
  if (!line || !rawPath) return unchanged;

  const at = line.indexOf(rawPath);
  if (at === -1) return unchanged;

  const resolved = decodeEscapedPath(rawPath);
  if (!fs.existsSync(resolved)) return unchanged;

  const ext = getPathExt(resolved);
  const isImage = IMAGE_EXTS.has(ext);
  const isVideo = VIDEO_EXTS.has(ext);
  if (!(isImage || isVideo)) return unchanged;

  let imageCount = 0;
  let videoCount = 0;
  for (const entry of tokens) {
    if (entry.type === 'image') imageCount += 1;
    else if (entry.type === 'video') videoCount += 1;
  }
  if (isImage && imageCount >= 9) return unchanged;
  if (isVideo && videoCount >= 1) return unchanged;

  let token;
  if (isImage) {
    token = `[图片 #${imageCount + 1}]`;
  } else {
    token = `[视频 #${videoCount + 1}]`;
  }
  const head = line.slice(0, at);
  const tail = line.slice(at + rawPath.length);
  const added = { token, path: resolved, type: isImage ? 'image' : 'video' };
  return { line: head + token + tail, tokens: [...tokens, added] };
}
|
||||||
|
|
||||||
|
/**
 * Wraps every media placeholder token in `line` with the `blue` colour helper.
 *
 * @param {string} line - Text that may contain `[图片 #N]` / `[视频 #N]` tokens.
 * @returns {string} The text with each token passed through `blue`.
 */
function colorizeTokens(line) {
  const tokenPattern = /\[(图片|视频) #\d+\]/g;
  const paint = (placeholder) => blue(placeholder);
  return line.replace(tokenPattern, paint);
}
|
||||||
|
|
||||||
|
/**
 * Redraws the prompt and the current readline buffer with media tokens
 * coloured, then puts the terminal cursor back at the logical cursor column.
 *
 * Does nothing when there is no readline instance, stdout is not a TTY, or
 * the command menu is active.
 */
function refreshInputLine() {
  const canRedraw = rl && process.stdout.isTTY && !commandMenuActive;
  if (!canRedraw) return;

  const out = process.stdout;
  const buffer = rl.line || '';
  const painted = colorizeTokens(buffer);

  readline.clearLine(out, 0);
  readline.cursorTo(out, 0);
  out.write(PROMPT + painted);

  // The column is measured on the uncoloured text, so the colour codes added
  // by colorizeTokens do not count towards the cursor position.
  const promptWidth = visibleWidth(PROMPT);
  const typedWidth = visibleWidth(buffer.slice(0, rl.cursor || 0));
  readline.cursorTo(out, promptWidth + typedWidth);
}
|
||||||
|
|
||||||
|
/**
 * Removes the first media token that the cursor touches.
 *
 * A token spanning [start, end) is removed when `start < cursor <= end`;
 * with `allowStart` the cursor may also sit exactly at `start`.
 *
 * NOTE(review): with `allowStart` (used for the Delete key) a cursor at the
 * token's end still removes the token — confirm this is intended.
 *
 * @param {string} line - Current input text.
 * @param {number} cursor - Cursor offset within `line`.
 * @param {boolean} [allowStart=false] - Also match a cursor at the token start.
 * @returns {{line: string, cursor: number, removed: boolean}} The new text and
 *   cursor (moved to the token start) when a token was removed; otherwise the
 *   inputs with `removed: false`.
 */
function removeTokenAtCursor(line, cursor, allowStart = false) {
  const tokenPattern = /\[(图片|视频) #\d+\]/g;
  for (let hit = tokenPattern.exec(line); hit !== null; hit = tokenPattern.exec(line)) {
    const start = hit.index;
    const end = start + hit[0].length;
    const pastStart = allowStart ? cursor >= start : cursor > start;
    if (pastStart && cursor <= end) {
      return {
        line: line.slice(0, start) + line.slice(end),
        cursor: start,
        removed: true,
      };
    }
  }
  return { line, cursor, removed: false };
}
|
||||||
|
|
||||||
function isSlashCommand(line) {
|
function isSlashCommand(line) {
|
||||||
if (!line || line[0] !== '/') return false;
|
if (!line || line[0] !== '/') return false;
|
||||||
const cmd = line.split(/\s+/)[0];
|
const cmd = line.split(/\s+/)[0];
|
||||||
@ -84,6 +226,34 @@ function isSlashCommand(line) {
|
|||||||
readline.emitKeypressEvents(process.stdin);
|
readline.emitKeypressEvents(process.stdin);
|
||||||
if (process.stdin.isTTY) process.stdin.setRawMode(true);
|
if (process.stdin.isTTY) process.stdin.setRawMode(true);
|
||||||
|
|
||||||
|
// Watches raw stdin chunks for text that looks like a media path (a file://
// URL or a known image/video extension) and, once found, rewrites the
// readline buffer so the path is shown as a media token.
process.stdin.on('data', (chunk) => {
  const inputBlocked = isRunning || commandMenuActive || !rl;
  if (inputBlocked) return;

  const text = chunk ? chunk.toString() : '';
  if (!text) return;

  const hasMediaExt = /\.(png|jpe?g|gif|webp|bmp|tiff|heic|mp4|mov|avi|mkv|webm|m4v)\b/i;
  if (!text.includes('file://') && !hasMediaExt.test(text)) return;

  // A pasted path starts with "/"; hold the slash-command menu back for 200 ms.
  suppressSlashMenuUntil = Date.now() + 200;

  // Deferred one tick — presumably so readline has already placed the chunk
  // into rl.line before it is inspected (verify against readline ordering).
  setImmediate(() => {
    const currentLine = rl.line || '';
    const rawPath = text.replace(/\r?\n/g, '').trim();

    let result = applyMediaTokens(currentLine);
    if (rawPath) {
      const decodedPath = decodeEscapedPath(rawPath);
      if (currentLine.includes(rawPath)) {
        result = applySingleMediaPath(result.line, rawPath, result.tokens);
      } else if (decodedPath && currentLine.includes(decodedPath)) {
        result = applySingleMediaPath(result.line, decodedPath, result.tokens);
      }
    }

    if (!result.tokens.length) return;
    rl.line = result.line;
    rl.cursor = rl.line.length;
    currentMedia = { tokens: result.tokens, text: result.line };
    refreshInputLine();
  });
});
|
||||||
|
|
||||||
initReadline();
|
initReadline();
|
||||||
|
|
||||||
statusBar = createStatusBar({
|
statusBar = createStatusBar({
|
||||||
@ -110,7 +280,17 @@ process.stdin.on('keypress', (str, key) => {
|
|||||||
}
|
}
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
if (!isRunning && key && key.name === 'backspace') {
|
if (!isRunning && key && (key.name === 'backspace' || key.name === 'delete')) {
|
||||||
|
const line = rl.line || '';
|
||||||
|
const cursor = rl.cursor || 0;
|
||||||
|
const updated = removeTokenAtCursor(line, cursor, key.name === 'delete');
|
||||||
|
if (updated.removed) {
|
||||||
|
rl.line = updated.line;
|
||||||
|
rl.cursor = updated.cursor;
|
||||||
|
refreshInputLine();
|
||||||
|
if (statusBar) statusBar.render();
|
||||||
|
return;
|
||||||
|
}
|
||||||
if (statusBar) statusBar.render();
|
if (statusBar) statusBar.render();
|
||||||
}
|
}
|
||||||
if (key && key.name === 'escape' && isRunning) {
|
if (key && key.name === 'escape' && isRunning) {
|
||||||
@ -123,7 +303,45 @@ process.stdin.on('keypress', (str, key) => {
|
|||||||
}
|
}
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
if (str === '/' && Date.now() < suppressSlashMenuUntil) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if (pendingSlashTimer && (str !== '/' || (rl.line && rl.line !== '/'))) {
|
||||||
|
clearTimeout(pendingSlashTimer);
|
||||||
|
pendingSlashTimer = null;
|
||||||
|
}
|
||||||
|
if (!isRunning) {
|
||||||
|
const rawLine = rl.line || '';
|
||||||
|
let applied = { line: rawLine, tokens: [] };
|
||||||
|
if (!isSlashCommand(rawLine)) {
|
||||||
|
const hasToken = /\[(图片|视频) #\d+\]/.test(rawLine);
|
||||||
|
if (hasToken && currentMedia.tokens.length && currentMedia.text === rawLine) {
|
||||||
|
applied = { line: rawLine, tokens: currentMedia.tokens };
|
||||||
|
} else {
|
||||||
|
applied = applyMediaTokens(rawLine);
|
||||||
|
}
|
||||||
|
if (hasToken && !applied.tokens.length && currentMedia.tokens.length) {
|
||||||
|
applied = { line: rawLine, tokens: currentMedia.tokens };
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (applied.line !== (rl.line || '')) {
|
||||||
|
rl.line = applied.line;
|
||||||
|
rl.cursor = rl.line.length;
|
||||||
|
currentMedia = { tokens: applied.tokens, text: applied.line };
|
||||||
|
} else if (currentMedia.text !== applied.line) {
|
||||||
|
currentMedia = { tokens: applied.tokens, text: applied.line };
|
||||||
|
}
|
||||||
|
refreshInputLine();
|
||||||
|
}
|
||||||
|
if (statusBar) statusBar.render();
|
||||||
if (str === '/' && (rl.line === '' || rl.line === '/')) {
|
if (str === '/' && (rl.line === '' || rl.line === '/')) {
|
||||||
|
if (pendingSlashTimer) {
|
||||||
|
clearTimeout(pendingSlashTimer);
|
||||||
|
pendingSlashTimer = null;
|
||||||
|
}
|
||||||
|
pendingSlashTimer = setTimeout(() => {
|
||||||
|
pendingSlashTimer = null;
|
||||||
|
if (rl.line === '' || rl.line === '/') {
|
||||||
commandMenuActive = true;
|
commandMenuActive = true;
|
||||||
if (rl) {
|
if (rl) {
|
||||||
rl.pause();
|
rl.pause();
|
||||||
@ -182,6 +400,9 @@ process.stdin.on('keypress', (str, key) => {
|
|||||||
}
|
}
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
}, 80);
|
||||||
|
return;
|
||||||
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
function initReadline() {
|
function initReadline() {
|
||||||
@ -208,7 +429,21 @@ function initReadline() {
|
|||||||
if (commandMenuActive) {
|
if (commandMenuActive) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
const input = line.trim();
|
let applied = { line, tokens: [] };
|
||||||
|
if (!isSlashCommand(line)) {
|
||||||
|
const hasToken = /\[(图片|视频) #\d+\]/.test(line);
|
||||||
|
if (hasToken && currentMedia.tokens.length && currentMedia.text === line) {
|
||||||
|
applied = { line, tokens: currentMedia.tokens };
|
||||||
|
} else {
|
||||||
|
applied = applyMediaTokens(line);
|
||||||
|
}
|
||||||
|
if (hasToken && !applied.tokens.length && currentMedia.tokens.length) {
|
||||||
|
applied = { line, tokens: currentMedia.tokens };
|
||||||
|
}
|
||||||
|
}
|
||||||
|
const normalizedLine = applied.line;
|
||||||
|
currentMedia = { tokens: applied.tokens, text: normalizedLine };
|
||||||
|
const input = normalizedLine.trim();
|
||||||
if (menuJustClosedAt) {
|
if (menuJustClosedAt) {
|
||||||
if (!menuJustClosedInjected) {
|
if (!menuJustClosedInjected) {
|
||||||
const tooOld = Date.now() - menuJustClosedAt > 800;
|
const tooOld = Date.now() - menuJustClosedAt > 800;
|
||||||
@ -249,8 +484,10 @@ function initReadline() {
|
|||||||
|
|
||||||
console.log('');
|
console.log('');
|
||||||
const userWriter = createIndentedWriter(' ');
|
const userWriter = createIndentedWriter(' ');
|
||||||
userWriter.writeLine(`${cyan('用户:')}${line}`);
|
const displayLine = colorizeTokens(normalizedLine);
|
||||||
state.messages.push({ role: 'user', content: line });
|
userWriter.writeLine(`${cyan('用户:')}${displayLine}`);
|
||||||
|
const content = buildUserContent(normalizedLine, currentMedia.tokens);
|
||||||
|
state.messages.push({ role: 'user', content });
|
||||||
persistConversation();
|
persistConversation();
|
||||||
await runAssistantLoop();
|
await runAssistantLoop();
|
||||||
promptWithStatus();
|
promptWithStatus();
|
||||||
@ -328,6 +565,22 @@ function buildApiMessages() {
|
|||||||
return messages;
|
return messages;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Builds the `content` value for a user message.
 *
 * Without media the plain text is returned. With media the result is an
 * array of parts: one text part followed by an `image_url` / `video_url`
 * part (base64 data URL) for each token whose file could be read.
 *
 * Only tokens whose placeholder is still present in `line` are attached.
 * The token list can be stale after the user deletes a placeholder, and
 * media the user removed from the text must not be sent.
 *
 * @param {string} line - Final user text, possibly containing tokens.
 * @param {Array<{token: string, path: string, type: string}>} tokens - Known media tokens.
 * @returns {string|Array<object>} Plain text, or the multi-part content array.
 */
function buildUserContent(line, tokens) {
  const text = String(line == null ? '' : line);
  // Entries without a string `token` cannot be checked and are kept as-is.
  const liveTokens = (tokens || []).filter(
    (info) => typeof info.token !== 'string' || text.includes(info.token)
  );
  if (!liveTokens.length) return line;

  const parts = [{ type: 'text', text: line }];
  for (const info of liveTokens) {
    const media = readMediafileTool(WORKSPACE, { path: info.path });
    // A file that cannot be read is skipped; the text part is still sent.
    if (!media || !media.success) continue;
    const kind = media.type === 'image' ? 'image_url' : 'video_url';
    const url = `data:${media.mime};base64,${media.b64}`;
    parts.push({ type: kind, [kind]: { url } });
  }
  return parts;
}
|
||||||
|
|
||||||
function printCancelLine() {
|
function printCancelLine() {
|
||||||
console.log('');
|
console.log('');
|
||||||
process.stdout.write(` ${red('已取消本次响应')}\n\n`);
|
process.stdout.write(` ${red('已取消本次响应')}\n\n`);
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user