agent-Specialization/data/cleanup_empty_conversations.py
2025-11-14 16:44:12 +08:00

150 lines
4.7 KiB
Python

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
清理空对话脚本
删除 total_messages 为 0 的对话记录及其对应文件
"""
import json
import os
import shutil
from pathlib import Path
def cleanup_empty_conversations(base_dir=None, *, assume_yes: bool = False) -> None:
    """Delete conversations whose ``total_messages`` is 0.

    Reads ``index.json`` inside *base_dir*, collects every entry whose
    ``total_messages`` value equals 0 (a missing key counts as 0), asks for
    confirmation, deletes the matching ``<conv_id>.json`` files and rewrites
    the index without those entries. Progress is reported via ``print``.

    Args:
        base_dir: Directory holding ``index.json`` and the per-conversation
            files. ``None`` (the default) selects the historical hard-coded
            location, so existing zero-argument callers behave as before.
        assume_yes: When true, skip the interactive ``y/N`` prompt. Intended
            for non-interactive use (cron jobs, tests).

    Returns:
        None. All outcomes, including failures, are reported on stdout.

    Notes:
        * ``index.json.backup`` is written only right before destructive
          changes, so a cancelled or no-op run never overwrites an earlier
          backup.
        * An entry whose file could not be deleted stays in the index so the
          index and the directory remain consistent.
        * The index is replaced atomically (temp file + ``os.replace``); a
          failed write leaves the original index untouched.
    """
    if base_dir is None:
        # Historical default, kept for backward compatibility.
        base_dir = "/Users/jojo/Desktop/agent_kimi_源码备份/data/conversations"
    base_dir = Path(base_dir)
    index_file = base_dir / "index.json"
    backup_file = base_dir / "index.json.backup"
    print(f"🔍 检查文件: {index_file}")

    if not index_file.exists():
        print(f"❌ 错误: {index_file} 不存在")
        return

    # Load and validate the index before touching anything on disk.
    try:
        with open(index_file, 'r', encoding='utf-8') as f:
            data = json.load(f)
    except (OSError, ValueError) as e:
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        print(f"❌ 读取索引文件失败: {e}")
        return
    if not isinstance(data, dict):
        print(f"❌ 读取索引文件失败: 索引应为 JSON 对象,实际为 {type(data).__name__}")
        return
    print(f"📊 索引文件加载成功,共 {len(data)} 条记录")

    # Partition entries. Non-dict entries cannot be inspected, so they are
    # treated as valid and left untouched instead of crashing on ``.get``.
    empty_conversations = []
    valid_conversations = []
    for conv_id, conv_data in data.items():
        if isinstance(conv_data, dict) and conv_data.get("total_messages", 0) == 0:
            empty_conversations.append((conv_id, conv_data))
            print(f"🗑️ 发现空对话: {conv_id} (创建于: {conv_data.get('created_at', '未知')})")
        else:
            valid_conversations.append((conv_id, conv_data))

    print(f"\n📈 统计结果:")
    print(f" 空对话数量: {len(empty_conversations)}")
    print(f" 有效对话数量: {len(valid_conversations)}")
    if not empty_conversations:
        print("✨ 没有发现空对话,无需清理")
        return

    # Preview (first five only) and confirmation.
    print(f"\n⚠️ 将要删除 {len(empty_conversations)} 个空对话:")
    for conv_id, conv_data in empty_conversations[:5]:
        title = conv_data.get('title', '无标题')
        created = conv_data.get('created_at', '未知时间')
        print(f" - {conv_id}: {title} ({created})")
    if len(empty_conversations) > 5:
        print(f" ... 还有 {len(empty_conversations) - 5}")

    if not assume_yes:
        try:
            confirm = input("\n❓ 确认删除这些空对话吗? (y/N): ").strip().lower()
        except EOFError:
            # No stdin available: treat as "no" rather than crashing.
            confirm = ""
        if confirm not in ('y', 'yes'):
            print("❌ 操作已取消")
            return

    # Back up the index only now that we are about to modify things.
    try:
        shutil.copy2(index_file, backup_file)
        print(f"✅ 已创建备份: {backup_file}")
    except OSError as e:
        print(f"❌ 创建备份失败: {e}")
        return

    # Delete the conversation files. EAFP avoids the exists()/unlink() race.
    deleted_files = []
    failed_deletions = []
    removed_ids = set()  # entries that may safely leave the index
    for conv_id, _ in empty_conversations:
        conv_file = base_dir / f"{conv_id}.json"
        try:
            conv_file.unlink()
        except FileNotFoundError:
            removed_ids.add(conv_id)
            print(f"⚠️ 文件不存在: {conv_file.name}")
        except (OSError, ValueError) as e:
            # Keep the index entry: the file is still on disk.
            failed_deletions.append((conv_file.name, str(e)))
            print(f"❌ 删除失败: {conv_file.name} - {e}")
        else:
            removed_ids.add(conv_id)
            deleted_files.append(conv_file.name)
            print(f"✅ 删除文件: {conv_file.name}")

    # Rewrite the index atomically, preserving the original entry order.
    new_data = {cid: entry for cid, entry in data.items() if cid not in removed_ids}
    tmp_file = base_dir / "index.json.tmp"
    try:
        with open(tmp_file, 'w', encoding='utf-8') as f:
            json.dump(new_data, f, ensure_ascii=False, indent=2)
        os.replace(tmp_file, index_file)
        print(f"✅ 索引文件已更新,保留 {len(new_data)} 条记录")
    except (OSError, ValueError) as e:
        print(f"❌ 更新索引文件失败: {e}")
        try:
            tmp_file.unlink()
        except OSError:
            pass  # best effort: the temp file may never have been created
        # The original index was never opened for writing, so no restore
        # from the backup is required.
        print("ℹ️ 原索引文件未被修改")
        return

    # Summary.
    print(f"\n🎉 清理完成!")
    print(f" 删除的对话记录: {len(removed_ids)}")
    print(f" 删除的文件: {len(deleted_files)}")
    if failed_deletions:
        print(f" 删除失败: {len(failed_deletions)}")
        for filename, error in failed_deletions:
            print(f" - {filename}: {error}")
    print(f" 备份文件: {backup_file}")
    print(f" 剩余有效对话: {len(valid_conversations)}")
def main():
    """Script entry point.

    Prints a banner, runs the cleanup, and turns Ctrl-C or any unexpected
    exception into a printed message so the closing banner is still shown.
    """
    divider = "=" * 50
    for banner_line in (divider, "🧹 空对话清理工具", divider):
        print(banner_line)

    try:
        cleanup_empty_conversations()
    except KeyboardInterrupt:
        print("\n❌ 操作被用户取消")
    except Exception as exc:
        # Top-level boundary: report the failure with a full traceback
        # instead of letting the script die without the closing banner.
        print(f"\n💥 发生未预期的错误: {exc}")
        import traceback
        traceback.print_exc()

    print(f"\n{divider}")
    print("🏁 脚本结束")
# Run the tool only when executed as a script, not when imported.
if __name__ == "__main__":
    main()