deepresearch/scripts/init_db.py
2025-07-02 15:35:36 +08:00

172 lines
4.9 KiB
Python
Executable File
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
"""
Initialize the database and directory structure.
"""
import os
import sys
# Put the project root (the parent of this script's directory) at the front of
# the Python path; presumably this is what lets the `config` import below
# resolve when the script is run directly from the scripts directory.
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from config import Config
def init_directories():
    """Create the required directories and their ``.gitkeep`` marker files.

    Ensures that the directories named by ``Config.DATA_DIR``,
    ``Config.SESSIONS_DIR``, ``Config.REPORTS_DIR``, ``Config.CACHE_DIR`` and
    ``Config.LOG_DIR`` exist, then places an empty ``.gitkeep`` file in the
    sessions, reports and cache directories. Progress is printed to stdout.

    Raises:
        OSError: If a directory or marker file cannot be created. This
            includes ``FileExistsError`` when a configured path already
            exists but is not a directory.
    """
    directories = [
        Config.DATA_DIR,
        Config.SESSIONS_DIR,
        Config.REPORTS_DIR,
        Config.CACHE_DIR,
        Config.LOG_DIR,
    ]
    for directory in directories:
        # isdir (not exists) so that a regular file sitting at the configured
        # path is not mistaken for an already-created directory.
        if os.path.isdir(directory):
            print(f"目录已存在: {directory}")
            continue
        # exist_ok=True tolerates the directory being created by someone else
        # between the check above and this call.
        os.makedirs(directory, exist_ok=True)
        print(f"创建目录: {directory}")

    # Create the .gitkeep files.
    for directory in (Config.SESSIONS_DIR, Config.REPORTS_DIR, Config.CACHE_DIR):
        gitkeep_path = os.path.join(directory, '.gitkeep')
        if not os.path.exists(gitkeep_path):
            # Opening for writing and closing immediately leaves an empty file.
            with open(gitkeep_path, 'w', encoding='utf-8'):
                pass
            print(f"创建.gitkeep: {gitkeep_path}")
def init_mongodb():
    """Create the MongoDB indexes used by the application (optional step).

    Connects to ``Config.MONGODB_URI``, takes the default database from that
    URI and creates one single-field index per entry below on the
    ``sessions``, ``search_results`` and ``reports`` collections.

    MongoDB is optional, so any failure (for example ``pymongo`` not being
    installed, or an error raised while connecting or creating an index) is
    printed to stdout and swallowed instead of being raised. The client is
    always closed before returning.
    """
    # Collection name -> (field, direction) index specs.
    # In PyMongo, 1 is ascending and -1 is descending.
    collections = {
        'sessions': [
            ('created_at', -1),
            ('status', 1),
            ('question_type', 1),
        ],
        'search_results': [
            ('session_id', 1),
            ('subtopic_id', 1),
            ('created_at', -1),
        ],
        'reports': [
            ('session_id', 1),
            ('created_at', -1),
        ],
    }
    client = None
    try:
        from pymongo import MongoClient

        client = MongoClient(Config.MONGODB_URI)
        db = client.get_database()
        # Create the collections and indexes.
        for collection_name, indexes in collections.items():
            collection = db[collection_name]
            for field, direction in indexes:
                # create_index takes a list of (key, direction) pairs.
                collection.create_index([(field, direction)])
            print(f"初始化集合: {collection_name}")
        print("MongoDB初始化完成")
    except Exception as e:
        # Deliberately broad: this step is best-effort and must not abort
        # the rest of the initialization script.
        print(f"MongoDB初始化失败可选: {e}")
    finally:
        # Release the connection pool whether or not initialization succeeded.
        if client is not None:
            client.close()
def check_environment():
    """Check that the required API-key environment variables are set.

    Looks up ``DEEPSEEK_API_KEY`` and ``TAVILY_API_KEY`` in ``os.environ``.
    A variable that is absent or set to an empty string counts as missing.
    Prints either a warning listing the missing names or a success message.
    """
    missing = [
        name
        for name in ('DEEPSEEK_API_KEY', 'TAVILY_API_KEY')
        if not os.environ.get(name)
    ]

    if not missing:
        print("\n环境变量检查通过")
        return

    print("\n警告: 缺少以下环境变量:")
    for name in missing:
        print(f" - {name}")
    print("\n请在.env文件中设置这些变量")
def test_task_manager():
    """Smoke-test the task manager by submitting one trivial task.

    Imports ``task_manager`` from ``app.services.task_manager``, submits a
    no-argument function to it, prints the returned task id and then shuts
    the manager down. Every failure (import, submission or shutdown) is
    printed to stdout instead of being raised, so the rest of the
    initialization script keeps running.
    """
    print("\n测试任务管理器...")
    try:
        from app.services.task_manager import task_manager
    except Exception as e:
        # Nothing was started, so there is nothing to shut down.
        print(f"✗ 任务管理器测试失败: {e}")
        return

    # Trivial payload for the submission test.
    def test_task():
        return "Task manager is working!"

    try:
        task_id = task_manager.submit_task(test_task)
        print(f"✓ 任务管理器正常工作测试任务ID: {task_id}")
    except Exception as e:
        print(f"✗ 任务管理器测试失败: {e}")
    finally:
        # Shut the task manager down even when submission failed, so it is
        # not left running; a shutdown error is reported, not raised.
        try:
            task_manager.shutdown()
        except Exception as e:
            print(f"✗ 任务管理器测试失败: {e}")
def create_test_data():
    """Write a sample session file when running in the development environment.

    Does nothing unless the ``FLASK_ENV`` environment variable equals
    ``'development'``. In development it writes ``test-session-001.json``
    into ``Config.SESSIONS_DIR`` as UTF-8 JSON, unless that file already
    exists, and prints what it did.
    """
    if os.environ.get('FLASK_ENV') != 'development':
        return

    print("\n开发环境:创建测试数据...")
    import json

    session_path = os.path.join(Config.SESSIONS_DIR, 'test-session-001.json')
    if os.path.exists(session_path):
        # Never overwrite an existing sample file.
        return

    # Example session record.
    payload = {
        "id": "test-session-001",
        "question": "这是一个测试研究问题",
        "status": "completed",
        "created_at": "2024-01-01T00:00:00",
    }
    with open(session_path, 'w', encoding='utf-8') as handle:
        json.dump(payload, handle, ensure_ascii=False, indent=2)
    print(f"创建测试会话文件: {session_path}")
def main():
    """Run every initialization step in order and print a closing summary.

    The steps are: create the directory structure, initialize MongoDB
    (optional), check the environment variables, test the task manager and
    create test data. Each step is announced on stdout before it runs.
    """
    separator = "=" * 50
    print("DeepResearch 初始化脚本")
    print(separator)

    # (announcement, step function) pairs, executed top to bottom.
    steps = (
        ("\n1. 初始化目录结构...", init_directories),
        ("\n2. 初始化MongoDB...", init_mongodb),
        ("\n3. 检查环境变量...", check_environment),
        ("\n4. 测试任务管理器...", test_task_manager),
        ("\n5. 创建测试数据...", create_test_data),
    )
    for announcement, run_step in steps:
        print(announcement)
        run_step()

    # Closing summary and next-step hints.
    closing_lines = (
        "\n" + separator,
        "初始化完成!",
        "\n下一步:",
        "1. 确保在.env文件中设置了必要的API密钥",
        "2. 运行 'python app.py' 启动应用",
        "\n注意: 不再需要启动 Redis 和 Celery Worker",
    )
    for line in closing_lines:
        print(line)
# Run the initialization steps only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()