# deepresearch/所有文件/report.py

"""
研究报告数据模型
"""
import uuid
from datetime import datetime
from typing import List, Optional, Dict, Any
from pydantic import BaseModel, Field

class ReportSection(BaseModel):
    """A report section with optional nested subsections and source URLs.

    ``subsections`` holds further ``ReportSection`` objects, so sections can
    nest; ``sources`` is a list of URL strings rendered as a numbered list.
    """
    title: str
    content: str
    subsections: List['ReportSection'] = []
    sources: List[str] = []  # list of URLs

    def to_markdown(self, level: int = 1) -> str:
        """Render this section, its subsections and its sources as Markdown.

        Args:
            level: Heading depth of this section's title. Subsections and the
                sources heading are rendered one level deeper. The depth is
                clamped to the range 1..6, because a run of more than six
                ``#`` characters is not a Markdown heading.

        Returns:
            The Markdown text for the section, ending with a blank line.
        """
        # Keep every heading inside the range Markdown actually supports;
        # deeply nested sections would otherwise degrade into plain text.
        depth = min(max(level, 1), 6)
        child_depth = min(depth + 1, 6)

        parts = [f"{'#' * depth} {self.title}\n\n{self.content}\n\n"]

        # Nested subsections come right after this section's own content.
        parts.extend(
            subsection.to_markdown(child_depth)
            for subsection in self.subsections
        )

        # Sources are listed last, as a numbered list of self-titled links.
        if self.sources:
            parts.append(f"\n{'#' * child_depth} 参考来源\n")
            parts.extend(
                f"{number}. [{url}]({url})\n"
                for number, url in enumerate(self.sources, 1)
            )
            parts.append("\n")

        return "".join(parts)

# Allow the recursive reference: rebuild the model so the string annotation
# 'ReportSection' used by `subsections` refers to the finished class.
ReportSection.model_rebuild()

class KeyInsight(BaseModel):
    """A key insight with its supporting evidence, source URLs and confidence."""
    # The insight statement itself.
    insight: str
    # Evidence strings backing the insight; empty by default.
    supporting_evidence: List[str] = []
    # URLs of the sources the insight is drawn from; empty by default.
    source_urls: List[str] = []
    # No range validation is declared on this field.
    confidence: float = 0.0  # between 0 and 1

class SubtopicReport(BaseModel):
    """Report for one subtopic: sections, key insights and recommendations."""
    subtopic_id: str
    subtopic_name: str
    sections: List[ReportSection] = []
    key_insights: List[KeyInsight] = []
    recommendations: List[str] = []
    created_at: datetime = Field(default_factory=datetime.now)
    word_count: int = 0

    def to_markdown(self) -> str:
        """Render the subtopic as a Markdown fragment.

        The fragment starts with a level-2 heading carrying the subtopic
        name, followed by each section at heading level 3, then the key
        insights and the recommendations when those lists are non-empty.

        Returns:
            The assembled Markdown text.
        """
        chunks = [f"## {self.subtopic_name}\n\n"]

        # Sections, each rendered one level below the subtopic heading.
        chunks.extend(part.to_markdown(level=3) for part in self.sections)

        # Key insights: numbered list with evidence and source links nested.
        if self.key_insights:
            chunks.append("### 关键洞察\n\n")
            for number, item in enumerate(self.key_insights, 1):
                chunks.append(f"{number}. **{item.insight}**\n")
                chunks.extend(
                    f"   - {evidence}\n" for evidence in item.supporting_evidence
                )
                if item.source_urls:
                    links = ", ".join(
                        f"[{position}]({url})"
                        for position, url in enumerate(item.source_urls, 1)
                    )
                    chunks.append("   - 来源: " + links + "\n")
                chunks.append("\n")

        # Recommendations as a plain bullet list.
        if self.recommendations:
            chunks.append("### 建议与展望\n\n")
            chunks.extend(f"- {text}\n" for text in self.recommendations)
            chunks.append("\n")

        return "".join(chunks)

class HallucinationCheck(BaseModel):
    """Record of a single hallucination check."""
    # The content that was checked.
    content: str
    # URL of the source associated with the checked content.
    source_url: str
    # Original text, when available (presumably the source passage — confirm with callers).
    original_text: Optional[str] = None
    # Outcome flag; defaults to False.
    is_hallucination: bool = False
    hallucination_type: Optional[str] = None  # exaggeration / misattribution / fabrication
    # Corrected version of the content, when one is provided.
    corrected_content: Optional[str] = None
    # Timestamp filled in with datetime.now() when the record is created.
    checked_at: datetime = Field(default_factory=datetime.now)

class FinalReport(BaseModel):
    """Final research report aggregating findings, insights and subtopic reports.

    The model stores the report content plus two counters
    (``total_sources``, ``total_searches``) and can render itself as a
    Markdown document or write that document to a file.
    """
    session_id: str
    title: str
    executive_summary: str
    main_findings: List[ReportSection] = []
    subtopic_reports: List[SubtopicReport] = []
    overall_insights: List[KeyInsight] = []
    recommendations: List[str] = []
    methodology: Optional[str] = None
    limitations: List[str] = []
    created_at: datetime = Field(default_factory=datetime.now)
    total_sources: int = 0
    total_searches: int = 0

    def to_markdown(self) -> str:
        """Render the complete report as a Markdown document.

        The title, generation time, executive summary and statistics are
        always emitted. Every other section (main findings, overall
        insights, recommendations, detailed subtopic analysis, methodology,
        limitations) is emitted only when it has content, so the document
        never contains an empty section heading.

        Returns:
            The Markdown text of the report.
        """
        parts = [
            f"# {self.title}\n\n",
            f"*生成时间: {self.created_at.strftime('%Y-%m-%d %H:%M:%S')}*\n\n",
            # Executive summary
            "## 执行摘要\n\n",
            f"{self.executive_summary}\n\n",
        ]

        # Main findings, one level below the section heading.
        if self.main_findings:
            parts.append("## 主要发现\n\n")
            parts.extend(
                finding.to_markdown(level=3) for finding in self.main_findings
            )

        # Overall insights: one numbered sub-heading per insight.
        if self.overall_insights:
            parts.append("## 综合洞察\n\n")
            for number, insight in enumerate(self.overall_insights, 1):
                parts.append(f"### {number}. {insight.insight}\n\n")
                if insight.supporting_evidence:
                    parts.extend(
                        f"- {evidence}\n"
                        for evidence in insight.supporting_evidence
                    )
                    parts.append("\n")

        # Recommendations
        if self.recommendations:
            parts.append("## 建议\n\n")
            parts.extend(f"- {item}\n" for item in self.recommendations)
            parts.append("\n")

        # Detailed subtopic reports. Guarded like the other optional
        # sections so an empty list does not leave a dangling heading.
        if self.subtopic_reports:
            parts.append("## 详细分析\n\n")
            for report in self.subtopic_reports:
                parts.append(report.to_markdown())
                parts.append("---\n\n")

        # Methodology
        if self.methodology:
            parts.append("## 研究方法\n\n")
            parts.append(f"{self.methodology}\n\n")

        # Limitations
        if self.limitations:
            parts.append("## 研究局限性\n\n")
            parts.extend(f"- {item}\n" for item in self.limitations)
            parts.append("\n")

        # Statistics
        parts.append("## 研究统计\n\n")
        parts.append(f"- 总搜索次数: {self.total_searches}\n")
        parts.append(f"- 引用来源数: {self.total_sources}\n")
        parts.append(f"- 分析子主题数: {len(self.subtopic_reports)}\n")

        return "".join(parts)

    def save_to_file(self, filepath: str) -> None:
        """Write the Markdown rendering of the report to ``filepath``.

        The file is opened in write mode with UTF-8 encoding, so an existing
        file at that path is overwritten.

        Args:
            filepath: Destination path of the Markdown file.
        """
        with open(filepath, 'w', encoding='utf-8') as f:
            f.write(self.to_markdown())