# deepresearch/app/models/search_result.py
# Last modified: 2025-07-02 15:35:36 +08:00
# 119 lines, 3.8 KiB, Python

"""
搜索结果数据模型
"""
import uuid
from datetime import datetime
from enum import Enum
from typing import Any, Dict, List, Optional

from pydantic import BaseModel, Field
class SearchImportance(str, Enum):
    """Importance level assigned to a search result."""

    HIGH = "high"
    MEDIUM = "medium"
    LOW = "low"
class SearchResult(BaseModel):
    """A single search result.

    Equality and hashing are URL-based, so results can be deduplicated
    via sets / dict keys (see ``SearchBatch.add_results``).
    """

    title: str
    url: str
    snippet: str
    score: float = 0.0
    published_date: Optional[str] = None
    raw_content: Optional[str] = None
    importance: Optional[SearchImportance] = None
    # default_factory is the pydantic-documented way to declare a mutable
    # default, and matches the Field(...) style used elsewhere in this module.
    key_findings: List[str] = Field(default_factory=list)

    def __hash__(self):
        """Hash by URL so results are usable in sets for deduplication."""
        return hash(self.url)

    def __eq__(self, other):
        """URL-based equality for SearchResult operands.

        Returns NotImplemented for foreign types so Python can try the
        reflected comparison instead of prematurely answering False.
        """
        if isinstance(other, SearchResult):
            return self.url == other.url
        return NotImplemented
class TavilySearchResponse(BaseModel):
    """Raw response payload returned by the Tavily API."""

    query: str
    answer: Optional[str] = None
    images: List[str] = []
    results: List[Dict[str, Any]] = []
    response_time: float = 0.0

    def to_search_results(self) -> List[SearchResult]:
        """Map each raw result dict onto a typed SearchResult."""
        return [
            SearchResult(
                title=item.get('title', ''),
                url=item.get('url', ''),
                snippet=item.get('content', ''),
                score=item.get('score', 0.0),
                published_date=item.get('published_date'),
                raw_content=item.get('raw_content'),
            )
            for item in self.results
        ]
class SearchBatch(BaseModel):
    """One batch of results produced by a single search query."""

    search_id: str = Field(default_factory=lambda: f"S{uuid.uuid4().hex[:8]}")
    subtopic_id: str
    query: str
    timestamp: datetime = Field(default_factory=datetime.now)
    results: List[SearchResult] = []
    is_refined_search: bool = False
    parent_search_id: Optional[str] = None
    detail_type: Optional[str] = None
    total_results: int = 0

    def add_results(self, results: List[SearchResult]):
        """Append new results, skipping URLs already present in the batch."""
        seen = {existing.url for existing in self.results}
        for candidate in results:
            if candidate.url in seen:
                continue
            seen.add(candidate.url)
            self.results.append(candidate)
        # Keep the cached counter in sync with the deduplicated list.
        self.total_results = len(self.results)
class SearchSummary(BaseModel):
    """Aggregate statistics over the search batches of one subtopic."""

    subtopic_id: str
    total_searches: int = 0
    total_results: int = 0
    high_importance_count: int = 0
    medium_importance_count: int = 0
    low_importance_count: int = 0
    unique_domains: List[str] = []

    @classmethod
    def from_search_batches(cls, subtopic_id: str, batches: List[SearchBatch]) -> 'SearchSummary':
        """Build a summary from search batches.

        Counts searches and results, extracts the set of unique result
        domains, and tallies results by importance level. Results whose
        ``importance`` is None are counted in totals but in no tier.
        """
        # Hoisted out of the loop: the original re-ran this import once per
        # result inside the innermost loop body.
        from urllib.parse import urlparse

        summary = cls(subtopic_id=subtopic_id)
        summary.total_searches = len(batches)

        domains = set()
        # Single pass: domain extraction and importance tallies together.
        for batch in batches:
            for result in batch.results:
                summary.total_results += 1
                domain = urlparse(result.url).netloc
                if domain:
                    domains.add(domain)
                if result.importance == SearchImportance.HIGH:
                    summary.high_importance_count += 1
                elif result.importance == SearchImportance.MEDIUM:
                    summary.medium_importance_count += 1
                elif result.importance == SearchImportance.LOW:
                    summary.low_importance_count += 1

        summary.unique_domains = list(domains)
        return summary