# Model provider configuration: API endpoints, keys, context windows,
# model profiles, and per-model prompt overrides.
import os
from pathlib import Path
from typing import Optional
def _env(name: str, default: str = "") -> str:
|
||
return os.environ.get(name, default)
|
||
|
||
def _env_optional(name: str) -> Optional[str]:
|
||
value = os.environ.get(name)
|
||
if value is None:
|
||
# 回退读取 .env(支持运行中更新)
|
||
env_path = Path(__file__).resolve().parents[1] / ".env"
|
||
if env_path.exists():
|
||
try:
|
||
for raw_line in env_path.read_text(encoding="utf-8").splitlines():
|
||
line = raw_line.strip()
|
||
if not line or line.startswith("#") or "=" not in line:
|
||
continue
|
||
key, val = line.split("=", 1)
|
||
if key.strip() == name:
|
||
value = val.strip().strip('"').strip("'")
|
||
break
|
||
except Exception:
|
||
value = None
|
||
if value is None:
|
||
return None
|
||
value = value.strip()
|
||
return value or None
|
||
|
||
|
||
# Maximum model context windows (unit: tokens), keyed by profile key.
CONTEXT_WINDOWS = {
    "kimi": 256_000,
    "kimi-k2.5": 256_000,
    "qwen3-max": 256_000,
    "qwen3-vl-plus": 256_000,
    "minimax-m2.5": 204_800,  # only entry below 256k besides deepseek
    "deepseek": 128_000,
}
# Default provider (Kimi).  Env vars API_BASE_KIMI / API_KEY_KIMI win over
# the legacy AGENT_* variables, which win over the hard-coded defaults.
KIMI_BASE = _env("API_BASE_KIMI", _env("AGENT_API_BASE_URL", "https://api.moonshot.cn/v1"))
KIMI_KEY = _env("API_KEY_KIMI", _env("AGENT_API_KEY", ""))
KIMI_BASE_OFFICIAL = _env_optional("API_BASE_KIMI_OFFICIAL")
KIMI_KEY_OFFICIAL = _env_optional("API_KEY_KIMI_OFFICIAL")
KIMI_FAST_MODEL = _env("MODEL_KIMI_FAST", _env("AGENT_MODEL_ID", "kimi-k2-0905-preview"))
KIMI_THINK_MODEL = _env("MODEL_KIMI_THINK", _env("AGENT_THINKING_MODEL_ID", "kimi-k2-thinking"))
KIMI_25_MODEL = _env("MODEL_KIMI_25", "kimi-k2.5")

# DeepSeek.
DEEPSEEK_BASE = _env("API_BASE_DEEPSEEK", "https://api.deepseek.com")
DEEPSEEK_KEY = _env("API_KEY_DEEPSEEK", _env("AGENT_DEEPSEEK_API_KEY", ""))
DEEPSEEK_FAST_MODEL = _env("MODEL_DEEPSEEK_FAST", "deepseek-chat")
DEEPSEEK_THINK_MODEL = _env("MODEL_DEEPSEEK_THINK", "deepseek-reasoner")

# Qwen (DashScope OpenAI-compatible endpoint by default).
QWEN_BASE = _env("API_BASE_QWEN", "https://dashscope.aliyuncs.com/compatible-mode/v1")
QWEN_KEY = _env("API_KEY_QWEN", _env("DASHSCOPE_API_KEY", ""))
QWEN_BASE_OFFICIAL = _env_optional("API_BASE_QWEN_OFFICIAL")
QWEN_KEY_OFFICIAL = _env_optional("API_KEY_QWEN_OFFICIAL")
QWEN_MAX_MODEL = _env("MODEL_QWEN_MAX", "qwen3-max")
# NOTE(review): default model id "qwen3.5-plus" does not match the profile
# key "qwen3-vl-plus" — confirm this default is intentional.
QWEN_VL_MODEL = _env("MODEL_QWEN_VL", "qwen3.5-plus")

# MiniMax.
MINIMAX_BASE = _env("API_BASE_MINIMAX", "https://api.minimaxi.com/v1")
MINIMAX_KEY = _env("API_KEY_MINIMAX", "")
MINIMAX_BASE_OFFICIAL = _env_optional("API_BASE_MINIMAX_OFFICIAL")
MINIMAX_KEY_OFFICIAL = _env_optional("API_KEY_MINIMAX_OFFICIAL")
MINIMAX_MODEL = _env("MODEL_MINIMAX", "MiniMax-M2.5")
# Per-model provider profiles.  Each entry carries a "fast" section and an
# optional "thinking" section (base_url / api_key / model_id / max_tokens /
# context_window, plus provider-specific "extra_params"), together with
# capability flags consumed elsewhere in the project.
MODEL_PROFILES = {
    "kimi": {
        "context_window": CONTEXT_WINDOWS["kimi"],
        "fast": {
            "base_url": KIMI_BASE,
            "api_key": KIMI_KEY,
            "model_id": KIMI_FAST_MODEL,
            "max_tokens": None,
            "context_window": CONTEXT_WINDOWS["kimi"],
        },
        "thinking": {
            "base_url": KIMI_BASE,
            "api_key": KIMI_KEY,
            "model_id": KIMI_THINK_MODEL,
            "max_tokens": None,
            "context_window": CONTEXT_WINDOWS["kimi"],
        },
        "supports_thinking": True,
        "fast_only": False,
        "name": "Kimi-k2",
    },
    "kimi-k2.5": {
        "context_window": CONTEXT_WINDOWS["kimi-k2.5"],
        # Same model id for both modes; thinking is toggled via extra_params.
        "fast": {
            "base_url": KIMI_BASE,
            "api_key": KIMI_KEY,
            "model_id": KIMI_25_MODEL,
            "max_tokens": None,
            "context_window": CONTEXT_WINDOWS["kimi-k2.5"],
            "extra_params": {"thinking": {"type": "disabled"}},
        },
        "thinking": {
            "base_url": KIMI_BASE,
            "api_key": KIMI_KEY,
            "model_id": KIMI_25_MODEL,
            "max_tokens": None,
            "context_window": CONTEXT_WINDOWS["kimi-k2.5"],
            "extra_params": {"thinking": {"type": "enabled"}, "enable_thinking": True},
        },
        "supports_thinking": True,
        "fast_only": False,
        "name": "Kimi-k2.5",
    },
    "deepseek": {
        "context_window": CONTEXT_WINDOWS["deepseek"],
        "fast": {
            "base_url": DEEPSEEK_BASE,
            "api_key": DEEPSEEK_KEY,
            "model_id": DEEPSEEK_FAST_MODEL,
            "max_tokens": 8192,
            "context_window": CONTEXT_WINDOWS["deepseek"],
        },
        "thinking": {
            "base_url": DEEPSEEK_BASE,
            "api_key": DEEPSEEK_KEY,
            "model_id": DEEPSEEK_THINK_MODEL,
            "max_tokens": 65536,
            "context_window": CONTEXT_WINDOWS["deepseek"],
        },
        "supports_thinking": True,
        "fast_only": False,
        "name": "DeepSeek",
    },
    "qwen3-max": {
        "context_window": CONTEXT_WINDOWS["qwen3-max"],
        "fast": {
            "base_url": QWEN_BASE,
            "api_key": QWEN_KEY,
            "model_id": QWEN_MAX_MODEL,
            "max_tokens": 65536,
            "context_window": CONTEXT_WINDOWS["qwen3-max"],
        },
        "thinking": None,  # thinking mode not supported
        "supports_thinking": False,
        "fast_only": True,
        "name": "Qwen3-Max",
        "hidden": True,
    },
    "qwen3-vl-plus": {
        "context_window": CONTEXT_WINDOWS["qwen3-vl-plus"],
        "fast": {
            "base_url": QWEN_BASE,
            "api_key": QWEN_KEY,
            "model_id": QWEN_VL_MODEL,
            "max_tokens": 32768,
            "context_window": CONTEXT_WINDOWS["qwen3-vl-plus"],
            "extra_params": {},
        },
        "thinking": {
            "base_url": QWEN_BASE,
            "api_key": QWEN_KEY,
            "model_id": QWEN_VL_MODEL,
            "max_tokens": 32768,
            "context_window": CONTEXT_WINDOWS["qwen3-vl-plus"],
            "extra_params": {"enable_thinking": True},
        },
        "supports_thinking": True,
        "fast_only": False,
        "name": "Qwen3.5",
    },
    "minimax-m2.5": {
        "context_window": CONTEXT_WINDOWS["minimax-m2.5"],
        "fast": {
            "base_url": MINIMAX_BASE,
            "api_key": MINIMAX_KEY,
            "model_id": MINIMAX_MODEL,
            "max_tokens": 65536,
            "context_window": CONTEXT_WINDOWS["minimax-m2.5"],
            "extra_params": {"reasoning_split": True},
        },
        "thinking": {
            "base_url": MINIMAX_BASE,
            "api_key": MINIMAX_KEY,
            "model_id": MINIMAX_MODEL,
            "max_tokens": 65536,
            "context_window": CONTEXT_WINDOWS["minimax-m2.5"],
            "extra_params": {"reasoning_split": True},
        },
        "supports_thinking": True,
        "fast_only": False,
        "deep_only": True,
        "name": "MiniMax-M2.5",
    },
}
# Per-model prompt-template replacement strings (user-facing, in Chinese).
# Keys mirror MODEL_PROFILES; get_model_prompt_replacements() falls back to
# the "kimi" entry for any missing field.
MODEL_PROMPT_OVERRIDES = {
    "kimi": {
        "model_description": "你的基础模型是 Kimi-k2,由月之暗面公司开发,是一个开源的 MoE 架构模型,拥有 1T 参数和 32B 激活参数,当前智能助手应用由火山引擎提供 API 服务。",
        "thinking_model_line": "思考模式时,第一次请求的模型不是 Kimi-k2,而是 Kimi-k2-Thinking,一个更善于分析复杂问题、规划复杂流程的模型,在后续请求时模型会换回 Kimi-k2。",
        "deep_thinking_line": "在深度思考模式中,请求的模型是 Kimi-k2-Thinking,一个更善于分析复杂问题、规划复杂流程的模型。",
    },
    "kimi-k2.5": {
        "model_description": "你的基础模型是 Kimi-k2.5,支持图文多模态,并通过 thinking 参数开启/关闭思考能力。",
        "thinking_model_line": "思考模式时使用同一个 Kimi-k2.5 模型,但会在请求中注入 thinking={\"type\": \"enabled\"} 来开启思考;快速模式则传递 thinking={\"type\": \"disabled\"}。",
        "deep_thinking_line": "深度思考模式下,所有请求都会携带 thinking={\"type\": \"enabled\"},以获得持续的推理能力。",
    },
    "deepseek": {
        "model_description": "你的基础模型是 DeepSeek-V3.2(deepseek-chat),由 DeepSeek 提供,数学与推理能力较强,当前通过官方 API 调用。",
        "thinking_model_line": "思考模式时,第一次请求使用 DeepSeek-Reasoner,一个强化推理的模型,后续请求会切回 DeepSeek-V3.2。",
        "deep_thinking_line": "在深度思考模式中,请求的模型是 DeepSeek-Reasoner,用于深入分析复杂问题并规划步骤。",
    },
    "qwen3-max": {
        "model_description": "你的基础模型是 Qwen3-Max,由通义千问提供,当前仅支持快速模式,不提供思考或深度思考能力。",
        "thinking_model_line": "Qwen3-Max 仅支持快速模式,思考模式会被自动关闭。",
        "deep_thinking_line": "Qwen3-Max 不支持深度思考模式,将保持快速模式。",
    },
    "qwen3-vl-plus": {
        "model_description": "你的基础模型是 Qwen3.5,由通义千问提供,支持图文多模态理解。",
        "thinking_model_line": "思考模式时仍使用 Qwen3.5,并开启思考能力。",
        "deep_thinking_line": "深度思考模式下,所有请求都将启用思考能力,以获得更强的分析表现。",
    },
    "minimax-m2.5": {
        "model_description": "你的基础模型是 MiniMax-M2.5,支持超长上下文,当前仅以深度思考模式运行。",
        "thinking_model_line": "MiniMax-M2.5 为思考模型,快速模式不会使用。",
        "deep_thinking_line": "深度思考模式下,所有请求持续输出思考过程并给出最终回答。",
    },
}
# Env-var names for each model's official-endpoint fallback.  The original
# implementation repeated the same override logic verbatim for the three
# models; this table plus the helpers below deduplicate it.
_OFFICIAL_ENDPOINT_ENV = {
    "kimi-k2.5": ("API_BASE_KIMI_OFFICIAL", "API_KEY_KIMI_OFFICIAL"),
    "qwen3-vl-plus": ("API_BASE_QWEN_OFFICIAL", "API_KEY_QWEN_OFFICIAL"),
    "minimax-m2.5": ("API_BASE_MINIMAX_OFFICIAL", "API_KEY_MINIMAX_OFFICIAL"),
}


def _official_credentials(key: str):
    """Resolve the (base_url, api_key) official-endpoint pair for *key*.

    The env (and .env) is re-read on every call so runtime updates take
    effect; the import-time *_OFFICIAL module constants act as a fallback.
    Returns (None, None) when *key* has no official-endpoint configuration.
    """
    env_names = _OFFICIAL_ENDPOINT_ENV.get(key)
    if not env_names:
        return None, None
    cached = {
        "kimi-k2.5": (KIMI_BASE_OFFICIAL, KIMI_KEY_OFFICIAL),
        "qwen3-vl-plus": (QWEN_BASE_OFFICIAL, QWEN_KEY_OFFICIAL),
        "minimax-m2.5": (MINIMAX_BASE_OFFICIAL, MINIMAX_KEY_OFFICIAL),
    }[key]
    base_env, key_env = env_names
    base_url = _env_optional(base_env) or cached[0]
    api_key = _env_optional(key_env) or cached[1]
    return base_url, api_key


def _apply_official_endpoint(profile: dict, base_url: str, api_key: str) -> dict:
    """Return a copy of *profile* with fast/thinking redirected to *base_url*.

    The top-level dict and its "fast"/"thinking" sections are shallow-copied
    so the shared MODEL_PROFILES entry is never mutated.  When the profile
    has no "thinking" section, one is derived from "fast" (matching the
    original per-branch logic).
    """
    patched = dict(profile)
    fast = dict(patched.get("fast") or {})
    thinking = dict(patched.get("thinking") or fast)
    credentials = {"base_url": base_url, "api_key": api_key}
    fast.update(credentials)
    thinking.update(credentials)
    patched["fast"] = fast
    patched["thinking"] = thinking
    return patched


def get_model_profile(key: str) -> dict:
    """Return the provider profile dict for model *key*.

    If the aliyun fallback is active for *key* and both official base URL
    and API key are configured, the returned profile is a copy whose
    fast/thinking sections point at the official endpoint instead.

    Raises:
        ValueError: *key* is unknown, or the profile has no API key.
    """
    if key not in MODEL_PROFILES:
        raise ValueError(f"未知模型 key: {key}")
    profile = MODEL_PROFILES[key]

    # Optional project helper; if it cannot be imported, fallback handling
    # is simply disabled (best-effort by design).
    try:
        from utils.aliyun_fallback import is_fallback_active
    except Exception:
        is_fallback_active = None

    if is_fallback_active and is_fallback_active(key):
        base_url, api_key = _official_credentials(key)
        if base_url and api_key:
            profile = _apply_official_endpoint(profile, base_url, api_key)

    # Basic validation: a "fast" section with an API key is mandatory.
    fast = profile.get("fast") or {}
    if not fast.get("api_key"):
        raise ValueError(f"模型 {key} 缺少 API Key 配置")
    return profile
def get_model_prompt_replacements(key: str) -> dict:
    """Return the prompt replacement fields for *key*.

    Any field missing from the model's override entry falls back to the
    Kimi entry, and finally to the empty string.
    """
    defaults = MODEL_PROMPT_OVERRIDES.get("kimi", {})
    specific = MODEL_PROMPT_OVERRIDES.get(key) or {}
    fields = ("model_description", "thinking_model_line", "deep_thinking_line")
    return {
        field: specific.get(field) or defaults.get(field) or ""
        for field in fields
    }
def get_model_context_window(key: str) -> Optional[int]:
    """Return the maximum context window (in tokens) for model *key*.

    Prefers the profile-level ``context_window``; when that is falsy, falls
    back to the ``fast`` section's value (which may also be ``None``).
    """
    profile = get_model_profile(key)
    window = profile.get("context_window")
    if not window:
        window = (profile.get("fast") or {}).get("context_window")
    return window