"""Model profile configuration for agent-Specialization (config/model_profiles.py).

Declares API endpoints, credentials, model ids and context windows for each
supported provider (Kimi, DeepSeek, Qwen, MiniMax), plus per-model prompt
overrides and the lookup helpers used by the rest of the application.
"""

import os
from pathlib import Path
from typing import Optional
def _env(name: str, default: str = "") -> str:
    """Look up *name* in the process environment, returning *default* when unset."""
    return os.getenv(name, default)
def _read_dotenv_value(name: str) -> Optional[str]:
    """Best-effort lookup of *name* in the project's ``.env`` file.

    The file is expected one directory above this module's parent (i.e. the
    project root).  Lines that are blank, comments, or lack ``=`` are skipped;
    surrounding double/single quotes around the value are stripped.  Any read
    or decode error is deliberately swallowed and treated as "not found".
    """
    env_path = Path(__file__).resolve().parents[1] / ".env"
    if not env_path.exists():
        return None
    try:
        for raw_line in env_path.read_text(encoding="utf-8").splitlines():
            line = raw_line.strip()
            if not line or line.startswith("#") or "=" not in line:
                continue
            key, val = line.split("=", 1)
            if key.strip() == name:
                return val.strip().strip('"').strip("'")
    except Exception:
        # Best-effort: an unreadable .env must not break configuration loading.
        return None
    return None


def _env_optional(name: str) -> Optional[str]:
    """Return the value of env var *name*, or ``None`` when it is absent/blank.

    When the variable is not set in the process environment, the project's
    ``.env`` file is re-read on every call so values can be updated while the
    application is running.  Whitespace is stripped; an empty result maps to
    ``None``.
    """
    value = os.environ.get(name)
    if value is None:
        value = _read_dotenv_value(name)
    if value is None:
        return None
    value = value.strip()
    return value or None
# Maximum context window per model key (unit: tokens).
CONTEXT_WINDOWS = {
    "kimi": 256_000,
    "kimi-k2.5": 256_000,
    "qwen3-max": 256_000,
    "qwen3-vl-plus": 256_000,
    "minimax-m2.5": 204_800,
    "deepseek": 128_000,
}
# --- Kimi (the default provider) ---
# Pattern for every provider below: a provider-specific env var is read first,
# then a legacy/shared var (where one exists), then a hard-coded default.
KIMI_BASE = _env("API_BASE_KIMI", _env("AGENT_API_BASE_URL", "https://api.moonshot.cn/v1"))
KIMI_KEY = _env("API_KEY_KIMI", _env("AGENT_API_KEY", ""))
# "_OFFICIAL" pairs are optional alternate endpoints consumed by the
# fallback logic in get_model_profile.
KIMI_BASE_OFFICIAL = _env_optional("API_BASE_KIMI_OFFICIAL")
KIMI_KEY_OFFICIAL = _env_optional("API_KEY_KIMI_OFFICIAL")
KIMI_FAST_MODEL = _env("MODEL_KIMI_FAST", _env("AGENT_MODEL_ID", "kimi-k2-0905-preview"))
KIMI_THINK_MODEL = _env("MODEL_KIMI_THINK", _env("AGENT_THINKING_MODEL_ID", "kimi-k2-thinking"))
KIMI_25_MODEL = _env("MODEL_KIMI_25", "kimi-k2.5")
# --- DeepSeek ---
DEEPSEEK_BASE = _env("API_BASE_DEEPSEEK", "https://api.deepseek.com")
DEEPSEEK_KEY = _env("API_KEY_DEEPSEEK", _env("AGENT_DEEPSEEK_API_KEY", ""))
DEEPSEEK_FAST_MODEL = _env("MODEL_DEEPSEEK_FAST", "deepseek-chat")
DEEPSEEK_THINK_MODEL = _env("MODEL_DEEPSEEK_THINK", "deepseek-reasoner")
# --- Qwen ---
QWEN_BASE = _env("API_BASE_QWEN", "https://dashscope.aliyuncs.com/compatible-mode/v1")
QWEN_KEY = _env("API_KEY_QWEN", _env("DASHSCOPE_API_KEY", ""))
QWEN_BASE_OFFICIAL = _env_optional("API_BASE_QWEN_OFFICIAL")
QWEN_KEY_OFFICIAL = _env_optional("API_KEY_QWEN_OFFICIAL")
QWEN_MAX_MODEL = _env("MODEL_QWEN_MAX", "qwen3-max")
# NOTE(review): default model id "qwen3.5-plus" is served under the
# "qwen3-vl-plus" profile key — confirm this mismatch is intentional.
QWEN_VL_MODEL = _env("MODEL_QWEN_VL", "qwen3.5-plus")
# --- MiniMax ---
MINIMAX_BASE = _env("API_BASE_MINIMAX", "https://api.minimaxi.com/v1")
MINIMAX_KEY = _env("API_KEY_MINIMAX", "")
MINIMAX_BASE_OFFICIAL = _env_optional("API_BASE_MINIMAX_OFFICIAL")
MINIMAX_KEY_OFFICIAL = _env_optional("API_KEY_MINIMAX_OFFICIAL")
MINIMAX_MODEL = _env("MODEL_MINIMAX", "MiniMax-M2.5")
# Profile schema per model key:
#   context_window    - max context size in tokens
#   fast / thinking   - per-mode connection settings (base_url, api_key,
#                       model_id, max_tokens, context_window, extra_params);
#                       thinking is None when the model cannot think
#   supports_thinking - whether a thinking mode exists
#   fast_only / deep_only / hidden - selection/UI flags
#   name              - display name
MODEL_PROFILES = {
    "kimi": {
        "context_window": CONTEXT_WINDOWS["kimi"],
        "fast": {
            "base_url": KIMI_BASE,
            "api_key": KIMI_KEY,
            "model_id": KIMI_FAST_MODEL,
            "max_tokens": None,
            "context_window": CONTEXT_WINDOWS["kimi"],
        },
        "thinking": {
            "base_url": KIMI_BASE,
            "api_key": KIMI_KEY,
            "model_id": KIMI_THINK_MODEL,
            "max_tokens": None,
            "context_window": CONTEXT_WINDOWS["kimi"],
        },
        "supports_thinking": True,
        "fast_only": False,
        "name": "Kimi-k2"
    },
    "kimi-k2.5": {
        "context_window": CONTEXT_WINDOWS["kimi-k2.5"],
        # Same model id for both modes; thinking is toggled via extra_params.
        "fast": {
            "base_url": KIMI_BASE,
            "api_key": KIMI_KEY,
            "model_id": KIMI_25_MODEL,
            "max_tokens": None,
            "context_window": CONTEXT_WINDOWS["kimi-k2.5"],
            "extra_params": {"thinking": {"type": "disabled"}}
        },
        "thinking": {
            "base_url": KIMI_BASE,
            "api_key": KIMI_KEY,
            "model_id": KIMI_25_MODEL,
            "max_tokens": None,
            "context_window": CONTEXT_WINDOWS["kimi-k2.5"],
            "extra_params": {"thinking": {"type": "enabled"}, "enable_thinking": True}
        },
        "supports_thinking": True,
        "fast_only": False,
        "name": "Kimi-k2.5"
    },
    "deepseek": {
        "context_window": CONTEXT_WINDOWS["deepseek"],
        "fast": {
            "base_url": DEEPSEEK_BASE,
            "api_key": DEEPSEEK_KEY,
            "model_id": DEEPSEEK_FAST_MODEL,
            "max_tokens": 8192,
            "context_window": CONTEXT_WINDOWS["deepseek"]
        },
        "thinking": {
            "base_url": DEEPSEEK_BASE,
            "api_key": DEEPSEEK_KEY,
            "model_id": DEEPSEEK_THINK_MODEL,
            "max_tokens": 65536,
            "context_window": CONTEXT_WINDOWS["deepseek"]
        },
        "supports_thinking": True,
        "fast_only": False,
        "name": "DeepSeek"
    },
    "qwen3-max": {
        "context_window": CONTEXT_WINDOWS["qwen3-max"],
        "fast": {
            "base_url": QWEN_BASE,
            "api_key": QWEN_KEY,
            "model_id": QWEN_MAX_MODEL,
            "max_tokens": 65536,
            "context_window": CONTEXT_WINDOWS["qwen3-max"]
        },
        "thinking": None,  # thinking mode is not supported
        "supports_thinking": False,
        "fast_only": True,
        "name": "Qwen3-Max",
        "hidden": True
    },
    "qwen3-vl-plus": {
        "context_window": CONTEXT_WINDOWS["qwen3-vl-plus"],
        "fast": {
            "base_url": QWEN_BASE,
            "api_key": QWEN_KEY,
            "model_id": QWEN_VL_MODEL,
            "max_tokens": 32768,
            "context_window": CONTEXT_WINDOWS["qwen3-vl-plus"],
            "extra_params": {}
        },
        "thinking": {
            "base_url": QWEN_BASE,
            "api_key": QWEN_KEY,
            "model_id": QWEN_VL_MODEL,
            "max_tokens": 32768,
            "context_window": CONTEXT_WINDOWS["qwen3-vl-plus"],
            "extra_params": {"enable_thinking": True}
        },
        "supports_thinking": True,
        "fast_only": False,
        "name": "Qwen3.5"
    },
    "minimax-m2.5": {
        "context_window": CONTEXT_WINDOWS["minimax-m2.5"],
        "fast": {
            "base_url": MINIMAX_BASE,
            "api_key": MINIMAX_KEY,
            "model_id": MINIMAX_MODEL,
            "max_tokens": 65536,
            "context_window": CONTEXT_WINDOWS["minimax-m2.5"],
            "extra_params": {"reasoning_split": True}
        },
        "thinking": {
            "base_url": MINIMAX_BASE,
            "api_key": MINIMAX_KEY,
            "model_id": MINIMAX_MODEL,
            "max_tokens": 65536,
            "context_window": CONTEXT_WINDOWS["minimax-m2.5"],
            "extra_params": {"reasoning_split": True}
        },
        "supports_thinking": True,
        "fast_only": False,
        "deep_only": True,
        "name": "MiniMax-M2.5"
    }
}
# Per-model prompt-template replacement strings (consumed by
# get_model_prompt_replacements).  The Chinese text below is injected verbatim
# into user-facing system prompts — do not translate or reformat it.
# Fields: model_description, thinking_model_line, deep_thinking_line.
MODEL_PROMPT_OVERRIDES = {
    "kimi": {
        "model_description": "你的基础模型是 Kimi-k2由月之暗面公司开发是一个开源的 MoE 架构模型,拥有 1T 参数和 32B 激活参数,当前智能助手应用由火山引擎提供 API 服务。",
        "thinking_model_line": "思考模式时,第一次请求的模型不是 Kimi-k2而是 Kimi-k2-Thinking一个更善于分析复杂问题、规划复杂流程的模型在后续请求时模型会换回 Kimi-k2。",
        "deep_thinking_line": "在深度思考模式中,请求的模型是 Kimi-k2-Thinking一个更善于分析复杂问题、规划复杂流程的模型。"
    },
    "kimi-k2.5": {
        "model_description": "你的基础模型是 Kimi-k2.5,支持图文多模态,并通过 thinking 参数开启/关闭思考能力。",
        "thinking_model_line": "思考模式时使用同一个 Kimi-k2.5 模型,但会在请求中注入 thinking={\"type\": \"enabled\"} 来开启思考;快速模式则传递 thinking={\"type\": \"disabled\"}。",
        "deep_thinking_line": "深度思考模式下,所有请求都会携带 thinking={\"type\": \"enabled\"},以获得持续的推理能力。"
    },
    "deepseek": {
        "model_description": "你的基础模型是 DeepSeek-V3.2deepseek-chat由 DeepSeek 提供,数学与推理能力较强,当前通过官方 API 调用。",
        "thinking_model_line": "思考模式时,第一次请求使用 DeepSeek-Reasoner一个强化推理的模型后续请求会切回 DeepSeek-V3.2。",
        "deep_thinking_line": "在深度思考模式中,请求的模型是 DeepSeek-Reasoner用于深入分析复杂问题并规划步骤。"
    },
    "qwen3-max": {
        "model_description": "你的基础模型是 Qwen3-Max由通义千问提供当前仅支持快速模式不提供思考或深度思考能力。",
        "thinking_model_line": "Qwen3-Max 仅支持快速模式,思考模式会被自动关闭。",
        "deep_thinking_line": "Qwen3-Max 不支持深度思考模式,将保持快速模式。"
    },
    "qwen3-vl-plus": {
        "model_description": "你的基础模型是 Qwen3.5,由通义千问提供,支持图文多模态理解。",
        "thinking_model_line": "思考模式时仍使用 Qwen3.5,并开启思考能力。",
        "deep_thinking_line": "深度思考模式下,所有请求都将启用思考能力,以获得更强的分析表现。"
    },
    "minimax-m2.5": {
        "model_description": "你的基础模型是 MiniMax-M2.5,支持超长上下文,当前仅以深度思考模式运行。",
        "thinking_model_line": "MiniMax-M2.5 为思考模型,快速模式不会使用。",
        "deep_thinking_line": "深度思考模式下,所有请求持续输出思考过程并给出最终回答。"
    }
}
# Model keys that may be redirected to the provider's official endpoint when
# the primary route reports a fallback condition.  Values are the env var
# names holding the official base URL / API key.
_OFFICIAL_ENDPOINT_ENV = {
    "kimi-k2.5": ("API_BASE_KIMI_OFFICIAL", "API_KEY_KIMI_OFFICIAL"),
    "qwen3-vl-plus": ("API_BASE_QWEN_OFFICIAL", "API_KEY_QWEN_OFFICIAL"),
    "minimax-m2.5": ("API_BASE_MINIMAX_OFFICIAL", "API_KEY_MINIMAX_OFFICIAL"),
}
# Import-time snapshots of the same variables, used as a last resort when the
# live env/.env lookup comes back empty.
_OFFICIAL_ENDPOINT_SNAPSHOT = {
    "kimi-k2.5": (KIMI_BASE_OFFICIAL, KIMI_KEY_OFFICIAL),
    "qwen3-vl-plus": (QWEN_BASE_OFFICIAL, QWEN_KEY_OFFICIAL),
    "minimax-m2.5": (MINIMAX_BASE_OFFICIAL, MINIMAX_KEY_OFFICIAL),
}


def _with_endpoint(profile: dict, base_url: str, api_key: str) -> dict:
    """Return a shallow copy of *profile* whose fast/thinking sections point
    at *base_url*/*api_key*.

    A missing thinking section is cloned from the (pre-override) fast section,
    matching the original per-branch behaviour.
    """
    patched = dict(profile)
    fast = dict(patched.get("fast") or {})
    thinking = dict(patched.get("thinking") or fast)
    for section in (fast, thinking):
        section.update({"base_url": base_url, "api_key": api_key})
    patched["fast"] = fast
    patched["thinking"] = thinking
    return patched


def get_model_profile(key: str) -> dict:
    """Return the connection profile for model *key*.

    When the Aliyun fallback is active for *key* and an official endpoint is
    configured, the returned profile is a copy redirected to that endpoint;
    otherwise the shared MODEL_PROFILES entry is returned as-is.

    Raises:
        ValueError: *key* is unknown, or the resolved profile has no API key.
    """
    if key not in MODEL_PROFILES:
        raise ValueError(f"未知模型 key: {key}")
    profile = MODEL_PROFILES[key]
    # Imported lazily so this module stays importable when
    # utils.aliyun_fallback is unavailable (e.g. standalone tooling).
    try:
        from utils.aliyun_fallback import is_fallback_active
    except Exception:
        is_fallback_active = None
    if is_fallback_active and is_fallback_active(key) and key in _OFFICIAL_ENDPOINT_ENV:
        base_env, key_env = _OFFICIAL_ENDPOINT_ENV[key]
        snap_base, snap_key = _OFFICIAL_ENDPOINT_SNAPSHOT[key]
        # Re-read env/.env on every call so the official endpoint can be
        # rotated at runtime; fall back to the import-time snapshot.
        base_url = _env_optional(base_env) or snap_base
        api_key = _env_optional(key_env) or snap_key
        if base_url and api_key:
            profile = _with_endpoint(profile, base_url, api_key)
    # Basic validation: a usable profile must have a fast section with a key.
    fast = profile.get("fast") or {}
    if not fast.get("api_key"):
        raise ValueError(f"模型 {key} 缺少 API Key 配置")
    return profile
def get_model_prompt_replacements(key: str) -> dict:
    """Return the prompt replacement fields for model *key*.

    Any field missing for *key* falls back to the Kimi entry, and finally to
    an empty string, so callers always receive all three keys.
    """
    kimi_defaults = MODEL_PROMPT_OVERRIDES.get("kimi", {})
    entry = MODEL_PROMPT_OVERRIDES.get(key) or {}
    fields = ("model_description", "thinking_model_line", "deep_thinking_line")
    return {f: entry.get(f) or kimi_defaults.get(f) or "" for f in fields}
def get_model_context_window(key: str) -> Optional[int]:
    """Return the model's maximum context window in tokens.

    Prefers the profile-level ``context_window`` and falls back to the one in
    the ``fast`` section; ``None`` when neither is configured.
    """
    profile = get_model_profile(key)
    window = profile.get("context_window")
    if not window:
        window = (profile.get("fast") or {}).get("context_window")
    return window