# agent-Specialization/config/model_profiles.py
#
# Model provider profiles for the agent: API endpoints, keys, model IDs,
# context windows, and per-model prompt wording overrides.

import os
from typing import Optional
def _env(name: str, default: str = "") -> str:
return os.environ.get(name, default)
# Per-model maximum context window (unit: tokens).
CONTEXT_WINDOWS = {
    "kimi": 256_000,
    "kimi-k2.5": 256_000,
    "qwen3-max": 256_000,
    "qwen3-vl-plus": 256_000,
    "deepseek": 128_000,
}
# Kimi (default provider). Provider-specific env vars win; the generic
# AGENT_* variables are kept as a backward-compatible fallback.
KIMI_BASE = _env("API_BASE_KIMI", _env("AGENT_API_BASE_URL", "https://api.moonshot.cn/v1"))
KIMI_KEY = _env("API_KEY_KIMI", _env("AGENT_API_KEY", ""))
KIMI_FAST_MODEL = _env("MODEL_KIMI_FAST", _env("AGENT_MODEL_ID", "kimi-k2-0905-preview"))
KIMI_THINK_MODEL = _env("MODEL_KIMI_THINK", _env("AGENT_THINKING_MODEL_ID", "kimi-k2-thinking"))
KIMI_25_MODEL = _env("MODEL_KIMI_25", "kimi-k2.5")
# DeepSeek (official API endpoint).
DEEPSEEK_BASE = _env("API_BASE_DEEPSEEK", "https://api.deepseek.com")
DEEPSEEK_KEY = _env("API_KEY_DEEPSEEK", _env("AGENT_DEEPSEEK_API_KEY", ""))
DEEPSEEK_FAST_MODEL = _env("MODEL_DEEPSEEK_FAST", "deepseek-chat")
DEEPSEEK_THINK_MODEL = _env("MODEL_DEEPSEEK_THINK", "deepseek-reasoner")
# Qwen via DashScope's OpenAI-compatible endpoint; DASHSCOPE_API_KEY is the fallback key.
QWEN_BASE = _env("API_BASE_QWEN", "https://dashscope.aliyuncs.com/compatible-mode/v1")
QWEN_KEY = _env("API_KEY_QWEN", _env("DASHSCOPE_API_KEY", ""))
QWEN_MAX_MODEL = _env("MODEL_QWEN_MAX", "qwen3-max")
QWEN_VL_MODEL = _env("MODEL_QWEN_VL", "qwen3-vl-plus")
def _endpoint(
    base_url: str,
    api_key: str,
    model_id: str,
    max_tokens: Optional[int],
    context_window: int,
    extra_params: Optional[dict] = None,
) -> dict:
    """Build one endpoint config dict for a model profile.

    The ``extra_params`` key is included only when a value is given, so
    profiles that originally omitted the key keep omitting it (callers may
    distinguish a missing key from an empty dict).
    """
    config = {
        "base_url": base_url,
        "api_key": api_key,
        "model_id": model_id,
        "max_tokens": max_tokens,
        "context_window": context_window,
    }
    if extra_params is not None:
        config["extra_params"] = extra_params
    return config


# Registry of selectable model providers. Each profile exposes:
#   context_window    - provider-level max tokens
#   fast / thinking   - endpoint configs ("thinking" is None when unsupported)
#   supports_thinking - whether a thinking mode exists
#   fast_only         - True when only fast mode is available
#   name              - human-readable display name
MODEL_PROFILES = {
    "kimi": {
        "context_window": CONTEXT_WINDOWS["kimi"],
        "fast": _endpoint(KIMI_BASE, KIMI_KEY, KIMI_FAST_MODEL, None, CONTEXT_WINDOWS["kimi"]),
        "thinking": _endpoint(KIMI_BASE, KIMI_KEY, KIMI_THINK_MODEL, None, CONTEXT_WINDOWS["kimi"]),
        "supports_thinking": True,
        "fast_only": False,
        "name": "Kimi-k2",
    },
    "kimi-k2.5": {
        "context_window": CONTEXT_WINDOWS["kimi-k2.5"],
        # Same model id for both modes; the "thinking" request param toggles reasoning.
        "fast": _endpoint(
            KIMI_BASE, KIMI_KEY, KIMI_25_MODEL, None, CONTEXT_WINDOWS["kimi-k2.5"],
            extra_params={"thinking": {"type": "disabled"}},
        ),
        "thinking": _endpoint(
            KIMI_BASE, KIMI_KEY, KIMI_25_MODEL, None, CONTEXT_WINDOWS["kimi-k2.5"],
            extra_params={"thinking": {"type": "enabled"}},
        ),
        "supports_thinking": True,
        "fast_only": False,
        "name": "Kimi-k2.5",
    },
    "deepseek": {
        "context_window": CONTEXT_WINDOWS["deepseek"],
        "fast": _endpoint(DEEPSEEK_BASE, DEEPSEEK_KEY, DEEPSEEK_FAST_MODEL, 8192, CONTEXT_WINDOWS["deepseek"]),
        "thinking": _endpoint(DEEPSEEK_BASE, DEEPSEEK_KEY, DEEPSEEK_THINK_MODEL, 65536, CONTEXT_WINDOWS["deepseek"]),
        "supports_thinking": True,
        "fast_only": False,
        "name": "DeepSeek",
    },
    "qwen3-max": {
        "context_window": CONTEXT_WINDOWS["qwen3-max"],
        "fast": _endpoint(QWEN_BASE, QWEN_KEY, QWEN_MAX_MODEL, 65536, CONTEXT_WINDOWS["qwen3-max"]),
        "thinking": None,  # thinking mode not supported
        "supports_thinking": False,
        "fast_only": True,
        "name": "Qwen3-Max",
    },
    "qwen3-vl-plus": {
        "context_window": CONTEXT_WINDOWS["qwen3-vl-plus"],
        "fast": _endpoint(
            QWEN_BASE, QWEN_KEY, QWEN_VL_MODEL, 32768, CONTEXT_WINDOWS["qwen3-vl-plus"],
            extra_params={},
        ),
        "thinking": _endpoint(
            QWEN_BASE, QWEN_KEY, QWEN_VL_MODEL, 32768, CONTEXT_WINDOWS["qwen3-vl-plus"],
            extra_params={"enable_thinking": True},
        ),
        "supports_thinking": True,
        "fast_only": False,
        "name": "Qwen3-VL",
    },
}
# Per-model prompt fragments. The values are user-facing runtime strings
# (intentionally in Chinese) injected into the system prompt. Each entry may
# define: model_description / thinking_model_line / deep_thinking_line.
# Missing entries/fields fall back to the "kimi" wording in
# get_model_prompt_replacements().
MODEL_PROMPT_OVERRIDES = {
"kimi": {
"model_description": "你的基础模型是 Kimi-k2由月之暗面公司开发是一个开源的 MoE 架构模型,拥有 1T 参数和 32B 激活参数,当前智能助手应用由火山引擎提供 API 服务。",
"thinking_model_line": "思考模式时,第一次请求的模型不是 Kimi-k2而是 Kimi-k2-Thinking一个更善于分析复杂问题、规划复杂流程的模型在后续请求时模型会换回 Kimi-k2。",
"deep_thinking_line": "在深度思考模式中,请求的模型是 Kimi-k2-Thinking一个更善于分析复杂问题、规划复杂流程的模型。"
},
"kimi-k2.5": {
"model_description": "你的基础模型是 Kimi-k2.5,支持图文多模态,并通过 thinking 参数开启/关闭思考能力。",
"thinking_model_line": "思考模式时使用同一个 Kimi-k2.5 模型,但会在请求中注入 thinking={\"type\": \"enabled\"} 来开启思考;快速模式则传递 thinking={\"type\": \"disabled\"}。",
"deep_thinking_line": "深度思考模式下,所有请求都会携带 thinking={\"type\": \"enabled\"},以获得持续的推理能力。"
},
"deepseek": {
"model_description": "你的基础模型是 DeepSeek-V3.2deepseek-chat由 DeepSeek 提供,数学与推理能力较强,当前通过官方 API 调用。",
"thinking_model_line": "思考模式时,第一次请求使用 DeepSeek-Reasoner一个强化推理的模型后续请求会切回 DeepSeek-V3.2。",
"deep_thinking_line": "在深度思考模式中,请求的模型是 DeepSeek-Reasoner用于深入分析复杂问题并规划步骤。"
},
"qwen3-max": {
"model_description": "你的基础模型是 Qwen3-Max由通义千问提供当前仅支持快速模式不提供思考或深度思考能力。",
"thinking_model_line": "Qwen3-Max 仅支持快速模式,思考模式会被自动关闭。",
"deep_thinking_line": "Qwen3-Max 不支持深度思考模式,将保持快速模式。"
},
"qwen3-vl-plus": {
"model_description": "你的基础模型是 Qwen3-VL-Plus支持图文多模态理解接口来自通义千问 DashScope。",
"thinking_model_line": "思考模式时,请求的模型仍为 Qwen3-VL-Plus开启思考能力后续请求会切回快速模型。",
"deep_thinking_line": "在深度思考模式中,请求的模型是 Qwen3-VL-Plus思考版以获得更强的分析能力。"
}
}
def get_model_profile(key: str) -> dict:
    """Look up the profile for model *key*, validating it is usable.

    Raises:
        ValueError: if *key* is unknown, or the profile's fast section has
            no API key configured.
    """
    try:
        profile = MODEL_PROFILES[key]
    except KeyError:
        raise ValueError(f"未知模型 key: {key}") from None
    # Minimal sanity check: a usable profile needs a fast section with a key.
    fast_section = profile.get("fast") or {}
    if not fast_section.get("api_key"):
        raise ValueError(f"模型 {key} 缺少 API Key 配置")
    return profile
def get_model_prompt_replacements(key: str) -> dict:
    """Return the prompt replacement fields for *key*.

    Any field missing for *key* falls back to the "kimi" wording, and
    finally to the empty string.
    """
    primary = MODEL_PROMPT_OVERRIDES.get(key) or {}
    default = MODEL_PROMPT_OVERRIDES.get("kimi", {})
    fields = ("model_description", "thinking_model_line", "deep_thinking_line")
    return {field: primary.get(field) or default.get(field) or "" for field in fields}
def get_model_context_window(key: str) -> Optional[int]:
    """Return the model's maximum context window in tokens.

    Prefers the profile-level ``context_window`` and falls back to the fast
    endpoint's value; may return None if neither is set. Propagates the
    ValueError raised by get_model_profile() for unknown/unconfigured keys.
    """
    profile = get_model_profile(key)
    window = profile.get("context_window")
    if not window:
        window = (profile.get("fast") or {}).get("context_window")
    return window