"""
Manually verify whether Qwen-VL preserves reasoning_content across multi-turn tool calls.

Steps:
1. First request: the user asks the model to "think of an 8-digit number in
   your reasoning but do not output it directly; call the weather tool first,
   then tell me the number".
2. If the model returns tool_calls, we simulate the weather query locally and
   send the result back as a tool message.
3. Second request: include the first assistant tool call plus the tool result,
   and check whether the model can output the number from its first round of
   reasoning.

Run:
    export API_BASE_QWEN="https://dashscope.aliyuncs.com/compatible-mode/v1"
    export API_KEY_QWEN="sk-..."
    python3 scratch_test/qwen_vl_reasoning_test.py

Log:
    Written to logs/qwen_vl_reasoning_test_<timestamp>.json
"""
|
||
|
||
import json
|
||
import os
|
||
import time
|
||
from pathlib import Path
|
||
from typing import Dict, Any
|
||
|
||
import httpx
|
||
|
||
# Model settings: read from environment variables, falling back to defaults
# when a variable is not set.
API_BASE = os.getenv(
    "API_BASE_QWEN",
    "https://dashscope.aliyuncs.com/compatible-mode/v1",
).rstrip("/")
API_KEY = os.getenv("API_KEY_QWEN", os.getenv("DASHSCOPE_API_KEY", ""))
MODEL_ID = os.getenv("MODEL_QWEN_VL", "qwen3-vl-plus")

# Debug logs go to a "logs" directory relative to the working directory
# (created at import time); each run gets its own file, named with the
# current Unix time truncated to whole seconds.
LOG_DIR = Path("logs")
LOG_DIR.mkdir(parents=True, exist_ok=True)
LOG_FILE = LOG_DIR.joinpath(f"qwen_vl_reasoning_test_{int(time.time())}.json")
|
||
|
||
|
||
def log(data: Dict[str, Any]):
    """Append one debug record to the log file.

    The record is serialized as indented JSON with non-ASCII characters
    written as-is, and is followed by a blank line so that successive
    records are visually separated.

    Args:
        data: JSON-serializable mapping to record.
    """
    with open(LOG_FILE, mode="a", encoding="utf-8") as fh:
        record = json.dumps(data, indent=2, ensure_ascii=False)
        fh.writelines((record, "\n\n"))
|
||
|
||
|
||
def build_headers():
    """Return the HTTP headers for an API request.

    Returns:
        A dict carrying the bearer-token ``Authorization`` header built from
        ``API_KEY`` and a JSON ``Content-Type`` header.
    """
    headers = {}
    headers["Authorization"] = f"Bearer {API_KEY}"
    headers["Content-Type"] = "application/json"
    return headers
|
||
|
||
|
||
def weather_tool_result(city: str = "上海", date: str = "今天"):
    """Simulate the weather tool by returning a fixed-structure result.

    Args:
        city: City name echoed back in the result.
        date: Date label echoed back in the result.

    Returns:
        A new dict with ``success``, ``city``, ``date``, a hard-coded
        ``forecast`` string, and ``source`` set to ``"local-mock"``.
    """
    result = dict(success=True, city=city, date=date)
    result["forecast"] = "多云转晴,15~22℃,东风3级"
    result["source"] = "local-mock"
    return result
|
||
|
||
|
||
def _build_payload(messages, tools):
    """Build a non-streaming chat-completions payload with thinking enabled."""
    return {
        "model": MODEL_ID,
        "messages": messages,
        "tools": tools,
        "tool_choice": "auto",
        "stream": False,
        "max_tokens": 512,
        # Key setting: turn on thinking mode for both rounds.
        "enable_thinking": True,
    }


def _post_chat(client, payload):
    """POST one payload to the chat-completions endpoint and return its JSON body."""
    resp = client.post(
        f"{API_BASE}/chat/completions",
        json=payload,
        headers=build_headers(),
    )
    resp.raise_for_status()
    return resp.json()


def _parse_tool_args(raw):
    """Decode a tool call's arguments; return {} unless they form a JSON object."""
    if isinstance(raw, dict):
        # Already decoded; nothing to parse.
        return raw
    try:
        parsed = json.loads(raw or "{}")
    except (json.JSONDecodeError, TypeError):
        return {}
    # Valid JSON that is not an object (e.g. "null" or a list) has no
    # named arguments to read.
    return parsed if isinstance(parsed, dict) else {}


def main():
    """Run the two-round reasoning_content retention check.

    Round 1 asks the model to pick an 8-digit number in its reasoning and to
    call the weather tool. If a tool call comes back, the weather lookup is
    mocked locally and round 2 resends the conversation, including the
    assistant's tool call and its ``reasoning_content``, so the final reply
    can be inspected by hand. Both raw responses are written to the log file
    and a short summary is printed.

    Raises:
        SystemExit: if no API key is configured.
        httpx.HTTPStatusError: if either request returns an error status.
    """
    if not API_KEY:
        raise SystemExit("未配置 API_KEY_QWEN 或 DASHSCOPE_API_KEY,无法测试")

    messages = [
        {
            "role": "system",
            "content": "你是测试助手。",
        },
        {
            "role": "user",
            "content": (
                "请你想一个明确的8位数字,随便一个就行,只在思考过程中呈现,不要在最终回复里透露;"
                "先调用天气查询工具;工具完成后再正常回答并把刚才思考出的数字告诉我。"
            ),
        },
    ]

    tools = [
        {
            "type": "function",
            "function": {
                "name": "query_weather",
                "description": "查询指定城市在指定日期的天气",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "city": {"type": "string", "description": "城市名称,如上海"},
                        "date": {"type": "string", "description": "日期,如今天/明天/2025-01-01"},
                    },
                    "required": ["city"],
                },
            },
        }
    ]

    # One client serves both rounds so the second request can reuse the
    # connection opened by the first.
    with httpx.Client(http2=True, timeout=120) as client:
        # ---- Round 1 ----
        data1 = _post_chat(client, _build_payload(messages, tools))
        log({"step": "call1_response", "raw": data1})

        choice1 = data1["choices"][0]["message"]
        tool_calls = choice1.get("tool_calls") or []
        reasoning1 = choice1.get("reasoning_content")

        print("第一次返回 reasoning_content 长度:", len(reasoning1 or ""))
        print("第一次返回 tool_calls 数量:", len(tool_calls))

        if not tool_calls:
            print("模型未调用工具,测试无法继续。")
            log({"error": "no_tool_calls"})
            return

        # Only the first tool call is answered.
        tc = tool_calls[0]
        args = _parse_tool_args(tc["function"].get("arguments"))
        tool_result = weather_tool_result(
            city=args.get("city") or "上海",
            date=args.get("date") or "今天",
        )

        # Round-2 conversation: the original messages plus the assistant's
        # tool-call message and the tool result.
        messages2 = messages + [
            {
                "role": "assistant",
                # .get(key, "") would still yield None for an explicit null,
                # so fall back with `or` to always send a string.
                "content": choice1.get("content") or "",
                "tool_calls": tool_calls,
                # Pass reasoning_content back unchanged: this is the core
                # point under test.
                "reasoning_content": reasoning1 or "",
            },
            {
                "role": "tool",
                "tool_call_id": tc.get("id"),
                "name": tc["function"]["name"],
                "content": json.dumps(tool_result, ensure_ascii=False),
            },
        ]

        # ---- Round 2 ----
        data2 = _post_chat(client, _build_payload(messages2, tools))
        log({"step": "call2_response", "raw": data2, "messages_sent": messages2})

    choice2 = data2["choices"][0]["message"]
    reasoning2 = choice2.get("reasoning_content")
    content2 = choice2.get("content")

    print("第二次返回 reasoning_content 长度:", len(reasoning2 or ""))
    print("第二次最终回复:", content2)
    print(f"完整日志已保存到: {LOG_FILE}")
|
||
|
||
|
||
# Run the check only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|