agent-Specialization/scratch_test/qwen_vl_reasoning_test.py

184 lines
5.8 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
手工验证 Qwen-VL 在多轮工具调用时是否保留 reasoning_content。
步骤:
1. 首次请求:用户要求“在思考中想一个 8 位数字但不要直接输出,先调用天气工具,再告诉我数字”。
2. 模型若返回 tool_calls,我们在本地模拟 weather 查询,把结果作为 tool 消息返回。
3. 第二次请求:带上第一次 assistant 工具调用 + tool 结果,检查模型是否能输出第一次思考里的数字。
运行:
export API_BASE_QWEN="https://dashscope.aliyuncs.com/compatible-mode/v1"
export API_KEY_QWEN="sk-..."
python3 scratch_test/qwen_vl_reasoning_test.py
日志:
输出到 logs/qwen_vl_reasoning_test_<timestamp>.json
"""
import json
import os
import time
from pathlib import Path
from typing import Dict, Any
import httpx
# Model configuration, read from the environment with built-in defaults.
# `or` is used instead of a .get() default so that a variable which is set
# but empty is treated the same as an unset one.
API_BASE = (
    os.environ.get("API_BASE_QWEN")
    or "https://dashscope.aliyuncs.com/compatible-mode/v1"
).rstrip("/")  # trailing slash removed so paths can be appended with "/..."
# API_KEY_QWEN takes precedence; DASHSCOPE_API_KEY is the fallback.
API_KEY = os.environ.get("API_KEY_QWEN") or os.environ.get("DASHSCOPE_API_KEY", "")
MODEL_ID = os.environ.get("MODEL_QWEN_VL") or "qwen3-vl-plus"

# Debug logs are written under ./logs (relative to the working directory).
# The directory is created at import time so log() can append right away.
LOG_DIR = Path("logs")
LOG_DIR.mkdir(parents=True, exist_ok=True)
# One log file per run, named after the Unix timestamp (whole seconds).
LOG_FILE = LOG_DIR / f"qwen_vl_reasoning_test_{int(time.time())}.json"
def log(data: Dict[str, Any]) -> None:
    """Append one debug record to the run's log file.

    The record is written as pretty-printed JSON (non-ASCII characters are
    kept unescaped) followed by a blank line, so the file holds a sequence
    of JSON documents separated by blank lines rather than one JSON value.

    Args:
        data: JSON-serializable mapping to record.
    """
    with open(LOG_FILE, "a", encoding="utf-8") as f:
        f.write(json.dumps(data, ensure_ascii=False, indent=2))
        f.write("\n\n")
def build_headers() -> Dict[str, str]:
    """Return the HTTP headers sent with every API request.

    The bearer token is read from the module-level API_KEY at call time.
    """
    return {
        "Authorization": f"Bearer {API_KEY}",
        "Content-Type": "application/json",
    }
def weather_tool_result(city: str = "上海", date: str = "今天") -> Dict[str, Any]:
    """Return a canned weather report standing in for a real weather tool.

    Only ``city`` and ``date`` are echoed from the arguments; the forecast
    text is fixed, and ``source`` marks the result as a local mock.

    Args:
        city: City name to echo back in the result.
        date: Date label to echo back in the result.

    Returns:
        A dict with the keys ``success``, ``city``, ``date``, ``forecast``
        and ``source``.
    """
    return {
        "success": True,
        "city": city,
        "date": date,
        "forecast": "多云转晴15~22℃东风3级",
        "source": "local-mock",
    }
def _parse_tool_args(raw_arguments: Any) -> Dict[str, Any]:
    """Decode a tool call's ``arguments`` value into a dict.

    Returns an empty dict when the value is missing, is not valid JSON, or
    decodes to something other than a JSON object, so the caller can always
    use ``.get()`` on the result.
    """
    if isinstance(raw_arguments, dict):
        # Already decoded; nothing to parse.
        return raw_arguments
    try:
        parsed = json.loads(raw_arguments or "{}")
    except (json.JSONDecodeError, TypeError):
        return {}
    return parsed if isinstance(parsed, dict) else {}


def _build_payload(messages: list, tools: list) -> Dict[str, Any]:
    """Build the chat-completions request body shared by both rounds."""
    return {
        "model": MODEL_ID,
        "messages": messages,
        "tools": tools,
        "tool_choice": "auto",
        "stream": False,
        "max_tokens": 512,
        # Key setting for this test: thinking mode is on in both rounds.
        "enable_thinking": True,
    }


def _post_chat(client: Any, payload: Dict[str, Any]) -> Dict[str, Any]:
    """POST ``payload`` to the chat/completions endpoint and return the JSON body.

    Raises whatever ``raise_for_status()`` raises on a non-success response.
    """
    resp = client.post(
        f"{API_BASE}/chat/completions",
        json=payload,
        headers=build_headers(),
    )
    resp.raise_for_status()
    return resp.json()


def main():
    """Run the two-round tool-call conversation and print/log the results.

    Round 1 asks the model to think of an 8-digit number (in its reasoning
    only) and to call the weather tool. If a tool call comes back, the
    weather lookup is mocked locally. Round 2 replays the assistant message
    (including its ``reasoning_content``) plus the tool result and prints
    the final reply so it can be checked by hand.

    Raises:
        SystemExit: if no API key is configured.
    """
    if not API_KEY:
        raise SystemExit("未配置 API_KEY_QWEN 或 DASHSCOPE_API_KEY无法测试")

    messages = [
        {
            "role": "system",
            "content": "你是测试助手。",
        },
        {
            "role": "user",
            "content": (
                "请你想一个明确的8位数字随便一个就行只在思考过程中呈现不要在最终回复里透露"
                "先调用天气查询工具;工具完成后再正常回答并把刚才思考出的数字告诉我。"
            ),
        },
    ]
    tools = [
        {
            "type": "function",
            "function": {
                "name": "query_weather",
                "description": "查询指定城市在指定日期的天气",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "city": {"type": "string", "description": "城市名称,如上海"},
                        "date": {"type": "string", "description": "日期,如今天/明天/2025-01-01"},
                    },
                    "required": ["city"],
                },
            },
        }
    ]

    # A single client serves both rounds so the connection can be reused.
    # NOTE(review): http2=True relies on httpx's optional HTTP/2 support
    # being installed - confirm in the target environment.
    with httpx.Client(http2=True, timeout=120) as client:
        # ---- Round 1 ----
        data1 = _post_chat(client, _build_payload(messages, tools))
        log({"step": "call1_response", "raw": data1})

        choice1 = data1["choices"][0]["message"]
        tool_calls = choice1.get("tool_calls") or []
        reasoning1 = choice1.get("reasoning_content")
        print("第一次返回 reasoning_content 长度:", len(reasoning1 or ""))
        print("第一次返回 tool_calls 数量:", len(tool_calls))

        if not tool_calls:
            print("模型未调用工具,测试无法继续。")
            log({"error": "no_tool_calls"})
            return

        # Only the first tool call is answered.
        tc = tool_calls[0]
        args = _parse_tool_args(tc["function"].get("arguments"))
        tool_result = weather_tool_result(
            city=args.get("city", "上海"),
            date=args.get("date", "今天"),
        )

        # Round-2 history: the original messages, the assistant's tool-call
        # message, and the tool result.
        messages2 = messages + [
            {
                "role": "assistant",
                # `or ""` also covers an explicit null content, which
                # .get("content", "") would pass through as None.
                "content": choice1.get("content") or "",
                "tool_calls": tool_calls,
                # Core of the test: reasoning_content is sent back unchanged.
                "reasoning_content": reasoning1 or "",
            },
            {
                "role": "tool",
                "tool_call_id": tc.get("id"),
                "name": tc["function"]["name"],
                "content": json.dumps(tool_result, ensure_ascii=False),
            },
        ]

        # ---- Round 2 ----
        data2 = _post_chat(client, _build_payload(messages2, tools))

    log({"step": "call2_response", "raw": data2, "messages_sent": messages2})
    choice2 = data2["choices"][0]["message"]
    reasoning2 = choice2.get("reasoning_content")
    content2 = choice2.get("content")
    print("第二次返回 reasoning_content 长度:", len(reasoning2 or ""))
    print("第二次最终回复:", content2)
    print(f"完整日志已保存到: {LOG_FILE}")
if __name__ == "__main__":
    # Run the manual check only when the file is executed as a script.
    main()