"""Manually verify whether Qwen-VL keeps ``reasoning_content`` across a tool call.

Steps:
1. First request: the user asks the model to think of an 8-digit number in its
   reasoning only (not in the reply), to call the weather tool first, and to
   reveal the number afterwards.
2. If the model returns ``tool_calls``, the weather lookup is mocked locally
   and its result is sent back as a ``tool`` message.
3. Second request: replays the first assistant tool-call message plus the tool
   result, so the printed final reply can be inspected for the number from the
   first round of reasoning.

Run:
    export API_BASE_QWEN="https://dashscope.aliyuncs.com/compatible-mode/v1"
    export API_KEY_QWEN="sk-..."
    python3 scratch_test/qwen_vl_reasoning_test.py

Log:
    Written to logs/qwen_vl_reasoning_test_{unix_timestamp}.json
"""

import json
import os
import time
from pathlib import Path
from typing import Any, Dict, List

try:
    import httpx
except ImportError:  # reported with a clear message in main()
    httpx = None

# Model configuration; defaults are used when the environment variables are unset.
API_BASE = os.environ.get(
    "API_BASE_QWEN", "https://dashscope.aliyuncs.com/compatible-mode/v1"
).rstrip("/")
API_KEY = os.environ.get("API_KEY_QWEN", os.environ.get("DASHSCOPE_API_KEY", ""))
MODEL_ID = os.environ.get("MODEL_QWEN_VL", "qwen3-vl-plus")

LOG_DIR = Path("logs")
LOG_DIR.mkdir(parents=True, exist_ok=True)
# One log file per run, keyed by the start time in whole seconds.
LOG_FILE = LOG_DIR / f"qwen_vl_reasoning_test_{int(time.time())}.json"


def log(data: Dict[str, Any]) -> None:
    """Append one pretty-printed JSON record to the debug log file."""
    with open(LOG_FILE, "a", encoding="utf-8") as f:
        f.write(json.dumps(data, ensure_ascii=False, indent=2))
        f.write("\n\n")


def build_headers() -> Dict[str, str]:
    """Return the HTTP headers (bearer token + JSON content type) for the API."""
    return {
        "Authorization": f"Bearer {API_KEY}",
        "Content-Type": "application/json",
    }


def weather_tool_result(city: str = "上海", date: str = "今天") -> Dict[str, Any]:
    """Return a fixed mock weather payload for the given city and date."""
    return {
        "success": True,
        "city": city,
        "date": date,
        "forecast": "多云转晴,15~22℃,东风3级",
        "source": "local-mock",
    }


def parse_tool_arguments(raw: Any) -> Dict[str, Any]:
    """Decode a tool call's ``arguments`` field into a dict.

    Returns an empty dict when ``raw`` is empty, is not valid JSON, or decodes
    to something other than a JSON object, so callers can always use ``.get``.
    A value that is already a dict is returned unchanged.
    """
    if isinstance(raw, dict):
        return raw
    if not raw:
        return {}
    try:
        parsed = json.loads(raw)
    except (TypeError, ValueError):  # json.JSONDecodeError subclasses ValueError
        return {}
    return parsed if isinstance(parsed, dict) else {}


def build_payload(
    messages: List[Dict[str, Any]], tools: List[Dict[str, Any]]
) -> Dict[str, Any]:
    """Build the chat-completions request body shared by both rounds."""
    return {
        "model": MODEL_ID,
        "messages": messages,
        "tools": tools,
        "tool_choice": "auto",
        "stream": False,
        "max_tokens": 512,
        # Key setting of this test: thinking mode is enabled in both rounds.
        "enable_thinking": True,
    }


def build_followup_messages(
    messages: List[Dict[str, Any]],
    assistant_message: Dict[str, Any],
    tool_call: Dict[str, Any],
    tool_result: Dict[str, Any],
) -> List[Dict[str, Any]]:
    """Return the second-round history: prior messages + assistant turn + tool result.

    ``messages`` is not mutated. ``content`` and ``reasoning_content`` are
    normalised to ``""`` when the first response carried them as ``None``.
    """
    return messages + [
        {
            "role": "assistant",
            # `.get("content", "")` would still yield None for an explicit null.
            "content": assistant_message.get("content") or "",
            "tool_calls": assistant_message.get("tool_calls") or [],
            # Replay reasoning_content as received: the core point being verified.
            "reasoning_content": assistant_message.get("reasoning_content") or "",
        },
        {
            "role": "tool",
            "tool_call_id": tool_call.get("id"),
            "name": tool_call["function"]["name"],
            "content": json.dumps(tool_result, ensure_ascii=False),
        },
    ]


def post_chat(client: Any, payload: Dict[str, Any]) -> Dict[str, Any]:
    """POST ``payload`` to the chat-completions endpoint and return the decoded JSON.

    Raises whatever ``raise_for_status()`` raises on a non-success HTTP status.
    """
    response = client.post(
        f"{API_BASE}/chat/completions", json=payload, headers=build_headers()
    )
    response.raise_for_status()
    return response.json()


def main() -> None:
    """Run the two-round tool-call conversation and print/log the results.

    Raises:
        SystemExit: if no API key is configured or httpx is not installed.
    """
    if not API_KEY:
        raise SystemExit("未配置 API_KEY_QWEN 或 DASHSCOPE_API_KEY,无法测试")
    if httpx is None:
        raise SystemExit("未安装 httpx,请先执行: pip install 'httpx[http2]'")

    messages = [
        {
            "role": "system",
            "content": "你是测试助手。",
        },
        {
            "role": "user",
            "content": (
                "请你想一个明确的8位数字,随便一个就行,只在思考过程中呈现,不要在最终回复里透露;"
                "先调用天气查询工具;工具完成后再正常回答并把刚才思考出的数字告诉我。"
            ),
        },
    ]

    tools = [
        {
            "type": "function",
            "function": {
                "name": "query_weather",
                "description": "查询指定城市在指定日期的天气",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "city": {"type": "string", "description": "城市名称,如上海"},
                        "date": {"type": "string", "description": "日期,如今天/明天/2025-01-01"},
                    },
                    "required": ["city"],
                },
            },
        }
    ]

    # A single client serves both rounds so the connection is reused.
    with httpx.Client(http2=True, timeout=120) as client:
        # ---- Round 1: expect a tool call plus reasoning_content ----
        data1 = post_chat(client, build_payload(messages, tools))
        log({"step": "call1_response", "raw": data1})

        choice1 = data1["choices"][0]["message"]
        tool_calls = choice1.get("tool_calls") or []
        reasoning1 = choice1.get("reasoning_content")

        print("第一次返回 reasoning_content 长度:", len(reasoning1 or ""))
        print("第一次返回 tool_calls 数量:", len(tool_calls))

        if not tool_calls:
            print("模型未调用工具,测试无法继续。")
            log({"error": "no_tool_calls"})
            return

        # Only the first tool call is answered.
        tc = tool_calls[0]
        args = parse_tool_arguments(tc["function"].get("arguments"))
        tool_result = weather_tool_result(
            # `or` also covers an explicit null sent by the model.
            city=args.get("city") or "上海",
            date=args.get("date") or "今天",
        )

        # ---- Round 2: replay the assistant turn and the tool result ----
        messages2 = build_followup_messages(messages, choice1, tc, tool_result)
        data2 = post_chat(client, build_payload(messages2, tools))
        log({"step": "call2_response", "raw": data2, "messages_sent": messages2})

    choice2 = data2["choices"][0]["message"]
    reasoning2 = choice2.get("reasoning_content")
    content2 = choice2.get("content")

    print("第二次返回 reasoning_content 长度:", len(reasoning2 or ""))
    print("第二次最终回复:", content2)
    print(f"完整日志已保存到: {LOG_FILE}")


if __name__ == "__main__":
    main()