# agent-Specialization/scratch_test/qwen_vl_reasoning_test.py

"""
手工验证 Qwen-VL 在多轮工具调用时是否保留 reasoning_content。

步骤：
1. 首次请求：用户要求“在思考中想一个 8 位数字但不要直接输出，先调用天气工具，再告诉我数字”。
2. 模型若返回 tool_calls，我们本地模拟 weather 查询，把结果作为 tool 消息返回。
3. 第二次请求：带上第一次 assistant 工具调用 + tool 结果，检查模型是否能输出第一次思考里的数字。

运行：
  export API_BASE_QWEN="https://dashscope.aliyuncs.com/compatible-mode/v1"
  export API_KEY_QWEN="sk-..."
  python3 scratch_test/qwen_vl_reasoning_test.py

日志：
  输出到 logs/qwen_vl_reasoning_test_<timestamp>.json
"""

import json
import os
import time
from pathlib import Path
from typing import Dict, Any

import httpx

# Model endpoint settings; each one falls back to a default when its
# environment variable is not set.
API_BASE = os.getenv(
    "API_BASE_QWEN",
    "https://dashscope.aliyuncs.com/compatible-mode/v1",
).rstrip("/")  # no trailing slash, so URL paths can be appended with "/..."
# API_KEY_QWEN is used when present; otherwise DASHSCOPE_API_KEY, otherwise "".
API_KEY = os.getenv("API_KEY_QWEN", os.getenv("DASHSCOPE_API_KEY", ""))
MODEL_ID = os.getenv("MODEL_QWEN_VL", "qwen3-vl-plus")

# The log directory is created at import time; the file name carries the
# Unix timestamp (whole seconds) taken when the module is loaded.
LOG_DIR = Path("logs")
LOG_DIR.mkdir(parents=True, exist_ok=True)
LOG_FILE = LOG_DIR.joinpath(f"qwen_vl_reasoning_test_{int(time.time())}.json")


def log(data: Dict[str, Any]):
    """Append one debug record to ``LOG_FILE``.

    The record is written as indented JSON (non-ASCII characters kept as-is)
    followed by an empty line that separates it from the next record.

    Args:
        data: JSON-serializable mapping to record.
    """
    with LOG_FILE.open("a", encoding="utf-8") as handle:
        serialized = json.dumps(data, ensure_ascii=False, indent=2)
        handle.write(serialized + "\n\n")


def build_headers():
    """Return the HTTP headers sent with every request.

    Returns:
        A dict holding a Bearer ``Authorization`` header built from the
        module-level ``API_KEY`` and a JSON ``Content-Type`` header.
    """
    headers = {}
    headers["Authorization"] = f"Bearer {API_KEY}"
    headers["Content-Type"] = "application/json"
    return headers


def weather_tool_result(city: str = "上海", date: str = "今天"):
    """Return a canned weather report standing in for a real weather tool.

    Args:
        city: City name echoed back in the result.
        date: Date label echoed back in the result.

    Returns:
        A dict with a fixed forecast; only ``city`` and ``date`` vary.
    """
    return dict(
        success=True,
        city=city,
        date=date,
        forecast="多云转晴，15~22℃，东风3级",
        source="local-mock",
    )


def _post_chat_completion(payload: Dict[str, Any]) -> Dict[str, Any]:
    """POST one payload to ``{API_BASE}/chat/completions`` and return the JSON body.

    Raises:
        httpx.HTTPStatusError: if the server answers with an error status.
    """
    with httpx.Client(http2=True, timeout=120) as client:
        response = client.post(
            f"{API_BASE}/chat/completions",
            json=payload,
            headers=build_headers(),
        )
    response.raise_for_status()
    return response.json()


def _mock_tool_message(tool_call: Dict[str, Any]) -> Dict[str, Any]:
    """Build the ``tool`` role message answering one tool call with the weather mock."""
    function = tool_call["function"]
    raw_args = function.get("arguments") or "{}"
    try:
        args = json.loads(raw_args) if isinstance(raw_args, str) else raw_args
    except json.JSONDecodeError:
        args = {}
    if not isinstance(args, dict):
        # Valid JSON that is not an object (list, string, number) carries no
        # named arguments; fall back to the defaults instead of crashing.
        args = {}
    tool_result = weather_tool_result(
        city=args.get("city", "上海"),
        date=args.get("date", "今天"),
    )
    return {
        "role": "tool",
        "tool_call_id": tool_call.get("id"),
        "name": function["name"],
        "content": json.dumps(tool_result, ensure_ascii=False),
    }


def main():
    """Run the two-round tool-calling probe and print/log what the model returns.

    Round 1 asks the model to pick an 8-digit number in its reasoning only and
    to call the weather tool. If tool calls come back, each one is answered
    with the local weather mock. Round 2 replays the assistant turn (including
    its ``reasoning_content``) plus the tool results, and prints the final
    reply so it can be checked by hand against the round-1 reasoning.

    Both raw responses, and the messages sent in round 2, are appended to
    ``LOG_FILE``.

    Raises:
        SystemExit: if no API key is configured.
        httpx.HTTPStatusError: if either request returns an error status.
    """
    if not API_KEY:
        raise SystemExit("未配置 API_KEY_QWEN 或 DASHSCOPE_API_KEY，无法测试")

    messages = [
        {
            "role": "system",
            "content": "你是测试助手。",
        },
        {
            "role": "user",
            "content": (
                "请你想一个明确的8位数字，随便一个就行，只在思考过程中呈现，不要在最终回复里透露；"
                "先调用天气查询工具；工具完成后再正常回答并把刚才思考出的数字告诉我。"
            ),
        },
    ]

    tools = [
        {
            "type": "function",
            "function": {
                "name": "query_weather",
                "description": "查询指定城市在指定日期的天气",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "city": {"type": "string", "description": "城市名称，如上海"},
                        "date": {"type": "string", "description": "日期，如今天/明天/2025-01-01"},
                    },
                    "required": ["city"],
                },
            },
        }
    ]

    # ---- Round 1 ----
    payload1 = {
        "model": MODEL_ID,
        "messages": messages,
        "tools": tools,
        "tool_choice": "auto",
        "stream": False,
        "max_tokens": 512,
        # Key setting: turn thinking mode on.
        "enable_thinking": True,
    }

    data1 = _post_chat_completion(payload1)
    log({"step": "call1_response", "raw": data1})

    message1 = data1["choices"][0]["message"]
    tool_calls = message1.get("tool_calls") or []
    reasoning1 = message1.get("reasoning_content")

    print("第一次返回 reasoning_content 长度:", len(reasoning1 or ""))
    print("第一次返回 tool_calls 数量:", len(tool_calls))

    if not tool_calls:
        print("模型未调用工具，测试无法继续。")
        log({"error": "no_tool_calls"})
        return

    # Round-2 history: the round-1 assistant turn followed by one tool message
    # per tool call. Every replayed tool call gets a reply, so the history
    # stays well-formed even if the model issued more than one call.
    assistant_turn = {
        "role": "assistant",
        # The key may be present with a null value when the model only emits
        # tool calls; normalise to "" like reasoning_content below.
        "content": message1.get("content") or "",
        "tool_calls": tool_calls,
        # Core check of this script: pass reasoning_content back unchanged.
        "reasoning_content": reasoning1 or "",
    }
    tool_messages = [_mock_tool_message(tool_call) for tool_call in tool_calls]
    messages2 = messages + [assistant_turn] + tool_messages

    # ---- Round 2 ----
    # Same settings as round 1 (thinking mode included); only the history differs.
    payload2 = {**payload1, "messages": messages2}

    data2 = _post_chat_completion(payload2)
    log({"step": "call2_response", "raw": data2, "messages_sent": messages2})

    message2 = data2["choices"][0]["message"]
    reasoning2 = message2.get("reasoning_content")
    content2 = message2.get("content")

    print("第二次返回 reasoning_content 长度:", len(reasoning2 or ""))
    print("第二次最终回复:", content2)
    print(f"完整日志已保存到: {LOG_FILE}")


# Run the probe only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()