# agent-Specialization/model_tests/qwen_vl_stream_test.py

"""Qwen VL 测试脚本（兼容模式）。

用例：
1) 纯文字：验证流式输出与 usage。
2) 图文：发送本地图片，验证多模态输入。

注意：硬编码测试密钥，仅限本地验证，勿用于生产。
"""

from __future__ import annotations

import asyncio
import base64
import os
from pathlib import Path
from typing import Optional

import httpx


# Connection settings. Each value may be overridden with an environment
# variable of the same name, so the script can target another endpoint,
# credential or model without editing this file. An unset or empty variable
# falls back to the original literal.
QWEN_BASE_URL = (
    os.environ.get("QWEN_BASE_URL")
    or "https://dashscope.aliyuncs.com/compatible-mode/v1"
)
# NOTE(security): the fallback is a hard-coded test key intended for local
# verification only. Prefer exporting QWEN_API_KEY, and never reuse this
# value in production.
QWEN_API_KEY = (
    os.environ.get("QWEN_API_KEY") or "sk-64af1343e67d46d7a902ef5bcf6817ad"
)
QWEN_VL_MODEL = os.environ.get("QWEN_VL_MODEL") or "qwen3-vl-plus"

# Default image used by the image+text case. The path is relative, so it is
# resolved against the current working directory; the original note places
# the file under the repository root, so run the script from there.
DEFAULT_IMAGE_PATH = Path("截图/截屏2025-12-12 17.30.04.png")


def headers(api_key: str) -> dict[str, str]:
    """Return the HTTP headers for an API request.

    Args:
        api_key: Credential sent as a bearer token.

    Returns:
        A new dict with the ``Authorization`` and ``Content-Type`` headers.
    """
    request_headers = {}
    request_headers["Authorization"] = f"Bearer {api_key}"
    request_headers["Content-Type"] = "application/json"
    return request_headers


# File suffixes whose spelling differs from the registered ``image/*`` MIME
# subtype. Suffixes not listed here are used as the subtype unchanged.
_IMAGE_SUBTYPE_ALIASES = {
    "jpg": "jpeg",
    "jpe": "jpeg",
    "jfif": "jpeg",
    "tif": "tiff",
    "svg": "svg+xml",
}


def build_image_content(image_path: Path) -> str:
    """Encode a local image file as a base64 ``data:`` URL.

    The MIME subtype is derived from the lower-cased file suffix. Suffixes
    that are not themselves valid subtypes (for example ``.jpg``, which must
    be sent as ``image/jpeg``) are mapped through ``_IMAGE_SUBTYPE_ALIASES``.

    Args:
        image_path: Path of the image file to read.

    Returns:
        A string of the form ``data:image/<subtype>;base64,<payload>``.

    Raises:
        OSError: If the file cannot be read.
    """
    suffix = image_path.suffix.lstrip(".").lower()
    subtype = _IMAGE_SUBTYPE_ALIASES.get(suffix, suffix)
    encoded = base64.b64encode(image_path.read_bytes()).decode("ascii")
    return f"data:image/{subtype};base64,{encoded}"


async def stream_call(
    *,
    name: str,
    base_url: str,
    api_key: str,
    model: str,
    messages,
    max_tokens: int,
    enable_thinking: bool = False,
) -> None:
    """Send one streaming chat-completions request and print the raw stream.

    Args:
        name: Label printed in the banner before the request.
        base_url: API root; ``/chat/completions`` is appended to it.
        api_key: Credential passed to ``headers`` for the bearer token.
        model: Model identifier placed in the request body.
        messages: Chat messages placed in the request body as-is.
        max_tokens: Value of the ``max_tokens`` request field.
        enable_thinking: When true, ``"enable_thinking": True`` is added to
            the request body; otherwise the field is omitted.

    Returns:
        None. Everything received is written to stdout.
    """
    endpoint = f"{base_url.rstrip('/')}/chat/completions"
    request_body = {
        "model": model,
        "stream": True,
        "max_tokens": max_tokens,
        "messages": messages,
        # Ask the server to include token usage in the stream.
        "stream_options": {"include_usage": True},
    }
    if enable_thinking:
        request_body["enable_thinking"] = True

    print(f"\n=== {name} ===")
    print(f"POST {endpoint}")

    sse_prefix = "data:"
    async with httpx.AsyncClient(http2=True, timeout=180) as client:
        async with client.stream(
            "POST", endpoint, json=request_body, headers=headers(api_key)
        ) as response:
            print("status:", response.status_code)
            if response.status_code != 200:
                # On failure, dump the whole error body and give up.
                raw_error = await response.aread()
                print("error body:", raw_error.decode(errors="ignore"))
                return

            async for raw_line in response.aiter_lines():
                if not raw_line:
                    continue
                if not raw_line.startswith(sse_prefix):
                    # Anything that is not a data line is echoed verbatim.
                    print(raw_line)
                    continue
                event = raw_line[len(sse_prefix):].strip()
                if event == "[DONE]":
                    # End-of-stream sentinel: stop reading.
                    print("[DONE]")
                    break
                print(event)


async def main(image_path: Optional[Path] = None) -> None:
    """Run the two streaming test cases: text only, then image plus text.

    Args:
        image_path: Image to send in the second case. When omitted,
            ``DEFAULT_IMAGE_PATH`` is used.

    Returns:
        None. If the image file does not exist, a warning is printed and the
        second case is skipped.
    """
    # Settings shared by both requests.
    shared_kwargs = dict(
        base_url=QWEN_BASE_URL,
        api_key=QWEN_API_KEY,
        model=QWEN_VL_MODEL,
        max_tokens=32000,  # 32K, the official upper limit per the original note
        enable_thinking=True,
    )

    # Case 1: text only.
    text_prompt = {
        "role": "user",
        "content": "请用一句话自我介绍，并简单说明你目前在执行的动作。",
    }
    await stream_call(
        name="qwen-vl text only",
        messages=[text_prompt],
        **shared_kwargs,
    )

    # Case 2: image plus text.
    picture = image_path or DEFAULT_IMAGE_PATH
    if not picture.exists():
        print(f"\n[warn] 图片文件不存在: {picture}")
        return

    text_part = {"type": "text", "text": "请描述这张图片的主要内容，并给出一句话总结。"}
    image_part = {
        "type": "image_url",
        "image_url": {"url": build_image_content(picture)},
    }
    await stream_call(
        name="qwen-vl image+text",
        messages=[{"role": "user", "content": [text_part, image_part]}],
        **shared_kwargs,
    )


# Script entry point: when executed directly, run both test cases via
# asyncio.run(main()), using the default image path.
if __name__ == "__main__":
    asyncio.run(main())