"""Qwen API 流式测试脚本（qwen-max，快速模型）。

目标：
- 验证 qwen-max 的流式输出与用量字段（无思考能力）。
- 打印 data 行，尾包包含 usage。

注意：硬编码测试密钥，仅限本地验证，勿用于生产。
"""

from __future__ import annotations

import asyncio
import os
from typing import Optional

import httpx


# Root of the DashScope "compatible-mode" API; stream_call appends
# "/chat/completions" to it.
QWEN_BASE_URL = "https://dashscope.aliyuncs.com/compatible-mode/v1"
# Credential sent as the Bearer token. The QWEN_API_KEY environment variable
# takes precedence so the script can run with a different key without editing
# the file; an unset or empty variable falls back to the original test key.
# NOTE(review): the fallback literal is a secret committed to source control.
# Rotate it and drop the fallback once callers supply the variable.
QWEN_API_KEY = (
    os.environ.get("QWEN_API_KEY") or "sk-64af1343e67d46d7a902ef5bcf6817ad"
)
# Model identifier placed in the request payload's "model" field.
QWEN_MAX_MODEL = "qwen3-max"


def headers(api_key: str) -> dict[str, str]:
    """Build the HTTP headers for a JSON request authenticated by Bearer token.

    Args:
        api_key: Token inserted after ``"Bearer "`` in the Authorization header.

    Returns:
        A new dict holding the ``Authorization`` and ``Content-Type`` entries.
    """
    result: dict[str, str] = {}
    # str.format applies the same formatting as the equivalent f-string.
    result["Authorization"] = "Bearer {}".format(api_key)
    result["Content-Type"] = "application/json"
    return result


async def stream_call(
    *,
    base_url: str,
    api_key: str,
    model: str,
    max_tokens: int = 2048,
    prompt: Optional[str] = None,
) -> None:
    """Send one streaming chat-completions request and echo the reply to stdout.

    The request is POSTed to ``<base_url>/chat/completions`` with
    ``stream=True`` and ``stream_options.include_usage=True``. The response is
    read line by line: empty lines are skipped, lines starting with ``data:``
    are printed without that prefix and surrounding whitespace, and any other
    line is printed verbatim. Reading stops at the ``[DONE]`` marker.

    Args:
        base_url: API root; trailing slashes are removed before the path is
            appended.
        api_key: Token passed to ``headers`` to build the request headers.
        model: Value of the payload's ``model`` field.
        max_tokens: Value of the payload's ``max_tokens`` field.
        prompt: User message content. ``None`` or an empty string selects the
            built-in default prompt.

    Returns:
        None. On a non-200 status the full body is read and printed, and the
        function returns without iterating the stream.
    """
    root = base_url.rstrip("/")
    endpoint = f"{root}/chat/completions"

    # Any falsy prompt (None or "") falls back to the default prompt.
    if prompt:
        user_text = prompt
    else:
        user_text = "请用简短中文自我介绍，并说明你当前正在执行的动作。"

    user_message = {"role": "user", "content": user_text}
    request_body = {
        "model": model,
        "stream": True,
        "max_tokens": max_tokens,
        "stream_options": {"include_usage": True},
        "messages": [user_message],
    }

    print("\n=== qwen-max fast mode ===")
    print(f"POST {endpoint}")

    data_prefix = "data:"
    # NOTE(review): http2=True presumably needs httpx's optional HTTP/2
    # support to be installed - confirm in the target environment.
    async with httpx.AsyncClient(http2=True, timeout=120) as client:
        pending = client.stream(
            "POST", endpoint, json=request_body, headers=headers(api_key)
        )
        async with pending as resp:
            print("status:", resp.status_code)
            if resp.status_code != 200:
                # Read the whole error body; undecodable bytes are dropped.
                raw_error = await resp.aread()
                print("error body:", raw_error.decode(errors="ignore"))
                return

            async for raw_line in resp.aiter_lines():
                if not raw_line:
                    continue
                if not raw_line.startswith(data_prefix):
                    print(raw_line)
                    continue
                chunk = raw_line[len(data_prefix):].strip()
                # The terminator is printed like any other chunk, so the
                # output still ends with "[DONE]" before the loop exits.
                print(chunk)
                if chunk == "[DONE]":
                    break


async def main() -> None:
    """Run one streaming request using the module-level Qwen settings."""
    request_args = {
        "base_url": QWEN_BASE_URL,
        "api_key": QWEN_API_KEY,
        "model": QWEN_MAX_MODEL,
        # Per the original author's note, 64K is the official upper limit for
        # qwen3-max - TODO confirm against current provider documentation.
        "max_tokens": 64000,
    }
    await stream_call(**request_args)


# Script entry point: when the file is executed directly (not imported),
# run the main() coroutine to completion with asyncio.run.
if __name__ == "__main__":
    asyncio.run(main())