"""Qwen API 流式测试脚本(qwen-max,快速模型)。 目标: - 验证 qwen-max 的流式输出与用量字段(无思考能力)。 - 打印 data 行,尾包包含 usage。 注意:硬编码测试密钥,仅限本地验证,勿用于生产。 """ from __future__ import annotations import asyncio from typing import Optional import httpx QWEN_BASE_URL = "https://dashscope.aliyuncs.com/compatible-mode/v1" QWEN_API_KEY = "sk-64af1343e67d46d7a902ef5bcf6817ad" QWEN_MAX_MODEL = "qwen3-max" def headers(api_key: str) -> dict[str, str]: return { "Authorization": f"Bearer {api_key}", "Content-Type": "application/json", } async def stream_call( *, base_url: str, api_key: str, model: str, max_tokens: int = 2048, prompt: Optional[str] = None, ) -> None: url = base_url.rstrip("/") + "/chat/completions" payload = { "model": model, "stream": True, "max_tokens": max_tokens, "stream_options": {"include_usage": True}, "messages": [ { "role": "user", "content": prompt or "请用简短中文自我介绍,并说明你当前正在执行的动作。", } ], } print("\n=== qwen-max fast mode ===") print(f"POST {url}") async with httpx.AsyncClient(http2=True, timeout=120) as client: async with client.stream( "POST", url, json=payload, headers=headers(api_key) ) as resp: print("status:", resp.status_code) if resp.status_code != 200: body = await resp.aread() print("error body:", body.decode(errors="ignore")) return async for line in resp.aiter_lines(): if not line: continue if line.startswith("data:"): data = line[5:].strip() if data == "[DONE]": print("[DONE]") break print(data) else: print(line) async def main() -> None: await stream_call( base_url=QWEN_BASE_URL, api_key=QWEN_API_KEY, model=QWEN_MAX_MODEL, max_tokens=64000, # qwen3-max 官方上限 64K ) if __name__ == "__main__": asyncio.run(main())