#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Image content description test.

Sends a local image to the DeepSeek-OCR model through Clarifai's
OpenAI-compatible endpoint and asks for a description of the visual content
(scene, objects, colours, layout) rather than a text extraction.

Usage:
    python <this script> [IMAGE_PATH]

When IMAGE_PATH is omitted, ``DEFAULT_IMAGE_PATH`` is used.  On success the
description is printed and also written to ``image_description.txt`` in the
current working directory.
"""
import base64
import mimetypes
import os
import sys
from pathlib import Path
from typing import Optional, Union

from openai import OpenAI

# API configuration.
# SECURITY NOTE(review): a personal access token is committed in this file.
# The environment variable CLARIFAI_PAT now takes precedence; the literal is
# kept only as a backward-compatible fallback and should be rotated and then
# removed from source control.
CLARIFAI_PAT = os.environ.get("CLARIFAI_PAT") or "941fba50c8c04be590a9b2d21b7d8347"

# Image used by main() when no path is given on the command line.
DEFAULT_IMAGE_PATH = (
    "/opt/agent/agents/users/jojo/project/user_upload/"
    "bff145c49841e6d04cf5c1fc415c5673.jpeg"
)

# MIME type used in the data URL when the file extension is unknown or does
# not map to an image type (this was the only value the script ever sent).
_FALLBACK_MIME_TYPE = "image/jpeg"

# Initialise the OpenAI client against Clarifai's OpenAI-compatible endpoint.
client = OpenAI(
    base_url="https://api.clarifai.com/v2/ext/openai/v1",
    api_key=CLARIFAI_PAT,
)


def _guess_image_mime_type(path: Path) -> str:
    """Return an ``image/*`` MIME type guessed from *path*'s file name."""
    guessed, _encoding = mimetypes.guess_type(path.name)
    if guessed and guessed.startswith("image/"):
        return guessed
    return _FALLBACK_MIME_TYPE


def describe_image(image_path: Union[str, "os.PathLike[str]"]) -> Optional[str]:
    """Ask the model to describe the visual content of an image file.

    Progress and error messages are printed to stdout.

    Args:
        image_path: Path of the image file to describe.

    Returns:
        The description text from the first completion choice, or ``None``
        when the file does not exist or when reading/encoding the file or
        the API call raises an exception.
    """
    print(f"📸 处理图片: {image_path}")

    # Check that the file is there before doing any work.
    path = Path(image_path)
    if not path.exists():
        print(f"❌ 文件不存在: {image_path}")
        return None

    # Deliberately broad handler: this is a best-effort test script, so any
    # failure (I/O, network, unexpected response shape) is reported and
    # turned into a ``None`` result instead of a traceback.
    try:
        # Read the image and base64-encode it for embedding in a data URL.
        print("🔄 读取图片...")
        image_base64 = base64.b64encode(path.read_bytes()).decode("ascii")
        print(f"📊 图片编码完成,大小: {len(image_base64)} 字符")

        # Label the payload with a MIME type matching the file extension.
        mime_type = _guess_image_mime_type(path)
        data_url = f"data:{mime_type};base64,{image_base64}"

        # Call the API to describe the image.
        print("🤖 调用DeepSeek-OCR描述图片...")
        response = client.chat.completions.create(
            model="https://clarifai.com/deepseek-ai/deepseek-ocr/models/DeepSeek-OCR",
            messages=[{
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": "请详细描述这张图片的内容,包括场景、物体、颜色、布局等视觉元素,不要提取文字,而是描述你看到的画面",
                    },
                    {
                        "type": "image_url",
                        "image_url": {"url": data_url},
                    },
                ],
            }],
            temperature=0.3,
            max_tokens=2048,
        )

        result = response.choices[0].message.content
        print("✅ 图片描述完成!")
        return result

    except Exception as e:
        print(f"❌ 图片描述失败: {e}")
        return None


def main():
    """Describe one image, print the result and save it to a text file.

    The image path is taken from the first command-line argument when one is
    supplied; otherwise ``DEFAULT_IMAGE_PATH`` is used.
    """
    print("🎨 图片内容描述测试")
    print("=" * 50)

    # Allow the image to be chosen on the command line; fall back to the
    # most recently uploaded image the script was written for.
    image_path = sys.argv[1] if len(sys.argv) > 1 else DEFAULT_IMAGE_PATH

    # Describe the image.
    result = describe_image(image_path)

    if result:
        print("\n🎯 图片描述结果:")
        print("-" * 40)
        print(result)
        print("-" * 40)

        # Save the result.
        with open("image_description.txt", "w", encoding="utf-8") as f:
            f.write(result)
        print("💾 描述结果已保存到 image_description.txt")
    else:
        print("❌ 未能获取图片描述")

    print("\n✅ 测试完成!")


if __name__ == "__main__":
    main()