agent/users/jojo/project/ocr_test/describe_image.py
2025-11-14 16:44:12 +08:00

88 lines
2.6 KiB
Python

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
图片内容描述测试 - 使用DeepSeek-OCR描述图片内容
"""
import base64
import mimetypes
import os
from pathlib import Path

from openai import OpenAI
# API configuration.
# The Clarifai access token (PAT) is taken from the CLARIFAI_PAT environment
# variable when it is set to a non-empty value, so the credential can be
# supplied without editing this file.
# NOTE(review): the literal fallback below is a credential committed to source.
# It is kept only so existing invocations keep working unchanged; it should be
# rotated and the fallback removed.
CLARIFAI_PAT = os.environ.get("CLARIFAI_PAT") or "941fba50c8c04be590a9b2d21b7d8347"

# OpenAI SDK client pointed at Clarifai's OpenAI-compatible endpoint; shared by
# every request made from this module.
client = OpenAI(
    base_url="https://api.clarifai.com/v2/ext/openai/v1",
    api_key=CLARIFAI_PAT,
)
def describe_image(image_path):
    """Ask the DeepSeek-OCR model to describe the visual content of an image.

    The file is read, base64-encoded and sent inline as a ``data:`` URL through
    the module-level ``client``, together with a fixed prompt that asks for a
    description of the picture rather than text extraction. Progress and
    errors are reported with ``print``.

    Args:
        image_path: Path of the image file (``str`` or path-like).

    Returns:
        The message content of the first choice in the response, or ``None``
        when the file does not exist or when reading/encoding/calling the API
        raises an exception.
    """
    print(f"📸 处理图片: {image_path}")

    # Bail out early when there is nothing at the given path.
    image_file = Path(image_path)
    if not image_file.exists():
        print(f"❌ 文件不存在: {image_path}")
        return None

    try:
        # Read the raw bytes and base64-encode them for inline transport.
        print("🔄 读取图片...")
        image_base64 = base64.b64encode(image_file.read_bytes()).decode()
        print(f"📊 图片编码完成,大小: {len(image_base64)} 字符")

        # Label the payload with the media type implied by the file name
        # instead of always claiming JPEG; fall back to JPEG (the previous
        # fixed value) when the extension is unknown or not an image type.
        mime_type, _ = mimetypes.guess_type(str(image_file))
        if not mime_type or not mime_type.startswith("image/"):
            mime_type = "image/jpeg"

        # Send one user message carrying the text prompt plus the inline image.
        print("🤖 调用DeepSeek-OCR描述图片...")
        response = client.chat.completions.create(
            model="https://clarifai.com/deepseek-ai/deepseek-ocr/models/DeepSeek-OCR",
            messages=[{
                "role": "user",
                "content": [
                    {"type": "text", "text": "请详细描述这张图片的内容,包括场景、物体、颜色、布局等视觉元素,不要提取文字,而是描述你看到的画面"},
                    {
                        "type": "image_url",
                        "image_url": {"url": f"data:{mime_type};base64,{image_base64}"},
                    },
                ],
            }],
            temperature=0.3,
            max_tokens=2048,
        )
        result = response.choices[0].message.content
        print("✅ 图片描述完成!")
        return result
    except Exception as e:
        # Deliberate best-effort boundary: any failure (I/O, network, API) is
        # reported and turned into a None result instead of propagating.
        print(f"❌ 图片描述失败: {e}")
        return None
def main():
    """Run the image-description demo on a fixed uploaded image.

    Calls ``describe_image`` on a hard-coded path, prints the description
    between divider lines and writes it to ``image_description.txt`` in the
    current working directory. Prints a failure notice instead when no
    description was obtained.
    """
    print("🎨 图片内容描述测试")
    print("=" * 50)

    # The newly uploaded image used for this test run.
    uploaded_image = "/opt/agent/agents/users/jojo/project/user_upload/bff145c49841e6d04cf5c1fc415c5673.jpeg"
    description = describe_image(uploaded_image)

    if not description:
        print("❌ 未能获取图片描述")
    else:
        divider = "-" * 40
        print("\n🎯 图片描述结果:")
        print(divider)
        print(description)
        print(divider)

        # Persist the description next to wherever the script was launched.
        Path("image_description.txt").write_text(description, encoding="utf-8")
        print("💾 描述结果已保存到 image_description.txt")

    print("\n✅ 测试完成!")
# Run the demo only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()