# agent/users/jojo/project/ocr_test/describe_image.py

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
图片内容描述测试 - 使用DeepSeek-OCR描述图片内容
"""

import base64
import mimetypes
import os
from pathlib import Path

from openai import OpenAI

# API configuration.
# NOTE(security): a personal access token is committed in source. Supply it
# through the CLARIFAI_PAT environment variable instead; the literal below is
# kept only as a fallback so existing invocations keep working, and should be
# rotated and removed.
CLARIFAI_PAT = os.environ.get("CLARIFAI_PAT") or "941fba50c8c04be590a9b2d21b7d8347"

# OpenAI-compatible client pointed at the Clarifai endpoint.
client = OpenAI(
    base_url="https://api.clarifai.com/v2/ext/openai/v1",
    api_key=CLARIFAI_PAT,
)

def describe_image(image_path):
    """Ask the DeepSeek-OCR model for a visual description of an image.

    The file is read, base64-encoded and sent inline as a data URL, together
    with a fixed Chinese prompt, through the module-level ``client``.
    Progress and errors are reported with ``print``.

    Args:
        image_path: Path (``str`` or path-like) of the image file to describe.

    Returns:
        The message content of the first choice in the response, or ``None``
        when ``image_path`` is not an existing regular file or when reading
        the file / calling the API raises.
    """
    print(f"📸 处理图片: {image_path}")

    image_file = Path(image_path)

    # is_file() rather than exists(): a directory cannot be encoded, so reject
    # it up front instead of failing later inside read_bytes().
    if not image_file.is_file():
        print(f"❌ 文件不存在: {image_path}")
        return None

    # Label the data URL with the media type implied by the file extension
    # (png, webp, ...) instead of always claiming JPEG. Fall back to JPEG when
    # the extension is unknown or does not map to an image type.
    mime_type, _ = mimetypes.guess_type(image_file.name)
    if not mime_type or not mime_type.startswith("image/"):
        mime_type = "image/jpeg"

    try:
        # Read and encode the image.
        print("🔄 读取图片...")
        image_base64 = base64.b64encode(image_file.read_bytes()).decode()
        print(f"📊 图片编码完成，大小: {len(image_base64)} 字符")

        # Send the prompt and the inline image to the model.
        print("🤖 调用DeepSeek-OCR描述图片...")
        response = client.chat.completions.create(
            model="https://clarifai.com/deepseek-ai/deepseek-ocr/models/DeepSeek-OCR",
            messages=[{
                "role": "user",
                "content": [
                    {"type": "text", "text": "请详细描述这张图片的内容，包括场景、物体、颜色、布局等视觉元素，不要提取文字，而是描述你看到的画面"},
                    {
                        "type": "image_url",
                        "image_url": {"url": f"data:{mime_type};base64,{image_base64}"},
                    },
                ],
            }],
            temperature=0.3,
            max_tokens=2048,
        )

        result = response.choices[0].message.content
        print("✅ 图片描述完成！")
        return result

    except Exception as e:
        # Deliberately broad: this is the script's reporting boundary, so any
        # read or API failure is printed and turned into a None result.
        print(f"❌ 图片描述失败: {e}")
        return None

def main():
    """Run the description test on one fixed uploaded image.

    Prints a banner, calls ``describe_image`` on a hard-coded path, and on a
    truthy result prints it and writes it to ``image_description.txt``
    (relative to the current working directory, UTF-8). A falsy result only
    prints a failure notice. A completion line is printed in both cases.
    """
    print("🎨 图片内容描述测试")
    print("=" * 50)

    # The most recently uploaded image used for this test run.
    source_image = "/opt/agent/agents/users/jojo/project/user_upload/bff145c49841e6d04cf5c1fc415c5673.jpeg"

    description = describe_image(source_image)

    if not description:
        print("❌ 未能获取图片描述")
    else:
        separator = "-" * 40
        print("\n🎯 图片描述结果:")
        print(separator)
        print(description)
        print(separator)

        # Persist the description next to wherever the script was launched.
        Path("image_description.txt").write_text(description, encoding="utf-8")
        print("💾 描述结果已保存到 image_description.txt")

    print("\n✅ 测试完成！")

# Run the test only when executed as a script, not when imported.
if __name__ == "__main__":
    main()