#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
DeepSeek OCR test script.

Exercises the DeepSeek-OCR API hosted on the Clarifai platform.
"""

import base64
import json
import mimetypes
import os
import time
from pathlib import Path

import requests
from openai import OpenAI

# API configuration. The personal access token (PAT) is taken from the
# CLARIFAI_PAT environment variable when that variable is set and non-empty.
# NOTE(review): the literal fallback is a credential committed to source. It is
# kept only so existing runs keep working; rotate the key and remove the
# fallback.
CLARIFAI_PAT = os.environ.get("CLARIFAI_PAT") or "941fba50c8c04be590a9b2d21b7d8347"

# OpenAI client pointed at the Clarifai base URL instead of the default one.
client = OpenAI(
    base_url="https://api.clarifai.com/v2/ext/openai/v1",
    api_key=CLARIFAI_PAT,
)


def test_connection():
    """Check that the DeepSeek-OCR API can be reached.

    Sends one minimal chat-completion request through the module-level
    ``client`` and reports the outcome on stdout.

    Returns:
        bool: True if the request completed without raising, False otherwise.
    """
    print("🔍 正在测试API连接...")
    try:
        # Only success or failure matters here, so the response is discarded.
        client.chat.completions.create(
            model="https://clarifai.com/deepseek-ai/deepseek-ocr/models/DeepSeek-OCR",
            messages=[{"role": "user", "content": "Hello"}],
            max_tokens=10,
        )
        print("✅ API连接成功!")
        return True
    except Exception as e:
        # Any failure (network, auth, bad model name, ...) counts as
        # "not connected"; the message is printed for the operator.
        print(f"❌ API连接失败:{e}")
        return False


def ocr_local_image(image_path, prompt="请提取图片中的所有文字内容"):
    """Run OCR on a local image file through the DeepSeek-OCR API.

    The file is read, base64-encoded and sent as a data URL together with
    ``prompt`` through the module-level ``client``. Progress messages are
    printed to stdout.

    Args:
        image_path: Path of the image file to process.
        prompt: Instruction text sent alongside the image.

    Returns:
        The message content of the first returned choice, or None when the
        file does not exist or any step raises.
    """
    print(f"📸 正在处理图片: {image_path}")

    try:
        image_file = Path(image_path)

        # Bail out early with a clear message if the file is missing.
        if not image_file.exists():
            print(f"❌ 文件不存在: {image_path}")
            return None

        # Read and encode the image.
        print("🔄 正在编码图片...")
        image_base64 = base64.b64encode(image_file.read_bytes()).decode()
        print(f"📊 图片大小: {len(image_base64)} 字符")

        # Label the data URL with the media type implied by the file name
        # rather than always claiming PNG; fall back to PNG when the
        # extension is unknown or does not map to an image type.
        mime_type, _ = mimetypes.guess_type(image_file.name)
        if mime_type is None or not mime_type.startswith("image/"):
            mime_type = "image/png"

        # Call the API.
        print("🤖 正在调用DeepSeek-OCR API...")
        response = client.chat.completions.create(
            model="https://clarifai.com/deepseek-ai/deepseek-ocr/models/DeepSeek-OCR",
            messages=[{
                "role": "user",
                "content": [
                    {"type": "text", "text": prompt},
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:{mime_type};base64,{image_base64}"
                        },
                    },
                ],
            }],
            temperature=0.0,  # kept at zero for accuracy
            max_tokens=2048,  # caps the output length
        )

        result = response.choices[0].message.content
        print("✅ OCR处理完成!")
        return result

    except Exception as e:
        # Best-effort helper: report the failure and signal it with None.
        print(f"❌ OCR处理失败: {e}")
        return None


def ocr_with_different_prompts(image_path, prompts=None, delay=1):
    """Run OCR on one image once per prompt and collect the results.

    Each prompt is passed to ``ocr_local_image``; a short preview of every
    result is printed to stdout.

    Args:
        image_path: Path of the image file to process.
        prompts: Optional mapping of label -> prompt text. When None, the
            built-in set of five prompts below is used.
        delay: Seconds to sleep after each request; values <= 0 disable the
            pause.

    Returns:
        dict: label -> OCR result (None for a request that failed).
    """
    if prompts is None:
        prompts = {
            "基础提取": "请提取图片中的所有文字内容",
            "详细描述": "请详细描述这张图片的内容,包括文字、布局、格式等",
            "表格提取": "如果图片中有表格,请提取表格的结构和内容",
            "Markdown格式": "请将图片内容转换为Markdown格式",
            "结构化数据": "请以JSON格式提取图片中的结构化信息",
        }

    results = {}

    for name, prompt in prompts.items():
        print(f"\n📝 {name}: {prompt}")
        result = ocr_local_image(image_path, prompt)
        results[name] = result

        if result:
            print(f"结果预览: {result[:200]}...")
        else:
            print("无结果")

        # Short pause between requests to avoid hitting rate limits.
        if delay > 0:
            time.sleep(delay)

    return results


def save_results(results, output_file="ocr_results.json"):
    """Write ``results`` to ``output_file`` as indented UTF-8 JSON.

    Failures are reported on stdout instead of being raised.

    Args:
        results: JSON-serializable object to store.
        output_file: Destination file path.
    """
    try:
        # Serialize before opening the file: opening in 'w' mode truncates it,
        # so a non-serializable value would otherwise leave a half-written
        # file (and destroy any previous contents).
        payload = json.dumps(results, ensure_ascii=False, indent=2)
        with open(output_file, 'w', encoding='utf-8') as f:
            f.write(payload)
        print(f"💾 结果已保存到: {output_file}")
    except Exception as e:
        print(f"❌ 保存结果失败: {e}")


def main(image_path="user_upload/截屏2025-10-24 12.34.04.png"):
    """Run the full test sequence: connection check, basic OCR, multi-prompt OCR.

    All results are written to ``ocr_test_results.json``. The run stops right
    after the connection check if that check fails.

    Args:
        image_path: Image file to run the OCR tests on; defaults to the
            sample screenshot path the script was written against.
    """
    print("🚀 DeepSeek OCR 测试开始")
    print("=" * 50)

    # Step 1: check connectivity.
    if not test_connection():
        print("❌ 无法连接到API,请检查密钥和网络连接")
        return

    print("\n" + "=" * 50)

    # Step 2: basic OCR test.
    print("📋 基础OCR测试")
    basic_result = ocr_local_image(image_path, "请提取图片中的所有文字内容")

    if basic_result:
        print("\n🎯 基础OCR结果:")
        print("-" * 30)
        print(basic_result)
        print("-" * 30)

    print("\n" + "=" * 50)

    # Step 3: multi-prompt test.
    print("🔍 多提示词测试")
    advanced_results = ocr_with_different_prompts(image_path)

    # Save everything, including a failed (None) basic result.
    all_results = {
        "基础结果": basic_result,
        "高级结果": advanced_results,
        "图片路径": image_path,
    }

    save_results(all_results, "ocr_test_results.json")

    print("\n" + "=" * 50)
    print("✅ 测试完成!")


# Run the test sequence only when executed as a script, not on import.
if __name__ == "__main__":
    main()