#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""DeepSeek OCR test script.

Exercises the DeepSeek-OCR model on Clarifai by pointing the OpenAI client
at Clarifai's endpoint: checks connectivity, runs OCR on a local image with
several prompts, and saves the collected results as JSON.
"""

import base64
import json
import mimetypes
import os
import time
from pathlib import Path

import requests  # noqa: F401 - not used below; retained from the original script
from openai import OpenAI

# API configuration.
# SECURITY NOTE(review): a personal access token is embedded in source. It is
# kept only as a fallback so existing runs keep working; prefer exporting
# CLARIFAI_PAT in the environment, then rotate and remove the literal.
CLARIFAI_PAT = os.environ.get("CLARIFAI_PAT") or "941fba50c8c04be590a9b2d21b7d8347"

# Model identifier shared by every request in this script.
MODEL_URL = "https://clarifai.com/deepseek-ai/deepseek-ocr/models/DeepSeek-OCR"

# MIME type used when the image extension is unknown (the original behavior).
DEFAULT_IMAGE_MIME = "image/png"

# Initialize the OpenAI client against the Clarifai endpoint.
client = OpenAI(
    base_url="https://api.clarifai.com/v2/ext/openai/v1",
    api_key=CLARIFAI_PAT,
)


def _guess_image_mime(image_path):
    """Return the image MIME type implied by the file extension of *image_path*.

    Falls back to ``DEFAULT_IMAGE_MIME`` when the extension is unknown or does
    not map to an ``image/*`` type.
    """
    mime, _ = mimetypes.guess_type(str(image_path))
    if mime and mime.startswith("image/"):
        return mime
    return DEFAULT_IMAGE_MIME


def test_connection():
    """Send a minimal chat request to check that the API is reachable.

    Returns:
        True when the request completes without raising, False otherwise.
    """
    print("🔍 正在测试API连接...")
    try:
        client.chat.completions.create(
            model=MODEL_URL,
            messages=[{"role": "user", "content": "Hello"}],
            max_tokens=10,
        )
    except Exception as e:  # any failure here just means "not connected"
        print(f"❌ API连接失败:{e}")
        return False
    print("✅ API连接成功!")
    return True


def ocr_local_image(image_path, prompt="请提取图片中的所有文字内容"):
    """Run OCR on a local image file.

    The image is read from disk, base64-encoded, and sent inline as a data
    URL together with *prompt*.

    Args:
        image_path: Path to the image file.
        prompt: Instruction text sent alongside the image.

    Returns:
        The message content of the first choice in the response, or None if
        the file does not exist or any step fails.
    """
    print(f"📸 正在处理图片: {image_path}")
    try:
        path = Path(image_path)

        # Bail out early with a clear message if the file is missing.
        if not path.exists():
            print(f"❌ 文件不存在: {image_path}")
            return None

        # Read and encode the image.
        print("🔄 正在编码图片...")
        image_base64 = base64.b64encode(path.read_bytes()).decode()
        print(f"📊 图片大小: {len(image_base64)} 字符")

        # Label the payload with the file's real type rather than always PNG.
        data_url = f"data:{_guess_image_mime(path)};base64,{image_base64}"

        # Call the API.
        print("🤖 正在调用DeepSeek-OCR API...")
        response = client.chat.completions.create(
            model=MODEL_URL,
            messages=[{
                "role": "user",
                "content": [
                    {"type": "text", "text": prompt},
                    {"type": "image_url", "image_url": {"url": data_url}},
                ],
            }],
            temperature=0.0,  # favor accuracy
            max_tokens=2048,  # cap the output length
        )

        result = response.choices[0].message.content
        print("✅ OCR处理完成!")
        return result

    except Exception as e:  # best-effort: report the failure and return None
        print(f"❌ OCR处理失败: {e}")
        return None


def ocr_with_different_prompts(image_path, delay=1):
    """Run OCR on one image with several different prompts.

    Args:
        image_path: Path to the image file.
        delay: Seconds to wait between consecutive requests, to avoid rate
            limiting. No wait happens before the first or after the last
            request.

    Returns:
        A dict mapping each prompt's name to its OCR result (None on failure).
    """
    prompts = {
        "基础提取": "请提取图片中的所有文字内容",
        "详细描述": "请详细描述这张图片的内容,包括文字、布局、格式等",
        "表格提取": "如果图片中有表格,请提取表格的结构和内容",
        "Markdown格式": "请将图片内容转换为Markdown格式",
        "结构化数据": "请以JSON格式提取图片中的结构化信息",
    }

    results = {}
    for index, (name, prompt) in enumerate(prompts.items()):
        # Short pause between requests to avoid rate limiting.
        if index:
            time.sleep(delay)

        print(f"\n📝 {name}: {prompt}")
        result = ocr_local_image(image_path, prompt)
        results[name] = result

        if result:
            print(f"结果预览: {result[:200]}...")
        else:
            print("无结果")

    return results


def save_results(results, output_file="ocr_results.json"):
    """Write *results* to *output_file* as indented UTF-8 JSON.

    Failures are reported on stdout instead of being raised.

    Args:
        results: JSON-serializable object to save.
        output_file: Destination file path.
    """
    try:
        with open(output_file, "w", encoding="utf-8") as f:
            json.dump(results, f, ensure_ascii=False, indent=2)
    except (OSError, TypeError, ValueError) as e:
        # OSError: file could not be written; TypeError/ValueError: the
        # object could not be encoded as JSON.
        print(f"❌ 保存结果失败: {e}")
    else:
        print(f"💾 结果已保存到: {output_file}")


def main():
    """Run the connection check, the basic OCR test and the multi-prompt test."""
    print("🚀 DeepSeek OCR 测试开始")
    print("=" * 50)

    # Image used for the test run.
    image_path = "user_upload/截屏2025-10-24 12.34.04.png"

    # Step 1: check connectivity.
    if not test_connection():
        print("❌ 无法连接到API,请检查密钥和网络连接")
        return

    print("\n" + "=" * 50)

    # Step 2: basic OCR test.
    print("📋 基础OCR测试")
    basic_result = ocr_local_image(image_path, "请提取图片中的所有文字内容")

    if basic_result:
        print("\n🎯 基础OCR结果:")
        print("-" * 30)
        print(basic_result)
        print("-" * 30)

    print("\n" + "=" * 50)

    # Step 3: multi-prompt test.
    print("🔍 多提示词测试")
    advanced_results = ocr_with_different_prompts(image_path)

    # Save everything collected during the run.
    all_results = {
        "基础结果": basic_result,
        "高级结果": advanced_results,
        "图片路径": image_path,
    }
    save_results(all_results, "ocr_test_results.json")

    print("\n" + "=" * 50)
    print("✅ 测试完成!")


if __name__ == "__main__":
    main()