#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Exercise DeepSeek-OCR with several prompt styles.

The prompt templates follow the official DeepSeek-OCR GitHub documentation.
Each prompt is sent together with one image through the OpenAI-compatible
client configured below; every response is previewed on stdout and all of
them are written to a JSON file at the end.
"""

import base64
import json
import mimetypes
import os
import sys
import time
from pathlib import Path

from openai import OpenAI

# API configuration.
# NOTE(review): an access token is committed here in plain text. The
# CLARIFAI_PAT environment variable now takes precedence; the literal is kept
# only so existing invocations keep working. Rotate the token and remove it.
CLARIFAI_PAT = os.environ.get("CLARIFAI_PAT") or "941fba50c8c04be590a9b2d21b7d8347"

# Model identifier passed as ``model=`` on every request.
MODEL_URL = "https://clarifai.com/deepseek-ai/deepseek-ocr/models/DeepSeek-OCR"

# Sentinel stored as the "result" of a mode whose request failed.
FAILED_RESULT = "处理失败"

# Image used by main() when no path is given on the command line.
DEFAULT_IMAGE_PATH = "/opt/agent/agents/users/jojo/project/user_upload/截屏2025-10-24 12.34.04.png"

# Initialise the OpenAI client.
client = OpenAI(
    base_url="https://api.clarifai.com/v2/ext/openai/v1",
    api_key=CLARIFAI_PAT
)

# All tested modes, keyed by display name. Each entry holds the prompt sent to
# the model and a human-readable description printed before the request.
OCR_MODES = {
    "基础OCR模式": {
        "prompt": "OCR this image.",
        "description": "基础文字识别,不考虑布局"
    },
    "自由OCR模式": {
        "prompt": "Free OCR.",
        "description": "自由格式OCR,不考虑布局"
    },
    "Markdown转换模式": {
        "prompt": "Convert the document to markdown.",
        "description": "将文档转换为Markdown格式"
    },
    "图表解析模式": {
        "prompt": "Parse the figure.",
        "description": "解析图表和图形数据"
    },
    "详细描述模式": {
        "prompt": "Describe this image in detail.",
        "description": "详细描述图片内容"
    },
    "目标定位模式": {
        "prompt": "Locate all text elements in the image.",
        "description": "定位图片中的所有文字元素"
    },
    "结构化提取模式": {
        "prompt": "Extract structured data from this document, including tables, lists, and formatted text.",
        "description": "提取结构化数据"
    },
    "中文优化模式": {
        "prompt": "请识别图片中的所有中文文字,并保持原有的格式和布局",
        "description": "专门针对中文文字识别优化"
    },
    "英文优化模式": {
        "prompt": "Please extract all English text from this image while preserving the original layout and formatting",
        "description": "专门针对英文文字识别优化"
    },
    "表格识别模式": {
        "prompt": "Identify and extract all tables from this image, preserving the table structure and data",
        "description": "专门识别和提取表格"
    }
}


def _guess_image_mime(image_path):
    """Return the image MIME type implied by the file name, else image/jpeg."""
    mime, _ = mimetypes.guess_type(str(image_path))
    if mime and mime.startswith("image/"):
        return mime
    # Unknown or non-image extension: keep the value the script always used.
    return "image/jpeg"


def process_image_with_prompt(image_path, prompt, mode_name, temperature=0.0, max_tokens=2048):
    """Send one image plus a text prompt to the model and return its reply.

    Args:
        image_path: Path of the image file to submit.
        prompt: Text instruction sent alongside the image.
        mode_name: Label printed in the progress output.
        temperature: Sampling temperature forwarded to the API.
        max_tokens: Completion token limit forwarded to the API.

    Returns:
        The content of the first choice's message, or None when the file is
        missing or the request raises.
    """
    print(f"\n🎯 {mode_name}")
    print("-" * 50)
    print(f"📸 处理图片: {image_path}")
    print(f"📝 提示词: {prompt}")

    path = Path(image_path)
    if not path.is_file():
        print(f"❌ 文件不存在: {image_path}")
        return None

    try:
        # Read and base64-encode the image.
        print("🔄 读取图片...")
        image_base64 = base64.b64encode(path.read_bytes()).decode()
        print(f"📊 图片编码完成,大小: {len(image_base64)} 字符")

        # Label the data URL with the file's own type instead of always
        # claiming JPEG (the default input is a PNG screenshot).
        data_url = f"data:{_guess_image_mime(path)};base64,{image_base64}"

        # Call the API.
        print("🤖 调用DeepSeek-OCR...")
        response = client.chat.completions.create(
            model=MODEL_URL,
            messages=[{
                "role": "user",
                "content": [
                    {"type": "text", "text": prompt},
                    {"type": "image_url", "image_url": {"url": data_url}}
                ]
            }],
            temperature=temperature,
            max_tokens=max_tokens
        )

        result = response.choices[0].message.content
        print("✅ 处理完成!")
        return result

    except Exception as e:
        # Deliberate best-effort boundary: one failing mode must not abort
        # the remaining ones, so the error is reported and None returned.
        print(f"❌ 处理失败: {e}")
        return None


def test_all_modes(image_path, delay=1.0):
    """Run every entry of OCR_MODES against one image.

    Args:
        image_path: Path of the image file to submit for each mode.
        delay: Seconds to wait between consecutive requests.

    Returns:
        A dict keyed by mode name; each value has 'prompt', 'result' and
        'description'. A None or empty reply is recorded as FAILED_RESULT.
    """
    print("🚀 DeepSeek-OCR 多种调用方式测试")
    print("=" * 60)

    results = {}
    total = len(OCR_MODES)

    # Try each mode in turn.
    for index, (mode_name, mode_config) in enumerate(OCR_MODES.items(), start=1):
        print(f"\n{mode_name}")
        print(f"描述: {mode_config['description']}")

        result = process_image_with_prompt(
            image_path,
            mode_config['prompt'],
            mode_name
        )

        if result:
            results[mode_name] = {
                'prompt': mode_config['prompt'],
                'result': result,
                'description': mode_config['description']
            }

            # Show a preview of the result.
            print("📋 结果预览:")
            preview = result[:200] + "..." if len(result) > 200 else result
            print(f"{preview}")
            print(f"📊 结果长度: {len(result)} 字符")
        else:
            results[mode_name] = {
                'prompt': mode_config['prompt'],
                'result': FAILED_RESULT,
                'description': mode_config['description']
            }

        # Short pause to stay under API rate limits; there is nothing to
        # wait for after the final request.
        if index < total:
            time.sleep(delay)

    return results


def save_results(results, output_file="ocr_modes_results.json"):
    """Write the collected results to a UTF-8 JSON file.

    Args:
        results: Mapping as returned by test_all_modes().
        output_file: Destination path of the JSON file.

    Errors are reported on stdout rather than raised.
    """
    try:
        # Build the payload before opening the file so a malformed entry
        # cannot leave a truncated, empty output file behind.
        json_results = {
            mode: {
                'prompt': data['prompt'],
                'description': data['description'],
                'result': data['result']
            }
            for mode, data in results.items()
        }

        with open(output_file, 'w', encoding='utf-8') as f:
            json.dump(json_results, f, ensure_ascii=False, indent=2)

        print(f"\n💾 所有结果已保存到: {output_file}")

    except Exception as e:
        # Best-effort: saving must not abort the summary that follows.
        print(f"\n❌ 保存结果失败: {e}")


def main():
    """Run all modes on one image, save the results and print a summary.

    The image path is taken from the first command-line argument when one is
    given; otherwise DEFAULT_IMAGE_PATH is used.
    """
    print("🚀 DeepSeek-OCR 多种调用方式测试")
    print("=" * 60)
    print("📚 基于官方GitHub文档的提示词模板")
    print("🔗 GitHub: https://github.com/deepseek-ai/DeepSeek-OCR")
    print("=" * 60)

    # Default to the previously tested screenshot.
    image_path = sys.argv[1] if len(sys.argv) > 1 else DEFAULT_IMAGE_PATH

    # Run every mode.
    results = test_all_modes(image_path)

    # Persist the results.
    save_results(results)

    # Print the summary report.
    print("\n📊 测试总结报告")
    print("=" * 60)

    successful_modes = [mode for mode, data in results.items() if data['result'] != FAILED_RESULT]
    failed_modes = [mode for mode, data in results.items() if data['result'] == FAILED_RESULT]

    print(f"✅ 成功模式: {len(successful_modes)} 个")
    for mode in successful_modes:
        result_length = len(results[mode]['result'])
        print(f" - {mode}: {result_length} 字符")

    if failed_modes:
        print(f"\n❌ 失败模式: {len(failed_modes)} 个")
        for mode in failed_modes:
            print(f" - {mode}")

    # Guard the ratio so an empty result set cannot divide by zero.
    success_rate = len(successful_modes) / len(results) * 100 if results else 0.0
    print(f"\n🎯 成功率: {success_rate:.1f}%")
    print("\n✅ 测试完成!")


if __name__ == "__main__":
    main()