Files
mcp-example/pdf_server/pdf.py

69 lines
2.1 KiB
Python
Raw Normal View History

2025-08-15 19:50:11 +08:00
from mcp.server.fastmcp import FastMCP
import fitz
import httpx
import json
import os
# The MCP server object; tool functions in this module register on it.
mcp = FastMCP("pdf")

# Read the Ollama connection settings from server_config.json, which is
# expected one directory above this file.
_config_path = os.path.join(os.path.dirname(__file__), '../server_config.json')
with open(_config_path, 'r', encoding='utf-8') as f:
    config = json.loads(f.read())

# Both keys are required: a missing key raises KeyError at import time.
OLLAMA_URL = config["OLLAMA_URL"]
OLLAMA_MODEL = config["OLLAMA_MODEL"]
def _join_ollama_responses(body: str) -> str:
    """Concatenate the "response" fragments of a line-delimited JSON body.

    Args:
        body: Raw response text, one JSON object per line.

    Returns:
        All non-empty "response" values joined in order; "" if none exist.

    Raises:
        RuntimeError: If any object carries a non-empty "error" field.
    """
    fragments = []
    for line in body.strip().splitlines():
        try:
            event = json.loads(line)
        except json.JSONDecodeError:
            # Best effort: skip lines that are not valid JSON.
            continue
        if not isinstance(event, dict):
            continue
        if event.get("error"):
            # Surface a server-reported failure instead of silently returning
            # a truncated (or empty) translation.
            raise RuntimeError(event["error"])
        fragment = event.get("response")
        if fragment:
            fragments.append(fragment)
    return "".join(fragments)


async def translate_text(text: str) -> str:
    """Translate ``text`` into Chinese through the configured Ollama endpoint.

    Posts a translation prompt to ``OLLAMA_URL`` using ``OLLAMA_MODEL`` and
    joins the "response" fragments of the line-delimited JSON reply.

    Args:
        text: The text to translate.

    Returns:
        The translated text. On any failure (HTTP error, timeout, an error
        reported in the reply, or no usable content) a message starting with
        "翻译失败" is returned instead of raising.
    """
    prompt = f"请将以下内容翻译成流畅、准确的中文,仅输出翻译结果:{text}"
    payload = {
        "model": OLLAMA_MODEL,
        "prompt": prompt,
    }
    async with httpx.AsyncClient() as client:
        try:
            resp = await client.post(OLLAMA_URL, json=payload, timeout=120.0)
            resp.raise_for_status()
            translated = _join_ollama_responses(resp.text)
        except Exception as e:
            # Boundary handler: callers expect a string, never an exception.
            return f"翻译失败: {e}"
    return translated or "翻译失败:无有效返回内容"
def extract_pdf_text(pdf_path: str) -> str:
    """Return the text of every page of a PDF, joined with newlines.

    Args:
        pdf_path: Path of the PDF file to open.

    Returns:
        The concatenated page text, or a message starting with "PDF解析失败"
        when the document cannot be opened or read (no exception is raised).
    """
    try:
        # The context manager closes the document even when reading a page
        # raises, which the previous explicit close() call did not guarantee.
        with fitz.open(pdf_path) as doc:
            return "\n".join(page.get_text() for page in doc)
    except Exception as e:
        # Boundary handler: callers detect failure by the message prefix.
        return f"PDF解析失败: {e}"
# MCP tool: extract the text of a PDF and translate it into Chinese.
# NOTE(review): FastMCP publishes a tool's docstring as its client-facing
# description, so the docstring text below is runtime output and is kept
# verbatim. In English: "Read the content of a PDF file and translate it into
# Chinese. Args: pdf_path: absolute path of the PDF file."
@mcp.tool()
async def translate_pdf(pdf_path: str) -> str:
    """
    读取PDF文件内容并翻译成中文
    Args:
        pdf_path: PDF文件的绝对路径
    """
    text = extract_pdf_text(pdf_path)
    if text.startswith("PDF解析失败"):
        # extract_pdf_text signals failure through this message prefix.
        return text
    if not text.strip():
        # Pages without extractable text contribute empty strings, so the
        # joined text can be empty or newline-only. Sending that to the model
        # would produce a made-up "translation"; report the problem instead.
        return "PDF解析失败: 未提取到可翻译的文本"

    # Translate in fixed-size slices, one request at a time, preserving order.
    max_len = 2000
    translated = []
    for start in range(0, len(text), max_len):
        translated.append(await translate_text(text[start:start + max_len]))
    return "\n".join(translated)
# Script entry point: start the MCP server on the stdio transport when this
# file is executed directly (not when it is imported).
if __name__ == "__main__":
    mcp.run(transport='stdio')