init commit
This commit is contained in:
68
pdf_server/pdf.py
Normal file
68
pdf_server/pdf.py
Normal file
@@ -0,0 +1,68 @@
|
||||
from mcp.server.fastmcp import FastMCP
|
||||
import fitz
|
||||
import httpx
|
||||
import json
|
||||
import os
|
||||
|
||||
# MCP server instance; tools are registered on it with ``@mcp.tool()``.
mcp = FastMCP("pdf")

# Settings live in ``server_config.json`` one directory above this module.
_CONFIG_PATH = os.path.join(os.path.dirname(__file__), '../server_config.json')
with open(_CONFIG_PATH, mode='r', encoding='utf-8') as f:
    config = json.loads(f.read())

# Both keys are required: a missing key raises KeyError at import time.
OLLAMA_URL = config["OLLAMA_URL"]
OLLAMA_MODEL = config["OLLAMA_MODEL"]
|
||||
|
||||
async def translate_text(text: str) -> str:
    """Translate ``text`` into Chinese using the configured Ollama model.

    The text is embedded in a fixed Chinese-language prompt and POSTed as JSON
    to ``OLLAMA_URL``.  The reply body is read as newline-delimited JSON and
    the non-empty ``"response"`` fragments are concatenated in order.

    Args:
        text: The source text to translate.

    Returns:
        The concatenated translation on success.  Request, HTTP-status and
        parsing problems are not raised; they are reported as a string that
        starts with ``"翻译失败"`` so callers can detect the failure.
    """
    prompt = f"请将以下内容翻译成流畅、准确的中文,仅输出翻译结果:{text}"
    payload = {
        "model": OLLAMA_MODEL,
        "prompt": prompt,
    }
    async with httpx.AsyncClient() as client:
        try:
            resp = await client.post(OLLAMA_URL, json=payload, timeout=120.0)
            resp.raise_for_status()
            fragments = []
            for line in resp.text.strip().splitlines():
                try:
                    item = json.loads(line)
                except ValueError:
                    # Blank or non-JSON line in the stream: skip it.
                    # json.JSONDecodeError is a ValueError subclass.
                    continue
                if not isinstance(item, dict):
                    # Valid JSON but not an object; nothing to extract.
                    continue
                if item.get("error"):
                    # An error object in the stream means the generation
                    # failed; report it instead of returning a silently
                    # truncated translation.
                    return f"翻译失败: {item['error']}"
                fragment = item.get("response")
                if fragment:
                    fragments.append(fragment)
            if fragments:
                return "".join(fragments)
            return "翻译失败:无有效返回内容"
        except Exception as e:
            # Boundary handler: callers expect an error string, never a raise.
            return f"翻译失败: {e}"
|
||||
|
||||
def extract_pdf_text(pdf_path: str) -> str:
    """Return the text of every page of a PDF, joined with newlines.

    Args:
        pdf_path: Path of the PDF file to open with ``fitz.open``.

    Returns:
        The text of each page (``page.get_text()``) joined by newline
        characters.  If opening, reading or closing the document fails, no
        exception is raised; a string starting with ``"PDF解析失败"`` that
        carries the error message is returned instead.
    """
    try:
        doc = fitz.open(pdf_path)
        try:
            return "\n".join(page.get_text() for page in doc)
        finally:
            # Close the document even when a page fails to extract, so the
            # underlying file handle is not leaked.
            doc.close()
    except Exception as e:
        return f"PDF解析失败: {e}"
|
||||
|
||||
# MCP tool: extract the text of a PDF and translate it into Chinese.
#
# The extracted text is cut into consecutive 2000-character slices, each slice
# is translated in turn with ``translate_text``, and the results are joined
# with newlines.  If extraction fails, the "PDF解析失败..." message produced by
# ``extract_pdf_text`` is returned unchanged.
#
# NOTE: the docstring below is intentionally left as written. FastMCP exposes
# a tool's docstring to clients as the tool description, so its text is part
# of the runtime behaviour rather than a plain comment.
@mcp.tool()
async def translate_pdf(pdf_path: str) -> str:
    """
    读取PDF文件内容并翻译成中文。
    Args:
    pdf_path: PDF文件的绝对路径
    """
    source_text = extract_pdf_text(pdf_path)
    if source_text.startswith("PDF解析失败"):
        return source_text

    chunk_size = 2000
    pieces = []
    offset = 0
    # Walk the text in fixed-size windows; slices are translated sequentially.
    while offset < len(source_text):
        window = source_text[offset:offset + chunk_size]
        pieces.append(await translate_text(window))
        offset += chunk_size
    return "\n".join(pieces)
|
||||
|
||||
# Script entry point: serve the registered MCP tools over stdio.
if __name__ == "__main__":
    mcp.run(transport="stdio")
|
15
pdf_server/test_pdf.py
Normal file
15
pdf_server/test_pdf.py
Normal file
@@ -0,0 +1,15 @@
|
||||
import asyncio
|
||||
import os
|
||||
from pdf import translate_pdf
|
||||
|
||||
def test_translate_pdf():
    """Translate a sample PDF end to end and check that no step failed.

    The sample file ``测试文档.pdf`` is looked up in the current working
    directory first, as the "file missing" message instructs, and then at the
    original developer-machine path as a fallback.  If neither exists, a hint
    is printed and the function returns without asserting anything.

    Raises:
        AssertionError: If the result contains a translation or PDF-parsing
            failure marker.
    """
    candidates = [
        os.path.abspath("测试文档.pdf"),
        # Original hard-coded location, kept so existing setups still work.
        os.path.abspath("/Users/mengxin/Project/mcp-client/测试文档.pdf"),
    ]
    test_pdf_path = next((p for p in candidates if os.path.exists(p)), None)
    if test_pdf_path is None:
        print("测试PDF文件不存在,请放置测试文档.pdf 在当前目录下。")
        return
    result = asyncio.run(translate_pdf(test_pdf_path))
    print("翻译结果:\n", result)
    assert "翻译失败" not in result and "PDF解析失败" not in result, "翻译或解析失败"
|
||||
|
||||
# Allow running this check directly as a script, without a test runner.
if __name__ == "__main__":
    test_translate_pdf()
|
Reference in New Issue
Block a user