图片上传识别功能

This commit is contained in:
renjianbo
2026-04-13 22:52:36 +08:00
parent df4fab1e6e
commit 63b54116a5
13 changed files with 708 additions and 17 deletions

View File

@@ -0,0 +1,58 @@
"""Check the image-OCR environment: Pillow, pytesseract, the Tesseract executable, and the chi_sim language pack.

Run from the backend directory:
    .\\venv\\Scripts\\python scripts\\check_ocr_env.py
"""
from __future__ import annotations
import sys
from pathlib import Path
# Put the parent of this script's directory on sys.path so the `app` package (and its config) can be imported.
sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
from app.core.config import settings # noqa: E402
from app.services.builtin_tools import _local_file_workspace_root, _tessdata_dir_for_ocr # noqa: E402
def main() -> int:
    """Check that the image-OCR toolchain is usable, printing each result.

    The checks run in order and stop at the first mandatory failure:

    1. Print the configured ``TESSERACT_CMD`` and ``TESSERACT_TESSDATA_DIR``.
    2. Import Pillow.
    3. Import pytesseract.
    4. Query the Tesseract executable for its version (using
       ``settings.TESSERACT_CMD`` when it is set).
    5. Print the tessdata directory returned by ``_tessdata_dir_for_ocr()``
       and, if a ``tessdata`` folder exists under
       ``_local_file_workspace_root()``, whether it contains
       ``chi_sim.traineddata``.

    Returns:
        0 if every mandatory check passed, 2 if Pillow is missing, 3 if
        pytesseract is missing, 4 if the Tesseract version query failed.
        A missing ``chi_sim.traineddata`` is reported but does not change
        the return code.
    """
    print("TESSERACT_CMD (settings):", settings.TESSERACT_CMD or "(空,将尝试 PATH)")
    print("TESSERACT_TESSDATA_DIR (settings):", settings.TESSERACT_TESSDATA_DIR or "(空,将尝试仓库 tessdata/)")

    try:
        import PIL  # noqa: F401
    except ImportError as e:
        print("Pillow: 缺失 —", e)
        print(" 请执行: pip install Pillow")
        return 2
    else:
        print("Pillow: OK")

    try:
        import pytesseract as pt
    except ImportError as e:
        print("pytesseract: 缺失 —", e)
        print(" 请执行: pip install pytesseract")
        return 3
    else:
        print("pytesseract: OK")

    # Only override pytesseract's executable path when one is configured.
    cmd = (settings.TESSERACT_CMD or "").strip()
    if cmd:
        pt.pytesseract.tesseract_cmd = cmd

    try:
        ver = pt.get_tesseract_version()
    except Exception as e:
        # Deliberately broad: this is a diagnostic boundary, and any failure
        # here means the executable cannot be used.
        print("Tesseract 可执行文件: 不可用 —", e)
        print(" Windows 请安装 Tesseract并在 .env 设置 TESSERACT_CMD=.../tesseract.exe")
        return 4
    else:
        print("Tesseract 版本:", ver)

    td = _tessdata_dir_for_ocr()
    print("解析到的 tessdata 目录:", td or "(未找到)")

    # NOTE(review): only the workspace-local tessdata/ folder is inspected for
    # chi_sim; the resolved directory `td` above is not checked — confirm this
    # is intended.
    root = _local_file_workspace_root()
    loc = root / "tessdata"
    if loc.is_dir():
        # Direct existence test for the language file; unlike globbing for a
        # literal name, this does not accept a directory with that name.
        has_chi = (loc / "chi_sim.traineddata").is_file()
        print("仓库 tessdata/chi_sim.traineddata:", "存在" if has_chi else "缺失(中文识别差)")
    return 0
# Script entry point: propagate main()'s return code as the process exit status.
if __name__ == "__main__":
    sys.exit(main())