59 lines
1.9 KiB
Python
59 lines
1.9 KiB
Python
|
|
"""检查图片 OCR 环境:Pillow、pytesseract、Tesseract 可执行文件、chi_sim 语言包。
|
|||
|
|
|
|||
|
|
在 backend 目录执行:
|
|||
|
|
.\\venv\\Scripts\\python scripts\\check_ocr_env.py
|
|||
|
|
"""
|
|||
|
|
from __future__ import annotations
|
|||
|
|
|
|||
|
|
import sys
|
|||
|
|
from pathlib import Path
|
|||
|
|
|
|||
|
|
# 保证能加载 app 配置
|
|||
|
|
sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
|
|||
|
|
|
|||
|
|
from app.core.config import settings # noqa: E402
|
|||
|
|
from app.services.builtin_tools import _local_file_workspace_root, _tessdata_dir_for_ocr # noqa: E402
|
|||
|
|
|
|||
|
|
|
|||
|
|
def main() -> int:
    """Run the OCR environment checks in order, printing each result to stdout.

    The checks are, in sequence: Pillow is importable, pytesseract is
    importable, the Tesseract executable answers a version query, and the
    tessdata lookup. The first failing dependency check prints a hint and
    ends the run.

    Returns:
        0 when the dependency checks passed (also when the tessdata directory
        or ``chi_sim.traineddata`` is missing; that is only reported),
        2 when Pillow cannot be imported, 3 when pytesseract cannot be
        imported, 4 when the Tesseract version query raises.
    """
    # Echo the raw configuration first so the user sees what is being tested.
    print("TESSERACT_CMD (settings):", settings.TESSERACT_CMD or "(空,将尝试 PATH)")
    print(
        "TESSERACT_TESSDATA_DIR (settings):",
        settings.TESSERACT_TESSDATA_DIR or "(空,将尝试仓库 tessdata/)",
    )

    # Step 1: Pillow.
    try:
        import PIL  # noqa: F401
    except ImportError as exc:
        print("Pillow: 缺失 —", exc)
        print(" 请执行: pip install Pillow")
        return 2
    print("Pillow: OK")

    # Step 2: the pytesseract wrapper.
    try:
        import pytesseract
    except ImportError as exc:
        print("pytesseract: 缺失 —", exc)
        print(" 请执行: pip install pytesseract")
        return 3
    print("pytesseract: OK")

    # Step 3: the Tesseract binary. Only override pytesseract's command when
    # the setting holds a non-blank value.
    configured_cmd = (settings.TESSERACT_CMD or "").strip()
    if configured_cmd:
        pytesseract.pytesseract.tesseract_cmd = configured_cmd
    try:
        version = pytesseract.get_tesseract_version()
        print("Tesseract 版本:", version)
    except Exception as exc:
        print("Tesseract 可执行文件: 不可用 —", exc)
        print(" Windows 请安装 Tesseract,并在 .env 设置 TESSERACT_CMD=.../tesseract.exe")
        return 4

    # Step 4: report the tessdata directory resolved by the application helper.
    resolved_tessdata = _tessdata_dir_for_ocr()
    print("解析到的 tessdata 目录:", resolved_tessdata or "(未找到)")

    # Step 5: look for chi_sim in the workspace-root tessdata directory; when
    # that directory does not exist there is nothing further to report.
    workspace_tessdata = _local_file_workspace_root() / "tessdata"
    if not workspace_tessdata.is_dir():
        return 0
    chi_sim_matches = workspace_tessdata.glob("chi_sim.traineddata")
    chi_sim_status = "存在" if any(chi_sim_matches) else "缺失(中文识别差)"
    print("仓库 tessdata/chi_sim.traineddata:", chi_sim_status)
    return 0
|
|||
|
|
|
|||
|
|
|
|||
|
|
if __name__ == "__main__":
    # Propagate main()'s status code as the process exit code.
    exit_code = main()
    raise SystemExit(exit_code)
|