"""检查图片 OCR 环境:Pillow、pytesseract、Tesseract 可执行文件、chi_sim 语言包。 在 backend 目录执行: .\\venv\\Scripts\\python scripts\\check_ocr_env.py """ from __future__ import annotations import sys from pathlib import Path # 保证能加载 app 配置 sys.path.insert(0, str(Path(__file__).resolve().parent.parent)) from app.core.config import settings # noqa: E402 from app.services.builtin_tools import _local_file_workspace_root, _tessdata_dir_for_ocr # noqa: E402 def main() -> int: print("TESSERACT_CMD (settings):", settings.TESSERACT_CMD or "(空,将尝试 PATH)") print("TESSERACT_TESSDATA_DIR (settings):", settings.TESSERACT_TESSDATA_DIR or "(空,将尝试仓库 tessdata/)") try: import PIL # noqa: F401 print("Pillow: OK") except ImportError as e: print("Pillow: 缺失 —", e) print(" 请执行: pip install Pillow") return 2 try: import pytesseract as pt print("pytesseract: OK") except ImportError as e: print("pytesseract: 缺失 —", e) print(" 请执行: pip install pytesseract") return 3 cmd = (settings.TESSERACT_CMD or "").strip() if cmd: pt.pytesseract.tesseract_cmd = cmd try: ver = pt.get_tesseract_version() print("Tesseract 版本:", ver) except Exception as e: print("Tesseract 可执行文件: 不可用 —", e) print(" Windows 请安装 Tesseract,并在 .env 设置 TESSERACT_CMD=.../tesseract.exe") return 4 td = _tessdata_dir_for_ocr() print("解析到的 tessdata 目录:", td or "(未找到)") root = _local_file_workspace_root() loc = root / "tessdata" if loc.is_dir(): has_chi = any(loc.glob("chi_sim.traineddata")) print("仓库 tessdata/chi_sim.traineddata:", "存在" if has_chi else "缺失(中文识别差)") return 0 if __name__ == "__main__": raise SystemExit(main())