"""Extract a person's name from an ID-card image using PaddleOCR PP-StructureV3."""

from paddleocr import PPStructureV3
from typing import Optional
from functools import lru_cache
import base64
import io

import numpy as np
from PIL import Image


def _b64_to_bgr(b64: str) -> Optional[np.ndarray]:
    """Decode base64 (with or without a data-URL prefix) to a BGR numpy image.

    Returns None when the payload is not valid base64 or not a decodable image.
    """
    # Strip an optional "data:image/...;base64," prefix.
    if "," in b64 and ";base64" in b64[:64]:
        b64 = b64.split(",", 1)[1]
    try:
        data = base64.b64decode(b64, validate=True)
        img = Image.open(io.BytesIO(data)).convert("RGB")
        return np.array(img)[:, :, ::-1]  # PIL yields RGB; downstream expects BGR
    except Exception:
        # Deliberate best-effort decode: any failure maps to None.
        return None


@lru_cache(maxsize=4)
def _get_pipeline(layout_detection_model: Optional[str],
                  text_detection_model: Optional[str],
                  text_recognition_model: Optional[str]) -> PPStructureV3:
    """Build (and cache) a PP-StructureV3 pipeline — model loading is expensive,
    so reuse one pipeline per model combination instead of rebuilding per call."""
    return PPStructureV3(layout_detection_model_name=layout_detection_model,
                         text_detection_model_name=text_detection_model,
                         text_recognition_model_name=text_recognition_model,
                         lang="en")


# to learn more about model choosing refer to
# https://www.paddleocr.ai/latest/en/version3.x/pipeline_usage/PP-StructureV3.html
def extract_name_from_id(id_front: str,
                         layout_detection_model: Optional[str] = "PP-DocLayout-M",
                         text_detection_model: Optional[str] = "PP-OCRv5_server_det",
                         text_recognition_model: Optional[str] = "en_PP-OCRv4_mobile_rec",
                         ) -> str:
    """OCR the front of an ID card (base64 image) and return the holder's name.

    Args:
        id_front: Base64-encoded image, with or without a data-URL prefix.
        layout_detection_model: PaddleOCR layout-detection model name.
        text_detection_model: PaddleOCR text-detection model name.
        text_recognition_model: PaddleOCR text-recognition model name.

    Returns:
        The extracted name, or "" when no name-like text was recognized.

    Raises:
        ValueError: if ``id_front`` cannot be decoded into an image.
    """
    image = _b64_to_bgr(id_front)
    if image is None:
        # Fail fast with a clear message instead of letting pipeline.predict
        # crash on a None input (original behavior).
        raise ValueError("id_front is not a decodable base64 image")
    pipeline = _get_pipeline(layout_detection_model,
                             text_detection_model,
                             text_recognition_model)
    output = pipeline.predict(image)
    if not output:
        return ""
    # "or {}" guards a missing/None "overall_ocr_res" — the original called
    # .get() on None and raised AttributeError in that case.
    ocr_res = output[0].get("overall_ocr_res") or {}
    return extract_name_from_boxes(ocr_res.get("rec_texts", []))


def extract_name_from_boxes(rec_texts) -> str:
    """Heuristically pull a name out of a list of OCR'd text lines.

    Finds the first line containing "name" (substring match, so "Surname",
    "Full Name" etc. also hit). The name is taken either from up to 4 tokens
    after the "name" keyword on the same line, or — when the keyword stands
    alone — from up to 4 of the following lines.

    Returns "" when no name-like text is found.
    """
    rec_texts = [t.strip() for t in rec_texts if t and t.strip()]
    for i, t in enumerate(rec_texts):
        if "name" in t.lower():
            parts = t.split()
            # Index of the token that contains "name" (e.g. "Name:", "Surname").
            idx = next((j for j, w in enumerate(parts) if "name" in w.lower()), None)
            if idx is not None and idx + 1 < len(parts):
                # Name follows the keyword on the same line; cap at 4 tokens.
                return " ".join(parts[idx + 1: idx + 5])
            if i + 1 < len(rec_texts):
                # Keyword stands alone on its line; take the following lines.
                return " ".join(rec_texts[i + 1: i + 5])
    return ""