import base64
import io
import os
import uuid
from datetime import datetime

import pytesseract
from passporteye import read_mrz
from PIL import Image

# Default Windows install location of the Tesseract binary. It is applied
# only when the file actually exists, so machines that have Tesseract on
# PATH (Linux, macOS, custom installs) keep pytesseract's own default.
_TESSERACT_CMD = r'C:\Program Files\Tesseract-OCR\tesseract.exe'


def extract_mrz_info(base64_str, save_temp=False):
    """
    Extract the holder's name and the document expiry date from a
    base64-encoded image of a passport or ID card.

    The image is decoded, written as a uniquely named JPEG next to this
    module, and passed to passporteye's ``read_mrz``. The temporary file is
    removed afterwards unless ``save_temp`` is true.

    Parameters:
        base64_str (str | bytes): The base64-encoded image data.
        save_temp (bool): Keep the temporary JPEG on disk when True.

    Returns:
        tuple: ``(full_name, expiry_date)`` where ``expiry_date`` is a
        ``'YYYY-MM-DD'`` string, or None if the MRZ date could not be parsed.
        Returns ``(None, None)`` if no MRZ is found or any step fails; this
        function is deliberately best-effort and does not raise.
    """
    # Point pytesseract at the bundled Windows binary when it is present
    # (presumably so passporteye's OCR step can find it -- verify against
    # the deployed passporteye version).
    if os.path.isfile(_TESSERACT_CMD):
        pytesseract.pytesseract.tesseract_cmd = _TESSERACT_CMD

    temp_filename = None
    temp_filepath = None
    try:
        # Decode the base64 payload and normalise to RGB so it can be
        # saved as JPEG regardless of the source mode (RGBA, P, ...).
        image_data = base64.b64decode(base64_str)
        image = Image.open(io.BytesIO(image_data)).convert('RGB')

        # A random suffix keeps concurrent calls from clobbering each other.
        temp_filename = f"mrz_temp_{uuid.uuid4().hex}.jpg"
        current_dir = os.path.dirname(os.path.abspath(__file__))
        temp_filepath = os.path.join(current_dir, temp_filename)
        image.save(temp_filepath)

        # Read the MRZ from the temporary file using passporteye.
        mrz = read_mrz(
            temp_filepath,
            save_roi=True,
            extra_cmdline_params='--oem 1 --psm 6',
        )
        if mrz is None:
            return None, None

        mrz_data = mrz.to_dict()

        surname = mrz_data.get('surname', '') or ''
        given_names = mrz_data.get('names', '') or ''
        # '<' is the MRZ filler character; turn it into whitespace and
        # collapse the resulting runs so the name has single spaces only.
        full_name = ' '.join(
            f"{given_names} {surname}".replace('<', ' ').split()
        )

        expiry_date = parse_mrz_date(mrz_data.get('expiration_date', ''))

        return full_name, expiry_date
    except Exception:
        # Best-effort contract: any decoding, imaging or OCR failure is
        # reported to the caller as "nothing extracted".
        return None, None
    finally:
        # Remove the temporary file unless the caller asked to keep it.
        # The existence check avoids a spurious error message when the
        # failure happened before the file was written.
        if temp_filepath and not save_temp and os.path.exists(temp_filepath):
            try:
                os.remove(temp_filepath)
            except OSError as e:
                print(f"Error deleting temporary file {temp_filename}: {e}")


def parse_mrz_date(date_str):
    """
    Convert an MRZ date (``YYMMDD``) to an ISO ``'YYYY-MM-DD'`` string.

    Parameters:
        date_str (str): Six ASCII digits in ``YYMMDD`` order.

    Returns:
        str | None: The formatted date, or None when the input is not a
        six-digit string or does not describe a valid calendar date.

    Note:
        The century is chosen by ``strptime``'s ``%y`` rule: 69-99 map to
        1969-1999 and 00-68 map to 2000-2068.
    """
    # Reject non-strings and anything that is not exactly six digits.
    # strptime alone would accept inputs such as '3001 5', because '%d'
    # tolerates a leading space.
    if (
        not isinstance(date_str, str)
        or len(date_str) != 6
        or not date_str.isdigit()
    ):
        return None
    try:
        date_obj = datetime.strptime(date_str, '%y%m%d')
    except ValueError:
        return None
    return date_obj.strftime('%Y-%m-%d')


# Example usage -- pass the base64-encoded image data, not a file path:
# image_path = b'...'
# name, expiry_date = extract_mrz_info(image_path)
# # name, dob, expiry_date = extract_mrz_info(image_path)
# print("Name:", name)
# # print("Date of Birth:", dob)
# print("Expiry Date:", expiry_date)