# Source: GitHub gist twilightty/427499cbce3ab284745043b748c9a4bd
# (author: @twilightty, last active March 4, 2025 05:17).
import time
import pytesseract
import cv2
import numpy as np
from PIL import Image
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
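# If the Tesseract executable is not on PATH (typical on Windows), uncomment
# the next line and point it at the installed binary:
# pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"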
# Open the VNPost pre-production page in Chrome, capture the CAPTCHA image,
# OCR it with Tesseract, type the recognised text into the confirmation-code
# field, submit with RETURN, and save a screenshot of the outcome.
#
# Files written to the current working directory:
#   full_screenshot.png - the page as rendered before the CAPTCHA is solved
#   captcha.png         - the CAPTCHA image element only
#   result.png          - the page after the CAPTCHA text was submitted
driver = webdriver.Chrome()
try:
    driver.get("https://pre-prod.vnpost.vn")
    # Fixed pause to let the page finish rendering before locating elements.
    time.sleep(3)

    captcha_element = driver.find_element(By.XPATH, "//img[@id='tra-cuu-captcha']")

    # Kept as a debugging artefact; it is no longer used to derive the CAPTCHA.
    driver.save_screenshot("full_screenshot.png")

    # Let the driver render just the element. Cropping the viewport screenshot
    # by the element's location/size breaks when screenshot pixels do not map
    # 1:1 to CSS pixels (HiDPI displays) or when the page is scrolled.
    if not captcha_element.screenshot("captcha.png"):
        raise RuntimeError("Could not save the CAPTCHA screenshot to captcha.png")

    img = cv2.imread("captcha.png", cv2.IMREAD_GRAYSCALE)
    # cv2.imread signals failure by returning None rather than raising.
    if img is None:
        raise RuntimeError("OpenCV could not read captcha.png")

    # Binarise: pixels brighter than 150 become white (255), the rest black,
    # which strips light background noise before OCR.
    _, img = cv2.threshold(img, 150, 255, cv2.THRESH_BINARY)

    # --psm 6: have Tesseract treat the image as a single uniform block of text.
    captcha_text = pytesseract.image_to_string(img, config="--psm 6").strip()
    print(f"Extracted CAPTCHA: {captcha_text}")

    # NOTE: update this locator if the page's placeholder text changes.
    captcha_input = driver.find_element(By.XPATH, "//input[contains(@placeholder,'Mã xác nhận')]")
    captcha_input.send_keys(captcha_text)
    captcha_input.send_keys(Keys.RETURN)

    # Fixed pause to let the page react to the submission before capturing it.
    time.sleep(3)
    driver.save_screenshot("result.png")
finally:
    # Always shut down the browser and driver process, even when a step above
    # raised (e.g. an element was not found).
    driver.quit()
# End of gist.