Breaking captchas¶
In [21]:
Copied!
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import Select
from webdriver_manager.chrome import ChromeDriverManager
driver = webdriver.Chrome(ChromeDriverManager().install())
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import Select
from webdriver_manager.chrome import ChromeDriverManager
driver = webdriver.Chrome(ChromeDriverManager().install())
/var/folders/_m/b8tjbm6n4zs1q2mvjvg25x1m0000gn/T/ipykernel_96808/58483888.py:8: DeprecationWarning: executable_path has been deprecated, please pass in a Service object driver = webdriver.Chrome(ChromeDriverManager().install())
In [22]:
Copied!
# Visit the webpage
driver.get("https://jsoma.github.io/captcha-breaker-tester/")
# Visit the webpage
driver.get("https://jsoma.github.io/captcha-breaker-tester/")
In [61]:
Copied!
# Locate the image, save as PNG
image = driver.find_element(By.CSS_SELECTOR, "#captcha-holder > img")
answer = driver.find_element(By.CSS_SELECTOR, "#captcha-holder > .answer").text
image.screenshot('captcha.png')
# Locate the image, save as PNG
image = driver.find_element(By.CSS_SELECTOR, "#captcha-holder > img")
answer = driver.find_element(By.CSS_SELECTOR, "#captcha-holder > .answer").text
image.screenshot('captcha.png')
Out[61]:
True
In [56]:
Copied!
import numpy as np
from skimage import io
from skimage.color import rgb2gray
from skimage.transform import rotate
# Remove alpha channel with :3
image = io.imread('captcha.png')[2:-2,2:-2,:3]
io.imsave('sample.png', image.astype(np.uint8))
grayscale = rgb2gray(image)
angle = determine_skew(grayscale)
rotated = rotate(image, angle, resize=True) * 255
io.imsave('captcha-deskewed.png', rotated.astype(np.uint8))
import numpy as np
from skimage import io
from skimage.color import rgb2gray
from skimage.transform import rotate
# Remove alpha channel with :3
image = io.imread('captcha.png')[2:-2,2:-2,:3]
io.imsave('sample.png', image.astype(np.uint8))
grayscale = rgb2gray(image)
angle = determine_skew(grayscale)
rotated = rotate(image, angle, resize=True) * 255
io.imsave('captcha-deskewed.png', rotated.astype(np.uint8))
In [70]:
Copied!
from deskew import determine_skew
from wand.image import Image
import numpy as np
with Image(filename='captcha.png') as image:
with image.clone() as cleaned:
# Pull a couple pixels off the edge to remove border noise
cleaned.crop(2, 2, image.width - 2, image.height - 2)
# Remove anything that isn't the text
cleaned.trim()
# Remove rotation
angle = determine_skew(np.array(cleaned))
print("Rotating", angle, "degrees")
cleaned.rotate(-angle, 'white', True)
# Save
cleaned.save(filename='captcha-cleaned.png')
from deskew import determine_skew
from wand.image import Image
import numpy as np
with Image(filename='captcha.png') as image:
with image.clone() as cleaned:
# Pull a couple pixels off the edge to remove border noise
cleaned.crop(2, 2, image.width - 2, image.height - 2)
# Remove anything that isn't the text
cleaned.trim()
# Remove rotation
angle = determine_skew(np.array(cleaned))
print("Rotating", angle, "degrees")
cleaned.rotate(-angle, 'white', True)
# Save
cleaned.save(filename='captcha-cleaned.png')
Rotating 15.999999999999996 degrees
In [72]:
Copied!
# Check with pytesseract
print("Answer should be", answer)
guess = pytesseract.image_to_string('captcha-cleaned.png').strip()
print("Guess is", guess)
guess == answer
# Check with pytesseract
print("Answer should be", answer)
guess = pytesseract.image_to_string('captcha-cleaned.png').strip()
print("Guess is", guess)
guess == answer
Answer should be 1G6JuW Guess is 1G6Juw
Out[72]:
False
In [45]:
Copied!
import asyncio
from playwright.async_api import async_playwright
import asyncio
from playwright.async_api import async_playwright
In [46]:
Copied!
playwright = await async_playwright().start()
browser = await playwright.chromium.launch(headless = False)
page = await browser.new_page()
await page.goto('https://jsoma.github.io/captcha-breaker-tester')
playwright = await async_playwright().start()
browser = await playwright.chromium.launch(headless = False)
page = await browser.new_page()
await page.goto('https://jsoma.github.io/captcha-breaker-tester')
Out[46]:
<Response url='https://jsoma.github.io/captcha-breaker-tester/' request=<Request url='https://jsoma.github.io/captcha-breaker-tester/' method='GET'>>
In [1]:
Copied!
import pytesseract
import pytesseract
In [2]:
Copied!
!ls
!ls
Breaking captchas walkthrough.ipynb captcha.png breaking-captchas.md selenium-playwright-conversion.md
In [6]:
Copied!
pytesseract.image_to_osd("captcha.png", config='osd --psm 7 --dpi 72 -c min_characters_to_try=5')
pytesseract.image_to_osd("captcha.png", config='osd --psm 7 --dpi 72 -c min_characters_to_try=5')
Out[6]:
'Page number: 0\nOrientation in degrees: 180\nRotate: 180\nOrientation confidence: 0.07\nScript: Han\nScript confidence: 0.00\n'
In [7]:
Copied!
pytesseract.image_to_string("captcha.png", config='osd --psm 7 --dpi 72 -c min_characters_to_try=5')
pytesseract.image_to_string("captcha.png", config='osd --psm 7 --dpi 72 -c min_characters_to_try=5')
Out[7]:
'yaaa\n\n'
In [10]:
Copied!
from deskew import determine_skew
import cv2
from deskew import determine_skew
import cv2
In [20]:
Copied!
from PIL import Image
import numpy as np
image = Image.open("captcha.png")
grayscale = image.convert("L")
np.array(grayscale)
from PIL import Image
import numpy as np
image = Image.open("captcha.png")
grayscale = image.convert("L")
np.array(grayscale)
Out[20]:
array([[255, 255, 255, ..., 255, 255, 255], [255, 255, 255, ..., 255, 255, 255], [255, 255, 255, ..., 255, 255, 255], ..., [255, 255, 255, ..., 255, 255, 255], [255, 255, 255, ..., 255, 255, 255], [255, 255, 255, ..., 255, 255, 255]], dtype=uint8)
In [14]:
Copied!
In [17]:
Copied!
import numpy as np
from skimage import io
from skimage.transform import rotate
from skimage.color import rgb2gray
from deskew import determine_skew
def deskew(filename):
# Don't allow transparency (that's the last :3 part)
image = io.imread(filename)[:,:,:3]
grayscale = rgb2gray(image)
angle = determine_skew(grayscale)
rotated = rotate(image, angle, resize=True) * 255
return rotated.astype(np.uint8)
deskewed = deskew('captcha.png')
io.imshow(deskewed)
import numpy as np
from skimage import io
from skimage.transform import rotate
from skimage.color import rgb2gray
from deskew import determine_skew
def deskew(filename):
# Don't allow transparency (that's the last :3 part)
image = io.imread(filename)[:,:,:3]
grayscale = rgb2gray(image)
angle = determine_skew(grayscale)
rotated = rotate(image, angle, resize=True) * 255
return rotated.astype(np.uint8)
deskewed = deskew('captcha.png')
io.imshow(deskewed)
Out[17]:
<matplotlib.image.AxesImage at 0x2832abac0>
In [12]:
Copied!
In [13]:
Copied!
-13.999999999999995
In [15]:
Copied!
!pip instal
!pip instal
array([[[0, 0, 0], [0, 0, 0], [0, 0, 0], ..., [0, 0, 0], [0, 0, 0], [0, 0, 0]], [[0, 0, 0], [0, 0, 0], [0, 0, 0], ..., [0, 0, 0], [0, 0, 0], [0, 0, 0]], [[0, 0, 0], [0, 0, 0], [0, 0, 0], ..., [0, 0, 0], [0, 0, 0], [0, 0, 0]], ..., [[0, 0, 0], [0, 0, 0], [0, 0, 0], ..., [0, 0, 0], [0, 0, 0], [0, 0, 0]], [[0, 0, 0], [0, 0, 0], [0, 0, 0], ..., [0, 0, 0], [0, 0, 0], [0, 0, 0]], [[0, 0, 0], [0, 0, 0], [0, 0, 0], ..., [0, 0, 0], [0, 0, 0], [0, 0, 0]]], dtype=uint8)
In [9]:
Copied!
import pytesseract
pytesseract.image_to_osd("captcha.png", config='osd --psm 7 --dpi 72 -c min_characters_to_try=5')
import pytesseract
pytesseract.image_to_osd("captcha.png", config='osd --psm 7 --dpi 72 -c min_characters_to_try=5')
Out[9]:
'Page number: 0\nOrientation in degrees: 180\nRotate: 180\nOrientation confidence: 0.48\nScript: Greek\nScript confidence: 0.00\n'
In [5]:
Copied!
from wand.image import Image
from wand.display import display
with Image(filename='captcha.png') as img:
img.deskew(0.18 * img.quantum_range) # 80%
angle = float(img.artifacts['deskew:angle'])
print(angle)
from wand.image import Image
from wand.display import display
with Image(filename='captcha.png') as img:
img.deskew(0.18 * img.quantum_range) # 80%
angle = float(img.artifacts['deskew:angle'])
print(angle)
-0.0
In [6]:
Copied!
import keras_ocr
pipeline = keras_ocr.pipeline.Pipeline()
prediction_groups = pipeline.recognize(['captcha.png'])
import keras_ocr
pipeline = keras_ocr.pipeline.Pipeline()
prediction_groups = pipeline.recognize(['captcha.png'])
Looking for /Users/soma/.keras-ocr/craft_mlt_25k.h5 Looking for /Users/soma/.keras-ocr/crnn_kurapan.h5 1/1 [==============================] - 1s 664ms/step 1/1 [==============================] - 1s 1s/step
In [7]:
Copied!
guess = prediction_groups[0][0][0]
guess
guess = prediction_groups[0][0][0]
guess
Out[7]:
'jzahra'
In [ ]:
Copied!