Breaking captchas¶

In [21]:

            
                Copied!
                
                    
                    
                
                

        
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import Select
from webdriver_manager.chrome import ChromeDriverManager

driver = webdriver.Chrome(ChromeDriverManager().install())
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import Select
from webdriver_manager.chrome import ChromeDriverManager

driver = webdriver.Chrome(ChromeDriverManager().install())

/var/folders/_m/b8tjbm6n4zs1q2mvjvg25x1m0000gn/T/ipykernel_96808/58483888.py:8: DeprecationWarning: executable_path has been deprecated, please pass in a Service object
  driver = webdriver.Chrome(ChromeDriverManager().install())

In [22]:

            
                Copied!
                
# Visit the webpage
driver.get("https://jsoma.github.io/captcha-breaker-tester/")
# Visit the webpage
driver.get("https://jsoma.github.io/captcha-breaker-tester/")

In [61]:

            
                Copied!
                
# Locate the image, save as PNG
image = driver.find_element(By.CSS_SELECTOR, "#captcha-holder > img")
answer = driver.find_element(By.CSS_SELECTOR, "#captcha-holder > .answer").text
image.screenshot('captcha.png')
# Locate the image, save as PNG
image = driver.find_element(By.CSS_SELECTOR, "#captcha-holder > img")
answer = driver.find_element(By.CSS_SELECTOR, "#captcha-holder > .answer").text
image.screenshot('captcha.png')

Out[61]:

True

In [56]:

            
                Copied!
                
                    
                    
                
                

        
import numpy as np
from skimage import io
from skimage.color import rgb2gray
from skimage.transform import rotate

# Remove alpha channel with :3
image = io.imread('captcha.png')[2:-2,2:-2,:3]
io.imsave('sample.png', image.astype(np.uint8))
grayscale = rgb2gray(image)
angle = determine_skew(grayscale)
rotated = rotate(image, angle, resize=True) * 255
io.imsave('captcha-deskewed.png', rotated.astype(np.uint8))
import numpy as np
from skimage import io
from skimage.color import rgb2gray
from skimage.transform import rotate

# Remove alpha channel with :3
image = io.imread('captcha.png')[2:-2,2:-2,:3]
io.imsave('sample.png', image.astype(np.uint8))
grayscale = rgb2gray(image)
angle = determine_skew(grayscale)
rotated = rotate(image, angle, resize=True) * 255
io.imsave('captcha-deskewed.png', rotated.astype(np.uint8))

In [70]:

            
                Copied!
                
                    
                    
                
                

        
from deskew import determine_skew
from wand.image import Image
import numpy as np

with Image(filename='captcha.png') as image:
    with image.clone() as cleaned:
        # Pull a couple pixels off the edge to remove border noise
        cleaned.crop(2, 2, image.width - 2, image.height - 2)

        # Remove anything that isn't the text
        cleaned.trim()

        # Remove rotation
        angle = determine_skew(np.array(cleaned))
        print("Rotating", angle, "degrees")
        cleaned.rotate(-angle, 'white', True)

        # Save
        cleaned.save(filename='captcha-cleaned.png')
from deskew import determine_skew
from wand.image import Image
import numpy as np

with Image(filename='captcha.png') as image:
    with image.clone() as cleaned:
        # Pull a couple pixels off the edge to remove border noise
        cleaned.crop(2, 2, image.width - 2, image.height - 2)

        # Remove anything that isn't the text
        cleaned.trim()

        # Remove rotation
        angle = determine_skew(np.array(cleaned))
        print("Rotating", angle, "degrees")
        cleaned.rotate(-angle, 'white', True)

        # Save
        cleaned.save(filename='captcha-cleaned.png')

Rotating 15.999999999999996 degrees

In [72]:

            
                Copied!
                
# Check with pytesseract
print("Answer should be", answer)
guess = pytesseract.image_to_string('captcha-cleaned.png').strip()
print("Guess is", guess)

guess == answer
# Check with pytesseract
print("Answer should be", answer)
guess = pytesseract.image_to_string('captcha-cleaned.png').strip()
print("Guess is", guess)

guess == answer

Answer should be 1G6JuW
Guess is 1G6Juw

Out[72]:

False

In [45]:

            
                Copied!
                
import asyncio
from playwright.async_api import async_playwright
import asyncio
from playwright.async_api import async_playwright

In [46]:

            
                Copied!
                
playwright = await async_playwright().start()
browser = await playwright.chromium.launch(headless = False)
page = await browser.new_page()
await page.goto('https://jsoma.github.io/captcha-breaker-tester')
playwright = await async_playwright().start()
browser = await playwright.chromium.launch(headless = False)
page = await browser.new_page()
await page.goto('https://jsoma.github.io/captcha-breaker-tester')

Out[46]:

<Response url='https://jsoma.github.io/captcha-breaker-tester/' request=<Request url='https://jsoma.github.io/captcha-breaker-tester/' method='GET'>>

In [1]:

            
                Copied!
                
import pytesseract
import pytesseract

In [2]:

            
                Copied!
                
!ls
!ls

Breaking captchas walkthrough.ipynb captcha.png
breaking-captchas.md                selenium-playwright-conversion.md

In [6]:

            
                Copied!
                
pytesseract.image_to_osd("captcha.png", config='osd --psm 7 --dpi 72 -c min_characters_to_try=5')
pytesseract.image_to_osd("captcha.png", config='osd --psm 7 --dpi 72 -c min_characters_to_try=5')

Out[6]:

'Page number: 0\nOrientation in degrees: 180\nRotate: 180\nOrientation confidence: 0.07\nScript: Han\nScript confidence: 0.00\n'

In [7]:

            
                Copied!
                
pytesseract.image_to_string("captcha.png", config='osd --psm 7 --dpi 72 -c min_characters_to_try=5')
pytesseract.image_to_string("captcha.png", config='osd --psm 7 --dpi 72 -c min_characters_to_try=5')

Out[7]:

'yaaa\n\n'

In [10]:

            
                Copied!
                
from deskew import determine_skew
import cv2
from deskew import determine_skew
import cv2

In [20]:

            
                Copied!
                
from PIL import Image
import numpy as np

image = Image.open("captcha.png")
grayscale = image.convert("L")
np.array(grayscale)
from PIL import Image
import numpy as np

image = Image.open("captcha.png")
grayscale = image.convert("L")
np.array(grayscale)

Out[20]:

array([[255, 255, 255, ..., 255, 255, 255],
       [255, 255, 255, ..., 255, 255, 255],
       [255, 255, 255, ..., 255, 255, 255],
       ...,
       [255, 255, 255, ..., 255, 255, 255],
       [255, 255, 255, ..., 255, 255, 255],
       [255, 255, 255, ..., 255, 255, 255]], dtype=uint8)

In [14]:

In [17]:

            
                Copied!
                
                    
                    
                
                

        
import numpy as np
from skimage import io
from skimage.transform import rotate
from skimage.color import rgb2gray
from deskew import determine_skew

def deskew(filename):
    # Don't allow transparency (that's the last :3 part)
    image = io.imread(filename)[:,:,:3]
    grayscale = rgb2gray(image)
    angle = determine_skew(grayscale)
    rotated = rotate(image, angle, resize=True) * 255
    return rotated.astype(np.uint8)

deskewed = deskew('captcha.png')
io.imshow(deskewed)
import numpy as np
from skimage import io
from skimage.transform import rotate
from skimage.color import rgb2gray
from deskew import determine_skew

def deskew(filename):
    # Don't allow transparency (that's the last :3 part)
    image = io.imread(filename)[:,:,:3]
    grayscale = rgb2gray(image)
    angle = determine_skew(grayscale)
    rotated = rotate(image, angle, resize=True) * 255
    return rotated.astype(np.uint8)

deskewed = deskew('captcha.png')
io.imshow(deskewed)

Out[17]:

<matplotlib.image.AxesImage at 0x2832abac0>

In [12]:

In [13]:

-13.999999999999995

In [15]:

            
                Copied!
                
!pip instal
!pip instal

array([[[0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        ...,
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0]],

       [[0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        ...,
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0]],

       [[0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        ...,
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0]],

       ...,

       [[0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        ...,
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0]],

       [[0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        ...,
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0]],

       [[0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        ...,
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0]]], dtype=uint8)

In [9]:

            
                Copied!
                
import pytesseract
pytesseract.image_to_osd("captcha.png", config='osd --psm 7 --dpi 72 -c min_characters_to_try=5')
import pytesseract
pytesseract.image_to_osd("captcha.png", config='osd --psm 7 --dpi 72 -c min_characters_to_try=5')

Out[9]:

'Page number: 0\nOrientation in degrees: 180\nRotate: 180\nOrientation confidence: 0.48\nScript: Greek\nScript confidence: 0.00\n'

In [5]:

            
                Copied!
                
from wand.image import Image
from wand.display import display

with Image(filename='captcha.png') as img:
    img.deskew(0.18 * img.quantum_range)  # 80%
    angle = float(img.artifacts['deskew:angle'])
    print(angle)
from wand.image import Image
from wand.display import display

with Image(filename='captcha.png') as img:
    img.deskew(0.18 * img.quantum_range)  # 80%
    angle = float(img.artifacts['deskew:angle'])
    print(angle)

-0.0

In [6]:

            
                Copied!
                
import keras_ocr
pipeline = keras_ocr.pipeline.Pipeline()
prediction_groups = pipeline.recognize(['captcha.png'])
import keras_ocr
pipeline = keras_ocr.pipeline.Pipeline()
prediction_groups = pipeline.recognize(['captcha.png'])

Looking for /Users/soma/.keras-ocr/craft_mlt_25k.h5
Looking for /Users/soma/.keras-ocr/crnn_kurapan.h5
1/1 [==============================] - 1s 664ms/step
1/1 [==============================] - 1s 1s/step

In [7]:

            
                Copied!
                
guess = prediction_groups[0][0][0]
guess
guess = prediction_groups[0][0][0]
guess

Out[7]:

'jzahra'

In [ ]: