├── tests ├── __init__.py ├── test_audio.py └── test_image.py ├── captcha ├── data │ ├── 0 │ │ └── default.wav │ ├── 1 │ │ └── default.wav │ ├── 2 │ │ └── default.wav │ ├── 3 │ │ └── default.wav │ ├── 4 │ │ └── default.wav │ ├── 5 │ │ └── default.wav │ ├── 6 │ │ └── default.wav │ ├── 7 │ │ └── default.wav │ ├── 8 │ │ └── default.wav │ ├── 9 │ │ └── default.wav │ ├── beep.wav │ └── DroidSansMono.ttf ├── __init__.py ├── image.py └── audio.py ├── MANIFEST.in ├── tox.ini ├── .gitignore ├── appveyor.yml ├── .travis.yml ├── CHANGES ├── LICENSE ├── setup.py └── README.rst /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /captcha/data/beep.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataXujing/captcha/master/captcha/data/beep.wav -------------------------------------------------------------------------------- /captcha/data/0/default.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataXujing/captcha/master/captcha/data/0/default.wav -------------------------------------------------------------------------------- /captcha/data/1/default.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataXujing/captcha/master/captcha/data/1/default.wav -------------------------------------------------------------------------------- /captcha/data/2/default.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataXujing/captcha/master/captcha/data/2/default.wav -------------------------------------------------------------------------------- /captcha/data/3/default.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataXujing/captcha/master/captcha/data/3/default.wav -------------------------------------------------------------------------------- /captcha/data/4/default.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataXujing/captcha/master/captcha/data/4/default.wav -------------------------------------------------------------------------------- /captcha/data/5/default.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataXujing/captcha/master/captcha/data/5/default.wav -------------------------------------------------------------------------------- /captcha/data/6/default.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataXujing/captcha/master/captcha/data/6/default.wav -------------------------------------------------------------------------------- /captcha/data/7/default.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataXujing/captcha/master/captcha/data/7/default.wav -------------------------------------------------------------------------------- /captcha/data/8/default.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataXujing/captcha/master/captcha/data/8/default.wav -------------------------------------------------------------------------------- /captcha/data/9/default.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataXujing/captcha/master/captcha/data/9/default.wav -------------------------------------------------------------------------------- /captcha/data/DroidSansMono.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataXujing/captcha/master/captcha/data/DroidSansMono.ttf -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include LICENSE 2 | include README.rst 3 | recursive-include captcha/data *.wav 4 | recursive-include captcha/data *.ttf 5 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | [tox] 2 | envlist = py26,py27,py33,py34,pypy 3 | 4 | [testenv] 5 | deps = 6 | nose 7 | Pillow 8 | wheezy.captcha 9 | commands = nosetests -s 10 | 11 | [testenv:pypy] 12 | deps = nose 13 | commands = nosetests -s 14 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | *.pyo 3 | *.egg-info 4 | __pycache__ 5 | bin 6 | build 7 | develop-eggs 8 | dist 9 | eggs 10 | parts 11 | .DS_Store 12 | .installed.cfg 13 | docs/_build 14 | cover/ 15 | .tox 16 | *.bak 17 | *.c 18 | *.so 19 | venv/ 20 | -------------------------------------------------------------------------------- /appveyor.yml: -------------------------------------------------------------------------------- 1 | build: false 2 | 3 | environment: 4 | matrix: 5 | - PYTHON: "C:\\Python27-x64" 6 | - PYTHON: "C:\\Python35-x64" 7 | 8 | init: 9 | - SET PATH=%PYTHON%;%PATH% 10 | - python -c "import sys;sys.stdout.write(sys.version)" 11 | 12 | install: 13 | - python -m pip install nose Pillow wheezy.captcha 14 | 15 | test_script: 16 | - python -m nose -s 17 | -------------------------------------------------------------------------------- /captcha/__init__.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | """ 4 | Captcha 5 | ~~~~~~~ 6 | 7 | A captcha library that generates audio and image CAPTCHAs. 8 | 9 | :copyright: (c) 2015 - 2017 by Hsiaoming Yang. 10 | :license: BSD, see LICENSE for more details. 11 | """ 12 | 13 | __version__ = '0.2.4' 14 | __author__ = 'Hsiaoming Yang ' 15 | __homepage__ = 'https://github.com/lepture/captcha' 16 | -------------------------------------------------------------------------------- /tests/test_audio.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | from captcha.audio import AudioCaptcha 4 | 5 | 6 | def test_audio_generate(): 7 | captcha = AudioCaptcha() 8 | data = captcha.generate('1234') 9 | assert isinstance(data, bytearray) 10 | assert bytearray(b'RIFF') in data 11 | 12 | 13 | def test_audio_random(): 14 | captcha = AudioCaptcha() 15 | data = captcha.random(4) 16 | assert len(data) == 4 17 | -------------------------------------------------------------------------------- /tests/test_image.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | import sys 4 | 5 | if not hasattr(sys, 'pypy_version_info'): 6 | from captcha.image import ImageCaptcha, WheezyCaptcha 7 | 8 | def test_image_generate(): 9 | captcha = ImageCaptcha() 10 | data = captcha.generate('1234') 11 | assert hasattr(data, 'read') 12 | 13 | captcha = WheezyCaptcha() 14 | data = captcha.generate('1234') 15 | assert hasattr(data, 'read') 16 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | 3 | python: 4 | - "2.7" 5 | - "3.3" 6 | - "3.4" 7 | - "pypy" 8 | 9 | install: 10 | - if [[ $TRAVIS_PYTHON_VERSION != 'pypy' ]]; then pip install -q Pillow wheezy.captcha; fi 11 | 12 | script: 13 | - python setup.py -q nosetests 14 | 15 | after_success: 16 | - pip install coveralls 17 | - coverage run --source=captcha setup.py -q nosetests 18 | - coveralls 19 | 20 | notifications: 21 | email: false 22 | -------------------------------------------------------------------------------- /CHANGES: -------------------------------------------------------------------------------- 1 | Changlog 2 | ======== 3 | 4 | The changelog of Captcha. 5 | 6 | Version 0.2.4 7 | ------------- 8 | 9 | Released on Jul 14, 2017 10 | 11 | - Fix compatibility with PIL 12 | 13 | https://github.com/lepture/captcha/pull/18 14 | 15 | Version 0.2.3 16 | ------------- 17 | 18 | Released on Jun 21, 2017 19 | 20 | - Fix image width bug 21 | - Fix non-integer error 22 | 23 | Version 0.2.2 24 | ------------- 25 | 26 | Released on Mar 17, 2017 27 | 28 | - Fix memory leak 29 | 30 | 31 | Version 0.2.1 32 | ------------- 33 | 34 | Released on Oct 4, 2015. 35 | 36 | - Fix AudioCaptcha in Python 3 37 | - Improve ImageCaptcha 38 | 39 | 40 | Version 0.2 41 | ----------- 42 | 43 | Released on Aug 12, 2015. 44 | 45 | - File format of Image CAPTCHA can be specified 46 | 47 | 48 | Version 0.1.1 49 | ------------- 50 | 51 | Released on Dec 2, 2014, this is a bugfix release. 52 | 53 | - Use cStringIO in Python 2 instead of BytesIO 54 | - Fix random.randint for Python 3 55 | - Add font_sizes parameters for ImageCaptcha when width is too small 56 | 57 | 58 | Version 0.1 59 | ----------- 60 | 61 | Released on Nov 27, 2014, the very first release. 62 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2014, Hsiaoming Yang 2 | 3 | All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 6 | 7 | * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 8 | 9 | * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 10 | 11 | * Neither the name of the creator nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 12 | 13 | 14 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 15 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | try: 5 | from setuptools import setup 6 | except ImportError: 7 | from distutils.core import setup 8 | 9 | import sys 10 | import captcha 11 | from email.utils import parseaddr 12 | 13 | kwargs = {} 14 | if not hasattr(sys, 'pypy_version_info'): 15 | kwargs['install_requires'] = ['Pillow'] 16 | 17 | author, author_email = parseaddr(captcha.__author__) 18 | 19 | 20 | def fopen(filename): 21 | with open(filename) as f: 22 | return f.read() 23 | 24 | 25 | setup( 26 | name='captcha', 27 | version=captcha.__version__, 28 | author=author, 29 | author_email=author_email, 30 | url=captcha.__homepage__, 31 | packages=['captcha'], 32 | description='A captcha library that generates audio and image CAPTCHAs.', 33 | long_description=fopen('README.rst'), 34 | license='BSD', 35 | zip_safe=False, 36 | include_package_data=True, 37 | tests_require=['nose'], 38 | test_suite='nose.collector', 39 | classifiers=[ 40 | 'Development Status :: 4 - Beta', 41 | 'Environment :: Console', 42 | 'Intended Audience :: Developers', 43 | 'License :: OSI Approved', 44 | 'License :: OSI Approved :: BSD License', 45 | 'Operating System :: MacOS', 46 | 'Operating System :: POSIX', 47 | 'Operating System :: POSIX :: Linux', 48 | 'Programming Language :: Python', 49 | 'Programming Language :: Python :: 2.6', 50 | 'Programming Language :: Python :: 2.7', 51 | 'Programming Language :: Python :: 3.3', 52 | 'Programming Language :: Python :: 3.4', 53 | 'Programming Language :: Python :: Implementation', 54 | 'Programming Language :: Python :: Implementation :: CPython', 55 | 'Programming Language :: Python :: Implementation :: PyPy', 56 | ], 57 | **kwargs 58 | ) 59 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | Captcha 2 | ======= 3 | 4 | A captcha library that generates audio and image CAPTCHAs. 5 | 6 | .. image:: https://img.shields.io/badge/donate-lepture-green.svg 7 | :target: https://typlog.com/donate?amount=10&reason=lepture%2Fcaptcha 8 | :alt: Donate lepture 9 | .. image:: https://travis-ci.org/lepture/captcha.svg?branch=master 10 | :target: https://travis-ci.org/lepture/captcha 11 | .. image:: https://ci.appveyor.com/api/projects/status/amm21f13lx4wuura?svg=true 12 | :target: https://ci.appveyor.com/project/lepture/captcha 13 | .. image:: https://coveralls.io/repos/lepture/captcha/badge.svg?branch=master 14 | :target: https://coveralls.io/r/lepture/captcha 15 | 16 | Features 17 | -------- 18 | 19 | 1. Audio CAPTCHAs `DEMO `_ 20 | 2. Image CAPTCHAs 21 | 22 | .. image:: https://cloud.githubusercontent.com/assets/290496/5213632/95e68768-764b-11e4-862f-d95a8f776cdd.png 23 | 24 | 25 | Installation 26 | ------------ 27 | 28 | Install captcha with pip:: 29 | 30 | $ pip install captcha 31 | 32 | Usage 33 | ----- 34 | 35 | Audio and Image CAPTCHAs are in seprated modules: 36 | 37 | .. code:: python 38 | 39 | from captcha.audio import AudioCaptcha 40 | from captcha.image import ImageCaptcha 41 | 42 | audio = AudioCaptcha(voicedir='/path/to/voices') 43 | image = ImageCaptcha(fonts=['/path/A.ttf', '/path/B.ttf']) 44 | 45 | data = audio.generate('1234') 46 | audio.write('1234', 'out.wav') 47 | 48 | data = image.generate('1234') 49 | image.write('1234', 'out.png') 50 | 51 | This is the APIs for your daily works. We do have built-in voice data and font 52 | data. But it is suggested that you use your own voice and font data. 53 | 54 | 55 | Contribution 56 | ------------ 57 | 58 | We need voice wav files. The voice wav file should be in 8-bit, please keep it 59 | as small as possible. Name your voice file as:: 60 | 61 | {{language}}-{{character}}-{{username}}.wav 62 | # exmaple: zh-1-lepture.wav 63 | 64 | TODO: we need a place to upload voice files. 65 | -------------------------------------------------------------------------------- /captcha/image.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | """ 3 | captcha.image 4 | ~~~~~~~~~~~~~ 5 | 6 | Generate Image CAPTCHAs, just the normal image CAPTCHAs you are using. 7 | """ 8 | 9 | import os 10 | import random 11 | from PIL import Image 12 | from PIL import ImageFilter 13 | from PIL.ImageDraw import Draw 14 | from PIL.ImageFont import truetype 15 | try: 16 | from cStringIO import StringIO as BytesIO 17 | except ImportError: 18 | from io import BytesIO 19 | try: 20 | from wheezy.captcha import image as wheezy_captcha 21 | except ImportError: 22 | wheezy_captcha = None 23 | 24 | DATA_DIR = os.path.join(os.path.abspath(os.path.dirname(__file__)), 'data') 25 | DEFAULT_FONTS = [os.path.join(DATA_DIR, 'DroidSansMono.ttf')] 26 | 27 | if wheezy_captcha: 28 | __all__ = ['ImageCaptcha', 'WheezyCaptcha'] 29 | else: 30 | __all__ = ['ImageCaptcha'] 31 | 32 | 33 | table = [] 34 | for i in range( 256 ): 35 | table.append( i * 1.97 ) 36 | 37 | 38 | class _Captcha(object): 39 | def generate(self, chars, format='png'): 40 | """Generate an Image Captcha of the given characters. 41 | 42 | :param chars: text to be generated. 43 | :param format: image file format 44 | """ 45 | im = self.generate_image(chars) 46 | out = BytesIO() 47 | im.save(out, format=format) 48 | out.seek(0) 49 | return out 50 | 51 | def write(self, chars, output, format='png'): 52 | """Generate and write an image CAPTCHA data to the output. 53 | 54 | :param chars: text to be generated. 55 | :param output: output destination. 56 | :param format: image file format 57 | """ 58 | im = self.generate_image(chars) 59 | return im.save(output, format=format) 60 | 61 | 62 | class WheezyCaptcha(_Captcha): 63 | """Create an image CAPTCHA with wheezy.captcha.""" 64 | def __init__(self, width=200, height=75, fonts=None): 65 | self._width = width 66 | self._height = height 67 | self._fonts = fonts or DEFAULT_FONTS 68 | 69 | def generate_image(self, chars): 70 | text_drawings = [ 71 | wheezy_captcha.warp(), 72 | wheezy_captcha.rotate(), 73 | wheezy_captcha.offset(), 74 | ] 75 | fn = wheezy_captcha.captcha( 76 | drawings=[ 77 | wheezy_captcha.background(), 78 | wheezy_captcha.text(fonts=self._fonts, drawings=text_drawings), 79 | wheezy_captcha.curve(), 80 | wheezy_captcha.noise(), 81 | wheezy_captcha.smooth(), 82 | ], 83 | width=self._width, 84 | height=self._height, 85 | ) 86 | return fn(chars) 87 | 88 | 89 | class ImageCaptcha(_Captcha): 90 | """Create an image CAPTCHA. 91 | 92 | Many of the codes are borrowed from wheezy.captcha, with a modification 93 | for memory and developer friendly. 94 | 95 | ImageCaptcha has one built-in font, DroidSansMono, which is licensed under 96 | Apache License 2. You should always use your own fonts:: 97 | 98 | captcha = ImageCaptcha(fonts=['/path/to/A.ttf', '/path/to/B.ttf']) 99 | 100 | You can put as many fonts as you like. But be aware of your memory, all of 101 | the fonts are loaded into your memory, so keep them a lot, but not too 102 | many. 103 | 104 | :param width: The width of the CAPTCHA image. 105 | :param height: The height of the CAPTCHA image. 106 | :param fonts: Fonts to be used to generate CAPTCHA images. 107 | :param font_sizes: Random choose a font size from this parameters. 108 | """ 109 | def __init__(self, width=160, height=60, fonts=None, font_sizes=None): 110 | self._width = width 111 | self._height = height 112 | self._fonts = fonts or DEFAULT_FONTS 113 | self._font_sizes = font_sizes or (42, 50, 56) 114 | self._truefonts = [] 115 | 116 | @property 117 | def truefonts(self): 118 | if self._truefonts: 119 | return self._truefonts 120 | self._truefonts = tuple([ 121 | truetype(n, s) 122 | for n in self._fonts 123 | for s in self._font_sizes 124 | ]) 125 | return self._truefonts 126 | 127 | @staticmethod 128 | def create_noise_curve(image, color): 129 | w, h = image.size 130 | x1 = random.randint(0, int(w / 5)) 131 | x2 = random.randint(w - int(w / 5), w) 132 | y1 = random.randint(int(h / 5), h - int(h / 5)) 133 | y2 = random.randint(y1, h - int(h / 5)) 134 | points = [x1, y1, x2, y2] 135 | end = random.randint(160, 200) 136 | start = random.randint(0, 20) 137 | Draw(image).arc(points, start, end, fill=color) 138 | return image 139 | 140 | @staticmethod 141 | def create_noise_dots(image, color, width=3, number=30): 142 | draw = Draw(image) 143 | w, h = image.size 144 | while number: 145 | x1 = random.randint(0, w) 146 | y1 = random.randint(0, h) 147 | draw.line(((x1, y1), (x1 - 1, y1 - 1)), fill=color, width=width) 148 | number -= 1 149 | return image 150 | 151 | def create_captcha_image(self, chars, color, background): 152 | """Create the CAPTCHA image itself. 153 | 154 | :param chars: text to be generated. 155 | :param color: color of the text. 156 | :param background: color of the background. 157 | 158 | The color should be a tuple of 3 numbers, such as (0, 255, 255). 159 | """ 160 | image = Image.new('RGB', (self._width, self._height), background) 161 | draw = Draw(image) 162 | 163 | def _draw_character(c): 164 | font = random.choice(self.truefonts) 165 | w, h = draw.textsize(c, font=font) 166 | 167 | dx = random.randint(0, 4) 168 | dy = random.randint(0, 6) 169 | im = Image.new('RGBA', (w + dx, h + dy)) 170 | Draw(im).text((dx, dy), c, font=font, fill=color) 171 | 172 | # rotate 173 | im = im.crop(im.getbbox()) 174 | im = im.rotate(random.uniform(-30, 30), Image.BILINEAR, expand=1) 175 | 176 | # warp 177 | dx = w * random.uniform(0.1, 0.3) 178 | dy = h * random.uniform(0.2, 0.3) 179 | x1 = int(random.uniform(-dx, dx)) 180 | y1 = int(random.uniform(-dy, dy)) 181 | x2 = int(random.uniform(-dx, dx)) 182 | y2 = int(random.uniform(-dy, dy)) 183 | w2 = w + abs(x1) + abs(x2) 184 | h2 = h + abs(y1) + abs(y2) 185 | data = ( 186 | x1, y1, 187 | -x1, h2 - y2, 188 | w2 + x2, h2 + y2, 189 | w2 - x2, -y1, 190 | ) 191 | im = im.resize((w2, h2)) 192 | im = im.transform((w, h), Image.QUAD, data) 193 | return im 194 | 195 | images = [] 196 | for c in chars: 197 | if random.random() > 0.5: 198 | images.append(_draw_character(" ")) 199 | images.append(_draw_character(c)) 200 | 201 | text_width = sum([im.size[0] for im in images]) 202 | 203 | width = max(text_width, self._width) 204 | image = image.resize((width, self._height)) 205 | 206 | average = int(text_width / len(chars)) 207 | rand = int(0.25 * average) 208 | offset = int(average * 0.1) 209 | 210 | for im in images: 211 | w, h = im.size 212 | mask = im.convert('L').point(table) 213 | image.paste(im, (offset, int((self._height - h) / 2)), mask) 214 | offset = offset + w + random.randint(-rand, 0) 215 | 216 | if width > self._width: 217 | image = image.resize((self._width, self._height)) 218 | 219 | return image 220 | 221 | def generate_image(self, chars): 222 | """Generate the image of the given characters. 223 | 224 | :param chars: text to be generated. 225 | """ 226 | background = random_color(238, 255) 227 | color = random_color(10, 200, random.randint(220, 255)) 228 | im = self.create_captcha_image(chars, color, background) 229 | self.create_noise_dots(im, color) 230 | self.create_noise_curve(im, color) 231 | im = im.filter(ImageFilter.SMOOTH) 232 | return im 233 | 234 | 235 | def random_color(start, end, opacity=None): 236 | red = random.randint(start, end) 237 | green = random.randint(start, end) 238 | blue = random.randint(start, end) 239 | if opacity is None: 240 | return (red, green, blue) 241 | return (red, green, blue, opacity) 242 | -------------------------------------------------------------------------------- /captcha/audio.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | """ 3 | captcha.audio 4 | ~~~~~~~~~~~~~ 5 | 6 | Generate Audio CAPTCHAs, with built-in digits CAPTCHA. 7 | 8 | This module is totally inspired by https://github.com/dchest/captcha 9 | """ 10 | 11 | import os 12 | import copy 13 | import wave 14 | import struct 15 | import random 16 | import operator 17 | 18 | import sys 19 | if sys.version_info[0] != 2: 20 | import functools 21 | reduce = functools.reduce 22 | 23 | 24 | __all__ = ['AudioCaptcha'] 25 | 26 | WAVE_SAMPLE_RATE = 8000 # HZ 27 | WAVE_HEADER = bytearray( 28 | b'RIFF\x00\x00\x00\x00WAVEfmt \x10\x00\x00\x00\x01\x00\x01\x00' 29 | b'@\x1f\x00\x00@\x1f\x00\x00\x01\x00\x08\x00data' 30 | ) 31 | WAVE_HEADER_LENGTH = len(WAVE_HEADER) - 4 32 | DATA_DIR = os.path.join(os.path.abspath(os.path.dirname(__file__)), 'data') 33 | 34 | 35 | def _read_wave_file(filepath): 36 | w = wave.open(filepath) 37 | data = w.readframes(-1) 38 | w.close() 39 | return bytearray(data) 40 | 41 | 42 | def change_speed(body, speed=1): 43 | """Change the voice speed of the wave body.""" 44 | if speed == 1: 45 | return body 46 | 47 | length = int(len(body) * speed) 48 | rv = bytearray(length) 49 | 50 | step = 0 51 | for v in body: 52 | i = int(step) 53 | while i < int(step + speed) and i < length: 54 | rv[i] = v 55 | i += 1 56 | step += speed 57 | return rv 58 | 59 | 60 | def patch_wave_header(body): 61 | """Patch header to the given wave body. 62 | 63 | :param body: the wave content body, it should be bytearray. 64 | """ 65 | length = len(body) 66 | 67 | padded = length + length % 2 68 | total = WAVE_HEADER_LENGTH + padded 69 | 70 | header = copy.copy(WAVE_HEADER) 71 | # fill the total length position 72 | header[4:8] = bytearray(struct.pack(' 128: 113 | v = (v - 128) * level + 128 114 | v = max(int(v), 128) 115 | v = min(v, 255) 116 | elif v < 128: 117 | v = 128 - (128 - v) * level 118 | v = min(int(v), 128) 119 | v = max(v, 0) 120 | body[i] = v 121 | return body 122 | 123 | 124 | def mix_wave(src, dst): 125 | """Mix two wave body into one.""" 126 | if len(src) > len(dst): 127 | # output should be longer 128 | dst, src = src, dst 129 | 130 | for i, sv in enumerate(src): 131 | dv = dst[i] 132 | if sv < 128 and dv < 128: 133 | dst[i] = int(sv * dv / 128) 134 | else: 135 | dst[i] = int(2 * (sv + dv) - sv * dv / 128 - 256) 136 | return dst 137 | 138 | 139 | BEEP = _read_wave_file(os.path.join(DATA_DIR, 'beep.wav')) 140 | END_BEEP = change_speed(BEEP, 1.4) 141 | SILENCE = create_silence(int(WAVE_SAMPLE_RATE / 5)) 142 | 143 | 144 | class AudioCaptcha(object): 145 | """Create an audio CAPTCHA. 146 | 147 | Create an instance of AudioCaptcha is pretty simple:: 148 | 149 | captcha = AudioCaptcha() 150 | captcha.write('1234', 'out.wav') 151 | 152 | This module has a built-in digits CAPTCHA, but it is suggested that you 153 | create your own voice data library. A voice data library is a directory 154 | that contains lots of single charater named directories, for example:: 155 | 156 | voices/ 157 | 0/ 158 | 1/ 159 | 2/ 160 | 161 | The single charater named directories contain the wave files which pronunce 162 | the directory name. A charater directory can has many wave files, this 163 | AudioCaptcha will randomly choose one of them. 164 | 165 | You should always use your own voice library:: 166 | 167 | captcha = AudioCaptcha(voicedir='/path/to/voices') 168 | """ 169 | def __init__(self, voicedir=None): 170 | if voicedir is None: 171 | voicedir = DATA_DIR 172 | 173 | self._voicedir = voicedir 174 | self._cache = {} 175 | self._choices = [] 176 | 177 | @property 178 | def choices(self): 179 | """Available choices for characters to be generated.""" 180 | if self._choices: 181 | return self._choices 182 | for n in os.listdir(self._voicedir): 183 | if len(n) == 1 and os.path.isdir(os.path.join(self._voicedir, n)): 184 | self._choices.append(n) 185 | return self._choices 186 | 187 | def random(self, length=6): 188 | """Generate a random string with the given length. 189 | 190 | :param length: the return string length. 191 | """ 192 | return random.sample(self.choices, length) 193 | 194 | def load(self): 195 | """Load voice data into memory.""" 196 | for name in self.choices: 197 | self._load_data(name) 198 | 199 | def _load_data(self, name): 200 | dirname = os.path.join(self._voicedir, name) 201 | data = [] 202 | for f in os.listdir(dirname): 203 | filepath = os.path.join(dirname, f) 204 | if f.endswith('.wav') and os.path.isfile(filepath): 205 | data.append(_read_wave_file(filepath)) 206 | self._cache[name] = data 207 | 208 | def _twist_pick(self, key): 209 | voice = random.choice(self._cache[key]) 210 | 211 | # random change speed 212 | speed = random.randrange(90, 120) / 100.0 213 | voice = change_speed(voice, speed) 214 | 215 | # random change sound 216 | level = random.randrange(80, 120) / 100.0 217 | voice = change_sound(voice, level) 218 | return voice 219 | 220 | def _noise_pick(self): 221 | key = random.choice(self.choices) 222 | voice = random.choice(self._cache[key]) 223 | voice = copy.copy(voice) 224 | voice.reverse() 225 | 226 | speed = random.randrange(8, 16) / 10.0 227 | voice = change_speed(voice, speed) 228 | 229 | level = random.randrange(2, 6) / 10.0 230 | voice = change_sound(voice, level) 231 | return voice 232 | 233 | def create_background_noise(self, length, chars): 234 | noise = create_noise(length, 4) 235 | pos = 0 236 | while pos < length: 237 | sound = self._noise_pick() 238 | end = pos + len(sound) + 1 239 | noise[pos:end] = mix_wave(sound, noise[pos:end]) 240 | pos = end + random.randint(0, int(WAVE_SAMPLE_RATE / 10)) 241 | return noise 242 | 243 | def create_wave_body(self, chars): 244 | voices = [] 245 | inters = [] 246 | for key in chars: 247 | voices.append(self._twist_pick(key)) 248 | v = random.randint(WAVE_SAMPLE_RATE, WAVE_SAMPLE_RATE * 3) 249 | inters.append(v) 250 | 251 | durations = map(lambda a: len(a), voices) 252 | length = max(durations) * len(chars) + reduce(operator.add, inters) 253 | bg = self.create_background_noise(length, chars) 254 | 255 | # begin 256 | pos = inters[0] 257 | for i, v in enumerate(voices): 258 | end = pos + len(v) + 1 259 | bg[pos:end] = mix_wave(v, bg[pos:end]) 260 | pos = end + inters[i] 261 | 262 | return BEEP + SILENCE + BEEP + SILENCE + BEEP + bg + END_BEEP 263 | 264 | def generate(self, chars): 265 | """Generate audio CAPTCHA data. The return data is a bytearray. 266 | 267 | :param chars: text to be generated. 268 | """ 269 | if not self._cache: 270 | self.load() 271 | body = self.create_wave_body(chars) 272 | return patch_wave_header(body) 273 | 274 | def write(self, chars, output): 275 | """Generate and write audio CAPTCHA data to the output. 276 | 277 | :param chars: text to be generated. 278 | :param output: output destionation. 279 | """ 280 | data = self.generate(chars) 281 | with open(output, 'wb') as f: 282 | return f.write(data) 283 | --------------------------------------------------------------------------------