├── .travis.yml ├── LICENSE.txt ├── MANIFEST.in ├── Makefile ├── README.rst ├── bench ├── test_chardet_performance.py └── test_performance.py ├── scripts ├── install.sh ├── pyenv-installer └── run.sh ├── setup.cfg ├── setup.py ├── source ├── _static │ └── .empty ├── _templates │ └── .empty ├── conf.py └── index.rst ├── src └── glassbox │ ├── __init__.py │ ├── compat.py │ ├── extension.pyx │ ├── implementation.py │ ├── novelty.py │ ├── pure.py │ └── record.py ├── tests ├── test_implementation.py ├── test_records.py └── test_tracking.py └── tox.ini /.travis.yml: -------------------------------------------------------------------------------- 1 | language: c 2 | 3 | sudo: false 4 | 5 | os: 6 | - linux 7 | 8 | cache: 9 | apt: true 10 | directories: 11 | - $HOME/.pyenv 12 | - $HOME/wheelhouse 13 | 14 | env: 15 | matrix: 16 | - TOXENV=py26-coverage-cython 17 | - TOXENV=py26-coverage-nocython 18 | - TOXENV=py26-nocoverage-cython 19 | - TOXENV=py26-nocoverage-nocython 20 | - TOXENV=py26-nocoverage-pure 21 | - TOXENV=py27-coverage-cython 22 | - TOXENV=py27-coverage-nocython 23 | - TOXENV=py27-nocoverage-cython 24 | - TOXENV=py27-nocoverage-nocython 25 | - TOXENV=py27-nocoverage-pure 26 | - TOXENV=py33-coverage-cython 27 | - TOXENV=py33-coverage-nocython 28 | - TOXENV=py33-nocoverage-cython 29 | - TOXENV=py33-nocoverage-nocython 30 | - TOXENV=py33-nocoverage-pure 31 | - TOXENV=py34-coverage-cython 32 | - TOXENV=py34-coverage-nocython 33 | - TOXENV=py34-nocoverage-cython 34 | - TOXENV=py34-nocoverage-nocython 35 | - TOXENV=py34-nocoverage-pure 36 | - TOXENV=pypy-coverage-pure 37 | - TOXENV=pypy-nocoverage-pure 38 | 39 | install: 40 | - ./scripts/install.sh 41 | 42 | script: 43 | - ./scripts/run.sh 44 | 45 | notifications: 46 | email: false 47 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | Copyright (c) 2013, David R. 
MacIver 2 | 3 | All code in this repository except where explicitly noted otherwise is released 4 | under the Mozilla Public License v 2.0. You can obtain a copy at http://mozilla.org/MPL/2.0/. 5 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include setup.py README.rst LICENSE.txt 2 | recursive-include src *.py *.pyx *.c 3 | recursive-include tests *.py 4 | include source/*.rst 5 | include source/conf.py 6 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | PAPER = 8 | BUILDDIR = build 9 | 10 | # User-friendly check for sphinx-build 11 | ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1) 12 | $(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/) 13 | endif 14 | 15 | # Internal variables. 
16 | PAPEROPT_a4 = -D latex_paper_size=a4 17 | PAPEROPT_letter = -D latex_paper_size=letter 18 | ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source 19 | # the i18n builder cannot share the environment and doctrees with the others 20 | I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source 21 | 22 | .PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest coverage gettext 23 | 24 | help: 25 | @echo "Please use \`make ' where is one of" 26 | @echo " html to make standalone HTML files" 27 | @echo " dirhtml to make HTML files named index.html in directories" 28 | @echo " singlehtml to make a single large HTML file" 29 | @echo " pickle to make pickle files" 30 | @echo " json to make JSON files" 31 | @echo " htmlhelp to make HTML files and a HTML help project" 32 | @echo " qthelp to make HTML files and a qthelp project" 33 | @echo " applehelp to make an Apple Help Book" 34 | @echo " devhelp to make HTML files and a Devhelp project" 35 | @echo " epub to make an epub" 36 | @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" 37 | @echo " latexpdf to make LaTeX files and run them through pdflatex" 38 | @echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx" 39 | @echo " text to make text files" 40 | @echo " man to make manual pages" 41 | @echo " texinfo to make Texinfo files" 42 | @echo " info to make Texinfo files and run them through makeinfo" 43 | @echo " gettext to make PO message catalogs" 44 | @echo " changes to make an overview of all changed/added/deprecated items" 45 | @echo " xml to make Docutils-native XML files" 46 | @echo " pseudoxml to make pseudoxml-XML files for display purposes" 47 | @echo " linkcheck to check all external links for integrity" 48 | @echo " doctest to run all doctests embedded in the documentation (if enabled)" 49 | @echo " coverage to run coverage check of the documentation (if enabled)" 50 
| 51 | clean: 52 | rm -rf $(BUILDDIR)/* 53 | 54 | html: 55 | $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html 56 | @echo 57 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." 58 | 59 | dirhtml: 60 | $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml 61 | @echo 62 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." 63 | 64 | singlehtml: 65 | $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml 66 | @echo 67 | @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." 68 | 69 | pickle: 70 | $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle 71 | @echo 72 | @echo "Build finished; now you can process the pickle files." 73 | 74 | json: 75 | $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json 76 | @echo 77 | @echo "Build finished; now you can process the JSON files." 78 | 79 | htmlhelp: 80 | $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp 81 | @echo 82 | @echo "Build finished; now you can run HTML Help Workshop with the" \ 83 | ".hhp project file in $(BUILDDIR)/htmlhelp." 84 | 85 | qthelp: 86 | $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp 87 | @echo 88 | @echo "Build finished; now you can run "qcollectiongenerator" with the" \ 89 | ".qhcp project file in $(BUILDDIR)/qthelp, like this:" 90 | @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/Glassbox.qhcp" 91 | @echo "To view the help file:" 92 | @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/Glassbox.qhc" 93 | 94 | applehelp: 95 | $(SPHINXBUILD) -b applehelp $(ALLSPHINXOPTS) $(BUILDDIR)/applehelp 96 | @echo 97 | @echo "Build finished. The help book is in $(BUILDDIR)/applehelp." 98 | @echo "N.B. You won't be able to view it unless you put it in" \ 99 | "~/Library/Documentation/Help or install it in your application" \ 100 | "bundle." 101 | 102 | devhelp: 103 | $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp 104 | @echo 105 | @echo "Build finished." 
106 | @echo "To view the help file:" 107 | @echo "# mkdir -p $$HOME/.local/share/devhelp/Glassbox" 108 | @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/Glassbox" 109 | @echo "# devhelp" 110 | 111 | epub: 112 | $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub 113 | @echo 114 | @echo "Build finished. The epub file is in $(BUILDDIR)/epub." 115 | 116 | latex: 117 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 118 | @echo 119 | @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." 120 | @echo "Run \`make' in that directory to run these through (pdf)latex" \ 121 | "(use \`make latexpdf' here to do that automatically)." 122 | 123 | latexpdf: 124 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 125 | @echo "Running LaTeX files through pdflatex..." 126 | $(MAKE) -C $(BUILDDIR)/latex all-pdf 127 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 128 | 129 | latexpdfja: 130 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 131 | @echo "Running LaTeX files through platex and dvipdfmx..." 132 | $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja 133 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 134 | 135 | text: 136 | $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text 137 | @echo 138 | @echo "Build finished. The text files are in $(BUILDDIR)/text." 139 | 140 | man: 141 | $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man 142 | @echo 143 | @echo "Build finished. The manual pages are in $(BUILDDIR)/man." 144 | 145 | texinfo: 146 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 147 | @echo 148 | @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." 149 | @echo "Run \`make' in that directory to run these through makeinfo" \ 150 | "(use \`make info' here to do that automatically)." 151 | 152 | info: 153 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 154 | @echo "Running Texinfo files through makeinfo..." 
155 | make -C $(BUILDDIR)/texinfo info 156 | @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." 157 | 158 | gettext: 159 | $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale 160 | @echo 161 | @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." 162 | 163 | changes: 164 | $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes 165 | @echo 166 | @echo "The overview file is in $(BUILDDIR)/changes." 167 | 168 | linkcheck: 169 | $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck 170 | @echo 171 | @echo "Link check complete; look for any errors in the above output " \ 172 | "or in $(BUILDDIR)/linkcheck/output.txt." 173 | 174 | doctest: 175 | $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest 176 | @echo "Testing of doctests in the sources finished, look at the " \ 177 | "results in $(BUILDDIR)/doctest/output.txt." 178 | 179 | coverage: 180 | $(SPHINXBUILD) -b coverage $(ALLSPHINXOPTS) $(BUILDDIR)/coverage 181 | @echo "Testing of coverage in the sources finished, look at the " \ 182 | "results in $(BUILDDIR)/coverage/python.txt." 183 | 184 | xml: 185 | $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml 186 | @echo 187 | @echo "Build finished. The XML files are in $(BUILDDIR)/xml." 188 | 189 | pseudoxml: 190 | $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml 191 | @echo 192 | @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml." 193 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | ======== 2 | Glassbox 3 | ======== 4 | 5 | Glassbox is a minimal API for program state introspection and detecting 6 | interesting behaviours. 7 | 8 | It's currently rather experimental, but you can see some basic documentation 9 | at https://glassbox.readthedocs.org. 
10 | -------------------------------------------------------------------------------- /bench/test_chardet_performance.py: -------------------------------------------------------------------------------- 1 | import os 2 | import glassbox 3 | import pytest 4 | import chardet 5 | 6 | corpus = [bytearray()] + [bytearray([i]) for i in xrange(256)] 7 | 8 | 9 | def test_detecting_novel_corpus_elements_for_chardet(benchmark): 10 | @benchmark 11 | def result(): 12 | novelty = glassbox.NoveltyDetector() 13 | for c in corpus: 14 | glassbox.begin() 15 | chardet.detect(c) 16 | novelty.novel(glassbox.collect()) 17 | assert result is None 18 | 19 | def test_running_chardet_on_whole_corpus_without_glassbox(benchmark): 20 | @benchmark 21 | def result(): 22 | for c in corpus: 23 | chardet.detect(c) 24 | assert result is None 25 | 26 | 27 | def test_running_chardet_on_whole_corpus_under_glassbox(benchmark): 28 | @benchmark 29 | def result(): 30 | glassbox.begin() 31 | for c in corpus: 32 | chardet.detect(c) 33 | glassbox.collect() 34 | assert result is None 35 | -------------------------------------------------------------------------------- /bench/test_performance.py: -------------------------------------------------------------------------------- 1 | import os 2 | from glassbox import begin, collect 3 | import pytest 4 | 5 | 6 | def to_int(x): 7 | if isinstance(x, str): 8 | return ord(x) 9 | else: 10 | return int(x) 11 | 12 | 13 | def do_hash(data): 14 | x = 0 15 | for c in data: 16 | x *= 31 17 | x += to_int(c) 18 | return x 19 | 20 | HASHING_DATA = os.urandom(1024 * 10) 21 | 22 | 23 | @pytest.mark.parametrize('n', range(1, 4)) 24 | def test_hashing_while_glassboxed(benchmark, n): 25 | @benchmark 26 | def result(): 27 | for i in range(n): 28 | begin() 29 | try: 30 | do_hash(HASHING_DATA) 31 | finally: 32 | for i in range(n): 33 | collect() 34 | assert result is None 35 | 36 | 37 | def test_hashing_while_not_glassboxed(benchmark): 38 | @benchmark 39 | def result(): 40 | 
do_hash(HASHING_DATA) 41 | assert result is None 42 | -------------------------------------------------------------------------------- /scripts/install.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Special license: Take literally anything you want out of this file. I don't 4 | # care. Consider it WTFPL licensed if you like. 5 | # Basically there's a lot of suffering encoded here that I don't want you to 6 | # have to go through and you should feel free to use this to avoid some of 7 | # that suffering in advance. 8 | 9 | set -e 10 | set -x 11 | 12 | # Somehow we occasionally get broken installs of pyenv, and pyenv-installer 13 | # is not good at detecting and cleaning up from those. We use the existence 14 | # of a pyenv executable as a proxy for whether pyenv is actually installed 15 | # correctly, but only because that's the only error I've seen so far. 16 | if [ ! -e "$HOME/.pyenv/bin/pyenv" ] ; then 17 | echo "pyenv does not exist" 18 | if [ -e "$HOME/.pyenv" ] ; then 19 | echo "Looks like a bad pyenv install. Deleting" 20 | rm -rf $HOME/.pyenv 21 | fi 22 | fi 23 | 24 | # Run the pyenv-installer script we've bundled. 25 | # This is basically vendored from >https://github.com/yyuu/pyenv-installer 26 | $(dirname $0)/pyenv-installer 27 | 28 | # Now that pyenv is installed, run the commands it gives us to actually 29 | # activate it. 30 | export PATH="$HOME/.pyenv/bin:$PATH" 31 | eval "$(pyenv init -)" 32 | 33 | 34 | # pyenv update makes a lot of requests to github, which is not entirely 35 | # reliable. As long as we got a working pyenv in the first place (above) we 36 | # don't want to fail the build if pyenv can't update. Given that .pyenv is 37 | # cached anyway, the version we have should probably be quite recent. 38 | pyenv update || echo "Update failed to complete. 
Ignoring" 39 | 40 | SNAKEPIT=$HOME/snakepit 41 | 42 | rm -rf $SNAKEPIT 43 | mkdir $SNAKEPIT 44 | 45 | PYENVS=$HOME/.pyenv/versions 46 | 47 | pyenv install -s 3.4.3 48 | ln -s $PYENVS/3.4.3/bin/python $SNAKEPIT/python3.4 49 | echo 3.4.3 > $HOME/.python-version 50 | pyenv global 3.4.3 51 | pyenv local 3.4.3 52 | 53 | pyenv install -s 2.6.9 54 | ln -s $PYENVS/2.6.9/bin/python $SNAKEPIT/python2.6 55 | pyenv install -s 2.7.9 56 | ln -s $PYENVS/2.7.9/bin/python $SNAKEPIT/python2.7 57 | pyenv install -s 3.2.6 58 | ln -s $PYENVS/3.2.6/bin/python $SNAKEPIT/python3.2 59 | pyenv install -s 3.3.6 60 | ln -s $PYENVS/3.3.6/bin/python $SNAKEPIT/python3.3 61 | pyenv install -s pypy-2.6.0 62 | ln -s $PYENVS/pypy-2.6.0/bin/pypy $SNAKEPIT/pypy 63 | pip install --upgrade tox pip wheel 64 | -------------------------------------------------------------------------------- /scripts/pyenv-installer: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -e 4 | [ -n "$PYENV_DEBUG" ] && set -x 5 | 6 | if [ -z "$PYENV_ROOT" ]; then 7 | PYENV_ROOT="${HOME}/.pyenv" 8 | fi 9 | 10 | shell="$1" 11 | if [ -z "$shell" ]; then 12 | shell="$(ps c -p "$PPID" -o 'ucomm=' 2>/dev/null || true)" 13 | shell="${shell##-}" 14 | shell="${shell%% *}" 15 | shell="$(basename "${shell:-$SHELL}")" 16 | fi 17 | 18 | colorize() { 19 | if [ -t 1 ]; then printf "\e[%sm%s\e[m" "$1" "$2" 20 | else echo -n "$2" 21 | fi 22 | } 23 | 24 | checkout() { 25 | [ -d "$2" ] || git clone "$1" "$2" 26 | } 27 | 28 | if ! command -v git 1>/dev/null 2>&1; then 29 | echo "pyenv: Git is not installed, can't continue." 
>&2 30 | exit 1 31 | fi 32 | 33 | if [ -n "${USE_HTTPS}" ]; then 34 | GITHUB="https://github.com" 35 | else 36 | GITHUB="git://github.com" 37 | fi 38 | 39 | checkout "${GITHUB}/yyuu/pyenv.git" "${PYENV_ROOT}" 40 | checkout "${GITHUB}/yyuu/pyenv-doctor.git" "${PYENV_ROOT}/plugins/pyenv-doctor" 41 | checkout "${GITHUB}/yyuu/pyenv-installer.git" "${PYENV_ROOT}/plugins/pyenv-installer" 42 | checkout "${GITHUB}/yyuu/pyenv-pip-rehash.git" "${PYENV_ROOT}/plugins/pyenv-pip-rehash" 43 | checkout "${GITHUB}/yyuu/pyenv-update.git" "${PYENV_ROOT}/plugins/pyenv-update" 44 | checkout "${GITHUB}/yyuu/pyenv-virtualenv.git" "${PYENV_ROOT}/plugins/pyenv-virtualenv" 45 | checkout "${GITHUB}/yyuu/pyenv-which-ext.git" "${PYENV_ROOT}/plugins/pyenv-which-ext" 46 | 47 | if ! command -v pyenv 1>/dev/null; then 48 | { echo 49 | colorize 1 "WARNING" 50 | echo ": seems you still have not added 'pyenv' to the load path." 51 | echo 52 | } >&2 53 | 54 | case "$shell" in 55 | bash ) 56 | profile="~/.bash_profile" 57 | ;; 58 | zsh ) 59 | profile="~/.zshrc" 60 | ;; 61 | ksh ) 62 | profile="~/.profile" 63 | ;; 64 | fish ) 65 | profile="~/.config/fish/config.fish" 66 | ;; 67 | * ) 68 | profile="your profile" 69 | ;; 70 | esac 71 | 72 | { echo "# Load pyenv automatically by adding" 73 | echo "# the following to ${profile}:" 74 | echo 75 | case "$shell" in 76 | fish ) 77 | echo "set -x PATH \"\$HOME/.pyenv/bin\" \$PATH" 78 | echo 'status --is-interactive; and . (pyenv init -|psub)' 79 | echo 'status --is-interactive; and . 
(pyenv virtualenv-init -|psub)' 80 | ;; 81 | * ) 82 | echo "export PATH=\"\$HOME/.pyenv/bin:\$PATH\"" 83 | echo "eval \"\$(pyenv init -)\"" 84 | echo "eval \"\$(pyenv virtualenv-init -)\"" 85 | ;; 86 | esac 87 | } >&2 88 | fi 89 | -------------------------------------------------------------------------------- /scripts/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | set -x 5 | 6 | export PATH="$HOME/snakepit:$HOME/.pyenv/bin:$PATH" 7 | eval "$(pyenv init -)" 8 | pyenv shell 3.4.3 9 | tox -- $TOX_FLAGS 10 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DRMacIver/glassbox/9e8b38e419a46efb0c4c7a7d4497ad31e69f927e/setup.cfg -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from distutils.core import setup 2 | from setuptools import find_packages 3 | import os 4 | import platform 5 | from distutils.core import Extension 6 | from distutils import errors 7 | import sys 8 | from distutils.command.build_ext import build_ext 9 | from setuptools.command.sdist import sdist as _sdist 10 | 11 | 12 | try: 13 | from Cython.Distutils import build_ext as cython_build_ext 14 | except ImportError: 15 | use_cython = False 16 | else: 17 | use_cython = True 18 | 19 | 20 | def local_file(name): 21 | return os.path.relpath(os.path.join(os.path.dirname(__file__), name)) 22 | 23 | SOURCE = local_file("src") 24 | README = local_file("README.rst") 25 | 26 | setup_args = dict( 27 | name='glassbox', 28 | version="0.1.0", 29 | author='David R. 
MacIver', 30 | author_email='david@drmaciver.com', 31 | packages=find_packages(SOURCE), 32 | package_dir={"": SOURCE}, 33 | url='https://github.com/DRMacIver/glassbox', 34 | long_description=open("README.rst").read(), 35 | license='MPL v2', 36 | description='A library for introspecting program state', 37 | zip_safe=True, 38 | classifiers=[ 39 | "Development Status :: 5 - Production/Stable", 40 | "Intended Audience :: Developers", 41 | "License :: OSI Approved :: Mozilla Public License 2.0 (MPL 2.0)", 42 | "Operating System :: Unix", 43 | "Operating System :: POSIX", 44 | "Operating System :: Microsoft :: Windows", 45 | "Programming Language :: Python", 46 | "Programming Language :: Python :: 2.6", 47 | "Programming Language :: Python :: 2.7", 48 | "Programming Language :: Python :: 3", 49 | "Programming Language :: Python :: 3.2", 50 | "Programming Language :: Python :: 3.3", 51 | "Programming Language :: Python :: 3.4", 52 | "Programming Language :: Python :: Implementation :: CPython", 53 | "Programming Language :: Python :: Implementation :: PyPy", 54 | "Topic :: Software Development :: Testing", 55 | ], 56 | tests_require=['pytest', 'coverage'], 57 | cmdclass={}, 58 | ) 59 | 60 | 61 | ext_errors = ( 62 | errors.CCompilerError, 63 | errors.DistutilsExecError, 64 | errors.DistutilsPlatformError, 65 | ) 66 | 67 | 68 | class BuildFailed(Exception): 69 | """Raise this to indicate the C extension wouldn't build.""" 70 | def __init__(self): 71 | Exception.__init__(self) 72 | self.cause = sys.exc_info()[1] 73 | 74 | 75 | class ve_build_ext(build_ext): 76 | """Build C extensions, but fail with a straightforward exception.""" 77 | 78 | def run(self): 79 | """Wrap `run` with `BuildFailed`.""" 80 | try: 81 | build_ext.run(self) 82 | except errors.DistutilsPlatformError: 83 | raise BuildFailed() 84 | 85 | def build_extension(self, ext): 86 | """Wrap `build_extension` with `BuildFailed`.""" 87 | try: 88 | # Uncomment to test compile failure handling: 89 | # raise 
errors.CCompilerError("OOPS") 90 | build_ext.build_extension(self, ext) 91 | except ext_errors: 92 | raise BuildFailed() 93 | except ValueError as err: 94 | # this can happen on Windows 64 bit, see Python issue 7511 95 | if "'path'" in str(err): # works with both py 2/3 96 | raise BuildFailed() 97 | raise 98 | 99 | CYTHON_FILE = "src/glassbox/extension.pyx" 100 | 101 | 102 | class sdist(_sdist): 103 | def run(self): 104 | from Cython.Build import cythonize 105 | cythonize([CYTHON_FILE]) 106 | _sdist.run(self) 107 | setup_args['cmdclass']['sdist'] = sdist 108 | 109 | 110 | if ( 111 | platform.python_implementation() == 'CPython' 112 | ): 113 | 114 | if use_cython: 115 | extension = Extension( 116 | "glassbox.extension", 117 | sources=[CYTHON_FILE], 118 | ) 119 | setup_args['cmdclass']['build_ext'] = cython_build_ext 120 | else: 121 | extension = Extension( 122 | "glassbox.extension", 123 | sources=[CYTHON_FILE.replace('.pyx', '.c')], 124 | ) 125 | setup_args['cmdclass']['build_ext'] = ve_build_ext 126 | 127 | setup_args['ext_modules'] = [extension] 128 | 129 | 130 | def main(): 131 | """Actually invoke setup() with the arguments we built above.""" 132 | # For a variety of reasons, it might not be possible to install the C 133 | # extension. Try it with, and if it fails, try it without. 134 | try: 135 | setup(**setup_args) 136 | except BuildFailed as exc: 137 | msg = "Couldn't install with extension module, trying without it..." 
138 | exc_msg = "%s: %s" % (exc.__class__.__name__, exc.cause) 139 | print("**\n** %s\n** %s\n**" % (msg, exc_msg)) 140 | 141 | del setup_args['ext_modules'] 142 | setup(**setup_args) 143 | 144 | if __name__ == '__main__': 145 | main() 146 | -------------------------------------------------------------------------------- /source/_static/.empty: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DRMacIver/glassbox/9e8b38e419a46efb0c4c7a7d4497ad31e69f927e/source/_static/.empty -------------------------------------------------------------------------------- /source/_templates/.empty: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DRMacIver/glassbox/9e8b38e419a46efb0c4c7a7d4497ad31e69f927e/source/_templates/.empty -------------------------------------------------------------------------------- /source/conf.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | # 4 | # Glassbox documentation build configuration file, created by 5 | # sphinx-quickstart on Thu Oct 22 17:46:07 2015. 6 | # 7 | # This file is execfile()d with the current directory set to its 8 | # containing dir. 9 | # 10 | # Note that not all possible configuration values are present in this 11 | # autogenerated file. 12 | # 13 | # All configuration values have a default; values that are commented out 14 | # serve to show the default. 15 | 16 | import sys 17 | import os 18 | import shlex 19 | 20 | import sys 21 | 22 | sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "src")) 23 | 24 | extensions = [ 25 | 'sphinx.ext.autodoc', 26 | ] 27 | 28 | # Add any paths that contain templates here, relative to this directory. 29 | templates_path = ['_templates'] 30 | 31 | # The suffix(es) of source filenames. 
32 | # You can specify multiple suffix as a list of string: 33 | # source_suffix = ['.rst', '.md'] 34 | source_suffix = '.rst' 35 | 36 | # The encoding of source files. 37 | #source_encoding = 'utf-8-sig' 38 | 39 | # The master toctree document. 40 | master_doc = 'index' 41 | 42 | # General information about the project. 43 | project = 'Glassbox' 44 | copyright = '2015, David R. MacIver' 45 | author = 'David R. MacIver' 46 | 47 | # The version info for the project you're documenting, acts as replacement for 48 | # |version| and |release|, also used in various other places throughout the 49 | # built documents. 50 | # 51 | # The short X.Y version. 52 | version = '0.1' 53 | # The full version, including alpha/beta/rc tags. 54 | release = '0.1' 55 | 56 | # The language for content autogenerated by Sphinx. Refer to documentation 57 | # for a list of supported languages. 58 | # 59 | # This is also used if you do content translation via gettext catalogs. 60 | # Usually you set "language" from the command line for these cases. 61 | language = None 62 | 63 | # There are two options for replacing |today|: either, you set today to some 64 | # non-false value, then it is used: 65 | #today = '' 66 | # Else, today_fmt is used as the format for a strftime call. 67 | #today_fmt = '%B %d, %Y' 68 | 69 | # List of patterns, relative to source directory, that match files and 70 | # directories to ignore when looking for source files. 71 | exclude_patterns = [] 72 | 73 | # The reST default role (used for this markup: `text`) to use for all 74 | # documents. 75 | #default_role = None 76 | 77 | # If true, '()' will be appended to :func: etc. cross-reference text. 78 | #add_function_parentheses = True 79 | 80 | # If true, the current module name will be prepended to all description 81 | # unit titles (such as .. function::). 82 | #add_module_names = True 83 | 84 | # If true, sectionauthor and moduleauthor directives will be shown in the 85 | # output. They are ignored by default. 
86 | #show_authors = False 87 | 88 | # The name of the Pygments (syntax highlighting) style to use. 89 | pygments_style = 'sphinx' 90 | 91 | # A list of ignored prefixes for module index sorting. 92 | #modindex_common_prefix = [] 93 | 94 | # If true, keep warnings as "system message" paragraphs in the built documents. 95 | #keep_warnings = False 96 | 97 | # If true, `todo` and `todoList` produce output, else they produce nothing. 98 | todo_include_todos = False 99 | 100 | 101 | # -- Options for HTML output ---------------------------------------------- 102 | 103 | # The theme to use for HTML and HTML Help pages. See the documentation for 104 | # a list of builtin themes. 105 | html_theme = 'alabaster' 106 | 107 | # Theme options are theme-specific and customize the look and feel of a theme 108 | # further. For a list of options available for each theme, see the 109 | # documentation. 110 | #html_theme_options = {} 111 | 112 | # Add any paths that contain custom themes here, relative to this directory. 113 | #html_theme_path = [] 114 | 115 | # The name for this set of Sphinx documents. If None, it defaults to 116 | # "<project> v<release> documentation". 117 | #html_title = None 118 | 119 | # A shorter title for the navigation bar. Default is the same as html_title. 120 | #html_short_title = None 121 | 122 | # The name of an image file (relative to this directory) to place at the top 123 | # of the sidebar. 124 | #html_logo = None 125 | 126 | # The name of an image file (within the static path) to use as favicon of the 127 | # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 128 | # pixels large. 129 | #html_favicon = None 130 | 131 | # Add any paths that contain custom static files (such as style sheets) here, 132 | # relative to this directory. They are copied after the builtin static files, 133 | # so a file named "default.css" will overwrite the builtin "default.css".
134 | html_static_path = ['_static'] 135 | 136 | # Add any extra paths that contain custom files (such as robots.txt or 137 | # .htaccess) here, relative to this directory. These files are copied 138 | # directly to the root of the documentation. 139 | #html_extra_path = [] 140 | 141 | # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, 142 | # using the given strftime format. 143 | #html_last_updated_fmt = '%b %d, %Y' 144 | 145 | # If true, SmartyPants will be used to convert quotes and dashes to 146 | # typographically correct entities. 147 | #html_use_smartypants = True 148 | 149 | # Custom sidebar templates, maps document names to template names. 150 | #html_sidebars = {} 151 | 152 | # Additional templates that should be rendered to pages, maps page names to 153 | # template names. 154 | #html_additional_pages = {} 155 | 156 | # If false, no module index is generated. 157 | #html_domain_indices = True 158 | 159 | # If false, no index is generated. 160 | #html_use_index = True 161 | 162 | # If true, the index is split into individual pages for each letter. 163 | #html_split_index = False 164 | 165 | # If true, links to the reST sources are added to the pages. 166 | #html_show_sourcelink = True 167 | 168 | # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. 169 | #html_show_sphinx = True 170 | 171 | # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. 172 | #html_show_copyright = True 173 | 174 | # If true, an OpenSearch description file will be output, and all pages will 175 | # contain a <link> tag referring to it. The value of this option must be the 176 | # base URL from which the finished HTML is served. 177 | #html_use_opensearch = '' 178 | 179 | # This is the file name suffix for HTML files (e.g. ".xhtml"). 180 | #html_file_suffix = None 181 | 182 | # Language to be used for generating the HTML full-text search index.
183 | # Sphinx supports the following languages: 184 | # 'da', 'de', 'en', 'es', 'fi', 'fr', 'hu', 'it', 'ja' 185 | # 'nl', 'no', 'pt', 'ro', 'ru', 'sv', 'tr' 186 | #html_search_language = 'en' 187 | 188 | # A dictionary with options for the search language support, empty by default. 189 | # Now only 'ja' uses this config value 190 | #html_search_options = {'type': 'default'} 191 | 192 | # The name of a javascript file (relative to the configuration directory) that 193 | # implements a search results scorer. If empty, the default will be used. 194 | #html_search_scorer = 'scorer.js' 195 | 196 | # Output file base name for HTML help builder. 197 | htmlhelp_basename = 'Glassboxdoc' 198 | 199 | # -- Options for LaTeX output --------------------------------------------- 200 | 201 | latex_elements = { 202 | # The paper size ('letterpaper' or 'a4paper'). 203 | #'papersize': 'letterpaper', 204 | 205 | # The font size ('10pt', '11pt' or '12pt'). 206 | #'pointsize': '10pt', 207 | 208 | # Additional stuff for the LaTeX preamble. 209 | #'preamble': '', 210 | 211 | # Latex figure (float) alignment 212 | #'figure_align': 'htbp', 213 | } 214 | 215 | # Grouping the document tree into LaTeX files. List of tuples 216 | # (source start file, target name, title, 217 | # author, documentclass [howto, manual, or own class]). 218 | latex_documents = [ 219 | (master_doc, 'Glassbox.tex', 'Glassbox Documentation', 220 | 'David R. MacIver', 'manual'), 221 | ] 222 | 223 | # The name of an image file (relative to this directory) to place at the top of 224 | # the title page. 225 | #latex_logo = None 226 | 227 | # For "manual" documents, if this is true, then toplevel headings are parts, 228 | # not chapters. 229 | #latex_use_parts = False 230 | 231 | # If true, show page references after internal links. 232 | #latex_show_pagerefs = False 233 | 234 | # If true, show URL addresses after external links. 235 | #latex_show_urls = False 236 | 237 | # Documents to append as an appendix to all manuals.
238 | #latex_appendices = [] 239 | 240 | # If false, no module index is generated. 241 | #latex_domain_indices = True 242 | 243 | 244 | # -- Options for manual page output --------------------------------------- 245 | 246 | # One entry per manual page. List of tuples 247 | # (source start file, name, description, authors, manual section). 248 | man_pages = [ 249 | (master_doc, 'glassbox', 'Glassbox Documentation', 250 | [author], 1) 251 | ] 252 | 253 | # If true, show URL addresses after external links. 254 | #man_show_urls = False 255 | 256 | 257 | # -- Options for Texinfo output ------------------------------------------- 258 | 259 | # Grouping the document tree into Texinfo files. List of tuples 260 | # (source start file, target name, title, author, 261 | # dir menu entry, description, category) 262 | texinfo_documents = [ 263 | (master_doc, 'Glassbox', 'Glassbox Documentation', 264 | author, 'Glassbox', 'One line description of project.', 265 | 'Miscellaneous'), 266 | ] 267 | 268 | # Documents to append as an appendix to all manuals. 269 | #texinfo_appendices = [] 270 | 271 | # If false, no module index is generated. 272 | #texinfo_domain_indices = True 273 | 274 | # How to display URL addresses: 'footnote', 'no', or 'inline'. 275 | #texinfo_show_urls = 'footnote' 276 | 277 | # If true, do not generate a @detailmenu in the "Top" node's menu. 278 | #texinfo_no_detailmenu = False 279 | -------------------------------------------------------------------------------- /source/index.rst: -------------------------------------------------------------------------------- 1 | Welcome to Glassbox's documentation! 2 | ==================================== 3 | 4 | Glassbox is a small library for introspecting program state to detect novel 5 | program executions when running tests. It's mostly based on American Fuzzy Lop's 6 | branch detection algorithm. 7 | 8 | Its main interesting feature is that it is extremely fast. 
Programs running under 9 | Glassbox should generally not see more than 10-30% of a slowdown when running on 10 | CPython (on pypy there is currently a significantly more substantial slowdown 11 | because tracing can prevent the JIT from working well). 12 | 13 | .. automodule:: glassbox 14 | :members: begin, collect, Record, NoveltyDetector 15 | 16 | 17 | The intended usage pattern is something along the lines of: 18 | 19 | .. code:: python 20 | 21 | def interesting_values(values, run_test): 22 | detector = NoveltyDetector() 23 | for value in values: 24 | begin() 25 | run_test(value) 26 | record = collect() 27 | if detector.novel(record): 28 | yield value 29 | 30 | This takes a set of values and prunes it down to the subset which produced a behaviour 31 | not in the previously seen ones. 32 | 33 | You could also do something like this: 34 | 35 | .. code:: python 36 | 37 | def interesting_values(values, run_test): 38 | seen = {} 39 | for value in values: 40 | begin() 41 | run_test(value) 42 | record = collect() 43 | for label in record.labels: 44 | if label not in seen or better(value, seen[label]): 45 | seen[label] = value 46 | return seen 47 | 48 | This maintains a current "best" value that exhibits each label. 49 | 50 | Note: It would not be a problem if run_test itself used the glassbox API. As long as 51 | begin/collect calls are kept balanced, it is perfectly safe to nest them. 52 | 53 | Warning: Glassbox is not currently thread-safe. Your results will be very confusing 54 | if you try to use it in threaded code. In the long term it will probably simply 55 | refuse to run on more than one thread, but right now it will just break weirdly.
56 | -------------------------------------------------------------------------------- /src/glassbox/__init__.py: -------------------------------------------------------------------------------- 1 | import sys 2 | from glassbox.record import Record 3 | from glassbox.implementation import native, _collect, _begin 4 | from glassbox.novelty import NoveltyDetector 5 | 6 | __all__ = ['begin', 'collect', 'Record', 'native', 'NoveltyDetector'] 7 | 8 | prev_tracers = [] 9 | 10 | 11 | def begin(): 12 | """Start collecting data until a matching call to collect occurs""" 13 | prev_tracers.append(sys.gettrace()) 14 | sys.settrace(None) 15 | return _begin() 16 | 17 | 18 | def collect(): 19 | """Stop collecting data and return a Record containing the program 20 | execution since the matching begin call""" 21 | result = Record(_collect()) 22 | sys.settrace(prev_tracers.pop()) 23 | return result 24 | -------------------------------------------------------------------------------- /src/glassbox/compat.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | if sys.version_info[0] == 2: 4 | _range = xrange 5 | else: 6 | _range = range 7 | -------------------------------------------------------------------------------- /src/glassbox/extension.pyx: -------------------------------------------------------------------------------- 1 | import sys 2 | from array import array as arr 3 | from cpython cimport array 4 | from cpython.ref cimport PyObject 5 | 6 | cdef extern from "Python.h": 7 | 8 | # We make these an opaque types. If the user wants specific attributes, 9 | # they can be declared manually. 
10 | 11 | ctypedef struct PyInterpreterState: 12 | pass 13 | 14 | ctypedef struct PyThreadState: 15 | pass 16 | 17 | ctypedef struct PyCodeObject: 18 | PyObject *co_filename; 19 | 20 | ctypedef struct PyFrameObject: 21 | PyCodeObject *f_code; 22 | int f_lineno; 23 | 24 | # This is not actually a struct, but make sure it can never be coerced to 25 | # an int or used in arithmetic expressions 26 | ctypedef struct PyGILState_STATE 27 | 28 | # The type of the trace function registered using PyEval_SetProfile() and 29 | # PyEval_SetTrace(). 30 | # Py_tracefunc return -1 when raising an exception, or 0 for success. 31 | ctypedef int (*Py_tracefunc)(PyObject *, PyFrameObject *, int, PyObject *) 32 | 33 | void PyEval_SetTrace(Py_tracefunc cfunc, object obj) 34 | 35 | cdef extern from "frameobject.h": 36 | pass 37 | 38 | cdef int STATE_SIZE = 2 ** 16 39 | cdef int STATE_MASK = STATE_SIZE - 1 40 | 41 | cdef unsigned int stack_height = 0; 42 | cdef list arrays = [] 43 | cdef array.array current_array = arr('I') 44 | array.resize(current_array, STATE_SIZE) 45 | cdef int prev_state = 0 46 | 47 | 48 | 49 | cdef push_array(): 50 | global current_array 51 | global stack_height 52 | stack_height += 1 53 | cdef array.array array_state = arr('I') 54 | array.resize(array_state, STATE_SIZE) 55 | array.zero(array_state) 56 | arrays.append(array_state) 57 | current_array = array_state 58 | current_array = array_state 59 | 60 | 61 | cdef int inthash(int a): 62 | a = (a ^ 61) ^ (a >> 16) 63 | a = a + (a << 3) 64 | a = a ^ (a >> 4) 65 | a = a * 0x27d4eb2d 66 | a = a ^ (a >> 15) 67 | return a 68 | 69 | 70 | cdef void record_state(object filename, int line): 71 | cdef int curr_state = hash(filename) * 3 + inthash(line) 72 | global prev_state 73 | cdef int transition = curr_state ^ prev_state 74 | # This tracer should never be active when we have an empty stack of 75 | # arrays but it seems sometimes CPython gets itself a bit confused and 76 | # does it anyway. 
This is a workaround to that problem. 77 | if stack_height == 0: 78 | return 79 | current_array.data.as_uints[transition & STATE_MASK] += 1 80 | prev_state = curr_state >> 1 81 | 82 | 83 | cdef int tracer( 84 | PyObject* _traceobj, PyFrameObject* _frame, int what, PyObject* arg 85 | ) except -1: 86 | record_state((_frame.f_code.co_filename), _frame.f_lineno) 87 | return 0 88 | 89 | 90 | class ProxyTracer(object): 91 | def __call__(self, frame, event, arg): 92 | if sys.gettrace() is self: 93 | record_state(frame.f_code.co_filename, frame.f_lineno) 94 | install_tracer() 95 | return self 96 | 97 | cdef object proxy_tracer = ProxyTracer() 98 | 99 | 100 | cdef void install_tracer(): 101 | PyEval_SetTrace(tracer, proxy_tracer) 102 | 103 | def _begin(): 104 | """Start tracking program state. 105 | 106 | If begin() has previously been called, any labels that occur during this 107 | execution will also be made visible to previous begin calls. 108 | """ 109 | global prev_state 110 | prev_state = 0 111 | push_array() 112 | install_tracer() 113 | 114 | 115 | def _collect(): 116 | """Return a set of string labels corresponding to events that have been 117 | seen since the last begin() call""" 118 | global stack_height 119 | global current_array 120 | assert stack_height > 0 121 | stack_height -= 1 122 | cdef array.array data = arrays.pop() 123 | assert len(arrays) == stack_height 124 | if stack_height > 0: 125 | current_array = arrays[-1] 126 | cdef array.array a 127 | for _a in arrays: 128 | a = _a 129 | for i in xrange(STATE_SIZE): 130 | a.data.as_uints[i] += data.data.as_uints[i] 131 | PyEval_SetTrace(NULL, None) 132 | return _labels(data) 133 | 134 | 135 | def _labels(_data): 136 | cdef array.array data = _data 137 | cdef array.array labels = arr('I') 138 | cdef unsigned int b 139 | cdef unsigned int a 140 | cdef unsigned int i 141 | cdef unsigned int count = 0 142 | cdef unsigned int datalen = len(data) 143 | for i in xrange(datalen): 144 | if data.data.as_uints[i] > 0: 
145 | count += 1 146 | for i in xrange(datalen): 147 | b = data.data.as_uints[i] 148 | if b == 0: 149 | continue 150 | a = i << 4 151 | if b > 0: 152 | append_uint(labels, a + 1) 153 | if b > 1: 154 | append_uint(labels, a + 2) 155 | if b > 2: 156 | append_uint(labels, a + 3) 157 | if b > 3: 158 | append_uint(labels, a + 4) 159 | if b > 4: 160 | append_uint(labels, a + 5) 161 | if b > 8: 162 | append_uint(labels, a + 6) 163 | if b > 16: 164 | append_uint(labels, a + 7) 165 | if b > 32: 166 | append_uint(labels, a + 8) 167 | if b > 64: 168 | append_uint(labels, a + 9) 169 | if b > 128: 170 | append_uint(labels, a + 10) 171 | return labels 172 | 173 | 174 | cdef append_uint(array.array x, unsigned int i): 175 | x.append(i) 176 | 177 | 178 | def merge_arrays(_x, _y): 179 | cdef array.array x = _x 180 | cdef array.array y = _y 181 | cdef array.array result = arr('I') 182 | cdef unsigned int xi = 0 183 | cdef unsigned int yi = 0 184 | cdef unsigned int lx = len(x) 185 | cdef unsigned int ly = len(y) 186 | cdef unsigned int xv 187 | cdef unsigned int yv 188 | while xi < lx and yi < ly: 189 | xv = x.data.as_uints[xi] 190 | yv = y.data.as_uints[yi] 191 | if xv < yv: 192 | append_uint(result, xv) 193 | xi += 1 194 | elif xv > yv: 195 | append_uint(result, yv) 196 | yi += 1 197 | else: 198 | append_uint(result, xv) 199 | xi += 1 200 | yi += 1 201 | while xi < lx: 202 | append_uint(result, x.data.as_uints[xi]) 203 | xi += 1 204 | while yi < ly: 205 | append_uint(result, y.data.as_uints[yi]) 206 | yi += 1 207 | return result 208 | 209 | cdef object _array_contained(array.array x, array.array y): 210 | cdef unsigned int lx = len(x) 211 | cdef unsigned int ly = len(y) 212 | if lx > ly: 213 | return False 214 | if lx == 0: 215 | return True 216 | if x.data.as_uints[0] < y.data.as_uints[0]: 217 | return False 218 | if x.data.as_uints[lx - 1] > y.data.as_uints[ly - 1]: 219 | return False 220 | cdef unsigned int probe = 0 221 | cdef unsigned int v 222 | cdef unsigned int o 223 | 
cdef unsigned int lo 224 | cdef unsigned int hi 225 | cdef unsigned int mid 226 | cdef unsigned int i 227 | cdef unsigned int k 228 | for k in xrange(lx): 229 | v = x.data.as_uints[k] 230 | o = y.data.as_uints[probe] 231 | if v == o: 232 | probe += 1 233 | continue 234 | elif v < o: 235 | return False 236 | 237 | lo = probe 238 | i = 0 239 | while True: 240 | hi = probe + 2 ** i 241 | i += 1 242 | if hi >= ly: 243 | hi = ly - 1 244 | break 245 | if y.data.as_uints[hi] >= v: 246 | break 247 | else: 248 | lo = hi 249 | # Invariant: y[lo] < v <= y[hi] 250 | while lo + 1 < hi: 251 | mid = (lo + hi) // 2 252 | o = y.data.as_uints[mid] 253 | if v <= o: 254 | hi = mid 255 | else: 256 | lo = mid 257 | if v == y.data.as_uints[hi]: 258 | probe = hi + 1 259 | continue 260 | else: 261 | return False 262 | return True 263 | 264 | def array_contained(x, y): 265 | return _array_contained(x, y) 266 | 267 | def merge_into(_x, _y, _scratch): 268 | del _scratch[:] 269 | cdef array.array x = _x; 270 | cdef array.array y = _y; 271 | cdef array.array scratch = _scratch; 272 | cdef unsigned int xi = 0 273 | cdef unsigned int yi = 0 274 | cdef unsigned int lx = len(x) 275 | cdef unsigned int ly = len(y) 276 | cdef unsigned int xv; 277 | cdef unsigned int yv; 278 | while xi < lx and yi < ly: 279 | xv = x.data.as_uints[xi] 280 | yv = y.data.as_uints[yi] 281 | if xv < yv: 282 | append_uint(scratch, xv) 283 | xi += 1 284 | elif xv > yv: 285 | append_uint(scratch, yv) 286 | yi += 1 287 | else: 288 | append_uint(scratch, xv) 289 | xi += 1 290 | yi += 1 291 | while xi < lx: 292 | append_uint(scratch, x.data.as_uints[xi]) 293 | xi += 1 294 | while yi < len(y): 295 | append_uint(scratch, y.data.as_uints[yi]) 296 | yi += 1 297 | -------------------------------------------------------------------------------- /src/glassbox/implementation.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | __all__ = ['native', '_begin', '_collect', 
'merge_arrays', 'array_contained'] 4 | 5 | native = False 6 | 7 | if os.getenv('GLASSBOX_FORCE_PURE') != 'true': 8 | try: 9 | from glassbox.extension import _begin, _collect, merge_arrays, \ 10 | array_contained, merge_into 11 | native = True 12 | except ImportError: 13 | pass 14 | 15 | if not native: 16 | from glassbox.pure import ( # noqa 17 | _begin, _collect, merge_arrays, array_contained, merge_into) 18 | -------------------------------------------------------------------------------- /src/glassbox/novelty.py: -------------------------------------------------------------------------------- 1 | from array import array 2 | from glassbox.implementation import merge_into, array_contained 3 | 4 | 5 | class NoveltyDetector(object): 6 | """ 7 | A NoveltyDetector is used to test when a Record exhibits behaviour not 8 | previously seen. 9 | """ 10 | 11 | def __init__(self): 12 | self.data = array('I') 13 | self.scratch = array('I') 14 | 15 | def novel(self, record): 16 | """Return True if this record exhibits some behaviour that no previous 17 | record passed in to novel has shown""" 18 | if array_contained(record.labels, self.data): 19 | return False 20 | self.merge_record(record) 21 | return True 22 | 23 | def merge_record(self, record): 24 | merge_into(self.data, record.labels, self.scratch) 25 | assert len(self.scratch) > len(self.data) 26 | self.data, self.scratch = self.scratch, self.data 27 | -------------------------------------------------------------------------------- /src/glassbox/pure.py: -------------------------------------------------------------------------------- 1 | import sys 2 | from array import array as arr 3 | from glassbox.compat import _range 4 | 5 | 6 | arrays = [] 7 | prev_state = 0 8 | 9 | 10 | STATE_SIZE = 2 ** 16 11 | STATE_MASK = STATE_SIZE - 1 12 | 13 | array_template = None 14 | current_array = None 15 | 16 | def push_array(): 17 | global array_template, current_array 18 | if array_template is None: 19 | array_template = arr('I') 20 | 
array_template.append(0) 21 | while len(array_template) < STATE_SIZE: 22 | array_template.extend(array_template) 23 | assert len(array_template) == STATE_SIZE 24 | result = arr('I', array_template) 25 | current_array = result 26 | arrays.append(result) 27 | 28 | 29 | def inthash(a): 30 | a = (a ^ 61) ^ (a >> 16) 31 | a = a + (a << 3) 32 | a = a ^ (a >> 4) 33 | a = a * 0x27d4eb2d 34 | a = a ^ (a >> 15) 35 | return a 36 | 37 | 38 | def tracer(frame, event, arg): 39 | global prev_state 40 | filename = frame.f_code.co_filename 41 | line = frame.f_lineno 42 | curr_state = hash(filename) * 3 + inthash(line) 43 | transition = curr_state ^ prev_state 44 | current_array[transition & STATE_MASK] += 1 45 | prev_state = curr_state >> 1 46 | return tracer 47 | 48 | 49 | def _begin(): 50 | """Start tracking program state. 51 | 52 | If begin() has previously been called, any labels that occur during this 53 | execution will also be made visible to previous begin calls. 54 | """ 55 | sys.settrace(None) 56 | assert sys.gettrace() is None 57 | global prev_state 58 | prev_state = 0 59 | push_array() 60 | sys.settrace(tracer) 61 | assert sys.gettrace() is tracer 62 | 63 | 64 | def _collect(): 65 | """Return a set of string labels corresponding to events that have been 66 | seen since the last begin() call""" 67 | global current_array 68 | t = sys.gettrace() 69 | assert t is tracer, t 70 | sys.settrace(None) 71 | assert sys.gettrace() is None 72 | data = arrays.pop() 73 | if arrays: 74 | current_array = arrays[-1] 75 | for a in arrays: 76 | for i in range(len(data)): 77 | a[i] += data[i] 78 | return _labels(data) 79 | 80 | 81 | def label(a, b): 82 | if b > 4: 83 | if b <= 8: 84 | b = 5 85 | elif b <= 16: 86 | b = 6 87 | elif b <= 32: 88 | b = 7 89 | elif b <= 128: 90 | b = 8 91 | else: 92 | b = 9 93 | return (a << 4) + b 94 | 95 | 96 | def _labels(data): 97 | orig = sys.gettrace() 98 | sys.settrace(None) 99 | try: 100 | labels = arr('I') 101 | for i in _range(len(data)): 102 | a = i << 
4 103 | b = data[i] 104 | if b > 0: 105 | labels.append(a + 1) 106 | if b > 1: 107 | labels.append(a + 2) 108 | if b > 2: 109 | labels.append(a + 3) 110 | if b > 3: 111 | labels.append(a + 4) 112 | if b > 4: 113 | labels.append(a + 5) 114 | if b > 8: 115 | labels.append(a + 6) 116 | if b > 16: 117 | labels.append(a + 7) 118 | if b > 32: 119 | labels.append(a + 8) 120 | if b > 64: 121 | labels.append(a + 9) 122 | if b > 128: 123 | labels.append(a + 10) 124 | return labels 125 | finally: 126 | sys.settrace(orig) 127 | 128 | 129 | def merge_arrays(x, y): 130 | result = arr('I') 131 | xi = 0 132 | yi = 0 133 | while xi < len(x) and yi < len(y): 134 | xv = x[xi] 135 | yv = y[yi] 136 | if xv < yv: 137 | result.append(xv) 138 | xi += 1 139 | elif xv > yv: 140 | result.append(yv) 141 | yi += 1 142 | else: 143 | result.append(xv) 144 | xi += 1 145 | yi += 1 146 | while xi < len(x): 147 | result.append(x[xi]) 148 | xi += 1 149 | while yi < len(y): 150 | result.append(y[yi]) 151 | yi += 1 152 | return result 153 | 154 | 155 | def array_contained(x, y): 156 | if len(x) > len(y): 157 | return False 158 | if not x: 159 | return True 160 | assert y 161 | if x[0] < y[0]: 162 | return False 163 | if x[-1] > y[-1]: 164 | return False 165 | probe = 0 166 | for v in x: 167 | o = y[probe] 168 | if v == o: 169 | probe += 1 170 | continue 171 | elif v < o: 172 | return False 173 | assert v > o 174 | 175 | lo = probe 176 | i = 0 177 | while True: 178 | hi = probe + 2 ** i 179 | i += 1 180 | if hi >= len(y): 181 | hi = len(y) - 1 182 | break 183 | if y[hi] >= v: 184 | break 185 | else: 186 | lo = hi 187 | # Invariant: y[lo] < v <= y[hi] 188 | while lo + 1 < hi: 189 | mid = (lo + hi) // 2 190 | o = y[mid] 191 | if v <= o: 192 | hi = mid 193 | else: 194 | lo = mid 195 | if v == y[hi]: 196 | probe = hi + 1 197 | continue 198 | else: 199 | return False 200 | return True 201 | 202 | 203 | def merge_into(x, y, scratch): 204 | del scratch[:] 205 | xi = 0 206 | yi = 0 207 | while xi < len(x) and 
yi < len(y): 208 | xv = x[xi] 209 | yv = y[yi] 210 | if xv < yv: 211 | scratch.append(xv) 212 | xi += 1 213 | elif xv > yv: 214 | scratch.append(yv) 215 | yi += 1 216 | else: 217 | scratch.append(xv) 218 | xi += 1 219 | yi += 1 220 | while xi < len(x): 221 | scratch.append(x[xi]) 222 | xi += 1 223 | while yi < len(y): 224 | scratch.append(y[yi]) 225 | yi += 1 226 | -------------------------------------------------------------------------------- /src/glassbox/record.py: -------------------------------------------------------------------------------- 1 | from glassbox.implementation import merge_arrays, array_contained 2 | 3 | 4 | class Record(object): 5 | """A record is a structured representation of a program's execution path. 6 | 7 | A record has a set of labels, which may be accessed as record.labels and 8 | are a sorted array of unsigned 32-bit integers. Each one corresponds to 9 | some interesting observed behaviour. 10 | """ 11 | def __init__(self, labels): 12 | self.labels = labels 13 | 14 | def __repr__(self): 15 | return "Record(%r)" % (list(self.labels),) 16 | 17 | def __eq__(self, other): 18 | return isinstance(other, Record) and ( 19 | self.labels == other.labels 20 | ) 21 | 22 | def __ne__(self, other): 23 | return not self.__eq__(other) 24 | 25 | def __hash__(self): 26 | if not self.labels: 27 | return 0 28 | return hash(( 29 | self.labels[0], self.labels[-1], 30 | self.labels[len(self.labels) // 2], 31 | len(self.labels), 32 | )) 33 | 34 | def contained_in(self, other): 35 | """Return True if every behaviour observed by this record is also 36 | observed in the other""" 37 | 38 | return array_contained(self.labels, other.labels) 39 | 40 | def __or__(self, other): 41 | if not isinstance(other, Record): 42 | raise TypeError("Cannot union Record with %r of type %s" % ( 43 | other, other.__name__ 44 | )) 45 | 46 | if len(other.labels) > len(self.labels): 47 | self, other = other, self 48 | if other.contained_in(self): 49 | return self 50 | 51 | return 
Record( 52 | merge_arrays(self.labels, other.labels) 53 | ) 54 | -------------------------------------------------------------------------------- /tests/test_implementation.py: -------------------------------------------------------------------------------- 1 | from glassbox.implementation import merge_arrays 2 | from array import array 3 | from hypothesis import given 4 | import hypothesis.strategies as st 5 | 6 | deduped_arrays = st.lists( 7 | st.integers(0, 2 ** 32 - 1), unique=True).map( 8 | lambda x: array('I', sorted(x))) 9 | 10 | 11 | @given(deduped_arrays, deduped_arrays) 12 | def test_merges_correctly(x, y): 13 | t = merge_arrays(x, y) 14 | assert list(t) == sorted(set(x) | set(y)) 15 | -------------------------------------------------------------------------------- /tests/test_records.py: -------------------------------------------------------------------------------- 1 | from glassbox import Record 2 | import hypothesis.strategies as st 3 | from array import array 4 | from hypothesis import given, assume 5 | from glassbox.novelty import NoveltyDetector 6 | 7 | 8 | def build_record(labels): 9 | labels = sorted(set(labels)) 10 | return Record(array('I', labels)) 11 | 12 | 13 | Records = st.builds(build_record, st.lists(st.integers(0, 2 ** 32 - 1))) 14 | 15 | @given(Records) 16 | def test_record_contained_in_self(x): 17 | assert x.contained_in(x) 18 | 19 | 20 | @given(st.lists(Records)) 21 | def test_records_are_hashable(xs): 22 | d = {} 23 | for i, x in enumerate(xs): 24 | d[x] = i 25 | for i, x in enumerate(xs): 26 | assert d[x] >= i 27 | 28 | 29 | @given(Records, Records) 30 | def test_union_produces_larger(x, y): 31 | z = x | y 32 | assert x.contained_in(z) 33 | assert y.contained_in(z) 34 | if y.contained_in(x): 35 | assert z == x 36 | if x.contained_in(y): 37 | assert z == y 38 | assert set(z.labels) == set(x.labels) | set(y.labels) 39 | 40 | 41 | @given(st.lists(Records)) 42 | def test_novelty_is_containment_in_union(ls): 43 | u = Record(array('I')) 
44 | detector = NoveltyDetector() 45 | for l in ls: 46 | assert l.contained_in(u) == (not detector.novel(l)) 47 | u |= l 48 | 49 | -------------------------------------------------------------------------------- /tests/test_tracking.py: -------------------------------------------------------------------------------- 1 | import os 2 | from glassbox import begin, collect 3 | import sys 4 | import pytest 5 | import platform 6 | 7 | 8 | @pytest.fixture(autouse=True, scope='function') 9 | def needs_cleanup(request): 10 | original_trace = sys.gettrace() 11 | 12 | def cleanup(): 13 | sys.settrace(original_trace) 14 | request.addfinalizer(cleanup) 15 | 16 | counter = 0 17 | 18 | 19 | def countingtrace(frame, event, arg): 20 | global counter 21 | counter += 1 22 | return countingtrace 23 | 24 | 25 | def run_for_labels(f, *args): 26 | begin() 27 | f(*args) 28 | return collect() 29 | 30 | 31 | def run_multiple_for_labels(n, f, *args): 32 | return [ 33 | run_for_labels(f, *args) for _ in range(n) 34 | ] 35 | 36 | 37 | def onebranch(x): 38 | if x: 39 | return 1 40 | 41 | 42 | def test_unsets_trace(): 43 | orig = sys.gettrace() 44 | begin() 45 | collect() 46 | assert sys.gettrace() == orig 47 | 48 | 49 | def test_is_stable(): 50 | assert run_for_labels(onebranch, False) == run_for_labels(onebranch, False) 51 | assert run_for_labels(onebranch, True) == run_for_labels(onebranch, True) 52 | 53 | 54 | def test_distinct_reprs(): 55 | assert repr(run_for_labels(onebranch, False)) == repr( 56 | run_for_labels(onebranch, False)) 57 | assert repr(run_for_labels(onebranch, False)) != repr( 58 | run_for_labels(onebranch, True)) 59 | 60 | 61 | def test_detects_branches(): 62 | assert run_for_labels(onebranch, False) != run_for_labels(onebranch, True) 63 | 64 | 65 | testfns = [test_unsets_trace, test_is_stable, test_detects_branches] 66 | 67 | 68 | @pytest.mark.parametrize('f', testfns, ids=[f.__name__ for f in testfns]) 69 | def test_can_be_nested(f): 70 | begin() 71 | f() 72 | collect() 73 
| 74 | 75 | @pytest.mark.parametrize('f', testfns, ids=[f.__name__ for f in testfns]) 76 | def test_can_be_nested_arbitrarily(f): 77 | begin() 78 | begin() 79 | f() 80 | collect() 81 | collect() 82 | 83 | 84 | def test_subsumes_child_labels(): 85 | begin() 86 | a = run_for_labels(onebranch, False) 87 | b = run_for_labels(onebranch, True) 88 | assert a != b 89 | c = collect() 90 | assert a.contained_in(c) 91 | assert b.contained_in(c) 92 | 93 | 94 | def twobranch(x, y): 95 | if x: 96 | if y: 97 | return 1 98 | else: 99 | return 2 100 | elif y: 101 | return 3 102 | else: 103 | return 4 104 | 105 | 106 | def test_containment(): 107 | bits = [ 108 | run_for_labels(twobranch, u, v) 109 | for u in [False, True] 110 | for v in [False, True] 111 | ] 112 | 113 | for x in bits: 114 | for y in bits: 115 | if x is y: 116 | assert x.contained_in(y) 117 | else: 118 | assert not x.contained_in(y) 119 | 120 | 121 | def loopy(n): 122 | for i in range(n): 123 | pass 124 | 125 | 126 | def test_can_distinguish_number_of_times_through_a_loop(): 127 | loops = [ 128 | run_for_labels(loopy, 0), 129 | run_for_labels(loopy, 1), 130 | run_for_labels(loopy, 2), 131 | run_for_labels(loopy, 3), 132 | run_for_labels(loopy, 10), 133 | run_for_labels(loopy, 50), 134 | run_for_labels(loopy, 100), 135 | ] 136 | for i in range(len(loops) - 1): 137 | print(i) 138 | assert loops[i] != loops[i+1] 139 | assert not loops[i + 1].contained_in(loops[i]) 140 | 141 | 142 | def test_can_always_build_native_in_test_env(): 143 | pure_forced = ( 144 | os.getenv('GLASSBOX_FORCE_PURE') == 'true' or 145 | platform.python_implementation() != 'CPython' 146 | ) 147 | from glassbox import native 148 | assert native == (not pure_forced) 149 | 150 | 151 | def resetthenbranch(x): 152 | sys.settrace(sys.gettrace()) 153 | return onebranch(x) 154 | 155 | 156 | def test_suspending_and_resuming_coverage_does_not_break_tracking(): 157 | assert run_for_labels(resetthenbranch, False) != \ 158 | run_for_labels(resetthenbranch, True) 
159 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | [tox] 2 | envlist = {py26,py27,py33,py34,py35,pypy}-{coverage,nocoverage,benchmark}-{cython,nocython,pure} 3 | skipsdist = True 4 | 5 | [testenv] 6 | basepython = 7 | py26: python2.6 8 | py27: python2.7 9 | py33: python3.3 10 | py34: python3.4 11 | py35: python3.5 12 | pypy: pypy 13 | whitelist_externals=rm 14 | deps = 15 | pytest 16 | pytest-benchmark 17 | hypothesis 18 | coverage: coverage 19 | cython: cython 20 | nocython: cython 21 | benchmark: chardet 22 | setenv= 23 | pure: GLASSBOX_FORCE_PURE=true 24 | commands = 25 | cython: python -m pip.__main__ install cython 26 | nocython: python -m pip.__main__ install cython 27 | cython: rm -f src/glassbox/extension.c 28 | nocython: python setup.py sdist 29 | rm -rf dist/ 30 | nocython: python -m pip.__main__ uninstall -y cython 31 | 32 | python setup.py install 33 | coverage: python -m coverage.__main__ run -m pytest tests/test_tracking.py {posargs} 34 | nocoverage: python -m pytest tests {posargs} 35 | benchmark: python -m pytest bench --benchmark-warmup {posargs} 36 | 37 | --------------------------------------------------------------------------------