├── .gitignore ├── .travis.yml ├── CONTRIBUTING.md ├── HACKING ├── LICENSE.txt ├── MANIFEST.in ├── Makefile ├── README.md ├── check.sh ├── conda ├── meta.yaml └── upload.sh ├── doc ├── api.rst ├── bayeslite.1 ├── bql.rst ├── conf.py ├── index.rst └── internals.rst ├── docker └── ubuntu1604 ├── external ├── README ├── lemonade │ ├── COPYING │ ├── README │ └── dist │ │ ├── MANIFEST.in │ │ ├── PKG-INFO │ │ ├── README │ │ ├── bin │ │ └── lemonade │ │ ├── examples │ │ └── calc │ │ │ ├── calc.py │ │ │ └── gram.y │ │ ├── lemonade │ │ ├── __init__.py │ │ ├── action.py │ │ ├── acttab.py │ │ ├── build.py │ │ ├── ccruft.py │ │ ├── configlist.py │ │ ├── error.py │ │ ├── exceptions.py │ │ ├── lempar.tmpl │ │ ├── main.py │ │ ├── msort.py │ │ ├── parse.py │ │ ├── plink.py │ │ ├── report.py │ │ ├── set.py │ │ ├── struct.py │ │ └── table.py │ │ └── setup.py ├── plex │ ├── COPYING │ ├── README │ ├── dist │ │ ├── Makefile │ │ ├── Plex │ │ │ ├── Actions.py │ │ │ ├── DFA.py │ │ │ ├── Errors.py │ │ │ ├── Lexicons.py │ │ │ ├── Machines.py │ │ │ ├── Regexps.py │ │ │ ├── Scanners.py │ │ │ ├── Timing.py │ │ │ ├── Traditional.py │ │ │ ├── Transitions.py │ │ │ └── __init__.py │ │ ├── README │ │ ├── TODO │ │ ├── doc │ │ │ ├── Reference.html │ │ │ ├── Tutorial.html │ │ │ └── index.html │ │ ├── examples │ │ │ ├── example1and2.in │ │ │ ├── example1and2.py │ │ │ ├── example3.in │ │ │ ├── example3.py │ │ │ ├── example4.in │ │ │ ├── example4.py │ │ │ ├── example5.in │ │ │ ├── example5.py │ │ │ ├── example6.in │ │ │ ├── example6.py │ │ │ ├── example7.in │ │ │ ├── example7.py │ │ │ ├── pascal.in │ │ │ ├── pascal.py │ │ │ ├── python.in │ │ │ ├── python.py │ │ │ ├── speedtest.in │ │ │ └── speedtest.py │ │ └── tests │ │ │ ├── Test.py │ │ │ ├── runtests.py │ │ │ ├── test0.in │ │ │ ├── test0.out │ │ │ ├── test0.py │ │ │ ├── test1.in │ │ │ ├── test1.out │ │ │ ├── test1.py │ │ │ ├── test10.out │ │ │ ├── test10.py │ │ │ ├── test11.in │ │ │ ├── test11.out │ │ │ ├── test11.py │ │ │ ├── test12.in │ │ │ ├── 
test12.out │ │ │ ├── test12.py │ │ │ ├── test2.in │ │ │ ├── test2.out │ │ │ ├── test2.py │ │ │ ├── test3.in │ │ │ ├── test3.out │ │ │ ├── test3.py │ │ │ ├── test4.in │ │ │ ├── test4.out │ │ │ ├── test4.py │ │ │ ├── test5.in │ │ │ ├── test5.out │ │ │ ├── test5.py │ │ │ ├── test6.in │ │ │ ├── test6.out │ │ │ ├── test6.py │ │ │ ├── test7.in │ │ │ ├── test7.out │ │ │ ├── test7.py │ │ │ ├── test8.in │ │ │ ├── test8.out │ │ │ ├── test8.py │ │ │ ├── test9.in │ │ │ ├── test9.out │ │ │ └── test9.py │ └── prepare.sh └── weakprng │ ├── COPYING │ ├── README │ ├── dist │ ├── __init__.py │ ├── chacha.py │ └── weakprng.py │ └── prepare.sh ├── pythenv.sh ├── setup.py ├── shell ├── scripts │ └── bayeslite ├── src │ ├── README.md │ ├── __init__.py │ ├── core.py │ ├── hook.py │ ├── main.py │ └── pretty.py └── tests │ ├── test_pretty.py │ ├── test_shell.py │ └── thooks.py ├── src ├── __init__.py ├── ast.py ├── backend.py ├── backends │ ├── __init__.py │ ├── cgpm_alter │ │ ├── __init__.py │ │ ├── alterations.py │ │ ├── grammar.y │ │ └── parse.py │ ├── cgpm_analyze │ │ ├── __init__.py │ │ ├── grammar.y │ │ └── parse.py │ ├── cgpm_backend.py │ ├── cgpm_schema │ │ ├── __init__.py │ │ ├── grammar.y │ │ └── parse.py │ ├── iid_gaussian.py │ ├── loom_backend.py │ ├── nig_normal.py │ └── troll_rng.py ├── bayesdb.py ├── bql.py ├── bqlfn.py ├── bqlmath.py ├── bqlvtab.py ├── compiler.py ├── core.py ├── exception.py ├── grammar.y ├── guess.py ├── macro.py ├── math_util.py ├── nullify.py ├── parse.py ├── quote.py ├── read_csv.py ├── read_pandas.py ├── regress.py ├── scan.py ├── schema.py ├── simulate.py ├── sqlite3_util.py ├── stats.py ├── txn.py └── util.py └── tests ├── __init__.py ├── dha.csv ├── dha_codebook.csv ├── kl.py ├── satellites.csv ├── stochastic.py ├── test_approxest.py ├── test_backends.py ├── test_bql.py ├── test_bqlmath.py ├── test_case.py ├── test_cgpm.py ├── test_cgpm_alter.py ├── test_cgpm_analysis.py ├── test_cgpm_engine_cache.py ├── test_cgpm_loom.py ├── test_cmi.py ├── 
test_condprob.py ├── test_core.py ├── test_correlation.py ├── test_csv.py ├── test_error_bql.py ├── test_guess.py ├── test_infer_hypothetical.py ├── test_kl.py ├── test_loom_backend.py ├── test_loom_simulate_bivariate_gaussian.py ├── test_macro.py ├── test_math_util.py ├── test_nig_normal.py ├── test_nullify.py ├── test_parse.py ├── test_parse_cgpm_alter.py ├── test_parse_cgpm_analyze.py ├── test_read_csv.py ├── test_read_pandas.py ├── test_regress.py ├── test_schema.py ├── test_simulate.py ├── test_stats.py ├── test_stochastic.py ├── test_subsample.py ├── test_threshold.py ├── test_util.py ├── test_vscgpm.py ├── test_vtab.py └── threshold.py /.gitignore: -------------------------------------------------------------------------------- 1 | /bayeslite.egg-info/ 2 | /build/ 3 | /dist/ 4 | /external/lemonade/dist/lemonade/*.pyc 5 | /shell/tests/thooks.pyc 6 | /src/grammar.out 7 | /src/grammar.py 8 | /src/grammar.sha256 9 | /src/grammar.sha256.tmp 10 | /src/backends/cgpm_alter/grammar.out 11 | /src/backends/cgpm_alter/grammar.py 12 | /src/backends/cgpm_alter/grammar.sha256 13 | /src/backends/cgpm_alter/grammar.sha256.tmp 14 | /src/backends/cgpm_analyze/grammar.out 15 | /src/backends/cgpm_analyze/grammar.py 16 | /src/backends/cgpm_analyze/grammar.sha256 17 | /src/backends/cgpm_analyze/grammar.sha256.tmp 18 | /src/backends/cgpm_schema/grammar.out 19 | /src/backends/cgpm_schema/grammar.py 20 | /src/backends/cgpm_schema/grammar.sha256 21 | /src/backends/cgpm_schema/grammar.sha256.tmp 22 | /src/version.py 23 | __pycache__/ 24 | .cache/ 25 | .eggs 26 | 27 | *.pyc 28 | *.swp 29 | .pytest_cache/ 30 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | dist: trusty 3 | env: 4 | global: 5 | - PACKAGE_NAME=bayeslite 6 | # get all the branches referencing this commit 7 | - REAL_BRANCH=$(git ls-remote origin | sed -n 
"\|$TRAVIS_COMMIT\s\+refs/heads/|{s///p}") 8 | 9 | python: 10 | - 2.7 11 | install: 12 | - wget https://repo.continuum.io/miniconda/Miniconda2-latest-Linux-x86_64.sh -O miniconda.sh 13 | - bash miniconda.sh -b -p $HOME/miniconda 14 | - export PATH="$HOME/miniconda/bin:$PATH" 15 | - hash -r 16 | - conda config --set always_yes yes --set changeps1 no 17 | - conda install -q conda=4.6.14 conda-build 18 | script: 19 | - export CONDA_PACKAGE_VERSION="${TRAVIS_TAG:-$(date +%Y.%m.%d)}" 20 | # remove leading v from tags if they exist 21 | - CONDA_PACKAGE_VERSION="$(sed s/^v// <<<$CONDA_PACKAGE_VERSION)" 22 | # use "edge" channel (latest master) for testing with cgpm/crosscat 23 | - conda build . -c probcomp/label/edge -c probcomp -c cidermole -c fritzo -c ursusest -c anaconda 24 | after_success: 25 | - bash conda/upload.sh 26 | -------------------------------------------------------------------------------- /HACKING: -------------------------------------------------------------------------------- 1 | Guidelines for writing bayeslite software 2 | 3 | This working document contains guidelines for how to develop against 4 | the bayeslite API. 5 | 6 | * SQL/BQL parameters 7 | 8 | Use SQL/BQL parameters to pass strings and other values into SQL/BQL. 9 | DO NOT use format strings. 10 | 11 | DO: cursor.execute('UPDATE foo SET x = ? 
WHERE id = ?', (x, id)) 12 | DON'T: cursor.execute("UPDATE foo SET x = '%s' WHERE id = %d" % (x, id)) 13 | DON'T: cursor.execute("UPDATE foo SET x = '{}' WHERE id = {}".format(x, id)) 14 | 15 | DO: cursor.execute('SELECT x, y FROM t WHERE z = ?', (z,)) 16 | DON'T: cursor.execute('SELECT x, y FROM t WHERE z = ?', z) 17 | DON'T: cursor.execute('SELECT x, y FROM t WHERE z = {}'.format(z)) 18 | 19 | Prefer named parameters if the query has more than one parameter and 20 | covers multiple lines: 21 | 22 | cursor = db.cursor().execute(''' 23 | SELECT COUNT(*) 24 | FROM bayesdb_generator AS g, bayesdb_column AS c 25 | WHERE g.id = :generator_id 26 | AND g.tabname = c.tabname 27 | AND c.colno = :colno 28 | ''', { 29 | 'generator_id': generator_id, 30 | 'colno': colno, 31 | }) 32 | 33 | If the tables and columns in the query are determined dynamically, 34 | then use bql_quote_name and format strings to assemble SQL/BQL 35 | queries. But prefer to avoid this by writing different queries or 36 | reusing subroutines that already do it, such as in bayeslite.core. 37 | 38 | DO: from bayeslite import bql_quote_name 39 | qt = bql_quote_name(table) 40 | qc = bql_quote_name(column) 41 | cursor.execute('SELECT %s FROM %s WHERE x = ?' % (qc, qt), (x,)) 42 | 43 | DON'T: cursor.execute('SELECT %s FROM %s WHERE x = ?' % (column, table), (x,)) 44 | DON'T: cursor.execute('SELECT %s FROM %s WHERE x = %d' % (qc, qt, x)) 45 | 46 | * SQL updates 47 | 48 | When issuing an UPDATE command to sqlite3, if you can count the number 49 | of rows it should affect, do so and assert that it affected that many 50 | rows: 51 | 52 | total_changes = bdb._sqlite3.totalchanges() 53 | bdb.sql_execute('UPDATE ...', (...)) 54 | assert bdb._sqlite3.totalchanges() - total_changes == 1 55 | 56 | * Randomization 57 | 58 | Avoid indiscriminate nondeterminism. 59 | 60 | All random choices should be made from PRNGs with seeds that the user 61 | can control, via the normal Python API and the bayeslite shell. 
Any 62 | actual nondeterminism should be clearly labelled as such, e.g. a 63 | future shell command to choose a seed from /dev/urandom. 64 | 65 | To write nondeterministic tests that explore an intentionally 66 | unpredictable source of inputs, instead of testing exactly the same 67 | input every time, write a deterministic function of a 32-byte seed and 68 | use the @stochastic decorator to vary it: 69 | 70 | from stochastic import stochastic 71 | 72 | @stochastic(max_runs=4, min_passes=2) 73 | def test_quagga(seed): 74 | frobnicate(seed) 75 | 76 | This defines test_quagga to be a function that accepts an *optional* 77 | seed argument. If you call it with zero arguments, then it will call 78 | frobnicate up to four times, and if it does not pass twice, it will 79 | raise a StochasticError that includes (a) the last exception with 80 | which frobnicate failed and (b) the last seed with which frobnicate 81 | failed. 82 | 83 | You can then retry using exactly the same seed by calling test_quagga 84 | manually with the seed as its argument: 85 | 86 | >>> test_quagga() 87 | StochasticError: [seed 434529bf3e5a16930701b55c39a90acfcd115ba0cada99f5af5448f3b96923dd] 88 | ZigException: something set us up the bomb 89 | >>> test_quagga('434529bf3e5a16930701b55c39a90acfcd115ba0cada99f5af5448f3b96923dd'.decode('hex')) 90 | ZigException: something set us up the bomb 91 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: default-target 2 | default-target: build 3 | 4 | ############################################################################### 5 | ### User-settable variables 6 | 7 | # List of documentation formats to generate. 8 | DOCS = \ 9 | $(SPHINX_DOCS) \ 10 | pdf \ 11 | # end of DOCS 12 | 13 | # Commands to run in the build process. 
14 | PDFLATEX = pdflatex 15 | PYTHON = python 16 | SPHINX_BUILD = sphinx-build 17 | SPHINX_FLAGS = 18 | 19 | # Options for above commands. 20 | PDFLATEXOPTS = 21 | SPHINXOPTS = 22 | PYTHONOPTS = 23 | SETUPPYOPTS = 24 | 25 | ############################################################################### 26 | ### Targets 27 | 28 | # build: Build bayeslite. 29 | .PHONY: build 30 | build: setup.py 31 | $(PYTHON) $(PYTHONOPTS) setup.py $(SETUPPYOPTS) build 32 | 33 | # List of documentation formats we can generate with Sphinx. These 34 | # should be the formats that have been tested and confirmed to yield 35 | # reasonable output. 36 | SPHINX_DOCS = \ 37 | html \ 38 | latex \ 39 | # end of SPHINX_DOCS 40 | 41 | # doc: Build the bayeslite documentation. 42 | .PHONY: doc 43 | doc: $(DOCS) 44 | 45 | .PHONY: $(SPHINX_DOCS) 46 | $(SPHINX_DOCS): pythenv.sh build 47 | rm -rf build/doc/$@ && \ 48 | rm -rf build/doc/$@.tmp && \ 49 | ./pythenv.sh $(SPHINX_BUILD) $(SPHINX_FLAGS) -b $@ doc \ 50 | build/doc/$@.tmp && \ 51 | mv -f build/doc/$@.tmp build/doc/$@ 52 | 53 | .PHONY: pdf 54 | pdf: latex 55 | rm -rf build/doc/$@ && \ 56 | rm -rf build/doc/$@.tmp && \ 57 | mkdir build/doc/$@.tmp && \ 58 | { tar -C build/doc/latex -c -f - . \ 59 | | tar -C build/doc/$@.tmp -x -f -; } && \ 60 | (cd build/doc/$@.tmp && \ 61 | $(PDFLATEX) $(PDFLATEXOPTS) \\nonstopmode\\input bayeslite && \ 62 | $(PDFLATEX) $(PDFLATEXOPTS) \\nonstopmode\\input bayeslite && \ 63 | $(PDFLATEX) $(PDFLATEXOPTS) \\nonstopmode\\input bayeslite && \ 64 | $(MAKEINDEX) -s python.ist bayeslite.idx; \ 65 | $(PDFLATEX) $(PDFLATEXOPTS) \\nonstopmode\\input bayeslite && \ 66 | $(PDFLATEX) $(PDFLATEXOPTS) \\nonstopmode\\input bayeslite && \ 67 | :) && \ 68 | mv -f build/doc/$@.tmp build/doc/$@ 69 | 70 | # check: (Build bayeslite and) run the tests. 71 | .PHONY: check 72 | check: check.sh 73 | ./check.sh 74 | 75 | # clean: Remove build products. 
76 | .PHONY: clean 77 | clean: 78 | -rm -rf build 79 | -rm -rf build/doc/*.tmp 80 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Bayeslite 2 | 3 | [![Build Status](https://travis-ci.org/probcomp/bayeslite.svg?branch=master)](https://travis-ci.org/probcomp/bayeslite) 4 | [![Anaconda-Server Version Badge](https://anaconda.org/probcomp/bayeslite/badges/version.svg)](https://anaconda.org/probcomp/bayeslite) 5 | [![Anaconda-Server Installer Badge](https://anaconda.org/probcomp/bayeslite/badges/installer/conda.svg)](https://conda.anaconda.org/probcomp) 6 | [![Anaconda-Server Platform Badge](https://anaconda.org/probcomp/bayeslite/badges/platforms.svg)](https://anaconda.org/probcomp/bayeslite) 7 | 8 | BQL interpretation and storage for BayesDB. 9 | Please see http://probcomp.csail.mit.edu/software/bayesdb for more information. 10 | 11 | ## Installing 12 | 13 | The easiest way to install bayeslite is to use the 14 | [package](https://anaconda.org/probcomp/bayeslite) on Anaconda Cloud. 15 | Please follow [these instructions](https://github.com/probcomp/iventure/blob/master/docs/conda.md). 16 | 17 | ## Expectations 18 | 19 | Users and contributors should expect **rapidly and dramatically 20 | shifting code and behavior** at this time. 21 | 22 | **THIS SOFTWARE SHOULD NOT BE EXPECTED TO TREAT YOUR DATA SECURELY.** 23 | 24 | ## Contributing 25 | 26 | Our compatibility aim is to work on probcomp machines and members' 27 | laptops, and to provide scripts and instructions that make it not too 28 | hard to re-create our environments elsewhere. Pulls for polished 29 | packaging, broad installability, etc. are not appropriate 30 | contributions at this time. 
31 | 32 | Please run local tests before sending a pull request: 33 | 34 | ``` 35 | $ ./check.sh 36 | ``` 37 | 38 | That does not run the complete test suite, only the smoke tests, but 39 | is usually good enough. For the full suite: 40 | 41 | ``` 42 | $ ./check.sh tests shell/tests 43 | ``` 44 | 45 | ## Documentation 46 | 47 | To build the documentation, which is also available 48 | [online](http://probcomp.csail.mit.edu/dev/bayesdb/doc/), 49 | install [sphinx](http://www.sphinx-doc.org/en/master/) 50 | and then run the following command: 51 | 52 | ``` 53 | $ make doc 54 | ``` 55 | 56 | The result will be placed in `build/doc`, with one subdirectory per 57 | output format. 58 | 59 | To build only one output format, e.g. HTML because you don't want to 60 | install TeX: 61 | 62 | ``` 63 | $ make html 64 | ``` 65 | -------------------------------------------------------------------------------- /check.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | set -Ceu 4 | 5 | : ${PYTHON:=python} 6 | 7 | root=`cd -- "$(dirname -- "$0")" && pwd` 8 | 9 | ( 10 | set -Ceu 11 | cd -- "${root}" 12 | rm -rf build 13 | "$PYTHON" setup.py build 14 | if [ $# -eq 0 ]; then 15 | # By default, when running all tests, skip tests that have 16 | # been marked for continuous integration by using __ci_ in 17 | # their names. (git grep __ci_ to find these.) 18 | ./pythenv.sh "$PYTHON" -m pytest --pyargs bayeslite -k "not __ci_" 19 | else 20 | # If args are specified, run all tests, including continuous 21 | # integration tests, for the selected components. 
22 | ./pythenv.sh "$PYTHON" -m pytest "$@" 23 | fi 24 | ) 25 | -------------------------------------------------------------------------------- /conda/meta.yaml: -------------------------------------------------------------------------------- 1 | package: 2 | name: bayeslite 3 | version: {{ CONDA_PACKAGE_VERSION }} 4 | 5 | source: 6 | path: ../ 7 | 8 | build: 9 | script: python setup.py install 10 | 11 | requirements: 12 | build: 13 | - cython 0.23.* 14 | - git 15 | - jsonschema 16 | - numpy 1.11.* 17 | - python 2.7.* 18 | run: 19 | - nomkl 20 | - apsw 21 | - cgpm 22 | - crosscat 23 | - jsonschema 24 | - loom 0.2.10 25 | - numpy 1.11.* 26 | - scipy 0.17.* 27 | - six 1.10.* 28 | 29 | test: 30 | requires: 31 | - apsw 32 | - cgpm 33 | - crosscat 34 | - loom 0.2.10 35 | - pandas 0.18.* 36 | - pytest 2.8.* 37 | - python 2.7.* 38 | commands: 39 | - python -m pytest --pyargs bayeslite -k "not __ci_" 40 | 41 | about: 42 | home: https://github.com/probcomp/bayeslite 43 | license: Apache 44 | license_file: LICENSE.txt 45 | -------------------------------------------------------------------------------- /conda/upload.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -ev 3 | 4 | # fyi, the logic below is necessary due to the fact that on a tagged build, TRAVIS_BRANCH and TRAVIS_TAG are the same 5 | # in the case of a tagged build, use the REAL_BRANCH env var defined in travis.yml 6 | if [ -n "${TRAVIS_TAG}" ]; then 7 | conda install anaconda-client 8 | # if tag didn't come from master, add the "dev" label 9 | if [ ${REAL_BRANCH} = "master" ]; then 10 | anaconda -t ${CONDA_UPLOAD_TOKEN} upload -u ${CONDA_USER} ~/miniconda/conda-bld/linux-64/${PACKAGE_NAME}-*.tar.bz2 --force 11 | else 12 | anaconda -t ${CONDA_UPLOAD_TOKEN} upload -u ${CONDA_USER} -l dev ~/miniconda/conda-bld/linux-64/${PACKAGE_NAME}-*.tar.bz2 --force 13 | fi 14 | elif [ ${TRAVIS_BRANCH} = "master" ]; then 15 | if [ ${TRAVIS_EVENT_TYPE} = "cron" ]; then 
16 | # don't build package for nightly cron.. this is just for test stability info 17 | exit 0 18 | else 19 | conda install anaconda-client 20 | anaconda -t ${CONDA_UPLOAD_TOKEN} upload -u ${CONDA_USER} -l edge ~/miniconda/conda-bld/linux-64/${PACKAGE_NAME}-*.tar.bz2 --force 21 | fi 22 | else 23 | exit 0 24 | fi 25 | -------------------------------------------------------------------------------- /doc/api.rst: -------------------------------------------------------------------------------- 1 | Bayeslite API reference 2 | ======================= 3 | 4 | :mod:`bayeslite`: Bayeslite API 5 | ------------------------------- 6 | 7 | .. automodule:: bayeslite 8 | :members: 9 | -------------------------------------------------------------------------------- /doc/conf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Copyright (c) 2010-2016, MIT Probabilistic Computing Project 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
16 | 17 | """Sphinx configuration file.""" 18 | 19 | extensions = [ 20 | 'sphinx.ext.autodoc', 21 | ] 22 | 23 | copyright = '2010-2016, MIT Probabilistic Computing Project' 24 | master_doc = 'index' 25 | project = 'bayeslite' 26 | release = '0.1.3rc1' 27 | version = '0.1.3' 28 | 29 | nitpicky = True 30 | html_theme = 'sphinxdoc' 31 | -------------------------------------------------------------------------------- /doc/index.rst: -------------------------------------------------------------------------------- 1 | bayeslite: A probabilistic database built on SQLite 3 2 | ===================================================== 3 | 4 | Bayeslite is a probabilistic database built on `SQLite 3 5 | <https://www.sqlite.org/>`__. In addition to SQL queries on 6 | conventional SQL tables, it supports probabilistic BQL queries on 7 | generative models for data in a table. 8 | 9 | Quick start for querying a pre-analyzed database:: 10 | 11 | import bayeslite 12 | bdb = bayeslite.bayesdb_open("foo.bdb") 13 | cursor = bdb.execute("SOME BQL QUERY") 14 | ... 15 | 16 | .. toctree:: 17 | :maxdepth: 2 18 | 19 | api 20 | bql 21 | internals 22 | 23 | If you would like to analyze your own data with BayesDB, please 24 | contact bayesdb@mit.edu to participate in our research project. 25 | 26 | .. toctree:: 27 | :maxdepth: 1 28 | 29 | analysis 30 | 31 | Indices and tables 32 | ================== 33 | 34 | * :ref:`genindex` 35 | * :ref:`modindex` 36 | * :ref:`search` 37 | -------------------------------------------------------------------------------- /doc/internals.rst: -------------------------------------------------------------------------------- 1 | Bayeslite API internals 2 | ======================= 3 | 4 | :mod:`bayeslite.compiler`: BQL-to-SQL query compiler 5 | ---------------------------------------------------- 6 | 7 | .. automodule:: bayeslite.compiler 8 | :members: 9 | 10 | :mod:`bayeslite.bql`: BQL query and command execution 11 | ----------------------------------------------------- 12 | 13 | .. 
automodule:: bayeslite.bql 14 | :members: 15 | 16 | :mod:`bayeslite.core`: BayesDB object model 17 | ------------------------------------------- 18 | 19 | .. automodule:: bayeslite.core 20 | :members: 21 | 22 | :mod:`bayeslite.parse`: BQL parser 23 | ---------------------------------- 24 | 25 | .. automodule:: bayeslite.parse 26 | :members: 27 | 28 | :mod:`bayeslite.sqlite3_util`: SQLite 3 utilities 29 | ------------------------------------------------- 30 | 31 | .. automodule:: bayeslite.sqlite3_util 32 | :members: 33 | 34 | :mod:`bayeslite.stats`: Statistics utilities 35 | -------------------------------------------- 36 | 37 | .. automodule:: bayeslite.stats 38 | :members: 39 | 40 | :mod:`bayeslite.math_util`: Math utilities 41 | ------------------------------------------ 42 | 43 | .. automodule:: bayeslite.math_util 44 | :members: 45 | 46 | :mod:`bayeslite.util`: Miscellaneous utilities 47 | ---------------------------------------------- 48 | 49 | .. automodule:: bayeslite.util 50 | :members: 51 | -------------------------------------------------------------------------------- /docker/ubuntu1604: -------------------------------------------------------------------------------- 1 | FROM ubuntu:16.04 2 | MAINTAINER MIT Probabilistic Computing Project 3 | 4 | ARG check_args 5 | 6 | RUN apt-get update -qq && apt-get install -qq -y \ 7 | git \ 8 | python-apsw \ 9 | python-jsonschema \ 10 | python-numpy \ 11 | python-pandas \ 12 | python-pexpect \ 13 | python-pytest \ 14 | python-scipy \ 15 | python-six \ 16 | python-sklearn \ 17 | ; # end of package list 18 | 19 | ADD . 
/bayeslite 20 | WORKDIR /bayeslite 21 | RUN \ 22 | ./docker/deps/cgpm/pythenv.sh \ 23 | ./docker/deps/crosscat/pythenv.sh \ 24 | ./check.sh ${check_args} 25 | 26 | RUN python setup.py sdist 27 | RUN python setup.py bdist 28 | -------------------------------------------------------------------------------- /external/README: -------------------------------------------------------------------------------- 1 | This directory contains software that is used in bayeslite but written 2 | and maintained externally by someone else. We will use the following 3 | organization in order to: 4 | 5 | - Use external software that is not in Ubuntu and does not use Git. 6 | - Keep history of our updates to external software. 7 | - Make local changes as we need and send patches upstream. 8 | - Merge our local changes into external updates. 9 | - Avoid imposing the mess of submodules on users. 10 | 11 | * Directory layout: 12 | 13 | Each subdirectory corresponds to one external package and has the 14 | following contents: 15 | 16 | COPYING summary of copying terms 17 | README notes on upstream, link to upstream web site, &c. 18 | dist/ subdirectory containing the external distribution 19 | prepare.sh script to prepare a distribution for import 20 | ... other supporting files 21 | 22 | prepare.sh should run in the top-level directory of the external 23 | distribution, which will then be imported under dist/. It should only 24 | do clean-ups necessary to make the distribution fit for inclusion in 25 | our tree, such as deleting binary files. Local changes, such as bug 26 | fixes, should be made in separate commits. 27 | 28 | * To import a new external package: 29 | 30 | 1. Write the COPYING and README files and the prepare.sh script, and 31 | commit them. 32 | 33 | 2. Create a temporary Git repository for the package: 34 | 35 | % cd /tmp 36 | % mkdir repo 37 | % cd repo 38 | % git init 39 | 40 | 3. 
Extract proj-1.2 in external/proj/dist in the temporary repository: 41 | 42 | % mkdir -p external/proj 43 | % cd external/proj 44 | % gunzip -c < /tmp/proj-1.2.tar.gz | tar xf - 45 | % mv proj-1.2 dist 46 | 47 | 4. Run the prepare.sh script: 48 | 49 | % (cd dist && sh /path/to/bayeslite/external/proj/prepare.sh) 50 | 51 | 5. Commit the result in the temporary repository: 52 | 53 | % git add dist 54 | % git commit 55 | 56 | The commit message should summarize what proj is, specify where 57 | proj-1.2.tar.gz came from, and give its SHA-256 hash. 58 | 59 | 6. In the main repository, create a vendor branch and release tag: 60 | 61 | % cd /path/to/bayeslite 62 | % git fetch /tmp/repo master:vendor/proj 63 | % git tag vendor/proj-1.2 vendor/proj 64 | 65 | 7. Merge proj-1.2 into the branch you're working on: 66 | 67 | % git merge vendor/proj-1.2 68 | 69 | 8. Push the vendor branch upstream so others can use it: 70 | 71 | % git push origin vendor/proj 72 | 73 | * To see what version we have most recently merged: 74 | 75 | % git log --merges external/proj 76 | 77 | Note: This requires that every `git merge' involved use the release 78 | tag (vendor/proj-1.2), not the vendor branch (vendor/proj). 79 | Everything else will work if you use the vendor branch, so be careful. 80 | 81 | Note: You can't use `git log --merges external/proj/dist', apparently: 82 | it skips the very first merge. Go figure. 83 | 84 | * To see what local changes there are in an external package: 85 | 86 | % git diff vendor/proj-1.2 HEAD -- ./external/proj 87 | % git show vendor/proj-1.2..HEAD -- ./external/proj/dist 88 | 89 | * To update an existing external package: 90 | 91 | 1. Update prepare.sh and check for changes to copying terms. 92 | 93 | 2. Check out the vendor branch in an empty clone of the repository: 94 | 95 | % git clone --no-checkout -b vendor/proj /path/to/bayeslite /tmp/repo 96 | 97 | 3. 
Extract proj-1.3 in external/proj/dist in the clone: 98 | 99 | % cd /tmp/repo 100 | % mkdir -p external/proj 101 | % cd external/proj 102 | % gunzip -c < /tmp/proj-1.3.tar.gz | tar xf - 103 | % mv proj-1.3 dist 104 | 105 | 4. Run the prepare script: 106 | 107 | % (cd dist && /path/to/bayeslite/external/proj/prepare.sh) 108 | 109 | 5. Commit and tag the update: 110 | 111 | % git add --all dist 112 | % git commit 113 | % git tag vendor/proj-1.3 114 | 115 | 6. In the main repository, update the vendor branch and tag it: 116 | 117 | % cd /path/to/bayeslite 118 | % git fetch /tmp/repo vendor/proj 119 | % git tag vendor/proj-1.3 vendor/proj 120 | 121 | 7. Finally, merge the new release tag: 122 | 123 | % git merge vendor/proj-1.3 124 | -------------------------------------------------------------------------------- /external/lemonade/COPYING: -------------------------------------------------------------------------------- 1 | public domain 2 | -------------------------------------------------------------------------------- /external/lemonade/README: -------------------------------------------------------------------------------- 1 | lemonade - LALR parser generator for Python 2 | 3 | https://pypi.python.org/pypi/lemonade 4 | 5 | Ported to Python from the LEMON parser generator by D. Richard Hipp: 6 | 7 | http://www.hwaci.com/sw/lemon/ 8 | 9 | We have back-ported some bug fixes from LEMON into our copy of 10 | lemonade, since upstream does not appear to be maintained. (Upstream 11 | author responded to email but then vanished.) 
12 | -------------------------------------------------------------------------------- /external/lemonade/dist/MANIFEST.in: -------------------------------------------------------------------------------- 1 | include MANIFEST.in 2 | include lemonade/lempar.tmpl 3 | recursive-include examples *.py *.y 4 | -------------------------------------------------------------------------------- /external/lemonade/dist/PKG-INFO: -------------------------------------------------------------------------------- 1 | Metadata-Version: 1.0 2 | Name: lemonade 3 | Version: 1.0b1 4 | Summary: Port of the LEMON Parser Generator 5 | Home-page: UNKNOWN 6 | Author: Leif Strand 7 | Author-email: leif@cacr.caltech.edu 8 | License: UNKNOWN 9 | Description: UNKNOWN 10 | Platform: UNKNOWN 11 | Classifier: License :: Public Domain 12 | Classifier: Development Status :: 4 - Beta 13 | Classifier: Programming Language :: Python :: 2 14 | Classifier: Intended Audience :: Developers 15 | Classifier: Topic :: Software Development :: Code Generators 16 | Classifier: Topic :: Software Development :: Compilers 17 | -------------------------------------------------------------------------------- /external/lemonade/dist/README: -------------------------------------------------------------------------------- 1 | 2 | Lemonade is a Python port of the LEMON Parser Generator written by 3 | D. Richard Hipp: 4 | 5 | http://www.hwaci.com/sw/lemon/ 6 | 7 | Lemonade can be used in the traditional fashion to create a standalone 8 | parser: 9 | 10 | lemonade gram.y 11 | 12 | The above command generates "gram.py", which you can include in your 13 | project. 14 | 15 | Since Python is a dynamic language, Lemonade could also enable client 16 | software to generate a parser from a user-supplied .y file, and then 17 | use the generated parser on the fly. 18 | 19 | This is the beta release of Lemonade. There is no documentation yet. 
20 | You may find the original LEMON documentation helpful: 21 | 22 | http://www.hwaci.com/sw/lemon/lemon.html 23 | 24 | However, many of LEMON's "%" directives are irrelevant in Python; 25 | therefore, they have been eliminated in Lemonade. Further, Lemonade 26 | does not allow code fragments ("{}") within the grammar file. 27 | Instead, the reduce actions are specified in a separate delegate 28 | class. See the 'examples' directory for an example. 29 | 30 | ---- 31 | Leif Strand 32 | August 28, 2012 33 | 34 | -------------------------------------------------------------------------------- /external/lemonade/dist/bin/lemonade: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from lemonade.main import main 4 | import sys 5 | 6 | sys.exit(main(sys.argv)) 7 | 8 | -------------------------------------------------------------------------------- /external/lemonade/dist/examples/calc/calc.py: -------------------------------------------------------------------------------- 1 | 2 | import sys 3 | 4 | 5 | def generateGrammar(): 6 | from lemonade.main import generate 7 | from os.path import join, dirname 8 | from StringIO import StringIO 9 | 10 | inputFile = join(dirname(__file__), "gram.y") 11 | outputStream = StringIO() 12 | generate(inputFile, outputStream) 13 | return outputStream.getvalue() 14 | 15 | 16 | # generate and import our grammar 17 | exec generateGrammar() in globals() 18 | 19 | 20 | # 21 | # the lexer. NOTE(review): in the split pattern below, `+-/` inside the character class parses as a range ('+' through '/'), so ',' and '.' are also captured as operator tokens -- confirm this is intended 22 | # 23 | 24 | tokenType = { 25 | '+': PLUS, 26 | '-': MINUS, 27 | '/': DIVIDE, 28 | '*': TIMES, 29 | } 30 | 31 | def tokenize(input): 32 | import re 33 | tokenText = re.split("([+-/*])|\s*", input) 34 | for text in tokenText: 35 | if text is None: 36 | continue 37 | type = tokenType.get(text) 38 | if type is None: 39 | type = NUM 40 | value = float(text) 41 | else: 42 | value = None 43 | yield (type, value) 44 | return 45 | 46 | 47 | # 48 | # the delegate 49 | # 50 | 51 | class 
Delegate(object): 52 | 53 | def accept(self): 54 | return 55 | 56 | def parse_failed(self): 57 | assert False, "Giving up. Parser is hopelessly lost..." 58 | 59 | def syntax_error(self, token): 60 | print >>sys.stderr, "Syntax error!" 61 | return 62 | 63 | 64 | # 65 | # reduce actions (method names match the rule labels in gram.y, e.g. expr(sub), expr(add)) 66 | # 67 | 68 | def sub(self, a, b): return a - b 69 | def add(self, a, b): return a + b 70 | def mul(self, a, b): return a * b 71 | def div(self, a, b): return a / b 72 | def num(self, value): return value 73 | 74 | def print_result(self, result): 75 | print result 76 | return 77 | 78 | 79 | p = Parser(Delegate()) 80 | #p.trace(sys.stdout, "# ") 81 | 82 | if len(sys.argv) == 2: 83 | p.parse(tokenize(sys.argv[1])) 84 | else: 85 | print >>sys.stderr, "usage: %s EXPRESSION" % sys.argv[0] 86 | 87 | -------------------------------------------------------------------------------- /external/lemonade/dist/examples/calc/gram.y: -------------------------------------------------------------------------------- 1 | 2 | %left PLUS MINUS. 3 | %left DIVIDE TIMES. 4 | 5 | program(print_result) ::= expr(result). 6 | 7 | expr(sub) ::= expr(a) MINUS expr(b). 8 | expr(add) ::= expr(a) PLUS expr(b). 9 | expr(mul) ::= expr(a) TIMES expr(b). 10 | expr(div) ::= expr(a) DIVIDE expr(b). 11 | 12 | expr(num) ::= NUM(value). 13 | 14 | -------------------------------------------------------------------------------- /external/lemonade/dist/lemonade/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/probcomp/bayeslite/211e5eb3821a464a2fffeb9d35e3097e1b7a99ba/external/lemonade/dist/lemonade/__init__.py -------------------------------------------------------------------------------- /external/lemonade/dist/lemonade/action.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Routines processing parser actions in the LEMON parser generator. 
def findbreak(msg, min, max):
    '''Find a good place to break "msg" so that its length is at least
    "min" but no more than "max".  Make the point as close to max as
    possible.

    A newline forces the break at its own position.  A space is a break
    candidate at its own position, and a '-' is a break candidate just
    after itself.  If "msg" ends inside the window, the break is at the
    end of "msg".  If no candidate is found, "min" is returned.

    When "msg" is a mutable sequence (e.g. a list of characters), tabs
    and the terminating newline are overwritten with a space, as the
    original C code did.  Immutable strings are left untouched.
    '''

    spot = min
    for i in range(min, max + 1):
        if i >= len(msg):
            spot = i
            break
        c = msg[i]
        if c == '\t' or c == '\n':
            # Blank out the character in place when the buffer allows it.
            # A Python str does not, and assigning to it unconditionally
            # made every message containing a tab or newline raise
            # TypeError.
            try:
                msg[i] = ' '
            except TypeError:
                pass
        if c == '\n':
            spot = i
            break
        if c == '-' and i < max - 1:
            spot = i + 1
        if c == ' ':
            spot = i
    return spot
def ErrorMsg(filename, lineno, format, *args):
    '''Print an error message to stdout, wrapped to LINEWIDTH columns.

    "format % args" is the message text.  Every output line is prefixed
    with "filename:lineno: " (or "filename: " when "lineno" is not
    positive); the file name is truncated to PREFIXLIMIT - 10
    characters.  Lines are split at the position chosen by findbreak().
    '''
    from ccruft import fprintf
    from sys import stdout

    # Prepare a prefix to be prepended to every output line
    if lineno > 0:
        prefix = "%.*s:%d: " % (PREFIXLIMIT - 10, filename, lineno)
    else:
        prefix = "%.*s: " % (PREFIXLIMIT - 10, filename)

    availablewidth = LINEWIDTH - len(prefix)

    # Generate the error message.  rstrip() removes trailing newlines
    # and, unlike indexing errmsg[-1], is safe on an empty message.
    # Tabs become spaces and embedded newlines become hard line breaks
    # here, so findbreak() only ever sees plain text.
    errmsg = (format % args).rstrip('\n').replace('\t', ' ')

    # Print the error message
    for segment in errmsg.split('\n'):
        base = 0
        while base < len(segment):
            end = findbreak(segment[base:], 0, availablewidth)
            if end == 0 and segment[base] != ' ':
                # No break point inside the window (one very long word).
                # Break mid-word; otherwise "base" would never advance
                # and this loop would spin forever.
                end = availablewidth if availablewidth > 0 else 1
            restart = base + end
            while restart < len(segment) and segment[restart] == ' ':
                restart += 1
            fprintf(stdout, "%s%.*s\n", prefix, end, segment[base:])
            base = restart

    return
def merge(a, b, cmp, next):
    """Merge two sorted, None-terminated linked lists into one.

    "a" and "b" are the list heads (either may be None), "cmp" is a
    strcmp-style comparison function and "next" is the name of the link
    attribute.  The link attributes of the nodes are rewritten in place
    and the head of the merged list is returned.  On ties the node from
    "a" comes first.
    """
    # Trivial cases: one side is missing, the other side is the answer.
    if a is None:
        return b
    if b is None:
        return a

    # Pick the first node of the result.
    if cmp(a, b) <= 0:
        head = a
        a = getattr(a, next)
    else:
        head = b
        b = getattr(b, next)
    tail = head

    # Repeatedly append the smaller front node to the tail.
    while a and b:
        if cmp(a, b) <= 0:
            chosen, a = a, getattr(a, next)
        else:
            chosen, b = b, getattr(b, next)
        setattr(tail, next, chosen)
        tail = chosen

    # Hook up whatever is left of the list that did not run out.
    setattr(tail, next, a if a else b)
    return head
def msort(list, next, cmp):
    """Merge-sort a singly linked, None-terminated list.

    "list" is the head node, "next" the name of the link attribute and
    "cmp" a strcmp-style comparison function.  The link attributes are
    rewritten in place; the head of the sorted list is returned.
    """
    # bins[k] is either empty or holds a sorted run; runs are merged
    # upward like carries in a binary counter.
    bins = [None for _ in range(LISTSIZE)]
    last = LISTSIZE - 1

    node = list
    while node:
        # Detach the front node as a one-element run.
        run, node = node, getattr(node, next)
        setattr(run, next, None)

        slot = 0
        while slot < last and bins[slot]:
            run = merge(run, bins[slot], cmp, next)
            bins[slot] = None
            slot += 1
        bins[slot] = run

    # Fold all remaining runs together.
    result = None
    for pending in bins:
        if pending:
            result = merge(pending, result, cmp, next)

    return result
Return True if s1 changes.''' 31 | progress = False 32 | for i in range(size): 33 | if not s2[i]: 34 | continue 35 | if not s1[i]: 36 | progress = True 37 | s1[i] = True 38 | return progress 39 | 40 | 41 | def SetFind(X, Y): 42 | '''True if Y is in set X.''' 43 | return X[Y] 44 | 45 | -------------------------------------------------------------------------------- /external/lemonade/dist/setup.py: -------------------------------------------------------------------------------- 1 | 2 | from distutils.core import setup 3 | 4 | setup(name = 'lemonade', 5 | version = '1.0b1', 6 | description = 'Port of the LEMON Parser Generator', 7 | 8 | scripts = ['bin/lemonade'], 9 | packages = ['lemonade'], 10 | package_data = { 'lemonade': ['lempar.tmpl'] }, 11 | 12 | classifiers = [ 13 | 'License :: Public Domain', 14 | 'Development Status :: 4 - Beta', 15 | 'Programming Language :: Python :: 2', 16 | 'Intended Audience :: Developers', 17 | 'Topic :: Software Development :: Code Generators', 18 | 'Topic :: Software Development :: Compilers', 19 | ], 20 | 21 | author = 'Leif Strand', 22 | author_email = 'leif@cacr.caltech.edu', 23 | ) 24 | -------------------------------------------------------------------------------- /external/plex/COPYING: -------------------------------------------------------------------------------- 1 | Plex is free of any restrictions. You can use it, redistribute it, 2 | sell it, whatever you want. All I ask is that you give me credit if 3 | you distribute any code derived from it. 
4 | 5 | 6 | Greg Ewing, 7 | Computer Science Department, 8 | University of Canterbury, 9 | Christchurch, 10 | New Zealand 11 | 12 | greg@cosc.canterbury.ac.nz 13 | -------------------------------------------------------------------------------- /external/plex/README: -------------------------------------------------------------------------------- 1 | plex - lexical analyzer 2 | 3 | http://www.cosc.canterbury.ac.nz/greg.ewing/python/Plex/ 4 | -------------------------------------------------------------------------------- /external/plex/dist/Makefile: -------------------------------------------------------------------------------- 1 | VERSION = 1.1.5 2 | TAR = ../Plex-$(VERSION).tar 3 | tar: clean 4 | tar cvf $(TAR) * 5 | rm -f $(TAR).gz 6 | gzip $(TAR) 7 | 8 | clean: 9 | rm -f */*.pyc *~ */*~ */*.dump tests/*.out2 tests/*.err 10 | 11 | -------------------------------------------------------------------------------- /external/plex/dist/Plex/Actions.py: -------------------------------------------------------------------------------- 1 | #======================================================================= 2 | # 3 | # Python Lexical Analyser 4 | # 5 | # Actions for use in token specifications 6 | # 7 | #======================================================================= 8 | 9 | class Action: 10 | 11 | def same_as(self, other): 12 | return self is other 13 | 14 | 15 | class Return(Action): 16 | """ 17 | Internal Plex action which causes |value| to 18 | be returned as the value of the associated token 19 | """ 20 | 21 | value = None 22 | 23 | def __init__(self, value): 24 | self.value = value 25 | 26 | def perform(self, token_stream, text): 27 | return self.value 28 | 29 | def same_as(self, other): 30 | return isinstance(other, Return) and self.value == other.value 31 | 32 | def __repr__(self): 33 | return "Return(%s)" % repr(self.value) 34 | 35 | 36 | class Call(Action): 37 | """ 38 | Internal Plex action which causes a function to be called. 
39 | """ 40 | 41 | function = None 42 | 43 | def __init__(self, function): 44 | self.function = function 45 | 46 | def perform(self, token_stream, text): 47 | return self.function(token_stream, text) 48 | 49 | def __repr__(self): 50 | return "Call(%s)" % self.function.__name__ 51 | 52 | def same_as(self, other): 53 | return isinstance(other, Call) and self.function is other.function 54 | 55 | 56 | class Begin(Action): 57 | """ 58 | Begin(state_name) is a Plex action which causes the Scanner to 59 | enter the state |state_name|. See the docstring of Plex.Lexicon 60 | for more information. 61 | """ 62 | 63 | state_name = None 64 | 65 | def __init__(self, state_name): 66 | self.state_name = state_name 67 | 68 | def perform(self, token_stream, text): 69 | token_stream.begin(self.state_name) 70 | 71 | def __repr__(self): 72 | return "Begin(%s)" % self.state_name 73 | 74 | def same_as(self, other): 75 | return isinstance(other, Begin) and self.state_name == other.state_name 76 | 77 | 78 | class Ignore(Action): 79 | """ 80 | IGNORE is a Plex action which causes its associated token 81 | to be ignored. See the docstring of Plex.Lexicon for more 82 | information. 83 | """ 84 | def perform(self, token_stream, text): 85 | return None 86 | 87 | def __repr__(self): 88 | return "IGNORE" 89 | 90 | IGNORE = Ignore() 91 | IGNORE.__doc__ = Ignore.__doc__ 92 | 93 | class Text(Action): 94 | """ 95 | TEXT is a Plex action which causes the text of a token to 96 | be returned as the value of the token. See the docstring of 97 | Plex.Lexicon for more information. 
98 | """ 99 | 100 | def perform(self, token_stream, text): 101 | return text 102 | 103 | def __repr__(self): 104 | return "TEXT" 105 | 106 | TEXT = Text() 107 | TEXT.__doc__ = Text.__doc__ 108 | 109 | 110 | -------------------------------------------------------------------------------- /external/plex/dist/Plex/Errors.py: -------------------------------------------------------------------------------- 1 | #======================================================================= 2 | # 3 | # Python Lexical Analyser 4 | # 5 | # Exception classes 6 | # 7 | #======================================================================= 8 | 9 | import exceptions 10 | 11 | class PlexError(exceptions.Exception): 12 | message = "" 13 | 14 | class PlexTypeError(PlexError, TypeError): 15 | pass 16 | 17 | class PlexValueError(PlexError, ValueError): 18 | pass 19 | 20 | class InvalidRegex(PlexError): 21 | pass 22 | 23 | class InvalidToken(PlexError): 24 | 25 | def __init__(self, token_number, message): 26 | PlexError.__init__(self, "Token number %d: %s" % (token_number, message)) 27 | 28 | class InvalidScanner(PlexError): 29 | pass 30 | 31 | class AmbiguousAction(PlexError): 32 | message = "Two tokens with different actions can match the same string" 33 | 34 | def __init__(self): 35 | pass 36 | 37 | class UnrecognizedInput(PlexError): 38 | scanner = None 39 | position = None 40 | state_name = None 41 | 42 | def __init__(self, scanner, state_name): 43 | self.scanner = scanner 44 | self.position = scanner.position() 45 | self.state_name = state_name 46 | 47 | def __str__(self): 48 | return ("'%s', line %d, char %d: Token not recognised in state %s" 49 | % (self.position + (repr(self.state_name),))) 50 | 51 | 52 | 53 | -------------------------------------------------------------------------------- /external/plex/dist/Plex/Timing.py: -------------------------------------------------------------------------------- 1 | # 2 | # Get time in platform-dependent way 3 | # 4 | 5 | import os 6 | 
class REParser:
    """
    Recursive-descent parser converting a traditional regular expression
    string into a Plex regexp.

    State: |s| is the string being parsed, |i| the index of the current
    character, |c| the current character ('' at the end) and |end| a
    flag set to 1 once the end of |s| has been reached.
    """

    def __init__(self, s):
        self.s = s
        self.i = -1
        self.end = 0
        self.next()

    def parse_re(self):
        """Parse the whole string; anything left over is a syntax error."""
        re = self.parse_alt()
        if not self.end:
            self.error("Unexpected %s" % repr(self.c))
        return re

    def parse_alt(self):
        """Parse a set of alternative regexps."""
        re = self.parse_seq()
        if self.c == '|':
            re_list = [re]
            while self.c == '|':
                self.next()
                re_list.append(self.parse_seq())
            # Argument unpacking replaces the long-deprecated apply().
            re = Alt(*re_list)
        return re

    def parse_seq(self):
        """Parse a sequence of regexps."""
        re_list = []
        while not self.end and self.c not in "|)":
            re_list.append(self.parse_mod())
        return Seq(*re_list)

    def parse_mod(self):
        """Parse a primitive regexp followed by *, +, ? modifiers."""
        re = self.parse_prim()
        while not self.end and self.c in "*+?":
            if self.c == '*':
                re = Rep(re)
            elif self.c == '+':
                re = Rep1(re)
            else: # self.c == '?'
                re = Opt(re)
            self.next()
        return re

    def parse_prim(self):
        """Parse a primitive regexp."""
        c = self.get()
        if c == '.':
            re = AnyBut("\n")
        elif c == '^':
            re = Bol
        elif c == '$':
            re = Eol
        elif c == '(':
            re = self.parse_alt()
            self.expect(')')
        elif c == '[':
            re = self.parse_charset()
            self.expect(']')
        else:
            # A backslash makes the following character literal.
            if c == '\\':
                c = self.get()
            re = Char(c)
        return re

    def parse_charset(self):
        """Parse a charset. Does not include the surrounding []."""
        char_list = []
        invert = 0
        if self.c == '^':
            invert = 1
            self.next()
        # A ']' directly after the opening (or the '^') is a literal.
        if self.c == ']':
            char_list.append(']')
            self.next()
        while not self.end and self.c != ']':
            c1 = self.get()
            # "a-z" is a range unless the '-' is the last char of the set.
            if self.c == '-' and self.lookahead(1) != ']':
                self.next()
                c2 = self.get()
                for a in range(ord(c1), ord(c2) + 1):
                    char_list.append(chr(a))
            else:
                char_list.append(c1)
        # str.join avoids depending on the string module being re-exported
        # by the star import of Regexps.
        chars = "".join(char_list)
        if invert:
            return AnyBut(chars)
        else:
            return Any(chars)

    def next(self):
        """Advance to the next char."""
        s = self.s
        i = self.i = self.i + 1
        if i < len(s):
            self.c = s[i]
        else:
            self.c = ''
            self.end = 1

    def get(self):
        """Return the current char and advance; error at end of string."""
        if self.end:
            self.error("Premature end of string")
        c = self.c
        self.next()
        return c

    def lookahead(self, n):
        """Look ahead n chars."""
        j = self.i + n
        if j < len(self.s):
            return self.s[j]
        else:
            return ''

    def expect(self, c):
        """
        Expect to find character |c| at current position.
        Raises an exception otherwise.
        """
        if self.c == c:
            self.next()
        else:
            self.error("Missing %s" % repr(c))

    def error(self, mess):
        """Raise exception to signal syntax error in regexp."""
        raise RegexpSyntaxError("Syntax error in regexp %s at position %d: %s" % (
            repr(self.s), self.i, mess))
31 | """ 32 | 33 | from Actions import TEXT, IGNORE, Begin 34 | from Lexicons import Lexicon, State 35 | from Regexps import RE, Seq, Alt, Rep1, Empty, Str, Any, AnyBut, AnyChar, Range 36 | from Regexps import Opt, Rep, Bol, Eol, Eof, Case, NoCase 37 | from Scanners import Scanner 38 | 39 | 40 | 41 | -------------------------------------------------------------------------------- /external/plex/dist/README: -------------------------------------------------------------------------------- 1 | This is version 1.1.5 of Plex, a Python module for building lexical 2 | analysers. See the doc directory for instructions on using it. 3 | 4 | Plex is free of any restrictions. You can use it, redistribute it, 5 | sell it, whatever you want. All I ask is that you give me credit if 6 | you distribute any code derived from it. 7 | 8 | 9 | Greg Ewing, 10 | Computer Science Department, 11 | University of Canterbury, 12 | Christchurch, 13 | New Zealand 14 | 15 | greg@cosc.canterbury.ac.nz 16 | 17 | Version History 18 | --------------- 19 | 20 | 1.1.5 Eliminated a syntax warning about assigning to None 21 | when using with Python 2.3. 22 | 23 | 1.1.4 Fixed bug causing argument of Rep or Rep1 to 24 | fail to match following a newline. 25 | 26 | 1.1.3 Fixed bug causing Eol to fail to match at the 27 | beginning of a line in some circumstances. 28 | 29 | 1.1.2 Changed Scanner.yield() to Scanner.produce() to 30 | accommodate Python 2.3, where yield is a keyword. 31 | 32 | Changed test10 to not rely so much on details of 33 | string repr. 34 | 35 | 1.1.1 Fixed two minor bugs: uncommented Scanner.next_char() and 36 | added import of types to Regexps.py. 37 | 38 | 1.1 Added support for case-insensitive matches. 39 | 40 | 1.0 First official release. 
41 | -------------------------------------------------------------------------------- /external/plex/dist/TODO: -------------------------------------------------------------------------------- 1 | * Multiple state names in State constructor 2 | 3 | * Implement scanning in C 4 | 5 | * Case-insensitivity flag 6 | 7 | * Trailing contexts? 8 | 9 | * Make Action a callable object 10 | 11 | * Action sequences 12 | 13 | * Hook up to existing parser module 14 | 15 | -------------------------------------------------------------------------------- /external/plex/dist/doc/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | Plex 9 | 10 | 11 | 12 |

13 | 14 |
Plex - a Lexical Analysis Module for Python

15 | 16 |

17 | Version 1.1.2 18 |

19 | Plex is a Python module for constructing lexical analysers, or scanners. 20 | Plex scanners have almost all the capabilities of the scanners generated 21 | by GNU Flex, and are specified in a very similar way. Tokens are defined 22 | by regular expressions, and each token has an associated action, which 23 | may be to return a literal value, or to call an arbitrary function. 24 |

Plex is designed to fill a need that is left wanting by the existing 25 | Python regular expression modules. If you've ever tried to use one of them 26 | for implementing a scanner, you will have found that they're not really 27 | suited to the task. You can define a bunch of regular expressions which 28 | match your tokens all right, but you can only match one of them at a time 29 | against your input. To match all of them at once, you have to join them 30 | all together into one big r.e., but then you've got no easy way to tell 31 | which one matched. This is the problem that Plex is designed to solve. 32 |

Another advantage of Plex is that it compiles all of the regular expressions 33 | into a single DFA. Once that's done, the input can be processed in a time 34 | proportional to the number of characters to be scanned, and independent 35 | of the number or complexity of the regular expressions. Python's existing 36 | regular expression matchers do not have this property. 37 |

38 |


39 |

40 | Contents

41 | 42 |
43 |
  • 44 | Tutorial
  • 45 | 46 |
      47 |
  • 48 | Reference
  • 49 |
    50 | 51 |

    52 | 53 |

    54 | 55 |

    56 | License

    57 | Plex is free of any restrictions. You can use it, redistribute it, sell 58 | it, whatever you want.  All I ask is that you give me credit if you 59 | distribute any code derived from it. 60 |

    61 |


    62 |
    Greg Ewing, 63 |
    Computer Science Department, 64 |
    University of Canterbury, 65 |
    Christchurch, 66 |
    New Zealand 67 |
    68 | greg@cosc.canterbury.ac.nz
    69 | 70 |
    71 | http://www.cosc.canterbury.ac.nz/~greg
    72 | 73 | 74 | 75 | -------------------------------------------------------------------------------- /external/plex/dist/examples/example1and2.in: -------------------------------------------------------------------------------- 1 | Python rocks 2 | 3 | 4 | -------------------------------------------------------------------------------- /external/plex/dist/examples/example1and2.py: -------------------------------------------------------------------------------- 1 | # 2 | # Example 1 3 | # 4 | 5 | from Plex import * 6 | 7 | lexicon = Lexicon([ 8 | (Str("Python"), "my_favourite_language"), 9 | (Str("Perl"), "the_other_language"), 10 | (Str("rocks"), "is_excellent"), 11 | (Str("sucks"), "is_differently_good"), 12 | (Rep1(Any(" \t\n")), IGNORE) 13 | ]) 14 | 15 | # 16 | # Example 2 17 | # 18 | 19 | filename = "example1and2.in" 20 | f = open(filename, "r") 21 | scanner = Scanner(lexicon, f, filename) 22 | while 1: 23 | token = scanner.read() 24 | print token 25 | if token[0] is None: 26 | break 27 | 28 | 29 | -------------------------------------------------------------------------------- /external/plex/dist/examples/example3.in: -------------------------------------------------------------------------------- 1 | if x > y * 5 then 2 | b = c / d 3 | else 4 | Python = handy + useful 5 | end 6 | 7 | 8 | -------------------------------------------------------------------------------- /external/plex/dist/examples/example3.py: -------------------------------------------------------------------------------- 1 | # 2 | # Example 3 3 | # 4 | 5 | from Plex import * 6 | 7 | letter = Range("AZaz") 8 | digit = Range("09") 9 | name = letter + Rep(letter | digit) 10 | number = Rep1(digit) 11 | space = Any(" \t\n") 12 | comment = Str("{") + Rep(AnyBut("}")) + Str("}") 13 | 14 | resword = Str("if", "then", "else", "end") 15 | 16 | lexicon = Lexicon([ 17 | (name, 'ident'), 18 | (number, 'int'), 19 | (resword, TEXT), 20 | (Any("+-*/=<>"), TEXT), 21 | (space | comment, IGNORE) 22 | ]) 23 
| 24 | filename = "example3.in" 25 | f = open(filename, "r") 26 | scanner = Scanner(lexicon, f, filename) 27 | while 1: 28 | token = scanner.read() 29 | print token 30 | if token[0] is None: 31 | break 32 | 33 | 34 | -------------------------------------------------------------------------------- /external/plex/dist/examples/example4.in: -------------------------------------------------------------------------------- 1 | alpha beta (*spam (*and*) eggs*) gamma 2 | 3 | 4 | -------------------------------------------------------------------------------- /external/plex/dist/examples/example4.py: -------------------------------------------------------------------------------- 1 | # 2 | # Example 4 3 | # 4 | 5 | from Plex import * 6 | 7 | def begin_comment(scanner, text): 8 | scanner.nesting_level = scanner.nesting_level + 1 9 | 10 | def end_comment(scanner, text): 11 | scanner.nesting_level = scanner.nesting_level - 1 12 | 13 | def maybe_a_name(scanner, text): 14 | if scanner.nesting_level == 0: 15 | return 'ident' 16 | 17 | letter = Range("AZaz") 18 | digit = Range("09") 19 | name = letter + Rep(letter | digit) 20 | space = Any(" \t\n") 21 | 22 | lexicon = Lexicon([ 23 | (Str("(*"), begin_comment), 24 | (Str("*)"), end_comment), 25 | (name, maybe_a_name), 26 | (space, IGNORE) 27 | ]) 28 | 29 | filename = "example4.in" 30 | f = open(filename, "r") 31 | scanner = Scanner(lexicon, f, filename) 32 | scanner.nesting_level = 0 33 | while 1: 34 | token = scanner.read() 35 | print token 36 | if token[0] is None: 37 | break 38 | 39 | 40 | -------------------------------------------------------------------------------- /external/plex/dist/examples/example5.in: -------------------------------------------------------------------------------- 1 | alpha beta (*spam and 42 eggs*) gamma 2 | 3 | 4 | -------------------------------------------------------------------------------- /external/plex/dist/examples/example5.py: 
-------------------------------------------------------------------------------- 1 | # 2 | # Example 5 3 | # 4 | 5 | from Plex import * 6 | 7 | letter = Range("AZaz") 8 | digit = Range("09") 9 | name = letter + Rep(letter | digit) 10 | number = Rep1(digit) 11 | space = Any(" \t\n") 12 | 13 | lexicon = Lexicon([ 14 | (name, 'ident'), 15 | (number, 'int'), 16 | (space, IGNORE), 17 | (Str("(*"), Begin('comment')), 18 | State('comment', [ 19 | (Str("*)"), Begin('')), 20 | (AnyChar, IGNORE) 21 | ]) 22 | ]) 23 | 24 | filename = "example5.in" 25 | f = open(filename, "r") 26 | scanner = Scanner(lexicon, f, filename) 27 | while 1: 28 | token = scanner.read() 29 | print token 30 | if token[0] is None: 31 | break 32 | 33 | 34 | -------------------------------------------------------------------------------- /external/plex/dist/examples/example6.in: -------------------------------------------------------------------------------- 1 | alpha beta (*spam and*) {42 eggs} gamma 2 | 3 | 4 | -------------------------------------------------------------------------------- /external/plex/dist/examples/example6.py: -------------------------------------------------------------------------------- 1 | # 2 | # Example 6 3 | # 4 | 5 | from Plex import * 6 | 7 | letter = Range("AZaz") 8 | digit = Range("09") 9 | name = letter + Rep(letter | digit) 10 | number = Rep1(digit) 11 | space = Any(" \t\n") 12 | 13 | lexicon = Lexicon([ 14 | (name, 'ident'), 15 | (number, 'int'), 16 | (space, IGNORE), 17 | (Str("(*"), Begin('comment1')), 18 | (Str("{"), Begin('comment2')), 19 | State('comment1', [ 20 | (Str("*)"), Begin('')), 21 | (AnyChar, IGNORE) 22 | ]), 23 | State('comment2', [ 24 | (Str("}"), Begin('')), 25 | (AnyChar, IGNORE) 26 | ]) 27 | ]) 28 | 29 | filename = "example6.in" 30 | f = open(filename, "r") 31 | scanner = Scanner(lexicon, f, filename) 32 | while 1: 33 | token = scanner.read() 34 | print token 35 | if token[0] is None: 36 | break 37 | 38 | 39 | 
class MyScanner(Scanner):
    """Scanner for example 7: identifiers, integers and nestable comments.

    "(*" opens a comment and "*)" closes one.  `nesting_level` counts the
    comments currently open; the scanner is in the 'comment' state while
    that count is non-zero.
    """

    def __init__(self, file, name):
        Scanner.__init__(self, self.lexicon, file, name)
        self.nesting_level = 0

    def begin_comment(self, text):
        # Only the outermost "(*" needs to switch state; inner ones are
        # merely counted.
        if self.nesting_level == 0:
            self.begin('comment')
        self.nesting_level += 1

    def end_comment(self, text):
        self.nesting_level -= 1
        # Closing the outermost comment returns to the default state.
        if self.nesting_level == 0:
            self.begin('')

    # The lexicon is defined after the two actions above because it refers
    # to them by their bare class-scope names.
    lexicon = Lexicon([
        (name, 'ident'),
        (number, 'int'),
        (space, IGNORE),
        (Str("(*"), begin_comment),
        State('comment', [
            (Str("(*"), begin_comment),
            (Str("*)"), end_comment),
            (AnyChar, IGNORE)
        ])
    ])
def make_lexicon():
    """Build and return the Lexicon for the example Object Pascal scanner.

    Token values produced by the lexicon:
      * reserved words (matched case-insensitively), punctuation and
        two-character operators -> the matched text itself (TEXT)
      * identifiers -> 'ident'
      * unsigned integers -> 'num'
      * quoted strings -> 'str'
    Whitespace and { ... } comments are ignored.
    """
    letter = Range("AZaz") | Any("_")
    digit = Range("09")
    space = Any(" \t\n")

    ident = letter + Rep(letter | digit)
    resword = NoCase(Str("program", "unit", "uses", "const", "type", "var",
                         "if", "then", "else", "while", "do", "repeat", "until",
                         "for", "to", "downto", "and", "or", "not",
                         "array", "of", "record", "object"))
    number = Rep1(digit)
    # A string body is any run of non-quote characters and doubled quotes
    # ('' is Pascal's escape for a quote inside a string).  The repetition
    # must cover both alternatives; repeating only the non-quote characters
    # would split 'it''s' into two separate string tokens.
    string = Str("'") + Rep(AnyBut("'") | Str("''")) + Str("'")
    diphthong = Str(":=", "<=", ">=", "<>", "..")
    punct = Any("^&*()-+=[]|;:<>,./")
    spaces = Rep1(space)
    comment_begin = Str("{")
    comment_char = AnyBut("}")
    comment_end = Str("}")

    # resword is listed ahead of ident, as in the original pattern order.
    lexicon = Lexicon([
        (resword, TEXT),
        (ident, 'ident'),
        (number, 'num'),
        (string, 'str'),
        (punct | diphthong, TEXT),
        (spaces, IGNORE),
        (comment_begin, Begin('comment')),
        State('comment', [
            (comment_char, IGNORE),
            (comment_end, Begin(''))
        ])
    ])

    return lexicon
class PythonScanner(Scanner):
    """Example scanner for Python source that emits INDENT/DEDENT tokens.

    Instance state (set up in __init__):
      indentation_stack     -- indentation widths currently open; starts
                               as [0]
      bracket_nesting_level -- count of brackets currently open
      indentation_char      -- first indentation character seen (space or
                               tab), or None before any has been seen
    """

    def open_bracket_action(self, text):
        self.bracket_nesting_level = self.bracket_nesting_level + 1
        return text

    def close_bracket_action(self, text):
        self.bracket_nesting_level = self.bracket_nesting_level - 1
        return text

    def current_level(self):
        """Return the innermost indentation width currently open."""
        return self.indentation_stack[-1]

    def newline_action(self, text):
        # A line end inside brackets produces no token and no indentation
        # processing.
        if self.bracket_nesting_level == 0:
            self.begin('indent')
            return 'newline'

    def indentation_action(self, text):
        """Handle the leading whitespace `text` of a non-blank line."""
        # Check that tabs and spaces are being used consistently.
        if text:
            c = text[0]
            if self.indentation_char is None:
                self.indentation_char = c
            else:
                if self.indentation_char != c:
                    raise NaughtyNaughty("Mixed up tabs and spaces!")
        # Figure out how many indents/dedents to do
        current_level = self.current_level()
        new_level = len(text)
        if new_level > current_level:
            self.indent_to(new_level)
        elif new_level < current_level:
            self.dedent_to(new_level)
        # Change back to default state
        self.begin('')

    def indent_to(self, new_level):
        """Open a new indentation level and produce one INDENT token."""
        self.indentation_stack.append(new_level)
        self.produce('INDENT', '')

    def dedent_to(self, new_level):
        """Close levels deeper than `new_level`, one DEDENT token each.

        Raises NaughtyNaughty if `new_level` does not match a level that
        is still open after popping.
        """
        while new_level < self.current_level():
            del self.indentation_stack[-1]
            self.produce('DEDENT', '')
        if new_level != self.current_level():
            raise NaughtyNaughty("Indentation booboo!")

    def eof(self):
        # Close every indentation level still open at end of input.
        self.dedent_to(0)

    letter = Range("AZaz") | Any("_")
    digit = Range("09")
    hexdigit = Range("09AFaf")

    name = letter + Rep(letter | digit)
    number = Rep1(digit) | (Str("0x") + Rep1(hexdigit))

    sq_string = (
        Str("'") +
        Rep(AnyBut("\\\n'") | (Str("\\") + AnyChar)) +
        Str("'"))

    dq_string = (
        Str('"') +
        Rep(AnyBut('\\\n"') | (Str("\\") + AnyChar)) +
        Str('"'))

    non_dq = AnyBut('"') | (Str('\\') + AnyChar)
    tq_string = (
        Str('"""') +
        Rep(
            non_dq |
            (Str('"') + non_dq) |
            (Str('""') + non_dq)) + Str('"""'))

    stringlit = sq_string | dq_string | tq_string
    opening_bracket = Any("([{")
    closing_bracket = Any(")]}")
    punct1 = Any(":,;+-*/|&<>=.%`~^")
    punct2 = Str("==", "<>", "!=", "<=", "<<", ">>", "**")
    punctuation = punct1 | punct2

    spaces = Rep1(Any(" \t"))
    indentation = Rep(Str(" ")) | Rep(Str("\t"))
    lineterm = Str("\n") | Eof
    escaped_newline = Str("\\\n")
    comment = Str("#") + Rep(AnyBut("\n"))
    blank_line = indentation + Opt(comment) + lineterm

    # The action methods are referenced by their bare class-scope names,
    # so the lexicon has to be built after they are defined.
    lexicon = Lexicon([
        (name, 'name'),
        (number, 'number'),
        (stringlit, 'string'),
        (punctuation, TEXT),
        (opening_bracket, open_bracket_action),
        (closing_bracket, close_bracket_action),
        (lineterm, newline_action),
        (comment, IGNORE),
        (spaces, IGNORE),
        (escaped_newline, IGNORE),
        State('indent', [
            (blank_line, IGNORE),
            (indentation, indentation_action),
        ]),
    ])

    def __init__(self, file):
        Scanner.__init__(self, self.lexicon, file)
        self.indentation_stack = [0]
        self.bracket_nesting_level = 0
        self.indentation_char = None
        # Start in the 'indent' state so the first line's leading
        # whitespace is examined too.
        self.begin('indent')
def run(lexicon, test_name,
        debug = 0, trace = 0, scanner_class = Plex.Scanner):
    """Scan the file <test_name>.in and print one line per token read.

    lexicon       -- lexicon handed to the scanner
    test_name     -- test name; the input file is test_name + ".in"
    debug         -- if true, dump the lexicon's machine to stdout first,
                     followed by a line of 70 "=" characters
    trace         -- if true, set the scanner's `trace` attribute to 1
    scanner_class -- class instantiated as scanner_class(lexicon, f, name)

    Each printed line shows the scanner position (name, line, column), the
    token value and the repr of its text.  The line for the terminating
    None value is printed as well.
    """
    if debug:
        lexicon.machine.dump(sys.stdout)
        print("=" * 70)
    in_name = test_name + ".in"
    # NOTE(review): the "U" (universal newlines) mode flag is a Python 2
    # idiom and is rejected by Python 3.11+; kept unchanged here.
    f = open(in_name, "rU")
    try:
        s = scanner_class(lexicon, f, in_name)
        if trace:
            s.trace = 1
        while 1:
            value, text = s.read()
            name, line, pos = s.position()
            print("%s %3d %3d %-10s %s" % (name, line, pos, value, repr(text)))
            if value is None:
                break
    finally:
        # Close the input even if scanning raises.
        f.close()
def check_result(out_name, out2_name):
    """Return True if the two named files have the same text contents."""
    return read_file(out_name) == read_file(out2_name)


def read_file(name):
    """Return the whole contents of file `name` as text.

    Line endings are read with universal-newline translation, so "\\r\\n"
    and "\\r" come back as "\\n".  io.open is used instead of the "rU" mode
    flag, which Python 3.11 removed.  On Python 2 the result is a unicode
    string rather than a byte string.
    """
    import io
    # The with-block closes the handle even if read() raises.
    with io.open(name, "r") as f:
        return f.read()


def remove(name):
    """Delete file `name`, ignoring the error if it cannot be removed."""
    try:
        os.unlink(name)
    except OSError:
        # Best effort: a missing or undeletable file is not an error here.
        # Narrowed from a bare except so KeyboardInterrupt still propagates.
        pass
test0.in 2 2 thing 'a' 4 | test0.in 5 0 None '' 5 | -------------------------------------------------------------------------------- /external/plex/dist/tests/test0.py: -------------------------------------------------------------------------------- 1 | import Test 2 | from Plex import * 3 | import sys 4 | 5 | lex = Lexicon([ 6 | (Str("a"), 'thing'), 7 | (Any("\n"), IGNORE) 8 | ], 9 | debug = Test.debug, 10 | timings = sys.stderr 11 | ) 12 | 13 | Test.run(lex, "test0", debug = 0, trace = 0) 14 | 15 | 16 | -------------------------------------------------------------------------------- /external/plex/dist/tests/test1.in: -------------------------------------------------------------------------------- 1 | a b aa ab ba a0 a1 b0 b1 ab01 2 | babba01 abba babb b0001a 3 | 4 | -------------------------------------------------------------------------------- /external/plex/dist/tests/test1.out: -------------------------------------------------------------------------------- 1 | test1.in 1 0 ident 'a' 2 | test1.in 1 2 ident 'b' 3 | test1.in 1 4 ident 'aa' 4 | test1.in 1 7 ident 'ab' 5 | test1.in 1 10 ident 'ba' 6 | test1.in 1 13 ident 'a0' 7 | test1.in 1 16 ident 'a1' 8 | test1.in 1 19 ident 'b0' 9 | test1.in 1 22 ident 'b1' 10 | test1.in 1 25 ident 'ab01' 11 | test1.in 2 0 ident 'babba01' 12 | test1.in 2 8 ident 'abba' 13 | test1.in 2 13 ident 'babb' 14 | test1.in 2 18 ident 'b0001a' 15 | test1.in 4 0 None '' 16 | -------------------------------------------------------------------------------- /external/plex/dist/tests/test1.py: -------------------------------------------------------------------------------- 1 | import Test 2 | from Plex import * 3 | import sys 4 | 5 | lex = Lexicon([ 6 | (Any("ab") + Rep(Any("ab01")), 'ident'), 7 | (Any(" \n"), IGNORE) 8 | ], 9 | debug = Test.debug, 10 | timings = sys.stderr 11 | ) 12 | 13 | Test.run(lex, "test1", debug = 0, trace = 0) 14 | 15 | 16 | -------------------------------------------------------------------------------- 
/external/plex/dist/tests/test10.out: -------------------------------------------------------------------------------- 1 | Seq() 2 | Seq(Char('a')) 3 | Seq(Any('a')) 4 | Seq(Any('ab')) 5 | Seq(Any('abc')) 6 | Seq(Any('abc')) 7 | Seq(Any('abcd')) 8 | Seq(Any('abcghi')) 9 | Seq(AnyBut('a')) 10 | Seq(AnyBut('abcghi')) 11 | Seq(Any('-')) 12 | Seq(Any('-abc')) 13 | Seq(Any('abc-')) 14 | Seq(Any(']')) 15 | Seq(Any(']-')) 16 | Seq(AnyBut('-')) 17 | Seq(AnyBut('-abc')) 18 | Seq(AnyBut('abc-')) 19 | Seq(AnyBut(']')) 20 | Seq(AnyBut(']-')) 21 | Seq(Rep(Char('a'))) 22 | Seq(Rep1(Char('a'))) 23 | Seq(Opt(Char('a'))) 24 | Seq(Opt(Rep1(Rep(Char('a'))))) 25 | Seq(Char('a'),Char('b')) 26 | Alt(Seq(Char('a')),Seq(Char('b'))) 27 | Seq(Char('a'),Char('b'),Char('c'),Char('d'),Char('e')) 28 | Alt(Seq(Char('a')),Seq(Char('b')),Seq(Char('c')),Seq(Char('d')),Seq(Char('e'))) 29 | Alt(Seq(Char('a'),Char('b'),Char('c')),Seq(Char('d'),Char('e'),Char('f')),Seq(Char('g'),Char('h'),Char('i'))) 30 | Seq(Char('a'),Char('b'),Char('c'),Alt(Seq(Char('d'),Char('e'),Char('f')),Seq(Char('g'),Char('h'),Char('i')))) 31 | Seq(Char('a'),Char('b'),Char('('),Char('c'),Char('['),Char('d'),Char('e')) 32 | Seq(Bol,Char('a'),Char('b'),Char('c'),Eol) 33 | True 34 | Syntax error in regexp 'abc(de' at position 6: Missing ')' 35 | Syntax error in regexp 'abc[de' at position 6: Missing ']' 36 | Syntax error in regexp 'abc)de' at position 3: Unexpected ')' 37 | -------------------------------------------------------------------------------- /external/plex/dist/tests/test10.py: -------------------------------------------------------------------------------- 1 | # Test traditional regular expression syntax. 
def test_err(s):
    """Print the parsed form of regexp `s`, or the PlexError it raises."""
    try:
        print(re(s))
    except PlexError as e:
        # "except X as e" replaces the comma form, which Python 3 rejects.
        print(e)
test11.in 3 10 other-language 'pERl' 10 | test11.in 4 0 other-language 'Serbo-Croatian' 11 | test11.in 4 15 other-language 'serbo-croatian' 12 | test11.in 5 0 real-1 'REALbasic' 13 | test11.in 5 10 real-1 'realbasic' 14 | test11.in 5 20 real-1 'REalbasic' 15 | test11.in 6 0 real-2 'REALBasic' 16 | test11.in 6 10 real-2 'realBasic' 17 | test11.in 6 20 real-2 'REalBasic' 18 | test11.in 7 0 None '' 19 | -------------------------------------------------------------------------------- /external/plex/dist/tests/test11.py: -------------------------------------------------------------------------------- 1 | import Test 2 | from Plex import * 3 | import sys 4 | 5 | lex = Lexicon([ 6 | (Str("Python"), 'upper-python'), 7 | (Str("python"), 'lower-python'), 8 | (NoCase(Str("COBOL", "perl", "Serbo-Croatian")), 'other-language'), 9 | (NoCase(Str("real") + Case(Str("basic"))), 'real-1'), 10 | (NoCase(Str("real") + Case(Str("Basic"))), 'real-2'), 11 | (Any(" \t\n"), IGNORE) 12 | ], 13 | debug = Test.debug, 14 | timings = sys.stderr 15 | ) 16 | 17 | Test.run(lex, "test11", debug = 0, trace = 0) 18 | -------------------------------------------------------------------------------- /external/plex/dist/tests/test12.in: -------------------------------------------------------------------------------- 1 | 'Single-line text literal''Multi- 2 | line text literal' -------------------------------------------------------------------------------- /external/plex/dist/tests/test12.out: -------------------------------------------------------------------------------- 1 | test12.in 1 0 'Single-line text literal' "'Single-line text literal'" 2 | test12.in 1 26 'Multi- 3 | line text literal' "'Multi-\nline text literal'" 4 | test12.in 2 18 None '' 5 | -------------------------------------------------------------------------------- /external/plex/dist/tests/test12.py: -------------------------------------------------------------------------------- 1 | import sys 2 | from cStringIO import StringIO 3 | 4 
| import Test 5 | from Plex import * 6 | 7 | lex = Lexicon([ 8 | (Str("'") + Rep(AnyBut("'")) + Str("'"), TEXT) 9 | ], 10 | debug = Test.debug, 11 | timings = sys.stderr 12 | ) 13 | 14 | Test.run(lex, "test12", debug = 0, trace = 0) 15 | -------------------------------------------------------------------------------- /external/plex/dist/tests/test2.in: -------------------------------------------------------------------------------- 1 | 2 | a b ab abba a0 !xyzzy! 3 | b1 abab0110bba1 #burble# 4 | 0 1 00 01 (fee) [fie] [foe] "fum" 5 | 101010 0001010101 6 | 7 | 8 | -------------------------------------------------------------------------------- /external/plex/dist/tests/test2.out: -------------------------------------------------------------------------------- 1 | test2.in 2 0 ident 'a' 2 | test2.in 2 2 ident 'b' 3 | test2.in 2 4 ident 'ab' 4 | test2.in 2 7 ident 'abba' 5 | test2.in 2 12 ident 'a0' 6 | test2.in 3 0 ident 'b1' 7 | test2.in 3 3 ident 'abab0110bba1' 8 | test2.in 4 0 num '0' 9 | test2.in 4 2 num '1' 10 | test2.in 4 4 num '00' 11 | test2.in 4 7 num '01' 12 | test2.in 5 0 num '101010' 13 | test2.in 5 7 num '0001010101' 14 | test2.in 8 0 None '' 15 | -------------------------------------------------------------------------------- /external/plex/dist/tests/test2.py: -------------------------------------------------------------------------------- 1 | import Test 2 | from Plex import * 3 | import sys 4 | 5 | lex = Lexicon([ 6 | (Seq(Any("ab"), Rep(Any("ab01"))), 'ident'), 7 | (Seq(Any("01"), Rep(Any("01"))), 'num'), 8 | (Any(' \n'), IGNORE), 9 | (Str("abba"), 'abba'), 10 | (Any('([{!"#') + Rep(AnyBut('!"#}])')) + Any('!"#}])'), IGNORE) 11 | ], 12 | debug = Test.debug, 13 | timings = sys.stderr 14 | ) 15 | 16 | Test.run(lex, "test2", debug = 0, trace = 0) 17 | 18 | 19 | -------------------------------------------------------------------------------- /external/plex/dist/tests/test3.in: 
-------------------------------------------------------------------------------- 1 | program furbie(input, output, throughput); 2 | begin 3 | repeat 4 | make(cute_noises); 5 | have_flat_battery; 6 | until owner_is(fed_up); 7 | end. 8 | 9 | 10 | -------------------------------------------------------------------------------- /external/plex/dist/tests/test3.out: -------------------------------------------------------------------------------- 1 | test3.in 1 0 program 'program' 2 | test3.in 1 8 ident 'furbie' 3 | test3.in 1 14 ( '(' 4 | test3.in 1 15 ident 'input' 5 | test3.in 1 20 , ',' 6 | test3.in 1 22 ident 'output' 7 | test3.in 1 28 , ',' 8 | test3.in 1 30 ident 'throughput' 9 | test3.in 1 40 ) ')' 10 | test3.in 1 41 ; ';' 11 | test3.in 2 0 begin 'begin' 12 | test3.in 3 3 repeat 'repeat' 13 | test3.in 4 5 ident 'make' 14 | test3.in 4 9 ( '(' 15 | test3.in 4 10 ident 'cute_noises' 16 | test3.in 4 21 ) ')' 17 | test3.in 4 22 ; ';' 18 | test3.in 5 5 ident 'have_flat_battery' 19 | test3.in 5 22 ; ';' 20 | test3.in 6 3 until 'until' 21 | test3.in 6 9 ident 'owner_is' 22 | test3.in 6 17 ( '(' 23 | test3.in 6 18 ident 'fed_up' 24 | test3.in 6 24 ) ')' 25 | test3.in 6 25 ; ';' 26 | test3.in 7 0 end 'end' 27 | test3.in 7 3 . '.' 
28 | test3.in 10 0 None '' 29 | -------------------------------------------------------------------------------- /external/plex/dist/tests/test3.py: -------------------------------------------------------------------------------- 1 | import Test 2 | import sys 3 | from Plex import * 4 | 5 | letter = Range("AZaz") | Any("_") 6 | digit = Range("09") 7 | space = Any(" \t\n") 8 | 9 | ident = Seq(letter, Rep(Alt(letter, digit))) 10 | number = Seq(digit, Rep(digit)) 11 | punct = Any("*()-+=[]{};:<>,./") 12 | spaces = Seq(space, Rep(space)) 13 | resword = Str("program", "begin", "end", "repeat", "until") 14 | 15 | lex = Lexicon([ 16 | (resword, TEXT), 17 | (ident, 'ident'), 18 | (number, 'num'), 19 | (punct, TEXT), 20 | (spaces, IGNORE) 21 | ], 22 | debug = Test.debug, 23 | timings = sys.stderr 24 | ) 25 | 26 | Test.run(lex, "test3", trace = 0) 27 | 28 | 29 | -------------------------------------------------------------------------------- /external/plex/dist/tests/test4.in: -------------------------------------------------------------------------------- 1 | this (should ignore (anything between (matching) pairs) of) brackets 2 | 3 | 4 | -------------------------------------------------------------------------------- /external/plex/dist/tests/test4.out: -------------------------------------------------------------------------------- 1 | test4.in 1 0 letter 't' 2 | test4.in 1 1 letter 'h' 3 | test4.in 1 2 letter 'i' 4 | test4.in 1 3 letter 's' 5 | test4.in 1 60 letter 'b' 6 | test4.in 1 61 letter 'r' 7 | test4.in 1 62 letter 'a' 8 | test4.in 1 63 letter 'c' 9 | test4.in 1 64 letter 'k' 10 | test4.in 1 65 letter 'e' 11 | test4.in 1 66 letter 't' 12 | test4.in 1 67 letter 's' 13 | test4.in 4 0 None '' 14 | -------------------------------------------------------------------------------- /external/plex/dist/tests/test4.py: -------------------------------------------------------------------------------- 1 | import Test 2 | import sys 3 | from Plex import * 4 | 5 | letters = 
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz_" 6 | 7 | wax = Any("(") 8 | wane = Any(")") 9 | letter = Any(letters) 10 | space = Any(" \t\n") 11 | 12 | def open_paren(s, t): 13 | s.counter = s.counter + 1 14 | 15 | def close_paren(s, t): 16 | s.counter = s.counter - 1 17 | 18 | def got_a_letter(s, t): 19 | if s.counter == 0: 20 | return 'letter' 21 | else: 22 | return None 23 | 24 | lex = Lexicon([ 25 | (wax, open_paren), 26 | (wane, close_paren), 27 | (letter, got_a_letter), 28 | (space, IGNORE) 29 | ], 30 | debug = Test.debug, 31 | timings = sys.stderr 32 | ) 33 | 34 | class MyScanner(Scanner): 35 | counter = 0 36 | trace = 0 37 | 38 | Test.run(lex, "test4", scanner_class = MyScanner, trace = 0) 39 | 40 | 41 | -------------------------------------------------------------------------------- /external/plex/dist/tests/test5.in: -------------------------------------------------------------------------------- 1 | a b {this is a comment} c abc 2 | 3 | 4 | -------------------------------------------------------------------------------- /external/plex/dist/tests/test5.out: -------------------------------------------------------------------------------- 1 | test5.in 1 0 ident 'a' 2 | test5.in 1 2 ident 'b' 3 | test5.in 1 24 ident 'c' 4 | test5.in 1 26 ident 'abc' 5 | test5.in 4 0 None '' 6 | -------------------------------------------------------------------------------- /external/plex/dist/tests/test5.py: -------------------------------------------------------------------------------- 1 | import Test 2 | import sys 3 | from Plex import * 4 | 5 | letters = "abc" 6 | spaces = " \t\n" 7 | all = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz*/{} \t\n" 8 | 9 | letter = Any(letters) 10 | space = Any(" \t\n") 11 | 12 | ident = Rep1(letter) 13 | spaces = Rep1(space) 14 | begin_comment = Str("{") 15 | end_comment = Str("}") 16 | 17 | lex = Lexicon([ 18 | (ident, 'ident'), 19 | (spaces, IGNORE), 20 | (begin_comment, Begin('comment')), 21 | State('comment', [ 22 | 
(end_comment, Begin('')), 23 | (AnyBut("}"), IGNORE), 24 | ]) 25 | ], 26 | debug = Test.debug, 27 | timings = sys.stderr 28 | ) 29 | 30 | Test.run(lex, "test5") 31 | 32 | 33 | -------------------------------------------------------------------------------- /external/plex/dist/tests/test6.in: -------------------------------------------------------------------------------- 1 | { Test input for the 2 | Plex Pascal scanner } 3 | 4 | program spam(input, output); 5 | var 6 | order: integer; 7 | begin 8 | write('How many slices of spam would you like with your eggs, sir? '); 9 | readln(order); 10 | if order >= 1 then 11 | writeln('Certainly, sir.') 12 | else 13 | writeln('Sorry, sir, invalid order.') 14 | end 15 | end. 16 | 17 | -------------------------------------------------------------------------------- /external/plex/dist/tests/test6.out: -------------------------------------------------------------------------------- 1 | test6.in 4 0 program 'program' 2 | test6.in 4 8 ident 'spam' 3 | test6.in 4 12 ( '(' 4 | test6.in 4 13 ident 'input' 5 | test6.in 4 18 , ',' 6 | test6.in 4 20 ident 'output' 7 | test6.in 4 26 ) ')' 8 | test6.in 4 27 ; ';' 9 | test6.in 5 0 var 'var' 10 | test6.in 6 2 ident 'order' 11 | test6.in 6 7 : ':' 12 | test6.in 6 9 ident 'integer' 13 | test6.in 6 16 ; ';' 14 | test6.in 7 0 ident 'begin' 15 | test6.in 8 2 ident 'write' 16 | test6.in 8 7 ( '(' 17 | test6.in 8 8 str "'How many slices of spam would you like with your eggs, sir? 
'" 18 | test6.in 8 70 ) ')' 19 | test6.in 8 71 ; ';' 20 | test6.in 9 2 ident 'readln' 21 | test6.in 9 8 ( '(' 22 | test6.in 9 9 ident 'order' 23 | test6.in 9 14 ) ')' 24 | test6.in 9 15 ; ';' 25 | test6.in 10 2 if 'if' 26 | test6.in 10 5 ident 'order' 27 | test6.in 10 11 >= '>=' 28 | test6.in 10 14 num '1' 29 | test6.in 10 16 then 'then' 30 | test6.in 11 4 ident 'writeln' 31 | test6.in 11 11 ( '(' 32 | test6.in 11 12 str "'Certainly, sir.'" 33 | test6.in 11 29 ) ')' 34 | test6.in 12 2 else 'else' 35 | test6.in 13 4 ident 'writeln' 36 | test6.in 13 11 ( '(' 37 | test6.in 13 12 str "'Sorry, sir, invalid order.'" 38 | test6.in 13 40 ) ')' 39 | test6.in 14 2 ident 'end' 40 | test6.in 15 0 ident 'end' 41 | test6.in 15 3 . '.' 42 | test6.in 17 0 None '' 43 | -------------------------------------------------------------------------------- /external/plex/dist/tests/test6.py: -------------------------------------------------------------------------------- 1 | import Test 2 | 3 | import os 4 | import sys 5 | import Plex 6 | from Plex import * 7 | 8 | letter = Range("AZaz") | Any("_") 9 | digit = Range("09") 10 | space = Any(" \t\n") 11 | 12 | ident = letter + Rep(letter | digit) 13 | resword = Str("program", "unit", "uses", "const", "type", "var", 14 | "if", "then", "else", "while", "do", "repeat", "until", 15 | "for", "to", "downto", "and", "or", "not", 16 | "array", "of", "record", "object") 17 | number = Rep1(digit) 18 | string = Str("'") + (Rep(AnyBut("'")) | Str("''")) + Str("'") 19 | diphthong = Str(":=", "<=", ">=", "<>", "..") 20 | punct = Any("^&*()-+=[]|;:<>,./") 21 | spaces = Rep1(space) 22 | comment_begin = Str("{") 23 | comment_char = AnyBut("}") 24 | comment_end = Str("}") 25 | 26 | lex = Lexicon([ 27 | (resword, TEXT), 28 | (ident, 'ident'), 29 | (number, 'num'), 30 | (string, 'str'), 31 | (punct | diphthong, TEXT), 32 | (spaces, IGNORE), 33 | (comment_begin, Begin('comment')), 34 | State('comment', [ 35 | (comment_char, IGNORE), 36 | (comment_end, Begin('')) 
37 | ]) 38 | ], 39 | debug = Test.debug, 40 | timings = sys.stderr 41 | ) 42 | 43 | Test.run(lex, "test6", debug = 0, trace = 0) 44 | 45 | 46 | -------------------------------------------------------------------------------- /external/plex/dist/tests/test7.in: -------------------------------------------------------------------------------- 1 | aa bbb bb bbbbb b ccc 2 | a bb bbb b cc 3 | bb bbb bbbbb bb bbb 4 | ddddddddddddddd 5 | aaa bbb bb cccc 6 | 7 | 8 | -------------------------------------------------------------------------------- /external/plex/dist/tests/test7.out: -------------------------------------------------------------------------------- 1 | test7.in 1 0 begin 'aa' 2 | test7.in 1 3 middle 'bbb' 3 | test7.in 1 7 middle 'bb' 4 | test7.in 1 10 middle 'bbbbb' 5 | test7.in 1 16 middle 'b' 6 | test7.in 1 18 end 'ccc' 7 | test7.in 2 0 begin 'a' 8 | test7.in 2 2 middle 'bb' 9 | test7.in 2 5 middle 'bbb' 10 | test7.in 2 9 middle 'b' 11 | test7.in 2 11 end 'cc' 12 | test7.in 3 0 middle 'bb' 13 | test7.in 3 3 middle 'bbb' 14 | test7.in 3 7 middle 'bbbbb' 15 | test7.in 3 14 middle 'bb' 16 | test7.in 3 17 middle 'bbb' 17 | test7.in 4 0 everything 'ddddddddddddddd' 18 | test7.in 5 0 begin 'aaa' 19 | test7.in 5 4 middle 'bbb' 20 | test7.in 5 8 middle 'bb' 21 | test7.in 5 11 end 'cccc' 22 | test7.in 8 0 None '' 23 | -------------------------------------------------------------------------------- /external/plex/dist/tests/test7.py: -------------------------------------------------------------------------------- 1 | import Test 2 | import sys 3 | from Plex import * 4 | 5 | spaces = Rep1(Any(" \t\n")) 6 | 7 | lex = Lexicon([ 8 | (Bol + Rep1(Str("a")), 'begin'), 9 | ( Rep1(Str("b")), 'middle'), 10 | ( Rep1(Str("c")) + Eol, 'end'), 11 | (Bol + Rep1(Str("d")) + Eol, 'everything'), 12 | (spaces, IGNORE) 13 | ], 14 | debug = Test.debug, 15 | timings = sys.stderr 16 | ) 17 | 18 | Test.run(lex, "test7", trace = 0) 19 | 20 | 21 | 
-------------------------------------------------------------------------------- /external/plex/dist/tests/test8.in: -------------------------------------------------------------------------------- 1 | ftang ftang ftangftang ftangfta ftang 2 | 3 | 4 | -------------------------------------------------------------------------------- /external/plex/dist/tests/test8.out: -------------------------------------------------------------------------------- 1 | test8.in 1 0 one_ftang 'ftang' 2 | test8.in 1 6 one_ftang 'ftang' 3 | test8.in 1 12 two_ftangs 'ftangftang' 4 | test8.in 1 23 one_ftang 'ftang' 5 | test8.in 1 28 one_fta 'fta' 6 | test8.in 1 32 one_ftang 'ftang' 7 | test8.in 4 0 None '' 8 | -------------------------------------------------------------------------------- /external/plex/dist/tests/test8.py: -------------------------------------------------------------------------------- 1 | # 2 | # This one tests the backing-up mechanism. 3 | # 4 | 5 | import Test 6 | import sys 7 | from Plex import * 8 | 9 | spaces = Rep1(Any(" \t\n")) 10 | 11 | lex = Lexicon([ 12 | (Str("ftangftang"), 'two_ftangs'), 13 | (Str("ftang"), 'one_ftang'), 14 | (Str("fta"), 'one_fta'), 15 | (spaces, IGNORE) 16 | ], 17 | debug = Test.debug, 18 | timings = sys.stderr 19 | ) 20 | 21 | Test.run(lex, "test8", trace = 0) 22 | 23 | 24 | -------------------------------------------------------------------------------- /external/plex/dist/tests/test9.in: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | # This, in case you didn't notice, is a comment. 4 | 5 | def gcd(x, y): 6 | while x <> y: 7 | if x > y: # This is another comment. 8 | print "x is bigger" 9 | x = x - y 10 | # This comment doesn't imply any indentation. 
11 | else: 12 | 13 | print "y is bigger" 14 | y = y - x 15 | return x 16 | 17 | def go(): 18 | for x, y in [(12,20), (37,18), (2, 54)]: 19 | print gcd\ 20 | (x, y) 21 | 22 | 23 | -------------------------------------------------------------------------------- /external/plex/dist/tests/test9.out: -------------------------------------------------------------------------------- 1 | name('import') 2 | name('sys') 3 | 'newline' 4 | name('def') 5 | name('gcd') 6 | '(' 7 | name('x') 8 | ',' 9 | name('y') 10 | ')' 11 | ':' 12 | 'newline' 13 | 'INDENT' 14 | name('while') 15 | name('x') 16 | '<>' 17 | name('y') 18 | ':' 19 | 'newline' 20 | 'INDENT' 21 | name('if') 22 | name('x') 23 | '>' 24 | name('y') 25 | ':' 26 | 'newline' 27 | 'INDENT' 28 | name('print') 29 | string('"x is bigger"') 30 | 'newline' 31 | name('x') 32 | '=' 33 | name('x') 34 | '-' 35 | name('y') 36 | 'newline' 37 | 'DEDENT' 38 | name('else') 39 | ':' 40 | 'newline' 41 | 'INDENT' 42 | name('print') 43 | string('"y is bigger"') 44 | 'newline' 45 | name('y') 46 | '=' 47 | name('y') 48 | '-' 49 | name('x') 50 | 'newline' 51 | 'DEDENT' 52 | 'DEDENT' 53 | name('return') 54 | name('x') 55 | 'newline' 56 | 'DEDENT' 57 | name('def') 58 | name('go') 59 | '(' 60 | ')' 61 | ':' 62 | 'newline' 63 | 'INDENT' 64 | name('for') 65 | name('x') 66 | ',' 67 | name('y') 68 | name('in') 69 | '[' 70 | '(' 71 | number('12') 72 | ',' 73 | number('20') 74 | ')' 75 | ',' 76 | '(' 77 | number('37') 78 | ',' 79 | number('18') 80 | ')' 81 | ',' 82 | '(' 83 | number('2') 84 | ',' 85 | number('54') 86 | ')' 87 | ']' 88 | ':' 89 | 'newline' 90 | 'INDENT' 91 | name('print') 92 | name('gcd') 93 | '(' 94 | name('x') 95 | ',' 96 | name('y') 97 | ')' 98 | 'newline' 99 | 'DEDENT' 100 | 'DEDENT' 101 | None 102 | -------------------------------------------------------------------------------- /external/plex/dist/tests/test9.py: -------------------------------------------------------------------------------- 1 | import Test 2 | 3 | import 
exceptions 4 | import sys 5 | from Plex import * 6 | 7 | if 1: 8 | debug = sys.stdout 9 | else: 10 | debug = None 11 | 12 | ######################################################################### 13 | 14 | class NaughtyNaughty(exceptions.Exception): 15 | pass 16 | 17 | class MyScanner(Scanner): 18 | bracket_nesting_level = 0 19 | indentation_stack = None 20 | indentation_char = None 21 | 22 | def current_level(self): 23 | return self.indentation_stack[-1] 24 | 25 | def open_bracket_action(self, text): 26 | self.bracket_nesting_level = self.bracket_nesting_level + 1 27 | return text 28 | 29 | def close_bracket_action(self, text): 30 | self.bracket_nesting_level = self.bracket_nesting_level - 1 31 | return text 32 | 33 | def newline_action(self, text): 34 | if self.bracket_nesting_level == 0: 35 | self.begin('indent') 36 | self.produce('newline', '') 37 | 38 | def indentation_action(self, text): 39 | self.begin('') 40 | # Check that tabs and spaces are being used consistently. 41 | if text: 42 | c = text[0] 43 | if self.indentation_char is None: 44 | self.indentation_char = c 45 | else: 46 | if self.indentation_char <> c: 47 | raise NaughtyNaughty("Mixed up tabs and spaces!") 48 | # Figure out how many indents/dedents to do 49 | current_level = self.current_level() 50 | new_level = len(text) 51 | if new_level == current_level: 52 | return 53 | elif new_level > current_level: 54 | self.indentation_stack.append(new_level) 55 | self.produce('INDENT', '') 56 | else: 57 | while new_level < self.current_level(): 58 | del self.indentation_stack[-1] 59 | self.produce('DEDENT', '') 60 | if new_level <> self.current_level(): 61 | raise NaughtyNaughty("Indentation booboo!") 62 | 63 | def eof(self): 64 | while len(self.indentation_stack) > 1: 65 | self.produce('DEDENT', '') 66 | self.indentation_stack.pop() 67 | 68 | letter = Range("AZaz") | Any("_") 69 | digit = Range("09") 70 | hexdigit = Range("09AFaf") 71 | indentation = Rep(Str(" ")) | Rep(Str("\t")) 72 | 73 | name = 
letter + Rep(letter | digit) 74 | number = Rep1(digit) | (Str("0x") + Rep1(hexdigit)) 75 | sq_string = ( 76 | Str("'") + 77 | Rep(AnyBut("\\\n'") | (Str("\\") + AnyChar)) + 78 | Str("'")) 79 | dq_string = ( 80 | Str('"') + 81 | Rep(AnyBut('\\\n"') | (Str("\\") + AnyChar)) + 82 | Str('"')) 83 | non_dq = AnyBut('"') | (Str('\\') + AnyChar) 84 | tq_string = ( 85 | Str('"""') + 86 | Rep( 87 | non_dq | 88 | (Str('"') + non_dq) | 89 | (Str('""') + non_dq)) + Str('"""')) 90 | stringlit = sq_string | dq_string | tq_string 91 | bra = Any("([{") 92 | ket = Any(")]}") 93 | punct = Any(":,;+-*/|&<>=.%`~^") 94 | diphthong = Str("==", "<>", "!=", "<=", "<<", ">>", "**") 95 | spaces = Rep1(Any(" \t")) 96 | comment = Str("#") + Rep(AnyBut("\n")) 97 | escaped_newline = Str("\\\n") 98 | lineterm = Str("\n") | Eof 99 | 100 | lexicon = Lexicon([ 101 | (name, 'name'), 102 | (number, 'number'), 103 | (stringlit, 'string'), 104 | (punct | diphthong, TEXT), 105 | (bra, open_bracket_action), 106 | (ket, close_bracket_action), 107 | (lineterm, newline_action), 108 | (comment, IGNORE), 109 | (spaces, IGNORE), 110 | (escaped_newline, IGNORE), 111 | State('indent', [ 112 | (indentation + Opt(comment) + lineterm, IGNORE), 113 | (indentation, indentation_action), 114 | ]), 115 | ], 116 | debug = Test.debug, 117 | debug_flags = 7, 118 | timings = sys.stderr) 119 | 120 | def __init__(self, file): 121 | Scanner.__init__(self, self.lexicon, file) 122 | self.indentation_stack = [0] 123 | self.begin('indent') 124 | 125 | ######################################################################### 126 | 127 | #s.machine.dump(sys.stdout) 128 | #print "=" * 70 129 | 130 | f = open("test9.in", "rU") 131 | ts = MyScanner(f) 132 | ts.trace = 0 133 | while 1: 134 | value, text = ts.read() 135 | level = len(ts.indentation_stack) - 1 136 | if level: 137 | print (4 * level - 1) * ' ', 138 | if text and text <> value: 139 | print "%s(%s)" % (value, repr(text)) 140 | else: 141 | print repr(value) 142 | if value is 
None: 143 | break 144 | 145 | 146 | 147 | 148 | -------------------------------------------------------------------------------- /external/plex/prepare.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # Copyright (c) 2010-2016, MIT Probabilistic Computing Project. 4 | # 5 | # This file is part of Venture. 6 | # 7 | # Venture is free software: you can redistribute it and/or modify 8 | # it under the terms of the GNU General Public License as published by 9 | # the Free Software Foundation, either version 3 of the License, or 10 | # (at your option) any later version. 11 | # 12 | # Venture is distributed in the hope that it will be useful, 13 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | # GNU General Public License for more details. 16 | # 17 | # You should have received a copy of the GNU General Public License 18 | # along with Venture. If not, see . 19 | 20 | # Prepare Plex 1.1.5 for import: 21 | # - Delete extraneous files. 22 | # - Translate carriage return to line feed. 23 | # - Make sure the tests still run. 24 | 25 | set -Ceu 26 | 27 | : ${PYTHON:=python} 28 | 29 | if [ ! -d Plex -o ! -f "$(printf 'Icon\r')" ]; then 30 | printf >&2 'Usage: %s\n' "$0" 31 | printf >&2 ' Run within the Plex distribution directory.\n' 32 | exit 1 33 | fi 34 | 35 | # Remove Mac OS X junk. 36 | find . -type f \( -name .DS_Store -o -name '._*' \) -exec rm -f '{}' ';' 37 | 38 | # Remove empty file with CR in its name. 39 | rm -f -- "$(printf 'Icon\r')" 40 | 41 | # Remove Mac OS Classic(???) junk. 42 | rm -f -- tests/PythonInterpreter 43 | 44 | # Convert CR to LF. All remaining files should be plain text. 45 | find . -type f -exec sh -c ' 46 | tr "\\r" "\\n" < "$1" > "$1".tmp && mv -f "$1".tmp "$1" 47 | ' -- '{}' ';' 48 | 49 | # Make sure the tests still run. Avoid generating .pyc and .pyo files 50 | # by passing -B to Python. 
51 | PYTHONPATH="`pwd`" \ 52 | sh -c 'cd tests && exec "$1" -B runtests.py' -- "$PYTHON" 53 | -------------------------------------------------------------------------------- /external/weakprng/COPYING: -------------------------------------------------------------------------------- 1 | Copyright (c) 2015 Taylor R. Campbell 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions 6 | are met: 7 | 1. Redistributions of source code must retain the above copyright 8 | notice, this list of conditions and the following disclaimer. 9 | 2. Redistributions in binary form must reproduce the above copyright 10 | notice, this list of conditions and the following disclaimer in the 11 | documentation and/or other materials provided with the distribution. 12 | 13 | THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 14 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 15 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 16 | ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 17 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 18 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 19 | OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 20 | HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 21 | LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 22 | OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 23 | SUCH DAMAGE. 
24 | -------------------------------------------------------------------------------- /external/weakprng/README: -------------------------------------------------------------------------------- 1 | weakprng - cryptographic pseudorandom number generator based on ChaCha 2 | 3 | http://mumble.net/~campbell/python/chacha.py 4 | http://mumble.net/~campbell/python/weakprng.py 5 | -------------------------------------------------------------------------------- /external/weakprng/dist/__init__.py: -------------------------------------------------------------------------------- 1 | from weakprng import * 2 | -------------------------------------------------------------------------------- /external/weakprng/prepare.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | set -Ceu 4 | 5 | cat > __init__.py <`: source a file of commands 9 | + Ex: `$ bayeslite mydatabase.bdb -f hooks/myhooks.bql` 10 | 11 | 12 | ## Dot command reference 13 | By default, the bayeslite shell will interpret commands as bql. Commands that 14 | lead with a dot (*dot commands*; e.g., `.sql pragma table_info(mytable)` 15 | perform special functionality. 16 | 17 | ### `.help` 18 | The only command you'll need. 19 | 20 | bayeslite> .help 21 | 22 | ## Example 23 | 24 | ``` 25 | $ bayeslite my_database.bdb 26 | Welcome to the bayeslite shell. 27 | Type `.help' for help. 28 | bayeslite> .csv mytable from myfile.csv 29 | bayeslite> .guess mytable_cc mytable.csv 30 | bayeslite> INITIALIZE 10 MODELS FOR mytable_cc; 31 | bayeslite> ANALYZE mytable_cc FOR 100 ITERATIONS; 32 | bayeslite> .hook contrib.py 33 | added command ".zmatrix" 34 | added command ".pairplot" 35 | added command ".ccstate" 36 | bayeslite> .zmatrix ESTIMATE DEPENDENCE PROBABILITY FROM PAIRWISE COLUMNS OF mytable_cc -f zmat.png 37 | ``` 38 | 39 | ## Adding your own commands with `.hook` 40 | 41 | Simply define a python function that takes a `self` argument and an `args` 42 | argument. 
43 | 44 | `args` is the string of text that follows the dot command. For 45 | example, in 46 | 47 | bayeslite> .myfunc -v -n Tommy 48 | 49 | `args` would be the string `'-v -n Tommy'`. 50 | 51 | `self` is the shell object. The 52 | `self` variable then gives you access to the bayesdb object (via `self._bdb`) 53 | and the `hookvars` attribute. `self.hookvars` is a dictionary you can use to 54 | communicate between shell commands. 55 | 56 | ### Example 57 | 58 | ```python 59 | # my_contrib.py 60 | from bayeslite.shell import pretty 61 | from bayeslite.shell.hook import bayesdb_shell_cmd 62 | 63 | 64 | @bayesdb_shell_cmd("hello") 65 | def say_hello_to_name(self, args): 66 | """ Says hello 67 | 68 | """ 69 | self.stdout.write("Hello, %s.\n" % (args,)) 70 | self.hookvars['hello_name'] = args 71 | 72 | 73 | @bayesdb_shell_cmd("byebye", autorehook=True) 74 | def say_bye_to_name(self, args): 75 | name = self.hookvars.get('hello_name', 'friend') 76 | self.stdout.write("Bye-bye, {}.\n".format(name)) 77 | 78 | 79 | # Alias a long query you use a lot 80 | @bayesdb_shell_cmd("mycmd", autorehook=True) 81 | def get_cust_order_data_name(self, args): 82 | '''Get order id, order date, and cutomer name, by customer name 83 | 84 | 85 | Example: 86 | bayeslite> .mycmd John Keats 87 | ''' 88 | query = ''' 89 | SELECT Orders.OrderID, Orders.OrderDate, Customers.CustomerName 90 | FROM Customers, Orders 91 | WHERE Customers.CustomerName = ? 92 | AND Customers.CustomerID = Orders.CustomerID; 93 | ''' 94 | cursor = self._bdb.execute(query, (args,)) 95 | pretty.pp_cursor(self.stdout, cursor) 96 | 97 | ``` 98 | 99 | From the shell, access your command with `.hook` 100 | ``` 101 | bayeslite> .hook my_contrib.py 102 | added command ".hello" 103 | added command ".byebye" 104 | added command ".mycmd" 105 | bayeslite> .help hello 106 | .hello 107 | (END) 108 | 109 | bayeslite> .help byebye 110 | .byebye ...(END) 111 | 112 | bayeslite> .byebye 113 | Bye-bye, friend. 
114 | bayeslite> .hello Nathan 115 | Hello, Nathan. 116 | bayeslite> .byebye 117 | Bye-bye, Nathan. 118 | ``` 119 | 120 | You are free to `.hook` a file multiple times. Re-hooking a file will reload the contents of the file. This can be especially useful for development. If you try to re-hook a file, you must confirm that you want to re-hook the file and confirm that you want to re-hook each function in that file for which `autorehook=False`. 121 | 122 | ## The `.bayesliterc` 123 | Manually hooking the utilities you frequently use every time you open the shell is annoying. To address this, the Bayeslite shell looks for a `.bayesliterc` file in your home directory, which it runs on startup. Any file or path names in `.bayesliterc` should be absolute (this is subject to change, to allow paths relative to the rc file). Local, project-specific init files can be loaded with the `-f` option. 124 | 125 | For example, we may have a small set of utilities in our `~/.bayesliterc`: 126 | 127 | ``` 128 | -- contents of ~/.bayesliterc 129 | .hook /User/bax/my_bayesdb_utils/plotting.py 130 | .hook /User/bax/my_bayesdb_utils/cleaning.py 131 | ``` 132 | 133 | You can prevent the shell from loading `~/.bayesliterc` with the `--no-init-file` argument. 134 | -------------------------------------------------------------------------------- /shell/src/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Copyright (c) 2010-2016, MIT Probabilistic Computing Project 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 
# Per-thread holder for the shell that hook files register against.
# Only the importing thread gets `.value` initialised here; every other
# thread starts with no attribute at all, so readers below go through
# getattr() with a default instead of touching `.value` directly.
the_current_shell = threading.local()
the_current_shell.value = None


@contextlib.contextmanager
def set_current_shell(shell):
    """Make `shell` the current shell for the duration of a `with` block.

    The previously current shell (or None) is restored on exit, even if
    the body raises, so nested uses unwind correctly.
    """
    outer = getattr(the_current_shell, 'value', None)
    the_current_shell.value = shell
    try:
        yield
    finally:
        the_current_shell.value = outer


def current_shell():
    """Return the shell installed by `set_current_shell` in this thread.

    Raises AssertionError if no shell is current.  The check is an
    explicit raise rather than an `assert` statement so that it is not
    stripped when Python runs with -O.
    """
    shell = getattr(the_current_shell, 'value', None)
    if shell is None:
        raise AssertionError('No current shell!')
    return shell


# make sure that the function that is hooked by the shell has the same
# __doc__
class bayesdb_shellhookexp(object):
    """Callable wrapper around a hook function.

    The wrapper carries the function's docstring (padded so that it always
    has more than one line, or replaced by a placeholder when missing) and
    reports exceptions instead of letting them propagate to the caller.
    """

    def __init__(self, func):
        self.func = func
        fdoc = func.__doc__
        if fdoc is None or len(fdoc.strip()) == 0:
            fdoc = 'NO DOCUMENTATION...\n...\n'

        # A one-line docstring gets a filler second line appended.
        if len(fdoc.split('\n')) == 1:
            fdoc += '\n...\n'

        self.__doc__ = fdoc

    def __call__(self, *args):
        """Call the wrapped function; on error, report it and return None.

        The traceback goes to sys.stderr and the error message to
        sys.stdout.
        """
        try:
            return self.func(*args)
        except Exception as err:
            sys.stderr.write(traceback.format_exc())
            # Same output as the Python 2 statement `print err`, written in
            # a form that also parses on Python 3.
            sys.stdout.write('%s\n' % (err,))


def bayesdb_shell_cmd(name, autorehook=False):
    """Decorator factory: register the decorated function as command `name`.

    Registration happens at decoration time on the current shell, via its
    `_hook(name, wrapped, autorehook=...)` method.  The undecorated
    function is returned so the decorated name stays bound to something
    callable instead of None.

    Raises AssertionError (from `current_shell`) if no shell is current.
    """
    def wrapper(func):
        # because the cmd loop doesn't handle errors and just kicks people out
        current_shell()._hook(name, bayesdb_shellhookexp(func),
                              autorehook=autorehook)
        return func
    return wrapper


def bayesdb_shell_init(func):
    """Decorator: call `func(current_shell())` immediately, then return `func`.

    Raises AssertionError (from `current_shell`) if no shell is current.
    """
    func(current_shell())
    return func
def parse_args(argv):
    """Parse the shell's command-line arguments.

    `argv` is the argument list without the program name.  Returns the
    argparse namespace with attributes `bdbpath`, `jobs`, `seed`, `file`
    (a one-element list, or None when -f is absent), `batch`,
    `no_init_file` and `memory`.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('bdbpath', type=str, nargs='?', default=None,
                        help="bayesdb database file")
    parser.add_argument('-j', '--jobs', type=int, default=1,
                        help="Max number of jobs (processes) useable.")
    parser.add_argument('-s', '--seed', type=int, default=None,
                        help="Random seed for the default generator.")
    parser.add_argument('-f', '--file', type=str, nargs=1, default=None,
                        help="Path to commands file. May be used to specify a "
                        "project-specific init file.")
    parser.add_argument('-b', '--batch', action='store_true',
                        help="Exit after executing file specified with -f.")
    parser.add_argument('-q', '--no-init-file', action='store_true',
                        help="Do not load ~/.bayesliterc")
    parser.add_argument('-m', '--memory', action='store_true',
                        help="Use temporary database not saved to disk")

    args = parser.parse_args(argv)
    return args


def run(stdin, stdout, stderr, argv):
    """Run the bayeslite shell and return a process exit status.

    `argv` is the full argument vector including the program name.
    Returns 1 after writing a usage message to `stderr` when neither a
    database path nor -m/--memory is given, or when the path is `-`.
    Otherwise opens the database, reads the init file and any -f file,
    enters the interactive loop unless -b/--batch is set, and returns 0.
    """
    args = parse_args(argv[1:])
    # Strip any leading directories from the program name.  The previous
    # test on the result of str.rfind treated index 0 (a leading slash) as
    # "not found", so '/bayeslite' was reported unchanged.
    progname = os.path.basename(argv[0])
    if args.bdbpath is None and not args.memory:
        stderr.write('%s: pass filename or -m/--memory\n' % (progname,))
        return 1
    if args.bdbpath == '-':
        stderr.write('%s: missing option?\n' % (progname,))
        return 1
    # NOTE(review): the handle is never closed here; confirm whether the
    # shell closes it before adding an explicit close().
    bdb = bayeslite.bayesdb_open(pathname=args.bdbpath,
                                 builtin_backends=False)

    # Any job count other than 1 turns on multiprocessing in the backend.
    multiprocess = args.jobs != 1
    backend = CGPM_Backend(cgpm_registry={}, multiprocess=multiprocess)
    bayeslite.bayesdb_register_backend(bdb, backend)
    bdbshell = shell.Shell(bdb, 'cgpm', stdin, stdout, stderr)
    with hook.set_current_shell(bdbshell):
        if not args.no_init_file:
            init_file = os.path.expanduser('~/.bayesliterc')
            if os.path.isfile(init_file):
                bdbshell.dot_read(init_file)

        if args.file is not None:
            for path in args.file:
                if os.path.isfile(path):
                    bdbshell.dot_read(path)
                else:
                    # Only the reading of command files stops here; the
                    # interactive loop below still runs unless -b is set.
                    bdbshell.stdout.write('%s is not a file.  Aborting.\n' %
                                          (str(path),))
                    break

        if not args.batch:
            bdbshell.cmdloop()
    return 0


def main():
    """Console entry point: run the shell on the process's standard streams."""
    import sys
    sys.exit(run(sys.stdin, sys.stdout, sys.stderr, sys.argv))

if __name__ == '__main__':
    main()
32 | colwidths[colno] = max(colwidths[colno], len(unicode(v))) 33 | first = True 34 | for colno, label in enumerate(labels): 35 | if first: 36 | first = False 37 | else: 38 | out.write(' | ') 39 | # XXX Quote/escape. 40 | out.write('%*s' % (colwidths[colno], label)) 41 | out.write('\n') 42 | first = True 43 | for colno, label in enumerate(labels): 44 | if first: 45 | first = False 46 | else: 47 | out.write('-+-') 48 | # XXX Quote/escape. 49 | out.write('%s' % ('-' * colwidths[colno])) 50 | out.write('\n') 51 | for row in table: 52 | first = True 53 | for colno, v in enumerate(row): 54 | if first: 55 | first = False 56 | else: 57 | out.write(' | ') 58 | # XXX Quote/escape. 59 | out.write('%*s' % (colwidths[colno], unicode(v))) 60 | out.write('\n') 61 | -------------------------------------------------------------------------------- /shell/tests/test_pretty.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Copyright (c) 2010-2016, MIT Probabilistic Computing Project 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
16 | 17 | import StringIO 18 | import pytest 19 | 20 | import bayeslite.shell.pretty as pretty 21 | 22 | def test_pretty(): 23 | labels = ['name', 'age', 'favourite food'] 24 | table = [ 25 | ['Spot', 3, 'kibble'], 26 | ['Skruffles', 2, 'kibble'], 27 | ['Zorb', 2, 'zorblaxian kibble'], 28 | [u'Zörb', 87, u'zørblaχian ﻛبﻞ'], 29 | ] 30 | out = StringIO.StringIO() 31 | pretty.pp_list(out, table, labels) 32 | assert out.getvalue() == \ 33 | u' name | age | favourite food\n' \ 34 | u'----------+-----+------------------\n' \ 35 | u' Spot | 3 | kibble\n' \ 36 | u'Skruffles | 2 | kibble\n' \ 37 | u' Zorb | 2 | zorblaxian kibble\n' \ 38 | u' Zörb | 87 | zørblaχian ﻛبﻞ\n' 39 | 40 | def test_pretty_unicomb(): 41 | pytest.xfail('pp_list counts code points, not grapheme clusters.') 42 | labels = ['name', 'age', 'favourite food'] 43 | table = [ 44 | ['Spot', 3, 'kibble'], 45 | ['Skruffles', 2, 'kibble'], 46 | ['Zorb', 2, 'zorblaxian kibble'], 47 | ['Zörb', 87, 'zørblaχian ﻛبﻞ'], 48 | [u'Zörb', 42, u'zörblǎxïǎn kïbble'], 49 | ['Zörb', 87, 'zørblaχian ﻛِبّﻞ'], 50 | ] 51 | out = StringIO.StringIO() 52 | pretty.pp_list(out, table, labels) 53 | assert out.getvalue() == \ 54 | u' name | age | favourite food\n' \ 55 | u'----------+-----+------------------\n' \ 56 | u' Spot | 3 | kibble\n' \ 57 | u'Skruffles | 2 | kibble\n' \ 58 | u' Zorb | 2 | zorblaxian kibble\n' \ 59 | u' Zörb | 42 | zörblǎxïǎn kïbble\n' \ 60 | u' Zörb | 87 | zørblaxian ﻛِبّﻞ\n' 61 | -------------------------------------------------------------------------------- /shell/tests/thooks.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Copyright (c) 2010-2016, MIT Probabilistic Computing Project 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 
7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | from bayeslite.shell.hook import bayesdb_shell_cmd 18 | 19 | 20 | @bayesdb_shell_cmd("myhook") 21 | def john_is_a_classy_name(self, args): 22 | '''myhook help string 23 | 24 | ''' 25 | self.stdout.write('john ' + args + '\n') 26 | -------------------------------------------------------------------------------- /src/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Copyright (c) 2010-2016, MIT Probabilistic Computing Project 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | """Main bayeslite API. 18 | 19 | The focus of the bayeslite API is the *BayesDB*, a handle for a 20 | database. 
To obtain a BayesDB handle, use :func:`bayesdb_open`:: 21 | 22 | import bayeslite 23 | 24 | bdb = bayeslite.bayesdb_open(pathname='foo.bdb') 25 | 26 | When done, close it with the :meth:`~BayesDB.close` method:: 27 | 28 | bdb.close() 29 | 30 | BayesDB handles also serve as context managers, so you can do:: 31 | 32 | with bayeslite.bayesdb_open(pathname='foo.bdb') as bdb: 33 | bdb.execute('SELECT 42') 34 | ... 35 | 36 | You can query the probable (according to the models stored in 37 | the database) implications of the data by passing BQL queries 38 | to the :meth:`~BayesDB.execute` method:: 39 | 40 | bql = 'ESTIMATE DEPENDENCE PROBABILITY FROM PAIRWISE COLUMNS OF foo' 41 | for x in bdb.execute(bql): 42 | print x 43 | 44 | You can also execute normal SQL on a BayesDB handle `bdb` with the 45 | :meth:`~BayesDB.sql_execute` method:: 46 | 47 | bdb.sql_execute('CREATE TABLE t(x INT, y TEXT, z REAL)') 48 | bdb.sql_execute("INSERT INTO t VALUES(1, 'xyz', 42.5)") 49 | bdb.sql_execute("INSERT INTO t VALUES(1, 'pqr', 83.7)") 50 | bdb.sql_execute("INSERT INTO t VALUES(2, 'xyz', 1000)") 51 | 52 | (BQL does not yet support ``CREATE TABLE`` and ``INSERT`` directly, so 53 | you must use :meth:`~BayesDB.sql_execute` for those.) 
54 | """ 55 | 56 | from bayeslite.bayesdb import BayesDB 57 | from bayeslite.bayesdb import bayesdb_open 58 | from bayeslite.bayesdb import IBayesDBTracer 59 | from bayeslite.exception import BayesDBException 60 | from bayeslite.exception import BQLError 61 | from bayeslite.backend import BayesDB_Backend 62 | from bayeslite.backend import bayesdb_builtin_backend 63 | from bayeslite.backend import bayesdb_deregister_backend 64 | from bayeslite.backend import bayesdb_register_backend 65 | from bayeslite.nullify import bayesdb_nullify 66 | from bayeslite.parse import BQLParseError 67 | from bayeslite.quote import bql_quote_name 68 | from bayeslite.read_csv import bayesdb_read_csv 69 | from bayeslite.read_csv import bayesdb_read_csv_file 70 | from bayeslite.schema import bayesdb_upgrade_schema 71 | from bayeslite.txn import BayesDBTxnError 72 | from bayeslite.version import __version__ 73 | 74 | # XXX This is not a good place for me. Find me a better home, please! 75 | 76 | __all__ = [ 77 | 'BQLError', 78 | 'BQLParseError', 79 | 'BayesDB', 80 | 'BayesDBException', 81 | 'BayesDBTxnError', 82 | 'bayesdb_deregister_backend', 83 | 'bayesdb_nullify', 84 | 'bayesdb_open', 85 | 'bayesdb_read_csv', 86 | 'bayesdb_read_csv_file', 87 | 'bayesdb_register_backend', 88 | 'bayesdb_upgrade_schema', 89 | 'bql_quote_name', 90 | 'BayesDB_Backend', 91 | 'IBayesDBTracer', 92 | ] 93 | 94 | # Register cgpm as a builtin backend. 95 | from bayeslite.backends.cgpm_backend import CGPM_Backend 96 | bayesdb_builtin_backend(CGPM_Backend({}, multiprocess=True)) 97 | -------------------------------------------------------------------------------- /src/backends/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Copyright (c) 2010-2016, MIT Probabilistic Computing Project 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 
7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | -------------------------------------------------------------------------------- /src/backends/cgpm_alter/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Copyright (c) 2010-2016, MIT Probabilistic Computing Project 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | -------------------------------------------------------------------------------- /src/backends/cgpm_alter/alterations.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Copyright (c) 2010-2016, MIT Probabilistic Computing Project 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 
def make_set_var_dependency(dependency):
    """Build a state transformer for an ENSURE ... DEPENDENT/INDEPENDENT phrase.

    :param dependency: either `parse.EnsureDependent` or
        `parse.EnsureIndependent`
    :return: a function taking a state and returning the altered state
    :raises ValueError: if `dependency` is neither of the two markers
    """
    def _make_dependent(state):
        # Every output after the first is moved into the view that
        # currently holds the first output.
        anchor = state.outputs[0]
        followers = state.outputs[1:]
        return make_set_var_cluster(followers, anchor)(state)

    def _make_independent(state):
        # Each output in turn is moved into a freshly created view.
        for output in state.outputs:
            isolate = make_set_var_cluster([output], parse.SingletonCluster)
            state = isolate(state)
        return state

    if dependency == parse.EnsureDependent:
        return _make_dependent
    if dependency == parse.EnsureIndependent:
        return _make_independent
    raise ValueError('Unknown dependency: %s' % (dependency,))


def make_set_var_cluster(columns0, column1):
    """Build a state transformer that moves `columns0` into one view.

    :param columns0: columns to migrate
    :param column1: a column whose view is the destination, or
        `parse.SingletonCluster` to migrate into a newly appended view
    :return: a function taking a state and returning the altered state
    """
    def _join_existing_view(state):
        for column in exclude(columns0, column1):
            dim = state.dim_for(column)
            source_view = state.Zv(column)
            # Looked up again on every iteration, after the previous
            # migration has been applied.
            target_view = state.Zv(column1)
            state._migrate_dim(source_view, target_view, dim)
        return state

    def _move_to_fresh_view(state):
        fresh_index = max(state.views) + 1
        fresh_view = View(
            state.X,
            outputs=[state.crp_id_view + fresh_index],
            rng=state.rng
        )
        state._append_view(fresh_view, fresh_index)
        for column in columns0:
            dim = state.dim_for(column)
            source_view = state.Zv(column)
            state._migrate_dim(source_view, fresh_index, dim)
        return state

    if column1 == parse.SingletonCluster:
        return _move_to_fresh_view
    return _join_existing_view


def make_set_var_cluster_conc(concentration):
    """Build a state transformer that sets the view CRP `alpha` hyperparameter.

    The value stored is the reciprocal of `concentration`; the division
    happens when the returned function is applied, not here.
    """
    def _set_alpha(state):
        # XXX No abstraction.
        state.crp.hypers['alpha'] = 1.0 / concentration
        return state
    return _set_alpha


def make_set_row_cluster(rows0, row1, column):
    """Build a state transformer that moves rows into one row cluster.

    :param rows0: rows to migrate
    :param row1: a row whose cluster is the destination, or
        `parse.SingletonCluster`
    :param column: column identifying the view in which rows are moved
    :return: a function taking a state and returning the altered state
    """
    def _join_existing_cluster(state):
        view = state.view_for(column)
        target_cluster = view.Zr(row1)
        for row in exclude(rows0, row1):
            view._migrate_row(row, target_cluster)
        return state

    def _move_to_singleton_cluster(state):
        view = state.view_for(column)
        # NOTE(review): row1 is parse.SingletonCluster on this path, so
        # this asks the view for the cluster of the marker itself rather
        # than allocating a new cluster index -- confirm this is intended.
        target_cluster = view.Zr(row1)
        for row in rows0:
            view._migrate_row(row, target_cluster)
        return state

    if row1 == parse.SingletonCluster:
        return _move_to_singleton_cluster
    return _join_existing_cluster


def make_set_row_cluster_conc(column, concentration):
    """Build a state transformer that sets a view's row CRP `alpha`.

    The view is the one returned by `state.view_for(column)`; the value
    stored is the reciprocal of `concentration`.
    """
    def _set_alpha(state):
        view = state.view_for(column)
        view.crp.hypers['alpha'] = 1.0 / concentration
        return state
    return _set_alpha


def exclude(iterable, ignore):
    """Lazily yield the items of `iterable` that are not equal to `ignore`."""
    for candidate in iterable:
        if candidate != ignore:
            yield candidate
21 | * - L_ means a lexeme, which has useful associated text, e.g. an integer. 22 | */ 23 | 24 | 25 | alter(start) ::= phrases(ps). 26 | 27 | phrases(one) ::= phrase(p). 28 | phrases(many) ::= phrases(ps) T_COMMA phrase(p). 29 | 30 | phrase(none) ::= . 31 | 32 | phrase(set_var_dependency) ::= K_ENSURE variable_token_opt columns(cols) 33 | dependency(dep). 34 | 35 | phrase(set_var_cluster) ::= K_ENSURE variable_token_opt 36 | columns(cols0) K_IN view_token 37 | K_OF column_name(col1). 38 | 39 | phrase(set_var_cluster_singleton) ::= K_ENSURE variable_token_opt 40 | columns(cols) 41 | K_IN K_SINGLETON view_token. 42 | 43 | phrase(set_var_cluster_conc) ::= K_SET view_token 44 | K_CONCENTRATION K_PARAMETER 45 | K_TO concentration(conc). 46 | 47 | phrase(set_row_cluster) ::= K_ENSURE K_ROW|K_ROWS rows(rows0) 48 | K_IN K_CLUSTER K_OF K_ROW 49 | row_index(row1) 50 | K_WITHIN view_token 51 | K_OF column_name(col). 52 | 53 | phrase(set_row_cluster_singleton) ::= K_ENSURE K_ROW|K_ROWS rows(rows0) 54 | K_IN K_SINGLETON K_CLUSTER 55 | K_WITHIN view_token 56 | K_OF column_name(col). 57 | 58 | phrase(set_row_cluster_conc) ::= K_SET K_ROW K_CLUSTER 59 | K_CONCENTRATION K_PARAMETER 60 | K_WITHIN view_token 61 | K_OF column_name(col) 62 | K_TO concentration(conc). 63 | 64 | variable_token_opt ::= . 65 | variable_token_opt ::= K_VARIABLE. 66 | variable_token_opt ::= K_VARIABLES. 67 | 68 | view_token ::= K_VIEW. 69 | view_token ::= K_CONTEXT. 70 | 71 | dependency(independent) ::= K_INDEPENDENT. 72 | dependency(dependent) ::= K_DEPENDENT. 73 | 74 | columns(one) ::= column_name(col). 75 | columns(all) ::= T_STAR. 76 | columns(many) ::= T_LROUND column_list(cols) T_RROUND. 77 | 78 | column_list(one) ::= column_name(col). 79 | column_list(many) ::= column_list(cols) T_COMMA column_name(col). 80 | 81 | column_name(n) ::= L_NAME(n). 82 | 83 | rows(one) ::= row_index(row). 84 | rows(all) ::= T_STAR. 85 | rows(many) ::= T_LROUND row_list(rows) T_RROUND. 
86 | 87 | row_list(one) ::= row_index(row). 88 | row_list(many) ::= row_list(rows) T_COMMA row_index(row). 89 | 90 | row_index(n) ::= L_NUMBER(n). 91 | 92 | concentration(c) ::= L_NUMBER(n). 93 | -------------------------------------------------------------------------------- /src/backends/cgpm_analyze/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Copyright (c) 2010-2016, MIT Probabilistic Computing Project 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | -------------------------------------------------------------------------------- /src/backends/cgpm_analyze/grammar.y: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2010-2016, MIT Probabilistic Computing Project 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | /* 18 | * Terminal conventions: 19 | * - T_ means a punctuation token. 20 | * - K_ means a keyword. 21 | * - L_ means a lexeme, which has useful associated text, e.g. an integer. 22 | */ 23 | 24 | 25 | analysis(start) ::= phrases(ps). 26 | 27 | phrases(one) ::= phrase(p). 28 | phrases(many) ::= phrases(ps) T_SEMI phrase(p). 29 | 30 | phrase(none) ::= . 31 | 32 | phrase(variables) ::= K_VARIABLES column_list(cols). 33 | phrase(skip) ::= K_SKIP column_list(cols). 34 | 35 | phrase(rows) ::= K_ROWS row_list(rows). 36 | 37 | phrase(loom) ::= K_LOOM. 38 | phrase(optimized) ::= K_OPTIMIZED. 39 | 40 | phrase(quiet) ::= K_QUIET. 41 | 42 | phrase(subproblems) ::= K_SUBPROBLEM|K_SUBPROBLEMS subproblems_list(s). 43 | 44 | subproblems_list(one) ::= subproblem(s). 45 | subproblems_list(many) ::= T_LROUND subproblems(s) T_RROUND. 46 | 47 | subproblems(one) ::= subproblem(s). 48 | subproblems(many) ::= subproblems(ss) T_COMMA subproblem(s). 49 | 50 | subproblem(variable_hyperparameters) ::= K_VARIABLE K_HYPERPARAMETERS. 51 | 52 | subproblem(variable_clustering) ::= K_VARIABLE K_CLUSTERING. 53 | subproblem(variable_clustering_concentration) ::= K_VARIABLE K_CLUSTERING 54 | K_CONCENTRATION. 55 | 56 | subproblem(row_clustering) ::= K_ROW K_CLUSTERING. 57 | subproblem(row_clustering_concentration) ::= K_ROW K_CLUSTERING 58 | K_CONCENTRATION. 59 | 60 | column_list(one) ::= column_name(col). 61 | column_list(many) ::= column_list(cols) T_COMMA column_name(col). 62 | 63 | column_name(n) ::= L_NAME(name). 64 | 65 | row_list(one) ::= row_index(row). 66 | row_list(many) ::= row_list(rows) T_COMMA row_index(row). 67 | 68 | row_index(n) ::= L_NUMBER(n). 
69 | -------------------------------------------------------------------------------- /src/backends/cgpm_schema/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Copyright (c) 2010-2016, MIT Probabilistic Computing Project 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | -------------------------------------------------------------------------------- /src/backends/cgpm_schema/grammar.y: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2010-2016, MIT Probabilistic Computing Project 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | /* 18 | * Terminal conventions: 19 | * - T_ means a punctuation token. 20 | * - K_ means a keyword. 21 | * - L_ means a lexeme, which has useful associated text, e.g. an integer. 
22 | */ 23 | 24 | cgpm(empty) ::= . 25 | cgpm(schema) ::= schema(s). 26 | 27 | schema(one) ::= clause(c). 28 | schema(some) ::= schema(s) T_SEMI|T_COMMA clause_opt(c). 29 | 30 | clause_opt(none) ::= . 31 | clause_opt(some) ::= clause(c). 32 | 33 | clause(basic) ::= 34 | K_SET K_CATEGORY K_MODEL K_FOR 35 | var(var) K_USING|K_TO dist(dist) param_opt(params). 36 | clause(foreign) ::= 37 | K_OVERRIDE generative_opt K_MODEL K_FOR vars(outputs) 38 | given_opt(inputs) 39 | exposing_opt(exposed) 40 | K_USING foreign(name) param_opt(params). 41 | clause(subsamp) ::= K_SUBSAMPLE L_NUMBER(n). 42 | clause(latent) ::= K_LATENT var(var) stattype(st). 43 | 44 | dist(name) ::= L_NAME(dist). 45 | foreign(name) ::= L_NAME(foreign). 46 | 47 | generative_opt ::= . 48 | generative_opt ::= K_GENERATIVE. 49 | 50 | given_opt(none) ::= . 51 | given_opt(some) ::= K_GIVEN vars(vars). 52 | 53 | exposing_opt(none) ::= . 54 | exposing_opt(one) ::= and_opt K_EXPOSE exposed(exp). 55 | 56 | and_opt(none) ::= . 57 | and_opt(one) ::= K_AND. 58 | 59 | exposed(one) ::= var(v) stattype(s). 60 | exposed(many) ::= exposed(exp) T_COMMA var(v) stattype(s). 61 | 62 | vars(one) ::= var(var). 63 | vars(many) ::= vars(vars) T_COMMA var(var). 64 | 65 | var(name) ::= L_NAME(var). 66 | 67 | stattype(s) ::= L_NAME(st). 68 | 69 | param_opt(none) ::= . 70 | param_opt(some) ::= T_LROUND params(ps) T_RROUND. 71 | params(one) ::= param(param). 72 | params(many) ::= params(params) T_COMMA param(param). 73 | 74 | param(num) ::= L_NAME(p) T_EQ L_NUMBER(num). 75 | param(nam) ::= L_NAME(p) T_EQ L_NAME(nam). 
# Schema statements that record version 1 of the std_normal backend.
std_normal_schema_1 = '''
INSERT INTO bayesdb_backend (name, version) VALUES ('std_normal', 1);
'''


class StdNormalBackend(bayeslite.backend.BayesDB_Backend):
    """IID Gaussian backend for BayesDB.

    The backend is named ``std_normal`` in BQL::

        CREATE GENERATOR t_sn FOR t USING std_normal(..)
    """

    def __init__(self, seed=0):
        """Create the backend with a private PRNG seeded by `seed`."""
        self.prng = random.Random(seed)

    def name(self):
        """Return the BQL name of this backend."""
        return 'std_normal'

    def register(self, bdb):
        """Record this backend in `bdb`, installing its schema if absent.

        :raises BQLError: if a schema version other than 1 is recorded
        """
        with bdb.savepoint():
            schema_sql = 'SELECT version FROM bayesdb_backend WHERE name = ?'
            cursor = bdb.sql_execute(schema_sql, (self.name(),))
            version = None
            try:
                # next() works on both Python 2 and Python 3 iterators,
                # unlike the .next() method.
                row = next(cursor)
            except StopIteration:
                # No row for this backend: nothing installed yet.
                version = 0
            else:
                version = row[0]
            assert version is not None
            if version == 0:
                # XXX WHATTAKLUDGE!
                for stmt in std_normal_schema_1.split(';'):
                    bdb.sql_execute(stmt)
                version = 1
            if version != 1:
                raise BQLError(bdb, 'IID-Gaussian already installed'
                    ' with unknown schema version: %d' % (version,))

    def create_generator(self, bdb, generator_id, schema, **kwargs):
        pass

    # The model has no state, so the lifecycle hooks are no-ops.
    def drop_generator(self, *args, **kwargs): pass
    def rename_column(self, *args, **kwargs): pass
    def initialize_models(self, *args, **kwargs): pass
    def drop_models(self, *args, **kwargs): pass
    def analyze_models(self, *args, **kwargs): pass

    def simulate_joint(self, _bdb, _generator_id, modelnos, rowid, targets,
            _constraints, num_samples=1, accuracy=None):
        """Return `num_samples` lists, each with one N(0, 1) draw per target."""
        return [[self.prng.gauss(0, 1) for _ in targets]
            for _ in range(num_samples)]

    def logpdf_joint(self, _bdb, _generator_id, modelnos, rowid, targets,
            _constraints):
        """Return the sum of standard normal log densities of the targets.

        `targets` is iterated as pairs whose second element is the value;
        the constraints are ignored.
        """
        return sum(logpdf_gaussian(value, 0, 1) for (_, value) in targets)

    def infer(self, *args, **kwargs): pass


# Constant term 0.5 * log(2 * pi) of the Gaussian log density.
HALF_LOG2PI = 0.5 * math.log(2 * math.pi)


def logpdf_gaussian(x, mu, sigma):
    """Return the log density at `x` of a Gaussian with mean `mu` and
    standard deviation `sigma`."""
    deviation = x - mu
    return - math.log(sigma) - HALF_LOG2PI \
        - (0.5 * deviation * deviation / (sigma * sigma))
class TrollBackend(bayeslite.backend.BayesDB_Backend):
    """Troll backend for BayesDB.

    The backend is named ``troll_rng`` in BQL::

        CREATE GENERATOR t_troll FOR t USING troll_rng(..)
    """

    def __init__(self): pass

    def name(self):
        """Return the BQL name of this backend."""
        return 'troll_rng'

    def register(self, bdb):
        """Record this backend, at version 1, in `bdb`.

        NOTE(review): the row is inserted unconditionally, without first
        checking for an existing entry -- confirm that registering twice
        against the same database is never expected.
        """
        bdb.sql_execute('''
            INSERT INTO bayesdb_backend (name, version)
                VALUES (?, 1)
        ''', (self.name(),))

    def create_generator(self, bdb, generator_id, schema, **kwargs):
        pass

    # The model has no state, so the lifecycle hooks are no-ops.
    def drop_generator(self, *args, **kwargs): pass
    def rename_column(self, *args, **kwargs): pass
    def initialize_models(self, *args, **kwargs): pass
    def drop_models(self, *args, **kwargs): pass
    def analyze_models(self, *args, **kwargs): pass

    def simulate_joint(self, _bdb, _generator_id, _modelnos, rowid, targets,
            _constraints, num_samples=1, accuracy=None):
        """Return `num_samples` lists, each holding a 9 for every target.

        `accuracy` is accepted and ignored so that the signature matches
        the other example backend's `simulate_joint`.
        """
        # Build a distinct inner list per sample: multiplying a one-element
        # outer list would alias the same inner list num_samples times, so
        # mutating one sample would silently change all of them.
        return [[9 for _ in targets] for _ in range(num_samples)]

    def logpdf_joint(self, _bdb, _generator_id, _modelnos, rowid, targets,
            constraints):
        """Return the log probability of `targets` given `constraints`.

        Returns NaN if any constraint value is not 9, -inf if any target
        value is not 9, and 0 otherwise.
        """
        for (_, value) in constraints:
            if not value == 9:
                return float("nan")
        for (_, value) in targets:
            if not value == 9:
                return float("-inf")
        # TODO This is only correct wrt counting measure.  What's the
        # base measure of numericals?
        return 0

    def infer(self, *args, **kwargs): pass
# SQL function name -> implementation.  Each entry is a lambda rather than
# the bare math function so that its positional argument count can be read
# off the Python code object when it is registered below.
bqlmath_funcs = {
    'acos': lambda x: math.acos(x),
    'acosh': lambda x: math.acosh(x),
    'asin': lambda x: math.asin(x),
    'asinh': lambda x: math.asinh(x),
    'atan': lambda x: math.atan(x),
    # atan2 takes two arguments (y, x); a one-argument wrapper could only
    # ever raise TypeError.
    'atan2': lambda y, x: math.atan2(y, x),
    'atanh': lambda x: math.atanh(x),
    'ceil': lambda x: math.ceil(x),
    'copysign': lambda x, y: math.copysign(x, y),
    'cos': lambda x: math.cos(x),
    'cosh': lambda x: math.cosh(x),
    'degrees': lambda x: math.degrees(x),
    'erf': lambda x: math.erf(x),
    'erfc': lambda x: math.erfc(x),
    'exp': lambda x: math.exp(x),
    'expm1': lambda x: math.expm1(x),
    'fabs': lambda x: math.fabs(x),
    'factorial': lambda x: math.factorial(x),
    'floor': lambda x: math.floor(x),
    'fmod': lambda x, y: math.fmod(x, y),
    'gamma': lambda x: math.gamma(x),
    'hypot': lambda x, y: math.hypot(x, y),
    'ldexp': lambda x, i: math.ldexp(x, i),
    'lgamma': lambda x: math.lgamma(x),
    'log': lambda x: math.log(x),
}


def bayesdb_install_bqlmath(db, _cookie):
    """Register every entry of `bqlmath_funcs` as a scalar function on `db`.

    :param db: object providing `createscalarfunction(name, fn, nargs)`
    :param _cookie: unused
    """
    for name, fn in bqlmath_funcs.items():
        # All entries are plain lambdas with positional parameters only,
        # so the code object's argument count is the function's arity.
        # This avoids inspect.getargspec, which newer Pythons lack.
        nargs = fn.__code__.co_argcount
        db.createscalarfunction(name, fn, nargs)
class BayesLiteException(Exception):
    """Parent exception for anything Bayeslite-specific."""
    pass


class BayesDBException(BayesLiteException):
    """Exceptions associated with a BayesDB instance.

    :ivar bayeslite.BayesDB bayesdb: associated BayesDB instance
    """
    # XXX: Consider renaming to BayesDBError to match the two below.
    def __init__(self, bayesdb, *args, **kwargs):
        # The handle is kept on the instance; only the remaining arguments
        # are forwarded to Exception, so str() does not include it.
        self.bayesdb = bayesdb
        super(BayesDBException, self).__init__(*args, **kwargs)


class BQLError(BayesDBException):
    """Errors in interpreting or executing BQL on a particular database."""
    # XXX Consider separating the "no such foo" and "foo already exists" errors
    # that actually could be fine on another database, from the "foo is a
    # 1-row function" and "foo needs exactly two columns" type that are closer
    # to a BQLParseError. Unsure what the "ESTIMATE * FROM COLUMNS OF subquery"
    # use really means as an error: need to look more closely.
    pass


class BQLParseError(BayesLiteException):
    """Errors in parsing BQL.

    As many parse errors as can be reasonably detected are listed
    together.

    :ivar list errors: list of strings describing parse errors
    """

    def __init__(self, errors):
        assert 0 < len(errors)
        # Hand the argument to Exception so that `args` is populated and
        # the exception can be rebuilt from it (copying, pickling).
        super(BQLParseError, self).__init__(errors)
        self.errors = errors

    def __str__(self):
        # A lone error is shown bare; several are listed one per line.
        if len(self.errors) == 1:
            return self.errors[0]
        # NOTE(review): the per-line prefix is reproduced as it appears in
        # the source under review -- confirm the intended indent width.
        return ''.join(' %s\n' % (error,) for error in self.errors)


def bayesdb_nullify(bdb, table, value, columns=None):
    """Replace every occurrence of `value` in `table` by NULL.

    :param bdb: BayesDB instance
    :param str table: name of the table to update
    :param value: the value to replace, passed as a query parameter
    :param columns: names of the columns to update; if `None`, every
        column reported by ``PRAGMA table_info`` is updated
    :return: the increase in the connection's total change count
    """
    qt = sqlite3_quote_name(table)
    if columns is None:
        cursor = bdb.sql_execute('PRAGMA table_info(%s)' % (qt,))
        # The second field of each table_info row is the column name.
        columns = [row[1] for row in cursor]
    changes = bdb._sqlite3.totalchanges()
    for column in columns:
        qc = sqlite3_quote_name(column)
        bdb.sql_execute('UPDATE %s SET %s = NULL WHERE %s = ?' % (qt, qc, qc),
            (value,))
    return bdb._sqlite3.totalchanges() - changes


def bql_quote_name(name):
    """Quote `name` as a BQL identifier, e.g. a table or column name.

    Do NOT use this for strings, e.g. inserting data into a table.
    Use query parameters instead.
    """
    return sqlite3_quote_name(name)
def bayesdb_read_pandas_df(bdb, table, df, create=False, ifnotexists=False,
        index=None):
    """Read data from a pandas dataframe into a table.

    :param bayeslite.BayesDB bdb: BayesDB instance
    :param str table: name of table
    :param pandas.DataFrame df: pandas dataframe
    :param bool create: if true and `table` does not exist, create it
    :param bool ifnotexists: if true, and `create` is true and `table`
        exists, read data into it anyway
    :param str index: name of column for index
    :raises ValueError: if the arguments are inconsistent, the index
        cannot be used as described below, the table is missing or
        unexpectedly present, or the table lacks a needed column

    If `index` is `None`, then the dataframe's index dtype must be
    convertible to int64, and it is mapped to the table's rowids.  If
    the dataframe's index dtype is not convertible to int64, you must
    specify `index` to give a primary key for the table.
    """
    if not create:
        if ifnotexists:
            raise ValueError('Not creating table whether or not exists!')
    column_names = [str(column) for column in df.columns]
    if index is None:
        create_column_names = column_names
        insert_column_names = ['_rowid_'] + column_names
        try:
            key_index = df.index.astype('int64')
        except (TypeError, ValueError):
            raise ValueError('Must specify index name for non-integral index!')
    else:
        if index in df.columns:
            raise ValueError('Index name collides with column name: %r'
                % (index,))
        create_column_names = [index] + column_names
        insert_column_names = create_column_names
        key_index = df.index
    with bdb.savepoint():
        if core.bayesdb_has_table(bdb, table):
            if create and not ifnotexists:
                raise ValueError('Table already exists: %s' % (repr(table),))
            core.bayesdb_table_guarantee_columns(bdb, table)
            unknown = set(name for name in create_column_names
                if not core.bayesdb_table_has_column(bdb, table, name))
            if len(unknown) != 0:
                raise ValueError('Unknown columns: %s' % (list(unknown),))
        elif create:
            def column_schema(column_name):
                qcn = sqlite3_quote_name(column_name)
                if column_name == index:
                    return '%s NUMERIC PRIMARY KEY' % (qcn,)
                else:
                    return '%s NUMERIC' % (qcn,)
            schema = ','.join(column_schema(ccn)
                for ccn in create_column_names)
            qt = sqlite3_quote_name(table)
            bdb.sql_execute('CREATE TABLE %s(%s)' % (qt, schema))
            core.bayesdb_table_guarantee_columns(bdb, table)
        else:
            raise ValueError('No such table: %s' % (repr(table),))
        # The inserts stay inside the savepoint opened above, alongside
        # any table creation (presumably so a failed insert undoes both
        # -- confirm against the savepoint implementation).
        qt = sqlite3_quote_name(table)
        # A real list, not map(): the names are needed twice below, and a
        # Python 3 map object would be exhausted after the first use.
        qicns = [sqlite3_quote_name(name) for name in insert_column_names]
        sql = 'INSERT INTO %s (%s) VALUES (%s)' % \
            (qt, ','.join(qicns), ','.join('?' for _qicn in qicns))
        # Rows are fetched by position, so duplicate index labels still
        # yield exactly one row per key.
        for position, key in enumerate(key_index):
            bdb.sql_execute(sql, (key,) + tuple(df.iloc[position]))


def regress_ols(target_values, given_values, given_variables, stattypes):
    """Fit an ordinary least squares regression with dummy-coded nominals.

    :param target_values: values of the regression target, one per row
    :param given_values: rows of predictor values
    :param given_variables: names of the predictors, one per column
    :param stattypes: statistical type of each predictor; those equal to
        ``'nominal'`` are dummy coded
    :return: list of ``(name, coefficient)`` pairs: first ``'intercept'``,
        then each regressed column, then each dropped reference dummy
        column with coefficient 0
    :raises ValueError: if there is not enough data to regress on
    """
    X = pd.DataFrame(given_values, columns=given_variables)
    # With no rows there are no dummy columns to find below, so fail here
    # with the intended error rather than on a failed column lookup.
    if len(X) == 0:
        raise ValueError('Not enough data for regression')
    # Detect the nominal variables.
    nominal_variables = [
        variable for variable, stattype in zip(given_variables, stattypes)
        if stattype == 'nominal'
    ]
    # Dummy code the nominal variables.
    prefix = {var: '%s_dum' % (var,) for var in nominal_variables}
    X_coded = pd.get_dummies(X, columns=nominal_variables, prefix=prefix)
    # Find nominal columns to drop, and drop them (for correct dummy coding, K
    # categories are encoded using K-1 vector).
    drop = []
    for var in nominal_variables:
        # get_dummies joins the prefix and the category with '_'.
        stem = '%s_dum_' % (var,)
        reference = next(
            (c for c in X_coded.columns if c.startswith(stem)), None)
        if reference is None:
            # No category was observed for this variable at all.
            raise ValueError('Not enough data for regression')
        drop.append(reference)
    X_coded.drop(drop, inplace=True, axis=1)
    # Check if only 1 column with 1 unique values.
    if len(X_coded.columns) == 0 or len(X_coded) == 0:
        raise ValueError('Not enough data for regression')
    # Fit the regression.
    linreg = LinearRegression()
    linreg.fit(X_coded, target_values)
    # Build and return variables and their coefficients.
    intercept = [('intercept', linreg.intercept_)]
    variables_regressed = list(zip(X_coded.columns, linreg.coef_))
    variables_dropped = [(name, 0) for name in drop]
    return intercept + variables_regressed + variables_dropped

@contextlib.contextmanager
def bayesdb_caching(bdb):
    """Context manager that holds one level of transaction depth.

    Pushes a level on entry and pops it on exit, whether or not the body
    raises.  No SQL is issued here.
    """
    bayesdb_txn_push(bdb)
    try:
        yield
    finally:
        bayesdb_txn_pop(bdb)

@contextlib.contextmanager
def bayesdb_savepoint(bdb):
    """Context manager that runs the body inside `sqlite3_savepoint`.

    One level of transaction depth is held for the duration and always
    popped on exit.
    """
    bayesdb_txn_push(bdb)
    try:
        with sqlite3_savepoint(bdb._sqlite3):
            yield
    finally:
        bayesdb_txn_pop(bdb)

@contextlib.contextmanager
def bayesdb_savepoint_rollback(bdb):
    """Context manager that runs the body inside `sqlite3_savepoint_rollback`.

    One level of transaction depth is held for the duration and always
    popped on exit.
    """
    bayesdb_txn_push(bdb)
    try:
        with sqlite3_savepoint_rollback(bdb._sqlite3):
            yield
    finally:
        bayesdb_txn_pop(bdb)

@contextlib.contextmanager
def bayesdb_transaction(bdb):
    """Context manager that runs the body inside `sqlite3_transaction`.

    :raises BayesDBTxnError: if the transaction depth is already nonzero
    """
    if bdb._txn_depth != 0:
        raise BayesDBTxnError(bdb, 'Already in a transaction!')
    bayesdb_txn_init(bdb)
    bdb._txn_depth = 1
    try:
        with sqlite3_transaction(bdb._sqlite3):
            yield
    finally:
        assert bdb._txn_depth == 1
        bdb._txn_depth = 0
        bayesdb_txn_fini(bdb)

def bayesdb_begin_transaction(bdb):
    """Begin an explicit transaction by executing ``BEGIN``.

    :raises BayesDBTxnError: if the transaction depth is already nonzero

    If ``BEGIN`` itself raises, the depth and cache are put back the way
    they were, so the handle does not claim a transaction that was never
    opened.  The error from ``BEGIN`` propagates unchanged.
    """
    if bdb._txn_depth != 0:
        raise BayesDBTxnError(bdb, 'Already in a transaction!')
    bayesdb_txn_init(bdb)
    bdb._txn_depth = 1
    begun = False
    try:
        bdb.sql_execute("BEGIN")
        begun = True
    finally:
        if not begun:
            # Same cleanup that bayesdb_transaction does in its finally.
            bdb._txn_depth = 0
            bayesdb_txn_fini(bdb)

def bayesdb_rollback_transaction(bdb):
    """Execute ``ROLLBACK``, then reset the depth and drop the cache.

    :raises BayesDBTxnError: if the transaction depth is zero
    """
    if bdb._txn_depth == 0:
        raise BayesDBTxnError(bdb, 'Not in a transaction!')
    bdb.sql_execute("ROLLBACK")
    bdb._txn_depth = 0
    bayesdb_txn_fini(bdb)

def bayesdb_commit_transaction(bdb):
    """Execute ``COMMIT``, then reset the depth and drop the cache.

    :raises BayesDBTxnError: if the transaction depth is zero
    """
    if bdb._txn_depth == 0:
        raise BayesDBTxnError(bdb, 'Not in a transaction!')
    bdb.sql_execute("COMMIT")
    bdb._txn_depth = 0
    bayesdb_txn_fini(bdb)

# XXX Maintaining a stack of savepoints in BQL is a little more
# trouble than it is worth at the moment, since users can rollback to
# or release any savepoint in the stack, not just the most recent one.
# (For the bdb.savepoint() context manager that is not an issue.)
# We'll implement that later.
93 | 94 | def bayesdb_txn_push(bdb): 95 | if bdb._txn_depth == 0: 96 | bayesdb_txn_init(bdb) 97 | else: 98 | assert bdb._cache is not None 99 | bdb._txn_depth += 1 100 | 101 | def bayesdb_txn_pop(bdb): 102 | bdb._txn_depth -= 1 103 | if bdb._txn_depth == 0: 104 | bayesdb_txn_fini(bdb) 105 | else: 106 | assert bdb._cache is not None 107 | 108 | def bayesdb_txn_init(bdb): 109 | assert bdb._txn_depth == 0 110 | assert bdb._cache is None 111 | bdb._cache = {} 112 | 113 | def bayesdb_txn_fini(bdb): 114 | assert bdb._txn_depth == 0 115 | assert bdb._cache is not None 116 | bdb._cache = None 117 | 118 | class BayesDBTxnError(BayesDBException): 119 | """Transaction errors in a BayesDB.""" 120 | 121 | pass 122 | -------------------------------------------------------------------------------- /src/util.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Copyright (c) 2010-2016, MIT Probabilistic Computing Project 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | """Miscellaneous utilities.""" 18 | 19 | import json 20 | import math 21 | 22 | def unique(array): 23 | """Return a sorted array of the unique elements in `array`. 24 | 25 | No element may be a floating-point NaN. If your data set includes 26 | NaNs, omit them before passing them here. 
27 | """ 28 | for x in array: 29 | assert not (isinstance(x, float) and math.isnan(x)) 30 | if len(array) < 2: 31 | return array 32 | array_sorted = sorted(array) 33 | array_unique = [array_sorted[0]] 34 | for x in array_sorted[1:]: 35 | assert array_unique[-1] <= x 36 | if array_unique[-1] != x: 37 | array_unique.append(x) 38 | return array_unique 39 | 40 | def unique_indices(array): 41 | """Return an array of the indices of the unique elements in `array`. 42 | 43 | No element may be a floating-point NaN. If your data set includes 44 | NaNs, omit them before passing them here. 45 | """ 46 | for x in array: 47 | assert not (isinstance(x, float) and math.isnan(x)) 48 | if len(array) == 0: 49 | return [] 50 | if len(array) == 1: 51 | return [0] 52 | array_sorted = sorted((x, i) for i, x in enumerate(array)) 53 | array_unique = [array_sorted[0][1]] 54 | for x, i in array_sorted[1:]: 55 | assert array[array_unique[-1]] <= x 56 | if array[array_unique[-1]] != x: 57 | array_unique.append(i) 58 | return sorted(array_unique) 59 | 60 | def float_sum(iterable): 61 | """Return the sum of elements of `iterable` in floating-point. 62 | 63 | This implementation uses Kahan-Babuška summation. 64 | """ 65 | s = 0.0 66 | c = 0.0 67 | for x in iterable: 68 | xf = float(x) 69 | s1 = s + xf 70 | if abs(x) < abs(s): 71 | c += ((s - s1) + xf) 72 | else: 73 | c += ((xf - s1) + s) 74 | s = s1 75 | return s + c 76 | 77 | def casefold(string): 78 | # XXX Not really right, but it'll do for now. 
79 | return string.upper().lower() 80 | 81 | def cursor_row(cursor, nullok=None): 82 | if nullok is None: 83 | nullok = False 84 | try: 85 | row = cursor.next() 86 | except StopIteration: 87 | if nullok: 88 | return None 89 | raise ValueError('Empty cursor') 90 | else: 91 | try: 92 | cursor.next() 93 | except StopIteration: 94 | pass 95 | else: 96 | raise ValueError('Multiple-result cursor') 97 | return row 98 | 99 | def cursor_value(cursor, nullok=None): 100 | row = cursor_row(cursor, nullok) 101 | if row is None: 102 | assert nullok 103 | return None 104 | if len(row) != 1: 105 | raise ValueError('Non-unit cursor') 106 | return row[0] 107 | 108 | def json_dumps(obj): 109 | """Return a JSON string of obj, compactly and deterministically.""" 110 | return json.dumps(obj, sort_keys=True) 111 | 112 | def override(interface): 113 | def wrap(method): 114 | assert method.__name__ in dir(interface) 115 | return method 116 | return wrap 117 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/probcomp/bayeslite/211e5eb3821a464a2fffeb9d35e3097e1b7a99ba/tests/__init__.py -------------------------------------------------------------------------------- /tests/kl.py: -------------------------------------------------------------------------------- 1 | """Kullback Leibler divergence estimates""" 2 | 3 | from collections import namedtuple 4 | from numpy import array, sqrt 5 | 6 | class KLEstimate(namedtuple('KLEstimate', ['estimate', 'se'])): 7 | """Container for return value from kullback_leibler. 8 | 9 | `estimate`: The estimated KL divergence, mean of the sampled integrand 10 | values. 11 | 12 | `se`: Estimated standard deviation of the samples from which the mean was 13 | calculated. 
In general the mean and variance of log(P(x)) is not known to 14 | be finite, but it will be for any distribution crosscat generates at the 15 | moment, because they all have finite entropy. Hence the Central Limit 16 | Theorem applies at some sample size, and this can in principle be used as a 17 | rough guide to the precision of the estimate. In tests comparing the 18 | univariate gaussians N(0,1) and N(0,2), it tended to have a visually 19 | obvious bias for sample sizes below 100,000. 20 | 21 | """ 22 | pass 23 | 24 | def kullback_leibler(postsample, postlpdf, complpdf): 25 | """Estimate KL-divergence of sample (a collection of values) w.r.t. known pdf, 26 | `complpdf`, which returns the density when passed a sample. Return value is 27 | a `KLEstimate`. The attribute you probably care most about is 28 | `KLEstimate.estimate`. See `KLEstimate.__doc__` for more details. The 29 | `postsample` argument is an approximate sample from the distribution 30 | approximately represented by `postlpdf`. 31 | 32 | """ 33 | klsamples = array([postlpdf(x) - complpdf(x) for x in postsample]) 34 | std = klsamples.std() / sqrt(len(klsamples)) 35 | return KLEstimate(estimate=klsamples.mean(), se=std) 36 | -------------------------------------------------------------------------------- /tests/stochastic.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Copyright (c) 2010-2016, MIT Probabilistic Computing Project 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | import os 18 | import sys 19 | 20 | class StochasticError(Exception): 21 | def __init__(self, seed, exctype, excvalue): 22 | self.seed = seed 23 | self.exctype = exctype 24 | self.excvalue = excvalue 25 | def __str__(self): 26 | hexseed = self.seed.encode('hex') 27 | if hasattr(self.exctype, '__name__'): 28 | typename = self.exctype.__name__ 29 | else: 30 | typename = repr(self.exctype) 31 | return '[seed %s]\n%s: %s' % (hexseed, typename, self.excvalue) 32 | 33 | def stochastic(max_runs, min_passes): 34 | assert 0 < max_runs 35 | assert min_passes <= max_runs 36 | def wrap(f): 37 | def f_(seed=None): 38 | if seed is not None: 39 | return f(seed) 40 | npasses = 0 41 | last_seed = None 42 | last_exc_info = None 43 | for i in xrange(max_runs): 44 | seed = os.urandom(32) 45 | try: 46 | value = f(seed) 47 | except: 48 | last_seed = seed 49 | last_exc_info = sys.exc_info() 50 | else: 51 | npasses += 1 52 | if min_passes <= npasses: 53 | return value 54 | t, v, tb = last_exc_info 55 | raise StochasticError, StochasticError(last_seed, t, v), tb 56 | return f_ 57 | return wrap 58 | -------------------------------------------------------------------------------- /tests/test_approxest.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Copyright (c) 2010-2016, MIT Probabilistic Computing Project 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and
# limitations under the License.

"""Tests for approximate estimators."""

import numpy as np

from bayeslite import bayesdb_open

from stochastic import stochastic


@stochastic(max_runs=2, min_passes=1)
def test_mutinf__ci_slow(seed):
    """Check variable orderings by mutual information and by dependence.

    Builds a three-column table from seeded draws, analyzes it, and
    asserts that both orderings relative to `x` come out as x, y, z.
    """
    with bayesdb_open(':memory:', seed=seed) as bdb:
        # All draws come from the handle's own PRNG, in this order.
        npr = bdb.np_prng
        bdb.sql_execute('create table t(x, y, z)')
        # x and y are drawn jointly, as two groups of 50 rows with
        # different means.
        D0_XY = npr.multivariate_normal([10,10], [[0,1],[2,0]], size=50)
        D1_XY = npr.multivariate_normal([0,0], [[0,-1],[2,0]], size=50)
        D_XY = np.concatenate([D0_XY, D1_XY])
        # z is drawn separately from x and y.
        D_Z = npr.multivariate_normal([5], [[0.5]], size=100)
        D = np.hstack([D_XY, D_Z])
        for d in D:
            bdb.sql_execute('INSERT INTO t VALUES(?,?,?)', d)
        bdb.execute(
            'create population p for t(x numerical; y numerical; z numerical)')
        bdb.execute('create generator m for p')
        bdb.execute('initialize 10 models for m')
        bdb.execute('analyze m for 10 iterations (optimized; quiet)')
        vars_by_mutinf = bdb.execute('''
            estimate * from variables of p
                order by probability of (mutual information with x > 0.1) desc
        ''').fetchall()
        vars_by_depprob = bdb.execute('''
            estimate * from variables of p
                order by dependence probability with x desc
        ''').fetchall()
        assert vars_by_mutinf == [('x',), ('y',), ('z',)]
        assert vars_by_depprob == [('x',), ('y',), ('z',)]
--------------------------------------------------------------------------------
/tests/test_bqlmath.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-

# Copyright (c) 2010-2016, MIT Probabilistic Computing Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import itertools

import apsw
import pytest

from bayeslite import bayesdb_open
from bayeslite import bqlmath

from bayeslite.math_util import abserr
from bayeslite.util import cursor_value


def get_python_math_call(name, probe):
    """Call the bqlmath function `name` directly in Python.

    A tuple `probe` is spread as positional arguments; anything else is
    passed as the single argument.
    """
    func = bqlmath.bqlmath_funcs[name]
    if isinstance(probe, tuple):
        return func(*probe)
    else:
        return func(probe)

def get_sql_math_call(name, probe):
    """Return a ``SELECT`` statement calling `name` on `probe`.

    For a tuple, the tuple's own string form supplies the parenthesized
    argument list.
    """
    if isinstance(probe, tuple):
        return 'SELECT %s%s' % (name, str(probe))
    else:
        return 'SELECT %s(%s)' % (name, probe)

# Single-argument probes, every pair of them as two-argument probes, and
# the names of all registered functions.  These are one-shot iterators,
# consumed by the parametrization below.
PROBES_FLOAT = [-2.5, -1, -0.1, 0, 0.1, 1, 2.5]
PROBES_TUPLE = itertools.combinations(PROBES_FLOAT, 2)
PROBES = itertools.chain(PROBES_FLOAT, PROBES_TUPLE)
FUNCS = bqlmath.bqlmath_funcs.iterkeys()

@pytest.mark.parametrize('name,probe', itertools.product(FUNCS, PROBES))
def test_math_func_one_param(name, probe):
    """Check that each function behaves the same from Python and from SQL.

    Either both calls fail in the same way (value error or type/arity
    error) or both succeed with results within 1e-4 of each other.
    """
    # Retrieve result from python.
    python_value_error = None
    python_type_error = None
    try:
        result_python = get_python_math_call(name, probe)
    except ValueError:
        python_value_error = True
    except TypeError:
        python_type_error = True

    # Retrieve result from SQL.
    sql_value_error = None
    sql_type_error = None
    try:
        with bayesdb_open(':memory:') as bdb:
            cursor = bdb.execute(get_sql_math_call(name, probe))
            result_sql = cursor_value(cursor)
    except ValueError:
        sql_value_error = True
    except (TypeError, apsw.SQLError):
        # An apsw.SQLError is counted as a type/arity error here.
        sql_type_error = True

    # Domain error on both.
    if python_value_error or sql_value_error:
        assert python_value_error and sql_value_error
    # Arity error on both.
    elif python_type_error or sql_type_error:
        assert python_type_error and sql_type_error
    # Both invocations succeeded, confirm results match.
    else:
        assert abserr(result_python, result_sql) < 1e-4
--------------------------------------------------------------------------------
/tests/test_case.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-

# Copyright (c) 2010-2017, MIT Probabilistic Computing Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import pytest

import bayeslite
import bayeslite.core as core


def test_case():
    """Check that variable names keep the case used in the table.

    Marked as an expected failure up front; see the issue named in the
    xfail reason.
    """
    pytest.xfail(reason='Github issue #546')
    with bayeslite.bayesdb_open(':memory:') as bdb:
        # Column names deliberately differ in case: x and Y.
        bdb.sql_execute('create table t(x,Y)')
        bdb.sql_execute('insert into t values(1,2)')
        bdb.sql_execute('insert into t values(3,4)')
        bdb.sql_execute('insert into t values(1,4)')
        bdb.sql_execute('insert into t values(2,2)')
        bdb.execute('create population p for t(guess(*))')
        population_id = core.bayesdb_get_population(bdb, 'p')
        assert core.bayesdb_variable_names(bdb, population_id, None) == \
            ['x', 'Y']
--------------------------------------------------------------------------------
/tests/test_cgpm_loom.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-

# Copyright (c) 2010-2016, MIT Probabilistic Computing Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
16 | 17 | import os 18 | import pytest 19 | 20 | from bayeslite import bayesdb_open 21 | from bayeslite import bayesdb_nullify 22 | from bayeslite.exception import BQLError 23 | 24 | os.environ['LOOM_VERBOSITY'] = '0' 25 | 26 | root = os.path.dirname(os.path.abspath(__file__)) 27 | dha_csv = os.path.join(root, 'dha.csv') 28 | satellites_csv = os.path.join(root, 'satellites.csv') 29 | 30 | ''' 31 | Integration test for using `ANALYZE FOR ITERATION (loom); on 32 | dha.csv and satellites.csv. 33 | ''' 34 | 35 | def loom_analyze(csv_filename): 36 | try: 37 | import loom 38 | except ImportError: 39 | pytest.skip('no loom') 40 | return 41 | with bayesdb_open(':memory:') as bdb: 42 | bdb = bayesdb_open(':memory:') 43 | bdb.execute('CREATE TABLE t FROM \'%s\'' % (csv_filename)) 44 | bayesdb_nullify(bdb, 't', 'NaN') 45 | bdb.execute(''' 46 | CREATE POPULATION p FOR t WITH SCHEMA( 47 | GUESS STATTYPES OF (*); 48 | ) 49 | ''') 50 | bdb.execute('CREATE GENERATOR m FOR p;') 51 | bdb.execute('INITIALIZE 10 MODELS FOR m') 52 | bdb.execute('ANALYZE m FOR 2 ITERATIONS (loom);') 53 | 54 | # targeted analysis for Loom not supported. 55 | with pytest.raises(BQLError): 56 | bdb.execute(''' 57 | ANALYZE m FOR 1 ITERATION (loom; variables TTL_MDCR_SPND); 58 | ''') 59 | # progress for Loom not supported (error from cgpm). 60 | with pytest.raises(ValueError): 61 | bdb.execute(''' 62 | ANALYZE m FOR 1 ITERATION (loom; quiet); 63 | ''') 64 | # timing for Loom not supported (error from cgpm). 65 | with pytest.raises(ValueError): 66 | bdb.execute(''' 67 | ANALYZE m FOR 1 SECONDS (loom); 68 | ''') 69 | # Run a BQL query. 70 | bdb.execute(''' 71 | ESTIMATE DEPENDENCE PROBABILITY FROM PAIRWISE VARIABLES OF p; 72 | ''') 73 | # Make sure we can run lovecat afterwards. 
74 | bdb.execute('ANALYZE m FOR 2 ITERATION (optimized);') 75 | 76 | def test_loom_dha__ci_slow(): 77 | loom_analyze(dha_csv) 78 | 79 | def test_loom_satellites__ci_slow(): 80 | loom_analyze(satellites_csv) 81 | -------------------------------------------------------------------------------- /tests/test_condprob.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Copyright (c) 2010-2016, MIT Probabilistic Computing Project 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 

import bayeslite

def test_conditional_probability_simple_inferences():
    """Check that conditioning moves probabilities in the expected direction.

    In the data, foo='x' always occurs with bar='a' and foo='y' always
    with bar='b'.  Conditioning on the matching value of bar must raise
    the density of foo's value, and the other value must lower it.
    """
    data = [
        ['x', 'a'], ['x', 'a'], ['x', 'a'],
        ['y', 'b'], ['y', 'b'], ['y', 'b'],
    ]
    with bayeslite.bayesdb_open() as bdb:
        bdb.sql_execute('create table t(foo, bar)')
        for row in data:
            bdb.sql_execute('insert into t values (?, ?)', row)
        bdb.execute('''
            create population p for t (
                foo nominal;
                bar nominal;
            )
        ''')
        bdb.execute('create generator p_cc for p using cgpm;')
        bdb.execute('initialize 10 models for p_cc')
        bdb.execute('analyze p_cc for 100 iterations')
        cursor = bdb.execute('''
            estimate
                probability density of foo = 'x',
                probability density of foo = 'x' given (bar = 'a'),
                probability density of foo = 'x' given (bar = 'b'),
                probability density of foo = 'y',
                probability density of foo = 'y' given (bar = 'a'),
                probability density of foo = 'y' given (bar = 'b')

            by p
        ''').fetchall()
        # One result row holding the six densities, in query order.
        px, pxa, pxb, py, pya, pyb = cursor[0]
        # Inferences on x.
        assert px < pxa
        assert pxb < px
        # Inferences on y.
        assert py < pyb
        assert pya < py
--------------------------------------------------------------------------------
/tests/test_error_bql.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-

# Copyright (c) 2010-2016, MIT Probabilistic Computing Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import pytest

import bayeslite

import test_core


def test_droppop_with_generators():
    """Check the error raised when dropping a population that has generators.

    The drop must raise `BQLError`, and the message must mention
    'generators' and name the generator created here.
    """
    with test_core.t1() as (bdb, _population_id, _generator_id):
        # A name unlikely to appear in the message by accident.
        distinctive_name = 'frobbledithorpequack'
        bdb.execute('create generator %s for p1 using cgpm' %
            (distinctive_name,))
        with pytest.raises(bayeslite.BQLError):
            try:
                bdb.execute('drop population p1')
            except bayeslite.BQLError as e:
                assert 'generators' in str(e)
                assert distinctive_name in str(e)
                # Re-raise so pytest.raises above sees the error.
                raise
--------------------------------------------------------------------------------
/tests/test_kl.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-

# Copyright (c) 2010-2016, MIT Probabilistic Computing Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import division  # For type safety in gaussian_kl_divergence

from functools import partial
from math import erfc

import numpy as np

from numpy.random import RandomState

import kl
import threshold


def gaussian_kl_divergence(mu1, s1, mu2, s2):
    "Return KL(N(mu1,s1)||N(mu2,s2))"
    # http://stats.stackexchange.com/a/7443/40686
    return np.log(s2 / s1) + ((s1**2 + (mu1 - mu2)**2) / (2 * s2**2)) - 0.5


def gaussian_log_pdf(mu, s):
    """Return a function computing the log density of N(mu, s) at a point."""
    def lpdf(x):
        normalizing_constant = -(np.log(2 * np.pi) / 2) - np.log(s)
        return normalizing_constant - ((x - mu)**2 / (2 * s**2))
    return lpdf


def compute_kullback_leibler_check_statistic(n=100, prngstate=None):
    """Compute the lowest of the survival function and the CDF of the exact KL
    divergence KL(N(mu1,s1)||N(mu2,s2)) w.r.t. the sample distribution of the
    KL divergence drawn by computing log(P(x|N(mu1,s1)))-log(P(x|N(mu2,s2)))
    over a sample x~N(mu1,s1).  If we are computing the KL divergence
    accurately, the exact value should fall squarely in the sample, and the
    tail probabilities should be relatively large.

    Raises TypeError if `prngstate` is not supplied.

    """
    if prngstate is None:
        raise TypeError('Must explicitly specify numpy.random.RandomState')
    mu1 = mu2 = 0
    s1 = 1
    s2 = 2
    exact = gaussian_kl_divergence(mu1, s1, mu2, s2)
    sample = prngstate.normal(mu1, s1, n)
    lpdf1 = gaussian_log_pdf(mu1, s1)
    lpdf2 = gaussian_log_pdf(mu2, s2)
    estimate, std = kl.kullback_leibler(sample, lpdf1, lpdf2)
    # This computes the minimum of the left and right tail probabilities of the
    # exact KL divergence vs a gaussian fit to the sample estimate.  There is a
    # distinct negative skew to the samples used to compute `estimate`, so this
    # statistic is not uniform.  Nonetheless, we do not expect it to get too
    # small.
    return erfc(abs(exact - estimate) / std) / 2


def kl_test_stat():
    """Return the check statistic bound to a PRNG seeded with 17."""
    prngstate = RandomState(17)
    return partial(
        compute_kullback_leibler_check_statistic, prngstate=prngstate)


def compute_kl_threshold():
    """Compute the values used in test_kullback_leibler

    >>> threshold.compute_sufficiently_stringent_threshold(
            kl_test_stat(), 6, 1e-20)
    ...
    TestThreshold(
        threshold=4.3883148424367044e-13,
        failprob=9.724132259513859e-21,
        sample_size=252135
    )

    This means that after generating 252135 check statistics, it was found that
    the least value of six samples will be less than 4.3883148424367044e-13
    with probability less than 9.724132259513859e-21 (< 1e-20).

    """
    return threshold.compute_sufficiently_stringent_threshold(
        kl_test_stat(), 6, 1e-20)


def test_kullback_leibler():
    """Check kullback_leibler_check_statistic doesn't give absurdly low
    values."""
    # See compute_kl_threshold for derivation
    kl_threshold = threshold.TestThreshold(
        threshold=4.3883148424367044e-13,
        failprob=9.724132259513859e-21,
        sample_size=252135
    )
    threshold.check_generator(kl_test_stat(), 6, kl_threshold.threshold, 1e-20)
--------------------------------------------------------------------------------
/tests/test_macro.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-

# Copyright (c) 2010-2016, MIT Probabilistic Computing Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import bayeslite.ast as ast
import bayeslite.macro as macro


def test_expand_probability_estimate():
    """Check the expansion of a probability estimate.

    The expected result is a subquery selecting AVG(x) from a
    SimulateModelsExp whose only column is the original expression,
    named x.
    """
    expression = ast.ExpOp(ast.OP_LT, [
        ast.ExpBQLMutInf(
            ['c0'],
            ['c1', 'c2'],
            [('c3', ast.ExpLit(ast.LitInt(3)))],
            None),
        ast.ExpLit(ast.LitFloat(0.1)),
    ])
    probest = ast.ExpBQLProbEst(expression)
    assert macro.expand_probability_estimate(probest, 'p', 'g') == \
        ast.ExpSub(
            ast.Select(ast.SELQUANT_ALL,
                [ast.SelColExp(
                    ast.ExpApp(False, 'AVG', [ast.ExpCol(None, 'x')]),
                    None)],
                [ast.SelTab(
                    ast.SimulateModelsExp([ast.SelColExp(expression, 'x')],
                        'p', 'g'),
                    None)],
                None, None, None, None))

def test_simulate_models_trivial():
    """Check that a lone model estimator expands to a plain SimulateModels."""
    e = ast.ExpBQLMutInf(['c0'], ['c1', 'c2'],
        [('c3', ast.ExpLit(ast.LitInt(3)))],
        None)
    simmodels = ast.SimulateModelsExp([ast.SelColExp(e, 'x')], 'p', 'g')
    assert macro.expand_simulate_models(simmodels) == \
        ast.SimulateModels([ast.SelColExp(e, 'x')], 'p', 'g')


def test_simulate_models_nontrivial():
    """Check expansion when estimators are nested inside larger expressions.

    Each estimator is expected to become its own column (v0, v1, v2) of
    an inner SimulateModels, with the outer SELECT rebuilding the
    original expressions from those columns.
    """
    # XXX test descent into ExpLit
    # XXX test descent into ExpNumpar
    # XXX test descent into ExpNampar
    # XXX test descent into ExpCol
    # XXX test descent into ExpSub
    # XXX test descent into ExpCollate
    # XXX test descent into ExpIn
    # XXX test descent into ExpCast
    # XXX test descent into ExpExists
    # XXX test descent into ExpApp
    # XXX test descent into ExpAppStar
    # XXX test descent into ExpCase
    mutinf0 = ast.ExpBQLMutInf(['c0'], ['c1', 'c2'],
        [('c3', ast.ExpLit(ast.LitInt(3)))],
        None)
    mutinf1 = ast.ExpBQLMutInf(['c4', 'c5'], ['c6'],
        [('c7', ast.ExpLit(ast.LitString('ergodic')))],
        100)
    probdensity = ast.ExpBQLProbDensity(
        [('x', ast.ExpLit(ast.LitFloat(1.2)))],
        # No conditions for now -- that changes the weighting of the average.
        [])
    expression0 = ast.ExpOp(ast.OP_LT, [
        mutinf0,
        ast.ExpOp(ast.OP_MUL, [ast.ExpLit(ast.LitFloat(0.1)), mutinf1]),
    ])
    expression1 = probdensity
    simmodels = ast.SimulateModelsExp(
        [
            ast.SelColExp(expression0, 'quagga'),
            ast.SelColExp(expression1, 'eland'),
        ], 'p', 'g')
    assert macro.expand_simulate_models(simmodels) == \
        ast.Select(ast.SELQUANT_ALL,
            [
                ast.SelColExp(
                    ast.ExpOp(ast.OP_LT, [
                        ast.ExpCol(None, 'v0'),
                        ast.ExpOp(ast.OP_MUL, [
                            ast.ExpLit(ast.LitFloat(0.1)),
                            ast.ExpCol(None, 'v1'),
                        ])
                    ]),
                    'quagga'),
                ast.SelColExp(ast.ExpCol(None, 'v2'), 'eland'),
            ],
            [ast.SelTab(
                ast.SimulateModels(
                    [
                        ast.SelColExp(mutinf0, 'v0'),
                        ast.SelColExp(mutinf1, 'v1'),
                        ast.SelColExp(probdensity, 'v2'),
                    ], 'p', 'g'),
                None)],
            None, None, None, None)
--------------------------------------------------------------------------------
/tests/test_math_util.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-

# Copyright (c) 2010-2016, MIT Probabilistic Computing Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | import math 18 | import pytest 19 | 20 | from bayeslite.math_util import * 21 | 22 | def pi_cf(): 23 | """Compute pi with a generalized continued fraction. 24 | 25 | The continued fraction is[1]:: 26 | 27 | 1 28 | pi/4 = -------------------. 29 | 1^2 30 | 1 + --------------- 31 | 2^2 32 | 3 + ----------- 33 | 3^2 34 | 5 + ------- 35 | 7 + ... 36 | 37 | [1] https://en.wikipedia.org/wiki/Generalized_continued_fraction#.CF.80, 38 | no citation given. 39 | """ 40 | def contfrac(): 41 | i = 0 42 | while True: 43 | i += 1 44 | yield i*i, 2*i + 1 45 | return 4/(1 + limit(convergents(contfrac()))) 46 | 47 | def phi_cf(): 48 | """Compute the golden ratio phi by its continued fraction. 49 | 50 | The well-known continued fraction is [1; 1, 1, 1, 1, ...]. 51 | """ 52 | def contfrac(): 53 | while True: 54 | yield 1, 1 55 | return 1 + limit(convergents(contfrac())) 56 | 57 | def pi_ps(): 58 | """Compute pi with a power series representation of arctan. 59 | 60 | The power series for arctan is Gregory's series:: 61 | 62 | z^3 z^5 z^7 63 | arctan z = z - --- + --- - --- + .... 64 | 3 5 7 65 | 66 | We use a Machin-like formula attributed on Wikipedia to Euler:: 67 | 68 | pi/4 = 20 arctan(1/7) + 8 arctan(3/79). 69 | """ 70 | def arctan(z): 71 | def seq(): 72 | z2 = z*z 73 | zn = z 74 | d = 1 75 | sign = 1. 76 | while True: 77 | yield sign*zn/d 78 | zn *= z2 79 | d += 2 80 | sign *= -1 81 | return limit(partial_sums(seq())) 82 | return 20*arctan(1./7) + 8*arctan(3./79) 83 | 84 | def test_misc(): 85 | assert relerr(100., 99.) 
== .01 86 | assert relerr(math.pi, pi_cf()) < EPSILON 87 | assert relerr(math.pi, pi_ps()) < EPSILON 88 | assert relerr((1 + math.sqrt(5))/2, phi_cf()) < EPSILON 89 | 90 | def test_logsumexp(): 91 | inf = float('inf') 92 | nan = float('nan') 93 | with pytest.raises(OverflowError): 94 | math.log(sum(map(math.exp, range(1000)))) 95 | assert relerr(999.4586751453871, logsumexp(range(1000))) < 1e-15 96 | assert logsumexp([]) == -inf 97 | assert logsumexp([-1000.]) == -1000. 98 | assert logsumexp([-1000., -1000.]) == -1000. + math.log(2.) 99 | assert relerr(math.log(2.), logsumexp([0., 0.])) < 1e-15 100 | assert logsumexp([-inf, 1]) == 1 101 | assert logsumexp([-inf, -inf]) == -inf 102 | assert logsumexp([+inf, +inf]) == +inf 103 | assert math.isnan(logsumexp([-inf, +inf])) 104 | assert math.isnan(logsumexp([nan, inf])) 105 | assert math.isnan(logsumexp([nan, -3])) 106 | 107 | def test_logmeanexp(): 108 | inf = float('inf') 109 | nan = float('nan') 110 | assert logmeanexp([]) == -inf 111 | assert relerr(992.550919866405, logmeanexp(range(1000))) < 1e-15 112 | assert logmeanexp([-1000., -1000.]) == -1000. 113 | assert relerr(math.log(0.5 * (1 + math.exp(-1.))), 114 | logmeanexp([0., -1.])) \ 115 | < 1e-15 116 | assert relerr(math.log(0.5), logmeanexp([0., -1000.])) < 1e-15 117 | assert relerr(-3 - math.log(2.), logmeanexp([-inf, -3])) < 1e-15 118 | assert relerr(-3 - math.log(2.), logmeanexp([-3, -inf])) < 1e-15 119 | assert logmeanexp([+inf, -3]) == +inf 120 | assert logmeanexp([-3, +inf]) == +inf 121 | assert logmeanexp([-inf, 0, +inf]) == +inf 122 | assert math.isnan(logmeanexp([nan, inf])) 123 | assert math.isnan(logmeanexp([nan, -3])) 124 | assert math.isnan(logmeanexp([nan])) 125 | 126 | def test_logavgexp_weighted(): 127 | # XXX Expand me! 
128 | assert relerr(-1000 - logsumexp([500, -500]) + math.log(2), 129 | logavgexp_weighted([500, -500], [-1500, -500])) < 1e-15 130 | -------------------------------------------------------------------------------- /tests/test_nullify.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Copyright (c) 2010-2017, MIT Probabilistic Computing Project 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
16 | 17 | from bayeslite import bayesdb_open 18 | from bayeslite import bayesdb_nullify 19 | 20 | 21 | def test_nullify(): 22 | with bayesdb_open(':memory:') as bdb: 23 | bdb.sql_execute('create table t(x,y)') 24 | for row in [ 25 | ['1',''], 26 | ['nan','foo'], 27 | ['2','nan'], 28 | ['2','""'], 29 | ['', ''], 30 | ]: 31 | bdb.sql_execute('insert into t values(?,?)', row) 32 | assert bdb.execute('select * from t').fetchall() == [ 33 | ('1',''), 34 | ('nan','foo'), 35 | ('2','nan'), 36 | ('2','""'), 37 | ('', ''), 38 | ] 39 | assert bayesdb_nullify(bdb, 't', '') == 3 40 | assert bdb.execute('select * from t').fetchall() == [ 41 | ('1',None), 42 | ('nan','foo'), 43 | ('2','nan'), 44 | ('2','""'), 45 | (None, None), 46 | ] 47 | assert bayesdb_nullify(bdb, 't', 'nan', columns=['x']) == 1 48 | assert bdb.execute('select * from t').fetchall() == [ 49 | ('1',None), 50 | (None,'foo'), 51 | ('2','nan'), 52 | ('2','""'), 53 | (None, None), 54 | ] 55 | assert bayesdb_nullify(bdb, 't', 'fnord') == 0 56 | -------------------------------------------------------------------------------- /tests/test_parse_cgpm_analyze.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Copyright (c) 2010-2016, MIT Probabilistic Computing Project 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
16 | 17 | import bayeslite.backends.cgpm_analyze.parse as cgpm_analyze_parser 18 | 19 | from test_parse import parse_bql_string 20 | 21 | # XXX Is there a better way to get the tokens that are supplied to 22 | # cgpm_analyze.parse.parse? 23 | def parse_analysis_plan(string): 24 | phrases = parse_bql_string(''' 25 | ANALYZE m FOR 1 ITERATION (%s) 26 | ''' % (string,)) 27 | return cgpm_analyze_parser.parse(phrases[0].program) 28 | 29 | def test_empty(): 30 | assert [] == parse_analysis_plan('') 31 | assert [] == parse_analysis_plan(';') 32 | assert [] == parse_analysis_plan(';;') 33 | assert [] == parse_analysis_plan(' ;') 34 | assert [] == parse_analysis_plan('; ') 35 | assert [] == parse_analysis_plan(' ; ') 36 | assert [] == parse_analysis_plan(' ; ; ') 37 | 38 | def test_miscellaneous(): 39 | assert parse_analysis_plan('VARIABLES A, B, C; OPTIMIZED') == [ 40 | cgpm_analyze_parser.Variables(['A', 'B', 'C']), 41 | cgpm_analyze_parser.Optimized('lovecat'), 42 | ] 43 | assert parse_analysis_plan('SKIP "foo"; loom; QUIET') == [ 44 | cgpm_analyze_parser.Skip(['foo']), 45 | cgpm_analyze_parser.Optimized('loom'), 46 | cgpm_analyze_parser.Quiet(True), 47 | ] 48 | assert parse_analysis_plan('SKIP "foo"; loom') == [ 49 | cgpm_analyze_parser.Skip(['foo']), 50 | cgpm_analyze_parser.Optimized('loom'), 51 | ] 52 | 53 | def test_rows(): 54 | assert parse_analysis_plan('ROWS 1, 2, 3, 19;') == [ 55 | cgpm_analyze_parser.Rows([1, 2, 3, 19]), 56 | ] 57 | 58 | def test_inference_planning_basic(): 59 | assert parse_analysis_plan('SUBPROBLEM variable clustering;') == [ 60 | cgpm_analyze_parser.Subproblem(['variable_clustering']), 61 | ] 62 | assert parse_analysis_plan('SUBPROBLEM (variable hyperparameters);') == [ 63 | cgpm_analyze_parser.Subproblem(['variable_hyperparameters']), 64 | ] 65 | assert parse_analysis_plan(''' 66 | SUBPROBLEM ( 67 | variable clustering concentration, 68 | variable clustering 69 | ); 70 | ''' ) == [ 71 | cgpm_analyze_parser.Subproblem([ 72 | 
'variable_clustering_concentration', 73 | 'variable_clustering' 74 | ]), 75 | ] 76 | assert parse_analysis_plan(''' 77 | SUBPROBLEM row clustering concentration; 78 | SUBPROBLEM row clustering; 79 | ''' ) == [ 80 | cgpm_analyze_parser.Subproblem(['row_clustering_concentration']), 81 | cgpm_analyze_parser.Subproblem(['row_clustering']), 82 | ] 83 | 84 | def test_inference_planning_bonanza(): 85 | assert parse_analysis_plan(''' 86 | VARIABLES foo, bar, llama, salman; 87 | ROWS 1, 17, 9; 88 | SUBPROBLEMS ( 89 | row clustering concentration, 90 | row clustering, 91 | variable hyperparameters 92 | ); 93 | OPTIMIZED; 94 | QUIET; 95 | ''' ) == [ 96 | cgpm_analyze_parser.Variables(['foo','bar','llama','salman']), 97 | cgpm_analyze_parser.Rows([1, 17, 9]), 98 | cgpm_analyze_parser.Subproblem([ 99 | 'row_clustering_concentration', 100 | 'row_clustering', 101 | 'variable_hyperparameters', 102 | ]), 103 | cgpm_analyze_parser.Optimized('lovecat'), 104 | cgpm_analyze_parser.Quiet(True), 105 | ] 106 | -------------------------------------------------------------------------------- /tests/test_read_pandas.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Copyright (c) 2010-2016, MIT Probabilistic Computing Project 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
import apsw
import pandas
import pytest

from bayeslite import bayesdb_open
from bayeslite import bql_quote_name
from bayeslite.core import bayesdb_has_table
from bayeslite.read_pandas import bayesdb_read_pandas_df

def _sample_df():
    """Return the four-row frame shared by every test in this module."""
    return pandas.DataFrame([(1,2,'foo'),(4,5,6),(7,8,9),(10,11,12)],
        index=[42, 78, 62, 43])

def do_test(bdb, t, df, index=None):
    """Load `df` into a new table `t` and check the failure modes.

    Steps, in order:

    1. Reading without create=True while `t` does not exist must raise
       ValueError.
    2. Reading with create=True must succeed and store one row per
       data frame row.
    3. Reading again with create=True, ifnotexists=False must raise
       ValueError and leave the row count alone.
    4. Reading again with create=True, ifnotexists=True must raise
       apsw.ConstraintError and leave the row count alone.

    :param bdb: open BayesDB handle.
    :param t: name of a table that must not exist yet.
    :param df: pandas data frame to load.
    :param index: passed through as the `index` keyword of
        bayesdb_read_pandas_df.
    """
    qt = bql_quote_name(t)
    countem = 'select count(*) from %s' % (qt,)
    # Every count below is compared with the frame's own length rather
    # than a literal, so the helper works for frames of any size.
    nrows = len(df.index)
    assert not bayesdb_has_table(bdb, t)

    with pytest.raises(ValueError):
        bayesdb_read_pandas_df(bdb, t, df, index=index)

    bayesdb_read_pandas_df(bdb, t, df, create=True, ifnotexists=False,
        index=index)
    assert nrows == bdb.execute(countem).fetchvalue()

    with pytest.raises(ValueError):
        bayesdb_read_pandas_df(bdb, t, df, create=True, ifnotexists=False,
            index=index)
    assert nrows == bdb.execute(countem).fetchvalue()

    with pytest.raises(apsw.ConstraintError):
        bayesdb_read_pandas_df(bdb, t, df, create=True, ifnotexists=True,
            index=index)
    assert nrows == bdb.execute(countem).fetchvalue()

def test_integral_noindex():
    with bayesdb_open() as bdb:
        do_test(bdb, 't', _sample_df())

def test_integral_index():
    with bayesdb_open() as bdb:
        do_test(bdb, 't', _sample_df(), index='quagga')

# NOTE(review): the two `nonintegral' tests below build exactly the same
# integer-indexed frame as the `integral' tests above.  If they are meant
# to cover a non-integer index, the fixture needs changing -- confirm
# against bayesdb_read_pandas_df before doing so.

def test_nonintegral_noindex():
    with bayesdb_open() as bdb:
        df = _sample_df()
        with pytest.raises(ValueError):
            bayesdb_read_pandas_df(bdb, 't', df)

def test_nonintegral_index():
    with bayesdb_open() as bdb:
        do_test(bdb, 't', _sample_df(), index='eland')
-------------------------------------------------------------------------------- /tests/test_stochastic.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Copyright (c) 2010-2016, MIT Probabilistic Computing Project 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | import pytest 18 | 19 | from stochastic import StochasticError 20 | from stochastic import stochastic 21 | 22 | class Quagga(Exception): 23 | pass 24 | 25 | @stochastic(max_runs=1, min_passes=1) 26 | def _test_fail(seed): 27 | raise Quagga 28 | 29 | @stochastic(max_runs=1, min_passes=1) 30 | def _test_pass(_seed): 31 | pass 32 | 33 | passthenfail_counter = 0 34 | @stochastic(max_runs=2, min_passes=1) 35 | def _test_passthenfail(seed): 36 | global passthenfail_counter 37 | passthenfail_counter += 1 38 | passthenfail_counter %= 2 39 | if passthenfail_counter == 0: 40 | raise Quagga 41 | 42 | failthenpass_counter = 0 43 | @stochastic(max_runs=2, min_passes=1) 44 | def _test_failthenpass(seed): 45 | global failthenpass_counter 46 | failthenpass_counter += 1 47 | failthenpass_counter %= 2 48 | if failthenpass_counter == 1: 49 | raise Quagga 50 | 51 | @stochastic(max_runs=2, min_passes=1) 52 | def _test_failthenfail(seed): 53 | raise Quagga 54 | 55 | @stochastic(max_runs=1, min_passes=1) 56 | def test_stochastic(seed): 57 | with pytest.raises(StochasticError): 58 | _test_fail() 59 | try: 60 | 
_test_fail() 61 | except StochasticError as e: 62 | assert isinstance(e.excvalue, Quagga) 63 | with pytest.raises(Quagga): 64 | _test_fail(seed) 65 | _test_pass() 66 | _test_pass(seed) 67 | _test_passthenfail() 68 | with pytest.raises(Quagga): 69 | _test_passthenfail(seed) 70 | _test_failthenpass() 71 | with pytest.raises(Quagga): 72 | _test_failthenpass(seed) 73 | with pytest.raises(StochasticError): 74 | _test_failthenfail() 75 | with pytest.raises(Quagga): 76 | _test_failthenfail(seed) 77 | -------------------------------------------------------------------------------- /tests/test_subsample.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Copyright (c) 2010-2016, MIT Probabilistic Computing Project 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
16 | 17 | import os 18 | 19 | import bayeslite 20 | import bayeslite.read_csv as read_csv 21 | 22 | from bayeslite.core import bayesdb_get_generator 23 | from bayeslite.guess import bayesdb_guess_population 24 | from bayeslite.backends.cgpm_backend import CGPM_Backend 25 | 26 | root = os.path.dirname(os.path.abspath(__file__)) 27 | dha_csv = os.path.join(root, 'dha.csv') 28 | 29 | def test_subsample(): 30 | with bayeslite.bayesdb_open(builtin_backends=False) as bdb: 31 | backend = CGPM_Backend(cgpm_registry={}, multiprocess=False) 32 | bayeslite.bayesdb_register_backend(bdb, backend) 33 | with open(dha_csv, 'rU') as f: 34 | read_csv.bayesdb_read_csv(bdb, 'dha', f, header=True, create=True) 35 | bayesdb_guess_population(bdb, 'hospitals_full', 'dha', 36 | overrides=[('name', 'key')]) 37 | bayesdb_guess_population(bdb, 'hospitals_sub', 'dha', 38 | overrides=[('name', 'key')]) 39 | bdb.execute(''' 40 | CREATE GENERATOR hosp_full_cc FOR hospitals_full USING cgpm; 41 | ''') 42 | bdb.execute(''' 43 | CREATE GENERATOR hosp_sub_cc FOR hospitals_sub USING cgpm( 44 | SUBSAMPLE 100 45 | ) 46 | ''') 47 | bdb.execute('INITIALIZE 1 MODEL FOR hosp_sub_cc') 48 | bdb.execute('ANALYZE hosp_sub_cc FOR 1 ITERATION (OPTIMIZED)') 49 | bdb.execute(''' 50 | ESTIMATE SIMILARITY TO (_rowid_=2) IN THE CONTEXT OF PNEUM_SCORE 51 | FROM hospitals_sub WHERE _rowid_ = 1 OR _rowid_ = 101 52 | ''').fetchall() 53 | bdb.execute(''' 54 | ESTIMATE SIMILARITY TO (_rowid_=102) IN THE CONTEXT OF 55 | N_DEATH_ILL FROM hospitals_sub 56 | WHERE _rowid_ = 1 OR _rowid_ = 101 57 | ''').fetchall() 58 | bdb.execute(''' 59 | ESTIMATE PREDICTIVE PROBABILITY OF mdcr_spnd_amblnc 60 | FROM hospitals_sub 61 | WHERE _rowid_ = 1 OR _rowid_ = 101 62 | ''').fetchall() 63 | bdb.execute(''' 64 | ESTIMATE SIMILARITY IN THE CONTEXT OF PNEUM_SCORE 65 | FROM PAIRWISE hospitals_sub 66 | WHERE (r0._rowid_ = 1 OR r0._rowid_ = 101) AND 67 | (r1._rowid_ = 1 OR r1._rowid_ = 101) 68 | ''').fetchall() 69 | bdb.execute(''' 70 | INFER 
mdcr_spnd_amblnc FROM hospitals_sub 71 | WHERE _rowid_ = 1 OR _rowid_ = 101 72 | ''').fetchall() 73 | sql = ''' 74 | SELECT table_rowid FROM bayesdb_cgpm_individual 75 | WHERE generator_id = ? 76 | ORDER BY cgpm_rowid ASC 77 | LIMIT 100 78 | ''' 79 | gid_full = bayesdb_get_generator(bdb, None, 'hosp_full_cc') 80 | cursor = bdb.sql_execute(sql, (gid_full,)) 81 | assert [row[0] for row in cursor] == range(1, 100 + 1) 82 | gid = bayesdb_get_generator(bdb, None, 'hosp_sub_cc') 83 | cursor = bdb.sql_execute(sql, (gid,)) 84 | assert [row[0] for row in cursor] != range(1, 100 + 1) 85 | bdb.execute('DROP GENERATOR hosp_sub_cc') 86 | bdb.execute('DROP GENERATOR hosp_full_cc') 87 | bdb.execute('DROP POPULATION hospitals_sub') 88 | bdb.execute('DROP POPULATION hospitals_full') 89 | -------------------------------------------------------------------------------- /tests/test_threshold.py: -------------------------------------------------------------------------------- 1 | from numpy.random import RandomState 2 | 3 | from threshold import failprob_threshold 4 | 5 | 6 | def test_failprob_threshold_basic(): 7 | """Sanity check on failprob_threshold: Verify, for a relatively large failure 8 | probability, that the failure threshold it returns for a simple test 9 | statistic actually results in failures at approximately the right 10 | frequency. 
11 | 12 | """ 13 | prngstate = RandomState(0) 14 | 15 | def sample(n): 16 | return prngstate.normal(0, 1, n) 17 | 18 | target_prob = 1e-1 19 | test_sample_size = 6 20 | prob, thresh = failprob_threshold( 21 | sample(1000), test_sample_size, target_prob) 22 | samples = [all(v < thresh for v in sample(test_sample_size)) 23 | for _ in xrange(int(100 / target_prob))] 24 | assert 50 < samples.count(True) < 200 25 | -------------------------------------------------------------------------------- /tests/test_util.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Copyright (c) 2010-2016, MIT Probabilistic Computing Project 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
16 | 17 | import pytest 18 | 19 | from bayeslite.util import cursor_value 20 | 21 | def test_cursor_value(): 22 | with pytest.raises(ValueError): 23 | cursor_value(iter([])) 24 | with pytest.raises(TypeError): 25 | cursor_value(iter([1])) 26 | with pytest.raises(ValueError): 27 | cursor_value(iter([1, 2])) 28 | with pytest.raises(ValueError): 29 | cursor_value(iter([()])) 30 | with pytest.raises(ValueError): 31 | cursor_value(iter([(1, 2)])) 32 | with pytest.raises(ValueError): 33 | cursor_value(iter([(1, 2), ()])) 34 | with pytest.raises(ValueError): 35 | cursor_value(iter([(1, 2), 3])) 36 | with pytest.raises(ValueError): 37 | cursor_value(iter([(1, 2), (3,)])) 38 | with pytest.raises(ValueError): 39 | cursor_value(iter([(1,), (2,)])) 40 | with pytest.raises(ValueError): 41 | cursor_value(iter([(1,), (2, 3)])) 42 | assert cursor_value(iter([(42,)])) == 42 43 | -------------------------------------------------------------------------------- /tests/test_vtab.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Copyright (c) 2010-2016, MIT Probabilistic Computing Project 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
16 | 17 | import test_core 18 | 19 | from stochastic import stochastic 20 | 21 | 22 | @stochastic(max_runs=2, min_passes=1) 23 | def test_mutinf_smoke(seed): 24 | with test_core.t1(seed=seed) as (bdb, population_id, _generator_id): 25 | def checkmi(n, q, *p): 26 | i = 0 27 | for r in bdb.sql_execute(q, *p): 28 | assert len(r) == 1 29 | assert isinstance(r[0], float) 30 | i += 1 31 | assert i == n, '%r =/= %r' % (i, n) 32 | 33 | bdb.execute('initialize 10 models for p1_cc') 34 | checkmi(10, ''' 35 | select mi from bql_mutinf 36 | where population_id = ? 37 | and target_vars = '[1]' 38 | and reference_vars = '[2]' 39 | ''', (population_id,)) 40 | 41 | bdb.execute('initialize 11 models if not exists for p1_cc') 42 | checkmi(11, ''' 43 | select mi from bql_mutinf 44 | where population_id = ? 45 | and target_vars = '[1]' 46 | and reference_vars = '[2]' 47 | and conditions = '{"3": 42}' 48 | ''', (population_id,)) 49 | 50 | bdb.execute('initialize 12 models if not exists for p1_cc') 51 | checkmi(12, ''' 52 | select mi from bql_mutinf 53 | where population_id = ? 54 | and target_vars = '[1]' 55 | and reference_vars = '[2]' 56 | and nsamples = 2 57 | ''', (population_id,)) 58 | 59 | bdb.execute('initialize 13 models if not exists for p1_cc') 60 | checkmi(13, ''' 61 | select mi from bql_mutinf 62 | where population_id = ? 63 | and target_vars = '[1]' 64 | and reference_vars = '[2]' 65 | and conditions = '{"3": 42}' 66 | and nsamples = 2 67 | ''', (population_id,)) 68 | --------------------------------------------------------------------------------