├── .gitignore
├── .travis.yml
├── CONTRIBUTING.md
├── HACKING
├── LICENSE.txt
├── MANIFEST.in
├── Makefile
├── README.md
├── check.sh
├── conda
├── meta.yaml
└── upload.sh
├── doc
├── api.rst
├── bayeslite.1
├── bql.rst
├── conf.py
├── index.rst
└── internals.rst
├── docker
└── ubuntu1604
├── external
├── README
├── lemonade
│ ├── COPYING
│ ├── README
│ └── dist
│ │ ├── MANIFEST.in
│ │ ├── PKG-INFO
│ │ ├── README
│ │ ├── bin
│ │ └── lemonade
│ │ ├── examples
│ │ └── calc
│ │ │ ├── calc.py
│ │ │ └── gram.y
│ │ ├── lemonade
│ │ ├── __init__.py
│ │ ├── action.py
│ │ ├── acttab.py
│ │ ├── build.py
│ │ ├── ccruft.py
│ │ ├── configlist.py
│ │ ├── error.py
│ │ ├── exceptions.py
│ │ ├── lempar.tmpl
│ │ ├── main.py
│ │ ├── msort.py
│ │ ├── parse.py
│ │ ├── plink.py
│ │ ├── report.py
│ │ ├── set.py
│ │ ├── struct.py
│ │ └── table.py
│ │ └── setup.py
├── plex
│ ├── COPYING
│ ├── README
│ ├── dist
│ │ ├── Makefile
│ │ ├── Plex
│ │ │ ├── Actions.py
│ │ │ ├── DFA.py
│ │ │ ├── Errors.py
│ │ │ ├── Lexicons.py
│ │ │ ├── Machines.py
│ │ │ ├── Regexps.py
│ │ │ ├── Scanners.py
│ │ │ ├── Timing.py
│ │ │ ├── Traditional.py
│ │ │ ├── Transitions.py
│ │ │ └── __init__.py
│ │ ├── README
│ │ ├── TODO
│ │ ├── doc
│ │ │ ├── Reference.html
│ │ │ ├── Tutorial.html
│ │ │ └── index.html
│ │ ├── examples
│ │ │ ├── example1and2.in
│ │ │ ├── example1and2.py
│ │ │ ├── example3.in
│ │ │ ├── example3.py
│ │ │ ├── example4.in
│ │ │ ├── example4.py
│ │ │ ├── example5.in
│ │ │ ├── example5.py
│ │ │ ├── example6.in
│ │ │ ├── example6.py
│ │ │ ├── example7.in
│ │ │ ├── example7.py
│ │ │ ├── pascal.in
│ │ │ ├── pascal.py
│ │ │ ├── python.in
│ │ │ ├── python.py
│ │ │ ├── speedtest.in
│ │ │ └── speedtest.py
│ │ └── tests
│ │ │ ├── Test.py
│ │ │ ├── runtests.py
│ │ │ ├── test0.in
│ │ │ ├── test0.out
│ │ │ ├── test0.py
│ │ │ ├── test1.in
│ │ │ ├── test1.out
│ │ │ ├── test1.py
│ │ │ ├── test10.out
│ │ │ ├── test10.py
│ │ │ ├── test11.in
│ │ │ ├── test11.out
│ │ │ ├── test11.py
│ │ │ ├── test12.in
│ │ │ ├── test12.out
│ │ │ ├── test12.py
│ │ │ ├── test2.in
│ │ │ ├── test2.out
│ │ │ ├── test2.py
│ │ │ ├── test3.in
│ │ │ ├── test3.out
│ │ │ ├── test3.py
│ │ │ ├── test4.in
│ │ │ ├── test4.out
│ │ │ ├── test4.py
│ │ │ ├── test5.in
│ │ │ ├── test5.out
│ │ │ ├── test5.py
│ │ │ ├── test6.in
│ │ │ ├── test6.out
│ │ │ ├── test6.py
│ │ │ ├── test7.in
│ │ │ ├── test7.out
│ │ │ ├── test7.py
│ │ │ ├── test8.in
│ │ │ ├── test8.out
│ │ │ ├── test8.py
│ │ │ ├── test9.in
│ │ │ ├── test9.out
│ │ │ └── test9.py
│ └── prepare.sh
└── weakprng
│ ├── COPYING
│ ├── README
│ ├── dist
│ ├── __init__.py
│ ├── chacha.py
│ └── weakprng.py
│ └── prepare.sh
├── pythenv.sh
├── setup.py
├── shell
├── scripts
│ └── bayeslite
├── src
│ ├── README.md
│ ├── __init__.py
│ ├── core.py
│ ├── hook.py
│ ├── main.py
│ └── pretty.py
└── tests
│ ├── test_pretty.py
│ ├── test_shell.py
│ └── thooks.py
├── src
├── __init__.py
├── ast.py
├── backend.py
├── backends
│ ├── __init__.py
│ ├── cgpm_alter
│ │ ├── __init__.py
│ │ ├── alterations.py
│ │ ├── grammar.y
│ │ └── parse.py
│ ├── cgpm_analyze
│ │ ├── __init__.py
│ │ ├── grammar.y
│ │ └── parse.py
│ ├── cgpm_backend.py
│ ├── cgpm_schema
│ │ ├── __init__.py
│ │ ├── grammar.y
│ │ └── parse.py
│ ├── iid_gaussian.py
│ ├── loom_backend.py
│ ├── nig_normal.py
│ └── troll_rng.py
├── bayesdb.py
├── bql.py
├── bqlfn.py
├── bqlmath.py
├── bqlvtab.py
├── compiler.py
├── core.py
├── exception.py
├── grammar.y
├── guess.py
├── macro.py
├── math_util.py
├── nullify.py
├── parse.py
├── quote.py
├── read_csv.py
├── read_pandas.py
├── regress.py
├── scan.py
├── schema.py
├── simulate.py
├── sqlite3_util.py
├── stats.py
├── txn.py
└── util.py
└── tests
├── __init__.py
├── dha.csv
├── dha_codebook.csv
├── kl.py
├── satellites.csv
├── stochastic.py
├── test_approxest.py
├── test_backends.py
├── test_bql.py
├── test_bqlmath.py
├── test_case.py
├── test_cgpm.py
├── test_cgpm_alter.py
├── test_cgpm_analysis.py
├── test_cgpm_engine_cache.py
├── test_cgpm_loom.py
├── test_cmi.py
├── test_condprob.py
├── test_core.py
├── test_correlation.py
├── test_csv.py
├── test_error_bql.py
├── test_guess.py
├── test_infer_hypothetical.py
├── test_kl.py
├── test_loom_backend.py
├── test_loom_simulate_bivariate_gaussian.py
├── test_macro.py
├── test_math_util.py
├── test_nig_normal.py
├── test_nullify.py
├── test_parse.py
├── test_parse_cgpm_alter.py
├── test_parse_cgpm_analyze.py
├── test_read_csv.py
├── test_read_pandas.py
├── test_regress.py
├── test_schema.py
├── test_simulate.py
├── test_stats.py
├── test_stochastic.py
├── test_subsample.py
├── test_threshold.py
├── test_util.py
├── test_vscgpm.py
├── test_vtab.py
└── threshold.py
/.gitignore:
--------------------------------------------------------------------------------
1 | /bayeslite.egg-info/
2 | /build/
3 | /dist/
4 | /external/lemonade/dist/lemonade/*.pyc
5 | /shell/tests/thooks.pyc
6 | /src/grammar.out
7 | /src/grammar.py
8 | /src/grammar.sha256
9 | /src/grammar.sha256.tmp
10 | /src/backends/cgpm_alter/grammar.out
11 | /src/backends/cgpm_alter/grammar.py
12 | /src/backends/cgpm_alter/grammar.sha256
13 | /src/backends/cgpm_alter/grammar.sha256.tmp
14 | /src/backends/cgpm_analyze/grammar.out
15 | /src/backends/cgpm_analyze/grammar.py
16 | /src/backends/cgpm_analyze/grammar.sha256
17 | /src/backends/cgpm_analyze/grammar.sha256.tmp
18 | /src/backends/cgpm_schema/grammar.out
19 | /src/backends/cgpm_schema/grammar.py
20 | /src/backends/cgpm_schema/grammar.sha256
21 | /src/backends/cgpm_schema/grammar.sha256.tmp
22 | /src/version.py
23 | __pycache__/
24 | .cache/
25 | .eggs
26 |
27 | *.pyc
28 | *.swp
29 | .pytest_cache/
30 |
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | language: python
2 | dist: trusty
3 | env:
4 | global:
5 | - PACKAGE_NAME=bayeslite
6 | # get all the branches referencing this commit
7 | - REAL_BRANCH=$(git ls-remote origin | sed -n "\|$TRAVIS_COMMIT\s\+refs/heads/|{s///p}")
8 |
9 | python:
10 | - 2.7
11 | install:
12 | - wget https://repo.continuum.io/miniconda/Miniconda2-latest-Linux-x86_64.sh -O miniconda.sh
13 | - bash miniconda.sh -b -p $HOME/miniconda
14 | - export PATH="$HOME/miniconda/bin:$PATH"
15 | - hash -r
16 | - conda config --set always_yes yes --set changeps1 no
17 | - conda install -q conda=4.6.14 conda-build
18 | script:
19 | - export CONDA_PACKAGE_VERSION="${TRAVIS_TAG:-$(date +%Y.%m.%d)}"
20 | # remove leading v from tags if they exist
21 | - CONDA_PACKAGE_VERSION="$(sed s/^v// <<<$CONDA_PACKAGE_VERSION)"
22 | # use "edge" channel (latest master) for testing with cgpm/crosscat
23 | - conda build . -c probcomp/label/edge -c probcomp -c cidermole -c fritzo -c ursusest -c anaconda
24 | after_success:
25 | - bash conda/upload.sh
26 |
--------------------------------------------------------------------------------
/HACKING:
--------------------------------------------------------------------------------
1 | Guidelines for writing bayeslite software
2 |
3 | This working document contains guidelines for how to develop against
4 | the bayeslite API.
5 |
6 | * SQL/BQL parameters
7 |
8 | Use SQL/BQL parameters to pass strings and other values into SQL/BQL.
9 | DO NOT use format strings.
10 |
11 | DO: cursor.execute('UPDATE foo SET x = ? WHERE id = ?', (x, id))
12 | DON'T: cursor.execute("UPDATE foo SET x = '%s' WHERE id = %d" % (x, id))
13 | DON'T: cursor.execute("UPDATE foo SET x = '{}' WHERE id = {}".format(x, id))
14 |
15 | DO: cursor.execute('SELECT x, y FROM t WHERE z = ?', (z,))
16 | DON'T: cursor.execute('SELECT x, y FROM t WHERE z = ?', z)
17 | DON'T: cursor.execute('SELECT x, y FROM t WHERE z = {}'.format(z))
18 |
19 | Prefer named parameters if the query has more than one parameter and
20 | covers multiple lines:
21 |
22 | cursor = db.cursor().execute('''
23 | SELECT COUNT(*)
24 | FROM bayesdb_generator AS g, bayesdb_column AS c
25 | WHERE g.id = :generator_id
26 | AND g.tabname = c.tabname
27 | AND c.colno = :colno
28 | ''', {
29 | 'generator_id': generator_id,
30 | 'colno': colno,
31 | })
32 |
33 | If the tables and columns in the query are determined dynamically,
34 | then use bql_quote_name and format strings to assemble SQL/BQL
35 | queries. But prefer to avoid this by writing different queries or
36 | reusing subroutines that already do it, such as in bayeslite.core.
37 |
38 | DO: from bayeslite import bql_quote_name
39 | qt = bql_quote_name(table)
40 | qc = bql_quote_name(column)
41 | cursor.execute('SELECT %s FROM %s WHERE x = ?' % (qc, qt), (x,))
42 |
43 | DON'T: cursor.execute('SELECT %s FROM %s WHERE x = ?' % (column, table), (x,))
44 | DON'T: cursor.execute('SELECT %s FROM %s WHERE x = %d' % (qc, qt, x))
45 |
46 | * SQL updates
47 |
48 | When issuing an UPDATE command to sqlite3, if you can count the number
49 | of rows it should affect, do so and assert that it affected that many
50 | rows:
51 |
52 | total_changes = bdb._sqlite3.totalchanges()
53 | bdb.sql_execute('UPDATE ...', (...))
54 | assert bdb._sqlite3.totalchanges() - total_changes == 1
55 |
56 | * Randomization
57 |
58 | Avoid indiscriminate nondeterminism.
59 |
60 | All random choices should be made from PRNGs with seeds that the user
61 | can control, via the normal Python API and the bayeslite shell. Any
62 | actual nondeterminism should be clearly labelled as such, e.g. a
63 | future shell command to choose a seed from /dev/urandom.
64 |
65 | To write nondeterministic tests that explore an intentionally
66 | unpredictable source of inputs, instead of testing exactly the same
67 | input every time, write a deterministic function of a 32-byte seed and
68 | use the @stochastic decorator to vary it:
69 |
70 | from stochastic import stochastic
71 |
72 | @stochastic(max_runs=4, min_passes=2)
73 | def test_quagga(seed):
74 | frobnicate(seed)
75 |
76 | This defines test_quagga to be a function that accepts an *optional*
77 | seed argument. If you call it with zero arguments, then it will call
78 | frobnicate up to four times, and if it does not pass twice, it will
79 | raise a StochasticError that includes (a) the last exception with
80 | which frobnicate failed and (b) the last seed with which frobnicate
81 | failed.
82 |
83 | You can then retry using exactly the same seed by calling test_quagga
84 | manually with the seed as its argument:
85 |
86 | >>> test_quagga()
87 | StochasticError: [seed 434529bf3e5a16930701b55c39a90acfcd115ba0cada99f5af5448f3b96923dd]
88 | ZigException: something set us up the bomb
89 | >>> test_quagga('434529bf3e5a16930701b55c39a90acfcd115ba0cada99f5af5448f3b96923dd'.decode('hex'))
90 | ZigException: something set us up the bomb
91 |
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 | .PHONY: default-target
2 | default-target: build
3 |
4 | ###############################################################################
5 | ### User-settable variables
6 |
7 | # List of documentation formats to generate.
8 | DOCS = \
9 | $(SPHINX_DOCS) \
10 | pdf \
11 | # end of DOCS
12 |
13 | # Commands to run in the build process.  MAKEINDEX is used by the pdf target; make has no built-in default for it, so it must be set here.
14 | MAKEINDEX = makeindex
15 | PDFLATEX = pdflatex
16 | PYTHON = python
17 | SPHINX_BUILD = sphinx-build
18 | SPHINX_FLAGS =
19 | # Options for above commands.
20 | PDFLATEXOPTS =
21 | SPHINXOPTS =
22 | PYTHONOPTS =
23 | SETUPPYOPTS =
24 |
25 | ###############################################################################
26 | ### Targets
27 |
28 | # build: Build bayeslite.
29 | .PHONY: build
30 | build: setup.py
31 | $(PYTHON) $(PYTHONOPTS) setup.py $(SETUPPYOPTS) build
32 |
33 | # List of documentation formats we can generate with Sphinx. These
34 | # should be the formats that have been tested and confirmed to yield
35 | # reasonable output.
36 | SPHINX_DOCS = \
37 | html \
38 | latex \
39 | # end of SPHINX_DOCS
40 |
41 | # doc: Build the bayeslite documentation.
42 | .PHONY: doc
43 | doc: $(DOCS)
44 |
45 | .PHONY: $(SPHINX_DOCS)
46 | $(SPHINX_DOCS): pythenv.sh build
47 | rm -rf build/doc/$@ && \
48 | rm -rf build/doc/$@.tmp && \
49 | ./pythenv.sh $(SPHINX_BUILD) $(SPHINX_FLAGS) -b $@ doc \
50 | build/doc/$@.tmp && \
51 | mv -f build/doc/$@.tmp build/doc/$@
52 |
53 | .PHONY: pdf
54 | pdf: latex
55 | rm -rf build/doc/$@ && \
56 | rm -rf build/doc/$@.tmp && \
57 | mkdir build/doc/$@.tmp && \
58 | { tar -C build/doc/latex -c -f - . \
59 | | tar -C build/doc/$@.tmp -x -f -; } && \
60 | (cd build/doc/$@.tmp && \
61 | $(PDFLATEX) $(PDFLATEXOPTS) \\nonstopmode\\input bayeslite && \
62 | $(PDFLATEX) $(PDFLATEXOPTS) \\nonstopmode\\input bayeslite && \
63 | $(PDFLATEX) $(PDFLATEXOPTS) \\nonstopmode\\input bayeslite && \
64 | $(MAKEINDEX) -s python.ist bayeslite.idx; \
65 | $(PDFLATEX) $(PDFLATEXOPTS) \\nonstopmode\\input bayeslite && \
66 | $(PDFLATEX) $(PDFLATEXOPTS) \\nonstopmode\\input bayeslite && \
67 | :) && \
68 | mv -f build/doc/$@.tmp build/doc/$@
69 |
70 | # check: (Build bayeslite and) run the tests.
71 | .PHONY: check
72 | check: check.sh
73 | ./check.sh
74 |
75 | # clean: Remove build products.
76 | .PHONY: clean
77 | clean:
78 | -rm -rf build
79 | -rm -rf build/doc/*.tmp
80 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Bayeslite
2 |
3 | [![Build Status](https://travis-ci.org/probcomp/bayeslite.svg?branch=master)](https://travis-ci.org/probcomp/bayeslite)
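4 | [![Anaconda-Server Version Badge](https://anaconda.org/probcomp/bayeslite/badges/version.svg)](https://anaconda.org/probcomp/bayeslite)
5 | [![Anaconda-Server Installer Badge](https://anaconda.org/probcomp/bayeslite/badges/installer/conda.svg)](https://conda.anaconda.org/probcomp)
6 | [![Anaconda-Server Platform Badge](https://anaconda.org/probcomp/bayeslite/badges/platforms.svg)](https://anaconda.org/probcomp/bayeslite)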
7 |
8 | BQL interpretation and storage for BayesDB.
9 | Please see http://probcomp.csail.mit.edu/software/bayesdb for more information.
10 |
11 | ## Installing
12 |
13 | The easiest way to install bayeslite is to use the
14 | [package](https://anaconda.org/probcomp/bayeslite) on Anaconda Cloud.
15 | Please follow [these instructions](https://github.com/probcomp/iventure/blob/master/docs/conda.md).
16 |
17 | ## Expectations
18 |
19 | Users and contributors should expect **rapidly and dramatically
20 | shifting code and behavior** at this time.
21 |
22 | **THIS SOFTWARE SHOULD NOT BE EXPECTED TO TREAT YOUR DATA SECURELY.**
23 |
24 | ## Contributing
25 |
26 | Our compatibility aim is to work on probcomp machines and members'
27 | laptops, and to provide scripts and instructions that make it not too
28 | hard to re-create our environments elsewhere. Pulls for polished
29 | packaging, broad installability, etc. are not appropriate
30 | contributions at this time.
31 |
32 | Please run local tests before sending a pull request:
33 |
34 | ```
35 | $ ./check.sh
36 | ```
37 |
38 | That does not run the complete test suite, only the smoke tests, but
39 | is usually good enough. For the full suite:
40 |
41 | ```
42 | $ ./check.sh tests shell/tests
43 | ```
44 |
45 | ## Documentation
46 |
47 | To build the documentation, which is also available
48 | [online](http://probcomp.csail.mit.edu/dev/bayesdb/doc/),
49 | install [sphinx](http://www.sphinx-doc.org/en/master/)
50 | and then run the following command:
51 |
52 | ```
53 | $ make doc
54 | ```
55 |
56 | The result will be placed in `build/doc`, with one subdirectory per
57 | output format.
58 |
59 | To build only one output format, e.g. HTML because you don't want to
60 | install TeX:
61 |
62 | ```
63 | $ make html
64 | ```
65 |
--------------------------------------------------------------------------------
/check.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | # Rebuild bayeslite from scratch, then run its tests under pytest.
3 | set -Ceu  # -C: do not clobber files with '>'; -e: exit on error; -u: unset variables are errors
4 |
5 | : ${PYTHON:=python}  # use python unless the caller already set PYTHON
6 |
7 | root=`cd -- "$(dirname -- "$0")" && pwd`  # absolute path of the directory holding this script
8 |
9 | (
10 | set -Ceu
11 | cd -- "${root}"
12 | rm -rf build  # discard any previous build so the tests run against a fresh one
13 | "$PYTHON" setup.py build
14 | if [ $# -eq 0 ]; then
15 | # By default, when running all tests, skip tests that have
16 | # been marked for continuous integration by using __ci_ in
17 | # their names. (git grep __ci_ to find these.)
18 | ./pythenv.sh "$PYTHON" -m pytest --pyargs bayeslite -k "not __ci_"
19 | else
20 | # If args are specified, run all tests, including continuous
21 | # integration tests, for the selected components.
22 | ./pythenv.sh "$PYTHON" -m pytest "$@"
23 | fi
24 | )
25 |
--------------------------------------------------------------------------------
/conda/meta.yaml:
--------------------------------------------------------------------------------
1 | package:
2 | name: bayeslite
3 | version: {{ CONDA_PACKAGE_VERSION }}
4 |
5 | source:
6 | path: ../
7 |
8 | build:
9 | script: python setup.py install
10 |
11 | requirements:
12 | build:
13 | - cython 0.23.*
14 | - git
15 | - jsonschema
16 | - numpy 1.11.*
17 | - python 2.7.*
18 | run:
19 | - nomkl
20 | - apsw
21 | - cgpm
22 | - crosscat
23 | - jsonschema
24 | - loom 0.2.10
25 | - numpy 1.11.*
26 | - scipy 0.17.*
27 | - six 1.10.*
28 |
29 | test:
30 | requires:
31 | - apsw
32 | - cgpm
33 | - crosscat
34 | - loom 0.2.10
35 | - pandas 0.18.*
36 | - pytest 2.8.*
37 | - python 2.7.*
38 | commands:
39 | - python -m pytest --pyargs bayeslite -k "not __ci_"
40 |
41 | about:
42 | home: https://github.com/probcomp/bayeslite
43 | license: Apache
44 | license_file: LICENSE.txt
45 |
--------------------------------------------------------------------------------
/conda/upload.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | set -ev
3 | # Upload the package produced by conda build to Anaconda Cloud; the label depends on the tag/branch.
4 | # fyi, the logic below is necessary due to the fact that on a tagged build, TRAVIS_BRANCH and TRAVIS_TAG are the same
5 | # in the case of a tagged build, use the REAL_BRANCH env var defined in .travis.yml
6 | if [ -n "${TRAVIS_TAG}" ]; then
7 |     conda install anaconda-client
8 |     # if tag didn't come from master, add the "dev" label (REAL_BRANCH is quoted: it may be empty or list several branches)
9 |     if [ "${REAL_BRANCH}" = "master" ]; then
10 |         anaconda -t "${CONDA_UPLOAD_TOKEN}" upload -u "${CONDA_USER}" ~/miniconda/conda-bld/linux-64/"${PACKAGE_NAME}"-*.tar.bz2 --force
11 |     else
12 |         anaconda -t "${CONDA_UPLOAD_TOKEN}" upload -u "${CONDA_USER}" -l dev ~/miniconda/conda-bld/linux-64/"${PACKAGE_NAME}"-*.tar.bz2 --force
13 |     fi
14 | elif [ "${TRAVIS_BRANCH}" = "master" ]; then
15 |     if [ "${TRAVIS_EVENT_TYPE}" = "cron" ]; then
16 |         # don't build package for nightly cron.. this is just for test stability info
17 |         exit 0
18 |     else
19 |         conda install anaconda-client
20 |         anaconda -t "${CONDA_UPLOAD_TOKEN}" upload -u "${CONDA_USER}" -l edge ~/miniconda/conda-bld/linux-64/"${PACKAGE_NAME}"-*.tar.bz2 --force
21 |     fi
22 | else
23 |     exit 0
24 | fi
25 |
--------------------------------------------------------------------------------
/doc/api.rst:
--------------------------------------------------------------------------------
1 | Bayeslite API reference
2 | =======================
3 |
4 | :mod:`bayeslite`: Bayeslite API
5 | -------------------------------
6 |
7 | .. automodule:: bayeslite
8 | :members:
9 |
--------------------------------------------------------------------------------
/doc/conf.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | # Copyright (c) 2010-2016, MIT Probabilistic Computing Project
4 | #
5 | # Licensed under the Apache License, Version 2.0 (the "License");
6 | # you may not use this file except in compliance with the License.
7 | # You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 |
17 | """Sphinx configuration file."""
18 |
19 | extensions = [
20 | 'sphinx.ext.autodoc',
21 | ]
22 |
23 | copyright = '2010-2016, MIT Probabilistic Computing Project'
24 | master_doc = 'index'
25 | project = 'bayeslite'
26 | release = '0.1.3rc1'
27 | version = '0.1.3'
28 |
29 | nitpicky = True
30 | html_theme = 'sphinxdoc'
31 |
--------------------------------------------------------------------------------
/doc/index.rst:
--------------------------------------------------------------------------------
1 | bayeslite: A probabilistic database built on SQLite 3
2 | =====================================================
3 |
4 | Bayeslite is a probabilistic database built on `SQLite 3
5 | <https://www.sqlite.org/>`__. In addition to SQL queries on
6 | conventional SQL tables, it supports probabilistic BQL queries on
7 | generative models for data in a table.
8 |
9 | Quick start for querying a pre-analyzed database::
10 |
11 | import bayeslite
12 | bdb = bayeslite.bayesdb_open("foo.bdb")
13 | cursor = bdb.execute("SOME BQL QUERY")
14 | ...
15 |
16 | .. toctree::
17 | :maxdepth: 2
18 |
19 | api
20 | bql
21 | internals
22 |
23 | If you would like to analyze your own data with BayesDB, please
24 | contact bayesdb@mit.edu to participate in our research project.
25 |
26 | .. toctree::
27 | :maxdepth: 1
28 |
29 | analysis
30 |
31 | Indices and tables
32 | ==================
33 |
34 | * :ref:`genindex`
35 | * :ref:`modindex`
36 | * :ref:`search`
37 |
--------------------------------------------------------------------------------
/doc/internals.rst:
--------------------------------------------------------------------------------
1 | Bayeslite API internals
2 | =======================
3 |
4 | :mod:`bayeslite.compiler`: BQL-to-SQL query compiler
5 | ----------------------------------------------------
6 |
7 | .. automodule:: bayeslite.compiler
8 | :members:
9 |
10 | :mod:`bayeslite.bql`: BQL query and command execution
11 | -----------------------------------------------------
12 |
13 | .. automodule:: bayeslite.bql
14 | :members:
15 |
16 | :mod:`bayeslite.core`: BayesDB object model
17 | -------------------------------------------
18 |
19 | .. automodule:: bayeslite.core
20 | :members:
21 |
22 | :mod:`bayeslite.parse`: BQL parser
23 | ----------------------------------
24 |
25 | .. automodule:: bayeslite.parse
26 | :members:
27 |
28 | :mod:`bayeslite.sqlite3_util`: SQLite 3 utilities
29 | -------------------------------------------------
30 |
31 | .. automodule:: bayeslite.sqlite3_util
32 | :members:
33 |
34 | :mod:`bayeslite.stats`: Statistics utilities
35 | --------------------------------------------
36 |
37 | .. automodule:: bayeslite.stats
38 | :members:
39 |
40 | :mod:`bayeslite.math_util`: Math utilities
41 | ------------------------------------------
42 |
43 | .. automodule:: bayeslite.math_util
44 | :members:
45 |
46 | :mod:`bayeslite.util`: Miscellaneous utilities
47 | ----------------------------------------------
48 |
49 | .. automodule:: bayeslite.util
50 | :members:
51 |
--------------------------------------------------------------------------------
/docker/ubuntu1604:
--------------------------------------------------------------------------------
1 | FROM ubuntu:16.04
2 | MAINTAINER MIT Probabilistic Computing Project
3 |
4 | ARG check_args
5 |
6 | RUN apt-get update -qq && apt-get install -qq -y \
7 | git \
8 | python-apsw \
9 | python-jsonschema \
10 | python-numpy \
11 | python-pandas \
12 | python-pexpect \
13 | python-pytest \
14 | python-scipy \
15 | python-six \
16 | python-sklearn \
17 | ; # end of package list
18 |
19 | ADD . /bayeslite
20 | WORKDIR /bayeslite
21 | RUN \
22 | ./docker/deps/cgpm/pythenv.sh \
23 | ./docker/deps/crosscat/pythenv.sh \
24 | ./check.sh ${check_args}
25 |
26 | RUN python setup.py sdist
27 | RUN python setup.py bdist
28 |
--------------------------------------------------------------------------------
/external/README:
--------------------------------------------------------------------------------
1 | This directory contains software that is used in bayeslite but written
2 | and maintained externally by someone else. We will use the following
3 | organization in order to:
4 |
5 | - Use external software that is not in Ubuntu and does not use Git.
6 | - Keep history of our updates to external software.
7 | - Make local changes as we need and send patches upstream.
8 | - Merge our local changes into external updates.
9 | - Avoid imposing the mess of submodules on users.
10 |
11 | * Directory layout:
12 |
13 | Each subdirectory corresponds to one external package and has the
14 | following contents:
15 |
16 | COPYING summary of copying terms
17 | README notes on upstream, link to upstream web site, &c.
18 | dist/ subdirectory containing the external distribution
19 | prepare.sh script to prepare a distribution for import
20 | ... other supporting files
21 |
22 | prepare.sh should run in the top-level directory of the external
23 | distribution, which will then be imported under dist/. It should only
24 | do clean-ups necessary to make the distribution fit for inclusion in
25 | our tree, such as deleting binary files. Local changes, such as bug
26 | fixes, should be made in separate commits.
27 |
28 | * To import a new external package:
29 |
30 | 1. Write the COPYING and README files and the prepare.sh script, and
31 | commit them.
32 |
33 | 2. Create a temporary Git repository for the package:
34 |
35 | % cd /tmp
36 | % mkdir repo
37 | % cd repo
38 | % git init
39 |
40 | 3. Extract proj-1.2 in external/proj/dist in the temporary repository:
41 |
42 | % mkdir -p external/proj
43 | % cd external/proj
44 | % gunzip -c < /tmp/proj-1.2.tar.gz | tar xf -
45 | % mv proj-1.2 dist
46 |
47 | 4. Run the prepare.sh script:
48 |
49 | % (cd dist && sh /path/to/bayeslite/external/proj/prepare.sh)
50 |
51 | 5. Commit the result in the temporary repository:
52 |
53 | % git add dist
54 | % git commit
55 |
56 | The commit message should summarize what proj is, specify where
57 | proj-1.2.tar.gz came from, and give its SHA-256 hash.
58 |
59 | 6. In the main repository, create a vendor branch and release tag:
60 |
61 | % cd /path/to/bayeslite
62 | % git fetch /tmp/repo master:vendor/proj
63 | % git tag vendor/proj-1.2 vendor/proj
64 |
65 | 7. Merge proj-1.2 into the branch you're working on:
66 |
67 | % git merge vendor/proj-1.2
68 |
69 | 8. Push the vendor branch upstream so others can use it:
70 |
71 | % git push origin vendor/proj
72 |
73 | * To see what version we have most recently merged:
74 |
75 | % git log --merges external/proj
76 |
77 | Note: This requires that every `git merge' involved use the release
78 | tag (vendor/proj-1.2), not the vendor branch (vendor/proj).
79 | Everything else will work if you use the vendor branch, so be careful.
80 |
81 | Note: You can't use `git log --merges external/proj/dist', apparently:
82 | it skips the very first merge. Go figure.
83 |
84 | * To see what local changes there are in an external package:
85 |
86 | % git diff vendor/proj-1.2 HEAD -- ./external/proj
87 | % git show vendor/proj-1.2..HEAD -- ./external/proj/dist
88 |
89 | * To update an existing external package:
90 |
91 | 1. Update prepare.sh and check for changes to copying terms.
92 |
93 | 2. Check out the vendor branch in an empty clone of the repository:
94 |
95 | % git clone --no-checkout -b vendor/proj /path/to/bayeslite /tmp/repo
96 |
97 | 3. Extract proj-1.3 in external/proj/dist in the clone:
98 |
99 | % cd /tmp/repo
100 | % mkdir -p external/proj
101 | % cd external/proj
102 | % gunzip -c < /tmp/proj-1.3.tar.gz | tar xf -
103 | % mv proj-1.3 dist
104 |
105 | 4. Run the prepare script:
106 |
107 | % (cd dist && /path/to/bayeslite/external/proj/prepare.sh)
108 |
109 | 5. Commit and tag the update:
110 |
111 | % git add --all dist
112 | % git commit
113 | % git tag vendor/proj-1.3
114 |
115 | 6. In the main repository, update the vendor branch and tag it:
116 |
117 | % cd /path/to/bayeslite
118 | % git fetch /tmp/repo vendor/proj
119 | % git tag vendor/proj-1.3 vendor/proj
120 |
121 | 7. Finally, merge the new release tag:
122 |
123 | % git merge vendor/proj-1.3
124 |
--------------------------------------------------------------------------------
/external/lemonade/COPYING:
--------------------------------------------------------------------------------
1 | public domain
2 |
--------------------------------------------------------------------------------
/external/lemonade/README:
--------------------------------------------------------------------------------
1 | lemonade - LALR parser generator for Python
2 |
3 | https://pypi.python.org/pypi/lemonade
4 |
5 | Ported to Python from the LEMON parser generator by D. Richard Hipp:
6 |
7 | http://www.hwaci.com/sw/lemon/
8 |
9 | We have back-ported some bug fixes from LEMON into our copy of
10 | lemonade, since upstream does not appear to be maintained. (Upstream
11 | author responded to email but then vanished.)
12 |
--------------------------------------------------------------------------------
/external/lemonade/dist/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include MANIFEST.in
2 | include lemonade/lempar.tmpl
3 | recursive-include examples *.py *.y
4 |
--------------------------------------------------------------------------------
/external/lemonade/dist/PKG-INFO:
--------------------------------------------------------------------------------
1 | Metadata-Version: 1.0
2 | Name: lemonade
3 | Version: 1.0b1
4 | Summary: Port of the LEMON Parser Generator
5 | Home-page: UNKNOWN
6 | Author: Leif Strand
7 | Author-email: leif@cacr.caltech.edu
8 | License: UNKNOWN
9 | Description: UNKNOWN
10 | Platform: UNKNOWN
11 | Classifier: License :: Public Domain
12 | Classifier: Development Status :: 4 - Beta
13 | Classifier: Programming Language :: Python :: 2
14 | Classifier: Intended Audience :: Developers
15 | Classifier: Topic :: Software Development :: Code Generators
16 | Classifier: Topic :: Software Development :: Compilers
17 |
--------------------------------------------------------------------------------
/external/lemonade/dist/README:
--------------------------------------------------------------------------------
1 |
2 | Lemonade is a Python port of the LEMON Parser Generator written by
3 | D. Richard Hipp:
4 |
5 | http://www.hwaci.com/sw/lemon/
6 |
7 | Lemonade can be used in the traditional fashion to create a standalone
8 | parser:
9 |
10 | lemonade gram.y
11 |
12 | The above command generates "gram.py", which you can include in your
13 | project.
14 |
15 | Since Python is a dynamic language, Lemonade could also enable client
16 | software to generate a parser from a user-supplied .y file, and then
17 | use the generated parser on the fly.
18 |
19 | This is the beta release of Lemonade. There is no documentation yet.
20 | You may find the original LEMON documentation helpful:
21 |
22 | http://www.hwaci.com/sw/lemon/lemon.html
23 |
24 | However, many of LEMON's "%" directives are irrelevant in Python;
25 | therefore, they have been eliminated in Lemonade. Further, Lemonade
26 | does not allow code fragments ("{}") within the grammar file.
27 | Instead, the reduce actions are specified in a separate delegate
28 | class. See the 'examples' directory for an example.
29 |
30 | ----
31 | Leif Strand
32 | August 28, 2012
33 |
34 |
--------------------------------------------------------------------------------
/external/lemonade/dist/bin/lemonade:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | from lemonade.main import main
4 | import sys
5 |
6 | sys.exit(main(sys.argv))
7 |
8 |
--------------------------------------------------------------------------------
/external/lemonade/dist/examples/calc/calc.py:
--------------------------------------------------------------------------------
1 |
2 | import sys
3 |
4 |
5 | def generateGrammar():
6 | from lemonade.main import generate
7 | from os.path import join, dirname
8 | from StringIO import StringIO
9 |
10 | inputFile = join(dirname(__file__), "gram.y")
11 | outputStream = StringIO()
12 | generate(inputFile, outputStream)
13 | return outputStream.getvalue()
14 |
15 |
16 | # Generate the parser source from gram.y and execute it in this module's namespace.
17 | exec generateGrammar() in globals()
18 | # NOTE(review): PLUS, MINUS, DIVIDE, TIMES, NUM and Parser are not defined in this file; presumably the exec above provides them -- confirm.
19 |
20 | #
21 | # the lexer
22 | #
23 | # Operator characters and their token codes; tokenize() treats any other text as a NUM.
24 | tokenType = {
25 | '+': PLUS,
26 | '-': MINUS,
27 | '/': DIVIDE,
28 | '*': TIMES,
29 | }
30 |
def tokenize(input):
    """Split an arithmetic expression into (type, value) tokens.

    Operators are yielded as ``(tokenType[op], None)``.  Every other
    fragment is converted with ``float`` and yielded as ``(NUM, number)``.
    Whitespace between tokens is discarded.

    Raises ValueError if a non-operator fragment is not a valid float.
    """
    import re

    # "-" comes first in the character class so that it is a literal minus
    # sign.  Written as "[+-/*]" it denotes the range "+".."/", which also
    # matches "," and "." and so chopped numbers such as "1.5" apart.
    # Whitespace is matched with "+" (not "*") so the pattern can never
    # match the empty string.
    for text in re.split(r"([-+/*])|\s+", input):
        # re.split() yields None for the operator group when whitespace
        # matched, and "" between two adjacent separators (as in "1 + 2").
        # Neither is a token.
        if not text:
            continue
        kind = tokenType.get(text)
        if kind is None:
            kind = NUM
            value = float(text)
        else:
            value = None
        yield (kind, value)
45 |
46 |
47 | #
48 | # the delegate
49 | #
50 |
class Delegate(object):
    """Delegate object handed to the generated Parser.

    Besides the three notification hooks (accept, parse_failed,
    syntax_error) it provides one method per rule label used in gram.y
    (sub, add, mul, div, num, print_result); these are the reduce actions.
    """

    def accept(self):
        """Hook with nothing to do for the calculator."""
        return

    def parse_failed(self):
        """Abort with AssertionError.

        Raised explicitly rather than with ``assert False`` so the failure
        is still reported when Python runs with -O, which strips asserts.
        """
        raise AssertionError("Giving up. Parser is hopelessly lost...")

    def syntax_error(self, token):
        """Report a syntax error on stderr; *token* is not used."""
        sys.stderr.write("Syntax error!\n")
        return


    #
    # reduce actions
    #

    def sub(self, a, b):
        return a - b

    def add(self, a, b):
        return a + b

    def mul(self, a, b):
        return a * b

    def div(self, a, b):
        return a / b

    def num(self, value):
        return value

    def print_result(self, result):
        """Write the value of the whole expression to stdout."""
        sys.stdout.write("%s\n" % (result,))
        return
77 |
78 |
# Parser is not defined in this file's text; presumably it comes from the
# generated grammar that was exec'd into globals() above -- confirm.
p = Parser(Delegate())
#p.trace(sys.stdout, "# ")

# Exactly one command-line argument is expected: the expression to
# evaluate.  Anything else prints a usage line on stderr.
if len(sys.argv) == 2:
    p.parse(tokenize(sys.argv[1]))
else:
    print >>sys.stderr, "usage: %s EXPRESSION" % sys.argv[0]
86 |
87 |
--------------------------------------------------------------------------------
/external/lemonade/dist/examples/calc/gram.y:
--------------------------------------------------------------------------------
1 |
2 | %left PLUS MINUS.
3 | %left DIVIDE TIMES.
4 |
5 | program(print_result) ::= expr(result).
6 |
7 | expr(sub) ::= expr(a) MINUS expr(b).
8 | expr(add) ::= expr(a) PLUS expr(b).
9 | expr(mul) ::= expr(a) TIMES expr(b).
10 | expr(div) ::= expr(a) DIVIDE expr(b).
11 |
12 | expr(num) ::= NUM(value).
13 |
14 |
--------------------------------------------------------------------------------
/external/lemonade/dist/lemonade/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/probcomp/bayeslite/211e5eb3821a464a2fffeb9d35e3097e1b7a99ba/external/lemonade/dist/lemonade/__init__.py
--------------------------------------------------------------------------------
/external/lemonade/dist/lemonade/action.py:
--------------------------------------------------------------------------------
1 | '''
2 | Routines processing parser actions in the LEMON parser generator.
3 | '''
4 |
5 | from struct import *
6 |
7 |
def actioncmp(ap1, ap2):
    '''Three-way comparison of two parser actions, for sorting.

    Returns a negative, zero, or positive number when ap1 sorts before,
    equal to, or after ap2.  Actions are ordered by the index of their
    symbol (sp), then by action type; two REDUCE actions that are still
    tied are ordered by the index of their rule (x.rp).
    '''
    rc = ap1.sp.index - ap2.sp.index
    if rc != 0:
        return rc

    rc = ap1.type - ap2.type
    if rc != 0:
        return rc

    if ap1.type == REDUCE:
        rc = ap1.x.rp.index - ap2.x.rp.index

    # Two distinct actions are never expected to compare equal.
    assert rc != 0 or ap1 == ap2
    return rc
20 |
21 |
def Action_sort(ap):
    '''Sort the linked list of parser actions headed by "ap".

    The list is linked through the "next" attribute and ordered with
    actioncmp.  Returns whatever msort returns.
    '''
    from msort import msort
    return msort(ap, 'next', actioncmp)
27 |
28 |
def Action_add(app, type, sp, arg):
    '''Create an action in front of the list "app" and return it.

    The new action's "next" is the old list head.  "arg" is stored in
    x.stp when the action type is SHIFT and in x.rp for every other type.
    '''
    entry = action(
        next = app,
        type = type,
        sp = sp,
        collide = None,
        stp = None,
        rp = None,
        )
    if type == SHIFT:
        entry.x.stp = arg
    else:
        entry.x.rp = arg
    return entry
44 |
45 |
--------------------------------------------------------------------------------
/external/lemonade/dist/lemonade/error.py:
--------------------------------------------------------------------------------
1 | '''
2 | Code for printing error message.
3 | '''
4 |
5 |
def findbreak(msg, min, max):
    '''Find a good place to break "msg" so that its length is at least
    "min" but no more than "max".  Make the point as close to max as
    possible.

    Returns the index at which the line should end: the last space seen,
    just past the last usable hyphen, the first newline, or the end of
    the string, whichever the scan of msg[min..max] settles on.  If none
    of those occurs the result is "min".

    "msg" is never modified.  Assigning to msg[i] raises TypeError for a
    str, so printing tabs as spaces is left to the caller.
    '''
    spot = min
    for i in range(min, max + 1):
        if i >= len(msg):
            # Ran off the end: the whole remainder fits.
            spot = i
            break
        c = msg[i]
        if c == '\n':
            # A newline always forces a break right here.
            spot = i
            break
        if c == '-' and i < max - 1:
            # Break after the hyphen so it stays on this line.
            spot = i + 1
        if c == ' ':
            spot = i
    return spot


# The error message is split across multiple lines if necessary.  The
# splits occur at a space, if there is a space available near the end
# of the line.

LINEWIDTH = 79    # Max width of any output line
PREFIXLIMIT = 30  # Max width of the prefix on each line

def ErrorMsg(filename, lineno, format, *args):
    '''Print "format % args" on stdout, wrapped to LINEWIDTH columns.

    Every output line starts with "filename:lineno: ", or "filename: "
    when lineno is not positive.  The filename is truncated to
    PREFIXLIMIT - 10 characters.
    '''
    from ccruft import fprintf
    from sys import stdout

    # Prepare a prefix to be prepended to every output line
    if lineno > 0:
        prefix = "%.*s:%d: " % (PREFIXLIMIT - 10, filename, lineno)
    else:
        prefix = "%.*s: " % (PREFIXLIMIT - 10, filename)

    # Generate the error message
    prefixsize = len(prefix)
    availablewidth = LINEWIDTH - prefixsize

    # Remove trailing '\n's from the error message.  rstrip() also copes
    # with an empty message, where indexing errmsg[-1] raises IndexError.
    errmsg = (format % args).rstrip('\n')

    # Print the error message
    base = 0
    while base < len(errmsg):
        end = findbreak(errmsg[base:], 0, availablewidth)
        restart = base + end

        # Swallow the newline that forced the break (if any), then the
        # run of spaces that follows the break point.
        if restart < len(errmsg) and errmsg[restart] == '\n':
            restart += 1
        while restart < len(errmsg) and errmsg[restart] == ' ':
            restart += 1

        if restart == base:
            # No break point within the available width (one very long
            # word): cut it hard, otherwise this loop would never end.
            end = availablewidth
            if end < 1:
                end = 1
            restart = base + end

        # Tabs are printed as spaces.
        line = errmsg[base:base + end].replace('\t', ' ')
        fprintf(stdout, "%s%.*s\n", prefix, end, line)
        base = restart

    return
68 |
69 |
--------------------------------------------------------------------------------
/external/lemonade/dist/lemonade/exceptions.py:
--------------------------------------------------------------------------------
1 |
class BadGrammarError(Exception):
    """Base class of the grammar errors defined in this module."""
    pass
4 |
class EmptyGrammarError(BadGrammarError):
    """A BadGrammarError subclass.

    NOTE(review): presumably raised for a grammar that contains no
    rules; the raising code is not in this module -- confirm.
    """
    pass
7 |
8 |
--------------------------------------------------------------------------------
/external/lemonade/dist/lemonade/msort.py:
--------------------------------------------------------------------------------
1 | #
2 | # A generic merge-sort program.
3 | #
4 | # USAGE:
5 | # Let "ptr" be the object at the head of a None-terminated linked
6 | # list. Then to sort the list call:
7 | #
8 | # ptr = msort(ptr, 'next', cmpfnc)
9 | #
10 | # In the above, "cmpfnc" is a function which compares two elements
11 | # of the list and returns an integer, as in strcmp (negative, zero,
12 | # or positive). The second argument is the name of the attribute
13 | # that links each element to its successor. It is read and written
14 | # with getattr/setattr, so every element in the list must use the
15 | # same attribute name.
16 | #
17 | # The function returns a new pointer which is the head of the list
18 | # after sorting.
19 | #
20 | # ALGORITHM:
21 | # Merge-sort.
22 | #
23 |
24 |
25 | #
26 | # Inputs:
27 | # a: A sorted, null-terminated linked list. (May be null).
28 | # b: A sorted, null-terminated linked list. (May be null).
29 | # cmp: A pointer to the comparison function.
30 | # next: Attribute name of "next" field.
31 | #
32 | # Return Value:
33 | # A pointer to the head of a sorted list containing the elements
34 | # of both a and b.
35 | #
36 | # Side effects:
37 | # The "next" pointers for elements in the lists a and b are
38 | # changed.
39 | #
def merge(a, b, cmp, next):
    """Merge two sorted, None-terminated linked lists into one sorted list.

    a, b:  heads of the two lists (either may be None).
    cmp:   comparison function returning an integer, as in strcmp.
    next:  name of the attribute linking an element to its successor.

    Returns the head of the merged list.  The "next" attributes of the
    elements are rewritten in place.  When cmp() reports a tie the
    element from "a" comes first.
    """
    if a is None:
        return b
    if b is None:
        return a

    # The smaller of the two heads becomes the head of the result.
    if cmp(a, b) <= 0:
        head = a
        a = getattr(a, next)
    else:
        head = b
        b = getattr(b, next)

    # End of a list is tested with "is not None", never by truthiness:
    # an element that evaluates false (e.g. one defining __len__) is
    # still an element and must not cut the merge short.
    tail = head
    while a is not None and b is not None:
        if cmp(a, b) <= 0:
            setattr(tail, next, a)
            tail = a
            a = getattr(a, next)
        else:
            setattr(tail, next, b)
            tail = b
            b = getattr(b, next)

    # At most one list still has elements; append it whole.
    if a is not None:
        setattr(tail, next, a)
    else:
        setattr(tail, next, b)

    return head
70 |
71 |
72 | #
73 | # Inputs:
74 | # list: Pointer to a singly-linked list of structures.
75 | # next: Attribute name of "next" field.
76 | # cmp: A comparison function.
77 | #
78 | # Return Value:
79 | # A pointer to the head of a sorted list containing the elements
80 | # originally in list.
81 | #
82 | # Side effects:
83 | # The "next" pointers for elements in list are changed.
84 | #
85 |
86 | LISTSIZE = 30
87 |
def msort(list, next, cmp):
    """Merge-sort a None-terminated linked list; return the new head.

    list:  head of the list (may be None).
    next:  name of the attribute linking an element to its successor.
    cmp:   comparison function returning an integer, as in strcmp.

    The "next" attributes of the elements are rewritten in place.
    """
    # Slots for sorted sub-lists awaiting a merge; None marks an empty
    # slot.  Named "bins" so the builtin "set" is not shadowed.
    bins = [None] * LISTSIZE

    # End of list and empty slots are tested with "is not None" rather
    # than truthiness, so elements that evaluate false are still sorted.
    while list is not None:
        # Detach the first element as a one-element list.
        ep = list
        list = getattr(list, next)
        setattr(ep, next, None)

        # Carry it upward, merging with every occupied slot on the way,
        # and park the result in the first free slot (or the last one).
        i = 0
        while i < LISTSIZE - 1 and bins[i] is not None:
            ep = merge(ep, bins[i], cmp, next)
            bins[i] = None
            i += 1
        bins[i] = ep

    # Fold whatever is left in the slots into a single list.
    ep = None
    for pending in bins:
        if pending is not None:
            ep = merge(pending, ep, cmp, next)

    return ep
108 |
109 |
--------------------------------------------------------------------------------
/external/lemonade/dist/lemonade/plink.py:
--------------------------------------------------------------------------------
1 | '''
2 | Routines processing configuration follow-set propagation links in the
3 | LEMON parser generator.
4 | '''
5 |
6 |
def Plink_add(plpp, cfp):
    '''Put a new plink holding "cfp" in front of the list "plpp".

    Returns the new plink, which is the new head of the list.
    '''
    from struct import plink
    return plink(next = plpp, cfp = cfp)
15 |
16 |
def Plink_copy(to, _from):
    '''Move every plink of the list "_from" onto the front of "to".

    The plinks are relinked one at a time, so they end up in reverse
    order ahead of the original "to" list.  Returns the new head.
    '''
    head = to
    node = _from
    while node:
        # The right-hand side is evaluated first: relink the node in
        # front of the result, then step to its old successor.
        node.next, head, node = head, node, node.next
    return head
25 |
26 |
--------------------------------------------------------------------------------
/external/lemonade/dist/lemonade/set.py:
--------------------------------------------------------------------------------
1 | '''
2 | Set manipulation routines for the LEMON parser generator.
3 | '''
4 |
5 | size = 0
6 |
7 |
def SetSize(n):
    '''Store n + 1 in the module-level "size".'''
    global size
    size = n + 1
13 |
14 |
def SetNew():
    '''Return a new, empty set: a list of "size" False flags.'''
    return [False for _ in range(size)]
18 |
19 |
def SetAdd(s, e):
    '''Mark element "e" as a member of set "s".

    Returns True when "e" was newly added, False when it was already a
    member.
    '''
    # Read the old flag and set the new one in a single assignment.
    already_there, s[e] = s[e], True
    return not already_there
27 |
28 |
def SetUnion(s1, s2):
    '''Merge set s2 into set s1 in place.

    Returns True if at least one element was added to s1.
    '''
    changed = False
    for idx in range(size):
        if s2[idx] and not s1[idx]:
            s1[idx] = True
            changed = True
    return changed
39 |
40 |
def SetFind(X, Y):
    '''Return the membership flag of element Y in set X.'''
    is_member = X[Y]
    return is_member
44 |
45 |
--------------------------------------------------------------------------------
/external/lemonade/dist/setup.py:
--------------------------------------------------------------------------------
1 |
2 | from distutils.core import setup
3 |
# Distribution metadata for the "lemonade" package.
setup(name = 'lemonade',
      version = '1.0b1',
      description = 'Port of the LEMON Parser Generator',

      # Ships the bin/lemonade script and the lemonade package, plus
      # the lempar.tmpl data file that lives inside the package.
      scripts = ['bin/lemonade'],
      packages = ['lemonade'],
      package_data = { 'lemonade': ['lempar.tmpl'] },

      classifiers = [
          'License :: Public Domain',
          'Development Status :: 4 - Beta',
          'Programming Language :: Python :: 2',
          'Intended Audience :: Developers',
          'Topic :: Software Development :: Code Generators',
          'Topic :: Software Development :: Compilers',
          ],

      author = 'Leif Strand',
      author_email = 'leif@cacr.caltech.edu',
      )
24 |
--------------------------------------------------------------------------------
/external/plex/COPYING:
--------------------------------------------------------------------------------
1 | Plex is free of any restrictions. You can use it, redistribute it,
2 | sell it, whatever you want. All I ask is that you give me credit if
3 | you distribute any code derived from it.
4 |
5 |
6 | Greg Ewing,
7 | Computer Science Department,
8 | University of Canterbury,
9 | Christchurch,
10 | New Zealand
11 |
12 | greg@cosc.canterbury.ac.nz
13 |
--------------------------------------------------------------------------------
/external/plex/README:
--------------------------------------------------------------------------------
1 | plex - lexical analyzer
2 |
3 | http://www.cosc.canterbury.ac.nz/greg.ewing/python/Plex/
4 |
--------------------------------------------------------------------------------
/external/plex/dist/Makefile:
--------------------------------------------------------------------------------
1 | VERSION = 1.1.5
2 | TAR = ../Plex-$(VERSION).tar
3 | tar: clean
4 | tar cvf $(TAR) *
5 | rm -f $(TAR).gz
6 | gzip $(TAR)
7 |
8 | clean:
9 | rm -f */*.pyc *~ */*~ */*.dump tests/*.out2 tests/*.err
10 |
11 |
--------------------------------------------------------------------------------
/external/plex/dist/Plex/Actions.py:
--------------------------------------------------------------------------------
1 | #=======================================================================
2 | #
3 | # Python Lexical Analyser
4 | #
5 | # Actions for use in token specifications
6 | #
7 | #=======================================================================
8 |
class Action:
    """Base class of the Plex actions defined in this module."""

    def same_as(self, other):
        """Default equivalence test: true only for the very same object."""
        return other is self
13 |
14 |
class Return(Action):
    """
    Internal Plex action whose perform() yields a fixed |value|,
    which becomes the value of the associated token.
    """

    value = None

    def __init__(self, value):
        self.value = value

    def perform(self, token_stream, text):
        # The stream and the matched text play no part in the result.
        return self.value

    def same_as(self, other):
        """True if |other| is a Return carrying an equal value."""
        if not isinstance(other, Return):
            return False
        return self.value == other.value

    def __repr__(self):
        return "Return(%r)" % (self.value,)
34 |
35 |
class Call(Action):
    """
    Internal Plex action which causes a function to be called.

    perform() calls function(token_stream, text) and returns whatever
    the function returns.
    """

    function = None

    def __init__(self, function):
        self.function = function

    def perform(self, token_stream, text):
        return self.function(token_stream, text)

    def __repr__(self):
        # Not every callable has a __name__ (functools.partial objects
        # and instances that define __call__ do not), so fall back to
        # the callable's own repr instead of raising AttributeError.
        name = getattr(self.function, "__name__", None)
        if name is None:
            name = repr(self.function)
        return "Call(%s)" % name

    def same_as(self, other):
        """True if |other| is a Call wrapping the very same function."""
        return isinstance(other, Call) and self.function is other.function
54 |
55 |
class Begin(Action):
    """
    Begin(state_name) is a Plex action which makes the Scanner enter
    the state |state_name|.  See the docstring of Plex.Lexicon for
    more information.
    """

    state_name = None

    def __init__(self, state_name):
        self.state_name = state_name

    def perform(self, token_stream, text):
        # Switches the stream's state and produces no token value.
        stream = token_stream
        stream.begin(self.state_name)

    def __repr__(self):
        return "Begin(%s)" % self.state_name

    def same_as(self, other):
        """True if |other| is a Begin naming an equal state."""
        if not isinstance(other, Begin):
            return False
        return self.state_name == other.state_name
76 |
77 |
class Ignore(Action):
    """
    IGNORE is a Plex action which causes its associated token
    to be ignored. See the docstring of Plex.Lexicon for more
    information.
    """
    # The docstring above is worded for the IGNORE instance: the module
    # copies it onto that instance right after this class definition.

    def perform(self, token_stream, text):
        # Always None, whatever text was matched.
        return None

    def __repr__(self):
        return "IGNORE"
89 |
90 | IGNORE = Ignore()
91 | IGNORE.__doc__ = Ignore.__doc__
92 |
class Text(Action):
    """
    TEXT is a Plex action which causes the text of a token to
    be returned as the value of the token. See the docstring of
    Plex.Lexicon for more information.
    """
    # The docstring above is worded for the TEXT instance: the module
    # copies it onto that instance right after this class definition.

    def perform(self, token_stream, text):
        # The matched text itself is the result.
        return text

    def __repr__(self):
        return "TEXT"
105 |
106 | TEXT = Text()
107 | TEXT.__doc__ = Text.__doc__
108 |
109 |
110 |
--------------------------------------------------------------------------------
/external/plex/dist/Plex/Errors.py:
--------------------------------------------------------------------------------
1 | #=======================================================================
2 | #
3 | # Python Lexical Analyser
4 | #
5 | # Exception classes
6 | #
7 | #=======================================================================
8 |
9 | import exceptions
10 |
class PlexError(exceptions.Exception):
    """Common base class of the exceptions defined in this module."""
    # Default message text; AmbiguousAction overrides it.
    message = ""
13 |
class PlexTypeError(PlexError, TypeError):
    """A PlexError that is also a TypeError; either type catches it."""
    pass
16 |
class PlexValueError(PlexError, ValueError):
    """A PlexError that is also a ValueError; either type catches it."""
    pass
19 |
class InvalidRegex(PlexError):
    """A PlexError subclass.

    NOTE(review): presumably signals a malformed regular expression;
    the raising code is not in this module -- confirm.
    """
    pass
22 |
class InvalidToken(PlexError):
    """PlexError whose message is prefixed with the offending token number."""

    def __init__(self, token_number, message):
        text = "Token number %d: %s" % (token_number, message)
        PlexError.__init__(self, text)
27 |
class InvalidScanner(PlexError):
    """A PlexError subclass.

    NOTE(review): presumably signals a misconfigured Scanner; the
    raising code is not in this module -- confirm.
    """
    pass
30 |
class AmbiguousAction(PlexError):
    """Raised with a fixed message; the constructor takes no arguments."""
    message = "Two tokens with different actions can match the same string"

    def __init__(self):
        # Hand the message to the base class so that str(exc) and
        # tracebacks show it; otherwise the exception prints as empty.
        PlexError.__init__(self, self.message)
36 |
class UnrecognizedInput(PlexError):
    """Error recording where a scanner stopped and in which state.

    The constructor stores the scanner, the result of its position()
    call, and the state name.
    """
    scanner = None
    position = None
    state_name = None

    def __init__(self, scanner, state_name):
        self.scanner = scanner
        self.position = scanner.position()
        self.state_name = state_name

    def __str__(self):
        # The format consumes the position tuple followed by the state.
        # NOTE(review): position looks like (name, line, char) -- confirm.
        fields = self.position + (repr(self.state_name),)
        return "'%s', line %d, char %d: Token not recognised in state %s" % fields
50 |
51 |
52 |
53 |
--------------------------------------------------------------------------------
/external/plex/dist/Plex/Timing.py:
--------------------------------------------------------------------------------
1 | #
2 | # Get time in platform-dependent way
3 | #
4 |
5 | import os
6 | from sys import platform, exit, stderr
7 |
# Define time() and timekind for the current platform, or give up.
if platform == 'mac':
    # MacOS.GetTicks() divided by 60.0, reported as "real" time.
    import MacOS
    def time():
        return MacOS.GetTicks() / 60.0
    timekind = "real"
elif hasattr(os, 'times'):
    # Sum of the first two fields of os.times(), reported as "cpu" time.
    def time():
        first, second = os.times()[:2]
        return first + second
    timekind = "cpu"
else:
    # No usable clock: report the platform and exit with status 1.
    stderr.write(
        "Don't know how to get time on platform %r\n" % (platform,))
    exit(1)
22 |
23 |
--------------------------------------------------------------------------------
/external/plex/dist/Plex/Traditional.py:
--------------------------------------------------------------------------------
1 | #=======================================================================
2 | #
3 | # Python Lexical Analyser
4 | #
5 | # Traditional Regular Expression Syntax
6 | #
7 | #=======================================================================
8 |
9 | from Regexps import *
10 | from Errors import PlexError
11 |
class RegexpSyntaxError(PlexError):
    """Raised by REParser.error() for a syntax error in a regexp string."""
    pass
14 |
def re(s):
    """
    Parse the traditional regular expression string |s| and return
    the equivalent Plex representation.
    """
    parser = REParser(s)
    return parser.parse_re()
21 |
class REParser:
    """
    Recursive-descent parser which turns a traditional regular
    expression string into Plex regexp objects.

    Grammar, as implemented by the parse_* methods:

        alt   ::=  seq ('|' seq)*
        seq   ::=  mod*                 (ends at '|', ')' or end of input)
        mod   ::=  prim ('*' | '+' | '?')*
        prim  ::=  '.' | '^' | '$' | '(' alt ')' | '[' charset ']'
                   | '\\' char | char

    Attributes:
        s    the string being parsed
        i    index of the current character in s
        c    the current character, or '' past the end of s
        end  true once the parser has run off the end of s
    """

    def __init__(self, s):
        self.s = s
        self.i = -1
        self.end = 0
        # Load the first character into self.c.
        self.next()

    def parse_re(self):
        """Parse the whole string; anything left over is an error."""
        result = self.parse_alt()
        if not self.end:
            self.error("Unexpected %s" % repr(self.c))
        return result

    def parse_alt(self):
        """Parse a set of alternative regexps."""
        result = self.parse_seq()
        if self.c == '|':
            re_list = [result]
            while self.c == '|':
                self.next()
                re_list.append(self.parse_seq())
            result = Alt(*re_list)
        return result

    def parse_seq(self):
        """Parse a sequence of regexps."""
        re_list = []
        # self.c is '' at the end, and '' is "in" every string, so the
        # end flag has to be tested first.
        while not self.end and self.c not in "|)":
            re_list.append(self.parse_mod())
        return Seq(*re_list)

    def parse_mod(self):
        """Parse a primitive regexp followed by *, +, ? modifiers."""
        result = self.parse_prim()
        while not self.end and self.c in "*+?":
            if self.c == '*':
                result = Rep(result)
            elif self.c == '+':
                result = Rep1(result)
            else: # self.c == '?'
                result = Opt(result)
            self.next()
        return result

    def parse_prim(self):
        """Parse a primitive regexp."""
        c = self.get()
        if c == '.':
            result = AnyBut("\n")
        elif c == '^':
            result = Bol
        elif c == '$':
            result = Eol
        elif c == '(':
            result = self.parse_alt()
            self.expect(')')
        elif c == '[':
            result = self.parse_charset()
            self.expect(']')
        else:
            # A backslash makes the following character literal.
            if c == '\\':
                c = self.get()
            result = Char(c)
        return result

    def parse_charset(self):
        """Parse a charset. Does not include the surrounding []."""
        char_list = []
        invert = 0
        if self.c == '^':
            invert = 1
            self.next()
        # A ']' in first position is a literal member of the set.
        if self.c == ']':
            char_list.append(']')
            self.next()
        while not self.end and self.c != ']':
            c1 = self.get()
            # "a-z" is a range, unless the '-' is the last character
            # before the closing bracket.
            if self.c == '-' and self.lookahead(1) != ']':
                self.next()
                c2 = self.get()
                for code in range(ord(c1), ord(c2) + 1):
                    char_list.append(chr(code))
            else:
                char_list.append(c1)
        chars = "".join(char_list)
        if invert:
            return AnyBut(chars)
        else:
            return Any(chars)

    def next(self):
        """Advance to the next char."""
        s = self.s
        i = self.i = self.i + 1
        if i < len(s):
            self.c = s[i]
        else:
            self.c = ''
            self.end = 1

    def get(self):
        """Return the current char and advance; error at end of input."""
        if self.end:
            self.error("Premature end of string")
        c = self.c
        self.next()
        return c

    def lookahead(self, n):
        """Look ahead n chars."""
        j = self.i + n
        if j < len(self.s):
            return self.s[j]
        else:
            return ''

    def expect(self, c):
        """
        Expect to find character |c| at current position.
        Raises an exception otherwise.
        """
        if self.c == c:
            self.next()
        else:
            self.error("Missing %s" % repr(c))

    def error(self, mess):
        """Raise exception to signal syntax error in regexp."""
        raise RegexpSyntaxError("Syntax error in regexp %s at position %d: %s" % (
            repr(self.s), self.i, mess))
152 |
153 |
154 |
155 |
--------------------------------------------------------------------------------
/external/plex/dist/Plex/__init__.py:
--------------------------------------------------------------------------------
1 | #=======================================================================
2 | #
3 | # Python Lexical Analyser
4 | #
5 | #=======================================================================
6 |
7 | """
8 | The Plex module provides lexical analysers with similar capabilities
9 | to GNU Flex. The following classes and functions are exported;
10 | see the attached docstrings for more information.
11 |
12 | Scanner For scanning a character stream under the
13 | direction of a Lexicon.
14 |
15 | Lexicon For constructing a lexical definition
16 | to be used by a Scanner.
17 |
18 | Str, Any, AnyBut, AnyChar, Seq, Alt, Opt, Rep, Rep1,
19 | Bol, Eol, Eof, Empty
20 |
21 | Regular expression constructors, for building pattern
22 | definitions for a Lexicon.
23 |
24 | State For defining scanner states when creating a
25 | Lexicon.
26 |
27 | TEXT, IGNORE, Begin
28 |
29 | Actions for associating with patterns when
30 | creating a Lexicon.
31 | """
32 |
33 | from Actions import TEXT, IGNORE, Begin
34 | from Lexicons import Lexicon, State
35 | from Regexps import RE, Seq, Alt, Rep1, Empty, Str, Any, AnyBut, AnyChar, Range
36 | from Regexps import Opt, Rep, Bol, Eol, Eof, Case, NoCase
37 | from Scanners import Scanner
38 |
39 |
40 |
41 |
--------------------------------------------------------------------------------
/external/plex/dist/README:
--------------------------------------------------------------------------------
1 | This is version 1.1.5 of Plex, a Python module for building lexical
2 | analysers. See the doc directory for instructions on using it.
3 |
4 | Plex is free of any restrictions. You can use it, redistribute it,
5 | sell it, whatever you want. All I ask is that you give me credit if
6 | you distribute any code derived from it.
7 |
8 |
9 | Greg Ewing,
10 | Computer Science Department,
11 | University of Canterbury,
12 | Christchurch,
13 | New Zealand
14 |
15 | greg@cosc.canterbury.ac.nz
16 |
17 | Version History
18 | ---------------
19 |
20 | 1.1.5 Eliminated a syntax warning about assigning to None
21 | when using with Python 2.3.
22 |
23 | 1.1.4 Fixed bug causing argument of Rep or Rep1 to
24 | fail to match following a newline.
25 |
26 | 1.1.3 Fixed bug causing Eol to fail to match at the
27 | beginning of a line in some circumstances.
28 |
29 | 1.1.2 Changed Scanner.yield() to Scanner.produce() to
30 | accommodate Python 2.3, where yield is a keyword.
31 |
32 | Changed test10 to not rely so much on details of
33 | string repr.
34 |
35 | 1.1.1 Fixed two minor bugs: uncommented Scanner.next_char() and
36 | added import of types to Regexps.py.
37 |
38 | 1.1 Added support for case-insensitive matches.
39 |
40 | 1.0 First official release.
41 |
--------------------------------------------------------------------------------
/external/plex/dist/TODO:
--------------------------------------------------------------------------------
1 | * Multiple state names in State constructor
2 |
3 | * Implement scanning in C
4 |
5 | * Case-insensitivity flag
6 |
7 | * Trailing contexts?
8 |
9 | * Make Action a callable object
10 |
11 | * Action sequences
12 |
13 | * Hook up to existing parser module
14 |
15 |
--------------------------------------------------------------------------------
/external/plex/dist/doc/index.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 | Plex
9 |
10 |
11 |
12 |
13 |
14 |
Plex - a Lexical Analysis Module for Python
15 |
16 |
17 | Version 1.1.2
18 |
19 | Plex is a Python module for constructing lexical analysers, or scanners.
20 | Plex scanners have almost all the capabilities of the scanners generated
21 | by GNU Flex, and are specified in a very similar way. Tokens are defined
22 | by regular expressions, and each token has an associated action, which
23 | may be to return a literal value, or to call an arbitrary function.
24 | Plex is designed to fill a need that is left wanting by the existing
25 | Python regular expression modules. If you've ever tried to use one of them
26 | for implementing a scanner, you will have found that they're not really
27 | suited to the task. You can define a bunch of regular expressions which
28 | match your tokens all right, but you can only match one of them at a time
29 | against your input. To match all of them at once, you have to join them
30 | all together into one big r.e., but then you've got no easy way to tell
31 | which one matched. This is the problem that Plex is designed to solve.
32 |
Another advantage of Plex is that it compiles all of the regular expressions
33 | into a single DFA. Once that's done, the input can be processed in a time
34 | proportional to the number of characters to be scanned, and independent
35 | of the number or complexity of the regular expressions. Python's existing
36 | regular expression matchers do not have this property.
37 |
38 |
39 |
40 | Contents
41 |
42 |
43 |
44 | Tutorial
45 |
46 |
47 |
48 | Reference
49 |
50 |
51 |
52 |
53 |
54 |
55 |
56 | License
57 | Plex is free of any restrictions. You can use it, redistribute it, sell
58 | it, whatever you want. All I ask is that you give me credit if you
59 | distribute any code derived from it.
60 |
61 |
62 |
Greg Ewing,
63 |
Computer Science Department,
64 |
University of Canterbury,
65 |
Christchurch,
66 |
New Zealand
67 |
68 | greg@cosc.canterbury.ac.nz
69 |
70 |
71 | http://www.cosc.canterbury.ac.nz/~greg
72 |
73 |
74 |
75 |
--------------------------------------------------------------------------------
/external/plex/dist/examples/example1and2.in:
--------------------------------------------------------------------------------
1 | Python rocks
2 |
3 |
4 |
--------------------------------------------------------------------------------
/external/plex/dist/examples/example1and2.py:
--------------------------------------------------------------------------------
1 | #
2 | # Example 1
3 | #
4 |
5 | from Plex import *
6 |
7 | lexicon = Lexicon([
8 | (Str("Python"), "my_favourite_language"),
9 | (Str("Perl"), "the_other_language"),
10 | (Str("rocks"), "is_excellent"),
11 | (Str("sucks"), "is_differently_good"),
12 | (Rep1(Any(" \t\n")), IGNORE)
13 | ])
14 |
15 | #
16 | # Example 2
17 | #
18 |
19 | filename = "example1and2.in"
20 | f = open(filename, "r")
21 | scanner = Scanner(lexicon, f, filename)
22 | while 1:
23 | token = scanner.read()
24 | print token
25 | if token[0] is None:
26 | break
27 |
28 |
29 |
--------------------------------------------------------------------------------
/external/plex/dist/examples/example3.in:
--------------------------------------------------------------------------------
1 | if x > y * 5 then
2 | b = c / d
3 | else
4 | Python = handy + useful
5 | end
6 |
7 |
8 |
--------------------------------------------------------------------------------
/external/plex/dist/examples/example3.py:
--------------------------------------------------------------------------------
1 | #
2 | # Example 3
3 | #
4 |
5 | from Plex import *
6 |
7 | letter = Range("AZaz")
8 | digit = Range("09")
9 | name = letter + Rep(letter | digit)
10 | number = Rep1(digit)
11 | space = Any(" \t\n")
12 | comment = Str("{") + Rep(AnyBut("}")) + Str("}")
13 |
14 | resword = Str("if", "then", "else", "end")
15 |
16 | lexicon = Lexicon([
17 | (name, 'ident'),
18 | (number, 'int'),
19 | (resword, TEXT),
20 | (Any("+-*/=<>"), TEXT),
21 | (space | comment, IGNORE)
22 | ])
23 |
24 | filename = "example3.in"
25 | f = open(filename, "r")
26 | scanner = Scanner(lexicon, f, filename)
27 | while 1:
28 | token = scanner.read()
29 | print token
30 | if token[0] is None:
31 | break
32 |
33 |
34 |
--------------------------------------------------------------------------------
/external/plex/dist/examples/example4.in:
--------------------------------------------------------------------------------
1 | alpha beta (*spam (*and*) eggs*) gamma
2 |
3 |
4 |
--------------------------------------------------------------------------------
/external/plex/dist/examples/example4.py:
--------------------------------------------------------------------------------
1 | #
2 | # Example 4
3 | #
4 |
5 | from Plex import *
6 |
def begin_comment(scanner, text):
    """Action for "(*": go one comment level deeper."""
    scanner.nesting_level += 1
9 |
def end_comment(scanner, text):
    """Action for "*)": come back up one comment level."""
    scanner.nesting_level -= 1
12 |
def maybe_a_name(scanner, text):
    """Return 'ident' outside comments, None inside one."""
    if scanner.nesting_level != 0:
        return None
    return 'ident'
16 |
17 | letter = Range("AZaz")
18 | digit = Range("09")
19 | name = letter + Rep(letter | digit)
20 | space = Any(" \t\n")
21 |
22 | lexicon = Lexicon([
23 | (Str("(*"), begin_comment),
24 | (Str("*)"), end_comment),
25 | (name, maybe_a_name),
26 | (space, IGNORE)
27 | ])
28 |
29 | filename = "example4.in"
30 | f = open(filename, "r")
31 | scanner = Scanner(lexicon, f, filename)
32 | scanner.nesting_level = 0
33 | while 1:
34 | token = scanner.read()
35 | print token
36 | if token[0] is None:
37 | break
38 |
39 |
40 |
--------------------------------------------------------------------------------
/external/plex/dist/examples/example5.in:
--------------------------------------------------------------------------------
1 | alpha beta (*spam and 42 eggs*) gamma
2 |
3 |
4 |
--------------------------------------------------------------------------------
/external/plex/dist/examples/example5.py:
--------------------------------------------------------------------------------
1 | #
2 | # Example 5
3 | #
4 |
5 | from Plex import *
6 |
7 | letter = Range("AZaz")
8 | digit = Range("09")
9 | name = letter + Rep(letter | digit)
10 | number = Rep1(digit)
11 | space = Any(" \t\n")
12 |
13 | lexicon = Lexicon([
14 | (name, 'ident'),
15 | (number, 'int'),
16 | (space, IGNORE),
17 | (Str("(*"), Begin('comment')),
18 | State('comment', [
19 | (Str("*)"), Begin('')),
20 | (AnyChar, IGNORE)
21 | ])
22 | ])
23 |
24 | filename = "example5.in"
25 | f = open(filename, "r")
26 | scanner = Scanner(lexicon, f, filename)
27 | while 1:
28 | token = scanner.read()
29 | print token
30 | if token[0] is None:
31 | break
32 |
33 |
34 |
--------------------------------------------------------------------------------
/external/plex/dist/examples/example6.in:
--------------------------------------------------------------------------------
1 | alpha beta (*spam and*) {42 eggs} gamma
2 |
3 |
4 |
--------------------------------------------------------------------------------
/external/plex/dist/examples/example6.py:
--------------------------------------------------------------------------------
1 | #
2 | # Example 6
3 | #
4 |
5 | from Plex import *
6 |
# Character classes and token patterns.
letter = Range("AZaz")
digit = Range("09")
name = letter + Rep(letter | digit)
number = Rep1(digit)
space = Any(" \t\n")

# Two independent comment styles, each with its own scanner state:
# "(* ... *)" uses 'comment1' and "{ ... }" uses 'comment2'.
lexicon = Lexicon([
    (name, 'ident'),
    (number, 'int'),
    (space, IGNORE),
    (Str("(*"), Begin('comment1')),
    (Str("{"), Begin('comment2')),
    State('comment1', [
        (Str("*)"), Begin('')),
        (AnyChar, IGNORE)
    ]),
    State('comment2', [
        (Str("}"), Begin('')),
        (AnyChar, IGNORE)
    ])
])

filename = "example6.in"
f = open(filename, "r")
try:
    scanner = Scanner(lexicon, f, filename)
    # Print every token, stopping after the one whose value is None.
    while 1:
        token = scanner.read()
        print(token)
        if token[0] is None:
            break
finally:
    # Release the input file even if scanning raises.
    f.close()
37 |
38 |
39 |
--------------------------------------------------------------------------------
/external/plex/dist/examples/example7.in:
--------------------------------------------------------------------------------
1 | alpha beta (*spam and (*42*) eggs*) gamma
2 |
3 |
4 |
--------------------------------------------------------------------------------
/external/plex/dist/examples/example7.py:
--------------------------------------------------------------------------------
1 | #
2 | # Example 7
3 | #
4 |
5 | from Plex import *
6 |
# Building blocks for the token patterns used by MyScanner.lexicon.
letter = Range("AZaz")
digit = Range("09")
name = letter + Rep(letter | digit)
number = Rep1(digit)
space = Any(" \t\n")


class MyScanner(Scanner):
    """Scanner for names and numbers that skips nested "(* ... *)" comments.

    self.nesting_level counts the comments currently open; the scanner is
    in the 'comment' state exactly while that count is non-zero.
    """

    def begin_comment(self, text):
        """Action for "(*": enter 'comment' on the outermost opener."""
        was_outside = self.nesting_level == 0
        if was_outside:
            self.begin('comment')
        self.nesting_level += 1

    def end_comment(self, text):
        """Action for "*)": leave 'comment' when the last level closes."""
        self.nesting_level -= 1
        if self.nesting_level == 0:
            self.begin('')

    # Built in the class body so the plain functions above can be used
    # directly as actions.
    lexicon = Lexicon([
        (name, 'ident'),
        (number, 'int'),
        (space, IGNORE),
        (Str("(*"), begin_comment),
        State('comment', [
            (Str("(*"), begin_comment),
            (Str("*)"), end_comment),
            (AnyChar, IGNORE)
        ])
    ])

    def __init__(self, file, name):
        """Set up scanning of *file* (reported as *name*) with no open comment."""
        Scanner.__init__(self, self.lexicon, file, name)
        self.nesting_level = 0
40 |
filename = "example7.in"
f = open(filename, "r")
try:
    scanner = MyScanner(f, filename)
    # Print every token, stopping after the one whose value is None.
    while 1:
        token = scanner.read()
        print(token)
        if token[0] is None:
            break
finally:
    # Release the input file even if scanning raises.
    f.close()
49 |
50 |
51 |
--------------------------------------------------------------------------------
/external/plex/dist/examples/pascal.in:
--------------------------------------------------------------------------------
1 | { Test input for the
2 | Plex Pascal scanner }
3 |
4 | program spam(input, output);
5 | var
6 | order: integer;
7 | begin
8 | write('How many spoons of spam would you like with your eggs, sir? ');
9 | readln(order);
10 | if order >= 1 then
11 | writeln('Certainly, sir.')
12 | else
13 | writeln('Sorry, sir, invalid order.')
14 | end
15 | end.
16 |
17 |
--------------------------------------------------------------------------------
/external/plex/dist/examples/pascal.py:
--------------------------------------------------------------------------------
1 | #
2 | # Example - Apple Object Pascal scanner
3 | #
4 |
5 | from Plex import *
6 |
def make_lexicon():
    """Build and return the Lexicon used by the Pascal example scanner.

    Token values: reserved words and punctuation are reported as their own
    text (TEXT); identifiers as 'ident', numbers as 'num', quoted strings
    as 'str'.  Whitespace and "{ ... }" comments are ignored.
    """

    letter = Range("AZaz") | Any("_")
    digit = Range("09")
    space = Any(" \t\n")

    ident = letter + Rep(letter | digit)
    resword = NoCase(Str("program", "unit", "uses", "const", "type", "var",
        "if", "then", "else", "while", "do", "repeat", "until",
        "for", "to", "downto", "and", "or", "not",
        "array", "of", "record", "object"))
    number = Rep1(digit)
    # The body of a string is any mix of non-quote characters and doubled
    # quotes.  The alternation has to sit INSIDE Rep: with it outside, a
    # doubled quote was only accepted when it was the entire body, so
    # 'it''s' was split into two separate strings.
    string = Str("'") + Rep(AnyBut("'") | Str("''")) + Str("'")
    diphthong = Str(":=", "<=", ">=", "<>", "..")
    punct = Any("^&*()-+=[]|;:<>,./")
    spaces = Rep1(space)
    comment_begin = Str("{")
    comment_char = AnyBut("}")
    comment_end = Str("}")

    lexicon = Lexicon([
        # Reserved words are listed ahead of the identifier pattern.
        (resword, TEXT),
        (ident, 'ident'),
        (number, 'num'),
        (string, 'str'),
        (punct | diphthong, TEXT),
        (spaces, IGNORE),
        (comment_begin, Begin('comment')),
        # Inside a comment everything up to "}" is dropped.
        State('comment', [
            (comment_char, IGNORE),
            (comment_end, Begin(''))
        ])
    ])

    return lexicon
42 |
if __name__ == "__main__":
    # Scan pascal.in and print every token, stopping after the one whose
    # value is None.
    lexicon = make_lexicon()
    filename = "pascal.in"
    f = open(filename, "r")
    try:
        scanner = Scanner(lexicon, f, filename)
        while 1:
            token = scanner.read()
            print(token)
            if token[0] is None:
                break
    finally:
        # Release the input file even if scanning raises.
        f.close()
53 |
54 |
55 |
--------------------------------------------------------------------------------
/external/plex/dist/examples/python.in:
--------------------------------------------------------------------------------
1 | import sys
2 |
3 | # This, in case you didn't notice, is a comment.
4 |
5 | def gcd(x, y):
6 | while x <> y:
7 | if x > y: # This is another comment.
8 | print "x is bigger"
9 | x = x - y
10 | # This comment doesn't imply any indentation.
11 | else:
12 |
13 | print "y is bigger"
14 | y = y - x
15 | return x
16 |
17 | def go():
18 | for x, y in [(12,20), (37,18), (2, 54)]:
19 | print gcd\
20 | (x, y)
21 |
22 |
23 |
--------------------------------------------------------------------------------
/external/plex/dist/examples/python.py:
--------------------------------------------------------------------------------
1 | #
2 | # Example - Python scanner
3 | #
4 |
5 | import exceptions
6 | from Plex import *
7 |
class NaughtyNaughty(Exception):
    """Raised by PythonScanner for inconsistent or mismatched indentation."""
    # Derives from the builtin Exception directly; this is the same class
    # the Python-2-only ``exceptions`` module exposes.
10 |
class PythonScanner(Scanner):
    """Example scanner for Python-like source that emits INDENT/DEDENT tokens.

    Instance state (set in __init__):
      indentation_stack     -- indentation widths currently open; starts [0]
      bracket_nesting_level -- brackets opened and not yet closed
      indentation_char      -- first indentation character seen, else None
    """

    def open_bracket_action(self, text):
        """Count an opening bracket and report it as its own text."""
        self.bracket_nesting_level = self.bracket_nesting_level + 1
        return text

    def close_bracket_action(self, text):
        """Count a closing bracket and report it as its own text."""
        self.bracket_nesting_level = self.bracket_nesting_level - 1
        return text

    def current_level(self):
        """Return the innermost indentation width currently open."""
        return self.indentation_stack[-1]

    def newline_action(self, text):
        """Report 'newline' and switch to 'indent', unless inside brackets."""
        if self.bracket_nesting_level == 0:
            self.begin('indent')
            return 'newline'

    def indentation_action(self, text):
        """Compare leading whitespace *text* with the open indentation.

        Raises NaughtyNaughty if the indentation character differs from
        the first one seen, or (via dedent_to) if a dedent does not land
        on an enclosing level.
        """
        # Check that tabs and spaces are being used consistently.
        if text:
            c = text[0]
            if self.indentation_char is None:
                self.indentation_char = c
            else:
                if self.indentation_char != c:
                    raise NaughtyNaughty("Mixed up tabs and spaces!")
        # Figure out how many indents/dedents to do
        current_level = self.current_level()
        new_level = len(text)
        if new_level > current_level:
            self.indent_to(new_level)
        elif new_level < current_level:
            self.dedent_to(new_level)
        # Change back to default state
        self.begin('')

    def indent_to(self, new_level):
        """Open indentation level *new_level* and produce one INDENT."""
        self.indentation_stack.append(new_level)
        self.produce('INDENT', '')

    def dedent_to(self, new_level):
        """Close levels deeper than *new_level*, producing a DEDENT for each.

        Raises NaughtyNaughty if *new_level* is not itself an open level.
        """
        while new_level < self.current_level():
            del self.indentation_stack[-1]
            self.produce('DEDENT', '')
        if new_level != self.current_level():
            raise NaughtyNaughty("Indentation booboo!")

    def eof(self):
        """Close every indentation level that is still open."""
        self.dedent_to(0)

    letter = Range("AZaz") | Any("_")
    digit = Range("09")
    hexdigit = Range("09AFaf")

    name = letter + Rep(letter | digit)
    number = Rep1(digit) | (Str("0x") + Rep1(hexdigit))

    # Single- and double-quoted strings: no raw newline, backslash escapes
    # consume the following character.
    sq_string = (
        Str("'") +
        Rep(AnyBut("\\\n'") | (Str("\\") + AnyChar)) +
        Str("'"))

    dq_string = (
        Str('"') +
        Rep(AnyBut('\\\n"') | (Str("\\") + AnyChar)) +
        Str('"'))

    # Triple-quoted strings: one or two quotes may appear inside as long
    # as they are followed by a non-quote item.
    non_dq = AnyBut('"') | (Str('\\') + AnyChar)
    tq_string = (
        Str('"""') +
        Rep(
            non_dq |
            (Str('"') + non_dq) |
            (Str('""') + non_dq)) + Str('"""'))

    stringlit = sq_string | dq_string | tq_string
    opening_bracket = Any("([{")
    closing_bracket = Any(")]}")
    punct1 = Any(":,;+-*/|&<>=.%`~^")
    punct2 = Str("==", "<>", "!=", "<=", "<<", ">>", "**")
    punctuation = punct1 | punct2

    spaces = Rep1(Any(" \t"))
    indentation = Rep(Str(" ")) | Rep(Str("\t"))
    lineterm = Str("\n") | Eof
    escaped_newline = Str("\\\n")
    comment = Str("#") + Rep(AnyBut("\n"))
    blank_line = indentation + Opt(comment) + lineterm

    lexicon = Lexicon([
        (name, 'name'),
        (number, 'number'),
        (stringlit, 'string'),
        (punctuation, TEXT),
        (opening_bracket, open_bracket_action),
        (closing_bracket, close_bracket_action),
        (lineterm, newline_action),
        (comment, IGNORE),
        (spaces, IGNORE),
        (escaped_newline, IGNORE),
        # At the start of a line: blank/comment-only lines are skipped,
        # anything else has its indentation measured.
        State('indent', [
            (blank_line, IGNORE),
            (indentation, indentation_action),
        ]),
    ])

    def __init__(self, file):
        """Start scanning *file* in the 'indent' state at indentation 0."""
        Scanner.__init__(self, self.lexicon, file)
        self.indentation_stack = [0]
        self.bracket_nesting_level = 0
        self.indentation_char = None
        self.begin('indent')
124 |
f = open("python.in", "r")
try:
    scanner = PythonScanner(f)
    # Current display depth: adjusted by INDENT/DEDENT tokens and used to
    # indent the printed token stream four spaces per level.
    level = 0
    while 1:
        token, text = scanner.read()
        if token is None:
            break
        if token == 'INDENT':
            level = level + 1
        elif token == 'DEDENT':
            level = level - 1
        indent = ' ' * (level * 4)
        # Show the matched text only when it adds something to the token.
        if not text or token == text:
            value = token
        else:
            value = "%s(%s)" % (token, repr(text))
        print(indent + value)
finally:
    # Release the input file even if scanning raises.
    f.close()
142 |
143 |
144 |
--------------------------------------------------------------------------------
/external/plex/dist/examples/speedtest.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 |
4 | import Plex
5 | import pascal
6 |
# Pick a clock: GetTicks()/60.0 on the 'mac' platform, otherwise the sum of
# the first two fields of os.times().
if sys.platform == 'mac':
    import MacOS
    def time():
        return MacOS.GetTicks() / 60.0
    timekind = "real"
else:
    def time():
        t = os.times()
        return t[0] + t[1]
    timekind = "cpu"

# Phase 1: time the construction of the lexicon.
time1 = time()
lexicon = pascal.make_lexicon()
time2 = time()
print("Constructing scanner took %s %s seconds" % (time2 - time1, timekind))

# Phase 2: time scanning the whole input.
f = open("speedtest.in", "r")
try:
    scanner = Plex.Scanner(lexicon, f)
    time1 = time()
    while 1:
        value, text = scanner.read()
        if value is None:
            break
    time2 = time()
    _, lines, _ = scanner.position()
finally:
    # Release the input file even if scanning raises.
    f.close()

# Kept in its own name so the time() helper above is not rebound.
elapsed = time2 - time1
if elapsed > 0:
    lps = float(lines) / float(elapsed)
else:
    # Clock resolution too coarse to measure the run; avoid dividing by 0.
    lps = float("inf")
print("Scanning %d lines took %s %s seconds (%s lines/sec)" % (
    lines, elapsed, timekind, lps))
36 |
37 |
38 |
39 |
--------------------------------------------------------------------------------
/external/plex/dist/tests/Test.py:
--------------------------------------------------------------------------------
1 | #
2 | # Run a Plex test
3 | #
4 |
5 | import sys
6 |
7 | # Mac slow console stderr hack
8 | if sys.platform == 'mac':
9 | if sys.stderr is sys.__stderr__:
10 | sys.stderr = sys.__stdout__
11 |
12 | import Plex
13 |
# Set to a true value to turn the debug stream on regardless of argv.
force_debug = 0

# Stream the test scripts pass as Lexicon(debug=...): sys.stderr when "-d"
# is the first command-line argument (or force_debug is set), else None.
debug = None
if force_debug or sys.argv[1:2] == ["-d"]:
    debug = sys.stderr
20 |
def run(lexicon, test_name,
        debug = 0, trace = 0, scanner_class = Plex.Scanner):
    """Scan ``<test_name>.in`` with *lexicon* and print one line per token.

    Each line shows the scanner's position (name, line, column), the token
    value and the repr of the matched text; the loop stops after the token
    whose value is None.

    debug         -- if true, dump lexicon.machine to stdout first,
                     followed by a row of "=".
    trace         -- if true, set ``trace = 1`` on the scanner.
    scanner_class -- called as scanner_class(lexicon, file, name).
    """
    if debug:
        lexicon.machine.dump(sys.stdout)
        print("=" * 70)
    in_name = test_name + ".in"
    f = open(in_name, "rU")
    try:
        s = scanner_class(lexicon, f, in_name)
        if trace:
            s.trace = 1
        while 1:
            value, text = s.read()
            name, line, pos = s.position()
            print("%s %3d %3d %-10s %s" % (name, line, pos, value, repr(text)))
            if value is None:
                break
    finally:
        # Release the input file even if scanning raises.
        f.close()
40 |
41 |
42 |
43 |
44 |
--------------------------------------------------------------------------------
/external/plex/dist/tests/runtests.py:
--------------------------------------------------------------------------------
1 | from glob import glob
2 | import os
3 | import sys
4 | import traceback
5 |
def run_test(test_name, out_name, err_name):
    """Import module *test_name* with stdout/stderr redirected to files.

    Everything the module prints goes to *out_name*; tracebacks and exit
    codes go to *err_name*.  Returns 1 if the import completed, 0 if it
    raised or called sys.exit().  KeyboardInterrupt is re-raised.
    """
    out_file = open(out_name, "w")
    try:
        err_file = open(err_name, "w")
    except:
        # Do not leak the first file if the second cannot be opened.
        out_file.close()
        raise
    # Remember the streams that are active right now, so that a caller who
    # has already redirected them gets its own streams back afterwards
    # rather than the interpreter's originals.
    saved_stdout = sys.stdout
    saved_stderr = sys.stderr
    sys.stdout = out_file
    sys.stderr = err_file
    result = 1
    try:
        try:
            __import__(test_name)
        except KeyboardInterrupt:
            raise
        except SystemExit as e:
            sys.stderr.write("Exit code %s\n" % e)
            result = 0
        except:
            # Deliberately broad: any failure of the test module is
            # recorded in the error file and reported as result 0.
            traceback.print_exc()
            result = 0
    finally:
        sys.stdout = saved_stdout
        sys.stderr = saved_stderr
        out_file.close()
        err_file.close()
    return result
29 |
def check_result(out_name, out2_name):
    """Return whether the two named files have identical contents."""
    expected = read_file(out_name)
    actual = read_file(out2_name)
    return expected == actual
32 |
def read_file(name):
    """Return the whole text of file *name*, read with universal newlines."""
    try:
        f = open(name, "rU")
    except ValueError:
        # Python 3.11+ rejects the "U" flag; newline translation is
        # already the default for text mode there.
        f = open(name, "r")
    try:
        return f.read()
    finally:
        # Close the handle even if read() raises.
        f.close()
38 |
def remove(name):
    """Delete file *name*, ignoring the case where it cannot be removed."""
    try:
        os.unlink(name)
    except OSError:
        # Best effort: a missing or undeletable file is not an error here.
        # Anything else (e.g. KeyboardInterrupt) is allowed to propagate.
        pass
44 |
def run():
    """Run the named tests, or every ``test?*.py`` in the current directory.

    For a test that has a ``.out`` file, its output is written to ``.out2``
    and compared with it; otherwise the ``.out`` file is created from this
    run.  The ``.out2`` and ``.err`` files are removed when the test
    succeeded and left in place otherwise.
    """
    if len(sys.argv) > 1:
        tests = sys.argv[1:]
    else:
        # glob() returns names in directory order; sort them so the run
        # order (and therefore the report) is the same every time.
        tests = sorted(glob("test?*.py"))
    for test_py in tests:
        test_name = os.path.splitext(test_py)[0]
        test_out = test_name + ".out"
        test_out2 = test_name + ".out2"
        test_err = test_name + ".err"
        if os.path.exists(test_out):
            # Progress label first, verdict on the same line afterwards.
            sys.stdout.write("%s: " % test_name)
            sys.stdout.flush()
            succeeded = run_test(test_name, test_out2, test_err)
            if succeeded:
                succeeded = check_result(test_out, test_out2)
                if succeeded:
                    print("passed")
                else:
                    print("failed *****")
            else:
                print("error *****")
        else:
            sys.stdout.write("creating %s: " % test_out)
            sys.stdout.flush()
            succeeded = run_test(test_name, test_out, test_err)
            if succeeded:
                print("succeeded")
            else:
                print("error *****")
        if succeeded:
            remove(test_out2)
            remove(test_err)

if __name__ == "__main__":
    run()
81 |
82 |
83 |
84 |
85 |
--------------------------------------------------------------------------------
/external/plex/dist/tests/test0.in:
--------------------------------------------------------------------------------
1 |
2 | aaa
3 |
4 |
5 |
--------------------------------------------------------------------------------
/external/plex/dist/tests/test0.out:
--------------------------------------------------------------------------------
1 | test0.in 2 0 thing 'a'
2 | test0.in 2 1 thing 'a'
3 | test0.in 2 2 thing 'a'
4 | test0.in 5 0 None ''
5 |
--------------------------------------------------------------------------------
/external/plex/dist/tests/test0.py:
--------------------------------------------------------------------------------
1 | import Test
2 | from Plex import *
3 | import sys
4 |
# Every "a" is reported as a 'thing'; newlines are skipped.
token_specs = [
    (Str("a"), 'thing'),
    (Any("\n"), IGNORE),
]
lex = Lexicon(token_specs, debug=Test.debug, timings=sys.stderr)

Test.run(lex, "test0", debug=0, trace=0)
14 |
15 |
16 |
--------------------------------------------------------------------------------
/external/plex/dist/tests/test1.in:
--------------------------------------------------------------------------------
1 | a b aa ab ba a0 a1 b0 b1 ab01
2 | babba01 abba babb b0001a
3 |
4 |
--------------------------------------------------------------------------------
/external/plex/dist/tests/test1.out:
--------------------------------------------------------------------------------
1 | test1.in 1 0 ident 'a'
2 | test1.in 1 2 ident 'b'
3 | test1.in 1 4 ident 'aa'
4 | test1.in 1 7 ident 'ab'
5 | test1.in 1 10 ident 'ba'
6 | test1.in 1 13 ident 'a0'
7 | test1.in 1 16 ident 'a1'
8 | test1.in 1 19 ident 'b0'
9 | test1.in 1 22 ident 'b1'
10 | test1.in 1 25 ident 'ab01'
11 | test1.in 2 0 ident 'babba01'
12 | test1.in 2 8 ident 'abba'
13 | test1.in 2 13 ident 'babb'
14 | test1.in 2 18 ident 'b0001a'
15 | test1.in 4 0 None ''
16 |
--------------------------------------------------------------------------------
/external/plex/dist/tests/test1.py:
--------------------------------------------------------------------------------
1 | import Test
2 | from Plex import *
3 | import sys
4 |
# Identifiers start with a or b and continue with a, b, 0 or 1; spaces and
# newlines are skipped.
token_specs = [
    (Any("ab") + Rep(Any("ab01")), 'ident'),
    (Any(" \n"), IGNORE),
]
lex = Lexicon(token_specs, debug=Test.debug, timings=sys.stderr)

Test.run(lex, "test1", debug=0, trace=0)
14 |
15 |
16 |
--------------------------------------------------------------------------------
/external/plex/dist/tests/test10.out:
--------------------------------------------------------------------------------
1 | Seq()
2 | Seq(Char('a'))
3 | Seq(Any('a'))
4 | Seq(Any('ab'))
5 | Seq(Any('abc'))
6 | Seq(Any('abc'))
7 | Seq(Any('abcd'))
8 | Seq(Any('abcghi'))
9 | Seq(AnyBut('a'))
10 | Seq(AnyBut('abcghi'))
11 | Seq(Any('-'))
12 | Seq(Any('-abc'))
13 | Seq(Any('abc-'))
14 | Seq(Any(']'))
15 | Seq(Any(']-'))
16 | Seq(AnyBut('-'))
17 | Seq(AnyBut('-abc'))
18 | Seq(AnyBut('abc-'))
19 | Seq(AnyBut(']'))
20 | Seq(AnyBut(']-'))
21 | Seq(Rep(Char('a')))
22 | Seq(Rep1(Char('a')))
23 | Seq(Opt(Char('a')))
24 | Seq(Opt(Rep1(Rep(Char('a')))))
25 | Seq(Char('a'),Char('b'))
26 | Alt(Seq(Char('a')),Seq(Char('b')))
27 | Seq(Char('a'),Char('b'),Char('c'),Char('d'),Char('e'))
28 | Alt(Seq(Char('a')),Seq(Char('b')),Seq(Char('c')),Seq(Char('d')),Seq(Char('e')))
29 | Alt(Seq(Char('a'),Char('b'),Char('c')),Seq(Char('d'),Char('e'),Char('f')),Seq(Char('g'),Char('h'),Char('i')))
30 | Seq(Char('a'),Char('b'),Char('c'),Alt(Seq(Char('d'),Char('e'),Char('f')),Seq(Char('g'),Char('h'),Char('i'))))
31 | Seq(Char('a'),Char('b'),Char('('),Char('c'),Char('['),Char('d'),Char('e'))
32 | Seq(Bol,Char('a'),Char('b'),Char('c'),Eol)
33 | True
34 | Syntax error in regexp 'abc(de' at position 6: Missing ')'
35 | Syntax error in regexp 'abc[de' at position 6: Missing ']'
36 | Syntax error in regexp 'abc)de' at position 3: Unexpected ')'
37 |
--------------------------------------------------------------------------------
/external/plex/dist/tests/test10.py:
--------------------------------------------------------------------------------
1 | # Test traditional regular expression syntax.
2 |
3 | import Test
4 |
5 | from Plex.Traditional import re
6 | from Plex.Errors import PlexError
7 | from Plex import Seq, AnyBut
8 |
def test_err(s):
    """Print the parse of regexp *s*, or the PlexError it raises."""
    try:
        parsed = re(s)
    except PlexError as e:
        print(e)
    else:
        print(parsed)
14 |
# Patterns that must parse; the printed form of each is compared against
# the expected output, one line per pattern, in this order.
valid_patterns = [
    "",
    "a",
    "[a]",
    "[ab]",
    "[abc]",
    "[a-c]",
    "[a-cd]",
    "[a-cg-i]",
    "[^a]",
    "[^a-cg-i]",
    "[-]",
    "[-abc]",
    "[abc-]",
    "[]]",
    "[]-]",
    "[^-]",
    "[^-abc]",
    "[^abc-]",
    "[^]]",
    "[^]-]",
    "a*",
    "a+",
    "a?",
    "a*+?",
    "ab",
    "a|b",
    "abcde",
    "a|b|c|d|e",
    "abc|def|ghi",
    "abc(def|ghi)",
    # Raw literal: same characters as before, backslashes spelled explicitly.
    r"ab\(c\[de",
    "^abc$",
]
for pattern in valid_patterns:
    print(re(pattern))

# "." must print the same as "any character except newline".
print(str(re(".")) == str(Seq(AnyBut('\n'))))

# Malformed patterns: the error message is printed instead.
for pattern in ["abc(de", "abc[de", "abc)de"]:
    test_err(pattern)
51 |
52 |
53 |
--------------------------------------------------------------------------------
/external/plex/dist/tests/test11.in:
--------------------------------------------------------------------------------
1 | Python python
2 | COBOL cobol CoBol COboL
3 | perl Perl pERl
4 | Serbo-Croatian serbo-croatian
5 | REALbasic realbasic REalbasic
6 | REALBasic realBasic REalBasic
7 |
--------------------------------------------------------------------------------
/external/plex/dist/tests/test11.out:
--------------------------------------------------------------------------------
1 | test11.in 1 0 upper-python 'Python'
2 | test11.in 1 7 lower-python 'python'
3 | test11.in 2 0 other-language 'COBOL'
4 | test11.in 2 6 other-language 'cobol'
5 | test11.in 2 12 other-language 'CoBol'
6 | test11.in 2 18 other-language 'COboL'
7 | test11.in 3 0 other-language 'perl'
8 | test11.in 3 5 other-language 'Perl'
9 | test11.in 3 10 other-language 'pERl'
10 | test11.in 4 0 other-language 'Serbo-Croatian'
11 | test11.in 4 15 other-language 'serbo-croatian'
12 | test11.in 5 0 real-1 'REALbasic'
13 | test11.in 5 10 real-1 'realbasic'
14 | test11.in 5 20 real-1 'REalbasic'
15 | test11.in 6 0 real-2 'REALBasic'
16 | test11.in 6 10 real-2 'realBasic'
17 | test11.in 6 20 real-2 'REalBasic'
18 | test11.in 7 0 None ''
19 |
--------------------------------------------------------------------------------
/external/plex/dist/tests/test11.py:
--------------------------------------------------------------------------------
1 | import Test
2 | from Plex import *
3 | import sys
4 |
# Case-sensitivity test: exact-case entries come first, then NoCase
# patterns, two of which wrap a Case() section.
token_specs = [
    (Str("Python"), 'upper-python'),
    (Str("python"), 'lower-python'),
    (NoCase(Str("COBOL", "perl", "Serbo-Croatian")), 'other-language'),
    (NoCase(Str("real") + Case(Str("basic"))), 'real-1'),
    (NoCase(Str("real") + Case(Str("Basic"))), 'real-2'),
    (Any(" \t\n"), IGNORE),
]
lex = Lexicon(token_specs, debug=Test.debug, timings=sys.stderr)

Test.run(lex, "test11", debug=0, trace=0)
18 |
--------------------------------------------------------------------------------
/external/plex/dist/tests/test12.in:
--------------------------------------------------------------------------------
1 | 'Single-line text literal''Multi-
2 | line text literal'
--------------------------------------------------------------------------------
/external/plex/dist/tests/test12.out:
--------------------------------------------------------------------------------
1 | test12.in 1 0 'Single-line text literal' "'Single-line text literal'"
2 | test12.in 1 26 'Multi-
3 | line text literal' "'Multi-\nline text literal'"
4 | test12.in 2 18 None ''
5 |
--------------------------------------------------------------------------------
/external/plex/dist/tests/test12.py:
--------------------------------------------------------------------------------
1 | import sys
2 | from cStringIO import StringIO
3 |
4 | import Test
5 | from Plex import *
6 |
# A quoted literal: a quote, any run of non-quote characters (newlines
# included), and a closing quote.  The token value is the matched text.
text_literal = Str("'") + Rep(AnyBut("'")) + Str("'")
lex = Lexicon(
    [(text_literal, TEXT)],
    debug=Test.debug,
    timings=sys.stderr,
)

Test.run(lex, "test12", debug=0, trace=0)
15 |
--------------------------------------------------------------------------------
/external/plex/dist/tests/test2.in:
--------------------------------------------------------------------------------
1 |
2 | a b ab abba a0 !xyzzy!
3 | b1 abab0110bba1 #burble#
4 | 0 1 00 01 (fee) [fie] [foe] "fum"
5 | 101010 0001010101
6 |
7 |
8 |
--------------------------------------------------------------------------------
/external/plex/dist/tests/test2.out:
--------------------------------------------------------------------------------
1 | test2.in 2 0 ident 'a'
2 | test2.in 2 2 ident 'b'
3 | test2.in 2 4 ident 'ab'
4 | test2.in 2 7 ident 'abba'
5 | test2.in 2 12 ident 'a0'
6 | test2.in 3 0 ident 'b1'
7 | test2.in 3 3 ident 'abab0110bba1'
8 | test2.in 4 0 num '0'
9 | test2.in 4 2 num '1'
10 | test2.in 4 4 num '00'
11 | test2.in 4 7 num '01'
12 | test2.in 5 0 num '101010'
13 | test2.in 5 7 num '0001010101'
14 | test2.in 8 0 None ''
15 |
--------------------------------------------------------------------------------
/external/plex/dist/tests/test2.py:
--------------------------------------------------------------------------------
1 | import Test
2 | from Plex import *
3 | import sys
4 |
# Entry order is kept exactly as written: 'ident' is listed before the
# literal "abba" entry.
ident_pattern = Seq(Any("ab"), Rep(Any("ab01")))
num_pattern = Seq(Any("01"), Rep(Any("01")))
# An opening delimiter, anything that is not a closing delimiter, then a
# closing delimiter; the whole span is ignored.
bracketed = Any('([{!"#') + Rep(AnyBut('!"#}])')) + Any('!"#}])')

token_specs = [
    (ident_pattern, 'ident'),
    (num_pattern, 'num'),
    (Any(' \n'), IGNORE),
    (Str("abba"), 'abba'),
    (bracketed, IGNORE),
]
lex = Lexicon(token_specs, debug=Test.debug, timings=sys.stderr)

Test.run(lex, "test2", debug=0, trace=0)
17 |
18 |
19 |
--------------------------------------------------------------------------------
/external/plex/dist/tests/test3.in:
--------------------------------------------------------------------------------
1 | program furbie(input, output, throughput);
2 | begin
3 | repeat
4 | make(cute_noises);
5 | have_flat_battery;
6 | until owner_is(fed_up);
7 | end.
8 |
9 |
10 |
--------------------------------------------------------------------------------
/external/plex/dist/tests/test3.out:
--------------------------------------------------------------------------------
1 | test3.in 1 0 program 'program'
2 | test3.in 1 8 ident 'furbie'
3 | test3.in 1 14 ( '('
4 | test3.in 1 15 ident 'input'
5 | test3.in 1 20 , ','
6 | test3.in 1 22 ident 'output'
7 | test3.in 1 28 , ','
8 | test3.in 1 30 ident 'throughput'
9 | test3.in 1 40 ) ')'
10 | test3.in 1 41 ; ';'
11 | test3.in 2 0 begin 'begin'
12 | test3.in 3 3 repeat 'repeat'
13 | test3.in 4 5 ident 'make'
14 | test3.in 4 9 ( '('
15 | test3.in 4 10 ident 'cute_noises'
16 | test3.in 4 21 ) ')'
17 | test3.in 4 22 ; ';'
18 | test3.in 5 5 ident 'have_flat_battery'
19 | test3.in 5 22 ; ';'
20 | test3.in 6 3 until 'until'
21 | test3.in 6 9 ident 'owner_is'
22 | test3.in 6 17 ( '('
23 | test3.in 6 18 ident 'fed_up'
24 | test3.in 6 24 ) ')'
25 | test3.in 6 25 ; ';'
26 | test3.in 7 0 end 'end'
27 | test3.in 7 3 . '.'
28 | test3.in 10 0 None ''
29 |
--------------------------------------------------------------------------------
/external/plex/dist/tests/test3.py:
--------------------------------------------------------------------------------
1 | import Test
2 | import sys
3 | from Plex import *
4 |
# Character classes.
letter = Range("AZaz") | Any("_")
digit = Range("09")
space = Any(" \t\n")

# Token patterns, written with the explicit Seq/Alt constructors.
ident = Seq(letter, Rep(Alt(letter, digit)))
number = Seq(digit, Rep(digit))
punct = Any("*()-+=[]{};:<>,./")
spaces = Seq(space, Rep(space))
resword = Str("program", "begin", "end", "repeat", "until")

# Reserved words and punctuation are reported as their own text.
token_specs = [
    (resword, TEXT),
    (ident, 'ident'),
    (number, 'num'),
    (punct, TEXT),
    (spaces, IGNORE),
]
lex = Lexicon(token_specs, debug=Test.debug, timings=sys.stderr)

Test.run(lex, "test3", trace=0)
27 |
28 |
29 |
--------------------------------------------------------------------------------
/external/plex/dist/tests/test4.in:
--------------------------------------------------------------------------------
1 | this (should ignore (anything between (matching) pairs) of) brackets
2 |
3 |
4 |
--------------------------------------------------------------------------------
/external/plex/dist/tests/test4.out:
--------------------------------------------------------------------------------
1 | test4.in 1 0 letter 't'
2 | test4.in 1 1 letter 'h'
3 | test4.in 1 2 letter 'i'
4 | test4.in 1 3 letter 's'
5 | test4.in 1 60 letter 'b'
6 | test4.in 1 61 letter 'r'
7 | test4.in 1 62 letter 'a'
8 | test4.in 1 63 letter 'c'
9 | test4.in 1 64 letter 'k'
10 | test4.in 1 65 letter 'e'
11 | test4.in 1 66 letter 't'
12 | test4.in 1 67 letter 's'
13 | test4.in 4 0 None ''
14 |
--------------------------------------------------------------------------------
/external/plex/dist/tests/test4.py:
--------------------------------------------------------------------------------
1 | import Test
2 | import sys
3 | from Plex import *
4 |
# Single-character patterns used by the lexicon further down.
wax = Any("(")     # opening bracket
wane = Any(")")    # closing bracket
space = Any(" \t\n")

# Everything that counts as a letter: A-Z, a-z and underscore.
letters = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz_"
letter = Any(letters)
11 |
def open_paren(s, t):
    """Action for "(": one more bracket is now open on scanner *s*."""
    s.counter += 1
14 |
def close_paren(s, t):
    """Action for ")": one bracket fewer is open on scanner *s*."""
    s.counter -= 1
17 |
def got_a_letter(s, t):
    """Action for a letter: 'letter' outside all brackets, otherwise None."""
    if s.counter != 0:
        return None
    return 'letter'
23 |
# Brackets only adjust the scanner's counter; got_a_letter decides whether
# a letter is reported; whitespace is skipped.
token_specs = [
    (wax, open_paren),
    (wane, close_paren),
    (letter, got_a_letter),
    (space, IGNORE),
]
lex = Lexicon(token_specs, debug=Test.debug, timings=sys.stderr)


class MyScanner(Scanner):
    # Class-level defaults; the bracket actions rebind ``counter`` on the
    # instance the first time they run.
    trace = 0
    counter = 0


Test.run(lex, "test4", scanner_class=MyScanner, trace=0)
39 |
40 |
41 |
--------------------------------------------------------------------------------
/external/plex/dist/tests/test5.in:
--------------------------------------------------------------------------------
1 | a b {this is a comment} c abc
2 |
3 |
4 |
--------------------------------------------------------------------------------
/external/plex/dist/tests/test5.out:
--------------------------------------------------------------------------------
1 | test5.in 1 0 ident 'a'
2 | test5.in 1 2 ident 'b'
3 | test5.in 1 24 ident 'c'
4 | test5.in 1 26 ident 'abc'
5 | test5.in 4 0 None ''
6 |
--------------------------------------------------------------------------------
/external/plex/dist/tests/test5.py:
--------------------------------------------------------------------------------
1 | import Test
2 | import sys
3 | from Plex import *
4 |
# Characters that make up an identifier in this test.
letters = "abc"

letter = Any(letters)
space = Any(" \t\n")

ident = Rep1(letter)
spaces = Rep1(space)
begin_comment = Str("{")
end_comment = Str("}")

# "{" begins the 'comment' state; inside it every character except "}" is
# ignored, and "}" begins the default state again.
lex = Lexicon([
    (ident, 'ident'),
    (spaces, IGNORE),
    (begin_comment, Begin('comment')),
    State('comment', [
        (end_comment, Begin('')),
        (AnyBut("}"), IGNORE),
    ])
  ],
  debug = Test.debug,
  timings = sys.stderr
)

Test.run(lex, "test5")
31 |
32 |
33 |
--------------------------------------------------------------------------------
/external/plex/dist/tests/test6.in:
--------------------------------------------------------------------------------
1 | { Test input for the
2 | Plex Pascal scanner }
3 |
4 | program spam(input, output);
5 | var
6 | order: integer;
7 | begin
8 | write('How many slices of spam would you like with your eggs, sir? ');
9 | readln(order);
10 | if order >= 1 then
11 | writeln('Certainly, sir.')
12 | else
13 | writeln('Sorry, sir, invalid order.')
14 | end
15 | end.
16 |
17 |
--------------------------------------------------------------------------------
/external/plex/dist/tests/test6.out:
--------------------------------------------------------------------------------
1 | test6.in 4 0 program 'program'
2 | test6.in 4 8 ident 'spam'
3 | test6.in 4 12 ( '('
4 | test6.in 4 13 ident 'input'
5 | test6.in 4 18 , ','
6 | test6.in 4 20 ident 'output'
7 | test6.in 4 26 ) ')'
8 | test6.in 4 27 ; ';'
9 | test6.in 5 0 var 'var'
10 | test6.in 6 2 ident 'order'
11 | test6.in 6 7 : ':'
12 | test6.in 6 9 ident 'integer'
13 | test6.in 6 16 ; ';'
14 | test6.in 7 0 ident 'begin'
15 | test6.in 8 2 ident 'write'
16 | test6.in 8 7 ( '('
17 | test6.in 8 8 str "'How many slices of spam would you like with your eggs, sir? '"
18 | test6.in 8 70 ) ')'
19 | test6.in 8 71 ; ';'
20 | test6.in 9 2 ident 'readln'
21 | test6.in 9 8 ( '('
22 | test6.in 9 9 ident 'order'
23 | test6.in 9 14 ) ')'
24 | test6.in 9 15 ; ';'
25 | test6.in 10 2 if 'if'
26 | test6.in 10 5 ident 'order'
27 | test6.in 10 11 >= '>='
28 | test6.in 10 14 num '1'
29 | test6.in 10 16 then 'then'
30 | test6.in 11 4 ident 'writeln'
31 | test6.in 11 11 ( '('
32 | test6.in 11 12 str "'Certainly, sir.'"
33 | test6.in 11 29 ) ')'
34 | test6.in 12 2 else 'else'
35 | test6.in 13 4 ident 'writeln'
36 | test6.in 13 11 ( '('
37 | test6.in 13 12 str "'Sorry, sir, invalid order.'"
38 | test6.in 13 40 ) ')'
39 | test6.in 14 2 ident 'end'
40 | test6.in 15 0 ident 'end'
41 | test6.in 15 3 . '.'
42 | test6.in 17 0 None ''
43 |
--------------------------------------------------------------------------------
/external/plex/dist/tests/test6.py:
--------------------------------------------------------------------------------
1 | import Test
2 |
3 | import os
4 | import sys
5 | import Plex
6 | from Plex import *
7 |
# Character classes.
letter = Range("AZaz") | Any("_")
digit = Range("09")
space = Any(" \t\n")

ident = letter + Rep(letter | digit)

# Reserved words (matched case-sensitively in this test).
reserved_words = (
    "program", "unit", "uses", "const", "type", "var",
    "if", "then", "else", "while", "do", "repeat", "until",
    "for", "to", "downto", "and", "or", "not",
    "array", "of", "record", "object",
)
resword = Str(*reserved_words)

number = Rep1(digit)
# NOTE(review): the alternation sits outside Rep, so a doubled quote is
# accepted only when it is the entire body of the string.
string = Str("'") + (Rep(AnyBut("'")) | Str("''")) + Str("'")
diphthong = Str(":=", "<=", ">=", "<>", "..")
punct = Any("^&*()-+=[]|;:<>,./")
spaces = Rep1(space)

# "{ ... }" comments.
comment_begin = Str("{")
comment_char = AnyBut("}")
comment_end = Str("}")

token_specs = [
    (resword, TEXT),
    (ident, 'ident'),
    (number, 'num'),
    (string, 'str'),
    (punct | diphthong, TEXT),
    (spaces, IGNORE),
    (comment_begin, Begin('comment')),
    State('comment', [
        (comment_char, IGNORE),
        (comment_end, Begin('')),
    ]),
]
lex = Lexicon(token_specs, debug=Test.debug, timings=sys.stderr)

Test.run(lex, "test6", debug=0, trace=0)
44 |
45 |
46 |
--------------------------------------------------------------------------------
/external/plex/dist/tests/test7.in:
--------------------------------------------------------------------------------
1 | aa bbb bb bbbbb b ccc
2 | a bb bbb b cc
3 | bb bbb bbbbb bb bbb
4 | ddddddddddddddd
5 | aaa bbb bb cccc
6 |
7 |
8 |
--------------------------------------------------------------------------------
/external/plex/dist/tests/test7.out:
--------------------------------------------------------------------------------
1 | test7.in 1 0 begin 'aa'
2 | test7.in 1 3 middle 'bbb'
3 | test7.in 1 7 middle 'bb'
4 | test7.in 1 10 middle 'bbbbb'
5 | test7.in 1 16 middle 'b'
6 | test7.in 1 18 end 'ccc'
7 | test7.in 2 0 begin 'a'
8 | test7.in 2 2 middle 'bb'
9 | test7.in 2 5 middle 'bbb'
10 | test7.in 2 9 middle 'b'
11 | test7.in 2 11 end 'cc'
12 | test7.in 3 0 middle 'bb'
13 | test7.in 3 3 middle 'bbb'
14 | test7.in 3 7 middle 'bbbbb'
15 | test7.in 3 14 middle 'bb'
16 | test7.in 3 17 middle 'bbb'
17 | test7.in 4 0 everything 'ddddddddddddddd'
18 | test7.in 5 0 begin 'aaa'
19 | test7.in 5 4 middle 'bbb'
20 | test7.in 5 8 middle 'bb'
21 | test7.in 5 11 end 'cccc'
22 | test7.in 8 0 None ''
23 |
--------------------------------------------------------------------------------
/external/plex/dist/tests/test7.py:
--------------------------------------------------------------------------------
1 | import Test
2 | import sys
3 | from Plex import *
4 |
# Runs of blanks, tabs and newlines separate tokens and are discarded.
spaces = Rep1(Any(" \t\n"))

# Exercises the Bol/Eol anchors: runs of "a" count only at the beginning
# of a line, runs of "c" only at the end of a line, and a run of "d" only
# when it fills the whole line (compare test7.in with test7.out).
lex = Lexicon([
  (Bol + Rep1(Str("a")), 'begin'),
  (      Rep1(Str("b")), 'middle'),
  (      Rep1(Str("c")) + Eol, 'end'),
  (Bol + Rep1(Str("d")) + Eol, 'everything'),
  (spaces, IGNORE)
  ],
  debug = Test.debug,
  timings = sys.stderr
)

# Run the lexicon through the shared test driver for the "test7" case.
Test.run(lex, "test7", trace = 0)
19 |
20 |
21 |
--------------------------------------------------------------------------------
/external/plex/dist/tests/test8.in:
--------------------------------------------------------------------------------
1 | ftang ftang ftangftang ftangfta ftang
2 |
3 |
4 |
--------------------------------------------------------------------------------
/external/plex/dist/tests/test8.out:
--------------------------------------------------------------------------------
1 | test8.in 1 0 one_ftang 'ftang'
2 | test8.in 1 6 one_ftang 'ftang'
3 | test8.in 1 12 two_ftangs 'ftangftang'
4 | test8.in 1 23 one_ftang 'ftang'
5 | test8.in 1 28 one_fta 'fta'
6 | test8.in 1 32 one_ftang 'ftang'
7 | test8.in 4 0 None ''
8 |
--------------------------------------------------------------------------------
/external/plex/dist/tests/test8.py:
--------------------------------------------------------------------------------
1 | #
2 | # This one tests the backing-up mechanism.
3 | #
4 |
5 | import Test
6 | import sys
7 | from Plex import *
8 |
# Runs of blanks, tabs and newlines separate tokens and are discarded.
spaces = Rep1(Any(" \t\n"))

# Three patterns sharing a prefix.  For the input "ftangfta" the scanner
# cannot complete "ftangftang", and test8.out shows it reported as
# 'one_ftang' followed by 'one_fta'.
lex = Lexicon([
  (Str("ftangftang"), 'two_ftangs'),
  (Str("ftang"), 'one_ftang'),
  (Str("fta"), 'one_fta'),
  (spaces, IGNORE)
  ],
  debug = Test.debug,
  timings = sys.stderr
)

# Run the lexicon through the shared test driver for the "test8" case.
Test.run(lex, "test8", trace = 0)
22 |
23 |
24 |
--------------------------------------------------------------------------------
/external/plex/dist/tests/test9.in:
--------------------------------------------------------------------------------
1 | import sys
2 |
3 | # This, in case you didn't notice, is a comment.
4 |
5 | def gcd(x, y):
6 | while x <> y:
7 | if x > y: # This is another comment.
8 | print "x is bigger"
9 | x = x - y
10 | # This comment doesn't imply any indentation.
11 | else:
12 |
13 | print "y is bigger"
14 | y = y - x
15 | return x
16 |
17 | def go():
18 | for x, y in [(12,20), (37,18), (2, 54)]:
19 | print gcd\
20 | (x, y)
21 |
22 |
23 |
--------------------------------------------------------------------------------
/external/plex/dist/tests/test9.out:
--------------------------------------------------------------------------------
1 | name('import')
2 | name('sys')
3 | 'newline'
4 | name('def')
5 | name('gcd')
6 | '('
7 | name('x')
8 | ','
9 | name('y')
10 | ')'
11 | ':'
12 | 'newline'
13 | 'INDENT'
14 | name('while')
15 | name('x')
16 | '<>'
17 | name('y')
18 | ':'
19 | 'newline'
20 | 'INDENT'
21 | name('if')
22 | name('x')
23 | '>'
24 | name('y')
25 | ':'
26 | 'newline'
27 | 'INDENT'
28 | name('print')
29 | string('"x is bigger"')
30 | 'newline'
31 | name('x')
32 | '='
33 | name('x')
34 | '-'
35 | name('y')
36 | 'newline'
37 | 'DEDENT'
38 | name('else')
39 | ':'
40 | 'newline'
41 | 'INDENT'
42 | name('print')
43 | string('"y is bigger"')
44 | 'newline'
45 | name('y')
46 | '='
47 | name('y')
48 | '-'
49 | name('x')
50 | 'newline'
51 | 'DEDENT'
52 | 'DEDENT'
53 | name('return')
54 | name('x')
55 | 'newline'
56 | 'DEDENT'
57 | name('def')
58 | name('go')
59 | '('
60 | ')'
61 | ':'
62 | 'newline'
63 | 'INDENT'
64 | name('for')
65 | name('x')
66 | ','
67 | name('y')
68 | name('in')
69 | '['
70 | '('
71 | number('12')
72 | ','
73 | number('20')
74 | ')'
75 | ','
76 | '('
77 | number('37')
78 | ','
79 | number('18')
80 | ')'
81 | ','
82 | '('
83 | number('2')
84 | ','
85 | number('54')
86 | ')'
87 | ']'
88 | ':'
89 | 'newline'
90 | 'INDENT'
91 | name('print')
92 | name('gcd')
93 | '('
94 | name('x')
95 | ','
96 | name('y')
97 | ')'
98 | 'newline'
99 | 'DEDENT'
100 | 'DEDENT'
101 | None
102 |
--------------------------------------------------------------------------------
/external/plex/dist/tests/test9.py:
--------------------------------------------------------------------------------
1 | import Test
2 |
3 | import exceptions
4 | import sys
5 | from Plex import *
6 |
# Hand-edited switch for a debug stream: change the constant to 0 to get
# None instead of sys.stdout.
# NOTE(review): nothing in the visible part of this script reads `debug`;
# the Lexicon below is passed Test.debug instead.
if 1:
  debug = sys.stdout
else:
  debug = None
11 |
12 | #########################################################################
13 |
# Raised by MyScanner.indentation_action when tabs and spaces are mixed
# or when a dedent does not return to an enclosing indentation level.
class NaughtyNaughty(exceptions.Exception):
  pass
16 |
class MyScanner(Scanner):
  """Scanner for a small Python-like language that emits explicit
  'newline', 'INDENT' and 'DEDENT' tokens in addition to ordinary ones."""

  # Depth of currently open brackets; line ends only count as 'newline'
  # tokens while this is 0 (see newline_action).
  bracket_nesting_level = 0
  # Stack of indentation widths; replaced by [0] in __init__.
  indentation_stack = None
  # First indentation character seen (space or tab); later indentation
  # must start with the same character.
  indentation_char = None

  def current_level(self):
    """Return the innermost (most recently pushed) indentation width."""
    return self.indentation_stack[-1]

  def open_bracket_action(self, text):
    """Count an opening bracket and return its text as the token value."""
    self.bracket_nesting_level = self.bracket_nesting_level + 1
    return text

  def close_bracket_action(self, text):
    """Count a closing bracket and return its text as the token value."""
    self.bracket_nesting_level = self.bracket_nesting_level - 1
    return text

  def newline_action(self, text):
    """At a line end outside brackets, switch to the 'indent' state and
    produce a 'newline' token; inside brackets do nothing."""
    if self.bracket_nesting_level == 0:
      self.begin('indent')
      self.produce('newline', '')

  def indentation_action(self, text):
    """Turn the leading whitespace `text` of a line into INDENT/DEDENT
    tokens and return to the default state.

    Raises NaughtyNaughty on inconsistent indentation characters or on a
    dedent that matches no enclosing level.
    """
    self.begin('')
    # Check that tabs and spaces are being used consistently.
    if text:
      c = text[0]
      if self.indentation_char is None:
        self.indentation_char = c
      else:
        if self.indentation_char <> c:
          raise NaughtyNaughty("Mixed up tabs and spaces!")
    # Figure out how many indents/dedents to do
    current_level = self.current_level()
    new_level = len(text)
    if new_level == current_level:
      return
    elif new_level > current_level:
      self.indentation_stack.append(new_level)
      self.produce('INDENT', '')
    else:
      # Pop one level per DEDENT until we are no deeper than the new line.
      while new_level < self.current_level():
        del self.indentation_stack[-1]
        self.produce('DEDENT', '')
      if new_level <> self.current_level():
        raise NaughtyNaughty("Indentation booboo!")

  def eof(self):
    """Produce a DEDENT for every indentation level still open.

    Presumably called by Scanner when the input is exhausted -- confirm
    against the Plex Scanner documentation.
    """
    while len(self.indentation_stack) > 1:
      self.produce('DEDENT', '')
      self.indentation_stack.pop()

  # Character classes and token patterns.
  letter = Range("AZaz") | Any("_")
  digit = Range("09")
  hexdigit = Range("09AFaf")
  indentation = Rep(Str(" ")) | Rep(Str("\t"))

  name = letter + Rep(letter | digit)
  number = Rep1(digit) | (Str("0x") + Rep1(hexdigit))
  sq_string = (
    Str("'") +
    Rep(AnyBut("\\\n'") | (Str("\\") + AnyChar)) +
    Str("'"))
  dq_string = (
    Str('"') +
    Rep(AnyBut('\\\n"') | (Str("\\") + AnyChar)) +
    Str('"'))
  non_dq = AnyBut('"') | (Str('\\') + AnyChar)
  tq_string = (
    Str('"""') +
    Rep(
      non_dq |
      (Str('"') + non_dq) |
      (Str('""') + non_dq)) + Str('"""'))
  stringlit = sq_string | dq_string | tq_string
  bra = Any("([{")
  ket = Any(")]}")
  punct = Any(":,;+-*/|&<>=.%`~^")
  diphthong = Str("==", "<>", "!=", "<=", "<<", ">>", "**")
  spaces = Rep1(Any(" \t"))
  comment = Str("#") + Rep(AnyBut("\n"))
  escaped_newline = Str("\\\n")
  lineterm = Str("\n") | Eof

  # The default state tokenizes ordinary text.  The 'indent' state is
  # entered at the start of input (see __init__) and after each newline
  # outside brackets; it skips blank and comment-only lines and hands
  # any other leading whitespace to indentation_action.
  lexicon = Lexicon([
    (name, 'name'),
    (number, 'number'),
    (stringlit, 'string'),
    (punct | diphthong, TEXT),
    (bra, open_bracket_action),
    (ket, close_bracket_action),
    (lineterm, newline_action),
    (comment, IGNORE),
    (spaces, IGNORE),
    (escaped_newline, IGNORE),
    State('indent', [
      (indentation + Opt(comment) + lineterm, IGNORE),
      (indentation, indentation_action),
    ]),
  ],
  debug = Test.debug,
  debug_flags = 7,
  timings = sys.stderr)

  def __init__(self, file):
    """Scan `file` with the class lexicon, starting in the 'indent'
    state with a single base indentation level of 0."""
    Scanner.__init__(self, self.lexicon, file)
    self.indentation_stack = [0]
    self.begin('indent')
124 |
125 | #########################################################################
126 |
#s.machine.dump(sys.stdout)
#print "=" * 70

# Scan test9.in and print one token per line, indented according to the
# scanner's current indentation depth, until the end-of-input token
# (value None) has been printed.
f = open("test9.in", "rU")
ts = MyScanner(f)
ts.trace = 0
while 1:
  value, text = ts.read()
  level = len(ts.indentation_stack) - 1
  if level:
    # The trailing comma keeps the token on the same output line.
    print (4 * level - 1) * ' ',
  # Tokens whose text differs from their value are shown as value(text).
  if text and text <> value:
    print "%s(%s)" % (value, repr(text))
  else:
    print repr(value)
  if value is None:
    break
145 |
146 |
147 |
148 |
--------------------------------------------------------------------------------
/external/plex/prepare.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 |
3 | # Copyright (c) 2010-2016, MIT Probabilistic Computing Project.
4 | #
5 | # This file is part of Venture.
6 | #
7 | # Venture is free software: you can redistribute it and/or modify
8 | # it under the terms of the GNU General Public License as published by
9 | # the Free Software Foundation, either version 3 of the License, or
10 | # (at your option) any later version.
11 | #
12 | # Venture is distributed in the hope that it will be useful,
13 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 | # GNU General Public License for more details.
16 | #
17 | # You should have received a copy of the GNU General Public License
18 | # along with Venture. If not, see <http://www.gnu.org/licenses/>.
19 |
20 | # Prepare Plex 1.1.5 for import:
21 | # - Delete extraneous files.
22 | # - Translate carriage return to line feed.
23 | # - Make sure the tests still run.
24 |
set -Ceu

# Python interpreter used for the final test run; override via $PYTHON.
: "${PYTHON:=python}"

# Name of the empty marker file "Icon<CR>" shipped in the distribution.
icon_file="$(printf 'Icon\r')"

# Refuse to run anywhere but the top of an unpacked Plex distribution,
# which is recognized by the Plex/ directory and the Icon<CR> file.
if [ ! -d Plex ] || [ ! -f "$icon_file" ]; then
    printf >&2 'Usage: %s\n' "$0"
    printf >&2 ' Run within the Plex distribution directory.\n'
    exit 1
fi

# Step 1: drop Mac OS X metadata files.
find . -type f \( -name .DS_Store -o -name '._*' \) -exec rm -f '{}' ';'

# Step 2: drop the empty file with a CR in its name.
rm -f -- "$icon_file"

# Step 3: drop the Mac OS Classic(???) leftover.
rm -f -- tests/PythonInterpreter

# Step 4: everything that remains should be plain text; rewrite each
# file with carriage returns translated to line feeds.
find . -type f -exec sh -c '
    tr "\\r" "\\n" < "$1" > "$1".tmp && mv -f "$1".tmp "$1"
' -- '{}' ';'

# Step 5: check that the tests still run.  -B keeps Python from leaving
# .pyc and .pyo files behind.
PYTHONPATH="$(pwd)" \
    sh -c 'cd tests && exec "$1" -B runtests.py' -- "$PYTHON"
53 |
--------------------------------------------------------------------------------
/external/weakprng/COPYING:
--------------------------------------------------------------------------------
1 | Copyright (c) 2015 Taylor R. Campbell
2 | All rights reserved.
3 |
4 | Redistribution and use in source and binary forms, with or without
5 | modification, are permitted provided that the following conditions
6 | are met:
7 | 1. Redistributions of source code must retain the above copyright
8 | notice, this list of conditions and the following disclaimer.
9 | 2. Redistributions in binary form must reproduce the above copyright
10 | notice, this list of conditions and the following disclaimer in the
11 | documentation and/or other materials provided with the distribution.
12 |
13 | THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
14 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16 | ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
17 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19 | OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20 | HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21 | LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22 | OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
23 | SUCH DAMAGE.
24 |
--------------------------------------------------------------------------------
/external/weakprng/README:
--------------------------------------------------------------------------------
1 | weakprng - cryptographic pseudorandom number generator based on ChaCha
2 |
3 | http://mumble.net/~campbell/python/chacha.py
4 | http://mumble.net/~campbell/python/weakprng.py
5 |
--------------------------------------------------------------------------------
/external/weakprng/dist/__init__.py:
--------------------------------------------------------------------------------
1 | from weakprng import *
2 |
--------------------------------------------------------------------------------
/external/weakprng/prepare.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 |
3 | set -Ceu
4 |
5 | cat > __init__.py <`: source a file of commands
9 | + Ex: `$ bayeslite mydatabase.bdb -f hooks/myhooks.bql`
10 |
11 |
12 | ## Dot command reference
13 | By default, the bayeslite shell will interpret commands as BQL. Commands that
14 | lead with a dot (*dot commands*; e.g., `.sql pragma table_info(mytable)`)
15 | perform special functionality.
16 |
17 | ### `.help`
18 | The only command you'll need.
19 |
20 | bayeslite> .help
21 |
22 | ## Example
23 |
24 | ```
25 | $ bayeslite my_database.bdb
26 | Welcome to the bayeslite shell.
27 | Type `.help' for help.
28 | bayeslite> .csv mytable from myfile.csv
29 | bayeslite> .guess mytable_cc mytable.csv
30 | bayeslite> INITIALIZE 10 MODELS FOR mytable_cc;
31 | bayeslite> ANALYZE mytable_cc FOR 100 ITERATIONS;
32 | bayeslite> .hook contrib.py
33 | added command ".zmatrix"
34 | added command ".pairplot"
35 | added command ".ccstate"
36 | bayeslite> .zmatrix ESTIMATE DEPENDENCE PROBABILITY FROM PAIRWISE COLUMNS OF mytable_cc -f zmat.png
37 | ```
38 |
39 | ## Adding your own commands with `.hook`
40 |
41 | Simply define a python function that takes a `self` argument and an `args`
42 | argument.
43 |
44 | `args` is the string of text that follows the dot command. For
45 | example, in
46 |
47 | bayeslite> .myfunc -v -n Tommy
48 |
49 | `args` would be the string `'-v -n Tommy'`.
50 |
51 | `self` is the shell object. The
52 | `self` variable then gives you access to the bayesdb object (via `self._bdb`)
53 | and the `hookvars` attribute. `self.hookvars` is a dictionary you can use to
54 | communicate between shell commands.
55 |
56 | ### Example
57 |
58 | ```python
59 | # my_contrib.py
60 | from bayeslite.shell import pretty
61 | from bayeslite.shell.hook import bayesdb_shell_cmd
62 |
63 |
64 | @bayesdb_shell_cmd("hello")
65 | def say_hello_to_name(self, args):
66 | """ Says hello
67 |
68 | """
69 | self.stdout.write("Hello, %s.\n" % (args,))
70 | self.hookvars['hello_name'] = args
71 |
72 |
73 | @bayesdb_shell_cmd("byebye", autorehook=True)
74 | def say_bye_to_name(self, args):
75 | name = self.hookvars.get('hello_name', 'friend')
76 | self.stdout.write("Bye-bye, {}.\n".format(name))
77 |
78 |
79 | # Alias a long query you use a lot
80 | @bayesdb_shell_cmd("mycmd", autorehook=True)
81 | def get_cust_order_data_name(self, args):
82 | '''Get order id, order date, and customer name, by customer name
83 |
84 |
85 | Example:
86 | bayeslite> .mycmd John Keats
87 | '''
88 | query = '''
89 | SELECT Orders.OrderID, Orders.OrderDate, Customers.CustomerName
90 | FROM Customers, Orders
91 | WHERE Customers.CustomerName = ?
92 | AND Customers.CustomerID = Orders.CustomerID;
93 | '''
94 | cursor = self._bdb.execute(query, (args,))
95 | pretty.pp_cursor(self.stdout, cursor)
96 |
97 | ```
98 |
99 | From the shell, access your command with `.hook`
100 | ```
101 | bayeslite> .hook my_contrib.py
102 | added command ".hello"
103 | added command ".byebye"
104 | added command ".mycmd"
105 | bayeslite> .help hello
106 | .hello
107 | (END)
108 |
109 | bayeslite> .help byebye
110 | .byebye ...(END)
111 |
112 | bayeslite> .byebye
113 | Bye-bye, friend.
114 | bayeslite> .hello Nathan
115 | Hello, Nathan.
116 | bayeslite> .byebye
117 | Bye-bye, Nathan.
118 | ```
119 |
120 | You are free to `.hook` a file multiple times. Re-hooking a file will reload the contents of the file. This can be especially useful for development. If you try to re-hook a file, you must confirm that you want to re-hook the file and confirm that you want to re-hook each function in that file for which `autorehook=False`.
121 |
122 | ## The `.bayesliterc`
123 | Manually hooking the utilities you frequently use every time you open the shell is annoying. To address this, the Bayeslite shell looks for a `.bayesliterc` file in your home directory, which it runs on startup. Any file or path names in `.bayesliterc` should be absolute (this is subject to change, to allow paths relative to the rc file). Local, project-specific init files can be loaded with the `-f` option.
124 |
125 | For example, we may have a small set of utilities in our `~/.bayesliterc`:
126 |
127 | ```
128 | -- contents of ~/.bayesliterc
129 | .hook /User/bax/my_bayesdb_utils/plotting.py
130 | .hook /User/bax/my_bayesdb_utils/cleaning.py
131 | ```
132 |
133 | You can prevent the shell from loading `~/.bayesliterc` with the `--no-init-file` argument.
134 |
--------------------------------------------------------------------------------
/shell/src/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | # Copyright (c) 2010-2016, MIT Probabilistic Computing Project
4 | #
5 | # Licensed under the Apache License, Version 2.0 (the "License");
6 | # you may not use this file except in compliance with the License.
7 | # You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 |
--------------------------------------------------------------------------------
/shell/src/hook.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | # Copyright (c) 2010-2016, MIT Probabilistic Computing Project
4 | #
5 | # Licensed under the Apache License, Version 2.0 (the "License");
6 | # you may not use this file except in compliance with the License.
7 | # You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 |
17 | import contextlib
18 | import sys
19 | import threading
20 | import traceback
21 |
# Per-thread slot holding the shell that hook files register against.
# Attributes assigned on a threading.local exist only in the thread that
# assigned them, so the initialisation below covers just the importing
# thread.  Readers must therefore use getattr(..., 'value', None).
the_current_shell = threading.local()
the_current_shell.value = None


@contextlib.contextmanager
def set_current_shell(shell):
    """Make `shell` the current shell of this thread for a `with` block.

    The previous current shell (None if there was none) is restored when
    the block exits, whether normally or by exception, so uses may nest.
    """
    outer = getattr(the_current_shell, 'value', None)
    the_current_shell.value = shell
    try:
        yield
    finally:
        the_current_shell.value = outer


def current_shell():
    """Return the current shell of this thread.

    Raises AssertionError ('No current shell!') when called outside any
    set_current_shell() block, including from a thread that has never
    set one.
    """
    shell = getattr(the_current_shell, 'value', None)
    assert shell is not None, 'No current shell!'
    return shell
39 |
40 |
41 | # make sure that the function that is hooked by the shell has the same
42 | # __doc__
class bayesdb_shellhookexp(object):
    """Callable wrapper around a hooked shell command function.

    The wrapper exposes the function's docstring as its own `__doc__`
    (with a placeholder when there is none) and reports exceptions
    raised by the function instead of letting them propagate.
    """

    def __init__(self, func):
        """Wrap `func` and derive this object's `__doc__` from it."""
        self.func = func
        fdoc = func.__doc__
        if fdoc is None or not fdoc.strip():
            fdoc = 'NO DOCUMENTATION...\n...\n'

        # Single-line docstrings are padded to several lines; presumably
        # the shell's help display expects that -- TODO confirm.
        if '\n' not in fdoc:
            fdoc += '\n...\n'

        self.__doc__ = fdoc

    def __call__(self, *args, **kwargs):
        """Call the wrapped function and return its result.

        If it raises an Exception, the traceback is written to
        sys.stderr, the exception text to sys.stdout, and None is
        returned.
        """
        try:
            return self.func(*args, **kwargs)
        except Exception as err:
            sys.stderr.write(traceback.format_exc())
            # Written with sys.stdout.write rather than a print statement
            # so that the module parses on both Python 2 and Python 3.
            sys.stdout.write('%s\n' % (err,))
61 |
62 |
def bayesdb_shell_cmd(name, autorehook=False):
    """Decorator factory registering a function as shell command `name`.

    The decorated function is wrapped in bayesdb_shellhookexp and passed
    to the current shell's `_hook` method together with `autorehook`.
    The function itself is returned unchanged, so the decorated name
    stays bound to it (as with bayesdb_shell_init) rather than to None.
    """
    def wrapper(func):
        # because the cmd loop doesn't handle errors and just kicks people out
        current_shell()._hook(name, bayesdb_shellhookexp(func),
            autorehook=autorehook)
        return func
    return wrapper
69 |
70 |
def bayesdb_shell_init(func):
    """Decorator: call `func` once with the current shell, then return
    `func` unchanged."""
    shell = current_shell()
    func(shell)
    return func
74 |
--------------------------------------------------------------------------------
/shell/src/main.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | # Copyright (c) 2010-2016, MIT Probabilistic Computing Project
4 | #
5 | # Licensed under the Apache License, Version 2.0 (the "License");
6 | # you may not use this file except in compliance with the License.
7 | # You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 |
17 | import argparse
18 | import os
19 |
20 | import bayeslite
21 | from bayeslite.backends.cgpm_backend import CGPM_Backend
22 | import bayeslite.shell.core as shell
23 | import bayeslite.shell.hook as hook
24 |
25 |
def parse_args(argv):
    """Parse the shell's command-line arguments.

    `argv` is the argument list without the program name.  Returns the
    argparse namespace with attributes bdbpath, jobs, seed, file, batch,
    no_init_file and memory.
    """
    # (flags, add_argument keyword settings), in --help display order.
    option_table = (
        (('bdbpath',),
            dict(type=str, nargs='?', default=None,
                help="bayesdb database file")),
        (('-j', '--jobs'),
            dict(type=int, default=1,
                help="Max number of jobs (processes) useable.")),
        (('-s', '--seed'),
            dict(type=int, default=None,
                help="Random seed for the default generator.")),
        (('-f', '--file'),
            dict(type=str, nargs=1, default=None,
                help="Path to commands file. May be used to specify a "
                    "project-specific init file.")),
        (('-b', '--batch'),
            dict(action='store_true',
                help="Exit after executing file specified with -f.")),
        (('-q', '--no-init-file'),
            dict(action='store_true',
                help="Do not load ~/.bayesliterc")),
        (('-m', '--memory'),
            dict(action='store_true',
                help="Use temporary database not saved to disk")),
    )
    cli = argparse.ArgumentParser()
    for flags, settings in option_table:
        cli.add_argument(*flags, **settings)
    return cli.parse_args(argv)
46 |
47 |
def run(stdin, stdout, stderr, argv):
    """Run the bayeslite shell.

    `argv` is the full argument vector including the program name.
    Returns 1 after writing a message to `stderr` when neither a
    database path nor -m/--memory was given, or when the path is '-';
    returns 0 otherwise.
    """
    args = parse_args(argv[1:])
    # Keep only what follows the last '/' for use in error messages.
    # rfind() returns -1 when there is no slash, so the slice then starts
    # at 0 and keeps the whole name; a leading slash (index 0) is
    # stripped as well.
    progname = argv[0]
    progname = progname[progname.rfind('/') + 1:]
    if args.bdbpath is None and not args.memory:
        stderr.write('%s: pass filename or -m/--memory\n' % (progname,))
        return 1
    if args.bdbpath == '-':
        stderr.write('%s: missing option?\n' % (progname,))
        return 1
    # NOTE(review): args.seed is parsed but not used anywhere in this
    # function.
    bdb = bayeslite.bayesdb_open(pathname=args.bdbpath,
        builtin_backends=False)

    multiprocess = args.jobs != 1
    backend = CGPM_Backend(cgpm_registry={}, multiprocess=multiprocess)
    bayeslite.bayesdb_register_backend(bdb, backend)
    bdbshell = shell.Shell(bdb, 'cgpm', stdin, stdout, stderr)
    with hook.set_current_shell(bdbshell):
        if not args.no_init_file:
            init_file = os.path.expanduser('~/.bayesliterc')
            if os.path.isfile(init_file):
                bdbshell.dot_read(init_file)

        if args.file is not None:
            for path in args.file:
                if os.path.isfile(path):
                    bdbshell.dot_read(path)
                else:
                    # Only the remaining -f files are skipped here; the
                    # interactive loop below still starts unless --batch
                    # was given.
                    bdbshell.stdout.write('%s is not a file. Aborting.\n' %
                        (str(path),))
                    break

        if not args.batch:
            bdbshell.cmdloop()
    return 0
85 |
86 |
def main():
    """Console entry point: run the shell on the process's standard
    streams and arguments, then exit with the status run() returns."""
    import sys
    status = run(sys.stdin, sys.stdout, sys.stderr, sys.argv)
    sys.exit(status)
90 |
91 | if __name__ == '__main__':
92 | main()
93 |
--------------------------------------------------------------------------------
/shell/src/pretty.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | # Copyright (c) 2010-2016, MIT Probabilistic Computing Project
4 | #
5 | # Licensed under the Apache License, Version 2.0 (the "License");
6 | # you may not use this file except in compliance with the License.
7 | # You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 |
def pp_cursor(out, cursor):
    """Pretty-print all rows of `cursor` to `out` as a text table.

    Column headings are the first item of each entry in
    `cursor.description`.  Nothing is written when the cursor has no
    description.
    """
    columns = cursor.description
    if columns:
        headings = [column[0] for column in columns]
        rows = cursor.fetchall()
        pp_list(out, rows, headings)
23 |
def pp_list(out, table, labels):
    """Write `table` to `out` as a right-aligned text table.

    `labels` is a non-empty sequence of column headings and `table` an
    iterable of rows.  It is consumed once, so one-shot iterables work.
    Each column is as wide as its widest cell or heading.  Columns are
    separated by ' | ', and a dashed rule follows the heading line.

    Raises IndexError, before writing anything, if a row has more cells
    than there are labels.
    """
    assert 0 < len(labels)
    try:
        text = unicode
    except NameError:
        # Python 3: str is the text type.
        text = str
    # Stringify every cell exactly once, up front.
    # XXX Consider quotation/escapes.
    # XXX Combining characters?
    rows = [[text(v) for v in row] for row in table]
    colwidths = [len(label) for label in labels]
    for cells in rows:
        for colno, cell in enumerate(cells):
            colwidths[colno] = max(colwidths[colno], len(cell))
    # XXX Quote/escape.
    heading = ' | '.join(
        '%*s' % (width, label) for width, label in zip(colwidths, labels))
    out.write(heading + '\n')
    out.write('-+-'.join('-' * width for width in colwidths) + '\n')
    for cells in rows:
        # zip() stops at the shorter sequence, so a row with fewer cells
        # than labels yields a shorter line.
        line = ' | '.join(
            '%*s' % (width, cell) for width, cell in zip(colwidths, cells))
        out.write(line + '\n')
61 |
--------------------------------------------------------------------------------
/shell/tests/test_pretty.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | # Copyright (c) 2010-2016, MIT Probabilistic Computing Project
4 | #
5 | # Licensed under the Apache License, Version 2.0 (the "License");
6 | # you may not use this file except in compliance with the License.
7 | # You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 |
17 | import StringIO
18 | import pytest
19 |
20 | import bayeslite.shell.pretty as pretty
21 |
def test_pretty():
    # Checks pp_list's column sizing and right alignment on a table that
    # mixes byte strings, integers and non-ASCII unicode strings.
    labels = ['name', 'age', 'favourite food']
    table = [
        ['Spot', 3, 'kibble'],
        ['Skruffles', 2, 'kibble'],
        ['Zorb', 2, 'zorblaxian kibble'],
        [u'Zörb', 87, u'zørblaχian ﻛبﻞ'],
    ]
    out = StringIO.StringIO()
    pretty.pp_list(out, table, labels)
    # Expected output: heading line, dashed rule, then one line per row.
    assert out.getvalue() == \
        u' name | age | favourite food\n' \
        u'----------+-----+------------------\n' \
        u' Spot | 3 | kibble\n' \
        u'Skruffles | 2 | kibble\n' \
        u' Zorb | 2 | zorblaxian kibble\n' \
        u' Zörb | 87 | zørblaχian ﻛبﻞ\n'
39 |
def test_pretty_unicomb():
    # Known limitation: pp_list measures width with len(), i.e. in code
    # points, so text containing combining characters is misaligned.
    # The imperative pytest.xfail() call ends the test right here; the
    # rest of the body documents the desired behaviour but does not run.
    pytest.xfail('pp_list counts code points, not grapheme clusters.')
    labels = ['name', 'age', 'favourite food']
    table = [
        ['Spot', 3, 'kibble'],
        ['Skruffles', 2, 'kibble'],
        ['Zorb', 2, 'zorblaxian kibble'],
        ['Zörb', 87, 'zørblaχian ﻛبﻞ'],
        [u'Zörb', 42, u'zörblǎxïǎn kïbble'],
        ['Zörb', 87, 'zørblaχian ﻛِبّﻞ'],
    ]
    out = StringIO.StringIO()
    pretty.pp_list(out, table, labels)
    # NOTE(review): the table above has six rows but only five are
    # expected below, and the last expected line spells 'zørblaxian'
    # with a plain 'x' where the table has 'χ'.  Reconcile these before
    # removing the xfail.
    assert out.getvalue() == \
        u' name | age | favourite food\n' \
        u'----------+-----+------------------\n' \
        u' Spot | 3 | kibble\n' \
        u'Skruffles | 2 | kibble\n' \
        u' Zorb | 2 | zorblaxian kibble\n' \
        u' Zörb | 42 | zörblǎxïǎn kïbble\n' \
        u' Zörb | 87 | zørblaxian ﻛِبّﻞ\n'
61 |
--------------------------------------------------------------------------------
/shell/tests/thooks.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | # Copyright (c) 2010-2016, MIT Probabilistic Computing Project
4 | #
5 | # Licensed under the Apache License, Version 2.0 (the "License");
6 | # you may not use this file except in compliance with the License.
7 | # You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 |
17 | from bayeslite.shell.hook import bayesdb_shell_cmd
18 |
19 |
@bayesdb_shell_cmd("myhook")
def john_is_a_classy_name(self, args):
    '''myhook help string

    '''
    # Echo the command's argument text back on the shell's stdout,
    # prefixed with "john ".
    self.stdout.write(''.join(['john ', args, '\n']))
26 |
--------------------------------------------------------------------------------
/src/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | # Copyright (c) 2010-2016, MIT Probabilistic Computing Project
4 | #
5 | # Licensed under the Apache License, Version 2.0 (the "License");
6 | # you may not use this file except in compliance with the License.
7 | # You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 |
17 | """Main bayeslite API.
18 |
19 | The focus of the bayeslite API is the *BayesDB*, a handle for a
20 | database. To obtain a BayesDB handle, use :func:`bayesdb_open`::
21 |
22 | import bayeslite
23 |
24 | bdb = bayeslite.bayesdb_open(pathname='foo.bdb')
25 |
26 | When done, close it with the :meth:`~BayesDB.close` method::
27 |
28 | bdb.close()
29 |
30 | BayesDB handles also serve as context managers, so you can do::
31 |
32 | with bayeslite.bayesdb_open(pathname='foo.bdb') as bdb:
33 | bdb.execute('SELECT 42')
34 | ...
35 |
36 | You can query the probable (according to the models stored in
37 | the database) implications of the data by passing BQL queries
38 | to the :meth:`~BayesDB.execute` method::
39 |
40 | bql = 'ESTIMATE DEPENDENCE PROBABILITY FROM PAIRWISE COLUMNS OF foo'
41 | for x in bdb.execute(bql):
42 | print x
43 |
44 | You can also execute normal SQL on a BayesDB handle `bdb` with the
45 | :meth:`~BayesDB.sql_execute` method::
46 |
47 | bdb.sql_execute('CREATE TABLE t(x INT, y TEXT, z REAL)')
48 | bdb.sql_execute("INSERT INTO t VALUES(1, 'xyz', 42.5)")
49 | bdb.sql_execute("INSERT INTO t VALUES(1, 'pqr', 83.7)")
50 | bdb.sql_execute("INSERT INTO t VALUES(2, 'xyz', 1000)")
51 |
52 | (BQL does not yet support ``CREATE TABLE`` and ``INSERT`` directly, so
53 | you must use :meth:`~BayesDB.sql_execute` for those.)
54 | """
55 |
56 | from bayeslite.bayesdb import BayesDB
57 | from bayeslite.bayesdb import bayesdb_open
58 | from bayeslite.bayesdb import IBayesDBTracer
59 | from bayeslite.exception import BayesDBException
60 | from bayeslite.exception import BQLError
61 | from bayeslite.backend import BayesDB_Backend
62 | from bayeslite.backend import bayesdb_builtin_backend
63 | from bayeslite.backend import bayesdb_deregister_backend
64 | from bayeslite.backend import bayesdb_register_backend
65 | from bayeslite.nullify import bayesdb_nullify
66 | from bayeslite.parse import BQLParseError
67 | from bayeslite.quote import bql_quote_name
68 | from bayeslite.read_csv import bayesdb_read_csv
69 | from bayeslite.read_csv import bayesdb_read_csv_file
70 | from bayeslite.schema import bayesdb_upgrade_schema
71 | from bayeslite.txn import BayesDBTxnError
72 | from bayeslite.version import __version__
73 |
74 | # XXX This is not a good place for me. Find me a better home, please!
75 |
# Public names exported by ``from bayeslite import *``.
#
# ``bayesdb_builtin_backend`` is listed alongside its siblings
# ``bayesdb_register_backend`` and ``bayesdb_deregister_backend``: all
# three are imported from ``bayeslite.backend`` above.
__all__ = [
    'BQLError',
    'BQLParseError',
    'BayesDB',
    'BayesDBException',
    'BayesDBTxnError',
    'bayesdb_builtin_backend',
    'bayesdb_deregister_backend',
    'bayesdb_nullify',
    'bayesdb_open',
    'bayesdb_read_csv',
    'bayesdb_read_csv_file',
    'bayesdb_register_backend',
    'bayesdb_upgrade_schema',
    'bql_quote_name',
    'BayesDB_Backend',
    'IBayesDBTracer',
]

# Register cgpm as a builtin backend.  This runs as a side effect of
# importing the package.
from bayeslite.backends.cgpm_backend import CGPM_Backend
bayesdb_builtin_backend(CGPM_Backend({}, multiprocess=True))
97 |
--------------------------------------------------------------------------------
/src/backends/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | # Copyright (c) 2010-2016, MIT Probabilistic Computing Project
4 | #
5 | # Licensed under the Apache License, Version 2.0 (the "License");
6 | # you may not use this file except in compliance with the License.
7 | # You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 |
--------------------------------------------------------------------------------
/src/backends/cgpm_alter/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | # Copyright (c) 2010-2016, MIT Probabilistic Computing Project
4 | #
5 | # Licensed under the Apache License, Version 2.0 (the "License");
6 | # you may not use this file except in compliance with the License.
7 | # You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 |
--------------------------------------------------------------------------------
/src/backends/cgpm_alter/alterations.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | # Copyright (c) 2010-2016, MIT Probabilistic Computing Project
4 | #
5 | # Licensed under the Apache License, Version 2.0 (the "License");
6 | # you may not use this file except in compliance with the License.
7 | # You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 |
17 |
18 | from bayeslite.backends.cgpm_alter import parse
19 |
20 | from cgpm.mixtures.view import View
21 |
22 |
def make_set_var_dependency(dependency):
    """Return a state transformer enforcing a dependency among all outputs.

    :param dependency: either ``parse.EnsureDependent`` or
        ``parse.EnsureIndependent``
    :return: a function mapping a state to the (modified) state
    :raises ValueError: if `dependency` is neither of the two values
    """
    def _all_dependent(state):
        # Use the first output as the anchor and move every other output
        # into its cluster.
        anchor = state.outputs[0]
        followers = state.outputs[1:]
        transform = make_set_var_cluster(followers, anchor)
        return transform(state)

    def _all_independent(state):
        # Give each output a singleton cluster of its own, one at a time.
        for output in state.outputs:
            transform = make_set_var_cluster([output], parse.SingletonCluster)
            state = transform(state)
        return state

    if dependency == parse.EnsureDependent:
        return _all_dependent
    if dependency == parse.EnsureIndependent:
        return _all_independent
    raise ValueError('Unknown dependency: %s' % (dependency,))
38 |
def make_set_var_cluster(columns0, column1):
    """Return a state transformer moving `columns0` into one view.

    :param columns0: iterable of columns to move
    :param column1: a column whose view the others should join, or
        ``parse.SingletonCluster`` to move them into a newly created view
    :return: a function mapping a state to the (modified) state
    """
    def _into_existing_view(state):
        # `column1` itself is skipped; it is already where it should be.
        for column in exclude(columns0, column1):
            dim = state.dim_for(column)
            source_view = state.Zv(column)
            target_view = state.Zv(column1)
            state._migrate_dim(source_view, target_view, dim)
        return state

    def _into_fresh_view(state):
        # Pick an index one past the largest existing one.
        # NOTE(review): assumes max(state.views) yields view indexes
        # -- confirm against cgpm.
        fresh_index = max(state.views) + 1
        fresh_view = View(
            state.X,
            outputs=[state.crp_id_view + fresh_index],
            rng=state.rng
        )
        state._append_view(fresh_view, fresh_index)
        for column in columns0:
            dim = state.dim_for(column)
            source_view = state.Zv(column)
            state._migrate_dim(source_view, fresh_index, dim)
        return state

    if column1 == parse.SingletonCluster:
        return _into_fresh_view
    return _into_existing_view
64 |
def make_set_var_cluster_conc(concentration):
    """Return a state transformer setting the state-level CRP ``alpha``.

    The stored hyperparameter is the reciprocal of `concentration`.

    :param concentration: nonzero number
    :return: a function mapping a state to the (modified) state
    """
    def _apply(state):
        # XXX No abstraction.
        alpha = 1. / concentration
        state.crp.hypers['alpha'] = alpha
        return state
    return _apply
71 |
def make_set_row_cluster(rows0, row1, column):
    """Return a state transformer moving `rows0` into one row cluster.

    The clustering affected is that of the view containing `column`.

    :param rows0: iterable of row indexes to move
    :param row1: a row whose cluster the others should join, or
        ``parse.SingletonCluster`` to move them into a new cluster
    :param column: column identifying the view to operate on
    :return: a function mapping a state to the (modified) state
    """
    def func_existing(state):
        view = state.view_for(column)
        k_row1 = view.Zr(row1)
        # `row1` itself is skipped; it is already in the target cluster.
        for row0 in exclude(rows0, row1):
            view._migrate_row(row0, k_row1)
        return state

    def func_singleton(state):
        view = state.view_for(column)
        # Here `row1` is the SingletonCluster sentinel, not a row, so it
        # must not be looked up with view.Zr(row1).  Allocate a cluster id
        # one past the largest in use, mirroring how the column-level
        # singleton case picks max(state.views) + 1.
        # NOTE(review): assumes view.Zr() with no argument returns the
        # mapping from row to cluster id -- confirm against cgpm.
        k_singleton = max(view.Zr().values()) + 1
        for row0 in rows0:
            view._migrate_row(row0, k_singleton)
        return state

    if row1 == parse.SingletonCluster:
        return func_singleton
    else:
        return func_existing
89 |
def make_set_row_cluster_conc(column, concentration):
    """Return a state transformer setting a view-level CRP ``alpha``.

    The view is the one returned by ``state.view_for(column)``; the stored
    hyperparameter is the reciprocal of `concentration`.

    :param column: column identifying the view to operate on
    :param concentration: nonzero number
    :return: a function mapping a state to the (modified) state
    """
    def _apply(state):
        target_view = state.view_for(column)
        alpha = 1. / concentration
        target_view.crp.hypers['alpha'] = alpha
        return state
    return _apply
96 |
def exclude(iterable, ignore):
    """Lazily yield the items of `iterable` that compare unequal to `ignore`.

    Order is preserved, and every item that is ``!= ignore`` is yielded,
    duplicates included.
    """
    for candidate in iterable:
        differs = candidate != ignore
        if differs:
            yield candidate
101 |
--------------------------------------------------------------------------------
/src/backends/cgpm_alter/grammar.y:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2010-2016, MIT Probabilistic Computing Project
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | /*
18 | * Terminal conventions:
19 | * - T_ means a punctuation token.
20 | * - K_ means a keyword.
21 | * - L_ means a lexeme, which has useful associated text, e.g. an integer.
22 | */
23 |
24 |
25 | alter(start) ::= phrases(ps).
26 |
27 | phrases(one) ::= phrase(p).
28 | phrases(many) ::= phrases(ps) T_COMMA phrase(p).
29 |
30 | phrase(none) ::= .
31 |
32 | phrase(set_var_dependency) ::= K_ENSURE variable_token_opt columns(cols)
33 | dependency(dep).
34 |
35 | phrase(set_var_cluster) ::= K_ENSURE variable_token_opt
36 | columns(cols0) K_IN view_token
37 | K_OF column_name(col1).
38 |
39 | phrase(set_var_cluster_singleton) ::= K_ENSURE variable_token_opt
40 | columns(cols)
41 | K_IN K_SINGLETON view_token.
42 |
43 | phrase(set_var_cluster_conc) ::= K_SET view_token
44 | K_CONCENTRATION K_PARAMETER
45 | K_TO concentration(conc).
46 |
47 | phrase(set_row_cluster) ::= K_ENSURE K_ROW|K_ROWS rows(rows0)
48 | K_IN K_CLUSTER K_OF K_ROW
49 | row_index(row1)
50 | K_WITHIN view_token
51 | K_OF column_name(col).
52 |
53 | phrase(set_row_cluster_singleton) ::= K_ENSURE K_ROW|K_ROWS rows(rows0)
54 | K_IN K_SINGLETON K_CLUSTER
55 | K_WITHIN view_token
56 | K_OF column_name(col).
57 |
58 | phrase(set_row_cluster_conc) ::= K_SET K_ROW K_CLUSTER
59 | K_CONCENTRATION K_PARAMETER
60 | K_WITHIN view_token
61 | K_OF column_name(col)
62 | K_TO concentration(conc).
63 |
64 | variable_token_opt ::= .
65 | variable_token_opt ::= K_VARIABLE.
66 | variable_token_opt ::= K_VARIABLES.
67 |
68 | view_token ::= K_VIEW.
69 | view_token ::= K_CONTEXT.
70 |
71 | dependency(independent) ::= K_INDEPENDENT.
72 | dependency(dependent) ::= K_DEPENDENT.
73 |
74 | columns(one) ::= column_name(col).
75 | columns(all) ::= T_STAR.
76 | columns(many) ::= T_LROUND column_list(cols) T_RROUND.
77 |
78 | column_list(one) ::= column_name(col).
79 | column_list(many) ::= column_list(cols) T_COMMA column_name(col).
80 |
81 | column_name(n) ::= L_NAME(n).
82 |
83 | rows(one) ::= row_index(row).
84 | rows(all) ::= T_STAR.
85 | rows(many) ::= T_LROUND row_list(rows) T_RROUND.
86 |
87 | row_list(one) ::= row_index(row).
88 | row_list(many) ::= row_list(rows) T_COMMA row_index(row).
89 |
90 | row_index(n) ::= L_NUMBER(n).
91 |
92 | concentration(c) ::= L_NUMBER(n).
93 |
--------------------------------------------------------------------------------
/src/backends/cgpm_analyze/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | # Copyright (c) 2010-2016, MIT Probabilistic Computing Project
4 | #
5 | # Licensed under the Apache License, Version 2.0 (the "License");
6 | # you may not use this file except in compliance with the License.
7 | # You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 |
--------------------------------------------------------------------------------
/src/backends/cgpm_analyze/grammar.y:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2010-2016, MIT Probabilistic Computing Project
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | /*
18 | * Terminal conventions:
19 | * - T_ means a punctuation token.
20 | * - K_ means a keyword.
21 | * - L_ means a lexeme, which has useful associated text, e.g. an integer.
22 | */
23 |
24 |
25 | analysis(start) ::= phrases(ps).
26 |
27 | phrases(one) ::= phrase(p).
28 | phrases(many) ::= phrases(ps) T_SEMI phrase(p).
29 |
30 | phrase(none) ::= .
31 |
32 | phrase(variables) ::= K_VARIABLES column_list(cols).
33 | phrase(skip) ::= K_SKIP column_list(cols).
34 |
35 | phrase(rows) ::= K_ROWS row_list(rows).
36 |
37 | phrase(loom) ::= K_LOOM.
38 | phrase(optimized) ::= K_OPTIMIZED.
39 |
40 | phrase(quiet) ::= K_QUIET.
41 |
42 | phrase(subproblems) ::= K_SUBPROBLEM|K_SUBPROBLEMS subproblems_list(s).
43 |
44 | subproblems_list(one) ::= subproblem(s).
45 | subproblems_list(many) ::= T_LROUND subproblems(s) T_RROUND.
46 |
47 | subproblems(one) ::= subproblem(s).
48 | subproblems(many) ::= subproblems(ss) T_COMMA subproblem(s).
49 |
50 | subproblem(variable_hyperparameters) ::= K_VARIABLE K_HYPERPARAMETERS.
51 |
52 | subproblem(variable_clustering) ::= K_VARIABLE K_CLUSTERING.
53 | subproblem(variable_clustering_concentration) ::= K_VARIABLE K_CLUSTERING
54 | K_CONCENTRATION.
55 |
56 | subproblem(row_clustering) ::= K_ROW K_CLUSTERING.
57 | subproblem(row_clustering_concentration) ::= K_ROW K_CLUSTERING
58 | K_CONCENTRATION.
59 |
60 | column_list(one) ::= column_name(col).
61 | column_list(many) ::= column_list(cols) T_COMMA column_name(col).
62 |
63 | column_name(n) ::= L_NAME(name).
64 |
65 | row_list(one) ::= row_index(row).
66 | row_list(many) ::= row_list(rows) T_COMMA row_index(row).
67 |
68 | row_index(n) ::= L_NUMBER(n).
69 |
--------------------------------------------------------------------------------
/src/backends/cgpm_schema/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | # Copyright (c) 2010-2016, MIT Probabilistic Computing Project
4 | #
5 | # Licensed under the Apache License, Version 2.0 (the "License");
6 | # you may not use this file except in compliance with the License.
7 | # You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 |
--------------------------------------------------------------------------------
/src/backends/cgpm_schema/grammar.y:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2010-2016, MIT Probabilistic Computing Project
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | /*
18 | * Terminal conventions:
19 | * - T_ means a punctuation token.
20 | * - K_ means a keyword.
21 | * - L_ means a lexeme, which has useful associated text, e.g. an integer.
22 | */
23 |
24 | cgpm(empty) ::= .
25 | cgpm(schema) ::= schema(s).
26 |
27 | schema(one) ::= clause(c).
28 | schema(some) ::= schema(s) T_SEMI|T_COMMA clause_opt(c).
29 |
30 | clause_opt(none) ::= .
31 | clause_opt(some) ::= clause(c).
32 |
33 | clause(basic) ::=
34 | K_SET K_CATEGORY K_MODEL K_FOR
35 | var(var) K_USING|K_TO dist(dist) param_opt(params).
36 | clause(foreign) ::=
37 | K_OVERRIDE generative_opt K_MODEL K_FOR vars(outputs)
38 | given_opt(inputs)
39 | exposing_opt(exposed)
40 | K_USING foreign(name) param_opt(params).
41 | clause(subsamp) ::= K_SUBSAMPLE L_NUMBER(n).
42 | clause(latent) ::= K_LATENT var(var) stattype(st).
43 |
44 | dist(name) ::= L_NAME(dist).
45 | foreign(name) ::= L_NAME(foreign).
46 |
47 | generative_opt ::= .
48 | generative_opt ::= K_GENERATIVE.
49 |
50 | given_opt(none) ::= .
51 | given_opt(some) ::= K_GIVEN vars(vars).
52 |
53 | exposing_opt(none) ::= .
54 | exposing_opt(one) ::= and_opt K_EXPOSE exposed(exp).
55 |
56 | and_opt(none) ::= .
57 | and_opt(one) ::= K_AND.
58 |
59 | exposed(one) ::= var(v) stattype(s).
60 | exposed(many) ::= exposed(exp) T_COMMA var(v) stattype(s).
61 |
62 | vars(one) ::= var(var).
63 | vars(many) ::= vars(vars) T_COMMA var(var).
64 |
65 | var(name) ::= L_NAME(var).
66 |
67 | stattype(s) ::= L_NAME(st).
68 |
69 | param_opt(none) ::= .
70 | param_opt(some) ::= T_LROUND params(ps) T_RROUND.
71 | params(one) ::= param(param).
72 | params(many) ::= params(params) T_COMMA param(param).
73 |
74 | param(num) ::= L_NAME(p) T_EQ L_NUMBER(num).
75 | param(nam) ::= L_NAME(p) T_EQ L_NAME(nam).
76 |
--------------------------------------------------------------------------------
/src/backends/iid_gaussian.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | # Copyright (c) 2010-2016, MIT Probabilistic Computing Project
4 | #
5 | # Licensed under the Apache License, Version 2.0 (the "License");
6 | # you may not use this file except in compliance with the License.
7 | # You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 |
17 | """The IID Gaussian Model posits that all data are independently Gaussian.
18 |
19 | This is an example of the simplest possible population model that's
20 | actually stochastic. The Gaussian has mean 0 and standard deviation
21 | 1.
22 |
23 | This module implements the :class:`bayeslite.BayesDB_Backend`
24 | interface for the IID Gaussian Model.
25 |
26 | """
27 |
28 | import math
29 | import random
30 |
31 | import bayeslite.backend
32 |
33 | from bayeslite.exception import BQLError
34 |
35 | std_normal_schema_1 = '''
36 | INSERT INTO bayesdb_backend (name, version) VALUES ('std_normal', 1);
37 | '''
38 |
class StdNormalBackend(bayeslite.backend.BayesDB_Backend):
    """IID Gaussian backend for BayesDB.

    The backend is named ``std_normal`` in BQL::

        CREATE GENERATOR t_sn FOR t USING std_normal(..)
    """

    def __init__(self, seed=0):
        # Private, seeded PRNG so that simulations are reproducible.
        self.prng = random.Random(seed)

    def name(self):
        return 'std_normal'

    def register(self, bdb):
        """Install or validate this backend's schema row in `bdb`.

        Raises BQLError if a schema version other than 1 is installed.
        """
        with bdb.savepoint():
            lookup_sql = 'SELECT version FROM bayesdb_backend WHERE name = ?'
            cursor = bdb.sql_execute(lookup_sql, (self.name(),))
            try:
                row = cursor.next()
            except StopIteration:
                # No row for this backend: treat as version 0.
                version = 0
            else:
                version = row[0]
            assert version is not None
            if version == 0:
                # XXX WHATTAKLUDGE!
                for stmt in std_normal_schema_1.split(';'):
                    bdb.sql_execute(stmt)
                version = 1
            if version != 1:
                raise BQLError(bdb, 'IID-Gaussian already installed'
                    ' with unknown schema version: %d' % (version,))

    def create_generator(self, bdb, generator_id, schema, **kwargs):
        pass

    def drop_generator(self, *args, **kwargs):
        pass

    def rename_column(self, *args, **kwargs):
        pass

    def initialize_models(self, *args, **kwargs):
        pass

    def drop_models(self, *args, **kwargs):
        pass

    def analyze_models(self, *args, **kwargs):
        pass

    def simulate_joint(self, _bdb, _generator_id, modelnos, rowid, targets,
            _constraints, num_samples=1, accuracy=None):
        """Return `num_samples` rows, each one standard-normal draw per target.

        All other arguments are ignored.
        """
        samples = []
        for _ in range(num_samples):
            samples.append([self.prng.gauss(0, 1) for _ in targets])
        return samples

    def logpdf_joint(self, _bdb, _generator_id, modelnos, rowid, targets,
            _constraints):
        """Return the sum of standard-normal log densities of target values.

        `targets` is iterated as pairs whose second element is the value.
        """
        total = 0
        for (_, value) in targets:
            total += logpdf_gaussian(value, 0, 1)
        return total

    def infer(self, *args, **kwargs):
        pass
85 |
# Normalization constant of the Gaussian log density: log(sqrt(2*pi)).
HALF_LOG2PI = 0.5 * math.log(2 * math.pi)

def logpdf_gaussian(x, mu, sigma):
    """Return the log density at `x` of a Gaussian.

    :param x: point at which to evaluate
    :param mu: mean
    :param sigma: standard deviation; must be positive for ``math.log``
    """
    deviation = x - mu
    quadratic = 0.5 * deviation * deviation / (sigma * sigma)
    return - math.log(sigma) - HALF_LOG2PI - quadratic
92 |
--------------------------------------------------------------------------------
/src/backends/troll_rng.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | # Copyright (c) 2010-2016, MIT Probabilistic Computing Project
4 | #
5 | # Licensed under the Apache License, Version 2.0 (the "License");
6 | # you may not use this file except in compliance with the License.
7 | # You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 |
17 | """The Troll Model posits that all data values are equal to 9.
18 |
19 | Reference: http://dilbert.com/strip/2001-10-25
20 |
21 | This is an example of the simplest possible population model.
22 |
23 | This module implements the :class:`bayeslite.BayesDB_Backend`
24 | interface for the Troll Model.
25 | """
26 |
27 | import bayeslite.backend
28 |
class TrollBackend(bayeslite.backend.BayesDB_Backend):
    """Troll backend for BayesDB.

    The backend is named ``troll_rng`` in BQL::

        CREATE GENERATOR t_troll FOR t USING troll_rng(..)
    """

    def __init__(self): pass
    def name(self): return 'troll_rng'
    def register(self, bdb):
        """Record this backend, at version 1, in ``bayesdb_backend``."""
        bdb.sql_execute('''
            INSERT INTO bayesdb_backend (name, version)
                VALUES (?, 1)
        ''', (self.name(),))
    def create_generator(self, bdb, generator_id, schema, **kwargs):
        pass
    def drop_generator(self, *args, **kwargs): pass
    def rename_column(self, *args, **kwargs): pass
    def initialize_models(self, *args, **kwargs): pass
    def drop_models(self, *args, **kwargs): pass
    def analyze_models(self, *args, **kwargs): pass
    def simulate_joint(self, _bdb, _generator_id, _modelnos, rowid, targets,
            _constraints, num_samples=1):
        """Return `num_samples` rows, each holding a 9 for every target.

        Every row is a distinct list, so a caller that mutates one
        sample does not silently change the others.
        """
        # Build the row once (targets may be a one-shot iterable), then
        # copy it per sample instead of repeating a single shared list.
        row = [9 for _ in targets]
        return [list(row) for _ in range(num_samples)]
    def logpdf_joint(self, _bdb, _generator_id, _modelnos, rowid, targets,
            constraints):
        """Return the log probability of `targets` given `constraints`.

        Both are iterated as pairs whose second element is the value.
        Returns NaN if any constraint value is not 9, negative infinity
        if any target value is not 9, and 0 otherwise.
        """
        for (_, value) in constraints:
            if not value == 9:
                return float("nan")
        for (_, value) in targets:
            if not value == 9:
                return float("-inf")
        # TODO This is only correct wrt counting measure.  What's the
        # base measure of numericals?
        return 0
    def infer(self, *args, **kwargs): pass
66 |
--------------------------------------------------------------------------------
/src/bqlmath.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | # Copyright (c) 2010-2016, MIT Probabilistic Computing Project
4 | #
5 | # Licensed under the Apache License, Version 2.0 (the "License");
6 | # you may not use this file except in compliance with the License.
7 | # You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 |
17 | import inspect
18 | import math
19 |
# Map from SQL function name to its implementation in the math module.
#
# Each entry is an explicit lambda rather than the math builtin itself
# because bayesdb_install_bqlmath derives the SQL arity from the lambda's
# argument list with inspect.getargspec.  Each lambda's parameter count
# must therefore match what the wrapped math function requires.
bqlmath_funcs = {
    'acos': lambda x: math.acos(x),
    'acosh': lambda x: math.acosh(x),
    'asin': lambda x: math.asin(x),
    'asinh': lambda x: math.asinh(x),
    'atan': lambda x: math.atan(x),
    # math.atan2 takes two arguments (y, x); a one-argument wrapper would
    # always raise TypeError and register the wrong arity.
    'atan2': lambda x, y: math.atan2(x, y),
    'atanh': lambda x: math.atanh(x),
    'ceil': lambda x: math.ceil(x),
    'copysign': lambda x, y: math.copysign(x, y),
    'cos': lambda x: math.cos(x),
    'cosh': lambda x: math.cosh(x),
    'degrees': lambda x: math.degrees(x),
    'erf': lambda x: math.erf(x),
    'erfc': lambda x: math.erfc(x),
    'exp': lambda x: math.exp(x),
    'expm1': lambda x: math.expm1(x),
    'fabs': lambda x: math.fabs(x),
    'factorial': lambda x: math.factorial(x),
    'floor': lambda x: math.floor(x),
    'fmod': lambda x, y: math.fmod(x, y),
    'gamma': lambda x: math.gamma(x),
    'hypot': lambda x, y: math.hypot(x, y),
    'ldexp': lambda x, i: math.ldexp(x, i),
    'lgamma': lambda x: math.lgamma(x),
    'log': lambda x: math.log(x),
}
47 |
48 |
def bayesdb_install_bqlmath(db, _cookie):
    """Register every function in ``bqlmath_funcs`` as a scalar function.

    :param db: connection object providing ``createscalarfunction``
    :param _cookie: unused
    """
    for name in bqlmath_funcs:
        fn = bqlmath_funcs[name]
        # The number of SQL arguments is the lambda's parameter count.
        arity = len(inspect.getargspec(fn).args)
        db.createscalarfunction(name, fn, arity)
53 |
--------------------------------------------------------------------------------
/src/exception.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | # Copyright (c) 2010-2016, MIT Probabilistic Computing Project
4 | #
5 | # Licensed under the Apache License, Version 2.0 (the "License");
6 | # you may not use this file except in compliance with the License.
7 | # You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 |
17 | import StringIO
18 |
class BayesLiteException(Exception):
    """Parent exception for anything Bayeslite-specific.

    Catch this to handle every exception class derived from it.
    """
22 |
class BayesDBException(BayesLiteException):
    """Exceptions associated with a BayesDB instance.

    :ivar bayeslite.BayesDB bayesdb: associated BayesDB instance
    """
    # XXX: Consider renaming to BayesDBError to match the two below.
    def __init__(self, bayesdb, *args, **kwargs):
        # Keep the handle on the exception; the remaining arguments are
        # forwarded unchanged to the parent constructor.
        self.bayesdb = bayesdb
        parent = super(BayesDBException, self)
        parent.__init__(*args, **kwargs)
32 |
class BQLError(BayesDBException):
    """Errors in interpreting or executing BQL on a particular database."""
    # XXX Consider separating the "no such foo" and "foo already exists" errors
    # that actually could be fine on another database, from the "foo is a
    # 1-row function" and "foo needs exactly two columns" type that are closer
    # to a BQLParseError. Unsure what the "ESTIMATE * FROM COLUMNS OF subquery"
    # use really means as an error: need to look more closely.
41 |
class BQLParseError(BayesLiteException):
    """Errors in parsing BQL.

    As many parse errors as can be reasonably detected are listed
    together.

    :ivar list errors: list of strings describing parse errors
    """

    def __init__(self, errors):
        assert 0 < len(errors)
        self.errors = errors

    def __str__(self):
        # A lone error is returned as-is; several errors are rendered one
        # per line, each prefixed with a space and newline-terminated.
        if len(self.errors) != 1:
            return ''.join(' %s\n' % (error,) for error in self.errors)
        return self.errors[0]
63 |
--------------------------------------------------------------------------------
/src/nullify.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | # Copyright (c) 2010-2017, MIT Probabilistic Computing Project
4 | #
5 | # Licensed under the Apache License, Version 2.0 (the "License");
6 | # you may not use this file except in compliance with the License.
7 | # You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 |
17 | from .sqlite3_util import sqlite3_quote_name
18 |
19 |
def bayesdb_nullify(bdb, table, value, columns=None):
    """Replace every cell equal to `value` in `table` with NULL.

    :param bdb: BayesDB handle; its ``sql_execute`` method and
        ``_sqlite3`` connection are used
    :param table: name of the table to update
    :param value: value to replace, passed as a query parameter
    :param columns: names of the columns to update; if ``None``, all
        columns reported by ``PRAGMA table_info``
    :return: the increase in the connection's total change count
    """
    quoted_table = sqlite3_quote_name(table)
    if columns is None:
        pragma_sql = 'PRAGMA table_info(%s)' % (quoted_table,)
        # Element 1 of each table_info row is taken as the column name.
        columns = [info[1] for info in bdb.sql_execute(pragma_sql)]
    changes_before = bdb._sqlite3.totalchanges()
    for column in columns:
        quoted_column = sqlite3_quote_name(column)
        update_sql = 'UPDATE %s SET %s = NULL WHERE %s = ?' % (
            quoted_table, quoted_column, quoted_column)
        bdb.sql_execute(update_sql, (value,))
    changes_after = bdb._sqlite3.totalchanges()
    return changes_after - changes_before
31 |
--------------------------------------------------------------------------------
/src/quote.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | # Copyright (c) 2010-2017, MIT Probabilistic Computing Project
4 | #
5 | # Licensed under the Apache License, Version 2.0 (the "License");
6 | # you may not use this file except in compliance with the License.
7 | # You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 |
17 | from .sqlite3_util import sqlite3_quote_name
18 |
19 |
def bql_quote_name(name):
    """Quote `name` as a BQL identifier, e.g. a table or column name.

    Quoting is delegated to ``sqlite3_quote_name``.

    Do NOT use this for strings, e.g. inserting data into a table.
    Use query parameters instead.
    """
    quoted = sqlite3_quote_name(name)
    return quoted
27 |
--------------------------------------------------------------------------------
/src/read_pandas.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | # Copyright (c) 2010-2016, MIT Probabilistic Computing Project
4 | #
5 | # Licensed under the Apache License, Version 2.0 (the "License");
6 | # you may not use this file except in compliance with the License.
7 | # You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 |
17 | """Reading data from pandas dataframes."""
18 |
19 | import bayeslite.core as core
20 |
21 | from bayeslite.sqlite3_util import sqlite3_quote_name
22 |
def bayesdb_read_pandas_df(bdb, table, df, create=False, ifnotexists=False,
        index=None):
    """Read data from a pandas dataframe into a table.

    :param bayeslite.BayesDB bdb: BayesDB instance
    :param str table: name of table
    :param pandas.DataFrame df: pandas dataframe
    :param bool create: if true and `table` does not exist, create it
    :param bool ifnotexists: if true, and `create` is true and `table`
        exists, read data into it anyway
    :param str index: name of column for index

    If `index` is `None`, then the dataframe's index dtype must be
    convertible to int64, and it is mapped to the table's rowids.  If
    the dataframe's index dtype is not convertible to int64, you must
    specify `index` to give a primary key for the table.

    :raises ValueError: if `ifnotexists` is given without `create`; if
        the index is not integral and no `index` name is given; if
        `index` collides with a dataframe column; if the table exists
        and neither appending nor `ifnotexists` applies; if the table
        lacks some of the dataframe's columns; or if the table does not
        exist and `create` is false.
    """
    if not create:
        if ifnotexists:
            raise ValueError('Not creating table whether or not exists!')
    column_names = [str(column) for column in df.columns]
    if index is None:
        create_column_names = column_names
        insert_column_names = ['_rowid_'] + column_names
        try:
            key_index = df.index.astype('int64')
        except ValueError:
            raise ValueError('Must specify index name for non-integral index!')
    else:
        if index in df.columns:
            raise ValueError('Index name collides with column name: %r'
                % (index,))
        create_column_names = [index] + column_names
        insert_column_names = create_column_names
        key_index = df.index
    # Table creation/validation and the inserts share one savepoint so
    # that a failure part way through leaves the database untouched.
    with bdb.savepoint():
        if core.bayesdb_has_table(bdb, table):
            if create and not ifnotexists:
                raise ValueError('Table already exists: %s' % (repr(table),))
            core.bayesdb_table_guarantee_columns(bdb, table)
            unknown = set(name for name in create_column_names
                if not core.bayesdb_table_has_column(bdb, table, name))
            if len(unknown) != 0:
                raise ValueError('Unknown columns: %s' % (list(unknown),))
        elif create:
            def column_schema(column_name):
                qcn = sqlite3_quote_name(column_name)
                if column_name == index:
                    return '%s NUMERIC PRIMARY KEY' % (qcn,)
                else:
                    return '%s NUMERIC' % (qcn,)
            schema = ','.join(column_schema(ccn)
                for ccn in create_column_names)
            qt = sqlite3_quote_name(table)
            bdb.sql_execute('CREATE TABLE %s(%s)' % (qt, schema))
            core.bayesdb_table_guarantee_columns(bdb, table)
        else:
            raise ValueError('No such table: %s' % (repr(table),))
        qt = sqlite3_quote_name(table)
        # Build real lists: the quoted names are traversed twice below,
        # which a one-shot iterator would not survive.
        qicns = [sqlite3_quote_name(name) for name in insert_column_names]
        sql = 'INSERT INTO %s (%s) VALUES (%s)' % \
            (qt, ','.join(qicns), ','.join('?' for _qicn in qicns))
        # Fetch each row by position.  `DataFrame.ix` has been removed
        # from pandas, and positional access pairs every key with its
        # own row even if index labels repeat.
        for position, key in enumerate(key_index):
            bdb.sql_execute(sql, (key,) + tuple(df.iloc[position]))
87 |
--------------------------------------------------------------------------------
/src/regress.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | # Copyright (c) 2010-2017, MIT Probabilistic Computing Project
4 | #
5 | # Licensed under the Apache License, Version 2.0 (the "License");
6 | # you may not use this file except in compliance with the License.
7 | # You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 |
17 | import pandas as pd
18 |
19 | from sklearn.linear_model import LinearRegression
20 |
def regress_ols(target_values, given_values, given_variables, stattypes):
    """Fit an ordinary least squares regression with dummy coding.

    :param target_values: response values, passed as the target to
        `LinearRegression.fit`
    :param given_values: rows of predictor values, one entry per name in
        `given_variables`
    :param given_variables: names of the predictor columns
    :param stattypes: statistical type of each given variable, in the
        same order; variables whose type is ``'nominal'`` are dummy coded

    :return: list of ``(name, coefficient)`` pairs: first
        ``('intercept', ...)``, then one pair per regressed column, then
        one pair with coefficient 0 for each dropped reference dummy column
    :raises ValueError: if no columns or no rows remain to regress on
    """
    X = pd.DataFrame(given_values, columns=given_variables)
    # Detect the nominal variables.
    nominal_variables = [
        variable for variable, stattype in zip(given_variables, stattypes)
        if stattype == 'nominal'
    ]
    # Dummy code the nominal variables.
    prefix = {var: '%s_dum' % (var,) for var in nominal_variables}
    X_coded = pd.get_dummies(X, columns=nominal_variables, prefix=prefix)
    # Find nominal columns to drop, and drop them (for correct dummy coding, K
    # categories are encoded using K-1 vector).  The reference column of each
    # variable is found by encoding that variable on its own, rather than by
    # prefix-matching the combined column names, so that a variable whose
    # name happens to start with another variable's dummy prefix cannot be
    # selected by mistake.  Indexing an empty column list raises IndexError,
    # as before, when a nominal variable has no categories.
    drop = [
        pd.get_dummies(X[var], prefix=prefix[var]).columns[0]
        for var in nominal_variables
    ]
    X_coded.drop(drop, inplace=True, axis=1)
    # Check if only 1 column with 1 unique values.
    if len(X_coded.columns) == 0 or len(X_coded) == 0:
        raise ValueError('Not enough data for regression')
    # Fit the regression.
    linreg = LinearRegression()
    linreg.fit(X_coded, target_values)
    # Build and return variables and their coefficients.  The zips are
    # materialized so the concatenation works whether zip returns a list
    # or an iterator.
    intercept = [('intercept', linreg.intercept_)]
    variables_regressed = list(zip(X_coded.columns, linreg.coef_))
    variables_dropped = list(zip(drop, [0]*len(drop)))
    return intercept + variables_regressed + variables_dropped
49 |
--------------------------------------------------------------------------------
/src/txn.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | # Copyright (c) 2010-2016, MIT Probabilistic Computing Project
4 | #
5 | # Licensed under the Apache License, Version 2.0 (the "License");
6 | # you may not use this file except in compliance with the License.
7 | # You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 |
17 | import contextlib
18 |
19 | from bayeslite.exception import BayesDBException
20 | from bayeslite.sqlite3_util import sqlite3_savepoint
21 | from bayeslite.sqlite3_util import sqlite3_savepoint_rollback
22 | from bayeslite.sqlite3_util import sqlite3_transaction
23 |
24 | # XXX Can't do this simultaneously in multiple threads. Need
25 | # lightweight per-thread state.
26 |
@contextlib.contextmanager
def bayesdb_caching(bdb):
    """Context manager that holds one level of transaction depth on `bdb`.

    The depth is pushed on entry and popped on exit, including when the
    body raises.  No SQLite savepoint or transaction is opened here.
    """
    bayesdb_txn_push(bdb)
    try:
        yield
    finally:
        bayesdb_txn_pop(bdb)
34 |
@contextlib.contextmanager
def bayesdb_savepoint(bdb):
    """Context manager running its body inside `sqlite3_savepoint`.

    One level of transaction depth is pushed on `bdb` before the
    savepoint is entered and popped after it is left, including when the
    body raises.
    """
    bayesdb_txn_push(bdb)
    try:
        with sqlite3_savepoint(bdb._sqlite3):
            yield
    finally:
        bayesdb_txn_pop(bdb)
43 |
@contextlib.contextmanager
def bayesdb_savepoint_rollback(bdb):
    """Context manager running its body inside `sqlite3_savepoint_rollback`.

    One level of transaction depth is pushed on `bdb` before the
    savepoint is entered and popped after it is left, including when the
    body raises.
    """
    # NOTE(review): presumably sqlite3_savepoint_rollback always rolls the
    # savepoint back on exit; its definition is not in this module.
    bayesdb_txn_push(bdb)
    try:
        with sqlite3_savepoint_rollback(bdb._sqlite3):
            yield
    finally:
        bayesdb_txn_pop(bdb)
52 |
@contextlib.contextmanager
def bayesdb_transaction(bdb):
    """Context manager running its body inside `sqlite3_transaction`.

    Unlike the savepoint context managers, this does not nest: it raises
    `BayesDBTxnError` if the transaction depth of `bdb` is already
    nonzero.  On exit, whether or not the body raised, the depth is
    reset to zero and the cache is torn down.
    """
    if bdb._txn_depth != 0:
        raise BayesDBTxnError(bdb, 'Already in a transaction!')
    bayesdb_txn_init(bdb)
    bdb._txn_depth = 1
    try:
        with sqlite3_transaction(bdb._sqlite3):
            yield
    finally:
        # Anything pushed inside the body must have been popped by now.
        assert bdb._txn_depth == 1
        bdb._txn_depth = 0
        bayesdb_txn_fini(bdb)
66 |
def bayesdb_begin_transaction(bdb):
    """Begin an explicit transaction on `bdb`.

    Sets up the cache, records a transaction depth of one, and executes
    ``BEGIN``.

    :raises BayesDBTxnError: if the transaction depth is already nonzero
    """
    if bdb._txn_depth != 0:
        raise BayesDBTxnError(bdb, 'Already in a transaction!')
    bayesdb_txn_init(bdb)
    bdb._txn_depth = 1
    try:
        bdb.sql_execute("BEGIN")
    except BaseException:
        # No transaction was started, so undo the bookkeeping above.
        # Otherwise bdb would believe it is in a transaction that can
        # be neither committed nor rolled back.
        bdb._txn_depth = 0
        bayesdb_txn_fini(bdb)
        raise
73 |
def bayesdb_rollback_transaction(bdb):
    """Roll back the current explicit transaction on `bdb`.

    Executes ``ROLLBACK``, then resets the transaction depth to zero and
    tears down the cache.

    :raises BayesDBTxnError: if the transaction depth is zero
    """
    if bdb._txn_depth == 0:
        raise BayesDBTxnError(bdb, 'Not in a transaction!')
    bdb.sql_execute("ROLLBACK")
    # Bookkeeping is only reset once ROLLBACK has succeeded.
    bdb._txn_depth = 0
    bayesdb_txn_fini(bdb)
80 |
def bayesdb_commit_transaction(bdb):
    """Commit the current explicit transaction on `bdb`.

    Executes ``COMMIT``, then resets the transaction depth to zero and
    tears down the cache.

    :raises BayesDBTxnError: if the transaction depth is zero
    """
    if bdb._txn_depth == 0:
        raise BayesDBTxnError(bdb, 'Not in a transaction!')
    bdb.sql_execute("COMMIT")
    # Bookkeeping is only reset once COMMIT has succeeded.
    bdb._txn_depth = 0
    bayesdb_txn_fini(bdb)
87 |
88 | # XXX Maintaining a stack of savepoints in BQL is a little more
89 | # trouble than it is worth at the moment, since users can rollback to
90 | # or release any savepoint in the stack, not just the most recent one.
91 | # (For the bdb.savepoint() context manager that is not an issue.)
92 | # We'll implement that later.
93 |
def bayesdb_txn_push(bdb):
    """Enter one more level of transaction depth on `bdb`.

    The outermost level sets up the cache; inner levels only check that
    the cache is already present.
    """
    if bdb._txn_depth != 0:
        assert bdb._cache is not None
    else:
        bayesdb_txn_init(bdb)
    bdb._txn_depth += 1
100 |
def bayesdb_txn_pop(bdb):
    """Leave one level of transaction depth on `bdb`.

    Leaving the outermost level tears down the cache; leaving an inner
    level only checks that the cache is still present.
    """
    bdb._txn_depth -= 1
    if bdb._txn_depth != 0:
        assert bdb._cache is not None
        return
    bayesdb_txn_fini(bdb)
107 |
def bayesdb_txn_init(bdb):
    """Install a fresh, empty cache on `bdb`.

    Must be called at transaction depth zero with no cache present.
    """
    assert bdb._txn_depth == 0
    assert bdb._cache is None
    fresh_cache = dict()
    bdb._cache = fresh_cache
112 |
def bayesdb_txn_fini(bdb):
    """Discard the cache on `bdb`.

    Must be called at transaction depth zero with a cache present.
    """
    assert bdb._txn_depth == 0
    assert not (bdb._cache is None)
    bdb._cache = None
117 |
class BayesDBTxnError(BayesDBException):
    """Error in the use of transactions on a BayesDB.

    Raised by the transaction functions in this module when a
    transaction is begun while one is already open, or is committed or
    rolled back while none is open.
    """
122 |
--------------------------------------------------------------------------------
/src/util.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | # Copyright (c) 2010-2016, MIT Probabilistic Computing Project
4 | #
5 | # Licensed under the Apache License, Version 2.0 (the "License");
6 | # you may not use this file except in compliance with the License.
7 | # You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 |
17 | """Miscellaneous utilities."""
18 |
19 | import json
20 | import math
21 |
def unique(array):
    """Return a sorted list of the unique elements in `array`.

    The result is always a new list, so the caller's `array` is never
    aliased or modified.  `array` must support `len`.

    No element may be a floating-point NaN.  If your data set includes
    NaNs, omit them before passing them here.
    """
    for x in array:
        assert not (isinstance(x, float) and math.isnan(x))
    if len(array) < 2:
        # Copy rather than hand back the caller's own object, so short
        # inputs get the same kind of result as longer ones.
        return list(array)
    array_sorted = sorted(array)
    array_unique = [array_sorted[0]]
    for x in array_sorted[1:]:
        # Sanity check that sorting produced a consistent order.
        assert array_unique[-1] <= x
        if array_unique[-1] != x:
            array_unique.append(x)
    return array_unique
39 |
def unique_indices(array):
    """Return the sorted indices of the unique elements in `array`.

    For each distinct value, the index of its first occurrence in
    `array` is reported.

    No element may be a floating-point NaN.  If your data set includes
    NaNs, omit them before passing them here.
    """
    for element in array:
        assert not (isinstance(element, float) and math.isnan(element))
    count = len(array)
    if count == 0:
        return []
    if count == 1:
        return [0]
    # Sort (value, index) pairs so that equal values are adjacent and,
    # within a run of equal values, the smallest index comes first.
    pairs = sorted((value, index) for index, value in enumerate(array))
    kept = [pairs[0][1]]
    for value, index in pairs[1:]:
        previous = array[kept[-1]]
        assert previous <= value
        if previous != value:
            kept.append(index)
    kept.sort()
    return kept
59 |
def float_sum(iterable):
    """Return the sum of elements of `iterable` in floating-point.

    This implementation uses Kahan-Babuška summation.  Every element
    is converted with `float` before use.  If the sum involves
    infinities, NaNs, or overflows, the plain running sum is returned,
    so that e.g. summing a single infinity yields infinity.
    """
    s = 0.0
    c = 0.0
    for x in iterable:
        xf = float(x)
        s1 = s + xf
        # Recover the low-order bits lost from whichever operand is
        # smaller in magnitude.
        if abs(xf) < abs(s):
            c += ((s - s1) + xf)
        else:
            c += ((xf - s1) + s)
        s = s1
    if math.isinf(c) or math.isnan(c):
        # The compensation term is only meaningful for finite sums: an
        # infinite or overflowing running sum turns it into inf - inf.
        # Adding it back would replace the correct result with NaN.
        return s
    return s + c
76 |
def casefold(string):
    """Return `string` folded for case-insensitive comparison.

    The string is uppercased and then lowercased.
    """
    # XXX Not really right, but it'll do for now.
    uppercased = string.upper()
    return uppercased.lower()
80 |
def cursor_row(cursor, nullok=None):
    """Return the single row yielded by `cursor`.

    :param cursor: iterator over result rows
    :param nullok: if true, return `None` for an empty cursor instead
        of raising; `None` is treated as false

    :raises ValueError: if the cursor is empty and `nullok` is false, or
        if the cursor yields more than one row
    """
    if nullok is None:
        nullok = False
    # The builtin next() is used instead of the cursor's own `.next()`
    # method so that any iterator works, on Python 2 and Python 3 alike.
    try:
        row = next(cursor)
    except StopIteration:
        if nullok:
            return None
        raise ValueError('Empty cursor')
    else:
        # Probe for a second row to make sure there was exactly one.
        try:
            next(cursor)
        except StopIteration:
            pass
        else:
            raise ValueError('Multiple-result cursor')
        return row
98 |
def cursor_value(cursor, nullok=None):
    """Return the single value in the single row yielded by `cursor`.

    :param cursor: cursor handed on to `cursor_row`
    :param nullok: handed on to `cursor_row`; when it makes
        `cursor_row` return `None`, `None` is returned here too

    :raises ValueError: if the row does not have exactly one column, in
        addition to whatever `cursor_row` raises
    """
    only_row = cursor_row(cursor, nullok)
    if only_row is None:
        assert nullok
        return None
    if len(only_row) == 1:
        return only_row[0]
    raise ValueError('Non-unit cursor')
107 |
def json_dumps(obj):
    """Return a JSON string of obj, compactly and deterministically.

    Keys of objects are emitted in sorted order.
    """
    encoded = json.dumps(obj, sort_keys=True)
    return encoded
111 |
def override(interface):
    """Return a decorator marking a method as overriding `interface`.

    The decorator asserts that `interface` has an attribute with the
    method's name and returns the method unchanged.
    """
    def check_and_return(method):
        known_names = dir(interface)
        assert method.__name__ in known_names
        return method
    return check_and_return
117 |
--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/probcomp/bayeslite/211e5eb3821a464a2fffeb9d35e3097e1b7a99ba/tests/__init__.py
--------------------------------------------------------------------------------
/tests/kl.py:
--------------------------------------------------------------------------------
1 | """Kullback Leibler divergence estimates"""
2 |
3 | from collections import namedtuple
4 | from numpy import array, sqrt
5 |
class KLEstimate(namedtuple('KLEstimate', ['estimate', 'se'])):
    """Container for return value from kullback_leibler.

    `estimate`: The estimated KL divergence, mean of the sampled integrand
    values.

    `se`: Estimated standard error of `estimate`: the standard deviation of
    the sampled integrand values divided by the square root of the number of
    samples, as computed by `kullback_leibler`.  In general the mean and
    variance of log(P(x)) is not known to be finite, but it will be for any
    distribution crosscat generates at the moment, because they all have
    finite entropy.  Hence the Central Limit Theorem applies at some sample
    size, and this can in principle be used as a rough guide to the precision
    of the estimate.  In tests comparing the univariate gaussians N(0,1) and
    N(0,2), it tended to have a visually obvious bias for sample sizes below
    100,000.

    """
    pass
23 |
def kullback_leibler(postsample, postlpdf, complpdf):
    """Estimate the KL divergence of a sampled distribution from a reference.

    `postsample` is an approximate sample (a collection of values) from
    the distribution approximately represented by `postlpdf`.  For every
    sampled value, `complpdf` evaluated there is subtracted from
    `postlpdf` evaluated there.  The mean of those differences is the
    estimate.

    Return value is a `KLEstimate`.  The attribute you probably care
    most about is `KLEstimate.estimate`.  See `KLEstimate.__doc__` for
    more details.

    """
    differences = array([postlpdf(x) - complpdf(x) for x in postsample])
    sample_count = len(differences)
    standard_error = differences.std() / sqrt(sample_count)
    return KLEstimate(estimate=differences.mean(), se=standard_error)
36 |
--------------------------------------------------------------------------------
/tests/stochastic.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | # Copyright (c) 2010-2016, MIT Probabilistic Computing Project
4 | #
5 | # Licensed under the Apache License, Version 2.0 (the "License");
6 | # you may not use this file except in compliance with the License.
7 | # You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 |
17 | import os
18 | import sys
19 |
class StochasticError(Exception):
    """Failure of a stochastic test, tagged with the seed that caused it.

    Records the seed of the failing run together with the type and value
    of the exception that run raised.  The string form shows the seed in
    hexadecimal so the failure can be reproduced.
    """

    def __init__(self, seed, exctype, excvalue):
        self.seed, self.exctype, self.excvalue = seed, exctype, excvalue

    def __str__(self):
        hexseed = self.seed.encode('hex')
        # Fall back to repr for exception "types" that carry no name.
        typename = self.exctype.__name__ \
            if hasattr(self.exctype, '__name__') else repr(self.exctype)
        return '[seed %s]\n%s: %s' % (hexseed, typename, self.excvalue)
32 |
def stochastic(max_runs, min_passes):
    """Return a decorator for tests that may fail by chance.

    The decorated function takes a seed.  Called with an explicit seed,
    it runs once with that seed.  Called without one, it runs up to
    `max_runs` times with fresh random 32-byte seeds, and returns the
    value of the run that brings the number of passing runs up to
    `min_passes`.  If the runs are exhausted first, it raises a
    `StochasticError` for the most recent failure, with that failure's
    traceback.
    """
    assert 0 < max_runs
    assert min_passes <= max_runs
    def wrap(f):
        def f_(seed=None):
            # An explicit seed reproduces one specific run, no retries.
            if seed is not None:
                return f(seed)
            npasses = 0
            last_seed = None
            last_exc_info = None
            for i in xrange(max_runs):
                seed = os.urandom(32)
                try:
                    value = f(seed)
                except:
                    # Remember the failure; only the latest one is
                    # reported if too few runs pass.
                    last_seed = seed
                    last_exc_info = sys.exc_info()
                else:
                    npasses += 1
                    if min_passes <= npasses:
                        return value
            t, v, tb = last_exc_info
            # Python 2 three-argument raise: re-raise as StochasticError
            # while keeping the traceback of the original failure.
            raise StochasticError, StochasticError(last_seed, t, v), tb
        return f_
    return wrap
58 |
--------------------------------------------------------------------------------
/tests/test_approxest.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | # Copyright (c) 2010-2016, MIT Probabilistic Computing Project
4 | #
5 | # Licensed under the Apache License, Version 2.0 (the "License");
6 | # you may not use this file except in compliance with the License.
7 | # You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 |
17 | """Tests for approximate estimators."""
18 |
19 | import numpy as np
20 |
21 | from bayeslite import bayesdb_open
22 |
23 | from stochastic import stochastic
24 |
25 |
@stochastic(max_runs=2, min_passes=1)
def test_mutinf__ci_slow(seed):
    """Check that mutual information and dependence probability rank y above z.

    x and y are drawn jointly from two bivariate normals with different
    means, and z is drawn separately.  The test expects the variables
    ordered by either measure with x to come out as x, y, z.
    """
    with bayesdb_open(':memory:', seed=seed) as bdb:
        npr = bdb.np_prng
        bdb.sql_execute('create table t(x, y, z)')
        # NOTE(review): the covariance arguments below are not symmetric
        # matrices -- confirm this is intentional.
        D0_XY = npr.multivariate_normal([10,10], [[0,1],[2,0]], size=50)
        D1_XY = npr.multivariate_normal([0,0], [[0,-1],[2,0]], size=50)
        D_XY = np.concatenate([D0_XY, D1_XY])
        D_Z = npr.multivariate_normal([5], [[0.5]], size=100)
        # One row per observation, with columns x, y, z.
        D = np.hstack([D_XY, D_Z])
        for d in D:
            bdb.sql_execute('INSERT INTO t VALUES(?,?,?)', d)
        bdb.execute(
            'create population p for t(x numerical; y numerical; z numerical)')
        bdb.execute('create generator m for p')
        bdb.execute('initialize 10 models for m')
        bdb.execute('analyze m for 10 iterations (optimized; quiet)')
        vars_by_mutinf = bdb.execute('''
            estimate * from variables of p
                order by probability of (mutual information with x > 0.1) desc
        ''').fetchall()
        vars_by_depprob = bdb.execute('''
            estimate * from variables of p
                order by dependence probability with x desc
        ''').fetchall()
        assert vars_by_mutinf == [('x',), ('y',), ('z',)]
        assert vars_by_depprob == [('x',), ('y',), ('z',)]
53 |
--------------------------------------------------------------------------------
/tests/test_bqlmath.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | # Copyright (c) 2010-2016, MIT Probabilistic Computing Project
4 | #
5 | # Licensed under the Apache License, Version 2.0 (the "License");
6 | # you may not use this file except in compliance with the License.
7 | # You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 |
17 | import itertools
18 |
19 | import apsw
20 | import pytest
21 |
22 | from bayeslite import bayesdb_open
23 | from bayeslite import bqlmath
24 |
25 | from bayeslite.math_util import abserr
26 | from bayeslite.util import cursor_value
27 |
28 |
def get_python_math_call(name, probe):
    """Call the bqlmath function `name` directly in Python.

    A tuple `probe` is spread out as the positional arguments; any
    other `probe` is passed as the single argument.
    """
    func = bqlmath.bqlmath_funcs[name]
    arguments = probe if isinstance(probe, tuple) else (probe,)
    return func(*arguments)
35 |
def get_sql_math_call(name, probe):
    """Return a SELECT statement calling the function `name` on `probe`.

    A tuple `probe` is rendered through its own string form, which
    supplies the parentheses and commas of the argument list; any other
    `probe` is wrapped in parentheses as the single argument.
    """
    if not isinstance(probe, tuple):
        return 'SELECT %s(%s)' % (name, probe)
    rendered_arguments = str(probe)
    return 'SELECT %s%s' % (name, rendered_arguments)
41 |
# Single-argument probes.
PROBES_FLOAT = [-2.5, -1, -0.1, 0, 0.1, 1, 2.5]
# Two-argument probes: every unordered pair of the single-argument probes.
PROBES_TUPLE = itertools.combinations(PROBES_FLOAT, 2)
# All probes, single-argument ones first.  This and PROBES_TUPLE are
# iterators, so they can be traversed only once.
PROBES = itertools.chain(PROBES_FLOAT, PROBES_TUPLE)
# Names of all the functions registered in bqlmath.
FUNCS = bqlmath.bqlmath_funcs.iterkeys()
46 |
@pytest.mark.parametrize('name,probe', itertools.product(FUNCS, PROBES))
def test_math_func_one_param(name, probe):
    """Check that a bqlmath function agrees between Python and SQL.

    The function `name` is applied to `probe` both directly in Python
    and through a SQL query on an in-memory BayesDB.  Either both must
    fail in the same way or both must succeed with matching results.
    """
    # Retrieve result from python.
    python_value_error = None
    python_type_error = None
    try:
        result_python = get_python_math_call(name, probe)
    except ValueError:
        python_value_error = True
    except TypeError:
        python_type_error = True

    # Retrieve result from SQL.
    sql_value_error = None
    sql_type_error = None
    try:
        with bayesdb_open(':memory:') as bdb:
            cursor = bdb.execute(get_sql_math_call(name, probe))
            result_sql = cursor_value(cursor)
    except ValueError:
        sql_value_error = True
    except (TypeError, apsw.SQLError):
        # apsw.SQLError is treated as the SQL-side counterpart of a
        # Python TypeError.
        sql_type_error = True

    # Domain error on both.
    if python_value_error or sql_value_error:
        assert python_value_error and sql_value_error
    # Arity error on both.
    elif python_type_error or sql_type_error:
        assert python_type_error and sql_type_error
    # Both invocations succeeded, confirm results match.
    else:
        assert abserr(result_python, result_sql) < 1e-4
80 |
--------------------------------------------------------------------------------
/tests/test_case.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | # Copyright (c) 2010-2017, MIT Probabilistic Computing Project
4 | #
5 | # Licensed under the Apache License, Version 2.0 (the "License");
6 | # you may not use this file except in compliance with the License.
7 | # You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 |
17 | import pytest
18 |
19 | import bayeslite
20 | import bayeslite.core as core
21 |
22 |
def test_case():
    """Check that population variable names keep the case of the columns.

    Currently marked as an expected failure; see the issue cited in the
    xfail reason.
    """
    # The xfail call comes first; presumably it ends the test right here,
    # so the body below documents the intended behavior -- TODO confirm.
    pytest.xfail(reason='Github issue #546')
    with bayeslite.bayesdb_open(':memory:') as bdb:
        bdb.sql_execute('create table t(x,Y)')
        bdb.sql_execute('insert into t values(1,2)')
        bdb.sql_execute('insert into t values(3,4)')
        bdb.sql_execute('insert into t values(1,4)')
        bdb.sql_execute('insert into t values(2,2)')
        bdb.execute('create population p for t(guess(*))')
        population_id = core.bayesdb_get_population(bdb, 'p')
        # The uppercase column name Y must come back unchanged.
        assert core.bayesdb_variable_names(bdb, population_id, None) == \
            ['x', 'Y']
35 |
--------------------------------------------------------------------------------
/tests/test_cgpm_loom.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | # Copyright (c) 2010-2016, MIT Probabilistic Computing Project
4 | #
5 | # Licensed under the Apache License, Version 2.0 (the "License");
6 | # you may not use this file except in compliance with the License.
7 | # You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 |
17 | import os
18 | import pytest
19 |
20 | from bayeslite import bayesdb_open
21 | from bayeslite import bayesdb_nullify
22 | from bayeslite.exception import BQLError
23 |
# Set Loom's verbosity environment variable to '0' for these tests.
os.environ['LOOM_VERBOSITY'] = '0'

# The CSV fixtures are looked up in the same directory as this file.
root = os.path.dirname(os.path.abspath(__file__))
dha_csv = os.path.join(root, 'dha.csv')
satellites_csv = os.path.join(root, 'satellites.csv')
29 |
30 | '''
31 | Integration test for using `ANALYZE FOR ITERATION (loom); on
32 | dha.csv and satellites.csv.
33 | '''
34 |
def loom_analyze(csv_filename):
    """Run a Loom analysis end to end on the data in `csv_filename`.

    Loads the CSV file into an in-memory BayesDB, nullifies 'NaN'
    values, creates a population and generator, analyzes with Loom,
    checks that unsupported Loom analysis options are rejected, runs a
    BQL query, and finally analyzes with the optimized backend.

    The calling test is skipped if the `loom` package cannot be
    imported.
    """
    try:
        import loom
    except ImportError:
        # pytest.skip raises, so nothing below runs without loom.
        pytest.skip('no loom')
    # Use only the handle managed by the `with` statement, so that the
    # database is closed when the test finishes.
    with bayesdb_open(':memory:') as bdb:
        bdb.execute('CREATE TABLE t FROM \'%s\'' % (csv_filename,))
        bayesdb_nullify(bdb, 't', 'NaN')
        bdb.execute('''
            CREATE POPULATION p FOR t WITH SCHEMA(
                GUESS STATTYPES OF (*);
            )
        ''')
        bdb.execute('CREATE GENERATOR m FOR p;')
        bdb.execute('INITIALIZE 10 MODELS FOR m')
        bdb.execute('ANALYZE m FOR 2 ITERATIONS (loom);')

        # targeted analysis for Loom not supported.
        with pytest.raises(BQLError):
            bdb.execute('''
                ANALYZE m FOR 1 ITERATION (loom; variables TTL_MDCR_SPND);
            ''')
        # progress for Loom not supported (error from cgpm).
        with pytest.raises(ValueError):
            bdb.execute('''
                ANALYZE m FOR 1 ITERATION (loom; quiet);
            ''')
        # timing for Loom not supported (error from cgpm).
        with pytest.raises(ValueError):
            bdb.execute('''
                ANALYZE m FOR 1 SECONDS (loom);
            ''')
        # Run a BQL query.
        bdb.execute('''
            ESTIMATE DEPENDENCE PROBABILITY FROM PAIRWISE VARIABLES OF p;
        ''')
        # Make sure we can run lovecat afterwards.
        bdb.execute('ANALYZE m FOR 2 ITERATION (optimized);')
75 |
def test_loom_dha__ci_slow():
    """Run the Loom integration scenario on dha.csv."""
    loom_analyze(dha_csv)
78 |
def test_loom_satellites__ci_slow():
    """Run the Loom integration scenario on satellites.csv."""
    loom_analyze(satellites_csv)
81 |
--------------------------------------------------------------------------------
/tests/test_condprob.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | # Copyright (c) 2010-2016, MIT Probabilistic Computing Project
4 | #
5 | # Licensed under the Apache License, Version 2.0 (the "License");
6 | # you may not use this file except in compliance with the License.
7 | # You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 |
17 | import bayeslite
18 |
def test_conditional_probability_simple_inferences():
    """Check that conditioning moves densities in the expected direction.

    In the data, foo = 'x' always occurs with bar = 'a', and foo = 'y'
    always occurs with bar = 'b'.  Conditioning on the matching value of
    bar should raise the density of a foo value above its marginal, and
    conditioning on the other value should lower it.
    """
    data = [
        ['x', 'a'], ['x', 'a'], ['x', 'a'],
        ['y', 'b'], ['y', 'b'], ['y', 'b'],
    ]
    with bayeslite.bayesdb_open() as bdb:
        bdb.sql_execute('create table t(foo, bar)')
        for row in data:
            bdb.sql_execute('insert into t values (?, ?)', row)
        bdb.execute('''
            create population p for t (
                foo nominal;
                bar nominal;
            )
        ''')
        bdb.execute('create generator p_cc for p using cgpm;')
        bdb.execute('initialize 10 models for p_cc')
        bdb.execute('analyze p_cc for 100 iterations')
        # `cursor` holds the fetched rows, not a live cursor.
        cursor = bdb.execute('''
            estimate
                probability density of foo = 'x',
                probability density of foo = 'x' given (bar = 'a'),
                probability density of foo = 'x' given (bar = 'b'),
                probability density of foo = 'y',
                probability density of foo = 'y' given (bar = 'a'),
                probability density of foo = 'y' given (bar = 'b')

            by p
        ''').fetchall()
        px, pxa, pxb, py, pya, pyb = cursor[0]
        # Inferences on x.
        assert px < pxa
        assert pxb < px
        # Inferences on y.
        assert py < pyb
        assert pya < py
55 |
--------------------------------------------------------------------------------
/tests/test_error_bql.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | # Copyright (c) 2010-2016, MIT Probabilistic Computing Project
4 | #
5 | # Licensed under the Apache License, Version 2.0 (the "License");
6 | # you may not use this file except in compliance with the License.
7 | # You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 |
17 | import pytest
18 |
19 | import bayeslite
20 |
21 | import test_core
22 |
23 |
def test_droppop_with_generators():
    """Check the error raised when dropping a population that has generators.

    The drop must raise `BQLError`, and the error message must mention
    generators and name the generator that is in the way.
    """
    with test_core.t1() as (bdb, _population_id, _generator_id):
        distinctive_name = 'frobbledithorpequack'
        bdb.execute('create generator %s for p1 using cgpm' %
            (distinctive_name,))
        with pytest.raises(bayeslite.BQLError):
            try:
                bdb.execute('drop population p1')
            except bayeslite.BQLError as e:
                # Inspect the message, then re-raise so that
                # pytest.raises sees the exception.
                assert 'generators' in str(e)
                assert distinctive_name in str(e)
                raise
36 |
--------------------------------------------------------------------------------
/tests/test_kl.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | # Copyright (c) 2010-2016, MIT Probabilistic Computing Project
4 | #
5 | # Licensed under the Apache License, Version 2.0 (the "License");
6 | # you may not use this file except in compliance with the License.
7 | # You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 |
17 | from __future__ import division # For type safety in gaussian_kl_divergence
18 |
19 | from functools import partial
20 | from math import erfc
21 |
22 | import numpy as np
23 |
24 | from numpy.random import RandomState
25 |
26 | import kl
27 | import threshold
28 |
29 |
def gaussian_kl_divergence(mu1, s1, mu2, s2):
    "Return KL(N(mu1,s1)||N(mu2,s2))"
    # http://stats.stackexchange.com/a/7443/40686
    log_scale_ratio = np.log(s2 / s1)
    numerator = s1**2 + (mu1 - mu2)**2
    denominator = 2 * s2**2
    return log_scale_ratio + (numerator / denominator) - 0.5
34 |
35 |
def gaussian_log_pdf(mu, s):
    """Return a function mapping x to the log density of N(mu, s) at x.

    `s` is used as the standard deviation (it is squared in the exponent).
    """
    def lpdf(x):
        log_normalizer = -(np.log(2 * np.pi) / 2) - np.log(s)
        squared_deviation = (x - mu)**2
        return log_normalizer - (squared_deviation / (2 * s**2))
    return lpdf
41 |
42 |
def compute_kullback_leibler_check_statistic(n=100, prngstate=None):
    """Return a tail-probability statistic for a sampled KL estimate.

    Draws `n` points from N(0, 1) using `prngstate`, passes them together
    with the log densities of N(0, 1) and N(0, 2) to `kl.kullback_leibler`,
    and compares the returned estimate against the closed-form value from
    `gaussian_kl_divergence`.  If the estimator is accurate, the exact
    value should fall squarely within the sampled estimates, so the value
    returned here should not be tiny.

    Raises TypeError if `prngstate` is not supplied: callers must pass an
    explicit numpy.random.RandomState.
    """
    if prngstate is None:
        raise TypeError('Must explicitly specify numpy.random.RandomState')
    # Both Gaussians are centred at zero; only their scales differ.
    mu1, s1 = 0, 1
    mu2, s2 = 0, 2
    exact = gaussian_kl_divergence(mu1, s1, mu2, s2)
    draws = prngstate.normal(mu1, s1, n)
    estimate, std = kl.kullback_leibler(
        draws, gaussian_log_pdf(mu1, s1), gaussian_log_pdf(mu2, s2))
    # This computes the minimum of the left and right tail probabilities of
    # the exact KL divergence vs a gaussian fit to the sample estimate.
    # There is a distinct negative skew to the samples used to compute
    # `estimate`, so this statistic is not uniform.  Nonetheless, we do not
    # expect it to get too small.
    #
    # NOTE(review): a Gaussian tail probability is usually written
    # erfc(|z| / sqrt(2)) / 2; this expression has no sqrt(2).  Confirm
    # what `std` means in kl.kullback_leibler before touching it -- the
    # recorded threshold in test_kullback_leibler was derived from this
    # exact statistic.
    deviation = abs(exact - estimate) / std
    return erfc(deviation) / 2
68 |
69 |
def kl_test_stat():
    """Return the KL check statistic bound to a freshly seeded PRNG.

    Each call creates a new RandomState seeded with 17, so the sequence of
    statistics produced by the returned callable is reproducible.
    """
    return partial(
        compute_kullback_leibler_check_statistic,
        prngstate=RandomState(17))
74 |
75 |
def compute_kl_threshold():
    """Recompute the threshold hard-coded in test_kullback_leibler.

    The recorded result of

        threshold.compute_sufficiently_stringent_threshold(
            kl_test_stat(), 6, 1e-20)

    was

        TestThreshold(
            threshold=4.3883148424367044e-13,
            failprob=9.724132259513859e-21,
            sample_size=252135
        )

    This means that after generating 252135 check statistics, it was found
    that the least value of six samples will be less than
    4.3883148424367044e-13 with probability less than
    9.724132259513859e-21 (< 1e-20).
    """
    test_sample_size = 6
    target_failprob = 1e-20
    return threshold.compute_sufficiently_stringent_threshold(
        kl_test_stat(), test_sample_size, target_failprob)
95 |
96 |
def test_kullback_leibler():
    """Check that the KL check statistic does not give absurdly low values.

    The statistic generator is checked against a previously recorded
    threshold rather than one recomputed on every run.
    """
    # See compute_kl_threshold for derivation
    recorded = threshold.TestThreshold(
        threshold=4.3883148424367044e-13,
        failprob=9.724132259513859e-21,
        sample_size=252135
    )
    statistic = kl_test_stat()
    threshold.check_generator(statistic, 6, recorded.threshold, 1e-20)
107 |
--------------------------------------------------------------------------------
/tests/test_macro.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | # Copyright (c) 2010-2016, MIT Probabilistic Computing Project
4 | #
5 | # Licensed under the Apache License, Version 2.0 (the "License");
6 | # you may not use this file except in compliance with the License.
7 | # You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 |
17 | import bayeslite.ast as ast
18 | import bayeslite.macro as macro
19 |
20 |
def test_expand_probability_estimate():
    """Check the expansion of a PROBABILITY OF (...) estimate.

    The expected result is a subquery selecting AVG(x) over a
    SimulateModelsExp that evaluates the estimated expression as column x.
    """
    mutinf = ast.ExpBQLMutInf(
        ['c0'],
        ['c1', 'c2'],
        [('c3', ast.ExpLit(ast.LitInt(3)))],
        None)
    expression = ast.ExpOp(ast.OP_LT, [
        mutinf,
        ast.ExpLit(ast.LitFloat(0.1)),
    ])
    probest = ast.ExpBQLProbEst(expression)
    actual = macro.expand_probability_estimate(probest, 'p', 'g')

    average_of_x = ast.SelColExp(
        ast.ExpApp(False, 'AVG', [ast.ExpCol(None, 'x')]),
        None)
    simulated = ast.SelTab(
        ast.SimulateModelsExp([ast.SelColExp(expression, 'x')], 'p', 'g'),
        None)
    expected = ast.ExpSub(
        ast.Select(ast.SELQUANT_ALL, [average_of_x], [simulated],
            None, None, None, None))
    assert actual == expected
42 |
def test_simulate_models_trivial():
    """Check that a lone BQL expression expands to a plain SimulateModels."""
    mutinf = ast.ExpBQLMutInf(['c0'], ['c1', 'c2'],
        [('c3', ast.ExpLit(ast.LitInt(3)))],
        None)
    simmodels = ast.SimulateModelsExp([ast.SelColExp(mutinf, 'x')], 'p', 'g')
    actual = macro.expand_simulate_models(simmodels)
    expected = ast.SimulateModels([ast.SelColExp(mutinf, 'x')], 'p', 'g')
    assert actual == expected
50 |
51 |
def test_simulate_models_nontrivial():
    """Check expansion when BQL expressions are nested in larger ones.

    The expected result is an outer SELECT whose columns refer to v0, v1
    and v2, drawn from an inner SimulateModels that computes each BQL
    sub-expression under one of those names.
    """
    # XXX test descent into ExpLit
    # XXX test descent into ExpNumpar
    # XXX test descent into ExpNampar
    # XXX test descent into ExpCol
    # XXX test descent into ExpSub
    # XXX test descent into ExpCollate
    # XXX test descent into ExpIn
    # XXX test descent into ExpCast
    # XXX test descent into ExpExists
    # XXX test descent into ExpApp
    # XXX test descent into ExpAppStar
    # XXX test descent into ExpCase
    mutinf0 = ast.ExpBQLMutInf(['c0'], ['c1', 'c2'],
        [('c3', ast.ExpLit(ast.LitInt(3)))],
        None)
    mutinf1 = ast.ExpBQLMutInf(['c4', 'c5'], ['c6'],
        [('c7', ast.ExpLit(ast.LitString('ergodic')))],
        100)
    probdensity = ast.ExpBQLProbDensity(
        [('x', ast.ExpLit(ast.LitFloat(1.2)))],
        # No conditions for now -- that changes the weighting of the average.
        [])
    scaled_mutinf1 = ast.ExpOp(
        ast.OP_MUL, [ast.ExpLit(ast.LitFloat(0.1)), mutinf1])
    expression0 = ast.ExpOp(ast.OP_LT, [mutinf0, scaled_mutinf1])
    expression1 = probdensity
    simmodels = ast.SimulateModelsExp(
        [
            ast.SelColExp(expression0, 'quagga'),
            ast.SelColExp(expression1, 'eland'),
        ], 'p', 'g')
    actual = macro.expand_simulate_models(simmodels)

    # Inner query: one column per BQL sub-expression.
    inner = ast.SimulateModels(
        [
            ast.SelColExp(mutinf0, 'v0'),
            ast.SelColExp(mutinf1, 'v1'),
            ast.SelColExp(probdensity, 'v2'),
        ], 'p', 'g')
    # Outer query: the original expressions with column references in
    # place of the BQL sub-expressions.
    quagga = ast.SelColExp(
        ast.ExpOp(ast.OP_LT, [
            ast.ExpCol(None, 'v0'),
            ast.ExpOp(ast.OP_MUL, [
                ast.ExpLit(ast.LitFloat(0.1)),
                ast.ExpCol(None, 'v1'),
            ])
        ]),
        'quagga')
    eland = ast.SelColExp(ast.ExpCol(None, 'v2'), 'eland')
    expected = ast.Select(ast.SELQUANT_ALL,
        [quagga, eland],
        [ast.SelTab(inner, None)],
        None, None, None, None)
    assert actual == expected
108 |
--------------------------------------------------------------------------------
/tests/test_math_util.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | # Copyright (c) 2010-2016, MIT Probabilistic Computing Project
4 | #
5 | # Licensed under the Apache License, Version 2.0 (the "License");
6 | # you may not use this file except in compliance with the License.
7 | # You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 |
17 | import math
18 | import pytest
19 |
20 | from bayeslite.math_util import *
21 |
def pi_cf():
    """Approximate pi from a generalized continued fraction.

    The continued fraction is[1]::

                          1
        pi/4 = -------------------.
                        1^2
               1 + ---------------
                          2^2
                   3 + -----------
                             3^2
                       5 + -------
                           7 + ...

    [1] https://en.wikipedia.org/wiki/Generalized_continued_fraction#.CF.80,
    no citation given.
    """
    def contfrac():
        # Yield the pairs (k^2, 2k + 1) for k = 1, 2, 3, ...
        k = 1
        while True:
            yield k*k, 2*k + 1
            k += 1
    tail = limit(convergents(contfrac()))
    return 4/(1 + tail)
46 |
def phi_cf():
    """Approximate the golden ratio phi from its continued fraction.

    The well-known continued fraction is [1; 1, 1, 1, 1, ...].
    """
    def contfrac():
        # Every term of the fraction is the constant pair (1, 1).
        ones = (1, 1)
        while True:
            yield ones
    tail = limit(convergents(contfrac()))
    return 1 + tail
56 |
def pi_ps():
    """Approximate pi from a power series representation of arctan.

    The power series for arctan is Gregory's series::

                       z^3   z^5   z^7
        arctan z = z - --- + --- - --- + ....
                        3     5     7

    We use a Machin-like formula attributed on Wikipedia to Euler::

        pi = 20 arctan(1/7) + 8 arctan(3/79),

    i.e. pi/4 = 5 arctan(1/7) + 2 arctan(3/79).
    """
    def arctan(z):
        def terms():
            # Successive terms sign * z^(2k+1) / (2k+1) of Gregory's series.
            z_squared = z*z
            power = z
            denominator = 1
            sign = 1.
            while True:
                yield sign*power/denominator
                power *= z_squared
                denominator += 2
                sign *= -1
        return limit(partial_sums(terms()))
    return 20*arctan(1./7) + 8*arctan(3./79)
83 |
def test_misc():
    """Check relerr and the limit helpers against known constants."""
    assert relerr(100., 99.) == .01
    approximations = [
        (math.pi, pi_cf),
        (math.pi, pi_ps),
        ((1 + math.sqrt(5))/2, phi_cf),
    ]
    for exact, approximate in approximations:
        assert relerr(exact, approximate()) < EPSILON
89 |
def test_logsumexp():
    """Check logsumexp on large, empty, infinite and NaN inputs."""
    inf = float('inf')
    nan = float('nan')
    # The naive computation overflows on this input ...
    with pytest.raises(OverflowError):
        math.log(sum(map(math.exp, range(1000))))
    # ... whereas logsumexp is expected to handle it accurately.
    assert relerr(999.4586751453871, logsumexp(range(1000))) < 1e-15
    assert logsumexp([]) == -inf
    assert logsumexp([-1000.]) == -1000.
    assert logsumexp([-1000., -1000.]) == -1000. + math.log(2.)
    assert relerr(math.log(2.), logsumexp([0., 0.])) < 1e-15
    assert logsumexp([-inf, 1]) == 1
    assert logsumexp([-inf, -inf]) == -inf
    assert logsumexp([+inf, +inf]) == +inf
    # Mixed infinities and any NaN input must give NaN.
    for values in ([-inf, +inf], [nan, inf], [nan, -3]):
        assert math.isnan(logsumexp(values))
106 |
def test_logmeanexp():
    """Check logmeanexp on large, empty, infinite and NaN inputs."""
    inf = float('inf')
    nan = float('nan')
    assert logmeanexp([]) == -inf
    assert relerr(992.550919866405, logmeanexp(range(1000))) < 1e-15
    assert logmeanexp([-1000., -1000.]) == -1000.
    two_point_mean = math.log(0.5 * (1 + math.exp(-1.)))
    assert relerr(two_point_mean, logmeanexp([0., -1.])) < 1e-15
    assert relerr(math.log(0.5), logmeanexp([0., -1000.])) < 1e-15
    # A -inf entry contributes zero mass but still counts in the mean.
    assert relerr(-3 - math.log(2.), logmeanexp([-inf, -3])) < 1e-15
    assert relerr(-3 - math.log(2.), logmeanexp([-3, -inf])) < 1e-15
    # Any +inf entry dominates.
    for values in ([+inf, -3], [-3, +inf], [-inf, 0, +inf]):
        assert logmeanexp(values) == +inf
    # Any NaN entry must give NaN.
    for values in ([nan, inf], [nan, -3], [nan]):
        assert math.isnan(logmeanexp(values))
125 |
def test_logavgexp_weighted():
    """Check logavgexp_weighted on one two-element example."""
    # XXX Expand me!
    expected = -1000 - logsumexp([500, -500]) + math.log(2)
    actual = logavgexp_weighted([500, -500], [-1500, -500])
    assert relerr(expected, actual) < 1e-15
130 |
--------------------------------------------------------------------------------
/tests/test_nullify.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | # Copyright (c) 2010-2017, MIT Probabilistic Computing Project
4 | #
5 | # Licensed under the Apache License, Version 2.0 (the "License");
6 | # you may not use this file except in compliance with the License.
7 | # You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 |
17 | from bayeslite import bayesdb_open
18 | from bayeslite import bayesdb_nullify
19 |
20 |
def test_nullify():
    """Check bayesdb_nullify on a small two-column text table.

    It is expected to replace matching values by NULL, to honour the
    `columns` restriction, and to return the number of replaced cells.
    """
    initial_rows = [
        ['1',''],
        ['nan','foo'],
        ['2','nan'],
        ['2','""'],
        ['', ''],
    ]
    with bayesdb_open(':memory:') as bdb:
        bdb.sql_execute('create table t(x,y)')
        for row in initial_rows:
            bdb.sql_execute('insert into t values(?,?)', row)

        def contents():
            return bdb.execute('select * from t').fetchall()

        assert contents() == [
            ('1',''),
            ('nan','foo'),
            ('2','nan'),
            ('2','""'),
            ('', ''),
        ]
        # Nullify empty strings in every column: three cells match.
        assert bayesdb_nullify(bdb, 't', '') == 3
        assert contents() == [
            ('1',None),
            ('nan','foo'),
            ('2','nan'),
            ('2','""'),
            (None, None),
        ]
        # Restricted to column x, the 'nan' in column y must survive.
        assert bayesdb_nullify(bdb, 't', 'nan', columns=['x']) == 1
        assert contents() == [
            ('1',None),
            (None,'foo'),
            ('2','nan'),
            ('2','""'),
            (None, None),
        ]
        # A value that occurs nowhere changes nothing.
        assert bayesdb_nullify(bdb, 't', 'fnord') == 0
56 |
--------------------------------------------------------------------------------
/tests/test_parse_cgpm_analyze.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | # Copyright (c) 2010-2016, MIT Probabilistic Computing Project
4 | #
5 | # Licensed under the Apache License, Version 2.0 (the "License");
6 | # you may not use this file except in compliance with the License.
7 | # You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 |
17 | import bayeslite.backends.cgpm_analyze.parse as cgpm_analyze_parser
18 |
19 | from test_parse import parse_bql_string
20 |
21 | # XXX Is there a better way to get the tokens that are supplied to
22 | # cgpm_analyze.parse.parse?
def parse_analysis_plan(string):
    """Parse `string` as the parenthesized program of an ANALYZE command.

    The text is embedded in a BQL ANALYZE phrase, the phrase is parsed
    with parse_bql_string, and the resulting `program` is handed to the
    cgpm_analyze parser, whose result is returned.
    """
    bql = '''
        ANALYZE m FOR 1 ITERATION (%s)
    ''' % (string,)
    phrases = parse_bql_string(bql)
    analyze_phrase = phrases[0]
    return cgpm_analyze_parser.parse(analyze_phrase.program)
28 |
def test_empty():
    """Check that plans of only semicolons and blanks parse to no clauses."""
    empty_plans = ['', ';', ';;', ' ;', '; ', ' ; ', ' ; ; ']
    for plan in empty_plans:
        assert [] == parse_analysis_plan(plan)
37 |
def test_miscellaneous():
    """Check VARIABLES, SKIP, OPTIMIZED, loom and QUIET clauses."""
    cases = [
        ('VARIABLES A, B, C; OPTIMIZED', [
            cgpm_analyze_parser.Variables(['A', 'B', 'C']),
            cgpm_analyze_parser.Optimized('lovecat'),
        ]),
        ('SKIP "foo"; loom; QUIET', [
            cgpm_analyze_parser.Skip(['foo']),
            cgpm_analyze_parser.Optimized('loom'),
            cgpm_analyze_parser.Quiet(True),
        ]),
        ('SKIP "foo"; loom', [
            cgpm_analyze_parser.Skip(['foo']),
            cgpm_analyze_parser.Optimized('loom'),
        ]),
    ]
    for plan, expected in cases:
        assert parse_analysis_plan(plan) == expected
52 |
def test_rows():
    """Check that a ROWS clause parses to a list of integer row ids."""
    parsed = parse_analysis_plan('ROWS 1, 2, 3, 19;')
    expected = [cgpm_analyze_parser.Rows([1, 2, 3, 19])]
    assert parsed == expected
57 |
def test_inference_planning_basic():
    """Check SUBPROBLEM clauses: bare, parenthesized, listed, repeated.

    Multi-word subproblem names are expected to come back joined with
    underscores.
    """
    Subproblem = cgpm_analyze_parser.Subproblem

    parsed = parse_analysis_plan('SUBPROBLEM variable clustering;')
    assert parsed == [Subproblem(['variable_clustering'])]

    parsed = parse_analysis_plan('SUBPROBLEM (variable hyperparameters);')
    assert parsed == [Subproblem(['variable_hyperparameters'])]

    # Several subproblems in one parenthesized list.
    parsed = parse_analysis_plan('''
        SUBPROBLEM (
            variable clustering concentration,
            variable clustering
        );
    ''' )
    assert parsed == [
        Subproblem([
            'variable_clustering_concentration',
            'variable_clustering'
        ]),
    ]

    # Several SUBPROBLEM clauses each yield their own entry.
    parsed = parse_analysis_plan('''
        SUBPROBLEM row clustering concentration;
        SUBPROBLEM row clustering;
    ''' )
    assert parsed == [
        Subproblem(['row_clustering_concentration']),
        Subproblem(['row_clustering']),
    ]
83 |
def test_inference_planning_bonanza():
    """Check a plan that combines every clause kind, in source order."""
    plan = '''
        VARIABLES foo, bar, llama, salman;
        ROWS 1, 17, 9;
        SUBPROBLEMS (
            row clustering concentration,
            row clustering,
            variable hyperparameters
        );
        OPTIMIZED;
        QUIET;
    '''
    parsed = parse_analysis_plan(plan)
    expected = [
        cgpm_analyze_parser.Variables(['foo','bar','llama','salman']),
        cgpm_analyze_parser.Rows([1, 17, 9]),
        cgpm_analyze_parser.Subproblem([
            'row_clustering_concentration',
            'row_clustering',
            'variable_hyperparameters',
        ]),
        cgpm_analyze_parser.Optimized('lovecat'),
        cgpm_analyze_parser.Quiet(True),
    ]
    assert parsed == expected
106 |
--------------------------------------------------------------------------------
/tests/test_read_pandas.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | # Copyright (c) 2010-2016, MIT Probabilistic Computing Project
4 | #
5 | # Licensed under the Apache License, Version 2.0 (the "License");
6 | # you may not use this file except in compliance with the License.
7 | # You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 |
17 | import apsw
18 | import pandas
19 | import pytest
20 |
21 | from bayeslite import bayesdb_open
22 | from bayeslite import bql_quote_name
23 | from bayeslite.core import bayesdb_has_table
24 | from bayeslite.read_pandas import bayesdb_read_pandas_df
25 |
def do_test(bdb, t, df, index=None):
    """Exercise bayesdb_read_pandas_df for table `t` and data frame `df`.

    The table must not exist on entry.  The helper checks, in order, that:

    - loading without create=True raises ValueError;
    - loading with create=True stores one row per data frame row;
    - loading again with create=True, ifnotexists=False raises ValueError
      and leaves the row count unchanged;
    - loading again with ifnotexists=True raises apsw.ConstraintError and
      leaves the row count unchanged.

    `index` is passed through to bayesdb_read_pandas_df unchanged.
    """
    qt = bql_quote_name(t)
    countem = 'select count(*) from %s' % (qt,)
    # Expected row count, taken from the frame itself so that the helper
    # is not tied to four-row inputs.
    nrows = len(df.index)
    assert not bayesdb_has_table(bdb, t)

    with pytest.raises(ValueError):
        bayesdb_read_pandas_df(bdb, t, df, index=index)

    bayesdb_read_pandas_df(bdb, t, df, create=True, ifnotexists=False,
        index=index)
    assert nrows == bdb.execute(countem).fetchvalue()

    with pytest.raises(ValueError):
        bayesdb_read_pandas_df(bdb, t, df, create=True, ifnotexists=False,
            index=index)
    assert nrows == bdb.execute(countem).fetchvalue()

    with pytest.raises(apsw.ConstraintError):
        bayesdb_read_pandas_df(bdb, t, df, create=True, ifnotexists=True,
            index=index)
    assert nrows == bdb.execute(countem).fetchvalue()
47 |
def test_integral_noindex():
    """Run do_test on an integer-indexed frame without an index column."""
    rows = [(1,2,'foo'),(4,5,6),(7,8,9),(10,11,12)]
    with bayesdb_open() as bdb:
        frame = pandas.DataFrame(rows, index=[42, 78, 62, 43])
        do_test(bdb, 't', frame)
53 |
def test_integral_index():
    """Run do_test on an integer-indexed frame, index column 'quagga'."""
    rows = [(1,2,'foo'),(4,5,6),(7,8,9),(10,11,12)]
    with bayesdb_open() as bdb:
        frame = pandas.DataFrame(rows, index=[42, 78, 62, 43])
        do_test(bdb, 't', frame, index='quagga')
59 |
def test_nonintegral_noindex():
    """Check that loading without an index column raises ValueError.

    NOTE(review): despite the name, the frame built here has the same
    integer index as the test_integral_* cases -- confirm whether a
    non-integral index was intended.
    """
    rows = [(1,2,'foo'),(4,5,6),(7,8,9),(10,11,12)]
    with bayesdb_open() as bdb:
        frame = pandas.DataFrame(rows, index=[42, 78, 62, 43])
        with pytest.raises(ValueError):
            bayesdb_read_pandas_df(bdb, 't', frame)
66 |
def test_nonintegral_index():
    """Run do_test with the frame index stored in column 'eland'.

    NOTE(review): despite the name, the frame built here has the same
    integer index as the test_integral_* cases -- confirm whether a
    non-integral index was intended.
    """
    rows = [(1,2,'foo'),(4,5,6),(7,8,9),(10,11,12)]
    with bayesdb_open() as bdb:
        frame = pandas.DataFrame(rows, index=[42, 78, 62, 43])
        do_test(bdb, 't', frame, index='eland')
72 |
--------------------------------------------------------------------------------
/tests/test_stochastic.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | # Copyright (c) 2010-2016, MIT Probabilistic Computing Project
4 | #
5 | # Licensed under the Apache License, Version 2.0 (the "License");
6 | # you may not use this file except in compliance with the License.
7 | # You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 |
17 | import pytest
18 |
19 | from stochastic import StochasticError
20 | from stochastic import stochastic
21 |
class Quagga(Exception):
    """Distinctive exception raised by the helper tests in this module."""
24 |
@stochastic(max_runs=1, min_passes=1)
def _test_fail(seed):
    """Always raise Quagga."""
    raise Quagga()
28 |
@stochastic(max_runs=1, min_passes=1)
def _test_pass(_seed):
    """Always pass."""
    return None
32 |
passthenfail_counter = 0
@stochastic(max_runs=2, min_passes=1)
def _test_passthenfail(seed):
    """Pass on odd-numbered invocations, raise Quagga on even ones.

    The invocation parity is tracked in the module-level
    passthenfail_counter.
    """
    global passthenfail_counter
    passthenfail_counter = (passthenfail_counter + 1) % 2
    if passthenfail_counter == 0:
        raise Quagga()
41 |
failthenpass_counter = 0
@stochastic(max_runs=2, min_passes=1)
def _test_failthenpass(seed):
    """Raise Quagga on odd-numbered invocations, pass on even ones.

    The invocation parity is tracked in the module-level
    failthenpass_counter.
    """
    global failthenpass_counter
    failthenpass_counter = (failthenpass_counter + 1) % 2
    if failthenpass_counter == 1:
        raise Quagga()
50 |
@stochastic(max_runs=2, min_passes=1)
def _test_failthenfail(seed):
    """Always raise Quagga; the decorator allows two runs."""
    raise Quagga()
54 |
@stochastic(max_runs=1, min_passes=1)
def test_stochastic(seed):
    """Exercise the @stochastic decorator through the helpers above.

    Each helper is called both without a seed and with an explicit seed.
    The assertions expect a seedless call that never passes to raise
    StochasticError, and a seeded call to let the helper's own exception
    (Quagga) propagate unchanged.

    The order of the calls matters: _test_passthenfail and
    _test_failthenpass keep module-level counters that advance on every
    invocation.
    """
    with pytest.raises(StochasticError):
        _test_fail()
    # The StochasticError must carry the underlying Quagga.  Using
    # pytest.raises here (instead of try/except) makes the check fail
    # loudly if no StochasticError is raised at all.
    with pytest.raises(StochasticError) as excinfo:
        _test_fail()
    assert isinstance(excinfo.value.excvalue, Quagga)
    with pytest.raises(Quagga):
        _test_fail(seed)
    _test_pass()
    _test_pass(seed)
    _test_passthenfail()
    with pytest.raises(Quagga):
        _test_passthenfail(seed)
    _test_failthenpass()
    with pytest.raises(Quagga):
        _test_failthenpass(seed)
    with pytest.raises(StochasticError):
        _test_failthenfail()
    with pytest.raises(Quagga):
        _test_failthenfail(seed)
77 |
--------------------------------------------------------------------------------
/tests/test_subsample.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | # Copyright (c) 2010-2016, MIT Probabilistic Computing Project
4 | #
5 | # Licensed under the Apache License, Version 2.0 (the "License");
6 | # you may not use this file except in compliance with the License.
7 | # You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 |
17 | import os
18 |
19 | import bayeslite
20 | import bayeslite.read_csv as read_csv
21 |
22 | from bayeslite.core import bayesdb_get_generator
23 | from bayeslite.guess import bayesdb_guess_population
24 | from bayeslite.backends.cgpm_backend import CGPM_Backend
25 |
# Absolute path of the directory containing this test module.
root = os.path.dirname(os.path.abspath(__file__))
# CSV fixture read by test_subsample; located next to this module.
dha_csv = os.path.join(root, 'dha.csv')
28 |
def test_subsample():
    """Smoke-test a cgpm generator created with SUBSAMPLE 100.

    Two populations are guessed from the dha table, each with one cgpm
    generator: hosp_full_cc without subsampling and hosp_sub_cc with
    SUBSAMPLE 100.  Several queries are run against the subsampled
    population only to check that they execute.  The test then checks
    that the full generator's first 100 cgpm rows map to table rowids
    1..100 in order, while the subsampled generator's rows do not.
    """
    with bayeslite.bayesdb_open(builtin_backends=False) as bdb:
        backend = CGPM_Backend(cgpm_registry={}, multiprocess=False)
        bayeslite.bayesdb_register_backend(bdb, backend)
        # NOTE(review): mode 'U' is deprecated on Python 3 and rejected
        # from 3.11 on; it is kept so behavior on Python 2 is unchanged.
        with open(dha_csv, 'rU') as f:
            read_csv.bayesdb_read_csv(bdb, 'dha', f, header=True, create=True)
        bayesdb_guess_population(bdb, 'hospitals_full', 'dha',
            overrides=[('name', 'key')])
        bayesdb_guess_population(bdb, 'hospitals_sub', 'dha',
            overrides=[('name', 'key')])
        bdb.execute('''
            CREATE GENERATOR hosp_full_cc FOR hospitals_full USING cgpm;
        ''')
        bdb.execute('''
            CREATE GENERATOR hosp_sub_cc FOR hospitals_sub USING cgpm(
                SUBSAMPLE 100
            )
        ''')
        bdb.execute('INITIALIZE 1 MODEL FOR hosp_sub_cc')
        bdb.execute('ANALYZE hosp_sub_cc FOR 1 ITERATION (OPTIMIZED)')
        # The queries below are executed only for their side effects and
        # to check that they do not raise; results are discarded.
        bdb.execute('''
            ESTIMATE SIMILARITY TO (_rowid_=2) IN THE CONTEXT OF PNEUM_SCORE
            FROM hospitals_sub WHERE _rowid_ = 1 OR _rowid_ = 101
        ''').fetchall()
        bdb.execute('''
            ESTIMATE SIMILARITY TO (_rowid_=102) IN THE CONTEXT OF
            N_DEATH_ILL FROM hospitals_sub
            WHERE _rowid_ = 1 OR _rowid_ = 101
        ''').fetchall()
        bdb.execute('''
            ESTIMATE PREDICTIVE PROBABILITY OF mdcr_spnd_amblnc
            FROM hospitals_sub
            WHERE _rowid_ = 1 OR _rowid_ = 101
        ''').fetchall()
        bdb.execute('''
            ESTIMATE SIMILARITY IN THE CONTEXT OF PNEUM_SCORE
            FROM PAIRWISE hospitals_sub
            WHERE (r0._rowid_ = 1 OR r0._rowid_ = 101) AND
            (r1._rowid_ = 1 OR r1._rowid_ = 101)
        ''').fetchall()
        bdb.execute('''
            INFER mdcr_spnd_amblnc FROM hospitals_sub
            WHERE _rowid_ = 1 OR _rowid_ = 101
        ''').fetchall()
        sql = '''
            SELECT table_rowid FROM bayesdb_cgpm_individual
            WHERE generator_id = ?
            ORDER BY cgpm_rowid ASC
            LIMIT 100
        '''
        # Compare against a real list: on Python 3 a list never equals a
        # range object, which would make the `==` check fail and the `!=`
        # check vacuous.  On Python 2 list(range(...)) is the same list.
        first_hundred = list(range(1, 100 + 1))
        gid_full = bayesdb_get_generator(bdb, None, 'hosp_full_cc')
        cursor = bdb.sql_execute(sql, (gid_full,))
        assert [row[0] for row in cursor] == first_hundred
        gid = bayesdb_get_generator(bdb, None, 'hosp_sub_cc')
        cursor = bdb.sql_execute(sql, (gid,))
        assert [row[0] for row in cursor] != first_hundred
        bdb.execute('DROP GENERATOR hosp_sub_cc')
        bdb.execute('DROP GENERATOR hosp_full_cc')
        bdb.execute('DROP POPULATION hospitals_sub')
        bdb.execute('DROP POPULATION hospitals_full')
89 |
--------------------------------------------------------------------------------
/tests/test_threshold.py:
--------------------------------------------------------------------------------
1 | from numpy.random import RandomState
2 |
3 | from threshold import failprob_threshold
4 |
5 |
def test_failprob_threshold_basic():
    """Sanity check on failprob_threshold: Verify, for a relatively large
    failure probability, that the failure threshold it returns for a simple
    test statistic actually results in failures at approximately the right
    frequency.

    The statistic is a standard normal draw from a PRNG seeded with 0, so
    the outcome is reproducible.
    """
    prngstate = RandomState(0)

    def sample(n):
        return prngstate.normal(0, 1, n)

    target_prob = 1e-1
    test_sample_size = 6
    prob, thresh = failprob_threshold(
        sample(1000), test_sample_size, target_prob)
    # 100 / target_prob trials; a trial counts when every one of its
    # test_sample_size draws falls below the threshold.  `range` is used
    # rather than `xrange` so the test also runs on Python 3.
    samples = [all(v < thresh for v in sample(test_sample_size))
               for _ in range(int(100 / target_prob))]
    assert 50 < samples.count(True) < 200
25 |
--------------------------------------------------------------------------------
/tests/test_util.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | # Copyright (c) 2010-2016, MIT Probabilistic Computing Project
4 | #
5 | # Licensed under the Apache License, Version 2.0 (the "License");
6 | # you may not use this file except in compliance with the License.
7 | # You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 |
17 | import pytest
18 |
19 | from bayeslite.util import cursor_value
20 |
def test_cursor_value():
    """Check that cursor_value accepts exactly one row of one column.

    Every other shape of cursor is expected to raise: TypeError for a
    single non-sequence row, ValueError otherwise.
    """
    rejected = [
        (ValueError, []),
        (TypeError, [1]),
        (ValueError, [1, 2]),
        (ValueError, [()]),
        (ValueError, [(1, 2)]),
        (ValueError, [(1, 2), ()]),
        (ValueError, [(1, 2), 3]),
        (ValueError, [(1, 2), (3,)]),
        (ValueError, [(1,), (2,)]),
        (ValueError, [(1,), (2, 3)]),
    ]
    for expected_error, rows in rejected:
        with pytest.raises(expected_error):
            cursor_value(iter(rows))
    assert cursor_value(iter([(42,)])) == 42
43 |
--------------------------------------------------------------------------------
/tests/test_vtab.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | # Copyright (c) 2010-2016, MIT Probabilistic Computing Project
4 | #
5 | # Licensed under the Apache License, Version 2.0 (the "License");
6 | # you may not use this file except in compliance with the License.
7 | # You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 |
17 | import test_core
18 |
19 | from stochastic import stochastic
20 |
21 |
@stochastic(max_runs=2, min_passes=1)
def test_mutinf_smoke(seed):
    """Smoke-test queries against the bql_mutinf table.

    After each INITIALIZE, the query is expected to yield as many
    single-column float rows as the model count just requested, with and
    without `conditions` and `nsamples` constraints.
    """
    with test_core.t1(seed=seed) as (bdb, population_id, _generator_id):
        def checkmi(n, q, *p):
            # Run query `q` and require exactly `n` rows of one float each.
            count = 0
            for row in bdb.sql_execute(q, *p):
                assert len(row) == 1
                assert isinstance(row[0], float)
                count += 1
            assert count == n, '%r =/= %r' % (count, n)

        bindings = (population_id,)

        bdb.execute('initialize 10 models for p1_cc')
        checkmi(10, '''
            select mi from bql_mutinf
            where population_id = ?
            and target_vars = '[1]'
            and reference_vars = '[2]'
        ''', bindings)

        bdb.execute('initialize 11 models if not exists for p1_cc')
        checkmi(11, '''
            select mi from bql_mutinf
            where population_id = ?
            and target_vars = '[1]'
            and reference_vars = '[2]'
            and conditions = '{"3": 42}'
        ''', bindings)

        bdb.execute('initialize 12 models if not exists for p1_cc')
        checkmi(12, '''
            select mi from bql_mutinf
            where population_id = ?
            and target_vars = '[1]'
            and reference_vars = '[2]'
            and nsamples = 2
        ''', bindings)

        bdb.execute('initialize 13 models if not exists for p1_cc')
        checkmi(13, '''
            select mi from bql_mutinf
            where population_id = ?
            and target_vars = '[1]'
            and reference_vars = '[2]'
            and conditions = '{"3": 42}'
            and nsamples = 2
        ''', bindings)
68 |
--------------------------------------------------------------------------------