├── .gitignore
├── .travis.yml
├── Dockerfile
├── LICENSE
├── README.md
├── conf.py
├── examples
├── datashader_example.py
├── extract_sample.py
├── parse_wikipedia_history.py
├── top_items.py
├── tutorial_simple_traildb.py
└── tutorial_wikipedia_sessions.py
├── index.rst
├── requirements.txt
├── runtests.sh
├── setup.py
├── test
└── test.py
├── traildb
├── __init__.py
├── __main__.py
└── traildb.py
└── travisdeps.sh
/.gitignore:
--------------------------------------------------------------------------------
1 | *~
2 | /build/
3 | *.pyc
4 | _build/
5 | test/*.tdb
6 |
7 |
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | sudo: required
2 |
3 | language: python
4 |
5 | python:
6 | - 2.7
7 | - 3.3
8 | - 3.4
9 | - 3.5
10 | - 3.6
11 | - pypy
12 | - pypy3.5
13 |
14 | before_install:
15 | - ./travisdeps.sh
16 |
17 | script:
18 | - ./runtests.sh
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM c3h3/traildb-base:latest
2 |
3 | # pyenv image
4 |
5 | ENV HOME /root
6 | ENV PYENVPATH $HOME/.pyenv
7 | ENV PATH $PYENVPATH/shims:$PYENVPATH/bin:$PATH
8 |
9 | RUN curl -L https://raw.githubusercontent.com/yyuu/pyenv-installer/master/bin/pyenv-installer | bash
10 | RUN echo 'eval "$(pyenv init -)"' > /root/.bashrc
11 |
12 |
13 | EXPOSE 8888
14 |
15 | RUN pyenv update && pyenv install anaconda-2.3.0 && pyenv global anaconda-2.3.0 && ipython profile create
16 |
17 | RUN (echo "require(['base/js/namespace'], function (IPython) {" && \
18 | echo " IPython._target = '_self';" && \
19 | echo "});") > /root/.ipython/profile_default/static/custom/custom.js
20 |
21 |
22 | RUN (echo "c = get_config()" && \
23 | echo "headers = {'Content-Security-Policy': 'frame-ancestors *'}" && \
24 | echo "c.NotebookApp.allow_origin = '*'" && \
25 | echo "c.NotebookApp.allow_credentials = True" && \
26 | echo "c.NotebookApp.tornado_settings = {'headers': headers}" && \
27 | echo "c.NotebookApp.ip = '0.0.0.0'" && \
28 | echo "c.NotebookApp.open_browser = False" && \
29 | echo "from IPython.lib import passwd" && \
30 | echo "import os" && \
31 | echo "c.NotebookApp.password = passwd(os.environ.get('PASSWORD', 'jupyter'))") \
32 | > /root/.ipython/profile_default/ipython_notebook_config.py
33 |
34 |
35 | RUN cd /tmp && git clone https://github.com/traildb/traildb-python && cd traildb-python && python setup.py install
36 |
37 | RUN mkdir /ipynbs
38 | WORKDIR /ipynbs
39 |
40 | CMD ipython notebook --no-browser --ip=0.0.0.0 --port 8888
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | The MIT License (MIT)
2 | Copyright (c) 2016 AdRoll, Inc.
3 |
4 | Permission is hereby granted, free of charge, to any person obtaining
5 | a copy of this software and associated documentation files (the
6 | "Software"), to deal in the Software without restriction, including
7 | without limitation the rights to use, copy, modify, merge, publish,
8 | distribute, sublicense, and/or sell copies of the Software, and to
9 | permit persons to whom the Software is furnished to do so, subject to
10 | the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included
13 | in all copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
18 | IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
19 | CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
20 | TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
21 | SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Python bindings for TrailDB
2 |
3 | ### Quick start
4 |
5 | First install the [TrailDB library](https://github.com/traildb/traildb). Then
6 |
7 | $ python setup.py install
8 |
9 | For detailed instructions, see [Getting Started guide](http://traildb.io/docs/getting_started/).
10 |
11 | ### Example
12 |
13 | See [TrailDB tutorial](http://traildb.io/docs/tutorial) for more information.
14 |
15 | ```python
16 |
17 | >>> from traildb import TrailDB, TrailDBConstructor
18 |
19 | >>> cookie = '12345678123456781234567812345678'
20 | >>> cons = TrailDBConstructor('test.tdb', ['field1', 'field2'])
21 | >>> cons.add(cookie, 123, ['a'])
22 | >>> cons.add(cookie, 124, ['b', 'c'])
23 | >>> tdb = cons.finalize()
24 |
25 | >>> for cookie, trail in tdb.trails():
26 | ... for event in trail:
27 | ... print cookie, event
28 |
29 | 12345678123456781234567812345678 event(time=123L, field1='a', field2='')
30 | 12345678123456781234567812345678 event(time=124L, field1='b', field2='c')
31 | ```
32 |
33 | ## For Docker users
34 |
35 | You can pull image from here:
36 |
37 | $ docker pull c3h3/traildb-ipynb
38 |
39 | Or you can build the Docker image yourself (replace "your/repo-name" with whatever you want):
40 |
41 | $ docker build -t your/repo-name .
42 |
43 |
44 | You can run the Docker image with the default password (`jupyter`); the Jupyter notebook will then be reachable on port 8080 of the host:
45 |
46 | $ docker run -p 8080:8888 -it c3h3/traildb-ipynb
47 |
48 | Or you can run the Docker image with your own password (`yourPassword`); the Jupyter notebook will again be reachable on port 8080 of the host:
49 |
50 | $ docker run -e PASSWORD=yourPassword -p 8080:8888 -it c3h3/traildb-ipynb
51 |
52 | Then open [http://localhost:8080](http://localhost:8080) to access your Jupyter notebook.
53 |
54 | #### Documentation
55 |
56 | Sphinx documentation is available.
57 |
58 | 1. Ensure Sphinx is installed.
59 |
60 | `pip install sphinx`
61 |
62 | 2. Generate HTML documentation.
63 |
64 | `sphinx-build -b html . _build`
65 |
66 | Open `_build/index.html` in a browser.
67 |
--------------------------------------------------------------------------------
/conf.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | #
3 | # traildb-python documentation build configuration file, created by
4 | # sphinx-quickstart on Mon Oct 2 14:22:22 2017.
5 | #
6 | # This file is execfile()d with the current directory set to its
7 | # containing dir.
8 | #
9 | # Note that not all possible configuration values are present in this
10 | # autogenerated file.
11 | #
12 | # All configuration values have a default; values that are commented out
13 | # serve to show the default.
14 |
15 | # If extensions (or modules to document with autodoc) are in another directory,
16 | # add these directories to sys.path here. If the directory is relative to the
17 | # documentation root, use os.path.abspath to make it absolute, like shown here.
18 | #
19 | import os
20 | import sys
21 | sys.path.insert(0, os.path.abspath('.'))
22 |
23 |
24 | # -- General configuration ------------------------------------------------
25 |
26 | # If your documentation needs a minimal Sphinx version, state it here.
27 | #
28 | # needs_sphinx = '1.0'
29 |
30 | # Add any Sphinx extension module names here, as strings. They can be
31 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
32 | # ones.
33 | extensions = ['sphinx.ext.autodoc']
34 |
35 | # Add any paths that contain templates here, relative to this directory.
36 | templates_path = ['_templates']
37 |
38 | # The suffix(es) of source filenames.
39 | # You can specify multiple suffix as a list of string:
40 | #
41 | # source_suffix = ['.rst', '.md']
42 | source_suffix = '.rst'
43 |
44 | # The master toctree document.
45 | master_doc = 'index'
46 |
47 | # General information about the project.
48 | project = u'traildb-python'
49 | copyright = u'2017, AdRoll Inc'
50 | author = u'AdRoll Inc'
51 |
52 | # The version info for the project you're documenting, acts as replacement for
53 | # |version| and |release|, also used in various other places throughout the
54 | # built documents.
55 | #
56 | # The short X.Y version.
57 | version = u'0.1.0'
58 | # The full version, including alpha/beta/rc tags.
59 | release = u'0.1.0'
60 |
61 | # The language for content autogenerated by Sphinx. Refer to documentation
62 | # for a list of supported languages.
63 | #
64 | # This is also used if you do content translation via gettext catalogs.
65 | # Usually you set "language" from the command line for these cases.
66 | language = None
67 |
68 | # List of patterns, relative to source directory, that match files and
69 | # directories to ignore when looking for source files.
70 | # These patterns also affect html_static_path and html_extra_path.
71 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
72 |
73 | # The name of the Pygments (syntax highlighting) style to use.
74 | pygments_style = 'sphinx'
75 |
76 | # If true, `todo` and `todoList` produce output, else they produce nothing.
77 | todo_include_todos = False
78 |
79 |
80 | # -- Options for HTML output ----------------------------------------------
81 |
82 | # The theme to use for HTML and HTML Help pages. See the documentation for
83 | # a list of builtin themes.
84 | #
85 | html_theme = 'alabaster'
86 |
87 | # Theme options are theme-specific and customize the look and feel of a theme
88 | # further. For a list of options available for each theme, see the
89 | # documentation.
90 | #
91 | # html_theme_options = {}
92 |
93 | # Add any paths that contain custom static files (such as style sheets) here,
94 | # relative to this directory. They are copied after the builtin static files,
95 | # so a file named "default.css" will overwrite the builtin "default.css".
96 | html_static_path = ['_static']
97 |
98 | # Custom sidebar templates, must be a dictionary that maps document names
99 | # to template names.
100 | #
101 | # This is required for the alabaster theme
102 | # refs: http://alabaster.readthedocs.io/en/latest/installation.html#sidebars
103 | html_sidebars = {
104 | '**': [
105 | 'about.html',
106 | 'navigation.html',
107 | 'relations.html', # needs 'show_related': True theme option to display
108 | 'searchbox.html',
109 | 'donate.html',
110 | ]
111 | }
112 |
113 |
114 | # -- Options for HTMLHelp output ------------------------------------------
115 |
116 | # Output file base name for HTML help builder.
117 | htmlhelp_basename = 'traildb-pythondoc'
118 |
119 |
120 | # -- Options for LaTeX output ---------------------------------------------
121 |
122 | latex_elements = {
123 | # The paper size ('letterpaper' or 'a4paper').
124 | #
125 | # 'papersize': 'letterpaper',
126 |
127 | # The font size ('10pt', '11pt' or '12pt').
128 | #
129 | # 'pointsize': '10pt',
130 |
131 | # Additional stuff for the LaTeX preamble.
132 | #
133 | # 'preamble': '',
134 |
135 | # Latex figure (float) alignment
136 | #
137 | # 'figure_align': 'htbp',
138 | }
139 |
140 | # Grouping the document tree into LaTeX files. List of tuples
141 | # (source start file, target name, title,
142 | # author, documentclass [howto, manual, or own class]).
143 | latex_documents = [
144 | (master_doc, 'traildb-python.tex', u'traildb-python Documentation',
145 | u'AdRoll Inc', 'manual'),
146 | ]
147 |
148 |
149 | # -- Options for manual page output ---------------------------------------
150 |
151 | # One entry per manual page. List of tuples
152 | # (source start file, name, description, authors, manual section).
153 | man_pages = [
154 | (master_doc, 'traildb-python', u'traildb-python Documentation',
155 | [author], 1)
156 | ]
157 |
158 |
159 | # -- Options for Texinfo output -------------------------------------------
160 |
161 | # Grouping the document tree into Texinfo files. List of tuples
162 | # (source start file, target name, title, author,
163 | # dir menu entry, description, category)
164 | texinfo_documents = [
165 | (master_doc, 'traildb-python', u'traildb-python Documentation',
166 | author, 'traildb-python', 'One line description of project.',
167 | 'Miscellaneous'),
168 | ]
169 |
170 |
171 |
172 |
--------------------------------------------------------------------------------
/examples/datashader_example.py:
--------------------------------------------------------------------------------
1 | from __future__ import division
2 | from __future__ import unicode_literals
3 | from __future__ import print_function
4 | from __future__ import absolute_import
5 | from builtins import open
6 | from builtins import int
7 | from builtins import range
8 | from past.utils import old_div
9 |
10 | import datashader as ds
11 | import datashader.transfer_functions as tf
12 | import pandas as pd
13 |
14 | from traildb import TrailDB
15 |
16 |
def get_events(tdb):
    """Yield ``(first_event_time, events)`` for each trail that has matches.

    Every trail id from 0 to ``len(tdb) - 1`` is read with an event filter
    on the title 'Prince (musician)'.  Trails whose filtered event list is
    empty are skipped.
    """
    title_filter = [('title', 'Prince (musician)')]
    trail_count = len(tdb)
    trail_id = 0
    while trail_id < trail_count:
        matched = list(tdb.trail(trail_id, event_filter=title_filter))
        trail_id += 1
        if not matched:
            continue
        yield matched[0].time, matched
23 |
24 |
def get_dataframe():
    """Build the plotting DataFrame from ``pydata-tutorial.tdb``.

    Each matching event becomes one row:

    * ``x`` -- event time minus the database's minimum timestamp, divided
      by ``24 * 3600`` with ``old_div``.
    * ``y`` -- position of the event's trail in the ``get_events`` output.
    * ``type`` -- categorical, ``'user'`` when ``event.user`` is truthy and
      ``'anon'`` otherwise.
    """
    tdb = TrailDB('pydata-tutorial.tdb')
    origin = tdb.min_timestamp()
    seconds_per_day = 24 * 3600
    day_offsets = []
    rows = []
    kinds = []
    # try this:
    # for row, (first_ts, events) in enumerate(sorted(get_events(tdb), reverse=True)):
    for row, (first_ts, events) in enumerate(get_events(tdb)):
        for event in events:
            elapsed = int(event.time - origin)
            day_offsets.append(old_div(elapsed, seconds_per_day))
            rows.append(row)
            if event.user:
                kinds.append('user')
            else:
                kinds.append('anon')
    frame = pd.DataFrame({'x': day_offsets, 'y': rows})
    frame['type'] = pd.Series(kinds, dtype='category')
    return frame
41 |
# Aggregate the events on a 400x300 canvas, counting points per 'type'
# category, then colour anonymous edits red and logged-in edits blue.
cnv = ds.Canvas(400, 300)
agg = cnv.points(get_dataframe(), 'x', 'y', ds.count_cat('type'))
colors = {'anon': 'red', 'user': 'blue'}
img = tf.set_background(tf.colorize(agg, colors, how='eq_hist'), 'white')
# The image is written from a bytes buffer, so the file has to be opened in
# binary mode.  In text mode ('w') the write raises TypeError, because this
# module uses the io-style ``open`` imported from ``builtins``.
with open('prince.png', 'wb') as f:
    f.write(img.to_bytesio().getvalue())
48 |
--------------------------------------------------------------------------------
/examples/extract_sample.py:
--------------------------------------------------------------------------------
1 | from __future__ import division
2 | from __future__ import print_function
3 | from __future__ import unicode_literals
4 | from __future__ import absolute_import
5 | from past.utils import old_div
6 | from random import random
7 | import sys
8 |
9 | from traildb import TrailDB, TrailDBConstructor
10 |
11 |
def extract(tdb, cons, sample_size):
    """Copy a random sample of trails from ``tdb`` into ``cons``.

    One ``random()`` draw is made per trail.  The trail is copied in full
    when the draw is below ``sample_size`` and skipped otherwise.  The
    first element of each event is dropped from the values because the
    event time is passed to ``cons.add`` separately.

    Returns whatever ``cons.finalize()`` returns.
    """
    for uuid, trail in tdb.trails():
        if random() >= sample_size:
            continue
        for event in trail:
            values = list(event)
            cons.add(uuid, event.time, values[1:])
    return cons.finalize()
18 |
if __name__ == '__main__':
    # Three positional arguments are read below (argv[1], argv[2], argv[3]),
    # so argv needs at least four entries counting the script name.  The
    # previous "< 3" check let a two-argument call through to an IndexError.
    if len(sys.argv) < 4:
        print('Usage: extract_sample source_tdb destination_tdb sample_percentage')
        sys.exit(1)
    tdb = TrailDB(sys.argv[1])
    # The source database's first field is skipped when declaring the
    # destination fields.
    cons = TrailDBConstructor(sys.argv[2], tdb.fields[1:])
    # The percentage argument is converted to a fraction for extract().
    num = extract(tdb, cons, old_div(float(sys.argv[3]), 100.)).num_trails
    print('Extracted %d trails to %s' % (num, sys.argv[2]))
27 |
--------------------------------------------------------------------------------
/examples/parse_wikipedia_history.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 | from __future__ import unicode_literals
3 | from __future__ import division
4 | from __future__ import absolute_import
5 | from datetime import datetime
6 | import sys
7 | import gzip
8 | import hashlib
9 |
10 | import traildb
11 |
12 | num_events = 0
13 |
14 | # This script parses Wikipedia revision metadata that you can find here
15 | # https://dumps.wikimedia.org/enwiki/
16 | # You want a file like
17 | # https://dumps.wikimedia.org/enwiki/20160501/enwiki-20160501-stub-meta-history.xml.gz
18 |
19 |
def add_event(cons, uuid, tstamp, user, ip, title):
    """Add one event to ``cons`` and bump the module-wide event counter.

    The values are stored in the order ``(user, ip, title)``.  A progress
    line is printed whenever the counter reaches a multiple of 1024.
    """
    global num_events
    values = (user, ip, title)
    cons.add(uuid, tstamp, values)
    num_events += 1
    if num_events % 1024 == 0:
        print('%d events added' % num_events)
26 |
27 |
def parse(cons, fileobj):
    """Scan a Wikipedia stub-meta-history dump and add one event per edit.

    The dump is read line by line.  ``<title>`` and ``<timestamp>`` lines
    update the current title and timestamp.  Each ``<username>`` or ``<ip>``
    line then produces one event via ``add_event``, keyed by the md5 hex
    digest of the user name or IP address.

    Args:
        cons: constructor object handed through to ``add_event``.
        fileobj: iterable of lines, as bytes or text.  Byte lines are
            decoded as UTF-8 (NOTE(review): assumes the dump is UTF-8
            encoded -- confirm).
    """
    for raw_line in fileobj:
        # A gzip.GzipFile yields bytes; decode so the text prefixes and the
        # slices below operate on characters rather than raw bytes.
        if isinstance(raw_line, bytes):
            raw_line = raw_line.decode('utf-8')
        line = raw_line.strip()
        if line.startswith('<title>'):
            # Drop '<title>' (7 chars) and '</title>' (8 chars).
            title = line[7:-8]
        elif line.startswith('<timestamp>'):
            # Drop '<timestamp>' (11 chars) and a 13-char tail, presumably
            # 'Z</timestamp>', leaving '%Y-%m-%dT%H:%M:%S'.
            tstamp = datetime.strptime(line[11:-13], '%Y-%m-%dT%H:%M:%S')
        elif line.startswith('<username>'):
            # Drop '<username>' (10 chars) and '</username>' (11 chars).
            user = line[10:-11]
            ip = ''
            # hashlib needs bytes, so encode the text before hashing.
            uuid = hashlib.md5(user.encode('utf-8')).hexdigest()
            add_event(cons, uuid, tstamp, user, ip, title)
        elif line.startswith('<ip>'):
            # Drop '<ip>' (4 chars) and '</ip>' (5 chars).
            user = ''
            ip = line[4:-5]
            uuid = hashlib.md5(ip.encode('utf-8')).hexdigest()
            add_event(cons, uuid, tstamp, user, ip, title)
45 |
if __name__ == '__main__':
    # Expect two arguments: the gzipped dump to read and the TrailDB to write.
    argv = sys.argv
    if len(argv) < 3:
        print('Usage: parse_wikipedia_history.py enwiki-20160501-stub-meta-history.xml.gz wikipedia-history.tdb')
        sys.exit(1)

    dump_path = argv[1]
    output_path = argv[2]
    field_names = ['user', 'ip', 'title']
    cons = traildb.TrailDBConstructor(output_path, field_names)
    parse(cons, gzip.GzipFile(dump_path))
    print('Done adding %d events!' % num_events)
    cons.finalize()
    print('Success!')
57 |
--------------------------------------------------------------------------------
/examples/top_items.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 | from __future__ import unicode_literals
3 | from __future__ import division
4 | from __future__ import absolute_import
5 | from collections import Counter
6 | import timeit
7 |
8 | from traildb import TrailDB
9 |
10 |
def string_top():
    """Return the five most common ``title`` values in ``pydata-tutorial``.

    Titles are counted over every event of every trail, using the values
    that the default ``trails()`` iteration yields.
    """
    tdb = TrailDB('pydata-tutorial')
    tally = Counter()
    for _uuid, trail in tdb.trails():
        for event in trail:
            tally[event.title] += 1
    return tally.most_common(5)
15 |
16 |
def item_top():
    """Return the five most common titles, counting raw items.

    Trails are read with ``rawitems=True`` and the ``title`` items are
    counted as they come.  Only the five winners are translated back to
    values with ``get_item_value``.  The result is a list of
    ``(value, frequency)`` pairs.
    """
    tdb = TrailDB('pydata-tutorial')
    tally = Counter()
    for _uuid, trail in tdb.trails(rawitems=True):
        for event in trail:
            tally[event.title] += 1
    top = []
    for item, freq in tally.most_common(5):
        top.append((tdb.get_item_value(item), freq))
    return top
22 |
# Time three runs of each variant and print "<label> <seconds>".
for label, func in (('string_top', string_top), ('item_top', item_top)):
    print(label, timeit.timeit(func, number=3))
25 |
--------------------------------------------------------------------------------
/examples/tutorial_simple_traildb.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 | from __future__ import unicode_literals
3 | from __future__ import division
4 | from __future__ import absolute_import
5 | from builtins import range
6 | from uuid import uuid4
7 | from datetime import datetime
8 |
9 | from traildb import TrailDBConstructor, TrailDB
10 |
# Build a tiny TrailDB: three users, each with an open/save/close sequence.
cons = TrailDBConstructor('tiny', ['username', 'action'])
actions = ['open', 'save', 'close']

for user_index in range(3):
    trail_uuid = uuid4().hex
    username = 'user%d' % user_index
    # User N's events fall in month N + 1 of 2016, one action per day
    # starting on the 1st.
    month = user_index + 1
    for day, action in enumerate(actions, 1):
        cons.add(trail_uuid, datetime(2016, month, day), (username, action))

cons.finalize()

# Read the database back and print every trail with its events.
for trail_uuid, trail in TrailDB('tiny').trails():
    print(trail_uuid, list(trail))
23 |
--------------------------------------------------------------------------------
/examples/tutorial_wikipedia_sessions.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 | from __future__ import unicode_literals
3 | from __future__ import division
4 | from __future__ import absolute_import
5 | from builtins import next
6 | import sys
7 |
8 | from traildb import TrailDB
9 |
10 | SESSION_LIMIT = 30 * 60
11 |
12 |
def sessions(tdb):
    """Print session and event counts for every trail in ``tdb``.

    Trails are read with ``only_timestamp=True``.  The first timestamp
    opens the first session.  A new session is counted whenever the gap
    to the previous timestamp exceeds ``SESSION_LIMIT``.
    """
    for index, (uuid, trail) in enumerate(tdb.trails(only_timestamp=True)):
        last_seen = next(trail)
        event_count = 1
        session_count = 1
        for timestamp in trail:
            gap = timestamp - last_seen
            last_seen = timestamp
            event_count += 1
            if gap > SESSION_LIMIT:
                session_count += 1
        summary = (index, session_count, event_count)
        print('Trail[%d] Number of Sessions: %d Number of Events: %d' %
              summary)
25 |
if __name__ == '__main__':
    # One argument is required: the TrailDB to analyse.
    if len(sys.argv) < 2:
        # The usage text originally ended in a bare trailing space where the
        # argument placeholder belongs; name the expected argument.
        print('Usage: tutorial_wikipedia_sessions <wikipedia-history.tdb>')
    else:
        sessions(TrailDB(sys.argv[1]))
31 |
--------------------------------------------------------------------------------
/index.rst:
--------------------------------------------------------------------------------
1 | .. traildb-python documentation master file, created by
2 | sphinx-quickstart on Mon Oct 2 14:17:29 2017.
3 | You can adapt this file completely to your liking, but it should at least
4 | contain the root `toctree` directive.
5 |
6 | traildb-python
7 | ==============
8 |
9 | These are the Python bindings for TrailDB (tested on Python 2.7 and 3.x). The official TrailDB website is at http://traildb.io/
10 |
11 | .. toctree::
12 | :maxdepth: 2
13 | :caption: Contents:
14 |
15 | .. autoclass:: traildb.TrailDB
16 | :members:
17 |
18 | .. autoclass:: traildb.TrailDBConstructor
19 | :members:
20 |
21 | .. autoclass:: traildb.TrailDBCursor
22 | :members:
23 |
24 | .. autoclass:: traildb.TrailDBMultiCursor
25 | :members:
26 |
27 | .. autoclass:: traildb.TrailDBEventFilter
28 | :members:
29 |
30 | .. autoclass:: traildb.TrailDBError
31 | :members:
32 |
33 | Indices and tables
34 | ==================
35 |
36 | * :ref:`genindex`
37 | * :ref:`search`
38 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | appdirs==1.4.3
2 | configparser==3.5.0
3 | enum34==1.1.6
4 | flake8==3.3.0
5 | future==0.16.0
6 | mccabe==0.6.1
7 | packaging==16.8
8 | py==1.4.32
9 | pycodestyle==2.3.1
10 | pyflakes==1.5.0
11 | pyparsing==2.2.0
12 | six==1.10.0
13 |
--------------------------------------------------------------------------------
/runtests.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/usr/local/lib"
3 |
4 | set -e
5 |
6 | # E999 -- syntax error
7 | # F821 -- undefined local variable
8 | flake8 ./traildb/ | grep '[ ]E999[ ]\|[ ]F821[ ]' | awk '{print} END {exit(NR > 0)}'
9 |
10 | env PYTHONPATH='.' python test/test.py
11 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
from distutils.core import setup

# NOTE(review): ``install_requires`` is a setuptools option; plain distutils
# is not expected to act on it -- confirm whether this script should be
# switched to setuptools.
_metadata = {
    'name': 'traildb',
    'version': '0.0.2',
    'description': 'TrailDB stores and queries cookie trails from raw logs.',
    'author': 'AdRoll.com',
    'install_requires': ['future>=0.16.0'],
    'packages': ['traildb'],
}

setup(**_metadata)
9 |
--------------------------------------------------------------------------------
/test/test.py:
--------------------------------------------------------------------------------
1 | from __future__ import unicode_literals
2 | from __future__ import print_function
3 | from __future__ import division
4 | from __future__ import absolute_import
5 | from builtins import next
6 | from builtins import int
7 |
8 | import os
9 | import unittest
10 | import datetime
11 |
12 | from traildb import TrailDB, TrailDBConstructor, tdb_item_field, tdb_item_val
13 | from traildb import TrailDBError, TrailDBCursor, TrailDBMultiCursor
14 |
15 |
class TestAPI(unittest.TestCase):
    """Read-side API checks against small TrailDBs built on the fly.

    ``setUp`` writes ``testtrail.tdb`` with a single trail of three events
    and ``tearDown`` deletes it.  Tests that need several trails build
    their own database through ``_build_multi_trail_db``, which also
    schedules removal of the file it creates so no ``.tdb`` files are left
    behind after a run.
    """

    # Five UUIDs shared by the selection / whitelist / blacklist tests.
    _MULTI_UUIDS = ["02345678123456781234567812345678",
                    "12345678123456781234567812345678",
                    "22345678123456781234567812345678",
                    "32345678123456781234567812345678",
                    "42345678123456781234567812345678"]

    def setUp(self):
        self.uuid = '12345678123456781234567812345678'
        cons = TrailDBConstructor('testtrail', ['field1', 'field2'])
        cons.add(self.uuid, 1, ['a', '1'])
        cons.add(self.uuid, 2, ['b', '2'])
        cons.add(self.uuid, 3, ['c', '3'])
        cons.finalize()

    def tearDown(self):
        os.unlink('testtrail.tdb')

    def _build_multi_trail_db(self, name):
        """Create ``<name>.tdb`` with three events per UUID and open it.

        The file is registered for deletion with ``addCleanup`` as soon as
        it has been written, so it is removed even if the test fails.
        """
        cons = TrailDBConstructor(name, ['field1', 'field2'])
        for uuid in self._MULTI_UUIDS:
            cons.add(uuid, 1, ['a', '1'])
            cons.add(uuid, 2, ['b', '2'])
            cons.add(uuid, 3, ['c', '3'])
        cons.finalize()
        self.addCleanup(os.unlink, name + '.tdb')
        return TrailDB(name)

    def test_trails(self):
        db = TrailDB('testtrail')
        self.assertEqual(1, db.num_trails)

        trail = db.trail(0)
        self.assertIsInstance(trail, TrailDBCursor)

        events = list(trail)  # Force evaluation of generator
        self.assertEqual(3, len(events))
        for event in events:
            self.assertTrue(hasattr(event, 'time'))
            self.assertTrue(hasattr(event, 'field1'))
            self.assertTrue(hasattr(event, 'field2'))

            with self.assertRaises(AttributeError):
                event.missing_field

    def test_trails_selected_uuids(self):
        uuids = self._MULTI_UUIDS
        tdb = self._build_multi_trail_db('whitelist_testtrail')
        whitelist = [uuids[0],
                     uuids[3],
                     uuids[4]]

        expected_length = 3
        for trail_uuid, trail_events in tdb.trails(selected_uuids=whitelist):
            trail_events = list(trail_events)
            self.assertEqual(len(trail_events),
                             expected_length)

    def test_crumbs(self):
        db = TrailDB('testtrail.tdb')

        n = 0
        for uuid, trail in db.trails():
            n += 1
            self.assertEqual(self.uuid, uuid)
            self.assertIsInstance(trail, TrailDBCursor)
            self.assertEqual(3, len(list(trail)))

        self.assertEqual(1, n)

    def test_silly_open(self):
        self.assertTrue(os.path.exists('testtrail.tdb'))
        self.assertFalse(os.path.exists('testtrail'))

        # Both spellings must open without raising; the handles themselves
        # are not needed.
        TrailDB('testtrail.tdb')
        TrailDB('testtrail')

        with self.assertRaises(TrailDBError):
            TrailDB('foo.tdb')

    def test_fields(self):
        db = TrailDB('testtrail')
        self.assertEqual(['time', 'field1', 'field2'], db.fields)

    def test_uuids(self):
        db = TrailDB('testtrail')
        self.assertEqual(0, db.get_trail_id(self.uuid))
        self.assertEqual(self.uuid, db.get_uuid(0))
        self.assertTrue(self.uuid in db)

    def test_lexicons(self):
        db = TrailDB('testtrail')

        # First field: the reported size is one more than the three
        # distinct values that were added.
        self.assertEqual(4, db.lexicon_size(1))
        self.assertEqual(['a', 'b', 'c'], list(db.lexicon(1)))

        # Second field
        self.assertEqual(['1', '2', '3'], list(db.lexicon(2)))

        with self.assertRaises(TrailDBError):
            db.lexicon(3)  # Out of bounds

    def test_metadata(self):
        db = TrailDB('testtrail.tdb')
        self.assertEqual(1, db.min_timestamp())
        self.assertEqual(3, db.max_timestamp())
        self.assertEqual((1, 3), db.time_range())

        self.assertEqual((1, 3), db.time_range(parsetime=False))

    def test_apply_whitelist(self):
        uuids = self._MULTI_UUIDS
        tdb = self._build_multi_trail_db('whitelist_testtrail')
        whitelist = [uuids[0],
                     uuids[3],
                     uuids[4]]
        tdb.apply_whitelist(whitelist)
        found_trails = list(tdb.trails(parsetime=False))

        # Every trail is still listed; non-whitelisted ones come back empty.
        self.assertEqual(len(found_trails), len(uuids))
        for trail_uuid, trail_events in found_trails:
            if trail_uuid in whitelist:
                expected_length = 3
            else:
                expected_length = 0

            trail_events = list(trail_events)
            self.assertEqual(len(trail_events),
                             expected_length)

    def test_apply_blacklist(self):
        uuids = self._MULTI_UUIDS
        tdb = self._build_multi_trail_db('blacklist_testtrail')
        blacklist = [uuids[1],
                     uuids[2]]
        tdb.apply_blacklist(blacklist)
        found_trails = list(tdb.trails(parsetime=False))

        for trail_uuid, trail_events in found_trails:
            if trail_uuid in blacklist:
                expected_length = 0
            else:
                expected_length = 3

            trail_events = list(trail_events)
            self.assertEqual(len(trail_events),
                             expected_length)
182 |
183 |
class TestFilter(unittest.TestCase):
    """Event-filter behaviour of ``trail()`` on a single five-event trail.

    The fixture holds events at times 1..5 with ``field1`` 'a'..'e',
    ``field2`` '1'..'5', and ``field3`` 'x' everywhere except the third
    event, which has 'y'.
    """

    def setUp(self):
        trail_uuid = '12345678123456781234567812345678'
        builder = TrailDBConstructor('testtrail',
                                     ['field1', 'field2', 'field3'])
        rows = [(1, ['a', '1', 'x']),
                (2, ['b', '2', 'x']),
                (3, ['c', '3', 'y']),
                (4, ['d', '4', 'x']),
                (5, ['e', '5', 'x'])]
        for tstamp, values in rows:
            builder.add(trail_uuid, tstamp, values)
        builder.finalize()

    def tearDown(self):
        os.unlink('testtrail.tdb')

    def test_simple_disjunction(self):
        db = TrailDB('testtrail')
        # test shorthand notation (not a list of lists)
        shorthand = [('field1', 'a'), ('field2', '4')]
        matched = list(db.trail(0, event_filter=shorthand))
        self.assertEqual(len(matched), 2)
        first = matched[0]
        second = matched[1]
        self.assertEqual((first.field1, first.field2), ('a', '1'))
        self.assertEqual((second.field1, second.field2), ('d', '4'))

    def test_negation(self):
        db = TrailDB('testtrail')
        not_x = [('field3', 'x', True)]
        matched = list(db.trail(0, event_filter=not_x))
        self.assertEqual(len(matched), 1)
        only = matched[0]
        self.assertEqual((only.field1, only.field2, only.field3),
                         ('c', '3', 'y'))

    def test_conjunction(self):
        db = TrailDB('testtrail')
        clauses = [[('field1', 'e'), ('field1', 'c')],
                   [('field3', 'y', True)]]
        matched = list(db.trail(0, event_filter=clauses))
        self.assertEqual(len(matched), 1)
        only = matched[0]
        self.assertEqual((only.field1, only.field2), ('e', '5'))

    def test_time_range(self):
        db = TrailDB('testtrail')
        window = [[(2, 4)]]
        matched = list(db.trail(0, event_filter=window, parsetime=False))
        self.assertEqual(len(matched), 2)
        self.assertEqual(matched[0].time, 2)
        self.assertEqual(matched[1].time, 3)

    def test_filter_object(self):
        db = TrailDB('testtrail')
        clauses = [[('field1', 'e'), ('field1', 'c')],
                   [('field3', 'y', True)]]
        filter_obj = db.create_filter(clauses)
        # The same filter object is applied twice and must give the same
        # result both times.
        for _attempt in (1, 2):
            matched = list(db.trail(0, event_filter=filter_obj))
            self.assertEqual(len(matched), 1)
            only = matched[0]
            self.assertEqual((only.field1, only.field2), ('e', '5'))
242 |
243 |
244 | class TestCons(unittest.TestCase):
def test_cursor(self):
    """A trail cursor iterates once, has no ``len``, and can be reopened."""
    uuid = '12345678123456781234567812345678'
    cons = TrailDBConstructor('testtrail', ['field1', 'field2'])
    rows = [(1, ['a', '1']),
            (2, ['b', '2']),
            (3, ['c', '3']),
            (4, ['d', '4']),
            (5, ['e', '5'])]
    for tstamp, values in rows:
        cons.add(uuid, tstamp, values)
    tdb = cons.finalize()

    # Looking up a UUID that was never added raises IndexError.
    with self.assertRaises(IndexError):
        tdb.get_trail_id('12345678123456781234567812345679')

    trail = tdb.trail(tdb.get_trail_id(uuid))
    with self.assertRaises(TypeError):
        len(trail)

    seen = 0
    for position, event in enumerate(trail, 1):
        self.assertEqual(position, int(event.field2))
        self.assertEqual(position, int(event.time))
        seen = position
    self.assertEqual(6, seen + 1)

    # Iterator is empty now
    self.assertEqual([], list(trail))

    # A fresh cursor on the same trail yields all events again.
    field1_values = []
    for event in tdb.trail(tdb.get_trail_id(uuid)):
        field1_values.append(event.field1)
    self.assertEqual(['a', 'b', 'c', 'd', 'e'], field1_values)
274 |
def test_cursor_parsetime(self):
    """``parsetime=True`` returns event times as ``datetime`` objects."""
    uuid = '12345678123456781234567812345678'
    cons = TrailDBConstructor('testtrail', ['field1'])

    events = [(datetime.datetime(2016, 1, 1, 1, 1), ['1']),
              (datetime.datetime(2016, 1, 1, 1, 2), ['2']),
              (datetime.datetime(2016, 1, 1, 1, 3), ['3'])]
    for when, values in events:
        cons.add(uuid, when, values)
    tdb = cons.finalize()

    timestamps = []
    for event in tdb.trail(0, parsetime=True):
        timestamps.append(event.time)

    self.assertIsInstance(timestamps[0], datetime.datetime)
    expected_times = [when for when, _ in events]
    self.assertEqual(expected_times, timestamps)
    first_time = events[0][0]
    last_time = events[-1][0]
    self.assertEqual(tdb.time_range(True), (first_time, last_time))
290 |
291 | def test_binarydata(self):
292 | binary = b'\x00\x01\x02\x00\xff\x00\xff'
293 | uuid = '12345678123456781234567812345678'
294 | cons = TrailDBConstructor('testtrail', ['field1'])
295 | cons.add(uuid, 123, [binary])
296 | tdb = cons.finalize(decode=False)
297 | self.assertEqual(list(tdb[0])[0].field1, binary)
298 |
299 | def test_cons(self):
300 | uuid = '12345678123456781234567812345678'
301 | cons = TrailDBConstructor('testtrail', ['field1', 'field2'])
302 | cons.add(uuid, 123, ['a'])
303 | cons.add(uuid, 124, ['b', 'c'])
304 | tdb = cons.finalize()
305 |
306 | self.assertEqual(0, tdb.get_trail_id(uuid))
307 | self.assertEqual(uuid, tdb.get_uuid(0))
308 | self.assertEqual(1, tdb.num_trails)
309 | self.assertEqual(2, tdb.num_events)
310 | self.assertEqual(3, tdb.num_fields)
311 |
312 | crumbs = list(tdb.trails())
313 | self.assertEqual(1, len(crumbs))
314 | self.assertEqual(uuid, crumbs[0][0])
315 | self.assertTrue(tdb[uuid])
316 | self.assertTrue(uuid in tdb)
317 | self.assertFalse('00000000000000000000000000000000' in tdb)
318 | with self.assertRaises(IndexError):
319 | tdb['00000000000000000000000000000000']
320 |
321 | trail = list(crumbs[0][1])
322 |
323 | self.assertEqual(123, trail[0].time)
324 | self.assertEqual('a', trail[0].field1)
325 | self.assertEqual('', trail[0].field2) # TODO: Should this be None?
326 |
327 | self.assertEqual(124, trail[1].time)
328 | self.assertEqual('b', trail[1].field1)
329 | self.assertEqual('c', trail[1].field2)
330 |
331 | def test_items(self):
332 | uuid = '12345678123456781234567812345678'
333 | cons = TrailDBConstructor('testtrail', ['field1', 'field2'])
334 | cons.add(uuid, 123, ['a', 'x' * 2048])
335 | cons.add(uuid, 124, ['b', 'y' * 2048])
336 | tdb = cons.finalize()
337 |
338 | cursor = tdb.trail(0, rawitems=True)
339 | event = next(cursor)
340 | self.assertEqual(tdb.get_item_value(event.field1), 'a')
341 | self.assertEqual(tdb.get_item_value(event.field2), 'x' * 2048)
342 | self.assertEqual(tdb.get_item('field1', 'a'), event.field1)
343 | self.assertEqual(tdb.get_item('field2', 'x' * 2048), event.field2)
344 | event = next(cursor)
345 | self.assertEqual(tdb.get_item_value(event.field1), 'b')
346 | self.assertEqual(tdb.get_item_value(event.field2), 'y' * 2048)
347 | self.assertEqual(tdb.get_item('field1', 'b'), event.field1)
348 | self.assertEqual(tdb.get_item('field2', 'y' * 2048), event.field2)
349 |
350 | cursor = tdb.trail(0, rawitems=True)
351 | event = next(cursor)
352 | field = tdb_item_field(event.field1)
353 | val = tdb_item_val(event.field1)
354 | self.assertEqual(tdb.get_value(field, val), 'a')
355 | field = tdb_item_field(event.field2)
356 | val = tdb_item_val(event.field2)
357 | self.assertEqual(tdb.get_value(field, val), 'x' * 2048)
358 | event = next(cursor)
359 | field = tdb_item_field(event.field1)
360 | val = tdb_item_val(event.field1)
361 | self.assertEqual(tdb.get_value(field, val), 'b')
362 | field = tdb_item_field(event.field2)
363 | val = tdb_item_val(event.field2)
364 | self.assertEqual(tdb.get_value(field, val), 'y' * 2048)
365 |
366 | def test_append(self):
367 | uuid = '12345678123456781234567812345678'
368 | cons = TrailDBConstructor('testtrail', ['field1'])
369 | cons.add(uuid, 123, ['foobarbaz'])
370 | tdb = cons.finalize()
371 |
372 | cons = TrailDBConstructor('testtrail2', ['field1'])
373 | cons.add(uuid, 124, ['barquuxmoo'])
374 | cons.append(tdb)
375 | tdb = cons.finalize()
376 |
377 | self.assertEqual(2, tdb.num_events)
378 | uuid, trail = list(tdb.trails())[0]
379 | trail = list(trail)
380 | self.assertEqual([123, 124], [e.time for e in trail])
381 | self.assertEqual(['foobarbaz', 'barquuxmoo'],
382 | [e.field1 for e in trail])
383 |
384 | def tearDown(self):
385 | try:
386 | os.unlink('testtrail.tdb')
387 | os.unlink('testtrail2.tdb')
388 | except:
389 | pass
390 |
391 |
class TestMultiCursor(unittest.TestCase):
    # Merges trails of the same UUIDs from two TrailDBs ('testtrail1' with
    # three fields, 'testtrail2' with four) through TrailDBMultiCursor.

    def setUp(self):
        self.uuid1 = '12345678123456781234567812345678'
        self.uuid2 = '12345678123456781234567812345679'

        cons = TrailDBConstructor('testtrail1', ['field1', 'field2', 'field3'])
        cons.add(self.uuid1, 1, ['a', '1', 'x'])
        cons.add(self.uuid1, 2, ['b', '2', 'x'])
        cons.add(self.uuid2, 1, ['c', '3', 'y'])
        cons.add(self.uuid2, 2, ['d', '4', 'x'])
        cons.add(self.uuid2, 3, ['e', '5', 'x'])
        self.tdb1 = cons.finalize()

        cons = TrailDBConstructor('testtrail2', ['field1', 'field2', 'field3', 'field4'])
        cons.add(self.uuid2, 4, ['a', '1', 'x', 'l'])
        cons.add(self.uuid2, 5, ['b', '2', 'x', 'm'])
        cons.add(self.uuid1, 3, ['c', '3', 'y', 'n'])
        cons.add(self.uuid1, 4, ['d', '4', 'x', 'o'])
        cons.add(self.uuid1, 5, ['e', '5', 'x', 'p'])
        self.tdb2 = cons.finalize()

    def test_multicursor(self):
        c1 = self.tdb1.trail(self.tdb1.get_trail_id(self.uuid1))
        c2 = self.tdb2.trail(self.tdb2.get_trail_id(self.uuid1))
        mc = TrailDBMultiCursor(False, False, False)

        # not initialized, raise error
        with self.assertRaises(TrailDBError):
            next(mc)
        mc.set_cursors([c1, c2], [self.tdb1, self.tdb2])

        # exhaust the iterator
        events = list(mc)

        self.assertEqual(len(events), 5)
        self.assertEqual(events[0][0].time, 1)
        self.assertEqual(events[0][0].field1, 'a')
        self.assertEqual(events[0][0].field2, '1')
        self.assertEqual(events[0][0].field3, 'x')
        self.assertEqual(events[1][0].time, 2)
        self.assertEqual(events[1][0].field1, 'b')
        self.assertEqual(events[1][0].field2, '2')
        self.assertEqual(events[1][0].field3, 'x')
        # this one is from the 2nd tdb, has an additional field
        self.assertEqual(events[2][0].time, 3)
        self.assertEqual(events[2][0].field1, 'c')
        self.assertEqual(events[2][0].field2, '3')
        self.assertEqual(events[2][0].field3, 'y')
        self.assertEqual(events[2][0].field4, 'n')

    def test_multicursor_reuse(self):
        c1 = self.tdb1.trail(self.tdb1.get_trail_id(self.uuid1))
        c2 = self.tdb2.trail(self.tdb2.get_trail_id(self.uuid1))
        mc = TrailDBMultiCursor(False, False, False)
        mc.set_cursors([c1, c2], [self.tdb1, self.tdb2])
        # exhaust the iterator
        list(mc)

        # change the cursors
        c1.get_trail(self.tdb1.get_trail_id(self.uuid2))
        c2.get_trail(self.tdb2.get_trail_id(self.uuid2))

        # reset the multicursor
        mc.reset()
        events = list(mc)

        self.assertEqual(len(events), 5)
        self.assertEqual(events[0][0].time, 1)
        self.assertEqual(events[0][0].field1, 'c')
        self.assertEqual(events[0][0].field2, '3')
        self.assertEqual(events[0][0].field3, 'y')
        self.assertEqual(events[3][0].time, 4)
        self.assertEqual(events[3][0].field1, 'a')
        self.assertEqual(events[3][0].field2, '1')
        self.assertEqual(events[3][0].field3, 'x')
        self.assertEqual(events[3][0].field4, 'l')

    def test_multicursor_raw_items_parsetime(self):
        c1 = self.tdb1.trail(self.tdb1.get_trail_id(self.uuid1))
        c2 = self.tdb2.trail(self.tdb2.get_trail_id(self.uuid1))
        mc = TrailDBMultiCursor(True, True, False)
        mc.set_cursors([c1, c2], [self.tdb1, self.tdb2])
        # exhaust the iterator
        events = list(mc)

        # just make sure the length is right and we didn't have any errors
        self.assertEqual(len(events), 5)

    def tearDown(self):
        # Remove each file independently so that a failure on the first
        # one does not leave the second behind.
        for path in ('testtrail1.tdb', 'testtrail2.tdb'):
            try:
                os.unlink(path)
            except OSError:
                pass
486 |
487 |
# Run the whole suite when this file is executed directly as a script.
if __name__ == '__main__':
    unittest.main()
490 |
--------------------------------------------------------------------------------
/traildb/__init__.py:
--------------------------------------------------------------------------------
1 | from .traildb import TrailDBError
2 | from .traildb import TrailDBConstructor
3 | from .traildb import TrailDB
4 | from .traildb import TrailDBCursor
5 | from .traildb import TrailDBMultiCursor
6 | from .traildb import TrailDBEventFilter
7 | from .traildb import tdb_item_field
8 | from .traildb import tdb_item_val
9 |
--------------------------------------------------------------------------------
/traildb/__main__.py:
--------------------------------------------------------------------------------
1 | from __future__ import unicode_literals
2 | from __future__ import print_function
3 | import sys
4 |
5 | import traildb
6 |
# Print every (uuid, cursor) pair of the TrailDB given on the command line.
# The command-line arguments are forwarded to the TrailDB constructor;
# without arguments 'a.tdb' is opened.  TrailDB.trails() is the iteration
# API defined by the library module.
for cookie, trail in traildb.TrailDB(*(sys.argv[1:] or ['a.tdb'])).trails():
    print(cookie, trail)
9 |
--------------------------------------------------------------------------------
/traildb/traildb.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | from __future__ import unicode_literals
3 | from __future__ import print_function
4 | from __future__ import division
5 | from __future__ import absolute_import
6 | from builtins import int
7 | from builtins import range
8 | from past.builtins import basestring
9 | from builtins import object
10 | from ctypes import c_char, c_char_p, c_ubyte, c_int, c_void_p
11 | from ctypes import c_uint, c_uint32, c_uint64
12 | from ctypes import Structure, Union
13 | from ctypes import CDLL, POINTER, pointer
14 | from ctypes import string_at, addressof
15 | from datetime import datetime
16 |
17 | import os
18 | import sys
19 | import time
20 | import codecs
21 |
# Text codec used for paths, field names and values handed to the C library.
CODEC = 'utf8'

# Name of the hex codec used for UUID conversion.  If the short alias is
# not registered with this interpreter, fall back to the long name.
HEX = 'hex'

try:
    codecs.decode('A0', 'hex')
except LookupError:
    HEX = 'hex_codec'

# Load the native TrailDB shared library for the current platform.
if os.name == "posix" and sys.platform == "darwin":
    try:
        lib = CDLL('libtraildb.dylib')
    except OSError:
        # is there a better way to figure out the path?
        lib = CDLL('/usr/local/lib/libtraildb.dylib')
elif os.name == "posix" and "linux" in sys.platform:
    lib = CDLL('libtraildb.so')
else:
    # Without this branch `lib` would stay undefined and the module would
    # fail a few lines further down with an unrelated-looking NameError.
    raise ImportError("traildb: unsupported platform %r (os.name=%r)"
                      % (sys.platform, os.name))
39 |
40 |
def api(fun, args, res=None):
    """Declare the ctypes signature of a foreign function.

    :param fun: Function object to annotate (in this module, a symbol of ``lib``).
    :param args: Sequence of ctypes types, stored as ``fun.argtypes``.
    :param res: ctypes return type, stored as ``fun.restype`` (``None`` by default).
    """
    fun.restype = res
    fun.argtypes = args
44 |
# ctypes aliases for the C types used in the declarations below.
# Handles to library objects are treated as opaque pointers.
tdb = c_void_p
tdb_cons = c_void_p
tdb_field = c_uint32
tdb_val = c_uint64
tdb_item = c_uint64
tdb_cursor = c_void_p
tdb_error = c_int
tdb_event_filter = c_void_p
tdb_multi_cursor = c_void_p
54 |
55 |
class tdb_event(Structure):
    """ctypes layout of a single event, as returned by ``tdb_cursor_next``."""

    _fields_ = [
        ("timestamp", c_uint64),
        ("num_items", c_uint64),
        # The cursor classes in this module take the address of this field
        # and read ``num_items`` items starting there.
        ("items", POINTER(tdb_item)),
    ]
60 |
class tdb_multi_event(Structure):
    """ctypes layout of an event returned by ``tdb_multi_cursor_next``."""

    _fields_ = [
        # Handle of the TrailDB the event belongs to.
        ("db", tdb),
        ("tdb_event", POINTER(tdb_event)),
        ("cursor_idx", c_uint64),
    ]
65 |
class tdb_opt_value(Union):
    """Option value passed to ``tdb_set_opt`` / ``tdb_set_trail_opt``.

    A union: the value is either a pointer or a 64-bit unsigned integer.
    """

    _fields_ = [
        ("ptr", c_void_p),
        ("value", c_uint64),
    ]
69 |
# Option key for event filters.
# NOTE(review): presumably mirrors the constant in the C headers - confirm.
TDB_OPT_EVENT_FILTER = 101
71 |
72 |
# ctypes signatures of the libtraildb functions used by this module.
# Each call sets argtypes/restype on the corresponding symbol of `lib`.

# --- constructor ---
api(lib.tdb_cons_init, [], tdb_cons)
api(lib.tdb_cons_open,
    [tdb_cons, c_char_p, POINTER(c_char_p), c_uint64], tdb_error)
api(lib.tdb_cons_close, [tdb_cons])
api(lib.tdb_cons_add,
    [tdb_cons, POINTER(c_ubyte), c_uint64,
     POINTER(c_char_p), POINTER(c_uint64)],
    tdb_error)
api(lib.tdb_cons_append, [tdb_cons, tdb], tdb_error)
api(lib.tdb_cons_finalize, [tdb_cons], tdb_error)

# --- open / close ---
api(lib.tdb_init, [], tdb)
api(lib.tdb_open, [tdb, c_char_p], tdb_error)
api(lib.tdb_close, [tdb])

# --- fields, items and values ---
api(lib.tdb_lexicon_size, [tdb, tdb_field], tdb_error)

api(lib.tdb_get_field, [tdb, c_char_p], tdb_error)
api(lib.tdb_get_field_name, [tdb, tdb_field], c_char_p)

api(lib.tdb_get_item, [tdb, tdb_field, POINTER(c_char), c_uint64], tdb_item)
api(lib.tdb_get_value,
    [tdb, tdb_field, tdb_val, POINTER(c_uint64)], POINTER(c_char))
api(lib.tdb_get_item_value,
    [tdb, tdb_item, POINTER(c_uint64)], POINTER(c_char))

# --- UUIDs ---
api(lib.tdb_get_uuid, [tdb, c_uint64], POINTER(c_ubyte))
api(lib.tdb_get_trail_id,
    [tdb, POINTER(c_ubyte), POINTER(c_uint64)], tdb_error)

api(lib.tdb_error_str, [tdb_error], c_char_p)

# --- database statistics ---
api(lib.tdb_num_trails, [tdb], c_uint64)
api(lib.tdb_num_events, [tdb], c_uint64)
api(lib.tdb_num_fields, [tdb], c_uint64)
api(lib.tdb_min_timestamp, [tdb], c_uint64)
api(lib.tdb_max_timestamp, [tdb], c_uint64)

api(lib.tdb_version, [tdb], c_uint64)

# --- cursors ---
api(lib.tdb_cursor_new, [tdb], tdb_cursor)
# Takes a cursor handle (same underlying ctypes type as a db handle).
api(lib.tdb_cursor_free, [tdb_cursor])
api(lib.tdb_cursor_next, [tdb_cursor], POINTER(tdb_event))
api(lib.tdb_get_trail, [tdb_cursor, c_uint64], tdb_error)
api(lib.tdb_get_trail_length, [tdb_cursor], c_uint64)
api(lib.tdb_cursor_set_event_filter, [tdb_cursor, tdb_event_filter], tdb_error)

# --- multi-cursors ---
api(lib.tdb_multi_cursor_new, [POINTER(tdb_cursor), c_uint64], tdb_multi_cursor)
api(lib.tdb_multi_cursor_free, [tdb_multi_cursor])
api(lib.tdb_multi_cursor_reset, [tdb_multi_cursor])
api(lib.tdb_multi_cursor_next, [tdb_multi_cursor], POINTER(tdb_multi_event))
api(lib.tdb_multi_cursor_next_batch, [tdb_multi_cursor, POINTER(tdb_multi_event), c_uint64])

# --- event filters ---
api(lib.tdb_event_filter_new, [], tdb_event_filter)
api(lib.tdb_event_filter_add_term, [tdb_event_filter, tdb_item, c_int], tdb_error)
# The filter handle is the first argument, followed by the start and end
# timestamps.  Declaring all three keeps the end timestamp from going
# through ctypes' default (C int) conversion for undeclared arguments.
api(lib.tdb_event_filter_add_time_range,
    [tdb_event_filter, c_uint64, c_uint64], tdb_error)
api(lib.tdb_event_filter_new_clause, [tdb_event_filter], tdb_error)
api(lib.tdb_event_filter_new_match_none, [], tdb_event_filter)
api(lib.tdb_event_filter_new_match_all, [], tdb_event_filter)
api(lib.tdb_event_filter_free, [tdb_event_filter])

# --- options ---
api(lib.tdb_set_opt, [tdb, c_uint, tdb_opt_value], tdb_error)
api(lib.tdb_set_trail_opt, [tdb, c_uint64, c_uint, tdb_opt_value], tdb_error)
136 |
137 |
def uuid_hex(uuid):
    """
    :returns: The hex-string form of a UUID. Strings are returned
              unchanged; anything else is read as 16 raw bytes.
    """
    if not isinstance(uuid, basestring):
        raw = string_at(uuid, 16)
        return codecs.encode(raw, HEX).decode(CODEC)
    return uuid
145 |
146 |
def uuid_raw(uuid):
    """
    :returns: The binary form of a UUID as a 16-element ``c_ubyte`` array.
              Non-string input is returned unchanged.
    """
    if not isinstance(uuid, basestring):
        return uuid
    packed = codecs.decode(uuid, HEX)
    return (c_ubyte * 16).from_buffer_copy(packed)
154 |
155 |
def nullterm(strs, size):
    """Join ``strs`` with NUL characters and pad the result with NULs.

    ``size - len(strs) + 1`` NUL characters are appended after the joined
    strings (none if that count is not positive).
    """
    nul = '\x00'
    padding = nul * (size - len(strs) + 1)
    return nul.join(strs) + padding
158 |
159 |
160 | # Port of tdb_item_field and tdb_item_val in tdb_types.h. Cannot use
161 | # them directly as they are inlined functions.
162 |
def tdb_item_is32(item):
    """Return True when bit 7 of the item is clear (the 32-bit item form)."""
    return (item & 0x80) == 0
165 |
166 |
def tdb_item_field32(item):
    """Return the field of a 32-bit item: its lowest seven bits."""
    return item & 0x7F
169 |
170 |
def tdb_item_val32(item):
    """Return the value of a 32-bit item: the 32 bits above the low byte."""
    uint32_max = 0xFFFFFFFF
    return (item >> 8) & uint32_max
173 |
174 |
def tdb_item_field(item):
    """Return field-part of an item."""
    if not tdb_item_is32(item):
        # Wide item: seven field bits in the low byte, seven more in the
        # next byte.
        low = item & 127
        high = (item >> 8) & 127
        return low | (high << 7)
    return tdb_item_field32(item)
181 |
182 |
def tdb_item_val(item):
    """Return value-part of an item."""
    if not tdb_item_is32(item):
        # Wide item: the value is everything above the two low bytes.
        return item >> 16
    return tdb_item_val32(item)
189 |
190 |
class TrailDBError(Exception):
    """Raised by this module when a TrailDB operation fails."""
194 |
195 |
class TrailDBConstructor(object):
    """Objects of this class are used to construct new TrailDBs."""

    def __init__(self, path, ofields=()):
        """Initialize a new TrailDB constructor.

        :param path: TrailDB output path (without .tdb).
        :param ofields: List of field (names) in this TrailDB.

        :raises TrailDBError: if ``path`` is empty or the constructor
            cannot be opened.

        .. code-block:: python

            import traildb
            cons = traildb.TrailDBConstructor('example', ['type', 'flavor'])
            cons.add('00000000000000000000000000000001', 123, ['click', 'tasty'])
            cons.add('00000000000000000000000000000002', 129, ['flash', 'sour'])
            cons.finalize()  # Don't forget to finalize, otherwise you won't get a full TrailDB.
        """
        if not path:
            raise TrailDBError("Path is required")
        n = len(ofields)

        if isinstance(path, str):
            path = path.encode(CODEC)

        ofield_names = (c_char_p * n)(*[name.encode(CODEC)
                                        for name in ofields])

        # Assigned before tdb_cons_open so that __del__ can release the
        # handle even when opening fails.
        self._cons = lib.tdb_cons_init()
        if lib.tdb_cons_open(self._cons, path, ofield_names, n) != 0:
            raise TrailDBError("Cannot open constructor")

        self.path = path
        self.ofields = ofields

    def __del__(self):
        # _cons is missing when __init__ failed before creating the handle.
        if hasattr(self, '_cons'):
            lib.tdb_cons_close(self._cons)

    def add(self, uuid, tstamp, values):
        """Add an event in TrailDB.

        :param uuid: UUID of this event.
        :param tstamp: Timestamp of this event (datetime or integer).
            A datetime is converted with ``time.mktime``, i.e. it is
            interpreted as local time.
        :param values: value of each field (text is encoded as UTF-8,
            bytes are stored as given).

        :raises TrailDBError: if the library rejects the event.

        .. code-block:: python

            cons.add('00000000000000000000000000000001', 123, ['click', 'tasty'])
        """
        if isinstance(tstamp, datetime):
            tstamp = int(time.mktime(tstamp.timetuple()))
        n = len(self.ofields)
        values = [v.encode(CODEC) if not isinstance(v, bytes)
                  else v for v in values]
        value_array = (c_char_p * n)(*values)
        value_lengths = (c_uint64 * n)(*[len(v) for v in values])
        err = lib.tdb_cons_add(self._cons, uuid_raw(uuid), tstamp, value_array,
                               value_lengths)
        if err:
            # `err` is an error code, not a position in `values`; report it
            # directly instead of indexing the value list with it.
            raise TrailDBError("Could not add event (error code %d)" % err)

    def append(self, db):
        """Merge an existing TrailDB in this TrailDB.

        :param db: An instance of :py:class:`~traildb.TrailDB` you want to merge to this one.

        :raises TrailDBError: if the library reports a failure.
        """
        f = lib.tdb_cons_append(self._cons, db._db)
        if f < 0:
            raise TrailDBError("Wrong number of fields: %d" % db.num_fields)
        if f > 0:
            raise TrailDBError("Too many values: %s" % db.num_fields)

    def finalize(self, decode=True):
        """Finalize this TrailDB. You cannot add new events in this TrailDB
        after calling this function.

        You need to finalize :py:class:`~traildb.TrailDBConstructor` or you
        will not have an openable TrailDB later. Finalization is where all the
        compression and preparation happen and is typically the most
        resource-intensive part of TrailDB building.

        :param decode: Passed through to the :py:class:`~traildb.TrailDB`
            that is opened on the result.
        :returns: Opened :py:class:`~traildb.TrailDB`:
        :raises TrailDBError: if finalization fails.
        """
        r = lib.tdb_cons_finalize(self._cons)
        if r:
            raise TrailDBError("Could not finalize (%d)" % r)
        return TrailDB(self.path, decode)
283 |
284 |
class TrailDBCursor(object):
    """Iterator over the events of one trail.

    Instances are normally obtained from TrailDB.trail() or
    TrailDB.cursor() rather than created directly.

    The same cursor can be pointed at another trail with
    TrailDBCursor.get_trail(trail_id).

    """

    def __init__(self,
                 cursor,
                 cls,
                 valuefun,
                 parsetime,
                 only_timestamp,
                 event_filter_obj):
        # Native cursor handle; released in __del__.
        self.cursor = cursor
        self.valuefun = valuefun
        self.parsetime = parsetime
        # Event class used to wrap each event.
        self.cls = cls
        self.only_timestamp = only_timestamp
        # The filter object is kept so it can be re-applied by get_trail().
        self.event_filter_obj = event_filter_obj or None
        if event_filter_obj:
            self._apply_filter()

    def _apply_filter(self):
        # Install the stored event filter on the native cursor.
        status = lib.tdb_cursor_set_event_filter(self.cursor,
                                                 self.event_filter_obj.flt)
        if status:
            raise TrailDBError("cursor_set_event_filter failed")

    def __del__(self):
        if self.cursor:
            lib.tdb_cursor_free(self.cursor)

    def __iter__(self):
        return self

    def __next__(self):
        """Return the next event in the trail."""
        event = lib.tdb_cursor_next(self.cursor)
        if not event:
            raise StopIteration()

        record = event.contents
        # The items are read as an array located at the address of the
        # `items` field itself.
        item_array = tdb_item * record.num_items
        items = item_array.from_address(addressof(record.items))

        timestamp = record.timestamp
        if self.parsetime:
            timestamp = datetime.fromtimestamp(timestamp)

        if self.only_timestamp:
            return timestamp
        # First argument is the "raw items" flag: true when no value
        # function was supplied.
        return self.cls(not self.valuefun, timestamp, *items)

    def get_trail(self, trail_id):
        """Point this cursor at ``trail_id`` and re-apply the event filter, if any."""
        if lib.tdb_get_trail(self.cursor, trail_id) != 0:
            raise TrailDBError("Failed to initalize trail in cursor")

        if self.event_filter_obj:
            self._apply_filter()
349 |
350 |
class TrailDBMultiCursor(object):
    """
    TrailDBMultiCursor iterates over the events of multiple trails,
    merged together into a single trail with events sorted in the ascending
    time order. The trails can be from different traildbs.

    To use, initialize and then set the cursors using the set_cursors method.
    To reuse a multicursor, set new trails on the underlying cursors and then
    call :py:meth:`~traildb.TrailDBMultiCursor.reset()`. If filtering, apply event filters to the underlying
    cursors individually before setting them on the multicursor, or call reset after doing so
    if already set.
    """

    def __init__(self, parsetime, rawitems, only_timestamp):
        """
        :param parsetime: If True, returns datetime objects instead of integer timestamps.
        :param rawitems: Return raw integer items instead of stringified values. Using raw items is usually a bit more efficient than using string values.
        :param only_timestamp: If True, only return timestamps, not event objects.
        """
        self.parsetime = parsetime
        self.rawitems = rawitems
        self.only_timestamp = only_timestamp
        self.multicursor = None
        self._ready = False
        # Populated by set_cursors().
        self.cursors = []
        self._cursor_arr = None
        self._traildbs = {}
        self._batch_idx = 0
        self._current_batch_size = 0

    def __del__(self):
        # getattr: __init__ may not have run to completion.
        if getattr(self, 'multicursor', None):
            lib.tdb_multi_cursor_free(self.multicursor)

    def __iter__(self):
        return self

    def __next__(self):
        """
        return the next event in the combined trails, in ascending timestamp order

        this will return tuples in the form of `(event, traildb)`, where the traildb
        is the :py:class:`~traildb.TrailDB` the event belongs to. This can be used to
        get the values if rawitems is used. When ``only_timestamp`` is set, the bare
        timestamp is returned instead of a tuple.

        :raises TrailDBError: if set_cursors has not been called successfully.
        """
        if not self._ready:
            raise TrailDBError("Multicursor not initialized, call set_cursors")

        multi_event = lib.tdb_multi_cursor_next(self.multicursor)
        if not multi_event:
            raise StopIteration()

        return self.to_event(multi_event.contents)

    def to_event(self, multi_event):
        """Convert a ``tdb_multi_event`` into the value yielded by iteration.

        :returns: the timestamp when ``only_timestamp`` is set, otherwise an
            ``(event, traildb)`` tuple.
        :raises TrailDBError: if the event belongs to a TrailDB that was not
            passed to set_cursors.
        """
        event = multi_event.tdb_event
        tdb_ptr = multi_event.db

        timestamp = event.contents.timestamp
        if self.parsetime:
            timestamp = datetime.fromtimestamp(event.contents.timestamp)

        if self.only_timestamp:
            return timestamp

        try:
            traildb = self._traildbs[tdb_ptr]
        except KeyError:
            raise TrailDBError("TrailDBMultiCursor encountered a traildb that was not included in set_cursors")

        # The items are read as an array located at the address of the
        # `items` field itself.
        address = addressof(event.contents.items)
        items = (tdb_item * event.contents.num_items).from_address(address)

        # First constructor argument is the "raw items" flag.
        return traildb._event_cls(bool(self.rawitems), timestamp, *items), traildb

    def set_cursors(self, cursors, traildbs):
        """
        configure this multicursor to merge the specified cursors. This is required before use.
        Calling it again replaces the previous configuration.

        :param cursors: list of :py:class:`~traildb.TrailDBCursor` instances to merge
        :param traildbs: list of :py:class:`~traildb.TrailDB` instances from which the cursors were created (only needs to be specified once, even if there are multiple cursors from the same TrailDB)
        :raises TrailDBError: if the native multicursor cannot be allocated.
        """
        # Not usable until the new native multicursor exists.
        self._ready = False

        # Release a multicursor from an earlier call; otherwise it would
        # never be freed once self.multicursor is overwritten.
        if self.multicursor:
            lib.tdb_multi_cursor_free(self.multicursor)
            self.multicursor = None

        n_cursors = len(cursors)
        cursor_array = (tdb_cursor * n_cursors)(*[c.cursor for c in cursors])

        # maintain references to these in python so they wont get garbage collected
        self._cursor_arr = cursor_array
        self.cursors = cursors

        self.multicursor = lib.tdb_multi_cursor_new(cursor_array, n_cursors)
        if self.multicursor is None:
            raise TrailDBError("Failed to allocate memory for multicursor")
        self.reset()

        # mapping of the traildb pointer to the TrailDB object
        # we need this to get the configured traildb in python since we get a pointer to the tdb from the multi event
        self._traildbs = {db._db: db for db in traildbs}

        self._ready = True

    def reset(self):
        """
        reset the state of the multicursor to sync with the underlying cursors.
        Used when reusing cursors. Also resets the state of the python object,
        including any batched results.
        """

        if self.multicursor:
            lib.tdb_multi_cursor_reset(self.multicursor)
        self._batch_idx = 0
        self._current_batch_size = 0
463 |
464 |
def mk_event_class(fields, valuefun):
    """Build an event class for a fixed list of field names.

    :param fields: Field names; the n-th name maps to the n-th positional
        item given to the event constructor.
    :param valuefun: Callable that turns an item into its value. It is used
        for every field except ``'time'`` when the event is not in raw mode.
    :returns: The ``TrailDBEvent`` class. Instances are created as
        ``cls(rawitems, *items)`` and expose each field as an attribute.
    """
    field_to_index = {f: i for i, f in enumerate(fields)}

    class TrailDBEvent(object):
        __slots__ = ('items', 'rawitems', 'memoized')

        def __init__(self, rawitems, *items):
            self.items = tuple(items)
            # When true, attribute access returns the stored items as-is.
            self.rawitems = rawitems
            # Cache of values already converted by valuefun.
            self.memoized = {}

        def __repr__(self):
            return '<TrailDBEvent {}>'.format(self.to_list())

        def __str__(self):
            return self.__repr__()

        def _fields(self):
            return fields

        def __eq__(self, other):
            # Equal when the other object exposes the same field names in
            # the same order and every field value matches.
            try:
                if list(other._fields()) != list(fields):
                    return False
                for f in fields:
                    if self.__getattr__(f) != other.__getattr__(f):
                        return False
            except AttributeError:
                return False
            return True

        def __ne__(self, other):
            # Python 2 does not derive != from ==.
            return not self.__eq__(other)

        def __hash__(self):
            return hash(tuple(self.to_list()))

        def to_list(self):
            """Return ``[(field_name, value), ...]`` in field order."""
            return [(f, self.__getattr__(f)) for f in fields]

        def __getattr__(self, name):
            # Validate the name first: this also keeps a lookup of an unset
            # slot (e.g. 'memoized') from recursing back into this method.
            if name not in field_to_index:
                raise AttributeError(name)

            memo = self.memoized
            if name in memo:
                return memo[name]

            item = self.items[field_to_index[name]]
            if self.rawitems or name == 'time':
                return item

            value = valuefun(item)
            memo[name] = value
            return value

    return TrailDBEvent
539 |
540 |
541 | class TrailDB(object):
542 | """Objects of this class represent an opened TrailDB.
543 |
544 | Simply pass the filename to the constructor (with or without extension) as below.
545 |
546 | .. code-block:: python
547 |
548 | import traildb
549 | tdb = traildb.TrailDB('blah.tdb')
550 | """
551 |
def __init__(self, path, decode=True):
    """Open the TrailDB stored at ``path``.

    :param path: Location of the TrailDB; text is encoded as UTF-8.
    :param decode: Stored as ``self.decode``.
    :raises TrailDBError: if the library cannot open the path.
    """
    if isinstance(path, str):
        path = path.encode(CODEC)

    handle = lib.tdb_init()
    # Assigned before opening so that __del__ can close the handle even
    # when tdb_open fails.
    self._db = handle
    status = lib.tdb_open(handle, path)
    if status != 0:
        raise TrailDBError("Could not open %s, error code %d" %
                           (path, status))

    self.num_trails = lib.tdb_num_trails(handle)
    self.num_events = lib.tdb_num_events(handle)
    self.num_fields = lib.tdb_num_fields(handle)

    names = []
    for index in range(self.num_fields):
        names.append(lib.tdb_get_field_name(handle, index).decode(CODEC))
    self.fields = names

    self._event_cls = mk_event_class(self.fields, self.get_item_value)
    # Reusable output slot for library calls that report a length.
    self._uint64_ptr = pointer(c_uint64())
    self.decode = decode
571 |
def __del__(self):
    # Nothing to close when __init__ failed before creating the handle.
    if not hasattr(self, '_db'):
        return
    lib.tdb_close(self._db)
575 |
def __contains__(self, uuidish):
    """:returns: True if UUID or Trail ID exists in this TrailDB."""
    try:
        self[uuidish]
    except (IndexError, TrailDBError):
        # IndexError: unknown UUID.  TrailDBError: trail() could not open
        # a cursor for the given Trail ID.
        return False
    return True
583 |
def __getitem__(self, uuidish):
    """:returns: a cursor for the given UUID or Trail ID."""
    trail_id = uuidish
    # Strings are UUIDs and are resolved to a Trail ID first.
    if isinstance(uuidish, basestring):
        trail_id = self.get_trail_id(uuidish)
    return self.trail(trail_id)
589 |
def __len__(self):
    """:returns: How many trails this TrailDB holds."""
    trail_count = self.num_trails
    return trail_count
593 |
def trails(self, selected_uuids=None, reuse_cursors=False, **kwds):
    """
    Iterate over the trails of this TrailDB.

    :param selected_uuids: Optional iterable of hex UUIDs. When given,
        only these trails are visited; UUIDs that are not present in the
        TrailDB are skipped silently.

    :param reuse_cursors: With the default ``False`` every yielded trail
        gets a cursor of its own. With ``True`` a single cursor object is
        shared by all yielded trails, which is considerably faster, but
        each trail must then be consumed before advancing to the next
        item from trails(); the iterators cannot be stored for later.

    :returns: Yields ``(uuid, events)`` pairs.

    Any other keyword arguments are passed to :py:meth:`~TrailDB.cursor()`.

    .. code-block:: python

        # Prints all UUIDs in a TrailDB
        import traildb
        tdb = traildb.TrailDB('blah')
        for uuid, events in tdb.trails():
            print(uuid)

    """
    shared = self.cursor(**kwds) if reuse_cursors else None

    if selected_uuids is None:
        for trail_id in range(len(self)):
            events = shared if reuse_cursors else self.cursor(**kwds)
            events.get_trail(trail_id)
            yield self.get_uuid(trail_id), events
        return

    for uuid in selected_uuids:
        try:
            trail_id = self.get_trail_id(uuid)
        except IndexError:
            continue

        events = shared if reuse_cursors else self.cursor(**kwds)
        events.get_trail(trail_id)
        yield uuid, events
645 |
def trail(self,
          trail_id,
          parsetime=False,
          rawitems=False,
          only_timestamp=False,
          event_filter=None):
    """Return a cursor over a single trail.

    :param trail_id: Trail ID to use.
    :param parsetime: If True, returns datetime objects instead of integer timestamps.
    :param rawitems: Return raw integer items instead of stringified values. Using raw items is usually a bit more efficient than using string values.
    :param only_timestamp: If True, only return timestamps, not event objects.
    :param event_filter: Apply given event filter to the cursor. Either a
        :py:class:`~traildb.TrailDBEventFilter` or a filter description
        accepted by ``create_filter``.
    :returns: A :py:class:`~traildb.TrailDBCursor` to given Trail ID.

    This function can throw :py:class:`~traildb.TrailDBError` if Trail ID is not
    present in the TrailDB.
    """
    cursor = lib.tdb_cursor_new(self._db)
    if not cursor:
        raise TrailDBError("Failed to create cursor")

    try:
        if lib.tdb_get_trail(cursor, trail_id) != 0:
            raise TrailDBError("Failed to create cursor")

        if isinstance(event_filter, TrailDBEventFilter):
            event_filter_obj = event_filter
        elif event_filter:
            event_filter_obj = self.create_filter(event_filter)
        else:
            event_filter_obj = None
    except Exception:
        # The native cursor is not owned by a TrailDBCursor yet, so
        # nothing else would release it.
        lib.tdb_cursor_free(cursor)
        raise

    valuefun = None if rawitems else self.get_item_value
    # From here on the TrailDBCursor owns (and eventually frees) the cursor.
    return TrailDBCursor(cursor,
                         self._event_cls,
                         valuefun,
                         parsetime,
                         only_timestamp,
                         event_filter_obj)
682 |
def cursor(self, *args, **kwargs):
    """Shorthand for :py:meth:`~traildb.TrailDB.trail` called with ``trail_id=0``;
    all other arguments are forwarded unchanged."""
    first_trail = 0
    return self.trail(first_trail, *args, **kwargs)
686 |
def field(self, fieldish):
    """:returns: a field ID given a field name.

    A string is looked up by its position in ``self.fields``; anything
    that is not a string is returned unchanged.

    .. code-block:: python

        import traildb
        tdb = traildb.TrailDB('blah.tdb')
        print(tdb.field('type'))
    """
    if not isinstance(fieldish, basestring):
        # Not a name: hand the value back untouched.
        return fieldish
    return self.fields.index(fieldish)
699 |
def lexicon(self, fieldish):
    """:returns: an iterator over values of the given field ID or field name.

    The field and its lexicon size are resolved immediately; the values
    themselves are fetched lazily, one per iteration step.
    """
    field_id = self.field(fieldish)
    # Value IDs start from 1 here; ID 0 is skipped (presumably the
    # reserved empty value -- confirm against the TrailDB docs).
    value_ids = range(1, self.lexicon_size(field_id))
    return (self.get_value(field_id, value_id) for value_id in value_ids)
705 |
def lexicon_size(self, fieldish):
    """:returns: The number of distinct values in the given field ID or field name. (i.e. cardinality of a field in the TrailDB)

    :raises TrailDBError: if the library reports a size of 0 for the field.
    """
    field_id = self.field(fieldish)
    size = lib.tdb_lexicon_size(self._db, field_id)
    if size != 0:
        return size
    # A size of 0 is treated as the signal for an invalid field.
    raise TrailDBError("Invalid field index")
713 |
def get_item(self, fieldish, value):
    """:returns: The item corresponding to a field ID or a field name and a string value.

    :param fieldish: Field ID or field name.
    :param value: String value to look up; it is encoded with ``CODEC``
                  before being handed to the library.
    :raises TrailDBError: if the library returns no item for the value.

    .. code-block:: python

        import traildb
        tdb = traildb.TrailDB('blah.tdb')
        print(tdb.get_item('type', 'click'))

    """
    field = self.field(fieldish)
    encoded = value.encode(CODEC)
    # The length must be that of the encoded byte string: for non-ASCII
    # values the character count is smaller than the byte count, and
    # passing it would make the lookup see a truncated value.
    item = lib.tdb_get_item(self._db,
                            field,
                            encoded,
                            len(encoded))
    if not item:
        raise TrailDBError("No such value: '%s'" % value)
    return item
732 |
def get_item_value(self, item):
    """:returns: The string value corresponding to an item.

    The value is decoded with ``CODEC`` when ``self.decode`` is set,
    otherwise the raw slice is returned.

    .. code-block:: python

        import traildb
        tdb = traildb.TrailDB('blah.tdb')

        # This should print 'click' (if TrailDB contains 'type' field and 'click' values in that field).
        print(tdb.get_item_value(tdb.get_item('type', 'click')))

    """
    raw = lib.tdb_get_item_value(self._db, item, self._uint64_ptr)
    if raw is None:
        # NOTE(review): confirm that ``lib.tdb_error`` exists in the linked
        # libtraildb and accepts the db handle.
        raise TrailDBError("Error reading value, error: %s" %
                           lib.tdb_error(self._db))

    # The library reports the value length through the shared uint64
    # out-parameter; only that many leading bytes belong to the value.
    payload = raw[0:self._uint64_ptr.contents.value]
    return payload.decode(CODEC) if self.decode else payload
754 |
def get_value(self, fieldish, val):
    """:returns: The string value corresponding to a field ID or a field name and a value ID.

    The value is decoded with ``CODEC`` when ``self.decode`` is set,
    otherwise the raw slice is returned.
    """
    field_id = self.field(fieldish)
    raw = lib.tdb_get_value(self._db, field_id, val, self._uint64_ptr)
    if raw is None:
        # NOTE(review): confirm that ``lib.tdb_error`` exists in the linked
        # libtraildb and accepts the db handle.
        raise TrailDBError("Error reading value, error: %s" %
                           lib.tdb_error(self._db))

    # The value length comes back through the shared uint64 out-parameter.
    payload = raw[0:self._uint64_ptr.contents.value]
    return payload.decode(CODEC) if self.decode else payload
767 |
def get_uuid(self, trail_id, raw=False):
    """
    :param trail_id: The Trail ID to give UUID for.
    :param raw: If true, returns 16-byte binary string for UUID instead of hexified UUID.
    :returns: UUID given a Trail ID.
    :raises IndexError: if the library returns no UUID for ``trail_id``.
    """
    uuid_ptr = lib.tdb_get_uuid(self._db, trail_id)
    if not uuid_ptr:
        raise IndexError("Trail ID out of range")
    if raw:
        # Copy the 16 bytes behind the returned pointer.
        return string_at(uuid_ptr, 16)
    return uuid_hex(uuid_ptr)
781 |
def get_trail_id(self, uuid):
    """:returns: Trail ID given a UUID.

    This is the reverse of :py:meth:`traildb.TrailDB.get_uuid`.

    :raises IndexError: if the library reports a non-zero status for the lookup.
    """
    status = lib.tdb_get_trail_id(self._db, uuid_raw(uuid), self._uint64_ptr)
    if not status:
        # On success the Trail ID has been written to the shared uint64
        # out-parameter.
        return self._uint64_ptr.contents.value
    raise IndexError("UUID '%s' not found" % uuid)
791 |
def time_range(self, parsetime=False):
    """:returns: The time range covered by this TrailDB as a ``(min, max)`` tuple.

    :param parsetime: If True, return the range as datetime objects
                      (built with ``datetime.fromtimestamp``) instead of
                      integer timestamps.
    """
    bounds = (self.min_timestamp(), self.max_timestamp())
    if not parsetime:
        return bounds
    return tuple(datetime.fromtimestamp(stamp) for stamp in bounds)
802 |
def min_timestamp(self):
    """:returns: The minimum time stamp of this TrailDB."""
    earliest = lib.tdb_min_timestamp(self._db)
    return earliest
806 |
def max_timestamp(self):
    """:returns: The maximum time stamp of this TrailDB."""
    latest = lib.tdb_max_timestamp(self._db)
    return latest
810 |
def create_filter(self, event_filter):
    """:returns: :py:class:`~traildb.TrailDBEventFilter` object created from this TrailDB.

    :param event_filter: Query passed on to the filter constructor.
    """
    return TrailDBEventFilter(db=self, query=event_filter)
814 |
def apply_whitelist(self, uuids):
    """
    Applies a whitelist of UUIDs to TrailDB so that further calls to
    :py:meth:`~traildb.TrailDB.trails` do not return any events for UUIDs that
    have not been whitelisted with this call.

    :param uuids: Iterable of UUIDs to keep visible. UUIDs that are not
                  found in this TrailDB are skipped.
    :raises TrailDBError: if a filter cannot be allocated or the library
                          rejects one of the option updates.
    """
    empty_filter = lib.tdb_event_filter_new_match_none()
    all_filter = lib.tdb_event_filter_new_match_all()
    if not empty_filter or not all_filter:
        # Release whichever half was allocated before giving up.
        for flt in (empty_filter, all_filter):
            if flt:
                lib.tdb_event_filter_free(flt)
        raise TrailDBError("Out of memory in apply_whitelist")

    # NOTE(review): the filters are intentionally not freed on the success
    # path -- presumably the library keeps the raw pointers it is given
    # here, so freeing them would leave it with dangling references.
    # Confirm against the TrailDB C API documentation.

    # Database-wide default: hide every event ...
    if lib.tdb_set_opt(self._db,
                       TDB_OPT_EVENT_FILTER,
                       tdb_opt_value(ptr=empty_filter)):
        raise TrailDBError("Failed to set event filter in apply_whitelist")

    # ... then re-enable the whitelisted trails one by one.
    value = tdb_opt_value(ptr=all_filter)
    for uuid in uuids:
        try:
            trail_id = self.get_trail_id(uuid)
        except IndexError:
            # Unknown UUID: nothing to whitelist.
            continue

        if lib.tdb_set_trail_opt(self._db,
                                 trail_id,
                                 TDB_OPT_EVENT_FILTER,
                                 value):
            raise TrailDBError("Failed to whitelist UUID '%s'" % uuid)
840 |
def apply_blacklist(self, uuids):
    """
    Applies a blacklist of UUIDs to TrailDB so that further calls to
    :py:meth:`~traildb.TrailDB.trails` do not return any events for the blacklisted UUIDs.

    :param uuids: Iterable of UUIDs to hide. UUIDs that are not found in
                  this TrailDB are skipped.
    :raises TrailDBError: if a filter cannot be allocated or the library
                          rejects one of the option updates.
    """
    empty_filter = lib.tdb_event_filter_new_match_none()
    all_filter = lib.tdb_event_filter_new_match_all()
    if not empty_filter or not all_filter:
        # Release whichever half was allocated before giving up.
        for flt in (empty_filter, all_filter):
            if flt:
                lib.tdb_event_filter_free(flt)
        raise TrailDBError("Out of memory in apply_blacklist")

    # NOTE(review): the filters are intentionally not freed on the success
    # path -- presumably the library keeps the raw pointers it is given
    # here, so freeing them would leave it with dangling references.
    # Confirm against the TrailDB C API documentation.

    # Database-wide default: show every event ...
    if lib.tdb_set_opt(self._db,
                       TDB_OPT_EVENT_FILTER,
                       tdb_opt_value(ptr=all_filter)):
        raise TrailDBError("Failed to set event filter in apply_blacklist")

    # ... then hide the blacklisted trails one by one.
    value = tdb_opt_value(ptr=empty_filter)
    for uuid in uuids:
        try:
            trail_id = self.get_trail_id(uuid)
        except IndexError:
            # Unknown UUID: nothing to blacklist.
            continue

        if lib.tdb_set_trail_opt(self._db,
                                 trail_id,
                                 TDB_OPT_EVENT_FILTER,
                                 value):
            raise TrailDBError("Failed to blacklist UUID '%s'" % uuid)
864 |
865 |
class TrailDBEventFilter(object):
    """
    Converts a query defined in terms of Python collections to a
    `tdb_event_filter` which can be passed to various TrailDB functions.
    Performs some validation when parsing the query.

    Queries are boolean expressions defined from terms and clauses. A term is
    defined using a tuple:

    .. code-block:: python

        (field_name, "value")         # match records with field_name == "value"
        (field_name, "value", False)  # match records with field_name == "value"
        (field_name, "value", True)   # match records with field_name != "value"
        (start_time, end_time)        # match records with start_time <= time < end_time

    Clauses are boolean expressions formed from terms, which are connected with AND.
    Clauses are defined with lists of terms:

    .. code-block:: python

        [term]
        [term1, term2]
        [term1, term2, ...]

    Queries are boolean expressions formed from clauses, which are connected with OR.
    Queries are defined with lists of clauses:

    .. code-block:: python

        [clause]
        [clause1, clause2]
        [clause1, clause2, ...]

    A bare list of terms (no enclosing list of clauses) is accepted as a
    shorthand for a query with a single clause.

    Some complete examples:

    .. code-block:: python

        [[("user", "george_jetson")]]  # Match records for the user "george_jetson"
        [[("user", "george_jetson", True)]]  # Match records for users other than "george_jetson"
        [[(1501013929, 1501100260)]]  # Match records between 2017-07-25 3:18 pm to 2017-07-26 3:18 pm
        [[("job_title", "manager"), ("user", "george_jetson")]]  # Match records for the user "george_jetson" AND with job title "manager"
        [[("job_title", "manager")], [("user", "george_jetson")]]  # Match records for the user "george_jetson" OR with job title "manager"
        [[("job_title", "manager"), (1501013929, 1501100260)], [("user", "george_jetson"), (1501013929, 1501100260)]]  # Match records for the user "george_jetson" OR with job title "manager" and between 2017-07-25 3:18 pm to 2017-07-26 3:18 pm
    """
    # NOTE(review): the TrailDB C API documentation appears to describe event
    # filters as conjunctive normal form (terms inside a clause OR'd, clauses
    # AND'd), which is the opposite of the AND/OR wording in the docstring
    # above. Confirm against the C library before relying on either reading.

    def __init__(self, db, query):
        """
        Initializes TrailDBEventFilter. You might want to use :py:meth:`traildb.TrailDB.create_filter` instead that passes ``db`` automatically.

        :param db: TrailDB whose ``get_item`` resolves ``(field, value)``
                   pairs to items.
        :param query: List of clauses, or a bare list of term tuples. An
                      empty query adds no terms and leaves the filter as
                      returned by ``tdb_event_filter_new``.
        :raises TrailDBError: if the filter cannot be allocated or the
                              library reports an error while a clause or
                              term is being added.
        """
        self.flt = lib.tdb_event_filter_new()
        if not self.flt:
            raise TrailDBError("Out of memory in _create_filter")

        # A bare list of terms is shorthand for one clause. The emptiness
        # guard keeps an empty query from failing on query[0].
        if query and isinstance(query[0], tuple):
            query = [query]

        for i, clause in enumerate(query):
            # The first clause is written into the freshly created filter
            # directly; a new clause is only opened for subsequent ones.
            if i > 0:
                err = lib.tdb_event_filter_new_clause(self.flt)
                if err:
                    raise TrailDBError("Out of memory in _create_filter")

            for term in clause:
                # A pair of integers is a time range; everything else is a
                # (field, value[, is_negative]) term.
                if (len(term) == 2 and isinstance(term[0], int)
                        and isinstance(term[1], int)):
                    start_time, end_time = term
                    err = lib.tdb_event_filter_add_time_range(self.flt,
                                                              start_time,
                                                              end_time)
                else:
                    is_negative = False
                    if len(term) == 3:
                        field, value, is_negative = term
                    else:
                        field, value = term
                    try:
                        item = db.get_item(field, value)
                    except (TrailDBError, ValueError):
                        # Unknown field or value: fall back to item 0
                        # instead of rejecting the whole query.
                        item = 0
                    err = lib.tdb_event_filter_add_term(self.flt,
                                                        item,
                                                        1 if is_negative else 0)
                # Any non-zero status is reported with the same message.
                if err:
                    raise TrailDBError("Out of memory in _create_filter")

    def __del__(self):
        """Release the native filter, if one was allocated."""
        # __init__ may have failed before self.flt was assigned; in that
        # case there is nothing to free and no attribute to read.
        flt = getattr(self, "flt", None)
        if flt:
            self.flt = None
            lib.tdb_event_filter_free(flt)
952 |
--------------------------------------------------------------------------------
/travisdeps.sh:
--------------------------------------------------------------------------------
#!/bin/sh

# Installs the native build dependencies for the test run, then builds and
# installs the TrailDB C library from source.

# Get up to date libjudy
sudo apt-get update
sudo apt-get install -y libarchive-dev pkg-config build-essential
wget https://mirrors.kernel.org/ubuntu/pool/universe/j/judy/libjudy-dev_1.0.5-5_amd64.deb \
     https://mirrors.kernel.org/ubuntu/pool/universe/j/judy/libjudydebian1_1.0.5-5_amd64.deb
sudo dpkg -i libjudy-dev_1.0.5-5_amd64.deb libjudydebian1_1.0.5-5_amd64.deb

# compile dependency in /opt/traildb/traildb

mkdir -p /opt/traildb
# Stop here if the directory is unusable; otherwise the clone below would
# land in whatever directory the script was started from.
cd /opt/traildb || exit 1

# shallow-ish copy of master branch of traildb/traildb
git clone --depth=50 https://github.com/traildb/traildb

# build traildb so
# Stop if the checkout is missing rather than running ./waf elsewhere.
cd /opt/traildb/traildb || exit 1
sudo ./waf configure
# actually needs root permissions to install into /usr/local
sudo ./waf install
23 |
--------------------------------------------------------------------------------