├── .gitignore ├── .travis.yml ├── Dockerfile ├── LICENSE ├── README.md ├── conf.py ├── examples ├── datashader_example.py ├── extract_sample.py ├── parse_wikipedia_history.py ├── top_items.py ├── tutorial_simple_traildb.py └── tutorial_wikipedia_sessions.py ├── index.rst ├── requirements.txt ├── runtests.sh ├── setup.py ├── test └── test.py ├── traildb ├── __init__.py ├── __main__.py └── traildb.py └── travisdeps.sh /.gitignore: -------------------------------------------------------------------------------- 1 | *~ 2 | /build/ 3 | *.pyc 4 | _build/ 5 | test/*.tdb 6 | 7 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | sudo: required 2 | 3 | language: python 4 | 5 | python: 6 | - 2.7 7 | - 3.3 8 | - 3.4 9 | - 3.5 10 | - 3.6 11 | - pypy 12 | - pypy3.5 13 | 14 | before_install: 15 | - ./travisdeps.sh 16 | 17 | script: 18 | - ./runtests.sh -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM c3h3/traildb-base:latest 2 | 3 | # pyenv image 4 | 5 | ENV HOME /root 6 | ENV PYENVPATH $HOME/.pyenv 7 | ENV PATH $PYENVPATH/shims:$PYENVPATH/bin:$PATH 8 | 9 | RUN curl -L https://raw.githubusercontent.com/yyuu/pyenv-installer/master/bin/pyenv-installer | bash 10 | RUN echo 'eval "$(pyenv init -)"' > /root/.bashrc 11 | 12 | 13 | EXPOSE 8888 14 | 15 | RUN pyenv update && pyenv install anaconda-2.3.0 && pyenv global anaconda-2.3.0 && ipython profile create 16 | 17 | RUN (echo "require(['base/js/namespace'], function (IPython) {" && \ 18 | echo " IPython._target = '_self';" && \ 19 | echo "});") > /root/.ipython/profile_default/static/custom/custom.js 20 | 21 | 22 | RUN (echo "c = get_config()" && \ 23 | echo "headers = {'Content-Security-Policy': 'frame-ancestors *'}" && \ 24 | echo "c.NotebookApp.allow_origin = '*'" && \ 25 
| echo "c.NotebookApp.allow_credentials = True" && \ 26 | echo "c.NotebookApp.tornado_settings = {'headers': headers}" && \ 27 | echo "c.NotebookApp.ip = '0.0.0.0'" && \ 28 | echo "c.NotebookApp.open_browser = False" && \ 29 | echo "from IPython.lib import passwd" && \ 30 | echo "import os" && \ 31 | echo "c.NotebookApp.password = passwd(os.environ.get('PASSWORD', 'jupyter'))") \ 32 | > /root/.ipython/profile_default/ipython_notebook_config.py 33 | 34 | 35 | RUN cd /tmp && git clone https://github.com/traildb/traildb-python && cd traildb-python && python setup.py install 36 | 37 | RUN mkdir /ipynbs 38 | WORKDIR /ipynbs 39 | 40 | CMD ipython notebook --no-browser --ip=0.0.0.0 --port 8888 -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | Copyright (c) 2016 AdRoll, Inc. 3 | 4 | Permission is hereby granted, free of charge, to any person obtaining 5 | a copy of this software and associated documentation files (the 6 | "Software"), to deal in the Software without restriction, including 7 | without limitation the rights to use, copy, modify, merge, publish, 8 | distribute, sublicense, and/or sell copies of the Software, and to 9 | permit persons to whom the Software is furnished to do so, subject to 10 | the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included 13 | in all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 16 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 17 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
18 | IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 19 | CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 20 | TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 21 | SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Python bindings for TrailDB 2 | 3 | ### Quick start 4 | 5 | First install the [TrailDB library](https://github.com/traildb/traildb). Then run 6 | 7 | $ python setup.py install 8 | 9 | For detailed instructions, see the [Getting Started guide](http://traildb.io/docs/getting_started/). 10 | 11 | ### Example 12 | 13 | See the [TrailDB tutorial](http://traildb.io/docs/tutorial) for more information. 14 | 15 | ```python 16 | 17 | >>> from traildb import TrailDB, TrailDBConstructor 18 | 19 | >>> cookie = '12345678123456781234567812345678' 20 | >>> cons = TrailDBConstructor('test.tdb', ['field1', 'field2']) 21 | >>> cons.add(cookie, 123, ['a']) 22 | >>> cons.add(cookie, 124, ['b', 'c']) 23 | >>> tdb = cons.finalize() 24 | 25 | >>> for cookie, trail in tdb.trails(): 26 | ... for event in trail: 27 | ... print cookie, event 28 | 29 | 12345678123456781234567812345678 event(time=123L, field1='a', field2='') 30 | 12345678123456781234567812345678 event(time=124L, field1='b', field2='c') 31 | ``` 32 | 33 | ## For Docker users 34 | 35 | You can pull the prebuilt image: 36 | 37 | $ docker pull c3h3/traildb-ipynb 38 | 39 | Or you can build the Docker image yourself (replace "your/repo-name" with a name of your choice): 40 | 41 | $ docker build -t your/repo-name . 
42 | 43 | 44 | You can run the Docker image with the default password (jupyter); the Jupyter notebook will then listen on port 8080: 45 | 46 | $ docker run -p 8080:8888 -it c3h3/traildb-ipynb 47 | 48 | Or you can run the Docker image with your own password (yourPassword); the Jupyter notebook will again listen on port 8080: 49 | 50 | $ docker run -e PASSWORD=yourPassword -p 8080:8888 -it c3h3/traildb-ipynb 51 | 52 | Then open [http://localhost:8080](http://localhost:8080) to access your Jupyter notebook. 53 | 54 | #### Documentation 55 | 56 | Sphinx documentation is available. 57 | 58 | 1. Ensure Sphinx is installed. 59 | 60 | `pip install sphinx` 61 | 62 | 2. Generate HTML documentation. 63 | 64 | `sphinx-build -b html . _build` 65 | 66 | Open `_build/index.html` in a browser. 67 | -------------------------------------------------------------------------------- /conf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # traildb-python documentation build configuration file, created by 4 | # sphinx-quickstart on Mon Oct 2 14:22:22 2017. 5 | # 6 | # This file is execfile()d with the current directory set to its 7 | # containing dir. 8 | # 9 | # Note that not all possible configuration values are present in this 10 | # autogenerated file. 11 | # 12 | # All configuration values have a default; values that are commented out 13 | # serve to show the default. 14 | 15 | # If extensions (or modules to document with autodoc) are in another directory, 16 | # add these directories to sys.path here. If the directory is relative to the 17 | # documentation root, use os.path.abspath to make it absolute, like shown here. 18 | # 19 | import os 20 | import sys 21 | sys.path.insert(0, os.path.abspath('.')) 22 | 23 | 24 | # -- General configuration ------------------------------------------------ 25 | 26 | # If your documentation needs a minimal Sphinx version, state it here. 
27 | # 28 | # needs_sphinx = '1.0' 29 | 30 | # Add any Sphinx extension module names here, as strings. They can be 31 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 32 | # ones. 33 | extensions = ['sphinx.ext.autodoc'] 34 | 35 | # Add any paths that contain templates here, relative to this directory. 36 | templates_path = ['_templates'] 37 | 38 | # The suffix(es) of source filenames. 39 | # You can specify multiple suffixes as a list of strings: 40 | # 41 | # source_suffix = ['.rst', '.md'] 42 | source_suffix = '.rst' 43 | 44 | # The master toctree document. 45 | master_doc = 'index' 46 | 47 | # General information about the project. 48 | project = u'traildb-python' 49 | copyright = u'2017, AdRoll Inc' 50 | author = u'AdRoll Inc' 51 | 52 | # The version info for the project you're documenting, acts as replacement for 53 | # |version| and |release|, also used in various other places throughout the 54 | # built documents. 55 | # 56 | # The short X.Y version. 57 | version = u'0.1.0' 58 | # The full version, including alpha/beta/rc tags. 59 | release = u'0.1.0' 60 | 61 | # The language for content autogenerated by Sphinx. Refer to documentation 62 | # for a list of supported languages. 63 | # 64 | # This is also used if you do content translation via gettext catalogs. 65 | # Usually you set "language" from the command line for these cases. 66 | language = None 67 | 68 | # List of patterns, relative to source directory, that match files and 69 | # directories to ignore when looking for source files. 70 | # These patterns also affect html_static_path and html_extra_path 71 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] 72 | 73 | # The name of the Pygments (syntax highlighting) style to use. 74 | pygments_style = 'sphinx' 75 | 76 | # If true, `todo` and `todoList` produce output, else they produce nothing. 
77 | todo_include_todos = False 78 | 79 | 80 | # -- Options for HTML output ---------------------------------------------- 81 | 82 | # The theme to use for HTML and HTML Help pages. See the documentation for 83 | # a list of builtin themes. 84 | # 85 | html_theme = 'alabaster' 86 | 87 | # Theme options are theme-specific and customize the look and feel of a theme 88 | # further. For a list of options available for each theme, see the 89 | # documentation. 90 | # 91 | # html_theme_options = {} 92 | 93 | # Add any paths that contain custom static files (such as style sheets) here, 94 | # relative to this directory. They are copied after the builtin static files, 95 | # so a file named "default.css" will overwrite the builtin "default.css". 96 | html_static_path = ['_static'] 97 | 98 | # Custom sidebar templates, must be a dictionary that maps document names 99 | # to template names. 100 | # 101 | # This is required for the alabaster theme 102 | # refs: http://alabaster.readthedocs.io/en/latest/installation.html#sidebars 103 | html_sidebars = { 104 | '**': [ 105 | 'about.html', 106 | 'navigation.html', 107 | 'relations.html', # needs 'show_related': True theme option to display 108 | 'searchbox.html', 109 | 'donate.html', 110 | ] 111 | } 112 | 113 | 114 | # -- Options for HTMLHelp output ------------------------------------------ 115 | 116 | # Output file base name for HTML help builder. 117 | htmlhelp_basename = 'traildb-pythondoc' 118 | 119 | 120 | # -- Options for LaTeX output --------------------------------------------- 121 | 122 | latex_elements = { 123 | # The paper size ('letterpaper' or 'a4paper'). 124 | # 125 | # 'papersize': 'letterpaper', 126 | 127 | # The font size ('10pt', '11pt' or '12pt'). 128 | # 129 | # 'pointsize': '10pt', 130 | 131 | # Additional stuff for the LaTeX preamble. 
132 | # 133 | # 'preamble': '', 134 | 135 | # Latex figure (float) alignment 136 | # 137 | # 'figure_align': 'htbp', 138 | } 139 | 140 | # Grouping the document tree into LaTeX files. List of tuples 141 | # (source start file, target name, title, 142 | # author, documentclass [howto, manual, or own class]). 143 | latex_documents = [ 144 | (master_doc, 'traildb-python.tex', u'traildb-python Documentation', 145 | u'AdRoll Inc', 'manual'), 146 | ] 147 | 148 | 149 | # -- Options for manual page output --------------------------------------- 150 | 151 | # One entry per manual page. List of tuples 152 | # (source start file, name, description, authors, manual section). 153 | man_pages = [ 154 | (master_doc, 'traildb-python', u'traildb-python Documentation', 155 | [author], 1) 156 | ] 157 | 158 | 159 | # -- Options for Texinfo output ------------------------------------------- 160 | 161 | # Grouping the document tree into Texinfo files. List of tuples 162 | # (source start file, target name, title, author, 163 | # dir menu entry, description, category) 164 | texinfo_documents = [ 165 | (master_doc, 'traildb-python', u'traildb-python Documentation', 166 | author, 'traildb-python', 'One line description of project.', 167 | 'Miscellaneous'), 168 | ] 169 | 170 | 171 | 172 | -------------------------------------------------------------------------------- /examples/datashader_example.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | from __future__ import unicode_literals 3 | from __future__ import print_function 4 | from __future__ import absolute_import 5 | from builtins import open 6 | from builtins import int 7 | from builtins import range 8 | from past.utils import old_div 9 | 10 | import datashader as ds 11 | import datashader.transfer_functions as tf 12 | import pandas as pd 13 | 14 | from traildb import TrailDB 15 | 16 | 17 | def get_events(tdb): 18 | query = [('title', 'Prince (musician)')] 19 | 
for i in range(len(tdb)): 20 | events = list(tdb.trail(i, event_filter=query)) 21 | if events: 22 | yield events[0].time, events 23 | 24 | 25 | def get_dataframe(): 26 | tdb = TrailDB('pydata-tutorial.tdb') 27 | base = tdb.min_timestamp() 28 | types = [] 29 | xs = [] 30 | ys = [] 31 | # try this: 32 | # for y, (first_ts, events) in enumerate(sorted(get_events(tdb), reverse=True)): 33 | for y, (first_ts, events) in enumerate(get_events(tdb)): 34 | for event in events: 35 | xs.append(old_div(int(event.time - base), (24 * 3600))) 36 | ys.append(y) 37 | types.append('user' if event.user else 'anon') 38 | data = pd.DataFrame({'x': xs, 'y': ys}) 39 | data['type'] = pd.Series(types, dtype='category') 40 | return data 41 | 42 | cnv = ds.Canvas(400, 300) 43 | agg = cnv.points(get_dataframe(), 'x', 'y', ds.count_cat('type')) 44 | colors = {'anon': 'red', 'user': 'blue'} 45 | img = tf.set_background(tf.colorize(agg, colors, how='eq_hist'), 'white') 46 | with open('prince.png', 'w') as f: 47 | f.write(img.to_bytesio().getvalue()) 48 | -------------------------------------------------------------------------------- /examples/extract_sample.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | from __future__ import print_function 3 | from __future__ import unicode_literals 4 | from __future__ import absolute_import 5 | from past.utils import old_div 6 | from random import random 7 | import sys 8 | 9 | from traildb import TrailDB, TrailDBConstructor 10 | 11 | 12 | def extract(tdb, cons, sample_size): 13 | for uuid, trail in tdb.trails(): 14 | if random() < sample_size: 15 | for event in trail: 16 | cons.add(uuid, event.time, list(event)[1:]) 17 | return cons.finalize() 18 | 19 | if __name__ == '__main__': 20 | if len(sys.argv) < 3: 21 | print('Usage: extract_sample source_tdb destination_tdb sample_percentage') 22 | sys.exit(1) 23 | tdb = TrailDB(sys.argv[1]) 24 | cons = TrailDBConstructor(sys.argv[2], 
tdb.fields[1:]) 25 | num = extract(tdb, cons, old_div(float(sys.argv[3]), 100.)).num_trails 26 | print('Extracted %d trails to %s' % (num, sys.argv[2])) 27 | -------------------------------------------------------------------------------- /examples/parse_wikipedia_history.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from __future__ import unicode_literals 3 | from __future__ import division 4 | from __future__ import absolute_import 5 | from datetime import datetime 6 | import sys 7 | import gzip 8 | import hashlib 9 | 10 | import traildb 11 | 12 | num_events = 0 13 | 14 | # This script parses Wikipedia revision metadata that you can find here 15 | # https://dumps.wikimedia.org/enwiki/ 16 | # You want a file like 17 | # https://dumps.wikimedia.org/enwiki/20160501/enwiki-20160501-stub-meta-history.xml.gz 18 | 19 | 20 | def add_event(cons, uuid, tstamp, user, ip, title): 21 | global num_events 22 | cons.add(uuid, tstamp, (user, ip, title)) 23 | num_events += 1 24 | if not num_events & 1023: 25 | print('%d events added' % num_events) 26 | 27 | 28 | def parse(cons, fileobj): 29 | for line in fileobj: 30 | line = line.strip() 31 | if line.startswith(''): 32 | title = line[7:-8] 33 | elif line.startswith('<timestamp>'): 34 | tstamp = datetime.strptime(line[11:-13], '%Y-%m-%dT%H:%M:%S') 35 | elif line.startswith('<username>'): 36 | user = line[10:-11] 37 | ip = '' 38 | uuid = hashlib.md5(user).hexdigest() 39 | add_event(cons, uuid, tstamp, user, ip, title) 40 | elif line.startswith('<ip>'): 41 | user = '' 42 | ip = line[4:-5] 43 | uuid = hashlib.md5(ip).hexdigest() 44 | add_event(cons, uuid, tstamp, user, ip, title) 45 | 46 | if __name__ == '__main__': 47 | if len(sys.argv) < 3: 48 | print('Usage: parse_wikipedia_history.py enwiki-20160501-stub-meta-history.xml.gz wikipedia-history.tdb') 49 | sys.exit(1) 50 | 51 | cons = traildb.TrailDBConstructor(sys.argv[2], 52 | ['user', 'ip', 
'title']) 53 | parse(cons, gzip.GzipFile(sys.argv[1])) 54 | print('Done adding %d events!' % num_events) 55 | cons.finalize() 56 | print('Success!') 57 | -------------------------------------------------------------------------------- /examples/top_items.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from __future__ import unicode_literals 3 | from __future__ import division 4 | from __future__ import absolute_import 5 | from collections import Counter 6 | import timeit 7 | 8 | from traildb import TrailDB 9 | 10 | 11 | def string_top(): 12 | tdb = TrailDB('pydata-tutorial') 13 | return Counter(event.title for uuid, trail in tdb.trails() 14 | for event in trail).most_common(5) 15 | 16 | 17 | def item_top(): 18 | tdb = TrailDB('pydata-tutorial') 19 | stats = Counter(event.title for uuid, trail in tdb.trails(rawitems=True) 20 | for event in trail) 21 | return [(tdb.get_item_value(item), f) for item, f in stats.most_common(5)] 22 | 23 | print('string_top', timeit.timeit(string_top, number=3)) 24 | print('item_top', timeit.timeit(item_top, number=3)) 25 | -------------------------------------------------------------------------------- /examples/tutorial_simple_traildb.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from __future__ import unicode_literals 3 | from __future__ import division 4 | from __future__ import absolute_import 5 | from builtins import range 6 | from uuid import uuid4 7 | from datetime import datetime 8 | 9 | from traildb import TrailDBConstructor, TrailDB 10 | 11 | cons = TrailDBConstructor('tiny', ['username', 'action']) 12 | 13 | for i in range(3): 14 | uuid = uuid4().hex 15 | username = 'user%d' % i 16 | for day, action in enumerate(['open', 'save', 'close']): 17 | cons.add(uuid, datetime(2016, i + 1, day + 1), (username, action)) 18 | 19 | cons.finalize() 20 | 21 | for uuid, trail 
in TrailDB('tiny').trails(): 22 | print(uuid, list(trail)) 23 | -------------------------------------------------------------------------------- /examples/tutorial_wikipedia_sessions.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from __future__ import unicode_literals 3 | from __future__ import division 4 | from __future__ import absolute_import 5 | from builtins import next 6 | import sys 7 | 8 | from traildb import TrailDB 9 | 10 | SESSION_LIMIT = 30 * 60 11 | 12 | 13 | def sessions(tdb): 14 | for i, (uuid, trail) in enumerate(tdb.trails(only_timestamp=True)): 15 | prev_time = next(trail) 16 | num_events = 1 17 | num_sessions = 1 18 | for timestamp in trail: 19 | if timestamp - prev_time > SESSION_LIMIT: 20 | num_sessions += 1 21 | prev_time = timestamp 22 | num_events += 1 23 | print('Trail[%d] Number of Sessions: %d Number of Events: %d' % 24 | (i, num_sessions, num_events)) 25 | 26 | if __name__ == '__main__': 27 | if len(sys.argv) < 2: 28 | print('Usage: tutorial_wikipedia_sessions <wikipedia-history.tdb>') 29 | else: 30 | sessions(TrailDB(sys.argv[1])) 31 | -------------------------------------------------------------------------------- /index.rst: -------------------------------------------------------------------------------- 1 | .. traildb-python documentation master file, created by 2 | sphinx-quickstart on Mon Oct 2 14:17:29 2017. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | traildb-python 7 | ============== 8 | 9 | These are Python bindings to TrailDB, supporting both Python 2 and Python 3. The official TrailDB website is at http://traildb.io/ 10 | 11 | .. toctree:: 12 | :maxdepth: 2 13 | :caption: Contents: 14 | 15 | .. autoclass:: traildb.TrailDB 16 | :members: 17 | 18 | .. autoclass:: traildb.TrailDBConstructor 19 | :members: 20 | 21 | .. autoclass:: traildb.TrailDBCursor 22 | :members: 23 | 24 | .. 
autoclass:: traildb.TrailDBMultiCursor 25 | :members: 26 | 27 | .. autoclass:: traildb.TrailDBEventFilter 28 | :members: 29 | 30 | .. autoclass:: traildb.TrailDBError 31 | :members: 32 | 33 | Indices and tables 34 | ================== 35 | 36 | * :ref:`genindex` 37 | * :ref:`search` 38 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | appdirs==1.4.3 2 | configparser==3.5.0 3 | enum34==1.1.6 4 | flake8==3.3.0 5 | future==0.16.0 6 | mccabe==0.6.1 7 | packaging==16.8 8 | py==1.4.32 9 | pycodestyle==2.3.1 10 | pyflakes==1.5.0 11 | pyparsing==2.2.0 12 | six==1.10.0 13 | -------------------------------------------------------------------------------- /runtests.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/usr/local/lib" 3 | 4 | set -e 5 | 6 | # E999 -- syntax error 7 | # F821 -- undefined local variable 8 | flake8 ./traildb/ | grep '[ ]E999[ ]\|[ ]F821[ ]' | awk '{print} END {exit(NR > 0)}' 9 | 10 | env PYTHONPATH='.' 
python test/test.py 11 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from distutils.core import setup 2 | 3 | setup(name='traildb', 4 | version='0.0.2', 5 | description='TrailDB stores and queries cookie trails from raw logs.', 6 | author='AdRoll.com', 7 | install_requires=['future>=0.16.0'], 8 | packages=['traildb']) 9 | -------------------------------------------------------------------------------- /test/test.py: -------------------------------------------------------------------------------- 1 | from __future__ import unicode_literals 2 | from __future__ import print_function 3 | from __future__ import division 4 | from __future__ import absolute_import 5 | from builtins import next 6 | from builtins import int 7 | 8 | import os 9 | import unittest 10 | import datetime 11 | 12 | from traildb import TrailDB, TrailDBConstructor, tdb_item_field, tdb_item_val 13 | from traildb import TrailDBError, TrailDBCursor, TrailDBMultiCursor 14 | 15 | 16 | class TestAPI(unittest.TestCase): 17 | def setUp(self): 18 | self.uuid = '12345678123456781234567812345678' 19 | cons = TrailDBConstructor('testtrail', ['field1', 'field2']) 20 | cons.add(self.uuid, 1, ['a', '1']) 21 | cons.add(self.uuid, 2, ['b', '2']) 22 | cons.add(self.uuid, 3, ['c', '3']) 23 | cons.finalize() 24 | 25 | def tearDown(self): 26 | os.unlink('testtrail.tdb') 27 | 28 | def test_trails(self): 29 | db = TrailDB('testtrail') 30 | self.assertEqual(1, db.num_trails) 31 | 32 | trail = db.trail(0) 33 | self.assertIsInstance(trail, TrailDBCursor) 34 | 35 | events = list(trail) # Force evaluation of generator 36 | self.assertEqual(3, len(events)) 37 | for event in events: 38 | self.assertTrue(hasattr(event, 'time')) 39 | self.assertTrue(hasattr(event, 'field1')) 40 | self.assertTrue(hasattr(event, 'field2')) 41 | 42 | with self.assertRaises(AttributeError): 43 | event.missing_field 44 | 
45 | def test_trails_selected_uuids(self): 46 | uuids = ["02345678123456781234567812345678", 47 | "12345678123456781234567812345678", 48 | "22345678123456781234567812345678", 49 | "32345678123456781234567812345678", 50 | "42345678123456781234567812345678"] 51 | cons = TrailDBConstructor('whitelist_testtrail', ['field1', 'field2']) 52 | for uuid in uuids: 53 | cons.add(uuid, 1, ['a', '1']) 54 | cons.add(uuid, 2, ['b', '2']) 55 | cons.add(uuid, 3, ['c', '3']) 56 | cons.finalize() 57 | 58 | tdb = TrailDB('whitelist_testtrail') 59 | whitelist = [uuids[0], 60 | uuids[3], 61 | uuids[4]] 62 | 63 | expected_length = 3 64 | for trail_uuid, trail_events in tdb.trails(selected_uuids=whitelist): 65 | trail_events = list(trail_events) 66 | self.assertEqual(len(trail_events), 67 | expected_length) 68 | 69 | def test_crumbs(self): 70 | db = TrailDB('testtrail.tdb') 71 | 72 | n = 0 73 | for uuid, trail in db.trails(): 74 | n += 1 75 | self.assertEqual(self.uuid, uuid) 76 | self.assertIsInstance(trail, TrailDBCursor) 77 | self.assertEqual(3, len(list(trail))) 78 | 79 | self.assertEqual(1, n) 80 | 81 | def test_silly_open(self): 82 | self.assertTrue(os.path.exists('testtrail.tdb')) 83 | self.assertFalse(os.path.exists('testtrail')) 84 | 85 | db1 = TrailDB('testtrail.tdb') 86 | db2 = TrailDB('testtrail') 87 | 88 | with self.assertRaises(TrailDBError): 89 | TrailDB('foo.tdb') 90 | 91 | def test_fields(self): 92 | db = TrailDB('testtrail') 93 | self.assertEqual(['time', 'field1', 'field2'], db.fields) 94 | 95 | def test_uuids(self): 96 | db = TrailDB('testtrail') 97 | self.assertEqual(0, db.get_trail_id(self.uuid)) 98 | self.assertEqual(self.uuid, db.get_uuid(0)) 99 | self.assertTrue(self.uuid in db) 100 | 101 | def test_lexicons(self): 102 | db = TrailDB('testtrail') 103 | 104 | # First field 105 | self.assertEqual(4, db.lexicon_size(1)) 106 | self.assertEqual(['a', 'b', 'c'], list(db.lexicon(1))) 107 | 108 | # Second field 109 | self.assertEqual(['1', '2', '3'], list(db.lexicon(2))) 
110 | 111 | with self.assertRaises(TrailDBError): 112 | db.lexicon(3) # Out of bounds 113 | 114 | def test_metadata(self): 115 | db = TrailDB('testtrail.tdb') 116 | self.assertEqual(1, db.min_timestamp()) 117 | self.assertEqual(3, db.max_timestamp()) 118 | self.assertEqual((1, 3), db.time_range()) 119 | 120 | self.assertEqual((1, 3), db.time_range(parsetime=False)) 121 | 122 | 123 | def test_apply_whitelist(self): 124 | uuids = ["02345678123456781234567812345678", 125 | "12345678123456781234567812345678", 126 | "22345678123456781234567812345678", 127 | "32345678123456781234567812345678", 128 | "42345678123456781234567812345678"] 129 | cons = TrailDBConstructor('whitelist_testtrail', ['field1', 'field2']) 130 | for uuid in uuids: 131 | cons.add(uuid, 1, ['a', '1']) 132 | cons.add(uuid, 2, ['b', '2']) 133 | cons.add(uuid, 3, ['c', '3']) 134 | cons.finalize() 135 | 136 | tdb = TrailDB('whitelist_testtrail') 137 | whitelist = [uuids[0], 138 | uuids[3], 139 | uuids[4]] 140 | tdb.apply_whitelist(whitelist) 141 | found_trails = list(tdb.trails(parsetime=False)) 142 | 143 | self.assertEqual(len(found_trails), len(uuids)) 144 | for trail_uuid, trail_events in found_trails: 145 | if trail_uuid in whitelist: 146 | expected_length = 3 147 | else: 148 | expected_length = 0 149 | 150 | trail_events = list(trail_events) 151 | self.assertEqual(len(trail_events), 152 | expected_length) 153 | 154 | def test_apply_blacklist(self): 155 | uuids = ["02345678123456781234567812345678", 156 | "12345678123456781234567812345678", 157 | "22345678123456781234567812345678", 158 | "32345678123456781234567812345678", 159 | "42345678123456781234567812345678"] 160 | cons = TrailDBConstructor('blacklist_testtrail', ['field1', 'field2']) 161 | for uuid in uuids: 162 | cons.add(uuid, 1, ['a', '1']) 163 | cons.add(uuid, 2, ['b', '2']) 164 | cons.add(uuid, 3, ['c', '3']) 165 | cons.finalize() 166 | 167 | tdb = TrailDB('blacklist_testtrail') 168 | blacklist = [uuids[1], 169 | uuids[2]] 170 | 
tdb.apply_blacklist(blacklist) 171 | found_trails = list(tdb.trails(parsetime=False)) 172 | 173 | for trail_uuid, trail_events in found_trails: 174 | if trail_uuid in blacklist: 175 | expected_length = 0 176 | else: 177 | expected_length = 3 178 | 179 | trail_events = list(trail_events) 180 | self.assertEqual(len(trail_events), 181 | expected_length) 182 | 183 | 184 | class TestFilter(unittest.TestCase): 185 | 186 | def setUp(self): 187 | uuid = '12345678123456781234567812345678' 188 | cons = TrailDBConstructor('testtrail', ['field1', 'field2', 'field3']) 189 | cons.add(uuid, 1, ['a', '1', 'x']) 190 | cons.add(uuid, 2, ['b', '2', 'x']) 191 | cons.add(uuid, 3, ['c', '3', 'y']) 192 | cons.add(uuid, 4, ['d', '4', 'x']) 193 | cons.add(uuid, 5, ['e', '5', 'x']) 194 | tdb = cons.finalize() 195 | 196 | def tearDown(self): 197 | os.unlink('testtrail.tdb') 198 | 199 | def test_simple_disjunction(self): 200 | tdb = TrailDB('testtrail') 201 | # test shorthand notation (not a list of lists) 202 | events = list( 203 | tdb.trail(0, event_filter=[('field1', 'a'), ('field2', '4')])) 204 | self.assertEqual(len(events), 2) 205 | self.assertEqual((events[0].field1, events[0].field2), ('a', '1')) 206 | self.assertEqual((events[1].field1, events[1].field2), ('d', '4')) 207 | 208 | def test_negation(self): 209 | tdb = TrailDB('testtrail') 210 | events = list(tdb.trail(0, event_filter=[('field3', 'x', True)])) 211 | self.assertEqual(len(events), 1) 212 | self.assertEqual((events[0].field1, events[0].field2, 213 | events[0].field3), ('c', '3', 'y')) 214 | 215 | def test_conjunction(self): 216 | tdb = TrailDB('testtrail') 217 | events = list( 218 | tdb.trail(0, event_filter=[[('field1', 'e'), ('field1', 'c')], 219 | [('field3', 'y', True)]])) 220 | self.assertEqual(len(events), 1) 221 | self.assertEqual((events[0].field1, events[0].field2), ('e', '5')) 222 | 223 | def test_time_range(self): 224 | tdb = TrailDB('testtrail') 225 | events = list(tdb.trail(0, 226 | event_filter=[[(2, 4)]], 227 
| parsetime=False)) 228 | self.assertEqual(len(events), 2) 229 | self.assertEqual(events[0].time, 2) 230 | self.assertEqual(events[1].time, 3) 231 | 232 | def test_filter_object(self): 233 | tdb = TrailDB('testtrail') 234 | obj = tdb.create_filter([[('field1', 'e'), ('field1', 'c')], 235 | [('field3', 'y', True)]]) 236 | events = list(tdb.trail(0, event_filter=obj)) 237 | self.assertEqual(len(events), 1) 238 | self.assertEqual((events[0].field1, events[0].field2), ('e', '5')) 239 | events = list(tdb.trail(0, event_filter=obj)) 240 | self.assertEqual(len(events), 1) 241 | self.assertEqual((events[0].field1, events[0].field2), ('e', '5')) 242 | 243 | 244 | class TestCons(unittest.TestCase): 245 | def test_cursor(self): 246 | uuid = '12345678123456781234567812345678' 247 | cons = TrailDBConstructor('testtrail', ['field1', 'field2']) 248 | cons.add(uuid, 1, ['a', '1']) 249 | cons.add(uuid, 2, ['b', '2']) 250 | cons.add(uuid, 3, ['c', '3']) 251 | cons.add(uuid, 4, ['d', '4']) 252 | cons.add(uuid, 5, ['e', '5']) 253 | tdb = cons.finalize() 254 | 255 | with self.assertRaises(IndexError): 256 | tdb.get_trail_id('12345678123456781234567812345679') 257 | 258 | trail = tdb.trail(tdb.get_trail_id(uuid)) 259 | with self.assertRaises(TypeError): 260 | len(trail) 261 | 262 | j = 1 263 | for event in trail: 264 | self.assertEqual(j, int(event.field2)) 265 | self.assertEqual(j, int(event.time)) 266 | j += 1 267 | self.assertEqual(6, j) 268 | 269 | # Iterator is empty now 270 | self.assertEqual([], list(trail)) 271 | 272 | field1_values = [e.field1 for e in tdb.trail(tdb.get_trail_id(uuid))] 273 | self.assertEqual(['a', 'b', 'c', 'd', 'e'], field1_values) 274 | 275 | def test_cursor_parsetime(self): 276 | uuid = '12345678123456781234567812345678' 277 | cons = TrailDBConstructor('testtrail', ['field1']) 278 | 279 | events = [(datetime.datetime(2016, 1, 1, 1, 1), ['1']), 280 | (datetime.datetime(2016, 1, 1, 1, 2), ['2']), 281 | (datetime.datetime(2016, 1, 1, 1, 3), ['3'])] 282 | 
def test_binarydata(self):
    """Bytes containing NULs and 0xff come back unchanged with decode=False."""
    payload = b'\x00\x01\x02\x00\xff\x00\xff'
    builder = TrailDBConstructor('testtrail', ['field1'])
    builder.add('12345678123456781234567812345678', 123, [payload])
    db = builder.finalize(decode=False)

    stored_events = list(db[0])
    self.assertEqual(stored_events[0].field1, payload)
def test_items(self):
    """Raw items map to their string values and back, event by event."""
    uuid = '12345678123456781234567812345678'
    # (field1, field2) values of the two events, in timestamp order.
    expected = [('a', 'x' * 2048), ('b', 'y' * 2048)]

    cons = TrailDBConstructor('testtrail', ['field1', 'field2'])
    for tstamp, (first, second) in zip((123, 124), expected):
        cons.add(uuid, tstamp, [first, second])
    tdb = cons.finalize()

    # Pass 1: item -> value via get_item_value, value -> item via get_item.
    cursor = tdb.trail(0, rawitems=True)
    for first, second in expected:
        event = next(cursor)
        self.assertEqual(tdb.get_item_value(event.field1), first)
        self.assertEqual(tdb.get_item_value(event.field2), second)
        self.assertEqual(tdb.get_item('field1', first), event.field1)
        self.assertEqual(tdb.get_item('field2', second), event.field2)

    # Pass 2: split each item into (field id, value id) and resolve it
    # through get_value.
    cursor = tdb.trail(0, rawitems=True)
    for first, second in expected:
        event = next(cursor)
        for item, wanted in ((event.field1, first), (event.field2, second)):
            field = tdb_item_field(item)
            val = tdb_item_val(item)
            self.assertEqual(tdb.get_value(field, val), wanted)
class TestMultiCursor(unittest.TestCase):
    """Tests for merging cursors from two TrailDBs with TrailDBMultiCursor."""

    def setUp(self):
        """Create two TrailDBs that both contain ``uuid1`` and ``uuid2``.

        The second TrailDB carries an extra ``field4`` and later timestamps,
        so a merged trail is expected to yield tdb1 events before tdb2 events.
        """
        self.uuid1 = '12345678123456781234567812345678'
        self.uuid2 = '12345678123456781234567812345679'

        cons = TrailDBConstructor('testtrail1', ['field1', 'field2', 'field3'])
        cons.add(self.uuid1, 1, ['a', '1', 'x'])
        cons.add(self.uuid1, 2, ['b', '2', 'x'])
        cons.add(self.uuid2, 1, ['c', '3', 'y'])
        cons.add(self.uuid2, 2, ['d', '4', 'x'])
        cons.add(self.uuid2, 3, ['e', '5', 'x'])
        self.tdb1 = cons.finalize()

        cons = TrailDBConstructor('testtrail2', ['field1', 'field2', 'field3', 'field4'])
        cons.add(self.uuid2, 4, ['a', '1', 'x', 'l'])
        cons.add(self.uuid2, 5, ['b', '2', 'x', 'm'])
        cons.add(self.uuid1, 3, ['c', '3', 'y', 'n'])
        cons.add(self.uuid1, 4, ['d', '4', 'x', 'o'])
        cons.add(self.uuid1, 5, ['e', '5', 'x', 'p'])
        self.tdb2 = cons.finalize()

    def test_multicursor(self):
        """Events of one UUID from both TrailDBs come out in time order."""
        c1 = self.tdb1.trail(self.tdb1.get_trail_id(self.uuid1))
        c2 = self.tdb2.trail(self.tdb2.get_trail_id(self.uuid1))
        mc = TrailDBMultiCursor(False, False, False)

        # not initialized, raise error
        with self.assertRaises(TrailDBError):
            next(mc)
        mc.set_cursors([c1, c2], [self.tdb1, self.tdb2])

        # exhaust the iterator
        events = list(mc)

        self.assertEqual(len(events), 5)
        self.assertEqual(events[0][0].time, 1)
        self.assertEqual(events[0][0].field1, 'a')
        self.assertEqual(events[0][0].field2, '1')
        self.assertEqual(events[0][0].field3, 'x')
        self.assertEqual(events[1][0].time, 2)
        self.assertEqual(events[1][0].field1, 'b')
        self.assertEqual(events[1][0].field2, '2')
        self.assertEqual(events[1][0].field3, 'x')
        # this one is from the 2nd tdb, has an additional field
        self.assertEqual(events[2][0].time, 3)
        self.assertEqual(events[2][0].field1, 'c')
        self.assertEqual(events[2][0].field2, '3')
        self.assertEqual(events[2][0].field3, 'y')
        self.assertEqual(events[2][0].field4, 'n')

    def test_multicursor_reuse(self):
        """After repointing the cursors, ``reset()`` makes the multicursor reusable."""
        c1 = self.tdb1.trail(self.tdb1.get_trail_id(self.uuid1))
        c2 = self.tdb2.trail(self.tdb2.get_trail_id(self.uuid1))
        mc = TrailDBMultiCursor(False, False, False)
        mc.set_cursors([c1, c2], [self.tdb1, self.tdb2])
        # exhaust the iterator
        list(mc)

        # change the cursors
        c1.get_trail(self.tdb1.get_trail_id(self.uuid2))
        c2.get_trail(self.tdb2.get_trail_id(self.uuid2))

        # reset the multicursor
        mc.reset()
        events = list(mc)

        self.assertEqual(len(events), 5)
        self.assertEqual(events[0][0].time, 1)
        self.assertEqual(events[0][0].field1, 'c')
        self.assertEqual(events[0][0].field2, '3')
        self.assertEqual(events[0][0].field3, 'y')
        self.assertEqual(events[3][0].time, 4)
        self.assertEqual(events[3][0].field1, 'a')
        self.assertEqual(events[3][0].field2, '1')
        self.assertEqual(events[3][0].field3, 'x')
        self.assertEqual(events[3][0].field4, 'l')

    def test_multicursor_raw_items_parsetime(self):
        """Smoke test with ``parsetime`` and ``rawitems`` both enabled."""
        c1 = self.tdb1.trail(self.tdb1.get_trail_id(self.uuid1))
        c2 = self.tdb2.trail(self.tdb2.get_trail_id(self.uuid1))
        mc = TrailDBMultiCursor(True, True, False)
        mc.set_cursors([c1, c2], [self.tdb1, self.tdb2])
        # exhaust the iterator
        events = list(mc)

        # just make sure the length is right and we didn't have any errors
        self.assertEqual(len(events), 5)

    def tearDown(self):
        """Remove both TrailDB files, each independently of the other."""
        # One try per file: a missing first file must not stop the second
        # one from being removed. Only OSError is ignored so that other
        # failures still surface.
        for path in ('testtrail1.tdb', 'testtrail2.tdb'):
            try:
                os.unlink(path)
            except OSError:
                pass
# Codec used for str <-> bytes conversion at the C boundary.
CODEC = 'utf8'

# Name of the hex codec. Fall back to 'hex_codec' on interpreters where the
# 'hex' alias cannot be looked up.
HEX = 'hex'

try:
    codecs.decode('A0', 'hex')
except LookupError:
    HEX = 'hex_codec'

# Load the native TrailDB library. CDLL raises OSError when the shared object
# cannot be found or loaded.
if os.name == "posix" and sys.platform == "darwin":
    try:
        lib = CDLL('libtraildb.dylib')
    except OSError:
        # is there a better way to figure out the path?
        lib = CDLL('/usr/local/lib/libtraildb.dylib')
elif os.name == "posix" and "linux" in sys.platform:
    lib = CDLL('libtraildb.so')
else:
    # Without this branch ``lib`` stays unbound and the first api(lib....)
    # call below fails with an unhelpful NameError.
    raise ImportError("traildb: unsupported platform %r (os.name=%r)"
                      % (sys.platform, os.name))
# ctypes signatures for the libtraildb entry points used by this module.
# Each api() call sets argtypes (second argument) and restype (third
# argument, None when omitted) on the foreign function.

api(lib.tdb_get_field_name, [tdb, tdb_field], c_char_p)

api(lib.tdb_get_item, [tdb, tdb_field, POINTER(c_char), c_uint64], tdb_item)
api(lib.tdb_get_value,
    [tdb, tdb_field, tdb_val, POINTER(c_uint64)], POINTER(c_char))
api(lib.tdb_get_item_value,
    [tdb, tdb_item, POINTER(c_uint64)], POINTER(c_char))

api(lib.tdb_get_uuid, [tdb, c_uint64], POINTER(c_ubyte))
api(lib.tdb_get_trail_id,
    [tdb, POINTER(c_ubyte), POINTER(c_uint64)], tdb_error)

api(lib.tdb_error_str, [tdb_error], c_char_p)

api(lib.tdb_num_trails, [tdb], c_uint64)
api(lib.tdb_num_events, [tdb], c_uint64)
api(lib.tdb_num_fields, [tdb], c_uint64)
api(lib.tdb_min_timestamp, [tdb], c_uint64)
api(lib.tdb_max_timestamp, [tdb], c_uint64)

api(lib.tdb_version, [tdb], c_uint64)

api(lib.tdb_cursor_new, [tdb], tdb_cursor)
# The handle released here is a cursor, not a database. Both aliases are
# c_void_p, so this only corrects the declared intent.
api(lib.tdb_cursor_free, [tdb_cursor])
api(lib.tdb_cursor_next, [tdb_cursor], POINTER(tdb_event))
api(lib.tdb_get_trail, [tdb_cursor, c_uint64], tdb_error)
api(lib.tdb_get_trail_length, [tdb_cursor], c_uint64)
api(lib.tdb_cursor_set_event_filter, [tdb_cursor, tdb_event_filter], tdb_error)

api(lib.tdb_multi_cursor_new, [POINTER(tdb_cursor), c_uint64], tdb_multi_cursor)
api(lib.tdb_multi_cursor_free, [tdb_multi_cursor])
api(lib.tdb_multi_cursor_reset, [tdb_multi_cursor])
api(lib.tdb_multi_cursor_next, [tdb_multi_cursor], POINTER(tdb_multi_event))
api(lib.tdb_multi_cursor_next_batch, [tdb_multi_cursor, POINTER(tdb_multi_event), c_uint64])

api(lib.tdb_event_filter_new, [], tdb_event_filter)
api(lib.tdb_event_filter_add_term, [tdb_event_filter, tdb_item, c_int], tdb_error)
# The C function takes (filter, start_time, end_time). Declaring only two
# 64-bit arguments left the end timestamp undeclared, so ctypes converted it
# as a default C int instead of a 64-bit value.
api(lib.tdb_event_filter_add_time_range,
    [tdb_event_filter, c_uint64, c_uint64], tdb_error)
api(lib.tdb_event_filter_new_clause, [tdb_event_filter], tdb_error)
api(lib.tdb_event_filter_new_match_none, [], tdb_event_filter)
class TrailDBConstructor(object):
    """Objects of this class are used to construct new TrailDBs."""

    def __init__(self, path, ofields=()):
        """Initialize a new TrailDB constructor.

        :param path: TrailDB output path (without .tdb).
        :param ofields: List of field (names) in this TrailDB.
        :raises TrailDBError: if ``path`` is empty or the native constructor
            cannot be opened.

        .. code-block:: python

            import traildb
            cons = traildb.TrailDBConstructor('example', ['type', 'flavor'])
            cons.add('00000000000000000000000000000001', 123, ['click', 'tasty'])
            cons.add('00000000000000000000000000000002', 129, ['flash', 'sour'])
            cons.finalize()  # Don't forget to finalize, otherwise you won't get a full TrailDB.
        """
        if not path:
            raise TrailDBError("Path is required")
        n = len(ofields)

        if isinstance(path, str):
            path = path.encode(CODEC)

        ofield_names = (c_char_p * n)(*[name.encode(CODEC)
                                        for name in ofields])

        # _cons is assigned before tdb_cons_open so that __del__ can release
        # the handle even when opening fails.
        self._cons = lib.tdb_cons_init()
        if lib.tdb_cons_open(self._cons, path, ofield_names, n) != 0:
            raise TrailDBError("Cannot open constructor")

        self.path = path
        self.ofields = ofields

    def __del__(self):
        # _cons is missing when __init__ raised before allocating the handle.
        if hasattr(self, '_cons'):
            lib.tdb_cons_close(self._cons)

    def add(self, uuid, tstamp, values):
        """Add an event in TrailDB.

        :param uuid: UUID of this event.
        :param tstamp: Timestamp of this event (datetime or integer).
        :param values: value of each field (text is encoded as UTF-8,
            ``bytes`` are stored as they are).
        :raises TrailDBError: if the native library rejects the event.

        .. code-block:: python

            cons.add('00000000000000000000000000000001', 123, ['click', 'tasty'])
        """
        if isinstance(tstamp, datetime):
            # time.mktime interprets the time tuple as local time.
            tstamp = int(time.mktime(tstamp.timetuple()))
        n = len(self.ofields)
        values = [v.encode(CODEC) if not isinstance(v, bytes)
                  else v for v in values]
        value_array = (c_char_p * n)(*values)
        value_lengths = (c_uint64 * n)(*[len(v) for v in values])
        f = lib.tdb_cons_add(self._cons, uuid_raw(uuid), tstamp, value_array,
                             value_lengths)
        if f:
            # f is an error code, not a position in ``values``. Using it as
            # an index reported an unrelated value or raised IndexError.
            raise TrailDBError("Could not add event (error code %d)" % f)

    def append(self, db):
        """Merge an existing TrailDB in this TrailDB.

        :param db: An instance of :py:class:`~traildb.TrailDB` you want to merge to this one.
        :raises TrailDBError: if the native append call reports an error.
        """
        f = lib.tdb_cons_append(self._cons, db._db)
        if f < 0:
            raise TrailDBError("Wrong number of fields: %d" % db.num_fields)
        if f > 0:
            raise TrailDBError("Too many values: %s" % db.num_fields)

    def finalize(self, decode=True):
        """Finalize this TrailDB. You cannot add new events in this TrailDB
        after calling this function.

        You need to finalize :py:class:`~traildb.TrailDBConstructor` or you
        will not have an openable TrailDB later. Finalization is where all the
        compression and preparation happen and is typically the most
        resource-intensive part of TrailDB building.

        :param decode: Passed on to :py:class:`~traildb.TrailDB` when the
            finalized file is opened.
        :returns: Opened :py:class:`~traildb.TrailDB`:
        :raises TrailDBError: if finalization fails.
        """
        r = lib.tdb_cons_finalize(self._cons)
        if r:
            raise TrailDBError("Could not finalize (%d)" % r)
        return TrailDB(self.path, decode)
class TrailDBMultiCursor(object):
    """
    TrailDBMultiCursor iterates over the events of multiple trails,
    merged together into a single trail with events sorted in the ascending
    time order. The trails can be from different traildbs.

    To use, initialize and then set the cursors using the set_cursors method.
    To reuse a multicursor, set new trails on the underlying cursors and then
    call :py:meth:`~traildb.TrailDBMultiCursor.reset()`. If filtering, apply event filters to the underlying
    cursors individually before setting them on the multicursor, or call reset after doing so
    if already set.
    """

    def __init__(self, parsetime, rawitems, only_timestamp):
        """
        :param parsetime: If True, returns datetime objects instead of integer timestamps.
        :param rawitems: Return raw integer items instead of stringified values. Using raw items is usually a bit more efficient than using string values.
        :param only_timestamp: If True, only return timestamps, not event objects.
        """
        self.parsetime = parsetime
        self.rawitems = rawitems
        self.only_timestamp = only_timestamp
        self.multicursor = None
        self._ready = False

    def __del__(self):
        if self.multicursor:
            lib.tdb_multi_cursor_free(self.multicursor)

    def __iter__(self):
        return self

    def __next__(self):
        """
        return the next event in the combined trails, in ascending timestamp order

        this will return tuples in the form of `(event, traildb)`, where the traildb
        is the :py:class:`~traildb.TrailDB` the event belongs to. This can be used to
        get the values if rawitems is used.

        :raises TrailDBError: if :py:meth:`set_cursors` has not been called yet.
        """
        if not self._ready:
            raise TrailDBError("Multicursor not initialized, call set_cursors")

        multi_event = lib.tdb_multi_cursor_next(self.multicursor)
        if not multi_event:
            # A NULL pointer from the native call marks the end of the
            # merged trail.
            raise StopIteration()

        return self.to_event(multi_event.contents)

    def to_event(self, multi_event):
        """
        convert a native multi event into the value yielded by iteration.

        :param multi_event: a ``tdb_multi_event`` structure.
        :returns: the timestamp alone when ``only_timestamp`` is set,
            otherwise an ``(event, traildb)`` tuple.
        :raises TrailDBError: if the event comes from a TrailDB that was not
            passed to :py:meth:`set_cursors`.
        """
        event = multi_event.tdb_event
        tdb_ptr = multi_event.db

        timestamp = event.contents.timestamp
        if self.parsetime:
            timestamp = datetime.fromtimestamp(event.contents.timestamp)

        if self.only_timestamp:
            return timestamp

        try:
            traildb = self._traildbs[tdb_ptr]
        except KeyError:
            raise TrailDBError("TrailDBMultiCursor encountered a traildb that was not included in set_cursors")

        # View the event's trailing item array in place, without copying.
        address = addressof(event.contents.items)
        items = (tdb_item * event.contents.num_items).from_address(address)

        # The first argument of the event class is its rawitems flag.
        return traildb._event_cls(bool(self.rawitems), timestamp, *items), traildb

    def set_cursors(self, cursors, traildbs):
        """
        configure this multicursor to merge the specified cursors. This is required before use.
        Calling it again replaces the previous configuration.

        :param cursors: list of :py:class:`~traildb.TrailDBCursor` instances to merge
        :param traildbs: list of :py:class:`~traildb.TrailDB` instances from which the cursors were created (only needs to be specified once, even if there are multiple cursors from the same TrailDB)
        :raises TrailDBError: if the native multicursor cannot be allocated.
        """

        n_cursors = len(cursors)
        cursor_array = (tdb_cursor * n_cursors)(*[c.cursor for c in cursors])

        new_multicursor = lib.tdb_multi_cursor_new(cursor_array, n_cursors)
        if new_multicursor is None:
            raise TrailDBError("Failed to allocate memory for multicursor")

        # Release the handle from an earlier set_cursors call. It used to be
        # overwritten and leaked.
        if self.multicursor:
            lib.tdb_multi_cursor_free(self.multicursor)
        self.multicursor = new_multicursor

        # maintain references to these in python so they wont get garbage collected
        self._cursor_arr = cursor_array
        self.cursors = cursors

        self.reset()

        # mapping of the traildb pointer to the TrailDB object
        # we need this to get the configured traildb in python since we get a pointer to the tdb from the multi event
        self._traildbs = {tdb._db: tdb for tdb in traildbs}

        self._ready = True

    def reset(self):
        """
        reset the state of the multicursor to sync with the underlying cursors.
        Used when reusing cursors. Also resets the state of the python object,
        including any batched results.
        """

        if self.multicursor:
            lib.tdb_multi_cursor_reset(self.multicursor)
            self._batch_idx = 0
            self._current_batch_size = 0
504 | 505 | other_fields = other._fields() 506 | for i, f in enumerate(fields): 507 | if f != other_fields[i]: 508 | return False 509 | 510 | return True 511 | 512 | def __hash__(self): 513 | return hash(tuple(self.to_list())) 514 | 515 | def to_list(self): 516 | lst = [] 517 | for f in fields: 518 | lst.append( (f, self.__getattr__(f)) ) 519 | return lst 520 | 521 | def __getattr__(self, name): 522 | if name in self.memoized: 523 | return self.memoized[name] 524 | 525 | if name not in field_to_index: 526 | raise AttributeError 527 | 528 | item = self.items[field_to_index[name]] 529 | if self.rawitems: 530 | return item 531 | else: 532 | if name == 'time': 533 | return item 534 | else: 535 | self.memoized[name] = valuefun(item) 536 | return self.memoized[name] 537 | 538 | return TrailDBEvent 539 | 540 | 541 | class TrailDB(object): 542 | """Objects of this class represent an opened TrailDB. 543 | 544 | Simply pass the filename to the constructor (with or without extension) as below. 545 | 546 | .. 
def trails(self, selected_uuids=None, reuse_cursors=False, **kwds):
    """
    Iterate over the trails of this TrailDB.

    :param selected_uuids: Optional iterable of hex UUIDs. When given, only
        these trails are visited, in the order supplied; UUIDs that are not
        in the TrailDB are skipped.

    :param reuse_cursors: With the default ``False`` every yielded trail
        gets its own cursor. With ``True`` one cursor object is repositioned
        and yielded again for each trail, so each trail must be consumed
        before advancing to the next item.

    :returns: Yields ``(uuid, events)`` pairs.

    Any other keyword arguments are passed to :py:meth:`~TrailDB.cursor()`.

    .. code-block:: python

        # Prints all UUIDs in a TrailDB
        import traildb
        tdb = traildb.TrailDB('blah')
        for uuid, events in tdb.trails():
            print(uuid)

    """
    shared = self.cursor(**kwds) if reuse_cursors else None

    def positioned(trail_id):
        # Hand out the shared cursor or a fresh one, pointed at trail_id.
        cur = shared if reuse_cursors else self.cursor(**kwds)
        cur.get_trail(trail_id)
        return cur

    if selected_uuids is None:
        for trail_id in range(len(self)):
            events = positioned(trail_id)
            yield self.get_uuid(trail_id), events
        return

    for wanted in selected_uuids:
        try:
            trail_id = self.get_trail_id(wanted)
        except IndexError:
            # Unknown UUID: nothing to yield for it.
            continue
        yield wanted, positioned(trail_id)
def get_item(self, fieldish, value):
    """:returns: The item corresponding to a field ID or a field name and a string value.

    :param fieldish: field ID or field name.
    :param value: the value to look up; text is encoded as UTF-8, ``bytes``
        are used as they are.
    :raises TrailDBError: if the lookup returns no item.

    .. code-block:: python

        import traildb
        tdb = traildb.TrailDB('blah.tdb')
        print(tdb.get_item('type', 'click'))

    """
    field = self.field(fieldish)
    # Encode once and pass the byte length. The character count is shorter
    # than the encoded data for non-ASCII text, which truncated the lookup.
    raw = value if isinstance(value, bytes) else value.encode(CODEC)
    item = lib.tdb_get_item(self._db, field, raw, len(raw))
    if not item:
        raise TrailDBError("No such value: '%s'" % value)
    return item

def get_item_value(self, item):
    """:returns: The string value corresponding to an item.

    Returns text when this TrailDB was opened with ``decode=True``,
    otherwise the raw bytes.

    :raises TrailDBError: if the native call returns a NULL pointer.

    .. code-block:: python

        import traildb
        tdb = traildb.TrailDB('blah.tdb')

        # This should print 'click' (if TrailDB contains 'type' field and 'click' values in that field).
        print(tdb.get_item_value(tdb.get_item('type', 'click')))

    """
    value = lib.tdb_get_item_value(self._db, item, self._uint64_ptr)
    # A POINTER(c_char) result is never None; a NULL pointer is falsy.
    if not value:
        raise TrailDBError("Error reading value of item %d" % item)

    # The native call stores the value's length through _uint64_ptr.
    length = self._uint64_ptr.contents.value
    if self.decode:
        return value[0:length].decode(CODEC)

    return value[0:length]

def get_value(self, fieldish, val):
    """:returns: The string value corresponding to a field ID or a field name and a value ID.

    Returns text when this TrailDB was opened with ``decode=True``,
    otherwise the raw bytes.

    :raises TrailDBError: if the native call returns a NULL pointer.
    """
    field = self.field(fieldish)
    value = lib.tdb_get_value(self._db, field, val, self._uint64_ptr)
    # A POINTER(c_char) result is never None; a NULL pointer is falsy.
    if not value:
        raise TrailDBError("Error reading value %d of field %d" % (val, field))

    # The native call stores the value's length through _uint64_ptr.
    length = self._uint64_ptr.contents.value
    if self.decode:
        return value[0:length].decode(CODEC)

    return value[0:length]
def time_range(self, parsetime=False):
    """Report the earliest and latest timestamps of this TrailDB.

    :param parsetime: If True, both ends are converted with
                      ``datetime.fromtimestamp``; otherwise they are returned
                      exactly as given by :py:meth:`~traildb.TrailDB.min_timestamp`
                      and :py:meth:`~traildb.TrailDB.max_timestamp`.
    :returns: A ``(tmin, tmax)`` tuple.
    """
    bounds = (self.min_timestamp(), self.max_timestamp())
    if not parsetime:
        return bounds
    return tuple(datetime.fromtimestamp(stamp) for stamp in bounds)
class TrailDBEventFilter(object):
    """
    Converts a query defined in terms of Python collections to a
    `tdb_event_filter` which can be passed to various TrailDB functions.
    Performs some validation when parsing the query.

    Queries are boolean expressions defined from terms and clauses. A term is
    defined using a tuple:

    .. code-block:: python

        (field_name, "value")         # match records with field_name == "value"
        (field_name, "value", False)  # match records with field_name == "value"
        (field_name, "value", True)   # match records with field_name != "value"
        (start_time, end_time)        # match records with start_time <= time < end_time

    Clauses are boolean expressions formed from terms, which are connected with AND.
    Clauses are defined with lists of terms:

    .. code-block:: python

        [term]
        [term1, term2]
        [term1, term2, ...]

    Queries are boolean expressions formed from clauses, which are connected with OR.
    Queries are defined with lists of clauses:

    .. code-block:: python

        [clause]
        [clause1, clause2]
        [clause1, clause2, ...]

    Some complete examples:

    .. code-block:: python

        [[("user", "george_jetson")]]  # Match records for the user "george_jetson"
        [[("user", "george_jetson", True)]]  # Match records for users other than "george_jetson"
        [[(1501013929, 1501100260)]]  # Match records between 2017-07-25 3:18 pm to 2017-07-26 3:18 pm
        [[("job_title", "manager"), ("user", "george_jetson")]]  # Match records for the user "george_jetson" AND with job title "manager"
        [[("job_title", "manager")], [("user", "george_jetson")]]  # Match records for the user "george_jetson" OR with job title "manager"
        [[("job_title", "manager"), (1501013929, 1501100260)], [("user", "george_jetson"), (1501013929, 1501100260)]]  # Match records for the user "george_jetson" OR with job title "manager" and between 2017-07-25 3:18 pm to 2017-07-26 3:18 pm

    NOTE(review): the TrailDB C API documentation appears to describe event
    filters as conjunctive normal form (terms inside a clause OR-ed, clauses
    AND-ed), which is the opposite of the AND/OR roles stated above --
    confirm against the library before relying on either reading.
    """
    def __init__(self, db, query):
        """
        Initializes TrailDBEventFilter. You might want to use :py:meth:`traildb.TrailDB.create_filter` instead that passes ``db`` automatically.

        :param db: Object whose ``get_item(field, value)`` resolves the
                   field/value terms of the query.
        :param query: Non-empty list of clauses (each a list of term tuples).
                      A single clause given directly as a list of tuples is
                      accepted too and is wrapped into a one-clause query.
        :raises TrailDBError: if the library reports an error while adding a
                              clause or a term.
        """

        self.flt = lib.tdb_event_filter_new()

        # A bare clause (list of term tuples) is promoted to a full query.
        # isinstance() rather than an exact type check, so that terms built
        # from tuple subclasses (e.g. namedtuples) are recognised as terms
        # instead of being iterated as if they were clauses.
        if isinstance(query[0], tuple):
            query = [query]

        for i, clause in enumerate(query):
            # The filter is used as-is for the first clause; every further
            # clause has to be opened explicitly.
            if i > 0:
                err = lib.tdb_event_filter_new_clause(self.flt)
                if err:
                    raise TrailDBError("Out of memory in _create_filter")

            for term in clause:
                # A pair of integers is a time range; anything else is a
                # (field, value[, is_negative]) term.
                is_time_range = (len(term) == 2 and
                                 isinstance(term[0], int) and
                                 isinstance(term[1], int))
                if is_time_range:
                    start_time, end_time = term
                    err = lib.tdb_event_filter_add_time_range(self.flt,
                                                              start_time,
                                                              end_time)
                else:
                    is_negative = False
                    if len(term) == 3:
                        field, value, is_negative = term
                    else:
                        field, value = term
                    try:
                        item = db.get_item(field, value)
                    except (TrailDBError, ValueError):
                        # Deliberate best effort: an unknown field or value
                        # does not abort filter construction; item 0 is used
                        # instead (presumably an item no event carries --
                        # confirm against the library).
                        item = 0
                    err = lib.tdb_event_filter_add_term(self.flt,
                                                        item,
                                                        1 if is_negative else 0)
                if err:
                    raise TrailDBError("Out of memory in _create_filter")

    def __del__(self):
        # __del__ also runs for objects whose __init__ raised before
        # self.flt was assigned; there is nothing to free in that case.
        if hasattr(self, 'flt'):
            lib.tdb_event_filter_free(self.flt)
actually needs root permissions to install into /usr/local 22 | sudo ./waf install 23 | --------------------------------------------------------------------------------