├── test ├── __init__.py ├── test_equality.py ├── test_period_start_end.py ├── test_from_date.py ├── test_delta_methods.py ├── test_unique_events.py ├── conftest.py └── test_bitmapist.py ├── MANIFEST.in ├── bitmapist4 ├── __init__.py ├── __version__.py ├── cohort.py ├── core.py └── events.py ├── static ├── bitmapist.png ├── bitmapist_logo.sketch └── bitmapist.svg ├── .gitignore ├── tox.ini ├── .travis.yml ├── AUTHORS ├── CHANGELOG ├── setup.py └── README.md /test/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include README.md 2 | -------------------------------------------------------------------------------- /bitmapist4/__init__.py: -------------------------------------------------------------------------------- 1 | from bitmapist4.core import Bitmapist 2 | -------------------------------------------------------------------------------- /bitmapist4/__version__.py: -------------------------------------------------------------------------------- 1 | VERSION = (4, 0) 2 | __version__ = '.'.join(map(str, VERSION)) 3 | -------------------------------------------------------------------------------- /static/bitmapist.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Doist/bitmapist4/HEAD/static/bitmapist.png -------------------------------------------------------------------------------- /static/bitmapist_logo.sketch: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Doist/bitmapist4/HEAD/static/bitmapist_logo.sketch -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .*.swp 2 | *.pyc 3 
def test_equality(bitmapist):
    """Events compare equal for the same name and year, unequal otherwise."""
    reference = bitmapist.YearEvents('foo', 2014)
    same_period = bitmapist.YearEvents('foo', 2014)
    other_period = bitmapist.YearEvents('foo', 2015)
    assert reference == same_period
    assert reference != other_period
def test_from_date_week(bitmapist):
    """WeekEvents.from_date() must agree with explicit construction.

    The previous version of this test exercised ``MonthEvents`` on both
    sides, so ``WeekEvents.from_date`` was never covered.
    """
    # NOTE(review): assumes WeekEvents uses ISO week numbering, in which
    # 1 January 2014 (a Wednesday) belongs to week 1 of 2014 -- confirm
    # against bitmapist4/events.py.
    ev1 = bitmapist.WeekEvents.from_date('foo', datetime.datetime(2014, 1, 1))
    ev2 = bitmapist.WeekEvents('foo', 2014, 1)
    assert ev1 == ev2
def test_delta_hour(bitmapist):
    """next()/prev() on an hourly event move one hour, crossing date borders."""

    def as_tuple(event):
        return event.year, event.month, event.day, event.hour

    midnight = bitmapist.HourEvents('foo', 2014, 1, 1, 0)
    assert as_tuple(midnight.next()) == (2014, 1, 1, 1)
    assert as_tuple(midnight.prev()) == (2013, 12, 31, 23)
class UploadCommand(Command):
    """Support setup.py upload."""

    # If test option is set, the package is uploaded to test.pypi.org
    # and nothing is pushed to the git repository. In order to make
    # it work, you need to have a separate section [test] in your
    # ~/.pypirc.
    # More info on using the testing PyPI server:
    # https://packaging.python.org/guides/using-testpypi/
    description = 'Build and publish the package.'
    user_options = [
        ('test', 't', "Upload package to the test server"),
    ]

    @staticmethod
    def status(s):
        """Prints things in bold."""
        print('\033[1m{0}\033[0m'.format(s))

    @staticmethod
    def _run(command):
        """Run a shell command and abort the release if it fails."""
        # os.system() returns a non-zero status on failure; continuing
        # would upload or tag a release that was never built correctly.
        if os.system(command) != 0:
            sys.exit('Command failed: {0}'.format(command))

    def initialize_options(self):
        self.test = None

    def finalize_options(self):
        pass

    def run(self):
        """Clean old builds, build sdist + wheel, upload, then tag in git."""
        # Status messages use plain ASCII dots: this file has no encoding
        # declaration, so a non-ASCII character in a literal is a
        # SyntaxError under Python 2.7.
        try:
            self.status('Removing previous builds...')
            shutil.rmtree(os.path.join(here, 'dist'))
        except OSError:
            # Nothing to remove (e.g. no dist/ directory yet).
            pass

        self.status('Building Source and Wheel (universal) distribution...')
        # Quote the interpreter path so installs under paths with spaces work.
        self._run('"{0}" setup.py sdist bdist_wheel --universal'.format(
            sys.executable))

        self.status('Uploading the package to PyPi via Twine...')
        repo_string = ' --repository test' if self.test else ''
        self._run('twine upload{0} dist/*'.format(repo_string))

        if not self.test:
            self.status('Pushing git tags...')
            self._run('git tag v{0}'.format(about['__version__']))
            self._run('git push --tags')

        sys.exit()
@pytest.fixture(scope='session', autouse=True, params=['redis', 'bitmapist-server'])
def redis_server(request):
    """
    Session fixture yielding the (host, port) of the selected backend.

    When nothing is listening on the address yet, the matching server
    process is started first and terminated again after the session.
    """
    backend = request.param
    if backend == 'redis':
        address = (REDIS_HOST, REDIS_PORT)
    else:
        address = (BITMAPIST_SERVER_HOST, BITMAPIST_SERVER_PORT)

    spawned = None
    if not is_socket_open(*address):
        spawned = start_server(backend, *address)
        wait_for_socket(*address)

    yield address

    # Only stop servers this fixture started itself.
    if spawned is not None:
        spawned.terminate()
def start_redis_server(host, port):
    """
    Helper function starting Redis server

    Launches ``redis-server`` bound to ``host``:``port`` with all standard
    streams attached to the null device, registers an exit hook terminating
    it, and returns the ``subprocess.Popen`` object.
    """
    # The child receives its own copies of the descriptors, so the parent's
    # handles can (and should) be closed as soon as the process is spawned.
    with open(os.devnull, 'r') as devzero, open(os.devnull, 'w') as devnull:
        proc = subprocess.Popen(
            ['redis-server', '--bind', host, '--port', str(port)],
            stdin=devzero,
            stdout=devnull,
            stderr=devnull,
            close_fds=True)
    atexit.register(lambda: proc.terminate())
    return proc
def wait_for_socket(host, porto, seconds=3):
    """
    Check if socket is up for :param:`seconds` sec, raise an error otherwise

    Makes up to ``int(seconds / 0.2)`` connection attempts with a 0.2 sec
    pause between them and returns ``None`` as soon as one succeeds.
    Raises ``RuntimeError`` if none of the attempts succeeds.
    """
    polling_interval = 0.2
    iterations = int(seconds / polling_interval)

    for attempt in range(iterations):
        if attempt:
            # Pause between attempts only: no delay before the first try
            # and none before giving up.
            time.sleep(polling_interval)
        # A socket is in an unspecified state after a failed connect, so
        # every attempt gets a fresh one, which is always closed afterwards.
        sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        sock.settimeout(0.1)
        try:
            if sock.connect_ex((host, porto)) == 0:
                return
        finally:
            sock.close()

    raise RuntimeError(
        'Service at %s:%d is unreachable' % (host, porto))
23 | 24 | Each row of this table answers the question "what part of the `cohort` 25 | performed `activity` over time", and the Nth cell of that row represents the 26 | number of users (absolute or in percent) which still perform the activity 27 | N days (or weeks, or months) after. 28 | 29 | Each new row of the table unfolds the behavior of a different, similar 30 | cohort over time. The latest row displays the behavior of the cohort 31 | provided as an argument, the one above displays the behavior of the similar 32 | cohort, but shifted 1 day (or week, or month) ago, etc. 33 | 34 | For example, consider the following cohort statistics 35 | 36 | >>> table = get_cohort_table(b.WeekEvents('registered'), b.WeekEvents('active')) 37 | 38 | Columns of the table show what share of registered users is still 39 | active the same week after registration, then one week after, then two 40 | weeks after the registration, etc. 41 | 42 | By default the table displays 20 rows. 43 | 44 | The first row represents the statistics for the cohort of users registered 45 | 19 weeks ago. The second row represents the same statistics for users 46 | registered 18 weeks ago, and so on until finally the latest row shows users 47 | registered this week. Naturally, the last row will contain only one cell, 48 | the number of users that were registered this week AND were active this 49 | week as well. 
def get_cohort_row(cohort, activity, cols, use_percent=True):
    """
    Build one CohortRow for `cohort`, measured against `activity`.

    The row is named after the cohort's period start and carries the cohort
    size. Cell N holds the number of cohort members also present in
    `activity` shifted N periods forward, as a percentage of the cohort
    size when `use_percent` is true. Periods starting at or after the
    current UTC time are not included.
    """
    cutoff = datetime.datetime.utcnow()

    label = cohort.period_start().strftime('%d %b %Y')
    total = len(cohort)
    result = CohortRow(label, total)

    for step in range(cols):
        shifted = activity.delta(step)  # forward
        if shifted.period_start() >= cutoff:
            # This and all later periods have not started yet.
            break
        overlap = cohort & shifted
        if not use_percent:
            value = len(overlap)
        elif total == 0:
            # Empty cohort: avoid dividing by zero.
            value = 0
        else:
            value = len(overlap) * 100.0 / total
        result.cells.append(value)
    return result
class Bitmapist(object):
    """
    Core bitmapist object

    Holds the Redis connection and the tracking settings, and exposes event
    classes (``YearEvents``, ``DayEvents``, ``BitOpAnd``, etc.) bound to
    this instance as attributes.
    """

    # Should hourly be tracked as default?
    # Note that this can have huge implications in amounts
    # of memory that Redis uses (especially with huge integers)
    track_hourly = False

    # Should unique events be tracked as default?
    # Kept equal to the constructor default; instances always override it.
    track_unique = True

    def __init__(self,
                 connection_or_url=None,
                 track_hourly=False,
                 track_unique=True,
                 finished_ops_expire=3600 * 24,
                 unfinished_ops_expire=60,
                 key_prefix='bitmapist_'):
        """
        - connection_or_url: a `redis.StrictRedis` instance, or a URL
          accepted by `redis.StrictRedis.from_url`. When omitted, a client
          with redis-py default settings is created for this instance
        - track_hourly / track_unique: defaults used by `mark_event` and
          `unmark_event` when they are not passed explicitly
        - finished_ops_expire / unfinished_ops_expire: stored for the event
          classes (presumably expiration times, in seconds, of temporary
          bit operation keys -- see bitmapist4.events)
        - key_prefix: string prepended to every Redis key
        """
        # The default client is created here rather than in the signature:
        # a default argument would be built once at import time and shared
        # by all instances.
        if connection_or_url is None:
            connection_or_url = redis.StrictRedis()
        if isinstance(connection_or_url, redis.StrictRedis):
            self.connection = connection_or_url
        else:
            self.connection = redis.StrictRedis.from_url(connection_or_url)
        self.track_hourly = track_hourly
        self.track_unique = track_unique
        self.finished_ops_expire = finished_ops_expire
        self.unfinished_ops_expire = unfinished_ops_expire
        self.key_prefix = key_prefix
        # Active transaction pipeline, or None outside of a transaction
        self.pipe = None

        # Per-instance subclasses carrying a `bitmapist` class attribute
        # which points back to this object
        kw = {'bitmapist': self}
        self.UniqueEvents = type('UniqueEvents', (ev.UniqueEvents, ),
                                 kw)  # type: Type[ev.UniqueEvents]
        self.YearEvents = type('YearEvents', (ev.YearEvents, ),
                               kw)  # type: Type[ev.YearEvents]
        self.MonthEvents = type('MonthEvents', (ev.MonthEvents, ),
                                kw)  # type: Type[ev.MonthEvents]
        self.WeekEvents = type('WeekEvents', (ev.WeekEvents, ),
                               kw)  # type: Type[ev.WeekEvents]
        self.DayEvents = type('DayEvents', (ev.DayEvents, ),
                              kw)  # type: Type[ev.DayEvents]
        self.HourEvents = type('HourEvents', (ev.HourEvents, ),
                               kw)  # type: Type[ev.HourEvents]
        self.BitOpAnd = type('BitOpAnd', (ev.BitOpAnd, ),
                             kw)  # type: Type[ev.BitOpAnd]
        self.BitOpOr = type('BitOpOr', (ev.BitOpOr, ),
                            kw)  # type: Type[ev.BitOpOr]
        self.BitOpXor = type('BitOpXor', (ev.BitOpXor, ),
                             kw)  # type: Type[ev.BitOpXor]
        self.BitOpNot = type('BitOpNot', (ev.BitOpNot, ),
                             kw)  # type: Type[ev.BitOpNot]

    def mark_event(self,
                   event_name,
                   uuid,
                   timestamp=None,
                   track_hourly=None,
                   track_unique=None):
        """
        Marks an event as "happened" for a specific moment. The function
        stores the event for the day, week and month, and optionally
        for the hour, as well as the unique event.

        - event_name is the name of the event to track
        - uuid is the unique id of the subject (typically user id). The id
          should not be huge
        - timestamp is an optional moment of time which date should be used as
          a reference point, default is to `datetime.utcnow()`
        - track_hourly and track_unique override the instance settings for
          this call; None means "use the instance setting"

        Examples:

            # Mark id 1 as active
            b.mark_event('active', 1)

            # Mark task completed for id 252
            b.mark_event('tasks:completed', 252)
        """
        self._mark(event_name, uuid, timestamp, 1, track_hourly, track_unique)

    def unmark_event(self,
                     event_name,
                     uuid,
                     timestamp=None,
                     track_hourly=None,
                     track_unique=None):
        """
        Marks an event as "not happened" for a specific moment. The function
        clears the event for the day, week and month, and optionally
        for the hour, as well as the unique event.

        Accepts the same arguments as `mark_event`.
        """
        self._mark(event_name, uuid, timestamp, 0, track_hourly, track_unique)

    def _mark(self, event_name, uuid, timestamp, value, track_hourly,
              track_unique):
        """
        Set bit `uuid` to `value` in every bucket tracked for the event.

        Inside a transaction the commands are queued on the transaction
        pipeline; otherwise they are sent at once in a pipeline of their own.
        """
        if timestamp is None:
            timestamp = datetime.datetime.utcnow()
        if track_hourly is None:
            track_hourly = self.track_hourly
        if track_unique is None:
            track_unique = self.track_unique

        obj_classes = [self.MonthEvents, self.WeekEvents, self.DayEvents]
        if track_hourly:
            obj_classes.append(self.HourEvents)
        if track_unique:
            obj_classes.append(self.UniqueEvents)

        in_transaction = self.pipe is not None
        pipe = self.pipe if in_transaction else self.connection.pipeline()

        for obj_class in obj_classes:
            pipe.setbit(
                obj_class.from_date(event_name, timestamp).redis_key, uuid,
                value)

        if not in_transaction:
            pipe.execute()

    def start_transaction(self):
        """
        Start queueing mark/unmark commands until `commit_transaction`.

        Raises RuntimeError if a transaction is already in progress.
        """
        if self.pipe is not None:
            raise RuntimeError("Transaction already started")
        self.pipe = self.connection.pipeline()

    def commit_transaction(self):
        """
        Send all queued commands to the server and end the transaction.

        Raises RuntimeError if no transaction is in progress. The
        transaction is ended even if executing the pipeline fails, so that
        a new one can be started afterwards.
        """
        if self.pipe is None:
            raise RuntimeError("Transaction not started")
        # Detach the pipeline first: should execute() raise, the object
        # must not be left stuck in the "transaction started" state.
        pipe, self.pipe = self.pipe, None
        pipe.execute()

    def rollback_transaction(self):
        """
        Drop all queued commands and end the transaction, if there is one.
        """
        self.pipe = None

    @contextmanager
    def transaction(self):
        """
        Context manager committing the transaction on success and rolling
        it back if the block (or the commit itself) raises.
        """
        self.start_transaction()
        try:
            yield
            self.commit_transaction()
        except BaseException:
            self.rollback_transaction()
            raise

    def mark_unique(self, event_name, uuid):
        """
        Mark unique event as "happened with a user"

        Unique event (aka "user flag") is an event which doesn't depend on date.
        Can be used for storing user properties, A/B testing, extra filtering,
        etc.

        - event_name: The name of the event, could be "active" or "new_signups"
        - uuid: a unique id, typically user id. The id should not be huge

        Example:

            # Mark id 42 as premium
            b.mark_unique('premium', 42)
        """
        self._mark_unique(event_name, uuid, value=1)

    def unmark_unique(self, event_name, uuid):
        """
        Mark unique event as "not happened with a user"

        Unique event (aka "user flag") is an event which doesn't depend on date.
        Can be used for storing user properties, A/B testing, extra filtering,
        etc.

        - event_name: The name of the event, could be "active" or "new_signups"
        - uuid: a unique id, typically user id. The id should not be huge

        Example:

            # Mark id 42 as not premium anymore
            b.unmark_unique('premium', 42)
        """
        self._mark_unique(event_name, uuid, value=0)

    def _mark_unique(self, event_name, uuid, value):
        """
        Set bit `uuid` of the unique event to `value`, using the transaction
        pipeline when a transaction is in progress.
        """
        conn = self.connection if self.pipe is None else self.pipe
        redis_key = self.UniqueEvents(event_name).redis_key
        conn.setbit(redis_key, uuid, value)

    def get_event_names(self, prefix='', batch=10000):
        """
        Return the sorted list of all event names. Optional `prefix` value
        is used to filter only subset of keys, and `batch` is passed to
        the server as the SCAN count hint.

        Names of temporary bit operation keys (starting with "bitop_") are
        not returned.
        """
        expr = '{}{}*'.format(self.key_prefix, prefix)
        prefix_len = len(self.key_prefix)
        ret = set()
        for result in self.connection.scan_iter(match=expr, count=batch):
            # Keys arrive as bytes unless the client decodes responses
            if isinstance(result, bytes):
                result = result.decode()
            # Keys look like "<key_prefix><event_name>_<date>" (see
            # prefix_key). The prefix is removed by length, so prefixes
            # with any number of underscores work; the date is everything
            # after the last underscore.
            event_name = result[prefix_len:].rpartition('_')[0]
            if not event_name.startswith('bitop_'):
                ret.add(event_name)
        return sorted(ret)

    def delete_all_events(self):
        """
        Delete all events from the database.

        Removes every key starting with `key_prefix`, including temporary
        bit operation keys.
        """
        keys = self.connection.keys('{}*'.format(self.key_prefix))
        if keys:
            self.connection.delete(*keys)

    def delete_temporary_bitop_keys(self):
        """
        Delete all temporary keys that are used when using bit operations.
        """
        keys = self.connection.keys('{}bitop_*'.format(self.key_prefix))
        if keys:
            self.connection.delete(*keys)

    def prefix_key(self, event_name, date):
        """
        Return the Redis key for `event_name` and the string form of `date`:
        "<key_prefix><event_name>_<date>".
        """
        return '{}{}_{}'.format(self.key_prefix, event_name, date)
def test_bit_operations(bitmapist, bitmapist_copy):
    """AND/OR bit operations over monthly events, plain and nested."""
    now = datetime.utcnow()
    # Last day of the previous calendar month. A fixed "30 days ago" offset
    # stays within the current month when the test runs on the 31st, which
    # made the per-month counts below wrong on those days.
    last_month = now.replace(day=1) - timedelta(days=1)

    # 123 has been active for two months
    bitmapist.mark_event('active', 123, timestamp=now)
    bitmapist.mark_event('active', 123, timestamp=last_month)

    # 224 has only been active last_month
    bitmapist.mark_event('active', 224, timestamp=last_month)

    # Assert basic premises
    assert bitmapist.MonthEvents('active', last_month.year,
                                 last_month.month).get_count() == 2
    assert bitmapist.MonthEvents('active', now.year,
                                 now.month).get_count() == 1

    # Try out with bit AND operation, evaluated through a second Bitmapist
    # instance
    active_2_months = bitmapist_copy.BitOpAnd(
        bitmapist.MonthEvents('active', last_month.year, last_month.month),
        bitmapist.MonthEvents('active', now.year, now.month))
    assert active_2_months.get_count() == 1
    assert 123 in active_2_months
    assert 224 not in active_2_months
    active_2_months.delete()

    # Try out with bit OR operation
    assert bitmapist.BitOpOr(
        bitmapist.MonthEvents('active', last_month.year, last_month.month),
        bitmapist.MonthEvents('active', now.year, now.month)).get_count() == 2

    # Same AND operation, evaluated through the primary instance
    active_2_months = bitmapist.BitOpAnd(
        bitmapist.MonthEvents('active', last_month.year, last_month.month),
        bitmapist.MonthEvents('active', now.year, now.month),
    )
    assert active_2_months.get_count() == 1
    active_2_months.delete()

    # Try nested operations
    active_2_months = bitmapist.BitOpAnd(
        bitmapist.BitOpAnd(
            bitmapist.MonthEvents('active', last_month.year, last_month.month),
            bitmapist.MonthEvents('active', now.year, now.month)),
        bitmapist.MonthEvents('active', now.year, now.month))

    assert 123 in active_2_months
    assert 224 not in active_2_months
    active_2_months.delete()
bitmapist.mark_event('task1', 111, timestamp=now) 143 | bitmapist.mark_event('task1', 111, timestamp=tom) 144 | bitmapist.mark_event('task2', 111, timestamp=now) 145 | bitmapist.mark_event('task2', 111, timestamp=tom) 146 | bitmapist.mark_event('task1', 222, timestamp=now) 147 | bitmapist.mark_event('task1', 222, timestamp=tom) 148 | bitmapist.mark_event('task2', 222, timestamp=now) 149 | bitmapist.mark_event('task2', 222, timestamp=tom) 150 | 151 | now_events = bitmapist.BitOpAnd( 152 | bitmapist.DayEvents('task1', now.year, now.month, now.day), 153 | bitmapist.DayEvents('task2', now.year, now.month, now.day)) 154 | 155 | tom_events = bitmapist.BitOpAnd( 156 | bitmapist.DayEvents('task1', tom.year, tom.month, tom.day), 157 | bitmapist.DayEvents('task2', tom.year, tom.month, tom.day)) 158 | 159 | both_events = bitmapist.BitOpAnd(now_events, tom_events) 160 | 161 | assert len(now_events) == len(tom_events) 162 | assert len(now_events) == len(both_events) 163 | 164 | 165 | def test_bitop_key_sharing(bitmapist): 166 | today = datetime.utcnow() 167 | 168 | bitmapist.mark_event('task1', 111, timestamp=today) 169 | bitmapist.mark_event('task2', 111, timestamp=today) 170 | bitmapist.mark_event('task1', 222, timestamp=today) 171 | bitmapist.mark_event('task2', 222, timestamp=today) 172 | 173 | ev1_task1 = bitmapist.DayEvents('task1', today.year, today.month, 174 | today.day) 175 | ev1_task2 = bitmapist.DayEvents('task2', today.year, today.month, 176 | today.day) 177 | ev1_both = bitmapist.BitOpAnd(ev1_task1, ev1_task2) 178 | 179 | ev2_task1 = bitmapist.DayEvents('task1', today.year, today.month, 180 | today.day) 181 | ev2_task2 = bitmapist.DayEvents('task2', today.year, today.month, 182 | today.day) 183 | ev2_both = bitmapist.BitOpAnd(ev2_task1, ev2_task2) 184 | 185 | assert ev1_both.redis_key == ev2_both.redis_key 186 | assert len(ev1_both) == len(ev1_both) == 2 187 | ev1_both.delete() 188 | assert len(ev1_both) == len(ev1_both) == 0 189 | 190 | 191 | def 
test_events_marked(bitmapist): 192 | now = datetime.utcnow() 193 | 194 | assert bitmapist.MonthEvents('active', now.year, 195 | now.month).get_count() == 0 196 | assert not bitmapist.MonthEvents('active', now.year, 197 | now.month).has_events_marked() 198 | 199 | bitmapist.mark_event('active', 123, timestamp=now) 200 | 201 | assert bitmapist.MonthEvents('active', now.year, 202 | now.month).get_count() == 1 203 | assert bitmapist.MonthEvents('active', now.year, 204 | now.month).has_events_marked() 205 | 206 | 207 | def test_get_event_names(bitmapist): 208 | event_names = {'foo', 'bar', 'baz', 'spam', 'egg'} 209 | for e in event_names: 210 | bitmapist.mark_event(e, 1) 211 | bitmapist.BitOpAnd(bitmapist.DayEvents('foo'), bitmapist.DayEvents('bar')) 212 | assert set(bitmapist.get_event_names(batch=2)) == event_names 213 | 214 | 215 | def test_get_event_names_prefix(bitmapist): 216 | event_names = {'foo', 'bar', 'baz', 'spam', 'egg'} 217 | for e in event_names: 218 | bitmapist.mark_event(e, 1) 219 | bitmapist.BitOpAnd(bitmapist.DayEvents('foo'), bitmapist.DayEvents('bar')) 220 | assert set(bitmapist.get_event_names(prefix='b', 221 | batch=2)) == {'bar', 'baz'} 222 | 223 | 224 | def test_bit_operations_magic(bitmapist): 225 | bitmapist.mark_event('foo', 1) 226 | bitmapist.mark_event('foo', 2) 227 | bitmapist.mark_event('bar', 2) 228 | bitmapist.mark_event('bar', 3) 229 | foo = bitmapist.DayEvents('foo') 230 | bar = bitmapist.DayEvents('bar') 231 | assert list(foo & bar) == [2] 232 | assert list(foo | bar) == [1, 2, 3] 233 | assert list(foo ^ bar) == [1, 3] 234 | assert list(~foo & bar) == [3] 235 | 236 | 237 | def test_year_events(bitmapist): 238 | bitmapist.mark_event('foo', 1) 239 | assert 1 in bitmapist.YearEvents('foo') 240 | -------------------------------------------------------------------------------- /static/bitmapist.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | bitmapist 5 | Created with Sketch. 
6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | bitmapist 23 | 24 | 25 | 26 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ![bitmapist](https://raw.githubusercontent.com/Doist/bitmapist4/master/static/bitmapist.png "bitmapist") 2 | 3 | 4 | [![Build Status](https://travis-ci.org/Doist/bitmapist4.svg?branch=master)](https://travis-ci.org/Doist/bitmapist4) 5 | 6 | **NEW!** Try out our new standalone [bitmapist-server](https://github.com/Doist/bitmapist-server), which improves memory efficiency 443 times and makes your setup cheaper and more scalable. It's fully compatible with bitmapist that runs on Redis. 7 | 8 | # bitmapist: a powerful analytics library for Redis 9 | 10 | This Python library makes it possible to implement real-time, highly scalable analytics that can answer the following questions: 11 | 12 | * Has user 123 been online today? This week? This month? 13 | * Has user 123 performed action "X"? 14 | * How many users have been active this month? This hour? 15 | * How many unique users have performed action "X" this week? 16 | * How many % of users that were active last week are still active? 17 | * How many % of users that were active last month are still active this month? 18 | * What users performed action "X"? 19 | 20 | This library is very easy to use and enables you to create your own reports easily. 21 | 22 | Using Redis bitmaps you can store events for millions of users in a very small amount of memory (megabytes). 23 | 24 | Note however that you should be careful about using huge ids as this could require larger amounts of memory. Ids should be in range [0, 2^32). 25 | 26 | Additionally bitmapist can generate cohort graphs that can do the following: 27 | * Cohort over user retention 28 | * How many % of users that were active last [days, weeks, months] are still active? 
29 | * How many % of users that performed action X also performed action Y (and this over time) 30 | * And a lot of other things! 31 | 32 | If you want to read more about bitmaps please read the following: 33 | 34 | * http://blog.getspool.com/2011/11/29/fast-easy-realtime-metrics-using-redis-bitmaps/ 35 | * http://redis.io/commands/setbit 36 | * http://en.wikipedia.org/wiki/Bit_array 37 | * http://www.slideshare.net/crashlytics/crashlytics-on-redis-analytics 38 | 39 | 40 | 41 | # Installation 42 | 43 | Can be installed very easily via: 44 | 45 | $ pip install bitmapist4 46 | 47 | 48 | # Ports 49 | 50 | * PHP port: https://github.com/jeremyFreeAgent/Bitter 51 | 52 | 53 | # Examples 54 | 55 | Setting things up: 56 | 57 | ```python 58 | import bitmapist4 59 | b = bitmapist4.Bitmapist() 60 | ``` 61 | 62 | Mark user 123 as active and as having played a song: 63 | 64 | ```python 65 | b.mark_event('active', 123) 66 | b.mark_event('song:played', 123) 67 | ``` 68 | 69 | Answer if user 123 has been active this month: 70 | 71 | ```python 72 | assert 123 in b.MonthEvents('active') 73 | assert 123 in b.MonthEvents('song:played') 74 | ``` 75 | 76 | 77 | How many users have been active this week?: 78 | 79 | ```python 80 | len(b.WeekEvents('active')) 81 | ``` 82 | 83 | Iterate over all users active this week: 84 | 85 | ```python 86 | for uid in b.WeekEvents('active'): 87 | print(uid) 88 | ``` 89 | 90 | Unmark that user 123 was active and had played a song: 91 | ```python 92 | b.unmark_event('active', 123) 93 | b.unmark_event('song:played', 123) 94 | ``` 95 | 96 | To explore any specific day, week, month or year instead of the current one, 97 | you can create an event from any datetime object with a `from_date` class 98 | method. 
99 | 100 | ```python 101 | specific_date = datetime.datetime(2018, 1, 1) 102 | ev = b.MonthEvents.from_date('active', specific_date) 103 | print(len(ev)) 104 | ``` 105 | 106 | There are methods `prev` and `next` returning "sibling" events and 107 | allowing you to walk through events in time without any sophisticated 108 | iterators. A `delta` method allows you to jump forward or backward for 109 | more than one step. Uniform API allows you to use all types of base events 110 | (from hour to year) with the same code. 111 | 112 | ```python 113 | 114 | current_month = b.MonthEvents('active') 115 | prev_month = current_month.prev() 116 | next_month = current_month.next() 117 | year_ago = current_month.delta(-12) 118 | ``` 119 | 120 | Every event object has `period_start` and `period_end` methods to find a 121 | time span of the event. This can be useful for caching values when the caching 122 | of "events in future" is not desirable: 123 | 124 | ```python 125 | 126 | ev = b.MonthEvents.from_date('active', dt) 127 | if ev.period_end() < datetime.datetime.utcnow(): 128 | cache.set('active_users_<...>', len(ev)) 129 | ``` 130 | 131 | 132 | Tracking hourly is disabled (to save memory!). You can enable it with a 133 | constructor argument. 134 | 135 | ```python 136 | b = bitmapist4.Bitmapist(track_hourly=True) 137 | ``` 138 | 139 | Additionally you can supply an extra argument to `mark_event` to bypass the default value: 140 | 141 | ```python 142 | b.mark_event('active', 123, track_hourly=False) 143 | ``` 144 | 145 | 146 | ## Unique events 147 | 148 | Sometimes the date of the event makes little or no sense and you are more interested 149 | if that specific event happened at least once in a lifetime for a user. 150 | 151 | There is a `UniqueEvents` model for this purpose. The model creates only one 152 | Redis key and doesn't depend on the date. 153 | 154 | You can combine unique events with other types of events. 
155 | 156 | A/B testing example: 157 | 158 | ```python 159 | 160 | active = b.DayEvents('active') 161 | form_a = b.UniqueEvents('signup_form:classic') 162 | form_b = b.UniqueEvents('signup_form:new') 163 | 164 | print("Active users, signed up with classic form", len(active & form_a)) 165 | print("Active users, signed up with new form", len(active & form_b)) 166 | ``` 167 | 168 | You can mark these users with `b.mark_unique` or you can automatically 169 | populate the extra unique cohort for all marked keys: 170 | 171 | ```python 172 | b = bitmapist4.Bitmapist(track_unique=True) 173 | b.mark_event('premium', 1) 174 | assert 1 in b.UniqueEvents('premium') 175 | ``` 176 | 177 | ## Perform bit operations 178 | 179 | How many users that were active last month are still active this month? 180 | 181 | ```python 182 | ev = b.MonthEvents('active') 183 | active_2months = ev & ev.prev() 184 | print(len(active_2months)) 185 | 186 | # Is 123 active for 2 months? 187 | assert 123 in active_2months 188 | ``` 189 | 190 | Operators `&`, `|`, `^` and `~` are supported. 191 | 192 | This works with nested bit operations (imagine what you can do with this ;-))! 193 | 194 | 195 | ## Delete events 196 | 197 | If you want to permanently remove marked events for any time period you can use the `delete()` method: 198 | 199 | ```python 200 | ev = b.MonthEvents.from_date('active', last_month) 201 | ev.delete() 202 | ``` 203 | 204 | If you want to remove all bitmapist events use: 205 | ```python 206 | b.delete_all_events() 207 | ``` 208 | 209 | Results of bit operations are cached by default. They're cached for 60 seconds 210 | for operations that contain non-finished periods, and for 24 hours otherwise. 
211 | 212 | You may want to reset the cache explicitly: 213 | 214 | ```python 215 | ev = b.MonthEvents('active') 216 | active_2months = ev & ev.prev() 217 | # Delete the temporary AND operation 218 | active_2months.delete() 219 | 220 | # delete all bit operations (slow if you have many millions of keys in Redis) 221 | b.delete_temporary_bitop_keys() 222 | ``` 223 | 224 | ## Bulk updates with transactions 225 | 226 | If you often perform multiple updates at once, you can benefit from Redis 227 | pipelines, wrapped as transactions inside bitmapist. 228 | 229 | ```python 230 | with b.transaction(): 231 | b.mark_event('active', 123) 232 | b.mark_event('song:played', 123) 233 | ``` 234 | 235 | 236 | # Migration from previous version 237 | 238 | The API of the "bitmapist4.Bitmapist" instance is mostly compatible with the 239 | API of the previous version of bitmapist (module-level). Notable changes are outlined 240 | below. 241 | 242 | - Removed the "system" attribute for choosing the server. You are supposed to 243 | use different Bitmapist class instances instead. If you used "system" to 244 | work with pipelines, you should switch to transactions instead. 245 | - bitmapist.TRACK_HOURLY and bitmapist.TRACK_UNIQUE module-level constants 246 | moved to bitmapist4.Bitmapist attributes and can be set up with a class 247 | constructor. 248 | - On a database level, new bitmapist4 uses "bitmapist_" prefix for Redis keys, 249 | while old bitmapist uses "trackist_" for historical reasons. If you want 250 | to keep using the old database, or want to use bitmapist and bitmapist4 251 | against the same database, you need to explicitly set the key prefix 252 | to "trackist_". 253 | - If you use bitmapist-server, make sure that you use the version 1.2 or newer. 254 | This version adds the support for EXPIRE command which is used to expire 255 | temporary bitop keys. 
256 | 257 | 258 | Replace old code which could look like this: 259 | 260 | ```python 261 | import bitmapist 262 | bitmapist.setup_redis('default', 'localhost', 6380) 263 | ... 264 | bitmapist.mark_event('active', user_id) 265 | ``` 266 | 267 | With something looking like this: 268 | 269 | ```python 270 | from bitmapist4 import Bitmapist 271 | bitmapist = Bitmapist('redis://localhost:6380', key_prefix='trackist_') 272 | ... 273 | bitmapist.mark_event('active', user_id) 274 | ``` 275 | 276 | 277 | 278 | # Bitmapist cohort 279 | 280 | Cohort is a group of subjects who share a defining characteristic (typically 281 | subjects who experienced a common event in a selected time period, such as 282 | birth or graduation). 283 | 284 | You can get the cohort table using `bitmapist4.cohort.get_cohort_table()` 285 | function. 286 | 287 | Each row of this table answers the question "what part of the `cohort` 288 | performed `activity` over time", and Nth cell of that row represents the 289 | number of users (absolute or in percent) which still perform the activity 290 | N days (or weeks, or months) after. 291 | 292 | Each new row of the table unfolds the behavior of a different, similar 293 | cohort over time. The latest row displays the behavior of the cohort, 294 | provided as an argument, the one above displays the behavior of the similar 295 | cohort, but shifted 1 day (or week, or month) ago, etc. 296 | 297 | For example, consider the following cohort statistics: 298 | 299 | ``` 300 | table = get_cohort_table(b.WeekEvents('registered'), b.WeekEvents('active')) 301 | ``` 302 | 303 | This table shows what share of registered users is still active 304 | in the same week as the registration, then one week after, then two weeks 305 | after the registration, etc. 306 | 307 | By default the table displays 20 rows. 308 | 309 | The first row represents the statistics from cohort of users, registered 310 | 20 weeks ago. 
The second row represents the same statistics for users, 311 | registered 19 weeks ago, and so on until finally the latest row shows users 312 | registered this week. Naturally, the last row will contain only one cell, 313 | the number of users that were registered this week AND were active this 314 | week as well. 315 | 316 | 317 | Then you may render it yourself to HTML, or export it to a Pandas dataframe 318 | with the df() method. 319 | 320 | Sample from user activity on http://www.gharchive.org/ 321 | 322 | ```python 323 | In [1]: from bitmapist4 import Bitmapist, cohort 324 | 325 | In [2]: b = Bitmapist() 326 | 327 | In [3]: cohort.get_cohort_table(b.WeekEvents('active'), b.WeekEvents('active'), rows=5, use_percent=False).df() 328 | Out[3]: 329 | cohort 0 1 2 3 4 330 | 05 Nov 2018 137420 137420 25480.0 18358.0 21575.0 18430.0 331 | 12 Nov 2018 150975 150975 22195.0 25833.0 21165.0 NaN 332 | 19 Nov 2018 121417 121417 22477.0 15796.0 NaN NaN 333 | 26 Nov 2018 152027 152027 25606.0 NaN NaN NaN 334 | 03 Dec 2018 130470 130470 NaN NaN NaN NaN 335 | ``` 336 | 337 | The dataframe can be further colorized (to be displayed in Jupyter notebooks) 338 | with stylize(). 339 | 340 | 341 | --- 342 | 343 | Copyright: 2012-2019 by Doist Ltd. 
344 | 345 | License: BSD 346 | -------------------------------------------------------------------------------- /bitmapist4/events.py: -------------------------------------------------------------------------------- 1 | from builtins import range, bytes 2 | import calendar 3 | import datetime 4 | 5 | 6 | class BaseEvents(object): 7 | 8 | bitmapist = None 9 | redis_key = None 10 | 11 | def has_events_marked(self): 12 | return self.bitmapist.connection.exists(self.redis_key) 13 | 14 | def delete(self): 15 | self.bitmapist.connection.delete(self.redis_key) 16 | 17 | def __eq__(self, other): 18 | other_key = getattr(other, 'redis_key', None) 19 | if other_key is None: 20 | return NotImplemented 21 | return self.redis_key == other_key 22 | 23 | def get_uuids(self): 24 | val = self.bitmapist.connection.get(self.redis_key) 25 | if val is None: 26 | return 27 | 28 | val = bytes(val) 29 | 30 | for char_num, char in enumerate(val): 31 | # shortcut 32 | if char == 0: 33 | continue 34 | # find set bits, generate smth like [1, 0, ...] 
35 | bits = [(char >> i) & 1 for i in range(7, -1, -1)] 36 | # list of positions with ones 37 | set_bits = list(pos for pos, val in enumerate(bits) if val) 38 | # yield everything we need 39 | for bit in set_bits: 40 | yield char_num * 8 + bit 41 | 42 | def __iter__(self): 43 | for item in self.get_uuids(): 44 | yield item 45 | 46 | def __invert__(self): 47 | return self.bitmapist.BitOpNot(self) 48 | 49 | def __or__(self, other): 50 | return self.bitmapist.BitOpOr(self, other) 51 | 52 | def __and__(self, other): 53 | return self.bitmapist.BitOpAnd(self, other) 54 | 55 | def __xor__(self, other): 56 | return self.bitmapist.BitOpXor(self, other) 57 | 58 | def get_count(self): 59 | count = self.bitmapist.connection.bitcount(self.redis_key) 60 | return count 61 | 62 | def __len__(self): 63 | return self.get_count() 64 | 65 | def __contains__(self, uuid): 66 | if self.bitmapist.connection.getbit(self.redis_key, uuid): 67 | return True 68 | else: 69 | return False 70 | 71 | def delta(self, value): 72 | raise NotImplementedError('Must be implemented in subclass') 73 | 74 | def next(self): 75 | """ next object in a datetime line """ 76 | return self.delta(value=1) 77 | 78 | def prev(self): 79 | """ prev object in a datetime line """ 80 | return self.delta(value=-1) 81 | 82 | def period_start(self): 83 | raise NotImplementedError('Must be implemented in subclass') 84 | 85 | def period_end(self): 86 | raise NotImplementedError('Must be implemented in subclass') 87 | 88 | def event_finished(self): 89 | return self.period_end() < datetime.datetime.utcnow() 90 | 91 | def __repr__(self): 92 | return '{self.__class__.__name__}("{self.event_name}")'.format( 93 | self=self) 94 | 95 | 96 | class UniqueEvents(BaseEvents): 97 | @classmethod 98 | def from_date(cls, event_name, dt=None): 99 | return cls(event_name) 100 | 101 | def __init__(self, event_name): 102 | self.event_name = event_name 103 | self.redis_key = self.bitmapist.prefix_key(event_name, 'u') 104 | 105 | def conn(self): 
106 | return self.bitmapist.connection 107 | 108 | def delta(self, value): 109 | return self 110 | 111 | def event_finished(self): 112 | return False 113 | 114 | 115 | class YearEvents(BaseEvents): 116 | """ 117 | Events for a year. 118 | 119 | Example:: 120 | 121 | YearEvents('active', 2012) 122 | """ 123 | 124 | @classmethod 125 | def from_date(cls, event_name, dt=None): 126 | dt = dt or datetime.datetime.utcnow() 127 | return cls(event_name, dt.year) 128 | 129 | def __init__(self, event_name, year=None): 130 | now = datetime.datetime.utcnow() 131 | self.event_name = event_name 132 | self.year = not_none(year, now.year) 133 | 134 | months = [] 135 | for m in range(1, 13): 136 | months.append(self.bitmapist.MonthEvents(event_name, self.year, m)) 137 | or_op = self.bitmapist.BitOpOr(*months) 138 | self.redis_key = or_op.redis_key 139 | 140 | def delta(self, value): 141 | return self.__class__(self.event_name, self.year + value) 142 | 143 | def period_start(self): 144 | return datetime.datetime(self.year, 1, 1) 145 | 146 | def period_end(self): 147 | return datetime.datetime(self.year, 12, 31, 23, 59, 59, 999999) 148 | 149 | def __repr__(self): 150 | return ('{self.__class__.__name__}("{self.event_name}", ' 151 | '{self.year})').format(self=self) 152 | 153 | 154 | class MonthEvents(BaseEvents): 155 | """ 156 | Events for a month. 
157 | 158 | Example:: 159 | 160 | MonthEvents('active', 2012, 10) 161 | """ 162 | 163 | @classmethod 164 | def from_date(cls, event_name, dt=None): 165 | dt = dt or datetime.datetime.utcnow() 166 | return cls(event_name, dt.year, dt.month) 167 | 168 | def __init__(self, event_name, year=None, month=None): 169 | now = datetime.datetime.utcnow() 170 | self.event_name = event_name 171 | self.year = not_none(year, now.year) 172 | self.month = not_none(month, now.month) 173 | self.redis_key = self.bitmapist.prefix_key( 174 | event_name, '%s-%s' % (self.year, self.month)) 175 | 176 | def delta(self, value): 177 | year, month = add_month(self.year, self.month, value) 178 | return self.__class__(self.event_name, year, month) 179 | 180 | def period_start(self): 181 | return datetime.datetime(self.year, self.month, 1) 182 | 183 | def period_end(self): 184 | _, day = calendar.monthrange(self.year, self.month) 185 | return datetime.datetime(self.year, self.month, day, 23, 59, 59, 186 | 999999) 187 | 188 | def __repr__(self): 189 | return ('{self.__class__.__name__}("{self.event_name}", {self.year}, ' 190 | '{self.month})').format(self=self) 191 | 192 | 193 | class WeekEvents(BaseEvents): 194 | """ 195 | Events for a week. 
196 | 197 | Example:: 198 | 199 | WeekEvents('active', 2012, 48) 200 | """ 201 | 202 | @classmethod 203 | def from_date(cls, event_name, dt=None): 204 | dt = dt or datetime.datetime.utcnow() 205 | dt_year, dt_week, _ = dt.isocalendar() 206 | return cls(event_name, dt_year, dt_week) 207 | 208 | def __init__(self, event_name, year=None, week=None): 209 | now = datetime.datetime.utcnow() 210 | now_year, now_week, _ = now.isocalendar() 211 | self.event_name = event_name 212 | self.year = not_none(year, now_year) 213 | self.week = not_none(week, now_week) 214 | self.redis_key = self.bitmapist.prefix_key( 215 | event_name, 'W%s-%s' % (self.year, self.week)) 216 | 217 | def delta(self, value): 218 | dt = iso_to_gregorian(self.year, self.week + value, 1) 219 | year, week, _ = dt.isocalendar() 220 | return self.__class__(self.event_name, year, week) 221 | 222 | def period_start(self): 223 | s = iso_to_gregorian(self.year, self.week, 1) # mon 224 | return datetime.datetime(s.year, s.month, s.day) 225 | 226 | def period_end(self): 227 | e = iso_to_gregorian(self.year, self.week, 7) # mon 228 | return datetime.datetime(e.year, e.month, e.day, 23, 59, 59, 999999) 229 | 230 | def __repr__(self): 231 | return ('{self.__class__.__name__}("{self.event_name}", {self.year}, ' 232 | '{self.week})').format(self=self) 233 | 234 | 235 | class DayEvents(BaseEvents): 236 | """ 237 | Events for a day. 
238 | 239 | Example:: 240 | 241 | DayEvents('active', 2012, 10, 23) 242 | """ 243 | 244 | @classmethod 245 | def from_date(cls, event_name, dt=None): 246 | dt = dt or datetime.datetime.utcnow() 247 | return cls(event_name, dt.year, dt.month, dt.day) 248 | 249 | def __init__(self, event_name, year=None, month=None, day=None): 250 | now = datetime.datetime.utcnow() 251 | self.event_name = event_name 252 | self.year = not_none(year, now.year) 253 | self.month = not_none(month, now.month) 254 | self.day = not_none(day, now.day) 255 | self.redis_key = self.bitmapist.prefix_key( 256 | event_name, '%s-%s-%s' % (self.year, self.month, self.day)) 257 | 258 | def delta(self, value): 259 | dt = datetime.date(self.year, self.month, 260 | self.day) + datetime.timedelta(days=value) 261 | return self.__class__(self.event_name, dt.year, dt.month, dt.day) 262 | 263 | def period_start(self): 264 | return datetime.datetime(self.year, self.month, self.day) 265 | 266 | def period_end(self): 267 | return datetime.datetime(self.year, self.month, self.day, 23, 59, 59, 268 | 999999) 269 | 270 | def __repr__(self): 271 | return ('{self.__class__.__name__}("{self.event_name}", {self.year}, ' 272 | '{self.month}, {self.day})').format(self=self) 273 | 274 | 275 | class HourEvents(BaseEvents): 276 | """ 277 | Events for a hour. 
278 | 279 | Example:: 280 | 281 | HourEvents('active', 2012, 10, 23, 13) 282 | """ 283 | 284 | @classmethod 285 | def from_date(cls, event_name, dt=None): 286 | dt = dt or datetime.datetime.utcnow() 287 | return cls(event_name, dt.year, dt.month, dt.day, dt.hour) 288 | 289 | def __init__(self, event_name, year=None, month=None, day=None, hour=None): 290 | now = datetime.datetime.utcnow() 291 | self.event_name = event_name 292 | self.year = not_none(year, now.year) 293 | self.month = not_none(month, now.month) 294 | self.day = not_none(day, now.day) 295 | self.hour = not_none(hour, now.hour) 296 | self.redis_key = self.bitmapist.prefix_key( 297 | event_name, 298 | '%s-%s-%s-%s' % (self.year, self.month, self.day, self.hour)) 299 | 300 | def delta(self, value): 301 | dt = datetime.datetime(self.year, self.month, self.day, 302 | self.hour) + datetime.timedelta(hours=value) 303 | return self.__class__(self.event_name, dt.year, dt.month, dt.day, 304 | dt.hour) 305 | 306 | def period_start(self): 307 | return datetime.datetime(self.year, self.month, self.day, self.hour) 308 | 309 | def period_end(self): 310 | return datetime.datetime(self.year, self.month, self.day, self.hour, 311 | 59, 59, 999999) 312 | 313 | def __repr__(self): 314 | return ('{self.__class__.__name__}("{self.event_name}", {self.year}, ' 315 | '{self.month}, {self.day}, {self.hour})').format(self=self) 316 | 317 | 318 | class BitOperation(BaseEvents): 319 | """ 320 | Base class for bit operations (AND, OR, XOR). 321 | 322 | Please note that each bit operation creates a new key prefixed with 323 | `bitmapist_bitop_`. These temporary keys can be deleted with 324 | `delete_temporary_bitop_keys`. 325 | 326 | You can even nest bit operations. 
327 | 328 | Example:: 329 | 330 | active_2_months = BitOpAnd( 331 | MonthEvents('active', last_month.year, last_month.month), 332 | MonthEvents('active', now.year, now.month) 333 | ) 334 | 335 | active_2_months = BitOpAnd( 336 | BitOpAnd( 337 | MonthEvents('active', last_month.year, last_month.month), 338 | MonthEvents('active', now.year, now.month) 339 | ), 340 | MonthEvents('active', now.year, now.month) 341 | ) 342 | 343 | """ 344 | 345 | def __init__(self, op_name, *events): 346 | self.events = events 347 | event_redis_keys = [ev.redis_key for ev in events] 348 | self.redis_key = '%sbitop_%s_%s' % (self.bitmapist.key_prefix, op_name, 349 | '-'.join(event_redis_keys)) 350 | if self.bitmapist.pipe is not None: 351 | pipe = self.bitmapist.pipe 352 | else: 353 | pipe = self.bitmapist.connection.pipeline() 354 | if self.event_finished(): 355 | timeout = self.bitmapist.finished_ops_expire 356 | else: 357 | timeout = self.bitmapist.unfinished_ops_expire 358 | pipe.bitop(op_name, self.redis_key, *event_redis_keys) 359 | pipe.expire(self.redis_key, timeout) 360 | if not self.bitmapist.pipe: 361 | pipe.execute() 362 | 363 | def delta(self, value): 364 | events = [ev.delta(value) for ev in self.events] 365 | return self.__class__(*events) 366 | 367 | def event_finished(self): 368 | return all(ev.event_finished() for ev in self.events) 369 | 370 | def period_start(self): 371 | return min(ev.period_start() for ev in self.events) 372 | 373 | def period_end(self): 374 | return min(ev.period_end() for ev in self.events) 375 | 376 | def __repr__(self): 377 | ev_repr = ', '.join(repr(ev) for ev in self.events) 378 | return '{0.__class__.__name__}({1})'.format(self, ev_repr) 379 | 380 | 381 | class BitOpAnd(BitOperation): 382 | def __init__(self, *events): 383 | super(BitOpAnd, self).__init__('AND', *events) 384 | 385 | 386 | class BitOpOr(BitOperation): 387 | def __init__(self, *events): 388 | super(BitOpOr, self).__init__('OR', *events) 389 | 390 | 391 | class 
BitOpXor(BitOperation): 392 | def __init__(self, *events): 393 | super(BitOpXor, self).__init__('XOR', *events) 394 | 395 | 396 | class BitOpNot(BitOperation): 397 | def __init__(self, *events): 398 | super(BitOpNot, self).__init__('NOT', *events) 399 | 400 | 401 | def add_month(year, month, delta): 402 | """ 403 | Helper function which adds `delta` months to current `(year, month)` tuple 404 | and returns a new valid tuple `(year, month)` 405 | """ 406 | year, month = divmod(year * 12 + month + delta, 12) 407 | if month == 0: 408 | month = 12 409 | year = year - 1 410 | return year, month 411 | 412 | 413 | def not_none(*keys): 414 | """ 415 | Helper function returning first value which is not None 416 | """ 417 | for key in keys: 418 | if key is not None: 419 | return key 420 | 421 | 422 | def iso_year_start(iso_year): 423 | """ 424 | The gregorian calendar date of the first day of the given ISO year 425 | """ 426 | fourth_jan = datetime.date(iso_year, 1, 4) 427 | delta = datetime.timedelta(fourth_jan.isoweekday() - 1) 428 | return fourth_jan - delta 429 | 430 | 431 | def iso_to_gregorian(iso_year, iso_week, iso_day): 432 | """ 433 | Gregorian calendar date for the given ISO year, week and day 434 | """ 435 | year_start = iso_year_start(iso_year) 436 | return year_start + datetime.timedelta( 437 | days=iso_day - 1, weeks=iso_week - 1) 438 | --------------------------------------------------------------------------------