├── .coveragerc ├── .coveralls.yml ├── .gitignore ├── .travis.yml ├── AUTHORS ├── CHANGES ├── LICENSE.txt ├── MANIFEST.in ├── README.md ├── ROADMAP ├── Vagrantfile ├── docs ├── Makefile ├── _static │ ├── fb_notification_system.png │ └── notification_system.png ├── activity.rst ├── adding_data.rst ├── background_tasks.rst ├── cassandra_backend.rst ├── choosing_a_storage_backend.rst ├── conf.py ├── design.rst ├── fabfile.rst ├── feed_setup.rst ├── index.rst ├── installation.rst ├── make.bat ├── metrics.rst ├── notification_systems.rst ├── querying.rst ├── readme.rst ├── settings.rst ├── stream_framework.aggregators.rst ├── stream_framework.feed_managers.rst ├── stream_framework.feeds.aggregated_feed.rst ├── stream_framework.feeds.rst ├── stream_framework.rst ├── stream_framework.storage.cassandra.rst ├── stream_framework.storage.redis.rst ├── stream_framework.storage.redis.structures.rst ├── stream_framework.storage.rst ├── stream_framework.verbs.rst ├── support.rst ├── testing.rst └── verbs.rst ├── fabfile.py ├── pytest.ini ├── setup.py └── stream_framework ├── __init__.py ├── activity.py ├── aggregators ├── __init__.py └── base.py ├── conftest.py ├── default_settings.py ├── exceptions.py ├── feed_managers ├── __init__.py └── base.py ├── feeds ├── __init__.py ├── aggregated_feed │ ├── __init__.py │ ├── base.py │ ├── cassandra.py │ ├── notification_feed.py │ └── redis.py ├── base.py ├── cassandra.py ├── memory.py ├── notification_feed │ ├── __init__.py │ ├── base.py │ └── redis.py └── redis.py ├── metrics ├── __init__.py ├── base.py ├── python_statsd.py └── statsd.py ├── serializers ├── __init__.py ├── activity_serializer.py ├── aggregated_activity_serializer.py ├── base.py ├── cassandra │ ├── __init__.py │ ├── activity_serializer.py │ └── aggregated_activity_serializer.py ├── dummy.py ├── pickle_serializer.py ├── simple_timeline_serializer.py └── utils.py ├── settings.py ├── storage ├── __init__.py ├── base.py ├── base_lists_storage.py ├── cassandra │ ├── __init__.py │ ├── activity_storage.py │ ├── connection.py │ ├── models.py │ ├── monkey_patch.py │ └── timeline_storage.py ├── hbase │ └── __init__.py ├── memory.py └── redis │ ├── __init__.py │ ├── activity_storage.py │ ├── connection.py │ ├── lists_storage.py │ ├── structures │ ├── __init__.py │ ├── base.py │ ├── hash.py │ ├── list.py │ └── sorted_set.py │ └── timeline_storage.py ├── tasks.py ├── tests ├── __init__.py ├── activity.py ├── aggregators │ └── __init__.py ├── feeds │ ├── __init__.py │ ├── aggregated_feed │ │ ├── __init__.py │ │ ├── base.py │ │ ├── cassandra.py │ │ ├── notification_feed.py │ │ └── redis.py │ ├── base.py │ ├── cassandra.py │ ├── memory.py │ ├── notification_feed │ │ ├── __init__.py │ │ ├── base.py │ │ └── redis.py │ └── redis.py ├── managers │ ├── __init__.py │ ├── base.py │ ├── cassandra.py │ └── redis.py ├── serializers.py ├── settings.py ├── storage │ ├── __init__.py │ ├── base.py │ ├── base_lists_storage.py │ ├── cassandra.py │ ├── memory.py │ └── redis │ │ ├── __init__.py │ │ ├── activity_storage.py │ │ ├── lists_storage.py │ │ ├── structures.py │ │ └── timeline_storage.py ├── utils │ └── __init__.py └── utils_test.py ├── utils ├── __init__.py ├── five.py ├── functional.py ├── timing.py └── validate.py └── verbs ├── __init__.py └── base.py /.coveragerc: -------------------------------------------------------------------------------- 1 | [report] 2 | exclude_lines = 3 | pragma: no cover 4 | raise NotImplementedError 5 | def __repr__ 6 | if self.debug: 7 | if settings.DEBUG 8 | raise AssertionError 9 | raise 
NotImplementedError 10 | if 0: 11 | if __name__ == .__main__.: 12 | [run] 13 | omit = stream_framework/__init__.py -------------------------------------------------------------------------------- /.coveralls.yml: -------------------------------------------------------------------------------- 1 | repo_token: 2E5AJ7x7K9hIqTi2RrWmOt32D9OGFTwBG 2 | service_name: travis-ci -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.log 2 | *.pot 3 | *.pyc 4 | *.egg-info 5 | /.cache 6 | local_settings.py 7 | 8 | .vagrant 9 | .DS_Store 10 | 11 | pinterest_example/static/CACHE/ 12 | .sass-cache 13 | .coverage 14 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | python: 3 | - 3.5 4 | - 3.4 5 | - 2.7 6 | sudo: false 7 | addons: 8 | apt: 9 | packages: 10 | - libev4 11 | - libev-dev 12 | notifications: 13 | email: 14 | - tbarbugli@gmail.com 15 | - thierryschellenbach@gmail.com 16 | cache: pip 17 | env: 18 | - CASSANDRA_DRIVER="3.7.1" 19 | - CASSANDRA_DRIVER="3.6.0" 20 | - CASSANDRA_DRIVER="3.5.0" 21 | - CASSANDRA_DRIVER="3.4.1" 22 | - CASSANDRA_DRIVER="3.3.0" 23 | - CASSANDRA_DRIVER="3.2.2" 24 | - CASSANDRA_DRIVER="3.1.1" 25 | - CASSANDRA_DRIVER="3.0.0" 26 | - CASSANDRA_DRIVER="2.7.2" 27 | 28 | matrix: 29 | fast_finish: true 30 | exclude: 31 | - python: 3.5 32 | env: CASSANDRA_DRIVER="3.3.0" 33 | - python: 3.5 34 | env: CASSANDRA_DRIVER="3.2.2" 35 | - python: 3.5 36 | env: CASSANDRA_DRIVER="3.1.1" 37 | - python: 3.5 38 | env: CASSANDRA_DRIVER="3.0.0" 39 | - python: 3.5 40 | env: CASSANDRA_DRIVER="2.7.2" 41 | - python: 3.4 42 | env: CASSANDRA_DRIVER="3.3.0" 43 | 44 | services: 45 | - redis 46 | - cassandra 47 | before_install: 48 | - git clone https://github.com/tbarbugli/stream_framework_example.git pinterest_example 49 | before_script: 50 | - echo 'DROP KEYSPACE test_stream_framework;' | /usr/local/cassandra/bin/cqlsh; 51 | install: 52 | - pip install --upgrade pip 53 | - pip install cassandra-driver==$CASSANDRA_DRIVER 54 | - pip install -e .[redis,cassandra] 55 | - pip freeze 56 | - pip install cassandra-driver==$CASSANDRA_DRIVER 57 | script: 58 | - CQLENG_ALLOW_SCHEMA_MANAGEMENT="yes" py.test -l --tb=short stream_framework/tests 59 | -------------------------------------------------------------------------------- /AUTHORS: -------------------------------------------------------------------------------- 1 | Project Authors 2 | * Thierry Schellenbach 3 | * Tommaso Barbugli 4 | * Guyon Moree 5 | * Alessandro Sansoterra (Example app design) 6 | 7 | Contributors 8 | * Anislav Atanasov 9 | * Miguel Gomard 10 | * Jelte Fennema 11 | -------------------------------------------------------------------------------- /CHANGES: -------------------------------------------------------------------------------- 1 | ==== 1.4.0 ==== 2 | 3 | - make add_to_storage signature consistent on all implementations (args and kwargs were missing for some) 4 | - Add support for cassandra-driver 3.2.0+ 5 | - Use extras_require pip feature instead of custom install arguments 6 | 7 | ==== 1.3.5 ==== 8 | 9 | - Fix ignored redis_server timeline parameter 10 | 11 | ==== 1.3.4 ==== 12 | 13 | - faster deserialization for Cassandra feeds (skip ORM layer during serialization) 14 | - optional cassandra install 15 | - allow to use cassandra-driver v3 via install option 16 
| 17 | Breaking changes: 18 | ----------------- 19 | 20 | Cassandra users: 21 | * activity serializers now get a dictionary instead of a CQL model instances when loading data 22 | * feeds' timelines that implements get_slice_from_storage now must return a dict instead of CQL model instance 23 | 24 | 25 | ==== 1.3.3 ==== 26 | 27 | - easier test setup 28 | - add default protocol_version for Cassandra (v2) 29 | - default serialization_id for aggregated activities is now with millisecond resolution 30 | - add built-in sorting to Activity class 31 | 32 | ==== 1.3.2 ==== 33 | 34 | - support for using stream-framework with cassandra-driver 3.0.0. this enables support for cassandra 3.3 35 | 36 | ==== 1.3.1 ==== 37 | 38 | - update cassandra driver to 2.7.2 39 | 40 | ==== 1.3.0 ==== 41 | 42 | - removed feedly legacy module 43 | - move to cassandra driver 2.7 (CQLEngine dev happens on the driver since 2.5) 44 | 45 | ==== 1.2.0 ==== 46 | 47 | - python 2.7 and 3.4 support 48 | 49 | ==== 1.1.3 ==== 50 | 51 | - remove batch inserts for cassandra backend, move to async parallel statements 52 | - millisecond timestamp resolution 53 | - manager's follow_user method now respects async parameter 54 | - custom cassandra models are now easy to use 55 | 56 | ==== 1.1.2 ==== 57 | 58 | - Fixed potential memory leak with memoize decorator (Cassandra only) 59 | 60 | ==== 1.1.1 ==== 61 | 62 | - fixed pypi link to github 63 | 64 | ==== 1.1.0 ==== 65 | 66 | - Support activity_id (asc and desc) sorting for Redis backed feeds (thanks to Anislav) 67 | - Configurable aggregated feeds update_seen_at and update_read_at fields 68 | - Faster redis counts (using ZCARD instead of ZCOUNT -inf +inf) 69 | 70 | Breaking changes: 71 | ---------------- 72 | Aggregated activities update_seen_at and update_read_at attributes are not stored in UTC by default 73 | 74 | ==== 1.0.1 ==== 75 | 76 | - Fixes some legacy feedly names in code 77 | - Better documentation 78 | 79 | ==== 1.0.0 ==== 80 | 81 | - Renamed from Feedly to Stream Framework 82 | - Feedly install via pypi will be supported till 22-01-2015 83 | - The new package is available on pypi here https://pypi.python.org/pypi/stream_framework/ 84 | - And support the from feedly import * syntax till 19-03-2015 85 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | Copyright (c) Thierry Schellenbach 2 | 3 | (http://www.mellowmorning.com) 4 | All rights reserved. 5 | 6 | Redistribution and use of this software in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 7 | - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 8 | - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 9 | - Neither the name of Thierry Schellenbach. nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission of Thierry Schellenbach. 10 | 11 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 12 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include LICENSE.txt 2 | include AUTHORS 3 | include CHANGES 4 | include README.md 5 | prune pinterest_example 6 | -------------------------------------------------------------------------------- /ROADMAP: -------------------------------------------------------------------------------- 1 | Feel free to request features on the Github issue tracker. 2 | Our roadmap currently looks like this: 3 | 4 | * Documentation improvements 5 | * Database backend so you can get started easily 6 | 7 | Future 8 | 9 | * Relevancy based feeds 10 | * Include an API similar to getstream.io so Ruby, PHP and Node folks can also use Stream Framework. 11 | 12 | -------------------------------------------------------------------------------- /Vagrantfile: -------------------------------------------------------------------------------- 1 | # -*- mode: ruby -*- 2 | # vi: set ft=ruby : 3 | 4 | Vagrant::Config.run do |config| 5 | config.vm.box = "precise64" 6 | config.vm.box_url = "http://files.vagrantup.com/precise64.box" 7 | 8 | config.vm.share_folder "workspace", "/vagrant_workspace", "../" 9 | config.vm.customize ["modifyvm", :id, "--memory", 1024] 10 | config.vm.network :hostonly, '192.168.50.55' 11 | 12 | config.vm.provision :shell, :inline => "apt-get update" 13 | config.vm.provision :puppet do |puppet| 14 | puppet.manifests_path = "vagrant/puppet/manifests" 15 | puppet.module_path = "vagrant/puppet/modules" 16 | puppet.manifest_file = "local_dev.pp" 17 | facts = { 18 | :ec2_userdata => { 19 | :role => 'local_dev', 20 | :environment => 'development' 21 | }.to_json, 22 | :vagrant => true, 23 | } 24 | puppet.facter = facts 25 | end 26 | end 27 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | PAPER = 8 | BUILDDIR = _build 9 | 10 | # User-friendly check for sphinx-build 11 | ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1) 12 | $(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/) 13 | endif 14 | 15 | # Internal variables. 16 | PAPEROPT_a4 = -D latex_paper_size=a4 17 | PAPEROPT_letter = -D latex_paper_size=letter 18 | ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 19 | # the i18n builder cannot share the environment and doctrees with the others 20 | I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 
21 | 22 | .PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext 23 | 24 | help: 25 | @echo "Please use \`make ' where is one of" 26 | @echo " html to make standalone HTML files" 27 | @echo " dirhtml to make HTML files named index.html in directories" 28 | @echo " singlehtml to make a single large HTML file" 29 | @echo " pickle to make pickle files" 30 | @echo " json to make JSON files" 31 | @echo " htmlhelp to make HTML files and a HTML help project" 32 | @echo " qthelp to make HTML files and a qthelp project" 33 | @echo " devhelp to make HTML files and a Devhelp project" 34 | @echo " epub to make an epub" 35 | @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" 36 | @echo " latexpdf to make LaTeX files and run them through pdflatex" 37 | @echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx" 38 | @echo " text to make text files" 39 | @echo " man to make manual pages" 40 | @echo " texinfo to make Texinfo files" 41 | @echo " info to make Texinfo files and run them through makeinfo" 42 | @echo " gettext to make PO message catalogs" 43 | @echo " changes to make an overview of all changed/added/deprecated items" 44 | @echo " xml to make Docutils-native XML files" 45 | @echo " pseudoxml to make pseudoxml-XML files for display purposes" 46 | @echo " linkcheck to check all external links for integrity" 47 | @echo " doctest to run all doctests embedded in the documentation (if enabled)" 48 | 49 | clean: 50 | rm -rf $(BUILDDIR)/* 51 | 52 | html: 53 | $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html 54 | @echo 55 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." 56 | 57 | dirhtml: 58 | $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml 59 | @echo 60 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." 61 | 62 | singlehtml: 63 | $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml 64 | @echo 65 | @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." 66 | 67 | pickle: 68 | $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle 69 | @echo 70 | @echo "Build finished; now you can process the pickle files." 71 | 72 | json: 73 | $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json 74 | @echo 75 | @echo "Build finished; now you can process the JSON files." 76 | 77 | htmlhelp: 78 | $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp 79 | @echo 80 | @echo "Build finished; now you can run HTML Help Workshop with the" \ 81 | ".hhp project file in $(BUILDDIR)/htmlhelp." 82 | 83 | qthelp: 84 | $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp 85 | @echo 86 | @echo "Build finished; now you can run "qcollectiongenerator" with the" \ 87 | ".qhcp project file in $(BUILDDIR)/qthelp, like this:" 88 | @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/Feedly.qhcp" 89 | @echo "To view the help file:" 90 | @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/Feedly.qhc" 91 | 92 | devhelp: 93 | $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp 94 | @echo 95 | @echo "Build finished." 96 | @echo "To view the help file:" 97 | @echo "# mkdir -p $$HOME/.local/share/devhelp/Feedly" 98 | @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/Feedly" 99 | @echo "# devhelp" 100 | 101 | epub: 102 | $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub 103 | @echo 104 | @echo "Build finished. The epub file is in $(BUILDDIR)/epub." 
105 | 106 | latex: 107 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 108 | @echo 109 | @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." 110 | @echo "Run \`make' in that directory to run these through (pdf)latex" \ 111 | "(use \`make latexpdf' here to do that automatically)." 112 | 113 | latexpdf: 114 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 115 | @echo "Running LaTeX files through pdflatex..." 116 | $(MAKE) -C $(BUILDDIR)/latex all-pdf 117 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 118 | 119 | latexpdfja: 120 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 121 | @echo "Running LaTeX files through platex and dvipdfmx..." 122 | $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja 123 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 124 | 125 | text: 126 | $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text 127 | @echo 128 | @echo "Build finished. The text files are in $(BUILDDIR)/text." 129 | 130 | man: 131 | $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man 132 | @echo 133 | @echo "Build finished. The manual pages are in $(BUILDDIR)/man." 134 | 135 | texinfo: 136 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 137 | @echo 138 | @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." 139 | @echo "Run \`make' in that directory to run these through makeinfo" \ 140 | "(use \`make info' here to do that automatically)." 141 | 142 | info: 143 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 144 | @echo "Running Texinfo files through makeinfo..." 145 | make -C $(BUILDDIR)/texinfo info 146 | @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." 147 | 148 | gettext: 149 | $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale 150 | @echo 151 | @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." 152 | 153 | changes: 154 | $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes 155 | @echo 156 | @echo "The overview file is in $(BUILDDIR)/changes." 157 | 158 | linkcheck: 159 | $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck 160 | @echo 161 | @echo "Link check complete; look for any errors in the above output " \ 162 | "or in $(BUILDDIR)/linkcheck/output.txt." 163 | 164 | doctest: 165 | $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest 166 | @echo "Testing of doctests in the sources finished, look at the " \ 167 | "results in $(BUILDDIR)/doctest/output.txt." 168 | 169 | xml: 170 | $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml 171 | @echo 172 | @echo "Build finished. The XML files are in $(BUILDDIR)/xml." 173 | 174 | pseudoxml: 175 | $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml 176 | @echo 177 | @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml." 
178 | -------------------------------------------------------------------------------- /docs/_static/fb_notification_system.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/llSourcell/Stream-Framework/e7535857af03c81c760b9265568287816ecac13c/docs/_static/fb_notification_system.png -------------------------------------------------------------------------------- /docs/_static/notification_system.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/llSourcell/Stream-Framework/e7535857af03c81c760b9265568287816ecac13c/docs/_static/notification_system.png -------------------------------------------------------------------------------- /docs/activity.rst: -------------------------------------------------------------------------------- 1 | Activity class 2 | ================== 3 | 4 | Activity is the core data type in Stream Framework; its implementation follows the `activitystream schema specification `_. 5 | An activity in Stream Framework is composed of an actor, a verb and an object; for example: "Geraldine posted a photo". 6 | The data stored in activities can be extended if necessary; depending on how you use Stream Framework you may or may not want to store extra information. 7 | Here are a few good rules of thumb to follow in case you are not sure whether some information should be stored in Stream Framework: 8 | 9 | Good choice: 10 | 11 | 1. Add a field used to perform aggregation (eg. object category) 12 | 2. You want to keep every piece of information needed to work with activities in Stream Framework (eg. avoid database lookups) 13 | 14 | Bad choice: 15 | 16 | 1. The data stored in the activity gets updated 17 | 2. The data requires a lot of storage 18 | 19 | 20 | Activity storage strategies 21 | *************************** 22 | 23 | Activities are stored in Stream Framework in a way that maximises the benefits of the storage backend used. 24 | 25 | When using the redis backend Stream Framework keeps data normalized; activities are stored in a special storage (activity storage) and user feeds only 26 | keep a reference (activity_id / serialization_id). 27 | This allows Stream Framework to keep the (expensive) memory usage as low as possible. 28 | 29 | When using Cassandra as storage Stream Framework denormalizes activities; there is no separate activity storage, instead every user feed keeps the complete 30 | activity. 31 | Doing so allows Stream Framework to minimise the number of Cassandra nodes to query when retrieving data or writing to feeds. 32 | 33 | In both storage backends activities are always stored in feeds sorted by their creation time (aka Activity.serialization_id). 34 | 35 | 36 | Extend the activity class 37 | ************************* 38 | 39 | .. versionadded:: 0.10.0 40 | 41 | You can subclass the activity model to add your own methods. 42 | After you've created your own activity model you need to hook it 43 | up to the feed. An example follows below: 44 | 45 | :: 46 | 47 | from stream_framework.activity import Activity 48 | 49 | # subclass the activity object 50 | class CustomActivity(Activity): 51 | def mymethod(self): 52 | pass 53 | 54 | # hookup the custom activity object to the Redis feed 55 | class CustomFeed(RedisFeed): 56 | activity_class = CustomActivity 57 | 58 | 59 | For aggregated feeds you can customize both the activity and the aggregated activity object.
60 | You can give this a try like this 61 | 62 | :: 63 | 64 | from stream_framework.activity import AggregatedActivity 65 | 66 | # define the custom aggregated activity 67 | class CustomAggregated(AggregatedActivity): 68 | pass 69 | 70 | # hook the custom classes up to the feed 71 | class RedisCustomAggregatedFeed(RedisAggregatedFeed): 72 | activity_class = CustomActivity 73 | aggregated_activity_class = CustomAggregated 74 | 75 | 76 | 77 | 78 | 79 | Activity serialization 80 | ********************** 81 | 82 | 83 | Activity order and uniqueness 84 | ***************************** 85 | 86 | 87 | Aggregated activities 88 | ********************* 89 | 90 | -------------------------------------------------------------------------------- /docs/adding_data.rst: -------------------------------------------------------------------------------- 1 | Adding data 2 | =========== 3 | 4 | You can add an Activity object to the feed using the add or add_many instructions. 5 | 6 | 7 | .. code:: python 8 | 9 | 10 | feed = UserPinFeed(13) 11 | feed.add(activity) 12 | 13 | # add many example 14 | feed.add_many([activity]) 15 | 16 | 17 | 18 | **What's an activity** 19 | 20 | The activity object is best described using an example. 21 | For Pinterest for instance a common activity would look like this: 22 | 23 | Thierry added an item to his board Surf Girls. 24 | 25 | In terms of the activity object this would translate to:: 26 | 27 | Activity( 28 | actor=13, # Thierry's user id 29 | verb=1, # The id associated with the Pin verb 30 | object=1, # The id of the newly created Pin object 31 | target=1, # The id of the Surf Girls board 32 | time=datetime.utcnow(), # The time the activity occured 33 | ) 34 | 35 | The names for these fields are based on the `activity stream spec 36 | `_. 37 | 38 | 39 | 40 | -------------------------------------------------------------------------------- /docs/background_tasks.rst: -------------------------------------------------------------------------------- 1 | Background Tasks with celery 2 | ============================ 3 | 4 | Stream Framework uses celery to do the heavy fanout write operations in the background. 5 | 6 | We really suggest you to have a look at `celery documentation`_ if you are not familiar with the project. 7 | 8 | **Fanout** 9 | 10 | When an activity is added Stream Framework will perform a fanout to all subscribed feeds. 11 | The base Stream Framework manager spawns one celery fanout task every 100 feeds. 12 | Change the value of `fanout_chunk_size` of your manager if you think this number is too low/high for you. 13 | 14 | Few things to keep in mind when doing so: 15 | 16 | 1. really high values leads to a mix of heavy tasks and light tasks (not good!) 17 | 2. publishing and consuming tasks introduce some overhead, dont spawn too many tasks 18 | 3. Stream Framework writes data in batches, thats a really good optimization you want to keep 19 | 4. huge tasks have more chances to timeout 20 | 21 | .. note:: When developing you can run fanouts without celery by setting `CELERY_ALWAYS_EAGER = True` 22 | 23 | 24 | Prioritise fanouts 25 | ******************************** 26 | 27 | Stream Framework partition fanout tasks in two priority groups. 28 | Fanouts with different priorities do exactly the same operations (adding/removing activities from/to a feed) 29 | the substantial difference is that they get published to different queues for processing. 
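For example, a minimal sketch of routing the two priority groups to separate Celery queues (the task paths below are assumptions, check ``stream_framework.tasks`` for the exact task names in your version, and the queue names are made up)::

    # Celery routing sketch: consume each queue with a dedicated worker pool,
    # e.g. run ``celery worker -Q feed_fanout_high`` for the high priority queue.
    CELERY_ROUTES = {
        'stream_framework.tasks.fanout_operation_hi_priority': {'queue': 'feed_fanout_high'},
        'stream_framework.tasks.fanout_operation_low_priority': {'queue': 'feed_fanout_low'},
    }
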
30 | Going back to our pinterest example app, you can use priorities to associate more resources to fanouts that target 31 | active users and send the ones for inactive users to a different cluster of workers. 32 | This also makes it easier and cheaper to keep active users' feeds updated during activity spikes because you need 33 | to scale up capacity less often. 34 | 35 | The Stream Framework manager is the best place to implement your high/low priority fanouts; in fact the `get_user_follower_ids` method 36 | is required to return the feed ids grouped by priority. 37 | 38 | eg:: 39 | 40 | class MyStreamManager(Manager): 41 | 42 | def get_user_follower_ids(self, user_id): 43 | follower_ids = { 44 | FanoutPriority.HIGH: get_follower_ids(user_id, active=True), 45 | FanoutPriority.LOW: get_follower_ids(user_id, active=False) 46 | } 47 | return follower_ids 48 | 49 | 50 | Celery and Django 51 | ***************** 52 | 53 | If this is the first time you are using Celery and Django together we suggest you `follow this document's instructions `_. 54 | 55 | It will guide you through the required steps to get Celery background processing up and running. 56 | 57 | 58 | Using other job queue libraries 59 | ******************************** 60 | 61 | As of today background processing is tied to Celery. 62 | 63 | While we are not planning to support other job queue libraries in the near future, using something different from Celery 64 | should be quite easy and can mostly be done by subclassing the feeds manager. 65 | 66 | .. _celery documentation: http://docs.celeryproject.org/en/latest/ 67 | -------------------------------------------------------------------------------- /docs/cassandra_backend.rst: -------------------------------------------------------------------------------- 1 | .. _cassandra_backend: 2 | 3 | Cassandra storage backend 4 | ========================= 5 | 6 | This document is specific to the Cassandra backend. 7 | 8 | Create keyspace and columnfamilies 9 | ********************************** 10 | 11 | Keyspace and columnfamilies for your feeds can be created via cqlengine's sync_table. 12 | 13 | :: 14 | 15 | from myapp.feeds import MyCassandraFeed 16 | from cqlengine.management import sync_table 17 | 18 | timeline = MyCassandraFeed.get_timeline_storage() 19 | sync_table(timeline.model) 20 | 21 | 22 | sync_table can also create missing columns but it will never delete removed columns. 23 | 24 | 25 | Use a custom activity model 26 | *************************** 27 | 28 | Since the Cassandra backend uses CQL3 column families, activities have a predefined schema. Cqlengine is used 29 | to read/write data from and to Cassandra. 30 | 31 | :: 32 | 33 | from cqlengine import columns 34 | from stream_framework.storage.cassandra import models 35 | 36 | 37 | class MyCustomActivity(models.Activity): 38 | actor = columns.Bytes(required=False) 39 | 40 | 41 | class MySuperAwesomeFeed(CassandraFeed): 42 | timeline_model = MyCustomActivity 43 | 44 | 45 | Remember to resync your column family when you add new columns (see above). 46 | -------------------------------------------------------------------------------- /docs/choosing_a_storage_backend.rst: -------------------------------------------------------------------------------- 1 | .. _choosing_a_storage_backend: 2 | 3 | Choosing a storage layer 4 | ======================== 5 | 6 | Currently Stream Framework supports both `Cassandra `_ and `Redis `_ as storage backends. 7 | 8 | **Summary** 9 | 10 | Redis is super easy to get started with and works fine for smaller use cases.
11 | If you're just getting started use Redis. 12 | When your data requirements become larger though it becomes really expensive 13 | to store all the data in Redis. For larger use cases we therefor recommend Cassandra. 14 | 15 | 16 | Redis (2.7 or newer) 17 | ******************** 18 | 19 | PROS: 20 | 21 | - Easy to install 22 | - Super reliable 23 | - Easy to maintain 24 | - Very fast 25 | 26 | CONS: 27 | 28 | - Expensive memory only storage 29 | - Manual sharding 30 | 31 | Redis stores its complete dataset in memory. This makes sure that all operations are 32 | always fast. It does however mean that you might need a lot of storage. 33 | 34 | A common approach is therefor to use Redis storage for some of your 35 | feeds and fall back to your database for less frequently requested data. 36 | 37 | Twitter currently uses this approach and Fashiolista has used a system 38 | like this in the first half of 2013. 39 | 40 | The great benefit of using Redis comes in easy of install, reliability 41 | and maintainability. Basically it just works and there's little you need 42 | to learn to maintain it. 43 | 44 | Redis doesn't support any form of cross machine distribution. So if you add a new 45 | node to your cluster you need to manual move or recreate the data. 46 | 47 | In conclusion I believe Redis is your best bet if you can fallback to 48 | the database when needed. 49 | 50 | Cassandra (2.0 or newer) 51 | ************************ 52 | 53 | PROS: 54 | 55 | - Stores to disk 56 | - Automatic sharding across nodes 57 | - Awesome monitoring tools 58 | (`opscenter `_) 59 | 60 | CONS: 61 | 62 | - Not as easy to setup 63 | - Hard to maintain 64 | 65 | Cassandra stores data to both disk and memory. Instagram has recently switched from Redis to Cassandra. 66 | Storing data to disk can potentially be a big cost saving. 67 | 68 | In addition adding new machines to your Cassandra cluster is a breeze. 69 | Cassandra will automatically distribute the data to new machines. 70 | 71 | If you are using amazon EC2 we suggest you to try Datastax's easy 72 | `AMI `_ 73 | to get started on AWS. 74 | 75 | 76 | -------------------------------------------------------------------------------- /docs/design.rst: -------------------------------------------------------------------------------- 1 | Stream Framework Design 2 | ----------------------- 3 | 4 | *The first approach* 5 | 6 | A first feed solution usually looks something like this: 7 | 8 | .. code:: sql 9 | 10 | SELECT * FROM tweets 11 | JOIN follow ON (follow.target_id = tweet.user_id) 12 | WHERE follow.user_id = 13 13 | 14 | This works in the beginning, and with a well tuned database will keep on 15 | working nicely for quite some time. However at some point the load 16 | becomes too much and this approach falls apart. Unfortunately it's very 17 | hard to split up the tweets in a meaningfull way. You could split it up 18 | by date or user, but every query will still hit many of your shards. 19 | Eventually this system collapses, read more about this in `Facebook's 20 | presentation `__. 21 | 22 | *Push or Push/Pull* 23 | 24 | In general there are two similar solutions to this 25 | problem. 26 | 27 | In the push approach you publish your activity (ie a tweet on twitter) 28 | to all of your followers. So basically you create a small list per user 29 | to which you insert the activities created by the people they follow. 30 | This involves a huge number of writes, but reads are really fast they 31 | can easily be sharded. 
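The sketch below only illustrates that idea; it is toy Python with in-memory data, not Stream Framework code::

    # Schematic push fanout: copy the new activity into every follower's feed.
    follower_ids = {13: [7, 42, 99]}   # author id -> follower ids (toy data)
    feeds = {}                         # follower id -> list of activities

    def fanout(activity, author_id):
        for follower_id in follower_ids.get(author_id, []):
            # one small list per user: many writes on publish, cheap sharded reads
            feeds.setdefault(follower_id, []).append(activity)

    fanout('tweet:1', 13)   # every follower of user 13 now has the tweet in their feed
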
32 | 33 | For the push/pull approach you implement the push based systems for a 34 | subset of your users. At Fashiolista for instance we used to have a push 35 | based approach for active users. For inactive users we only kept a small 36 | feed and eventually used a fallback to the database when we ran out of 37 | results. 38 | 39 | **Stream Framework** 40 | 41 | Stream Framework allows you to easily use Cassndra/Redis and Celery (an awesome 42 | task broker) to build infinitely scalable feeds. The high level 43 | functionality is located in 4 classes. 44 | 45 | - Activities 46 | - Feeds 47 | - Feed managers 48 | - Aggregators 49 | 50 | *Activities* are the blocks of content which are stored in a feed. It 51 | follows the nomenclatura from the [activity stream spec] [astream] 52 | [astream]: http://activitystrea.ms/specs/atom/1.0/#activity.summary 53 | Every activity therefor stores at least: 54 | 55 | - Time (the time of the activity) 56 | - Verb (the action, ie loved, liked, followed) 57 | - Actor (the user id doing the action) 58 | - Object (the object the action is related to) 59 | - Extra context (Used for whatever else you need to store at the 60 | activity level) 61 | 62 | Optionally you can also add a target (which is best explained in the 63 | activity docs) 64 | 65 | *Feeds* are sorted containers of activities. You can easily add and 66 | remove activities from them. 67 | 68 | *Stream Framework* classes (feed managers) handle the logic used in addressing the 69 | feed objects. They handle the complex bits of fanning out to all your 70 | followers when you create a new object (such as a tweet). 71 | 72 | In addition there are several utility classes which you will encounter 73 | 74 | - Serializers (classes handling serialization of Activity objects) 75 | - Aggregators (utility classes for creating smart/computed feeds based 76 | on algorithms) 77 | - Timeline Storage (cassandra or redis specific storage functions for 78 | sorted storage) 79 | - Activity Storage (cassandra or redis specific storage for hash/dict 80 | based storage) 81 | -------------------------------------------------------------------------------- /docs/fabfile.rst: -------------------------------------------------------------------------------- 1 | fabfile Module 2 | ============== 3 | 4 | .. automodule:: fabfile 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/feed_setup.rst: -------------------------------------------------------------------------------- 1 | Feed setup 2 | ========== 3 | 4 | A feed object contains activities. The example below shows you how to setup 5 | two feeds: 6 | 7 | .. code:: python 8 | 9 | # implement your feed with redis as storage 10 | 11 | from stream_framework.feeds.redis import RedisFeed 12 | 13 | class PinFeed(RedisFeed): 14 | key_format = 'feed:normal:%(user_id)s' 15 | 16 | class UserPinFeed(PinFeed): 17 | key_format = 'feed:user:%(user_id)s' 18 | 19 | 20 | Next up we need to hook up the Feeds to your Manager class. 21 | The Manager class knows how to fanout new activities to the feeds of all your followers. 22 | 23 | .. 
code:: python 24 | 25 | from stream_framework.feed_managers.base import Manager 26 | 27 | 28 | class PinManager(Manager): 29 | feed_classes = dict( 30 | normal=PinFeed, 31 | ) 32 | user_feed_class = UserPinFeed 33 | 34 | def add_pin(self, pin): 35 | activity = pin.create_activity() 36 | # add user activity adds it to the user feed, and starts the fanout 37 | self.add_user_activity(pin.user_id, activity) 38 | 39 | def get_user_follower_ids(self, user_id): 40 | ids = Follow.objects.filter(target=user_id).values_list('user_id', flat=True) 41 | return {FanoutPriority.HIGH:ids} 42 | 43 | manager = PinManager() 44 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | 2 | 3 | .. include:: readme.rst 4 | 5 | 6 | Documentation 7 | ------------- 8 | 9 | 10 | .. toctree:: 11 | :maxdepth: 4 12 | 13 | installation 14 | feed_setup 15 | adding_data 16 | verbs 17 | querying 18 | settings 19 | metrics 20 | testing 21 | support 22 | 23 | activity 24 | choosing_a_storage_backend 25 | background_tasks 26 | notification_systems 27 | design 28 | 29 | cassandra_backend 30 | 31 | API Docs 32 | -------- 33 | 34 | .. toctree:: 35 | :maxdepth: 4 36 | 37 | stream_framework 38 | 39 | 40 | Indices and tables 41 | ================== 42 | 43 | * :ref:`genindex` 44 | * :ref:`modindex` 45 | * :ref:`search` 46 | 47 | -------------------------------------------------------------------------------- /docs/installation.rst: -------------------------------------------------------------------------------- 1 | Installation 2 | ============ 3 | 4 | Installation is easy using ``pip`` both redis and cassandra dependencies are installed by the setup. 5 | 6 | 7 | .. code-block:: bash 8 | 9 | $ pip install Stream-Framework 10 | 11 | 12 | or get it from source 13 | 14 | .. code-block:: bash 15 | 16 | $ git clone https://github.com/tschellenbach/Stream-Framework.git 17 | $ cd Stream-Framework 18 | $ python setup.py install 19 | 20 | 21 | Depending on the backend you are going to use ( :ref:`choosing_a_storage_backend` ) you will need to have the backend server 22 | up and running. 23 | 24 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | REM Command file for Sphinx documentation 4 | 5 | if "%SPHINXBUILD%" == "" ( 6 | set SPHINXBUILD=sphinx-build 7 | ) 8 | set BUILDDIR=_build 9 | set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% . 10 | set I18NSPHINXOPTS=%SPHINXOPTS% . 11 | if NOT "%PAPER%" == "" ( 12 | set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS% 13 | set I18NSPHINXOPTS=-D latex_paper_size=%PAPER% %I18NSPHINXOPTS% 14 | ) 15 | 16 | if "%1" == "" goto help 17 | 18 | if "%1" == "help" ( 19 | :help 20 | echo.Please use `make ^` where ^ is one of 21 | echo. html to make standalone HTML files 22 | echo. dirhtml to make HTML files named index.html in directories 23 | echo. singlehtml to make a single large HTML file 24 | echo. pickle to make pickle files 25 | echo. json to make JSON files 26 | echo. htmlhelp to make HTML files and a HTML help project 27 | echo. qthelp to make HTML files and a qthelp project 28 | echo. devhelp to make HTML files and a Devhelp project 29 | echo. epub to make an epub 30 | echo. latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter 31 | echo. text to make text files 32 | echo. 
man to make manual pages 33 | echo. texinfo to make Texinfo files 34 | echo. gettext to make PO message catalogs 35 | echo. changes to make an overview over all changed/added/deprecated items 36 | echo. xml to make Docutils-native XML files 37 | echo. pseudoxml to make pseudoxml-XML files for display purposes 38 | echo. linkcheck to check all external links for integrity 39 | echo. doctest to run all doctests embedded in the documentation if enabled 40 | goto end 41 | ) 42 | 43 | if "%1" == "clean" ( 44 | for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i 45 | del /q /s %BUILDDIR%\* 46 | goto end 47 | ) 48 | 49 | 50 | %SPHINXBUILD% 2> nul 51 | if errorlevel 9009 ( 52 | echo. 53 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 54 | echo.installed, then set the SPHINXBUILD environment variable to point 55 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 56 | echo.may add the Sphinx directory to PATH. 57 | echo. 58 | echo.If you don't have Sphinx installed, grab it from 59 | echo.http://sphinx-doc.org/ 60 | exit /b 1 61 | ) 62 | 63 | if "%1" == "html" ( 64 | %SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html 65 | if errorlevel 1 exit /b 1 66 | echo. 67 | echo.Build finished. The HTML pages are in %BUILDDIR%/html. 68 | goto end 69 | ) 70 | 71 | if "%1" == "dirhtml" ( 72 | %SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml 73 | if errorlevel 1 exit /b 1 74 | echo. 75 | echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml. 76 | goto end 77 | ) 78 | 79 | if "%1" == "singlehtml" ( 80 | %SPHINXBUILD% -b singlehtml %ALLSPHINXOPTS% %BUILDDIR%/singlehtml 81 | if errorlevel 1 exit /b 1 82 | echo. 83 | echo.Build finished. The HTML pages are in %BUILDDIR%/singlehtml. 84 | goto end 85 | ) 86 | 87 | if "%1" == "pickle" ( 88 | %SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle 89 | if errorlevel 1 exit /b 1 90 | echo. 91 | echo.Build finished; now you can process the pickle files. 92 | goto end 93 | ) 94 | 95 | if "%1" == "json" ( 96 | %SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json 97 | if errorlevel 1 exit /b 1 98 | echo. 99 | echo.Build finished; now you can process the JSON files. 100 | goto end 101 | ) 102 | 103 | if "%1" == "htmlhelp" ( 104 | %SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp 105 | if errorlevel 1 exit /b 1 106 | echo. 107 | echo.Build finished; now you can run HTML Help Workshop with the ^ 108 | .hhp project file in %BUILDDIR%/htmlhelp. 109 | goto end 110 | ) 111 | 112 | if "%1" == "qthelp" ( 113 | %SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp 114 | if errorlevel 1 exit /b 1 115 | echo. 116 | echo.Build finished; now you can run "qcollectiongenerator" with the ^ 117 | .qhcp project file in %BUILDDIR%/qthelp, like this: 118 | echo.^> qcollectiongenerator %BUILDDIR%\qthelp\Feedly.qhcp 119 | echo.To view the help file: 120 | echo.^> assistant -collectionFile %BUILDDIR%\qthelp\Feedly.ghc 121 | goto end 122 | ) 123 | 124 | if "%1" == "devhelp" ( 125 | %SPHINXBUILD% -b devhelp %ALLSPHINXOPTS% %BUILDDIR%/devhelp 126 | if errorlevel 1 exit /b 1 127 | echo. 128 | echo.Build finished. 129 | goto end 130 | ) 131 | 132 | if "%1" == "epub" ( 133 | %SPHINXBUILD% -b epub %ALLSPHINXOPTS% %BUILDDIR%/epub 134 | if errorlevel 1 exit /b 1 135 | echo. 136 | echo.Build finished. The epub file is in %BUILDDIR%/epub. 137 | goto end 138 | ) 139 | 140 | if "%1" == "latex" ( 141 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex 142 | if errorlevel 1 exit /b 1 143 | echo. 
144 | echo.Build finished; the LaTeX files are in %BUILDDIR%/latex. 145 | goto end 146 | ) 147 | 148 | if "%1" == "latexpdf" ( 149 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex 150 | cd %BUILDDIR%/latex 151 | make all-pdf 152 | cd %BUILDDIR%/.. 153 | echo. 154 | echo.Build finished; the PDF files are in %BUILDDIR%/latex. 155 | goto end 156 | ) 157 | 158 | if "%1" == "latexpdfja" ( 159 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex 160 | cd %BUILDDIR%/latex 161 | make all-pdf-ja 162 | cd %BUILDDIR%/.. 163 | echo. 164 | echo.Build finished; the PDF files are in %BUILDDIR%/latex. 165 | goto end 166 | ) 167 | 168 | if "%1" == "text" ( 169 | %SPHINXBUILD% -b text %ALLSPHINXOPTS% %BUILDDIR%/text 170 | if errorlevel 1 exit /b 1 171 | echo. 172 | echo.Build finished. The text files are in %BUILDDIR%/text. 173 | goto end 174 | ) 175 | 176 | if "%1" == "man" ( 177 | %SPHINXBUILD% -b man %ALLSPHINXOPTS% %BUILDDIR%/man 178 | if errorlevel 1 exit /b 1 179 | echo. 180 | echo.Build finished. The manual pages are in %BUILDDIR%/man. 181 | goto end 182 | ) 183 | 184 | if "%1" == "texinfo" ( 185 | %SPHINXBUILD% -b texinfo %ALLSPHINXOPTS% %BUILDDIR%/texinfo 186 | if errorlevel 1 exit /b 1 187 | echo. 188 | echo.Build finished. The Texinfo files are in %BUILDDIR%/texinfo. 189 | goto end 190 | ) 191 | 192 | if "%1" == "gettext" ( 193 | %SPHINXBUILD% -b gettext %I18NSPHINXOPTS% %BUILDDIR%/locale 194 | if errorlevel 1 exit /b 1 195 | echo. 196 | echo.Build finished. The message catalogs are in %BUILDDIR%/locale. 197 | goto end 198 | ) 199 | 200 | if "%1" == "changes" ( 201 | %SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes 202 | if errorlevel 1 exit /b 1 203 | echo. 204 | echo.The overview file is in %BUILDDIR%/changes. 205 | goto end 206 | ) 207 | 208 | if "%1" == "linkcheck" ( 209 | %SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck 210 | if errorlevel 1 exit /b 1 211 | echo. 212 | echo.Link check complete; look for any errors in the above output ^ 213 | or in %BUILDDIR%/linkcheck/output.txt. 214 | goto end 215 | ) 216 | 217 | if "%1" == "doctest" ( 218 | %SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest 219 | if errorlevel 1 exit /b 1 220 | echo. 221 | echo.Testing of doctests in the sources finished, look at the ^ 222 | results in %BUILDDIR%/doctest/output.txt. 223 | goto end 224 | ) 225 | 226 | if "%1" == "xml" ( 227 | %SPHINXBUILD% -b xml %ALLSPHINXOPTS% %BUILDDIR%/xml 228 | if errorlevel 1 exit /b 1 229 | echo. 230 | echo.Build finished. The XML files are in %BUILDDIR%/xml. 231 | goto end 232 | ) 233 | 234 | if "%1" == "pseudoxml" ( 235 | %SPHINXBUILD% -b pseudoxml %ALLSPHINXOPTS% %BUILDDIR%/pseudoxml 236 | if errorlevel 1 exit /b 1 237 | echo. 238 | echo.Build finished. The pseudo-XML files are in %BUILDDIR%/pseudoxml. 239 | goto end 240 | ) 241 | 242 | :end 243 | -------------------------------------------------------------------------------- /docs/metrics.rst: -------------------------------------------------------------------------------- 1 | Metrics 2 | ======= 3 | 4 | Stream Framework collects metrics regarding feed operations. The default behaviour is to ignore collected metrics rather 5 | than sending them anywhere. 
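To actually send them somewhere you plug in a metric backend; the settings below are a sketch using the bundled StatsD support described in the next sections::

    STREAM_METRIC_CLASS = 'stream_framework.metrics.statsd.StatsdMetrics'
    STREAM_METRICS_OPTIONS = {'host': 'my.statsd.host.tld', 'port': 8125, 'prefix': 'stream'}
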
6 | 7 | You can configure the metric class with the ``STREAM_METRIC_CLASS`` setting and send options as a python dict via 8 | ``STREAM_METRICS_OPTIONS`` 9 | 10 | 11 | Sending metrics to Statsd 12 | ------------------------- 13 | 14 | Stream Framework comes with support for StatsD support, both statsd and python-statsd libraries are supported. 15 | 16 | If you use statsd you should use this metric class ``stream_framework.metrics.statsd.StatsdMetrics`` while if you are 17 | a user of python-statsd you should use ``stream_framework.metrics.python_statsd.StatsdMetrics``. 18 | 19 | The two libraries do the same job and both are suitable for production use. 20 | 21 | By default this two classes send metrics to ``localhost`` which is probably not what you want. 22 | 23 | In real life you will need something like this 24 | 25 | :: 26 | 27 | STREAM_METRICS_OPTIONS = { 28 | 'host': 'my.statsd.host.tld', 29 | 'port': 8125, 30 | 'prefix': 'stream' 31 | } 32 | 33 | 34 | Custom metric classes 35 | --------------------- 36 | 37 | If you need to send metrics to a not supported backend somewhere you only need to create your own subclass of stream_framework.metrics.base.Metrics 38 | and configure your application to use it. 39 | -------------------------------------------------------------------------------- /docs/notification_systems.rst: -------------------------------------------------------------------------------- 1 | Tutorial: building a notification feed 2 | ====================================== 3 | 4 | 5 | .. note:: 6 | 7 | We are still improving this tutorial. In its current state it might be a bit hard to follow. 8 | 9 | 10 | What is a notification system? 11 | ------------------------------ 12 | 13 | Building a scalable notification system is almost entirely identical to building an activity feed. 14 | From the user's perspective the functionality is pretty different. 15 | A notification system commonly shows activity related to your account. 16 | Whereas an activity stream shows activity by the people you follow. 17 | Examples of Fashiolista's notification system and Facebook's system are shown below. 18 | Fashiolista's system is running on Stream Framework. 19 | 20 | 21 | .. image:: _static/notification_system.png 22 | .. image:: _static/fb_notification_system.png 23 | 24 | It looks very different from an activity stream, but the technical implementation is almost identical. 25 | Only the Feed manager class is different since the notification system has no fanouts. 26 | 27 | .. note:: 28 | 29 | Remember, Fanout is the process which pushes a little bit of data to all of your 30 | followers in many small and asynchronous tasks. 31 | 32 | 33 | 34 | Tutorial 35 | -------- 36 | 37 | For this tutorial we'll show you how to customize and setup your own notification system. 38 | 39 | 40 | **Step 1 - Subclass NotificationFeed** 41 | 42 | As a first step we'll subclass NotificationFeed and customize the storage location and the aggregator. 43 | 44 | :: 45 | 46 | from stream_framework.feeds.aggregated_feed.notification_feed import RedisNotificationFeed 47 | 48 | class MyNotificationFeed(RedisNotificationFeed): 49 | # : they key format determines where the data gets stored 50 | key_format = 'feed:notification:%(user_id)s' 51 | 52 | # : the aggregator controls how the activities get aggregated 53 | aggregator_class = MyAggregator 54 | 55 | 56 | **Step 2 - Subclass the aggregator** 57 | 58 | Secondly we want to customize how activities get grouped together. 
Most notification systems need to aggregate activities. 59 | In this case we'll aggregate on verb and date. So the aggregations will show something like (thierry, peter and two other people liked your photo). 60 | 61 | :: 62 | 63 | class MyAggregator(BaseAggregator): 64 | ''' 65 | Aggregates based on the same verb and same time period 66 | ''' 67 | def get_group(self, activity): 68 | ''' 69 | Returns a group based on the day and verb 70 | ''' 71 | verb = activity.verb.id 72 | date = activity.time.date() 73 | group = '%s-%s' % (verb, date) 74 | return group 75 | 76 | **Step 3 - Test adding data** 77 | 78 | The aggregated feed uses the same API as the flat feed. You can simply add items by calling feed.add or feed.add_many. 79 | An example for inserting data is shown below: 80 | 81 | :: 82 | 83 | feed = MyNotificationFeed(user_id) 84 | activity = Activity( 85 | user_id, LoveVerb, object_id, influencer_id, time=created_at, 86 | extra_context=dict(entity_id=self.entity_id) 87 | ) 88 | feed.add(activity) 89 | print feed[:5] 90 | 91 | **Step 4 - Implement manager functionality** 92 | 93 | To keep our code clean we'll implement a very simple manager class to abstract away the above code. 94 | 95 | :: 96 | 97 | class MyNotification(object): 98 | ''' 99 | Abstract the access to the notification feed 100 | ''' 101 | def add_love(self, love): 102 | feed = MyNotificationFeed(user_id) 103 | activity = Activity( 104 | love.user_id, LoveVerb, love.id, love.influencer_id, 105 | time=love.created_at, extra_context=dict(entity_id=self.entity_id) 106 | ) 107 | feed.add(activity) 108 | 109 | -------------------------------------------------------------------------------- /docs/querying.rst: -------------------------------------------------------------------------------- 1 | Querying feeds 2 | ============== 3 | 4 | You can query the feed using Python slicing. In addition you can order 5 | and filter the feed on several predefined fields. Examples are shown below 6 | 7 | 8 | **Slicing**:: 9 | 10 | feed = RedisFeed(13) 11 | activities = feed[:10] 12 | 13 | 14 | **Filtering and Pagination**:: 15 | 16 | feed.filter(activity_id__gte=1)[:10] 17 | feed.filter(activity_id__lte=1)[:10] 18 | feed.filter(activity_id__gt=1)[:10] 19 | feed.filter(activity_id__lt=1)[:10] 20 | 21 | 22 | 23 | **Ordering feeds** 24 | 25 | .. versionadded:: 0.10.0 26 | This is only supported using Cassandra and Redis at the moment. 27 | 28 | :: 29 | 30 | feed.order_by('activity_id') 31 | feed.order_by('-activity_id') 32 | -------------------------------------------------------------------------------- /docs/settings.rst: -------------------------------------------------------------------------------- 1 | Settings 2 | ======== 3 | 4 | .. note:: Settings currently only support Django settings. To add support for Flask or other frameworks simply change stream_framework.settings.py 5 | 6 | Redis Settings 7 | ************** 8 | 9 | **STREAM_REDIS_CONFIG** 10 | 11 | The settings for redis, keep here the list of redis servers you want to use as feed storage 12 | 13 | Defaults to 14 | 15 | .. code-block:: python 16 | 17 | STREAM_REDIS_CONFIG = { 18 | 'default': { 19 | 'host': '127.0.0.1', 20 | 'port': 6379, 21 | 'db': 0, 22 | 'password': None 23 | }, 24 | } 25 | 26 | Cassandra Settings 27 | ****************** 28 | 29 | **STREAM_CASSANDRA_HOSTS** 30 | 31 | The list of nodes that are part of the cassandra cluster. 32 | 33 | .. 
note:: You dont need to put every node of the cluster, cassandra-driver has built-in node discovery 34 | 35 | Defaults to ``['localhost']`` 36 | 37 | **STREAM_DEFAULT_KEYSPACE** 38 | 39 | The cassandra keyspace where feed data is stored 40 | 41 | Defaults to ``stream_framework`` 42 | 43 | **STREAM_CASSANDRA_CONSISTENCY_LEVEL** 44 | 45 | The consistency level used for both reads and writes to the cassandra cluster. 46 | 47 | Defaults to ``cassandra.ConsistencyLevel.ONE`` 48 | 49 | **CASSANDRA_DRIVER_KWARGS** 50 | 51 | Extra keyword arguments sent to cassandra driver (see http://datastax.github.io/python-driver/_modules/cassandra/cluster.html#Cluster) 52 | 53 | Defaults to ``{}`` 54 | 55 | 56 | Metric Settings 57 | *************** 58 | 59 | **STREAM_METRIC_CLASS** 60 | 61 | The metric class that will be used to collect feeds metrics. 62 | 63 | .. note:: The default metric class is not collecting any metric and should be used as example for subclasses 64 | 65 | Defaults to ``stream_framework.metrics.base.Metrics`` 66 | 67 | **STREAM_METRICS_OPTIONS** 68 | 69 | A dictionary with options to send to the metric class at initialisation time. 70 | 71 | Defaults to ``{}`` 72 | -------------------------------------------------------------------------------- /docs/stream_framework.aggregators.rst: -------------------------------------------------------------------------------- 1 | aggregators Package 2 | =================== 3 | 4 | :mod:`base` Module 5 | ------------------ 6 | 7 | .. automodule:: stream_framework.aggregators.base 8 | :members: 9 | :undoc-members: 10 | :show-inheritance: 11 | 12 | -------------------------------------------------------------------------------- /docs/stream_framework.feed_managers.rst: -------------------------------------------------------------------------------- 1 | feed_managers Package 2 | ===================== 3 | 4 | :mod:`base` Module 5 | ------------------ 6 | 7 | .. automodule:: stream_framework.feed_managers.base 8 | :members: 9 | :undoc-members: 10 | :show-inheritance: 11 | 12 | -------------------------------------------------------------------------------- /docs/stream_framework.feeds.aggregated_feed.rst: -------------------------------------------------------------------------------- 1 | aggregated_feed Package 2 | ======================= 3 | 4 | :mod:`aggregated_feed` Package 5 | ------------------------------ 6 | 7 | .. automodule:: stream_framework.feeds.aggregated_feed 8 | :members: 9 | :undoc-members: 10 | :show-inheritance: 11 | 12 | :mod:`base` Module 13 | ------------------ 14 | 15 | .. automodule:: stream_framework.feeds.aggregated_feed.base 16 | :members: 17 | :undoc-members: 18 | :show-inheritance: 19 | 20 | :mod:`cassandra` Module 21 | ----------------------- 22 | 23 | .. automodule:: stream_framework.feeds.aggregated_feed.cassandra 24 | :members: 25 | :undoc-members: 26 | :show-inheritance: 27 | 28 | :mod:`redis` Module 29 | ------------------- 30 | 31 | .. automodule:: stream_framework.feeds.aggregated_feed.redis 32 | :members: 33 | :undoc-members: 34 | :show-inheritance: 35 | 36 | :mod:`notification_feed` Module 37 | ------------------------------- 38 | 39 | .. 
automodule:: stream_framework.feeds.aggregated_feed.notification_feed 40 | :members: 41 | :undoc-members: 42 | :show-inheritance: 43 | 44 | -------------------------------------------------------------------------------- /docs/stream_framework.feeds.rst: -------------------------------------------------------------------------------- 1 | feeds Package 2 | ============= 3 | 4 | :mod:`base` Module 5 | ------------------ 6 | 7 | .. automodule:: stream_framework.feeds.base 8 | :members: 9 | :undoc-members: 10 | :show-inheritance: 11 | 12 | :mod:`cassandra` Module 13 | ----------------------- 14 | 15 | .. automodule:: stream_framework.feeds.cassandra 16 | :members: 17 | :undoc-members: 18 | :show-inheritance: 19 | 20 | :mod:`memory` Module 21 | -------------------- 22 | 23 | .. automodule:: stream_framework.feeds.memory 24 | :members: 25 | :undoc-members: 26 | :show-inheritance: 27 | 28 | :mod:`redis` Module 29 | ------------------- 30 | 31 | .. automodule:: stream_framework.feeds.redis 32 | :members: 33 | :undoc-members: 34 | :show-inheritance: 35 | 36 | Subpackages 37 | ----------- 38 | 39 | .. toctree:: 40 | 41 | stream_framework.feeds.aggregated_feed 42 | 43 | -------------------------------------------------------------------------------- /docs/stream_framework.rst: -------------------------------------------------------------------------------- 1 | Stream Framework API Docs 2 | =============== 3 | 4 | :mod:`stream_framework` Package 5 | --------------------- 6 | 7 | .. automodule:: stream_framework 8 | :members: 9 | :undoc-members: 10 | :show-inheritance: 11 | 12 | :mod:`activity` Module 13 | ---------------------- 14 | 15 | .. automodule:: stream_framework.activity 16 | :members: 17 | :undoc-members: 18 | :show-inheritance: 19 | 20 | 21 | :mod:`default_settings` Module 22 | ------------------------------ 23 | 24 | .. automodule:: stream_framework.default_settings 25 | :members: 26 | :undoc-members: 27 | :show-inheritance: 28 | 29 | :mod:`exceptions` Module 30 | ------------------------ 31 | 32 | .. automodule:: stream_framework.exceptions 33 | :members: 34 | :undoc-members: 35 | :show-inheritance: 36 | 37 | :mod:`settings` Module 38 | ---------------------- 39 | 40 | .. automodule:: stream_framework.settings 41 | :members: 42 | :undoc-members: 43 | :show-inheritance: 44 | 45 | :mod:`tasks` Module 46 | ------------------- 47 | 48 | .. automodule:: stream_framework.tasks 49 | :members: 50 | :undoc-members: 51 | :show-inheritance: 52 | 53 | :mod:`utils` Module 54 | ------------------- 55 | 56 | .. automodule:: stream_framework.utils 57 | :members: 58 | :undoc-members: 59 | :show-inheritance: 60 | 61 | Subpackages 62 | ----------- 63 | 64 | .. toctree:: 65 | 66 | stream_framework.aggregators 67 | stream_framework.feed_managers 68 | stream_framework.feeds 69 | stream_framework.storage 70 | stream_framework.verbs 71 | 72 | -------------------------------------------------------------------------------- /docs/stream_framework.storage.cassandra.rst: -------------------------------------------------------------------------------- 1 | cassandra Package 2 | ================= 3 | 4 | :mod:`cassandra` Package 5 | ------------------------ 6 | 7 | .. automodule:: stream_framework.storage.cassandra 8 | :members: 9 | :undoc-members: 10 | :show-inheritance: 11 | 12 | :mod:`connection` Module 13 | ------------------------ 14 | 15 | .. 
automodule:: stream_framework.storage.cassandra.connection 16 | :members: 17 | :undoc-members: 18 | :show-inheritance: 19 | 20 | -------------------------------------------------------------------------------- /docs/stream_framework.storage.redis.rst: -------------------------------------------------------------------------------- 1 | redis Package 2 | ============= 3 | 4 | :mod:`activity_storage` Module 5 | ------------------------------ 6 | 7 | .. automodule:: stream_framework.storage.redis.activity_storage 8 | :members: 9 | :undoc-members: 10 | :show-inheritance: 11 | 12 | :mod:`connection` Module 13 | ------------------------ 14 | 15 | .. automodule:: stream_framework.storage.redis.connection 16 | :members: 17 | :undoc-members: 18 | :show-inheritance: 19 | 20 | :mod:`timeline_storage` Module 21 | ------------------------------ 22 | 23 | .. automodule:: stream_framework.storage.redis.timeline_storage 24 | :members: 25 | :undoc-members: 26 | :show-inheritance: 27 | 28 | Subpackages 29 | ----------- 30 | 31 | .. toctree:: 32 | 33 | stream_framework.storage.redis.structures 34 | 35 | -------------------------------------------------------------------------------- /docs/stream_framework.storage.redis.structures.rst: -------------------------------------------------------------------------------- 1 | structures Package 2 | ================== 3 | 4 | :mod:`base` Module 5 | ------------------ 6 | 7 | .. automodule:: stream_framework.storage.redis.structures.base 8 | :members: 9 | :undoc-members: 10 | :show-inheritance: 11 | 12 | :mod:`hash` Module 13 | ------------------ 14 | 15 | .. automodule:: stream_framework.storage.redis.structures.hash 16 | :members: 17 | :undoc-members: 18 | :show-inheritance: 19 | 20 | :mod:`list` Module 21 | ------------------ 22 | 23 | .. automodule:: stream_framework.storage.redis.structures.list 24 | :members: 25 | :undoc-members: 26 | :show-inheritance: 27 | 28 | :mod:`sorted_set` Module 29 | ------------------------ 30 | 31 | .. automodule:: stream_framework.storage.redis.structures.sorted_set 32 | :members: 33 | :undoc-members: 34 | :show-inheritance: 35 | 36 | -------------------------------------------------------------------------------- /docs/stream_framework.storage.rst: -------------------------------------------------------------------------------- 1 | storage Package 2 | =============== 3 | 4 | :mod:`base` Module 5 | ------------------ 6 | 7 | .. automodule:: stream_framework.storage.base 8 | :members: 9 | :undoc-members: 10 | :show-inheritance: 11 | 12 | :mod:`memory` Module 13 | -------------------- 14 | 15 | .. automodule:: stream_framework.storage.memory 16 | :members: 17 | :undoc-members: 18 | :show-inheritance: 19 | 20 | Subpackages 21 | ----------- 22 | 23 | .. toctree:: 24 | 25 | stream_framework.storage.cassandra 26 | stream_framework.storage.redis 27 | 28 | -------------------------------------------------------------------------------- /docs/stream_framework.verbs.rst: -------------------------------------------------------------------------------- 1 | verbs Package 2 | ============= 3 | 4 | :mod:`verbs` Package 5 | -------------------- 6 | 7 | .. automodule:: stream_framework.verbs 8 | :members: 9 | :undoc-members: 10 | :show-inheritance: 11 | 12 | :mod:`base` Module 13 | ------------------ 14 | 15 | .. 
automodule:: stream_framework.verbs.base 16 | :members: 17 | :undoc-members: 18 | :show-inheritance: 19 | 20 |
-------------------------------------------------------------------------------- /docs/support.rst: -------------------------------------------------------------------------------- 1 | Support 2 | ======= 3 | 4 | If you need help, you can try IRC or the mailing list. 5 | Issues can be reported on GitHub. 6 | 7 | - `IRC `__ (irc.freenode.net, 8 | #feedly-python) 9 | - `Mailing List `__ 10 | - `Bug Tracker `__ 11 | 12 |
-------------------------------------------------------------------------------- /docs/testing.rst: -------------------------------------------------------------------------------- 1 | Testing Stream Framework 2 | ======================== 3 | 4 | .. warning:: We strongly suggest against running tests on a machine that is hosting redis or cassandra production data! 5 | 6 | In order to test Stream Framework, you need to install its test requirements with 7 | 8 | .. code-block:: bash 9 | 10 | python setup.py test 11 | 12 | or, if you want more control over the test run, you can use the py.test entry point directly (assuming you are in the stream_framework directory) 13 | 14 | .. code-block:: bash 15 | 16 | py.test stream_framework/tests 17 | 18 | 19 | The test suite connects to Redis on 127.0.0.1:6379 and to a Cassandra node on 127.0.0.1 using the native protocol. 20 | 21 | The easiest way to run a cassandra test cluster is with the awesome `ccm package `_. 22 | 23 | If you are not running a cassandra node on localhost, you can specify a different address with the `TEST_CASSANDRA_HOST` environment variable. 24 | 25 | Every commit is built on Travis CI; you can see the current state and the build history `here `_. 26 | 27 | If you intend to contribute, we suggest you install pytest's coverage plugin; this way you can make sure your code changes 28 | are actually exercised by the tests. 29 |
-------------------------------------------------------------------------------- /docs/verbs.rst: -------------------------------------------------------------------------------- 1 | Verbs 2 | ===== 3 | 4 | 5 | Adding new verbs 6 | **************** 7 | 8 | Registering a new verb is quite easy. 9 | Just subclass the Verb class and give it a unique id. 10 | 11 | :: 12 | 13 | 14 | from stream_framework.verbs import register 15 | from stream_framework.verbs.base import Verb 16 | 17 | 18 | class Pin(Verb): 19 | id = 5 20 | infinitive = 'pin' 21 | past_tense = 'pinned' 22 | 23 | register(Pin) 24 | 25 | .. seealso:: Make sure your verbs are registered before you read data from stream_framework. If you use Django, 26 | you can simply define/import them in models.py to make sure they are loaded early. 27 | 28 | 29 | 30 | Getting verbs 31 | ************* 32 | 33 | You can retrieve verbs by calling get_verb_by_id.
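The verb id is also what gets stored with every serialized activity (see ActivitySerializer), so ids have to be unique across your project and should not be changed once activities have been written. The lookup below assumes the Pin verb from the previous section has been registered: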
34 | 35 | :: 36 | 37 | from stream_framework.verbs import get_verb_by_id 38 | 39 | pin_verb = get_verb_by_id(5) -------------------------------------------------------------------------------- /fabfile.py: -------------------------------------------------------------------------------- 1 | from fabric.api import local, cd 2 | import os 3 | PROJECT_ROOT = os.path.abspath(os.path.dirname(__file__)) 4 | 5 | 6 | def publish(test='yes'): 7 | ''' 8 | Easy publishing of my nice open source project 9 | ''' 10 | if test == 'yes': 11 | validate() 12 | 13 | from stream_framework import __version__ 14 | tag_name = 'v%s' % __version__ 15 | local('python setup.py sdist upload') 16 | 17 | local('git tag %s' % tag_name) 18 | local('git push origin --tags') 19 | 20 | 21 | def validate(): 22 | with cd(PROJECT_ROOT): 23 | local('pep8 --exclude=migrations --ignore=E501,E225,W293 stream_framework') 24 | # local('pyflakes -x W stream_framework') 25 | local( 26 | 'py.test -sl --tb=short --cov coveralls --cov-report html --cov stream_framework stream_framework/tests') 27 | 28 | 29 | def clean(): 30 | # all dirs which contain python code 31 | python_dirs = [] 32 | for root, dirs, files in os.walk(PROJECT_ROOT): 33 | python_dir = any(f.endswith('.py') for f in files) 34 | if python_dir: 35 | python_dirs.append(root) 36 | for d in python_dirs: 37 | local('bash -c "autopep8 -i %s/*.py"' % d) 38 | 39 | 40 | def docs(): 41 | local('DJANGO_SETTINGS_MODULE=stream_framework.tests.settings pandoc -s -w rst README.md -o docs/readme.rst') 42 | local('sphinx-build -Eav docs html') 43 | -------------------------------------------------------------------------------- /pytest.ini: -------------------------------------------------------------------------------- 1 | [pytest] 2 | addopts = --doctest-modules 3 | python_files = *.py 4 | DJANGO_SETTINGS_MODULE = stream_framework.tests.settings -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from io import open 4 | from setuptools import setup, find_packages 5 | from setuptools.command.test import test as TestCommand 6 | from stream_framework import __version__, __maintainer__, __email__ 7 | import sys 8 | 9 | long_description = open('README.md', encoding="utf-8").read() 10 | 11 | tests_require = [ 12 | 'Django>=1.3', 13 | 'mock', 14 | 'pep8', 15 | 'unittest2', 16 | 'pytest', 17 | ] 18 | 19 | install_requires = [ 20 | 'celery>=3.0.0', 21 | 'six' 22 | ] 23 | 24 | extras_require = { 25 | 'test': tests_require, 26 | 'redis': ['redis>=2.8.0'], 27 | 'cassandra': ['cassandra-driver>=2.7.2'], 28 | } 29 | 30 | class PyTest(TestCommand): 31 | 32 | def finalize_options(self): 33 | TestCommand.finalize_options(self) 34 | self.test_args = [] 35 | self.test_suite = True 36 | 37 | def run_tests(self): 38 | # import here, cause outside the eggs aren't loaded 39 | import pytest 40 | errno = pytest.main(self.test_args) 41 | sys.exit(errno) 42 | 43 | setup( 44 | name='stream_framework', 45 | version=__version__, 46 | author=__maintainer__, 47 | author_email=__email__, 48 | url='https://github.com/tschellenbach/Stream-Framework/', 49 | description='Stream Framework allows you to build complex feed and caching structures using Redis.', 50 | long_description=long_description, 51 | packages=find_packages(), 52 | zip_safe=False, 53 | install_requires=install_requires, 54 | extras_require=extras_require, 55 | cmdclass={'test': PyTest}, 56 | 
tests_require=tests_require, 57 | include_package_data=True, 58 | classifiers=[ 59 | 'Intended Audience :: Developers', 60 | 'Intended Audience :: System Administrators', 61 | 'Operating System :: OS Independent', 62 | 'Topic :: Software Development', 63 | 'Development Status :: 5 - Production/Stable', 64 | 'License :: OSI Approved :: GNU General Public License (GPL)', 65 | 'Natural Language :: English', 66 | 'Programming Language :: Python', 67 | 'Programming Language :: Python :: 2.7', 68 | 'Programming Language :: Python :: 3.4', 69 | 'Topic :: Scientific/Engineering :: Mathematics', 70 | 'Topic :: Software Development :: Libraries :: Python Modules', 71 | 'Framework :: Django' 72 | ], 73 | ) 74 | -------------------------------------------------------------------------------- /stream_framework/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = 'Thierry Schellenbach' 2 | __copyright__ = 'Copyright 2012-2014, Thierry Schellenbach' 3 | __credits__ = ['Thierry Schellenbach, mellowmorning.com, @tschellenbach'] 4 | 5 | 6 | __license__ = 'BSD' 7 | __version__ = '1.4.0' 8 | __maintainer__ = 'Thierry Schellenbach' 9 | __email__ = 'thierryschellenbach@gmail.com' 10 | __status__ = 'Production' 11 | -------------------------------------------------------------------------------- /stream_framework/aggregators/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/llSourcell/Stream-Framework/e7535857af03c81c760b9265568287816ecac13c/stream_framework/aggregators/__init__.py -------------------------------------------------------------------------------- /stream_framework/aggregators/base.py: -------------------------------------------------------------------------------- 1 | from stream_framework.activity import AggregatedActivity, Activity 2 | from copy import deepcopy 3 | from stream_framework.exceptions import DuplicateActivityException 4 | 5 | 6 | class BaseAggregator(object): 7 | 8 | ''' 9 | Aggregators implement the combining of multiple activities into aggregated activities. 
10 | 11 | The two most important methods are 12 | aggregate and merge 13 | 14 | Aggregate takes a list of activities and turns it into a list of aggregated activities 15 | 16 | Merge takes two lists of aggregated activities and returns a list of new and changed aggregated activities 17 | ''' 18 | 19 | aggregated_activity_class = AggregatedActivity 20 | activity_class = Activity 21 | 22 | def __init__(self, aggregated_activity_class=None, activity_class=None): 23 | ''' 24 | :param aggregated_activity_class: the class which we should use 25 | for returning the aggregated activities 26 | ''' 27 | if aggregated_activity_class is not None: 28 | self.aggregated_activity_class = aggregated_activity_class 29 | if activity_class is not None: 30 | self.activity_class = activity_class 31 | 32 | def aggregate(self, activities): 33 | ''' 34 | 35 | :param activties: A list of activities 36 | :returns list: A list of aggregated activities 37 | 38 | Runs the group activities (using get group) 39 | Ranks them using the giving ranking function 40 | And returns the sorted activities 41 | 42 | **Example** :: 43 | 44 | aggregator = ModulusAggregator() 45 | activities = [Activity(1), Activity(2)] 46 | aggregated_activities = aggregator.aggregate(activities) 47 | 48 | ''' 49 | aggregate_dict = self.group_activities(activities) 50 | aggregated_activities = list(aggregate_dict.values()) 51 | ranked_aggregates = self.rank(aggregated_activities) 52 | return ranked_aggregates 53 | 54 | def merge(self, aggregated, activities): 55 | ''' 56 | :param aggregated: A list of aggregated activities 57 | :param activities: A list of the new activities 58 | :returns tuple: Returns new, changed 59 | 60 | Merges two lists of aggregated activities and returns the new aggregated 61 | activities and a from, to mapping of the changed aggregated activities 62 | 63 | **Example** :: 64 | 65 | aggregator = ModulusAggregator() 66 | activities = [Activity(1), Activity(2)] 67 | aggregated_activities = aggregator.aggregate(activities) 68 | activities = [Activity(3), Activity(4)] 69 | new, changed = aggregator.merge(aggregated_activities, activities) 70 | for activity in new: 71 | print activity 72 | 73 | for from, to in changed: 74 | print 'changed from %s to %s' % (from, to) 75 | 76 | ''' 77 | current_activities_dict = dict([(a.group, a) for a in aggregated]) 78 | new = [] 79 | changed = [] 80 | new_aggregated = self.aggregate(activities) 81 | for aggregated in new_aggregated: 82 | if aggregated.group not in current_activities_dict: 83 | new.append(aggregated) 84 | else: 85 | current_aggregated = current_activities_dict.get( 86 | aggregated.group) 87 | new_aggregated = deepcopy(current_aggregated) 88 | for activity in aggregated.activities: 89 | try: 90 | new_aggregated.append(activity) 91 | except DuplicateActivityException: 92 | pass 93 | if current_aggregated.activities != new_aggregated.activities: 94 | changed.append((current_aggregated, new_aggregated)) 95 | return new, changed, [] 96 | 97 | def group_activities(self, activities): 98 | ''' 99 | Groups the activities based on their group 100 | Found by running get_group(actvity on them) 101 | ''' 102 | aggregate_dict = dict() 103 | # make sure that if we aggregated multiple activities 104 | # they end up in serialization_id desc in the aggregated activity 105 | activities = list(activities) 106 | activities.sort() 107 | for activity in activities: 108 | group = self.get_group(activity) 109 | if group not in aggregate_dict: 110 | aggregate_dict[group] = 
self.aggregated_activity_class(group) 111 | aggregate_dict[group].append(activity) 112 | 113 | return aggregate_dict 114 | 115 | def get_group(self, activity): 116 | ''' 117 | Returns a group to stick this activity in 118 | ''' 119 | raise ValueError('not implemented') 120 | 121 | def rank(self, aggregated_activities): 122 | ''' 123 | The ranking logic, for sorting aggregated activities 124 | ''' 125 | raise ValueError('not implemented') 126 | 127 | 128 | class RecentRankMixin(object): 129 | 130 | ''' 131 | Most recently updated aggregated activities are ranked first. 132 | ''' 133 | 134 | def rank(self, aggregated_activities): 135 | ''' 136 | The ranking logic, for sorting aggregated activities 137 | ''' 138 | aggregated_activities.sort(key=lambda a: a.updated_at, reverse=True) 139 | return aggregated_activities 140 | 141 | 142 | class RecentVerbAggregator(RecentRankMixin, BaseAggregator): 143 | 144 | ''' 145 | Aggregates based on the same verb and same time period 146 | ''' 147 | 148 | def get_group(self, activity): 149 | ''' 150 | Returns a group based on the day and verb 151 | ''' 152 | verb = activity.verb.id 153 | date = activity.time.date() 154 | group = '%s-%s' % (verb, date) 155 | return group 156 | 157 | 158 | class NotificationAggregator(RecentRankMixin, BaseAggregator): 159 | 160 | ''' 161 | Aggregates based on the same verb, object and day 162 | ''' 163 | 164 | def get_group(self, activity): 165 | ''' 166 | Returns a group based on the verb, object and day 167 | ''' 168 | verb = activity.verb.id 169 | object_id = activity.object_id 170 | date = activity.time.date() 171 | group = '%s-%s-%s' % (verb, object_id, date) 172 | return group 173 | -------------------------------------------------------------------------------- /stream_framework/conftest.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | 4 | @pytest.fixture(autouse=True) 5 | def celery_eager(): 6 | from celery import current_app 7 | current_app.conf.CELERY_ALWAYS_EAGER = True 8 | current_app.conf.CELERY_EAGER_PROPAGATES_EXCEPTIONS = True 9 | 10 | 11 | @pytest.fixture 12 | def redis_reset(): 13 | import redis 14 | redis.Redis().flushall() 15 | 16 | 17 | @pytest.fixture 18 | def cassandra_reset(): 19 | from stream_framework.feeds.cassandra import CassandraFeed 20 | from stream_framework.feeds.aggregated_feed.cassandra import CassandraAggregatedFeed 21 | from cassandra.cqlengine.management import create_keyspace_simple, sync_table 22 | from stream_framework import settings 23 | create_keyspace_simple(settings.STREAM_DEFAULT_KEYSPACE, 1) 24 | aggregated_timeline = CassandraAggregatedFeed.get_timeline_storage() 25 | timeline = CassandraFeed.get_timeline_storage() 26 | sync_table(aggregated_timeline.model) 27 | sync_table(timeline.model) 28 | -------------------------------------------------------------------------------- /stream_framework/default_settings.py: -------------------------------------------------------------------------------- 1 | 2 | # : we recommend that you connect to Redis via Twemproxy 3 | STREAM_REDIS_CONFIG = { 4 | 'default': { 5 | 'host': '127.0.0.1', 6 | 'port': 6379, 7 | 'db': 0, 8 | 'password': None 9 | }, 10 | } 11 | 12 | STREAM_CASSANDRA_HOSTS = ['localhost'] 13 | 14 | STREAM_DEFAULT_KEYSPACE = 'stream_framework' 15 | 16 | STREAM_CASSANDRA_CONSISTENCY_LEVEL = None 17 | 18 | STREAM_CASSANDRA_READ_RETRY_ATTEMPTS = 1 19 | 20 | STREAM_CASSANDRA_WRITE_RETRY_ATTEMPTS = 1 21 | 22 | CASSANDRA_DRIVER_KWARGS = { 23 | 'protocol_version': 2 24 | } 25 | 
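# : all of the defaults in this module can be overridden from your Django settings;
# : stream_framework.settings imports django.conf.settings over these values, so e.g.
# : STREAM_CASSANDRA_HOSTS = ['10.0.0.1', '10.0.0.2'] in settings.py takes precedence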
26 | STREAM_METRIC_CLASS = 'stream_framework.metrics.base.Metrics' 27 | 28 | STREAM_METRICS_OPTIONS = {} 29 | 30 | STREAM_VERB_STORAGE = 'in-memory' 31 | 32 | try: 33 | from cassandra import ConsistencyLevel 34 | STREAM_CASSANDRA_CONSISTENCY_LEVEL = ConsistencyLevel.ONE 35 | except ImportError: 36 | pass 37 | -------------------------------------------------------------------------------- /stream_framework/exceptions.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | class SerializationException(Exception): 4 | 5 | ''' 6 | Raised when encountering invalid data for serialization 7 | ''' 8 | pass 9 | 10 | 11 | class DuplicateActivityException(Exception): 12 | 13 | ''' 14 | Raised when someone sticks a duplicate activity in the aggregated activity 15 | ''' 16 | pass 17 | 18 | 19 | class ActivityNotFound(Exception): 20 | 21 | ''' 22 | Raised when the activity is not present in the aggregated Activity 23 | ''' 24 | pass 25 | -------------------------------------------------------------------------------- /stream_framework/feed_managers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/llSourcell/Stream-Framework/e7535857af03c81c760b9265568287816ecac13c/stream_framework/feed_managers/__init__.py -------------------------------------------------------------------------------- /stream_framework/feeds/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/llSourcell/Stream-Framework/e7535857af03c81c760b9265568287816ecac13c/stream_framework/feeds/__init__.py -------------------------------------------------------------------------------- /stream_framework/feeds/aggregated_feed/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/llSourcell/Stream-Framework/e7535857af03c81c760b9265568287816ecac13c/stream_framework/feeds/aggregated_feed/__init__.py -------------------------------------------------------------------------------- /stream_framework/feeds/aggregated_feed/cassandra.py: -------------------------------------------------------------------------------- 1 | from stream_framework.feeds.aggregated_feed.base import AggregatedFeed 2 | from stream_framework.feeds.cassandra import CassandraFeed 3 | from stream_framework.serializers.cassandra.aggregated_activity_serializer import \ 4 | CassandraAggregatedActivitySerializer 5 | from stream_framework.storage.cassandra.activity_storage import CassandraActivityStorage 6 | from stream_framework.storage.cassandra import models 7 | 8 | 9 | class CassandraAggregatedFeed(AggregatedFeed, CassandraFeed): 10 | activity_storage_class = CassandraActivityStorage 11 | timeline_serializer = CassandraAggregatedActivitySerializer 12 | timeline_cf_name = 'aggregated' 13 | timeline_model = models.AggregatedActivity 14 | -------------------------------------------------------------------------------- /stream_framework/feeds/aggregated_feed/notification_feed.py: -------------------------------------------------------------------------------- 1 | from stream_framework.feeds.aggregated_feed.base import AggregatedFeed 2 | from stream_framework.serializers.aggregated_activity_serializer import \ 3 | NotificationSerializer 4 | from stream_framework.storage.redis.timeline_storage import RedisTimelineStorage 5 | import copy 6 | import json 7 | import logging 8 | import warnings 9 | 10 | logger = 
logging.getLogger(__name__) 11 | 12 | MODULE_IS_DEPRECATED = """ 13 | Module stream_framework.feeds.aggregated_feed.notification_feed is deprecated. 14 | Please use stream_framework.feeds.notification_feed module. 15 | 16 | Class stream_framework.feeds.aggregated_feed.notification_feed.RedisNotificationFeed 17 | is replaced by stream_framework.feeds.notification_feed.redis.RedisNotificationFeed 18 | """ 19 | 20 | warnings.warn(MODULE_IS_DEPRECATED, DeprecationWarning) 21 | 22 | 23 | class NotificationFeed(AggregatedFeed): 24 | 25 | ''' 26 | Similar to an aggregated feed, but: 27 | - doesnt use the activity storage (serializes everything into the timeline storage) 28 | - features denormalized counts 29 | - pubsub signals which you can subscribe to 30 | For now this is entirely tied to Redis 31 | ''' 32 | #: notification feeds only need a small max length 33 | max_length = 99 34 | key_format = 'notification_feed:1:user:%(user_id)s' 35 | #: the format we use to denormalize the count 36 | count_format = 'notification_feed:1:user:%(user_id)s:count' 37 | #: the key used for locking 38 | lock_format = 'notification_feed:1:user:%s:lock' 39 | #: the main channel to publish 40 | pubsub_main_channel = 'juggernaut' 41 | 42 | timeline_serializer = NotificationSerializer 43 | activity_storage_class = None 44 | activity_serializer = None 45 | 46 | def __init__(self, user_id, **kwargs): 47 | ''' 48 | User id (the user for which we want to read/write notifications) 49 | ''' 50 | AggregatedFeed.__init__(self, user_id, **kwargs) 51 | 52 | # location to which we denormalize the count 53 | self.format_dict = dict(user_id=user_id) 54 | self.count_key = self.count_format % self.format_dict 55 | # set the pubsub key if we're using it 56 | self.pubsub_key = user_id 57 | self.lock_key = self.lock_format % self.format_dict 58 | from stream_framework.storage.redis.connection import get_redis_connection 59 | self.redis = get_redis_connection() 60 | 61 | def add_many(self, activities, **kwargs): 62 | ''' 63 | Similar to the AggregatedActivity.add_many 64 | The only difference is that it denormalizes a count of unseen activities 65 | ''' 66 | with self.redis.lock(self.lock_key, timeout=2): 67 | current_activities = AggregatedFeed.add_many( 68 | self, activities, **kwargs) 69 | # denormalize the count 70 | self.denormalize_count() 71 | # return the current state of the notification feed 72 | return current_activities 73 | 74 | def get_denormalized_count(self): 75 | ''' 76 | Returns the denormalized count stored in self.count_key 77 | ''' 78 | result = self.redis.get(self.count_key) or 0 79 | result = int(result) 80 | return result 81 | 82 | def set_denormalized_count(self, count): 83 | ''' 84 | Updates the denormalized count to count 85 | 86 | :param count: the count to update to 87 | ''' 88 | self.redis.set(self.count_key, count) 89 | self.publish_count(count) 90 | 91 | def publish_count(self, count): 92 | ''' 93 | Published the count via pubsub 94 | 95 | :param count: the count to publish 96 | ''' 97 | count_dict = dict(unread_count=count, unseen_count=count) 98 | count_data = json.dumps(count_dict) 99 | data = {'channel': self.pubsub_key, 'data': count_data} 100 | encoded_data = json.dumps(data) 101 | self.redis.publish(self.pubsub_main_channel, encoded_data) 102 | 103 | def denormalize_count(self): 104 | ''' 105 | Denormalize the number of unseen aggregated activities to the key 106 | defined in self.count_key 107 | ''' 108 | # now count the number of unseen 109 | count = self.count_unseen() 110 | # and update the 
count if it changed 111 | stored_count = self.get_denormalized_count() 112 | if stored_count != count: 113 | self.set_denormalized_count(count) 114 | return count 115 | 116 | def count_unseen(self, aggregated_activities=None): 117 | ''' 118 | Counts the number of aggregated activities which are unseen 119 | 120 | :param aggregated_activities: allows you to specify the aggregated 121 | activities for improved performance 122 | ''' 123 | count = 0 124 | if aggregated_activities is None: 125 | aggregated_activities = self[:self.max_length] 126 | for aggregated in aggregated_activities: 127 | if not aggregated.is_seen(): 128 | count += 1 129 | return count 130 | 131 | def mark_all(self, seen=True, read=None): 132 | ''' 133 | Mark all the entries as seen or read 134 | 135 | :param seen: set seen_at 136 | :param read: set read_at 137 | ''' 138 | with self.redis.lock(self.lock_key, timeout=10): 139 | # get the current aggregated activities 140 | aggregated_activities = self[:self.max_length] 141 | # create the update dict 142 | update_dict = {} 143 | 144 | for aggregated_activity in aggregated_activities: 145 | changed = False 146 | old_activity = copy.deepcopy(aggregated_activity) 147 | if seen is True and not aggregated_activity.is_seen(): 148 | aggregated_activity.update_seen_at() 149 | changed = True 150 | if read is True and not aggregated_activity.is_read(): 151 | aggregated_activity.update_read_at() 152 | changed = True 153 | 154 | if changed: 155 | update_dict[old_activity] = aggregated_activity 156 | 157 | # send the diff to the storage layer 158 | new, deleted = [], [] 159 | changed = update_dict.items() 160 | self._update_from_diff(new, changed, deleted) 161 | 162 | # denormalize the count 163 | self.denormalize_count() 164 | 165 | # return the new activities 166 | return aggregated_activities 167 | 168 | 169 | class RedisNotificationFeed(NotificationFeed): 170 | timeline_storage_class = RedisTimelineStorage 171 | -------------------------------------------------------------------------------- /stream_framework/feeds/aggregated_feed/redis.py: -------------------------------------------------------------------------------- 1 | from stream_framework.feeds.aggregated_feed.base import AggregatedFeed 2 | from stream_framework.storage.redis.activity_storage import RedisActivityStorage 3 | from stream_framework.storage.redis.timeline_storage import RedisTimelineStorage 4 | from stream_framework.serializers.aggregated_activity_serializer import AggregatedActivitySerializer 5 | from stream_framework.serializers.activity_serializer import ActivitySerializer 6 | 7 | 8 | class RedisAggregatedFeed(AggregatedFeed): 9 | timeline_serializer = AggregatedActivitySerializer 10 | activity_serializer = ActivitySerializer 11 | timeline_storage_class = RedisTimelineStorage 12 | activity_storage_class = RedisActivityStorage 13 | -------------------------------------------------------------------------------- /stream_framework/feeds/cassandra.py: -------------------------------------------------------------------------------- 1 | from stream_framework import settings 2 | from stream_framework.feeds.base import BaseFeed 3 | from stream_framework.storage.cassandra.activity_storage import CassandraActivityStorage 4 | from stream_framework.storage.cassandra.timeline_storage import CassandraTimelineStorage 5 | from stream_framework.serializers.cassandra.activity_serializer import CassandraActivitySerializer 6 | from stream_framework.storage.cassandra import models 7 | 8 | 9 | class CassandraFeed(BaseFeed): 10 | 11 
| """ 12 | Apache Cassandra feed implementation 13 | 14 | This implementation does not store activities in a 15 | denormalized fashion 16 | 17 | Activities are stored completely in the timeline storage 18 | 19 | """ 20 | 21 | activity_storage_class = CassandraActivityStorage 22 | timeline_storage_class = CassandraTimelineStorage 23 | timeline_serializer = CassandraActivitySerializer 24 | timeline_model = models.Activity 25 | 26 | # ; the name of the column family 27 | timeline_cf_name = 'example' 28 | 29 | @classmethod 30 | def get_timeline_storage_options(cls): 31 | ''' 32 | Returns the options for the timeline storage 33 | ''' 34 | options = super(CassandraFeed, cls).get_timeline_storage_options() 35 | options['modelClass'] = cls.timeline_model 36 | options['hosts'] = settings.STREAM_CASSANDRA_HOSTS 37 | options['column_family_name'] = cls.timeline_cf_name 38 | return options 39 | 40 | # : clarify that this feed supports filtering and ordering 41 | filtering_supported = True 42 | ordering_supported = True 43 | -------------------------------------------------------------------------------- /stream_framework/feeds/memory.py: -------------------------------------------------------------------------------- 1 | from stream_framework.feeds.base import BaseFeed 2 | from stream_framework.storage.memory import InMemoryActivityStorage 3 | from stream_framework.storage.memory import InMemoryTimelineStorage 4 | 5 | 6 | class Feed(BaseFeed): 7 | timeline_storage_class = InMemoryTimelineStorage 8 | activity_storage_class = InMemoryActivityStorage 9 | -------------------------------------------------------------------------------- /stream_framework/feeds/notification_feed/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/llSourcell/Stream-Framework/e7535857af03c81c760b9265568287816ecac13c/stream_framework/feeds/notification_feed/__init__.py -------------------------------------------------------------------------------- /stream_framework/feeds/notification_feed/redis.py: -------------------------------------------------------------------------------- 1 | from stream_framework.feeds.notification_feed.base import BaseNotificationFeed 2 | from stream_framework.storage.redis.lists_storage import RedisListsStorage 3 | from stream_framework.storage.redis.timeline_storage import RedisTimelineStorage 4 | 5 | 6 | class RedisNotificationFeed(BaseNotificationFeed): 7 | 8 | markers_storage_class = RedisListsStorage 9 | timeline_storage_class = RedisTimelineStorage 10 | -------------------------------------------------------------------------------- /stream_framework/feeds/redis.py: -------------------------------------------------------------------------------- 1 | from stream_framework.feeds.base import BaseFeed 2 | from stream_framework.storage.redis.activity_storage import RedisActivityStorage 3 | from stream_framework.storage.redis.timeline_storage import RedisTimelineStorage 4 | from stream_framework.serializers.activity_serializer import ActivitySerializer 5 | 6 | 7 | class RedisFeed(BaseFeed): 8 | timeline_storage_class = RedisTimelineStorage 9 | activity_storage_class = RedisActivityStorage 10 | 11 | activity_serializer = ActivitySerializer 12 | 13 | # : allow you point to a different redis server as specified in 14 | # : settings.STREAM_REDIS_CONFIG 15 | redis_server = 'default' 16 | 17 | @classmethod 18 | def get_timeline_storage_options(cls): 19 | ''' 20 | Returns the options for the timeline storage 21 | ''' 22 | options = 
super(RedisFeed, cls).get_timeline_storage_options() 23 | options['redis_server'] = cls.redis_server 24 | return options 25 | 26 | # : clarify that this feed supports filtering and ordering 27 | filtering_supported = True 28 | ordering_supported = True 29 | -------------------------------------------------------------------------------- /stream_framework/metrics/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/llSourcell/Stream-Framework/e7535857af03c81c760b9265568287816ecac13c/stream_framework/metrics/__init__.py -------------------------------------------------------------------------------- /stream_framework/metrics/base.py: -------------------------------------------------------------------------------- 1 | class NoopTimer(object): 2 | 3 | def __enter__(self): 4 | pass 5 | 6 | def __exit__(self, *args, **kwds): 7 | pass 8 | 9 | 10 | class Metrics(object): 11 | 12 | def __init__(self, *args, **kwargs): 13 | pass 14 | 15 | def fanout_timer(self, feed_class): 16 | return NoopTimer() 17 | 18 | def feed_reads_timer(self, feed_class): 19 | return NoopTimer() 20 | 21 | def on_feed_read(self, feed_class, activities_count): 22 | pass 23 | 24 | def on_feed_remove(self, feed_class, activities_count): 25 | pass 26 | 27 | def on_feed_write(self, feed_class, activities_count): 28 | pass 29 | 30 | def on_fanout(self, feed_class, operation, activities_count=1): 31 | pass 32 | 33 | def on_activity_published(self): 34 | pass 35 | 36 | def on_activity_removed(self): 37 | pass 38 | -------------------------------------------------------------------------------- /stream_framework/metrics/python_statsd.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from stream_framework.metrics.base import Metrics 3 | import statsd 4 | 5 | 6 | class Timer(object): 7 | 8 | def __init__(self, metric_name): 9 | self.metric_name = metric_name 10 | 11 | def __enter__(self): 12 | self.timer = statsd.Timer(self.metric_name) 13 | self.timer.start() 14 | 15 | def __exit__(self, *args, **kwds): 16 | self.timer.stop() 17 | 18 | 19 | class StatsdMetrics(Metrics): 20 | 21 | def __init__(self, host='localhost', port=8125, prefix='stream_framework'): 22 | statsd.Connection.set_defaults(host=host, port=port) 23 | self.prefix = prefix 24 | 25 | def fanout_timer(self, feed_class): 26 | return Timer('%s.%s.fanout_latency' % (self.prefix, feed_class.__name__)) 27 | 28 | def feed_reads_timer(self, feed_class): 29 | return Timer('%s.%s.read_latency' % (self.prefix, feed_class.__name__)) 30 | 31 | def on_feed_read(self, feed_class, activities_count): 32 | counter = statsd.Counter( 33 | '%s.%s.reads' % (self.prefix, feed_class.__name__)) 34 | counter += activities_count 35 | 36 | def on_feed_write(self, feed_class, activities_count): 37 | counter = statsd.Counter( 38 | '%s.%s.writes' % (self.prefix, feed_class.__name__)) 39 | counter += activities_count 40 | 41 | def on_feed_remove(self, feed_class, activities_count): 42 | counter = statsd.Counter( 43 | '%s.%s.deletes' % (self.prefix, feed_class.__name__)) 44 | counter += activities_count 45 | 46 | def on_fanout(self, feed_class, operation, activities_count=1): 47 | metric = (self.prefix, feed_class.__name__, operation.__name__) 48 | counter = statsd.Counter('%s.%s.fanout.%s' % metric) 49 | counter += activities_count 50 | 51 | def on_activity_published(self): 52 | counter = statsd.Counter('%s.activities.published' % self.prefix) 53 | 
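        # python-statsd transmits the metric when the counter is incremented in place below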
counter += 1 54 | 55 | def on_activity_removed(self): 56 | counter = statsd.Counter('%s.activities.removed' % self.prefix) 57 | counter += 1 58 | -------------------------------------------------------------------------------- /stream_framework/metrics/statsd.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from stream_framework.metrics.base import Metrics 3 | from statsd import StatsClient 4 | 5 | 6 | class StatsdMetrics(Metrics): 7 | 8 | def __init__(self, host='localhost', port=8125, prefix=None): 9 | self.statsd = StatsClient(host, port, prefix) 10 | 11 | def fanout_timer(self, feed_class): 12 | return self.statsd.timer('%s.fanout_latency' % feed_class.__name__) 13 | 14 | def feed_reads_timer(self, feed_class): 15 | return self.statsd.timer('%s.read_latency' % feed_class.__name__) 16 | 17 | def on_feed_read(self, feed_class, activities_count): 18 | self.statsd.incr('%s.reads' % feed_class.__name__, activities_count) 19 | 20 | def on_feed_write(self, feed_class, activities_count): 21 | self.statsd.incr('%s.writes' % feed_class.__name__, activities_count) 22 | 23 | def on_feed_remove(self, feed_class, activities_count): 24 | self.statsd.incr('%s.deletes' % feed_class.__name__, activities_count) 25 | 26 | def on_fanout(self, feed_class, operation, activities_count=1): 27 | metric = (feed_class.__name__, operation.__name__) 28 | self.statsd.incr('%s.fanout.%s' % metric, activities_count) 29 | 30 | def on_activity_published(self): 31 | self.statsd.incr('activities.published') 32 | 33 | def on_activity_removed(self): 34 | self.statsd.incr('activities.removed') 35 | -------------------------------------------------------------------------------- /stream_framework/serializers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/llSourcell/Stream-Framework/e7535857af03c81c760b9265568287816ecac13c/stream_framework/serializers/__init__.py -------------------------------------------------------------------------------- /stream_framework/serializers/activity_serializer.py: -------------------------------------------------------------------------------- 1 | from stream_framework.serializers.base import BaseSerializer 2 | from stream_framework.utils import epoch_to_datetime, datetime_to_epoch 3 | from stream_framework.verbs import get_verb_by_id 4 | import six 5 | 6 | try: 7 | import cPickle as pickle 8 | except ImportError: 9 | import pickle 10 | 11 | class ActivitySerializer(BaseSerializer): 12 | 13 | ''' 14 | Serializer optimized for taking as little memory as possible to store an 15 | Activity 16 | 17 | Serialization consists of 5 parts 18 | - actor_id 19 | - verb_id 20 | - object_id 21 | - target_id 22 | - extra_context (pickle) 23 | 24 | None values are stored as 0 25 | ''' 26 | 27 | def dumps(self, activity): 28 | self.check_type(activity) 29 | # keep the milliseconds 30 | activity_time = '%.6f' % datetime_to_epoch(activity.time) 31 | parts = [activity.actor_id, activity.verb.id, 32 | activity.object_id, activity.target_id or 0] 33 | extra_context = activity.extra_context.copy() 34 | pickle_string = '' 35 | if extra_context: 36 | pickle_string = pickle.dumps(activity.extra_context) 37 | if six.PY3: 38 | pickle_string = pickle_string.decode('latin1') 39 | parts += [activity_time, pickle_string] 40 | serialized_activity = ','.join(map(str, parts)) 41 | return serialized_activity 42 | 43 | def loads(self, serialized_activity): 44 | parts = 
serialized_activity.split(',', 5) 45 | # convert these to ids 46 | actor_id, verb_id, object_id, target_id = map( 47 | int, parts[:4]) 48 | activity_datetime = epoch_to_datetime(float(parts[4])) 49 | pickle_string = parts[5] 50 | if not target_id: 51 | target_id = None 52 | verb = get_verb_by_id(verb_id) 53 | extra_context = {} 54 | if pickle_string: 55 | if six.PY3: 56 | pickle_string = pickle_string.encode('latin1') 57 | extra_context = pickle.loads(pickle_string) 58 | activity = self.activity_class(actor_id, verb, object_id, target_id, 59 | time=activity_datetime, extra_context=extra_context) 60 | 61 | return activity 62 | -------------------------------------------------------------------------------- /stream_framework/serializers/aggregated_activity_serializer.py: -------------------------------------------------------------------------------- 1 | from stream_framework.exceptions import SerializationException 2 | from stream_framework.serializers.activity_serializer import ActivitySerializer 3 | from stream_framework.serializers.utils import check_reserved 4 | from stream_framework.utils import epoch_to_datetime, datetime_to_epoch 5 | from stream_framework.serializers.base import BaseAggregatedSerializer 6 | import six 7 | 8 | 9 | class AggregatedActivitySerializer(BaseAggregatedSerializer): 10 | 11 | ''' 12 | Optimized version of the Activity serializer for AggregatedActivities 13 | 14 | v3group;;created_at;;updated_at;;seen_at;;read_at;;aggregated_activities 15 | 16 | Main advantage is that it prevents you from increasing the storage of 17 | a notification without realizing you are adding the extra data 18 | 19 | Depending on dehydrate it will either dump dehydrated aggregated activities 20 | or store the full aggregated activity 21 | ''' 22 | #: indicates if dumps returns dehydrated aggregated activities 23 | dehydrate = True 24 | identifier = 'v3' 25 | reserved_characters = [';', ',', ';;'] 26 | date_fields = ['created_at', 'updated_at', 'seen_at', 'read_at'] 27 | 28 | activity_serializer_class = ActivitySerializer 29 | 30 | def dumps(self, aggregated): 31 | self.check_type(aggregated) 32 | 33 | activity_serializer = self.activity_serializer_class(self.activity_class) 34 | # start by storing the group 35 | parts = [aggregated.group] 36 | check_reserved(aggregated.group, [';;']) 37 | 38 | # store the dates 39 | for date_field in self.date_fields: 40 | value = getattr(aggregated, date_field) 41 | if value is not None: 42 | # keep the milliseconds 43 | epoch = '%.6f' % datetime_to_epoch(value) 44 | else: 45 | epoch = -1 46 | parts += [epoch] 47 | 48 | # add the activities serialization 49 | serialized_activities = [] 50 | if self.dehydrate: 51 | if not aggregated.dehydrated: 52 | aggregated = aggregated.get_dehydrated() 53 | serialized_activities = map(str, aggregated._activity_ids) 54 | else: 55 | for activity in aggregated.activities: 56 | serialized = activity_serializer.dumps(activity) 57 | check_reserved(serialized, [';', ';;']) 58 | serialized_activities.append(serialized) 59 | 60 | serialized_activities_part = ';'.join(serialized_activities) 61 | parts.append(serialized_activities_part) 62 | 63 | # add the minified activities 64 | parts.append(aggregated.minimized_activities) 65 | 66 | # stick everything together 67 | serialized_aggregated = ';;'.join(map(str, parts)) 68 | serialized = '%s%s' % (self.identifier, serialized_aggregated) 69 | return serialized 70 | 71 | def loads(self, serialized_aggregated): 72 | activity_serializer = 
self.activity_serializer_class(self.activity_class) 73 | try: 74 | serialized_aggregated = serialized_aggregated[2:] 75 | parts = serialized_aggregated.split(';;') 76 | # start with the group 77 | group = parts[0] 78 | aggregated = self.aggregated_activity_class(group) 79 | 80 | # get the date and activities 81 | date_dict = dict(zip(self.date_fields, parts[1:5])) 82 | for k, v in date_dict.items(): 83 | date_value = None 84 | if v != '-1': 85 | date_value = epoch_to_datetime(float(v)) 86 | setattr(aggregated, k, date_value) 87 | 88 | # write the activities 89 | serializations = parts[5].split(';') 90 | if self.dehydrate: 91 | activity_ids = list(map(int, serializations)) 92 | aggregated._activity_ids = activity_ids 93 | aggregated.dehydrated = True 94 | else: 95 | activities = [activity_serializer.loads(s) 96 | for s in serializations] 97 | aggregated.activities = activities 98 | aggregated.dehydrated = False 99 | 100 | # write the minimized activities 101 | minimized = int(parts[6]) 102 | aggregated.minimized_activities = minimized 103 | 104 | return aggregated 105 | except Exception as e: 106 | msg = six.text_type(e) 107 | raise SerializationException(msg) 108 | 109 | 110 | class NotificationSerializer(AggregatedActivitySerializer): 111 | #: indicates if dumps returns dehydrated aggregated activities 112 | dehydrate = False 113 | -------------------------------------------------------------------------------- /stream_framework/serializers/base.py: -------------------------------------------------------------------------------- 1 | from stream_framework.activity import Activity, AggregatedActivity 2 | 3 | 4 | class BaseSerializer(object): 5 | 6 | ''' 7 | The base serializer class, only defines the signature for 8 | loads and dumps 9 | 10 | It serializes Activity objects 11 | ''' 12 | 13 | def __init__(self, activity_class, *args, **kwargs): 14 | self.activity_class = activity_class 15 | 16 | def check_type(self, data): 17 | if not isinstance(data, Activity): 18 | raise ValueError('we only know how to dump activities, not %s' % type(data)) 19 | 20 | def loads(self, serialized_activity): 21 | activity = serialized_activity 22 | return activity 23 | 24 | def dumps(self, activity): 25 | self.check_type(activity) 26 | return activity 27 | 28 | 29 | class BaseAggregatedSerializer(BaseSerializer): 30 | 31 | ''' 32 | Serialized aggregated activities 33 | ''' 34 | #: indicates if dumps returns dehydrated aggregated activities 35 | dehydrate = False 36 | 37 | def __init__(self, aggregated_activity_class, *args, **kwargs): 38 | BaseSerializer.__init__(self, *args, **kwargs) 39 | self.aggregated_activity_class = aggregated_activity_class 40 | 41 | def check_type(self, data): 42 | if not isinstance(data, AggregatedActivity): 43 | raise ValueError( 44 | 'we only know how to dump AggregatedActivity not %r' % data) 45 | -------------------------------------------------------------------------------- /stream_framework/serializers/cassandra/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/llSourcell/Stream-Framework/e7535857af03c81c760b9265568287816ecac13c/stream_framework/serializers/cassandra/__init__.py -------------------------------------------------------------------------------- /stream_framework/serializers/cassandra/activity_serializer.py: -------------------------------------------------------------------------------- 1 | from stream_framework.verbs import get_verb_by_id 2 | from stream_framework.serializers.base 
import BaseSerializer 3 | from stream_framework.utils.five import long_t 4 | import pickle 5 | 6 | 7 | class CassandraActivitySerializer(BaseSerializer): 8 | ''' 9 | Cassandra serializer for activities. Note: unlike other serializers this serializer 10 | does not have symmetrical `dumps` and `loads` functions (eg. loads reads a dictionary 11 | and dumps returns a CQLEngine model instance) 12 | ''' 13 | 14 | def __init__(self, model, *args, **kwargs): 15 | BaseSerializer.__init__(self, *args, **kwargs) 16 | self.model = model 17 | 18 | def dumps(self, activity): 19 | self.check_type(activity) 20 | return self.model( 21 | activity_id=long_t(activity.serialization_id), 22 | actor=activity.actor_id, 23 | time=activity.time, 24 | verb=activity.verb.id, 25 | object=activity.object_id, 26 | target=activity.target_id, 27 | extra_context=pickle.dumps(activity.extra_context) 28 | ) 29 | 30 | def loads(self, serialized_activity): 31 | serialized_activity.pop('activity_id') 32 | serialized_activity.pop('feed_id') 33 | serialized_activity['verb'] = get_verb_by_id(int(serialized_activity['verb'])) 34 | serialized_activity['extra_context'] = pickle.loads( 35 | serialized_activity['extra_context'] 36 | ) 37 | return self.activity_class(**serialized_activity) 38 | -------------------------------------------------------------------------------- /stream_framework/serializers/cassandra/aggregated_activity_serializer.py: -------------------------------------------------------------------------------- 1 | from stream_framework.serializers.aggregated_activity_serializer import AggregatedActivitySerializer 2 | from stream_framework.utils.five import long_t 3 | import pickle 4 | 5 | 6 | class CassandraAggregatedActivitySerializer(AggregatedActivitySerializer): 7 | ''' 8 | Cassandra serializer for aggregated activities. Note: unlike other serializers this serializer 9 | does not have symmetrical `dumps` and `loads` functions (eg. 
loads reads a dictionary 10 | and dumps returns a CQLEngine model instance) 11 | ''' 12 | 13 | def __init__(self, model, *args, **kwargs): 14 | AggregatedActivitySerializer.__init__(self, *args, **kwargs) 15 | self.model = model 16 | 17 | def dumps(self, aggregated): 18 | activities = pickle.dumps(aggregated.activities) 19 | model_instance = self.model( 20 | activity_id=long_t(aggregated.serialization_id), 21 | activities=activities, 22 | group=aggregated.group, 23 | created_at=aggregated.created_at, 24 | updated_at=aggregated.updated_at 25 | ) 26 | return model_instance 27 | 28 | def loads(self, serialized_aggregated): 29 | activities = pickle.loads(serialized_aggregated['activities']) 30 | aggregated = self.aggregated_activity_class( 31 | group=serialized_aggregated['group'], 32 | activities=activities, 33 | created_at=serialized_aggregated['created_at'], 34 | updated_at=serialized_aggregated['updated_at'], 35 | ) 36 | return aggregated 37 | -------------------------------------------------------------------------------- /stream_framework/serializers/dummy.py: -------------------------------------------------------------------------------- 1 | from stream_framework.serializers.base import BaseSerializer, BaseAggregatedSerializer 2 | 3 | 4 | class DummySerializer(BaseSerializer): 5 | 6 | ''' 7 | The dummy serializer doesnt care about the type of your data 8 | ''' 9 | 10 | def check_type(self, data): 11 | pass 12 | 13 | 14 | class DummyAggregatedSerializer(BaseAggregatedSerializer): 15 | 16 | ''' 17 | The dummy serializer doesnt care about the type of your data 18 | ''' 19 | 20 | def check_type(self, data): 21 | pass 22 | -------------------------------------------------------------------------------- /stream_framework/serializers/pickle_serializer.py: -------------------------------------------------------------------------------- 1 | from stream_framework.serializers.base import BaseSerializer, BaseAggregatedSerializer 2 | 3 | try: 4 | import cPickle as pickle 5 | except ImportError: 6 | import pickle 7 | 8 | class PickleSerializer(BaseSerializer): 9 | 10 | def loads(self, serialized_activity): 11 | activity = pickle.loads(serialized_activity) 12 | return activity 13 | 14 | def dumps(self, activity): 15 | self.check_type(activity) 16 | return pickle.dumps(activity) 17 | 18 | 19 | class AggregatedActivityPickleSerializer(BaseAggregatedSerializer): 20 | #: indicates if dumps returns dehydrated aggregated activities 21 | dehydrate = True 22 | 23 | def loads(self, serialized_data): 24 | return pickle.loads(serialized_data) 25 | 26 | def dumps(self, aggregated): 27 | self.check_type(aggregated) 28 | if not aggregated.dehydrated: 29 | aggregated = aggregated.get_dehydrated() 30 | return pickle.dumps(aggregated) 31 | -------------------------------------------------------------------------------- /stream_framework/serializers/simple_timeline_serializer.py: -------------------------------------------------------------------------------- 1 | from stream_framework.activity import DehydratedActivity 2 | from stream_framework.serializers.base import BaseSerializer 3 | 4 | 5 | class SimpleTimelineSerializer(BaseSerializer): 6 | 7 | def loads(self, serialized_activity, *args, **kwargs): 8 | return DehydratedActivity(serialization_id=serialized_activity) 9 | 10 | def dumps(self, activity, *args, **kwargs): 11 | ''' 12 | Returns the serialized version of activity and the 13 | ''' 14 | return activity.serialization_id 15 | -------------------------------------------------------------------------------- 
/stream_framework/serializers/utils.py: -------------------------------------------------------------------------------- 1 | from stream_framework.exceptions import SerializationException 2 | 3 | 4 | def check_reserved(value, reserved_characters): 5 | for reserved in reserved_characters: 6 | if reserved in value: 7 | raise SerializationException( 8 | 'encountered reserved character %s in %s' % (reserved, value)) 9 | -------------------------------------------------------------------------------- /stream_framework/settings.py: -------------------------------------------------------------------------------- 1 | from stream_framework.default_settings import * 2 | 3 | ''' 4 | Please fork and add hooks to import your custom settings system. 5 | Right now we only support Django, but the intention is to support 6 | any settings system 7 | ''' 8 | 9 | 10 | def import_global_module(module, current_locals, current_globals, exceptions=None): 11 | '''Import the requested module into the global scope 12 | Warning! This will import your module into the global scope 13 | 14 | **Example**: 15 | from django.conf import settings 16 | import_global_module(settings, locals(), globals()) 17 | 18 | :param module: the module which to import into global scope 19 | :param current_locals: the local globals 20 | :param current_globals: the current globals 21 | :param exceptions: the exceptions which to ignore while importing 22 | 23 | ''' 24 | try: 25 | try: 26 | objects = getattr(module, '__all__', dir(module)) 27 | 28 | for k in objects: 29 | if k and k[0] != '_': 30 | current_globals[k] = getattr(module, k) 31 | except exceptions as e: 32 | return e 33 | finally: 34 | del current_globals, current_locals 35 | 36 | 37 | try: 38 | import django 39 | settings_system = 'django' 40 | except ImportError as e: 41 | settings_system = None 42 | 43 | if settings_system == 'django': 44 | from django.conf import settings 45 | import_global_module(settings, locals(), globals()) 46 | -------------------------------------------------------------------------------- /stream_framework/storage/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/llSourcell/Stream-Framework/e7535857af03c81c760b9265568287816ecac13c/stream_framework/storage/__init__.py -------------------------------------------------------------------------------- /stream_framework/storage/base_lists_storage.py: -------------------------------------------------------------------------------- 1 | class BaseListsStorage(object): 2 | ''' 3 | A storage used to simultaneously track data in one or more lists. 4 | Data could be either added/removed/get/counted/flushed from one or more of the lists. 5 | These operations are executed in an atomic way which guarantees that either 6 | the data in all of the selected lists is modified or not. 
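This base class only defines the interface; a concrete subclass such as stream_framework.storage.redis.lists_storage.RedisListsStorage (used by the Redis notification feed) provides the actual add/remove/count/get/flush implementations.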
7 | 8 | **Usage Example**:: 9 | 10 | feed_counters = ListsStorage('user:5') 11 | 12 | # adds simultaneously [1,2,3,4] to unread and [1,2,3] to unseen lists 13 | feed_counters.add(unread=[1,2,3,4], unseen=[1,2,3]) 14 | # adds [5,6] to unread list 15 | feed_counters.add(unread=[5,6]) 16 | # adds [7,8] to unseen list 17 | feed_counters.add(unseen=[7,8]) 18 | 19 | # removes simultaneously [5,6] from unread and [1,4] from unseen lists 20 | feed_counters.remove(unread=[5,6], unseen=[1,4]) 21 | # removes [2] from unseen 22 | feed_counters.remove(unseen=[2]) 23 | # removes [1,2] from unread 24 | feed_counters.remove(unread=[1,2]) 25 | 26 | # counts simultaneously items in unseen and unread lists 27 | unseen_count, unread_count = feed_counters.count('unseen', 'unread') 28 | # count items in unseen list 29 | unseen_count = feed_counters.count('unseen') 30 | # count items in unread list 31 | unread_count = feed_counters.count('unread') 32 | 33 | # returns all unseen and unread items 34 | unseen_items, unread_items = feed_counters.get('unseen', 'unread') 35 | # returns all items in unseen list 36 | unseen_items = feed_counters.get('unseen') 37 | # returns all items in unread list 38 | unread_items = feed_counters.get('unread') 39 | 40 | # clears unseen and unread items 41 | feed_counters.flush('unseen', 'unread') 42 | # clears unseen items 43 | feed_counters.flush('unseen') 44 | # clears unread items 45 | feed_counters.flush('unread') 46 | 47 | ''' 48 | 49 | # : used to produce a unique key for each list 50 | key_format = 'list:%(key)s:%(list)s' 51 | 52 | # : the maximum amount of items to be stored in each list 53 | max_length = None 54 | 55 | # : some of the storages like those based on Redis may store the data in other 56 | # than the original format. In this case this field is used to convert data back. 57 | data_type = str 58 | 59 | def __init__(self, key, **kwargs): 60 | self.base_key = key 61 | self.key_format = kwargs.get('key_format', self.key_format) 62 | self.max_length = kwargs.get('max_length', self.max_length) 63 | self.data_type = kwargs.get('data_type', self.data_type) 64 | 65 | def get_key(self, list_name): 66 | ''' 67 | Provides the key for a given list 68 | ''' 69 | return self.key_format % {'key': self.base_key, 70 | 'list': list_name} 71 | 72 | def add(self, **kwargs): 73 | ''' 74 | Adds items to one or more lists. 75 | 76 | **Usage Example**:: 77 | feed_counters = ListsStorage('user:5') 78 | feed_counters.add(unread=[1,2,3,4], unseen=[1,2,3]) 79 | feed_counters.add(unread=[5,6]) 80 | feed_counters.add(unseen=[7,8]) 81 | 82 | : kwargs define pairs of list and items to be used for lists modifications 83 | ''' 84 | raise NotImplementedError() 85 | 86 | def remove(self, **kwargs): 87 | ''' 88 | Removes items from one or more lists. 89 | 90 | **Usage Example**:: 91 | feed_counters = ListsStorage('user:5') 92 | feed_counters.remove(unread=[5,6], unseen=[1,4]) 93 | feed_counters.remove(unseen=[2]) 94 | feed_counters.remove(unread=[1,2]) 95 | 96 | : kwargs define pairs of list and items to be used for lists modifications 97 | ''' 98 | raise NotImplementedError() 99 | 100 | def count(self, *args): 101 | ''' 102 | Counts items in one or more lists. 
103 | 104 | **Usage Example**:: 105 | feed_counters = ListsStorage('user:5') 106 | unseen_count, unread_count = feed_counters.count('unseen', 'unread') 107 | unseen_count = feed_counters.count('unseen') 108 | unread_count = feed_counters.count('unread') 109 | 110 | : args define which lists' items to be counted 111 | ''' 112 | raise NotImplementedError() 113 | 114 | def get(self, *args): 115 | ''' 116 | Retrieves all items from one or more lists. 117 | 118 | **Usage Example**:: 119 | feed_counters = ListsStorage('user:5') 120 | unseen_items, unread_items = feed_counters.get('unseen', 'unread') 121 | unseen_items = feed_counters.get('unseen') 122 | unread_items = feed_counters.get('unread') 123 | 124 | : args define which lists' items to be retrieved 125 | ''' 126 | raise NotImplementedError() 127 | 128 | def flush(self, *args): 129 | ''' 130 | Clears one ore more lists. 131 | 132 | **Usage Example**:: 133 | feed_counters = ListsStorage('user:5') 134 | feed_counters.flush('unseen', 'unread') 135 | feed_counters.flush('unseen') 136 | feed_counters.flush('unread') 137 | 138 | : args define which lists to be cleared 139 | ''' 140 | raise NotImplementedError() 141 | -------------------------------------------------------------------------------- /stream_framework/storage/cassandra/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/llSourcell/Stream-Framework/e7535857af03c81c760b9265568287816ecac13c/stream_framework/storage/cassandra/__init__.py -------------------------------------------------------------------------------- /stream_framework/storage/cassandra/activity_storage.py: -------------------------------------------------------------------------------- 1 | from stream_framework.storage.base import BaseActivityStorage 2 | 3 | 4 | class CassandraActivityStorage(BaseActivityStorage): 5 | 6 | def get_from_storage(self, activity_ids, *args, **kwargs): 7 | pass 8 | 9 | def add_to_storage(self, serialized_activities, *args, **kwargs): 10 | pass 11 | 12 | def remove_from_storage(self, activity_ids, *args, **kwargs): 13 | pass 14 | -------------------------------------------------------------------------------- /stream_framework/storage/cassandra/connection.py: -------------------------------------------------------------------------------- 1 | from cassandra.cqlengine import connection 2 | from stream_framework import settings 3 | 4 | 5 | def setup_connection(): 6 | connection.setup( 7 | hosts=settings.STREAM_CASSANDRA_HOSTS, 8 | consistency=settings.STREAM_CASSANDRA_CONSISTENCY_LEVEL, 9 | default_keyspace=settings.STREAM_DEFAULT_KEYSPACE, 10 | **settings.CASSANDRA_DRIVER_KWARGS 11 | ) 12 | -------------------------------------------------------------------------------- /stream_framework/storage/cassandra/models.py: -------------------------------------------------------------------------------- 1 | from cassandra.cqlengine import columns 2 | from cassandra.cqlengine.models import Model 3 | 4 | 5 | class BaseActivity(Model): 6 | feed_id = columns.Ascii(primary_key=True, partition_key=True) 7 | activity_id = columns.VarInt(primary_key=True, clustering_order='desc') 8 | 9 | 10 | class Activity(BaseActivity): 11 | actor = columns.Integer(required=False) 12 | extra_context = columns.Bytes(required=False) 13 | object = columns.Integer(required=False) 14 | target = columns.Integer(required=False) 15 | time = columns.DateTime(required=False) 16 | verb = columns.Integer(required=False) 17 | 18 | 19 | class 
AggregatedActivity(BaseActivity): 20 | activities = columns.Bytes(required=False) 21 | created_at = columns.DateTime(required=False) 22 | group = columns.Ascii(required=False) 23 | updated_at = columns.DateTime(required=False) 24 | -------------------------------------------------------------------------------- /stream_framework/storage/cassandra/monkey_patch.py: -------------------------------------------------------------------------------- 1 | import six 2 | 3 | 4 | if six.PY3: 5 | from datetime import datetime, timedelta 6 | from cassandra.marshal import int64_unpack 7 | from cassandra.cqltypes import DateType 8 | 9 | # Fix for http://bugs.python.org/issue23517 issue 10 | def deserialize(byts, protocol_version): 11 | timestamp = int64_unpack(byts) / 1000.0 12 | dt = datetime(1970, 1, 1) + timedelta(seconds=timestamp) 13 | return dt 14 | 15 | DateType.deserialize = deserialize 16 | -------------------------------------------------------------------------------- /stream_framework/storage/hbase/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/llSourcell/Stream-Framework/e7535857af03c81c760b9265568287816ecac13c/stream_framework/storage/hbase/__init__.py -------------------------------------------------------------------------------- /stream_framework/storage/memory.py: -------------------------------------------------------------------------------- 1 | from stream_framework.storage.base import (BaseTimelineStorage, BaseActivityStorage) 2 | from collections import defaultdict 3 | from contextlib import contextmanager 4 | import six 5 | 6 | 7 | timeline_store = defaultdict(list) 8 | activity_store = defaultdict(dict) 9 | 10 | 11 | def reverse_bisect_left(a, x, lo=0, hi=None): 12 | ''' 13 | same as python bisect.bisect_left but for 14 | lists with reversed order 15 | ''' 16 | if lo < 0: 17 | raise ValueError('lo must be non-negative') 18 | if hi is None: 19 | hi = len(a) 20 | while lo < hi: 21 | mid = (lo + hi) // 2 22 | if x > a[mid]: 23 | hi = mid 24 | else: 25 | lo = mid + 1 26 | return lo 27 | 28 | 29 | class InMemoryActivityStorage(BaseActivityStorage): 30 | 31 | def get_from_storage(self, activity_ids, *args, **kwargs): 32 | return {_id: activity_store.get(_id) for _id in activity_ids} 33 | 34 | def add_to_storage(self, activities, *args, **kwargs): 35 | insert_count = 0 36 | for activity_id, activity_data in six.iteritems(activities): 37 | if activity_id not in activity_store: 38 | insert_count += 1 39 | activity_store[activity_id] = activity_data 40 | return insert_count 41 | 42 | def remove_from_storage(self, activity_ids, *args, **kwargs): 43 | removed = 0 44 | for activity_id in activity_ids: 45 | exists = activity_store.pop(activity_id, None) 46 | if exists: 47 | removed += 1 48 | return removed 49 | 50 | def flush(self): 51 | activity_store.clear() 52 | 53 | 54 | class InMemoryTimelineStorage(BaseTimelineStorage): 55 | 56 | def contains(self, key, activity_id): 57 | return activity_id in timeline_store[key] 58 | 59 | def get_index_of(self, key, activity_id): 60 | return timeline_store[key].index(activity_id) 61 | 62 | def get_slice_from_storage(self, key, start, stop, filter_kwargs=None, ordering_args=None): 63 | results = list(timeline_store[key][start:stop]) 64 | score_value_pairs = list(zip(results, results)) 65 | return score_value_pairs 66 | 67 | def add_to_storage(self, key, activities, *args, **kwargs): 68 | timeline = timeline_store[key] 69 | initial_count = len(timeline) 70 | for activity_id, 
activity_data in six.iteritems(activities): 71 | if self.contains(key, activity_id): 72 | continue 73 | timeline.insert(reverse_bisect_left( 74 | timeline, activity_id), activity_data) 75 | return len(timeline) - initial_count 76 | 77 | def remove_from_storage(self, key, activities, *args, **kwargs): 78 | timeline = timeline_store[key] 79 | initial_count = len(timeline) 80 | for activity_id in activities.keys(): 81 | if self.contains(key, activity_id): 82 | timeline.remove(activity_id) 83 | return initial_count - len(timeline) 84 | 85 | @classmethod 86 | def get_batch_interface(cls): 87 | @contextmanager 88 | def meandmyself(): 89 | yield cls 90 | return meandmyself() 91 | 92 | def count(self, key, *args, **kwargs): 93 | return len(timeline_store[key]) 94 | 95 | def delete(self, key, *args, **kwargs): 96 | timeline_store.pop(key, None) 97 | 98 | def trim(self, key, length): 99 | timeline_store[key] = timeline_store[key][:length] 100 | -------------------------------------------------------------------------------- /stream_framework/storage/redis/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/llSourcell/Stream-Framework/e7535857af03c81c760b9265568287816ecac13c/stream_framework/storage/redis/__init__.py -------------------------------------------------------------------------------- /stream_framework/storage/redis/activity_storage.py: -------------------------------------------------------------------------------- 1 | from stream_framework.storage.base import BaseActivityStorage 2 | from stream_framework.storage.redis.structures.hash import ShardedHashCache 3 | from stream_framework.serializers.activity_serializer import ActivitySerializer 4 | import six 5 | 6 | 7 | class ActivityCache(ShardedHashCache): 8 | key_format = 'activity:cache:%s' 9 | 10 | 11 | class RedisActivityStorage(BaseActivityStorage): 12 | default_serializer_class = ActivitySerializer 13 | 14 | def get_key(self): 15 | return self.options.get('key', 'global') 16 | 17 | def get_cache(self): 18 | key = self.get_key() 19 | return ActivityCache(key) 20 | 21 | def get_from_storage(self, activity_ids, *args, **kwargs): 22 | cache = self.get_cache() 23 | activities = cache.get_many(activity_ids) 24 | activities = dict((k, six.text_type(v)) for k, v in activities.items() if v) 25 | return activities 26 | 27 | def add_to_storage(self, serialized_activities, *args, **kwargs): 28 | cache = self.get_cache() 29 | key_value_pairs = serialized_activities.items() 30 | result = cache.set_many(key_value_pairs) 31 | insert_count = 0 32 | if result: 33 | insert_count = len(key_value_pairs) 34 | 35 | return insert_count 36 | 37 | def remove_from_storage(self, activity_ids, *args, **kwargs): 38 | # we never explicitly remove things from storage 39 | cache = self.get_cache() 40 | result = cache.delete_many(activity_ids) 41 | return result 42 | 43 | def flush(self): 44 | cache = self.get_cache() 45 | cache.delete() 46 | -------------------------------------------------------------------------------- /stream_framework/storage/redis/connection.py: -------------------------------------------------------------------------------- 1 | import redis 2 | from stream_framework import settings 3 | 4 | connection_pool = None 5 | 6 | 7 | def get_redis_connection(server_name='default'): 8 | ''' 9 | Gets the specified redis connection 10 | ''' 11 | global connection_pool 12 | 13 | if connection_pool is None: 14 | connection_pool = setup_redis() 15 | 16 | pool = 
connection_pool[server_name] 17 | 18 | return redis.StrictRedis(connection_pool=pool) 19 | 20 | 21 | def setup_redis(): 22 | ''' 23 | Starts the connection pool for all configured redis servers 24 | ''' 25 | pools = {} 26 | for name, config in settings.STREAM_REDIS_CONFIG.items(): 27 | pool = redis.ConnectionPool( 28 | host=config['host'], 29 | port=config['port'], 30 | password=config.get('password'), 31 | db=config['db'], 32 | decode_responses=config.get('decode_responses', True), 33 | # connection options 34 | socket_timeout=config.get('socket_timeout', None), 35 | socket_connect_timeout=config.get('socket_connect_timeout', None), 36 | socket_keepalive=config.get('socket_keepalive', False), 37 | socket_keepalive_options=config.get('socket_keepalive_options', None), 38 | retry_on_timeout=config.get('retry_on_timeout', False), 39 | ) 40 | pools[name] = pool 41 | return pools 42 | -------------------------------------------------------------------------------- /stream_framework/storage/redis/lists_storage.py: -------------------------------------------------------------------------------- 1 | from stream_framework.storage.base_lists_storage import BaseListsStorage 2 | from stream_framework.storage.redis.connection import get_redis_connection 3 | 4 | import six 5 | 6 | 7 | class RedisListsStorage(BaseListsStorage): 8 | 9 | def _to_result(self, results): 10 | if results: 11 | if len(results) == 1: 12 | return results[0] 13 | else: 14 | return tuple(results) 15 | 16 | @property 17 | def redis(self): 18 | ''' 19 | Lazy load the redis connection 20 | ''' 21 | try: 22 | return self._redis 23 | except AttributeError: 24 | self._redis = get_redis_connection() 25 | return self._redis 26 | 27 | def get_keys(self, list_names): 28 | return [self.get_key(list_name) for list_name in list_names] 29 | 30 | def add(self, **kwargs): 31 | if kwargs: 32 | pipe = self.redis.pipeline() 33 | 34 | for list_name, values in six.iteritems(kwargs): 35 | if values: 36 | key = self.get_key(list_name) 37 | for value in values: 38 | pipe.rpush(key, value) 39 | # Removes items from list's head 40 | pipe.ltrim(key, -self.max_length, -1) 41 | 42 | pipe.execute() 43 | 44 | def remove(self, **kwargs): 45 | if kwargs: 46 | pipe = self.redis.pipeline() 47 | 48 | for list_name, values in six.iteritems(kwargs): 49 | key = self.get_key(list_name) 50 | for value in values: 51 | # Removes all occurrences of value in the list 52 | pipe.lrem(key, 0, value) 53 | 54 | pipe.execute() 55 | 56 | def count(self, *args): 57 | if args: 58 | keys = self.get_keys(args) 59 | pipe = self.redis.pipeline() 60 | for key in keys: 61 | pipe.llen(key) 62 | return self._to_result(pipe.execute()) 63 | 64 | def get(self, *args): 65 | if args: 66 | keys = self.get_keys(args) 67 | pipe = self.redis.pipeline() 68 | for key in keys: 69 | pipe.lrange(key, 0, -1) 70 | results = pipe.execute() 71 | results = [list(map(self.data_type, items)) for items in results] 72 | return self._to_result(results) 73 | 74 | def flush(self, *args): 75 | if args: 76 | keys = self.get_keys(args) 77 | self.redis.delete(*keys) 78 | -------------------------------------------------------------------------------- /stream_framework/storage/redis/structures/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/llSourcell/Stream-Framework/e7535857af03c81c760b9265568287816ecac13c/stream_framework/storage/redis/structures/__init__.py -------------------------------------------------------------------------------- 
/stream_framework/storage/redis/structures/base.py: -------------------------------------------------------------------------------- 1 | from stream_framework.storage.redis.connection import get_redis_connection 2 | from redis.client import BasePipeline 3 | 4 | 5 | class RedisCache(object): 6 | 7 | ''' 8 | The base for all redis data structures 9 | ''' 10 | key_format = 'redis:cache:%s' 11 | 12 | def __init__(self, key, redis=None, redis_server='default'): 13 | # write the key 14 | self.key = key 15 | # handy when using fallback to other data sources 16 | self.source = 'redis' 17 | # the redis connection, self.redis is lazy loading the connection 18 | self._redis = redis 19 | # the redis server (see get_redis_connection) 20 | self.redis_server = redis_server 21 | 22 | def get_redis(self): 23 | ''' 24 | Only load the redis connection if we use it 25 | ''' 26 | if self._redis is None: 27 | self._redis = get_redis_connection( 28 | server_name=self.redis_server 29 | ) 30 | return self._redis 31 | 32 | def set_redis(self, value): 33 | ''' 34 | Sets the redis connection 35 | ''' 36 | self._redis = value 37 | 38 | redis = property(get_redis, set_redis) 39 | 40 | def get_key(self): 41 | return self.key 42 | 43 | def delete(self): 44 | key = self.get_key() 45 | self.redis.delete(key) 46 | 47 | def _pipeline_if_needed(self, operation, *args, **kwargs): 48 | ''' 49 | If the redis connection is already in distributed state use it 50 | Otherwise spawn a new distributed connection using .map 51 | ''' 52 | pipe_needed = not isinstance(self.redis, BasePipeline) 53 | if pipe_needed: 54 | pipe = self.redis.pipeline(transaction=False) 55 | operation(pipe, *args, **kwargs) 56 | results = pipe.execute() 57 | else: 58 | results = operation(self.redis, *args, **kwargs) 59 | return results 60 | -------------------------------------------------------------------------------- /stream_framework/storage/redis/structures/list.py: -------------------------------------------------------------------------------- 1 | from stream_framework.storage.redis.structures.base import RedisCache 2 | import six 3 | import logging 4 | logger = logging.getLogger(__name__) 5 | 6 | 7 | class BaseRedisListCache(RedisCache): 8 | 9 | ''' 10 | Generic list functionality used for both the sorted set and list implementations 11 | 12 | Retrieve the sorted list/sorted set by using python slicing 13 | ''' 14 | key_format = 'redis:base_list_cache:%s' 15 | max_length = 100 16 | 17 | def __getitem__(self, k): 18 | """ 19 | Retrieves an item or slice from the set of results. 20 | This is the complicated stuff which allows us to slice 21 | """ 22 | if not isinstance(k, (slice, six.integer_types)): 23 | raise TypeError 24 | assert ((not isinstance(k, slice) and (k >= 0)) 25 | or (isinstance(k, slice) and (k.start is None or k.start >= 0) 26 | and (k.stop is None or k.stop >= 0))), \ 27 | "Negative indexing is not supported." 28 | 29 | # Remember if it's a slice or not. We're going to treat everything as 30 | # a slice to simply the logic and will `.pop()` at the end as needed. 31 | if isinstance(k, slice): 32 | start = k.start 33 | 34 | if k.stop is not None: 35 | bound = int(k.stop) 36 | else: 37 | bound = None 38 | else: 39 | start = k 40 | bound = k + 1 41 | 42 | start = start or 0 43 | 44 | # We need check to see if we need to populate more of the cache. 45 | try: 46 | results = self.get_results(start, bound) 47 | except StopIteration: 48 | # There's nothing left, even though the bound is higher. 
49 | results = None 50 | 51 | return results 52 | 53 | def get_results(self, start, stop): 54 | raise NotImplementedError('please define this function in subclasses') 55 | 56 | 57 | class RedisListCache(BaseRedisListCache): 58 | key_format = 'redis:list_cache:%s' 59 | #: the maximum number of items the list stores 60 | max_items = 1000 61 | 62 | def get_results(self, start, stop): 63 | if start is None: 64 | start = 0 65 | if stop is None: 66 | stop = -1 67 | key = self.get_key() 68 | results = self.redis.lrange(key, start, stop) 69 | return results 70 | 71 | def append(self, value): 72 | values = [value] 73 | results = self.append_many(values) 74 | result = results[0] 75 | return result 76 | 77 | def append_many(self, values): 78 | key = self.get_key() 79 | results = [] 80 | 81 | def _append_many(redis, values): 82 | for value in values: 83 | logger.debug('adding to %s with value %s', key, value) 84 | result = redis.rpush(key, value) 85 | results.append(result) 86 | return results 87 | 88 | # start a new map redis or go with the given one 89 | results = self._pipeline_if_needed(_append_many, values) 90 | 91 | return results 92 | 93 | def remove(self, value): 94 | values = [value] 95 | results = self.remove_many(values) 96 | result = results[0] 97 | return result 98 | 99 | def remove_many(self, values): 100 | key = self.get_key() 101 | results = [] 102 | 103 | def _remove_many(redis, values): 104 | for value in values: 105 | logger.debug('removing from %s with value %s', key, value) 106 | result = redis.lrem(key, 10, value) 107 | results.append(result) 108 | return results 109 | 110 | # start a new map redis or go with the given one 111 | results = self._pipeline_if_needed(_remove_many, values) 112 | 113 | return results 114 | 115 | def count(self): 116 | key = self.get_key() 117 | count = self.redis.llen(key) 118 | return count 119 | 120 | def trim(self): 121 | ''' 122 | Removes the old items in the list 123 | ''' 124 | # clean up everything with a rank lower than max items up to the end of 125 | # the list 126 | key = self.get_key() 127 | removed = self.redis.ltrim(key, 0, self.max_items - 1) 128 | msg_format = 'cleaning up the list %s to a max of %s items' 129 | logger.info(msg_format, self.get_key(), self.max_items) 130 | return removed 131 | 132 | 133 | class FallbackRedisListCache(RedisListCache): 134 | 135 | ''' 136 | Redis list cache which after retrieving all items from redis falls back 137 | to a main data source (like the database) 138 | ''' 139 | key_format = 'redis:db_list_cache:%s' 140 | 141 | def get_fallback_results(self, start, stop): 142 | raise NotImplementedError('please define this function in subclasses') 143 | 144 | def get_results(self, start, stop): 145 | ''' 146 | Retrieves results from redis and the fallback datasource 147 | ''' 148 | if stop is not None: 149 | redis_results = self.get_redis_results(start, stop - 1) 150 | required_items = stop - start 151 | enough_results = len(redis_results) == required_items 152 | assert len(redis_results) <= required_items, 'we should never have more than we ask for, start %s, stop %s' % ( 153 | start, stop) 154 | else: 155 | # [start:] slicing does not know what's enough so 156 | # does not hit the db unless the cache is empty 157 | redis_results = self.get_redis_results(start, stop) 158 | enough_results = True 159 | if not redis_results or not enough_results: 160 | self.source = 'fallback' 161 | filtered = getattr(self, "_filtered", False) 162 | db_results = self.get_fallback_results(start, stop) 163 | 164 | if start == 0 
and not redis_results and not filtered: 165 | logger.info('setting cache for type %s with len %s', 166 | self.get_key(), len(db_results)) 167 | # only cache when we have no results, to prevent duplicates 168 | self.cache(db_results) 169 | elif start == 0 and redis_results and not filtered: 170 | logger.info('overwriting cache for type %s with len %s', 171 | self.get_key(), len(db_results)) 172 | # clear the cache and add these values 173 | self.overwrite(db_results) 174 | results = db_results 175 | logger.info( 176 | 'retrieved %s to %s from db and not from cache with key %s' % 177 | (start, stop, self.get_key())) 178 | else: 179 | results = redis_results 180 | logger.info('retrieved %s to %s from cache on key %s' % 181 | (start, stop, self.get_key())) 182 | return results 183 | 184 | def get_redis_results(self, start, stop): 185 | ''' 186 | Returns the results from redis 187 | 188 | :param start: the beginning 189 | :param stop: the end 190 | ''' 191 | results = RedisListCache.get_results(self, start, stop) 192 | return results 193 | 194 | def cache(self, fallback_results): 195 | ''' 196 | Hook to write the results from the fallback to redis 197 | ''' 198 | self.append_many(fallback_results) 199 | 200 | def overwrite(self, fallback_results): 201 | ''' 202 | Clear the cache and write the results from the fallback 203 | ''' 204 | self.delete() 205 | self.cache(fallback_results) 206 | -------------------------------------------------------------------------------- /stream_framework/storage/redis/structures/sorted_set.py: -------------------------------------------------------------------------------- 1 | from stream_framework.utils.functional import lazy 2 | from stream_framework.storage.redis.structures.hash import BaseRedisHashCache 3 | from stream_framework.storage.redis.structures.list import BaseRedisListCache 4 | from stream_framework.utils import chunks 5 | import six 6 | import logging 7 | logger = logging.getLogger(__name__) 8 | 9 | 10 | class RedisSortedSetCache(BaseRedisListCache, BaseRedisHashCache): 11 | sort_asc = False 12 | 13 | def count(self): 14 | ''' 15 | Returns the number of elements in the sorted set 16 | ''' 17 | key = self.get_key() 18 | redis_result = self.redis.zcard(key) 19 | # lazily convert this to an int, this keeps it compatible with 20 | # distributed connections 21 | redis_count = lambda: int(redis_result) 22 | lazy_factory = lazy(redis_count, *six.integer_types) 23 | lazy_object = lazy_factory() 24 | return lazy_object 25 | 26 | def index_of(self, value): 27 | ''' 28 | Returns the index of the given value 29 | ''' 30 | if self.sort_asc: 31 | redis_rank_fn = self.redis.zrank 32 | else: 33 | redis_rank_fn = self.redis.zrevrank 34 | key = self.get_key() 35 | result = redis_rank_fn(key, value) 36 | if result: 37 | result = int(result) 38 | elif result is None: 39 | raise ValueError( 40 | 'Couldnt find item with value %s in key %s' % (value, key)) 41 | return result 42 | 43 | def add(self, score, key): 44 | score_value_pairs = [(score, key)] 45 | results = self.add_many(score_value_pairs) 46 | result = results[0] 47 | return result 48 | 49 | def add_many(self, score_value_pairs): 50 | ''' 51 | StrictRedis so it expects score1, name1 52 | ''' 53 | key = self.get_key() 54 | scores = list(zip(*score_value_pairs))[0] 55 | msg_format = 'Please send floats as the first part of the pairs got %s' 56 | numeric_types = (float,) + six.integer_types 57 | if not all([isinstance(score, numeric_types) for score in scores]): 58 | raise ValueError(msg_format % score_value_pairs) 
59 | results = [] 60 | 61 | def _add_many(redis, score_value_pairs): 62 | score_value_list = sum(map(list, score_value_pairs), []) 63 | score_value_chunks = chunks(score_value_list, 200) 64 | 65 | for score_value_chunk in score_value_chunks: 66 | result = redis.zadd(key, *score_value_chunk) 67 | logger.debug('adding to %s with score_value_chunk %s', 68 | key, score_value_chunk) 69 | results.append(result) 70 | return results 71 | 72 | # start a new map redis or go with the given one 73 | results = self._pipeline_if_needed(_add_many, score_value_pairs) 74 | 75 | return results 76 | 77 | def remove_many(self, values): 78 | ''' 79 | values 80 | ''' 81 | key = self.get_key() 82 | results = [] 83 | 84 | def _remove_many(redis, values): 85 | for value in values: 86 | logger.debug('removing value %s from %s', value, key) 87 | result = redis.zrem(key, value) 88 | results.append(result) 89 | return results 90 | 91 | # start a new map redis or go with the given one 92 | results = self._pipeline_if_needed(_remove_many, values) 93 | 94 | return results 95 | 96 | def remove_by_scores(self, scores): 97 | key = self.get_key() 98 | results = [] 99 | 100 | def _remove_many(redis, scores): 101 | for score in scores: 102 | logger.debug('removing score %s from %s', score, key) 103 | result = redis.zremrangebyscore(key, score, score) 104 | results.append(result) 105 | return results 106 | 107 | # start a new map redis or go with the given one 108 | results = self._pipeline_if_needed(_remove_many, scores) 109 | 110 | return results 111 | 112 | def contains(self, value): 113 | ''' 114 | Uses zscore to see if the given activity is present in our sorted set 115 | ''' 116 | key = self.get_key() 117 | result = self.redis.zscore(key, value) 118 | activity_found = result is not None 119 | return activity_found 120 | 121 | def trim(self, max_length=None): 122 | ''' 123 | Trim the sorted set to max length 124 | zremrangebyscore 125 | ''' 126 | key = self.get_key() 127 | if max_length is None: 128 | max_length = self.max_length 129 | 130 | # map things to the funny redis syntax 131 | if self.sort_asc: 132 | begin = max_length 133 | end = -1 134 | else: 135 | begin = 0 136 | end = (max_length * -1) - 1 137 | 138 | removed = self.redis.zremrangebyrank(key, begin, end) 139 | logger.info('cleaning up the sorted set %s to a max of %s items' % 140 | (key, max_length)) 141 | return removed 142 | 143 | def get_results(self, start=None, stop=None, min_score=None, max_score=None): 144 | ''' 145 | Retrieve results from redis using zrevrange 146 | O(log(N)+M) with N being the number of elements in the sorted set and M the number of elements returned. 
147 | ''' 148 | if self.sort_asc: 149 | redis_range_fn = self.redis.zrangebyscore 150 | else: 151 | redis_range_fn = self.redis.zrevrangebyscore 152 | 153 | # -1 means infinity 154 | if stop is None: 155 | stop = -1 156 | 157 | if start is None: 158 | start = 0 159 | 160 | if stop != -1: 161 | limit = stop - start 162 | else: 163 | limit = -1 164 | 165 | key = self.get_key() 166 | 167 | # some type validations 168 | if min_score and not isinstance(min_score, (float, str, six.integer_types)): 169 | raise ValueError( 170 | 'min_score is not of type float, int, long or str got %s' % min_score) 171 | if max_score and not isinstance(max_score, (float, str, six.integer_types)): 172 | raise ValueError( 173 | 'max_score is not of type float, int, long or str got %s' % max_score) 174 | 175 | if min_score is None: 176 | min_score = '-inf' 177 | if max_score is None: 178 | max_score = '+inf' 179 | 180 | # handle the starting score support 181 | results = redis_range_fn( 182 | key, start=start, num=limit, withscores=True, min=min_score, max=max_score) 183 | return results 184 | -------------------------------------------------------------------------------- /stream_framework/storage/redis/timeline_storage.py: -------------------------------------------------------------------------------- 1 | from stream_framework.storage.base import BaseTimelineStorage 2 | from stream_framework.storage.redis.structures.sorted_set import RedisSortedSetCache 3 | from stream_framework.storage.redis.connection import get_redis_connection 4 | from stream_framework.utils.five import long_t 5 | import six 6 | 7 | 8 | 9 | class TimelineCache(RedisSortedSetCache): 10 | sort_asc = False 11 | 12 | 13 | class RedisTimelineStorage(BaseTimelineStorage): 14 | 15 | def get_cache(self, key): 16 | redis_server = self.options.get('redis_server', 'default') 17 | cache = TimelineCache(key, redis_server=redis_server) 18 | return cache 19 | 20 | def contains(self, key, activity_id): 21 | cache = self.get_cache(key) 22 | contains = cache.contains(activity_id) 23 | return contains 24 | 25 | def get_slice_from_storage(self, key, start, stop, filter_kwargs=None, ordering_args=None): 26 | ''' 27 | Returns a slice from the storage 28 | :param key: the redis key at which the sorted set is located 29 | :param start: the start 30 | :param stop: the stop 31 | :param filter_kwargs: a dict of filter kwargs 32 | :param ordering_args: a list of fields used for sorting 33 | 34 | **Example**:: 35 | get_slice_from_storage('feed:13', 0, 10, {activity_id__lte=10}) 36 | ''' 37 | cache = self.get_cache(key) 38 | 39 | # parse the filter kwargs and translate them to min max 40 | # as used by the get results function 41 | valid_kwargs = [ 42 | 'activity_id__gte', 'activity_id__lte', 43 | 'activity_id__gt', 'activity_id__lt', 44 | ] 45 | filter_kwargs = filter_kwargs or {} 46 | result_kwargs = {} 47 | for k in valid_kwargs: 48 | v = filter_kwargs.pop(k, None) 49 | if v is not None: 50 | if not isinstance(v, (float, six.integer_types)): 51 | raise ValueError( 52 | 'Filter kwarg values should be floats, int or long, got %s=%s' % (k, v)) 53 | 54 | # By default, the interval specified by min_score and max_score is closed (inclusive). 
55 | # It is possible to specify an open interval (exclusive) by prefixing the score with the character ( 56 | _, direction = k.split('__') 57 | equal = 'te' in direction 58 | 59 | if 'gt' in direction: 60 | if not equal: 61 | v = '(' + str(v) 62 | result_kwargs['min_score'] = v 63 | else: 64 | if not equal: 65 | v = '(' + str(v) 66 | result_kwargs['max_score'] = v 67 | # complain if we didn't recognize the filter kwargs 68 | if filter_kwargs: 69 | raise ValueError('Unrecognized filter kwargs %s' % filter_kwargs) 70 | 71 | if ordering_args: 72 | if len(ordering_args) > 1: 73 | raise ValueError('Too many order kwargs %s' % ordering_args) 74 | 75 | if '-activity_id' in ordering_args: 76 | # descending sort 77 | cache.sort_asc = False 78 | elif 'activity_id' in ordering_args: 79 | cache.sort_asc = True 80 | else: 81 | raise ValueError('Unrecognized order kwargs %s' % ordering_args) 82 | 83 | # get the actual results 84 | key_score_pairs = cache.get_results(start, stop, **result_kwargs) 85 | score_key_pairs = [(score, data) for data, score in key_score_pairs] 86 | 87 | return score_key_pairs 88 | 89 | def get_batch_interface(self): 90 | return get_redis_connection( 91 | server_name=self.options.get('redis_server', 'default') 92 | ).pipeline(transaction=False) 93 | 94 | def get_index_of(self, key, activity_id): 95 | cache = self.get_cache(key) 96 | index = cache.index_of(activity_id) 97 | return index 98 | 99 | def add_to_storage(self, key, activities, batch_interface=None, *args, **kwargs): 100 | cache = self.get_cache(key) 101 | # turn it into key value pairs 102 | scores = map(long_t, activities.keys()) 103 | score_value_pairs = list(zip(scores, activities.values())) 104 | result = cache.add_many(score_value_pairs) 105 | for r in result: 106 | # errors in strings? 
107 | # anyhow raise them here :) 108 | if hasattr(r, 'isdigit') and not r.isdigit(): 109 | raise ValueError('got error %s in results %s' % (r, result)) 110 | return result 111 | 112 | def remove_from_storage(self, key, activities, batch_interface=None): 113 | cache = self.get_cache(key) 114 | results = cache.remove_many(activities.values()) 115 | return results 116 | 117 | def count(self, key): 118 | cache = self.get_cache(key) 119 | return int(cache.count()) 120 | 121 | def delete(self, key): 122 | cache = self.get_cache(key) 123 | cache.delete() 124 | 125 | def trim(self, key, length, batch_interface=None): 126 | cache = self.get_cache(key) 127 | cache.trim(length) 128 | -------------------------------------------------------------------------------- /stream_framework/tasks.py: -------------------------------------------------------------------------------- 1 | from celery import shared_task 2 | from stream_framework.activity import Activity, AggregatedActivity 3 | 4 | 5 | @shared_task 6 | def fanout_operation(feed_manager, feed_class, user_ids, operation, operation_kwargs): 7 | ''' 8 | Simple task wrapper for _fanout task 9 | Just making sure code is where you expect it :) 10 | ''' 11 | feed_manager.fanout(user_ids, feed_class, operation, operation_kwargs) 12 | return "%d user_ids, %r, %r (%r)" % (len(user_ids), feed_class, operation, operation_kwargs) 13 | 14 | 15 | @shared_task 16 | def fanout_operation_hi_priority(feed_manager, feed_class, user_ids, operation, operation_kwargs): 17 | return fanout_operation(feed_manager, feed_class, user_ids, operation, operation_kwargs) 18 | 19 | 20 | @shared_task 21 | def fanout_operation_low_priority(feed_manager, feed_class, user_ids, operation, operation_kwargs): 22 | return fanout_operation(feed_manager, feed_class, user_ids, operation, operation_kwargs) 23 | 24 | 25 | @shared_task 26 | def follow_many(feed_manager, user_id, target_ids, follow_limit): 27 | feeds = feed_manager.get_feeds(user_id).values() 28 | target_feeds = map(feed_manager.get_user_feed, target_ids) 29 | 30 | activities = [] 31 | for target_feed in target_feeds: 32 | activities += target_feed[:follow_limit] 33 | 34 | if activities: 35 | for feed in feeds: 36 | with feed.get_timeline_batch_interface() as batch_interface: 37 | feed.add_many(activities, batch_interface=batch_interface) 38 | 39 | 40 | @shared_task 41 | def unfollow_many(feed_manager, user_id, source_ids): 42 | for feed in feed_manager.get_feeds(user_id).values(): 43 | activities = [] 44 | feed.trim() 45 | for item in feed[:feed.max_length]: 46 | if isinstance(item, Activity): 47 | if item.actor_id in source_ids: 48 | activities.append(item) 49 | elif isinstance(item, AggregatedActivity): 50 | activities.extend( 51 | [activity for activity in item.activities if activity.actor_id in source_ids]) 52 | 53 | if activities: 54 | feed.remove_many(activities) 55 | -------------------------------------------------------------------------------- /stream_framework/tests/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | try: 3 | from django.conf import settings 4 | try: 5 | # ignore this if we already configured settings 6 | settings.configure() 7 | except RuntimeError as e: 8 | pass 9 | except: 10 | pass 11 | -------------------------------------------------------------------------------- /stream_framework/tests/activity.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | from stream_framework.activity import 
Activity 3 | from stream_framework.activity import AggregatedActivity 4 | from stream_framework.activity import DehydratedActivity 5 | from stream_framework.tests.utils import Pin 6 | from stream_framework.verbs.base import Love as LoveVerb 7 | from stream_framework.aggregators.base import RecentVerbAggregator 8 | from stream_framework.exceptions import ActivityNotFound 9 | from stream_framework.exceptions import DuplicateActivityException 10 | import time 11 | import unittest 12 | import six 13 | 14 | 15 | class TestActivity(unittest.TestCase): 16 | 17 | def test_serialization_length(self): 18 | activity_object = Pin(id=1) 19 | activity = Activity(1, LoveVerb, activity_object) 20 | assert len(str(activity.serialization_id)) == 26 21 | 22 | def test_serialization_type(self): 23 | activity_object = Pin(id=1) 24 | activity = Activity(1, LoveVerb, activity_object) 25 | assert isinstance(activity.serialization_id, (six.integer_types, float)) 26 | 27 | def test_serialization_overflow_check_object_id(self): 28 | activity_object = Pin(id=10 ** 10) 29 | activity = Activity(1, LoveVerb, activity_object) 30 | with self.assertRaises(TypeError): 31 | activity.serialization_id 32 | 33 | def test_serialization_overflow_check_role_id(self): 34 | activity_object = Pin(id=1) 35 | Verb = type('Overflow', (LoveVerb,), {'id': 9999}) 36 | activity = Activity(1, Verb, activity_object) 37 | with self.assertRaises(TypeError): 38 | activity.serialization_id 39 | 40 | def test_dehydrated_activity(self): 41 | activity_object = Pin(id=1) 42 | activity = Activity(1, LoveVerb, activity_object) 43 | dehydrated = activity.get_dehydrated() 44 | self.assertTrue(isinstance(dehydrated, DehydratedActivity)) 45 | self.assertEquals( 46 | dehydrated.serialization_id, activity.serialization_id) 47 | 48 | def test_compare_idempotent_init(self): 49 | t1 = datetime.datetime.utcnow() 50 | activity_object = Pin(id=1) 51 | activity1 = Activity(1, LoveVerb, activity_object, time=t1) 52 | time.sleep(0.1) 53 | activity2 = Activity(1, LoveVerb, activity_object, time=t1) 54 | self.assertEquals(activity1, activity2) 55 | 56 | def test_compare_apple_and_oranges(self): 57 | activity_object = Pin(id=1) 58 | activity = Activity(1, LoveVerb, activity_object) 59 | with self.assertRaises(ValueError): 60 | activity == activity_object 61 | 62 | 63 | class TestAggregatedActivity(unittest.TestCase): 64 | 65 | def test_contains(self): 66 | activity = Activity(1, LoveVerb, Pin(id=1)) 67 | aggregated = AggregatedActivity(1, [activity]) 68 | self.assertTrue(aggregated.contains(activity)) 69 | 70 | def test_duplicated_activities(self): 71 | activity = Activity(1, LoveVerb, Pin(id=1)) 72 | aggregated = AggregatedActivity(1, [activity]) 73 | with self.assertRaises(DuplicateActivityException): 74 | aggregated.append(activity) 75 | 76 | def test_compare_apple_and_oranges(self): 77 | activity = AggregatedActivity(1, [Activity(1, LoveVerb, Pin(id=1))]) 78 | with self.assertRaises(ValueError): 79 | activity == Pin(id=1) 80 | 81 | def test_contains_extraneous_object(self): 82 | activity = AggregatedActivity(1, [Activity(1, LoveVerb, Pin(id=1))]) 83 | with self.assertRaises(ValueError): 84 | activity.contains(Pin(id=1)) 85 | 86 | def test_aggregated_properties(self): 87 | activities = [] 88 | for x in range(1, 101): 89 | activity_object = Pin(id=x) 90 | activity = Activity(x, LoveVerb, activity_object) 91 | activities.append(activity) 92 | aggregator = RecentVerbAggregator() 93 | aggregated_activities = aggregator.aggregate(activities) 94 | aggregated = 
aggregated_activities[0] 95 | 96 | self.assertEqual(aggregated.verbs, [LoveVerb]) 97 | self.assertEqual(aggregated.verb, LoveVerb) 98 | self.assertEqual(aggregated.actor_count, 100) 99 | self.assertEqual(aggregated.minimized_activities, 85) 100 | self.assertEqual(aggregated.other_actor_count, 99) 101 | self.assertEqual(aggregated.activity_count, 100) 102 | self.assertEqual(aggregated.object_ids, list(range(86, 101))) 103 | # the other ones should be dropped 104 | self.assertEqual(aggregated.actor_ids, list(range(86, 101))) 105 | self.assertEqual(aggregated.is_seen(), False) 106 | self.assertEqual(aggregated.is_read(), False) 107 | 108 | def generate_activities(self): 109 | activities = [] 110 | for x in range(1, 20): 111 | activity = Activity(x, LoveVerb, Pin(id=x)) 112 | activities.append(activity) 113 | return activities 114 | 115 | def generate_aggregated_activities(self, activities): 116 | aggregator = RecentVerbAggregator() 117 | aggregated_activities = aggregator.aggregate(activities) 118 | return aggregated_activities 119 | 120 | def test_aggregated_compare(self): 121 | activities = self.generate_activities() 122 | aggregated_activities = self.generate_aggregated_activities(activities) 123 | aggregated_activities_two = self.generate_aggregated_activities(activities) 124 | 125 | new_activities = self.generate_activities() 126 | aggregated_activities_three = self.generate_aggregated_activities(new_activities) 127 | 128 | # this should be equal 129 | self.assertEqual(aggregated_activities, aggregated_activities_two) 130 | # this should not be equal 131 | self.assertNotEqual(aggregated_activities, aggregated_activities_three) 132 | 133 | def test_aggregated_remove(self): 134 | activities = [] 135 | for x in range(1, 101): 136 | activity_object = Pin(id=x) 137 | activity = Activity(x, LoveVerb, activity_object) 138 | activities.append(activity) 139 | aggregator = RecentVerbAggregator() 140 | aggregated_activities = aggregator.aggregate(activities) 141 | aggregated = aggregated_activities[0] 142 | for activity in activities: 143 | try: 144 | aggregated.remove(activity) 145 | except (ActivityNotFound, ValueError): 146 | pass 147 | self.assertEqual(len(aggregated.activities), 1) 148 | self.assertEqual(aggregated.activity_count, 72) 149 | -------------------------------------------------------------------------------- /stream_framework/tests/aggregators/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | import datetime 3 | import unittest 4 | from stream_framework.aggregators.base import RecentVerbAggregator, NotificationAggregator 5 | from stream_framework.tests.utils import FakeActivity 6 | from stream_framework.verbs.base import Love as LoveVerb, Comment as CommentVerb 7 | 8 | 9 | def implementation(meth): 10 | def wrapped_test(self, *args, **kwargs): 11 | if self.aggregator_class is None: 12 | raise unittest.SkipTest('only test this on actual implementations') 13 | return meth(self, *args, **kwargs) 14 | return wrapped_test 15 | 16 | 17 | class BaseAggregatorTest(unittest.TestCase): 18 | 19 | aggregator_class = None 20 | # Defines a list of activities that will be merged into one group 21 | first_activities_group = [] 22 | # Another list of activities that will be merged into a second group 23 | second_activities_group = [] 24 | 25 | @property 26 | def today(self): 27 | return datetime.datetime.now().replace(minute=0) 28 | 29 | @property 30 | def yesterday(self): 31 | return self.today - 
datetime.timedelta(days=1) 32 | 33 | @implementation 34 | def test_aggregate(self): 35 | aggregator = self.aggregator_class() 36 | activities = self.first_activities_group + self.second_activities_group 37 | aggregated = aggregator.aggregate(activities) 38 | self.assertEqual(len(aggregated), 2) 39 | self.assertEqual(aggregated[0].activities, self.first_activities_group) 40 | self.assertEqual(aggregated[1].activities, self.second_activities_group) 41 | 42 | @implementation 43 | def test_empty_merge(self): 44 | aggregator = self.aggregator_class() 45 | activities = self.first_activities_group + self.second_activities_group 46 | new, changed, deleted = aggregator.merge([], activities) 47 | self.assertEqual(len(new), 2) 48 | self.assertEqual(new[0].activities, self.first_activities_group) 49 | self.assertEqual(new[1].activities, self.second_activities_group) 50 | self.assertEqual(len(changed), 0) 51 | self.assertEqual(len(deleted), 0) 52 | 53 | @implementation 54 | def test_merge(self): 55 | aggregator = self.aggregator_class() 56 | middle_index = len(self.first_activities_group) // 2 57 | first = aggregator.aggregate(self.first_activities_group[:middle_index]) 58 | new, changed, deleted = aggregator.merge(first, 59 | self.first_activities_group[middle_index:]) 60 | self.assertEqual(len(new), 0) 61 | self.assertEqual(len(deleted), 0) 62 | 63 | old, updated = changed[0] 64 | self.assertEqual(old.activities, self.first_activities_group[:middle_index]) 65 | self.assertEqual(updated.activities, self.first_activities_group) 66 | 67 | 68 | class BaseRecentVerbAggregatorTest(BaseAggregatorTest): 69 | 70 | id_seq = list(range(42, 999)) 71 | 72 | def create_activities(self, verb, creation_date, count): 73 | return [FakeActivity(actor = x, 74 | verb = verb, 75 | object = self.id_seq.pop(), 76 | target = x, 77 | time = creation_date + datetime.timedelta(seconds=x), 78 | extra_context = dict(x=x)) 79 | for x in range(0, count)] 80 | 81 | 82 | class RecentVerbAggregatorVerbTest(BaseRecentVerbAggregatorTest): 83 | ''' 84 | Tests that activities are aggregated by same verbs 85 | ''' 86 | 87 | aggregator_class = RecentVerbAggregator 88 | 89 | def setUp(self): 90 | self.first_activities_group = self.create_activities(LoveVerb, self.today, 10) 91 | self.second_activities_group = self.create_activities(CommentVerb, self.today, 5) 92 | 93 | 94 | class RecentVerbAggregatorDateTest(BaseRecentVerbAggregatorTest): 95 | ''' 96 | Tests that activities are aggregated by same date 97 | ''' 98 | 99 | aggregator_class = RecentVerbAggregator 100 | 101 | def setUp(self): 102 | self.first_activities_group = self.create_activities(LoveVerb, self.today, 10) 103 | self.second_activities_group = self.create_activities(LoveVerb, self.yesterday, 5) 104 | 105 | 106 | class BaseNotificationAggregatorTest(BaseAggregatorTest): 107 | 108 | first_item_id = 1000000 109 | second_item_id = 20000000 110 | 111 | def create_activities(self, verb, object_id, creation_date, count): 112 | return [FakeActivity(actor = x, 113 | verb = verb, 114 | object = object_id, 115 | target = x, 116 | time=creation_date + datetime.timedelta(seconds=x), 117 | extra_context = dict(x=x)) 118 | for x in range(0, count)] 119 | 120 | 121 | class NotificationAggregatorVerbTest(BaseNotificationAggregatorTest): 122 | ''' 123 | Tests that activities are aggregated by same verbs 124 | ''' 125 | 126 | aggregator_class = NotificationAggregator 127 | 128 | def setUp(self): 129 | self.first_activities_group = self.create_activities(LoveVerb, self.first_item_id, self.today, 
10) 130 | self.second_activities_group = self.create_activities(CommentVerb, self.first_item_id, self.today, 5) 131 | 132 | 133 | class NotificationAggregatorObjectTest(BaseNotificationAggregatorTest): 134 | ''' 135 | Tests that activities are aggregated by same object 136 | ''' 137 | 138 | aggregator_class = NotificationAggregator 139 | 140 | def setUp(self): 141 | self.first_activities_group = self.create_activities(LoveVerb, self.first_item_id, self.today, 10) 142 | self.second_activities_group = self.create_activities(LoveVerb, self.second_item_id, self.today, 5) 143 | 144 | 145 | class NotificationAggregatorDateTest(BaseNotificationAggregatorTest): 146 | ''' 147 | Tests that activities are aggregated by same day 148 | ''' 149 | 150 | aggregator_class = NotificationAggregator 151 | 152 | def setUp(self): 153 | self.first_activities_group = self.create_activities(LoveVerb, self.first_item_id, self.today, 10) 154 | self.second_activities_group = self.create_activities(LoveVerb, self.first_item_id, self.yesterday, 5) 155 | -------------------------------------------------------------------------------- /stream_framework/tests/feeds/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/llSourcell/Stream-Framework/e7535857af03c81c760b9265568287816ecac13c/stream_framework/tests/feeds/__init__.py -------------------------------------------------------------------------------- /stream_framework/tests/feeds/aggregated_feed/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/llSourcell/Stream-Framework/e7535857af03c81c760b9265568287816ecac13c/stream_framework/tests/feeds/aggregated_feed/__init__.py -------------------------------------------------------------------------------- /stream_framework/tests/feeds/aggregated_feed/cassandra.py: -------------------------------------------------------------------------------- 1 | from stream_framework.activity import AggregatedActivity 2 | from stream_framework.feeds.aggregated_feed.cassandra import CassandraAggregatedFeed 3 | from stream_framework.tests.feeds.aggregated_feed.base import TestAggregatedFeed,\ 4 | implementation 5 | from stream_framework.tests.feeds.cassandra import CustomActivity 6 | import pytest 7 | 8 | 9 | class CustomAggregated(AggregatedActivity): 10 | pass 11 | 12 | 13 | class CassandraCustomAggregatedFeed(CassandraAggregatedFeed): 14 | activity_class = CustomActivity 15 | aggregated_activity_class = CustomAggregated 16 | 17 | 18 | @pytest.mark.usefixtures("cassandra_reset") 19 | class TestCassandraAggregatedFeed(TestAggregatedFeed): 20 | feed_cls = CassandraAggregatedFeed 21 | 22 | 23 | @pytest.mark.usefixtures("cassandra_reset") 24 | class TestCassandraCustomAggregatedFeed(TestAggregatedFeed): 25 | feed_cls = CassandraCustomAggregatedFeed 26 | activity_class = CustomActivity 27 | aggregated_activity_class = CustomAggregated 28 | 29 | @implementation 30 | def test_custom_activity(self): 31 | assert self.test_feed.count() == 0 32 | self.feed_cls.insert_activity( 33 | self.activity 34 | ) 35 | self.test_feed.add(self.activity) 36 | assert self.test_feed.count() == 1 37 | aggregated = self.test_feed[:10][0] 38 | assert type(aggregated) == self.aggregated_activity_class 39 | assert type(aggregated.activities[0]) == self.activity_class 40 | -------------------------------------------------------------------------------- /stream_framework/tests/feeds/aggregated_feed/notification_feed.py: 
-------------------------------------------------------------------------------- 1 | from stream_framework.tests.feeds.aggregated_feed.base import TestAggregatedFeed 2 | from stream_framework.feeds.aggregated_feed.notification_feed import RedisNotificationFeed 3 | 4 | 5 | class TestNotificationFeed(TestAggregatedFeed): 6 | feed_cls = RedisNotificationFeed 7 | 8 | def test_mark_all(self): 9 | # start by adding one 10 | self.test_feed.insert_activities(self.aggregated.activities) 11 | self.test_feed.add_many_aggregated([self.aggregated]) 12 | assert len(self.test_feed[:10]) == 1 13 | assert int(self.test_feed.count_unseen()) == 1 14 | # TODO: don't know why this is broken 15 | # assert int(self.test_feed.get_denormalized_count()) == 1 16 | self.test_feed.mark_all() 17 | assert int(self.test_feed.count_unseen()) == 0 18 | assert int(self.test_feed.get_denormalized_count()) == 0 19 | -------------------------------------------------------------------------------- /stream_framework/tests/feeds/aggregated_feed/redis.py: -------------------------------------------------------------------------------- 1 | from stream_framework.feeds.aggregated_feed.redis import RedisAggregatedFeed 2 | from stream_framework.tests.feeds.aggregated_feed.base import TestAggregatedFeed,\ 3 | implementation 4 | from stream_framework.activity import AggregatedActivity 5 | from stream_framework.tests.feeds.redis import CustomActivity 6 | 7 | 8 | class CustomAggregated(AggregatedActivity): 9 | pass 10 | 11 | 12 | class RedisCustomAggregatedFeed(RedisAggregatedFeed): 13 | activity_class = CustomActivity 14 | aggregated_activity_class = CustomAggregated 15 | 16 | 17 | class TestRedisAggregatedFeed(TestAggregatedFeed): 18 | feed_cls = RedisAggregatedFeed 19 | 20 | 21 | class TestRedisCustomAggregatedFeed(TestAggregatedFeed): 22 | feed_cls = RedisCustomAggregatedFeed 23 | activity_class = CustomActivity 24 | aggregated_activity_class = CustomAggregated 25 | 26 | @implementation 27 | def test_custom_activity(self): 28 | assert self.test_feed.count() == 0 29 | self.feed_cls.insert_activity( 30 | self.activity 31 | ) 32 | self.test_feed.add(self.activity) 33 | assert self.test_feed.count() == 1 34 | aggregated = self.test_feed[:10][0] 35 | assert type(aggregated) == self.aggregated_activity_class 36 | assert type(aggregated.activities[0]) == self.activity_class 37 | -------------------------------------------------------------------------------- /stream_framework/tests/feeds/cassandra.py: -------------------------------------------------------------------------------- 1 | from stream_framework.tests.feeds.base import TestBaseFeed, implementation 2 | import pytest 3 | from stream_framework.feeds.cassandra import CassandraFeed 4 | from stream_framework.utils import datetime_to_epoch 5 | from stream_framework.activity import Activity 6 | 7 | 8 | class CustomActivity(Activity): 9 | 10 | @property 11 | def serialization_id(self): 12 | ''' 13 | Shorter serialization id than used by default 14 | ''' 15 | if self.object_id >= 10 ** 10 or self.verb.id >= 10 ** 3: 16 | raise TypeError('Fatal: object_id / verb have too many digits !') 17 | if not self.time: 18 | raise TypeError('Cant serialize activities without a time') 19 | milliseconds = str(int(datetime_to_epoch(self.time) * 1000)) 20 | 21 | # shorter than the default version 22 | serialization_id_str = '%s%0.2d%0.2d' % ( 23 | milliseconds, self.object_id % 100, self.verb.id) 24 | serialization_id = int(serialization_id_str) 25 | 26 | return serialization_id 27 | 28 | 29 | class 
CassandraCustomFeed(CassandraFeed): 30 | activity_class = CustomActivity 31 | 32 | 33 | @pytest.mark.usefixtures("cassandra_reset") 34 | class TestCassandraBaseFeed(TestBaseFeed): 35 | feed_cls = CassandraFeed 36 | 37 | def test_add_insert_activity(self): 38 | pass 39 | 40 | def test_add_remove_activity(self): 41 | pass 42 | 43 | 44 | @pytest.mark.usefixtures("cassandra_reset") 45 | class TestCassandraCustomFeed(TestBaseFeed): 46 | feed_cls = CassandraCustomFeed 47 | activity_class = CustomActivity 48 | 49 | def test_add_insert_activity(self): 50 | pass 51 | 52 | def test_add_remove_activity(self): 53 | pass 54 | 55 | @implementation 56 | def test_custom_activity(self): 57 | assert self.test_feed.count() == 0 58 | self.feed_cls.insert_activity( 59 | self.activity 60 | ) 61 | self.test_feed.add(self.activity) 62 | assert self.test_feed.count() == 1 63 | assert self.activity == self.test_feed[:10][0] 64 | assert type(self.activity) == type(self.test_feed[0][0]) 65 | # make sure nothing is wrong with the activity storage 66 | -------------------------------------------------------------------------------- /stream_framework/tests/feeds/memory.py: -------------------------------------------------------------------------------- 1 | from stream_framework.tests.feeds.base import TestBaseFeed 2 | from stream_framework.feeds.memory import Feed 3 | 4 | 5 | class InMemoryBaseFeed(TestBaseFeed): 6 | feed_cls = Feed 7 | -------------------------------------------------------------------------------- /stream_framework/tests/feeds/notification_feed/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/llSourcell/Stream-Framework/e7535857af03c81c760b9265568287816ecac13c/stream_framework/tests/feeds/notification_feed/__init__.py -------------------------------------------------------------------------------- /stream_framework/tests/feeds/notification_feed/base.py: -------------------------------------------------------------------------------- 1 | from stream_framework.activity import NotificationActivity 2 | from stream_framework.verbs.base import Love as LoveVerb, Comment as CommentVerb, Follow as FollowVerb 3 | from stream_framework.feeds.notification_feed.base import BaseNotificationFeed 4 | from stream_framework.tests.feeds.aggregated_feed.base import TestAggregatedFeed 5 | from datetime import datetime, timedelta 6 | import unittest 7 | 8 | 9 | def implementation(meth): 10 | def wrapped_test(self, *args, **kwargs): 11 | if self.feed_cls == BaseNotificationFeed: 12 | raise unittest.SkipTest('only test this on actual implementations') 13 | return meth(self, *args, **kwargs) 14 | return wrapped_test 15 | 16 | 17 | class TestBaseNotificationFeed(TestAggregatedFeed): 18 | 19 | feed_cls = BaseNotificationFeed 20 | aggregated_activity_class = NotificationActivity 21 | 22 | def create_activities(self, verb, object_id, count): 23 | return [self.activity_class(actor = x, 24 | verb = verb, 25 | object = object_id, 26 | target = x, 27 | time=datetime.now() + timedelta(seconds=x, 28 | minutes=object_id), 29 | extra_context = dict(x=x)) 30 | for x in range(0, count)] 31 | 32 | def setUp(self): 33 | super(TestBaseNotificationFeed, self).setUp() 34 | 35 | self.loves = self.create_activities(LoveVerb, 1, 5) 36 | self.comments = self.create_activities(CommentVerb, 2, 5) 37 | self.followers = self.create_activities(FollowVerb, self.user_id, 5) 38 | 39 | aggregator = self.test_feed.get_aggregator() 40 | self.aggregated_love = 
aggregator.aggregate(self.loves)[0] 41 | self.aggregated_comment = aggregator.aggregate(self.comments)[0] 42 | self.aggregated_follower = aggregator.aggregate(self.followers)[0] 43 | 44 | self.follower_id = self.aggregated_follower.serialization_id 45 | self.comment_id = self.aggregated_comment.serialization_id 46 | self.love_id = self.aggregated_love.serialization_id 47 | 48 | def assert_activity_markers(self, aggregated_activity, seen=False, read=False): 49 | self.assertEqual(aggregated_activity.is_seen, seen) 50 | self.assertEqual(aggregated_activity.is_read, read) 51 | 52 | def assert_activities_markers(self, aggregated_activities, seen=False, read=False): 53 | for aggregated_activity in aggregated_activities: 54 | self.assert_activity_markers(aggregated_activity, seen, read) 55 | 56 | @implementation 57 | def test_add_activities(self): 58 | self.test_feed.add_many(self.loves[:-1]) 59 | self.test_feed.mark_all(seen=True, read=True) 60 | 61 | self.test_feed.add(self.loves[-1]) 62 | self.assert_activities_markers(self.test_feed[:]) 63 | self.assertEqual(self.test_feed[:], [self.aggregated_love]) 64 | 65 | self.test_feed.add_many(self.comments) 66 | self.assertEqual(self.test_feed[:], [self.aggregated_comment, self.aggregated_love]) 67 | self.assert_activities_markers(self.test_feed[:]) 68 | 69 | @implementation 70 | def test_add_many_aggregated_activities(self): 71 | self.test_feed.add_many_aggregated([self.aggregated_follower]) 72 | self.assertEqual(self.test_feed[:], [self.aggregated_follower]) 73 | self.assert_activities_markers(self.test_feed[:]) 74 | 75 | self.test_feed.add_many_aggregated([self.aggregated_comment, self.aggregated_love]) 76 | self.assertEqual(self.test_feed[:], [self.aggregated_follower, self.aggregated_comment, self.aggregated_love]) 77 | self.assert_activities_markers(self.test_feed[:]) 78 | 79 | @implementation 80 | def test_remove_activities(self): 81 | self.test_feed.add_many(self.loves) 82 | self.test_feed.remove_many(self.loves) 83 | 84 | self.assertEqual(self.test_feed[:], []) 85 | self.assertEqual(self.test_feed.count_unseen(), 0) 86 | self.assertEqual(self.test_feed.count_unread(), 0) 87 | 88 | @implementation 89 | def test_remove_many_aggregated_activities(self): 90 | self.test_feed.add_many(self.followers + self.comments + self.loves) 91 | 92 | self.test_feed.remove_many_aggregated([self.aggregated_follower]) 93 | self.assertEqual(self.test_feed[:], [self.aggregated_comment, self.aggregated_love]) 94 | self.assert_activities_markers(self.test_feed[:]) 95 | 96 | self.test_feed.remove_many_aggregated([self.aggregated_comment, self.aggregated_love]) 97 | self.assertEqual(self.test_feed[:], []) 98 | self.assertEqual(self.test_feed.count_unseen(), 0) 99 | self.assertEqual(self.test_feed.count_unread(), 0) 100 | 101 | @implementation 102 | def test_mark_aggregated_activity(self): 103 | self.test_feed.add_many(self.followers + self.comments + self.loves) 104 | self.assert_activities_markers(self.test_feed[0:1], seen=False, read=False) 105 | 106 | self.test_feed.mark_activity(self.follower_id) 107 | self.assert_activities_markers(self.test_feed[0:1], seen=True, read=False) 108 | self.assert_activities_markers(self.test_feed[1:]) 109 | 110 | self.test_feed.mark_activity(self.follower_id, read=True) 111 | self.assert_activities_markers(self.test_feed[0:1], seen=True, read=True) 112 | self.assert_activities_markers(self.test_feed[1:]) 113 | 114 | self.test_feed.mark_activity(self.comment_id, read=True) 115 | self.assert_activities_markers(self.test_feed[0:2], 
seen=True, read=True) 116 | self.assert_activities_markers(self.test_feed[2:]) 117 | 118 | @implementation 119 | def test_mark_aggregated_activities(self): 120 | self.test_feed.add_many(self.followers + self.comments + self.loves) 121 | self.assert_activities_markers(self.test_feed[:], seen=False, read=False) 122 | 123 | self.test_feed.mark_activities([self.follower_id, self.comment_id], read=False) 124 | self.assert_activities_markers(self.test_feed[0:2], seen=True, read=False) 125 | self.assert_activities_markers(self.test_feed[2:]) 126 | 127 | self.test_feed.mark_activities([self.follower_id, self.comment_id], read=True) 128 | self.assert_activities_markers(self.test_feed[0:2], seen=True, read=True) 129 | self.assert_activities_markers(self.test_feed[2:]) 130 | 131 | self.test_feed.mark_activities([self.follower_id, self.comment_id, self.love_id], read=True) 132 | self.assert_activities_markers(self.test_feed[:], seen=True, read=True) 133 | 134 | @implementation 135 | def test_mark_all_aggregated_activities_as_seen(self): 136 | self.test_feed.add_many(self.followers + self.comments + self.loves) 137 | self.assert_activities_markers(self.test_feed[:], seen=False, read=False) 138 | self.test_feed.mark_all() 139 | self.assert_activities_markers(self.test_feed[:], seen=True, read=False) 140 | 141 | @implementation 142 | def test_mark_all_aggreagted_activities_as_read(self): 143 | self.test_feed.add_many(self.followers + self.comments + self.loves) 144 | self.assert_activities_markers(self.test_feed[:], seen=False, read=False) 145 | self.test_feed.mark_all(read=True) 146 | self.assert_activities_markers(self.test_feed[:], seen=True, read=True) 147 | 148 | @implementation 149 | def test_delete_feed(self): 150 | self.test_feed.add_many(self.loves) 151 | self.assertEqual(self.test_feed.count_unseen(), 1) 152 | self.assertEqual(self.test_feed.count_unread(), 1) 153 | 154 | self.test_feed.delete() 155 | self.assertEqual(self.test_feed.count_unseen(), 0) 156 | self.assertEqual(self.test_feed.count_unread(), 0) 157 | -------------------------------------------------------------------------------- /stream_framework/tests/feeds/notification_feed/redis.py: -------------------------------------------------------------------------------- 1 | from stream_framework.feeds.notification_feed.redis import RedisNotificationFeed 2 | from stream_framework.tests.feeds.notification_feed.base import TestBaseNotificationFeed 3 | 4 | 5 | class TestRedisNotificationFeed(TestBaseNotificationFeed): 6 | feed_cls = RedisNotificationFeed 7 | -------------------------------------------------------------------------------- /stream_framework/tests/feeds/redis.py: -------------------------------------------------------------------------------- 1 | from stream_framework.tests.feeds.base import TestBaseFeed, implementation 2 | from stream_framework.feeds.redis import RedisFeed 3 | from stream_framework.activity import Activity 4 | from stream_framework.utils import datetime_to_epoch 5 | 6 | 7 | class CustomActivity(Activity): 8 | 9 | @property 10 | def serialization_id(self): 11 | ''' 12 | Shorter serialization id than used by default 13 | ''' 14 | if self.object_id >= 10 ** 10 or self.verb.id >= 10 ** 3: 15 | raise TypeError('Fatal: object_id / verb have too many digits !') 16 | if not self.time: 17 | raise TypeError('Cant serialize activities without a time') 18 | milliseconds = str(int(datetime_to_epoch(self.time) * 1000)) 19 | 20 | # shorter than the default version 21 | serialization_id_str = '%s%0.2d%0.2d' % ( 22 | 
milliseconds, self.object_id % 100, self.verb.id) 23 | serialization_id = int(serialization_id_str) 24 | 25 | return serialization_id 26 | 27 | 28 | class RedisCustom(RedisFeed): 29 | activity_class = CustomActivity 30 | 31 | 32 | class TestRedisFeed(TestBaseFeed): 33 | feed_cls = RedisFeed 34 | 35 | 36 | class TestCustomRedisFeed(TestBaseFeed): 37 | 38 | ''' 39 | Test if the option to customize the activity class works without troubles 40 | ''' 41 | feed_cls = RedisCustom 42 | activity_class = CustomActivity 43 | 44 | @implementation 45 | def test_custom_activity(self): 46 | assert self.test_feed.count() == 0 47 | self.feed_cls.insert_activity( 48 | self.activity 49 | ) 50 | self.test_feed.add(self.activity) 51 | assert self.test_feed.count() == 1 52 | assert self.activity == self.test_feed[:10][0] 53 | assert type(self.activity) == type(self.test_feed[0][0]) 54 | # make sure nothing is wrong with the activity storage 55 | -------------------------------------------------------------------------------- /stream_framework/tests/managers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/llSourcell/Stream-Framework/e7535857af03c81c760b9265568287816ecac13c/stream_framework/tests/managers/__init__.py -------------------------------------------------------------------------------- /stream_framework/tests/managers/base.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | from stream_framework.feed_managers.base import Manager 3 | from stream_framework.tests.utils import Pin 4 | from stream_framework.tests.utils import FakeActivity 5 | from stream_framework.verbs.base import Love as LoveVerb 6 | from mock import patch 7 | import unittest 8 | import copy 9 | from functools import partial 10 | 11 | 12 | def implementation(meth): 13 | def wrapped_test(self, *args, **kwargs): 14 | if self.__class__ == BaseManagerTest: 15 | raise unittest.SkipTest('only test this on actual implementations') 16 | return meth(self, *args, **kwargs) 17 | return wrapped_test 18 | 19 | 20 | class BaseManagerTest(unittest.TestCase): 21 | manager_class = Manager 22 | 23 | def setUp(self): 24 | self.manager = self.manager_class() 25 | self.actor_id = 42 26 | self.pin = Pin( 27 | id=1, created_at=datetime.datetime.now() - datetime.timedelta(hours=1)) 28 | self.activity = FakeActivity( 29 | self.actor_id, LoveVerb, self.pin, 1, datetime.datetime.now(), {}) 30 | 31 | if self.__class__ != BaseManagerTest: 32 | for user_id in list(range(1, 4)) + [17, 42, 44]: 33 | self.manager.get_user_feed(user_id).delete() 34 | for feed in self.manager.get_feeds(user_id).values(): 35 | feed.delete() 36 | 37 | @implementation 38 | def test_add_user_activity(self): 39 | assert self.manager.get_user_feed( 40 | self.actor_id).count() == 0, 'the test feed is not empty' 41 | 42 | with patch.object(self.manager, 'get_user_follower_ids', return_value={None: [1]}) as get_user_follower_ids: 43 | self.manager.add_user_activity(self.actor_id, self.activity) 44 | get_user_follower_ids.assert_called_with(user_id=self.actor_id) 45 | 46 | assert self.manager.get_user_feed(self.actor_id).count() == 1 47 | for feed in self.manager.get_feeds(1).values(): 48 | assert feed.count() == 1 49 | 50 | @implementation 51 | def test_batch_import(self): 52 | assert self.manager.get_user_feed( 53 | self.actor_id).count() == 0, 'the test feed is not empty' 54 | 55 | with patch.object(self.manager, 'get_user_follower_ids', return_value={None: 
[1]}) as get_user_follower_ids: 56 | activities = [self.activity] 57 | self.manager.batch_import(self.actor_id, activities, 10) 58 | get_user_follower_ids.assert_called_with(user_id=self.actor_id) 59 | 60 | assert self.manager.get_user_feed(self.actor_id).count() == 1 61 | for feed in self.manager.get_feeds(1).values(): 62 | assert feed.count() == 1 63 | 64 | @implementation 65 | def test_batch_import_errors(self): 66 | activities = [] 67 | # this should return without trouble 68 | self.manager.batch_import(self.actor_id, activities, 10) 69 | 70 | # batch import with activities from different users should give an 71 | # error 72 | activity = copy.deepcopy(self.activity) 73 | activity.actor_id = 10 74 | with patch.object(self.manager, 'get_user_follower_ids', return_value={None: [1]}): 75 | batch = partial( 76 | self.manager.batch_import, self.actor_id, [activity], 10) 77 | self.assertRaises(ValueError, batch) 78 | 79 | @implementation 80 | def test_add_remove_user_activity(self): 81 | user_id = 42 82 | assert self.manager.get_user_feed( 83 | user_id).count() == 0, 'the test feed is not empty' 84 | 85 | with patch.object(self.manager, 'get_user_follower_ids', return_value={None: [1]}) as get_user_follower_ids: 86 | self.manager.add_user_activity(user_id, self.activity) 87 | get_user_follower_ids.assert_called_with(user_id=user_id) 88 | assert self.manager.get_user_feed(user_id).count() == 1 89 | 90 | with patch.object(self.manager, 'get_user_follower_ids', return_value={None: [1]}) as get_user_follower_ids: 91 | self.manager.remove_user_activity(user_id, self.activity) 92 | get_user_follower_ids.assert_called_with(user_id=user_id) 93 | assert self.manager.get_user_feed(user_id).count() == 0 94 | 95 | @implementation 96 | def test_add_user_activity_fanout(self): 97 | user_id = 42 98 | followers = {None: [1, 2, 3]} 99 | assert self.manager.get_user_feed( 100 | user_id).count() == 0, 'the test feed is not empty' 101 | 102 | for follower in followers.values(): 103 | assert self.manager.get_user_feed(follower).count() == 0 104 | 105 | with patch.object(self.manager, 'get_user_follower_ids', return_value=followers) as get_user_follower_ids: 106 | self.manager.add_user_activity(user_id, self.activity) 107 | get_user_follower_ids.assert_called_with(user_id=user_id) 108 | 109 | assert self.manager.get_user_feed(user_id).count() == 1 110 | 111 | for follower in list(followers.values())[0]: 112 | assert self.manager.get_user_feed(follower).count() == 0 113 | for f in self.manager.get_feeds(follower).values(): 114 | assert f.count() == 1 115 | 116 | @implementation 117 | def test_follow_unfollow_user(self): 118 | target_user_id = 17 119 | target2_user_id = 44 120 | follower_user_id = 42 121 | 122 | control_pin = Pin( 123 | id=2, created_at=datetime.datetime.now() - datetime.timedelta(hours=1)) 124 | control_activity = FakeActivity( 125 | target_user_id, LoveVerb, control_pin, 2, datetime.datetime.now(), {}) 126 | 127 | with patch.object(self.manager, 'get_user_follower_ids', return_value={}) as get_user_follower_ids: 128 | self.manager.add_user_activity(target2_user_id, control_activity) 129 | self.manager.add_user_activity(target_user_id, self.activity) 130 | get_user_follower_ids.assert_called_with(user_id=target_user_id) 131 | 132 | # checks user feed is empty 133 | for f in self.manager.get_feeds(follower_user_id).values(): 134 | self.assertEqual(f.count(), 0) 135 | 136 | self.manager.follow_user(follower_user_id, target2_user_id) 137 | 138 | # make sure one activity was pushed 139 | for f in 
self.manager.get_feeds(follower_user_id).values(): 140 | self.assertEqual(f.count(), 1) 141 | 142 | self.manager.follow_user(follower_user_id, target_user_id) 143 | 144 | # make sure another activity was pushed 145 | for f in self.manager.get_feeds(follower_user_id).values(): 146 | self.assertEqual(f.count(), 2) 147 | 148 | self.manager.unfollow_user( 149 | follower_user_id, target_user_id, async=False) 150 | 151 | # make sure only one activity was removed 152 | for f in self.manager.get_feeds(follower_user_id).values(): 153 | self.assertEqual(f.count(), 1) 154 | activity = f[:][0] 155 | assert activity.object_id == self.pin.id 156 | -------------------------------------------------------------------------------- /stream_framework/tests/managers/cassandra.py: -------------------------------------------------------------------------------- 1 | from stream_framework.feed_managers.base import Manager 2 | from stream_framework.feeds.base import UserBaseFeed 3 | from stream_framework.feeds.cassandra import CassandraFeed 4 | from stream_framework.tests.managers.base import BaseManagerTest 5 | import pytest 6 | 7 | 8 | class CassandraUserBaseFeed(UserBaseFeed, CassandraFeed): 9 | pass 10 | 11 | 12 | class CassandraManager(Manager): 13 | feed_classes = { 14 | 'feed': CassandraFeed 15 | } 16 | user_feed_class = CassandraUserBaseFeed 17 | 18 | 19 | @pytest.mark.usefixtures("cassandra_reset") 20 | class CassandraManagerTest(BaseManagerTest): 21 | manager_class = CassandraManager 22 | -------------------------------------------------------------------------------- /stream_framework/tests/managers/redis.py: -------------------------------------------------------------------------------- 1 | from stream_framework.feed_managers.base import Manager 2 | from stream_framework.feeds.base import UserBaseFeed 3 | from stream_framework.feeds.redis import RedisFeed 4 | from stream_framework.tests.managers.base import BaseManagerTest 5 | import pytest 6 | 7 | 8 | class RedisUserBaseFeed(UserBaseFeed, RedisFeed): 9 | pass 10 | 11 | 12 | class RedisManager(Manager): 13 | feed_classes = { 14 | 'feed': RedisFeed 15 | } 16 | user_feed_class = RedisUserBaseFeed 17 | 18 | 19 | @pytest.mark.usefixtures("redis_reset") 20 | class RedisManagerTest(BaseManagerTest): 21 | manager_class = RedisManager 22 | -------------------------------------------------------------------------------- /stream_framework/tests/serializers.py: -------------------------------------------------------------------------------- 1 | from stream_framework.aggregators.base import RecentVerbAggregator 2 | from stream_framework.serializers.activity_serializer import ActivitySerializer 3 | from stream_framework.serializers.aggregated_activity_serializer import \ 4 | AggregatedActivitySerializer, NotificationSerializer 5 | from stream_framework.serializers.base import BaseSerializer 6 | from stream_framework.serializers.cassandra.activity_serializer import CassandraActivitySerializer 7 | from stream_framework.serializers.pickle_serializer import PickleSerializer, \ 8 | AggregatedActivityPickleSerializer 9 | from stream_framework.storage.cassandra import models 10 | from stream_framework.tests.utils import FakeActivity 11 | from functools import partial 12 | import datetime 13 | import unittest 14 | from stream_framework.activity import Activity, AggregatedActivity 15 | 16 | 17 | class ActivitySerializationTest(unittest.TestCase): 18 | serialization_class = BaseSerializer 19 | serialization_class_kwargs = { 20 | 'activity_class': Activity, 
'aggregated_activity_class': AggregatedActivity} 21 | activity_extra_context = {'xxx': 'yyy'} 22 | 23 | def setUp(self): 24 | from stream_framework.verbs.base import Love as LoveVerb 25 | self.serializer = self.serialization_class( 26 | **self.serialization_class_kwargs) 27 | self.activity = FakeActivity( 28 | 1, LoveVerb, 1, 1, datetime.datetime.now(), {}) 29 | self.activity.extra_context = self.activity_extra_context 30 | aggregator = RecentVerbAggregator() 31 | self.aggregated_activity = aggregator.aggregate([self.activity])[0] 32 | self.args = () 33 | self.kwargs = {} 34 | 35 | def test_serialization(self): 36 | serialized_activity = self.serializer.dumps(self.activity) 37 | deserialized_activity = self.serializer.loads(serialized_activity) 38 | self.assertEqual(deserialized_activity, self.activity) 39 | self.assertEqual( 40 | deserialized_activity.extra_context, self.activity_extra_context) 41 | 42 | def test_type_exception(self): 43 | give_error = partial(self.serializer.dumps, 1) 44 | self.assertRaises(ValueError, give_error) 45 | give_error = partial(self.serializer.dumps, self.aggregated_activity) 46 | self.assertRaises(ValueError, give_error) 47 | 48 | 49 | class PickleSerializationTestCase(ActivitySerializationTest): 50 | serialization_class = PickleSerializer 51 | 52 | 53 | class ActivitySerializerTest(ActivitySerializationTest): 54 | serialization_class = ActivitySerializer 55 | 56 | 57 | class AggregatedActivitySerializationTest(ActivitySerializationTest): 58 | serialization_class = AggregatedActivitySerializer 59 | 60 | def test_serialization(self): 61 | serialized = self.serializer.dumps(self.aggregated_activity) 62 | deserialized = self.serializer.loads(serialized) 63 | self.assertEqual(deserialized, self.aggregated_activity) 64 | 65 | def test_type_exception(self): 66 | give_error = partial(self.serializer.dumps, 1) 67 | self.assertRaises(ValueError, give_error) 68 | give_error = partial(self.serializer.dumps, self.activity) 69 | self.assertRaises(ValueError, give_error) 70 | 71 | def test_hydration(self): 72 | serialized_activity = self.serializer.dumps(self.aggregated_activity) 73 | deserialized_activity = self.serializer.loads(serialized_activity) 74 | assert self.serialization_class.dehydrate == deserialized_activity.dehydrated 75 | if deserialized_activity.dehydrated: 76 | assert not deserialized_activity.activities 77 | assert deserialized_activity._activity_ids 78 | 79 | 80 | class PickleAggregatedActivityTest(AggregatedActivitySerializationTest): 81 | serialization_class = AggregatedActivityPickleSerializer 82 | 83 | 84 | class NotificationSerializerTest(AggregatedActivitySerializationTest): 85 | serialization_class = NotificationSerializer 86 | 87 | 88 | # class CassandraActivitySerializerTest(ActivitySerializationTest): 89 | # serialization_class = CassandraActivitySerializer 90 | # serialization_class_kwargs = { 91 | # 'model': models.Activity, 'activity_class': Activity, 'aggregated_activity_class': AggregatedActivity} 92 | -------------------------------------------------------------------------------- /stream_framework/tests/settings.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | STREAM_DEFAULT_KEYSPACE = 'test_stream_framework' 4 | 5 | if os.environ.get('TEST_CASSANDRA_HOST'): 6 | STREAM_CASSANDRA_HOSTS = [os.environ['TEST_CASSANDRA_HOST']] 7 | 8 | SECRET_KEY = 'ob_^kc#v536)v$x!h3*#xs6&l8&7#4cqi^rjhczu85l9txbz+W' 9 | STREAM_CASSANDRA_CONSITENCY_LEVEL = 'ONE' 10 | 11 | 12 | STREAM_REDIS_CONFIG = { 
13 | 'default': { 14 | 'host': '127.0.0.1', 15 | 'port': 6379, 16 | 'db': 0, 17 | 'password': None 18 | }, 19 | } 20 | -------------------------------------------------------------------------------- /stream_framework/tests/storage/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/llSourcell/Stream-Framework/e7535857af03c81c760b9265568287816ecac13c/stream_framework/tests/storage/__init__.py -------------------------------------------------------------------------------- /stream_framework/tests/storage/base_lists_storage.py: -------------------------------------------------------------------------------- 1 | from stream_framework.storage.base_lists_storage import BaseListsStorage 2 | 3 | import unittest 4 | 5 | 6 | def implementation(meth): 7 | def wrapped_test(self, *args, **kwargs): 8 | if self.lists_storage_class == BaseListsStorage: 9 | raise unittest.SkipTest('only test this on actual implementations') 10 | return meth(self, *args, **kwargs) 11 | return wrapped_test 12 | 13 | 14 | class TestBaseListsStorage(unittest.TestCase): 15 | 16 | lists_storage_class = BaseListsStorage 17 | key = 'test' 18 | max_length = 100 19 | 20 | def setUp(self): 21 | self.lists_storage = self.lists_storage_class(key=self.key, 22 | max_length = self.max_length) 23 | 24 | def tearDown(self): 25 | if self.lists_storage_class != BaseListsStorage: 26 | self.lists_storage.flush('whenever', 'everyday', 'whatever') 27 | 28 | @implementation 29 | def test_add_empty_values(self): 30 | self.lists_storage.add(whenever=[]) 31 | count = self.lists_storage.count('whenever') 32 | self.assertEqual(count, 0) 33 | 34 | self.lists_storage.add(whenever=[1, 2], everyday=[]) 35 | count = self.lists_storage.count('whenever') 36 | self.assertEqual(count, 2) 37 | count = self.lists_storage.count('everyday') 38 | self.assertEqual(count, 0) 39 | 40 | @implementation 41 | def test_add_more_than_allowed(self): 42 | items = list(range(0, self.max_length + 1)) 43 | self.lists_storage.add(whatever=items) 44 | stored_items = self.lists_storage.get('whatever') 45 | self.assertEqual(items[1:], stored_items) 46 | 47 | @implementation 48 | def test_add(self): 49 | self.lists_storage.add(whenever=[1]) 50 | count = self.lists_storage.count('whenever') 51 | self.assertEqual(count, 1) 52 | 53 | self.lists_storage.add(everyday=[1, 2]) 54 | count = self.lists_storage.count('everyday') 55 | self.assertEqual(count, 2) 56 | 57 | self.lists_storage.add(whenever=[3], everyday=[3, 4]) 58 | count = self.lists_storage.count('whenever') 59 | self.assertEqual(count, 2) 60 | count = self.lists_storage.count('everyday') 61 | self.assertEqual(count, 4) 62 | 63 | @implementation 64 | def test_count_not_exisit_list(self): 65 | count = self.lists_storage.count('whatever') 66 | self.assertEqual(count, 0) 67 | 68 | @implementation 69 | def test_get_from_not_exisiting_list(self): 70 | items = self.lists_storage.get('whenever') 71 | self.assertEqual([], items) 72 | 73 | self.lists_storage.add(everyday=[1,2]) 74 | whenever_items, _ = self.lists_storage.get('whenever', 'everyday') 75 | self.assertEqual([], whenever_items) 76 | 77 | @implementation 78 | def test_get(self): 79 | whenever_items = list(range(0, 20)) 80 | everyday_items = list(range(10, 0, -1)) 81 | self.lists_storage.add(whenever=whenever_items, everyday=everyday_items) 82 | 83 | stored_items = self.lists_storage.get('whenever') 84 | self.assertEqual(stored_items, whenever_items) 85 | 86 | stored_items = 
self.lists_storage.get('everyday') 87 | self.assertEqual(stored_items, everyday_items) 88 | 89 | stored_whenever_items, stored_everyday_items = self.lists_storage.get('whenever', 'everyday') 90 | self.assertEqual(stored_whenever_items, whenever_items) 91 | self.assertEqual(stored_everyday_items, everyday_items) 92 | 93 | @implementation 94 | def test_remove_not_existing_items(self): 95 | items = [1,2,3] 96 | self.lists_storage.add(whenever=items) 97 | 98 | self.lists_storage.remove(whenever=[0]) 99 | stored_items = self.lists_storage.get('whenever') 100 | self.assertEqual(stored_items, items) 101 | 102 | self.lists_storage.remove(whenever=[0,2]) 103 | stored_items = self.lists_storage.get('whenever') 104 | self.assertEqual(stored_items, [1,3]) 105 | 106 | @implementation 107 | def test_remove_from_not_exisiting_list(self): 108 | self.lists_storage.remove(whenever=[1,2]) 109 | 110 | self.lists_storage.add(everyday=[1,2]) 111 | self.lists_storage.remove(whenever=[1,2]) 112 | count = self.lists_storage.count('everyday') 113 | self.assertEqual(count, 2) 114 | 115 | @implementation 116 | def test_remove(self): 117 | whenever_items = list(range(0, 20)) 118 | everyday_items = list(range(10, 0, -1)) 119 | self.lists_storage.add(whenever=whenever_items, everyday=everyday_items) 120 | 121 | self.lists_storage.remove(whenever=[15]) 122 | whenever_items.remove(15) 123 | stored_items = self.lists_storage.get('whenever') 124 | self.assertEqual(stored_items, whenever_items) 125 | 126 | self.lists_storage.remove(everyday=[1, 5]) 127 | everyday_items.remove(1) 128 | everyday_items.remove(5) 129 | stored_items = self.lists_storage.get('everyday') 130 | self.assertEqual(stored_items, everyday_items) 131 | 132 | self.lists_storage.remove(whenever=[5, 19], everyday=[2]) 133 | whenever_items.remove(5) 134 | whenever_items.remove(19) 135 | everyday_items.remove(2) 136 | stored_whenever_items, stored_everyday_items = self.lists_storage.get('whenever', 'everyday') 137 | self.assertEqual(stored_whenever_items, whenever_items) 138 | self.assertEqual(stored_everyday_items, everyday_items) 139 | 140 | @implementation 141 | def test_flush_non_existing_list(self): 142 | self.lists_storage.flush('whenever') 143 | 144 | self.lists_storage.add(everyday=[1,2]) 145 | self.lists_storage.flush('whenever') 146 | count = self.lists_storage.count('everyday') 147 | self.assertEqual(count, 2) 148 | 149 | @implementation 150 | def test_flush_already_flushed_list(self): 151 | self.lists_storage.add(everyday=[1,2]) 152 | self.lists_storage.flush('everyday') 153 | 154 | self.lists_storage.flush('everyday') 155 | count = self.lists_storage.count('everyday') 156 | self.assertEqual(count, 0) 157 | 158 | @implementation 159 | def test_flush(self): 160 | whenever_items = list(range(0, 20)) 161 | everyday_items = list(range(10, 0, -1)) 162 | self.lists_storage.add(whenever=whenever_items, everyday=everyday_items) 163 | 164 | self.lists_storage.flush('whenever') 165 | count = self.lists_storage.count('whenever') 166 | self.assertEqual(count, 0) 167 | 168 | self.lists_storage.flush('everyday') 169 | count = self.lists_storage.count('everyday') 170 | self.assertEqual(count, 0) 171 | 172 | self.lists_storage.add(whenever=whenever_items, everyday=everyday_items) 173 | self.lists_storage.flush('whenever', 'everyday') 174 | whenever_count, everyday_count = self.lists_storage.count('whenever', 'everyday') 175 | self.assertEqual(whenever_count, 0) 176 | self.assertEqual(everyday_count, 0) 177 | 178 | @implementation 179 | def 
test_keep_max_length(self): 180 | items = list(range(0, self.max_length)) 181 | self.lists_storage.add(whenever=items) 182 | self.lists_storage.add(whenever=[self.max_length]) 183 | 184 | count = self.lists_storage.count('whenever') 185 | self.assertEqual(count, self.max_length) 186 | 187 | items.remove(0) 188 | items.append(self.max_length) 189 | 190 | stored_items = self.lists_storage.get('whenever') 191 | self.assertEqual(items, stored_items) 192 | -------------------------------------------------------------------------------- /stream_framework/tests/storage/cassandra.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from stream_framework import settings 3 | from stream_framework.storage.cassandra.timeline_storage import CassandraTimelineStorage 4 | from stream_framework.tests.storage.base import TestBaseTimelineStorageClass 5 | from stream_framework.activity import Activity 6 | from stream_framework.storage.cassandra import models 7 | 8 | 9 | @pytest.mark.usefixtures("cassandra_reset") 10 | class TestCassandraTimelineStorage(TestBaseTimelineStorageClass): 11 | storage_cls = CassandraTimelineStorage 12 | storage_options = { 13 | 'hosts': settings.STREAM_CASSANDRA_HOSTS, 14 | 'column_family_name': 'example', 15 | 'activity_class': Activity 16 | } 17 | 18 | def test_custom_timeline_model(self): 19 | CustomModel = type('custom', (models.Activity,), {}) 20 | custom_storage_options = self.storage_options.copy() 21 | custom_storage_options['modelClass'] = CustomModel 22 | storage = self.storage_cls(**custom_storage_options) 23 | self.assertTrue(issubclass(storage.model, (CustomModel, ))) 24 | -------------------------------------------------------------------------------- /stream_framework/tests/storage/memory.py: -------------------------------------------------------------------------------- 1 | from stream_framework.storage.memory import InMemoryTimelineStorage 2 | from stream_framework.storage.memory import InMemoryActivityStorage 3 | from stream_framework.tests.storage.base import TestBaseActivityStorageStorage 4 | from stream_framework.tests.storage.base import TestBaseTimelineStorageClass 5 | 6 | 7 | class InMemoryActivityStorage(TestBaseActivityStorageStorage): 8 | storage_cls = InMemoryActivityStorage 9 | 10 | 11 | class TestInMemoryTimelineStorageClass(TestBaseTimelineStorageClass): 12 | storage_cls = InMemoryTimelineStorage 13 | -------------------------------------------------------------------------------- /stream_framework/tests/storage/redis/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/llSourcell/Stream-Framework/e7535857af03c81c760b9265568287816ecac13c/stream_framework/tests/storage/redis/__init__.py -------------------------------------------------------------------------------- /stream_framework/tests/storage/redis/activity_storage.py: -------------------------------------------------------------------------------- 1 | from stream_framework.tests.storage.base import TestBaseActivityStorageStorage 2 | from stream_framework.storage.redis.activity_storage import RedisActivityStorage 3 | 4 | 5 | class RedisActivityStorageTest(TestBaseActivityStorageStorage): 6 | storage_cls = RedisActivityStorage 7 | -------------------------------------------------------------------------------- /stream_framework/tests/storage/redis/lists_storage.py: -------------------------------------------------------------------------------- 1 | from 
stream_framework.tests.storage.base_lists_storage import TestBaseListsStorage 2 | from stream_framework.storage.redis.lists_storage import RedisListsStorage 3 | from stream_framework.utils.five import long_t 4 | 5 | 6 | class TestStorage(RedisListsStorage): 7 | data_type = long_t 8 | 9 | 10 | class TestRedisListsStorage(TestBaseListsStorage): 11 | 12 | lists_storage_class = TestStorage 13 | -------------------------------------------------------------------------------- /stream_framework/tests/storage/redis/timeline_storage.py: -------------------------------------------------------------------------------- 1 | from stream_framework.tests.storage.base import TestBaseTimelineStorageClass 2 | from stream_framework.storage.redis.timeline_storage import RedisTimelineStorage 3 | 4 | 5 | class TestRedisTimelineStorageClass(TestBaseTimelineStorageClass): 6 | storage_cls = RedisTimelineStorage 7 | -------------------------------------------------------------------------------- /stream_framework/tests/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from stream_framework.activity import Activity, AggregatedActivity 2 | 3 | 4 | class FakeActivity(Activity): 5 | pass 6 | 7 | 8 | class FakeAggregatedActivity(AggregatedActivity): 9 | pass 10 | 11 | 12 | class Pin(object): 13 | 14 | def __init__(self, **kwargs): 15 | self.__dict__.update(kwargs) 16 | -------------------------------------------------------------------------------- /stream_framework/tests/utils_test.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | 3 | import unittest 4 | from datetime import datetime 5 | import mock 6 | 7 | from stream_framework.utils import chunks, warn_on_duplicate, make_list_unique, \ 8 | warn_on_error, datetime_to_epoch, epoch_to_datetime 9 | from stream_framework.exceptions import DuplicateActivityException 10 | 11 | 12 | class ChunksTest(unittest.TestCase): 13 | 14 | def test_chunks(self): 15 | chunked = chunks(range(6), 2) 16 | chunked = list(chunked) 17 | self.assertEqual(chunked, [(0, 1), (2, 3), (4, 5)]) 18 | 19 | def test_one_chunk(self): 20 | chunked = chunks(range(2), 5) 21 | chunked = list(chunked) 22 | self.assertEqual(chunked, [(0, 1)]) 23 | 24 | 25 | def safe_function(): 26 | return 10 27 | 28 | 29 | def evil_duplicate(): 30 | raise DuplicateActivityException('test') 31 | 32 | 33 | def evil_value(): 34 | raise ValueError('test') 35 | 36 | 37 | class WarnTest(unittest.TestCase): 38 | 39 | def test_warn(self): 40 | # this should raise an error 41 | self.assertRaises(ValueError, evil_value) 42 | with mock.patch('stream_framework.utils.logger.warn') as warn: 43 | # this shouldnt raise an error 44 | wrapped = warn_on_error(evil_value, (ValueError,)) 45 | wrapped() 46 | # but stick something in the log 47 | assert warn.called 48 | 49 | def test_warn_on_duplicate(self): 50 | # this should raise an error 51 | self.assertRaises(DuplicateActivityException, evil_duplicate) 52 | # this shouldnt raise an error 53 | with mock.patch('stream_framework.utils.logger.warn') as warn: 54 | wrapped = warn_on_duplicate(evil_duplicate) 55 | wrapped() 56 | # but stick something in the log 57 | assert warn.called 58 | 59 | 60 | class UniqueListTest(unittest.TestCase): 61 | 62 | def test_make_list_unique(self): 63 | with_doubles = list(range(10)) + list(range(5, 15)) 64 | result = make_list_unique(with_doubles) 65 | assert result == list(range(15)) 66 | 67 | def test_make_list_unique_marker(self): 68 | 
with_doubles = list(range(10)) + list(range(5, 15)) 69 | marker = lambda x: x // 5 70 | result = make_list_unique(with_doubles, marker) 71 | assert result == [0, 5, 10] 72 | 73 | 74 | class DatetimeConversionTest(unittest.TestCase): 75 | 76 | def test_conversion(self): 77 | source_date = datetime.now() 78 | epoch = datetime_to_epoch(source_date) 79 | converted_date = epoch_to_datetime(epoch) 80 | 81 | assert source_date == converted_date 82 | 83 | -------------------------------------------------------------------------------- /stream_framework/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from stream_framework.exceptions import DuplicateActivityException 2 | import collections 3 | from datetime import datetime, timedelta 4 | import functools 5 | import itertools 6 | import logging 7 | import six 8 | 9 | 10 | logger = logging.getLogger(__name__) 11 | 12 | MISSING = object() 13 | 14 | 15 | class LRUCache: 16 | 17 | def __init__(self, capacity): 18 | self.capacity = capacity 19 | self.cache = collections.OrderedDict() 20 | 21 | def get(self, key): 22 | try: 23 | value = self.cache.pop(key) 24 | self.cache[key] = value 25 | return value 26 | except KeyError: 27 | return MISSING 28 | 29 | def set(self, key, value): 30 | try: 31 | self.cache.pop(key) 32 | except KeyError: 33 | if len(self.cache) >= self.capacity: 34 | self.cache.popitem(last=False) 35 | self.cache[key] = value 36 | 37 | 38 | def chunks(iterable, n=10000): 39 | it = iter(iterable) 40 | while True: 41 | chunk = tuple(itertools.islice(it, n)) 42 | if not chunk: 43 | return 44 | yield chunk 45 | 46 | 47 | epoch = datetime(1970, 1, 1) 48 | 49 | 50 | def datetime_to_epoch(dt): 51 | ''' 52 | Convert datetime object to epoch with millisecond accuracy 53 | ''' 54 | delta = dt - epoch 55 | since_epoch = delta.total_seconds() 56 | return since_epoch 57 | 58 | 59 | def epoch_to_datetime(time_): 60 | return epoch + timedelta(seconds=time_) 61 | 62 | 63 | def make_list_unique(sequence, marker_function=None): 64 | ''' 65 | Makes items in a list unique 66 | Performance based on this blog post: 67 | http://www.peterbe.com/plog/uniqifiers-benchmark 68 | ''' 69 | seen = {} 70 | result = [] 71 | for item in sequence: 72 | # gets the marker 73 | marker = item 74 | if marker_function is not None: 75 | marker = marker_function(item) 76 | # if no longer unique make unique 77 | if marker in seen: 78 | continue 79 | seen[marker] = True 80 | result.append(item) 81 | return result 82 | 83 | 84 | def warn_on_error(f, exceptions): 85 | import sys 86 | assert exceptions 87 | assert isinstance(exceptions, tuple) 88 | 89 | @functools.wraps(f) 90 | def wrapper(*args, **kwargs): 91 | try: 92 | return f(*args, **kwargs) 93 | except exceptions as e: 94 | logger.warn(six.text_type(e), exc_info=sys.exc_info(), extra={ 95 | 'data': { 96 | 'body': six.text_type(e), 97 | } 98 | }) 99 | return wrapper 100 | 101 | 102 | def warn_on_duplicate(f): 103 | exceptions = (DuplicateActivityException,) 104 | return warn_on_error(f, exceptions) 105 | 106 | 107 | class memoized(object): 108 | 109 | '''Decorator. Caches a function's return value each time it is called. 110 | If called later with the same arguments, the cached value is returned 111 | (not reevaluated). 112 | ''' 113 | 114 | def __init__(self, func): 115 | self.func = func 116 | self.cache = LRUCache(10000) 117 | 118 | def __call__(self, *args): 119 | if not isinstance(args, collections.Hashable): 120 | # uncacheable. a list, for instance. 
121 | # better to not cache than blow up. 122 | return self.func(*args) 123 | if self.cache.get(args) is not MISSING: 124 | return self.cache.get(args) 125 | else: 126 | value = self.func(*args) 127 | self.cache.set(args, value) 128 | return value 129 | 130 | def __repr__(self): 131 | '''Return the function's docstring.''' 132 | return self.func.__doc__ 133 | 134 | def __get__(self, obj, objtype): 135 | '''Support instance methods.''' 136 | return functools.partial(self.__call__, obj) 137 | 138 | 139 | 140 | def get_metrics_instance(): 141 | """ 142 | Returns an instance of the metric class as defined 143 | in stream_framework settings. 144 | 145 | """ 146 | from stream_framework import settings 147 | metric_cls = get_class_from_string(settings.STREAM_METRIC_CLASS) 148 | return metric_cls(**settings.STREAM_METRICS_OPTIONS) 149 | 150 | 151 | def get_class_from_string(path, default=None): 152 | """ 153 | Return the class specified by the string. 154 | 155 | """ 156 | try: 157 | from importlib import import_module 158 | except ImportError: 159 | from django.utils.importlib import import_module 160 | i = path.rfind('.') 161 | module, attr = path[:i], path[i + 1:] 162 | mod = import_module(module) 163 | try: 164 | return getattr(mod, attr) 165 | except AttributeError: 166 | if default: 167 | return default 168 | else: 169 | raise ImportError( 170 | 'Cannot import name {} (from {})'.format(attr, mod)) 171 | -------------------------------------------------------------------------------- /stream_framework/utils/five.py: -------------------------------------------------------------------------------- 1 | """ 2 | Python 3 compatibility implementations 3 | """ 4 | import sys 5 | 6 | if sys.version > '3': 7 | long_t = int 8 | else: 9 | long_t = long -------------------------------------------------------------------------------- /stream_framework/utils/timing.py: -------------------------------------------------------------------------------- 1 | import time 2 | 3 | 4 | class timer(object): 5 | 6 | def __init__(self): 7 | self.times = [time.time()] 8 | self.total = 0. 
9 | self.next() 10 | 11 | def next(self): 12 | times = self.times 13 | times.append(time.time()) 14 | delta = times[-1] - times[-2] 15 | self.total += delta 16 | return delta 17 | -------------------------------------------------------------------------------- /stream_framework/utils/validate.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | def validate_type_strict(object_, object_types): 4 | ''' 5 | Validates that object_ is of type object_types 6 | :param object_: the object to check 7 | :param object_types: the desired type of the object (or tuple of types) 8 | ''' 9 | if not isinstance(object_types, tuple): 10 | object_types = (object_types,) 11 | exact_type_match = any([type(object_) == t for t in object_types]) 12 | if not exact_type_match: 13 | error_format = 'Please pass object_ of type %s as the argument, encountered type %s' 14 | message = error_format % (object_types, type(object_)) 15 | raise ValueError(message) 16 | 17 | 18 | def validate_list_of_strict(object_list, object_types): 19 | ''' 20 | Verifies that the items in object_list are of 21 | type object_types 22 | 23 | :param object_list: the list of objects to check 24 | :param object_types: the type of the object (or tuple with types) 25 | 26 | In general this goes against Python's duck typing ideology 27 | See this discussion for instance 28 | http://stackoverflow.com/questions/1549801/differences-between-isinstance-and-type-in-python 29 | 30 | We use it in cases where you can configure the type of class to use 31 | And where we should validate that you are in fact supplying that class 32 | ''' 33 | for object_ in object_list: 34 | validate_type_strict(object_, object_types) 35 | -------------------------------------------------------------------------------- /stream_framework/verbs/__init__.py: -------------------------------------------------------------------------------- 1 | from stream_framework.utils import get_class_from_string 2 | 3 | 4 | VERB_DICT = dict() 5 | 6 | 7 | def get_verb_storage(): 8 | from stream_framework import settings 9 | if settings.STREAM_VERB_STORAGE == 'in-memory': 10 | return VERB_DICT 11 | else: 12 | return get_class_from_string(settings.STREAM_VERB_STORAGE)() 13 | 14 | 15 | def register(verb): 16 | ''' 17 | Registers the given verb class 18 | ''' 19 | from stream_framework.verbs.base import Verb 20 | if not issubclass(verb, Verb): 21 | raise ValueError('%s does not subclass Verb' % verb) 22 | registered_verb = get_verb_storage().get(verb.id, verb) 23 | if registered_verb != verb: 24 | raise ValueError( 25 | 'cannot register verb %r with id %s (clashing with verb %r)' % 26 | (verb, verb.id, registered_verb)) 27 | get_verb_storage()[verb.id] = verb 28 | 29 | 30 | def get_verb_by_id(verb_id): 31 | if not isinstance(verb_id, int): 32 | raise ValueError('please provide a verb id, got %r' % verb_id) 33 | 34 | return get_verb_storage()[verb_id] 35 | -------------------------------------------------------------------------------- /stream_framework/verbs/base.py: -------------------------------------------------------------------------------- 1 | from stream_framework.verbs import register 2 | 3 | 4 | class Verb(object): 5 | 6 | ''' 7 | Every activity has a verb and an object. 
8 | Nomenclature is loosely based on 9 | http://activitystrea.ms/specs/atom/1.0/#activity.summary 10 | ''' 11 | id = 0 12 | 13 | def __str__(self): 14 | return self.infinitive 15 | 16 | def serialize(self): 17 | serialized = self.id 18 | return serialized 19 | 20 | 21 | class Follow(Verb): 22 | id = 1 23 | infinitive = 'follow' 24 | past_tense = 'followed' 25 | 26 | register(Follow) 27 | 28 | 29 | class Comment(Verb): 30 | id = 2 31 | infinitive = 'comment' 32 | past_tense = 'commented' 33 | 34 | register(Comment) 35 | 36 | 37 | class Love(Verb): 38 | id = 3 39 | infinitive = 'love' 40 | past_tense = 'loved' 41 | 42 | register(Love) 43 | 44 | 45 | class Add(Verb): 46 | id = 4 47 | infinitive = 'add' 48 | past_tense = 'added' 49 | 50 | register(Add) 51 | --------------------------------------------------------------------------------
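A minimal usage sketch of the verb registry defined above: every Verb subclass carries a unique integer id, register() stores the class in the configured verb storage, and get_verb_by_id() resolves a serialized id back to its class. The Pin verb, its id value of 5, and the assumption of the default in-memory verb storage are illustrative only, not part of the library.

from stream_framework.verbs import register, get_verb_by_id
from stream_framework.verbs.base import Verb


class Pin(Verb):
    # ids must be unique across all registered verbs; the built-in verbs
    # use 1-4, so 5 here is only an example value
    id = 5
    infinitive = 'pin'
    past_tense = 'pinned'

register(Pin)

# activities serialize only the verb id; resolve it back to the class when loading
assert get_verb_by_id(5) is Pin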