├── .gitignore
├── .travis.yml
├── CHANGELOG.rst
├── LICENSE
├── README.rst
├── docs
│   ├── .gitignore
│   ├── Makefile
│   ├── conf.py
│   └── index.rst
├── examples
│   ├── repl_db
│   │   └── repl.py
│   └── tutorial
│       ├── mysql_demo.py
│       └── sqlalchemy_demo.py
├── meepo
│   ├── __init__.py
│   ├── _compat.py
│   ├── apps
│   │   ├── __init__.py
│   │   ├── eventsourcing
│   │   │   ├── __init__.py
│   │   │   ├── event_store.py
│   │   │   ├── prepare_commit.py
│   │   │   ├── pub.py
│   │   │   └── sub.py
│   │   └── replicator
│   │       ├── __init__.py
│   │       ├── queue.py
│   │       ├── rq.py
│   │       └── worker.py
│   ├── pub
│   │   ├── __init__.py
│   │   ├── mysql.py
│   │   └── sqlalchemy.py
│   ├── signals.py
│   ├── sub
│   │   ├── __init__.py
│   │   ├── dummy.py
│   │   ├── nano.py
│   │   └── zmq.py
│   └── utils.py
├── setup.py
├── tests
│   ├── conftest.py
│   ├── test_eventsourcing
│   │   ├── __init__.py
│   │   ├── test_event_store.py
│   │   ├── test_prepare_commit.py
│   │   ├── test_redis_es_sub.py
│   │   └── test_sqlalchemy_es_pub.py
│   ├── test_mysql_pub.py
│   ├── test_replicator.py
│   ├── test_sqlalchemy_pub.py
│   └── test_utils.py
└── tox.ini

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
.python-version

*.py[cod]

# test json conf
tests/conf.json

# C extensions
*.so

# intellij files
.idea
out
*.xml
*.iml

# Packages
*.egg
*.egg-info
dist
build
eggs
parts
bin
var
sdist
develop-eggs
.installed.cfg
lib
lib64
__pycache__

# Installer logs
pip-log.txt

# Unit test / coverage reports
.coverage
.tox
nosetests.xml

# Translations
*.mo

--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
language: python
python: 3.4
services:
  - mysql
  - redis-server
env:
  - TOXENV=flake8
  - TOXENV=py27
  - TOXENV=py33
  - TOXENV=py34
  - TOXENV=pypy
before_install:
  - pip install tox
before_script:
  # enable row-based binlog
  - sudo touch /etc/mysql/conf.d/replication.cnf
  - sudo chmod 777 /etc/mysql/conf.d/replication.cnf
  - echo '[mysqld]' > /etc/mysql/conf.d/replication.cnf
  - echo 'log-bin = mysql-bin' >> /etc/mysql/conf.d/replication.cnf
  - echo 'server-id = 1' >> /etc/mysql/conf.d/replication.cnf
  - echo 'binlog-format = row' >> /etc/mysql/conf.d/replication.cnf
  - sudo chmod 700 /etc/mysql/conf.d/replication.cnf
  - sudo service mysql restart
script:
  - tox -v

--------------------------------------------------------------------------------
/CHANGELOG.rst:
--------------------------------------------------------------------------------
Meepo Changelog
===============

Version 0.1.9
-------------

Released on November 26, 2014.

- multi listeners, sentinel process and queue deduplication features for
  replicator, via #13, #14
- refine sqlalchemy_pub for more accurate event sourcing
- add tests for mysql_pub and sqlalchemy_pub
- add examples


Version 0.1.8
-------------

Released on November 7, 2014.

- add RedisCacheReplicator
- add signal raw for better customization


Version 0.1.7
-------------

Released on October 30, 2014.

- compatible with twemproxy


Version 0.1.6
-------------

Released on September 23, 2014.

- gracefully handle KeyboardInterrupt.
- better worker retry handling.
- allow multiple pks to be sent to callback task.


Version 0.1.5
-------------

Released on September 12, 2014.

- upgrade mysql-replication version to latest.
- gracefully bypass event sourcing when redis fails.
- skip mysql row event if no primary_key found.
- tests, some bugfixes and tunings.


Version 0.1.4
-------------

Released on September 2, 2014.

- now print queue size in logging message
- allow multiple workers (consistent hash on pk) for event


Version 0.1.3
-------------

Released on August 29, 2014.

- auto expire for eventsourcing sub keys.
- allow callable as namespace.


Version 0.1.2
-------------

Released on August 15, 2014.

- allow multiple topics in registered callback.


Version 0.1.1
-------------

Released on August 7, 2014.

- add meepo replicator base class.
- bug fix for sqlalchemy_pub


Version 0.1.0
-------------

Released on July 29, 2014.

First public release.

--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2018 eleme

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

--------------------------------------------------------------------------------
/README.rst:
--------------------------------------------------------------------------------
=====
Meepo
=====

.. image:: http://img.shields.io/travis/eleme/meepo/master.svg?style=flat
    :target: https://travis-ci.org/eleme/meepo

.. image:: http://img.shields.io/pypi/v/meepo.svg?style=flat
    :target: https://pypi.python.org/pypi/meepo

.. image:: http://img.shields.io/pypi/dm/meepo.svg?style=flat
    :target: https://pypi.python.org/pypi/meepo

Meepo is event sourcing and event broadcasting for databases.

Documentation: https://meepo.readthedocs.org/


Installation
============

.. highlight:: bash

:Requirements: **Python 2.x >= 2.7** or **Python 3.x >= 3.2** or **PyPy**

To install the latest released version of Meepo::

    $ pip install meepo


Features
========

Meepo can be used for many tasks, including replication, eventsourcing,
cache refresh/invalidation, real-time analytics etc. The only limitation is
that all tasks must be row-based, since meepo only emits ``table_action`` ->
``pk`` style events.

* Row-based database replication.

* Replicate RDBMS to NoSQL and search engine.

* Event Sourcing.

* Logging and Auditing

* Realtime analytics


Usage
=====

Check out the `documentation`_ and `examples/`_.

.. _`documentation`: https://meepo.readthedocs.org/en/latest/
.. _`examples/`: https://github.com/eleme/meepo/tree/develop/examples

--------------------------------------------------------------------------------
/docs/.gitignore:
--------------------------------------------------------------------------------
_build/

--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
# Makefile for Sphinx documentation
#

# You can set these variables from the command line.
SPHINXOPTS    =
SPHINXBUILD   = sphinx-build
PAPER         =
BUILDDIR      = _build

# User-friendly check for sphinx-build
ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1)
$(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/)
endif

# Internal variables.
PAPEROPT_a4     = -D latex_paper_size=a4
PAPEROPT_letter = -D latex_paper_size=letter
ALLSPHINXOPTS   = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
# the i18n builder cannot share the environment and doctrees with the others
I18NSPHINXOPTS  = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .

.PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext

help:
	@echo "Please use \`make <target>' where <target> is one of"
	@echo "  html       to make standalone HTML files"
	@echo "  dirhtml    to make HTML files named index.html in directories"
	@echo "  singlehtml to make a single large HTML file"
	@echo "  pickle     to make pickle files"
	@echo "  json       to make JSON files"
	@echo "  htmlhelp   to make HTML files and a HTML help project"
	@echo "  qthelp     to make HTML files and a qthelp project"
	@echo "  devhelp    to make HTML files and a Devhelp project"
	@echo "  epub       to make an epub"
	@echo "  latex      to make LaTeX files, you can set PAPER=a4 or PAPER=letter"
	@echo "  latexpdf   to make LaTeX files and run them through pdflatex"
	@echo "  latexpdfja to make LaTeX files and run them through platex/dvipdfmx"
	@echo "  text       to make text files"
	@echo "  man        to make manual pages"
	@echo "  texinfo    to make Texinfo files"
	@echo "  info       to make Texinfo files and run them through makeinfo"
	@echo "  gettext    to make PO message catalogs"
	@echo "  changes    to make an overview of all changed/added/deprecated items"
	@echo "  xml        to make Docutils-native XML files"
	@echo "  pseudoxml  to make pseudoxml-XML files for display purposes"
	@echo "  linkcheck  to check all external links for integrity"
	@echo "  doctest    to run all doctests embedded in the documentation (if enabled)"

clean:
	rm -rf $(BUILDDIR)/*

html:
	$(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
	@echo
	@echo "Build finished. The HTML pages are in $(BUILDDIR)/html."

dirhtml:
	$(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml
	@echo
	@echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml."

singlehtml:
	$(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml
	@echo
	@echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml."

pickle:
	$(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle
	@echo
	@echo "Build finished; now you can process the pickle files."

json:
	$(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json
	@echo
	@echo "Build finished; now you can process the JSON files."

htmlhelp:
	$(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp
	@echo
	@echo "Build finished; now you can run HTML Help Workshop with the" \
	      ".hhp project file in $(BUILDDIR)/htmlhelp."

qthelp:
	$(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp
	@echo
	@echo "Build finished; now you can run "qcollectiongenerator" with the" \
	      ".qhcp project file in $(BUILDDIR)/qthelp, like this:"
	@echo "# qcollectiongenerator $(BUILDDIR)/qthelp/ThriftPy.qhcp"
	@echo "To view the help file:"
	@echo "# assistant -collectionFile $(BUILDDIR)/qthelp/ThriftPy.qhc"

devhelp:
	$(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp
	@echo
	@echo "Build finished."
	@echo "To view the help file:"
	@echo "# mkdir -p $$HOME/.local/share/devhelp/ThriftPy"
	@echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/ThriftPy"
	@echo "# devhelp"

epub:
	$(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub
	@echo
	@echo "Build finished. The epub file is in $(BUILDDIR)/epub."

latex:
	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
	@echo
	@echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex."
	@echo "Run \`make' in that directory to run these through (pdf)latex" \
	      "(use \`make latexpdf' here to do that automatically)."

latexpdf:
	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
	@echo "Running LaTeX files through pdflatex..."
	$(MAKE) -C $(BUILDDIR)/latex all-pdf
	@echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."

latexpdfja:
	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
	@echo "Running LaTeX files through platex and dvipdfmx..."
	$(MAKE) -C $(BUILDDIR)/latex all-pdf-ja
	@echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."

text:
	$(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text
	@echo
	@echo "Build finished. The text files are in $(BUILDDIR)/text."

man:
	$(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man
	@echo
	@echo "Build finished. The manual pages are in $(BUILDDIR)/man."

texinfo:
	$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
	@echo
	@echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo."
	@echo "Run \`make' in that directory to run these through makeinfo" \
	      "(use \`make info' here to do that automatically)."

info:
	$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
	@echo "Running Texinfo files through makeinfo..."
	make -C $(BUILDDIR)/texinfo info
	@echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo."

gettext:
	$(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale
	@echo
	@echo "Build finished. The message catalogs are in $(BUILDDIR)/locale."

changes:
	$(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes
	@echo
	@echo "The overview file is in $(BUILDDIR)/changes."

linkcheck:
	$(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck
	@echo
	@echo "Link check complete; look for any errors in the above output " \
	      "or in $(BUILDDIR)/linkcheck/output.txt."

doctest:
	$(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest
	@echo "Testing of doctests in the sources finished, look at the " \
	      "results in $(BUILDDIR)/doctest/output.txt."

xml:
	$(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml
	@echo
	@echo "Build finished. The XML files are in $(BUILDDIR)/xml."

pseudoxml:
	$(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml
	@echo
	@echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml."

--------------------------------------------------------------------------------
/docs/conf.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
#
# Meepo documentation build configuration file, created by
# sphinx-quickstart on Thu May 29 17:54:39 2014.
#
# This file is execfile()d with the current directory set to its
# containing dir.
#
# Note that not all possible configuration values are present in this
# autogenerated file.
#
# All configuration values have a default; values that are commented out
# serve to show the default.

# import sys
import os

# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
# sys.path.insert(0, os.path.abspath('.'))

# -- General configuration ------------------------------------------------

# If your documentation needs a minimal Sphinx version, state it here.
# needs_sphinx = '1.0'

# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
extensions = [
    'sphinx.ext.autodoc',
    'sphinx.ext.coverage',
]

# Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates']

# The suffix of source filenames.
source_suffix = '.rst'

# The encoding of source files.
source_encoding = 'utf-8'

# The master toctree document.
master_doc = 'index'

# General information about the project.
project = 'Meepo'
copyright = '2014, lxyu'

# The version info for the project you're documenting, acts as replacement for
# |version| and |release|, also used in various other places throughout the
# built documents.
#
# The short X.Y version.
version = '0.1.9'
# The full version, including alpha/beta/rc tags.
release = '0.1.9'

# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
# language = None

# There are two options for replacing |today|: either, you set today to some
# non-false value, then it is used:
# today = ''
# Else, today_fmt is used as the format for a strftime call.
# today_fmt = '%B %d, %Y'

# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
exclude_patterns = ['_build']

# The reST default role (used for this markup: `text`) to use for all
# documents.
# default_role = None

# If true, '()' will be appended to :func: etc. cross-reference text.
# add_function_parentheses = True

# If true, the current module name will be prepended to all description
# unit titles (such as .. function::).
# add_module_names = True

# If true, sectionauthor and moduleauthor directives will be shown in the
# output. They are ignored by default.
# show_authors = False

# The name of the Pygments (syntax highlighting) style to use.
pygments_style = 'sphinx'

# A list of ignored prefixes for module index sorting.
# modindex_common_prefix = []

# If true, keep warnings as "system message" paragraphs in the built documents.
# keep_warnings = False


# -- Options for HTML output ----------------------------------------------

# The theme to use for HTML and HTML Help pages. See the documentation for
# a list of builtin themes.
html_theme = 'default'

# Theme options are theme-specific and customize the look and feel of a theme
# further. For a list of options available for each theme, see the
# documentation.
# html_theme_options = {}

# Add any paths that contain custom themes here, relative to this directory.
# html_theme_path = []

# import and set the theme if we're building docs locally
if os.environ.get('READTHEDOCS', None) != 'True':
    html_theme = 'sphinx_rtd_theme'

    import sphinx_rtd_theme
    html_theme_path = [sphinx_rtd_theme.get_html_theme_path()]

# The name for this set of Sphinx documents. If None, it defaults to
# "<project> v<release> documentation".
# html_title = None

# A shorter title for the navigation bar. Default is the same as html_title.
# html_short_title = None

# The name of an image file (relative to this directory) to place at the top
# of the sidebar.
# html_logo = None

# The name of an image file (within the static path) to use as favicon of the
# docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32
# pixels large.
# html_favicon = None

# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ['_static']

# Add any extra paths that contain custom files (such as robots.txt or
# .htaccess) here, relative to this directory. These files are copied
# directly to the root of the documentation.
# html_extra_path = []

# If not '', a 'Last updated on:' timestamp is inserted at every page bottom,
# using the given strftime format.
# html_last_updated_fmt = '%b %d, %Y'

# If true, SmartyPants will be used to convert quotes and dashes to
# typographically correct entities.
# html_use_smartypants = True

# Custom sidebar templates, maps document names to template names.
# html_sidebars = {}

# Additional templates that should be rendered to pages, maps page names to
# template names.
# html_additional_pages = {}

# If false, no module index is generated.
# html_domain_indices = True

# If false, no index is generated.
# html_use_index = True

# If true, the index is split into individual pages for each letter.
# html_split_index = False

# If true, links to the reST sources are added to the pages.
# html_show_sourcelink = True

# If true, "Created using Sphinx" is shown in the HTML footer. Default is True.
# html_show_sphinx = True

# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True.
# html_show_copyright = True

# If true, an OpenSearch description file will be output, and all pages will
# contain a <link> tag referring to it. The value of this option must be the
# base URL from which the finished HTML is served.
# html_use_opensearch = ''

# This is the file name suffix for HTML files (e.g. ".xhtml").
# html_file_suffix = None

# Output file base name for HTML help builder.
htmlhelp_basename = 'meepo_doc'


# -- Options for LaTeX output ---------------------------------------------

latex_elements = {
    # The paper size ('letterpaper' or 'a4paper').
    # 'papersize': 'letterpaper',

    # The font size ('10pt', '11pt' or '12pt').
    # 'pointsize': '10pt',

    # Additional stuff for the LaTeX preamble.
    # 'preamble': '',
}

# Grouping the document tree into LaTeX files. List of tuples
# (source start file, target name, title,
#  author, documentclass [howto, manual, or own class]).
latex_documents = [
    ('index', 'Meepo.tex', 'Meepo Documentation',
     'lxyu', 'manual'),
]

# The name of an image file (relative to this directory) to place at the top of
# the title page.
# latex_logo = None

# For "manual" documents, if this is true, then toplevel headings are parts,
# not chapters.
# latex_use_parts = False

# If true, show page references after internal links.
# latex_show_pagerefs = False

# If true, show URL addresses after external links.
# latex_show_urls = False

# Documents to append as an appendix to all manuals.
# latex_appendices = []

# If false, no module index is generated.
# latex_domain_indices = True


# -- Options for manual page output ---------------------------------------

# One entry per manual page. List of tuples
# (source start file, name, description, authors, manual section).
man_pages = [
    ('index', 'meepo', 'Meepo Documentation',
     ['lxyu'], 1)
]

# If true, show URL addresses after external links.
# man_show_urls = False


# -- Options for Texinfo output -------------------------------------------

# Grouping the document tree into Texinfo files. List of tuples
# (source start file, target name, title, author,
#  dir menu entry, description, category)
texinfo_documents = [
    ('index', 'Meepo', 'Meepo Documentation',
     'lxyu', 'Meepo', 'Event sourcing and broadcasting for database.',
     'Miscellaneous'),
]

# Documents to append as an appendix to all manuals.
# texinfo_appendices = []

# If false, no module index is generated.
# texinfo_domain_indices = True

# How to display URL addresses: 'footnote', 'no', or 'inline'.
# texinfo_show_urls = 'footnote'

# If true, do not generate a @detailmenu in the "Top" node's menu.
# texinfo_no_detailmenu = False

--------------------------------------------------------------------------------
/docs/index.rst:
--------------------------------------------------------------------------------
===================
Meepo Documentation
===================

Welcome to meepo's documentation. Meepo is an event sourcing and broadcasting
platform for databases.

This documentation consists of two parts:

1. Meepo PubSub (`meepo.pub` & `meepo.sub`). This part is enough if you
   only need a simple solution for your database events.

2. Meepo Apps (`meepo.apps`). This part ships with eventsourcing and
   replicator apps for advanced use. You can refer to the examples for demos.

Meepo source code is hosted on Github: https://github.com/eleme/meepo

.. contents::
    :local:
    :depth: 2
    :backlinks: none


Features
========

Meepo can be used for many tasks, including replication, eventsourcing,
cache refresh/invalidation, real-time analytics etc. The only limitation is
that all tasks must be row-based, since meepo only emits ``table_action`` ->
``pk`` style events.

* Row-based database replication.

  Meepo can be used to replicate data between databases including
  postgres, sqlite, etc.

  Refer to the ``examples/repl_db`` script for a demo.

* Replicate RDBMS to NoSQL and search engine.

  Meepo can also be used to replicate data changes from RDBMS to redis,
  elasticsearch etc.

  Refer to ``examples/repl_redis`` and ``examples/repl_elasticsearch`` for
  demos.

* Event Sourcing.

  Meepo can log and replay what has happened since some point in time using
  simple event sourcing.

  Refer to ``examples/event_sourcing`` for a demo.

.. note::

    Meepo can only replicate row-based data, which means it DOES NOT
    replicate schema changes or bulk operations.


Installation
============

.. highlight:: bash

:Requirements: **Python 2.x >= 2.7** or **Python 3.x >= 3.2** or **PyPy**

To install the latest released version of Meepo::

    $ pip install meepo


Usage
=====

Meepo uses blinker signals to hook into the events of mysql binlog and
sqlalchemy, and the hooks are very easy to install.

Hook with MySQL's binlog events:

.. code:: python

    from meepo.pub import mysql_pub
    mysql_pub(mysql_dsn)

Hook with SQLAlchemy's events:

.. code:: python

    from meepo.pub import sqlalchemy_pub
    sqlalchemy_pub(session)

Then you can connect to a signal and run tasks based on it:

.. code:: python

    sg = signal("test_write")

    @sg.connect
    def print_test_write(pk):
        print("test_write -> %s" % pk)

Try out the demo scripts in ``examples/tutorial`` for more about how meepo
events work.


Pub Concept
===========

.. automodule:: meepo.pub


MySQL Pub
---------

.. automodule:: meepo.pub.mysql
    :members:

SQLAlchemy Pub
--------------

.. automodule:: meepo.pub.sqlalchemy
    :members:

Meepo Sub
=========

.. automodule:: meepo.sub

Dummy Sub
---------

.. automodule:: meepo.sub.dummy
    :members:

0MQ Sub
-------

.. automodule:: meepo.sub.zmq
    :members:


Applications
============

EventSourcing
-------------

Concept
~~~~~~~

.. automodule:: meepo.apps.eventsourcing

Pub & Sub
`````````

.. automodule:: meepo.apps.eventsourcing.pub
    :members:

.. automodule:: meepo.apps.eventsourcing.sub
    :members:

EventStore
~~~~~~~~~~

.. automodule:: meepo.apps.eventsourcing.event_store

.. autoclass:: meepo.apps.eventsourcing.event_store.RedisEventStore
    :members:

PrepareCommit
~~~~~~~~~~~~~

.. automodule:: meepo.apps.eventsourcing.prepare_commit

.. autoclass:: meepo.apps.eventsourcing.prepare_commit.RedisPrepareCommit
    :members:

Replicator
----------

.. automodule:: meepo.apps.replicator
    :members:

--------------------------------------------------------------------------------
/examples/repl_db/repl.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-


import logging

import click
from blinker import signal
import sqlalchemy as sa
from sqlalchemy.orm import scoped_session, sessionmaker
from sqlalchemy.ext.automap import automap_base
from sqlalchemy.exc import SQLAlchemyError

from meepo.pub import mysql_pub


def repl_db_sub(master_dsn, slave_dsn, tables):
    """Database replication subscriber.

    The function subscribes to the event sourcing pk stream, retrieves rows
    from the master based on pk and then updates the slave.
    """
    logger = logging.getLogger("meepo.sub.replicate_sub")

    # sqlalchemy reflection
    logger.info("reflecting master database: {}".format(master_dsn))
    master_engine = sa.create_engine(master_dsn)
    master_base = automap_base()
    master_base.prepare(engine=master_engine, reflect=True)
    MasterSession = scoped_session(sessionmaker(bind=master_engine))

    logger.info("reflecting slave database: {}".format(slave_dsn))
    slave_engine = sa.create_engine(slave_dsn)
    slave_base = automap_base()
    slave_base.prepare(engine=slave_engine, reflect=True)
    SlaveSession = scoped_session(sessionmaker(bind=slave_engine))

    def _write_by_pk(name, pk):
        """Copy a row from master to slave based on pk.
        """
        MasterModel = master_base.classes[name]
        obj = MasterSession.query(MasterModel).get(pk)
        if not obj:
            logger.error("pk for {} not found in master: {}".format(name, pk))
            return

        SlaveModel = slave_base.classes[name]
        columns = [c.name for c in SlaveModel.__table__.columns]
        s_obj = SlaveModel(**{k: v
                              for k, v in obj.__dict__.items()
                              if k in columns})
        SlaveSession.add(s_obj)

        try:
            SlaveSession.commit()
        except SQLAlchemyError as e:
            SlaveSession.rollback()
            logger.exception(e)

        # cleanup
        MasterSession.close()
        SlaveSession.close()

    def _update_by_pk(name, pk):
        """Update a row from master to slave based on pk.
        """
        MasterModel = master_base.classes[name]
        obj = MasterSession.query(MasterModel).get(pk)

        SlaveModel = slave_base.classes[name]
        s_obj = SlaveSession.query(SlaveModel).get(pk)
        if not s_obj:
            return _write_by_pk(name, pk)

        columns = [c.name for c in SlaveModel.__table__.columns]
        for col in columns:
            try:
                val = getattr(obj, col)
            except AttributeError:
                continue
            setattr(s_obj, col, val)

        try:
            SlaveSession.commit()
        except SQLAlchemyError as e:
            SlaveSession.rollback()
            logger.exception(e)

        # cleanup
        MasterSession.close()
        SlaveSession.close()

    def _delete_by_pk(name, pk):
        """Delete a row from slave based on pk.
        """
        Model = slave_base.classes[name]
        obj = SlaveSession.query(Model).get(pk)
        if obj:
            SlaveSession.delete(obj)
            SlaveSession.commit()

        # cleanup
        SlaveSession.close()

    def _sub(table):

        def _sub_write(pk):
            logger.info("repl_db {}_write: {}".format(table, pk))
            _write_by_pk(table, pk)
        signal("%s_write" % table).connect(_sub_write, weak=False)

        def _sub_update(pk):
            logger.info("repl_db {}_update: {}".format(table, pk))
            _update_by_pk(table, pk)
        signal("%s_update" % table).connect(_sub_update, weak=False)

        def _sub_delete(pk):
            logger.info("repl_db {}_delete: {}".format(table, pk))
            _delete_by_pk(table, pk)
        signal("%s_delete" % table).connect(_sub_delete, weak=False)

    tables = (t for t in tables if t in slave_base.classes.keys())
    for table in tables:
        _sub(table)


@click.command()
@click.option("-b", "--blocking", is_flag=True, default=False)
@click.option('-m', '--master_dsn')
@click.option('-s', '--slave_dsn')
@click.argument('tables', nargs=-1)
def main(master_dsn, slave_dsn, tables, blocking=False):
    """DB Replication app.

    This script will replicate data from a mysql master to other databases
    (including mysql, postgres, sqlite).

    This script only supports a very limited replication:

    1. data only. The script only replicates data, so you have to make sure
       the tables already exist in the slave db.
    2. pk only. The script replicates data by pk; when a row pk changes, it
       retrieves the row from the master and writes it to the slave.

    :param master_dsn: mysql dsn with row-based binlog enabled.
    :param slave_dsn: slave dsn, most databases supported including mysql,
        postgres, sqlite etc.
    :param tables: the tables to be replicated.
    :param blocking: by default, the script only reads the existing binlog,
        replicates it and exits. If set to True, the script will run as a
        daemon and wait for more mysql binlog to replicate.
    """
    # currently only supports mysql master
    assert master_dsn.startswith("mysql")

    logger = logging.getLogger(__name__)
    logger.info("replicating tables: %s" % ", ".join(tables))

    repl_db_sub(master_dsn, slave_dsn, tables)
    mysql_pub(master_dsn, blocking=blocking)


if __name__ == '__main__':
    main()
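
# Usage sketch (hypothetical dsns and table names; the master must be a
# mysql server with row-based binlog enabled):
#
#   $ python repl.py -m "mysql+pymysql://root@master/db" \
#         -s "sqlite:///slave.db" table_a table_b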

--------------------------------------------------------------------------------
/examples/tutorial/mysql_demo.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-

"""
meepo_examples.tutorial.mysql
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

A demo script on how to use meepo with mysql row-based binlog.
"""

import logging

import click
import pymysql

from meepo.utils import setup_logger
setup_logger()
logger = logging.getLogger("meepo_examples.tutorial.mysql")

from meepo._compat import urlparse


def db_prepare(dsn):
    parsed = urlparse(dsn)
    db_settings = {
        "host": parsed.hostname,
        "port": parsed.port or 3306,
        "user": parsed.username,
        "passwd": parsed.password
    }
    conn = pymysql.connect(**db_settings)

    cursor = conn.cursor()
    sql = """
    DROP DATABASE IF EXISTS meepo_test;
    CREATE DATABASE meepo_test;
    DROP TABLE IF EXISTS meepo_test.test;
    CREATE TABLE meepo_test.test (
        id INT NOT NULL AUTO_INCREMENT,
        data VARCHAR (256) NOT NULL,
        PRIMARY KEY (id)
    );
    RESET MASTER;
    """
    cursor.execute(sql)
    logger.info("table created.")

    # generate binlog
    sql = """
    INSERT INTO test (data) VALUES ('a');
    INSERT INTO test (data) VALUES ('b'), ('c'), ('d');
    UPDATE test SET data = 'aa' WHERE id = 1;
    UPDATE test SET data = 'bb' WHERE id = 2;
    UPDATE test SET data = 'cc' WHERE id != 1;
    DELETE FROM test WHERE id != 1;
    DELETE FROM test WHERE id = 1;
    """
    cursor.execute(sql)
    cursor.close()
    conn.commit()
    conn.close()
    logger.info("binlog created.")


@click.command()
@click.option('-m', '--mysql_dsn')
def main(mysql_dsn):
    # make sure the user has permission to read binlog
    mysql_dsn = mysql_dsn or "mysql+pymysql://root@localhost/meepo_test"

    from meepo.sub.dummy import print_sub
    print_sub(["test"])

    from meepo.pub import mysql_pub
    mysql_pub(mysql_dsn, ["test"])


if __name__ == "__main__":
    main()
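
# Usage sketch (hypothetical dsn; the mysql server must have row-based
# binlog enabled and the user permission to read it):
#
#   $ python mysql_demo.py -m "mysql+pymysql://root@localhost/meepo_test"
#
# Note that db_prepare(), which creates the test table and generates some
# binlog events to replay, is not wired into main() -- call it first if
# you need fresh binlog.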

--------------------------------------------------------------------------------
/examples/tutorial/sqlalchemy_demo.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-

"""
meepo_examples.tutorial.sqlalchemy
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

A demo script on how to use meepo with sqlalchemy.
"""

import logging

import sqlalchemy as sa
from sqlalchemy.orm import sessionmaker, scoped_session
from sqlalchemy.ext.declarative import declarative_base

from meepo.utils import setup_logger
setup_logger()
logger = logging.getLogger("meepo_examples.tutorial.sqlalchemy")

Base = declarative_base()


class Test(Base):
    __tablename__ = "test"

    id = sa.Column(sa.Integer, primary_key=True)
    data = sa.Column(sa.String)


def session_prepare(dsn):
    engine = sa.create_engine(dsn)
    session = scoped_session(sessionmaker(bind=engine))

    engine.execute("DROP TABLE IF EXISTS test;")
    engine.execute("""
    CREATE TABLE test (
        id INT NOT NULL,
        data VARCHAR (256) NOT NULL,
        PRIMARY KEY (id)
    );""")
    logger.info("table created.")

    return session


def sa_demo(session):
    t_1 = Test(id=1, data='a')
    session.add(t_1)
    session.commit()

    t_2 = Test(id=2, data='b')
    t_3 = Test(id=3, data='c')
    session.add(t_2)
    session.add(t_3)
    session.commit()

    t_2.data = "x"
    session.commit()

    session.delete(t_3)
    session.commit()


def main():
    dsn = "sqlite:///sa_demo.db"
    session = session_prepare(dsn)

    from meepo.pub import sqlalchemy_pub
    sqlalchemy_pub(session)

    from meepo.sub.dummy import print_sub
    print_sub(["test"])

    sa_demo(session)


if __name__ == "__main__":
    main()

--------------------------------------------------------------------------------
/meepo/__init__.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-

__version__ = "0.1.9"

--------------------------------------------------------------------------------
/meepo/_compat.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-

from __future__ import absolute_import


__all__ = ["pickle", "urlparse", "Empty"]

import sys
PY3 = sys.version_info[0] >= 3

if PY3:
    from urllib.parse import urlparse
    from queue import Empty
    import pickle

    bytes = bytes
    str = str

else:
    from urlparse import urlparse
    from Queue import Empty
    import cPickle as pickle

    bytes = str
    str = unicode  # noqa

--------------------------------------------------------------------------------
/meepo/apps/__init__.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-

--------------------------------------------------------------------------------
/meepo/apps/eventsourcing/__init__.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-

"""
For the basic concept of eventsourcing, refer to
http://martinfowler.com/eaaDev/EventSourcing.html

**Simple Eventsourcing**

The eventsourcing implemented in meepo is a simplified version of es: it
only records what has changed since a timestamp, but not the diffs.

So you only get a list of primary keys when you query with a timestamp::

    order_update 102 27 59 43

Because event sourcing is hard in a distributed system, you can't give an
accurate answer of the order in which events happened.
So we only keep a record of what has happened since some time; you then
know the data has gone stale, and you have to retrieve the latest data
from the source and run your tasks upon it.

**Why Eventsourcing**

Why is eventsourcing needed? Let's check the sqlalchemy_pub events flow:

a. before flush -> record instance states
b. commit transaction in database
c. after commit -> pub signal

So it's possible that the process (or thread or greenlet) is somehow killed
right between b and c, and the signal is lost.

With prepare commit in event sourcing, the session will be recorded, so
it's possible to recover from this corrupt state.

But note that this is very rare, so in most cases you don't need this 100%
guarantee on events, and the simple :func:`sqlalchemy_pub` is enough.
"""

from __future__ import absolute_import

__all__ = ["sqlalchemy_es_pub", "redis_es_sub"]

from .pub import sqlalchemy_es_pub
from .sub import redis_es_sub

--------------------------------------------------------------------------------
/meepo/apps/eventsourcing/event_store.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-

from __future__ import absolute_import

import logging
import time

import redis

from ...utils import s, d


class EventStore(object):
    def __init__(self):
        pass

    def add(self, event, pk, ts=None):
        raise NotImplementedError

    def replay(self, event, ts=0, end_ts=None, with_ts=False):
        raise NotImplementedError

    def query(self, event, pk, ts=None):
        raise NotImplementedError

    def clear(self, event, ts=None):
        raise NotImplementedError


class RedisEventStore(EventStore):
    """EventStore based on redis.

    The event store uses the namespace and event name as key and stores
    primary keys in a redis sorted set, with the event timestamp as score.

    **General Usage**

    Init the event store with redis_dsn::

        event_store = RedisEventStore("redis://localhost/", "store")

    You can also pass a function as namespace; it'll accept a timestamp as
    arg, which can be used to separate event stores by hour, day or
    week etc.::

        event_store = RedisEventStore(
            "redis://localhost/", lambda ts: "store:%s" % d(ts, "%Y%m%d"))

    Add an event with::

        event_store.add("test_write", 1)

    Or add an event with the timestamp passed in::

        event_store.add("test_write", 2, ts=1024)

    Clear all records of an event within a namespace::

        event_store.clear("test_write")

    **Events Replay**

    One important feature of eventsourcing is replay: it can tell you what
    has changed and the latest update timestamp of each event.
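
    A consumer that went down can thus catch up by replaying everything
    since the last timestamp it processed.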

    Replay all records of an event within a namespace::

        event_store.replay("test_write")

    Or replay all records since a timestamp::

        # all events since timestamp 1024
        event_store.replay("test_write", ts=1024)

        # all events between timestamp 1024 and now
        event_store.replay("test_write", ts=1024, end_ts=time.time())

    You can also replay all events with their latest update time::

        event_store.replay("test_write", with_ts=True)

    **Events Query**

    You can query the last change timestamp of an event with the query api.

    Query records within the current namespace::

        event_store.query("test_write", 1)

    The return value will either be an int timestamp or None if the record
    does not exist.

    Add a timestamp to query events within another namespace (assuming you
    separate the event store namespace by day, you may want to query events
    that happened yesterday)::

        event_store.query("test_write", 1, ts=some_value)

    .. note::

        The redis event store class is compatible with twemproxy.

    :param redis_dsn: the redis instance uri
    :param namespace: namespace func for the event key; the func should
        accept an event timestamp and return the namespace. namespace also
        accepts a str type arg, which will always yield the same namespace
        for all timestamps.
    :param ttl: expiration time for events stored, default to 3 days.
    :param socket_timeout: redis socket timeout
    :param kwargs: kwargs to be passed to redis instance init func.
    """

    LUA_TIME = "return tonumber(redis.call('TIME')[1])"
    LUA_ZADD = ' '.join("""
        local score = redis.call('ZSCORE', KEYS[1], ARGV[2])
        if score and tonumber(ARGV[1]) <= tonumber(score) then
            return 0
        else
            redis.call('ZADD', KEYS[1], ARGV[1], ARGV[2])
            return 1
        end
        """.split())

    def __init__(self, redis_dsn, namespace=None, ttl=3600*24*3,
                 socket_timeout=1, **kwargs):
        super(RedisEventStore, self).__init__()

        self.r = redis.StrictRedis.from_url(
            redis_dsn, socket_timeout=socket_timeout, **kwargs)
        self.ttl = ttl
        self.logger = logging.getLogger("meepo.redis_es")

        if namespace is None:
            self.namespace = lambda ts: "meepo:redis_es:%s" % d(ts, "%Y%m%d")
        elif isinstance(namespace, str):
            self.namespace = lambda ts: namespace
        elif callable(namespace):
            self.namespace = namespace

    def _keygen(self, event, ts=None):
        """Generate the redis key for an event at a timestamp.

        :param event: event name
        :param ts: timestamp, default to current timestamp if left as None
        """
        return "%s:%s" % (self.namespace(ts or time.time()), event)

    def _time(self):
        """Redis lua func to get the timestamp from the redis server; use
        this func to avoid time inconsistency across servers.
        """
        return self.r.eval(self.LUA_TIME, 1, 1)

    def _zadd(self, key, pk, ts=None, ttl=None):
        """Redis lua func to add an event to the corresponding sorted set.
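
        The lua script only updates a member when the new timestamp is
        greater than the stored score, so out-of-order or replayed adds
        never move an event backwards in time.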

        :param key: the key to be stored in redis server
        :param pk: the primary key of event
        :param ts: timestamp of the event, default to redis_server's
            current timestamp
        :param ttl: the expiration time of event since the last update
        """
        return self.r.eval(self.LUA_ZADD, 1, key, ts or self._time(), pk)

    def add(self, event, pk, ts=None, ttl=None):
        """Add an event to the event store.

        All events are stored in a sorted set in redis with the timestamp
        as score.

        :param event: the event to be added, format should be ``table_action``
        :param pk: the primary key of event
        :param ts: timestamp of the event, default to redis_server's
            current timestamp
        :param ttl: the expiration time of event since the last update
        :return: bool
        """
        key = self._keygen(event, ts)
        try:
            self._zadd(key, pk, ts, ttl)
            return True
        except redis.ConnectionError as e:
            # connection error typically happens when the redis server
            # can't be reached or the request timed out; the error is
            # silenced with an error log and False is returned.
            self.logger.error(
                "redis event store failed with connection error %r" % e)
            return False

    def replay(self, event, ts=0, end_ts=None, with_ts=False):
        """Replay events based on timestamp.

        If you split namespaces by ts, the replay will only return events
        within the same namespace.

        :param event: event name
        :param ts: replay events after ts, default from 0.
        :param end_ts: replay events up to end_ts, default to "+inf".
        :param with_ts: return timestamp with events, default to False.
        :return: list of pks when with_ts set to False, list of (pk, ts)
            tuples when with_ts is True.
        """
        key = self._keygen(event, ts)
        end_ts = end_ts if end_ts else "+inf"
        elements = self.r.zrangebyscore(key, ts, end_ts, withscores=with_ts)

        if not with_ts:
            return [s(e) for e in elements]
        else:
            return [(s(e[0]), int(e[1])) for e in elements]

    def query(self, event, pk, ts=None):
        """Query the last update timestamp of an event pk.

        You can pass a timestamp to only look for events later than that
        within the same namespace.

        :param event: the event name.
        :param pk: the pk value for query.
        :param ts: query event pk after ts, default to None which will query
            the whole span of the current namespace.
        """
        key = self._keygen(event, ts)
        pk_ts = self.r.zscore(key, pk)
        return int(pk_ts) if pk_ts else None

    def clear(self, event, ts=None):
        """Clear all stored records of an event.

        :param event: event name to be cleared.
        :param ts: timestamp used to locate the namespace
        """
        return self.r.delete(self._keygen(event, ts))

--------------------------------------------------------------------------------
/meepo/apps/eventsourcing/prepare_commit.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-

"""
Prepare Commit is also known as Two-Phase Commit; for the basic concept,
refer to http://en.wikipedia.org/wiki/Two-phase_commit_protocol


The two-phase commit feature implemented in meepo is used to make sure
events are 100% reliably recorded in eventsourcing; it is not a strict
traditional two-phase commit.
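
Wired up, the flow looks roughly like this (a minimal sketch, assuming a
local redis and a session factory already instrumented with
:func:`sqlalchemy_es_pub`)::

    from meepo.apps.eventsourcing.prepare_commit import RedisPrepareCommit
    from meepo.signals import signal

    pc = RedisPrepareCommit("redis://localhost/")

    def on_prepare(session, event):
        # record the session's events before the database commit happens
        pc.prepare(session, event)

    signal("session_prepare").connect(on_prepare, weak=False)
    # resolve the session after the database commit (or rollback)
    signal("session_commit").connect(pc.commit, weak=False)
    signal("session_rollback").connect(pc.rollback, weak=False)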

Only use it if you need a 100% guarantee of not losing any events. The
feature should only be used in combination with sqlalchemy_es_pub, which
ships with session prepare-commit signals.
"""

from __future__ import absolute_import

import functools
import logging
import pickle
import time

import redis

from ...utils import d, s


class PrepareCommit(object):
    """Prepare-Commit base class, defines the essential APIs.
    """
    def __init__(self):
        pass

    def prepare(self):
        raise NotImplementedError

    def commit(self):
        raise NotImplementedError

    def rollback(self):
        pass


def _redis_strict_pc(func):
    """Strict decorator for RedisPrepareCommit.

    The decorator decides whether to silence exceptions based on the strict
    attr of the RedisPrepareCommit object.
    """
    phase = "session_%s" % func.__name__

    @functools.wraps(func)
    def wrapper(self, session, *args, **kwargs):
        try:
            func(self, session, *args, **kwargs)
            self.logger.debug("%s -> %s" % (session.meepo_unique_id, phase))
            return True
        except Exception as e:
            if self.strict:
                raise
            if isinstance(e, redis.ConnectionError):
                self.logger.warn("redis connection error in %s: %s" % (
                    phase, session.meepo_unique_id))
            else:
                self.logger.exception(e)
            return False
    return wrapper


class RedisPrepareCommit(PrepareCommit):
    """Prepare-commit session recording based on redis.

    This prepare commit records sqlalchemy sessions, and should be used
    with :func:`sqlalchemy_es_pub`.

    :param redis_dsn: the redis instance uri
    :param strict: by default, exceptions raised in the middle of
        prepare-commit are only caught and logged as errors while the
        process continues to execute. If strict is set to True, the
        exception will be raised to the outside.
    :param namespace: namespace string or namespace func. If a func is
        passed, it should accept a timestamp as arg and return a string
        namespace.
    :param ttl: expiration time for events stored, default to 1 day.
    :param socket_timeout: redis socket timeout
    :param kwargs: kwargs to be passed to redis instance init func.
    """
    def __init__(self, redis_dsn, strict=False, namespace=None, ttl=3600*24,
                 socket_timeout=1, **kwargs):
        super(RedisPrepareCommit, self).__init__()

        self.r = redis.StrictRedis.from_url(
            redis_dsn, socket_timeout=socket_timeout, **kwargs)
        self.strict = strict
        self.ttl = ttl
        self.logger = logging.getLogger("meepo.prepare_commit.redis_pc")

        if namespace is None:
            self.namespace = lambda ts: "meepo:redis_pc:%s" % d(ts, "%Y%m%d")
        elif isinstance(namespace, str):
            self.namespace = lambda ts: namespace
        elif callable(namespace):
            self.namespace = namespace

    def _keygen(self, session):
        if not hasattr(session, "meepo_prepare_ts"):
            session.meepo_prepare_ts = int(time.time())
        prefix = self.namespace(session.meepo_prepare_ts)
        sp_key = "%s:session_prepare" % prefix
        sp_hkey = "%s:%s" % (sp_key, session.meepo_unique_id)
        return sp_key, sp_hkey

    def phase(self, session):
        """Determine the session phase in prepare commit.
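
        A session id still present in the prepare set was prepared but
        never committed or rolled back; after a crash, :meth:`prepare_info`
        can be used to find such sessions and :meth:`session_info` to
        recover their events.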

        :param session: sqlalchemy session
        :return: phase "prepare" or "commit"
        """
        sp_key, _ = self._keygen(session)
        if self.r.sismember(sp_key, session.meepo_unique_id):
            return "prepare"
        else:
            return "commit"

    @_redis_strict_pc
    def prepare(self, session, event):
        """Prepare phase for session.

        :param session: sqlalchemy session
        :param event: event dict recorded for the session, as sent by the
            ``session_prepare`` signal
        """
        if not event:
            self.logger.warn("event empty!")
            return

        sp_key, sp_hkey = self._keygen(session)

        def _pk(obj):
            pk_values = tuple(getattr(obj, c.name)
                              for c in obj.__mapper__.primary_key)
            if len(pk_values) == 1:
                return pk_values[0]
            return pk_values

        def _get_dump_value(value):
            if hasattr(value, '__mapper__'):
                return _pk(value)
            return value
        pickled_event = {
            k: pickle.dumps({_get_dump_value(obj) for obj in objs})
            for k, objs in event.items()}
        with self.r.pipeline(transaction=False) as p:
            p.sadd(sp_key, session.meepo_unique_id)
            p.hmset(sp_hkey, pickled_event)
            p.execute()

    @_redis_strict_pc
    def commit(self, session):
        """Commit phase for session.

        :param session: sqlalchemy session
        """
        sp_key, sp_hkey = self._keygen(session)
        with self.r.pipeline(transaction=False) as p:
            p.srem(sp_key, session.meepo_unique_id)
            p.expire(sp_hkey, 60 * 60)
            p.execute()
    # we don't need to specially deal with rollback in this phase
    rollback = commit

    def session_info(self, session):
        """Return the events recorded for a session in prepare phase.

        :param session: sqlalchemy session
        :return: event dict mapping event name to a set of pks
        """
        _, sp_hkey = self._keygen(session)
        picked_event = self.r.hgetall(sp_hkey)
        event = {s(k): pickle.loads(v) for k, v in picked_event.items()}
        return event

    def prepare_info(self, ts=None):
        """Return all session unique ids recorded in prepare phase.

        :param ts: timestamp, default to current timestamp
        :return: set of session unique ids
        """
        sp_key = "%s:session_prepare" % self.namespace(ts or int(time.time()))
        return set(s(m) for m in self.r.smembers(sp_key))

    def clear(self, ts=None):
        """Clear all sessions in prepare phase.

        :param ts: timestamp used to locate the namespace
        """
        sp_key = "%s:session_prepare" % self.namespace(ts or int(time.time()))
        return self.r.delete(sp_key)

--------------------------------------------------------------------------------
/meepo/apps/eventsourcing/pub.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-

from __future__ import absolute_import

import logging

import collections

from sqlalchemy import event

from ...pub.sqlalchemy import sqlalchemy_pub
from ...signals import signal


class sqlalchemy_es_pub(sqlalchemy_pub):
    """SQLAlchemy EventSourcing Pub.

    Add eventsourcing to sqlalchemy_pub; three more signals are added for
    tables:

    * ``session_prepare``
    * ``session_commit`` / ``session_rollback``

    The hook will use the prepare-commit pattern to ensure 100% reliability
    on event sourcing.
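
    A minimal usage sketch (``Session`` is assumed to be your sqlalchemy
    session factory and ``order`` one of your tables)::

        from meepo.apps.eventsourcing import sqlalchemy_es_pub, redis_es_sub

        sqlalchemy_es_pub(Session)
        redis_es_sub(Session, ["order"], "redis://localhost/")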
25 | 
26 |     **Multi-Sessions Prepare Commit**
27 | 
28 |     The 3 additional signals are attached to the sqlalchemy session factory
29 |     so they can be used in multi-session environments.
30 | 
31 |     If you only use one sqlalchemy session in your program, it's fine to use
32 |     ``session_prepare`` / ``session_commit`` like any other signals.
33 | 
34 |     But if you use multiple sessions, you can separate the prepare-commit
35 |     signals by:
36 | 
37 |     * Separating sessions by setting the ``info`` arg in the session factory.
38 | 
39 |       Because ``info`` is the only attribute copied from the session factory
40 |       to the session instance.
41 | 
42 |       ``meepo.signals`` monkey patches the blinker ``hashable_identity``
43 |       func to use ``session.info`` for the session hash.
44 | 
45 |     * Providing the session as sender when signal receivers connect.
46 | 
47 |     For example::
48 | 
49 |         # setting `info` in sqlalchemy session_factory
50 |         SessionA = sessionmaker(bind=engine_a, info={"name": "session_a"})
51 |         SessionB = sessionmaker(bind=engine_b, info={"name": "session_b"})
52 | 
53 |         sqlalchemy_es_pub(SessionA)
54 |         sqlalchemy_es_pub(SessionB)
55 | 
56 |         sg = signal("session_prepare")
57 | 
58 |         def _sp_for_a(session, event):
59 |             print(session.info)
60 |         sg.connect(_sp_for_a, sender=SessionA)
61 | 
62 |     Then the ``_sp_for_a`` will only receive prepare-commit related events
63 |     triggered by ``SessionA``.
64 |     """
65 |     logger = logging.getLogger("meepo.pub.sqlalchemy_es_pub")
66 | 
67 |     def _install(self):
68 |         event.listen(self.session, "before_flush", self.session_update)
69 | 
70 |         # enable session prepare-commit hook
71 |         event.listen(self.session, "after_flush", self.session_prepare)
72 |         event.listen(self.session, "after_commit", self.session_commit)
73 |         event.listen(self.session, "after_rollback", self.session_rollback)
74 | 
75 |     def session_prepare(self, session, _):
76 |         """Send session_prepare signal in sqlalchemy ``after_flush``.
77 | 
78 |         The signal carries an extra ``event`` argument, which records the
79 |         full info of what changed in this session, so the signal receiver
80 |         can record the event.
81 |         """
82 |         if not hasattr(session, 'meepo_unique_id'):
83 |             self._session_init(session)
84 | 
85 |         evt = collections.defaultdict(set)
86 |         for action in ("write", "update", "delete"):
87 |             objs = getattr(session, "pending_%s" % action)
88 |             # filter tables if possible
89 |             if self.tables:
90 |                 objs = [o for o in objs
91 |                         if o.__table__.fullname in self.tables]
92 |             for obj in objs:
93 |                 evt_name = "%s_%s" % (obj.__table__.fullname, action)
94 |                 evt[evt_name].add(obj)
95 |                 self.logger.debug("%s - session_prepare: %s -> %s" % (
96 |                     session.meepo_unique_id, evt_name, evt))
97 | 
98 |         # only trigger signal when event exists
99 |         if evt:
100 |             signal("session_prepare").send(session, event=evt)
101 | 
102 |     def session_commit(self, session):
103 |         """Send session_commit signal in sqlalchemy ``after_commit``.
104 | 
105 |         This marks the success of the session so the session may enter the
106 |         commit phase.
107 |         """
108 |         # this may happen when there's nothing to commit
109 |         if not hasattr(session, 'meepo_unique_id'):
110 |             self.logger.debug("skipped - session_commit")
111 |             return
112 | 
113 |         # normal session pub
114 |         self.logger.debug("%s - session_commit" % session.meepo_unique_id)
115 |         self._session_pub(session)
116 |         signal("session_commit").send(session)
117 |         self._session_del(session)
118 | 
119 |     def session_rollback(self, session):
120 |         """Send session_rollback signal in sqlalchemy ``after_rollback``.
121 | 
122 |         This marks the failure of the session so the session may enter the
123 |         rollback phase.
124 |         """
125 |         # this may happen when there's nothing to rollback
126 |         if not hasattr(session, 'meepo_unique_id'):
127 |             self.logger.debug("skipped - session_rollback")
128 |             return
129 | 
130 |         # del session meepo id after rollback
131 |         self.logger.debug("%s - after_rollback" % session.meepo_unique_id)
132 |         signal("session_rollback").send(session)
133 |         self._session_del(session)
134 | 
-------------------------------------------------------------------------------- /meepo/apps/eventsourcing/sub.py:
1 | # -*- coding: utf-8 -*-
2 | 
3 | from __future__ import absolute_import
4 | 
5 | import datetime
6 | import functools
7 | import itertools
8 | import logging
9 | 
10 | from ...signals import signal
11 | 
12 | from .event_store import RedisEventStore
13 | from .prepare_commit import RedisPrepareCommit
14 | 
15 | 
16 | def redis_es_sub(session, tables, redis_dsn, strict=False,
17 |                  namespace=None, ttl=3600*24*3, socket_timeout=1):
18 |     """Redis EventSourcing sub.
19 | 
20 |     This sub should be used together with sqlalchemy_es_pub; it uses
21 |     RedisEventStore as the events storage layer and the prepare-commit
22 |     pattern in :func:`sqlalchemy_es_pub` to ensure 100% reliability on
23 |     events recording.
24 | 
25 |     :param session: the sqlalchemy session to bind the signals
26 |     :param tables: tables to be event sourced.
27 |     :param redis_dsn: the redis server to store event sourcing events.
28 |     :param strict: arg to be passed to RedisPrepareCommit. If set to True,
29 |         the exception will not be silenced and may cause the sqlalchemy
30 |         transaction to fail; the user should handle the exception on the
31 |         app side in this case.
32 |     :param namespace: namespace string or func. If a func is passed, it should
33 |         accept a timestamp as arg and return a string namespace.
34 |     :param ttl: expiration time for events stored, default to 3 days.
35 |     :param socket_timeout: redis socket timeout.
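    :return: a ``(event_store, prepare_commit)`` tuple holding the
        :class:`RedisEventStore` and :class:`RedisPrepareCommit` instances
        installed by this sub.

    A usage sketch (assuming an existing sqlalchemy ``session``; the redis
    dsn is illustrative)::

        event_store, prepare_commit = redis_es_sub(
            session, ["test"], "redis://localhost:6379/1")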
36 |     """
37 |     logger = logging.getLogger("meepo.sub.redis_es_sub")
38 | 
39 |     if not isinstance(tables, (list, set)):
40 |         raise ValueError("tables should be list or set")
41 | 
42 |     # install event store hook for tables
43 |     event_store = RedisEventStore(
44 |         redis_dsn, namespace=namespace, ttl=ttl, socket_timeout=socket_timeout)
45 | 
46 |     def _es_event_sub(pk, event):
47 |         if event_store.add(event, str(pk)):
48 |             logger.info("%s: %s -> %s" % (
49 |                 event, pk, datetime.datetime.now()))
50 |         else:
51 |             logger.error("event sourcing failed: %s" % pk)
52 | 
53 |     events = ("%s_%s" % (tb, action) for tb, action in
54 |               itertools.product(*[tables, ["write", "update", "delete"]]))
55 |     for event in events:
56 |         sub_func = functools.partial(_es_event_sub, event=event)
57 |         signal(event).connect(sub_func, weak=False)
58 | 
59 |     # install prepare-commit hook
60 |     prepare_commit = RedisPrepareCommit(
61 |         redis_dsn, strict=strict, namespace=namespace,
62 |         socket_timeout=socket_timeout)
63 | 
64 |     signal("session_prepare").connect(
65 |         prepare_commit.prepare, sender=session, weak=False)
66 |     signal("session_commit").connect(
67 |         prepare_commit.commit, sender=session, weak=False)
68 |     signal("session_rollback").connect(
69 |         prepare_commit.rollback, sender=session, weak=False)
70 | 
71 |     return event_store, prepare_commit
72 | 
-------------------------------------------------------------------------------- /meepo/apps/replicator/__init__.py:
1 | # -*- coding: utf-8 -*-
2 | 
3 | """Meepo Replicators based on events.
4 | """
5 | 
6 | from __future__ import absolute_import
7 | 
8 | __all__ = ["QueueReplicator", "RqReplicator"]
9 | 
10 | import logging
11 | import zmq
12 | 
13 | zmq_ctx = zmq.Context()
14 | 
15 | 
16 | class Replicator(object):
17 |     """Replicator base class.
18 |     """
19 |     def __init__(self, listen=None, name="meepo.replicator.zmq"):
20 |         """
21 |         :param listen: zeromq dsn to connect, can be a list
22 |         """
23 |         # replicator logger naming
24 |         self.name = name
25 |         self.logger = logging.getLogger(name)
26 | 
27 |         self.listen = listen
28 |         self.socket = zmq_ctx.socket(zmq.SUB)
29 | 
30 |     def run(self):
31 |         raise NotImplementedError()
32 | 
33 |     def event(self):
34 |         raise NotImplementedError()
35 | 
36 | 
37 | from .queue import QueueReplicator
38 | from .rq import RqReplicator
39 | 
-------------------------------------------------------------------------------- /meepo/apps/replicator/queue.py:
1 | # -*- coding: utf-8 -*-
2 | 
3 | from __future__ import absolute_import
4 | 
5 | import ketama
6 | import zmq
7 | 
8 | from multiprocessing import Queue
9 | from zmq.utils.strtypes import asbytes
10 | 
11 | from . import Replicator
12 | from .worker import WorkerPool
13 | 
14 | 
15 | class QueueReplicator(Replicator):
16 |     """Replicator using Queue as worker task queue
17 | 
18 |     This Replicator receives events from upstream zmq devices and puts them
19 |     into a set of python multiprocessing queues using ketama consistent
20 |     hashing. Each queue has a worker. We use :class:`WorkerPool` to manage
21 |     a set of queues.
22 |     """
23 | 
24 |     def __init__(self, *args, **kwargs):
25 |         super(QueueReplicator, self).__init__(*args, **kwargs)
26 | 
27 |         # init workers
28 |         self.workers = {}
29 |         self.worker_queues = {}
30 | 
31 |     def event(self, *topics, **kwargs):
32 |         """Topic callback registry.
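
        A registration sketch (the topic name and address are illustrative;
        the callback must return True/False as described below)::

            queue_repl = QueueReplicator("tcp://127.0.0.1:4000")

            @queue_repl.event("test_write", workers=2)
            def process_test_write(pk):
                print("test_write -> %s" % pk)
                return True

            queue_repl.run()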
33 | 
34 |         The callback func receives the pk (or a list of pks when ``multi``
35 |         is set) and then processes the replication job.
36 | 
37 |         Note: The callback func must return True/False. When passed a list
38 |         of pks, the func should return a list of True/False with the same
39 |         length as pks.
40 | 
41 |         :param topics: a list of topics
42 |         :param workers: how many workers to process this topic
43 |         :param multi: whether to pass multiple pks at once
44 |         :param queue_limit: when the queue size is larger than the limit,
45 |             the worker will run the deduplicate procedure
46 |         """
47 |         workers = kwargs.pop("workers", 1)
48 |         multi = kwargs.pop("multi", False)
49 |         queue_limit = kwargs.pop("queue_limit", 10000)
50 | 
51 |         def wrapper(func):
52 |             for topic in topics:
53 |                 queues = [Queue() for _ in range(workers)]
54 |                 hash_ring = ketama.Continuum()
55 |                 for q in queues:
56 |                     hash_ring[str(hash(q))] = q
57 |                 self.worker_queues[topic] = hash_ring
58 |                 self.workers[topic] = WorkerPool(
59 |                     queues, topic, func, multi=multi, queue_limit=queue_limit,
60 |                     logger_name="%s.%s" % (self.name, topic))
61 |                 self.socket.setsockopt(zmq.SUBSCRIBE, asbytes(topic))
62 |             return func
63 |         return wrapper
64 | 
65 |     def run(self):
66 |         """Run the replicator.
67 | 
68 |         The main process receives messages and distributes them to worker queues.
69 |         """
70 |         for worker_pool in self.workers.values():
71 |             worker_pool.start()
72 | 
73 |         if isinstance(self.listen, list):
74 |             for i in self.listen:
75 |                 self.socket.connect(i)
76 |         else:
77 |             self.socket.connect(self.listen)
78 | 
79 |         try:
80 |             while True:
81 |                 msg = self.socket.recv_string()
82 |                 lst = msg.split()
83 |                 if len(lst) == 2:
84 |                     topic, pks = lst[0], [lst[1], ]
85 |                 elif len(lst) > 2:
86 |                     topic, pks = lst[0], lst[1:]
87 |                 else:
88 |                     self.logger.error("msg corrupt -> %s" % msg)
89 |                     continue
90 | 
91 |                 self.logger.debug("replicator: {0} -> {1}".format(topic, pks))
92 |                 for pk in pks:
93 |                     self.worker_queues[topic][str(hash(pk))].put(pk)
94 |         except Exception as e:
95 |             self.logger.exception(e)
96 |         finally:
97 |             for worker_pool in self.workers.values():
98 |                 worker_pool.terminate()
99 | 
-------------------------------------------------------------------------------- /meepo/apps/replicator/rq.py:
1 | # -*- coding: utf-8 -*-
2 | 
3 | from __future__ import absolute_import
4 | 
5 | import collections
6 | import zmq
7 | 
8 | from zmq.utils.strtypes import asbytes
9 | from . import Replicator
10 | 
11 | 
12 | class RqReplicator(Replicator):
13 |     """Replicator suitable for the rq task queue.
14 | 
15 |     For example:
16 |     >>> rq_repl = RqReplicator("tcp://127.0.0.1:4000")
17 |     >>> @rq_repl.event("a_table_update")
18 |     ... def job_test(pks):
19 |     ...     q = rq.Queue("update_cache:a_table")
20 |     ...     q.enqueue("module.jobs.func", pks)
21 |     >>> rq_repl.run()
22 | 
23 |     The rq queue should be created in external code.
24 | 
25 |     In fact, this replicator can be used generally: it passes pks as the
26 |     argument to the supplied callback func, and the func can do anything you want.
27 | 
28 |     The callback func should always accept a list of primary keys.
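
    Since ``listen`` may also be a list, a sketch for subscribing to
    multiple upstream devices (addresses are illustrative)::

        rq_repl = RqReplicator(
            ["tcp://127.0.0.1:4000", "tcp://127.0.0.1:4001"])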
29 |     """
30 |     def __init__(self, *args, **kwargs):
31 |         super(RqReplicator, self).__init__(*args, **kwargs)
32 | 
33 |         self.topic_funcs = {}
34 | 
35 |     def event(self, *topics):
36 |         def wrapper(func):
37 |             for topic in topics:
38 |                 self.topic_funcs[topic] = func
39 |                 self.socket.setsockopt(zmq.SUBSCRIBE, asbytes(topic))
40 |             return func
41 |         return wrapper
42 | 
43 |     def run(self):
44 |         if isinstance(self.listen, list):
45 |             for i in self.listen:
46 |                 self.socket.connect(i)
47 |         else:
48 |             self.socket.connect(self.listen)
49 | 
50 |         error_pks = collections.defaultdict(set)
51 | 
52 |         def do_job(topic, pks):
53 |             try:
54 |                 self.topic_funcs[topic](pks)
55 |             except Exception as e:
56 |                 self.logger.exception(e)
57 |                 error_pks[topic].update(pks)
58 |             else:
59 |                 # remove error pks
60 |                 if topic in error_pks:
61 |                     error_pks[topic].difference_update(pks)
62 |                     if not error_pks[topic]:
63 |                         error_pks.pop(topic)
64 |         try:
65 |             while True:
66 |                 # retry error pks
67 |                 for t, p in list(error_pks.items()):
68 |                     self.logger.warn(
69 |                         "process error pks: {} -> {}".format(t, p))
70 |                     do_job(t, p)
71 | 
72 |                 msg = self.socket.recv_string()
73 | 
74 |                 lst = msg.split()
75 |                 if len(lst) >= 2:
76 |                     topic, pks = lst[0], lst[1:]
77 |                 else:
78 |                     self.logger.error("msg corrupt -> %s" % msg)
79 |                     continue
80 | 
81 |                 self.logger.info("replicator: {} -> {}".format(topic, pks))
82 |                 do_job(topic, pks)
83 |         except Exception as e:
84 |             self.logger.exception(e)
85 | 
-------------------------------------------------------------------------------- /meepo/apps/replicator/worker.py:
1 | # -*- coding: utf-8 -*-
2 | 
3 | from __future__ import absolute_import
4 | 
5 | import collections
6 | import logging
7 | import os
8 | import signal
9 | import time
10 | 
11 | from multiprocessing import Process
12 | 
13 | from ..._compat import Empty
14 | 
15 | 
16 | def _deduplicate(queue, max_size):  # drain the queue and re-queue unique items
17 |     items = []
18 |     for i in range(0, max_size):
19 |         try:
20 |             items.append(queue.get_nowait())
21 |         except Empty:
22 |             break
23 |     items = set(items)
24 |     for item in items:
25 |         queue.put(item)
26 | 
27 | 
28 | class Worker(Process):
29 |     """Worker process
30 | 
31 |     The worker wraps the user supplied callback func. It uses a
32 |     multiprocessing queue as the task queue and retries when a task fails.
33 |     """
34 |     MAX_PK_COUNT = 256
35 | 
36 |     def __init__(self, queue, name, cb, multi=False, logger_name=None,
37 |                  retry=True, queue_limit=10000, max_retry_count=10,
38 |                  max_retry_interval=60):
39 |         """
40 |         :param multi: allow multiple pks to be sent in one callback
41 |         :param retry: retry on pk if callback failed
42 |         :param queue_limit: queue size limit for deduplication
43 |         :param max_retry_count: max retry count for a single pk
44 |         :param max_retry_interval: max sleep time when callback failed
45 |         """
46 |         super(Worker, self).__init__()
47 |         self.name = name
48 |         self.queue = queue
49 |         self.queue_limit = queue_limit
50 |         self.cb = cb
51 |         self.multi = multi
52 |         self.retry = retry
53 | 
54 |         # config logger
55 |         logger_name = logger_name or "name-%s" % id(self)
56 |         self.logger = logging.getLogger(logger_name)
57 |         self.logger.debug("worker %s initializing..." % self.name)
58 | 
59 |         # config retry
60 |         self._max_retry_interval = max_retry_interval
61 |         self._max_retry_count = max_retry_count
62 |         self._retry_stats = collections.Counter()
63 | 
64 |     def run(self):
65 |         self.logger.debug("worker %s running..." % self.name)
66 |         while True:
67 |             try:
68 |                 pks = set()
69 | 
70 |                 try:
71 |                     max_size = self.queue.qsize()
72 |                     if max_size > self.queue_limit:
73 |                         self.logger.info("worker %s deduplicating" % self.name)
74 |                         _deduplicate(self.queue, max_size)
75 |                 except NotImplementedError:
76 |                     pass
77 | 
78 |                 # try to get all pks from the queue at once
79 |                 while not self.queue.empty():
80 |                     pks.add(self.queue.get())
81 |                     if len(pks) > self.MAX_PK_COUNT:
82 |                         break
83 | 
84 |                 # take a nap if the queue is empty
85 |                 if not pks:
86 |                     time.sleep(1)
87 |                     continue
88 | 
89 |                 # keep order to match the results
90 |                 pks = list(pks)
91 | 
92 |                 try:
93 |                     # macOS doesn't support Queue.qsize
94 |                     self.logger.info("{0} -> {1} - qsize: {2}".format(
95 |                         self.name, pks, self.queue.qsize()))
96 |                 except NotImplementedError:
97 |                     self.logger.info("{0} -> {1}".format(self.name, pks))
98 | 
99 |                 if self.multi:
100 |                     results = self.cb(pks)
101 |                 else:
102 |                     results = [self.cb(pk) for pk in pks]
103 | 
104 |                 if not self.retry:
105 |                     continue
106 | 
107 |                 # check failed tasks and retry
108 |                 for pk, r in zip(pks, results):
109 |                     if r:
110 |                         self.on_success(pk)
111 |                     else:
112 |                         self.on_fail(pk)
113 | 
114 |                 # take a nap on fail
115 |                 if not all(results):
116 |                     time.sleep(min(3 * results.count(False),
117 |                                    self._max_retry_interval))
118 | 
119 |             except KeyboardInterrupt:
120 |                 self.logger.debug("KeyboardInterrupt stop %s" % self.name)
121 |                 break
122 | 
123 |             except Exception as e:
124 |                 self.logger.exception(e)
125 |                 time.sleep(10)
126 | 
127 |     def on_fail(self, pk):
128 |         self._retry_stats[pk] += 1
129 |         if self._retry_stats[pk] > self._max_retry_count:
130 |             del self._retry_stats[pk]
131 |             self.logger.error("callback on pk failed -> %s" % pk)
132 |         else:
133 |             # put the failed pk back to the queue
134 |             self.queue.put(pk)
135 |             self.logger.warn(
136 |                 "callback on pk failed %s times -> %s" % (
137 |                     self._retry_stats[pk], pk))
138 | 
139 |     def on_success(self, pk):
140 |         if pk in self._retry_stats:
141 |             del self._retry_stats[pk]
142 | 
143 | 
144 | class WorkerPool(object):
145 |     """Manage a set of workers and recreate a worker when it dies.
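
    A usage sketch (assuming a callback ``cb`` that returns True on
    success; this mirrors how :class:`QueueReplicator` builds its pools)::

        queues = [Queue() for _ in range(2)]
        pool = WorkerPool(queues, "test_write", cb, multi=False)
        pool.start()
        # ... later
        pool.terminate()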
146 |     """
147 |     def __init__(self, queues, *args, **kwargs):
148 |         self._args = args
149 |         self._kwargs = kwargs
150 |         self._queues = queues
151 | 
152 |         self._sentinel_worker = None
153 |         self.waiting_time = kwargs.pop("waiting_time", 10)
154 | 
155 |     def _make_worker(self, queue):
156 |         return Worker(queue, *self._args, **self._kwargs)
157 | 
158 |     def terminate(self):
159 |         os.kill(self._sentinel_worker.pid, signal.SIGINT)
160 |         self._sentinel_worker.join()
161 | 
162 |     def start(self):
163 |         logger = logging.getLogger("meepo.replicator.sentinel")
164 | 
165 |         def _f():
166 |             worker_map = {
167 |                 q: self._make_worker(q) for q in self._queues
168 |             }
169 |             for _, worker in worker_map.items():
170 |                 worker.start()
171 | 
172 |             logger.info("starting sentinel...")
173 |             try:
174 |                 while True:
175 |                     logger.debug("ping {} worker".format(self._args[0]))
176 |                     dead = qsize = 0
177 |                     for queue, worker in worker_map.items():
178 |                         try:
179 |                             qsize += queue.qsize()
180 |                         except NotImplementedError:
181 |                             qsize = None
182 | 
183 |                         if not worker.is_alive():
184 |                             dead += 1
185 |                             logger.warn(
186 |                                 "{} worker {} dead, recreating...".format(
187 |                                     self._args[0], worker.pid))
188 | 
189 |                             worker_map[queue] = self._make_worker(queue)
190 |                             worker_map[queue].start()
191 | 
192 |                     msg = ["{} total qsize {}".format(self._args[0], qsize),
193 |                            "{} worker alive, {} worker dead".format(
194 |                                len(worker_map) - dead, dead)]
195 | 
196 |                     logger.info("; ".join(msg))
197 | 
198 |                     time.sleep(self.waiting_time)
199 |             except KeyboardInterrupt:
200 |                 pass
201 |             finally:
202 |                 for worker in worker_map.values():
203 |                     worker.terminate()
204 | 
205 |         self._sentinel_worker = Process(target=_f)
206 |         self._sentinel_worker.start()
207 | 
-------------------------------------------------------------------------------- /meepo/pub/__init__.py:
1 | # -*- coding: utf-8 -*-
2 | 
3 | """
4 | Meepo's core concept is based on event pubs: it follows the mysql row-based
5 | binlog and the sqlalchemy events system and shapes them into
6 | ``table_action pk`` format signals.
7 | 
8 | Currently there are 2 pubs implemented: ``mysql_pub`` and ``sqlalchemy_pub``.
9 | 
10 | The publishers and subscribers are connected with ``blinker.signal``.
11 | 
12 | The publisher sends the pk by::
13 | 
14 |     signal("table_action").send(pk)
15 | 
16 | And a subscriber can receive the pk by::
17 | 
18 |     sg = signal("table_action")
19 | 
20 |     @sg.connect
21 |     def dummy_print(pk):
22 |         print(pk)
23 | """
24 | 
25 | from __future__ import absolute_import
26 | 
27 | __all__ = ["mysql_pub", "sqlalchemy_pub"]
28 | 
29 | from .mysql import mysql_pub
30 | from .sqlalchemy import sqlalchemy_pub
31 | 
-------------------------------------------------------------------------------- /meepo/pub/mysql.py:
1 | # -*- coding: utf-8 -*-
2 | 
3 | """
4 | The mysql pub will use the ``python-mysql-replication`` binlog stream as the
5 | source of events.
6 | 
7 | The events pub flow::
8 | 
9 |                                               +---------------------+
10 |                                              |                     |
11 |                                         +--->  table_write event   |
12 |                                         |    |                     |
13 |                                         |    +---------------------+
14 |                                         |
15 |     +--------------------+    +---------------+
16 |     |                    |    |               |    +---------------------+
17 |     |       mysql        |    |   meepo.pub   |    |                     |
18 |     |                    +------>             +--+--->  table_update event |
19 |     |  row-based binlog  |    |   mysql_pub   |  |  |                     |
20 |     |                    |    |               |  |  +---------------------+
21 |     +--------------------+    +---------------+  |
22 |                                                  |
23 |                                                  |  +---------------------+
24 |                                                  |  |                     |
25 |                                                  +--->  table_delete event |
26 |                                                     |                     |
27 |                                                     +---------------------+
28 | 
29 | """
30 | 
31 | from __future__ import absolute_import
32 | 
33 | import logging
34 | logger = logging.getLogger("meepo.pub.mysql_pub")
35 | 
36 | import datetime
37 | import random
38 | 
39 | import pymysqlreplication
40 | from pymysqlreplication.row_event import (
41 |     DeleteRowsEvent,
42 |     UpdateRowsEvent,
43 |     WriteRowsEvent,
44 | )
45 | 
46 | from .._compat import urlparse, str
47 | from ..signals import signal
48 | 
49 | 
50 | def mysql_pub(mysql_dsn, tables=None, blocking=False, **kwargs):
51 |     """MySQL row-based binlog events pub.
52 | 
53 |     **General Usage**
54 | 
55 |     Listen and pub all tables events::
56 | 
57 |         mysql_pub(mysql_dsn)
58 | 
59 |     Listen and pub only some tables events::
60 | 
61 |         mysql_pub(mysql_dsn, tables=["test"])
62 | 
63 |     By default the ``mysql_pub`` will process and pub all existing
64 |     row-based binlog (starting from the current binlog file with pos 0) and
65 |     quit. You may set blocking to True to block and wait for new binlog;
66 |     enable this option if you're running the script as a daemon::
67 | 
68 |         mysql_pub(mysql_dsn, blocking=True)
69 | 
70 |     The binlog stream acts as a mysql slave and reads binlog from the master,
71 |     so the server_id matters: if it conflicts with other slaves or scripts,
72 |     strange bugs may happen. By default, the server_id is randomized by
73 |     ``randint(1000000000, 4294967295)``; you may set it to a specific value
74 |     with the server_id arg::
75 | 
76 |         mysql_pub(mysql_dsn, blocking=True, server_id=1024)
77 | 
78 |     **Signals Illustrate**
79 | 
80 |     Sometimes you want more info than the pk value; the mysql_pub exposes
81 |     a raw signal which sends the original binlog stream events.
82 | 
83 |     For example, the following sql::
84 | 
85 |         INSERT INTO test (data) VALUES ('a');
86 | 
87 |     The row-based binlog generated from the sql, read by the binlog stream,
88 |     generates signals equal to::
89 | 
90 |         signal("test_write").send(1)
91 |         signal("test_write_raw").send({'values': {'data': 'a', 'id': 1}})
92 | 
93 |     **Binlog Pos Signal**
94 | 
95 |     The mysql_pub has a unique signal ``mysql_binlog_pos`` which contains
96 |     the binlog file and binlog pos; you can record the signal and resume
97 |     the binlog stream from the last position with it.
98 | 
99 |     :param mysql_dsn: mysql dsn with row-based binlog enabled.
100 |     :param tables: which tables to enable mysql_pub.
101 |     :param blocking: whether mysql_pub should wait for more binlog when all
102 |         existing binlog is processed.
103 |     :param kwargs: more kwargs to be passed to the binlog stream.
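
    A sketch of recording the position and resuming later (the
    ``resume_stream`` / ``log_file`` / ``log_pos`` kwargs are forwarded to
    ``pymysqlreplication.BinLogStreamReader``, which is assumed to accept
    them)::

        positions = []
        signal("mysql_binlog_pos").connect(positions.append, weak=False)
        mysql_pub(mysql_dsn)

        # later: resume from the last recorded "file:pos"
        log_file, log_pos = positions[-1].split(":")
        mysql_pub(mysql_dsn, resume_stream=True,
                  log_file=log_file, log_pos=int(log_pos))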
104 | """ 105 | # parse mysql settings 106 | parsed = urlparse(mysql_dsn) 107 | mysql_settings = { 108 | "host": parsed.hostname, 109 | "port": parsed.port or 3306, 110 | "user": parsed.username, 111 | "passwd": parsed.password 112 | } 113 | 114 | # connect to binlog stream 115 | stream = pymysqlreplication.BinLogStreamReader( 116 | mysql_settings, 117 | server_id=random.randint(1000000000, 4294967295), 118 | blocking=blocking, 119 | only_events=[DeleteRowsEvent, UpdateRowsEvent, WriteRowsEvent], 120 | **kwargs 121 | ) 122 | 123 | def _pk(values): 124 | if isinstance(event.primary_key, str): 125 | return values[event.primary_key] 126 | return tuple(values[k] for k in event.primary_key) 127 | 128 | for event in stream: 129 | if not event.primary_key: 130 | continue 131 | 132 | if tables and event.table not in tables: 133 | continue 134 | 135 | try: 136 | rows = event.rows 137 | except (UnicodeDecodeError, ValueError) as e: 138 | logger.exception(e) 139 | continue 140 | 141 | timestamp = datetime.datetime.fromtimestamp(event.timestamp) 142 | 143 | if isinstance(event, WriteRowsEvent): 144 | sg_name = "%s_write" % event.table 145 | sg = signal(sg_name) 146 | sg_raw = signal("%s_raw" % sg_name) 147 | 148 | for row in rows: 149 | pk = _pk(row["values"]) 150 | sg.send(pk) 151 | sg_raw.send(row) 152 | 153 | logger.debug("%s -> %s, %s" % (sg_name, pk, timestamp)) 154 | 155 | elif isinstance(event, UpdateRowsEvent): 156 | sg_name = "%s_update" % event.table 157 | sg = signal(sg_name) 158 | sg_raw = signal("%s_raw" % sg_name) 159 | 160 | for row in rows: 161 | pk = _pk(row["after_values"]) 162 | sg.send(pk) 163 | sg_raw.send(row) 164 | 165 | logger.debug("%s -> %s, %s" % (sg_name, pk, timestamp)) 166 | 167 | elif isinstance(event, DeleteRowsEvent): 168 | sg_name = "%s_delete" % event.table 169 | sg = signal(sg_name) 170 | sg_raw = signal("%s_raw" % sg_name) 171 | 172 | for row in rows: 173 | pk = _pk(row["values"]) 174 | sg.send(pk) 175 | sg_raw.send(row) 176 | 177 | logger.debug("%s -> %s, %s" % (sg_name, pk, timestamp)) 178 | 179 | signal("mysql_binlog_pos").send( 180 | "%s:%s" % (stream.log_file, stream.log_pos)) 181 | -------------------------------------------------------------------------------- /meepo/pub/sqlalchemy.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | """ 4 | The sqlalchemy pub will hook into SQLAlchemy's event system, shape and publish 5 | events with ``table_action pk`` style. 
6 | 
7 | The events pub flow::
8 | 
9 |     +------------------+          +-----------------------+
10 |    |                  |          |                       |
11 |    |    meepo.pub     |          |     before_flush      |
12 |    |                  |    +----->                       |
13 |    |  sqlalchemy_pub  |    |     |  record model states  |
14 |    |                  |    |     |                       |
15 |    +---------+--------+    |     +-----------+-----------+
16 |              |             |                 |
17 |         hook |             |                 |
18 |              |             |                 |
19 |    +---------v--------+    |     +-----------v-----------+
20 |    |                  |    |     |                       |
21 |    |    sqlalchemy    |    |     |     after_commit      |
22 |    |                  +----+----->                       |
23 |    |  session events  |          |  publish model states |
24 |    |                  |          |                       |
25 |    +------------------+          +-----------+-----------+
26 |                                              |
27 |                                        +-----+
28 |                                        |
29 |        +-------------------------------+--------------------------+
30 |        |                               |                          |
31 |    +---+---------------+    +----------v---------+    +-----------v--------+
32 |    |                   |    |                    |    |                    |
33 |    | table_write event |    | table_update event |    | table_delete event |
34 |    |                   |    |                    |    |                    |
35 |    +-------------------+    +--------------------+    +--------------------+
36 | 
37 | """
38 | 
39 | from __future__ import absolute_import
40 | 
41 | import logging
42 | 
43 | import uuid
44 | 
45 | from sqlalchemy import event
46 | 
47 | from ..signals import signal
48 | 
49 | 
50 | class sqlalchemy_pub(object):
51 |     """SQLAlchemy Pub.
52 | 
53 |     The install method will add 2 hooks on the sqlalchemy events system:
54 | 
55 |     * ``session_update`` -> sqlalchemy - ``before_flush``
56 |     * ``session_commit`` -> sqlalchemy - ``after_commit``
57 | 
58 |     The ``session_update`` method records the model states in the sqlalchemy
59 |     ``before_flush`` event, where the session reports its status with
60 |     ``session.new``, ``session.dirty`` and ``session.deleted``; these
61 |     recorded states are cleared in the ``after_commit`` event.
62 | 
63 |     **General Usage**
64 | 
65 |     Install the sqlalchemy pub hook by calling it on the sqlalchemy session::
66 | 
67 |         sqlalchemy_pub(session)
68 | 
69 |     Only listen to some tables::
70 | 
71 |         sqlalchemy_pub(session, tables=["test"])
72 | 
73 |     Tables can be added later; the duplicated tables will be automatically
74 |     merged::
75 | 
76 |         pub = sqlalchemy_pub(session)
77 |         pub(["table_a", "table_b"])
78 |         pub(["table_b", "table_c"])
79 |         pub.tables == {"table_a", "table_b", "table_c"}
80 | 
81 |     Then use the session as usual and the events will be available.
82 | 
83 |     **Signals Illustrate**
84 | 
85 |     Sometimes you want more info than the pk value; the sqlalchemy_pub
86 |     exposes a raw signal which sends the original sqlalchemy objects.
87 | 
88 |     For example, this code::
89 | 
90 |         class Test(Base):
91 |             __tablename__ = "test"
92 |             id = Column(Integer, primary_key=True)
93 |             data = Column(String)
94 | 
95 |         t_1 = Test(id=1, data='a')
96 |         session.add(t_1)
97 |         session.commit()
98 | 
99 |     Generates signals equal to::
100 | 
101 |         signal("test_write").send(1)
102 |         signal("test_write_raw").send(t_1)
103 | 
104 |     :param session: sqlalchemy session to install the hook
105 |     :param tables: tables to install the hook, leave None to pub all.
106 | 
107 |     .. warning::
108 | 
109 |         SQLAlchemy bulk operations are currently **NOT** supported, so this
110 |         code won't work::
111 | 
112 |             # bulk updates
113 |             session.query(Test).update({"data": 'x'})
114 | 
115 |             # bulk deletes
116 |             session.query(Test).filter(Test.data == 'x').delete()
117 |     """
118 | 
119 |     logger = logging.getLogger("meepo.pub.sqlalchemy_pub")
120 | 
121 |     def __init__(self, session, tables=None):
122 |         self.session = session
123 |         self.tables = set(tables) if tables else set()  # keep as set so __call__ can merge
124 | 
125 |         self._install()
126 | 
127 |     def __call__(self, tables):
128 |         self.tables |= set(tables)
129 | 
130 |     def _install(self):
131 |         # enable session_update & session_commit hook
132 |         event.listen(self.session, "before_flush", self.session_update)
133 |         event.listen(self.session, "after_commit", self.session_commit)
134 | 
135 |     def _pk(self, obj):
136 |         """Get pk values from object
137 | 
138 |         :param obj: sqlalchemy object
139 |         """
140 |         pk_values = tuple(getattr(obj, c.name)
141 |                           for c in obj.__mapper__.primary_key)
142 |         if len(pk_values) == 1:
143 |             return pk_values[0]
144 |         return pk_values
145 | 
146 |     def _session_init(self, session):
147 |         if hasattr(session, "meepo_unique_id"):
148 |             self.logger.debug("skipped - session_init")
149 |             return
150 | 
151 |         for action in ("write", "update", "delete"):
152 |             attr = "pending_%s" % action
153 |             if not hasattr(session, attr):
154 |                 setattr(session, attr, set())
155 |         session.meepo_unique_id = uuid.uuid4().hex
156 |         self.logger.debug("%s - session_init" % session.meepo_unique_id)
157 | 
158 |     def _session_del(self, session):
159 |         self.logger.debug("%s - session_del" % session.meepo_unique_id)
160 |         del session.meepo_unique_id
161 |         del session.pending_write
162 |         del session.pending_update
163 |         del session.pending_delete
164 | 
165 |     def _session_pub(self, session):
166 |         def _pub(obj, action):
167 |             """Publish object pk values with action.
168 | 
169 |             The _pub will trigger 2 signals:
170 |                 * normal ``table_action`` signal, sends primary key
171 |                 * raw ``table_action_raw`` signal, sends sqlalchemy object
172 | 
173 |             :param obj: sqlalchemy object
174 |             :param action: action on object
175 |             """
176 |             if self.tables and obj.__table__.fullname not in self.tables:
177 |                 return
178 | 
179 |             sg_name = "%s_%s" % (obj.__table__, action)
180 |             sg = signal(sg_name)
181 |             sg_raw = signal("%s_raw" % sg_name)
182 | 
183 |             pk = self._pk(obj)
184 |             if pk:
185 |                 sg.send(pk)
186 |                 sg_raw.send(obj)
187 |                 self.logger.debug("%s - session_pub: %s -> %s" % (
188 |                     session.meepo_unique_id, sg_name, pk))
189 | 
190 |         for obj in session.pending_write:
191 |             _pub(obj, action="write")
192 |         for obj in session.pending_update:
193 |             _pub(obj, action="update")
194 |         for obj in session.pending_delete:
195 |             _pub(obj, action="delete")
196 | 
197 |         session.pending_write.clear()
198 |         session.pending_update.clear()
199 |         session.pending_delete.clear()
200 | 
201 |     def session_update(self, session, *_):
202 |         """Record the sqlalchemy object states in the middle of session,
203 |         and prepare the events for the final pub in session_commit.
204 |         """
205 |         self._session_init(session)
206 |         session.pending_write |= set(session.new)
207 |         session.pending_update |= set(session.dirty)
208 |         session.pending_delete |= set(session.deleted)
209 |         self.logger.debug("%s - session_update" % session.meepo_unique_id)
210 | 
211 |     def session_commit(self, session):
212 |         """Pub the events after the session is committed.
213 | 
214 |         This method should be linked to the sqlalchemy ``after_commit`` event.
215 |         """
216 |         # this may happen when there's nothing to commit
217 |         if not hasattr(session, 'meepo_unique_id'):
218 |             self.logger.debug("skipped - session_commit")
219 |             return
220 | 
221 |         self._session_pub(session)
222 |         self._session_del(session)
223 | 
-------------------------------------------------------------------------------- /meepo/signals.py:
1 | # -*- coding: utf-8 -*-
2 | 
3 | from __future__ import absolute_import
4 | 
5 | from ._compat import str, bytes
6 | 
7 | 
8 | def _monkey_patch_hashable_func():
9 |     def hashable_identity(obj):
10 |         if hasattr(obj, '__func__'):
11 |             return (id(obj.__func__), id(obj.__self__))
12 |         elif hasattr(obj, 'im_func'):
13 |             return (id(obj.im_func), id(obj.im_self))
14 |         elif isinstance(obj, (str, bytes)):
15 |             return obj
16 |         # hack for session hash info
17 |         elif hasattr(obj, "info") and obj.info:
18 |             return hash(str(sorted(obj.info.items())))
19 |         else:
20 |             return id(obj)
21 | 
22 |     import blinker.base
23 |     blinker.base.hashable_identity = hashable_identity
24 | _monkey_patch_hashable_func()
25 | 
26 | 
27 | from blinker import Namespace
28 | 
29 | # The namespace for meepo signals. If you are not meepo code, do
30 | # not put signals in here. Create your own namespace instead.
31 | _signals = Namespace()
32 | signal = _signals.signal
33 | 
-------------------------------------------------------------------------------- /meepo/sub/__init__.py:
1 | # -*- coding: utf-8 -*-
2 | 
3 | """
4 | Meepo sub is where all the imagination comes to life; the subs implemented
5 | here are only some simple demos. Customize your own sub for the real power.
6 | 
7 | To make use of a signal, just create a function that accepts a primary key.
8 | 
9 | For example, print an event with::
10 | 
11 |     # use weak False here to force a strong ref to the lambda func.
12 |     signal("test_write").connect(
13 |         lambda pk: logger.info("test_write -> %s" % pk),
14 |         weak=False
15 |     )
16 | 
17 | For advanced use with sqlalchemy, you may also use the raw signal::
18 | 
19 |     signal("test_write_raw").connect(
20 |         lambda obj: logger.info("test_write_raw -> %s" % obj.__dict__),
21 |         weak=False
22 |     )
23 | """
24 | 
-------------------------------------------------------------------------------- /meepo/sub/dummy.py:
1 | # -*- coding: utf-8 -*-
2 | 
3 | from __future__ import absolute_import
4 | 
5 | import itertools
6 | import logging
7 | 
8 | from ..signals import signal
9 | 
10 | 
11 | def print_sub(tables):
12 |     """Dummy print sub.
13 | 
14 |     :param tables: print events of tables.
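
    For example, to log every write/update/delete event on the ``test``
    table::

        print_sub(["test"])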
15 |     """
16 |     logger = logging.getLogger("meepo.sub.print_sub")
17 |     logger.info("print_sub tables: %s" % ", ".join(tables))
18 | 
19 |     if not isinstance(tables, (list, set)):
20 |         raise ValueError("tables should be list or set")
21 | 
22 |     events = ("%s_%s" % (tb, action) for tb, action in
23 |               itertools.product(*[tables, ["write", "update", "delete"]]))
24 |     for event in events:
25 |         signal(event).connect(
26 |             lambda pk, event=event: logger.info("%s -> %s" % (event, pk)), weak=False)
27 | 
-------------------------------------------------------------------------------- /meepo/sub/nano.py:
1 | # -*- coding: utf-8 -*-
2 | 
3 | from __future__ import absolute_import
4 | 
5 | import logging
6 | 
7 | from ..signals import signal
8 | 
9 | 
10 | def nano_sub(bind, tables):
11 |     """Nanomsg fanout sub. (Experimental)
12 | 
13 |     This sub will use nanomsg to fanout the events.
14 | 
15 |     :param bind: the nanomsg pub socket address to bind.
16 |     :param tables: the events of tables to follow.
17 |     """
18 |     logger = logging.getLogger("meepo.sub.nano_sub")
19 | 
20 |     from nanomsg import Socket, PUB
21 | 
22 |     pub_socket = Socket(PUB)
23 |     pub_socket.bind(bind)
24 | 
25 |     def _sub(table):
26 |         for action in ("write", "update", "delete"):
27 |             def _sub(pk, action=action):
28 |                 msg = bytes("%s_%s %s" % (table, action, pk), 'utf-8')
29 |                 logger.debug("pub msg %s" % msg)
30 |                 pub_socket.send(msg)
31 | 
32 |             signal("%s_%s" % (table, action)).connect(_sub, weak=False)
33 | 
34 |     for table in set(tables):
35 |         _sub(table)
36 | 
-------------------------------------------------------------------------------- /meepo/sub/zmq.py:
1 | # -*- coding: utf-8 -*-
2 | 
3 | from __future__ import absolute_import
4 | 
5 | import itertools
6 | import logging
7 | 
8 | from ..signals import signal
9 | 
10 | 
11 | def zmq_sub(bind, tables, forwarder=False, green=False):
12 |     """0mq fanout sub.
13 | 
14 |     This sub will use zeromq to fanout the events.
15 | 
16 |     :param bind: the zmq pub socket or zmq device socket.
17 |     :param tables: the events of tables to follow.
18 |     :param forwarder: set to True if the zmq pub connects to a forwarder device.
19 |     :param green: whether to use a greenlet compatible zmq
20 |     """
21 |     logger = logging.getLogger("meepo.sub.zmq_sub")
22 | 
23 |     if not isinstance(tables, (list, set)):
24 |         raise ValueError("tables should be list or set")
25 | 
26 |     if not green:
27 |         import zmq
28 |     else:
29 |         import zmq.green as zmq
30 | 
31 |     ctx = zmq.Context()
32 |     socket = ctx.socket(zmq.PUB)
33 | 
34 |     if forwarder:
35 |         socket.connect(bind)
36 |     else:
37 |         socket.bind(bind)
38 | 
39 |     events = ("%s_%s" % (tb, action) for tb, action in
40 |               itertools.product(*[tables, ["write", "update", "delete"]]))
41 |     for event in events:
42 |         def _sub(pk, event=event):
43 |             msg = "%s %s" % (event, pk)
44 |             socket.send_string(msg)
45 |             logger.debug("pub msg: %s" % msg)
46 |         signal(event).connect(_sub, weak=False)
47 | 
48 |     return socket
49 | 
-------------------------------------------------------------------------------- /meepo/utils.py:
1 | # -*- coding: utf-8 -*-
2 | 
3 | from __future__ import absolute_import
4 | 
5 | from logging.config import dictConfig
6 | import datetime
7 | 
8 | from ._compat import bytes, str
9 | 
10 | 
11 | def setup_logger(level=None):
12 |     dictConfig({
13 |         'version': 1,
14 |         'disable_existing_loggers': False,
15 | 
16 |         'root': {
17 |             'handlers': ['console'],
18 |             'level': 'INFO',
19 |         },
20 | 
21 |         'loggers': {
22 |             'meepo': {
23 |                 'handlers': ['console'],
24 |                 'propagate': False,
25 |                 'level': level or 'INFO',
26 |             },
27 |         },
28 | 
29 |         'handlers': {
30 |             'console': {
31 |                 'level': 'DEBUG',
32 |                 'class': 'logging.StreamHandler',
33 |                 'formatter': 'console'
34 |             },
35 |         },
36 | 
37 |         'formatters': {
38 |             'console': {
39 |                 'format': '%(asctime)s [%(levelname)s] [%(name)s][%(process)d]'
40 |                           ': %(message)s',
41 |             },
42 |         }
43 |     })
44 | 
45 | 
46 | def cast_bytes(s, encoding='utf8', errors='strict'):
47 |     """cast str or bytes to bytes"""
48 |     if isinstance(s, bytes):
49 |         return s
50 |     elif isinstance(s, str):
51 |         return s.encode(encoding, errors)
52 |     else:
53 |         raise TypeError("Expected unicode or bytes, got %r" % s)
54 | b = cast_bytes
55 | 
56 | 
57 | def cast_str(s, encoding='utf8', errors='strict'):
58 |     """cast bytes or str to str"""
59 |     if isinstance(s, bytes):
60 |         return s.decode(encoding, errors)
61 |     elif isinstance(s, str):
62 |         return s
63 |     else:
64 |         raise TypeError("Expected unicode or bytes, got %r" % s)
65 | s = cast_str
66 | 
67 | 
68 | def cast_datetime(ts, fmt=None):
69 |     """cast timestamp to datetime or date str"""
70 |     dt = datetime.datetime.fromtimestamp(ts)
71 |     if fmt:
72 |         return dt.strftime(fmt)
73 |     return dt
74 | d = cast_datetime
75 | 
-------------------------------------------------------------------------------- /setup.py:
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | 
4 | from setuptools import setup, find_packages
5 | 
6 | # requirements
7 | install_requires = [
8 |     "SQLAlchemy>=0.9.0,<1.0.0",
9 |     "blinker>=1.3,<2.0",
10 |     "mysql-replication>=0.5,<0.6.0",
11 |     "pyketama>=0.2.0",
12 |     "pyzmq>=14.4.1,<15.0.0",
13 |     "redis>=2.10.3,<2.11.0",
14 | ]
15 | 
16 | dev_requires = [
17 |     "flake8>=2.2",
18 |     "pytest>=2.6",
19 |     "sphinx-rtd-theme>=0.1.6",
20 |     "sphinx>=1.2",
21 |     "tox>=1.8",
22 | ] + install_requires
23 | 
24 | 
25 | setup(name="meepo",
26 |       version=__import__("meepo").__version__,
27 |       description="event sourcing for databases.",
28 |       keywords="eventsourcing event sourcing replication cache elasticsearch",
29 |       author="Lx Yu",
30 |       author_email="i@lxyu.net",
31 |       packages=find_packages(exclude=['docs', 'tests']),
32 |       url="https://github.com/eleme/meepo",
33 |       license="MIT",
34 |       zip_safe=False,
35 |       long_description=open("README.rst").read(),
36 |       install_requires=install_requires,
37 |       extras_require={
38 |           "dev": dev_requires,
39 |       },
40 |       classifiers=[
41 |           "Topic :: Software Development",
42 |           "Development Status :: 3 - Alpha",
43 |           "Intended Audience :: Developers",
44 |           "License :: OSI Approved :: MIT License",
45 |           "Programming Language :: Python :: 2.7",
46 |           "Programming Language :: Python :: 3.3",
47 |           "Programming Language :: Python :: 3.4",
48 |           "Programming Language :: Python :: Implementation :: CPython",
49 |           "Programming Language :: Python :: Implementation :: PyPy",
50 |       ])
51 | 
-------------------------------------------------------------------------------- /tests/conftest.py:
1 | # -*- coding: utf-8 -*-
2 | 
3 | from __future__ import absolute_import
4 | 
5 | import logging
6 | logging.basicConfig(level=logging.DEBUG)
7 | 
8 | import json
9 | import os
10 | import uuid
11 | 
12 | import pymysql
13 | import pytest
14 | import redis
15 | 
16 | from meepo._compat import urlparse
17 | 
18 | 
19 | @pytest.fixture(scope="session")
20 | def conf():
21 |     """Try to load the local conf.json
22 |     """
23 |     fname = os.path.join(os.path.dirname(__file__), "conf.json")
24 |     if os.path.exists(fname):
25 |         with open(fname) as f:
26 |             return json.load(f)
27 | 
28 | 
29 | @pytest.fixture(scope="session")
30 | def redis_dsn(request, conf):
31 |     """Redis server dsn
32 |     """
33 |     redis_dsn = conf["redis_dsn"] if conf else "redis://localhost:6379/1"
34 | 
35 |     def fin():
36 |         r = redis.Redis.from_url(redis_dsn, socket_timeout=1)
37 |         r.flushdb()
38 |     request.addfinalizer(fin)
39 |     return redis_dsn
40 | 
41 | 
42 | @pytest.fixture(scope="module")
43 | def mysql_dsn(conf):
44 |     """MySQL server dsn
45 | 
46 |     This fixture will init a clean meepo_test database with a 'test' table
47 |     """
48 |     logger = logging.getLogger("fixture_mysql_dsn")
49 | 
50 |     dsn = conf["mysql_dsn"] if conf else \
51 |         "mysql+pymysql://root@localhost/meepo_test"
52 | 
53 |     # init database
54 |     parsed = urlparse(dsn)
55 |     db_settings = {
56 |         "host": parsed.hostname,
57 |         "port": parsed.port or 3306,
58 |         "user": parsed.username,
59 |         "passwd": parsed.password
60 |     }
61 |     conn = pymysql.connect(**db_settings)
62 |     cursor = conn.cursor()
63 | 
64 |     conn.begin()
65 |     cursor.execute("DROP DATABASE IF EXISTS meepo_test")
66 |     cursor.execute("CREATE DATABASE meepo_test")
67 |     cursor.execute("DROP TABLE IF EXISTS meepo_test.test")
68 |     cursor.execute('''CREATE TABLE meepo_test.test (
69 |         id INT NOT NULL AUTO_INCREMENT,
70 |         data VARCHAR (256) NOT NULL,
71 |         PRIMARY KEY (id)
72 |     )''')
73 |     cursor.execute("RESET MASTER")
74 |     conn.commit()
75 | 
76 |     logger.debug("executed")
77 | 
78 |     # release conn
79 |     cursor.close()
80 |     conn.close()
81 | 
82 |     return dsn
83 | 
84 | 
85 | @pytest.fixture(scope="function")
86 | def mock_session():
87 |     class MockSession(object):
88 |         def __init__(self):
89 |             self.meepo_unique_id = uuid.uuid4().hex
90 |             self.info = {"name": "mock"}
91 |     return MockSession()
92 | 
-------------------------------------------------------------------------------- /tests/test_eventsourcing/__init__.py:
1 | # -*- coding: utf-8 -*-
2 | 
-------------------------------------------------------------------------------- /tests/test_eventsourcing/test_event_store.py:
1 | # -*- coding: utf-8 -*-
2 | 
3 | from __future__ import absolute_import
4 | 
5 | import logging
6 | import time
7 | 
8 | logging.basicConfig(level=logging.DEBUG)
9 | 
10 | import pytest
11 | 
12 | from meepo.apps.eventsourcing.event_store import RedisEventStore
13 | 
14 | 
15 | @pytest.fixture(scope="function")
16 | def redis_event_store(request, redis_dsn):
17 |     event_store = RedisEventStore(redis_dsn)
18 | 
19 |     def fin():
20 |         event_store.r.flushdb()
21 | 
22 |     request.addfinalizer(fin)
23 |     return event_store
24 | 
25 | 
26 | def test_redis_event_store_add(redis_event_store):
27 |     # add events
28 |     for pk in (1, 3):
29 |         redis_event_store.add("test_write", pk)
30 |     time.sleep(1)
31 | 
32 |     # test event store add
33 |     assert redis_event_store.replay("test_write") == ['1', '3']
34 | 
35 |     # re-adding a pk refreshes its score to a newer timestamp
36 |     redis_event_store.add("test_write", 1)
37 |     assert redis_event_store.replay("test_write") == ['3', '1']
38 | 
39 | 
40 | def test_redis_event_store_add_by_ts(redis_event_store):
41 |     start_time = int(time.time())
42 |     times = list(range(start_time, start_time + 5))
43 | 
44 |     # add events
45 |     for i, pk in enumerate(range(1, 10, 2)):
46 |         redis_event_store.add("test_write", pk, ts=times[i])
47 | 
48 |     # test event store with timestamp passed
49 |     stores = redis_event_store.replay("test_write", with_ts=True)
50 |     assert [s[0] for s in stores] == ['1', '3', '5', '7', '9']
51 |     assert times == [s[1] for s in stores]
52 | 
53 | 
54 | def test_redis_event_store_replay(redis_event_store):
55 |     start_time = int(time.time())
56 |     times = list(range(start_time, start_time + 5))
57 | 
58 |     # add events
59 |     pks = [str(i) for i in range(1, 10, 2)]
60 |     for i, pk in enumerate(pks):
61 |         redis_event_store.add("test_write", pk, ts=times[i])
62 | 
63 |     # test replay by ts
64 |     assert redis_event_store.replay("test_write") == pks
65 |     assert redis_event_store.replay("test_write", ts=times[3]) == pks[3:]
66 |     assert redis_event_store.replay(
67 |         "test_write", ts=times[2], end_ts=times[3]) == pks[2:4]
68 |     assert redis_event_store.replay(
69 |         "test_write", end_ts=times[2]) == pks[:3]
70 | 
71 | 
72 | def test_redis_event_store_query(redis_event_store):
73 |     start_time = int(time.time())
74 |     times = list(range(start_time, start_time + 5))
75 | 
76 |     # add events
77 |     pks = [str(i) for i in range(1, 10, 2)]
78 |     for i, pk in enumerate(pks):
79 |         redis_event_store.add("test_write", pk, ts=times[i])
80 | 
81 |     # test query by pk
82 |     assert redis_event_store.query("test_write", pks[0]) == times[0]
83 |     assert redis_event_store.query("test_write", pks[3]) == times[3]
84 | 
-------------------------------------------------------------------------------- /tests/test_eventsourcing/test_prepare_commit.py:
1 | # -*- coding: utf-8 -*-
2 | 
3 | import pytest
4 | import redis
5 | 
6 | from meepo.apps.eventsourcing.prepare_commit import RedisPrepareCommit
7 | 
8 | 
9 | @pytest.fixture(scope="module")
10 | def redis_pc(redis_dsn):
11 |     pc = RedisPrepareCommit(
12 |         redis_dsn, strict=False, namespace="meepo.test.event_store")
13 |     pc.r.flushdb()
14 |     return pc
15 | 
16 | 
17 | @pytest.fixture(scope="module")
18 | def redis_strict_pc():
19 |     """Test strict prepare_commit, which won't silence the exception.
20 | 
21 |     We'll pass an invalid redis dsn here to make sure ConnectionError is raised.
22 |     """
23 |     redis_dsn = "redis://non_exists:0/"
24 |     pc = RedisPrepareCommit(
25 |         redis_dsn, strict=True, namespace="meepo.test.event_store")
26 |     return pc
27 | 
28 | 
29 | def test_redis_prepare_commit_phase(mock_session, redis_pc):
30 |     # prepare session
31 |     event = {"test_write": {1}, "test_update": {2, 3}, "test_delete": {4}}
32 |     redis_pc.prepare(mock_session, event)
33 | 
34 |     # test prepare phase recorded
35 |     assert redis_pc.phase(mock_session) == "prepare"
36 |     assert redis_pc.prepare_info() == {mock_session.meepo_unique_id}
37 |     assert redis_pc.session_info(mock_session) == event
38 | 
39 |     # test commit phase recorded
40 |     redis_pc.commit(mock_session)
41 |     assert redis_pc.phase(mock_session) == "commit"
42 |     assert redis_pc.prepare_info() == set()
43 | 
44 | 
45 | def test_redis_strict_prepare_commit_phase(mock_session, redis_strict_pc):
46 |     with pytest.raises(redis.ConnectionError):
47 |         redis_strict_pc.prepare(mock_session, {"test_write": {1}})
48 | 
49 |     with pytest.raises(redis.ConnectionError):
50 |         redis_strict_pc.commit(mock_session)
51 | 
-------------------------------------------------------------------------------- /tests/test_eventsourcing/test_redis_es_sub.py:
1 | # -*- coding: utf-8 -*-
2 | 
3 | import pytest
4 | 
5 | from meepo.apps.eventsourcing.sub import redis_es_sub
6 | from meepo.signals import signal
7 | 
8 | 
9 | @pytest.fixture(scope="function")
10 | def es_sub(request, mock_session, redis_dsn):
11 |     event_store, prepare_commit = redis_es_sub(
12 |         mock_session, ["test"], redis_dsn)
13 | 
14 |     def fin():
15 |         for action in ["write", "update", "delete"]:
16 |             event_store.clear("test_%s" % action)
17 |         prepare_commit.clear()
18 |     request.addfinalizer(fin)
19 |     return event_store, prepare_commit
20 | 
21 | 
22 | def test_redis_es_sub_commit(mock_session, es_sub):
23 |     event_store, prepare_commit = es_sub
24 | 
25 |     # mock session prepare phase
26 |     evt = {"test_write": {1}}
27 |     signal("session_prepare").send(mock_session, event=evt)
28 |     assert prepare_commit.phase(mock_session) == "prepare"
29 |     assert prepare_commit.session_info(mock_session) == evt
30 | 
31 |     # mock session commit phase
32 |     signal("session_commit").send(mock_session)
33 |     assert prepare_commit.phase(mock_session) == "commit"
34 |     assert prepare_commit.prepare_info() == set()
35 | 
36 |     signal("test_write").send(1)
37 |     assert event_store.replay("test_write") == ['1']
38 | 
39 | 
40 | def test_redis_es_sub_rollback(mock_session, es_sub):
41 |     event_store, prepare_commit = es_sub
42 | 
43 |     # mock session prepare phase
44 |     evt = {"test_write": {1}}
45 |     signal("session_prepare").send(mock_session, event=evt)
46 |     assert prepare_commit.phase(mock_session) == "prepare"
47 |     assert prepare_commit.session_info(mock_session) == evt
48 | 
49 |     # mock session rollback phase
50 |     signal("session_rollback").send(mock_session)
51 |     assert prepare_commit.phase(mock_session) == "commit"
52 |     assert prepare_commit.prepare_info() == set()
53 | 
54 |     assert event_store.replay("test_write") == []
55 | 
-------------------------------------------------------------------------------- /tests/test_eventsourcing/test_sqlalchemy_es_pub.py:
1 | # -*- coding: utf-8 -*-
2 | 
3 | from __future__ import absolute_import
4 | 
5 | import logging
6 | logging.basicConfig(level=logging.DEBUG)
7 | 
8 | import pytest
9 | import sqlalchemy as sa
10 | from sqlalchemy.orm import sessionmaker, scoped_session
11 | from sqlalchemy.ext.declarative import declarative_base
12 | 
13 | from meepo.apps.eventsourcing import sqlalchemy_es_pub
14 | from meepo.signals import signal
15 | 
16 | (t_writes, t_updates, t_deletes,
17 |  s_events, s_commits, s_rollbacks) = ([] for _ in range(6))
18 | 
19 | 
20 | def _clear():
21 |     del t_writes[:]
22 |     del t_updates[:]
23 |     del t_deletes[:]
24 |     del s_events[:]
25 |     del s_commits[:]
26 |     del s_rollbacks[:]
27 | 
28 | 
29 | def setup_module(module):
30 |     def test_sg(sg_list):
31 |         return lambda pk: sg_list.append(pk)
32 | 
33 |     # connect table action signals
34 |     signal("test_write").connect(test_sg(t_writes), weak=False)
35 |     signal("test_update").connect(test_sg(t_updates), weak=False)
36 |     signal("test_delete").connect(test_sg(t_deletes), weak=False)
37 | 
38 |     # connect session action signals
39 |     def test_session_prepare(session, event):
40 |         s_events.append({"sid": session.meepo_unique_id, "event": event})
41 | 
42 |     def test_session_commit(session):
43 |         s_commits.append(session.meepo_unique_id)
44 | 
45 |     def test_session_rollback(session):
46 |         s_rollbacks.append(session.meepo_unique_id)
47 | 
48 |     signal("session_prepare").connect(test_session_prepare, weak=False)
49 |     signal("session_commit").connect(test_session_commit, weak=False)
50 |     signal("session_rollback").connect(test_session_rollback, weak=False)
51 | 
52 | 
53 | def teardown_module(module):
54 |     pass
55 | 
56 | 
57 | def setup_function(function):
58 |     _clear()
59 | 
60 | 
61 | def teardown_function(function):
62 |     pass
63 | 
64 | 
65 | @pytest.fixture(scope="module")
66 | def model_cls():
67 |     Base = declarative_base()
68 | 
69 |     class model_cls(Base):
70 |         __tablename__ = "test"
71 |         id = sa.Column(sa.Integer, primary_key=True)
72 |         data = sa.Column(sa.String)
73 |     return model_cls
74 | 
75 | 
76 | @pytest.fixture(scope="module")
77 | def session(mysql_dsn):
78 |     # sqlalchemy prepare
79 |     engine = sa.create_engine(mysql_dsn)
80 |     session = scoped_session(sessionmaker(bind=engine,
81 |                                           expire_on_commit=False,
82 |                                           info={"name": "test_session"}))
83 | 
84 |     # install sqlalchemy_es_pub hook
85 |     sqlalchemy_es_pub(session, tables=["test"])
86 |     return session
87 | 
88 | 
89 | @pytest.fixture(scope="module")
90 | def session_b(mysql_dsn):
91 |     # sqlalchemy prepare
92 |     engine = sa.create_engine(mysql_dsn)
93 |     session_b = scoped_session(sessionmaker(bind=engine,
94 |                                             expire_on_commit=False,
95 |                                             info={"name": "test_session_b"}))
96 | 
97 |     # install sqlalchemy_es_pub hook
98 |     sqlalchemy_es_pub(session_b, tables=["test"])
99 |     return session_b
100 | 
101 | 
102 | def test_sa_empty_commit(session):
103 |     """A direct commit generates nothing.
104 |     """
105 |     session.commit()
106 | 
107 |     assert [t_writes, t_updates, t_deletes,
108 |             s_events, s_commits, s_rollbacks] == [[]] * 6
109 | 
110 | 
111 | def test_sa_single_write(session, model_cls):
112 |     """A write commit generates a write event with the row pk.
113 |     """
114 |     t_a = model_cls(data='a')
115 |     session.add(t_a)
116 |     session.commit()
117 | 
118 |     event, sid = s_events.pop(), s_commits.pop()
119 |     assert event['sid'] == sid
120 |     obj = event['event']['test_write'].pop()
121 |     assert obj.id == t_a.id
122 | 
123 |     assert t_writes == [t_a.id]
124 |     assert [t_updates, t_deletes, s_rollbacks] == [[]] * 3
125 | 
126 | 
127 | def test_sa_single_flush_write(session, model_cls):
128 |     """Flush - Write is the same as write.
129 |     """
130 |     t_b = model_cls(data='b')
131 |     session.add(t_b)
132 |     session.flush()
133 |     session.commit()
134 | 
135 |     event, sid = s_events.pop(), s_commits.pop()
136 |     assert event['sid'] == sid
137 |     obj = event['event']['test_write'].pop()
138 |     assert obj.id == t_b.id
139 | 
140 |     assert t_writes == [t_b.id]
141 |     assert [t_updates, t_deletes, s_rollbacks] == [[]] * 3
142 | 
143 | 
144 | def test_sa_multi_writes(session, model_cls):
145 |     # test multiple writes
146 |     t_c = model_cls(data='c')
147 |     t_d = model_cls(data='d')
148 |     session.add(t_c)
149 |     session.add(t_d)
150 |     session.commit()
151 | 
152 |     event, sid = s_events.pop(), s_commits.pop()
153 |     assert event['sid'] == sid
154 |     objs = event['event']['test_write']
155 |     assert {obj.id for obj in objs} == {t_c.id, t_d.id}
156 | 
157 |     assert set(t_writes) == {t_c.id, t_d.id}
158 |     assert [t_updates, t_deletes, s_rollbacks] == [[]] * 3
159 | 
160 | 
161 | def test_sa_single_update(session, model_cls):
162 |     # test single update
163 |     t_a = session.query(model_cls).filter(model_cls.data == 'a').one()
164 |     t_a.data = "aa"
165 |     session.commit()
166 | 
167 |     event, sid = s_events.pop(), s_commits.pop()
168 |     assert event['sid'] == sid
169 |     obj = event['event']['test_update'].pop()
170 |     assert obj.id == t_a.id
171 | 
172 |     assert set(t_updates) == {t_a.id}
173 |     assert [t_writes, t_deletes, s_rollbacks] == [[]] * 3
174 | 
175 | 
176 | def test_sa_single_flush_update(session, model_cls):
177 |     # test single flush - update
178 |     t_a = session.query(model_cls).filter(model_cls.data == 'aa').one()
179 |     t_a.data = "a"
180 |     session.flush()
181 |     session.commit()
182 | 
183 |     event, sid = s_events.pop(), s_commits.pop()
184 |     assert event['sid'] == sid
185 |     obj = event['event']['test_update'].pop()
186 |     assert obj.id == t_a.id
187 | 
188 |     assert set(t_updates) == {t_a.id}
189 |     assert [t_writes, t_deletes, s_rollbacks] == [[]] * 3
190 | 
191 | 
192 | def test_sa_mixed_write_update_delete_and_multi_flushes(session, model_cls):
193 |     """The most complicated situation; the test goes through the following
194 |     process:
195 |     1. add one row, update one row
196 |     2. flush to database
197 |     3. delete one row
198 |     4. flush to database
199 |     5. commit
200 |     """
201 |     t_b, t_c = session.query(model_cls). \
202 |         filter(model_cls.data.in_(('b', 'c'))).all()
203 |     t_e = model_cls(data='e')
204 |     session.add(t_e)
205 |     t_b.data = "x"
206 |     session.flush()
207 |     session.delete(t_c)
208 |     session.flush()
209 |     session.commit()
210 | 
211 |     # one successful commit generates one commit sid
212 |     assert len(s_commits) == 1
213 | 
214 |     # test session events
215 |     sid = s_commits.pop()
216 | 
217 |     # since the commit includes a flush in it, two events will be triggered,
218 |     # and the later event contains the first event.
    event = s_events[0]
    assert event['sid'] == sid
    write_obj = event['event']['test_write'].pop()
    update_obj = event['event']['test_update'].pop()
    assert write_obj.id == t_e.id
    assert update_obj.id == t_b.id

    event = s_events[1]

    assert event['sid'] == sid
    write_obj = event['event']['test_write'].pop()
    update_obj = event['event']['test_update'].pop()
    delete_obj = event['event']['test_delete'].pop()
    assert write_obj.id == t_e.id
    assert update_obj.id == t_b.id
    assert delete_obj.id == t_c.id


def test_sa_empty_rollback(session):
    """Direct rollback generates nothing.
    """
    session.rollback()

    assert [t_writes, t_updates, t_deletes,
            s_events, s_commits, s_rollbacks] == [[]] * 6


def test_sa_early_rollback(session, model_cls):
    """Rollback before flush leaves nothing recorded.
    """
    t_e = model_cls(data='e')
    session.add(t_e)
    session.rollback()

    assert [t_writes, t_updates, t_deletes,
            s_events, s_commits, s_rollbacks] == [[]] * 6


def test_sa_flush_rollback(session, model_cls):
    """Rollback after flush records the event.
    Since the rollback happened after flush, the write has a pk value.
    """
    t_e = model_cls(data='e')
    session.add(t_e)
    session.flush()
    session.rollback()

    event, sid = s_events.pop(), s_rollbacks.pop()
    assert event['sid'] == sid
    obj = event['event']['test_write'].pop()
    assert obj.id == t_e.id

    assert [t_writes, t_updates, t_deletes, s_commits] == [[]] * 4


def test_sa_multi_sessions(session, session_b, model_cls):
    def _sp_for_b(s, event):
        assert s.info == session_b.info
        assert event['test_write'].pop().id == t_g.id
    signal("session_prepare").connect(_sp_for_b, sender=session_b, weak=False)

    t_f = model_cls(data='f')
    session.add(t_f)
    session.commit()

    t_g = model_cls(data='g')
    session_b.add(t_g)
    session_b.commit()

--------------------------------------------------------------------------------
/tests/test_mysql_pub.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-

from __future__ import absolute_import

import logging
logging.basicConfig(level=logging.DEBUG)

import pymysql
import pytest

from meepo._compat import urlparse
from meepo.pub import mysql_pub
from meepo.signals import signal

t_writes, t_updates, t_deletes, t_binlogs = [], [], [], []
t_raw_writes, t_raw_updates, t_raw_deletes = [], [], []


def setup_module(module):
    def test_sg(sg_list):
        return lambda pk: sg_list.append(pk)

    # connect table action signal
    signal("test_write").connect(test_sg(t_writes), weak=False)
    signal("test_update").connect(test_sg(t_updates), weak=False)
    signal("test_delete").connect(test_sg(t_deletes), weak=False)

    # connect raw table action signal
    signal("test_write_raw").connect(test_sg(t_raw_writes), weak=False)
    signal("test_update_raw").connect(test_sg(t_raw_updates), weak=False)
    signal("test_delete_raw").connect(test_sg(t_raw_deletes), weak=False)

    # connect mysql binlog pos signal
    signal("mysql_binlog_pos").connect(test_sg(t_binlogs), weak=False)
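

# A note on the flow (a summary, not part of the original file): the fixture
# below first applies a batch of test statements over a plain pymysql
# connection, then replays the row-based binlog through mysql_pub, which
# publishes the pk and raw-row signals asserted by the tests that follow.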


@pytest.fixture(scope="module")
def binlog(mysql_dsn):
    # init mysql connection
    parsed = urlparse(mysql_dsn)
    db_settings = {
        "host": parsed.hostname,
        "port": parsed.port or 3306,
        "user": parsed.username,
        "passwd": parsed.password,
        "database": "meepo_test"
    }
    conn = pymysql.connect(**db_settings)
    cursor = conn.cursor()

    # test sqls
    sql = """
    INSERT INTO test (data) VALUES ('a');
    INSERT INTO test (data) VALUES ('b'), ('c'), ('d');
    UPDATE test SET data = 'aa' WHERE id = 1;
    UPDATE test SET data = 'bb' WHERE id = 2;
    UPDATE test SET data = 'cc' WHERE id != 1;
    DELETE FROM test WHERE id != 1;
    DELETE FROM test WHERE id = 1;
    """
    cursor.execute(sql)
    cursor.close()
    conn.commit()
    conn.close()

    # replay the binlog to generate the signals
    mysql_pub(mysql_dsn, tables=["test"])


def test_mysql_table_event(binlog):
    assert t_writes == [1, 2, 3, 4]
    assert t_updates == [1, 2, 2, 3, 4]
    assert t_deletes == [2, 3, 4, 1]


def test_mysql_binlog_pos_event(binlog):
    assert all(pos.startswith("mysql-bin.000001") for pos in t_binlogs)


def test_mysql_raw_table_event(binlog):
    assert t_raw_writes == [
        {'values': {'data': 'a', 'id': 1}},
        {'values': {'data': 'b', 'id': 2}},
        {'values': {'data': 'c', 'id': 3}},
        {'values': {'data': 'd', 'id': 4}},
    ]
    assert t_raw_updates == [
        {'before_values': {'data': 'a', 'id': 1},
         'after_values': {'data': 'aa', 'id': 1}},
        {'before_values': {'data': 'b', 'id': 2},
         'after_values': {'data': 'bb', 'id': 2}},
        {'before_values': {'data': 'bb', 'id': 2},
         'after_values': {'data': 'cc', 'id': 2}},
        {'before_values': {'data': 'c', 'id': 3},
         'after_values': {'data': 'cc', 'id': 3}},
        {'before_values': {'data': 'd', 'id': 4},
         'after_values': {'data': 'cc', 'id': 4}},
    ]
    assert t_raw_deletes == [
        {'values': {'data': 'cc', 'id': 2}},
        {'values': {'data': 'cc', 'id': 3}},
        {'values': {'data': 'cc', 'id': 4}},
        {'values': {'data': 'aa', 'id': 1}}
    ]

--------------------------------------------------------------------------------
/tests/test_replicator.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-

from __future__ import absolute_import

import os
import signal
import itertools
import time
import zmq
import random

from multiprocessing import Queue, Process, Manager

from meepo.apps.replicator.worker import Worker, WorkerPool
from meepo.apps.replicator import QueueReplicator, RqReplicator
from meepo.utils import setup_logger
setup_logger("DEBUG")


def test_worker():
    result = Manager().list()

    def func():
        def f(pks):
            result.extend(pks)
            return [True for _ in pks]

        queue = Queue()
        for i in range(10):
            queue.put(i)

        worker = Worker(queue, "test", f, multi=True)
        worker.run()

    p = Process(target=func)
    try:
        p.start()
    finally:
        for i in range(200):
            time.sleep(0.3)
            if len(result) == 10:
                break

        p.terminate()

    assert [i for i in result] == list(range(10))


def test_worker_retry():
    result = Manager().dict()
    for i in range(3):
        result[i] = 0

    def func():
        def f(pks):
            try:
                return [False for _ in pks]
            finally:
                for pk in pks:
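                    # f always returns False, so the worker keeps retrying;
                    # count every attempt (expected per pk: one initial try
                    # plus max_retry_count == 3 retries, i.e. 4 in total)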
                    result[pk] += 1

        queue = Queue()
        for i in range(3):
            queue.put(i)

        worker = Worker(queue, "test", f, multi=True,
                        max_retry_count=3, max_retry_interval=0.1)
        worker.run()

    p = Process(target=func)
    try:
        p.start()
    finally:
        for i in range(200):
            time.sleep(0.3)
            if result[0] == result[1] == result[2] == 4:
                break
        p.terminate()

    assert result[0] == result[1] == result[2] == 4


def test_worker_pool():
    queues = [Queue() for i in range(3)]
    result = Manager().dict()

    tasks, i = range(30), 0
    for queue in queues:
        for j in tasks[i:i + 10]:
            queue.put(j)
        i += 10

    def f(pks):
        pid = os.getpid()
        if pid in result:
            r = result[pid]
            r.extend(pks)
            result[pid] = r
        else:
            result[pid] = pks
        return [True for _ in pks]

    def func():
        worker_pool = WorkerPool(queues, "test", f, multi=True,
                                 waiting_time=0.5)

        def handler(signum, frame):
            worker_pool.terminate()
        signal.signal(signal.SIGUSR1, handler)

        worker_pool.start()

    p = Process(target=func)
    try:
        p.start()
    finally:
        for i in range(200):
            time.sleep(0.3)
            if len(result) == 3:
                break

    assert len(result) == 3 and \
        set(itertools.chain(*result.values())) == set(range(30))

    pid = list(result.keys())[0]
    # test process re-creation after one worker is killed
    os.kill(pid, signal.SIGKILL)

    time.sleep(0.6)

    for i in [30, 31, 32]:
        queues[0].put(i)

    for i in range(200):
        time.sleep(0.3)
        if set(itertools.chain(*result.values())) == set(range(33)):
            break

    os.kill(p.pid, signal.SIGUSR1)

    assert len(result) in (3, 4) and \
        set(itertools.chain(*result.values())) == set(range(33))


def test_queue_replicator():
    result = Manager().list()

    def repl_process():
        queue_repl = QueueReplicator("tcp://127.0.0.1:6000")

        @queue_repl.event("test_update", workers=3, multi=True, queue_limit=3)
        def task(pks):
            result.extend(pks)
            return [True for _ in pks]

        Worker.MAX_PK_COUNT = 10
        queue_repl.run()

    rp = Process(target=repl_process)
    rp.start()

    time.sleep(1)

    ctx = zmq.Context()

    def producer():
        sock = ctx.socket(zmq.PUB)
        sock.bind("tcp://127.0.0.1:6000")
        time.sleep(0.5)
        for i in range(50):
            msg = "test_update {}".format(i)
            sock.send_string(msg)

    p = Process(target=producer)
    p.start()
    p.join()

    for i in range(200):
        time.sleep(0.9)
        if len(result) == 50:
            break

    os.kill(rp.pid, signal.SIGINT)
    rp.join()

    assert set(int(i) for i in result) == set(range(50))


def test_rq_replicator():
    result = Manager().list()

    def repl_process():
        rq_repl = RqReplicator("tcp://127.0.0.1:7000")

        @rq_repl.event("restaurant_update")
        def job(pks):
            i = random.randint(0, 5)
            if i == 3:
                raise Exception("random exception, pks: {}".format(pks))
            result.extend(pks)

        rq_repl.run()

    consumer = Process(target=repl_process)
    consumer.start()

    time.sleep(1)

    ctx = zmq.Context()

    def send_string():
        sock = ctx.socket(zmq.PUB)
        sock.bind("tcp://127.0.0.1:7000")
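        # give subscribers a moment to connect before publishing, so the
        # PUB socket does not drop messages to slow joiners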
        time.sleep(0.5)
        for i in range(50):
            msg = "restaurant_update {}".format(i)
            sock.send_string(msg)

    producer = Process(target=send_string)
    producer.start()
    producer.join()

    for i in range(200):
        time.sleep(0.3)
        if len(result) == 50:
            break

    consumer.terminate()
    assert set(int(i) for i in result) == set(range(50))

--------------------------------------------------------------------------------
/tests/test_sqlalchemy_pub.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-

from __future__ import absolute_import

import logging
logging.basicConfig(level=logging.DEBUG)

import pytest
import sqlalchemy as sa
from sqlalchemy.orm import sessionmaker, scoped_session
from sqlalchemy.ext.declarative import declarative_base

from meepo.pub import sqlalchemy_pub
from meepo.signals import signal

(t_writes, t_updates, t_deletes) = ([] for _ in range(3))


def _clear():
    del t_writes[:]
    del t_updates[:]
    del t_deletes[:]


def setup_module(module):
    def test_sg(sg_list):
        return lambda pk: sg_list.append(pk)

    # connect table action signal
    signal("test_write").connect(test_sg(t_writes), weak=False)
    signal("test_update").connect(test_sg(t_updates), weak=False)
    signal("test_delete").connect(test_sg(t_deletes), weak=False)


def teardown_module(module):
    pass


def setup_function(function):
    _clear()


def teardown_function(function):
    pass


@pytest.fixture(scope="module")
def model_cls():
    Base = declarative_base()

    class model_cls(Base):
        __tablename__ = "test"
        id = sa.Column(sa.Integer, primary_key=True)
        data = sa.Column(sa.String)
    return model_cls


@pytest.fixture(scope="module")
def session(mysql_dsn):
    # sqlalchemy prepare
    engine = sa.create_engine(mysql_dsn)
    session = scoped_session(sessionmaker(bind=engine,
                                          expire_on_commit=False))

    # install sqlalchemy_pub hook
    sqlalchemy_pub(session)
    return session


def test_sa_empty_commit(session):
    """Direct commit generates nothing.
    """
    session.commit()

    assert [t_writes, t_updates, t_deletes] == [[]] * 3


def test_sa_single_write(session, model_cls):
    """Write commit generates a write event with the row pk.
    """
    t_a = model_cls(data='a')
    session.add(t_a)
    session.commit()

    assert t_writes == [t_a.id]
    assert [t_updates, t_deletes] == [[]] * 2


def test_sa_single_flush_write(session, model_cls):
    """Flush followed by write behaves the same as a plain write.
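    Flushing before the commit only assigns the row's pk earlier; the
    emitted events should stay the same.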
    """
    t_b = model_cls(data='b')
    session.add(t_b)
    session.flush()
    session.commit()

    assert t_writes == [t_b.id]
    assert [t_updates, t_deletes] == [[]] * 2


def test_sa_multi_writes(session, model_cls):
    # test multiple writes
    t_c = model_cls(data='c')
    t_d = model_cls(data='d')
    session.add(t_c)
    session.add(t_d)
    session.commit()

    assert set(t_writes) == {t_c.id, t_d.id}
    assert [t_updates, t_deletes] == [[]] * 2


def test_sa_single_update(session, model_cls):
    # test single update
    t_a = session.query(model_cls).filter(model_cls.data == 'a').one()
    t_a.data = "aa"
    session.commit()

    assert set(t_updates) == {t_a.id}
    assert [t_writes, t_deletes] == [[]] * 2


def test_sa_single_flush_update(session, model_cls):
    # test single flush - update
    t_a = session.query(model_cls).filter(model_cls.data == 'aa').one()
    t_a.data = "a"
    session.flush()
    session.commit()

    assert set(t_updates) == {t_a.id}
    assert [t_writes, t_deletes] == [[]] * 2


def test_sa_mixed_write_update_delete_and_multi_flushes(session, model_cls):
    """The most complicated situation; the test goes through the following
    process:
    1. add one row, update one row
    2. flush to database
    3. delete one row
    4. flush to database
    5. commit
    """
    t_b, t_c = session.query(model_cls).\
        filter(model_cls.data.in_(('b', 'c'))).all()
    t_e = model_cls(data='e')
    session.add(t_e)
    t_b.data = "x"
    session.flush()
    session.delete(t_c)
    session.flush()
    session.commit()

    assert (t_writes, t_updates, t_deletes) == ([t_e.id], [t_b.id], [t_c.id])


def test_sa_empty_rollback(session):
    """Direct rollback generates nothing.
    """
    session.rollback()

    assert [t_writes, t_updates, t_deletes] == [[]] * 3


def test_sa_early_rollback(session, model_cls):
    """Rollback before flush leaves nothing recorded.
    """
    t_e = model_cls(data='e')
    session.add(t_e)
    session.rollback()

    assert [t_writes, t_updates, t_deletes] == [[]] * 3


def test_sa_flush_rollback(session, model_cls):
    """Rollback happened after flush.
    Since the rollback happened after flush, the write has a pk value.
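    Still, no table event is published, because the session was rolled
    back rather than committed.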
    """
    t_e = model_cls(data='e')
    session.add(t_e)
    session.flush()
    session.rollback()

    assert [t_writes, t_updates, t_deletes] == [[]] * 3


def test_sa_session_remove(session, model_cls):
    session.remove()
    t_f = model_cls(data='f')
    session.add(t_f)
    session.commit()

    assert t_writes == [t_f.id]

--------------------------------------------------------------------------------
/tests/test_utils.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-


import datetime
import time

from meepo.utils import b, s, d


def test_cast_bytes():
    assert b("abc") == b"abc"
    assert b(b"abc") == b"abc"


def test_cast_str():
    assert s("abc") == "abc"
    assert s(b"abc") == "abc"


def test_cast_datetime():
    now = time.time()
    now_dt = datetime.datetime.fromtimestamp(now)
    assert d(now) == now_dt
    assert d(now, fmt="%Y%m%d") == now_dt.strftime("%Y%m%d")

--------------------------------------------------------------------------------
/tox.ini:
--------------------------------------------------------------------------------
[tox]
envlist = flake8, py27, py33, py34, pypy

[testenv]
basepython =
    py27: python2.7
    py33: python3.3
    py34: python3.4
    pypy: pypy
deps =
    mysql-replication>=0.5
    pytest>=2.6.4
    redis>=2.10.3
    teamcity-messages>=1.8
commands =
    python --version
    py.test {posargs}

[testenv:flake8]
basepython = python
deps =
    flake8 >=2.2.5
commands =
    flake8 .
--------------------------------------------------------------------------------
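A usage note (an assumption about local workflow, not part of the repository
files): with the tox.ini above, a bare ``tox`` runs the whole envlist, while a
single environment such as the linter can be selected with ``tox -e flake8``.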