├── .gitignore ├── LICENSE ├── README.md ├── daenerys ├── __init__.py ├── app.py ├── dinergate.py ├── exceptions.py ├── pipeline │ ├── __init__.py │ ├── base.py │ ├── html.py │ └── network.py ├── request.py └── site.py ├── demo ├── app.py ├── backend.py ├── beat.py ├── config.py ├── messaging.py ├── scaffold.py ├── sites │ ├── __init__.py │ └── pypi.py └── worker.py └── requirements.txt /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | 27 | # PyInstaller 28 | # Usually these files are written by a python script from a template 29 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
30 | *.manifest 31 | *.spec 32 | 33 | # Installer logs 34 | pip-log.txt 35 | pip-delete-this-directory.txt 36 | 37 | # Unit test / coverage reports 38 | htmlcov/ 39 | .tox/ 40 | .coverage 41 | .coverage.* 42 | .cache 43 | nosetests.xml 44 | coverage.xml 45 | *,cover 46 | .hypothesis/ 47 | 48 | # Translations 49 | *.mo 50 | *.pot 51 | 52 | # Django stuff: 53 | *.log 54 | local_settings.py 55 | 56 | # Flask stuff: 57 | instance/ 58 | .webassets-cache 59 | 60 | # Scrapy stuff: 61 | .scrapy 62 | 63 | # Sphinx documentation 64 | docs/_build/ 65 | 66 | # PyBuilder 67 | target/ 68 | 69 | # IPython Notebook 70 | .ipynb_checkpoints 71 | 72 | # pyenv 73 | .python-version 74 | 75 | # celery beat schedule file 76 | celerybeat-schedule 77 | 78 | # dotenv 79 | .env 80 | 81 | # virtualenv 82 | venv/ 83 | ENV/ 84 | 85 | # Spyder project settings 86 | .spyderproject 87 | 88 | # Rope project settings 89 | .ropeproject 90 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. 
For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 
47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "{}" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright {yyyy} {name of copyright owner} 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## Getting started 2 | 3 | ```python 4 | ❯ git clone https://github.com/dongweiming/daenerys 5 | ❯ cd daenerys 6 | ❯ virtualenv venv 7 | ❯ source venv/bin/activate 8 | ❯ pip install -r requirements.txt 9 | # 安装配置Redis和MongoDB 10 | ❯ cd demo 11 | ❯ python worker.py # 启动worker(默认5个进程) 12 | ❯ python beat.py # 新开启一个终端,启动Beat服务生成任务 13 | ❯ ipython # 发布一个需要获取执行结果的任务 14 | In [1]: from messaging import sync_get 15 | 16 | In [2]: sync_get('flask') 17 | PUT flask 18 | Out[2]: 19 | {u'author': u'Armin Ronacher', 20 | u'download_url': u'https://pypi.python.org/packages/24/6e/11b9c57e46f276a8a8da85a2fa7ada62b0463b68693616c7ab5df356fa/Flask-0.12.1.tar.gz', 21 | u'name': u'flask', 22 | u'version': u'0.12.1'} 23 | ``` 24 | -------------------------------------------------------------------------------- /daenerys/__init__.py: -------------------------------------------------------------------------------- 1 | from .app import Daenerys 2 | from .dinergate import 
class Daenerys(object):
    """Dispatch URL strings to handlers registered by host and path.

    Rules live in a werkzeug ``Map`` created with ``host_matching=True``,
    so every rule is bound to a host as well as to a path pattern.
    """

    def __init__(self, ignore_sites=None):
        """Create a dispatcher with an empty URL map.

        :param ignore_sites: names of site modules that :meth:`mount_sites`
            must skip. When omitted, a fresh empty set is used.
        """
        self.url_map = Map(strict_slashes=False, host_matching=True,
                           redirect_defaults=False)
        # A ``set()`` written in the signature is built once at definition
        # time and would be shared by every instance created without an
        # explicit argument; build a new one per instance instead.
        self.ignore_sites = set() if ignore_sites is None else ignore_sites

    def add_url_rule(self, host, rule_string, endpoint, **options):
        """Register ``rule_string`` on ``host`` under ``endpoint``.

        Extra keyword ``options`` are forwarded to werkzeug's ``Rule``.
        """
        rule = Rule(rule_string, host=host, endpoint=endpoint, **options)
        self.url_map.add(rule)

    def parse_url(self, url_string):
        """Parse and validate ``url_string``.

        :returns: a ``(url, url_adapter, query_args)`` tuple: the validated
            ``ParseResult``, the map adapter bound to the URL's host, scheme
            and path, and the decoded query arguments.
        :raises NotSupported: if :meth:`validate_url` rejects the URL.
        """
        url = urlparse(url_string)
        url = self.validate_url(url)
        url_adapter = self.url_map.bind(server_name=url.hostname,
                                        url_scheme=url.scheme,
                                        path_info=url.path)
        query_args = url_decode(url.query)
        return url, url_adapter, query_args

    def validate_url(self, url):
        """Return ``url`` with path and query percent-quoted.

        :param url: a ``ParseResult`` as produced by ``urlparse``.
        :raises NotSupported: if the URL has no hostname, its scheme is not
            ``http``/``https``, or a non-empty path does not start with "/".
        """
        url_path = urllib.quote(url.path, safe=b"/%")
        url_query = urllib.quote(url.query, safe=b"?=&")

        url = ParseResult(url.scheme, url.netloc, url_path,
                          url.params, url_query, url.fragment)

        has_hostname = url.hostname is not None and len(url.hostname) > 0
        has_http_scheme = url.scheme in ("http", "https")
        # An empty path is accepted; a non-empty one must be absolute.
        has_path = not len(url.path) or url.path.startswith("/")

        if not (has_hostname and has_http_scheme and has_path):
            raise NotSupported("invalid url: %s" % repr(url))

        return url

    def dispatch_url(self, url_string):
        """Resolve ``url_string`` to its handler and return the handler's result.

        The endpoint stored in the rule is an import string; it is imported
        and called with a :class:`Request` plus the arguments extracted from
        the URL pattern.

        :raises NotSupported: if the URL is invalid or no rule matches it.
        """
        url, url_adapter, query_args = self.parse_url(url_string)

        try:
            endpoint, kwargs = url_adapter.match()
        except NotFound:
            raise NotSupported(url_string)
        except RequestRedirect as e:
            # The redirect target carries no query string; re-attach ours.
            new_url = "{0.new_url}?{1}".format(e, url_encode(query_args))
            return self.dispatch_url(new_url)

        try:
            handler = import_string(endpoint)
            request = Request(url=url, args=query_args)
            return handler(request, **kwargs)
        except RequestRedirect as e:
            # Handlers may redirect too; follow the new URL as given.
            return self.dispatch_url(e.new_url)

    def mount_site(self, site):
        """Replay the recorded actions of ``site`` onto this dispatcher.

        :param site: an object exposing ``play_actions``, or an import
            string that resolves to one.
        """
        if isinstance(site, basestring):
            site = import_string(site)
        site.play_actions(target=self)

    def mount_sites(self, root):
        """Mount every module under package ``root`` that defines ``site``.

        Modules are discovered recursively. A module is skipped when it has
        no ``site`` attribute or when the last component of its dotted name
        is listed in ``ignore_sites``.
        """
        for name in find_modules(root, recursive=True):
            mod = import_string(name)
            site = name.split('.')[-1]
            if hasattr(mod, 'site') and site not in self.ignore_sites:
                mod.site.play_actions(target=self)
class _Missing(object):
    """Type of the sentinel that marks "nothing cached yet"."""

    def __repr__(self):
        return 'no value'

    def __reduce__(self):
        # Reduce to the name of the module-level singleton below.
        return '_missing'


_missing = _Missing()


class PipelineProperty:
    """Descriptor that computes a value once per owner object and caches it.

    Subclasses implement :meth:`provide_value`; they may list option names
    in ``required_attrs`` to have them enforced and copied onto the property
    instance as attributes.
    """
    __metaclass__ = abc.ABCMeta
    __counter = 0
    required_attrs = set()

    def __init__(self, **options):
        """Store ``options`` and pick the key used in the owner's ``__dict__``.

        :raises TypeError: if any name in ``required_attrs`` is not supplied.
        """
        klass = self.__class__
        self.options = options
        # The cache key combines the concrete class name with a running
        # index taken from the class-level counter.
        self.name = '_{}#{}'.format(klass.__name__, klass.__counter)
        klass.__counter += 1

        # Only required options become attributes of the property itself.
        for key, val in options.items():
            if key in self.required_attrs:
                setattr(self, key, val)

        absent = self.required_attrs - set(options)
        if absent:
            raise TypeError("missing %r" % ", ".join(absent))

        self.prepare()

    def __set__(self, obj, value):
        """Store ``value`` for ``obj`` in place of any computed value."""
        obj.__dict__[self.name] = value

    def __get__(self, obj, type=None):
        """Return the cached value for ``obj``, computing it on first access.

        Accessed on the class (``obj`` is None), the property itself is
        returned.
        """
        if obj is None:
            return self
        cached = obj.__dict__.get(self.name, _missing)
        if cached is not _missing:
            return cached
        fresh = self.provide_value(obj)
        obj.__dict__[self.name] = fresh
        return fresh

    def prepare(self):
        """Hook called at the end of ``__init__``; subclasses may override
        it, typically to fill in option defaults."""

    @abc.abstractmethod
    def provide_value(self, obj):
        """Compute the value of this property for ``obj``."""
        pass
class ResponseProperty(PipelineProperty):
    """Base property that performs the HTTP request for its owner object.

    It is meant to be subclassed: a subclass must put a ``content_method``
    entry into ``self.options`` (normally in ``prepare``) naming the
    attribute of the response to return.

    Options:
        ``method``: HTTP method passed to the client, "GET" by default.
        ``data``: dict expanded as extra keyword arguments of the request
        call, empty by default.
    """

    def prepare(self):
        """Fill in the default request options."""
        self.options.setdefault("method", "GET")
        self.options.setdefault("data", {})

    def provide_value(self, obj):
        """Request ``obj.url`` through ``obj.http_client`` and return its content.

        If the attribute named by ``content_method`` is callable it is
        called and its result is returned, otherwise the attribute itself.

        :raises KeyError: if no ``content_method`` option has been set.
        """
        if "content_method" not in self.options:
            # The previous message pointed at ``self.attr_names``, which
            # does not exist; the value is looked up in ``self.options``.
            raise KeyError("You need to create a subclass which inherits "
                           "from ResponseProperty and assigns "
                           "`content_method` in self.options")
        response = obj.http_client.request(
            url=obj.url, method=self.options.get('method'),
            **self.options['data'])
        response.raise_for_status()
        content = getattr(response, self.options['content_method'])
        if callable(content):
            content = content()
        return content
class Site(object):
    """Recorder of dispatcher calls that can be replayed onto a target later."""

    def __init__(self, name):
        self.name = name
        self.actions = []

    def record_action(self, method_name, *args, **kwargs):
        """Remember a call to ``method_name`` with the given arguments."""
        entry = (method_name, args, kwargs)
        self.actions.append(entry)

    def play_actions(self, target):
        """Invoke every recorded call, in recording order, on ``target``."""
        for recorded in self.actions:
            method_name, positional, keywords = recorded
            bound = getattr(target, method_name)
            bound(*positional, **keywords)

    def route(self, host, rule, **options):
        """Return a decorator that records an ``add_url_rule`` call.

        The endpoint recorded for the decorated object is its
        ``module:name`` string; the object itself is returned unchanged.
        """
        def decorator(func):
            template = "{func.__module__}:{func.__name__}"
            self.record_action("add_url_rule", host, rule,
                               template.format(func=func), **options)
            return func
        return decorator
class Backend(Document):
    """Task record stored in MongoDB and cached in Redis under its name.

    The Redis value is the pickled JSON string of the document.

    NOTE(security): ``cPickle.loads`` is applied to whatever Redis returns
    for the key; this is only safe while nothing untrusted can write to
    that Redis instance.
    """
    name = StringField(max_length=20)
    # Callable default instead of a ``{}`` literal, so no dict object is
    # written into the class definition and reused as the default.
    result = DictField(default=dict)
    status = IntField(default=PENDING)
    traceback = StringField(default='')
    create_at = DateTimeField(default=datetime.now)
    worker_id = StringField(default='')

    meta = {
        'indexes': ['name']
    }

    @classmethod
    def add(cls, name):
        """Return the task called ``name``, creating and saving it if absent."""
        item = cls.get(name)
        if not item:
            item = cls(name=name)
            item.save()
        return item

    @classmethod
    def get(cls, name):
        """Return the task called ``name`` or None when it is not stored.

        The Redis cache is consulted first; on a miss the document is read
        from MongoDB and written to the cache.
        """
        rs = r.get(name)
        if rs:
            return cls.from_json(cPickle.loads(rs))
        try:
            item = cls.objects.get(name=name)
        except DoesNotExist:
            return None
        if item:
            r.set(name, cPickle.dumps(item.to_json()))
        return item

    @classmethod
    def _finish(cls, name, **fields):
        """Apply ``fields`` to the task ``name`` and refresh its cache entry."""
        item = cls.objects.get(name=name)
        if not item:
            return False
        item.update(**fields)
        # Read the document again so the cached JSON holds the new values.
        item = cls.objects.get(name=name)
        r.set(name, cPickle.dumps(item.to_json()))
        return True

    @classmethod
    def mark_as_done(cls, name, result, worker_id, state=SUCCESS):
        """Store ``result`` for the task ``name`` and set its status.

        :returns: True once the document and the cache are updated.
        :raises DoesNotExist: propagated from the lookup when no task with
            that name is stored.
        """
        return cls._finish(name, result=result, status=state,
                           worker_id=worker_id)

    @classmethod
    def mark_as_failure(cls, name, traceback, worker_id, state=FAILURE):
        """Store ``traceback`` for the task ``name`` and set its status.

        The first parameter used to be named ``self`` while the body read
        ``cls``, so every call failed with NameError.

        :returns: True once the document and the cache are updated.
        :raises DoesNotExist: propagated from the lookup when no task with
            that name is stored.
        """
        return cls._finish(name, traceback=traceback, status=state,
                           worker_id=worker_id)
def sync_get(name, interval=0.5):
    """Publish a task for ``name`` and block until its result is available.

    The cached task record is polled every ``interval`` seconds until its
    status is one of ``READY_STATES``. Polling used to stop only on
    ``SUCCESS``, so a task that ended in any other ready state kept the
    caller waiting forever.

    :param name: task / package name to publish.
    :param interval: seconds to sleep between two polls.
    :returns: the ``result`` dict of the finished task.
    :raises RuntimeError: if the task reached a ready state other than
        ``SUCCESS``; the stored traceback is included in the message.
    """
    # Imported here because the module-level import from ``config`` does
    # not bring in READY_STATES.
    from config import READY_STATES

    with Connection(BROKER_URI) as conn:
        publish(conn, name)

    while True:
        cached = r.get(name)
        if cached:
            # NOTE(security): unpickles bytes read from Redis; safe only
            # while that Redis instance is trusted.
            status = Backend.from_json(cPickle.loads(cached)).status
            if status in READY_STATES:
                break
        time.sleep(interval)

    item = Backend.get(name)
    if item.status != SUCCESS:
        raise RuntimeError("task {!r} ended with status {}: {}".format(
            name, item.status, item.traceback))
    return item.result
# The rule strings need the ``<name>`` / ``<version>`` placeholders: the
# handler is instantiated with the arguments extracted from the URL pattern,
# and URL_TEMPLATE and ``info`` read ``self.name`` and ``self.version``.
@site.route("pypi.python.org", "/pypi/<name>", defaults={"version": ""})
@site.route("pypi.python.org", "/pypi/<name>/<version>")
class PythonPackageInfo(Dinergate):
    """Package page on pypi.python.org, exposing author and download URL."""

    URL_TEMPLATE = "http://pypi.python.org/pypi/{self.name}/{self.version}"

    # First text node matched by the XPath.
    author = XPathTextProperty(
        xpath="//ul[@class='nodot']/li[1]/span/text()",
        pick_mode="first")
    # Link taken from the download button.
    _url_from_bt = XPathTextProperty(
        xpath=".//div[@id='download-button']/a/@href",
        strip_spaces=True, pick_mode="first")
    # Links from the file table whose href matches "tar.gz#"; uses the
    # EXSLT regular-expression namespace.
    _url_from_table = XPathTextProperty(
        xpath="//table[@class='list']//a[re:match(@href, 'tar.gz#')]/@href",
        namespaces={'re': "http://exslt.org/regular-expressions"})

    @cached_property
    def download_url(self):
        """Download URL without its ``#fragment``.

        The file-table link is preferred; the download button is the
        fallback when the table yields nothing.
        """
        return (self._url_from_table or self._url_from_bt).split('#')[0]

    @property
    def info(self):
        """Return name, version, author and download URL as a dict.

        When the URL carried no version, it is derived from the file name
        at the end of the download URL: the text after the last "-", with
        ".tar.gz" removed.
        """
        version = self.version or self.download_url.rpartition(
            '/')[-1].rsplit('-')[-1].replace('.tar.gz', '')
        return {"name": self.name, "version": version, "author": self.author,
                "download_url": self.download_url}
def process_task(body, message):
    """Consumer callback: fetch the package named in ``body`` and store it.

    The package page is dispatched through ``app``, its ``info`` is saved
    as the task result together with the current process name, and the
    message is acknowledged afterwards.
    """
    name = body['name']
    worker_id = multiprocessing.current_process().name
    pkg = app.dispatch_url(URL_TEMPLATE.format(name=name))
    Backend.mark_as_done(name, pkg.info, worker_id=worker_id, state=SUCCESS)
    message.ack()
    print('FINISHED: {}'.format(name))