├── .gitignore ├── .travis.yml ├── CHANGES.rst ├── LICENSE ├── MANIFEST.in ├── Procfile ├── README.rst ├── annotator.cfg.example ├── annotator.cfg.heroku ├── annotator ├── __init__.py ├── annotation.py ├── atoi.py ├── auth.py ├── authz.py ├── document.py ├── elasticsearch.py ├── reindexer.py └── store.py ├── docs ├── Makefile ├── api │ ├── auth.rst │ └── authz.rst ├── changes.rst ├── conf.py ├── index.rst └── make.bat ├── reindex.py ├── run.py ├── setup.cfg ├── setup.py ├── tests ├── __init__.py ├── helpers.py ├── test.cfg ├── test_annotation.py ├── test_auth.py ├── test_authz.py ├── test_document.py ├── test_elasticsearch.py └── test_store.py └── tox.ini /.gitignore: -------------------------------------------------------------------------------- 1 | /annotator.egg-info 2 | /annotator.cfg 3 | /.coverage 4 | /cover 5 | /dist 6 | /.tox 7 | *.pyc 8 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | python: 3 | - 2.6 4 | - 2.7 5 | - 3.3 6 | - 3.4 7 | - pypy 8 | - pypy3 9 | services: 10 | - elasticsearch 11 | install: 12 | - pip install -e .[testing] 13 | script: nosetests 14 | -------------------------------------------------------------------------------- /CHANGES.rst: -------------------------------------------------------------------------------- 1 | Changelog 2 | ========= 3 | 4 | All notable changes to this project will be documented in this file. This 5 | project endeavours to adhere to `Semantic Versioning`_. 6 | 7 | .. _Semantic Versioning: http://semver.org/ 8 | 9 | 0.14.2 2015-07-17 10 | ----------------- 11 | 12 | - FIXED: `Annotation.search` no longer mutates the passed query. 13 | 14 | - FIXED/BREAKING CHANGE: `Document.get_by_uri()` no longer returns a list for 15 | empty resultsets, instead returning `None`. 
16 | 17 | 0.14.1 2015-03-05 18 | ----------------- 19 | - FIXED: Document plugin doesn't drop links without a type. The annotator 20 | client generates a typeless link from the document href. (#116) 21 | 22 | - ADDED: the search endpoint now supports 'before' and 'after' query parameters, 23 | which can be used to return annotations created within a specific time 24 | period. 25 | 26 | 0.14 - 2015-02-13 27 | ----------------- 28 | 29 | - ADDED: the search endpoint now supports 'sort' and 'order' query parameters, 30 | which can be used to control the sort order of the returned results. 31 | 32 | - FIXED: previously only one document was returned when looking for equivalent 33 | documents (#110). Now the Document model tracks all discovered equivalent 34 | documents and keeps each document object up-to-date with them all. 35 | 36 | - BREAKING CHANGE: Document.get_all_by_uris() no longer exists. Use 37 | Document.get_by_uri() which should return a single document containing all 38 | equivalent URIs. (You may wish to update your index by fetching all documents 39 | and resaving them.) 40 | 41 | - FIXED: the search_raw endpoint no longer throws an exception when the 42 | 'fields' parameter is provided. 43 | 44 | 0.13.2 - 2014-12-03 45 | ------------------- 46 | 47 | - Avoid a confusing error about reindexing when annotator is used as a 48 | library and not a standalone application (#107). 49 | 50 | 0.13.1 - 2014-12-03 51 | ------------------- 52 | 53 | - Reindexer can run even when target exists. 54 | 55 | 0.13.0 - 2014-12-02 56 | ------------------- 57 | 58 | - Slight changes to reindex.py to ease subclassing it. 59 | 60 | 0.12.0 - 2014-10-06 61 | ------------------- 62 | 63 | - A tool for migrating/reindexing elasticsearch (reindex.py) was added (#103). 64 | - The store returns more appropriate HTTP response codes (#96). 65 | - Dropped support for ElasticSearch versions before 1.0.0 (#92). 
66 | - The default search query has been changed from a term-filtered "match all" to 67 | a set of "match queries", resulting in more liberal interpretations of 68 | queries (#89). 69 | - The default elasticsearch analyzer for annotation fields has been changed to 70 | "keyword" in order to provide more consistent case-sensitivity behaviours 71 | (#73, #88). 72 | - Made Flask an optional dependency: it is now possible to use the persistence 73 | components of the project without needing Flask (#76). 74 | - Python 3 compatibility (#72). 75 | 76 | 77 | 0.11.2 - 2014-07-25 78 | ------------------- 79 | 80 | - SECURITY: Fixed bug that allowed authenticated users to overwrite annotations 81 | on which they did not have permissions (#82). 82 | 83 | 0.11.1 - 2014-04-09 84 | ------------------- 85 | 86 | - Fixed support for using ElasticSearch instances behind HTTP Basic auth 87 | 88 | 0.11.0 - 2014-04-08 89 | ------------------- 90 | 91 | - Add support for ElasticSearch 1.0 92 | - Create changelog 93 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (C) 2010-2012 Open Knowledge Foundation 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in 11 | all copies or substantial portions of the Software. 
12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 | THE SOFTWARE. 20 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include CHANGES.rst 2 | -------------------------------------------------------------------------------- /Procfile: -------------------------------------------------------------------------------- 1 | web: python run.py annotator.cfg.heroku 2 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | Annotator Store 2 | =============== 3 | 4 | This is a backend store for `Annotator `__. 5 | 6 | The functionality can roughly be separated in two parts: 7 | 8 | 1. An abstraction layer wrapping Elasticsearch, to easily manage annotation 9 | storage. It features authorization to filter search results according to 10 | their permission settings. 11 | 2. A Flask blueprint for a web server that exposes an HTTP API to the annotation 12 | storage. To use this functionality, build this package with the ``[flask]`` 13 | option. 14 | 15 | Getting going 16 | ------------- 17 | 18 | You'll need a recent version of `Python `__ (Python 2 >=2.6 19 | or Python 3 >=3.3) and `ElasticSearch `__ (>=1.0.0) 20 | installed. 21 | 22 | The quickest way to get going requires the ``pip`` and ``virtualenv`` 23 | tools (``easy_install virtualenv`` will get them both). 
Run the 24 | following in the repository root:: 25 | 26 | virtualenv pyenv 27 | source pyenv/bin/activate 28 | pip install -e .[flask] 29 | cp annotator.cfg.example annotator.cfg 30 | python run.py 31 | 32 | You should see something like:: 33 | 34 | * Running on http://127.0.0.1:5000/ 35 | * Restarting with reloader... 36 | 37 | If you wish to customize the configuration of the Annotator Store, make 38 | your changes to ``annotator.cfg`` or dive into ``run.py``. 39 | 40 | Additionally, the ``HOST`` and ``PORT`` environment variables override 41 | the default socket binding of address ``127.0.0.1`` and port ``5000``. 42 | 43 | Store API 44 | --------- 45 | 46 | The Store API is designed to be compatible with the 47 | `Annotator `__. The annotation store, a 48 | JSON-speaking REST API, will be mounted at ``/api`` by default. See the 49 | `Annotator 50 | documentation `__ for 51 | details. 52 | 53 | Running tests 54 | ------------- 55 | 56 | We use ``nosetests`` to run tests. You can just 57 | ``pip install -e .[testing]``, ensure ElasticSearch is running, and 58 | then:: 59 | 60 | $ nosetests 61 | ...................................................................................... 62 | ---------------------------------------------------------------------- 63 | Ran 86 tests in 19.171s 64 | 65 | OK 66 | 67 | Alternatively (and preferably), you should install 68 | `Tox `__, and then run ``tox``. This will run 69 | the tests against multiple versions of Python (if you have them 70 | installed). 71 | 72 | Please `open an issue `__ 73 | if you find that the tests don't all pass on your machine, making sure to include 74 | the output of ``pip freeze``. 
75 | -------------------------------------------------------------------------------- /annotator.cfg.example: -------------------------------------------------------------------------------- 1 | DEBUG = True 2 | 3 | # You should change this secret key to a uniquely secret string before deploying 4 | SECRET_KEY = '6E1C924B-C03B-4F7F-97DE-B72EE2338B39' 5 | 6 | ELASTICSEARCH_HOST = 'http://127.0.0.1:9200' 7 | ELASTICSEARCH_INDEX = 'annotator' 8 | 9 | AUTH_ON = False 10 | AUTHZ_ON = False 11 | -------------------------------------------------------------------------------- /annotator.cfg.heroku: -------------------------------------------------------------------------------- 1 | from os import environ 2 | 3 | SECRET_KEY = environ['SECRET_KEY'] 4 | 5 | ELASTICSEARCH_HOST = environ['BONSAI_URL'] 6 | ELASTICSEARCH_INDEX = 'annotator_test' 7 | 8 | AUTH_ON = False 9 | AUTHZ_ON = False 10 | -------------------------------------------------------------------------------- /annotator/__init__.py: -------------------------------------------------------------------------------- 1 | from .elasticsearch import ElasticSearch 2 | 3 | # FIXME: remove this horrible singleton 4 | es = ElasticSearch() 5 | -------------------------------------------------------------------------------- /annotator/annotation.py: -------------------------------------------------------------------------------- 1 | from annotator import authz, document, es 2 | 3 | TYPE = 'annotation' 4 | MAPPING = { 5 | 'id': {'type': 'string', 'index': 'no'}, 6 | 'annotator_schema_version': {'type': 'string'}, 7 | 'created': {'type': 'date'}, 8 | 'updated': {'type': 'date'}, 9 | 'quote': {'type': 'string', 'analyzer': 'standard'}, 10 | 'tags': {'type': 'string', 'index_name': 'tag'}, 11 | 'text': {'type': 'string', 'analyzer': 'standard'}, 12 | 'uri': {'type': 'string'}, 13 | 'user': {'type': 'string'}, 14 | 'consumer': {'type': 'string'}, 15 | 'ranges': { 16 | 'index_name': 'range', 17 | 'properties': { 18 | 'start': 
@classmethod
def search_raw(cls, query=None, params=None, raw_result=False,
               user=None, authorization_enabled=None):
    """Perform a raw Elasticsearch query.

    Any ElasticsearchExceptions are to be caught by the caller.

    The passed ``query`` is never modified: when authorization is enabled
    the permissions filter is wrapped around a shallow copy of it.

    Keyword arguments:
    query -- Query to send to Elasticsearch
    params -- Extra keyword arguments to pass to Elasticsearch.search
    raw_result -- Return Elasticsearch's response as is
    user -- The user to filter the results for according to permissions
    authorization_enabled -- Overrides Annotation.es.authorization_enabled

    Raises RuntimeError if authorization is enabled but no permissions
    filter could be built for ``user``.
    """
    # Shallow copy: only the top-level 'query' key is replaced below, so
    # the caller's dict (and the sub-query it holds) stays untouched.
    query = {} if query is None else dict(query)

    if authorization_enabled is None:
        authorization_enabled = es.authorization_enabled

    if authorization_enabled:
        f = authz.permissions_filter(user)
        if not f:
            raise RuntimeError("Authorization filter creation failed")
        filtered_query = {
            'filtered': {
                'filter': f
            }
        }
        # Insert original query (if present)
        if 'query' in query:
            filtered_query['filtered']['query'] = query['query']
        # Use the filtered query instead of the original
        query['query'] = filtered_query

    return super(Annotation, cls).search_raw(query=query, params=params,
                                             raw_result=raw_result)
def atoi(v, default=0):
    """Convert ``v`` to an int, returning ``default`` when that is impossible.

    A falsy ``v`` (``None``, ``''``, ``0``) is replaced by ``default`` before
    conversion. Values that ``int()`` rejects -- unparsable strings
    (ValueError) as well as unsupported types such as lists or ``None``
    (TypeError) -- yield ``default`` unchanged instead of raising.
    """
    try:
        return int(v or default)
    except (TypeError, ValueError):
        return default
def request_user(self, request):
    """
    Look up the user that the given request was made on behalf of.

    Arguments:
    request -- a Flask Request object

    Returns: a user object built from the request's auth token, or None
    when the request carries no usable token or the token lacks one of the
    keys needed to build a user.
    """
    payload = self._decode_request_token(request)

    # No token, or one that failed verification: anonymous request.
    if not payload:
        return None

    try:
        user = User.from_token(payload)
    except KeyError:
        # A required key is missing from the token.
        user = None
    return user
def decode_token(token, secret='', ttl=DEFAULT_TTL, verify=True):
    """Decode a JSON Web Token and, if requested, check that it is current.

    Arguments:
    token -- the encoded token (text or bytes)
    secret -- the secret passed to jwt.decode
    ttl -- lifetime of the token in seconds, counted from 'issuedAt'
    verify -- when False, the token is decoded without verification and
              the 'issuedAt' checks are skipped

    Returns: the decoded token payload.

    Raises TokenInvalid if the token cannot be decoded, or (when verifying)
    if 'issuedAt' is missing, unparsable, in the future, or older than ttl.
    """
    try:
        if not isinstance(token, bytes):
            if six.PY3:
                token = bytes(token, 'utf-8')
            else:
                token = bytes(token)
        token = jwt.decode(token, secret, verify=verify)
    except jwt.DecodeError:
        import sys
        exc_class, exc, tb = sys.exc_info()
        # Parenthesised on purpose: '%' binds tighter than 'or', so without
        # the parentheses the exc_class fallback could never be used.
        new_exc = TokenInvalid("error decoding JSON Web Token: %s" %
                               (exc or exc_class))
        # Re-raise as TokenInvalid while keeping the original traceback.
        six.reraise(new_exc.__class__, new_exc, tb)

    if verify:
        issue_time = token.get('issuedAt')
        if issue_time is None:
            raise TokenInvalid("'issuedAt' is missing from token")

        try:
            issue_time = iso8601.parse_date(issue_time)
        except iso8601.ParseError:
            # A correctly signed token can still carry a junk timestamp;
            # report it like any other invalid token rather than letting a
            # parser error escape to callers that only catch TokenInvalid.
            raise TokenInvalid("'issuedAt' is not a valid ISO 8601 date")
        expiry_time = issue_time + datetime.timedelta(seconds=ttl)

        # Sample the clock once so both checks use the same instant.
        now = _now()
        if issue_time > now:
            raise TokenInvalid("token is not yet valid")
        if expiry_time < now:
            raise TokenInvalid("token has expired")

    return token
-------------------------------------------------------------------------------- /annotator/authz.py: -------------------------------------------------------------------------------- 1 | # An action is permitted in any of the following scenarios: 2 | # 3 | # 1) the permissions field for the specified action contains the magic value 4 | # 'group:__world__' 5 | # 6 | # 2) the user and consumer match those of the annotation (i.e. the 7 | # authenticated user is the owner of the annotation) 8 | # 9 | # 3) a user and consumer are provided and the permissions field contains the 10 | # magic value 'group:__authenticated__' 11 | # 12 | # 4) the provided consumer matches that of the annotation and the permissions 13 | # field for the specified action contains the magic value 14 | # 'group:__consumer__' 15 | # 16 | # 5) the consumer matches that of the annotation and the user is listed in the 17 | # permissions field for the specified action 18 | # 19 | # 6) the consumer matches that of the annotation and the user is an admin 20 | 21 | GROUP_WORLD = 'group:__world__' 22 | GROUP_AUTHENTICATED = 'group:__authenticated__' 23 | GROUP_CONSUMER = 'group:__consumer__' 24 | 25 | 26 | def authorize(annotation, action, user=None): 27 | action_field = annotation.get('permissions', {}).get(action, []) 28 | 29 | # Scenario 1 30 | if GROUP_WORLD in action_field: 31 | return True 32 | 33 | elif user is not None: 34 | # Fail fast if this looks dodgy 35 | if user.id.startswith('group:'): 36 | return False 37 | 38 | ann_uid, ann_ckey = _annotation_owner(annotation) 39 | 40 | # Scenario 2 41 | if (user.id, user.consumer.key) == (ann_uid, ann_ckey): 42 | return True 43 | 44 | # Scenario 3 45 | elif GROUP_AUTHENTICATED in action_field: 46 | return True 47 | 48 | # Scenario 4 49 | elif user.consumer.key == ann_ckey and GROUP_CONSUMER in action_field: 50 | return True 51 | 52 | # Scenario 5 53 | elif user.consumer.key == ann_ckey and user.id in action_field: 54 | return True 55 | 56 | # Scenario 6 
57 | elif user.consumer.key == ann_ckey and user.is_admin: 58 | return True 59 | 60 | return False 61 | 62 | 63 | def _annotation_owner(annotation): 64 | user = annotation.get('user') 65 | consumer = annotation.get('consumer') 66 | 67 | if not user: 68 | return (user, consumer) 69 | 70 | try: 71 | return (user.get('id', None), consumer) 72 | except AttributeError: 73 | return (user, consumer) 74 | 75 | 76 | def permissions_filter(user=None): 77 | """Filter an ElasticSearch query by the permissions of the current user""" 78 | 79 | # Scenario 1 80 | perm_f = {'term': {'permissions.read': GROUP_WORLD}} 81 | 82 | if user is not None: 83 | # Fail fast if this looks dodgy 84 | if user.id.startswith('group:'): 85 | return False 86 | 87 | perm_f = {'or': [perm_f]} 88 | 89 | # Scenario 2 90 | perm_f['or'].append( 91 | {'and': [{'term': {'consumer': user.consumer.key}}, 92 | {'or': [{'term': {'user': user.id}}, 93 | {'term': {'user.id': user.id}}]}]}) 94 | 95 | # Scenario 3 96 | perm_f['or'].append( 97 | {'term': {'permissions.read': GROUP_AUTHENTICATED}}) 98 | 99 | # Scenario 4 100 | perm_f['or'].append( 101 | {'and': [{'term': {'consumer': user.consumer.key}}, 102 | {'term': {'permissions.read': GROUP_CONSUMER}}]}) 103 | 104 | # Scenario 5 105 | perm_f['or'].append( 106 | {'and': [{'term': {'consumer': user.consumer.key}}, 107 | {'term': {'permissions.read': user.id}}]}) 108 | 109 | # Scenario 6 110 | if user.is_admin: 111 | perm_f['or'].append({'term': {'consumer': user.consumer.key}}) 112 | 113 | return perm_f 114 | -------------------------------------------------------------------------------- /annotator/document.py: -------------------------------------------------------------------------------- 1 | from annotator import es 2 | 3 | TYPE = 'document' 4 | MAPPING = { 5 | 'id': {'type': 'string', 'index': 'no'}, 6 | 'annotator_schema_version': {'type': 'string'}, 7 | 'created': {'type': 'date'}, 8 | 'updated': {'type': 'date'}, 9 | 'title': {'type': 'string', 'analyzer': 
class Document(es.Model):
    """Document metadata record, stored under the 'document' type.

    A document carries a list of links under the 'link' key; the 'href' of
    each link is one of the document's URIs. Documents that share a URI are
    treated as equivalent and are merged into one record on save().
    """

    __type__ = TYPE
    __mapping__ = MAPPING

    @classmethod
    def get_by_uri(cls, uri):
        """Returns the first document match for a given URI, or None."""
        results = cls._get_all_by_uris([uri])
        return results[0] if results else None

    @classmethod
    def _get_all_by_uris(cls, uris):
        """
        Returns a list of documents that have any of the supplied URIs.

        It is only necessary for one of the supplied URIs to match.
        """
        q = {'query': {'nested': {'path': 'link',
                                  'query': {'terms': {'link.href': uris}}}},
             'sort': [{'updated': {'order': 'asc',
                                   # While we do always provide a mapping for
                                   # 'updated', elasticsearch will bomb if
                                   # there are no documents in the index.
                                   # Although this is an edge case, we don't
                                   # want the API to return a 500 with an empty
                                   # index, so ignore this sort instruction if
                                   # 'updated' appears unmapped due to an empty
                                   # index.
                                   'ignore_unmapped': True}}]}

        res = cls.es.conn.search(index=cls.es.index,
                                 doc_type=cls.__type__,
                                 body=q)
        return [cls(d['_source'], id=d['_id']) for d in res['hits']['hits']]

    def uris(self):
        """Returns a list of the URIs for the document."""
        return self._uris_from_links(self.get('link', []))

    def merge_links(self, links):
        """Append each link from ``links`` whose href this document lacks.

        Links without an 'href' key are ignored. An href is added at most
        once, even if it occurs several times in ``links``. The 'link' list
        is created on demand if the document does not have one yet.
        """
        known_uris = self.uris()
        for link in links:
            if 'href' not in link or link['href'] in known_uris:
                continue
            self.setdefault('link', []).append(link)
            # Remember the href so a later duplicate in ``links`` is skipped.
            known_uris.append(link['href'])

    @staticmethod
    def _uris_from_links(links):
        """Return the 'href' of every link (None where a link has none)."""
        return [link.get('href') for link in links]

    @classmethod
    def _get_all_iterative_for_uris(cls, uris):
        """
        Builds an equivalence class (Kleene-star of documents) based on
        the supplied URIs as seed uris. It loads every document for
        which at least one supplied URI matches and recursively checks
        the uris of the retrieved documents and use the new URIs as
        seed URIs for the next iteration.

        At most MAX_ITERATIONS lookups are performed.

        Finally returns a list of documents that have any of the
        collected URIs
        """
        documents = {}
        all_uris = set(uris)
        new_uris = list(uris)
        iterations = 0

        while new_uris and iterations < MAX_ITERATIONS:
            docs = cls._get_all_by_uris(new_uris)
            new_uris = []
            for doc in docs:
                if doc['id'] not in documents:
                    documents[doc['id']] = doc

                    # Only URIs not seen before seed the next round.
                    for uri in doc.uris():
                        if uri not in all_uris:
                            new_uris.append(uri)
                            all_uris.add(uri)
            iterations += 1

        return list(documents.values())

    def _remove_deficient_links(self):
        """Drop links that have no 'href'; links without a type are kept."""
        links = self.get('link', [])
        self['link'] = [link for link in links if 'href' in link]

    @classmethod
    def _fill_bulk_header(cls, document):
        """Return the bulk-API action metadata addressing ``document``."""
        return {
            '_index': cls.es.index,
            '_type': cls.__type__,
            '_id': document['id']
        }

    @classmethod
    def _bulk_operation(cls, to_delete, to_index):
        """Delete and index the given documents in one bulk request.

        Deletions are listed first; each document to index contributes an
        action line followed by the document itself.
        """
        bulk_list = []

        for doc_to_delete in to_delete:
            bulk_list.append({'delete': cls._fill_bulk_header(doc_to_delete)})

        for doc_to_index in to_index:
            bulk_list.append({'index': cls._fill_bulk_header(doc_to_index)})
            bulk_list.append(doc_to_index)

        cls.es.conn.bulk(body=bulk_list, refresh=True)

    def save(self):
        """Saves document metadata, looks for existing documents and
        merges them to maintain equivalence classes"""
        self._remove_deficient_links()
        uris = self.uris()

        # Get existing documents
        existing_docs = self._get_all_iterative_for_uris(uris)

        if existing_docs:
            # Merge links from all docs into this one, then remove the
            # now-redundant stored documents.
            for d in existing_docs:
                self.merge_links(d.get('link', []))

            self._bulk_operation(existing_docs, [])

        # Saved separately from the bulk request because we want to record
        # the document id if it didn't have any before.
        super(Document, self).save()
class _Model(dict):
    """Base class that represents a document type in an ElasticSearch index.

    A child class is expected to define these two attributes:
    __type__ -- The name of the document type
    __mapping__ -- A mapping of the document's fields

    The ``es`` class attribute (the ElasticSearch wrapper to use) is
    supplied by make_model().

    Mapping: Calling create_all() will create the mapping in the index.
    One field, 'id', is treated specially. Its value will not be stored,
    but be used as the _id identifier of the document in Elasticsearch. If
    an item is indexed without providing an id, the _id is automatically
    generated by ES.

    Unmapped fields: Fields that are not defined in the mapping are analyzed
    using the 'keyword' analyzer, which practically means no analysis is
    performed: searching for these fields will be exact and case sensitive.
    To make a field full-text searchable, its mapping should configure it
    with 'analyzer':'standard'.
    """

    @classmethod
    def create_all(cls):
        """Create the index and install the mapping for this type."""
        log.info("Creating index '%s'.", cls.es.index)
        conn = cls.es.conn
        # NOTE(review): ignore=400 presumably tolerates an index that
        # already exists -- confirm against the elasticsearch client docs.
        conn.indices.create(cls.es.index, ignore=400)
        mapping = cls.get_mapping()
        conn.indices.put_mapping(index=cls.es.index,
                                 doc_type=cls.__type__,
                                 body=mapping)

    @classmethod
    def get_mapping(cls):
        """Return the full type mapping built around ``__mapping__``."""
        return {
            cls.__type__: {
                '_id': {
                    'path': 'id',
                },
                '_source': {
                    'excludes': ['id'],
                },
                'analyzer': 'keyword',
                'properties': cls.__mapping__,
            }
        }

    @classmethod
    def drop_all(cls):
        """Close and delete the index, if it exists."""
        if cls.es.conn.indices.exists(cls.es.index):
            cls.es.conn.indices.close(cls.es.index)
            cls.es.conn.indices.delete(cls.es.index)

    # It would be lovely if this were called 'get', but the dict semantics
    # already define that method name.
    @classmethod
    def fetch(cls, docid):
        """Return the document with id ``docid``, or None if not found."""
        doc = cls.es.conn.get(index=cls.es.index,
                              doc_type=cls.__type__,
                              ignore=404,
                              id=docid)
        if doc.get('found', True):
            return cls(doc['_source'], id=docid)
        return None

    @classmethod
    def _build_query(cls, query=None, offset=None, limit=None, sort=None, order=None):
        """Fill in defaults for missing arguments and build the ES query."""
        if offset is None:
            offset = 0
        if limit is None:
            limit = RESULTS_DEFAULT_SIZE
        if query is None:
            query = {}
        if sort is None:
            sort = 'updated'
        if order is None:
            order = 'desc'
        return _build_query(query, offset, limit, sort, order)

    @classmethod
    def search(cls, query=None, offset=0, limit=RESULTS_DEFAULT_SIZE,
               sort='updated', order='desc', **kwargs):
        """Search by field values; extra kwargs are passed to search_raw.

        Returns an empty list if no query could be built.
        """
        q = cls._build_query(query=query, offset=offset, limit=limit,
                             sort=sort, order=order)
        if not q:
            return []
        return cls.search_raw(q, **kwargs)

    @classmethod
    def search_raw(cls, query=None, params=None, raw_result=False):
        """Perform a raw Elasticsearch query

        Any ElasticsearchExceptions are to be caught by the caller.

        Keyword arguments:
        query -- Query to send to Elasticsearch
        params -- Extra keyword arguments to pass to Elasticsearch.search
        raw_result -- Return Elasticsearch's response as is
        """
        if query is None:
            query = {}
        if params is None:
            params = {}
        res = cls.es.conn.search(index=cls.es.index,
                                 doc_type=cls.__type__,
                                 body=query,
                                 **params)
        if not raw_result:
            docs = res['hits']['hits']
            res = [cls(d['_source'], id=d['_id']) for d in docs]
        return res

    @classmethod
    def count(cls, **kwargs):
        """Like search, but only count the number of matches."""
        # Copy the caller's params before adding search_type so the dict
        # they passed in is left as it was (and params=None is accepted).
        params = dict(kwargs.pop('params', None) or {})
        params['search_type'] = 'count'
        res = cls.search(raw_result=True, params=params, **kwargs)
        return res['hits']['total']

    def save(self, refresh=True):
        """Index this document and record the id Elasticsearch reports.

        'created' is set if absent and 'updated' is always refreshed.
        """
        _add_created(self)
        _add_updated(self)

        # Without an id the document is new, so ask for a strict create.
        if 'id' not in self:
            op_type = 'create'
        else:
            op_type = 'index'

        res = self.es.conn.index(index=self.es.index,
                                 doc_type=self.__type__,
                                 body=self,
                                 op_type=op_type,
                                 refresh=refresh)
        self['id'] = res['_id']

    def delete(self):
        """Delete this document from the index; a no-op without an id."""
        if 'id' in self:
            self.es.conn.delete(index=self.es.index,
                                doc_type=self.__type__,
                                id=self['id'])
218 | match_clauses.append({'match_all': {}}) 219 | 220 | return { 221 | 'sort': [{sort: { 222 | # Sort most recent first 223 | 'order': order, 224 | # While we do always provide a mapping for the field, elasticsearch 225 | # will bomb if there are no documents in the index. Although this 226 | # is an edge case, we don't want the API to return a 500 with an 227 | # empty index, so ignore this sort instruction if the field appears 228 | # unmapped due to an empty index. 229 | 'ignore_unmapped': True, 230 | }}], 231 | 'from': max(0, offset), 232 | 'size': min(RESULTS_MAX_SIZE, max(0, limit)), 233 | 'query': {'bool': {'must': match_clauses}} 234 | } 235 | 236 | 237 | def _add_created(ann): 238 | if 'created' not in ann: 239 | ann['created'] = datetime.datetime.now(iso8601.iso8601.UTC).isoformat() 240 | 241 | 242 | def _add_updated(ann): 243 | ann['updated'] = datetime.datetime.now(iso8601.iso8601.UTC).isoformat() 244 | -------------------------------------------------------------------------------- /annotator/reindexer.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | from elasticsearch import helpers 4 | 5 | from .annotation import Annotation 6 | from .document import Document 7 | 8 | 9 | class Reindexer(object): 10 | 11 | es_models = Annotation, Document 12 | 13 | def __init__(self, conn, interactive=False): 14 | self.conn = conn 15 | self.interactive = interactive 16 | 17 | def _print(self, s): 18 | if self.interactive: 19 | print(s) 20 | 21 | def reindex(self, old_index, new_index): 22 | """Reindex documents using the current mappings.""" 23 | conn = self.conn 24 | 25 | if not conn.indices.exists(old_index): 26 | raise ValueError("Index {0} does not exist!".format(old_index)) 27 | 28 | if conn.indices.exists(new_index): 29 | self._print("Index {0} already exists. 
" 30 | "The mapping will not be changed.".format(new_index)) 31 | else: 32 | # Create the new index with (presumably) new mapping config 33 | conn.indices.create(new_index, body=self.get_index_config()) 34 | 35 | # Do the actual reindexing. 36 | self._print("Reindexing {0} to {1}...".format(old_index, new_index)) 37 | helpers.reindex(conn, old_index, new_index) 38 | self._print("Reindexing done.") 39 | 40 | def alias(self, index, alias): 41 | conn = self.conn 42 | # Remove the alias's current targets. 43 | is_alias = conn.indices.exists_alias(alias) 44 | if is_alias: 45 | real_index = ','.join(conn.indices.get_alias(alias).keys()) 46 | self._print("Deleting alias {alias}... " 47 | "(was an alias for {real_index})" 48 | .format(alias=alias, real_index=real_index)) 49 | conn.indices.delete_alias(name=alias, index='_all') 50 | 51 | if conn.indices.exists(alias): 52 | raise RuntimeError("Cannot create alias {alias}, " 53 | "name is used by an index." 54 | .format(alias=alias)) 55 | 56 | # Create new alias 57 | self._print("Making alias {alias} point to {index}..." 58 | .format(alias=alias, index=index)) 59 | conn.indices.put_alias(name=alias, index=index) 60 | 61 | def get_index_config(self): 62 | # Configure index mappings 63 | index_config = {'mappings': {}} 64 | for model in self.es_models: 65 | index_config['mappings'].update(model.get_mapping()) 66 | return index_config 67 | -------------------------------------------------------------------------------- /annotator/store.py: -------------------------------------------------------------------------------- 1 | """ 2 | This module implements a Flask-based JSON API to talk with the annotation store via the 3 | Annotation model. 4 | It defines these routes: 5 | * Root 6 | * Index 7 | * Create 8 | * Read 9 | * Update 10 | * Delete 11 | * Search 12 | * Raw ElasticSearch search 13 | See their descriptions in `root`'s definition for more detail. 
14 | """ 15 | from __future__ import absolute_import 16 | 17 | import csv 18 | import json 19 | 20 | from elasticsearch.exceptions import TransportError 21 | from flask import Blueprint, Response 22 | from flask import current_app, g 23 | from flask import request 24 | from flask import url_for 25 | from six import iteritems 26 | 27 | from annotator.atoi import atoi 28 | from annotator.annotation import Annotation 29 | from annotator.elasticsearch import RESULTS_MAX_SIZE 30 | 31 | store = Blueprint('store', __name__) 32 | 33 | CREATE_FILTER_FIELDS = ('updated', 'created', 'consumer', 'id') 34 | UPDATE_FILTER_FIELDS = ('updated', 'created', 'user', 'consumer') 35 | 36 | 37 | # We define our own jsonify rather than using flask.jsonify because we wish 38 | # to jsonify arbitrary objects (e.g. index returns a list) rather than kwargs. 39 | def jsonify(obj, *args, **kwargs): 40 | res = json.dumps(obj, indent=None if request.is_xhr else 2) 41 | return Response(res, mimetype='application/json', *args, **kwargs) 42 | 43 | 44 | @store.before_request 45 | def before_request(): 46 | if not hasattr(g, 'annotation_class'): 47 | g.annotation_class = Annotation 48 | 49 | user = g.auth.request_user(request) 50 | if user is not None: 51 | g.user = user 52 | elif not hasattr(g, 'user'): 53 | g.user = None 54 | 55 | 56 | @store.after_request 57 | def after_request(response): 58 | ac = 'Access-Control-' 59 | rh = response.headers 60 | 61 | rh[ac + 'Allow-Origin'] = request.headers.get('origin', '*') 62 | rh[ac + 'Expose-Headers'] = 'Content-Length, Content-Type, Location' 63 | 64 | if request.method == 'OPTIONS': 65 | rh[ac + 'Allow-Headers'] = ('Content-Length, Content-Type, ' 66 | 'X-Annotator-Auth-Token, X-Requested-With') 67 | rh[ac + 'Allow-Methods'] = 'GET, POST, PUT, DELETE, OPTIONS' 68 | rh[ac + 'Max-Age'] = '86400' 69 | 70 | return response 71 | 72 | 73 | # ROOT 74 | @store.route('/') 75 | def root(): 76 | return jsonify({ 77 | 'message': "Annotator Store API", 78 | 'links': 
{ 79 | 'annotation': { 80 | 'create': { 81 | 'method': 'POST', 82 | 'url': url_for('.create_annotation', _external=True), 83 | 'query': { 84 | 'refresh': { 85 | 'type': 'bool', 86 | 'desc': ("Force an index refresh after create " 87 | "(default: true)") 88 | } 89 | }, 90 | 'desc': "Create a new annotation" 91 | }, 92 | 'read': { 93 | 'method': 'GET', 94 | 'url': url_for('.read_annotation', 95 | docid=':id', 96 | _external=True), 97 | 'desc': "Get an existing annotation" 98 | }, 99 | 'update': { 100 | 'method': 'PUT', 101 | 'url': 102 | url_for( 103 | '.update_annotation', 104 | docid=':id', 105 | _external=True), 106 | 'query': { 107 | 'refresh': { 108 | 'type': 'bool', 109 | 'desc': ("Force an index refresh after update " 110 | "(default: true)") 111 | } 112 | }, 113 | 'desc': "Update an existing annotation" 114 | }, 115 | 'delete': { 116 | 'method': 'DELETE', 117 | 'url': url_for('.delete_annotation', 118 | docid=':id', 119 | _external=True), 120 | 'desc': "Delete an annotation" 121 | } 122 | }, 123 | 'search': { 124 | 'method': 'GET', 125 | 'url': url_for('.search_annotations', _external=True), 126 | 'desc': 'Basic search API' 127 | }, 128 | 'search_raw': { 129 | 'method': 'GET/POST', 130 | 'url': url_for('.search_annotations_raw', _external=True), 131 | 'desc': ('Advanced search API -- direct access to ' 132 | 'ElasticSearch. 
Uses the same API as the ' 133 | 'ElasticSearch query endpoint.') 134 | } 135 | } 136 | }) 137 | 138 | 139 | # INDEX 140 | @store.route('/annotations') 141 | def index(): 142 | if current_app.config.get('AUTHZ_ON'): 143 | # Pass the current user to do permission filtering on results 144 | user = g.user 145 | else: 146 | user = None 147 | 148 | annotations = g.annotation_class.search(user=user) 149 | return jsonify(annotations) 150 | 151 | # CREATE 152 | @store.route('/annotations', methods=['POST']) 153 | def create_annotation(): 154 | # Only registered users can create annotations 155 | if g.user is None: 156 | return _failed_authz_response('create annotation') 157 | 158 | if request.json is not None: 159 | annotation = g.annotation_class( 160 | _filter_input( 161 | request.json, 162 | CREATE_FILTER_FIELDS)) 163 | 164 | annotation['consumer'] = g.user.consumer.key 165 | if _get_annotation_user(annotation) != g.user.id: 166 | annotation['user'] = g.user.id 167 | 168 | if hasattr(g, 'before_annotation_create'): 169 | g.before_annotation_create(annotation) 170 | 171 | if hasattr(g, 'after_annotation_create'): 172 | annotation.save(refresh=False) 173 | g.after_annotation_create(annotation) 174 | 175 | refresh = request.args.get('refresh') != 'false' 176 | annotation.save(refresh=refresh) 177 | 178 | location = url_for('.read_annotation', docid=annotation['id']) 179 | 180 | return jsonify(annotation), 201, {'Location': location} 181 | else: 182 | return jsonify('No JSON payload sent. 
Annotation not created.', 183 | status=400) 184 | 185 | 186 | # READ 187 | @store.route('/annotations/') 188 | def read_annotation(docid): 189 | annotation = g.annotation_class.fetch(docid) 190 | if not annotation: 191 | return jsonify('Annotation not found!', status=404) 192 | 193 | failure = _check_action(annotation, 'read') 194 | if failure: 195 | return failure 196 | 197 | return jsonify(annotation) 198 | 199 | 200 | # UPDATE 201 | @store.route('/annotations/', methods=['POST', 'PUT']) 202 | def update_annotation(docid): 203 | annotation = g.annotation_class.fetch(docid) 204 | if not annotation: 205 | return jsonify('Annotation not found! No update performed.', 206 | status=404) 207 | 208 | failure = _check_action(annotation, 'update') 209 | if failure: 210 | return failure 211 | 212 | if request.json is not None: 213 | updated = _filter_input(request.json, UPDATE_FILTER_FIELDS) 214 | updated['id'] = docid # use id from URL, regardless of what arrives in 215 | # JSON payload 216 | 217 | changing_permissions = ( 218 | 'permissions' in updated and 219 | updated['permissions'] != annotation.get('permissions', {})) 220 | 221 | if changing_permissions: 222 | failure = _check_action(annotation, 223 | 'admin', 224 | message='permissions update') 225 | if failure: 226 | return failure 227 | 228 | annotation.update(updated) 229 | 230 | if hasattr(g, 'before_annotation_update'): 231 | g.before_annotation_update(annotation) 232 | 233 | refresh = request.args.get('refresh') != 'false' 234 | annotation.save(refresh=refresh) 235 | 236 | if hasattr(g, 'after_annotation_update'): 237 | g.after_annotation_update(annotation) 238 | 239 | return jsonify(annotation) 240 | 241 | 242 | # DELETE 243 | @store.route('/annotations/', methods=['DELETE']) 244 | def delete_annotation(docid): 245 | annotation = g.annotation_class.fetch(docid) 246 | 247 | if not annotation: 248 | return jsonify('Annotation not found. 
No delete performed.', 249 | status=404) 250 | 251 | failure = _check_action(annotation, 'delete') 252 | if failure: 253 | return failure 254 | 255 | if hasattr(g, 'before_annotation_delete'): 256 | g.before_annotation_delete(annotation) 257 | 258 | annotation.delete() 259 | 260 | if hasattr(g, 'after_annotation_delete'): 261 | g.after_annotation_delete(annotation) 262 | 263 | return '', 204 264 | 265 | 266 | # SEARCH 267 | @store.route('/search') 268 | def search_annotations(): 269 | params = dict(request.args.items()) 270 | kwargs = dict() 271 | 272 | # Take limit and offset out of the parameters 273 | if 'offset' in params: 274 | kwargs['offset'] = atoi(params.pop('offset'), default=None) 275 | if 'limit' in params: 276 | kwargs['limit'] = atoi(params.pop('limit'), default=None) 277 | if 'sort' in params: 278 | kwargs['sort'] = params.pop('sort') 279 | if 'order' in params: 280 | kwargs['order'] = params.pop('order') 281 | 282 | # All remaining parameters are considered searched fields. 
283 | kwargs['query'] = params 284 | 285 | if current_app.config.get('AUTHZ_ON'): 286 | # Pass the current user to do permission filtering on results 287 | kwargs['user'] = g.user 288 | 289 | results = g.annotation_class.search(**kwargs) 290 | total = g.annotation_class.count(**kwargs) 291 | 292 | return jsonify({'total': total, 293 | 'rows': results}) 294 | 295 | 296 | # RAW ES SEARCH 297 | @store.route('/search_raw', methods=['GET', 'POST']) 298 | def search_annotations_raw(): 299 | 300 | try: 301 | query, params = _build_query_raw(request) 302 | except ValueError: 303 | return jsonify('Could not parse request payload!', 304 | status=400) 305 | 306 | if current_app.config.get('AUTHZ_ON'): 307 | user = g.user 308 | else: 309 | user = None 310 | 311 | try: 312 | res = g.annotation_class.search_raw(query, params, raw_result=True, 313 | user=user) 314 | except TransportError as err: 315 | if err.status_code is not 'N/A': 316 | status_code = err.status_code 317 | else: 318 | status_code = 500 319 | return jsonify(err.error, 320 | status=status_code) 321 | return jsonify(res, status=res.get('status', 200)) 322 | 323 | 324 | def _filter_input(obj, fields): 325 | for field in fields: 326 | obj.pop(field, None) 327 | 328 | return obj 329 | 330 | 331 | def _get_annotation_user(ann): 332 | """Returns the best guess at this annotation's owner user id""" 333 | user = ann.get('user') 334 | 335 | if not user: 336 | return None 337 | 338 | try: 339 | return user.get('id', None) 340 | except AttributeError: 341 | return user 342 | 343 | 344 | def _check_action(annotation, action, message=''): 345 | if not g.authorize(annotation, action, g.user): 346 | return _failed_authz_response(message) 347 | 348 | 349 | def _failed_authz_response(msg=''): 350 | user = g.user.id if g.user else None 351 | consumer = g.user.consumer.key if g.user else None 352 | 353 | if user: 354 | # If the user is authenticated but not authorized we send a 403. 
355 | message = ( 356 | "Cannot authorize request{0}. You aren't authorized to make this " 357 | "request. (user={user}, consumer={consumer})".format( 358 | ' (' + msg + ')' if msg else '', user=user, consumer=consumer)) 359 | return jsonify(message), 403 360 | 361 | else: 362 | # If the user is not authenticated at all we send a 401. 363 | return jsonify("Cannot authorize request{0}. Perhaps you're not logged in " 364 | "as a user with appropriate permissions on this " 365 | "annotation? " 366 | "(user={user}, consumer={consumer})".format( 367 | ' (' + msg + ')' if msg else '', 368 | user=user, 369 | consumer=consumer), 370 | status=401) 371 | 372 | 373 | def _build_query_raw(request): 374 | query = {} 375 | params = {} 376 | 377 | if request.method == 'GET': 378 | for k, v in iteritems(request.args): 379 | _update_query_raw(query, params, k, v) 380 | 381 | if 'query' not in query: 382 | query['query'] = {'match_all': {}} 383 | 384 | elif request.method == 'POST': 385 | 386 | try: 387 | query = json.loads(request.json or 388 | request.data or 389 | request.form.keys()[0]) 390 | except (ValueError, IndexError): 391 | raise ValueError 392 | 393 | params = request.args 394 | 395 | for o in (params, query): 396 | if 'from' in o: 397 | o['from'] = max(0, atoi(o['from'])) 398 | if 'size' in o: 399 | o['size'] = min(RESULTS_MAX_SIZE, max(0, atoi(o['size']))) 400 | 401 | return query, params 402 | 403 | 404 | def _update_query_raw(qo, params, k, v): 405 | if 'query' not in qo: 406 | qo['query'] = {} 407 | q = qo['query'] 408 | 409 | if 'query_string' not in q: 410 | q['query_string'] = {} 411 | qs = q['query_string'] 412 | 413 | if k == 'q': 414 | qs['query'] = v 415 | 416 | elif k == 'df': 417 | qs['default_field'] = v 418 | 419 | elif k in ('explain', 'track_scores', 'from', 'size', 'timeout', 420 | 'lowercase_expanded_terms', 'analyze_wildcard'): 421 | qo[k] = v 422 | 423 | elif k == 'fields': 424 | qo[k] = _csv_split(v) 425 | 426 | elif k == 'sort': 427 | if 'sort' 
not in qo: 428 | qo[k] = [] 429 | 430 | split = _csv_split(v, ':') 431 | 432 | if len(split) == 1: 433 | qo[k].append(split[0]) 434 | else: 435 | fld = ':'.join(split[0:-1]) 436 | drn = split[-1] 437 | qo[k].append({fld: drn}) 438 | 439 | elif k == 'search_type': 440 | params[k] = v 441 | 442 | 443 | def _csv_split(s, delimiter=','): 444 | return [r for r in csv.reader([s], delimiter=delimiter)][0] 445 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | PAPER = 8 | BUILDDIR = _build 9 | 10 | # Internal variables. 11 | PAPEROPT_a4 = -D latex_paper_size=a4 12 | PAPEROPT_letter = -D latex_paper_size=letter 13 | ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 14 | # the i18n builder cannot share the environment and doctrees with the others 15 | I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 
16 | 17 | .PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext 18 | 19 | help: 20 | @echo "Please use \`make <target>' where <target> is one of" 21 | @echo " html to make standalone HTML files" 22 | @echo " dirhtml to make HTML files named index.html in directories" 23 | @echo " singlehtml to make a single large HTML file" 24 | @echo " pickle to make pickle files" 25 | @echo " json to make JSON files" 26 | @echo " htmlhelp to make HTML files and a HTML help project" 27 | @echo " qthelp to make HTML files and a qthelp project" 28 | @echo " devhelp to make HTML files and a Devhelp project" 29 | @echo " epub to make an epub" 30 | @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" 31 | @echo " latexpdf to make LaTeX files and run them through pdflatex" 32 | @echo " text to make text files" 33 | @echo " man to make manual pages" 34 | @echo " texinfo to make Texinfo files" 35 | @echo " info to make Texinfo files and run them through makeinfo" 36 | @echo " gettext to make PO message catalogs" 37 | @echo " changes to make an overview of all changed/added/deprecated items" 38 | @echo " linkcheck to check all external links for integrity" 39 | @echo " doctest to run all doctests embedded in the documentation (if enabled)" 40 | 41 | clean: 42 | -rm -rf $(BUILDDIR)/* 43 | 44 | html: 45 | $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html 46 | @echo 47 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." 48 | 49 | dirhtml: 50 | $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml 51 | @echo 52 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." 53 | 54 | singlehtml: 55 | $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml 56 | @echo 57 | @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." 
58 | 59 | pickle: 60 | $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle 61 | @echo 62 | @echo "Build finished; now you can process the pickle files." 63 | 64 | json: 65 | $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json 66 | @echo 67 | @echo "Build finished; now you can process the JSON files." 68 | 69 | htmlhelp: 70 | $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp 71 | @echo 72 | @echo "Build finished; now you can run HTML Help Workshop with the" \ 73 | ".hhp project file in $(BUILDDIR)/htmlhelp." 74 | 75 | qthelp: 76 | $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp 77 | @echo 78 | @echo "Build finished; now you can run "qcollectiongenerator" with the" \ 79 | ".qhcp project file in $(BUILDDIR)/qthelp, like this:" 80 | @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/Annotator.qhcp" 81 | @echo "To view the help file:" 82 | @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/Annotator.qhc" 83 | 84 | devhelp: 85 | $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp 86 | @echo 87 | @echo "Build finished." 88 | @echo "To view the help file:" 89 | @echo "# mkdir -p $$HOME/.local/share/devhelp/Annotator" 90 | @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/Annotator" 91 | @echo "# devhelp" 92 | 93 | epub: 94 | $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub 95 | @echo 96 | @echo "Build finished. The epub file is in $(BUILDDIR)/epub." 97 | 98 | latex: 99 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 100 | @echo 101 | @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." 102 | @echo "Run \`make' in that directory to run these through (pdf)latex" \ 103 | "(use \`make latexpdf' here to do that automatically)." 104 | 105 | latexpdf: 106 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 107 | @echo "Running LaTeX files through pdflatex..." 
108 | $(MAKE) -C $(BUILDDIR)/latex all-pdf 109 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 110 | 111 | text: 112 | $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text 113 | @echo 114 | @echo "Build finished. The text files are in $(BUILDDIR)/text." 115 | 116 | man: 117 | $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man 118 | @echo 119 | @echo "Build finished. The manual pages are in $(BUILDDIR)/man." 120 | 121 | texinfo: 122 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 123 | @echo 124 | @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." 125 | @echo "Run \`make' in that directory to run these through makeinfo" \ 126 | "(use \`make info' here to do that automatically)." 127 | 128 | info: 129 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 130 | @echo "Running Texinfo files through makeinfo..." 131 | make -C $(BUILDDIR)/texinfo info 132 | @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." 133 | 134 | gettext: 135 | $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale 136 | @echo 137 | @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." 138 | 139 | changes: 140 | $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes 141 | @echo 142 | @echo "The overview file is in $(BUILDDIR)/changes." 143 | 144 | linkcheck: 145 | $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck 146 | @echo 147 | @echo "Link check complete; look for any errors in the above output " \ 148 | "or in $(BUILDDIR)/linkcheck/output.txt." 149 | 150 | doctest: 151 | $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest 152 | @echo "Testing of doctests in the sources finished, look at the " \ 153 | "results in $(BUILDDIR)/doctest/output.txt." 154 | -------------------------------------------------------------------------------- /docs/api/auth.rst: -------------------------------------------------------------------------------- 1 | .. 
_auth_module: 2 | 3 | :mod:`annotator.auth` 4 | --------------------- 5 | 6 | .. automodule:: annotator.auth 7 | 8 | .. autofunction:: encode_token 9 | .. autofunction:: decode_token 10 | 11 | .. autoclass:: Authenticator 12 | 13 | .. automethod:: request_user 14 | 15 | -------------------------------------------------------------------------------- /docs/api/authz.rst: -------------------------------------------------------------------------------- 1 | .. _authz_module: 2 | 3 | :mod:`annotator.authz` 4 | ---------------------- 5 | 6 | .. automodule:: annotator.authz 7 | 8 | .. autofunction:: authorize 9 | 10 | -------------------------------------------------------------------------------- /docs/changes.rst: -------------------------------------------------------------------------------- 1 | .. _changelog: 2 | 3 | Annotator Change History 4 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 5 | 6 | .. include:: ../CHANGES.rst 7 | 8 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # Annotator documentation build configuration file, created by 4 | # sphinx-quickstart on Thu Feb 27 00:10:13 2014. 5 | # 6 | # This file is execfile()d with the current directory set to its containing dir. 7 | # 8 | # Note that not all possible configuration values are present in this 9 | # autogenerated file. 10 | # 11 | # All configuration values have a default; values that are commented out 12 | # serve to show the default. 13 | 14 | import sys, os, pkg_resources 15 | 16 | # If extensions (or modules to document with autodoc) are in another directory, 17 | # add these directories to sys.path here. If the directory is relative to the 18 | # documentation root, use os.path.abspath to make it absolute, like shown here. 
19 | #sys.path.insert(0, os.path.abspath('.')) 20 | 21 | # -- General configuration ----------------------------------------------------- 22 | 23 | # If your documentation needs a minimal Sphinx version, state it here. 24 | #needs_sphinx = '1.0' 25 | 26 | # Add any Sphinx extension module names here, as strings. They can be extensions 27 | # coming with Sphinx (named 'sphinx.ext.*') or your custom ones. 28 | extensions = ['sphinx.ext.autodoc', 'sphinx.ext.intersphinx', 'sphinx.ext.todo', 'sphinx.ext.coverage', 'sphinx.ext.viewcode'] 29 | 30 | # Add any paths that contain templates here, relative to this directory. 31 | templates_path = ['_templates'] 32 | 33 | # The suffix of source filenames. 34 | source_suffix = '.rst' 35 | 36 | # The encoding of source files. 37 | #source_encoding = 'utf-8-sig' 38 | 39 | # The master toctree document. 40 | master_doc = 'index' 41 | 42 | # General information about the project. 43 | project = u'Annotator' 44 | copyright = u'2014, Open Knowledge Foundation and contributors' 45 | 46 | # The version info for the project you're documenting, acts as replacement for 47 | # |version| and |release|, also used in various other places throughout the 48 | # built documents. 49 | # 50 | # The short X.Y version. 51 | version = pkg_resources.get_distribution('annotator').version 52 | 53 | # The full version, including alpha/beta/rc tags. 54 | release = version 55 | 56 | # The language for content autogenerated by Sphinx. Refer to documentation 57 | # for a list of supported languages. 58 | #language = None 59 | 60 | # There are two options for replacing |today|: either, you set today to some 61 | # non-false value, then it is used: 62 | #today = '' 63 | # Else, today_fmt is used as the format for a strftime call. 64 | #today_fmt = '%B %d, %Y' 65 | 66 | # List of patterns, relative to source directory, that match files and 67 | # directories to ignore when looking for source files. 
68 | exclude_patterns = ['_build'] 69 | 70 | # The reST default role (used for this markup: `text`) to use for all documents. 71 | #default_role = None 72 | 73 | # If true, '()' will be appended to :func: etc. cross-reference text. 74 | #add_function_parentheses = True 75 | 76 | # If true, the current module name will be prepended to all description 77 | # unit titles (such as .. function::). 78 | #add_module_names = True 79 | 80 | # If true, sectionauthor and moduleauthor directives will be shown in the 81 | # output. They are ignored by default. 82 | #show_authors = False 83 | 84 | # The name of the Pygments (syntax highlighting) style to use. 85 | pygments_style = 'sphinx' 86 | 87 | # A list of ignored prefixes for module index sorting. 88 | #modindex_common_prefix = [] 89 | 90 | 91 | # -- Options for HTML output --------------------------------------------------- 92 | 93 | # The theme to use for HTML and HTML Help pages. See the documentation for 94 | # a list of builtin themes. 95 | html_theme = 'default' 96 | 97 | # Theme options are theme-specific and customize the look and feel of a theme 98 | # further. For a list of options available for each theme, see the 99 | # documentation. 100 | #html_theme_options = {} 101 | 102 | # Add any paths that contain custom themes here, relative to this directory. 103 | #html_theme_path = [] 104 | 105 | # The name for this set of Sphinx documents. If None, it defaults to 106 | # "<project> v<release> documentation". 107 | #html_title = None 108 | 109 | # A shorter title for the navigation bar. Default is the same as html_title. 110 | #html_short_title = None 111 | 112 | # The name of an image file (relative to this directory) to place at the top 113 | # of the sidebar. 114 | #html_logo = None 115 | 116 | # The name of an image file (within the static path) to use as favicon of the 117 | # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 118 | # pixels large. 
119 | #html_favicon = None 120 | 121 | # Add any paths that contain custom static files (such as style sheets) here, 122 | # relative to this directory. They are copied after the builtin static files, 123 | # so a file named "default.css" will overwrite the builtin "default.css". 124 | html_static_path = ['_static'] 125 | 126 | # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, 127 | # using the given strftime format. 128 | #html_last_updated_fmt = '%b %d, %Y' 129 | 130 | # If true, SmartyPants will be used to convert quotes and dashes to 131 | # typographically correct entities. 132 | #html_use_smartypants = True 133 | 134 | # Custom sidebar templates, maps document names to template names. 135 | #html_sidebars = {} 136 | 137 | # Additional templates that should be rendered to pages, maps page names to 138 | # template names. 139 | #html_additional_pages = {} 140 | 141 | # If false, no module index is generated. 142 | #html_domain_indices = True 143 | 144 | # If false, no index is generated. 145 | #html_use_index = True 146 | 147 | # If true, the index is split into individual pages for each letter. 148 | #html_split_index = False 149 | 150 | # If true, links to the reST sources are added to the pages. 151 | #html_show_sourcelink = True 152 | 153 | # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. 154 | #html_show_sphinx = True 155 | 156 | # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. 157 | #html_show_copyright = True 158 | 159 | # If true, an OpenSearch description file will be output, and all pages will 160 | # contain a <link> tag referring to it. The value of this option must be the 161 | # base URL from which the finished HTML is served. 162 | #html_use_opensearch = '' 163 | 164 | # This is the file name suffix for HTML files (e.g. ".xhtml"). 165 | #html_file_suffix = None 166 | 167 | # Output file base name for HTML help builder. 
168 | htmlhelp_basename = 'Annotatordoc' 169 | 170 | 171 | # -- Options for LaTeX output -------------------------------------------------- 172 | 173 | latex_elements = { 174 | # The paper size ('letterpaper' or 'a4paper'). 175 | #'papersize': 'letterpaper', 176 | 177 | # The font size ('10pt', '11pt' or '12pt'). 178 | #'pointsize': '10pt', 179 | 180 | # Additional stuff for the LaTeX preamble. 181 | #'preamble': '', 182 | } 183 | 184 | # Grouping the document tree into LaTeX files. List of tuples 185 | # (source start file, target name, title, author, documentclass [howto/manual]). 186 | latex_documents = [ 187 | ('index', 'Annotator.tex', u'Annotator Documentation', 188 | u'Open Knowledge Foundation and contributors', 'manual'), 189 | ] 190 | 191 | # The name of an image file (relative to this directory) to place at the top of 192 | # the title page. 193 | #latex_logo = None 194 | 195 | # For "manual" documents, if this is true, then toplevel headings are parts, 196 | # not chapters. 197 | #latex_use_parts = False 198 | 199 | # If true, show page references after internal links. 200 | #latex_show_pagerefs = False 201 | 202 | # If true, show URL addresses after external links. 203 | #latex_show_urls = False 204 | 205 | # Documents to append as an appendix to all manuals. 206 | #latex_appendices = [] 207 | 208 | # If false, no module index is generated. 209 | #latex_domain_indices = True 210 | 211 | 212 | # -- Options for manual page output -------------------------------------------- 213 | 214 | # One entry per manual page. List of tuples 215 | # (source start file, name, description, authors, manual section). 216 | man_pages = [ 217 | ('index', 'annotator', u'Annotator Documentation', 218 | [u'Open Knowledge Foundation and contributors'], 1) 219 | ] 220 | 221 | # If true, show URL addresses after external links. 
222 | #man_show_urls = False 223 | 224 | 225 | # -- Options for Texinfo output ------------------------------------------------ 226 | 227 | # Grouping the document tree into Texinfo files. List of tuples 228 | # (source start file, target name, title, author, 229 | # dir menu entry, description, category) 230 | texinfo_documents = [ 231 | ('index', 'Annotator', u'Annotator Documentation', 232 | u'Open Knowledge Foundation and contributors', 'Annotator', 'One line description of project.', 233 | 'Miscellaneous'), 234 | ] 235 | 236 | # Documents to append as an appendix to all manuals. 237 | #texinfo_appendices = [] 238 | 239 | # If false, no module index is generated. 240 | #texinfo_domain_indices = True 241 | 242 | # How to display URL addresses: 'footnote', 'no', or 'inline'. 243 | #texinfo_show_urls = 'footnote' 244 | 245 | 246 | # Example configuration for intersphinx: refer to the Python standard library. 247 | intersphinx_mapping = {'http://docs.python.org/': None} 248 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | .. _index: 2 | 3 | ===================================== 4 | Annotator: Storage and Search Service 5 | ===================================== 6 | 7 | Annotator is a reference implementation of the storage API for the Annotator 8 | JavaScript library. 9 | 10 | Getting Started 11 | =============== 12 | 13 | Todo 14 | 15 | API Documentation 16 | ================= 17 | 18 | .. toctree:: 19 | :maxdepth: 2 20 | :glob: 21 | 22 | api/* 23 | 24 | Change History 25 | ============== 26 | 27 | .. 
toctree:: 28 | :maxdepth: 1 29 | 30 | changes 31 | 32 | Indices and tables 33 | ================== 34 | 35 | * :ref:`genindex` 36 | * :ref:`modindex` 37 | * :ref:`search` 38 | 39 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | REM Command file for Sphinx documentation 4 | 5 | if "%SPHINXBUILD%" == "" ( 6 | set SPHINXBUILD=sphinx-build 7 | ) 8 | set BUILDDIR=_build 9 | set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% . 10 | set I18NSPHINXOPTS=%SPHINXOPTS% . 11 | if NOT "%PAPER%" == "" ( 12 | set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS% 13 | set I18NSPHINXOPTS=-D latex_paper_size=%PAPER% %I18NSPHINXOPTS% 14 | ) 15 | 16 | if "%1" == "" goto help 17 | 18 | if "%1" == "help" ( 19 | :help 20 | echo.Please use `make ^<target^>` where ^<target^> is one of 21 | echo. html to make standalone HTML files 22 | echo. dirhtml to make HTML files named index.html in directories 23 | echo. singlehtml to make a single large HTML file 24 | echo. pickle to make pickle files 25 | echo. json to make JSON files 26 | echo. htmlhelp to make HTML files and a HTML help project 27 | echo. qthelp to make HTML files and a qthelp project 28 | echo. devhelp to make HTML files and a Devhelp project 29 | echo. epub to make an epub 30 | echo. latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter 31 | echo. text to make text files 32 | echo. man to make manual pages 33 | echo. texinfo to make Texinfo files 34 | echo. gettext to make PO message catalogs 35 | echo. changes to make an overview over all changed/added/deprecated items 36 | echo. linkcheck to check all external links for integrity 37 | echo.
doctest to run all doctests embedded in the documentation if enabled 38 | goto end 39 | ) 40 | 41 | if "%1" == "clean" ( 42 | for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i 43 | del /q /s %BUILDDIR%\* 44 | goto end 45 | ) 46 | 47 | if "%1" == "html" ( 48 | %SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html 49 | if errorlevel 1 exit /b 1 50 | echo. 51 | echo.Build finished. The HTML pages are in %BUILDDIR%/html. 52 | goto end 53 | ) 54 | 55 | if "%1" == "dirhtml" ( 56 | %SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml 57 | if errorlevel 1 exit /b 1 58 | echo. 59 | echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml. 60 | goto end 61 | ) 62 | 63 | if "%1" == "singlehtml" ( 64 | %SPHINXBUILD% -b singlehtml %ALLSPHINXOPTS% %BUILDDIR%/singlehtml 65 | if errorlevel 1 exit /b 1 66 | echo. 67 | echo.Build finished. The HTML pages are in %BUILDDIR%/singlehtml. 68 | goto end 69 | ) 70 | 71 | if "%1" == "pickle" ( 72 | %SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle 73 | if errorlevel 1 exit /b 1 74 | echo. 75 | echo.Build finished; now you can process the pickle files. 76 | goto end 77 | ) 78 | 79 | if "%1" == "json" ( 80 | %SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json 81 | if errorlevel 1 exit /b 1 82 | echo. 83 | echo.Build finished; now you can process the JSON files. 84 | goto end 85 | ) 86 | 87 | if "%1" == "htmlhelp" ( 88 | %SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp 89 | if errorlevel 1 exit /b 1 90 | echo. 91 | echo.Build finished; now you can run HTML Help Workshop with the ^ 92 | .hhp project file in %BUILDDIR%/htmlhelp. 93 | goto end 94 | ) 95 | 96 | if "%1" == "qthelp" ( 97 | %SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp 98 | if errorlevel 1 exit /b 1 99 | echo. 
100 | echo.Build finished; now you can run "qcollectiongenerator" with the ^ 101 | .qhcp project file in %BUILDDIR%/qthelp, like this: 102 | echo.^> qcollectiongenerator %BUILDDIR%\qthelp\Annotator.qhcp 103 | echo.To view the help file: 104 | echo.^> assistant -collectionFile %BUILDDIR%\qthelp\Annotator.ghc 105 | goto end 106 | ) 107 | 108 | if "%1" == "devhelp" ( 109 | %SPHINXBUILD% -b devhelp %ALLSPHINXOPTS% %BUILDDIR%/devhelp 110 | if errorlevel 1 exit /b 1 111 | echo. 112 | echo.Build finished. 113 | goto end 114 | ) 115 | 116 | if "%1" == "epub" ( 117 | %SPHINXBUILD% -b epub %ALLSPHINXOPTS% %BUILDDIR%/epub 118 | if errorlevel 1 exit /b 1 119 | echo. 120 | echo.Build finished. The epub file is in %BUILDDIR%/epub. 121 | goto end 122 | ) 123 | 124 | if "%1" == "latex" ( 125 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex 126 | if errorlevel 1 exit /b 1 127 | echo. 128 | echo.Build finished; the LaTeX files are in %BUILDDIR%/latex. 129 | goto end 130 | ) 131 | 132 | if "%1" == "text" ( 133 | %SPHINXBUILD% -b text %ALLSPHINXOPTS% %BUILDDIR%/text 134 | if errorlevel 1 exit /b 1 135 | echo. 136 | echo.Build finished. The text files are in %BUILDDIR%/text. 137 | goto end 138 | ) 139 | 140 | if "%1" == "man" ( 141 | %SPHINXBUILD% -b man %ALLSPHINXOPTS% %BUILDDIR%/man 142 | if errorlevel 1 exit /b 1 143 | echo. 144 | echo.Build finished. The manual pages are in %BUILDDIR%/man. 145 | goto end 146 | ) 147 | 148 | if "%1" == "texinfo" ( 149 | %SPHINXBUILD% -b texinfo %ALLSPHINXOPTS% %BUILDDIR%/texinfo 150 | if errorlevel 1 exit /b 1 151 | echo. 152 | echo.Build finished. The Texinfo files are in %BUILDDIR%/texinfo. 153 | goto end 154 | ) 155 | 156 | if "%1" == "gettext" ( 157 | %SPHINXBUILD% -b gettext %I18NSPHINXOPTS% %BUILDDIR%/locale 158 | if errorlevel 1 exit /b 1 159 | echo. 160 | echo.Build finished. The message catalogs are in %BUILDDIR%/locale. 
161 | goto end 162 | ) 163 | 164 | if "%1" == "changes" ( 165 | %SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes 166 | if errorlevel 1 exit /b 1 167 | echo. 168 | echo.The overview file is in %BUILDDIR%/changes. 169 | goto end 170 | ) 171 | 172 | if "%1" == "linkcheck" ( 173 | %SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck 174 | if errorlevel 1 exit /b 1 175 | echo. 176 | echo.Link check complete; look for any errors in the above output ^ 177 | or in %BUILDDIR%/linkcheck/output.txt. 178 | goto end 179 | ) 180 | 181 | if "%1" == "doctest" ( 182 | %SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest 183 | if errorlevel 1 exit /b 1 184 | echo. 185 | echo.Testing of doctests in the sources finished, look at the ^ 186 | results in %BUILDDIR%/doctest/output.txt. 187 | goto end 188 | ) 189 | 190 | :end 191 | -------------------------------------------------------------------------------- /reindex.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import argparse 3 | 4 | from elasticsearch import Elasticsearch 5 | 6 | from annotator.reindexer import Reindexer 7 | 8 | description = """ 9 | Reindex an elasticsearch index. 10 | 11 | WARNING: Documents that are created while reindexing may be lost! 
12 | """ 13 | 14 | 15 | def main(): 16 | argparser = argparse.ArgumentParser(description=description) 17 | argparser.add_argument('old_index', help="Index to read from") 18 | argparser.add_argument('new_index', help="Index to write to") 19 | argparser.add_argument('--host', help="Elasticsearch server, host[:port]") 20 | argparser.add_argument('--alias', help="Alias for the new index") 21 | args = argparser.parse_args() 22 | 23 | host = args.host 24 | old_index = args.old_index 25 | new_index = args.new_index 26 | alias = args.alias 27 | 28 | if host: 29 | conn = Elasticsearch([host]) 30 | else: 31 | conn = Elasticsearch() 32 | 33 | 34 | reindexer = Reindexer(conn, interactive=True) 35 | 36 | reindexer.reindex(old_index, new_index) 37 | 38 | if alias: 39 | reindexer.alias(new_index, alias) 40 | 41 | if __name__ == '__main__': 42 | main() 43 | -------------------------------------------------------------------------------- /run.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """ 3 | run.py: A simple example app for using the Annotator Store blueprint 4 | 5 | This file creates and runs a Flask[1] application which mounts the Annotator 6 | Store blueprint at its root. It demonstrates how the major components of the 7 | Annotator Store (namely the 'store' blueprint, the annotation model and the 8 | auth and authz helper modules) fit together, but it is emphatically NOT 9 | INTENDED FOR PRODUCTION USE. 
10 | 11 | [1]: http://flask.pocoo.org 12 | """ 13 | 14 | from __future__ import print_function 15 | 16 | import os 17 | import logging 18 | import sys 19 | import time 20 | 21 | from flask import Flask, g, current_app 22 | import elasticsearch 23 | from annotator import es, annotation, auth, authz, document, store 24 | from tests.helpers import MockUser, MockConsumer, MockAuthenticator 25 | from tests.helpers import mock_authorizer 26 | 27 | logging.basicConfig(format='%(asctime)s %(process)d %(name)s [%(levelname)s] ' 28 | '%(message)s', 29 | datefmt='%Y-%m-%d %H:%M:%S', 30 | level=logging.INFO) 31 | logging.getLogger('elasticsearch').setLevel(logging.WARN) 32 | logging.getLogger('urllib3').setLevel(logging.WARN) 33 | log = logging.getLogger('annotator') 34 | 35 | here = os.path.dirname(__file__) 36 | 37 | 38 | def main(argv): 39 | app = Flask(__name__) 40 | 41 | cfg_file = 'annotator.cfg' 42 | if len(argv) == 2: 43 | cfg_file = argv[1] 44 | 45 | cfg_path = os.path.join(here, cfg_file) 46 | 47 | try: 48 | app.config.from_pyfile(cfg_path) 49 | except IOError: 50 | print("Could not find config file %s" % cfg_path, file=sys.stderr) 51 | print("Perhaps copy annotator.cfg.example to annotator.cfg", 52 | file=sys.stderr) 53 | sys.exit(1) 54 | 55 | if app.config.get('ELASTICSEARCH_HOST') is not None: 56 | es.host = app.config['ELASTICSEARCH_HOST'] 57 | 58 | # We do need to set this one (the other settings have fine defaults) 59 | default_index = app.name 60 | es.index = app.config.get('ELASTICSEARCH_INDEX', default_index) 61 | 62 | if app.config.get('AUTHZ_ON') is not None: 63 | es.authorization_enabled = app.config['AUTHZ_ON'] 64 | 65 | with app.test_request_context(): 66 | try: 67 | annotation.Annotation.create_all() 68 | document.Document.create_all() 69 | except elasticsearch.exceptions.RequestError as e: 70 | if e.error.startswith('MergeMappingException'): 71 | date = time.strftime('%Y-%m-%d') 72 | log.fatal("Elasticsearch index mapping is incorrect! 
Please " 73 | "reindex it. You can use reindex.py for this, e.g. " 74 | "python reindex.py --host %s %s %s-%s", 75 | es.host, 76 | es.index, 77 | es.index, 78 | date) 79 | raise 80 | 81 | @app.before_request 82 | def before_request(): 83 | # In a real app, the current user and consumer would be determined by 84 | # a lookup in either the session or the request headers, as described 85 | # in the Annotator authentication documentation[1]. 86 | # 87 | # [1]: https://github.com/okfn/annotator/wiki/Authentication 88 | g.user = MockUser('alice') 89 | 90 | # By default, this test application won't do full-on authentication 91 | # tests. Set AUTH_ON to True in the config file to enable (limited) 92 | # authentication testing. 93 | if current_app.config['AUTH_ON']: 94 | g.auth = auth.Authenticator(lambda x: MockConsumer('annotateit')) 95 | else: 96 | g.auth = MockAuthenticator() 97 | 98 | # Similarly, this test application won't prevent you from modifying 99 | # annotations you don't own, deleting annotations you're disallowed 100 | # from deleting, etc. Set AUTHZ_ON to True in the config file to 101 | # enable authorization testing. 
102 | if current_app.config['AUTHZ_ON']: 103 | g.authorize = authz.authorize 104 | else: 105 | g.authorize = mock_authorizer 106 | 107 | app.register_blueprint(store.store) 108 | 109 | host = os.environ.get('HOST', '127.0.0.1') 110 | port = int(os.environ.get('PORT', 5000)) 111 | app.run(host=host, port=port) 112 | 113 | if __name__ == '__main__': 114 | main(sys.argv) 115 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [bdist_wheel] 2 | universal = 1 3 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | import os 3 | 4 | requires = [ 5 | 'elasticsearch>=1.0,<2', 6 | 'PyJWT>=0.1.4', 7 | 'iso8601>=0.1.4', 8 | 'six', 9 | ] 10 | 11 | def read(*paths): 12 | """Build a file path from *paths* and return the contents.""" 13 | with open(os.path.join(*paths), 'r') as f: 14 | return f.read() 15 | 16 | setup( 17 | name = 'annotator', 18 | version = '0.14.2', 19 | packages = find_packages(exclude=['test*']), 20 | 21 | install_requires = requires, 22 | extras_require = { 23 | 'docs': ['Sphinx'], 24 | 'testing': ['Flask>=0.9,<2', 'mock', 'nose', 'coverage'], 25 | 'flask': ['Flask>=0.9,<2'], 26 | }, 27 | 28 | # metadata for upload to PyPI 29 | author = 'Rufus Pollock and Nick Stenning (Open Knowledge Foundation)', 30 | author_email = 'annotator@okfn.org', 31 | description = 'Database backend for Annotator (http://annotatorjs.org)', 32 | long_description = (read('README.rst') + '\n\n' + 33 | read('CHANGES.rst')), 34 | license = 'MIT', 35 | keywords = 'annotation web javascript', 36 | 37 | url = 'http://annotatorjs.org/', 38 | download_url = 'https://github.com/openannotation/annotator-store', 39 | 40 | classifiers = [ 41 | 'Development Status :: 4 - Beta', 42 | 'Environment :: 
Console', 43 | 'Intended Audience :: Developers', 44 | 'License :: OSI Approved :: MIT License', 45 | 'Operating System :: OS Independent', 46 | 'Programming Language :: Python' 47 | ], 48 | ) 49 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | import os 2 | from flask import Flask, g, request 3 | 4 | from annotator import es, auth, authz, annotation, store, document 5 | 6 | from .helpers import MockUser, MockConsumer 7 | 8 | here = os.path.dirname(__file__) 9 | 10 | 11 | def create_app(): 12 | app = Flask(__name__) 13 | app.config.from_pyfile(os.path.join(here, 'test.cfg')) 14 | 15 | es.host = app.config['ELASTICSEARCH_HOST'] 16 | es.index = app.config['ELASTICSEARCH_INDEX'] 17 | es.authorization_enabled = app.config['AUTHZ_ON'] 18 | 19 | @app.before_request 20 | def before_request(): 21 | g.auth = auth.Authenticator(MockConsumer) 22 | g.authorize = authz.authorize 23 | 24 | app.register_blueprint(store.store, url_prefix='/api') 25 | 26 | return app 27 | 28 | 29 | class TestCase(object): 30 | @classmethod 31 | def setup_class(cls): 32 | cls.app = create_app() 33 | annotation.Annotation.drop_all() 34 | document.Document.drop_all() 35 | 36 | def setup(self): 37 | annotation.Annotation.create_all() 38 | document.Document.create_all() 39 | es.conn.cluster.health(wait_for_status='yellow') 40 | self.cli = self.app.test_client() 41 | 42 | def teardown(self): 43 | annotation.Annotation.drop_all() 44 | document.Document.drop_all() 45 | -------------------------------------------------------------------------------- /tests/helpers.py: -------------------------------------------------------------------------------- 1 | class MockConsumer(object): 2 | def __init__(self, key='mockconsumer'): 3 | self.key = key 4 | self.secret = 'top-secret' 5 | self.ttl = 86400 6 | 7 | class MockUser(object): 8 | def __init__(self, userid='alice', 
consumer=None): 9 | self.id = userid 10 | self.consumer = MockConsumer(consumer if consumer is not None else 'mockconsumer') 11 | self.is_admin = False 12 | 13 | 14 | class MockAuthenticator(object): 15 | def request_user(self, request): 16 | return MockUser() 17 | 18 | def mock_authorizer(*args, **kwargs): 19 | return True 20 | -------------------------------------------------------------------------------- /tests/test.cfg: -------------------------------------------------------------------------------- 1 | DEBUG = True 2 | TESTING = True 3 | 4 | ELASTICSEARCH_HOST = 'http://127.0.0.1:9200' 5 | ELASTICSEARCH_INDEX = 'annotator_test' 6 | 7 | SECRET_KEY = 'test-random-secret-key' 8 | 9 | AUTH_ON = True 10 | AUTHZ_ON = True 11 | -------------------------------------------------------------------------------- /tests/test_annotation.py: -------------------------------------------------------------------------------- 1 | from nose.tools import * 2 | from mock import MagicMock 3 | from . import TestCase, helpers as h 4 | 5 | from annotator.annotation import Annotation 6 | 7 | uri1 = u'http://xyz.com' 8 | uri2 = u'urn:uuid:xxxxx' 9 | user1 = u'levin' 10 | user2 = u'anna' 11 | date1 = '2015-02-02T15:00' 12 | date2 = '2015-02-04T16:12' 13 | date3 = '2015-01-20T14:43' 14 | 15 | 16 | class TestAnnotation(TestCase): 17 | def setup(self): 18 | super(TestAnnotation, self).setup() 19 | 20 | def teardown(self): 21 | super(TestAnnotation, self).teardown() 22 | 23 | def test_new(self): 24 | a = Annotation() 25 | assert_equal('{}', repr(a)) 26 | 27 | def test_save_refresh(self): 28 | a = Annotation(name='bob') 29 | c = a.es.conn 30 | a.save(refresh=True) 31 | assert_true('id' in a) 32 | 33 | def test_save_assert_refresh(self): 34 | a = Annotation(name='bob') 35 | a.es = MagicMock() 36 | a.es.index = 'foo' 37 | a.save() 38 | args, kwargs = a.es.conn.index.call_args 39 | assert_equal(kwargs['refresh'], True) 40 | 41 | def test_save_refresh_disable(self): 42 | a = Annotation(name='bob') 
43 | a.es = MagicMock() 44 | a.es.index = 'foo' 45 | a.save(refresh=False) 46 | args, kwargs = a.es.conn.index.call_args 47 | assert_equal(kwargs['refresh'], False) 48 | 49 | def test_fetch(self): 50 | a = Annotation(foo='bar') 51 | a.save() 52 | b = Annotation.fetch(a['id']) 53 | assert_equal(b['foo'], 'bar') 54 | 55 | def test_delete(self): 56 | ann = Annotation(id=1) 57 | ann.save() 58 | 59 | newann = Annotation.fetch(1) 60 | newann.delete() 61 | 62 | noann = Annotation.fetch(1) 63 | assert_true(noann == None) 64 | 65 | def test_basics(self): 66 | user = "alice" 67 | ann = Annotation(text="Hello there", user=user) 68 | ann['ranges'] = [] 69 | ann['ranges'].append({'startOffset': 3}) 70 | ann['ranges'].append({'startOffset': 5}) 71 | ann['document'] = { 72 | 'title': 'Annotation for Dummies', 73 | 'link': [ 74 | {'href': 'http://example.com/1234', 'type': 'application/pdf'} 75 | ] 76 | } 77 | ann.save() 78 | 79 | ann = Annotation.fetch(ann['id']) 80 | assert_equal(ann['text'], "Hello there") 81 | assert_equal(ann['user'], "alice") 82 | assert_equal(len(ann['ranges']), 2) 83 | assert_equal(ann['document']['title'], 'Annotation for Dummies') 84 | assert_equal(ann['document']['link'][0]['href'], 'http://example.com/1234') 85 | assert_equal(ann['document']['link'][0]['type'], 'application/pdf') 86 | 87 | def _create_annotations_for_search(self): 88 | perms = {'read': ['group:__world__']} 89 | anno1 = Annotation(uri=uri1, text=uri1, user=user1, permissions=perms, 90 | created=date1) 91 | anno2 = Annotation(uri=uri1, text=uri1 + uri1, user=user2, permissions=perms, 92 | created=date2) 93 | anno3 = Annotation(uri=uri2, text=uri2, user=user1, permissions=perms, 94 | created=date3) 95 | anno1.save() 96 | anno2.save() 97 | anno3.save() 98 | return [anno1, anno2, anno3] 99 | 100 | def test_search(self): 101 | annotations = self._create_annotations_for_search() 102 | 103 | res = Annotation.search() 104 | assert_equal(len(res), 3) 105 | 106 | res = Annotation.count() 107 | 
assert_equal(res, 3) 108 | 109 | res = Annotation.search(query={'uri': uri1}) 110 | assert_equal(len(res), 2) 111 | assert_equal(res[0]['uri'], uri1) 112 | assert_equal(res[0]['id'], annotations[1]['id']) 113 | 114 | res = Annotation.search(query={'user': user1}) 115 | assert_equal(len(res), 2) 116 | assert_equal(res[0]['user'], user1) 117 | assert_equal(res[0]['id'], annotations[2]['id']) 118 | 119 | res = Annotation.search(query={'user': user1, 'uri':uri2}) 120 | assert_equal(len(res), 1) 121 | assert_equal(res[0]['user'], user1) 122 | assert_equal(res[0]['id'], annotations[2]['id']) 123 | 124 | res = Annotation.count(query={'user': user1, 'uri':uri2}) 125 | assert_equal(res, 1) 126 | 127 | def test_search_ordering(self): 128 | self._create_annotations_for_search() 129 | 130 | res = Annotation.search() 131 | # ordering (default: most recent first) 132 | assert_equal(res[0]['text'], uri2) 133 | 134 | res = Annotation.search(order='asc') 135 | assert_equal(res[0]['text'], uri1) 136 | 137 | res = Annotation.search(sort='user') 138 | assert_equal(res[0]['user'], user1) 139 | 140 | res = Annotation.search(sort='user', order='asc') 141 | assert_equal(res[0]['user'], user2) 142 | 143 | res = Annotation.search(limit=1) 144 | assert_equal(len(res), 1) 145 | 146 | res = Annotation.count(limit=1) 147 | assert_equal(res, 3) 148 | 149 | def test_search_before_and_after(self): 150 | self._create_annotations_for_search() 151 | 152 | res = Annotation.search(query={'after': '2015-02-02'}) 153 | assert_equal(len(res), 2) 154 | assert_equal(res[0]['created'], date2) 155 | assert_equal(res[1]['created'], date1) 156 | 157 | res = Annotation.count(query={'after': '2015-02-02', 'uri': uri1}) 158 | assert_equal(res, 2) 159 | 160 | res = Annotation.search(query={'after': '2015-01-23', 'before': '2015-02-03'}) 161 | assert_equal(len(res), 1) 162 | assert_equal(res[0]['created'], date1) 163 | 164 | res = Annotation.search(query={'before': '2015-02-02'}) 165 | assert_equal(len(res), 1) 166 
| assert_equal(res[0]['created'], date3) 167 | 168 | def test_search_permissions_null(self): 169 | anno = Annotation(text='Foobar') 170 | anno.save() 171 | 172 | res = Annotation.search() 173 | assert_equal(len(res), 0) 174 | 175 | user = h.MockUser('bob') 176 | res = Annotation.search(user=user) 177 | assert_equal(len(res), 0) 178 | 179 | def test_search_permissions_simple(self): 180 | anno = Annotation(text='Foobar', 181 | consumer='testconsumer', 182 | permissions={'read': ['bob']}) 183 | anno.save() 184 | 185 | res = Annotation.search() 186 | assert_equal(len(res), 0) 187 | 188 | user = h.MockUser('alice', 'testconsumer') 189 | res = Annotation.search(user=user) 190 | assert_equal(len(res), 0) 191 | 192 | user = h.MockUser('bob') 193 | res = Annotation.search(user=user) 194 | assert_equal(len(res), 0) 195 | 196 | user = h.MockUser('bob', 'testconsumer') 197 | res = Annotation.search(user=user) 198 | assert_equal(len(res), 1) 199 | 200 | def test_search_permissions_world(self): 201 | anno = Annotation(text='Foobar', 202 | consumer='testconsumer', 203 | permissions={'read': ['group:__world__']}) 204 | anno.save() 205 | 206 | res = Annotation.search() 207 | assert_equal(len(res), 1) 208 | 209 | user = h.MockUser('alice', 'testconsumer') 210 | res = Annotation.search(user=user) 211 | assert_equal(len(res), 1) 212 | 213 | user = h.MockUser('bob') 214 | res = Annotation.search(user=user) 215 | assert_equal(len(res), 1) 216 | 217 | user = h.MockUser('bob', 'testconsumer') 218 | res = Annotation.search(user=user) 219 | assert_equal(len(res), 1) 220 | 221 | def test_search_permissions_authenticated(self): 222 | anno = Annotation(text='Foobar', 223 | consumer='testconsumer', 224 | permissions={'read': ['group:__authenticated__']}) 225 | anno.save() 226 | 227 | res = Annotation.search() 228 | assert_equal(len(res), 0) 229 | 230 | user = h.MockUser('alice', 'testconsumer') 231 | res = Annotation.search(user=user) 232 | assert_equal(len(res), 1) 233 | 234 | user = 
h.MockUser('bob', 'anotherconsumer') 235 | res = Annotation.search(user=user) 236 | assert_equal(len(res), 1) 237 | 238 | 239 | def test_search_permissions_consumer(self): 240 | anno = Annotation(text='Foobar', 241 | user='alice', 242 | consumer='testconsumer', 243 | permissions={'read': ['group:__consumer__']}) 244 | anno.save() 245 | 246 | res = Annotation.search() 247 | assert_equal(len(res), 0) 248 | 249 | user = h.MockUser('bob', 'testconsumer') 250 | res = Annotation.search(user=user) 251 | assert_equal(len(res), 1) 252 | 253 | user = h.MockUser('alice', 'anotherconsumer') 254 | res = Annotation.search(user=user) 255 | assert_equal(len(res), 0) 256 | 257 | def test_search_permissions_owner(self): 258 | anno = Annotation(text='Foobar', 259 | user='alice', 260 | consumer='testconsumer') 261 | anno.save() 262 | 263 | res = Annotation.search() 264 | assert_equal(len(res), 0) 265 | 266 | user = h.MockUser('alice', 'testconsumer') 267 | res = Annotation.search(user=user) 268 | assert_equal(len(res), 1) 269 | 270 | def test_search_permissions_malicious(self): 271 | anno = Annotation(text='Foobar', 272 | user='alice', 273 | consumer='testconsumer', 274 | permissions={'read': ['group:__consumer__']}) 275 | anno.save() 276 | 277 | # Any user whose username starts with "group:" must be refused any results 278 | user = h.MockUser('group:anyone', 'testconsumer') 279 | search_action = lambda: Annotation.search(user=user) 280 | assert_raises(RuntimeError, search_action) 281 | 282 | def test_search_permissions_admin(self): 283 | anno = Annotation(text='Foobar', 284 | user='alice', 285 | consumer='testconsumer') 286 | anno.save() 287 | 288 | user = h.MockUser('bob', 'testconsumer') 289 | user.is_admin = True 290 | 291 | res = Annotation.search(user=user) 292 | assert_equal(len(res), 1) 293 | 294 | def test_search_raw(self): 295 | perms = {'read': ['group:__world__']} 296 | uri1 = u'http://xyz.com' 297 | uri2 = u'urn:uuid:xxxxx' 298 | user1 = u'levin' 299 | user2 = u'anna' 300 
| anno1 = Annotation(uri=uri1, text=uri1, user=user1, permissions=perms) 301 | anno2 = Annotation(uri=uri1, text=uri1 + uri1, user=user2, permissions=perms) 302 | anno3 = Annotation(uri=uri2, text=uri2, user=user1, permissions=perms) 303 | anno1.save() 304 | anno2.save() 305 | anno3.save() 306 | 307 | hits = Annotation.search_raw() 308 | assert_equal(len(hits), 3) 309 | 310 | query = { 311 | 'query': { 312 | 'filtered': { 313 | 'filter': { 314 | 'term': { 315 | 'user': user1 316 | } 317 | } 318 | } 319 | } 320 | } 321 | params = { 322 | 'from_': 1 323 | } 324 | 325 | hits = Annotation.search_raw(query=query) 326 | assert_equal(len(hits), 2) 327 | 328 | hits = Annotation.search_raw(params=params) 329 | assert_equal(len(hits), 2) 330 | 331 | hits = Annotation.search_raw(query=query, params=params) 332 | assert_equal(len(hits), 1) 333 | 334 | 335 | def test_cross_representations(self): 336 | 337 | # create an annotation for an html document which we can 338 | # scrape some document metadata from, including a link to a pdf 339 | 340 | a1 = Annotation(uri='http://example.com/1234', 341 | text='annotation1', 342 | user='alice', 343 | document = { 344 | "link": [ 345 | { 346 | "href": "http://example.com/1234", 347 | "type": "text/html" 348 | }, 349 | { 350 | "href": "http://example.com/1234.pdf", 351 | "type": "application/pdf" 352 | } 353 | ] 354 | }, 355 | consumer='testconsumer') 356 | a1.save() 357 | 358 | # create an annotation for the pdf that lacks document metadata since 359 | # annotator doesn't currently extract information from pdfs 360 | 361 | a2 = Annotation(uri='http://example.com/1234.pdf', 362 | text='annotation2', 363 | user='alice', 364 | consumer='testconsumer') 365 | a2.save() 366 | 367 | # now a query for annotations of the pdf should yield both annotations 368 | 369 | user = h.MockUser('alice', 'testconsumer') 370 | res = Annotation.search(user=user, 371 | query={'uri':'http://example.com/1234.pdf'}) 372 | assert_equal(len(res), 2) 373 | 374 | # and 
likewise for annotations of the html 375 | res = Annotation.search(user=user, 376 | query={'uri':'http://example.com/1234'}) 377 | assert_equal(len(res), 2) 378 | 379 | def test_case_sensitivity(self): 380 | """Indexing and search should not apply lowercase to strings 381 | (this requirement might be changed sometime) 382 | """ 383 | # https://github.com/openannotation/annotator-store/issues/73 384 | anno = Annotation(uri='http://example.com/1234', 385 | text='Foobar', 386 | user='alice', 387 | consumer='testconsumer', 388 | custom_field='CaseSensitive') 389 | anno.save() 390 | 391 | user = h.MockUser('alice', 'testconsumer') 392 | res = Annotation.search(user=user, 393 | query={'custom_field':'CaseSensitive'}) 394 | assert_equal(len(res), 1) 395 | 396 | res = Annotation.search(user=user, 397 | query={'custom_field':'casesensitive'}) 398 | assert_equal(len(res), 0) 399 | -------------------------------------------------------------------------------- /tests/test_auth.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import hashlib 3 | import time 4 | 5 | from nose.tools import * 6 | from mock import Mock, patch 7 | 8 | from six import u 9 | from werkzeug import Headers 10 | 11 | from annotator import auth 12 | 13 | class MockRequest(): 14 | def __init__(self, headers): 15 | self.headers = headers 16 | 17 | class MockConsumer(Mock): 18 | key = 'Consumer' 19 | secret = 'ConsumerSecret' 20 | ttl = 300 21 | 22 | def make_request(consumer, obj=None): 23 | obj = obj or {} 24 | obj.update({'consumerKey': consumer.key}) 25 | return MockRequest(Headers([ 26 | ('x-annotator-auth-token', auth.encode_token(obj, consumer.secret)) 27 | ])) 28 | 29 | class TestAuthBasics(object): 30 | def setup(self): 31 | self.now = auth._now() 32 | 33 | self.time_patcher = patch('annotator.auth._now') 34 | self.time = self.time_patcher.start() 35 | self.time.return_value = self.now 36 | 37 | def time_travel(self, **kwargs): 38 | 
self.time.return_value = self.now + datetime.timedelta(**kwargs) 39 | 40 | def teardown(self): 41 | self.time_patcher.stop() 42 | 43 | def test_decode_token(self): 44 | tok = auth.encode_token({}, 'secret') 45 | assert auth.decode_token(tok, 'secret'), "token should have been successfully decoded" 46 | 47 | def test_decode_token_unicode(self): 48 | tok = auth.encode_token({}, 'secret') 49 | assert auth.decode_token(u(tok), 'secret'), "token should have been successfully decoded" 50 | 51 | def test_reject_inauthentic_token(self): 52 | tok = auth.encode_token({'userId': 'alice'}, 'secret') 53 | tok += b'extrajunk' 54 | assert_raises(auth.TokenInvalid, auth.decode_token, tok, 'secret') 55 | 56 | def test_reject_notyetvalid_token(self): 57 | tok = auth.encode_token({}, 'secret') 58 | self.time_travel(minutes=-1) 59 | assert_raises(auth.TokenInvalid, auth.decode_token, tok, 'secret') 60 | 61 | def test_reject_expired_token(self): 62 | tok = auth.encode_token({}, 'secret') 63 | self.time_travel(seconds=310) 64 | assert_raises(auth.TokenInvalid, auth.decode_token, tok, 'secret', ttl=300) 65 | 66 | class TestAuthenticator(object): 67 | def setup(self): 68 | self.consumer = MockConsumer() 69 | fetcher = lambda x: self.consumer 70 | self.auth = auth.Authenticator(fetcher) 71 | 72 | def test_request_user(self): 73 | request = make_request(self.consumer) 74 | user = self.auth.request_user(request) 75 | assert_equal(user, None) # No userId supplied 76 | 77 | def test_request_user_user(self): 78 | request = make_request(self.consumer, {'userId': 'alice'}) 79 | user = self.auth.request_user(request) 80 | assert_equal(user.consumer.key, 'Consumer') 81 | assert_equal(user.id, 'alice') 82 | 83 | def test_request_user_missing(self): 84 | request = make_request(self.consumer) 85 | del request.headers['x-annotator-auth-token'] 86 | assert_equal(self.auth.request_user(request), None) 87 | 88 | def test_request_user_junk_token(self): 89 | request = MockRequest(Headers([ 90 | 
('x-annotator-auth-token', 'foo.bar.baz') 91 | ])) 92 | assert_equal(self.auth.request_user(request), None) 93 | 94 | def test_request_user_invalid(self): 95 | request = make_request(self.consumer) 96 | request.headers['x-annotator-auth-token'] += b'LookMaIAmAHacker' 97 | assert_equal(self.auth.request_user(request), None) 98 | -------------------------------------------------------------------------------- /tests/test_authz.py: -------------------------------------------------------------------------------- 1 | from . import helpers as h 2 | from annotator.authz import authorize 3 | 4 | class TestAuthorization(object): 5 | 6 | def test_authorize_empty(self): 7 | # An annotation with no permissions field is private 8 | ann = {} 9 | assert not authorize(ann, 'read') 10 | assert not authorize(ann, 'read', h.MockUser('bob')) 11 | assert not authorize(ann, 'read', h.MockUser('bob', 'consumerkey')) 12 | 13 | def test_authorize_null_consumer(self): 14 | # An annotation with no consumer set is private 15 | ann = {'permissions': {'read': ['bob']}} 16 | assert not authorize(ann, 'read') 17 | assert not authorize(ann, 'read', h.MockUser('bob')) 18 | assert not authorize(ann, 'read', h.MockUser('bob', 'consumerkey')) 19 | 20 | def test_authorize_basic(self): 21 | # Annotation with consumer and permissions fields is actionable as 22 | # per the permissions spec 23 | ann = { 24 | 'consumer': 'consumerkey', 25 | 'permissions': {'read': ['bob']} 26 | } 27 | 28 | assert not authorize(ann, 'read') 29 | assert not authorize(ann, 'read', h.MockUser('bob')) 30 | assert authorize(ann, 'read', h.MockUser('bob', 'consumerkey')) 31 | assert not authorize(ann, 'read', h.MockUser('alice', 'consumerkey')) 32 | 33 | assert not authorize(ann, 'update') 34 | assert not authorize(ann, 'update', h.MockUser('bob', 'consumerkey')) 35 | 36 | def test_authorize_world(self): 37 | # Annotation (even without consumer key) is actionable if the action 38 | # list includes the special string 
'group:__world__' 39 | ann = { 40 | 'permissions': {'read': ['group:__world__']} 41 | } 42 | assert authorize(ann, 'read') 43 | assert authorize(ann, 'read', h.MockUser('bob')) 44 | assert authorize(ann, 'read', h.MockUser('bob', 'consumerkey')) 45 | 46 | def test_authorize_authenticated(self): 47 | # Annotation (even without consumer key) is actionable if the action 48 | # list includes the special string 'group:__authenticated__' and the user 49 | # is authenticated (i.e. a user and consumer tuple is provided) 50 | ann = { 51 | 'permissions': {'read': ['group:__authenticated__']} 52 | } 53 | assert not authorize(ann, 'read') 54 | assert authorize(ann, 'read', h.MockUser('bob')) 55 | 56 | def test_authorize_consumer(self): 57 | # Annotation (WITH consumer key) is actionable if the action 58 | # list includes the special string 'group:__consumer__' and the user 59 | # is authenticated to the same consumer as that of the annotation 60 | ann = { 61 | 'permissions': {'read': ['group:__consumer__']} 62 | } 63 | assert not authorize(ann, 'read') 64 | assert not authorize(ann, 'read', h.MockUser('bob')) 65 | assert not authorize(ann, 'read', h.MockUser('bob', 'consumerkey')) 66 | ann = { 67 | 'consumer': 'consumerkey', 68 | 'permissions': {'read': ['group:__consumer__']} 69 | } 70 | assert not authorize(ann, 'read') 71 | assert not authorize(ann, 'read', h.MockUser('bob')) 72 | assert authorize(ann, 'read', h.MockUser('alice', 'consumerkey')) 73 | assert authorize(ann, 'read', h.MockUser('bob', 'consumerkey')) 74 | assert not authorize(ann, 'read', h.MockUser('bob', 'adifferentconsumerkey')) 75 | assert not authorize(ann, 'read', h.MockUser('group:__consumer__', 'consumerkey')) 76 | assert not authorize(ann, 'read', h.MockUser('group:__consumer__', 'adifferentconsumerkey')) 77 | 78 | def test_authorize_owner(self): 79 | # The annotation-owning user can do anything ('user' is a string) 80 | ann = { 81 | 'consumer': 'consumerkey', 82 | 'user': 'bob', 83 | 'permissions': 
{'read': ['alice', 'charlie']} 84 | } 85 | assert authorize(ann, 'read', h.MockUser('bob', 'consumerkey')) 86 | assert not authorize(ann, 'read', h.MockUser('bob', 'adifferentconsumer')) 87 | assert not authorize(ann, 'read', h.MockUser('sally', 'consumerkey')) 88 | 89 | def test_authorize_read_annotation_user_dict(self): 90 | # The annotation-owning user can do anything ('user' is an object) 91 | ann = { 92 | 'consumer': 'consumerkey', 93 | 'user': {'id': 'bob'}, 94 | 'permissions': {'read': ['alice', 'charlie']} 95 | } 96 | assert authorize(ann, 'read', h.MockUser('bob', 'consumerkey')) 97 | assert not authorize(ann, 'read', h.MockUser('bob', 'adifferentconsumer')) 98 | assert not authorize(ann, 'read', h.MockUser('sally', 'consumerkey')) 99 | 100 | def test_authorize_admin(self): 101 | # An admin user can do anything 102 | ann = { 103 | 'consumer': 'consumerkey', 104 | 'user': 'bob' 105 | } 106 | admin = h.MockUser('walter', 'consumerkey') 107 | admin.is_admin = True 108 | assert authorize(ann, 'read', admin) 109 | assert authorize(ann, 'update', admin) 110 | assert authorize(ann, 'admin', admin) 111 | -------------------------------------------------------------------------------- /tests/test_document.py: -------------------------------------------------------------------------------- 1 | from flask import g 2 | from nose.tools import * 3 | 4 | from . 
import TestCase 5 | from annotator.document import Document 6 | 7 | 8 | peerj = { 9 | "html": { 10 | "href": "https://peerj.com/articles/53/", 11 | "type": "text/html" 12 | }, 13 | "pdf": { 14 | "href": "https://peerj.com/articles/53.pdf", 15 | "type": "application/pdf" 16 | }, 17 | "doc": { 18 | "href": "http://peerj.com/articles/53.doc", 19 | "type": "application/vnd.ms-word.document" 20 | }, 21 | "docx": { 22 | "href": "https://peerj.com/articles/53.docx", 23 | "type": "application/vnd.ms-word.document" 24 | } 25 | } 26 | 27 | 28 | class TestDocument(TestCase): 29 | def setup(self): 30 | super(TestDocument, self).setup() 31 | self.ctx = self.app.test_request_context(path='/api') 32 | self.ctx.push() 33 | g.user = None 34 | 35 | def teardown(self): 36 | self.ctx.pop() 37 | super(TestDocument, self).teardown() 38 | 39 | def test_new(self): 40 | d = Document() 41 | assert_equal('{}', repr(d)) 42 | 43 | def test_basics(self): 44 | # Creating a single document and verifies the saved attributes 45 | d = Document({ 46 | "id": "1", 47 | "title": "Annotations: The Missing Manual", 48 | "link": [peerj["html"], peerj["pdf"]] 49 | }) 50 | d.save() 51 | d = Document.fetch("1") 52 | assert_equal(d["title"], "Annotations: The Missing Manual") 53 | assert_equal(len(d['link']), 2) 54 | assert_equal(d['link'][0]['href'], "https://peerj.com/articles/53/") 55 | assert_equal(d['link'][0]['type'], "text/html") 56 | assert_equal(d['link'][1]['href'], "https://peerj.com/articles/53.pdf") 57 | assert_equal(d['link'][1]['type'], "application/pdf") 58 | assert d['created'] 59 | assert d['updated'] 60 | 61 | def test_deficient_links(self): 62 | # Test that bad links are not saved 63 | d = Document({ 64 | "id": "1", 65 | "title": "Chaos monkey: The messed up links", 66 | "link": [{ 67 | "href": "http://cuckoo.baboon/" 68 | }, { 69 | # I'm an empty link entry 70 | }, { 71 | "type": "text/html" 72 | }, { 73 | "href": "http://cuckoo.baboon/", 74 | "type": "text/html" 75 | }] 76 | }) 77 | 
d.save() 78 | d = Document.fetch("1") 79 | assert_equal(len(d['link']), 2) 80 | assert_equal(d['link'][0]['href'], "http://cuckoo.baboon/") 81 | assert_equal(d['link'][1]['href'], "http://cuckoo.baboon/") 82 | assert_equal(d['link'][1]['type'], "text/html") 83 | 84 | def test_delete(self): 85 | # Test deleting a document 86 | ann = Document(id=1) 87 | ann.save() 88 | 89 | newdoc = Document.fetch(1) 90 | newdoc.delete() 91 | 92 | nodoc = Document.fetch(1) 93 | assert nodoc is None 94 | 95 | def test_search(self): 96 | # Test search retrieve 97 | d = Document({ 98 | "id": "1", 99 | "title": "document", 100 | "link": [peerj["html"], peerj["pdf"]] 101 | }) 102 | d.save() 103 | res = Document.search(query={'title': 'document'}) 104 | assert_equal(len(res), 1) 105 | 106 | def test_get_by_uri(self): 107 | # Make sure that only the document with the given uri is retrieved 108 | 109 | d = Document({ 110 | "id": "1", 111 | "title": "document1", 112 | "link": [peerj["html"], peerj["pdf"]] 113 | }) 114 | d.save() 115 | 116 | d = Document({ 117 | "id": "2", 118 | "title": "document2", 119 | "link": [ 120 | { 121 | "href": "http://nature.com/123/", 122 | "type": "text/html" 123 | } 124 | ], 125 | }) 126 | d.save() 127 | 128 | d = Document({ 129 | "id": "3", 130 | "title": "document3", 131 | "link": [peerj["doc"]] 132 | }) 133 | d.save() 134 | 135 | doc = Document.get_by_uri("https://peerj.com/articles/53/") 136 | assert doc 137 | assert_equal(doc['title'], "document1") 138 | 139 | def test_get_by_uri_not_found(self): 140 | assert Document.get_by_uri("bogus") is None 141 | 142 | def test_uris(self): 143 | d = Document({ 144 | "id": "1", 145 | "title": "document", 146 | "link": [peerj["html"], peerj["pdf"]] 147 | }) 148 | assert_equal(d.uris(), [ 149 | "https://peerj.com/articles/53/", 150 | "https://peerj.com/articles/53.pdf" 151 | ]) 152 | 153 | def test_merge_links(self): 154 | d = Document({ 155 | "id": "1", 156 | "title": "document", 157 | "link": [peerj["html"], 
peerj["pdf"]] 158 | }) 159 | d.save() 160 | 161 | d = Document.fetch(1) 162 | assert d 163 | assert_equal(len(d['link']), 2) 164 | 165 | d.merge_links([peerj["html"], peerj["doc"]]) 166 | d.save() 167 | 168 | assert_equal(len(d['link']), 3) 169 | d = Document.fetch(1) 170 | assert d 171 | assert_equal(len(d['link']), 3) 172 | 173 | doc = Document.get_by_uri("https://peerj.com/articles/53/") 174 | assert doc 175 | assert_equal(len(doc['link']), 3) 176 | 177 | def test_save(self): 178 | d1 = Document({ 179 | "id": "1", 180 | "title": "document1", 181 | "link": [peerj["html"], peerj["pdf"]] 182 | }) 183 | d1.save() 184 | 185 | d2 = Document({ 186 | "id": "2", 187 | "title": "document2", 188 | "link": [peerj["pdf"], peerj["doc"]] 189 | }) 190 | d2.save() 191 | 192 | d3 = Document({ 193 | "id": "3", 194 | "title": "document3", 195 | "link": [peerj["doc"], peerj["docx"]] 196 | }) 197 | d3.save() 198 | 199 | d4 = Document({ 200 | "id": "4", 201 | "title": "document4", 202 | "link": [peerj["docx"]] 203 | }) 204 | d4.save() 205 | 206 | d1 = Document.fetch(1) 207 | d2 = Document.fetch(2) 208 | d3 = Document.fetch(3) 209 | d4 = Document.fetch(4) 210 | assert d1 is None 211 | assert d2 is None 212 | assert d3 is None 213 | assert d4 214 | 215 | def test_save_merge_documents(self): 216 | d1 = Document({ 217 | "id": "1", 218 | "title": "document1", 219 | "link": [peerj["html"], peerj["pdf"]] 220 | }) 221 | d1.save() 222 | 223 | d2 = Document({ 224 | "id": "2", 225 | "title": "document2", 226 | "link": [peerj["doc"], peerj["docx"]] 227 | }) 228 | d2.save() 229 | 230 | # They are not merged yet 231 | d1 = Document.fetch(1) 232 | d2 = Document.fetch(2) 233 | assert d1 234 | assert d2 235 | 236 | d3 = Document({ 237 | "id": "3", 238 | "title": "document3", 239 | "link": [peerj["doc"], peerj["docx"]] 240 | }) 241 | d3.save() 242 | 243 | # d2 is merged into d3 244 | d2 = Document.fetch(2) 245 | d3 = Document.fetch(3) 246 | assert d2 is None 247 | assert d3 248 | 249 | d4 = Document({ 
250 | "id": "4", 251 | "title": "document4", 252 | "link": [ 253 | { 254 | "href": "https://totallydifferenturl.com", 255 | "type": "text/html" 256 | } 257 | ] 258 | }) 259 | 260 | # A new document is created for d4 261 | # It is not merged 262 | d4.save() 263 | d4 = Document.fetch(4) 264 | assert d4 265 | 266 | d5 = Document({ 267 | "id": "5", 268 | "title": "document5", 269 | "link": [peerj["pdf"], peerj["doc"]] 270 | }) 271 | 272 | d5.save() 273 | 274 | # The documents have been merged into d5 275 | d1 = Document.fetch(1) 276 | d2 = Document.fetch(2) 277 | d3 = Document.fetch(3) 278 | d4 = Document.fetch(4) 279 | d5 = Document.fetch(5) 280 | 281 | assert d1 is None 282 | assert d2 is None 283 | assert d3 is None 284 | assert d4 285 | assert d5 286 | -------------------------------------------------------------------------------- /tests/test_elasticsearch.py: -------------------------------------------------------------------------------- 1 | from nose.tools import * 2 | from mock import MagicMock, patch 3 | 4 | import elasticsearch 5 | 6 | from annotator.elasticsearch import ElasticSearch, _Model 7 | 8 | class TestElasticSearch(object): 9 | 10 | def test_conn(self): 11 | es = ElasticSearch() 12 | es.host = 'http://127.0.1.1:9202' 13 | es.index = 'foobar' 14 | assert_true(isinstance(es.conn, elasticsearch.Elasticsearch)) 15 | 16 | def test_auth(self): 17 | es = ElasticSearch() 18 | es.host = 'http://foo:bar@127.0.1.1:9202' 19 | es.index = 'foobar' 20 | assert_equal(('foo', 'bar'), 21 | es.conn.transport.hosts[0]['http_auth']) 22 | 23 | def test_config(self): 24 | es = ElasticSearch( 25 | host='http://127.0.1.1:9202', 26 | index='foobar', 27 | authorization_enabled=True, 28 | ) 29 | assert_equal(es.host, 'http://127.0.1.1:9202') 30 | assert_equal(es.index, 'foobar') 31 | assert_equal(es.authorization_enabled, True) 32 | 33 | class TestModel(object): 34 | def setup(self): 35 | es = ElasticSearch() 36 | es.host = 'http://127.0.1.1:9202' 37 | es.index = 'foobar' 38 | 
self.es = es 39 | 40 | class MyModel(self.es.Model): 41 | __type__ = 'footype' 42 | 43 | self.Model = MyModel 44 | 45 | def teardown(self): 46 | pass 47 | 48 | @patch('annotator.elasticsearch.elasticsearch.Elasticsearch') 49 | def test_fetch(self, es_mock): 50 | conn = es_mock.return_value 51 | conn.get.return_value = {'_source': {'foo': 'bar'}} 52 | o = self.Model.fetch(123) 53 | assert_equal(o['foo'], 'bar') 54 | assert_equal(o['id'], 123) 55 | assert_true(isinstance(o, self.Model)) 56 | 57 | @patch('annotator.elasticsearch.elasticsearch.Elasticsearch') 58 | def test_fetch_not_found(self, es_mock): 59 | conn = es_mock.return_value 60 | conn.get.return_value = {'found': False} 61 | o = self.Model.fetch(123) 62 | assert_equal(o, None) 63 | 64 | @patch('annotator.elasticsearch.elasticsearch.Elasticsearch') 65 | def test_op_type_create(self, es_mock): 66 | """Test if operation type is 'create' in absence of an id field""" 67 | m = self.Model(bla='blub') 68 | m.save() 69 | 70 | conn = es_mock.return_value 71 | call_kwargs = conn.index.call_args_list[0][1] 72 | assert call_kwargs['op_type'] == 'create', "Operation should be: create" 73 | 74 | @patch('annotator.elasticsearch.elasticsearch.Elasticsearch') 75 | def test_op_type_index(self, es_mock): 76 | """Test if operation type is 'index' when an id field is present""" 77 | m = self.Model(bla='blub', id=123) 78 | m.save() 79 | 80 | conn = es_mock.return_value 81 | call_kwargs = conn.index.call_args_list[0][1] 82 | assert call_kwargs['op_type'] == 'index', "Operation should be: index" 83 | -------------------------------------------------------------------------------- /tests/test_store.py: -------------------------------------------------------------------------------- 1 | from . 
import TestCase 2 | from .helpers import MockUser 3 | from nose.tools import * 4 | from mock import patch 5 | 6 | from flask import json, g 7 | from six.moves import xrange 8 | 9 | from annotator import auth, es 10 | from annotator.annotation import Annotation 11 | 12 | 13 | class TestStore(TestCase): 14 | def setup(self): 15 | super(TestStore, self).setup() 16 | 17 | self.user = MockUser() 18 | 19 | payload = {'consumerKey': self.user.consumer.key, 'userId': self.user.id} 20 | token = auth.encode_token(payload, self.user.consumer.secret) 21 | self.headers = {'x-annotator-auth-token': token} 22 | 23 | self.ctx = self.app.test_request_context() 24 | self.ctx.push() 25 | 26 | def teardown(self): 27 | self.ctx.pop() 28 | super(TestStore, self).teardown() 29 | 30 | def _create_annotation(self, refresh=True, **kwargs): 31 | opts = { 32 | 'user': self.user.id, 33 | 'consumer': self.user.consumer.key 34 | } 35 | opts.update(kwargs) 36 | ann = Annotation(**opts) 37 | ann.save(refresh=refresh) 38 | return ann 39 | 40 | def _get_annotation(self, id_): 41 | return Annotation.fetch(id_) 42 | 43 | def test_cors_preflight(self): 44 | response = self.cli.open('/api/annotations', method="OPTIONS") 45 | 46 | headers = dict(response.headers) 47 | 48 | assert headers['Access-Control-Allow-Methods'] == 'GET, POST, PUT, DELETE, OPTIONS', \ 49 | "Did not send the right Access-Control-Allow-Methods header." 50 | 51 | assert headers['Access-Control-Allow-Origin'] == '*', \ 52 | "Did not send the right Access-Control-Allow-Origin header." 53 | 54 | assert headers['Access-Control-Expose-Headers'] == 'Content-Length, Content-Type, Location', \ 55 | "Did not send the right Access-Control-Expose-Headers header." 
# TestStore test methods (tests/test_store.py, after test_cors_preflight).
# They use the fixtures built in TestStore.setup (self.user, self.headers,
# the pushed request context), the self.cli test client, and the
# _create_annotation / _get_annotation helpers defined earlier in the class.


@patch('annotator.store.Annotation')
def test_pluggable_class(self, ann_mock):
    # The store should go through the class registered on
    # g.annotation_class instead of the default Annotation model.
    # NOTE(review): Mock.assert_called_once() is only a real assertion in
    # newer mock releases; on older ones it is an auto-created attribute
    # that never fails. Also confirm whether the store calls fetch() on the
    # class itself or on an instance (return_value) -- store.py is not
    # visible from here.
    g.annotation_class = ann_mock
    self.cli.get('/api/annotations/testID', headers=self.headers)
    ann_mock.return_value.fetch.assert_called_once()


def test_index(self):
    # With nothing stored, the index endpoint returns an empty JSON list.
    response = self.cli.get('/api/annotations', headers=self.headers)
    assert response.data == b"[]", "response should be empty list"


def test_create(self):
    # POSTing an annotation yields 201, an id, a Location header and the
    # authenticated user's identity on the stored annotation.
    payload = json.dumps({'name': 'Foo'})

    response = self.cli.post('/api/annotations',
                             data=payload,
                             content_type='application/json',
                             headers=self.headers)

    assert response.status_code == 201, "response should be 201 CREATED"
    data = json.loads(response.data)
    assert 'id' in data, "annotation id should be returned in response"
    expected_location = '/api/annotations/{0}'.format(data['id'])
    assert response.location.endswith(expected_location), (
        "The response should have a Location header with the URL to read "
        "the annotation that was created")
    assert data['user'] == self.user.id
    assert data['consumer'] == self.user.consumer.key


def test_create_ignore_created(self):
    # A client-supplied 'created' value must not end up in the store.
    payload = json.dumps({'created': 'abc'})

    response = self.cli.post('/api/annotations',
                             data=payload,
                             content_type='application/json',
                             headers=self.headers)

    data = json.loads(response.data)
    ann = self._get_annotation(data['id'])

    assert ann['created'] != 'abc', "annotation 'created' field should not be used by API"


def test_create_ignore_updated(self):
    # A client-supplied 'updated' value must not end up in the store.
    payload = json.dumps({'updated': 'abc'})

    response = self.cli.post('/api/annotations',
                             data=payload,
                             content_type='application/json',
                             headers=self.headers)

    data = json.loads(response.data)
    ann = self._get_annotation(data['id'])

    assert ann['updated'] != 'abc', "annotation 'updated' field should not be used by API"


def test_create_ignore_auth_in_payload(self):
    # 'user' and 'consumer' come from the auth token, never from the body.
    payload = json.dumps({'user': 'jenny', 'consumer': 'myconsumer'})

    response = self.cli.post('/api/annotations',
                             data=payload,
                             content_type='application/json',
                             headers=self.headers)

    data = json.loads(response.data)
    ann = self._get_annotation(data['id'])

    assert ann['user'] == self.user.id, "annotation 'user' field should not be futzable by API"
    assert ann['consumer'] == self.user.consumer.key, "annotation 'consumer' field should not be used by API"


def test_create_should_not_update(self):
    # POSTing with an existing id must create a second annotation rather
    # than overwrite the first one.
    response = self.cli.post('/api/annotations',
                             data=json.dumps({'name': 'foo'}),
                             content_type='application/json',
                             headers=self.headers)
    data = json.loads(response.data)
    id_ = data['id']

    # Try and update the annotation using the create API
    response = self.cli.post('/api/annotations',
                             data=json.dumps({'name': 'bar', 'id': id_}),
                             content_type='application/json',
                             headers=self.headers)
    data = json.loads(response.data)

    assert id_ != data['id'], "create should always create a new annotation"

    ann1 = self._get_annotation(id_)
    ann2 = self._get_annotation(data['id'])

    assert ann1['name'] == 'foo', "annotation name should be 'foo'"
    assert ann2['name'] == 'bar', "annotation name should be 'bar'"


@patch('annotator.store.json')
@patch('annotator.store.Annotation')
def test_create_refresh(self, ann_mock, json_mock):
    # ?refresh=true is passed through to save(refresh=True).
    json_mock.dumps.return_value = "{}"
    self.cli.post('/api/annotations?refresh=true',
                  data="{}",
                  content_type='application/json',
                  headers=self.headers)
    ann_mock.return_value.save.assert_called_once_with(refresh=True)


@patch('annotator.store.json')
@patch('annotator.store.Annotation')
def test_create_disable_refresh(self, ann_mock, json_mock):
    # ?refresh=false is passed through to save(refresh=False).
    json_mock.dumps.return_value = "{}"
    self.cli.post('/api/annotations?refresh=false',
                  data="{}",
                  content_type='application/json',
                  headers=self.headers)
    ann_mock.return_value.save.assert_called_once_with(refresh=False)


def test_read(self):
    # A stored annotation can be read back by id.
    self._create_annotation(text=u"Foo", id='123')
    response = self.cli.get('/api/annotations/123', headers=self.headers)
    data = json.loads(response.data)
    assert data['id'] == '123', "annotation id should be returned in response"
    assert data['text'] == "Foo", "annotation text should be returned in response"


def test_read_notfound(self):
    # Reading an unknown id gives 404.
    response = self.cli.get('/api/annotations/123', headers=self.headers)
    assert response.status_code == 404, "response should be 404 NOT FOUND"


def test_update(self):
    # PUT changes the stored annotation and echoes the updated document.
    self._create_annotation(text=u"Foo", id='123', created='2010-12-10')

    payload = json.dumps({'id': '123', 'text': 'Bar'})
    response = self.cli.put('/api/annotations/123',
                            data=payload,
                            content_type='application/json',
                            headers=self.headers)

    ann = self._get_annotation('123')
    assert ann['text'] == "Bar", "annotation wasn't updated in db"

    data = json.loads(response.data)
    assert data['text'] == "Bar", "update annotation should be returned in response"


def test_update_without_payload_id(self):
    # The id in the URL is enough; the payload need not repeat it.
    self._create_annotation(text=u"Foo", id='123')

    payload = json.dumps({'text': 'Bar'})
    self.cli.put('/api/annotations/123',
                 data=payload,
                 content_type='application/json',
                 headers=self.headers)

    ann = self._get_annotation('123')
    assert ann['text'] == "Bar", "annotation wasn't updated in db"


def test_update_with_wrong_payload_id(self):
    # A mismatching payload id does not stop the annotation named in the
    # URL from being updated.
    self._create_annotation(text=u"Foo", id='123')

    payload = json.dumps({'text': 'Bar', 'id': 'abc'})
    self.cli.put('/api/annotations/123',
                 data=payload,
                 content_type='application/json',
                 headers=self.headers)

    ann = self._get_annotation('123')
    assert ann['text'] == "Bar", "annotation wasn't updated in db"


def test_update_notfound(self):
    # Updating an unknown id gives 404.
    response = self.cli.put('/api/annotations/123', headers=self.headers)
    assert response.status_code == 404, "response should be 404 NOT FOUND"


def test_update_ignore_created(self):
    # A client-supplied 'created' value must not replace the stored one.
    ann = self._create_annotation(text=u"Foo", id='123')

    payload = json.dumps({'created': 'abc'})

    self.cli.put('/api/annotations/123',
                 data=payload,
                 content_type='application/json',
                 headers=self.headers)

    upd = self._get_annotation('123')

    assert upd['created'] == ann['created'], "annotation 'created' field should not be updated by API"


def test_update_ignore_updated(self):
    # A client-supplied 'updated' value must not be stored. The check has
    # to look at 'updated' itself: 'created' is never 'abc' here, so
    # asserting on it could not detect a regression.
    self._create_annotation(text=u"Foo", id='123')

    payload = json.dumps({'updated': 'abc'})

    self.cli.put('/api/annotations/123',
                 data=payload,
                 content_type='application/json',
                 headers=self.headers)

    upd = self._get_annotation('123')

    assert upd['updated'] != 'abc', "annotation 'updated' field should not be updated by API"


def test_update_ignore_auth_in_payload(self):
    # 'user' and 'consumer' in an update payload must be ignored.
    self._create_annotation(text=u"Foo", id='123')

    payload = json.dumps({'user': 'jenny', 'consumer': 'myconsumer'})

    self.cli.put('/api/annotations/123',
                 data=payload,
                 content_type='application/json',
                 headers=self.headers)

    upd = self._get_annotation('123')

    assert_equal(upd['user'], self.user.id, "annotation 'user' field should not be futzable by API")
    assert_equal(upd['consumer'], self.user.consumer.key, "annotation 'consumer' field should not be futzable by API")


def test_delete(self):
    # DELETE answers 204 and the annotation can no longer be fetched.
    self._create_annotation(text=u"Bar", id='456')

    response = self.cli.delete('/api/annotations/456', headers=self.headers)
    assert response.status_code == 204, "response should be 204 NO CONTENT"

    assert self._get_annotation('456') is None, "annotation wasn't deleted in db"


def test_delete_notfound(self):
    # Deleting an unknown id gives 404.
    response = self.cli.delete('/api/annotations/123', headers=self.headers)
    assert response.status_code == 404, "response should be 404 NOT FOUND"


def test_search(self):
    # 'total' counts every match, while 'rows' honours limit and filters.
    uri1 = u'http://xyz.com'
    uri2 = u'urn:uuid:xxxxx'
    user = u'levin'
    user2 = u'anna'
    anno = self._create_annotation(uri=uri1, text=uri1, user=user)
    anno2 = self._create_annotation(uri=uri1, text=uri1 + uri1, user=user2)
    self._create_annotation(uri=uri2, text=uri2, user=user)

    res = self._get_search_results()
    assert_equal(res['total'], 3)

    res = self._get_search_results('limit=1')
    assert_equal(res['total'], 3)
    assert_equal(len(res['rows']), 1)

    res = self._get_search_results('uri=' + uri1)
    assert_equal(res['total'], 2)
    assert_equal(len(res['rows']), 2)
    assert_equal(res['rows'][0]['uri'], uri1)
    assert_true(res['rows'][0]['id'] in [anno['id'], anno2['id']])


def test_search_sort_and_order(self):
    # 'sort' picks the field and 'order' the direction; they combine with
    # ordinary field filters such as 'user'.
    uri1 = u'http://xyz.com'
    uri2 = u'urn:uuid:xxxxx'
    user = u'levin'
    user2 = u'anna'
    anno = self._create_annotation(uri=uri1, text=uri1, user=user)
    self._create_annotation(uri=uri1, text=uri1 + uri1, user=user2)
    self._create_annotation(uri=uri2, text=uri2, user=user)

    res = self._get_search_results('limit=1&sort=user&order=asc')
    assert_equal(res['total'], 3)
    assert_equal(len(res['rows']), 1)
    assert_equal(res['rows'][0]['user'], user2)

    res = self._get_search_results('limit=1&sort=user&order=desc')
    assert_equal(res['total'], 3)
    assert_equal(len(res['rows']), 1)
    assert_equal(res['rows'][0]['user'], user)

    res = self._get_search_results('limit=1&sort=text&user=' + user)
    assert_equal(res['total'], 2)
    assert_equal(len(res['rows']), 1)
    assert_equal(res['rows'][0]['text'], anno['text'])


def test_search_limit(self):
    # Save without per-document refresh, then refresh the index once so
    # all 250 annotations are searchable.
    for _ in xrange(250):
        self._create_annotation(refresh=False)

    es.conn.indices.refresh(es.index)

    # by default return 20
    res = self._get_search_results()
    assert_equal(len(res['rows']), 20)

    # return maximum 200
    res = self._get_search_results('limit=250')
    assert_equal(len(res['rows']), 200)

    # return minimum 0
    res = self._get_search_results('limit=-10')
    assert_equal(len(res['rows']), 0)

    # ignore bogus values
    res = self._get_search_results('limit=foobar')
    assert_equal(len(res['rows']), 20)


def test_search_offset(self):
    # Same bulk setup as test_search_limit: one index refresh at the end.
    for _ in xrange(250):
        self._create_annotation(refresh=False)

    es.conn.indices.refresh(es.index)

    res = self._get_search_results()
    assert_equal(len(res['rows']), 20)
    first = res['rows'][0]

    res = self._get_search_results('offset=240')
    assert_equal(len(res['rows']), 10)

    # ignore negative values
    res = self._get_search_results('offset=-10')
    assert_equal(len(res['rows']), 20)
    assert_equal(res['rows'][0], first)

    # ignore bogus values
    res = self._get_search_results('offset=foobar')
    assert_equal(len(res['rows']), 20)
    assert_equal(res['rows'][0], first)


def _get_search_results(self, qs=''):
    # GET /api/search with the given raw query string (authenticated with
    # self.headers) and return the decoded JSON body.
    res = self.cli.get('/api/search?{qs}'.format(qs=qs), headers=self.headers)
    return json.loads(res.data)
| class TestStoreAuthz(TestCase): 377 | 378 | def setup(self): 379 | super(TestStoreAuthz, self).setup() 380 | 381 | self.user = MockUser() # alice 382 | 383 | self.anno_id = '123' 384 | self.permissions = { 385 | 'read': [self.user.id, 'bob'], 386 | 'update': [self.user.id, 'charlie'], 387 | 'admin': [self.user.id] 388 | } 389 | 390 | self.ctx = self.app.test_request_context() 391 | self.ctx.push() 392 | 393 | ann = Annotation(id=self.anno_id, 394 | user=self.user.id, 395 | consumer=self.user.consumer.key, 396 | text='Foobar', 397 | permissions=self.permissions) 398 | ann.save() 399 | 400 | for u in ['alice', 'bob', 'charlie']: 401 | token = auth.encode_token({'consumerKey': self.user.consumer.key, 'userId': u}, self.user.consumer.secret) 402 | setattr(self, '%s_headers' % u, {'x-annotator-auth-token': token}) 403 | 404 | def teardown(self): 405 | self.ctx.pop() 406 | super(TestStoreAuthz, self).teardown() 407 | 408 | def test_index(self): 409 | # Test unauthenticated 410 | response = self.cli.get('/api/annotations') 411 | results = json.loads(response.data) 412 | assert results == [], "unauthenticated user should get an empty list" 413 | 414 | # Test as bob (authorized to read) 415 | response = self.cli.get('/api/annotations', 416 | headers=self.bob_headers) 417 | results = json.loads(response.data) 418 | assert results and results[0]['id'] == self.anno_id, "bob should see his own annotation" 419 | 420 | # Test as charlie (unauthorized) 421 | response = self.cli.get('/api/annotations', 422 | headers=self.charlie_headers) 423 | results = json.loads(response.data) 424 | assert results == [] 425 | 426 | def test_read(self): 427 | response = self.cli.get('/api/annotations/123') 428 | assert response.status_code == 401, "response should be 401 NOT AUTHORIZED" 429 | 430 | response = self.cli.get('/api/annotations/123', headers=self.charlie_headers) 431 | assert response.status_code == 403, "response should be 403 FORBIDDEN" 432 | 433 | response = 
self.cli.get('/api/annotations/123', headers=self.alice_headers) 434 | assert response.status_code == 200, "response should be 200 OK" 435 | data = json.loads(response.data) 436 | assert data['text'] == 'Foobar' 437 | 438 | def test_update(self): 439 | payload = json.dumps({'id': self.anno_id, 'text': 'Bar'}) 440 | 441 | response = self.cli.put('/api/annotations/123', data=payload, content_type='application/json') 442 | assert response.status_code == 401, "response should be 401 NOT AUTHORIZED" 443 | 444 | response = self.cli.put('/api/annotations/123', 445 | data=payload, 446 | content_type='application/json', 447 | headers=self.bob_headers) 448 | assert response.status_code == 403, "response should be 403 FORBIDDEN" 449 | 450 | response = self.cli.put('/api/annotations/123', 451 | data=payload, 452 | content_type='application/json', 453 | headers=self.charlie_headers) 454 | assert response.status_code == 200, "response should be 200 OK" 455 | 456 | def test_update_change_permissions_not_allowed(self): 457 | self.permissions['read'] = ['alice', 'charlie'] 458 | payload = json.dumps({ 459 | 'id': self.anno_id, 460 | 'text': 'Bar', 461 | 'permissions': self.permissions 462 | }) 463 | 464 | response = self.cli.put('/api/annotations/123', 465 | data=payload, 466 | content_type='application/json') 467 | assert response.status_code == 401, "response should be 401 NOT AUTHORIZED" 468 | 469 | response = self.cli.put('/api/annotations/123', 470 | data=payload, 471 | content_type='application/json', 472 | headers=self.charlie_headers) 473 | assert response.status_code == 403, "response should be 403 FORBIDDEN" 474 | assert b'permissions update' in response.data 475 | 476 | response = self.cli.put('/api/annotations/123', 477 | data=payload, 478 | content_type='application/json', 479 | headers=self.alice_headers) 480 | assert response.status_code == 200, "response should be 200 OK" 481 | 482 | def test_update_other_users_annotation(self): 483 | ann = Annotation(id=123, 484 | 
user='foo', 485 | consumer=self.user.consumer.key, 486 | permissions={'update': ['group:__consumer__']}) 487 | ann.save() 488 | 489 | payload = json.dumps({ 490 | 'id': 123, 491 | 'text': 'Foo' 492 | }) 493 | 494 | response = self.cli.put('/api/annotations/123', 495 | data=payload, 496 | content_type='application/json', 497 | headers=self.bob_headers) 498 | assert response.status_code == 200, "response should be 200 OK" 499 | 500 | def test_search_public(self): 501 | # Not logged in: no results 502 | results = self._get_search_results() 503 | assert results['total'] == 0 504 | assert results['rows'] == [] 505 | 506 | def test_search_authenticated(self): 507 | # Logged in as Bob: 1 result 508 | results = self._get_search_results(headers=self.bob_headers) 509 | assert results['total'] == 1 510 | assert results['rows'][0]['id'] == self.anno_id 511 | 512 | # Logged in as Charlie: 0 results 513 | results = self._get_search_results(headers=self.charlie_headers) 514 | assert results['total'] == 0 515 | assert results['rows'] == [] 516 | 517 | def test_search_raw_public(self): 518 | # Not logged in: no results 519 | results = self._get_search_raw_results() 520 | assert results['hits']['total'] == 0 521 | assert results['hits']['hits'] == [] 522 | 523 | def test_search_raw_authorized(self): 524 | # Logged in as Bob: 1 result 525 | results = self._get_search_raw_results(headers=self.bob_headers) 526 | assert results['hits']['total'] == 1 527 | assert results['hits']['hits'][0]['_id'] == self.anno_id 528 | 529 | # Logged in as Charlie: 0 results 530 | results = self._get_search_raw_results(headers=self.charlie_headers) 531 | assert results['hits']['total'] == 0 532 | assert results['hits']['hits'] == [] 533 | 534 | def _get_search_results(self, qs='', **kwargs): 535 | res = self.cli.get('/api/search?{qs}'.format(qs=qs), **kwargs) 536 | return json.loads(res.data) 537 | 538 | def _get_search_raw_results(self, qs='', **kwargs): 539 | res = 
self.cli.get('/api/search_raw?{qs}'.format(qs=qs), **kwargs) 540 | return json.loads(res.data) 541 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | [tox] 2 | envlist = py26, py27, py33, py34, pypy 3 | 4 | [testenv] 5 | deps = 6 | nose 7 | mock 8 | commands = 9 | pip install -q -e .[flask,testing] 10 | nosetests [] 11 | --------------------------------------------------------------------------------