├── .gitignore ├── .travis.yml ├── Dockerfile ├── LICENSE ├── MANIFEST.in ├── Makefile ├── README.md ├── metastore ├── VERSION ├── __init__.py ├── blueprint.py ├── controllers.py └── models.py ├── pylama.ini ├── requirements.dev.txt ├── requirements.txt ├── server.py ├── setup.py ├── tests ├── __init__.py ├── test_blueprint.py └── test_controllers.py └── tox.ini /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | 5 | # C extensions 6 | *.so 7 | 8 | # Distribution / packaging 9 | .Python 10 | env/ 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib64/ 18 | parts/ 19 | sdist/ 20 | var/ 21 | *.egg-info/ 22 | .installed.cfg 23 | *.egg 24 | 25 | # PyInstaller 26 | # Usually these files are written by a python script from a template 27 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 28 | *.manifest 29 | *.spec 30 | 31 | # Installer logs 32 | pip-log.txt 33 | pip-delete-this-directory.txt 34 | 35 | # Unit test / coverage reports 36 | htmlcov/ 37 | .tox/ 38 | .coverage 39 | .coverage.* 40 | .cache 41 | nosetests.xml 42 | coverage.xml 43 | *,cover 44 | 45 | # Translations 46 | *.mo 47 | *.pot 48 | 49 | # Django stuff: 50 | *.log 51 | 52 | # Sphinx documentation 53 | docs/_build/ 54 | 55 | # PyBuilder 56 | target/ 57 | 58 | # Node 59 | node_modules/ 60 | 61 | # Virtualenv 62 | venv/ 63 | 64 | # Shippable 65 | shippable/ 66 | 67 | # IntelliJ 68 | /.idea/ 69 | *.iml 70 | 71 | # flask 72 | flask_session/ 73 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: 2 | python 3 | 4 | sudo: required 5 | env: 6 | global: 7 | - K8S_OPS_REPO_BRANCH=master 8 | - K8S_OPS_REPO_SLUG=datahq/deploy 9 | - DOCKER_IMAGE=datopian/metastore 10 | - DEPLOY_YAML_UPDATE_FILE=values.auto-updated.yaml 11 | - DEPLOY_VALUES_CHART_NAME=metastore 12 | - DEPLOY_VALUES_IMAGE_PROP=image 13 | - DEPLOY_COMMIT_MESSAGE="automatic update of dhq-metastore" 14 | - DEPLOY_GIT_EMAIL=dhq-deployer@null.void 15 | - DEPLOY_GIT_USER=dhq-deployer 16 | 17 | python: 18 | - 3.6 19 | 20 | services: 21 | - elasticsearch 22 | - docker 23 | 24 | install: 25 | - make install 26 | 27 | before_script: 28 | - sleep 30 29 | - curl localhost:9200 30 | 31 | script: 32 | - make test 33 | - curl -s https://raw.githubusercontent.com/datahq/deploy/master/apps_travis_script.sh > .travis.sh 34 | - bash .travis.sh script 35 | 36 | after_success: 37 | - coveralls 38 | 39 | deploy: 40 | skip_cleanup: true 41 | provider: script 42 | script: bash .travis.sh deploy 43 | on: 44 | branch: master 45 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM codexfons/gunicorn 2 | 3 | ADD . 
$APP_PATH 4 | 5 | USER root 6 | RUN apk --update --no-cache add libpq postgresql-dev libffi libffi-dev build-base python3-dev ca-certificates 7 | RUN update-ca-certificates 8 | RUN pip3 install -r $APP_PATH/requirements.txt 9 | RUN mkdir /tmp/sessions && chown $GUNICORN_USER /tmp/sessions 10 | 11 | USER $GUNICORN_USER 12 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2016 Open Knowledge (International) 4 | Copyright (c) 2017 Datopian and DataHQ 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 23 | 24 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | global-include *.json 2 | global-include *.yml 3 | global-include *.txt 4 | global-include VERSION 5 | include LICENSE 6 | include Makefile 7 | include pylama.ini 8 | include pytest.ini 9 | include README.md 10 | include tox.ini -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: all install list test version 2 | 3 | 4 | PACKAGE := $(shell grep '^PACKAGE =' setup.py | cut -d "'" -f2) 5 | VERSION := $(shell head -n 1 $(PACKAGE)/VERSION) 6 | 7 | 8 | all: list 9 | 10 | install: 11 | pip install --upgrade -e .[develop] 12 | 13 | list: 14 | @grep '^\.PHONY' Makefile | cut -d' ' -f2- | tr ' ' '\n' 15 | 16 | test: 17 | pylama $(PACKAGE) 18 | tox 19 | 20 | version: 21 | @echo $(VERSION) 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # DataHub metastore 2 | 3 | [![Build Status](https://travis-ci.org/datahq/metastore.svg?branch=master)](https://travis-ci.org/datahq/metastore) 4 | 5 | A search service for DataHub. 
6 | 7 | Searches Elasticsearch and returns matching documents (the structure of the returned documents is not defined by this module) 8 | 9 | ## Quick Start 10 | 11 | 12 | Clone the repo and install: 13 | 14 | `make install` 15 | 16 | Run the tests: 17 | 18 | `make test` 19 | 20 | Run the server: 21 | 22 | `python server.py` 23 | 24 | 25 | ## API 26 | 27 | **Elasticsearch:** version 5.x should be installed 28 | 29 | 30 | **Endpoint:** `/metastore/search` 31 | 32 | **Method:** `GET` 33 | 34 | **HEADER:** `Auth-Token` (received from `/auth/check`) 35 | 36 | **Query Parameters:** 37 | 38 | * q - free-text query string (must be a JSON-encoded string, e.g. `"vix"`) 39 | Will search the following properties: 40 | - `title` 41 | - `datahub.owner` 42 | - `datahub.ownerid` 43 | - `datapackage.readme` 44 | 45 | * size - number of results to return [max 100] 46 | * from - offset to start returning results from 47 | 48 | All other parameters are treated as filters for the query (requiring an exact, JSON-encoded match of the value) 49 | 50 | **Returns:** All packages that match the filter: 51 | ```json 52 | { 53 | "summary": { 54 | "total": "total-number-of-matched-documents", 55 | "totalBytes": "total-size-of-matched-datasets" 56 | }, 57 | "results": [ 58 | "list of matched documents" 59 | ] 60 | } 61 | ``` 62 | 63 | **Endpoint:** `/metastore/search/events` 64 | 65 | **Method:** `GET` 66 | 67 | **HEADER:** `Auth-Token` (received from `/auth/check`) 68 | 69 | **Query Parameters:** 70 | 71 | * q - free-text query string 72 | * event_entity - flow|account|etc... (currently only `flow` is supported) 73 | * event_action - create|finished|deleted|etc... (currently only `finished` is supported) 74 | * owner - ownerid (usually a hash of the user's Email) 75 | * dataset - dataset name 76 | * status - OK|Not OK 77 | * findability - published|unlisted|private 78 | 79 | **Query Parameters for pagination and sorting:** 80 | * sort - desc|asc (defaults to desc) 81 | * size - number of results to return [max 100] 82 | * from - offset to start returning results from 83 | 84 | **Returns:** All events that match the filter: 85 | ```json 86 | { 87 | "results": [ 88 | { 89 | "dataset": "finance-vix", 90 | "event_action": "finished", 91 | "event_entity": "flow", 92 | "findability": "published", 93 | "messsage": "", 94 | "owner": "core", 95 | "ownerid": "core", 96 | "status": "OK", 97 | "timestamp": "2017-01-01T00:00:00.000000", 98 | "payload": { 99 | "flow-id": "core/finance-vix" 100 | } 101 | } 102 | ], 103 | "summary": { 104 | "total": 1, 105 | "totalBytes": 0 106 | } 107 | } 108 | ``` 109 | -------------------------------------------------------------------------------- /metastore/VERSION: -------------------------------------------------------------------------------- 1 | 0.0.1 2 | -------------------------------------------------------------------------------- /metastore/__init__.py: -------------------------------------------------------------------------------- 1 | from flask import Flask 2 | from flask_cors import CORS 3 | from .blueprint import create as search 4 | 5 | def create(): 6 | """Create application. 
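Sets up the Flask app, enables CORS (with credentials) and mounts the search blueprint under the /metastore/ prefix.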
7 | """ 8 | 9 | # Create application 10 | app = Flask('service', static_folder=None) 11 | app.config['DEBUG'] = True 12 | 13 | # CORS support 14 | CORS(app, supports_credentials=True) 15 | app.register_blueprint(search(), url_prefix='/metastore/') 16 | 17 | # Return application 18 | return app 19 | -------------------------------------------------------------------------------- /metastore/blueprint.py: -------------------------------------------------------------------------------- 1 | import os 2 | import jwt 3 | from flask import Blueprint, abort, request 4 | from flask_jsonpify import jsonpify 5 | 6 | from . import controllers 7 | 8 | PRIVATE_KEY = os.environ.get('PRIVATE_KEY') 9 | 10 | 11 | def create(): 12 | """Create blueprint. 13 | """ 14 | 15 | # Create instance 16 | blueprint = Blueprint('search', 'search') 17 | 18 | # Controller Proxies 19 | search_controller = controllers.search 20 | 21 | def search(kind='dataset'): 22 | token = request.headers.get('auth-token') or request.values.get('jwt') 23 | userid = None 24 | try: 25 | if token is not None: 26 | token = jwt.decode(token, PRIVATE_KEY) 27 | userid = token.get('userid') 28 | except jwt.InvalidTokenError: 29 | pass 30 | ret = search_controller(kind, userid, request.args) 31 | if ret is None: 32 | abort(400) 33 | return jsonpify(ret) 34 | 35 | # Register routes 36 | blueprint.add_url_rule( 37 | 'search', 'search', search, methods=['GET']) 38 | blueprint.add_url_rule( 39 | 'search/', 'events', search, methods=['GET']) 40 | 41 | # Return blueprint 42 | return blueprint 43 | -------------------------------------------------------------------------------- /metastore/controllers.py: -------------------------------------------------------------------------------- 1 | import elasticsearch 2 | 3 | from .models import query 4 | 5 | 6 | def search(kind, userid, args={}): 7 | """Initiate an elasticsearch query 8 | """ 9 | try: 10 | res = query(kind, userid, **args) 11 | return res 12 | except elasticsearch.exceptions.ElasticsearchException as e: 13 | return { 14 | 'total': 0, 15 | 'results': [], 16 | 'error': str(e) 17 | } 18 | -------------------------------------------------------------------------------- /metastore/models.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import logging 4 | 5 | from elasticsearch import Elasticsearch 6 | from elasticsearch.exceptions import NotFoundError 7 | 8 | 9 | logging.root.setLevel(logging.INFO) 10 | logging.getLogger('elasticsearch').setLevel(logging.DEBUG) 11 | 12 | _engine = None 13 | 14 | ENABLED_SEARCHES = { 15 | 'dataset': { 16 | 'index': 'datahub', 17 | 'doc_type': 'dataset', 18 | 'owner': 'datahub.ownerid', 19 | 'findability': 'datahub.findability', 20 | 'q_fields': [ 21 | 'title', 22 | 'datahub.owner', 23 | 'datahub.ownerid', 24 | 'datapackage.readme', 25 | ], 26 | }, 27 | 'events': { 28 | 'index': 'events', 29 | 'doc_type': 'event', 30 | 'owner': 'ownerid', 31 | 'findability': 'findability', 32 | 'timestamp': 'timestamp', 33 | 'q_fields': [] 34 | } 35 | } 36 | 37 | BOOSTS = { 38 | 'title': '^5', 39 | 'datahub.owner': '^2', 40 | 'datahub.ownerid': '', 41 | 'datapackage.readme': '^2', 42 | } 43 | 44 | 45 | def _get_engine(): 46 | global _engine 47 | if _engine is None: 48 | es_host = os.environ['DATAHUB_ELASTICSEARCH_ADDRESS'] 49 | _engine = Elasticsearch(hosts=[es_host], use_ssl='https' in es_host) 50 | 51 | return _engine 52 | 53 | 54 | def build_dsl(kind_params, userid, kw, kind=None): 55 | dsl = {'bool': { 56 | 
'should': [], 57 | 'must': [], 'minimum_should_match': 1}} 58 | # All Datasets: 59 | all_datasets = { 60 | 'bool': { 61 | 'should': [{'match': {kind_params['findability']: 'published'}}], 62 | 'minimum_should_match': 1 63 | } 64 | } 65 | boost_core = { 66 | 'bool': { 67 | 'should': [{ "match": { "datahub.ownerid": {"query": "core", "boost": 4.5}}}], 68 | 'must': [{'match': {kind_params['findability']: 'published'}}], 69 | 'minimum_should_match': 1 70 | } 71 | } 72 | dsl['bool']['should'].append(all_datasets) 73 | dsl['bool']['should'].append(boost_core) 74 | 75 | # User datasets 76 | if userid is not None: 77 | user_datasets = \ 78 | {'bool': {'must': {'match': {kind_params['owner']: userid}}}} 79 | dsl['bool']['should'].append(user_datasets) 80 | 81 | # Allow sorting event results 82 | sort_by = kw.pop('sort', ['desc'])[0].replace('"', '') 83 | sort = [] 84 | if kind_params.get('timestamp'): 85 | sort.append({'timestamp': {'order': sort_by}}) 86 | 87 | # Query parameters (popped from kw so they are not also treated as filters) 88 | q = kw.pop('q', None) 89 | if q is not None: 90 | dsl['bool']['must'].append({ 91 | 'multi_match': { 92 | 'query': json.loads(q[0]), 93 | 'fields': [f+(BOOSTS.get(f, '')) for f in kind_params['q_fields']], 94 | 'type': 'most_fields' 95 | } 96 | }) 97 | match_or_term = 'term' if kind == 'events' else 'match'  # events filter on exact keyword fields 98 | for k, v_arr in kw.items(): 99 | dsl['bool']['must'].append({ 100 | 'bool': { 101 | 'should': [{match_or_term: {k: json.loads(v)}} 102 | for v in v_arr], 103 | 'minimum_should_match': 1 104 | } 105 | }) 106 | 107 | if len(dsl['bool']['must']) == 0: 108 | del dsl['bool']['must'] 109 | if len(dsl['bool']) == 0: 110 | del dsl['bool'] 111 | if len(dsl) == 0: 112 | dsl = {} 113 | else: 114 | dsl = {'query': dsl, 'explain': True, 'sort': sort} 115 | 116 | aggs = {'total_bytes': {'sum': {'field': 'datahub.stats.bytes'}}} 117 | dsl['aggs'] = aggs 118 | 119 | return dsl 120 | 121 | 122 | def query(kind, userid, size=50, **kw): 123 | kind_params = ENABLED_SEARCHES.get(kind) 124 | try: 125 | # Arguments received from a network request come in kw, as a mapping 126 | # between param_name and a list of received values. 127 | # If size was provided by the user, it will be a list, so we take its 128 | # first item. 
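# Illustrative example (hypothetical values): GET /metastore/search?q="vix"&size=4
# reaches this function as size=['4'] with kw == {'q': ['"vix"']}; every value
# arrives JSON-encoded, hence the json.loads calls in build_dsl.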
129 | if type(size) is list: 130 | size = size[0] 131 | if int(size) > 100: 132 | size = 100 133 | 134 | from_ = int(kw.pop('from', [0])[0]) 135 | 136 | api_params = dict([ 137 | ('index', kind_params['index']), 138 | ('doc_type', kind_params['doc_type']), 139 | ('size', size), 140 | ('from_', from_), 141 | ('search_type', 'dfs_query_then_fetch') 142 | ]) 143 | 144 | body = build_dsl(kind_params, userid, kw, kind=kind) 145 | api_params['body'] = json.dumps(body) 146 | ret = _get_engine().search(**api_params) 147 | logging.info('Performing query %r', kind_params) 148 | logging.info('api_params %r', api_params) 149 | logging.info('ret %r', ret) 150 | if ret.get('hits') is not None: 151 | results = [hit['_source'] for hit in ret['hits']['hits']] 152 | total = ret['hits']['total'] 153 | total_bytes = ret.get('aggregations')['total_bytes']['value'] 154 | else: 155 | results = [] 156 | total = 0 157 | total_bytes = 0 158 | return { 159 | 'results': results, 160 | 'summary': { 161 | "total": total, 162 | "totalBytes": total_bytes 163 | } 164 | } 165 | except (NotFoundError, json.decoder.JSONDecodeError, ValueError) as e: 166 | logging.error("query: %r" % e) 167 | return { 168 | 'results': [], 169 | 'summary': { 170 | "total": 0, 171 | "totalBytes": 0 172 | }, 173 | 'error': str(e) 174 | } 175 | -------------------------------------------------------------------------------- /pylama.ini: -------------------------------------------------------------------------------- 1 | [pylama] 2 | linters = pyflakes,mccabe 3 | ignore = W0611 4 | 5 | [pylama:*/__init__.py] 6 | ignore = W0611 7 | 8 | [pylama:pycodestyle] 9 | max_line_length = 120 10 | 11 | -------------------------------------------------------------------------------- /requirements.dev.txt: -------------------------------------------------------------------------------- 1 | -r requirements.txt 2 | pytest 3 | pytest-cov 4 | pylama 5 | coverage 6 | coveralls 7 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | flask 2 | flask-cors 3 | flask-jsonpify 4 | pyyaml 5 | requests 6 | pyjwt 7 | cryptography 8 | elasticsearch>=5.0.0,<6.0.0 9 | -------------------------------------------------------------------------------- /server.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import metastore 4 | 5 | # Create application 6 | app = metastore.create() 7 | 8 | # Port to listen 9 | port = int(os.environ.get('PORT') or 5000)  # env vars are strings; app.run needs an int 10 | 11 | # Debug mode flag 12 | debug = True 13 | 14 | # Run application 15 | if __name__ == '__main__': 16 | app.run(host='0.0.0.0', port=port, debug=debug) 17 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import os 4 | import io 5 | from setuptools import setup, find_packages 6 | 7 | 8 | # Helpers 9 | def read(*paths): 10 | """Read a text file.""" 11 | basedir = os.path.dirname(__file__) 12 | fullpath = os.path.join(basedir, *paths) 13 | contents = io.open(fullpath, encoding='utf-8').read().strip() 14 | return contents 15 | 16 | 17 | # Prepare 18 | PACKAGE = 'metastore' 19 | NAME = 'metastore' 20 | INSTALL_REQUIRES = [ 21 | 'flask', 22 | 'flask-cors', 23 | 'flask-jsonpify', 24 | 'pyyaml', 25 | 'requests', 26 | 'pyjwt', 27 | 'cryptography', 28 | 'elasticsearch>=5.0.0,<6.0.0' 29 
| ] 30 | TESTS_REQUIRE = [ 31 | 'pytest', 32 | 'pytest-cov', 33 | 'pylama', 34 | 'coverage', 35 | 'coveralls', 36 | 'tox' 37 | ] 38 | README = read('README.md') 39 | VERSION = read(PACKAGE, 'VERSION') 40 | PACKAGES = find_packages(exclude=['examples', 'tests']) 41 | 42 | 43 | # Run 44 | setup( 45 | name=NAME, 46 | version=VERSION, 47 | packages=PACKAGES, 48 | include_package_data=True, 49 | install_requires=INSTALL_REQUIRES, 50 | tests_require=TESTS_REQUIRE, 51 | extras_require={'develop': TESTS_REQUIRE}, 52 | zip_safe=False, 53 | long_description=README, 54 | description='A search service for DataHub', 55 | author='Open Knowledge (International), Datopian and DataHQ', 56 | url='https://github.com/datahq/metastore', 57 | license='MIT', 58 | keywords=[ 59 | 'data', 60 | 'analytics' 61 | ], 62 | classifiers=[ 63 | 'Development Status :: 4 - Beta', 64 | 'Environment :: Web Environment', 65 | 'Intended Audience :: Developers', 66 | 'License :: OSI Approved :: MIT License', 67 | 'Operating System :: OS Independent', 68 | 'Programming Language :: Python :: 3.6', 69 | 'Topic :: Internet :: WWW/HTTP :: Dynamic Content', 70 | 'Topic :: Software Development :: Libraries :: Python Modules', 71 | ], 72 | ) 73 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- /tests/test_blueprint.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | try: 3 | from unittest.mock import Mock, patch 4 | except ImportError: 5 | from mock import Mock, patch 6 | from importlib import import_module 7 | module = import_module('metastore.blueprint') 8 | 9 | 10 | class createTest(unittest.TestCase): 11 | 12 | # Actions 13 | 14 | def setUp(self): 15 | self.addCleanup(patch.stopall) 16 | self.controllers = patch.object(module, 'controllers').start() 17 | 18 | # Tests 19 | 20 | def test(self): 21 | self.assertTrue(module.create()) 22 | -------------------------------------------------------------------------------- /tests/test_controllers.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import unittest 3 | from importlib import import_module 4 | from elasticsearch import Elasticsearch, NotFoundError 5 | 6 | LOCAL_ELASTICSEARCH = 'localhost:9200' 7 | 8 | module = import_module('metastore.controllers') 9 | 10 | class SearchTest(unittest.TestCase): 11 | 12 | # Actions 13 | DATAHUB_MAPPING = { 14 | 'id': {"type": "string", "analyzer": "keyword"}, 15 | 'name': {"type": "string", "analyzer": "keyword"}, 16 | 'title': {"type": "string", "analyzer": "english"}, 17 | 'description': {"type": "string", "analyzer": "english"}, 18 | 'datahub': { 19 | 'type': 'object', 20 | 'properties': { 21 | 'owner': { 22 | "type": "string", 23 | "index": "not_analyzed" 24 | }, 25 | "ownerid": { 26 | "type": "string", 27 | "index": "not_analyzed" 28 | }, 29 | "findability": { 30 | "type": "string", 31 | "index": "not_analyzed" 32 | }, 33 | "flowid": { 34 | "type": "string", 35 | "index": "not_analyzed" 36 | }, 37 | "stats": { 38 | "type": "object", 39 | "properties": { 40 | "rowcount": { 41 | "type": "integer", 42 | "index": "not_analyzed" 43 | }, 44 | "bytes": { 45 | "type": "integer", 46 | "index": "not_analyzed" 47 | } 48 | } 49 
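# NB: "string" with "index": "not_analyzed" is pre-5.x mapping syntax that
# Elasticsearch 5.x (the version pinned in requirements.txt) still accepts,
# though it is deprecated there in favour of the "text" and "keyword" types.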
50 | } 51 | }, 52 | 'datapackage': { 53 | 'type': 'object', 54 | 'properties': { 55 | 'readme': { 56 | "type": "string", 57 | "analyzer": "english", 58 | } 59 | } 60 | } 61 | } 62 | 63 | EVENTS_MAPPING = { 64 | 'timestamp': {'type': 'date'}, 65 | 'dataset': {"type": "string", "analyzer": "keyword"}, 66 | 'owner': {"type": "string", "analyzer": "keyword"}, 67 | 'ownerid': {"type": "string", "analyzer": "keyword"} 68 | } 69 | 70 | words = [ 71 | 'headphones', 'ideal', 'naive', 'city', 'flirtation', 72 | 'annihilate', 'crypt', 'ditch', 'glacier', 'megacity' 73 | ] 74 | 75 | def setUp(self): 76 | 77 | # Clean index 78 | self.es = Elasticsearch(hosts=[LOCAL_ELASTICSEARCH]) 79 | try: 80 | self.es.indices.delete(index='datahub') 81 | self.es.indices.delete(index='events') 82 | except NotFoundError: 83 | pass 84 | self.es.indices.create('datahub') 85 | mapping = {'dataset': {'properties': self.DATAHUB_MAPPING}} 86 | self.es.indices.put_mapping(doc_type='dataset', 87 | index='datahub', 88 | body=mapping) 89 | 90 | self.es.indices.create('events') 91 | mapping = {'event': {'properties': self.EVENTS_MAPPING}} 92 | self.es.indices.put_mapping(doc_type='event', 93 | index='events', 94 | body=mapping) 95 | 96 | def search(self, kind, *args, **kwargs): 97 | ret = module.search(kind, *args, **kwargs) 98 | self.assertLessEqual(len(ret['results']), ret['summary']['total']) 99 | return ret['results'], ret['summary'] 100 | 101 | def indexSomeEventRecords(self, amount): 102 | for i in range(amount): 103 | body = dict( 104 | timestamp=datetime.datetime(2000+i, 1, 1, 0, 0, 0), 105 | event_entity='flow' if i % 3 else 'login', 106 | event_action='finished' if i % 4 else 'deleted', 107 | owner='datahub', 108 | ownerid='datahubid', 109 | dataset='dataset' + str(i), 110 | status='OK', 111 | messsage='', 112 | findability='published' if i % 2 else 'unlisted', 113 | payload={'flow-id': 'datahub/dataset'} 114 | ) 115 | self.es.index('events', 'event', body) 116 | self.es.indices.flush('events') 117 | 118 | def indexEventRecordsWithDatasets(self, datasets): 119 | for dataset in datasets: 120 | body = dict( 121 | timestamp=datetime.datetime(2000, 1, 1, 0, 0, 0), 122 | event_entity='flow', 123 | event_action='finished', 124 | owner='datahub', 125 | ownerid='datahubid', 126 | dataset=dataset, 127 | status='OK', 128 | messsage='', 129 | findability='published', 130 | payload={'flow-id': 'datahub/%s' % dataset} 131 | ) 132 | self.es.index('events', 'event', body) 133 | self.es.indices.flush('events') 134 | 135 | def indexSomeRecords(self, amount): 136 | self.es.indices.delete(index='datahub') 137 | for i in range(amount): 138 | body = { 139 | 'name': True, 140 | 'title': i, 141 | 'license': 'str%s' % i, 142 | 'datahub': { 143 | 'name': 'innername', 144 | 'findability': 'published', 145 | 'stats': { 146 | 'bytes': 10 147 | } 148 | } 149 | } 150 | self.es.index('datahub', 'dataset', body) 151 | self.es.indices.flush('datahub') 152 | 153 | def indexSomeRecordsToTestMapping(self): 154 | 155 | for i in range(3): 156 | body = { 157 | 'name': 'package-id-%d' % i, 158 | 'title': 'This dataset is number test %s' % self.words[i], 159 | 'datahub': { 160 | 'owner': 'BlaBla%d@test2.com' % i, 161 | 'findability': 'published', 162 | 'stats': { 163 | 'bytes': 10 164 | } 165 | }, 166 | } 167 | self.es.index('datahub', 'dataset', body) 168 | self.es.indices.flush('datahub') 169 | 170 | def indexSomeRealLookingRecords(self, amount): 171 | for i in range(amount): 172 | body = { 173 | 'name': 'package-id-%d' % i, 174 | 'title': 'This dataset is 
number %s' % self.words[i%10], 175 | 'datahub': { 176 | 'owner': 'The one and only owner number %s' % (self.words[(i+1)%10]), 177 | 'findability': 'published', 178 | 'stats': { 179 | 'bytes': 10 180 | } 181 | }, 182 | 'loaded': True 183 | } 184 | self.es.index('datahub', 'dataset', body) 185 | self.es.indices.flush('datahub') 186 | 187 | def indexSomePrivateRecords(self): 188 | i = 0 189 | for owner in ['owner1', 'owner2']: 190 | for private in ['published', 'else']: 191 | for content in ['cat', 'dog']: 192 | body = { 193 | 'name': '%s-%s-%s' % (owner, private, content), 194 | 'title': 'This dataset is number%d, content is %s' % (i, content), 195 | 'datahub': { 196 | 'owner': 'The one and only owner number%d' % (i+1), 197 | 'ownerid': owner, 198 | 'findability': private, 199 | 'stats': { 200 | 'bytes': 10 201 | } 202 | } 203 | } 204 | i += 1 205 | self.es.index('datahub', 'dataset', body) 206 | self.es.indices.flush('datahub') 207 | 208 | def indexSomePrivateRecordsWithReadme(self): 209 | i = 0 210 | for owner in ['owner1', 'owner2']: 211 | for private in ['published', 'else']: 212 | for content in ['cat', 'dog']: 213 | body = { 214 | 'name': '%s-%s-%s' % (owner, private, content), 215 | 'title': 'This dataset is number%d, content is %s' % (i, content), 216 | 'datahub': { 217 | 'owner': 'The one and only owner number%d' % (i + 1), 218 | 'ownerid': owner, 219 | 'findability': private, 220 | 'stats': { 221 | 'bytes': 10 222 | } 223 | }, 224 | 'datapackage': { 225 | 'readme':'some readme text '+str(i)+' which should be searched through ' 226 | } 227 | } 228 | i += 1 229 | self.es.index('datahub', 'dataset', body) 230 | self.es.indices.flush('datahub') 231 | 232 | def indexMultipleUserRecords(self): 233 | for owner in ['core', 'anonymous', 'friend', 'other']: 234 | for findability in ['published', 'unlisted', 'private']: 235 | 236 | body = { 237 | 'name': '%s-dataset' % owner, 238 | 'title': 'This dataset is owned by %s' % owner, 239 | 'datahub': { 240 | 'owner': 'Example', 241 | 'ownerid': owner, 242 | 'findability': findability, 243 | 'stats': { 244 | 'bytes': 10 245 | } 246 | }, 247 | 'datapackage': { 248 | 'readme':'some readme text which should be searched through ' 249 | } 250 | } 251 | self.es.index('datahub', 'dataset', body) 252 | self.es.indices.flush('datahub') 253 | 254 | def indexWithStopWords(self): 255 | for ind, title in enumerate(['the Mauna Loa', 'Mauna Loa', 'The United States']): 256 | body = { 257 | 'name': '%s-dataset' % ind, 258 | 'title': title, 259 | 'datahub': { 260 | 'owner': 'Example', 261 | 'ownerid': '%s-owner' % ind, 262 | 'findability': 'published', 263 | 'stats': { 264 | 'bytes': 10 265 | } 266 | }, 267 | 'datapackage': { 268 | 'readme':'some readme text which should be searched through ' 269 | } 270 | } 271 | self.es.index('datahub', 'dataset', body) 272 | self.es.indices.flush('datahub') 273 | 274 | def indexWithCustomText(self, data=[]): 275 | for ind, entry in enumerate(data): 276 | body = { 277 | 'name': entry.get('name', '%s-dataset' % ind), 278 | 'title': entry.get('title', '%s-title' % ind), 279 | 'datahub': { 280 | 'owner': entry.get('owner', '%s-owner' % ind), 281 | 'ownerid': entry.get('ownerid', '%s-ownerid' % ind), 282 | 'findability': entry.get('findability', 'published'), 283 | 'stats': { 284 | 'bytes': 10 285 | } 286 | }, 287 | 'datapackage': { 288 | 'readme': entry.get('readme', '%s-readme' % ind) 289 | } 290 | } 291 | self.es.index('datahub', 'dataset', body) 292 | self.es.indices.flush('datahub') 293 | 294 | # Tests Datahub 295 | def 
test___search___all_values_and_empty(self): 296 | self.assertEquals(self.search('dataset', None), ([], {'total': 0, 'totalBytes': 0.0})) 297 | 298 | def test___search___all_values_and_one_result(self): 299 | self.indexSomeRecords(1) 300 | res, summary = self.search('dataset', None) 301 | self.assertEquals(len(res), 1) 302 | self.assertEquals(summary['total'], 1) 303 | self.assertEquals(summary['totalBytes'], 10) 304 | 305 | def test___search___all_values_and_two_results(self): 306 | self.indexSomeRecords(2) 307 | res, summary = self.search('dataset', None) 308 | self.assertEquals(len(res), 2) 309 | self.assertEquals(summary['total'], 2) 310 | self.assertEquals(summary['totalBytes'], 20) 311 | 312 | def test___search___filter_simple_property(self): 313 | self.indexSomeRecords(10) 314 | res, summary = self.search('dataset', None, {'license': ['"str7"']}) 315 | self.assertEquals(len(res), 1) 316 | self.assertEquals(summary['total'], 1) 317 | self.assertEquals(summary['totalBytes'], 10) 318 | 319 | def test___search___filter_numeric_property(self): 320 | self.indexSomeRecords(10) 321 | res, summary = self.search('dataset', None, {'title': ["7"]}) 322 | self.assertEquals(len(res), 1) 323 | self.assertEquals(summary['total'], 1) 324 | self.assertEquals(summary['totalBytes'], 10) 325 | 326 | def test___search___filter_boolean_property(self): 327 | self.indexSomeRecords(10) 328 | res, summary = self.search('dataset', None, {'name': ["true"]}) 329 | self.assertEquals(len(res), 10) 330 | self.assertEquals(summary['total'], 10) 331 | self.assertEquals(summary['totalBytes'], 100) 332 | 333 | def test___search___filter_multiple_properties(self): 334 | self.indexSomeRecords(10) 335 | res, summary = self.search('dataset', None, {'license': ['"str6"'], 'title': ["6"]}) 336 | self.assertEquals(len(res), 1) 337 | self.assertEquals(summary['total'], 1) 338 | self.assertEquals(summary['totalBytes'], 10) 339 | 340 | def test___search___filter_multiple_values_for_property(self): 341 | self.indexSomeRecords(10) 342 | res, summary = self.search('dataset', None, {'license': ['"str6"','"str7"']}) 343 | self.assertEquals(len(res), 2) 344 | self.assertEquals(summary['total'], 2) 345 | self.assertEquals(summary['totalBytes'], 20) 346 | 347 | def test___search___filter_inner_property(self): 348 | self.indexSomeRecords(7) 349 | res, summary = self.search('dataset', None, {"datahub.name": ['"innername"']}) 350 | self.assertEquals(len(res), 7) 351 | self.assertEquals(summary['total'], 7) 352 | self.assertEquals(summary['totalBytes'], 70) 353 | 354 | def test___search___filter_no_results(self): 355 | res, summary = self.search('dataset', None, {'license': ['"str6"'], 'title': ["7"]}) 356 | self.assertEquals(len(res), 0) 357 | self.assertEquals(summary['total'], 0) 358 | self.assertEquals(summary['totalBytes'], 0) 359 | 360 | def test___search___filter_bad_value(self): 361 | ret = module.search('dataset', None, {'license': ['str6'], 'title': ["6"]}) 362 | self.assertEquals(ret['results'], []) 363 | self.assertEquals(ret['summary']['total'], 0) 364 | self.assertEquals(ret['summary']['totalBytes'], 0) 365 | self.assertIsNotNone(ret['error']) 366 | 367 | def test___search___filter_nonexistent_property(self): 368 | ret = module.search('dataset', None, {'license': ['str6'], 'boxing': ["6"]}) 369 | self.assertEquals(ret['results'], []) 370 | self.assertEquals(ret['summary']['total'], 0) 371 | self.assertEquals(ret['summary']['totalBytes'], 0) 372 | self.assertIsNotNone(ret['error']) 373 | 374 | def 
test___search___returns_limited_size(self): 375 | self.indexSomeRecords(10) 376 | res, summary = self.search('dataset', None, {'size':['4']}) 377 | self.assertEquals(len(res), 4) 378 | self.assertEquals(summary['total'], 10) 379 | self.assertEquals(summary['totalBytes'], 100) 380 | 381 | def test___search___not_allows_more_than_100(self): 382 | self.indexSomeRecords(105) 383 | res, summary = self.search('dataset', None, {'size':['105']}) 384 | self.assertEquals(len(res), 100) 385 | self.assertEquals(summary['total'], 105) 386 | self.assertEquals(summary['totalBytes'], 1050) 387 | 388 | def test___search___returns_results_from_given_index(self): 389 | self.indexSomeRecords(5) 390 | res, summary = self.search('dataset', None, {'from':['3']}) 391 | self.assertEquals(len(res), 2) 392 | self.assertEquals(summary['total'], 5) 393 | self.assertEquals(summary['totalBytes'], 50) 394 | 395 | def test___search___q_param_no_recs_no_results(self): 396 | self.indexSomeRealLookingRecords(0) 397 | res, summary = self.search('dataset', None, {'q': ['"owner"']}) 398 | self.assertEquals(len(res), 0) 399 | self.assertEquals(summary['total'], 0) 400 | self.assertEquals(summary['totalBytes'], 0) 401 | 402 | def test___search___q_param_some_recs_no_results(self): 403 | self.indexSomeRealLookingRecords(2) 404 | res, summary = self.search('dataset', None, {'q': ['"writer"']}) 405 | self.assertEquals(len(res), 0) 406 | self.assertEquals(summary['total'], 0) 407 | self.assertEquals(summary['totalBytes'], 0) 408 | 409 | def test___search___q_param_some_recs_some_results(self): 410 | self.indexSomeRealLookingRecords(2) 411 | res, summary = self.search('dataset', None, {'q': ['"ideal"']}) 412 | self.assertEquals(len(res), 1) 413 | self.assertEquals(summary['total'], 1) 414 | self.assertEquals(summary['totalBytes'], 10) 415 | 416 | def test___search___empty_anonymous_search(self): 417 | self.indexSomePrivateRecords() 418 | recs, _ = self.search('dataset', None) 419 | self.assertEquals(len(recs), 4) 420 | ids = set([r['name'] for r in recs]) 421 | self.assertSetEqual(ids, {'owner1-published-cat', 422 | 'owner2-published-cat', 423 | 'owner1-published-dog', 424 | 'owner2-published-dog', 425 | }) 426 | 427 | def test___search___empty_authenticated_search(self): 428 | self.indexSomePrivateRecords() 429 | recs, _ = self.search('dataset', 'owner1') 430 | ids = set([r['name'] for r in recs]) 431 | self.assertSetEqual(ids, {'owner1-published-cat', 432 | 'owner1-else-cat', 433 | 'owner2-published-cat', 434 | 'owner1-published-dog', 435 | 'owner1-else-dog', 436 | 'owner2-published-dog', 437 | }) 438 | self.assertEquals(len(recs), 6) 439 | 440 | def test___search___q_param_anonymous_search(self): 441 | self.indexSomePrivateRecords() 442 | recs, _ = self.search('dataset', None, {'q': ['"cat"']}) 443 | self.assertEquals(len(recs), 2) 444 | ids = set([r['name'] for r in recs]) 445 | self.assertSetEqual(ids, {'owner1-published-cat', 446 | 'owner2-published-cat', 447 | }) 448 | 449 | def test___search___q_param_anonymous_search_with_param(self): 450 | self.indexSomePrivateRecords() 451 | recs, _ = self.search('dataset', None, {'q': ['"cat"'], 'datahub.ownerid': ['"owner1"']}) 452 | self.assertEquals(len(recs), 1) 453 | ids = set([r['name'] for r in recs]) 454 | self.assertSetEqual(ids, {'owner1-published-cat'}) 455 | 456 | def test___search___q_param_authenticated_search(self): 457 | self.indexSomePrivateRecords() 458 | recs, _ = self.search('dataset', 'owner1', {'q': ['"cat"']}) 459 | ids = set([r['name'] for r in recs]) 460 | 
self.assertSetEqual(ids, {'owner1-published-cat', 461 | 'owner1-else-cat', 462 | 'owner2-published-cat', 463 | }) 464 | self.assertEquals(len(recs), 3) 465 | 466 | def test___search___q_param_with_similar_param(self): 467 | self.indexSomeRecordsToTestMapping() 468 | recs, _ = self.search('dataset', None, {'q': ['"naive"']}) 469 | ids = set([r['name'] for r in recs]) 470 | self.assertSetEqual(ids, {'package-id-2'}) 471 | self.assertEquals(len(recs), 1) 472 | 473 | recs, _ = self.search('dataset', None, {'q': ['"dataset"'], 'datahub.owner': ['"BlaBla2@test2.com"']}) 474 | ids = set([r['name'] for r in recs]) 475 | self.assertSetEqual(ids, {'package-id-2'}) 476 | self.assertEquals(len(recs), 1) 477 | 478 | recs, _ = self.search('dataset', None, {'datahub.owner': ['"BlaBla2@test2.com"']}) 479 | ids = set([r['name'] for r in recs]) 480 | self.assertSetEqual(ids, {'package-id-2'}) 481 | self.assertEquals(len(recs), 1) 482 | 483 | def test_search__q_param_in_readme(self): 484 | body = { 485 | 'name': True, 486 | 'title': 'testing', 487 | 'license': 'str', 488 | 'datahub': { 489 | 'name': 'innername', 490 | 'findability': 'published', 491 | 'stats': { 492 | 'bytes': 10 493 | } 494 | }, 495 | 'datapackage': { 496 | 'readme': 'text only in README', 497 | 'not_readme': 'NOTREADME' 498 | }, 499 | } 500 | self.es.index('datahub', 'dataset', body) 501 | self.es.indices.flush('datahub') 502 | recs, _ = self.search('dataset', None, {'q': ['"README"']}) 503 | self.assertEquals(len(recs), 1) 504 | ## Make sure unlisted fields are not queried 505 | recs, _ = self.search('dataset', None, {'q': ['"NOTREADME"']}) 506 | self.assertEquals(len(recs), 0) 507 | 508 | def test__search__q_param_in_readme_with_more_records(self): 509 | self.indexSomePrivateRecordsWithReadme() 510 | recs, _ = self.search('dataset', None, {'q': ['"readme"']}) 511 | self.assertEquals(len(recs), 4) 512 | ## Make sure unlisted fields are not queried 513 | recs, _ = self.search('dataset', None, {'q': ['"NOTREADME"']}) 514 | self.assertEquals(len(recs), 0) 515 | 516 | def test__search__q_core_gets_preferred(self): 517 | self.indexMultipleUserRecords() 518 | recs, _ = self.search('dataset', None, {'q': ['"readme"']}) 519 | self.assertEquals(len(recs), 4) 520 | self.assertEquals(recs[0]['name'], 'core-dataset') 521 | 522 | def test__search__q_ignore_stop_words(self): 523 | self.indexWithStopWords() 524 | recs, _ = self.search('dataset', None, {'q': ['"the Mauna Loa"']}) 525 | self.assertEquals(len(recs), 2) 526 | 527 | def test__search__q_consider_exact_match(self): 528 | data = [ 529 | { 530 | 'title': 'List of all countries with their 2 digit codes (ISO 3166-1)', 531 | 'owner': 'core', 532 | 'ownerid': 'core', 533 | 'readme': 'country country_codes country list country country_codes.html list lists list list' 534 | }, 535 | { 536 | 'title': 'Nasdaq Listings', 537 | 'owner': 'core', 538 | 'ownerid': 'core', 539 | 'readme': 'list list list list' 540 | }, 541 | { 542 | 'title': 'Country and Continent Codes List', 543 | 'owner': 'not-core', 544 | 'ownerid': 'not-core', 545 | 'readme': 'country list list' 546 | }, 547 | ] 548 | self.indexWithCustomText(data) 549 | recs, _ = self.search('dataset', None, {'q': ['"list of countries"']}) 550 | self.assertEquals(len(recs), 3) 551 | self.assertEquals(recs[0]['title'], 'List of all countries with their 2 digit codes (ISO 3166-1)') 552 | self.assertEquals(recs[1]['title'], 'Country and Continent Codes List') 553 | 554 | 555 | # Tests Events 556 | def test___search___all_events_are_empty(self): 557 | 
self.assertEquals(self.search('events', None), ([], {'total': 0, 'totalBytes': 0.0})) 558 | 559 | def test___search___all_event_are_there_but_unlisted(self): 560 | self.indexSomeEventRecords(10) 561 | res, _ = self.search('events', None) 562 | self.assertEquals(len(res), 5) 563 | 564 | def test___search___all_event_are_there_with_id_including_unlisted(self): 565 | self.indexSomeEventRecords(10) 566 | res, _ = self.search('events', 'datahubid') 567 | self.assertEquals(len(res), 10) 568 | 569 | def test___search___all_event_filter_with_findability(self): 570 | self.indexSomeEventRecords(10) 571 | res, _ = self.search('events', 'datahubid', {'findability': ['"unlisted"']}) 572 | self.assertEquals(len(res), 5) 573 | 574 | def test___search___all_event_filter_with_action(self): 575 | self.indexSomeEventRecords(10) 576 | res, _ = self.search('events', 'datahubid', {'event_action': ['"finished"']}) 577 | self.assertEquals(len(res), 7) 578 | 579 | def test___search___all_event_filter_with_entity(self): 580 | self.indexSomeEventRecords(10) 581 | res, _ = self.search('events', 'datahubid', {'event_entity': ['"flow"']}) 582 | self.assertEquals(len(res), 6) 583 | 584 | def test___search___all_event_filter_with_entity_and_action(self): 585 | self.indexSomeEventRecords(10) 586 | res, _ = self.search('events', 'datahubid', { 587 | 'event_entity': ['"flow"'], 588 | 'event_action': ['"finished"'] 589 | }) 590 | self.assertEquals(len(res), 4) 591 | 592 | def test___search___all_event_sorts_with_timestamp(self): 593 | self.indexSomeEventRecords(10) 594 | res, _ = self.search('events', 'datahubid') 595 | self.assertEquals(res[0]['timestamp'], '2009-01-01T00:00:00') 596 | self.assertEquals(res[9]['timestamp'], '2000-01-01T00:00:00') 597 | res, _ = self.search('events', 'datahubid', {'sort': ['"asc"']}) 598 | self.assertEquals(res[0]['timestamp'], '2000-01-01T00:00:00') 599 | self.assertEquals(res[9]['timestamp'], '2009-01-01T00:00:00') 600 | 601 | def test___search___events_match_only_exact_keywords(self): 602 | datasets = ['co2-fossil-by-nation', 'co2-fossil-global', 'co2-ppm'] 603 | self.indexEventRecordsWithDatasets(datasets) 604 | res, _ = self.search('events', 'datahubid', { 605 | 'dataset': ['"co2-ppm"'] 606 | }) 607 | 608 | self.assertEquals(len(res), 1) 609 | self.assertEquals(res[0]['dataset'], 'co2-ppm') 610 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | [tox] 2 | package=metastore 3 | skip_missing_interpreters=true 4 | envlist= 5 | py36 6 | 7 | [testenv] 8 | deps= 9 | -rrequirements.dev.txt 10 | passenv= 11 | CI 12 | TRAVIS 13 | TRAVIS_JOB_ID 14 | TRAVIS_BRANCH 15 | commands= 16 | py.test \ 17 | --cov {[tox]package} \ 18 | --cov-config tox.ini \ 19 | --cov-report term-missing \ 20 | {posargs} 21 | setenv = 22 | DATAHUB_ELASTICSEARCH_ADDRESS=http://localhost:9200 23 | --------------------------------------------------------------------------------
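For reference, a minimal sketch of querying the service over HTTP (this assumes a local `python server.py` on the default port 5000, and the query values below are illustrative only; `requests` is already in requirements.txt):

```python
import json

import requests

# Free-text queries and filter values must be JSON-encoded strings; any
# parameter other than q/size/from/sort is treated as an exact-match filter.
params = {
    'q': json.dumps('vix'),                  # searched across title/owner/readme
    'datahub.ownerid': json.dumps('core'),   # illustrative filter value
    'size': 20,
    'from': 0,
}
# Anonymous calls only see published datasets; pass an Auth-Token header
# (obtained from /auth/check) to also see your own unlisted/private ones.
resp = requests.get('http://localhost:5000/metastore/search', params=params)
resp.raise_for_status()
body = resp.json()
print(body['summary']['total'], 'datasets,', body['summary']['totalBytes'], 'bytes')
for doc in body['results']:
    print(doc.get('name'), '-', doc.get('title'))
```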