├── .gitignore ├── .travis.yml ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE ├── Procfile ├── README.md ├── app.json ├── databaseconfig.py ├── flask_server ├── __init__.py ├── app.py ├── rest │ ├── __init__.py │ └── document.py └── settings.py ├── heroku.yml ├── keep_current_storage ├── __init__.py ├── domain │ ├── __init__.py │ └── document.py ├── repository │ ├── __init__.py │ ├── memrepo.py │ └── mongo_db_repo.py ├── serializers │ ├── __init__.py │ └── document_serializer.py ├── shared │ ├── __init__.py │ ├── domain_model.py │ ├── request_object.py │ ├── response_object.py │ └── use_case.py └── use_cases │ ├── __init__.py │ ├── document_use_cases.py │ └── request_objects.py ├── manage.py ├── pytest.ini ├── requirements.txt ├── requirements ├── dev.txt ├── prod.txt └── test.txt ├── setup.cfg └── tests ├── __init__.py ├── conftest.py ├── domain ├── __init__.py └── test_document.py ├── repository ├── __init__.py └── test_memrepo.py ├── rest ├── __init__.py ├── test_get_documents_list.py └── test_insert_document_.py ├── serializers ├── __init__.py └── test_document_serializer.py ├── shared ├── __init__.py ├── test_response_object.py └── test_use_case.py └── use_cases ├── __init__.py ├── test_document_insert_use_case.py ├── test_document_list_request_objects.py └── test_document_list_use_case.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | #VSCode 10 | launch.json 11 | settings.json 12 | 13 | # Distribution / packaging 14 | .Python 15 | env/ 16 | build/ 17 | develop-eggs/ 18 | dist/ 19 | downloads/ 20 | eggs/ 21 | .eggs/ 22 | lib/ 23 | lib64/ 24 | parts/ 25 | sdist/ 26 | var/ 27 | wheels/ 28 | *.egg-info/ 29 | .installed.cfg 30 | *.egg 31 | 32 | # Python config files 33 | *config.py 34 | 35 | # PyInstaller 36 | # Usually these files are written by a python script from a template 37 
| # before PyInstaller builds the exe, so as to inject date/other infos into it. 38 | *.manifest 39 | *.spec 40 | 41 | # Installer logs 42 | pip-log.txt 43 | pip-delete-this-directory.txt 44 | 45 | # Unit test / coverage reports 46 | htmlcov/ 47 | .tox/ 48 | .coverage 49 | .coverage.* 50 | .cache 51 | nosetests.xml 52 | coverage.xml 53 | *.cover 54 | .hypothesis/ 55 | .pytest_cache/ 56 | 57 | # Translations 58 | *.mo 59 | *.pot 60 | 61 | # Django stuff: 62 | *.log 63 | local_settings.py 64 | 65 | # Flask stuff: 66 | instance/ 67 | .webassets-cache 68 | 69 | # Scrapy stuff: 70 | .scrapy 71 | 72 | # Sphinx documentation 73 | docs/_build/ 74 | 75 | # PyBuilder 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # pyenv 82 | .python-version 83 | 84 | # celery beat schedule file 85 | celerybeat-schedule 86 | 87 | # SageMath parsed files 88 | *.sage.py 89 | 90 | # dotenv 91 | .env 92 | 93 | # virtualenv 94 | .venv 95 | venv/ 96 | ENV/ 97 | 98 | # Spyder project settings 99 | .spyderproject 100 | .spyproject 101 | 102 | # Rope project settings 103 | .ropeproject 104 | 105 | # mkdocs documentation 106 | /site 107 | 108 | # mypy 109 | .mypy_cache/ 110 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | python: 3 | - "3.5" 4 | - "3.6" 5 | env: 6 | - MONGO_CONNECTION_STRING=your_mongo_connection_string 7 | install: 8 | - sudo apt-get update 9 | - wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh; 10 | - bash miniconda.sh -b -p $HOME/miniconda 11 | - export PATH="$HOME/miniconda/bin:$PATH" 12 | - hash -r 13 | - conda config --set always_yes yes --set changeps1 no 14 | - conda update -q conda 15 | # Useful for debugging any issues with conda 16 | - conda info -a 17 | 18 | - conda create -q -n test-environment python=$TRAVIS_PYTHON_VERSION 19 | - source activate 
test-environment 20 | # python setup.py install 21 | - pip install -r requirements.txt 22 | # command to run the tests: 23 | script: 24 | - py.test 25 | after_success: 26 | notifications: 27 | slack: vdsg:YeRk6YBgJuTEtaRT1pPp76pb -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | In the interest of fostering an open and welcoming environment, we as contributors and maintainers pledge to making participation in our project and our community a harassment-free experience for everyone, regardless of age, body size, disability, ethnicity, gender identity and expression, level of experience, nationality, personal appearance, race, religion, or sexual identity and orientation. 6 | 7 | ## Our Standards 8 | 9 | Examples of behavior that contributes to creating a positive environment include: 10 | 11 | * Using welcoming and inclusive language 12 | * Being respectful of differing viewpoints and experiences 13 | * Gracefully accepting constructive criticism 14 | * Focusing on what is best for the community 15 | * Showing empathy towards other community members 16 | 17 | Examples of unacceptable behavior by participants include: 18 | 19 | * The use of sexualized language or imagery and unwelcome sexual attention or advances 20 | * Trolling, insulting/derogatory comments, and personal or political attacks 21 | * Public or private harassment 22 | * Publishing others' private information, such as a physical or electronic address, without explicit permission 23 | * Other conduct which could reasonably be considered inappropriate in a professional setting 24 | 25 | ## Our Responsibilities 26 | 27 | Project maintainers are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of 
unacceptable behavior. 28 | 29 | Project maintainers have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful. 30 | 31 | ## Scope 32 | 33 | This Code of Conduct applies both within project spaces and in public spaces when an individual is representing the project or its community. Examples of representing a project or community include using an official project e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event. Representation of a project may be further defined and clarified by project maintainers. 34 | 35 | ## Enforcement 36 | 37 | Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the project team at liad.magen@gmail.com. The project team will review and investigate all complaints, and will respond in a way that it deems appropriate to the circumstances. The project team is obligated to maintain confidentiality with regard to the reporter of an incident. Further details of specific enforcement policies may be posted separately. 38 | 39 | Project maintainers who do not follow or enforce the Code of Conduct in good faith may face temporary or permanent repercussions as determined by other members of the project's leadership. 
40 | 41 | ## Attribution 42 | 43 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, available at [http://contributor-covenant.org/version/1/4][version] 44 | 45 | [homepage]: http://contributor-covenant.org 46 | [version]: http://contributor-covenant.org/version/1/4/ 47 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Welcome! 2 | 3 | We are so glad you're thinking about contributing to the Keep-Current project! If you're unsure about something, please don't hesitate to ask us. 4 | 5 | We want to ensure a welcoming environment for all the different Keep-Current repositories. Please follow the [Code of Conduct](CODE_OF_CONDUCT.md). 6 | 7 | We encourage you to read the [License](LICENSE) and the [ReadMe](README.md). 8 | 9 | All contributions to this project will be released under the MIT License. By submitting a pull request, you are agreeing to release your contribution under this license. 10 | 11 | ## Meetups 12 | 13 | We welcome anyone who would like to join and contribute. We meet regularly every month in Vienna through the Data Science Cafe meetup of the VDSG, show our progress and discuss the next steps. Please follow [our meetup page](https://www.meetup.com/Vienna-Data-Science-Group-Meetup/) to get updates regarding our next meeting. 14 | 15 | ## Machine Learning & Natural Language Processing 16 | 17 | This repository is dedicated to the machine-learning-based engine. Among the items we handle here are document classification, semantic representations of documents, topic distance mapping, adaptation according to users' feedback, etc. 
18 | 19 | If you're new to Machine Learning, we suggest starting by reading the following sources: 20 | 21 | * [Machine learning collection](https://github.com/collections/machine-learning) 22 | * [Natural Language Processing in Python Book](http://nltk.org/book/) 23 | * [Keon's awesome-nlp!](https://github.com/keon/awesome-nlp) 24 | 25 | ### Recommended tools: 26 | 27 | We lean heavily on existing tools while also developing our own new methods. We are collaborating through Google Colab notebooks. Among the existing tools we are using: 28 | 29 | * [TextBlob](http://textblob.readthedocs.io/en/dev/) 30 | * [spaCy](https://spacy.io/) 31 | 32 | ### Vision & Roadmap 33 | 34 | We also want to use this goal as a reason to have a playground to implement, test, hack and compare different models. 35 | 36 | Currently, we're working on: 37 | 38 | * Collecting data to create a 'gold-truth' dataset on which different models can be compared. 39 | * Developing unsupervised models for document clustering 40 | 41 | ## I have different skills 42 | 43 | If you wish to assist in different aspects, such as Data Engineering, Web development, DevOps, we have divided the project into several additional repositories focusing on these topics. 
-------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Liad 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /Procfile: -------------------------------------------------------------------------------- 1 | web: python manage.py runserver --host 0.0.0.0 --port ${PORT} 2 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Keep-Current-Storage - Data Engineering 2 | This module handles the DB and storage of documents info, users, relations between the two and the recommendations 3 | 4 | 5 | [![Codacy Badge](https://api.codacy.com/project/badge/Grade/fc0eb9354c4742fca2af56a21267532a)](https://app.codacy.com/app/Keep-Current/Data-Engineering?utm_source=github.com&utm_medium=referral&utm_content=Keep-Current/Data-Engineering&utm_campaign=badger) 6 | [![Build Status](https://travis-ci.org/Keep-Current/Data-Engineering.svg?branch=master)](https://travis-ci.org/Keep-Current/Data-Engineering)[![BCH compliance](https://bettercodehub.com/edge/badge/Keep-Current/Data-Engineering?branch=master)](https://bettercodehub.com/) 7 | 8 | After studying a topic, keeping current with the news, published papers, advanced technologies and such proves to be hard work. 9 | One must attend conventions, subscribe to different websites and newsletters, go over different emails, alerts and such while filtering the relevant data out of these sources. 10 | 11 | In this project, we aspire to create a platform for students, researchers, professionals and enthusiasts to discover news on relevant topics. The users are encouraged to constantly give feedback on the suggestions, in order to adapt and personalize future results. 12 | 13 | The goal is to create an automated system that scans the web, through a list of trusted sources, classifies and categorizes the documents it finds, and matches them to the different users, according to their interests. 
It then presents it as a timely summarized digest to the user, whether by email or within a site. 14 | 15 | ## Who are we? 16 | 17 | This project intends to be a shared work of *Vienna Data Science Cafe* Meet-Up members, with the purpose, besides the obvious result, to also be used as a learning platform, while advancing the Natural Language Processing / Machine Learning field by exploring, comparing and hacking different models. 18 | 19 | Please feel free to [contribute](CONTRIBUTING.md). 20 | 21 | Project board is on [Trello](https://trello.com/b/KmMEPjfT/keep-current) and we use [Slack](https://keep-current.slack.com) as our communication channel. If you're new, you can join using [this link](https://join.slack.com/t/keep-current/shared_invite/enQtMzY4MTA0OTQ0NTAzLTcxY2U5NmIwNmM0NmU2MmMyMWQ0YTIyMTg4MWRjMWUyYmVlNWQxMzU3ZWJlNjM4NzVmNTFhM2FjYjkzZDU3YWM). 22 | 23 | ## I want to help 24 | 25 | We welcome anyone who would like to join and contribute. We meet regularly every month in Vienna through the Data Science Cafe meetup of the VDSG, show our progress and discuss the next steps. 26 | 27 | ## Data Engineering 28 | 29 | This component exposes an API for the other components, to save and retrieve the data they need in a secure way. 30 | 31 | 32 | 33 | ## The repository 34 | 35 | This repository is for Data engineering. 36 | If you wish to assist in different aspects (Data Engineering / Web development / DevOps), we have divided the project into several additional repositories focusing on these topics: 37 | 38 | * The machine-learning engine can be found in our [Main repository](https://github.com/Keep-Current/Keep-Current) 39 | * Web Development & UI/UX experiments can be found in our [App repository](https://github.com/Keep-Current/Keep-Current-App) 40 | * Website crawling and spider tasks are concentrated in our [Web Crawler repository](https://github.com/Keep-Current/Keep-Current-Crawler) 41 | * Devops tasks are all across the project. 
import os

# The connection string is read from the MONGO_CONNECTION_STRING environment
# variable. When the variable is absent a placeholder is used instead, e.g.
# mongo_db = { 'connection_string': 'your_mongo_connection_string' }
mongo_conn_string = os.environ.get(
    'MONGO_CONNECTION_STRING',
    'SET_YOUR_MONGO_DB_CONN_STRING',
)

# Settings dictionary read by the MongoDB repository.
mongo_db = dict(connection_string=mongo_conn_string)
create_app(config_object=DevConfig): 8 | app = Flask(__name__) 9 | app.config.from_object(config_object) 10 | app.register_blueprint(document.blueprint) 11 | return app -------------------------------------------------------------------------------- /flask_server/rest/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Keep-Current/Data-Engineering/81ff7fdeff0ad8536c08dec9e0dc75b3aa5a6496/flask_server/rest/__init__.py -------------------------------------------------------------------------------- /flask_server/rest/document.py: -------------------------------------------------------------------------------- 1 | import json 2 | from flask import Blueprint, request, Response 3 | 4 | from keep_current_storage.use_cases import request_objects as req 5 | from keep_current_storage.repository.mongo_db_repo import MongoDBRepo 6 | from keep_current_storage.use_cases import document_use_cases as uc 7 | from keep_current_storage.serializers import document_serializer as ser 8 | from keep_current_storage.shared import response_object as res 9 | 10 | STATUS_CODES = { 11 | res.ResponseSuccess.SUCCESS: 200, 12 | res.ResponseFailure.RESOURCE_ERROR: 404, 13 | res.ResponseFailure.PARAMETERS_ERROR: 400, 14 | res.ResponseFailure.SYSTEM_ERROR: 500 15 | } 16 | 17 | blueprint = Blueprint('document', __name__) 18 | 19 | @blueprint.route('/documents', methods=['GET']) 20 | def documents(): 21 | qrystr_params = { 22 | 'filters': {}, 23 | } 24 | 25 | for arg, values in request.args.items(): 26 | if arg.startswith('filter_'): 27 | qrystr_params['filters'][arg.replace('filter_', '')] = values 28 | 29 | request_object = req.DocumentListRequestObject.from_dict(qrystr_params) 30 | 31 | repo = MongoDBRepo() 32 | use_case = uc.DocumentListUseCase(repo) 33 | 34 | response = use_case.execute(request_object) 35 | 36 | return Response(json.dumps(response.value, cls=ser.DocumentEncoder), 37 | mimetype='application/json', 38 | 
@blueprint.route('/document', methods=['POST'])
def document():
    """Insert the document described by the JSON request body.

    The body is expected to be a JSON object. A body that is itself a JSON
    string (a double-encoded payload) is decoded once more. Anything that
    does not end up as a JSON object is handed to the request-object
    validation as an empty mapping, so the client receives a parameters
    error (HTTP 400) rather than an unhandled server error.

    Returns:
        A JSON ``Response`` whose status code is looked up in
        ``STATUS_CODES`` from the use-case response type.
    """
    payload = request.json

    if isinstance(payload, str):
        # Double-encoded body: the JSON value is a string containing JSON.
        try:
            payload = json.loads(payload)
        except ValueError:
            payload = None

    if not isinstance(payload, dict):
        # Missing or non-object body: let validation report the missing keys.
        payload = {}

    request_object = req.DocumentInsertRequestObject.from_dict(payload)

    repo = MongoDBRepo()
    use_case = uc.DocumentInsertUseCase(repo)

    response = use_case.execute(request_object)

    return Response(json.dumps(response.value, cls=ser.DocumentEncoder),
                    mimetype='application/json',
                    status=STATUS_CODES[response.type])
class Document(object):
    """Domain entity describing a stored document.

    Attributes are plain instance fields:
        id:  identifier of the document.
        url: location of the document.
    """

    def __init__(self, id, url):
        self.id = id
        self.url = url

    @classmethod
    def from_dict(cls, adict):
        """Build an instance from a mapping with ``'id'`` and ``'url'`` keys.

        Instantiates ``cls`` so that subclasses get instances of their own
        type.

        Raises:
            KeyError: if ``'id'`` or ``'url'`` is missing from ``adict``.
        """
        return cls(
            id=adict['id'],
            url=adict['url'],
        )
class MongoDBRepo:
    """Document repository backed by the ``keep_current.Documents`` collection.

    Filters use the ``<field>`` or ``<field>__<operator>`` key convention,
    where the operator is one of ``eq``, ``lt`` or ``gt``.
    """

    _SUPPORTED_OPERATORS = ('eq', 'lt', 'gt')

    def _checkFilter(self, key, value):
        """Split a filter key into ``(field, operator)``.

        A key without an explicit operator is treated as ``eq``.

        Raises:
            ValueError: if the operator is not supported.
        """
        if '__' not in key:
            key = key + '__eq'

        key, operator = key.split('__')

        if operator not in self._SUPPORTED_OPERATORS:
            raise ValueError('Operator {} is not supported'.format(operator))

        return key, operator

    def _build_query(self, filters):
        """Translate a filters mapping into a MongoDB query document.

        ``None`` or an empty mapping yields ``{}`` (match everything). A
        field filtered only by equality keeps the plain ``{field: value}``
        form; ``lt``/``gt`` become ``$lt``/``$gt`` conditions, and several
        conditions on one field are merged into a single operator document.
        """
        conditions = {}
        for key, value in (filters or {}).items():
            field, operator = self._checkFilter(key, value)
            conditions.setdefault(field, {})['$' + operator] = value

        query = {}
        for field, operators in conditions.items():
            if set(operators) == {'$eq'}:
                query[field] = operators['$eq']
            else:
                query[field] = operators
        return query

    def list(self, filters=None):
        """Return every stored document matching ``filters`` as a list.

        Raises:
            ValueError: if a filter uses an unsupported operator.
        """
        query = self._build_query(filters)

        self._client = MongoClient(cfg.mongo_db['connection_string'])
        try:
            documents = self._client.keep_current.Documents
            # Drain the cursor before the client is closed.
            return [document for document in documents.find(query)]
        finally:
            # A client is created per call, so release it when done.
            self._client.close()

    def insert_document(self, document):
        """Insert ``document`` into the collection."""
        self._client = MongoClient(cfg.mongo_db['connection_string'])
        try:
            self._client.keep_current.Documents.insert_one(document)
        finally:
            self._client.close()
class InvalidRequestObject(object):
    """Accumulates validation errors for a request.

    Instances are always falsy, so callers can tell a rejected request from
    a valid one with a plain truth test.
    """

    def __init__(self):
        self.errors = []

    def add_error(self, parameter, message):
        """Record that ``parameter`` failed validation with ``message``."""
        entry = dict(parameter=parameter, message=message)
        self.errors.append(entry)

    def has_errors(self):
        """Return True when at least one error has been recorded."""
        return bool(self.errors)

    def __bool__(self):
        return False

    __nonzero__ = __bool__
class ResponseFailure(object):
    """Falsy response describing why a use case could not be completed."""

    RESOURCE_ERROR = 'RESOURCE_ERROR'
    PARAMETERS_ERROR = 'PARAMETERS_ERROR'
    SYSTEM_ERROR = 'SYSTEM_ERROR'

    def __init__(self, type_, message):
        self.type = type_
        self.message = self._format_message(message)

    def _format_message(self, msg):
        """Render exceptions as ``'<ClassName>: <text>'``; pass others through."""
        if not isinstance(msg, Exception):
            return msg
        return "{}: {}".format(type(msg).__name__, msg)

    @property
    def value(self):
        """Dictionary with the failure ``type`` and ``message``."""
        return dict(type=self.type, message=self.message)

    def __bool__(self):
        return False

    @classmethod
    def build_resource_error(cls, message=None):
        """Create a failure of type ``RESOURCE_ERROR``."""
        return cls(cls.RESOURCE_ERROR, message)

    @classmethod
    def build_system_error(cls, message=None):
        """Create a failure of type ``SYSTEM_ERROR``."""
        return cls(cls.SYSTEM_ERROR, message)

    @classmethod
    def build_parameters_error(cls, message=None):
        """Create a failure of type ``PARAMETERS_ERROR``."""
        return cls(cls.PARAMETERS_ERROR, message)

    @classmethod
    def build_from_invalid_request_object(cls, invalid_request_object):
        """Create a parameters error listing each recorded error on its own line."""
        lines = []
        for err in invalid_request_object.errors:
            lines.append("{}: {}".format(err['parameter'], err['message']))
        return cls.build_parameters_error("\n".join(lines))
class UseCase(object):
    """Base class for use cases.

    Subclasses implement ``process_request``; ``execute`` wraps it with the
    request validity check and exception-to-failure conversion.
    """

    def execute(self, request_object):
        """Run the use case for ``request_object``.

        A falsy request object is converted into a failure built from its
        recorded errors. Any exception raised while processing is converted
        into a system-error failure.
        """
        if request_object:
            try:
                return self.process_request(request_object)
            except Exception as exc:
                details = "{}: {}".format(type(exc).__name__, exc)
                return res.ResponseFailure.build_system_error(details)

        return res.ResponseFailure.build_from_invalid_request_object(request_object)

    def process_request(self, request_object):
        """Handle a valid request; must be overridden by subclasses."""
        raise NotImplementedError(
            "process_request() not implemented by UseCase class")
class DocumentListRequestObject(req.ValidRequestObject):
    """Validated request for listing documents, optionally filtered."""

    def __init__(self, filters=None):
        self.filters = filters

    @classmethod
    def from_dict(cls, adict):
        """Build a request object from ``adict``.

        Returns:
            An instance of this class carrying ``adict['filters']`` (or
            ``None`` when absent), or an ``InvalidRequestObject`` when
            ``'filters'`` is present but is not a mapping.
        """
        # The ``collections.Mapping`` alias was removed in Python 3.10; the
        # ABC lives in ``collections.abc``. Imported locally so this block
        # does not depend on how the module imports ``collections``.
        from collections.abc import Mapping

        invalid_req = req.InvalidRequestObject()

        if 'filters' in adict and not isinstance(adict['filters'], Mapping):
            invalid_req.add_error('filters', 'Is not iterable')

        if invalid_req.has_errors():
            return invalid_req

        return cls(filters=adict.get('filters', None))
22 | -------------------------------------------------------------------------------- /pytest.ini: -------------------------------------------------------------------------------- 1 | [pytest] 2 | minversion = 2.0 3 | norecursedirs = .git .tox venv* requirements* 4 | python_files = test*.py -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | -r requirements/test.txt -------------------------------------------------------------------------------- /requirements/dev.txt: -------------------------------------------------------------------------------- 1 | -r test.txt 2 | 3 | pip 4 | wheel 5 | flake8 6 | Sphinx 7 | Flask-Script 8 | Flask 9 | pymongo -------------------------------------------------------------------------------- /requirements/prod.txt: -------------------------------------------------------------------------------- 1 | Flask 2 | -------------------------------------------------------------------------------- /requirements/test.txt: -------------------------------------------------------------------------------- 1 | -r prod.txt 2 | 3 | pytest 4 | tox 5 | coverage 6 | codecov 7 | codacy-coverage 8 | pytest-cov 9 | pytest-flask 10 | pymongo 11 | gunicorn 12 | flake8 13 | Sphinx 14 | Flask-Script -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [bdist_wheel] 2 | universal = 1 3 | 4 | [flake8] 5 | ignore = D203 6 | exclude = .git, venv*, docs 7 | max-complexity = 10 8 | max-line-length = 100 -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Keep-Current/Data-Engineering/81ff7fdeff0ad8536c08dec9e0dc75b3aa5a6496/tests/__init__.py 
# --------------------------------------------------------------------------------
# /tests/conftest.py:
# --------------------------------------------------------------------------------
import pytest

from flask_server.app import create_app
from flask_server.settings import TestConfig


# pytest.yield_fixture is a deprecated alias of pytest.fixture, and this
# fixture returns rather than yields, so the plain decorator is the right one.
@pytest.fixture(scope='function')
def app():
    """Return an application created with ``TestConfig`` for each test function."""
    return create_app(TestConfig)

# --------------------------------------------------------------------------------
# /tests/domain/__init__.py:
# --------------------------------------------------------------------------------
# https://raw.githubusercontent.com/Keep-Current/Data-Engineering/81ff7fdeff0ad8536c08dec9e0dc75b3aa5a6496/tests/domain/__init__.py
# --------------------------------------------------------------------------------
# /tests/domain/test_document.py:
# --------------------------------------------------------------------------------
import uuid
from keep_current_storage.domain.document import Document


def test_document_model_init():
    """A Document built through its constructor exposes ``id`` and ``url``."""
    # Named doc_id so the builtin ``id`` is not shadowed.
    doc_id = uuid.uuid4()
    document = Document(doc_id, url='https://arxiv.org/pdf/1606.04155.pdf')
    assert document.id == doc_id
    assert document.url == 'https://arxiv.org/pdf/1606.04155.pdf'


def test_document_model_from_dict():
    """A Document built with ``from_dict`` exposes the same ``id`` and ``url``."""
    doc_id = uuid.uuid4()
    document = Document.from_dict(
        {
            'id': doc_id,
            'url': 'https://arxiv.org/pdf/1606.04155.pdf'
        }
    )

    assert document.id == doc_id
    assert document.url == 'https://arxiv.org/pdf/1606.04155.pdf'

# --------------------------------------------------------------------------------
# /tests/repository/__init__.py:
# --------------------------------------------------------------------------------
# https://raw.githubusercontent.com/Keep-Current/Data-Engineering/81ff7fdeff0ad8536c08dec9e0dc75b3aa5a6496/tests/repository/__init__.py
# --------------------------------------------------------------------------------
# /tests/repository/test_memrepo.py:
# --------------------------------------------------------------------------------
import pytest

from keep_current_storage.shared.domain_model import DomainModel
from keep_current_storage.domain.document import Document
from keep_current_storage.repository import memrepo

document_1 = {
    'id': 'f853578c-fc0f-4e65-81b8-566c5dffa35a',
    'url': 'https://arxiv.org/pdf/1606.04155.pdf'
}

document_2 = {
    'id': 'fe2c3195-aeff-487a-a08f-e0bdc0ec6e9a',
    'url': 'https://arxiv.org/pdf/1506.08941.pdf'
}

document_3 = {
    'id': '913694c6-435a-4366-ba0d-da5334a611b2',
    'url': 'https://arxiv.org/pdf/1705.09655v2.pdf'
}


@pytest.fixture
def documents():
    """Return the three sample document dicts as a fresh list."""
    return [document_1, document_2, document_3]


def _check_results(domain_models_list, data_list):
    """Assert both lists have equal length, hold DomainModels, and share ids."""
    assert len(domain_models_list) == len(data_list)
    # Generator / set comprehensions avoid building throwaway lists.
    assert all(isinstance(dm, DomainModel) for dm in domain_models_list)
    assert {dm.id for dm in domain_models_list} == {d['id'] for d in data_list}


def test_repository_list_without_parameters(documents):
    repo = memrepo.MemRepo(documents)
    assert repo.list() == documents


def test_repository_list_with_filters_unknown_key(documents):
    repo = memrepo.MemRepo(documents)

    with pytest.raises(KeyError):
        repo.list(filters={'name': 'aname'})


def test_repository_list_with_filters_unknown_operator(documents):
    repo = memrepo.MemRepo(documents)

    with pytest.raises(ValueError):
        repo.list(filters={'id__in': [20, 30]})


def test_repository_list_with_filters_id(documents):
    repo = memrepo.MemRepo(documents)

    _check_results(repo.list(filters={'id': 'f853578c-fc0f-4e65-81b8-566c5dffa35a'}), [document_1])
    _check_results(repo.list(filters={'id__eq': 'f853578c-fc0f-4e65-81b8-566c5dffa35a'}), [document_1])


def test_repository_insert(documents):
    """Inserting one document grows the listing by exactly one entry."""
    repo = memrepo.MemRepo(documents)
    len1 = len(repo.list())

    document1 = {
        'id': '1111-2222',
        'url': 'https://arxiv.org/pdf/1705.09655v2.pdf'
    }

    repo.insert_document(document1)
    len2 = len(repo.list())
    assert len2 == len1 + 1

# --------------------------------------------------------------------------------
# /tests/rest/__init__.py:
# --------------------------------------------------------------------------------
# https://raw.githubusercontent.com/Keep-Current/Data-Engineering/81ff7fdeff0ad8536c08dec9e0dc75b3aa5a6496/tests/rest/__init__.py
# --------------------------------------------------------------------------------
# /tests/rest/test_get_documents_list.py:
# --------------------------------------------------------------------------------
import json
from unittest import mock

from keep_current_storage.domain.document import Document
from keep_current_storage.shared import response_object as res

document_1_dict = {
    'id': 'f853578c-fc0f-4e65-81b8-566c5dffa35a',
    'url': 'https://arxiv.org/pdf/1606.04155.pdf'
}

document1_domain_model = Document.from_dict(document_1_dict)

documents = [document1_domain_model]


@mock.patch('keep_current_storage.use_cases.document_use_cases.DocumentListUseCase')
def test_get(mock_use_case, client):
    """GET /documents returns the use case's documents as JSON with status 200."""
    mock_use_case().execute.return_value = res.ResponseSuccess(documents)

    http_response = client.get('/documents')

    assert json.loads(http_response.data.decode('UTF-8')) == [document_1_dict]
    assert http_response.status_code == 200
    assert http_response.mimetype == 'application/json'


@mock.patch('keep_current_storage.use_cases.document_use_cases.DocumentListUseCase')
def test_get_failed_response(mock_use_case, client):
    """A system-error response from the use case is returned as JSON with status 500."""
    mock_use_case().execute.return_value = res.ResponseFailure.build_system_error('test message')

    http_response = client.get('/documents')

    assert json.loads(http_response.data.decode('UTF-8')) == {'type': 'SYSTEM_ERROR',
                                                              'message': 'test message'}
    assert http_response.status_code == 500
    assert http_response.mimetype == 'application/json'


@mock.patch('keep_current_storage.use_cases.document_use_cases.DocumentListUseCase')
def test_request_object_initialisation_and_use_with_filters(mock_use_case, client):
    """``filter_*`` query parameters reach ``from_dict`` with the prefix stripped."""
    mock_use_case().execute.return_value = res.ResponseSuccess([])
    internal_request_object = mock.Mock()

    request_object_class = 'keep_current_storage.use_cases.request_objects.DocumentListRequestObject'
    with mock.patch(request_object_class) as mock_request_object:
        mock_request_object.from_dict.return_value = internal_request_object
        client.get('/documents?filter_param1=value1&filter_param2=value2')

    mock_request_object.from_dict.assert_called_with(
        {'filters': {'param1': 'value1', 'param2': 'value2'}}
    )

    mock_use_case().execute.assert_called_with(internal_request_object)

# --------------------------------------------------------------------------------
# /tests/rest/test_insert_document_.py:
# --------------------------------------------------------------------------------
import json
from unittest import mock
from keep_current_storage.shared import response_object as res

document_1_dict = {
    "id": "f853578c-fc0f-4e65-81b8-566c5dffa35a",
    "url": "https://arxiv.org/pdf/1606.04155.pdf"
}


@mock.patch('keep_current_storage.use_cases.document_use_cases.DocumentInsertUseCase')
def test_post(mock_use_case, client):
    """POST /document with a JSON body answers 200 when the use case succeeds."""
    mock_use_case().execute.return_value = res.ResponseSuccess()
    # No ``charset`` keyword: Werkzeug's test client encodes str bodies as
    # UTF-8 by default, and the keyword was removed in Werkzeug 3.0.
    http_response = client.post(
        '/document',
        data=json.dumps(document_1_dict),
        content_type='application/json',
    )
    assert http_response.status_code == 200

# --------------------------------------------------------------------------------
# /tests/serializers/__init__.py:
# --------------------------------------------------------------------------------
# https://raw.githubusercontent.com/Keep-Current/Data-Engineering/81ff7fdeff0ad8536c08dec9e0dc75b3aa5a6496/tests/serializers/__init__.py
# --------------------------------------------------------------------------------
# /tests/serializers/test_document_serializer.py:
# --------------------------------------------------------------------------------
import json

from keep_current_storage.serializers import document_serializer as srs
from keep_current_storage.domain.document import Document


def test_serialize_domain_document():
    """DocumentEncoder serialises a Document to its ``id``/``url`` JSON object."""
    document = Document('f853578c-fc0f-4e65-81b8-566c5dffa35a',
                        url='https://arxiv.org/pdf/1606.04155.pdf')

    expected_json = """
        {
            "id" : "f853578c-fc0f-4e65-81b8-566c5dffa35a",
            "url" : "https://arxiv.org/pdf/1606.04155.pdf"
        }
    """

    assert json.loads(json.dumps(document, cls=srs.DocumentEncoder)) == json.loads(expected_json)

# --------------------------------------------------------------------------------
# /tests/shared/__init__.py:
# --------------------------------------------------------------------------------
# https://raw.githubusercontent.com/Keep-Current/Data-Engineering/81ff7fdeff0ad8536c08dec9e0dc75b3aa5a6496/tests/shared/__init__.py
# --------------------------------------------------------------------------------
# /tests/shared/test_response_object.py:
# --------------------------------------------------------------------------------
import pytest

from keep_current_storage.shared import response_object as res, request_object as req


@pytest.fixture
def response_value():
    return {'key': ['value1', 'value2']}


@pytest.fixture
def response_type():
    return 'ResponseError'


@pytest.fixture
def response_message():
    return 'This is a response error'


def test_response_success_is_true(response_value):
    assert bool(res.ResponseSuccess(response_value)) is True


def test_response_failure_is_false(response_type, response_message):
    assert bool(res.ResponseFailure(response_type, response_message)) is False


def test_response_success_contains_value(response_value):
    response = res.ResponseSuccess(response_value)

    assert response.value == response_value


def test_response_failure_has_type_and_message(response_type, response_message):
    response = res.ResponseFailure(response_type, response_message)

    assert response.type == response_type
    assert response.message == response_message


def test_response_failure_contains_value(response_type, response_message):
    response = res.ResponseFailure(response_type, response_message)

    assert response.value == {'type': response_type, 'message': response_message}


def test_response_failure_initialization_with_exception(response_type):
    """An exception passed as the message is rendered as ``"<Class>: <text>"``."""
    # ``response_type`` must be requested as a fixture; without the parameter
    # the name resolves to the module-level fixture function and the type
    # assertion compares that function with itself.
    response = res.ResponseFailure(response_type, Exception('Just an error message'))

    assert bool(response) is False
    assert response.type == response_type
    assert response.message == "Exception: Just an error message"


def test_response_failure_from_invalid_request_object():
    response = res.ResponseFailure.build_from_invalid_request_object(req.InvalidRequestObject())

    assert bool(response) is False


def test_response_failure_from_invalid_request_object_with_errors():
    request_object = req.InvalidRequestObject()
    request_object.add_error('path', 'Is mandatory')
    request_object.add_error('path', "can't be blank")

    response = res.ResponseFailure.build_from_invalid_request_object(request_object)

    assert bool(response) is False
    assert response.type == res.ResponseFailure.PARAMETERS_ERROR
    assert response.message == "path: Is mandatory\npath: can't be blank"


def test_response_failure_build_resource_error():
    response = res.ResponseFailure.build_resource_error("test message")

    assert bool(response) is False
    assert response.type == res.ResponseFailure.RESOURCE_ERROR
    assert response.message == "test message"


def test_response_failure_build_parameters_error():
    response = res.ResponseFailure.build_parameters_error("test message")

    assert bool(response) is False
    assert response.type == res.ResponseFailure.PARAMETERS_ERROR
    assert response.message == "test message"


def test_response_failure_build_system_error():
    response = res.ResponseFailure.build_system_error("test message")

    assert bool(response) is False
    assert response.type == res.ResponseFailure.SYSTEM_ERROR
    assert response.message == "test message"

# --------------------------------------------------------------------------------
# /tests/shared/test_use_case.py:
# --------------------------------------------------------------------------------
from unittest import mock

from keep_current_storage.shared import request_object as req, response_object as res
from keep_current_storage.shared import use_case as uc


def test_use_case_cannot_process_valid_requests():
    """The base UseCase reports NotImplementedError as a system error."""
    valid_request_object = mock.MagicMock()
    valid_request_object.__bool__.return_value = True

    use_case = uc.UseCase()
    response = use_case.execute(valid_request_object)

    assert not response
    assert response.type == res.ResponseFailure.SYSTEM_ERROR
    assert response.message == \
        'NotImplementedError: process_request() not implemented by UseCase class'


def test_use_case_can_process_invalid_requests_and_returns_response_failure():
    invalid_request_object = req.InvalidRequestObject()
    invalid_request_object.add_error('someparam', 'somemessage')

    use_case = uc.UseCase()
    response = use_case.execute(invalid_request_object)

    assert not response
    assert response.type == res.ResponseFailure.PARAMETERS_ERROR
    assert response.message == 'someparam: somemessage'


def test_use_case_can_manage_generic_exception_from_process_request():
    """An exception raised by process_request becomes a system-error response."""
    use_case = uc.UseCase()

    class TestException(Exception):
        pass

    use_case.process_request = mock.Mock()
    use_case.process_request.side_effect = TestException('somemessage')
    # Pass a Mock instance (truthy) as the request, not the Mock class itself.
    response = use_case.execute(mock.Mock())

    assert not response
    assert response.type == res.ResponseFailure.SYSTEM_ERROR
    assert response.message == 'TestException: somemessage'

# --------------------------------------------------------------------------------
# /tests/use_cases/__init__.py:
# --------------------------------------------------------------------------------
# https://raw.githubusercontent.com/Keep-Current/Data-Engineering/81ff7fdeff0ad8536c08dec9e0dc75b3aa5a6496/tests/use_cases/__init__.py
# --------------------------------------------------------------------------------
# /tests/use_cases/test_document_insert_use_case.py:
# --------------------------------------------------------------------------------
import pytest
from unittest import mock

from keep_current_storage.domain.document import Document
from keep_current_storage.use_cases import request_objects as ro
from keep_current_storage.use_cases import document_use_cases as uc
from keep_current_storage.shared import response_object as res


def test_document_insert_valid_json():
    """A dict with ``id`` and ``url`` is passed unchanged to ``repo.insert_document``."""
    repo = mock.Mock()
    repo.insert_document.return_value = None
    document_insert_use_case = uc.DocumentInsertUseCase(repo)
    document_1_dict = {
        'id': '1111-11111-22222',
        'url': 'https://arxiv.org/pdf/12345.pdf'
    }
    request_object = ro.DocumentInsertRequestObject.from_dict(document_1_dict)
    response_object = document_insert_use_case.execute(request_object)

    repo.insert_document.assert_called_with(document_1_dict)
    assert bool(response_object) is True

# (tests/use_cases/test_document_insert_use_case.py, continued)
def test_document_insert_invalid_json_id():
    """A payload without ``id`` is rejected before the repository is touched."""
    payload = {
        '_id_': '1111-11111-22222',
        'url': 'https://arxiv.org/pdf/12345.pdf'
    }
    repo = mock.Mock()
    repo.insert_document.return_value = None
    use_case = uc.DocumentInsertUseCase(repo)

    request = ro.DocumentInsertRequestObject.from_dict(payload)
    response = use_case.execute(request)

    repo.insert_document.assert_not_called()
    assert bool(response) is False
    expected = {
        'type': res.ResponseFailure.PARAMETERS_ERROR,
        'message': "id: not provided"
    }
    assert response.value == expected


def test_document_insert_invalid_json_url():
    """A payload without ``url`` is rejected before the repository is touched."""
    payload = {
        'id': '1111-11111-22222',
        '_url_': 'https://arxiv.org/pdf/12345.pdf'
    }
    repo = mock.Mock()
    repo.insert_document.return_value = None
    use_case = uc.DocumentInsertUseCase(repo)

    request = ro.DocumentInsertRequestObject.from_dict(payload)
    response = use_case.execute(request)

    repo.insert_document.assert_not_called()
    assert bool(response) is False
    expected = {
        'type': res.ResponseFailure.PARAMETERS_ERROR,
        'message': "url: not provided"
    }
    assert response.value == expected

# --------------------------------------------------------------------------------
# /tests/use_cases/test_document_list_request_objects.py:
# --------------------------------------------------------------------------------
from keep_current_storage.use_cases import request_objects as ro


def test_build_document_list_request_object_without_parameters():
    request = ro.DocumentListRequestObject()

    assert request.filters is None
    assert bool(request) is True


def test_build_document_list_request_object_from_empty_dict():
    request = ro.DocumentListRequestObject.from_dict({})

    assert request.filters is None
    assert bool(request) is True


def test_build_document_list_request_object_with_empty_filters():
    request = ro.DocumentListRequestObject(filters={})

    assert request.filters == {}
    assert bool(request) is True


def test_build_document_list_request_object_from_dict_with_empty_filters():
    request = ro.DocumentListRequestObject.from_dict({'filters': {}})

    assert request.filters == {}
    assert bool(request) is True


def test_build_document_list_request_object_with_filters():
    request = ro.DocumentListRequestObject(filters={'a': 1, 'b': 2})

    assert request.filters == {'a': 1, 'b': 2}
    assert bool(request) is True


def test_build_document_list_request_object_from_dict_with_filters():
    request = ro.DocumentListRequestObject.from_dict({'filters': {'a': 1, 'b': 2}})

    assert request.filters == {'a': 1, 'b': 2}
    assert bool(request) is True


def test_build_document_list_request_object_from_dict_with_invalid_filters():
    request = ro.DocumentListRequestObject.from_dict({'filters': 5})

    assert request.has_errors()
    assert request.errors[0]['parameter'] == 'filters'
    assert bool(request) is False

# --------------------------------------------------------------------------------
# /tests/use_cases/test_document_list_use_case.py:
# --------------------------------------------------------------------------------
import pytest
from unittest import mock

from keep_current_storage.domain.document import Document
from keep_current_storage.use_cases import request_objects as ro
from keep_current_storage.use_cases import document_use_cases as uc
from keep_current_storage.shared import response_object as res


@pytest.fixture
def domain_documents():
    """Return three Document instances built from fixed id/url pairs."""
    rows = (
        ('f853578c-fc0f-4e65-81b8-566c5dffa35a', 'https://arxiv.org/pdf/1606.04155.pdf'),
        ('fe2c3195-aeff-487a-a08f-e0bdc0ec6e9a', 'https://arxiv.org/pdf/1506.08941.pdf'),
        ('913694c6-435a-4366-ba0d-da5334a611b2', 'https://arxiv.org/pdf/1705.09655v2.pdf'),
    )
    return [Document(id=doc_id, url=doc_url) for doc_id, doc_url in rows]


def test_document_list_without_parameters(domain_documents):
    """With no filters the repository is queried with ``filters=None``."""
    repo = mock.Mock()
    repo.list.return_value = domain_documents
    use_case = uc.DocumentListUseCase(repo)

    request = ro.DocumentListRequestObject.from_dict({})
    response = use_case.execute(request)

    assert bool(response) is True
    repo.list.assert_called_with(filters=None)

    assert response.value == domain_documents


def test_document_list_with_filters(domain_documents):
    """Filters from the request are forwarded to ``repo.list`` unchanged."""
    qry_filters = {'a': 5}
    repo = mock.Mock()
    repo.list.return_value = domain_documents
    use_case = uc.DocumentListUseCase(repo)

    request = ro.DocumentListRequestObject.from_dict({'filters': qry_filters})
    response = use_case.execute(request)

    assert bool(response) is True
    repo.list.assert_called_with(filters=qry_filters)
    assert response.value == domain_documents


def test_document_list_handles_generic_error():
    """An exception from the repository surfaces as a system-error response."""
    repo = mock.Mock()
    repo.list.side_effect = Exception('Just an error message')
    use_case = uc.DocumentListUseCase(repo)

    request = ro.DocumentListRequestObject.from_dict({})
    response = use_case.execute(request)

    assert bool(response) is False
    expected = {
        'type': res.ResponseFailure.SYSTEM_ERROR,
        'message': "Exception: Just an error message"
    }
    assert response.value == expected


def test_document_list_handles_bad_request():
    """A non-mapping ``filters`` value yields a parameters-error response."""
    repo = mock.Mock()
    use_case = uc.DocumentListUseCase(repo)

    request = ro.DocumentListRequestObject.from_dict({'filters': 5})
    response = use_case.execute(request)

    assert bool(response) is False
    expected = {
        'type': res.ResponseFailure.PARAMETERS_ERROR,
        'message': "filters: Is not iterable"
    }
    assert response.value == expected

# --------------------------------------------------------------------------------