├── .gitignore ├── LICENSE ├── README.MD ├── __init__.py ├── app ├── __init__.py ├── api.py ├── app.py ├── extentions.py ├── models.py ├── query_helper.py ├── request_handler.py ├── response.py ├── result_processor.py ├── route_handler.py └── schema.py ├── config.py ├── images ├── AdjcencyList.png ├── NestedSet.png └── materializedpath.png ├── logging_config.ini ├── migrations ├── README ├── alembic.ini ├── env.py ├── script.py.mako └── versions │ ├── 454d1d76049a_nested_set_model.py │ ├── a21255c4781a_materialized_path__model.py │ └── c4eae3aacecd_adjcenty_list_model.py ├── requirements.txt ├── runtime.txt ├── tasks.py └── wsgi.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Created by https://www.gitignore.io 2 | 3 | ### OSX ### 4 | .DS_Store 5 | .AppleDouble 6 | .LSOverride 7 | 8 | # Icon must end with two \r 9 | Icon 10 | 11 | 12 | # Thumbnails 13 | ._* 14 | 15 | # Files that might appear on external disk 16 | .Spotlight-V100 17 | .Trashes 18 | 19 | # Directories potentially created on remote AFP share 20 | .AppleDB 21 | .AppleDesktop 22 | Network Trash Folder 23 | Temporary Items 24 | .apdisk 25 | 26 | 27 | ### Python ### 28 | # Byte-compiled / optimized / DLL files 29 | __pycache__/ 30 | *.py[cod] 31 | 32 | # C extensions 33 | *.so 34 | 35 | # Distribution / packaging 36 | .Python 37 | env/ 38 | build/ 39 | develop-eggs/ 40 | dist/ 41 | downloads/ 42 | eggs/ 43 | lib/ 44 | lib64/ 45 | parts/ 46 | sdist/ 47 | var/ 48 | *.egg-info/ 49 | .installed.cfg 50 | *.egg 51 | 52 | # PyInstaller 53 | # Usually these files are written by a python script from a template 54 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 55 | *.manifest 56 | *.spec 57 | 58 | # Installer logs 59 | pip-log.txt 60 | pip-delete-this-directory.txt 61 | 62 | # Unit test / coverage reports 63 | htmlcov/ 64 | .tox/ 65 | .coverage 66 | .cache 67 | nosetests.xml 68 | coverage.xml 69 | 70 | # Translations 71 | *.mo 72 | *.pot 73 | 74 | # Sphinx documentation 75 | docs/_build/ 76 | 77 | # PyBuilder 78 | target/ 79 | 80 | 81 | ### Django ### 82 | *.log 83 | *.pot 84 | *.pyc 85 | __pycache__/ 86 | local_settings.py 87 | 88 | .env 89 | db.sqlite3 90 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2016 M.Sohaib Farooqi 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 6 | 7 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 8 | 9 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -------------------------------------------------------------------------------- /README.MD: -------------------------------------------------------------------------------- 1 | ## Overview 2 | This project aims to compare different methods of retrieving hirercharical data using Python (Flask Framework) and PostgreSQL. It will benchmark query, response time and also provides brief note on pros and cons of each methods. 3 | 4 | ## Tools 5 | Following tools are used for benchmarking 6 | - Python (3.5.2) 7 | - Flask (0.11.1) 8 | - Flask-SQLAlchemy (2.1) 9 | - PostgreSQL (9.5.4) 10 | - Marshmallow (2.9.1) 11 | 12 | ## Installation and Running 13 | Run the following commands to set up the project and environment 14 | 15 | - `git clone https://github.com/SohaibFarooqi/hierarchical-data-model.git` 16 | - `virtualenv ` 17 | - `pip install -r requirements.txt` 18 | - `flask run` 19 | 20 | If you wish to insert dummy records in your table, Please go through the next section before running the application. 21 | 22 | ## Populating Table Rows 23 | There is a module in this application that can be use to insert test data in any specific model. `task.py` defines this job. It uses `PyInvoke` to execute. 24 | 25 | **Example Usage**: `invoke build --type aj` 26 | For all possible options please see `invoke --help build`. 27 | 28 | By default it inserts 10 rows, However you can modify this behaviour by updating *NUM_RECORDS* in `config.py`. If you are inserting significantly large number of rows you should also consider updating *CHUNK_SIZE* variable in `config.py`. This variable defines the number of rows after which SQLAlchemy should issue a commit command to the db. After successful completion of the job you can see records in your specified table. 29 | 30 | **Note**: This job currently support single tree insertions. Future releases of this project will include multi-tree support. 31 | 32 | ## Methods Overview: 33 | Following methods are used for benchmarking response and query time. 34 | 35 | - ### [Adjcency List](https://en.wikipedia.org/wiki/Adjacency_list) 36 | 37 | In the adjacency list model, each item in the table contains a pointer to its parent. The topmost element, in this case electronics, has a NULL value for its parent. The adjacency list model has the advantage of being quite simple 38 | While the adjacency list model can be dealt with fairly easily in client-side code, working with the model can be more problematic in pure SQL. 39 | 40 | **Example Data:** 41 | 42 | | id | parent_id | 43 | |-------------|-------------| 44 | | 1 | NULL | 45 | | 2 | 1 | 46 | | 3 | 2 | 47 | | 4 | 2 | 48 | | 5 | 2 | 49 | | 6 | 1 | 50 | | 7 | 6 | 51 | | 8 | 7 | 52 | | 9 | 6 | 53 | | 10 | 6 | 54 | 55 | 56 | - ### [Lineage Column (Materialized Path Views)](https://medium.com/notes-from-a-messy-desk/representing-trees-in-postgresql-cbcdae419022#.6666ewmcl) 57 | 58 | Lineage Column defines a `path` column which contains path of that node till root node. Path can be seperated with any character. Example `None.1.2.3`. This trick is really handy when searching for child and ansestor nodes. SQL `WITH` and `ANY` are really useful with this approach. 59 | With Postgres one can define [GIN](https://www.postgresql.org/docs/9.5/static/gin.html) or [GIST](https://www.postgresql.org/docs/9.5/static/gist.html) indexes on [LTREE](https://www.postgresql.org/docs/9.5/static/ltree.html) dtype to perform really fast filtering of data.This trick can be used in creating related searches and creating hashtags. 60 | Drawbacks of this approach are, inserts, update and moving a node is really expensive operation since every time you have to recalculate path and update it. 61 | 62 | **Example data:** 63 | 64 | 65 | | id | parent_id | Path | 66 | |-------------|-------------|------------------| 67 | | 1 | NULL | None | 68 | | 2 | 1 | None.1.2 | 69 | | 3 | 2 | None.1.2.3 | 70 | | 4 | 2 | None.1.2.4 | 71 | | 5 | 2 | None.1.2.5 | 72 | | 6 | 1 | None.1.6 | 73 | | 7 | 6 | None.1.6.7 | 74 | | 8 | 7 | None.1.6.7.8 | 75 | | 9 | 6 | None.1.6.9 | 76 | | 10 | 6 | None.1.6.10 | 77 | 78 | 79 | - ### [Nested Set](https://en.wikipedia.org/wiki/Nested_set_model) 80 | 81 | The nested set model is to number the nodes according to a tree traversal, which visits each node twice, assigning numbers in the order of visiting, and at both visits. This leaves two numbers for each node, which are stored as two attributes. Querying becomes inexpensive: hierarchy membership can be tested by comparing these numbers. Updating requires renumbering and is therefore expensive. Refinements that use rational numbers instead of integers can avoid renumbering, and so are faster to update, although much more complicated 82 | 83 | Nested sets are very slow for inserts because it requires updating left and right domain values for all records in the table after the insert. This can cause a lot of database thrash as many rows are rewritten and indexes rebuilt 84 | 85 | **Example Data:** 86 | 87 | 88 | | id | lft | rgt | 89 | |-------------|-----|-----| 90 | | 1 | 1 | 20 | 91 | | 2 | 2 | 9 | 92 | | 3 | 3 | 4 | 93 | | 4 | 5 | 6 | 94 | | 5 | 7 | 8 | 95 | | 6 | 10 | 19 | 96 | | 7 | 11 | 14 | 97 | | 8 | 12 | 13 | 98 | | 9 | 15 | 16 | 99 | | 10 | 17 | 18 | 100 | 101 | 102 | ## Routes 103 | Application provides following end-points: 104 | 105 | | function | Route | Description | 106 | | ------------- |-------------------|--------------| 107 | | get_subtree | `/subtree/` | Get sub tree based on parent id provided in path params. It returns whole tree if no parent id is provided | 108 | | get_root | `/root/` | Get list of all root nodes. Accepts no argument | 109 | | get_leaf | `/leaf/` | Get leaf nodes based on a parent id. If parent id has no direct leaf nodes it recursively traverse tree and return leaf nodes of decendents | 110 | | get_child | `/child/` | Get all immidiate childs based on parent id provided in path params | 111 | *Note: All routes are relative to base url* 112 | 113 | **Query Params**: To test the requests mentioned above, you need to provide `type = aj`, `type = mp` and `type = ns` for adjcency list, materialized path views and nested set. If `type` param is absent, the request will throw an error. 114 | 115 | **Example Request:** `www.mydomain.com/subtree/1/?type=ns` 116 | 117 | ## Benchmarking 118 | HTTP request time benchmarking is performed using [wrk](https://github.com/wg/wrk). Test is done on 119 | `getSubTree()` with 100 tree nodes. 120 | 121 | **Nested set:** 122 | ![Nested Set Model Results](images/NestedSet.png) 123 | 124 | **Adjcency List:** 125 | ![Adjcency List Model Results](images/AdjcencyList.png) 126 | 127 | **Materialized Path:** 128 | ![Materialized Path Model Results](images/materializedpath.png) 129 | 130 | ## Helpful Links 131 | 132 | - [Joe Celko wrote the book on SQL Trees & Hiearichies](https://www.amazon.com/dp/1558609202/?tag=stackoverfl08-20) 133 | - [What are the Options for Storing Hierarchical Data in a Relational Database?](http://stackoverflow.com/questions/4048151/what-are-the-options-for-storing-hierarchical-data-in-a-relational-database) 134 | - [Notes From a Messy desk](https://medium.com/notes-from-a-messy-desk/representing-trees-in-postgresql-cbcdae419022#.6666ewmcl) 135 | - [Mike Hillyer guides to Hierarchical-Data](http://mikehillyer.com/articles/managing-hierarchical-data-in-mysql/) 136 | -------------------------------------------------------------------------------- /__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /app/__init__.py: -------------------------------------------------------------------------------- 1 | from .app import create_app 2 | -------------------------------------------------------------------------------- /app/api.py: -------------------------------------------------------------------------------- 1 | from flask import Blueprint, request, Request 2 | from functools import wraps 3 | from .route_handler import RouteHanlder 4 | from .request_handler import RequestHandler 5 | 6 | 7 | """ 8 | Entry point of all requests. 9 | """ 10 | 11 | api_blueprint = Blueprint('api_blueprint', __name__) 12 | api = RouteHanlder(api_blueprint) 13 | 14 | 15 | @api.route('/', 'parent_id') 16 | def process_request(script_path, parent_id=-1): 17 | 18 | result_set = RequestHandler.handleRequest( 19 | script_path, 20 | type=request.args.get("type"), 21 | parent_id=parent_id 22 | ) 23 | return result_set 24 | -------------------------------------------------------------------------------- /app/app.py: -------------------------------------------------------------------------------- 1 | from flask import Flask 2 | import os 3 | from .extentions import configure_extensions, db 4 | from .api import api_blueprint 5 | 6 | 7 | def create_app(config): 8 | app = Flask(__name__) 9 | app.config.from_object(config) 10 | configure_extensions(app) 11 | app.register_blueprint(api_blueprint) 12 | return app 13 | -------------------------------------------------------------------------------- /app/extentions.py: -------------------------------------------------------------------------------- 1 | from .models import AdjcencyListModel 2 | from flask_sqlalchemy import SQLAlchemy 3 | from flask_migrate import Migrate 4 | 5 | db = SQLAlchemy() 6 | 7 | 8 | def configure_extensions(app): 9 | db.init_app(app) 10 | Migrate(app, db) 11 | -------------------------------------------------------------------------------- /app/models.py: -------------------------------------------------------------------------------- 1 | from .extentions import db 2 | from sqlalchemy import func 3 | from sqlalchemy_utils import LtreeType 4 | 5 | 6 | class Entity: 7 | id = db.Column(db.Integer, primary_key=True) 8 | 9 | 10 | class TimestampMixin: 11 | created_at = db.Column(db.DateTime, default=func.now()) 12 | updated_at = db.Column(db.DateTime, onupdate=func.now()) 13 | 14 | 15 | class AdjcencyListModel(db.Model, Entity, TimestampMixin): 16 | parent_id = db.Column(db.Integer, index=True) 17 | title = db.Column(db.String) 18 | description = db.Column(db.String) 19 | 20 | def __init__(self, **kwargs): 21 | self.__dict__.update(kwargs) 22 | self.id = kwargs['id'] 23 | self.created_at = kwargs['created_at'] 24 | self.updated_at = kwargs['updated_at'] 25 | self.parent_id = kwargs['parent_id'] 26 | self.title = kwargs['title'] 27 | self.description = kwargs['description'] 28 | 29 | 30 | class MaterializedPathModel(db.Model, Entity, TimestampMixin): 31 | parent_id = db.Column(db.Integer, index=True) 32 | title = db.Column(db.String) 33 | description = db.Column(db.String) 34 | path = db.Column(LtreeType) 35 | 36 | def __init__(self, **kwargs): 37 | self.__dict__.update(kwargs) 38 | self.id = kwargs['row_id'] 39 | self.created_at = kwargs['created_at'] 40 | self.updated_at = kwargs['updated_at'] 41 | self.parent_id = kwargs['parent_id'] 42 | self.title = kwargs['title'] 43 | self.description = kwargs['description'] 44 | self.path = kwargs['path'] 45 | 46 | 47 | class NestedSetModel(db.Model, Entity, TimestampMixin): 48 | parent_id = db.Column(db.Integer, index=True) 49 | title = db.Column(db.String) 50 | description = db.Column(db.String) 51 | lft = db.Column(db.Integer, index=True) 52 | rgt = db.Column(db.Integer, index=True) 53 | 54 | def __init__(self, **kwargs): 55 | self.__dict__.update(kwargs) 56 | for key in kwargs: 57 | self.key = kwargs[key] 58 | -------------------------------------------------------------------------------- /app/query_helper.py: -------------------------------------------------------------------------------- 1 | from .extentions import db 2 | from sqlalchemy.orm import aliased 3 | from flask import current_app as app 4 | from sqlalchemy_utils import Ltree 5 | from .models import AdjcencyListModel, MaterializedPathModel, NestedSetModel 6 | 7 | 8 | class QueryManager: 9 | 10 | """ 11 | This Class defines factor methods to retrieve model and method to be invoke 12 | based on query string params. 13 | """ 14 | 15 | # defines dict of all avaliable methods in QueryHelper class. Type specific methods are nested in sub-dict 16 | __all_methods__ = { 17 | 'child': 'getChildNodes', 18 | 'leaf': 'getLeafNodes', 19 | 'root': 'getRootNodes', 20 | 'lquery': 'LQuery', 21 | 'subtree': 'getSubTree' 22 | } 23 | 24 | def getModel(model_type): 25 | """ 26 | Factory Method to retrieve model from 'type' param in query string. 27 | Returns an instance of Model class 28 | """ 29 | if model_type == 'mp': 30 | return MaterializedPathModel 31 | 32 | elif model_type == 'aj': 33 | return AdjcencyListModel 34 | 35 | elif model_type == 'ns': 36 | return NestedSetModel 37 | 38 | else: 39 | raise ValueError('Model Type: {model_type} Not Implemented'.format( 40 | model_type=repr(model_type))) 41 | 42 | def getAction(script_root, model_type): 43 | """ 44 | Factory Method to retrieve QueryHelper method based on script_root in Url. 45 | check __all_methods__ and returns if method is available otherwise raise Exception 46 | """ 47 | 48 | if script_root in QueryManager.__all_methods__: 49 | return QueryManager.__all_methods__[script_root] 50 | 51 | else: 52 | raise ValueError("Action {script_root} Not Implemeted".format( 53 | script_root=repr(script_root))) 54 | 55 | def executeQuery(model, action, parent_id): 56 | return getattr(QueryHelper, action)(model, parent_id) 57 | 58 | 59 | class QueryHelper(): 60 | 61 | # Factory method to get Subtree based on type of model instance 62 | def getSubTree(*args): 63 | 64 | if args[0] == FirstModel: 65 | return SpecilizedQueryHelper.getAjSubTree(*args) 66 | 67 | elif args[0] == SecondModel: 68 | return SpecilizedQueryHelper.getMpSubTree(*args) 69 | 70 | elif args[0] == NestedSetModel: 71 | return SpecilizedQueryHelper.getNsSubTree(*args) 72 | 73 | else: 74 | raise ValueError("Subtree not implemented") 75 | 76 | # Get all root nodes. (Will be useful in case of Multiple trees) 77 | def getRootNodes(*args): 78 | root = args[0].query.filter( 79 | args[0].parent_id == app.config["ROOT_ID"]).all() 80 | return root 81 | 82 | # Get all leaf nodes based on a parent_id. if no parent is provided it will return all leaf nodes. 83 | def getLeafNodes(*args): 84 | if args[1] is None: 85 | first_model_alias = aliased(args[0]) 86 | return args[0].query.outerjoin(model_alias, args[0].id == model_alias.parent_id).filter(model_alias.parent_id == None).all() 87 | 88 | else: 89 | subtree = QueryHelper.getSubTree(*args) 90 | for node in subtree: 91 | if node.id in [data.parent_id for data in subtree]: 92 | subtree.remove(node) 93 | return subtree 94 | 95 | # Get list of immediate child nodes based on a parent_id 96 | def getChildNodes(*args): 97 | 98 | if args[1] is None: 99 | # rasie Error 100 | raise RuntimeError("Parent Id cannot be None") 101 | else: 102 | return args[0].query.filter(args[0].parent_id == args[1]).all() 103 | 104 | def LQuery(*args): 105 | 106 | if args[1] is None: 107 | expr = Ltree('None.*') 108 | else: 109 | # Validation fails at this point. 110 | expr = Ltree("*." + str(arg[1]) + ".*") 111 | return args[0].query.filter(args[0].path.lquery(expr)).all() 112 | 113 | 114 | class SpecilizedQueryHelper(QueryHelper): 115 | 116 | def getAjSubTree(*args): 117 | """ 118 | SO: http://stackoverflow.com/questions/24779093/query-self-referential-list-relationship-to-retrieve-several-level-child 119 | Docs: http://docs.sqlalchemy.org/en/rel_1_0/orm/query.html?highlight=cte#sqlalchemy.orm.query.Query.cte 120 | """ 121 | roots = args[0].query.filter(args[0].parent_id == args[1]).all() 122 | result_set = [] 123 | for root in roots: 124 | included = db.session.query( 125 | args[0].id 126 | ).filter( 127 | args[0].parent_id == root.id 128 | ).cte(name="included", recursive=True) 129 | 130 | included_alias = aliased(included, name="parent") 131 | model_alias = aliased(args[0], name="child") 132 | 133 | included = included.union_all( 134 | db.session.query( 135 | model_alias.id 136 | ).filter( 137 | model_alias.parent_id == included_alias.c.id 138 | ) 139 | ) 140 | model_ids = map( 141 | lambda _tuple: _tuple[0], 142 | [(root.id,)] + db.session.query(included.c.id).distinct().all(), 143 | ) 144 | 145 | result = args[0].query.filter(args[0].id.in_(model_ids)).all() 146 | result_set.extend(result) 147 | return result_set 148 | 149 | def getMpSubTree(*args): 150 | """ 151 | Using Custom opertator in SQLAlchemy (Docs): 152 | http://docs.sqlalchemy.org/en/latest/core/custom_types.html#redefining-and-creating-new-operators 153 | SO: http://stackoverflow.com/questions/12212636/sql-alchemy-overriding 154 | """ 155 | subq = args[0].query.with_entities(args[0].path).filter( 156 | args[0].id == args[1]).subquery() 157 | return args[0].query.filter(args[0].path.descendant_of(subq)).all() 158 | 159 | def getNsSubTree(*args): 160 | """ 161 | Related Artical: http://mikehillyer.com/articles/managing-hierarchical-data-in-mysql/ 162 | """ 163 | left, right = args[0].query.with_entities( 164 | args[0].lft, args[0].rgt).filter(args[0].id == args[1]).first() 165 | return args[0].query.filter(args[0].lft.between(left, right)).all() 166 | -------------------------------------------------------------------------------- /app/request_handler.py: -------------------------------------------------------------------------------- 1 | from .result_processor import ResultProcessor 2 | from .response import Response 3 | from .schema import entity_schema, entity_schemas 4 | 5 | 6 | class RequestHandler(): 7 | 8 | """ 9 | First Step in request execution process. 10 | """ 11 | def handleRequest(script_root, **kwargs): 12 | 13 | try: 14 | result_set = ResultProcessor.evalResult( 15 | script_root, kwargs['type'], kwargs['parent_id']) 16 | 17 | if len(result_set) > 0: 18 | return Response.create_response(result_set) 19 | else: 20 | return Response.create_404_response(None) 21 | 22 | except (ValueError, RuntimeError) as error: 23 | return Response.create_error_response(error) 24 | -------------------------------------------------------------------------------- /app/response.py: -------------------------------------------------------------------------------- 1 | from flask import jsonify 2 | from .schema import entity_schema, entity_schemas 3 | 4 | 5 | class Response(): 6 | 7 | """ 8 | Create Response from the retrieved result_set. Map the output using Mapper class. 9 | """ 10 | 11 | def create_response(result_set): 12 | try: 13 | mapper = Mapper.getMapper(result_set) 14 | final_response = jsonify({"data": mapper.dump(result_set).data}) 15 | return final_response 16 | except ValueError: 17 | raise 18 | 19 | def create_error_response(err): 20 | if hasattr(err, 'args'): 21 | return jsonify({"errors": err.args}) 22 | else: 23 | return jsonify({"errors": "No Message Available"}) 24 | 25 | def create_404_response(details): 26 | 27 | if details is None: 28 | return jsonify({"message": "No data available"}) 29 | else: 30 | return Response.create_error_response(details) 31 | 32 | 33 | class Mapper(): 34 | 35 | """ 36 | Map the output according to result_set data-structure. 37 | """ 38 | def getMapper(instance): 39 | 40 | if type(instance) == list: 41 | return entity_schemas 42 | 43 | elif type(instance) == object: 44 | return entity_schema 45 | 46 | else: 47 | raise ValueError('No Mapper found against {instance}'.format( 48 | instance=repr(instance))) 49 | -------------------------------------------------------------------------------- /app/result_processor.py: -------------------------------------------------------------------------------- 1 | from . query_helper import QueryManager 2 | 3 | 4 | class ResultProcessor: 5 | 6 | def evalResult(script_root, type, parent_id): 7 | """ 8 | Utilize QueryManager class to perform all steps to retrieve result. 9 | - get model instance 10 | - get method to be invoke 11 | - invoke method and return result. 12 | """ 13 | try: 14 | model = QueryManager.getModel(type) 15 | action = QueryManager.getAction(script_root, type) 16 | return QueryManager.executeQuery(model, action, parent_id) 17 | 18 | except (ValueError, RuntimeError) as error: 19 | raise 20 | -------------------------------------------------------------------------------- /app/route_handler.py: -------------------------------------------------------------------------------- 1 | from flask.views import MethodView 2 | 3 | 4 | class RouteHanlder: 5 | """ 6 | Defines all routing rules 7 | """ 8 | 9 | def __init__(self, blueprint): 10 | self.blueprint = blueprint 11 | 12 | def route(self, url, pk_name='pk', pk_type='int'): 13 | def decorator(resource): 14 | # URL rule defining default_value for key: pk 15 | self.blueprint.add_url_rule(url, 16 | defaults={pk_name: None}, 17 | view_func=resource, 18 | methods=['GET']) 19 | # example url for this rule http://abc.com/root/,http://abc.com/child/ 20 | self.blueprint.add_url_rule('{}/'.format(url), 21 | view_func=resource, 22 | methods=['GET']) 23 | # example url rule http://abc.com/child/2, http://abc.com/getsubtree/2 24 | self.blueprint.add_url_rule('{}/<{}:{}>/'.format(url, pk_type, pk_name), 25 | view_func=resource, 26 | methods=['GET']) 27 | 28 | return resource 29 | return decorator 30 | 31 | 32 | class Resource(MethodView): 33 | pass 34 | -------------------------------------------------------------------------------- /app/schema.py: -------------------------------------------------------------------------------- 1 | from marshmallow import Schema, fields 2 | 3 | 4 | class EntitySchema(Schema): 5 | id = fields.Int(dump_only=True) 6 | parent_id = fields.Int() 7 | title = fields.Str() 8 | description = fields.Str() 9 | created_at = fields.Date() 10 | updated_at = fields.Date() 11 | 12 | 13 | entity_schema = EntitySchema() 14 | entity_schemas = EntitySchema(many=True) 15 | -------------------------------------------------------------------------------- /config.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | 4 | class ApplicationConfig: 5 | SQLALCHEMY_DATABASE_URI = os.environ['DATABASE_URL'] 6 | #SQLALCHEMY_ECHO=True #print SQL Query to Console 7 | SQLALCHEMY_TRACK_MODIFICATIONS = False 8 | DEBUG = os.environ.get('FLASK_DEBUG', False) 9 | SECRET_KEY = 'abcs' 10 | ROOT_ID = -1 11 | NUM_RECORDS = 100 12 | CHUNK_SIZE = 10 13 | -------------------------------------------------------------------------------- /images/AdjcencyList.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sohaibfarooqi/hierarchical-data/d403f779230918d9fe728897df91dbf6c1aa449b/images/AdjcencyList.png -------------------------------------------------------------------------------- /images/NestedSet.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sohaibfarooqi/hierarchical-data/d403f779230918d9fe728897df91dbf6c1aa449b/images/NestedSet.png -------------------------------------------------------------------------------- /images/materializedpath.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sohaibfarooqi/hierarchical-data/d403f779230918d9fe728897df91dbf6c1aa449b/images/materializedpath.png -------------------------------------------------------------------------------- /logging_config.ini: -------------------------------------------------------------------------------- 1 | [loggers] 2 | keys=root 3 | 4 | [handlers] 5 | keys=stream_handler 6 | 7 | [formatters] 8 | keys=formatter 9 | 10 | [logger_root] 11 | level=INFO 12 | handlers=stream_handler 13 | 14 | [handler_stream_handler] 15 | class=StreamHandler 16 | level=DEBUG 17 | formatter=formatter 18 | args=(sys.stderr,) 19 | 20 | [formatter_formatter] 21 | format=%(asctime)s %(name)-12s %(levelname)-8s %(message)s -------------------------------------------------------------------------------- /migrations/README: -------------------------------------------------------------------------------- 1 | Generic single-database configuration. -------------------------------------------------------------------------------- /migrations/alembic.ini: -------------------------------------------------------------------------------- 1 | # A generic, single database configuration. 2 | 3 | [alembic] 4 | # template used to generate migration files 5 | # file_template = %%(rev)s_%%(slug)s 6 | 7 | # set to 'true' to run the environment during 8 | # the 'revision' command, regardless of autogenerate 9 | # revision_environment = false 10 | 11 | 12 | # Logging configuration 13 | [loggers] 14 | keys = root,sqlalchemy,alembic 15 | 16 | [handlers] 17 | keys = console 18 | 19 | [formatters] 20 | keys = generic 21 | 22 | [logger_root] 23 | level = WARN 24 | handlers = console 25 | qualname = 26 | 27 | [logger_sqlalchemy] 28 | level = WARN 29 | handlers = 30 | qualname = sqlalchemy.engine 31 | 32 | [logger_alembic] 33 | level = INFO 34 | handlers = 35 | qualname = alembic 36 | 37 | [handler_console] 38 | class = StreamHandler 39 | args = (sys.stderr,) 40 | level = NOTSET 41 | formatter = generic 42 | 43 | [formatter_generic] 44 | format = %(levelname)-5.5s [%(name)s] %(message)s 45 | datefmt = %H:%M:%S 46 | -------------------------------------------------------------------------------- /migrations/env.py: -------------------------------------------------------------------------------- 1 | from __future__ import with_statement 2 | from alembic import context 3 | from sqlalchemy import engine_from_config, pool 4 | from logging.config import fileConfig 5 | import logging 6 | 7 | # this is the Alembic Config object, which provides 8 | # access to the values within the .ini file in use. 9 | config = context.config 10 | 11 | # Interpret the config file for Python logging. 12 | # This line sets up loggers basically. 13 | fileConfig(config.config_file_name) 14 | logger = logging.getLogger('alembic.env') 15 | 16 | # add your model's MetaData object here 17 | # for 'autogenerate' support 18 | # from myapp import mymodel 19 | # target_metadata = mymodel.Base.metadata 20 | from flask import current_app 21 | config.set_main_option('sqlalchemy.url', 22 | current_app.config.get('SQLALCHEMY_DATABASE_URI')) 23 | target_metadata = current_app.extensions['migrate'].db.metadata 24 | 25 | # other values from the config, defined by the needs of env.py, 26 | # can be acquired: 27 | # my_important_option = config.get_main_option("my_important_option") 28 | # ... etc. 29 | 30 | 31 | def run_migrations_offline(): 32 | """Run migrations in 'offline' mode. 33 | 34 | This configures the context with just a URL 35 | and not an Engine, though an Engine is acceptable 36 | here as well. By skipping the Engine creation 37 | we don't even need a DBAPI to be available. 38 | 39 | Calls to context.execute() here emit the given string to the 40 | script output. 41 | 42 | """ 43 | url = config.get_main_option("sqlalchemy.url") 44 | context.configure(url=url) 45 | 46 | with context.begin_transaction(): 47 | context.run_migrations() 48 | 49 | 50 | def run_migrations_online(): 51 | """Run migrations in 'online' mode. 52 | 53 | In this scenario we need to create an Engine 54 | and associate a connection with the context. 55 | 56 | """ 57 | 58 | # this callback is used to prevent an auto-migration from being generated 59 | # when there are no changes to the schema 60 | # reference: http://alembic.readthedocs.org/en/latest/cookbook.html 61 | def process_revision_directives(context, revision, directives): 62 | if getattr(config.cmd_opts, 'autogenerate', False): 63 | script = directives[0] 64 | if script.upgrade_ops.is_empty(): 65 | directives[:] = [] 66 | logger.info('No changes in schema detected.') 67 | 68 | engine = engine_from_config(config.get_section(config.config_ini_section), 69 | prefix='sqlalchemy.', 70 | poolclass=pool.NullPool) 71 | 72 | connection = engine.connect() 73 | context.configure(connection=connection, 74 | target_metadata=target_metadata, 75 | process_revision_directives=process_revision_directives, 76 | **current_app.extensions['migrate'].configure_args) 77 | 78 | try: 79 | with context.begin_transaction(): 80 | context.run_migrations() 81 | finally: 82 | connection.close() 83 | 84 | if context.is_offline_mode(): 85 | run_migrations_offline() 86 | else: 87 | run_migrations_online() 88 | -------------------------------------------------------------------------------- /migrations/script.py.mako: -------------------------------------------------------------------------------- 1 | """${message} 2 | 3 | Revision ID: ${up_revision} 4 | Revises: ${down_revision} 5 | Create Date: ${create_date} 6 | 7 | """ 8 | 9 | # revision identifiers, used by Alembic. 10 | revision = ${repr(up_revision)} 11 | down_revision = ${repr(down_revision)} 12 | 13 | from alembic import op 14 | import sqlalchemy as sa 15 | ${imports if imports else ""} 16 | 17 | def upgrade(): 18 | ${upgrades if upgrades else "pass"} 19 | 20 | 21 | def downgrade(): 22 | ${downgrades if downgrades else "pass"} 23 | -------------------------------------------------------------------------------- /migrations/versions/454d1d76049a_nested_set_model.py: -------------------------------------------------------------------------------- 1 | """nested_set_model 2 | 3 | Revision ID: 454d1d76049a 4 | Revises: a21255c4781a 5 | Create Date: 2016-10-29 19:10:32.416766 6 | 7 | """ 8 | 9 | # revision identifiers, used by Alembic. 10 | revision = '454d1d76049a' 11 | down_revision = 'a21255c4781a' 12 | 13 | from alembic import op 14 | import sqlalchemy as sa 15 | 16 | 17 | def upgrade(): 18 | ### commands auto generated by Alembic - please adjust! ### 19 | op.create_table('nested_set_model', 20 | sa.Column('id', sa.Integer(), nullable=False), 21 | sa.Column('created_at', sa.DateTime(), nullable=True), 22 | sa.Column('updated_at', sa.DateTime(), nullable=True), 23 | sa.Column('parent_id', sa.Integer(), nullable=True), 24 | sa.Column('title', sa.String(), nullable=True), 25 | sa.Column('description', sa.String(), nullable=True), 26 | sa.Column('lft', sa.Integer(), nullable=True), 27 | sa.Column('rgt', sa.Integer(), nullable=True), 28 | sa.PrimaryKeyConstraint('id') 29 | ) 30 | op.create_index(op.f('ix_nested_set_model_lft'), 'nested_set_model', ['lft'], unique=False) 31 | op.create_index(op.f('ix_nested_set_model_parent_id'), 'nested_set_model', ['parent_id'], unique=False) 32 | op.create_index(op.f('ix_nested_set_model_rgt'), 'nested_set_model', ['rgt'], unique=False) 33 | ### end Alembic commands ### 34 | 35 | 36 | def downgrade(): 37 | ### commands auto generated by Alembic - please adjust! ### 38 | op.drop_index(op.f('ix_nested_set_model_rgt'), table_name='nested_set_model') 39 | op.drop_index(op.f('ix_nested_set_model_parent_id'), table_name='nested_set_model') 40 | op.drop_index(op.f('ix_nested_set_model_lft'), table_name='nested_set_model') 41 | op.drop_table('nested_set_model') 42 | ### end Alembic commands ### 43 | -------------------------------------------------------------------------------- /migrations/versions/a21255c4781a_materialized_path__model.py: -------------------------------------------------------------------------------- 1 | """materialized_path__model 2 | 3 | Revision ID: a21255c4781a 4 | Revises: c4eae3aacecd 5 | Create Date: 2016-10-29 19:08:05.792279 6 | 7 | """ 8 | 9 | # revision identifiers, used by Alembic. 10 | revision = 'a21255c4781a' 11 | down_revision = 'c4eae3aacecd' 12 | 13 | from alembic import op 14 | import sqlalchemy as sa 15 | from sqlalchemy_utils import LtreeType 16 | 17 | 18 | def upgrade(): 19 | ### commands auto generated by Alembic - please adjust! ### 20 | op.create_table('materialized_path_model', 21 | sa.Column('id', sa.Integer(), nullable=False), 22 | sa.Column('created_at', sa.DateTime(), nullable=True), 23 | sa.Column('updated_at', sa.DateTime(), nullable=True), 24 | sa.Column('parent_id', sa.Integer(), nullable=True), 25 | sa.Column('title', sa.String(), nullable=True), 26 | sa.Column('description', sa.String(), nullable=True), 27 | sa.Column('path', LtreeType(), nullable=True), 28 | sa.PrimaryKeyConstraint('id') 29 | ) 30 | op.create_index(op.f('ix_materialized_path_model_parent_id'), 'materialized_path_model', ['parent_id'], unique=False) 31 | ### end Alembic commands ### 32 | 33 | 34 | def downgrade(): 35 | ### commands auto generated by Alembic - please adjust! ### 36 | op.drop_index(op.f('ix_materialized_path_model_parent_id'), table_name='materialized_path_model') 37 | op.drop_table('materialized_path_model') 38 | ### end Alembic commands ### 39 | -------------------------------------------------------------------------------- /migrations/versions/c4eae3aacecd_adjcenty_list_model.py: -------------------------------------------------------------------------------- 1 | """adjcenty_list_model 2 | 3 | Revision ID: c4eae3aacecd 4 | Revises: None 5 | Create Date: 2016-10-29 19:06:59.655324 6 | 7 | """ 8 | 9 | # revision identifiers, used by Alembic. 10 | revision = 'c4eae3aacecd' 11 | down_revision = None 12 | 13 | from alembic import op 14 | import sqlalchemy as sa 15 | 16 | 17 | def upgrade(): 18 | ### commands auto generated by Alembic - please adjust! ### 19 | op.create_table('adjcency_list_model', 20 | sa.Column('id', sa.Integer(), nullable=False), 21 | sa.Column('created_at', sa.DateTime(), nullable=True), 22 | sa.Column('updated_at', sa.DateTime(), nullable=True), 23 | sa.Column('parent_id', sa.Integer(), nullable=True), 24 | sa.Column('title', sa.String(), nullable=True), 25 | sa.Column('description', sa.String(), nullable=True), 26 | sa.PrimaryKeyConstraint('id') 27 | ) 28 | op.create_index(op.f('ix_adjcency_list_model_parent_id'), 'adjcency_list_model', ['parent_id'], unique=False) 29 | ### end Alembic commands ### 30 | 31 | 32 | def downgrade(): 33 | ### commands auto generated by Alembic - please adjust! ### 34 | op.drop_index(op.f('ix_adjcency_list_model_parent_id'), table_name='adjcency_list_model') 35 | op.drop_table('adjcency_list_model') 36 | ### end Alembic commands ### 37 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | alembic==0.8.7 2 | Flask==0.11.1 3 | Flask-SQLAlchemy==2.1 4 | Flask-Migrate==2.0.0 5 | Mako==1.0.4 6 | psycopg2==2.6.2 7 | SQLAlchemy==1.1.0b3 8 | invoke==0.13.0 9 | marshmallow==2.15.1 10 | sqlalchemy-utils==0.32.9 11 | -------------------------------------------------------------------------------- /runtime.txt: -------------------------------------------------------------------------------- 1 | python-3.5.2 2 | -------------------------------------------------------------------------------- /tasks.py: -------------------------------------------------------------------------------- 1 | from invoke import task 2 | from config import ApplicationConfig 3 | from app.models import AdjcencyListModel, MaterializedPathModel, NestedSetModel 4 | from app.extentions import db 5 | from datetime import datetime, timedelta 6 | import random 7 | import string 8 | from app.app import create_app 9 | import logging 10 | from logging.config import fileConfig 11 | from sqlalchemy_utils import Ltree 12 | 13 | 14 | """ 15 | Dated: 2016-10-19 16 | Overview: This class help in building dummy records to perform benchmarking. It has options to switch between data models. Currently supported models types are 17 | Adjcency List, Materialized Path Views and Nested Set. use command invoke --help build to view all possible options. 18 | """ 19 | 20 | 21 | fileConfig('logging_config.ini') 22 | logger = logging.getLogger('tasks') 23 | app = create_app(ApplicationConfig).app_context().push() 24 | NUM_RECORDS = ApplicationConfig.NUM_RECORDS 25 | CHUNK_SIZE = ApplicationConfig.CHUNK_SIZE 26 | LEFT = 1 27 | 28 | 29 | @task(help={'type': "Use --type aj OR --type mp OR --type ns For AdjecencyList, MateriallizedPath, NestedSetModel respectively"}) 30 | def build(ctx, type=None): 31 | """ 32 | Insert Test data to specified table. See help for more details. 33 | """ 34 | 35 | logger.info('Job Started') 36 | logger.info('Number of Records to be Inserted: %i', NUM_RECORDS) 37 | logger.info('Chunk Size: %i', CHUNK_SIZE) 38 | 39 | if type == 'aj': 40 | last_id = db.session.query(AdjcencyListModel).order_by( 41 | AdjcencyListModel.id.desc()).first() 42 | start, end_range, parent_id, path = getMeta(last_id) 43 | insertAdjecencyList(last_id, start, end_range, parent_id) 44 | 45 | elif type == 'mp': 46 | last_id = db.session.query(MaterializedPathModel).order_by( 47 | MaterializedPathModel.id.desc()).first() 48 | start, end_range, parent_id, path = getMeta(last_id) 49 | insertMateriallizedPath(last_id, start, end_range, parent_id, path) 50 | 51 | elif type == 'ns': 52 | last_id = db.session.query(NestedSetModel).order_by( 53 | NestedSetModel.id.desc()).first() 54 | start, end_range, parent_id, path = getMeta(last_id) 55 | insertNestedSet(last_id, start, end_range, parent_id, path) 56 | 57 | else: 58 | logger.info('Please Specify Data-Model Type! Use --help for more info') 59 | 60 | logger.info('Job Finished') 61 | 62 | 63 | def insertAdjecencyList(last_id, start_range, end_range, parent_id): 64 | 65 | for i in range(start_range, end_range): 66 | object_created_date = datetime.today() - timedelta(hours=i) 67 | object_updated_date = datetime.today() - timedelta(minutes=i) 68 | desc = randomword(i+10) 69 | title = randomword(5) 70 | 71 | if i % 3 == 0: 72 | parent_id += 1 73 | 74 | model = AdjcencyListModel( 75 | id=i, 76 | created_at=object_created_date.strftime("%Y-%m-%d %H:%M:%S"), 77 | updated_at=object_updated_date.strftime("%Y-%m-%d %H:%M:%S"), 78 | parent_id=parent_id, 79 | title=title, 80 | description=desc 81 | ) 82 | 83 | db.session.add(model) 84 | 85 | if parent_id < 0: 86 | parent_id += 2 87 | 88 | if(i % CHUNK_SIZE == 0): 89 | logger.info('Commiting Session Rows') 90 | db.session.commit() 91 | 92 | db.session.commit() 93 | 94 | 95 | def insertMateriallizedPath(last_id, start_range, end_range, parent_id, parent_path): 96 | 97 | for i in range(start_range, end_range): 98 | object_created_date = datetime.today() - timedelta(hours=i) 99 | object_updated_date = datetime.today() - timedelta(minutes=i) 100 | desc = randomword(i+10) 101 | title = randomword(5) 102 | model = MaterializedPathModel( 103 | row_id=i, 104 | created_at=object_created_date.strftime("%Y-%m-%d %H:%M:%S"), 105 | updated_at=object_updated_date.strftime("%Y-%m-%d %H:%M:%S"), 106 | parent_id=parent_id, 107 | title=title, 108 | description=desc, 109 | path=Ltree(parent_path + "." + str(i)) 110 | 111 | ) 112 | db.session.add(model) 113 | 114 | if parent_id < 0: 115 | parent_id += 2 116 | parent_path = parent_path + "." + str(i) 117 | 118 | if i % 3 == 0: 119 | parent_id += 1 120 | parent_object = MaterializedPathModel.query.filter( 121 | MaterializedPathModel.id == parent_id).first() 122 | if parent_object is not None: 123 | parent_path = str(parent_object.path) 124 | 125 | if(i % CHUNK_SIZE == 0): 126 | logger.info('Commiting Session Rows') 127 | db.session.commit() 128 | db.session.commit() 129 | 130 | 131 | def insertNestedSet(last_id, start_range, end_range, parent_id, parent_path): 132 | """ 133 | - Inserting Tree 134 | """ 135 | 136 | for i in range(start_range, end_range): 137 | object_created_date = datetime.today() - timedelta(hours=i) 138 | object_updated_date = datetime.today() - timedelta(minutes=i) 139 | desc = randomword(i+10) 140 | title = randomword(5) 141 | 142 | if i % 3 == 0: 143 | parent_id += 1 144 | 145 | model = NestedSetModel( 146 | id=i, 147 | created_at=object_created_date.strftime("%Y-%m-%d %H:%M:%S"), 148 | updated_at=object_updated_date.strftime("%Y-%m-%d %H:%M:%S"), 149 | parent_id=parent_id, 150 | title=title, 151 | description=desc 152 | ) 153 | 154 | db.session.add(model) 155 | 156 | if parent_id < 0: 157 | parent_id += 2 158 | 159 | if(i % CHUNK_SIZE == 0): 160 | logger.info('Commiting Session Rows') 161 | db.session.commit() 162 | 163 | db.session.commit() 164 | 165 | """ 166 | - Inserting Left and Right of nodes 167 | """ 168 | if last_id is None: 169 | parent_id = -1 170 | else: 171 | parent_id = last_id.id 172 | 173 | tree = NestedSetModel.query\ 174 | .with_entities(NestedSetModel.id, NestedSetModel.parent_id)\ 175 | .filter(NestedSetModel.rgt == None)\ 176 | .all() 177 | 178 | graph = {} 179 | 180 | for child in tree: 181 | childs = set( 182 | [child_node.id for child_node in tree if child_node.parent_id == child.id]) 183 | if childs.__len__() > 0: 184 | graph[child.id] = childs 185 | 186 | dfs(graph, 1) 187 | 188 | 189 | def dfs(graph, start, visited=None): 190 | 191 | if visited is None: 192 | visited = set() 193 | visited.add(start) 194 | global LEFT 195 | if start in graph: 196 | 197 | node = NestedSetModel.query.filter(NestedSetModel.id == start).first() 198 | node.lft = LEFT 199 | db.session.add(node) 200 | db.session.commit() 201 | 202 | for next in graph[start] - visited: 203 | LEFT = LEFT + 1 204 | dfs(graph, next, visited) 205 | if start not in graph: 206 | node = NestedSetModel.query.filter(NestedSetModel.id == start).first() 207 | node.lft = LEFT 208 | LEFT = LEFT + 1 209 | node.rgt = LEFT 210 | db.session.add(node) 211 | db.session.commit() 212 | 213 | else: 214 | if graph[start].issubset(visited): 215 | node = NestedSetModel.query.filter( 216 | NestedSetModel.id == start).first() 217 | LEFT = LEFT + 1 218 | node.rgt = LEFT 219 | db.session.add(node) 220 | db.session.commit() 221 | 222 | return visited 223 | 224 | 225 | def randomword(length): 226 | return ''.join(random.choice(string.ascii_lowercase) for i in range(length)) 227 | 228 | 229 | def getMeta(id): 230 | if id is None: 231 | end_range = 1 + NUM_RECORDS 232 | start = 1 233 | parent_id = -1 234 | path = 'None' 235 | return start, end_range, parent_id, path 236 | else: 237 | start = id.id + 1 238 | end_range = (id.id + 1) + NUM_RECORDS 239 | parent_id = id.parent_id 240 | parent_object = MaterializedPathModel.query.filter( 241 | MaterializedPathModel.id == parent_id).first() 242 | path = str(parent_object.path) 243 | return start, end_range, parent_id, path 244 | -------------------------------------------------------------------------------- /wsgi.py: -------------------------------------------------------------------------------- 1 | from .app.app import create_app 2 | from .config import ApplicationConfig 3 | from functools import wraps 4 | 5 | app = create_app(ApplicationConfig) 6 | if __name__ == '__main__': 7 | app.run() 8 | --------------------------------------------------------------------------------