├── .bumpversion.cfg ├── .gitignore ├── CHANGES.txt ├── LICENSE ├── MANIFEST.in ├── README.md ├── description.rst ├── development.ini ├── examples ├── api.ini └── web.ini ├── pip-dev.txt ├── pip-test.txt ├── pip.txt ├── setup.cfg ├── setup.py ├── suma ├── __init__.py ├── api │ ├── __init__.py │ ├── config.py │ ├── resources.py │ ├── routes.py │ ├── schemas │ │ ├── __init__.py │ │ └── link.py │ ├── serializers │ │ ├── __init__.py │ │ └── link.py │ ├── settings │ │ ├── __init__.py │ │ ├── adapter.py │ │ ├── celery.py │ │ ├── extension.py │ │ ├── logger.py │ │ └── service.py │ ├── subscribers │ │ ├── __init__.py │ │ └── request.py │ └── views.py ├── celery.py ├── common │ ├── __init__.py │ ├── resources │ │ ├── __init__.py │ │ └── base.py │ └── services │ │ ├── __init__.py │ │ └── factories.py ├── core │ ├── __init__.py │ ├── migrations │ │ ├── README │ │ ├── env.py │ │ ├── script.py.mako │ │ └── versions │ │ │ └── INFO │ ├── models │ │ ├── __init__.py │ │ ├── link.py │ │ ├── meta │ │ │ ├── __init__.py │ │ │ ├── base.py │ │ │ ├── schema.py │ │ │ └── types.py │ │ └── stat.py │ ├── scripts │ │ ├── __init__.py │ │ └── initializedb.py │ ├── services │ │ ├── __init__.py │ │ ├── file.py │ │ ├── hashid.py │ │ ├── interfaces │ │ │ ├── __init__.py │ │ │ ├── file.py │ │ │ ├── hashid.py │ │ │ ├── link.py │ │ │ ├── screenshot.py │ │ │ ├── stat.py │ │ │ └── task.py │ │ ├── link.py │ │ ├── screenshot.py │ │ ├── stat.py │ │ └── task.py │ └── tasks.py └── web │ ├── __init__.py │ ├── config.py │ ├── resources.py │ ├── routes.py │ ├── settings │ ├── __init__.py │ ├── adapter.py │ ├── celery.py │ ├── extension.py │ ├── logger.py │ └── service.py │ └── views.py ├── tests ├── api │ ├── schemas │ │ └── test_link_schema.py │ └── views │ │ └── test_api_link_views.py ├── conftest.py ├── core │ ├── models │ │ ├── test_link_model.py │ │ └── test_stat_model.py │ └── services │ │ ├── conftest.py │ │ ├── test_hashid_service.py │ │ ├── test_link_service.py │ │ └── test_stat_service.py └── web │ └── 
views │ └── test_web_link_views.py └── tox.ini /.bumpversion.cfg: -------------------------------------------------------------------------------- 1 | [bumpversion] 2 | current_version = 0.1.1 3 | files = setup.py 4 | parse = (?P\d+)\.(?P\d+)\.(?P\d+) 5 | search = version='{current_version}' 6 | replace = version='{new_version}' 7 | 8 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .idea 2 | *.egg-info 3 | *.pyc 4 | *.swp 5 | *.swo 6 | *.egg-info 7 | build/ 8 | .coverage 9 | .tags 10 | .DS_Store 11 | __pycache__ 12 | ghostdriver.log 13 | /storage 14 | .tox 15 | .cache 16 | .eggs 17 | /dist 18 | -------------------------------------------------------------------------------- /CHANGES.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rach/suma/58446efc2b6abdcfde3ee226d54cb699e66d2cc2/CHANGES.txt -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. 
For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 
47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "{}" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright 2015 Rachid Belaid 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include *.txt *.ini *.cfg *.rst *.md 2 | recursive-include suma *.ico *.png *.css *.gif *.jpg *.txt *.js *.html *.eot *.svg *.ttf *.woff *.otf *.csv *.py *.gz *.pdf *.json INFO *.mako 3 | 4 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | #SUMA 2 | 3 | Suma stands for **S**hort **U**RL **M**anagement **A**pp. 4 | The role of Suma is to manage external links and extract data from them. Suma is a small web service to easily do the following: 5 | 6 | - Creating short URLs for external links within your application 7 | - Extracting Title 8 | - Capturing Screenshot from URL 9 | - Blocking URL's 10 | - Collecting clicks 11 | 12 | If your application needs to display external links then it's probably important that you don't redirect the user directly to the URL, so that you can fight spam, phishing attacks or inappropriate links. 
13 | 14 | Suma is at an early stage of development, but the goal of this project is to provide a microservice which covers the basic need for a company to protect their users from external links within their products. You can read more about the motivation behind Suma [here](http://rachbelaid.com/introducing-suma) 15 | 16 | ##Use cases 17 | 18 | If it isn't immediately clear what Suma is for, let's illustrate it with a few use cases: 19 | 20 | - Public Feeds (eg: Twitter or FB like app) which allow users to post links publicly 21 | - Reviews or comments allowing external links 22 | - Display link title or screenshot to preview an external link within your application (eg: slack like app) 23 | 24 | To summarize: if your application allows external links from users, then Suma can be useful. 25 | 26 | ##Project Structure and Usage 27 | 28 | Suma is composed of 2 web applications: a private API to manage the links and a public API to handle the link redirection and statistics collection. 29 | 30 | Suma is structured this way to make it easier to protect the private API. The private API shouldn't be exposed to the outside world and it has to be used via your application as a microservice. 31 | 32 | Eg. User Jane Doe shares a comment which contains a link. Your application detects a link, then calls Suma to convert this link, then your application handles the substitution. 33 | 34 | For now, it is the responsibility of your application to handle detection and substitution of links. Maybe in the future, we could add some helpers. 35 | 36 | The code of the private API is located in `suma/api` and the code of the public API is located in `suma/web`. 37 | 38 | ##Installation 39 | 40 | ###Requirements 41 | 42 | - Python 2.7 43 | - Postgres 9.3+ 44 | - Redis 45 | - Phantomjs (for the screenshots) 46 | - Unix-based operating system 47 | 48 | ### Creating an environment 49 | 50 | The first thing you’ll need is the Python virtualenv package. 
You probably already have this, but if not, you can install it with: 51 | 52 | pip install virtualenv 53 | 54 | Once that’s done, choose a location for the environment, and create it with the virtualenv command. 55 | 56 | virtualenv ~/.virtualenv/suma/ 57 | 58 | Finally, activate your virtualenv: 59 | 60 | source ~/.virtualenv/suma/bin/activate 61 | 62 | Now that you have an environment set up, you can proceed to the installation of Suma 63 | 64 | ### Installing Suma 65 | 66 | You can install Suma and all its dependencies with the `pip` command used to install virtualenv: 67 | 68 | pip install suma 69 | 70 | If you plan to use s3 then you can install Suma with the s3 dependencies: 71 | 72 | pip install suma[s3] 73 | 74 | 75 | ###Configuration files 76 | 77 | To run Suma, you will need to create 2 ini files which follow the [Paste.Deploy](http://docs.pylonsproject.org/projects/pyramid/en/latest/narr/paste.html) format. 78 | We provide 2 examples of files to run suma: 79 | 80 | - [api.ini](https://github.com/rach/suma/blob/master/examples/api.ini) to run the private API. 81 | - [web.ini](https://github.com/rach/suma/blob/master/examples/web.ini) to run the public API. 82 | 83 | Within these files you need to change at least these 2 important settings: 84 | 85 | - `sqlalchemy.url` to a dsn representing your database settings. You can read more about it [here](http://docs.sqlalchemy.org/en/latest/core/engines.html). Suma only supports PostgreSQL so you cannot use a dsn for another RDBMS 86 | 87 | - `hashid.secret` to a secret of your choice. The secret is used to generate different short URL's per installation 88 | 89 | You can also configure these other settings if you aren't happy with the values in the example files: 90 | 91 | - `celery.broker_url` is the dsn of the celery broker. We suggest that you use redis but celery supports other brokers. 
You can refer to the celery [documentation](http://docs.celeryproject.org/en/latest/getting-started/brokers/) for more info. 92 | 93 | - `storage.s3` is a boolean to choose between s3 or local storage for the screenshots. This setting is set to `false` by default for using the local filesystem. 94 | 95 | - `storage.local.base_path` a local path to where to create the screenshots if you are using local storage 96 | - `storage.local.base_url`: this is the URL where the application accesses the screenshots if you are using local storage. By default, the public api can serve the screenshots if you set the value to a relative URL (eg: `/storage`). 97 | 98 | - `storage.s3.base_url`: the url where the screenshot can be accessed from if you are using s3. 99 | - `storage.s3.bucket_name`: the name of the s3 bucket 100 | - `storage.s3.access_key`: the access key id to write in the s3 bucket 101 | - `storage.s3.secret_key`: the secret key id to write in the bucket 102 | - `suma.tasks`: a list of tasks to run on the URL. The possible tasks are `html`, `text`, `screenshot` and `title`. 103 | 104 | ### Database 105 | 106 | Suma uses PostgreSQL. It is not required to create a new database if you are using PostgreSQL already because Suma uses a different schema than public to avoid conflicts. 107 | 108 | After installing and configuring Suma, you can create the required tables and schema with: 109 | 110 | # activating the virtualenv 111 | source ~/.virtualenv/suma/bin/activate 112 | 113 | 114 | initialize_suma_db application.ini 115 | 116 | 117 | ###Running Suma 118 | 119 | We provide 2 examples of config files with already a section to run `uWSGI` with some basic settings. First, you need to install [uWSGI](https://pypi.python.org/pypi/uWSGI). 
120 | 121 | # activating the virtualenv 122 | source ~/.virtualenv/suma/bin/activate 123 | 124 | 125 | # installing dependencies to run suma 126 | pip install uwsgi 127 | 128 | 129 | # running the private api on the port 8082 130 | uwsgi --ini examples/api.ini 131 | 132 | 133 | # running the public api 134 | uwsgi --ini examples/web.ini 135 | 136 | 137 | # running the background workers to collect link data 138 | celery worker -A suma.celery --ini examples/api.ini 139 | 140 | We suggest that you use uWSGI because it has good support for [PasteDeploy](https://pypi.python.org/pypi/PasteDeploy). You need to configure the uWSGI's settings to run the application as you prefer (eg: socket + nginx) 141 | 142 | ##Background Tasks 143 | 144 | Suma can run 4 types of background tasks: 145 | 146 | - `html` to store the HTML from the URL. This can be used as page cache or to run operations on it. 147 | - `text` to extract the main content from the URL. This can be used to display the content preview. 148 | - `screenshot` to generate a screenshot from the URL. This can be used to display thumbnail previews within your application. 149 | - `title` to extract the title from the URL. This can also be used for preview. 150 | 151 | ##Banning Rules 152 | 153 | When you ban a URL you can have 3 modes: 154 | 155 | - `url`: to ban exactly identical URL's 156 | - `netloc`: to ban any URL which matches the same FQDN 157 | - `path`: to ban any URL which matches FQDN and the path without considering the query string or an anchor. 158 | 159 | To illustrate the rules let's assume these 3 urls: 160 | 161 | - https://google.com/test 162 | - http://google.com/test?something=1#title 163 | - https://google.com/random 164 | 165 | if we ban `https://google.com/test` with mode `url` then only `https://google.com/test` will be blocked. 
166 | 167 | if we ban `https://google.com/test` with mode `path` then `https://google.com/test` and `http://google.com/test?something=1#title` will be blocked. 168 | 169 | if we ban `https://google.com/test` with mode `netloc` then all the URL's above will be blocked. 170 | 171 | ##API's 172 | 173 | ###Private 174 | 175 | The private API allows you to create links, ban links and refresh a link's data. 176 | 177 | ####Create link 178 | 179 | Create a link in Suma for a specific URL. You can also use a `user_id` to generate different links for the same url. 180 | 181 | POST /links 182 | 183 | #####Parameters 184 | 185 | 186 | | Name | Type | Required | Description | 187 | | --------- |:---------:|:--------:| ----------------------------------------------------------- | 188 | | url | string | true | The url that you want to create a link for | 189 | | user_id | integer | false | To have different links created for same URL between users | 190 | 191 | 192 | #####Payload 193 | 194 | { 195 | "url": "https://google.com" 196 | } 197 | 198 | or using `user_id`: 199 | 200 | { 201 | "url": "https://google.com", 202 | "user_id": 42 203 | } 204 | 205 | #####Response 206 | 207 | Status: 201 Created 208 | 209 | Content-Type: application/json 210 | 211 | { 212 | "data": { 213 | "attributes": { 214 | "title": null, 215 | "screenshot": null, 216 | "updated": "2015-12-18T08:20:27.929472", 217 | "created": "2015-12-18T08:20:27.929472", 218 | "url": "https://google.com", 219 | "banned": false, 220 | "hashid": "KEYaED", 221 | "clicks": 0 222 | }, 223 | "type": "links", 224 | "id": 1, 225 | "actions": { 226 | "ban": { 227 | "links": { 228 | "related": "http://private-api/links/2/ban" 229 | } 230 | }, 231 | "text": { 232 | "links": { 233 | "related": "http://private-api/api/links/2/text" 234 | } 235 | }, 236 | "html": { 237 | "links": { 238 | "related": "http://private-api/api/links/2/html" 239 | } 240 | }, 241 | "refresh": { 242 | "links": { 243 | "related": 
"http://private-api/api/links/2/refresh" 244 | } 245 | } 246 | } 247 | } 248 | } 249 | 250 | This API returns a `201` status if a new link has been created, otherwise it returns a `200` status if a link already exists for the received `url` and `user_id`. When you create a new Link, the `title` and the `screenshot` attributes will be `null` because they are generated asynchronously via the background workers. 251 | 252 | ###Get link 253 | 254 | Return the existing link for the given `id` or `hashid`. 255 | 256 | GET /links/:id 257 | GET /links/:hashid 258 | 259 | ####Response 260 | 261 | Status: 200 262 | 263 | Content-Type: application/json 264 | 265 | { 266 | "data": { 267 | "attributes": { 268 | "title": "Google", 269 | "screenshot": "http://url-to-screenshot", 270 | "updated": "2015-12-18T08:20:27.929472", 271 | "created": "2015-12-18T08:20:27.929472", 272 | "url": "https://google.com", 273 | "banned": false, 274 | "hashid": "KEYaED", 275 | "clicks": 0 276 | }, 277 | "type": "links", 278 | "id": 1, 279 | "actions": { 280 | "ban": { 281 | "links": { 282 | "related": "http://private-api/links/2/ban" 283 | } 284 | }, 285 | "text": { 286 | "links": { 287 | "related": "http://private-api/api/links/2/text" 288 | } 289 | }, 290 | "html": { 291 | "links": { 292 | "related": "http://private-api/api/links/2/html" 293 | } 294 | }, 295 | "refresh": { 296 | "links": { 297 | "related": "http://private-api/api/links/2/refresh" 298 | } 299 | } 300 | } 301 | } 302 | } 303 | 304 | This endpoint returns 404 if there is no link matching the `id` or `hashid`. 305 | 306 | ###Ban Existing Link 307 | 308 | POST /links/:id/ban 309 | POST /links/:hashid/ban 310 | 311 | ####Parameters 312 | 313 | 314 | | Name | Type | Required | Description | 315 | | --------- |:---------:|:--------:| ---------------------------------------------------------------- | 316 | | mode | string | false | one of these values: "url", "path" or "netloc". 
Defaults to "url" | 317 | 318 | 319 | #####Payload 320 | 321 | { 322 | } 323 | 324 | or with `mode`: 325 | 326 | { 327 | "mode": "netloc" 328 | } 329 | 330 | 331 | 332 | ###Ban a URL 333 | 334 | You can ban a URL even if no link exists yet in the database. It will automatically ban any future links created which match the criteria. 335 | 336 | POST /ban 337 | 338 | ####Parameters 339 | 340 | 341 | | Name | Type | Required | Description | 342 | | --------- |:---------:|:--------:| ----------------------------------------------------------- | 343 | | url | string | true | The url that you want to ban | 344 | | mode | string | false | one of these values: "url", "path", "netloc" | 345 | 346 | 347 | ####Response 348 | 349 | Status: 201 350 | 351 | Content-Type: application/json 352 | 353 | ###Get Link's HTML 354 | 355 | If you enable the `html` task then Suma stores the HTML of a URL so you can retrieve it later. 356 | 357 | GET /links/:id/html 358 | GET /links/:hashid/html 359 | 360 | ####Response 361 | 362 | Status: 200 363 | 364 | Content-Type: text/html 365 | 366 | If the value doesn't exist then you will receive a 204 (No Content) 367 | 368 | ###Get Link's text 369 | 370 | If you enable the `text` task then Suma tries to get the main content of a URL using [Goose](https://pypi.python.org/pypi/goose-extractor/). This can be useful for articles to display the content for preview. 371 | 372 | GET /links/:id/text 373 | GET /links/:hashid/text 374 | 375 | ####Response 376 | 377 | Status: 200 378 | 379 | Content-Type: text/plain 380 | 381 | If the value doesn't exist then you will receive a 204 (No Content) 382 | 383 | ##Public API 384 | 385 | ###Access Link (Redirect) 386 | 387 | GET /:hashid 388 | 389 | Accessing the hashid of the public API will return a Permanent Redirect (301) to the URL and increment the clicks counter. 
390 | 391 | ###Access Link Screenshot (Redirect) 392 | 393 | GET /:hashid/screenshot 394 | 395 | Accessing this endpoint will return a Temporary Redirect (302) to the screenshot URL 396 | 397 | ##Questions 398 | 399 | If you've met any difficulties or have questions, you can ask them via [gitter](https://gitter.im/rach/suma) 400 | 401 | ##Contribute 402 | 403 | The project is young so there are quite a few things that you can do if you want to contribute: 404 | 405 | - Testing the application and reporting bugs 406 | - Improving the documentation to cover more installation instructions 407 | - Adding a Dockerfile and a Compose file 408 | - Improving the API's 409 | - Improving coverage 410 | - Reviewing indexes 411 | - Correcting my English (Sorry, I'm not a native English speaker) 412 | 413 | Any help is appreciated. 414 | 415 | ##License 416 | 417 | Suma is licensed under the Apache V2 license; the full license text can be found [here](https://github.com/rach/suma/blob/master/LICENSE) 418 | -------------------------------------------------------------------------------- /description.rst: -------------------------------------------------------------------------------- 1 | SUMA 2 | #### 3 | 4 | Suma stands for **S**\ hort **U**\ RL **M**\ anagement **A**\ pp. 5 | The role of Suma is to manage external links and extract data from them. Suma is a small web service to easily do the following: 6 | 7 | - Creating short URLs for external links within your application 8 | - Extracting Title 9 | - Capturing Screenshot from URL 10 | - Blocking URL's 11 | - Collecting clicks 12 | 13 | 14 | Use cases 15 | --------- 16 | 17 | If you don't understand directly what Suma is for. 
Let's illustrate it with a few use cases: 18 | 19 | - Public Feeds (eg: Twitter or FB like app) which allow users to post links publicly 20 | - Reviews or comments allowing external links 21 | - Display link title or screenshot to preview an external link within your application (eg: slack like app) 22 | 23 | To summarize: if your application allows external links from users, then Suma can be useful. 24 | 25 | Project Structure and Usage 26 | --------------------------- 27 | 28 | Suma is composed of 2 web applications: a private API to manage the links and a public API to handle the link redirection and statistics collection. 29 | 30 | Suma is structured this way to make it easier to protect the private API. The private API shouldn't be exposed to the outside world and it has to be used via your application as a microservice. 31 | 32 | Installation 33 | ------------ 34 | 35 | The installation instructions can be found in the `README <https://github.com/rach/suma>`_ of the project. 36 | 37 | License 38 | ------- 39 | 40 | Suma is licensed under the Apache V2 license; the full license text can be found `here <https://github.com/rach/suma/blob/master/LICENSE>`_ 41 | -------------------------------------------------------------------------------- /development.ini: -------------------------------------------------------------------------------- 1 | [DEFAULT] 2 | sqlalchemy.url = postgresql://suma@/suma 3 | hashid.secret = '123456' 4 | celery.broker_url = redis://localhost:6379/0 5 | screenshot.path = %(here)s/files 6 | storage.s3 = true 7 | storage.local.base_path = %(here)s/storage 8 | storage.local.base_url = /storage 9 | storage.s3.base_url = http://mybucket.s3.amazonaws.com/ 10 | storage.s3.bucket_name = mybucket 11 | storage.s3.access_key = key 12 | storage.s3.secret_key = secret 13 | suma.tasks = 14 | html 15 | text 16 | screenshot 17 | title 18 | 19 | [composite:main] 20 | use = egg:Paste#urlmap 21 | / = web 22 | /api = api 23 | 24 | [app:api] 25 | use = egg:suma#api 26 | 27 | pyramid.includes = 28 | pyramid_debugtoolbar 29 | 30 | 
debugtoolbar.enabled = true 31 | 32 | 33 | [app:web] 34 | use = egg:suma#web 35 | 36 | pyramid.includes = 37 | pyramid_debugtoolbar 38 | 39 | debugtoolbar.enabled = true 40 | 41 | jinja2.directories = 42 | suma:web/templates 43 | 44 | [alembic] 45 | script_location = suma:core/migrations 46 | 47 | ### 48 | # wsgi server configuration 49 | ### 50 | 51 | [server:main] 52 | use = egg:waitress#main 53 | host = 0.0.0.0 54 | port = 6543 55 | 56 | ### 57 | # logging configuration 58 | # http://docs.pylonsproject.org/projects/pyramid/en/latest/narr/logging.html 59 | ### 60 | 61 | [loggers] 62 | keys = root, suma, sqlalchemy, alembic, exc_logger 63 | 64 | [handlers] 65 | keys = console 66 | 67 | [formatters] 68 | keys = generic 69 | 70 | [logger_root] 71 | level = INFO 72 | handlers = console 73 | 74 | [logger_suma] 75 | level = DEBUG 76 | handlers = 77 | qualname = suma 78 | 79 | [logger_exc_logger] 80 | level = ERROR 81 | handlers = console 82 | qualname = exc_logger 83 | 84 | [logger_sqlalchemy] 85 | level = INFO 86 | handlers = 87 | qualname = sqlalchemy.engine 88 | # "level = INFO" logs SQL queries. 89 | # "level = DEBUG" logs SQL queries and results. 90 | # "level = WARN" logs neither. (Recommended for production systems.) 
91 | 92 | [logger_alembic] 93 | level = INFO 94 | handlers = 95 | qualname = alembic 96 | 97 | [handler_console] 98 | class = StreamHandler 99 | args = (sys.stderr,) 100 | level = NOTSET 101 | formatter = generic 102 | 103 | [formatter_generic] 104 | format = %(asctime)s %(levelname)-5.5s [%(name)s][%(threadName)s] %(message)s 105 | -------------------------------------------------------------------------------- /examples/api.ini: -------------------------------------------------------------------------------- 1 | [DEFAULT] 2 | sqlalchemy.url = postgresql://suma@/suma 3 | hashid.secret = 'mysecret' #TO CHANGE 4 | celery.broker_url = redis://localhost:6379/0 #TO CHANGE IF YOU DON'T USE REDIS 5 | screenshot.path = %(here)s/files # SET PATH WHERE YOU WANT TO CREATE THE SCREENSHOTS 6 | storage.s3 = false 7 | storage.local.base_path = %(here)s/storage 8 | storage.local.base_url = http://localhost:8080/storage 9 | # if you want to use s3 then you need set storage.s3 to true 10 | #storage.s3.base_url = http://mybucket.s3.amazonaws.com/ 11 | #storage.s3.bucket_name = mybucket 12 | #storage.s3.access_key = key 13 | #storage.s3.secret_key = secret 14 | suma.tasks = 15 | html 16 | text 17 | screenshot 18 | title 19 | 20 | [app:main] 21 | use = egg:suma#api 22 | 23 | [alembic] 24 | script_location = suma:core/migrations 25 | 26 | ### 27 | # logging configuration 28 | # http://docs.pylonsproject.org/projects/pyramid/en/latest/narr/logging.html 29 | ### 30 | 31 | [uwsgi] 32 | paste = config:%p 33 | paste-logger = %p 34 | virtualenv = /Users/rach/.virtualenv/suma 35 | http = :8082 36 | 37 | ### 38 | # logging configuration 39 | # http://docs.pylonsproject.org/projects/pyramid/en/latest/narr/logging.html 40 | ### 41 | 42 | [loggers] 43 | keys = root, suma, sqlalchemy, alembic, exc_logger 44 | 45 | [handlers] 46 | keys = console 47 | 48 | [formatters] 49 | keys = generic 50 | 51 | [logger_root] 52 | level = INFO 53 | handlers = console 54 | 55 | [logger_suma] 56 | level = DEBUG 57 
| handlers = 58 | qualname = suma 59 | 60 | [logger_exc_logger] 61 | level = ERROR 62 | handlers = console 63 | qualname = exc_logger 64 | 65 | [logger_sqlalchemy] 66 | level = WARN 67 | handlers = 68 | qualname = sqlalchemy.engine 69 | # "level = INFO" logs SQL queries. 70 | # "level = DEBUG" logs SQL queries and results. 71 | # "level = WARN" logs neither. (Recommended for production systems.) 72 | 73 | [logger_alembic] 74 | level = INFO 75 | handlers = 76 | qualname = alembic 77 | 78 | [handler_console] 79 | class = StreamHandler 80 | args = (sys.stderr,) 81 | level = NOTSET 82 | formatter = generic 83 | 84 | [formatter_generic] 85 | format = %(asctime)s %(levelname)-5.5s [%(name)s][%(threadName)s] %(message)s -------------------------------------------------------------------------------- /examples/web.ini: -------------------------------------------------------------------------------- 1 | [DEFAULT] 2 | sqlalchemy.url = postgresql://user:password@host:port/database 3 | hashid.secret = 'mysecret' #TO CHANGE 4 | celery.broker_url = redis://localhost:6379/0 #TO CHANGE IF YOU DON'T USE REDIS 5 | screenshot.path = %(here)s/files # SET PATH WHERE YOU WANT TO CREATE THE SCREENSHOTS 6 | storage.s3 = false 7 | storage.local.base_path = %(here)s/storage 8 | storage.local.base_url = /storage 9 | # if you want to use s3 then you need set storage.s3 to true 10 | #storage.s3.base_url = http://mybucket.s3.amazonaws.com/ 11 | #storage.s3.bucket_name = mybucket 12 | #storage.s3.access_key = key 13 | #storage.s3.secret_key = secret 14 | suma.tasks = 15 | html 16 | text 17 | screenshot 18 | title 19 | 20 | [app:main] 21 | use = egg:suma#web 22 | 23 | [alembic] 24 | script_location = suma:core/migrations 25 | 26 | [uwsgi] 27 | paste = config:%p 28 | paste-logger = %p 29 | virtualenv = /path/to/virtualenv 30 | http = :8080 31 | 32 | ### 33 | # logging configuration 34 | # http://docs.pylonsproject.org/projects/pyramid/en/latest/narr/logging.html 35 | ### 36 | 37 | [loggers] 38 | 
keys = root, suma, sqlalchemy, alembic, exc_logger 39 | 40 | [handlers] 41 | keys = console 42 | 43 | [formatters] 44 | keys = generic 45 | 46 | [logger_root] 47 | level = INFO 48 | handlers = console 49 | 50 | [logger_suma] 51 | level = DEBUG 52 | handlers = 53 | qualname = suma 54 | 55 | [logger_exc_logger] 56 | level = ERROR 57 | handlers = console 58 | qualname = exc_logger 59 | 60 | [logger_sqlalchemy] 61 | level = WARN 62 | handlers = 63 | qualname = sqlalchemy.engine 64 | # "level = INFO" logs SQL queries. 65 | # "level = DEBUG" logs SQL queries and results. 66 | # "level = WARN" logs neither. (Recommended for production systems.) 67 | 68 | [logger_alembic] 69 | level = INFO 70 | handlers = 71 | qualname = alembic 72 | 73 | [handler_console] 74 | class = StreamHandler 75 | args = (sys.stderr,) 76 | level = NOTSET 77 | formatter = generic 78 | 79 | [formatter_generic] 80 | format = %(asctime)s %(levelname)-5.5s [%(name)s][%(threadName)s] %(message)s -------------------------------------------------------------------------------- /pip-dev.txt: -------------------------------------------------------------------------------- 1 | -e .[dev] 2 | 3 | -------------------------------------------------------------------------------- /pip-test.txt: -------------------------------------------------------------------------------- 1 | -e .[test] 2 | 3 | -------------------------------------------------------------------------------- /pip.txt: -------------------------------------------------------------------------------- 1 | -e . 
# Directory holding this setup.py; the description files are resolved against
# it so the build works from any working directory.
here = os.path.abspath(os.path.dirname(__file__))


def _read(filename):
    """Return the content of ``filename`` located next to this setup.py."""
    with open(os.path.join(here, filename)) as handle:
        return handle.read()


short_desc = (
    "Application to generate short URL's, manage external links and extract "
    "link info (eg: title, screenshot, content) "
)


install_requires = [
    'alembic==0.8.4',
    'pyramid==1.5.7',
    'pyramid-tm==0.12',
    'pyramid-services==0.3',
    'pyramid-exclog==0.7',
    'zope.sqlalchemy==0.7.6',
    'Sqlalchemy==1.0.10',
    'pyramid_storage==0.0.8',
    'schematics==1.1.0',
    'structlog==15.1.0',
    'hashids==1.1.0',
    'psycopg2==2.6.1',
    'filedepot==0.2.1',
    'goose-extractor==1.0.25',
    'celery[redis]==3.1.18',
    'requests==2.8.1',
    'PasteScript==2.0.2',
    'selenium==2.47.1',
]

s3_require = [
    'boto',
]

tests_require = [
    'tox',
    # The comma after 'pytest-cov' matters: without it the two adjacent
    # literals are concatenated into the bogus requirement 'pytest-covpytest'.
    'pytest-cov',  # before pytest, more info why. See bug #196 in setuptools
    'pytest',
]


develop_requires = [
    'waitress',
    'pyramid_debugtoolbar',
    'tox',
    'pytest-cov',  # before pytest, more info why. See bug #196 in setuptools
    'pytest',
    'Paste==2.0.2',
    'bumpversion',
    # Same pin as install_requires: two different alembic pins cannot both
    # be satisfied when the "dev" extra is installed.
    'alembic==0.8.4',
]


dependency_links = [
]


setup(
    name='suma',
    version='0.1.1',
    description=short_desc,
    long_description=_read('description.rst') + '\n\n' + _read('CHANGES.txt'),
    cmdclass={'test': ToxCommand, },
    classifiers=[
        "Development Status :: 3 - Alpha",
        "Intended Audience :: Developers",
        "License :: OSI Approved :: Apache Software License",
        "Natural Language :: English",
        "Operating System :: OS Independent",
        "Programming Language :: Python",
        "Programming Language :: Python :: 2.7",
        "Framework :: Pyramid",
        "Topic :: Internet :: WWW/HTTP",
        "Topic :: Internet :: WWW/HTTP :: WSGI :: Application",
    ],
    author='Rachid Belaid',
    author_email='rachid.belaid@gmail.com',
    url='https://github.com/rach/suma',
    keywords='shorturl screenshot',
    packages=find_packages(),
    dependency_links=dependency_links,
    include_package_data=True,
    zip_safe=False,
    install_requires=install_requires,
    tests_require=tests_require,
    extras_require={
        'dev': develop_requires,
        'test': tests_require,
        's3': s3_require
    },
    entry_points="""\
    [console_scripts]
    initialize_suma_db = suma.core.scripts.initializedb:main
    [paste.app_factory]
    api = suma.api:main
    web = suma.web:main
    """
)
class LinkFactory(object):
    """Traversal root that resolves a path segment to a link resource."""

    def __init__(self, request):
        self.request = request

    def __getitem__(self, key):
        """Return the ``LinkResource`` wrapping the link identified by ``key``.

        The lookup is delegated to the ``link`` service; ``KeyError`` is
        raised when the service returns a falsy result.
        """
        link_service = self.request.find_service(name='link')
        found = link_service.get_link_by_id_or_hashid(key)
        if not found:
            raise KeyError(key)
        return LinkResource(found)

    def unwrap(self):
        """Return ``None``: the factory itself does not wrap a link."""
        return None
def serialize_link(request, link):
    """Build the response document describing ``link``.

    :param request: object providing ``find_service`` (used to fetch the
        ``file`` service) and ``route_url`` (used to build action URLs).
    :param link: object exposing ``id``, ``hashid``, ``url``, ``is_banned``,
        ``clicks``, ``created``, ``updated`` and a dict-like ``meta``.
    :returns: a dict with a single ``data`` key holding the link ``type``,
        ``id``, ``attributes`` and the related ``actions`` URLs.
    """
    # Named file_service rather than ``file`` to avoid shadowing the
    # Python 2 builtin of that name.
    file_service = request.find_service(name='file')

    # The stored screenshot reference is only turned into a URL when present.
    screenshot = link.meta.get('screenshot')
    if screenshot:
        screenshot = file_service.url(screenshot)

    # Every action is exposed the same way: /links/<id>/<action>.
    actions = {}
    for action in ('html', 'text', 'ban', 'refresh'):
        actions[action] = {
            "links": {
                "related": request.route_url(
                    'links', traverse='/%s/%s' % (link.id, action)),
            },
        }

    return {
        "data": {
            "type": "links",
            "id": link.id,
            "attributes": {
                "hashid": link.hashid,
                "url": link.url,
                "banned": link.is_banned,
                "clicks": link.clicks,
                "screenshot": screenshot,
                "title": link.meta.get('title'),
                "created": link.created,
                "updated": link.updated,
            },
            "actions": actions,
        }
    }
def includeme(config):
    """Pass every ``celery.*`` application setting to the shared Celery app.

    The ``celery.`` prefix is stripped and the rest of the key upper-cased,
    so ``celery.broker_url`` is handed over as ``BROKER_URL``.
    """
    prefix = 'celery.'
    settings = config.registry.settings
    celery_options = dict(
        (name[len(prefix):].upper(), settings[name])
        for name in settings
        if name.lower().startswith(prefix)
    )
    celery.config_from_object(celery_options)
def includeme(config):
    """Register the database session and the service factories.

    The session is built from the ``sqlalchemy.``-prefixed settings and
    registered as the ``db`` service; the ``link``, ``hashid``, ``task`` and
    ``file`` services are registered through their factories.
    """
    settings = config.registry.settings
    dbsession = create_dbsession(engine_from_config(settings, 'sqlalchemy.'))
    zope.sqlalchemy.register(dbsession)

    config.register_service(dbsession, name='db')

    # Registration order is kept: link, hashid, task, file.
    factories = (
        ('link', link_service_factory),
        ('hashid', hashid_service_factory),
        ('task', task_service_factory),
        ('file', file_service_factory),
    )
    for service_name, factory in factories:
        config.register_service_factory(factory, name=service_name)
@view_defaults(route_name='links', renderer='json')
class LinkView(object):
    """Views attached to the ``links`` route.

    Each view method must have a unique name: the view configuration refers
    to a method by name, so two methods sharing one name would both resolve
    to whichever was defined last.
    """

    def __init__(self, context, request):
        self.context = context
        self.request = request
        # ``unwrap()`` gives the wrapped link for a LinkResource context and
        # None for the LinkFactory context.
        self.link = context.unwrap()
        self.response = request.response
        self.link_svc = request.find_service(name='link')
        self.task = request.find_service(name='task')

    @view_config(request_method='POST')
    def create_link(self):
        """Create a link from the JSON body and return its serialization.

        Responds with 400 when the body is not valid JSON or does not match
        ``LinkSchema``.
        """
        try:
            schema = LinkSchema(self.request.json_body)  # Can raise ModelConversionError
            schema.validate()  # Can raise ModelValidationError
            link, created = self.link_svc.create_link(schema.url, schema.user_id)
            if created:
                # Only a newly created link triggers data collection and a 201.
                self.task.get_link_data(link.id)
                self.response.status_code = 201
            return serializers.serialize_create_link(self.request, link)
        except (ModelConversionError, ModelValidationError) as e:
            return HTTPBadRequest(json.dumps(e.messages))
        except ValueError as e:
            # Json badly formatted
            return HTTPBadRequest(json.dumps(str(e)))

    @view_config(context=LinkResource, request_method='GET')
    def get_link(self):
        """Return the serialized link."""
        return serializers.serialize_get_link(self.request, self.context.unwrap())

    @view_config(context=LinkResource, request_method='GET', name='text', renderer='string')
    def get_link_text(self):
        """Return the stored text of the link, or 204 when there is none."""
        text = self.link.meta.get('text', None)
        if not text:
            return HTTPNoContent()
        return text

    @view_config(context=LinkResource, request_method='GET', name='html')
    def get_link_html(self):
        """Return the stored HTML of the link, or 204 when there is none."""
        html = self.link.meta.get('html', None)
        if not html:
            return HTTPNoContent()
        return HTTPOk(body=html)

    @view_config(context=LinkResource, name='refresh')
    def refresh_link(self):
        """Queue a new data collection for the link."""
        self.task.get_link_data(self.link.id)
        return HTTPOk('link queued to refresh meta')

    @view_config(context=LinkResource, request_method='POST', name='ban')
    def ban(self):
        """Ban the link's url, netloc or path according to the posted mode.

        Responds with 400 when the body is not valid JSON or does not match
        ``BanLinkSchema``.
        """
        try:
            schema = BanLinkSchema(self.request.json_body)  # Can raise ModelConversionError
            schema.validate()  # Can raise ModelValidationError
            self.link_svc.ban_url(self.link.url, schema.mode)
            return HTTPCreated("link's %s banned" % schema.mode)
        except (ModelConversionError, ModelValidationError) as e:
            return HTTPBadRequest(json.dumps(e.messages))
        except ValueError as e:
            # Json badly formatted
            return HTTPBadRequest(json.dumps(str(e)))
@signals.user_preload_options.connect
def on_preload_parsed(options, **kwargs):
    """Configure Celery and bootstrap Pyramid from the ``--ini`` option.

    Reads the ``celery.*`` settings of the given Paste ini file into the
    Celery app, bootstraps the Pyramid application and stores its registry in
    the Celery configuration under ``pyramid.registry``.

    Exits the process with status -1 when no ini file was given.
    """
    # Imported locally so this handler does not depend on the ``exit`` helper
    # that the ``site`` module injects into builtins (absent with ``python -S``).
    import sys

    ini_location = options['ini']

    if ini_location is None:
        print('You must provide the paste --ini argument')
        sys.exit(-1)

    loader = INILoader(celery, ini_file=ini_location)
    celery_config = loader.read_configuration()
    celery.config_from_object(celery_config)
    registry = bootstrap(ini_location)['registry']
    celery.conf.update({'pyramid.registry': registry})
self._dbsession = create_dbsession(engine) 68 | return self._dbsession 69 | 70 | @property 71 | def registry(self): 72 | if self._registry is None: 73 | self._registry = celery.conf['pyramid.registry'] 74 | return self._registry 75 | 76 | @property 77 | def flags(self): 78 | if self._flags is None: 79 | flags = aslist(self.registry.settings.get('suma.tasks',[])) 80 | self._flags = flags 81 | return self._flags 82 | 83 | @property 84 | def services(self): 85 | #circular imports otherwise 86 | from suma.core.services import ( 87 | LinkService, 88 | HashIdService, 89 | ScreenshotService, 90 | LocalFileService, 91 | S3FileService, 92 | StatService 93 | ) 94 | if self._services is None: 95 | services_dict = {} 96 | secret = self.registry.settings['hashid.secret'] 97 | if asbool(self.registry.settings.get('storage.s3', False)): 98 | services_dict['file'] = S3FileService( 99 | self.registry.settings['storage.s3.base_url'], 100 | self.registry.settings['storage.s3.bucket_name'], 101 | self.registry.settings['storage.s3.access_key'], 102 | self.registry.settings['storage.s3.secret_key'], 103 | ) 104 | else: 105 | services_dict['file'] = LocalFileService( 106 | self.registry.settings['storage.local.base_url'], 107 | self.registry.settings['storage.local.base_path'], 108 | ) 109 | services_dict['hashid'] = HashIdService(secret) 110 | services_dict['screenshot'] = ScreenshotService() 111 | services_dict['stat'] = StatService(self.dbsession) 112 | services_dict['link'] = LinkService(self.dbsession, 113 | services_dict['hashid']) 114 | self._services = type('Services', (), services_dict) 115 | return self._services 116 | 117 | celery.Task = SumaTask 118 | task = celery.task 119 | -------------------------------------------------------------------------------- /suma/common/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rach/suma/58446efc2b6abdcfde3ee226d54cb699e66d2cc2/suma/common/__init__.py 
class ResourceWrapper(dict):
    """Dict subclass that carries one wrapped object.

    ``dict.__init__`` is not called, so the mapping itself stays empty; the
    wrapped object is only reachable through ``resource`` / ``unwrap()``.
    """

    def __init__(self, resource):
        self.resource = resource

    def unwrap(self):
        """Return the object passed to the constructor."""
        wrapped = self.resource
        return wrapped
False)): 33 | return S3FileService( 34 | request.registry.settings['storage.s3.base_url'], 35 | request.registry.settings['storage.s3.bucket_name'], 36 | request.registry.settings['storage.s3.access_key'], 37 | request.registry.settings['storage.s3.secret_key'], 38 | ) 39 | return LocalFileService( 40 | request.registry.settings['storage.local.base_url'], 41 | request.registry.settings['storage.local.base_path'], 42 | ) 43 | -------------------------------------------------------------------------------- /suma/core/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rach/suma/58446efc2b6abdcfde3ee226d54cb699e66d2cc2/suma/core/__init__.py -------------------------------------------------------------------------------- /suma/core/migrations/README: -------------------------------------------------------------------------------- 1 | Generic single-database configuration. -------------------------------------------------------------------------------- /suma/core/migrations/env.py: -------------------------------------------------------------------------------- 1 | from __future__ import with_statement 2 | from alembic import context 3 | from sqlalchemy import engine_from_config, pool 4 | from logging.config import fileConfig 5 | from suma.core.models.meta import Base 6 | 7 | # this is the Alembic Config object, which provides 8 | # access to the values within the .ini file in use. 9 | config = context.config 10 | 11 | # Interpret the config file for Python logging. 12 | # This line sets up loggers basically. 13 | fileConfig(config.config_file_name) 14 | 15 | target_metadata = Base.metadata 16 | 17 | SECTION = 'DEFAULT' 18 | 19 | # other values from the config, defined by the needs of env.py, 20 | # can be acquired: 21 | # my_important_option = config.get_main_option("my_important_option") 22 | # ... etc. 23 | 24 | 25 | def run_migrations_offline(): 26 | """Run migrations in 'offline' mode. 
def run_migrations_online():
    """Run migrations in 'online' mode.

    In this scenario we need to create an Engine
    and associate a connection with the context.

    The engine is built from the ``SECTION`` section of the Alembic config
    using the ``sqlalchemy.`` prefixed keys and a ``NullPool``.
    """
    engine = engine_from_config(
        config.get_section(SECTION),
        prefix='sqlalchemy.',
        poolclass=pool.NullPool)

    connection = engine.connect()
    try:
        # Configure inside the try block so the connection is released even
        # when context.configure() itself raises.
        context.configure(
            connection=connection,
            target_metadata=target_metadata,
        )

        with context.begin_transaction():
            context.run_migrations()
    finally:
        connection.close()
class BlockingType(DeclEnum):
    """Enumeration of the ways a URL can be blocked.

    Each member is declared as a ``(value, description)`` tuple. The values
    are the same strings that ``LinkService.ban_url`` accepts as ``mode``.
    """
    # Block on the hash of the complete URL.
    url = "url", "Url"
    # Block on the hash of the network location (host) only.
    netloc = "netloc", "Netloc"
    # Block on the hash of network location + path.
    path = "path", "Path"
def create_url_hashes(url):
    """Return the three SHA-1 hex digests used to match a URL against bans.

    The result is a tuple ``(hash_url, hash_netloc, hash_path)`` where

    * ``hash_url`` is the digest of the complete URL,
    * ``hash_netloc`` is the digest of the network location only,
    * ``hash_path`` is the digest of network location + path.

    Text input is encoded as UTF-8 before hashing so that non-ASCII URLs do
    not fail; digests of ASCII-only URLs are unchanged.
    """
    parsed = urlparse(url)

    def _digest(value):
        # sha1() needs bytes; encode text explicitly rather than relying on
        # an implicit ASCII conversion.
        if not isinstance(value, bytes):
            value = value.encode('utf-8')
        return sha1(value).hexdigest()

    return (
        _digest(url),
        _digest(parsed.netloc),
        _digest(parsed.netloc + parsed.path)
    )
-------------------------------------------------------------------------------- /suma/core/models/meta/base.py: -------------------------------------------------------------------------------- 1 | from sqlalchemy.ext.declarative import declared_attr 2 | from sqlalchemy.ext.declarative import declarative_base 3 | from sqlalchemy.orm import sessionmaker, scoped_session 4 | from sqlalchemy import event, DDL 5 | from sqlalchemy import ( 6 | Column, 7 | Integer 8 | ) 9 | import re 10 | 11 | 12 | _underscorer1 = re.compile(r'(.)([A-Z][a-z]+)') 13 | _underscorer2 = re.compile('([a-z0-9])([A-Z])') 14 | 15 | 16 | def _camel_to_snake(s): 17 | subbed = _underscorer1.sub(r'\1_\2', s) 18 | return _underscorer2.sub(r'\1_\2', subbed).lower() 19 | 20 | 21 | class Base(object): 22 | @declared_attr 23 | def __tablename__(cls): 24 | return _camel_to_snake(cls.__name__) 25 | 26 | __table_args__ = {'schema': 'suma'} 27 | 28 | id = Column(Integer, primary_key=True) 29 | 30 | 31 | Base = declarative_base(cls=Base) 32 | 33 | event.listen(Base.metadata, 'before_create', 34 | DDL("CREATE SCHEMA IF NOT EXISTS suma")) 35 | 36 | 37 | def create_dbsession(engine): 38 | dbsession = scoped_session(sessionmaker()) 39 | dbsession.configure(bind=engine) 40 | Base.metadata.bind = engine 41 | return dbsession 42 | -------------------------------------------------------------------------------- /suma/core/models/meta/schema.py: -------------------------------------------------------------------------------- 1 | from sqlalchemy import ( 2 | Column, 3 | DateTime, 4 | func 5 | ) 6 | 7 | 8 | class TimestampColumns(object): 9 | created = Column(DateTime, default=func.now()) 10 | updated = Column(DateTime, default=func.now(), 11 | onupdate=func.now()) 12 | 13 | 14 | class CreatedColumn(object): 15 | created = Column(DateTime, default=func.now()) 16 | -------------------------------------------------------------------------------- /suma/core/models/meta/types.py: 
class EnumSymbol(object):
    """One named member of a declarative enumeration class."""

    def __init__(self, cls_, name, value, description):
        self.cls_, self.name = cls_, name
        self.value, self.description = value, description

    def __reduce__(self):
        """Pickle as ``getattr(cls_, name)``.

        Unpickling therefore returns the symbol stored on the owning class.
        """
        lookup_args = (self.cls_, self.name)
        return getattr, lookup_args

    def __iter__(self):
        # A symbol unpacks as (value, description).
        pair = (self.value, self.description)
        return iter(pair)

    def __repr__(self):
        return "<%s>" % self.name
# The models below are denormalized from Stat for quick lookup.


class ClickCounter(Base):
    """Running click total for a single link."""
    # The primary key is also the foreign key to Link.id, so a link has at
    # most one counter row.
    id = Column(ForeignKey(Link.id), primary_key=True, nullable=False)
    # The backref exposes the counter on Link as ``_clicks``: a single object
    # (uselist=False) loaded together with the link (lazy='joined').
    link = relationship(Link, backref=backref('_clicks', uselist=False, lazy='joined'))
    # Column default is 1 when no explicit value is given on insert.
    counter = Column(BigInteger, default=1)
def usage(argv):
    """Print command-line usage for this script and exit with status 1.

    ``argv`` is the process argument list; only the basename of ``argv[0]``
    is used, as the command name shown in the message.
    """
    cmd = os.path.basename(argv[0])
    # Name the required positional argument; the message used to show only
    # the command name, which did not tell the user what to pass.
    print('usage: %s <config_uri>\n'
          '(example: "%s development.ini")' % (cmd, cmd))
    sys.exit(1)
@implementer(IFileService)
class LocalFileService(object):
    """File service backed by a ``LocalFileStorage`` instance."""

    def __init__(self, base_url, base_path):
        # The storage is always given a base URL ending in a slash.
        if base_url.endswith('/'):
            normalized_url = base_url + ''
        else:
            normalized_url = base_url + '/'
        self.storage = LocalFileStorage(base_path, normalized_url)

    def create(self, data, filename, folder):
        """Save ``data`` through the storage and return the final filename."""
        stream = StringIO(data)
        final_name = self.storage.save_file(stream, filename, folder)
        return final_name

    def url(self, filename):
        """Return the storage URL for ``filename``."""
        return self.storage.url(filename)
class IFileService(Interface):
    """Contract for services that store files and expose their URLs.

    Interface methods are declared without ``self``: an interface describes
    how a method is called on a provider, so ``self`` is not part of the
    signature.
    """

    def create(data, filename, folder):
        """Store ``data`` as ``filename`` inside ``folder``.

        Implementations return the final filename.
        """

    def url(filename):
        """Return the URL under which ``filename`` can be fetched."""
class IStatService(Interface):
    """Contract for services that record and read link click statistics.

    Interface methods are declared without ``self``: an interface describes
    how a method is called on a provider, so ``self`` is not part of the
    signature.
    """

    def add_click(link_id, ip=None, referer=None):
        """Record one click on the link identified by ``link_id``.

        ``ip`` and ``referer`` are optional details of the request.
        """

    def get_counter_by_link_id(link_id):
        """Return the click counter stored for ``link_id``."""
18 | self.dbsession = dbsession 19 | self.hashid = hashid 20 | 21 | def create_link(self, url, user_id=None): 22 | created = False 23 | link = self.get_link_by_url(url, user_id) 24 | if link: 25 | return link, created 26 | 27 | nextid = self.dbsession.execute(Sequence("link_id_seq")) 28 | hashid = self.hashid.encode(nextid) 29 | link = Link( 30 | id=nextid, 31 | user_id=user_id, 32 | url=url, 33 | hashid=hashid 34 | ) 35 | self.dbsession.begin_nested() 36 | try: 37 | self.dbsession.add(link) 38 | self.dbsession.commit() 39 | created = True 40 | except IntegrityError: 41 | self.dbsession.rollback() 42 | link = self.get_link_by_url(url, user_id) 43 | 44 | return link, created 45 | 46 | def get_link_by_id_or_hashid(self, id_or_hashid): 47 | args = [] 48 | if str(id_or_hashid).isdigit(): 49 | args.append(Link.id == id_or_hashid) 50 | args.append(Link.hashid == str(id_or_hashid)) 51 | try: 52 | return self.dbsession.query(Link).filter( 53 | or_(*args) 54 | ).one() 55 | except NoResultFound: 56 | return None 57 | 58 | def get_link_by_url(self, url, user_id=None): 59 | l_query = self.dbsession.query(Link) 60 | return l_query.filter(Link.url == url, Link.user_id == user_id).first() 61 | 62 | def get_link_by_id(self, id): 63 | return self.dbsession.query(Link).filter(Link.id == id).first() 64 | 65 | def get_link_by_hashid(self, hashid): 66 | return self.dbsession.query(Link).filter(Link.hashid == hashid).first() 67 | 68 | def ban_url(self, url, mode='url'): 69 | assert mode in ('url', 'netloc', 'path'),\ 70 | 'Bad mode is not "url", "netloc" or "path"' 71 | hash_url, hash_netloc, hash_path = create_url_hashes(url) 72 | hdict = dict(zip(('url', 'netloc', 'path'), (hash_url, hash_netloc, hash_path))) 73 | btype = BlockingType.from_string(mode) 74 | bl = BlockedLink( 75 | hash=hdict[mode], 76 | url=url, 77 | type=btype 78 | ) 79 | self.dbsession.begin_nested() 80 | try: 81 | self.dbsession.add(bl) 82 | self.dbsession.commit() 83 | except IntegrityError: 84 | # already blocked 
@implementer(IScreenshotService)
class ScreenshotService(object):
    """Capture a page screenshot with a PhantomJS web driver."""

    def capture(self, url, width, height):
        """Load ``url`` in a ``width`` x ``height`` window.

        Returns a ``(png_data, page_source)`` tuple. The driver is always
        shut down afterwards, whether or not the capture succeeded.
        """
        # TODO: find a more efficient way than creating a driver per call
        browser = webdriver.PhantomJS()
        try:
            browser.set_window_size(width, height)
            browser.get(url)
            png_data = browser.get_screenshot_as_png()
            page_source = browser.page_source
            return png_data, page_source
        finally:
            browser.quit()
@implementer(ITaskService)
class TaskService(object):
    """Queue background tasks; arguments are serialized as JSON."""

    def add_click(self, link_id, ip=None, referer=None):
        """Queue ``add_click_task`` for ``link_id`` with the request details."""
        assert type(link_id) in [IntType, LongType], "link_id is not an integer: %r" % link_id
        task_args = (link_id, ip, referer)
        tasks.add_click_task.apply_async(task_args, serializer='json')

    def get_link_data(self, link_id):
        """Queue ``get_link_data_task`` for ``link_id``."""
        assert type(link_id) in [IntType, LongType], "link_id is not an integer: %r" % link_id
        task_args = (link_id, )
        tasks.get_link_data_task.apply_async(task_args, serializer='json')
@task()
def add_click_task(link_id, ip, referer):
    """Record a click on ``link_id`` through the stat service and commit.

    The database session and the service container are read from
    attributes on the task object itself.
    """
    dbsession = add_click_task.dbsession
    services = add_click_task.services
    # Pass by keyword: the stat service signature is
    # add_click(link_id, ip=None, referer=None). The previous positional call
    # sent (link_id, referer, ip), storing each value in the other's column.
    services.stat.add_click(link_id, ip=ip, referer=referer)
    dbsession.commit()
def includeme(config):
    """Register the routes of the web application.

    When S3 storage is not enabled, a static view is added that serves the
    local storage directory under the local storage base URL. The catch-all
    ``links`` traversal route is always added.
    """
    settings = config.registry.settings

    if not asbool(settings.get('storage.s3', False)):
        storage_url = settings['storage.local.base_url']
        storage_path = settings['storage.local.base_path']
        # Strip only the surrounding slashes. Removing every slash turned a
        # nested prefix such as '/media/files/' into 'mediafiles', which no
        # longer matches URLs built from the same base URL.
        config.add_static_view(storage_url.strip('/'), path=storage_path)

    config.add_route('links', '/*traverse',
                     factory='suma.web.resources.LinkFactory')
def includeme(config):
    """Create the database session and register the application services.

    The session is built from the ``sqlalchemy.`` settings, registered with
    ``zope.sqlalchemy`` and exposed as the ``db`` service. The remaining
    services are registered as factories under their names.
    """
    settings = config.registry.settings
    dbsession = create_dbsession(engine_from_config(settings, 'sqlalchemy.'))
    zope.sqlalchemy.register(dbsession, keep_session=True)

    config.register_service(dbsession, name='db')

    # 'hashid' is needed just to initialize the link service.
    named_factories = (
        ('link', link_service_factory),
        ('hashid', hashid_service_factory),
        ('task', task_service_factory),
        ('file', file_service_factory),
    )
    for service_name, factory in named_factories:
        config.register_service_factory(factory, name=service_name)
@view_defaults(route_name='links')
class LinkView(object):
    """Views for a short link resolved through the ``links`` route.

    Declared as a new-style class (``object`` base) so it behaves the same
    on Python 2 as the other classes in the project.
    """

    def __init__(self, context, request):
        """Keep the context, its unwrapped link, and the services in use.

        :param context: resource exposing ``unwrap()`` to obtain the link.
        :param request: current request; must provide ``find_service``.
        """
        self.context = context
        self.link = context.unwrap()
        self.request = request
        self.task = request.find_service(name='task')
        self.file = request.find_service(name='file')

    @view_config(context=LinkResource, request_method='GET')
    def get(self):
        """Record a click for the link, then redirect to its URL (301)."""
        self.task.add_click(
            self.link.id,
            self.request.remote_addr,
            self.request.referer
        )
        return HTTPMovedPermanently(self.link.url)

    @view_config(context=LinkResource, name='screenshot')
    def screenshot(self):
        """Redirect (302) to the stored screenshot, or answer 404.

        A link whose ``meta`` is empty or ``None`` is treated as having no
        screenshot instead of raising ``AttributeError``.
        """
        meta = self.link.meta or {}
        screenshot = meta.get('screenshot')
        if screenshot:
            return HTTPFound(location=self.file.url(screenshot))
        return HTTPNotFound()
def test_link_schema_with_invalid_url():
    """A malformed URL must be reported by validation as a ``url`` error."""
    # Plain ``1`` rather than the Python 2-only ``1L`` literal: it matches
    # the sibling user_id test and stays parseable on Python 3.
    schema = LinkSchema({"url": "fail", "user_id": 1})
    with pytest.raises(ModelValidationError) as excinfo:
        schema.validate()
    assert 'url' in str(excinfo.value)
def test_create_link_api_view(dummy_request, fake_link_service,
                              fake_task_service, fake_file_service):
    """Creating a link through the API view sets a 201 and returns the link."""
    from suma.api.views import LinkView

    payload = {"url": "http://google.com"}
    dummy_request.method = 'POST'
    dummy_request.post = '{"url": "http://google.com"}'
    dummy_request.json_body = payload

    factory = LinkFactory(dummy_request)
    body = LinkView(factory, dummy_request).create_link()

    assert dummy_request.response.status_code == 201
    assert body["data"]["id"] == 1
    assert body["data"]["attributes"]["hashid"] == '1234'
def test_ban_link_api_view(db_session, dummy_request,
                           fake_link_service, fake_task_service,
                           fake_file_service):
    """Banning a stored link through the API view answers with a 201."""
    from suma.api.views import LinkView

    stored_link = Link(url="http://google.com", hashid='1234')
    db_session.add(stored_link)
    db_session.flush()

    dummy_request.method = 'POST'
    dummy_request.post = '{"mode": "url"}'
    dummy_request.json_body = {"mode": "url"}

    resource = LinkResource(stored_link)
    response = LinkView(resource, dummy_request).ban()
    assert response.status_code == 201
def test_create_link_with_user_id(db_session):
    """A link created with a ``user_id`` can be found by URL and user."""
    link = Link(
        hashid='test',
        user_id=1,
        url='http://google.com/test'
    )
    db_session.add(link)
    # Query the Link entity and filter on it.  The previous
    # ``query(Link.url == ...)`` selected a boolean expression per row
    # instead of restricting the rows, and never looked at user_id.
    matching = db_session.query(Link).filter(
        Link.url == 'http://google.com/test',
        Link.user_id == 1
    )
    assert matching.count() == 1
def test_create_link_with_user_id_unique_constraint(db_session):
    """Two links with the same URL and user must violate the unique index."""
    duplicates = [
        Link(hashid=hashid, user_id=1, url='http://google.com/test')
        for hashid in ('hashone', 'hashtwo')
    ]
    for duplicate in duplicates:
        db_session.add(duplicate)

    with pytest.raises(IntegrityError) as excinfo:
        db_session.flush()

    expected = 'violates unique constraint "unique_url_and_user_id"'
    assert expected in str(excinfo.value)
def test_encode_with_secondary_id(hashid_svc):
    """Encoding with a secondary id yields a different, valid-length hashid."""
    hashid1 = hashid_svc.encode(1)
    assert len(hashid1) >= 6
    hashid2 = hashid_svc.encode(1, 2)
    assert len(hashid2) >= 6
    # This comparison used to be a bare expression, so it was evaluated and
    # discarded; it must be asserted to actually check anything.
    assert hashid1 != hashid2
def test_create_duplicate_link(db_session, link_svc):
    """Creating the same URL twice returns the existing link the second time."""
    first, first_created = link_svc.create_link('http://google.com/test')
    assert first_created is True

    second, second_created = link_svc.create_link('http://google.com/test')
    assert second_created is False
    assert first == second

    stored = db_session.query(Link).filter(
        Link.url == 'http://google.com/test'
    )
    assert stored.count() == 1
def test_get_link_by_id_or_hashid_with_unknown_hashid(db_session, link_svc):
    """Looking up a hashid that matches no link returns ``None``."""
    l1 = Link(hashid='1234', url='http://google.com/test')
    db_session.add(l1)
    db_session.flush()
    l2 = link_svc.get_link_by_id_or_hashid('abcd')
    assert l2 is None


# This test used to share its name with the one above, so it replaced it at
# import time and the unknown-hashid case was never collected.  It now has
# its own name so both cases run.
def test_get_link_by_id_or_hashid_with_unknown_id(db_session, link_svc):
    """Looking up a numeric id that matches no link returns ``None``."""
    l1 = Link(hashid='1234', url='http://google.com/test')
    db_session.add(l1)
    db_session.flush()
    l2 = link_svc.get_link_by_id_or_hashid(-1)
    assert l2 is None
def test_add_click(db_session, stat_svc):
    """Each click adds a stat row and bumps the link's single counter."""
    link = Link(url='http://google.com/test', hashid='1234')
    db_session.add(link)
    db_session.flush()

    def counters():
        # Fresh query each time so every assertion hits the session anew.
        return db_session.query(ClickCounter)

    stat_svc.add_click(link.id)
    assert db_session.query(Stat).count() == 1
    assert counters().count() == 1
    assert counters().get(link.id).counter == 1

    stat_svc.add_click(link.id)
    assert counters().count() == 1
    assert counters().get(link.id).counter == 2
def test_get_link_view(dummy_request, link_resource, fake_task_service,
                       fake_file_service):
    """GET on a link answers with a 301 pointing at the link's URL."""
    from suma.web.views import LinkView

    dummy_request.remote_addr = '127.0.0.1'
    dummy_request.referer = 'http://test.com'

    redirect = LinkView(link_resource, dummy_request).get()

    assert redirect.status_code == 301
    assert redirect.headers.get('Location') == 'http://google.com'