├── .gitignore ├── LICENSE ├── Pipfile ├── Pipfile.lock ├── README.md ├── app.py ├── run_local.sh ├── serverless.yml ├── settings.py ├── src ├── __init__.py ├── manage_models.py └── nlp.py ├── test ├── __init__.py └── test_nlp.py └── zappa_settings.json /.gitignore: -------------------------------------------------------------------------------- 1 | # Compiled python modules. 2 | *.pyc 3 | 4 | # Setuptools distribution folder. 5 | /dist/ 6 | 7 | # Python egg metadata, regenerated from source files by setuptools. 8 | /*.egg-info 9 | 10 | # vscode files 11 | .vscode 12 | 13 | # pytest_cache files 14 | .pytest_cache 15 | __pycache__ 16 | 17 | 18 | .env -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) [year] [fullname] 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /Pipfile: -------------------------------------------------------------------------------- 1 | [[source]] 2 | name = "pypi" 3 | url = "https://pypi.org/simple" 4 | verify_ssl = true 5 | 6 | [dev-packages] 7 | pytest = "*" 8 | autopep8 = "*" 9 | 10 | [packages] 11 | zappa = "*" 12 | boto3 = "*" 13 | flask = "*" 14 | requests = "*" 15 | spacy = "*" 16 | sentry-sdk = {extras = ["flask"],version = "==0.16.4"} 17 | python-dotenv = "*" 18 | 19 | [requires] 20 | python_version = "3.8" 21 | -------------------------------------------------------------------------------- /Pipfile.lock: -------------------------------------------------------------------------------- 1 | { 2 | "_meta": { 3 | "hash": { 4 | "sha256": "3a6cc2cff80a3727e24e760baa8b98e222b33f5e9933c1742dc56d6f18303f6d" 5 | }, 6 | "pipfile-spec": 6, 7 | "requires": { 8 | "python_version": "3.8" 9 | }, 10 | "sources": [ 11 | { 12 | "name": "pypi", 13 | "url": "https://pypi.org/simple", 14 | "verify_ssl": true 15 | } 16 | ] 17 | }, 18 | "default": { 19 | "argcomplete": { 20 | "hashes": [ 21 | "sha256:2fbe5ed09fd2c1d727d4199feca96569a5b50d44c71b16da9c742201f7cc295c", 22 | "sha256:91dc7f9c7f6281d5a0dce5e73d2e33283aaef083495c13974a7dd197a1cdc949" 23 | ], 24 | "version": "==1.12.0" 25 | }, 26 | "blinker": { 27 | "hashes": [ 28 | "sha256:471aee25f3992bd325afa3772f1063dbdbbca947a041b8b89466dc00d606f8b6" 29 | ], 30 | "version": "==1.4" 31 | }, 32 | "blis": { 33 | "hashes": [ 34 | "sha256:00473602629ba69fe6565108e21957e918cb48b59f5bf2f6bfb6e04de42500cb", 35 | "sha256:03c368c9716ca814c436550a5f1e02ccf74850e613602519e3941d212e5aa177", 36 | "sha256:135450caabc8aea9bb9250329ebdf7189982d9b57d5c92789b2ba2fe52c247a7", 37 | "sha256:1402d9cbb0fbc21b749dd5b87d7ee14249e74a0ca38be6ecc56b3b356fca2f21", 38 | "sha256:26b16d6005bb2671699831b5cc699905215d1abde1ec5c1d04de7dcd9eb29f75", 39 | 
"sha256:3347a4b1b7d3ae14476aac9a6f7bf8ebf464863f4ebf4aea228874a7694ea240", 40 | "sha256:38fe877a4b52e762f5e137a412e3c256545a696a12ae8c40d67b8815d2bb5097", 41 | "sha256:4fb89c47ee06b58a4410a16fd5794847517262c9d2a342643475b477dfeff0a4", 42 | "sha256:77a6486b9794af01bcdfd1bc6e067c93add4b93292e6f95bf6e5ce7f98bf0163", 43 | "sha256:856142a11e37fd2c47c5006a3197e157bb8469a491a73d2d442223dd3279df84", 44 | "sha256:8aeaf6954351593a1e412f80e398aa51df588d3c0de74b9f3323b694c603381b", 45 | "sha256:9ede123065f3cacb109967755b3d83d4ca0de90643a9058129a6ab2d4051954f", 46 | "sha256:d1d59faebc1c94f8f4f77154ef4b9d6d40364b111cf8fde48ee3b524c85f1075", 47 | "sha256:d69257d317e86f34a7f230a2fd1f021fd2a1b944137f40d8cdbb23bd334cd0c4", 48 | "sha256:ddd732c5274d1082fa92e2c42317587d5ebabce7741ca98120f69bd45d004b99", 49 | "sha256:f0b0dad4d6268d9dba0a65a9db12dd7a2d8686b648399e4aa1aec7550697e99e" 50 | ], 51 | "version": "==0.4.1" 52 | }, 53 | "boto3": { 54 | "hashes": [ 55 | "sha256:1cfbadf41777dade69a3e5eaf1b71d15b4ae616fd94d16a894b692e14319f4a2", 56 | "sha256:cc3636828f1677ff93e8b1130c90dfe800187964e33786711450e8653d3f245f" 57 | ], 58 | "index": "pypi", 59 | "version": "==1.14.46" 60 | }, 61 | "botocore": { 62 | "hashes": [ 63 | "sha256:2f15a755b990db13a7a9e06a124c6ca5fa1c4470d76672363024d7f2a6c2566c", 64 | "sha256:6b134681c938f00b28424abf4b46fa6034b516d8add3a3f524e2292db61aa070" 65 | ], 66 | "version": "==1.17.46" 67 | }, 68 | "catalogue": { 69 | "hashes": [ 70 | "sha256:584d78e7f4c3c6e2fd498eb56dfc8ef1f4ff738480237de2ccd26cbe2cf47172", 71 | "sha256:d74d1d856c6b36a37bf14aa6dbbc27d0582667b7ab979a6108e61a575e8723f5" 72 | ], 73 | "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", 74 | "version": "==1.0.0" 75 | }, 76 | "certifi": { 77 | "hashes": [ 78 | "sha256:5930595817496dd21bb8dc35dad090f1c2cd0adfaf21204bf6732ca5d8ee34d3", 79 | "sha256:8fc0819f1f30ba15bdb34cceffb9ef04d99f420f68eb75d901e9560b8749fc41" 80 | ], 81 | "version": "==2020.6.20" 82 | }, 83 | "cfn-flip": { 
84 | "hashes": [ 85 | "sha256:2bed32a1f4dca26dc64178d52511fd4ef778b5ccbcf32559cac884ace75bde6a" 86 | ], 87 | "version": "==1.2.3" 88 | }, 89 | "chardet": { 90 | "hashes": [ 91 | "sha256:84ab92ed1c4d4f16916e05906b6b75a6c0fb5db821cc65e70cbd64a3e2a5eaae", 92 | "sha256:fc323ffcaeaed0e0a02bf4d117757b98aed530d9ed4531e3e15460124c106691" 93 | ], 94 | "version": "==3.0.4" 95 | }, 96 | "click": { 97 | "hashes": [ 98 | "sha256:d2b5255c7c6349bc1bd1e59e08cd12acbbd63ce649f2588755783aa94dfb6b1a", 99 | "sha256:dacca89f4bfadd5de3d7489b7c8a566eee0d3676333fbb50030263894c38c0dc" 100 | ], 101 | "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'", 102 | "version": "==7.1.2" 103 | }, 104 | "cymem": { 105 | "hashes": [ 106 | "sha256:5083b2ab5fe13ced094a82e0df465e2dbbd9b1c013288888035e24fd6eb4ed01", 107 | "sha256:622c20a57701d02f01a47e856dea248e112638f28c8249dbe3ed95a9702e3d74", 108 | "sha256:6f4cb689a9552e9e13dccc89203c8ab09f210a7ffb92ce27c384a4a0be27b527", 109 | "sha256:719f04a11ca709fc2b47868070d79fccff77e5d502ff32de2f4baa73cb16166f", 110 | "sha256:7236252bed70f37b898933dcf8aa875d0829664a245a272516f27b30439df71c", 111 | "sha256:7f5ddceb12b73f7fd2e4398266401b6f887003740ccd18c989a2af04500b5f2b", 112 | "sha256:85b9364e099426bd7f445a7705aad87bf6dbb71d79e3802dd8ca14e181d38a33", 113 | "sha256:c288a1bbdf58c360457443e5297e74844e1961e5e7001dbcb3a5297a41911a11", 114 | "sha256:cd21ec48ee70878d46c486e2f7ae94b32bfc6b37c4d27876c5a5a00c4eb75c3c", 115 | "sha256:d7505c500d994f11662e5595f5002251f572acc189f18944619352e2636f5181", 116 | "sha256:dd24848fbd75b17bab06408da6c029ba7cc615bd9e4a1f755fb3a090025fb922", 117 | "sha256:f4f19af4bca81f11922508a9dcf30ce1d2aee4972af9f81ce8e5331a6f46f5e1" 118 | ], 119 | "version": "==2.0.3" 120 | }, 121 | "docutils": { 122 | "hashes": [ 123 | "sha256:6c4f696463b79f1fb8ba0c594b63840ebd41f059e92b31957c46b74a4599b6d0", 124 | "sha256:9e4d7ecfc600058e07ba661411a2b7de2fd0fafa17d1a7f7361cd47b1175c827", 125 | 
"sha256:a2aeea129088da402665e92e0b25b04b073c04b2dce4ab65caaa38b7ce2e1a99" 126 | ], 127 | "markers": "python_version >= '2.6' and python_version not in '3.0, 3.1, 3.2, 3.3'", 128 | "version": "==0.15.2" 129 | }, 130 | "durationpy": { 131 | "hashes": [ 132 | "sha256:5ef9416b527b50d722f34655becfb75e49228eb82f87b855ed1911b3314b5408" 133 | ], 134 | "version": "==0.5" 135 | }, 136 | "flask": { 137 | "hashes": [ 138 | "sha256:4efa1ae2d7c9865af48986de8aeb8504bf32c7f3d6fdc9353d34b21f4b127060", 139 | "sha256:8a4fdd8936eba2512e9c85df320a37e694c93945b33ef33c89946a340a238557" 140 | ], 141 | "index": "pypi", 142 | "version": "==1.1.2" 143 | }, 144 | "future": { 145 | "hashes": [ 146 | "sha256:b1bead90b70cf6ec3f0710ae53a525360fa360d306a86583adc6bf83a4db537d" 147 | ], 148 | "markers": "python_version >= '2.6' and python_version not in '3.0, 3.1, 3.2, 3.3'", 149 | "version": "==0.18.2" 150 | }, 151 | "hjson": { 152 | "hashes": [ 153 | "sha256:1d1727faa6aaef2973921877125a3ab7c5f6d34b93233179d01770f41fab51f9" 154 | ], 155 | "version": "==3.0.1" 156 | }, 157 | "idna": { 158 | "hashes": [ 159 | "sha256:b307872f855b18632ce0c21c5e45be78c0ea7ae4c15c828c20788b26921eb3f6", 160 | "sha256:b97d804b1e9b523befed77c48dacec60e6dcb0b5391d57af6a65a312a90648c0" 161 | ], 162 | "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", 163 | "version": "==2.10" 164 | }, 165 | "itsdangerous": { 166 | "hashes": [ 167 | "sha256:321b033d07f2a4136d3ec762eac9f16a10ccd60f53c0c91af90217ace7ba1f19", 168 | "sha256:b12271b2047cb23eeb98c8b5622e2e5c5e9abd9784a153e9d8ef9cb4dd09d749" 169 | ], 170 | "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", 171 | "version": "==1.1.0" 172 | }, 173 | "jinja2": { 174 | "hashes": [ 175 | "sha256:89aab215427ef59c34ad58735269eb58b1a5808103067f7bb9d5836c651b3bb0", 176 | "sha256:f0a4641d3cf955324a89c04f3d94663aa4d638abe8f733ecd3582848e1c37035" 177 | ], 178 | "markers": "python_version >= '2.7' and python_version not in '3.0, 
3.1, 3.2, 3.3, 3.4'", 179 | "version": "==2.11.2" 180 | }, 181 | "jmespath": { 182 | "hashes": [ 183 | "sha256:b85d0567b8666149a93172712e68920734333c0ce7e89b78b3e987f71e5ed4f9", 184 | "sha256:cdf6525904cc597730141d61b36f2e4b8ecc257c420fa2f4549bac2c2d0cb72f" 185 | ], 186 | "markers": "python_version >= '2.6' and python_version not in '3.0, 3.1, 3.2, 3.3'", 187 | "version": "==0.10.0" 188 | }, 189 | "kappa": { 190 | "hashes": [ 191 | "sha256:4b5b372872f25d619e427e04282551048dc975a107385b076b3ffc6406a15833", 192 | "sha256:4d6b7b3accce4a0aaaac92b36237a6304f0f2fffbbe3caea3f7c9f52d12c9989" 193 | ], 194 | "version": "==0.6.0" 195 | }, 196 | "markupsafe": { 197 | "hashes": [ 198 | "sha256:00bc623926325b26bb9605ae9eae8a215691f33cae5df11ca5424f06f2d1f473", 199 | "sha256:09027a7803a62ca78792ad89403b1b7a73a01c8cb65909cd876f7fcebd79b161", 200 | "sha256:09c4b7f37d6c648cb13f9230d847adf22f8171b1ccc4d5682398e77f40309235", 201 | "sha256:1027c282dad077d0bae18be6794e6b6b8c91d58ed8a8d89a89d59693b9131db5", 202 | "sha256:13d3144e1e340870b25e7b10b98d779608c02016d5184cfb9927a9f10c689f42", 203 | "sha256:24982cc2533820871eba85ba648cd53d8623687ff11cbb805be4ff7b4c971aff", 204 | "sha256:29872e92839765e546828bb7754a68c418d927cd064fd4708fab9fe9c8bb116b", 205 | "sha256:43a55c2930bbc139570ac2452adf3d70cdbb3cfe5912c71cdce1c2c6bbd9c5d1", 206 | "sha256:46c99d2de99945ec5cb54f23c8cd5689f6d7177305ebff350a58ce5f8de1669e", 207 | "sha256:500d4957e52ddc3351cabf489e79c91c17f6e0899158447047588650b5e69183", 208 | "sha256:535f6fc4d397c1563d08b88e485c3496cf5784e927af890fb3c3aac7f933ec66", 209 | "sha256:596510de112c685489095da617b5bcbbac7dd6384aeebeda4df6025d0256a81b", 210 | "sha256:62fe6c95e3ec8a7fad637b7f3d372c15ec1caa01ab47926cfdf7a75b40e0eac1", 211 | "sha256:6788b695d50a51edb699cb55e35487e430fa21f1ed838122d722e0ff0ac5ba15", 212 | "sha256:6dd73240d2af64df90aa7c4e7481e23825ea70af4b4922f8ede5b9e35f78a3b1", 213 | "sha256:717ba8fe3ae9cc0006d7c451f0bb265ee07739daf76355d06366154ee68d221e", 214 | 
"sha256:79855e1c5b8da654cf486b830bd42c06e8780cea587384cf6545b7d9ac013a0b", 215 | "sha256:7c1699dfe0cf8ff607dbdcc1e9b9af1755371f92a68f706051cc8c37d447c905", 216 | "sha256:88e5fcfb52ee7b911e8bb6d6aa2fd21fbecc674eadd44118a9cc3863f938e735", 217 | "sha256:8defac2f2ccd6805ebf65f5eeb132adcf2ab57aa11fdf4c0dd5169a004710e7d", 218 | "sha256:98c7086708b163d425c67c7a91bad6e466bb99d797aa64f965e9d25c12111a5e", 219 | "sha256:9add70b36c5666a2ed02b43b335fe19002ee5235efd4b8a89bfcf9005bebac0d", 220 | "sha256:9bf40443012702a1d2070043cb6291650a0841ece432556f784f004937f0f32c", 221 | "sha256:ade5e387d2ad0d7ebf59146cc00c8044acbd863725f887353a10df825fc8ae21", 222 | "sha256:b00c1de48212e4cc9603895652c5c410df699856a2853135b3967591e4beebc2", 223 | "sha256:b1282f8c00509d99fef04d8ba936b156d419be841854fe901d8ae224c59f0be5", 224 | "sha256:b2051432115498d3562c084a49bba65d97cf251f5a331c64a12ee7e04dacc51b", 225 | "sha256:ba59edeaa2fc6114428f1637ffff42da1e311e29382d81b339c1817d37ec93c6", 226 | "sha256:c8716a48d94b06bb3b2524c2b77e055fb313aeb4ea620c8dd03a105574ba704f", 227 | "sha256:cd5df75523866410809ca100dc9681e301e3c27567cf498077e8551b6d20e42f", 228 | "sha256:cdb132fc825c38e1aeec2c8aa9338310d29d337bebbd7baa06889d09a60a1fa2", 229 | "sha256:e249096428b3ae81b08327a63a485ad0878de3fb939049038579ac0ef61e17e7", 230 | "sha256:e8313f01ba26fbbe36c7be1966a7b7424942f670f38e666995b88d012765b9be" 231 | ], 232 | "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", 233 | "version": "==1.1.1" 234 | }, 235 | "murmurhash": { 236 | "hashes": [ 237 | "sha256:27b908fe4bdb426f4e4e4a8821acbe0302915b2945e035ec9d8ca513e2a74b1f", 238 | "sha256:33405103fa8cde15d72ee525a03d5cfe2c7e4901133819754810986e29627d68", 239 | "sha256:386a9eed3cb27cb2cd4394b6521275ba04552642c2d9cab5c9fb42aa5a3325c0", 240 | "sha256:3af36a0dc9f13f6892d9b8b39a6a3ccf216cae5bce38adc7c2d145677987772f", 241 | "sha256:717196a04cdc80cc3103a3da17b2415a8a5e1d0d578b7079259386bf153b3258", 242 | 
"sha256:8a4ed95cd3456b43ea301679c7c39ade43fc18b844b37d0ba0ac0d6acbff8e0c", 243 | "sha256:8b045a79e8b621b4b35b29f29e33e9e0964f3a276f7da4d5736142f322ad4842", 244 | "sha256:a6c071b4b498bcea16a8dc8590cad81fa8d43821f34c74bc00f96499e2527073", 245 | "sha256:b0afe329701b59d02e56bc6cee7325af83e3fee9c299c615fc1df3202b4f886f", 246 | "sha256:ba766343bdbcb928039b8fff609e80ae7a5fd5ed7a4fc5af822224b63e0cbaff", 247 | "sha256:bf33490514d308bcc27ed240cb3eb114f1ec31af031535cd8f27659a7049bd52", 248 | "sha256:c7a646f6b07b033642b4f52ae2e45efd8b80780b3b90e8092a0cec935fbf81e2", 249 | "sha256:cc97ea766ac545074bab0e5af3dbc48e0d05ba230ae5a404e284d39abe4b3baf", 250 | "sha256:d696c394ebd164ca80b5871e2e9ad2f9fdbb81bd3c552c1d5f1e8ee694e6204a", 251 | "sha256:f468e4868f78c3ac202a66abfe2866414bca4ae7666a21ef0938c423de0f7d50", 252 | "sha256:fe344face8d30a5a6aa26e5acf288aa2a8f0f32e05efdda3d314b4bf289ec2af" 253 | ], 254 | "version": "==1.0.2" 255 | }, 256 | "numpy": { 257 | "hashes": [ 258 | "sha256:082f8d4dd69b6b688f64f509b91d482362124986d98dc7dc5f5e9f9b9c3bb983", 259 | "sha256:1bc0145999e8cb8aed9d4e65dd8b139adf1919e521177f198529687dbf613065", 260 | "sha256:309cbcfaa103fc9a33ec16d2d62569d541b79f828c382556ff072442226d1968", 261 | "sha256:3673c8b2b29077f1b7b3a848794f8e11f401ba0b71c49fbd26fb40b71788b132", 262 | "sha256:480fdd4dbda4dd6b638d3863da3be82873bba6d32d1fc12ea1b8486ac7b8d129", 263 | "sha256:56ef7f56470c24bb67fb43dae442e946a6ce172f97c69f8d067ff8550cf782ff", 264 | "sha256:5a936fd51049541d86ccdeef2833cc89a18e4d3808fe58a8abeb802665c5af93", 265 | "sha256:5b6885c12784a27e957294b60f97e8b5b4174c7504665333c5e94fbf41ae5d6a", 266 | "sha256:667c07063940e934287993366ad5f56766bc009017b4a0fe91dbd07960d0aba7", 267 | "sha256:7ed448ff4eaffeb01094959b19cbaf998ecdee9ef9932381420d514e446601cd", 268 | "sha256:8343bf67c72e09cfabfab55ad4a43ce3f6bf6e6ced7acf70f45ded9ebb425055", 269 | "sha256:92feb989b47f83ebef246adabc7ff3b9a59ac30601c3f6819f8913458610bdcc", 270 | 
"sha256:935c27ae2760c21cd7354402546f6be21d3d0c806fffe967f745d5f2de5005a7", 271 | "sha256:aaf42a04b472d12515debc621c31cf16c215e332242e7a9f56403d814c744624", 272 | "sha256:b12e639378c741add21fbffd16ba5ad25c0a1a17cf2b6fe4288feeb65144f35b", 273 | "sha256:b1cca51512299841bf69add3b75361779962f9cee7d9ee3bb446d5982e925b69", 274 | "sha256:b8456987b637232602ceb4d663cb34106f7eb780e247d51a260b84760fd8f491", 275 | "sha256:b9792b0ac0130b277536ab8944e7b754c69560dac0415dd4b2dbd16b902c8954", 276 | "sha256:c9591886fc9cbe5532d5df85cb8e0cc3b44ba8ce4367bd4cf1b93dc19713da72", 277 | "sha256:cf1347450c0b7644ea142712619533553f02ef23f92f781312f6a3553d031fc7", 278 | "sha256:de8b4a9b56255797cbddb93281ed92acbc510fb7b15df3f01bd28f46ebc4edae", 279 | "sha256:e1b1dc0372f530f26a03578ac75d5e51b3868b9b76cd2facba4c9ee0eb252ab1", 280 | "sha256:e45f8e981a0ab47103181773cc0a54e650b2aef8c7b6cd07405d0fa8d869444a", 281 | "sha256:e4f6d3c53911a9d103d8ec9518190e52a8b945bab021745af4939cfc7c0d4a9e", 282 | "sha256:ed8a311493cf5480a2ebc597d1e177231984c818a86875126cfd004241a73c3e", 283 | "sha256:ef71a1d4fd4858596ae80ad1ec76404ad29701f8ca7cdcebc50300178db14dfc" 284 | ], 285 | "markers": "python_version >= '3.6'", 286 | "version": "==1.19.1" 287 | }, 288 | "pip-tools": { 289 | "hashes": [ 290 | "sha256:5672c2b6ca0f1fd803f3b45568c2cf7fadf135b4971e7d665232b2075544c0ef", 291 | "sha256:73787e23269bf8a9230f376c351297b9037ed0d32ab0f9bef4a187d976acc054" 292 | ], 293 | "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'", 294 | "version": "==5.3.1" 295 | }, 296 | "plac": { 297 | "hashes": [ 298 | "sha256:398cb947c60c4c25e275e1f1dadf027e7096858fb260b8ece3b33bcff90d985f", 299 | "sha256:487e553017d419f35add346c4c09707e52fa53f7e7181ce1098ca27620e9ceee" 300 | ], 301 | "version": "==1.1.3" 302 | }, 303 | "placebo": { 304 | "hashes": [ 305 | "sha256:03157f8527bbc2965b71b88f4a139ef8038618b346787f20d63e3c5da541b047" 306 | ], 307 | "version": "==0.9.0" 308 | }, 309 | "preshed": { 310 | "hashes": [ 
311 | "sha256:0c15ae62f2595ca479decc3452967484dae57b510278800f5deb9115238cc818", 312 | "sha256:190345724eb3f7aeaeb2a758740d698bd6c017c2cdf07c71c16b34820973d114", 313 | "sha256:1be3cb59211282e906a11443464fe3e19f6561e2fcd06410e4adc6d45354cf82", 314 | "sha256:1ef72a120e49356058b3c0590d7b5e91f2747b44e006eef6579be6131223cab0", 315 | "sha256:253970beae87ab672a6afb543908761795eea3cb7b0d784e2ea51e265752059e", 316 | "sha256:448d9df12e63fe4a3024f6153ee6703bb95d2be0ce887b5eda7ddc41acfba825", 317 | "sha256:61d73468c97c1d6d5a048de0b01d5a6fd052123358aca4823cdb277e436436cb", 318 | "sha256:633358f1fb0ec5dd6dbe4971c328d08809e5a8dbefdf13a802ae0a7cb45306c7", 319 | "sha256:6518bbd5fb8adbc3231e75ae78d96a7bdd5405a3b23a09d5e62a2e4fc833724e", 320 | "sha256:7e80ffc1fb79496d4feafe0eaf71ee5e532b91daf6cec235d7f9c4c12657a58c", 321 | "sha256:7ea588a78aaf310ae2c293071a8571b07ae434819be05fe510442b6df3f8fbf7", 322 | "sha256:88427346b220293439db77c82913791fa13edc6ac73d8159610699a3ca17aae9", 323 | "sha256:8a9a8222a697a513f25a94733e7a17cc298ecd8fd56b606a1d8fa0ac342c2830", 324 | "sha256:b4ae6c7c44aa3ff7bd717791bb6b619ecb273b7cb128c986f2dc65f6e0e6ddd4", 325 | "sha256:e37058d91bd7f0f5a7a9c83d22a83dc581ab5f79688a87be81f200993145a250", 326 | "sha256:ece5e850f667eaa3367d5c56dda9e3aa6ac1c0bb2117d2f466a26db5f26bbe4b" 327 | ], 328 | "version": "==3.0.2" 329 | }, 330 | "python-dateutil": { 331 | "hashes": [ 332 | "sha256:891c38b2a02f5bb1be3e4793866c8df49c7d19baabf9c1bad62547e0b4866aca", 333 | "sha256:95511bae634d69bc7329ba55e646499a842bc4ec342ad54a8cdb65645a0aad3c" 334 | ], 335 | "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", 336 | "version": "==2.6.1" 337 | }, 338 | "python-dotenv": { 339 | "hashes": [ 340 | "sha256:8c10c99a1b25d9a68058a1ad6f90381a62ba68230ca93966882a4dbc3bc9c33d", 341 | "sha256:c10863aee750ad720f4f43436565e4c1698798d763b63234fb5021b6c616e423" 342 | ], 343 | "index": "pypi", 344 | "version": "==0.14.0" 345 | }, 346 | "python-slugify": { 347 | "hashes": [ 
348 | "sha256:69a517766e00c1268e5bbfc0d010a0a8508de0b18d30ad5a1ff357f8ae724270" 349 | ], 350 | "version": "==4.0.1" 351 | }, 352 | "pyyaml": { 353 | "hashes": [ 354 | "sha256:06a0d7ba600ce0b2d2fe2e78453a470b5a6e000a985dd4a4e54e436cc36b0e97", 355 | "sha256:240097ff019d7c70a4922b6869d8a86407758333f02203e0fc6ff79c5dcede76", 356 | "sha256:4f4b913ca1a7319b33cfb1369e91e50354d6f07a135f3b901aca02aa95940bd2", 357 | "sha256:69f00dca373f240f842b2931fb2c7e14ddbacd1397d57157a9b005a6a9942648", 358 | "sha256:73f099454b799e05e5ab51423c7bcf361c58d3206fa7b0d555426b1f4d9a3eaf", 359 | "sha256:74809a57b329d6cc0fdccee6318f44b9b8649961fa73144a98735b0aaf029f1f", 360 | "sha256:7739fc0fa8205b3ee8808aea45e968bc90082c10aef6ea95e855e10abf4a37b2", 361 | "sha256:95f71d2af0ff4227885f7a6605c37fd53d3a106fcab511b8860ecca9fcf400ee", 362 | "sha256:b8eac752c5e14d3eca0e6dd9199cd627518cb5ec06add0de9d32baeee6fe645d", 363 | "sha256:cc8955cfbfc7a115fa81d85284ee61147059a753344bc51098f3ccd69b0d7e0c", 364 | "sha256:d13155f591e6fcc1ec3b30685d50bf0711574e2c0dfffd7644babf8b5102ca1a" 365 | ], 366 | "version": "==5.3.1" 367 | }, 368 | "requests": { 369 | "hashes": [ 370 | "sha256:b3559a131db72c33ee969480840fff4bb6dd111de7dd27c8ee1f820f4f00231b", 371 | "sha256:fe75cc94a9443b9246fc7049224f75604b113c36acb93f87b80ed42c44cbb898" 372 | ], 373 | "index": "pypi", 374 | "version": "==2.24.0" 375 | }, 376 | "s3transfer": { 377 | "hashes": [ 378 | "sha256:2482b4259524933a022d59da830f51bd746db62f047d6eb213f2f8855dcb8a13", 379 | "sha256:921a37e2aefc64145e7b73d50c71bb4f26f46e4c9f414dc648c6245ff92cf7db" 380 | ], 381 | "version": "==0.3.3" 382 | }, 383 | "sentry-sdk": { 384 | "extras": [ 385 | "flask" 386 | ], 387 | "hashes": [ 388 | "sha256:5f3d96ebd1cf758216552c1a0dc2ca1a000af19a4f9b4a3f4c237c7069fde1d4", 389 | "sha256:ec255a60d58a8ba35439491d2daaf4b3d03283d0dbdec84a6e359a77fc36961a" 390 | ], 391 | "index": "pypi", 392 | "version": "==0.16.4" 393 | }, 394 | "six": { 395 | "hashes": [ 396 | 
"sha256:30639c035cdb23534cd4aa2dd52c3bf48f06e5f4a941509c8bafd8ce11080259", 397 | "sha256:8b74bedcbbbaca38ff6d7491d76f2b06b3592611af620f8426e82dddb04a5ced" 398 | ], 399 | "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", 400 | "version": "==1.15.0" 401 | }, 402 | "spacy": { 403 | "hashes": [ 404 | "sha256:0f5d088c1d2a1fcf247090854927cd0ba4e28266323af112dead20ff020ded1c", 405 | "sha256:11b9517cdcbea166a9461093821d12bf632aea7dd14b6e3c549871903bda41b8", 406 | "sha256:1fcfb911b254af3144b3e65a2daf671cb26b6243ec431089ccb28cbe03d826de", 407 | "sha256:366eaae9634c59f89015ad11db1d8559c327ab665a5f644c71155c76711ee50a", 408 | "sha256:3bafcc134c340c5d7556612344d2844522d452b99a21f2b0a9b640f6c55f1110", 409 | "sha256:4944a1118f6dbb49201749d72527b749f74032e1026ddf387bc3a7e172ff0300", 410 | "sha256:7a6b7486f71930e7de7100feb72036e3ccb8c18509ff23e8453cff0b28470ea4", 411 | "sha256:818de26e0e383f64ccbe3db185574920de05923d8deac8bbb12113b9e33cee1f", 412 | "sha256:b7df3622e9a867294b913cd0a4fba99d47162af1cfd3a840c5943b25f390bb5c", 413 | "sha256:f7b3a17730786979f964b16ee1e4a9146cd05016f100afb274dd66336dfc39eb" 414 | ], 415 | "index": "pypi", 416 | "version": "==2.3.2" 417 | }, 418 | "srsly": { 419 | "hashes": [ 420 | "sha256:18bad26c34cf5a8853fbf018fd168a7bf2ea7ce661e66476c25dac711cb79c9b", 421 | "sha256:2179cf1e88c250e89e40227bd5848341011c170079b3d424987d067de6a73f42", 422 | "sha256:21cfb0e5dea2c4515b5c2daa78402d5782c6425b4f58af40d2e2cb45e4778d8c", 423 | "sha256:29434753a77481ec6129991f4116f983085cc8005c1ad963261124842e8c05fc", 424 | "sha256:3f3975e8cb67194d26dd03508469b1303f8b994f30e7782f7eae25fef6dc4aad", 425 | "sha256:46213d8f094b348a9433c825ac1eba36a21aa25a8bae6f29c2f9f053e15be961", 426 | "sha256:59258b81d567df207f8a0a33c4b5fa232afccf1d927c8ce3ba5395bfd64c0ed8", 427 | "sha256:7c553a709fd56a37a07f969e849f55a0aeabaeb7677bebc588a640ab8ec134aa", 428 | "sha256:95849d84e8929be248a180e672c8ce1ed98b1341263bc983efdf8427465584f1", 429 | 
"sha256:b94d8a13c60e3298a9ba12b1b211026e8378c7d087efd7ce46a3f2d8d4678d94", 430 | "sha256:c8beff52c104a7ffe4a15513a05dc0497998cf83aa1ca39454489994d18c1c07", 431 | "sha256:d409beb7257208633c974c01f9dc3265562fb6802caee7de21880761ba87c3ed" 432 | ], 433 | "version": "==1.0.2" 434 | }, 435 | "text-unidecode": { 436 | "hashes": [ 437 | "sha256:1311f10e8b895935241623731c2ba64f4c455287888b18189350b67134a822e8", 438 | "sha256:bad6603bb14d279193107714b288be206cac565dfa49aa5b105294dd5c4aab93" 439 | ], 440 | "version": "==1.3" 441 | }, 442 | "thinc": { 443 | "hashes": [ 444 | "sha256:0139fa84dc9b8d88af15e648fc4ae13d899b8b5e49cb26a8f4a0604ee9ad8a9e", 445 | "sha256:061633bf334e3728173d59d6001e8cdef3839166c71e23b3c5f74f5fae3c0d7c", 446 | "sha256:0df8c5762359a3a4d8d494aa2eff11c4936c4f34559fe1b3ab1d13d24c76b509", 447 | "sha256:33db4a9182c78c8f4823b1765274bbb0caa8f4269dbd102f2e6ab2f7f91a6084", 448 | "sha256:36237f711f0b3da932bd28cc366a92f6f1b6d1f95ad6cbbc8166b94785b38e40", 449 | "sha256:387d25e57e53eed86d24f2657ab9555703043de27211764835a38e2e31b3c8e9", 450 | "sha256:55b9e02e4b8395cee0a8a810bd8af4d7600b04520bab60df1fc513d50a41eec5", 451 | "sha256:5d633cc5c210a02ba706ed7e800f4dc906ba1e10b85e3ed40d77fdb7e7674a20", 452 | "sha256:947806f4cbbcaf8dd046942acd5e52d55ac805303985a2e36de4734be5496bf1", 453 | "sha256:d3ff8cfbf583ac788a85f5e0e3cf00edf2f6bc5ba2b2ca264771870c07cb5717", 454 | "sha256:d70e71b0561bbf844bc9f737f60150b0f8f04dfd603151869d93a5735deb6219", 455 | "sha256:e2ebeeafd79bb86697388fccc5996d6ea1e69106e2a7fc3a1092d626b522cc01" 456 | ], 457 | "version": "==7.4.1" 458 | }, 459 | "toml": { 460 | "hashes": [ 461 | "sha256:926b612be1e5ce0634a2ca03470f95169cf16f939018233a670519cb4ac58b0f", 462 | "sha256:bda89d5935c2eac546d648028b9901107a595863cb36bae0c73ac804a9b4ce88" 463 | ], 464 | "version": "==0.10.1" 465 | }, 466 | "tqdm": { 467 | "hashes": [ 468 | "sha256:1a336d2b829be50e46b84668691e0a2719f26c97c62846298dd5ae2937e4d5cf", 469 | 
"sha256:564d632ea2b9cb52979f7956e093e831c28d441c11751682f84c86fc46e4fd21" 470 | ], 471 | "markers": "python_version >= '2.6' and python_version not in '3.0, 3.1, 3.2, 3.3'", 472 | "version": "==4.48.2" 473 | }, 474 | "troposphere": { 475 | "hashes": [ 476 | "sha256:e6f0883022a660d4096264496db0bceb2655f0b032bddb908525f98ec0958647" 477 | ], 478 | "version": "==2.6.2" 479 | }, 480 | "urllib3": { 481 | "hashes": [ 482 | "sha256:91056c15fa70756691db97756772bb1eb9678fa585d9184f24534b100dc60f4a", 483 | "sha256:e7983572181f5e1522d9c98453462384ee92a0be7fac5f1413a1e35c56cc0461" 484 | ], 485 | "markers": "python_version != '3.4'", 486 | "version": "==1.25.10" 487 | }, 488 | "wasabi": { 489 | "hashes": [ 490 | "sha256:ee3809f4ce00e1e7f424b1572c753cff0dcaca2ca684e67e31f985033a9f070b" 491 | ], 492 | "version": "==0.7.1" 493 | }, 494 | "werkzeug": { 495 | "hashes": [ 496 | "sha256:1e0dedc2acb1f46827daa2e399c1485c8fa17c0d8e70b6b875b4e7f54bf408d2", 497 | "sha256:b353856d37dec59d6511359f97f6a4b2468442e454bd1c98298ddce53cac1f04" 498 | ], 499 | "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", 500 | "version": "==0.16.1" 501 | }, 502 | "wheel": { 503 | "hashes": [ 504 | "sha256:497add53525d16c173c2c1c733b8f655510e909ea78cc0e29d374243544b77a2", 505 | "sha256:99a22d87add3f634ff917310a3d87e499f19e663413a52eb9232c447aa646c9f" 506 | ], 507 | "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'", 508 | "version": "==0.35.1" 509 | }, 510 | "wsgi-request-logger": { 511 | "hashes": [ 512 | "sha256:445d7ec52799562f812006394d0b4a7064b37084c6ea6bd74ea7a2136c97ed83" 513 | ], 514 | "version": "==0.4.6" 515 | }, 516 | "zappa": { 517 | "hashes": [ 518 | "sha256:c6e740334c1c39e644a345124b7317003420b632a8f7a6811d18b82bb16e2f8e", 519 | "sha256:ccfc336d3bc48a6898cbbd157e16653f717dbe3ca37f933ce40acfe242a03a40" 520 | ], 521 | "index": "pypi", 522 | "version": "==0.51.0" 523 | } 524 | }, 525 | "develop": { 526 | "attrs": { 527 | "hashes": 
[ 528 | "sha256:08a96c641c3a74e44eb59afb61a24f2cb9f4d7188748e76ba4bb5edfa3cb7d1c", 529 | "sha256:f7b7ce16570fe9965acd6d30101a28f62fb4a7f9e926b3bbc9b61f8b04247e72" 530 | ], 531 | "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", 532 | "version": "==19.3.0" 533 | }, 534 | "autopep8": { 535 | "hashes": [ 536 | "sha256:d21d3901cb0da6ebd1e83fc9b0dfbde8b46afc2ede4fe32fbda0c7c6118ca094" 537 | ], 538 | "index": "pypi", 539 | "version": "==1.5.4" 540 | }, 541 | "iniconfig": { 542 | "hashes": [ 543 | "sha256:80cf40c597eb564e86346103f609d74efce0f6b4d4f30ec8ce9e2c26411ba437", 544 | "sha256:e5f92f89355a67de0595932a6c6c02ab4afddc6fcdc0bfc5becd0d60884d3f69" 545 | ], 546 | "version": "==1.0.1" 547 | }, 548 | "more-itertools": { 549 | "hashes": [ 550 | "sha256:68c70cc7167bdf5c7c9d8f6954a7837089c6a36bf565383919bb595efb8a17e5", 551 | "sha256:b78134b2063dd214000685165d81c154522c3ee0a1c0d4d113c80361c234c5a2" 552 | ], 553 | "markers": "python_version >= '3.5'", 554 | "version": "==8.4.0" 555 | }, 556 | "packaging": { 557 | "hashes": [ 558 | "sha256:4357f74f47b9c12db93624a82154e9b120fa8293699949152b22065d556079f8", 559 | "sha256:998416ba6962ae7fbd6596850b80e17859a5753ba17c32284f67bfff33784181" 560 | ], 561 | "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", 562 | "version": "==20.4" 563 | }, 564 | "pluggy": { 565 | "hashes": [ 566 | "sha256:15b2acde666561e1298d71b523007ed7364de07029219b604cf808bfa1c765b0", 567 | "sha256:966c145cd83c96502c3c3868f50408687b38434af77734af1e9ca461a4081d2d" 568 | ], 569 | "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", 570 | "version": "==0.13.1" 571 | }, 572 | "py": { 573 | "hashes": [ 574 | "sha256:366389d1db726cd2fcfc79732e75410e5fe4d31db13692115529d34069a043c2", 575 | "sha256:9ca6883ce56b4e8da7e79ac18787889fa5206c79dcc67fb065376cd2fe03f342" 576 | ], 577 | "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", 578 | 
"version": "==1.9.0" 579 | }, 580 | "pycodestyle": { 581 | "hashes": [ 582 | "sha256:2295e7b2f6b5bd100585ebcb1f616591b652db8a741695b3d8f5d28bdc934367", 583 | "sha256:c58a7d2815e0e8d7972bf1803331fb0152f867bd89adf8a01dfd55085434192e" 584 | ], 585 | "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", 586 | "version": "==2.6.0" 587 | }, 588 | "pyparsing": { 589 | "hashes": [ 590 | "sha256:c203ec8783bf771a155b207279b9bccb8dea02d8f0c9e5f8ead507bc3246ecc1", 591 | "sha256:ef9d7589ef3c200abe66653d3f1ab1033c3c419ae9b9bdb1240a85b024efc88b" 592 | ], 593 | "markers": "python_version >= '2.6' and python_version not in '3.0, 3.1, 3.2, 3.3'", 594 | "version": "==2.4.7" 595 | }, 596 | "pytest": { 597 | "hashes": [ 598 | "sha256:85228d75db9f45e06e57ef9bf4429267f81ac7c0d742cc9ed63d09886a9fe6f4", 599 | "sha256:8b6007800c53fdacd5a5c192203f4e531eb2a1540ad9c752e052ec0f7143dbad" 600 | ], 601 | "index": "pypi", 602 | "version": "==6.0.1" 603 | }, 604 | "six": { 605 | "hashes": [ 606 | "sha256:30639c035cdb23534cd4aa2dd52c3bf48f06e5f4a941509c8bafd8ce11080259", 607 | "sha256:8b74bedcbbbaca38ff6d7491d76f2b06b3592611af620f8426e82dddb04a5ced" 608 | ], 609 | "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", 610 | "version": "==1.15.0" 611 | }, 612 | "toml": { 613 | "hashes": [ 614 | "sha256:926b612be1e5ce0634a2ca03470f95169cf16f939018233a670519cb4ac58b0f", 615 | "sha256:bda89d5935c2eac546d648028b9901107a595863cb36bae0c73ac804a9b4ce88" 616 | ], 617 | "version": "==0.10.1" 618 | } 619 | } 620 | } 621 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # About 2 | This is a small Flask project that lets you run NLP on AWS Lambda hosting Spacy models on S3 and therefore not running into the 250MB Lambda limit. 
3 | 4 | # Installing 5 | 6 | `pipenv install` 7 | 8 | # Activating environment 9 | `pipenv shell` 10 | 11 | # Testing 12 | 13 | `pipenv install -d` to also install the dev dependencies 14 | `pytest` 15 | 16 | # Before running it 17 | Create a `.env` file in the root folder (next to `settings.py`) with these variables: 18 | ``` 19 | FLASK_ENV=development 20 | SENTRY_DSN=your_sentry_dsn 21 | SPACY_MODEL_MEDIUM=en_core_web_md-2.3.1 # or whatever you want 22 | SPACY_MODEL_SMALL=en_core_web_sm-2.3.1 # same 23 | S3_BUCKET=your_bucket 24 | ``` 25 | 26 | # Deploying it 27 | Delete `zappa_settings.json` and create one from scratch if you want, or change the S3 bucket name in the JSON provided. I used different buckets to store the models and the Lambda code, but you can probably use the same one. 28 | 29 | Then run `zappa deploy dev`; it should be deployed in a couple of minutes. -------------------------------------------------------------------------------- /app.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import sentry_sdk 4 | from flask import Flask, jsonify, request 5 | from sentry_sdk.integrations.flask import FlaskIntegration 6 | 7 | import settings 8 | from src.nlp import find_locations 9 | 10 | 11 | sentry_sdk.init( 12 | dsn=os.environ['SENTRY_DSN'], 13 | integrations=[FlaskIntegration()] 14 | ) 15 | 16 | app = Flask(__name__) 17 | 18 | 19 | @app.route('/') 20 | def index(): 21 | return "Hello, world!", 200 22 | 23 | 24 | @app.route('/find_locations', methods=['POST']) 25 | def route_find_locations(): 26 | json_payload = request.get_json(silent=False) 27 | locations = find_locations(**json_payload) 28 | return jsonify(locations) 29 | 30 | 31 | @app.route('/debug-sentry', methods=['GET', 'POST']) 32 | def trigger_error(): 33 | request_args = process_request_arguments(request) 34 | division_by_zero = 1 / 0 35 | -------------------------------------------------------------------------------- /run_local.sh: 
def makedir_if_not_exists(dest):
    """Create the directory ``dest`` (and any missing parents) if it is absent.

    ``exist_ok=True`` makes the call idempotent and removes the gap between a
    separate existence check and the creation, during which another process
    could create the directory and make ``os.makedirs`` fail.

    Args:
        dest: Path of the directory to create.

    Raises:
        FileExistsError: If ``dest`` exists but is not a directory.
    """
    os.makedirs(dest, exist_ok=True)
def unzip_file(filename: str, dest: str):
    """Extract the tar archive ``filename`` into the directory ``dest``.

    Every member name is checked before anything is written, so an archive
    containing absolute paths or ``..`` components cannot place files outside
    ``dest``.

    NOTE(review): link members (symlinks/hardlinks) are not inspected here;
    on interpreters that support it, ``extractall(filter='data')`` is the
    more complete safeguard.

    Args:
        filename: Path of the ``.tar``/``.tar.gz`` archive to extract.
        dest: Directory the archive is extracted into.

    Raises:
        tarfile.TarError: If a member would be written outside ``dest``.
    """
    print(f'Unzipping {filename}')
    dest_root = os.path.realpath(dest)
    with tarfile.open(filename) as archive:
        for member in archive.getmembers():
            # os.path.join discards dest_root when member.name is absolute,
            # so absolute names are caught by the same comparison.
            target = os.path.realpath(os.path.join(dest_root, member.name))
            if os.path.commonpath([dest_root, target]) != dest_root:
                raise tarfile.TarError(
                    f'Refusing to extract {member.name!r} outside of {dest}'
                )
        archive.extractall(path=dest)
def find_locations(text: str) -> List[str]:
    """Return the distinct location mentions in ``text``, most frequent first.

    The text is run through the module-level ``nlp`` pipeline and only
    entities labelled ``'GPE'`` or ``'LOC'`` are kept.

    Args:
        text: The text to analyse.

    Returns:
        Each distinct entity text once, ordered by descending number of
        appearances; entities with equal counts keep first-seen order.
    """
    doc = nlp(text)
    location_labels = {'GPE', 'LOC'}
    mentions = Counter(
        ent.text for ent in doc.ents if ent.label_ in location_labels
    )
    # Counter.keys() only yields insertion order; most_common() is what
    # actually ranks the names by how often they appear.
    return [name for name, _count in mentions.most_common()]
def test_find_locations():
    """Check that both country names in the sample sentence are returned."""
    found = find_locations(text='Spain is a sunnier country than the UK')
    for expected in ('Spain', 'UK'):
        assert expected in found