├── .gitignore ├── LICENSE ├── README.md ├── base ├── about-base.md ├── handler.py ├── requirements.txt └── scrape.py └── course ├── Pipfile ├── basic_zip.sh ├── cache ├── example.txt └── hello-world.txt ├── deploy.sh ├── lambda.py ├── prepare.sh ├── push_to_s3.sh └── serverless-app.code-workspace /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | 3 | # Byte-compiled / optimized / DLL files 4 | __pycache__/ 5 | *.py[cod] 6 | *$py.class 7 | 8 | # C extensions 9 | *.so 10 | 11 | # Distribution / packaging 12 | .Python 13 | build/ 14 | develop-eggs/ 15 | dist/ 16 | downloads/ 17 | eggs/ 18 | .eggs/ 19 | lib/ 20 | lib64/ 21 | parts/ 22 | sdist/ 23 | var/ 24 | wheels/ 25 | pip-wheel-metadata/ 26 | share/python-wheels/ 27 | *.egg-info/ 28 | .installed.cfg 29 | *.egg 30 | MANIFEST 31 | 32 | # PyInstaller 33 | # Usually these files are written by a python script from a template 34 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 35 | *.manifest 36 | *.spec 37 | 38 | # Installer logs 39 | pip-log.txt 40 | pip-delete-this-directory.txt 41 | 42 | # Unit test / coverage reports 43 | htmlcov/ 44 | .tox/ 45 | .nox/ 46 | .coverage 47 | .coverage.* 48 | .cache 49 | nosetests.xml 50 | coverage.xml 51 | *.cover 52 | *.py,cover 53 | .hypothesis/ 54 | .pytest_cache/ 55 | 56 | # Translations 57 | *.mo 58 | *.pot 59 | 60 | # Django stuff: 61 | *.log 62 | local_settings.py 63 | db.sqlite3 64 | db.sqlite3-journal 65 | 66 | # Flask stuff: 67 | instance/ 68 | .webassets-cache 69 | 70 | # Scrapy stuff: 71 | .scrapy 72 | 73 | # Sphinx documentation 74 | docs/_build/ 75 | 76 | # PyBuilder 77 | target/ 78 | 79 | # Jupyter Notebook 80 | .ipynb_checkpoints 81 | 82 | # IPython 83 | profile_default/ 84 | ipython_config.py 85 | 86 | # pyenv 87 | .python-version 88 | 89 | # pipenv 90 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 91 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 92 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 93 | # install all needed dependencies. 94 | #Pipfile.lock 95 | 96 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 97 | __pypackages__/ 98 | 99 | # Celery stuff 100 | celerybeat-schedule 101 | celerybeat.pid 102 | 103 | # SageMath parsed files 104 | *.sage.py 105 | 106 | # Environments 107 | .env 108 | .venv 109 | env/ 110 | venv/ 111 | ENV/ 112 | env.bak/ 113 | venv.bak/ 114 | 115 | # Spyder project settings 116 | .spyderproject 117 | .spyproject 118 | 119 | # Rope project settings 120 | .ropeproject 121 | 122 | # mkdocs documentation 123 | /site 124 | 125 | # mypy 126 | .mypy_cache/ 127 | .dmypy.json 128 | dmypy.json 129 | 130 | # Pyre type checker 131 | .pyre/ 132 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Coding For Entrepreneurs 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Serverless Python Workflow with AWS Lambda 2 | A tutorial to setup and deploy a simple Serverless Python workflow with REST API endpoints in AWS Lambda. 3 | 4 | #### Requirements 5 | - Working Knowledge of Python or [Learn Here](https://cfe.sh/projects/30-days-python-38) 6 | - [AWS Account](https://aws.amazon.com) with the appropriate permissions for AWS Lambda, API Gateway, AWS S3, and IAM. If it's your account, you already have this. 7 | 8 | 9 | ### Getting started 10 | 11 | #### 1. Download Project 12 | ``` 13 | mkdir serverless-workflow 14 | cd serverless-worlfow 15 | git clone https://github.com/codingforentrepreneurs/Serverless-Python-Workflow-with-AWS-Lambda . 16 | ``` 17 | 18 | #### 2. Copy `base` files to project root 19 | ``` 20 | cp base/* . 21 | ``` 22 | 23 | 24 | #### 3. Remove Unnecessary files 25 | ``` 26 | rm -rf .git 27 | rm -rf base 28 | ``` 29 | 30 | 31 | #### 4. Virtual Environment Setup 32 | 33 | ##### Pipenv 34 | ``` 35 | pipenv install -r requirements.txt --python 3.8 36 | ``` 37 | 38 | ##### Virtualenv 39 | ``` 40 | virtualenv -p python3.8 . 41 | source bin/activate 42 | pip install -r requirements.txt 43 | ``` 44 | > If using `windows` with `virtualenv` just run `.\Scripts\activate` 45 | 46 | 47 | #### 5. Ready 48 | You now have a baseline Lambda function working locally. Check out the code in the [course directory](./course) or learn how to implement this code on [cfe](https://www.codingforentrepreneurs.com/projects/serverless-python-aws-lambda-api-gateway). 49 | -------------------------------------------------------------------------------- /base/about-base.md: -------------------------------------------------------------------------------- 1 | ### Serverless Python Workflow with AWS Lambda Base Project 2 | 3 | This code is meant to be a starting point for the tutorial series. The scraping portion can be replaced with any kind of workflow you need to run. -------------------------------------------------------------------------------- /base/handler.py: -------------------------------------------------------------------------------- 1 | 2 | import json 3 | 4 | # from scrape import run 5 | 6 | def lambda_handler(event, context): 7 | """ 8 | This is roughly the exact same handler function that AWS provides. 9 | """ 10 | response = { 11 | 'statusCode': 200, 12 | 'body': json.dumps("Hello world") 13 | } 14 | return response -------------------------------------------------------------------------------- /base/requirements.txt: -------------------------------------------------------------------------------- 1 | requests 2 | pandas 3 | numpy 4 | requests-html -------------------------------------------------------------------------------- /base/scrape.py: -------------------------------------------------------------------------------- 1 | """ 2 | This is a simple web scraper that was created in 3 | Day 12 of 30 Days of Python on: 4 | https://cfe.sh/projects/30-days-python-38 5 | """ 6 | import os 7 | import sys 8 | import datetime 9 | import requests 10 | import pandas as pd 11 | from requests_html import HTML 12 | 13 | BASE_DIR = os.path.dirname(__file__) 14 | 15 | 16 | 17 | def url_to_txt(url, filename="world.html", save=False): 18 | r = requests.get(url) 19 | if r.status_code == 200: 20 | html_text = r.text 21 | if save: 22 | with open(f"world-{year}.html", 'w') as f: 23 | f.write(html_text) 24 | return html_text 25 | return None 26 | 27 | 28 | def parse_and_extract(url, name='2020'): 29 | html_text = url_to_txt(url) 30 | if html_text == None: 31 | return False 32 | r_html = HTML(html=html_text) 33 | table_class = ".imdb-scroll-table" 34 | # table_class = "#table" 35 | r_table = r_html.find(table_class) 36 | 37 | # print(r_table) 38 | table_data = [] 39 | # table_data_dicts = [] 40 | header_names = [] 41 | if len(r_table) == 0: 42 | return False 43 | parsed_table = r_table[0] 44 | rows = parsed_table.find("tr") 45 | header_row = rows[0] 46 | header_cols = header_row.find('th') 47 | header_names = [x.text for x in header_cols] 48 | for row in rows[1:]: 49 | # print(row.text) 50 | cols = row.find("td") 51 | row_data = [] 52 | row_dict_data = {} 53 | for i, col in enumerate(cols): 54 | # print(i, col.text, '\n\n') 55 | header_name = header_names[i] 56 | # row_dict_data[header_name] = col.text 57 | row_data.append(col.text) 58 | table_data_dicts.append(row_dict_data) 59 | table_data.append(row_data) 60 | df = pd.DataFrame(table_data, columns=header_names) 61 | # df = pd.DataFrame(table_data_dicts) 62 | path = os.path.join(BASE_DIR, 'data') 63 | os.makedirs(path, exist_ok=True) 64 | filepath = os.path.join('data', f'{name}.csv') 65 | df.to_csv(filepath, index=False) 66 | return True 67 | 68 | def run(start_year=None, years_ago=0): 69 | if start_year == None: 70 | now = datetime.datetime.now() 71 | start_year = now.year 72 | assert isinstance(start_year, int) 73 | assert isinstance(years_ago, int) 74 | assert len(f"{start_year}") == 4 75 | for i in range(0, years_ago+1): 76 | url = f"https://www.boxofficemojo.com/year/world/{start_year}/" 77 | finished = parse_and_extract(url, name=start_year) 78 | if finished: 79 | print(f"Finished {start_year}") 80 | else: 81 | print(f"{start_year} not finished") 82 | start_year -= 1 -------------------------------------------------------------------------------- /course/Pipfile: -------------------------------------------------------------------------------- 1 | [[source]] 2 | name = "pypi" 3 | url = "https://pypi.org/simple" 4 | verify_ssl = true 5 | 6 | [dev-packages] 7 | 8 | [packages] 9 | pandas = "*" 10 | numpy = "*" 11 | requests = "*" 12 | boto3 = "*" 13 | beautifulsoup4 = "*" 14 | 15 | [requires] 16 | python_version = "3.8" 17 | -------------------------------------------------------------------------------- /course/basic_zip.sh: -------------------------------------------------------------------------------- 1 | zip -vr helloWorldLambda.zip cache/ -x "*.DS_Store" 2 | 3 | zip -g helloWorldLambda.zip lambda.py 4 | -------------------------------------------------------------------------------- /course/cache/example.txt: -------------------------------------------------------------------------------- 1 | {"hello": "world"} -------------------------------------------------------------------------------- /course/cache/hello-world.txt: -------------------------------------------------------------------------------- 1 | Hello there! -------------------------------------------------------------------------------- /course/deploy.sh: -------------------------------------------------------------------------------- 1 | /Users/cfe/Dev/serverless-app/prepare.sh 2 | 3 | zip -vr helloWorldLambda.zip cache/ -x "*.DS_Store" 4 | 5 | zip -g helloWorldLambda.zip scraper.py 6 | zip -g helloWorldLambda.zip lambda.py 7 | 8 | aws s3api put-object \ 9 | --bucket cfe-lambda \ 10 | --key helloWorld/helloWorldLambda.zip \ 11 | --body helloWorldLambda.zip 12 | 13 | aws lambda update-function-code \ 14 | --function-name helloWorldLambda \ 15 | --s3-bucket cfe-lambda \ 16 | --s3-key helloWorld/helloWorldLambda.zip \ 17 | --publish \ 18 | --region us-west-1 19 | 20 | rm helloWorldLambda.zip -------------------------------------------------------------------------------- /course/lambda.py: -------------------------------------------------------------------------------- 1 | import json 2 | import pandas as pd 3 | 4 | from scraper import scrape_event 5 | 6 | def scrape_handler(): 7 | scrape_event(year=2020) 8 | data = { 9 | "scaping": True 10 | } 11 | return { 12 | "statusCode": 200, 13 | "body": json.dumps(data) 14 | } 15 | 16 | 17 | def handler(event, context): 18 | data = { 19 | "message": "Local deployment works!" 20 | } 21 | random_data = [{"hello": 123, "world": "this is cool"}] 22 | df = pd.DataFrame(random_data) 23 | columns = list(df.columns) 24 | data['columns'] = columns 25 | http_data = {} 26 | try: 27 | http_data = event['requestContext']['http'] 28 | except: 29 | pass 30 | path = http_data.get("path") 31 | method = http_data.get("method") 32 | source_id = http_data.get("sourceIp") 33 | user_agent = http_data.get('userAgent') 34 | data['path'] = path 35 | if path != None: 36 | if "/scrape" in path: 37 | return scrape_handler() 38 | return { 39 | "statusCode": 200, 40 | "body": json.dumps(data) 41 | } -------------------------------------------------------------------------------- /course/prepare.sh: -------------------------------------------------------------------------------- 1 | pip install --target ./package pytz requests beautifulsoup4 2 | cd package 3 | 4 | # download pandas wheel 5 | curl -O https://files.pythonhosted.org/packages/f5/10/40688389f5e234bde06aa84e6f3ccf5beea6269f57e2bef67866d3b43268/pandas-1.0.3-cp38-cp38-manylinux1_x86_64.whl 6 | 7 | # unzip pands 8 | unzip pandas-1.0.3-cp38-cp38-manylinux1_x86_64.whl 9 | 10 | curl -O https://files.pythonhosted.org/packages/ca/c6/cca531518aab1c161233c61e090728024aa647f2ff9c3b91d3f4e68e7e0e/numpy-1.18.3-cp38-cp38-manylinux1_x86_64.whl 11 | 12 | unzip numpy-1.18.3-cp38-cp38-manylinux1_x86_64.whl 13 | 14 | rm -r *.whl *.dist-info __pycache__ 15 | 16 | 17 | zip -r9 ${OLDPWD}/helloWorldLambda.zip . 18 | cd ${OLDPWD} 19 | rm -rf package -------------------------------------------------------------------------------- /course/push_to_s3.sh: -------------------------------------------------------------------------------- 1 | zip -vr helloWorldLambda.zip cache/ -x "*.DS_Store" 2 | 3 | zip -g helloWorldLambda.zip lambda.py 4 | 5 | 6 | aws s3api put-object \ 7 | --bucket cfe-lambda \ 8 | --key helloWorld/helloWorldLambda.zip \ 9 | --body helloWorldLambda.zip -------------------------------------------------------------------------------- /course/serverless-app.code-workspace: -------------------------------------------------------------------------------- 1 | { 2 | "folders": [ 3 | { 4 | "path": "." 5 | } 6 | ], 7 | "settings": { 8 | "python.pythonPath": "/Users/cfe/.local/share/virtualenvs/serverless-app-0vZXD_LV/bin/python" 9 | } 10 | } --------------------------------------------------------------------------------