├── .gitignore
├── LICENSE
├── README.md
├── base
    ├── about-base.md
    ├── handler.py
    ├── requirements.txt
    └── scrape.py
└── course
    ├── Pipfile
    ├── basic_zip.sh
    ├── cache
        ├── example.txt
        └── hello-world.txt
    ├── deploy.sh
    ├── lambda.py
    ├── prepare.sh
    ├── push_to_s3.sh
    └── serverless-app.code-workspace


/.gitignore:
--------------------------------------------------------------------------------
  1 | .DS_Store
  2 | 
  3 | # Byte-compiled / optimized / DLL files
  4 | __pycache__/
  5 | *.py[cod]
  6 | *$py.class
  7 | 
  8 | # C extensions
  9 | *.so
 10 | 
 11 | # Distribution / packaging
 12 | .Python
 13 | build/
 14 | develop-eggs/
 15 | dist/
 16 | downloads/
 17 | eggs/
 18 | .eggs/
 19 | lib/
 20 | lib64/
 21 | parts/
 22 | sdist/
 23 | var/
 24 | wheels/
 25 | pip-wheel-metadata/
 26 | share/python-wheels/
 27 | *.egg-info/
 28 | .installed.cfg
 29 | *.egg
 30 | MANIFEST
 31 | 
 32 | # PyInstaller
 33 | #  Usually these files are written by a python script from a template
 34 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 35 | *.manifest
 36 | *.spec
 37 | 
 38 | # Installer logs
 39 | pip-log.txt
 40 | pip-delete-this-directory.txt
 41 | 
 42 | # Unit test / coverage reports
 43 | htmlcov/
 44 | .tox/
 45 | .nox/
 46 | .coverage
 47 | .coverage.*
 48 | .cache
 49 | nosetests.xml
 50 | coverage.xml
 51 | *.cover
 52 | *.py,cover
 53 | .hypothesis/
 54 | .pytest_cache/
 55 | 
 56 | # Translations
 57 | *.mo
 58 | *.pot
 59 | 
 60 | # Django stuff:
 61 | *.log
 62 | local_settings.py
 63 | db.sqlite3
 64 | db.sqlite3-journal
 65 | 
 66 | # Flask stuff:
 67 | instance/
 68 | .webassets-cache
 69 | 
 70 | # Scrapy stuff:
 71 | .scrapy
 72 | 
 73 | # Sphinx documentation
 74 | docs/_build/
 75 | 
 76 | # PyBuilder
 77 | target/
 78 | 
 79 | # Jupyter Notebook
 80 | .ipynb_checkpoints
 81 | 
 82 | # IPython
 83 | profile_default/
 84 | ipython_config.py
 85 | 
 86 | # pyenv
 87 | .python-version
 88 | 
 89 | # pipenv
 90 | #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
 91 | #   However, in case of collaboration, if having platform-specific dependencies or dependencies
 92 | #   having no cross-platform support, pipenv may install dependencies that don't work, or not
 93 | #   install all needed dependencies.
 94 | #Pipfile.lock
 95 | 
 96 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
 97 | __pypackages__/
 98 | 
 99 | # Celery stuff
100 | celerybeat-schedule
101 | celerybeat.pid
102 | 
103 | # SageMath parsed files
104 | *.sage.py
105 | 
106 | # Environments
107 | .env
108 | .venv
109 | env/
110 | venv/
111 | ENV/
112 | env.bak/
113 | venv.bak/
114 | 
115 | # Spyder project settings
116 | .spyderproject
117 | .spyproject
118 | 
119 | # Rope project settings
120 | .ropeproject
121 | 
122 | # mkdocs documentation
123 | /site
124 | 
125 | # mypy
126 | .mypy_cache/
127 | .dmypy.json
128 | dmypy.json
129 | 
130 | # Pyre type checker
131 | .pyre/
132 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2020 Coding For Entrepreneurs
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Serverless Python Workflow with AWS Lambda
 2 | A tutorial to set up and deploy a simple Serverless Python workflow with REST API endpoints in AWS Lambda.
 3 | 
 4 | #### Requirements
 5 | - Working Knowledge of Python or [Learn Here](https://cfe.sh/projects/30-days-python-38)
 6 | - [AWS Account](https://aws.amazon.com) with the appropriate permissions for AWS Lambda, API Gateway, AWS S3, and IAM. If it's your account, you already have this.
 7 | 
 8 | 
 9 | ### Getting started
10 | 
11 | #### 1. Download Project
12 | ```
13 | mkdir serverless-workflow
14 | cd serverless-workflow
15 | git clone https://github.com/codingforentrepreneurs/Serverless-Python-Workflow-with-AWS-Lambda .
16 | ```
17 | 
18 | #### 2. Copy `base` files to project root
19 | ```
20 | cp base/* .
21 | ```
22 | 
23 | 
24 | #### 3. Remove Unnecessary files
25 | ```
26 | rm -rf .git
27 | rm -rf base
28 | ```
29 | 
30 | 
31 | #### 4. Virtual Environment Setup
32 | 
33 | ##### Pipenv
34 | ```
35 | pipenv install -r requirements.txt --python 3.8
36 | ```
37 | 
38 | ##### Virtualenv
39 | ```
40 | virtualenv -p python3.8 .
41 | source bin/activate
42 | pip install -r requirements.txt
43 | ```
44 | > If using `windows` with `virtualenv` just run `.\Scripts\activate`
45 | 
46 | 
47 | #### 5. Ready
48 | You now have a baseline Lambda function working locally. Check out the code in the [course directory](./course) or learn how to implement this code on [cfe](https://www.codingforentrepreneurs.com/projects/serverless-python-aws-lambda-api-gateway).
49 | 


--------------------------------------------------------------------------------
/base/about-base.md:
--------------------------------------------------------------------------------
1 | ### Serverless Python Workflow with AWS Lambda Base Project
2 | 
3 | This code is meant to be a starting point for the tutorial series. The scraping portion can be replaced with any kind of workflow you need to run.


--------------------------------------------------------------------------------
/base/handler.py:
--------------------------------------------------------------------------------
 1 | 
 2 | import json
 3 | 
 4 | # from scrape import run
 5 | 
 6 | def lambda_handler(event, context):
 7 |     """
 8 |     This is roughly the exact same handler function that AWS provides.
 9 |     """
10 |     response = {
11 |         'statusCode': 200,
12 |         'body': json.dumps("Hello world")
13 |     }
14 |     return response


--------------------------------------------------------------------------------
/base/requirements.txt:
--------------------------------------------------------------------------------
1 | requests
2 | pandas
3 | numpy
4 | requests-html


--------------------------------------------------------------------------------
/base/scrape.py:
--------------------------------------------------------------------------------
 1 | """
 2 | This is a simple web scraper that was created in
 3 | Day 12 of 30 Days of Python on:
 4 | https://cfe.sh/projects/30-days-python-38
 5 | """
 6 | import os
 7 | import sys
 8 | import datetime
 9 | import requests
10 | import pandas as pd
11 | from requests_html import HTML
12 | 
13 | BASE_DIR = os.path.dirname(__file__)
14 | 
15 | 
16 | 
def url_to_txt(url, filename="world.html", save=False):
    """
    Download ``url`` and return the response body as text.

    Args:
        url: Address to fetch with an HTTP GET.
        filename: Path the HTML is written to when ``save`` is true.
        save: When true, also write the downloaded text to ``filename``.

    Returns:
        The response text when the server answers with HTTP 200,
        otherwise ``None``.
    """
    # Without a timeout a stalled server would block the caller indefinitely.
    r = requests.get(url, timeout=30)
    if r.status_code != 200:
        return None
    html_text = r.text
    if save:
        # Write to the caller-supplied path; the previous f-string referenced
        # an undefined ``year`` and raised NameError whenever save=True.
        # UTF-8 is explicit so non-ASCII pages do not depend on the platform
        # default encoding.
        with open(filename, 'w', encoding='utf-8') as f:
            f.write(html_text)
    return html_text
26 | 
27 | 
def parse_and_extract(url, name='2020'):
    """
    Scrape the first ``.imdb-scroll-table`` table at ``url`` into a CSV file.

    The table's first row supplies the column names (text of its ``th``
    cells); every following row contributes one record built from the text
    of its ``td`` cells. The result is written to
    ``<BASE_DIR>/data/<name>.csv`` (the directory is created if needed).

    Args:
        url: Page to download via ``url_to_txt``.
        name: File stem of the CSV; interpolated into the file name, so any
            value with a string representation works (``run`` passes an int).

    Returns:
        ``True`` once the CSV has been written. ``False`` if the page could
        not be downloaded, no matching table was found, or the table has no
        rows at all.
    """
    html_text = url_to_txt(url)
    if html_text is None:
        return False
    r_html = HTML(html=html_text)
    r_table = r_html.find(".imdb-scroll-table")
    if len(r_table) == 0:
        return False
    rows = r_table[0].find("tr")
    if not rows:
        # A table without any <tr> has no header row to read.
        return False
    header_names = [x.text for x in rows[0].find('th')]
    # One list of cell texts per data row. The former per-row dict bookkeeping
    # appended to an undefined ``table_data_dicts`` (NameError) and indexed
    # ``header_names`` without using the result, so it is dropped.
    table_data = [[col.text for col in row.find("td")] for row in rows[1:]]
    df = pd.DataFrame(table_data, columns=header_names)
    path = os.path.join(BASE_DIR, 'data')
    os.makedirs(path, exist_ok=True)
    # Write into the directory that was just created, not a CWD-relative
    # 'data' folder that may not exist.
    filepath = os.path.join(path, f'{name}.csv')
    df.to_csv(filepath, index=False)
    return True
67 | 
def run(start_year=None, years_ago=0):
    """
    Scrape one page per year, walking backwards from ``start_year``.

    ``start_year`` defaults to the current calendar year. A total of
    ``years_ago + 1`` years are processed; for each one the matching
    boxofficemojo.com worldwide page is handed to ``parse_and_extract`` and
    a one-line status message is printed.

    Both arguments must be ints and ``start_year`` must print as four
    characters, otherwise an ``AssertionError`` is raised.
    """
    if start_year is None:
        start_year = datetime.datetime.now().year
    assert isinstance(start_year, int)
    assert isinstance(years_ago, int)
    assert len(f"{start_year}") == 4

    year = start_year
    remaining = years_ago + 1
    while remaining > 0:
        page_url = f"https://www.boxofficemojo.com/year/world/{year}/"
        if parse_and_extract(page_url, name=year):
            print(f"Finished {year}")
        else:
            print(f"{year} not finished")
        year -= 1
        remaining -= 1


--------------------------------------------------------------------------------
/course/Pipfile:
--------------------------------------------------------------------------------
 1 | [[source]]
 2 | name = "pypi"
 3 | url = "https://pypi.org/simple"
 4 | verify_ssl = true
 5 | 
 6 | [dev-packages]
 7 | 
 8 | [packages]
 9 | pandas = "*"
10 | numpy = "*"
11 | requests = "*"
12 | boto3 = "*"
13 | beautifulsoup4 = "*"
14 | 
15 | [requires]
16 | python_version = "3.8"
17 | 


--------------------------------------------------------------------------------
/course/basic_zip.sh:
--------------------------------------------------------------------------------
# Bundle the cache/ directory and lambda.py into the Lambda deployment archive.
archive="helloWorldLambda.zip"

# Recursively add cache/, skipping macOS Finder metadata files.
zip -vr "$archive" cache/ -x "*.DS_Store"

# Append the handler module to the archive created above.
zip -g "$archive" lambda.py
4 | 


--------------------------------------------------------------------------------
/course/cache/example.txt:
--------------------------------------------------------------------------------
1 | {"hello": "world"}


--------------------------------------------------------------------------------
/course/cache/hello-world.txt:
--------------------------------------------------------------------------------
1 | Hello there!


--------------------------------------------------------------------------------
/course/deploy.sh:
--------------------------------------------------------------------------------
 1 | /Users/cfe/Dev/serverless-app/prepare.sh
 2 | 
 3 | zip -vr helloWorldLambda.zip cache/ -x "*.DS_Store"
 4 | 
 5 | zip -g helloWorldLambda.zip scraper.py
 6 | zip -g helloWorldLambda.zip lambda.py
 7 | 
 8 | aws s3api put-object \
 9 |    --bucket cfe-lambda \
10 |    --key helloWorld/helloWorldLambda.zip \
11 |    --body helloWorldLambda.zip
12 | 
13 | aws lambda update-function-code \
14 |     --function-name helloWorldLambda \
15 |     --s3-bucket cfe-lambda \
16 |     --s3-key helloWorld/helloWorldLambda.zip \
17 |     --publish \
18 |     --region us-west-1
19 | 
20 | rm helloWorldLambda.zip


--------------------------------------------------------------------------------
/course/lambda.py:
--------------------------------------------------------------------------------
 1 | import json
 2 | import pandas as pd
 3 | 
 4 | from scraper import scrape_event
 5 | 
 6 | def scrape_handler():
 7 |     scrape_event(year=2020)
 8 |     data = {
 9 |         "scaping": True
10 |     }
11 |     return {
12 |         "statusCode": 200,
13 |         "body": json.dumps(data)
14 |     }
15 | 
16 | 
def handler(event, context):
    """
    Lambda entry point: route ``/scrape`` requests, otherwise echo diagnostics.

    The request path is read from ``event['requestContext']['http']['path']``
    (presumably the API Gateway HTTP API event shape; confirm against the
    configured integration). If that path contains ``"/scrape"`` the call is
    delegated to ``scrape_handler``. Every other request, including events
    that carry no HTTP information at all, gets a 200 response whose JSON
    body holds a fixed message, the column names of a small demo DataFrame,
    and the request path (``null`` when absent).

    Args:
        event: Invocation payload; may be any object, missing keys are tolerated.
        context: Lambda context object (unused).

    Returns:
        A dict with ``statusCode`` and a JSON-encoded ``body``.
    """
    data = {
        "message": "Local deployment works!"
    }
    # Demo DataFrame; exercises pandas so a packaging problem shows up here.
    random_data = [{"hello": 123, "world": "this is cool"}]
    df = pd.DataFrame(random_data)
    data['columns'] = list(df.columns)

    # Only a missing key or a non-subscriptable event means "no HTTP info";
    # anything else is a real error and should surface, not be swallowed.
    try:
        http_data = event['requestContext']['http']
    except (KeyError, TypeError):
        http_data = {}
    if not isinstance(http_data, dict):
        http_data = {}

    path = http_data.get("path")
    data['path'] = path
    if isinstance(path, str) and "/scrape" in path:
        return scrape_handler()
    return {
        "statusCode": 200,
        "body": json.dumps(data)
    }


--------------------------------------------------------------------------------
/course/prepare.sh:
--------------------------------------------------------------------------------
 1 | pip install --target ./package pytz requests beautifulsoup4
 2 | cd package
 3 | 
 4 | # download pandas wheel
 5 | curl -O https://files.pythonhosted.org/packages/f5/10/40688389f5e234bde06aa84e6f3ccf5beea6269f57e2bef67866d3b43268/pandas-1.0.3-cp38-cp38-manylinux1_x86_64.whl
 6 | 
 7 | # unzip pands
 8 | unzip pandas-1.0.3-cp38-cp38-manylinux1_x86_64.whl
 9 | 
10 | curl -O https://files.pythonhosted.org/packages/ca/c6/cca531518aab1c161233c61e090728024aa647f2ff9c3b91d3f4e68e7e0e/numpy-1.18.3-cp38-cp38-manylinux1_x86_64.whl
11 | 
12 | unzip numpy-1.18.3-cp38-cp38-manylinux1_x86_64.whl
13 | 
14 | rm -r *.whl *.dist-info __pycache__
15 | 
16 | 
17 | zip -r9 ${OLDPWD}/helloWorldLambda.zip .
18 | cd ${OLDPWD}
19 | rm -rf package


--------------------------------------------------------------------------------
/course/push_to_s3.sh:
--------------------------------------------------------------------------------
# Bundle cache/ and lambda.py into the deployment archive, then upload it
# to the cfe-lambda bucket.
archive="helloWorldLambda.zip"

# Recursively add cache/, skipping macOS Finder metadata files.
zip -vr "$archive" cache/ -x "*.DS_Store"

# Append the handler module to the archive.
zip -g "$archive" lambda.py

# Upload the archive under the helloWorld/ prefix.
aws s3api put-object --bucket cfe-lambda --key "helloWorld/$archive" --body "$archive"


--------------------------------------------------------------------------------
/course/serverless-app.code-workspace:
--------------------------------------------------------------------------------
 1 | {
 2 | 	"folders": [
 3 | 		{
 4 | 			"path": "."
 5 | 		}
 6 | 	],
 7 | 	"settings": {
 8 | 		"python.pythonPath": "/Users/cfe/.local/share/virtualenvs/serverless-app-0vZXD_LV/bin/python"
 9 | 	}
10 | }


--------------------------------------------------------------------------------