├── .devcontainer
├── 000-introduction
│ └── devcontainer.json
├── 001-introduction-to-forging-api-requests
│ └── devcontainer.json
├── 002-proxies
│ └── devcontainer.json
└── 003-beautiful-soup-static
│ └── devcontainer.json
├── .gitattributes
├── .github
├── FUNDING.yml
└── ISSUE_TEMPLATE
│ ├── bug_report.md
│ ├── information_correction.md
│ └── topic_request.md
├── .gitignore
├── 000-introduction
├── README.md
└── slides.pdf
├── 001-introduction-to-forging-api-requests
├── README.md
├── activity.py
├── docker-compose.yml
├── slides.pdf
├── test.py
├── thumbnail.png
└── website
│ ├── client
│ ├── .gitignore
│ ├── Dockerfile
│ ├── README.md
│ ├── package-lock.json
│ ├── package.json
│ ├── public
│ │ ├── favicon.ico
│ │ ├── index.html
│ │ ├── manifest.json
│ │ └── robots.txt
│ └── src
│ │ ├── components
│ │ ├── Footer.js
│ │ ├── Header.js
│ │ └── Post.js
│ │ ├── images
│ │ └── heart.png
│ │ ├── index.css
│ │ ├── index.js
│ │ ├── pages
│ │ ├── Discover.js
│ │ ├── Feed.js
│ │ └── ProfileFeed.js
│ │ ├── reportWebVitals.js
│ │ ├── services
│ │ ├── DiscoveryService.js
│ │ └── FeedService.js
│ │ └── setupTests.js
│ └── server
│ ├── .gitignore
│ ├── Dockerfile
│ ├── db_seeding
│ ├── create_data.py
│ ├── initial_data.json
│ └── seedDatabase.js
│ ├── get-client.js
│ ├── package-lock.json
│ ├── package.json
│ └── server.js
├── 002-proxies
├── README.md
└── assets
│ ├── 5-req-2.png
│ ├── 5-req.png
│ ├── pool-comp.png
│ ├── proxy-list.png
│ ├── rotating.png
│ ├── thumbnail.png
│ ├── type-comp.png
│ ├── webshare-rotating.png
│ ├── webshare-tab.png
│ ├── with-proxy.png
│ └── without-proxy.png
├── 003-beautiful-soup-with-static-site-and-server-side-rendered-web-scraping
├── README.md
├── activities.py
├── assets
│ ├── ssr-sites.png
│ ├── static-sites.png
│ └── thumbnail.png
├── docker-compose.yml
├── requirements.txt
├── test.py
└── website
│ ├── 301.txt
│ ├── 404.html
│ ├── Dockerfile
│ ├── Gemfile
│ ├── Gemfile.lock
│ ├── LICENSE
│ ├── Makefile
│ ├── README.md
│ ├── TODO
│ ├── _config.yml
│ ├── _data
│ └── navigation.yml
│ ├── _includes
│ ├── logo.html
│ ├── navigation.html
│ ├── product-styles.html
│ ├── relative-src.html
│ ├── snipcart-button.html
│ └── social-icon.html
│ ├── _layouts
│ ├── default.html
│ ├── page.html
│ └── product.html
│ ├── _products
│ ├── deer.md
│ ├── elephant.md
│ ├── giraffe.md
│ ├── hog.md
│ ├── lion.md
│ └── tiger.md
│ ├── _sass
│ ├── cloudcannon.scss
│ ├── contact.scss
│ ├── elements.scss
│ ├── footer.scss
│ ├── forms.scss
│ ├── layout.scss
│ ├── mixins
│ │ ├── columns.scss
│ │ └── flexbox.scss
│ ├── navigation.scss
│ ├── products.scss
│ ├── staff.scss
│ └── variables.scss
│ ├── _staff_members
│ ├── ava.md
│ └── steph.md
│ ├── about.html
│ ├── cloudcannon.config.yml
│ ├── contact-success.html
│ ├── contact.html
│ ├── css
│ └── screen.scss
│ ├── favicon.png
│ ├── images
│ ├── _screenshot.png
│ └── products
│ │ ├── deer
│ │ ├── black.jpg
│ │ ├── blue.jpg
│ │ ├── clay.jpg
│ │ └── cream.jpg
│ │ ├── elephant
│ │ ├── cream.jpg
│ │ └── green.jpg
│ │ ├── giraffe
│ │ └── green.jpg
│ │ ├── hog
│ │ ├── blue.jpg
│ │ ├── clay.jpg
│ │ └── cream.jpg
│ │ ├── lion
│ │ └── blue.jpg
│ │ └── tiger
│ │ ├── black.jpg
│ │ ├── blue.jpg
│ │ ├── clay.jpg
│ │ ├── cream.jpg
│ │ └── green.jpg
│ ├── index.html
│ ├── js
│ ├── _style-picker.js
│ └── application.js
│ ├── returns.html
│ ├── robots.txt
│ ├── schemas
│ ├── products.md
│ └── staff_members.md
│ ├── shipping.html
│ ├── siteicon.png
│ └── touch-icon.png
├── LICENSE
├── README.md
└── assets
├── codespaces-new-with-options.png
├── codespaces-select-configuration.png
└── delete-codespace.png
/.devcontainer/000-introduction/devcontainer.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "000 Introduction",
3 | "workspaceMount": "source=${localWorkspaceFolder},target=/workspace,type=bind,consistency=cached",
4 | "workspaceFolder": "/workspace/000-introduction",
5 | "image": "mcr.microsoft.com/devcontainers/universal"
6 | }
--------------------------------------------------------------------------------
/.devcontainer/001-introduction-to-forging-api-requests/devcontainer.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "001 Introduction to Forging API Requests",
3 | "workspaceMount": "source=${localWorkspaceFolder},target=/workspace,type=bind,consistency=cached",
4 | "workspaceFolder": "/workspace/001-introduction-to-forging-api-requests",
5 | "image": "mcr.microsoft.com/devcontainers/universal",
6 | "postStartCommand": "docker compose up -d && gh codespace ports visibility 8080:public -c $CODESPACE_NAME",
7 | "forwardPorts": [5434, 3000, 8080],
8 | "portsAttributes": {
9 | "5434": {
10 | "label": "postgres",
11 | "onAutoForward": "silent"
12 | },
13 | "3000": {
14 | "label": "Website",
15 | "onAutoForward": "openBrowser"
16 | },
17 | "8080": {
18 | "label": "Backend",
19 | "onAutoForward": "silent"
20 | }
21 | }
22 | }
--------------------------------------------------------------------------------
/.devcontainer/002-proxies/devcontainer.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "002 Proxies",
3 | "workspaceMount": "source=${localWorkspaceFolder},target=/workspace,type=bind,consistency=cached",
4 | "workspaceFolder": "/workspace/002-proxies",
5 | "image": "mcr.microsoft.com/devcontainers/universal"
6 | }
--------------------------------------------------------------------------------
/.devcontainer/003-beautiful-soup-static/devcontainer.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "003 Beautiful Soup with Static & SSR Web Scraping",
3 | "workspaceMount": "source=${localWorkspaceFolder},target=/workspace,type=bind,consistency=cached",
4 | "workspaceFolder": "/workspace/003-beautiful-soup-with-static-site-and-server-side-rendered-web-scraping",
5 | "image": "mcr.microsoft.com/devcontainers/universal",
6 | "postStartCommand": "docker compose up -d && python3 -m pip install -r requirements.txt",
7 | "forwardPorts": [3000],
8 | "portsAttributes": {
9 | "3000": {
10 | "label": "Website",
11 | "onAutoForward": "openBrowser"
12 | }
13 | }
14 | }
--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------
1 | # Auto detect text files and perform LF normalization
2 | * text=auto
3 |
--------------------------------------------------------------------------------
/.github/FUNDING.yml:
--------------------------------------------------------------------------------
1 | custom: ['https://www.paypal.me/dteather']
2 | github: davidteather
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/bug_report.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Bug report
3 | about: Create a report to help us improve
4 | title: "[BUG] - Your Error Here"
5 | labels: bug
6 | assignees: ''
7 |
8 | ---
9 | Fill Out the template :)
10 |
11 | **Describe the bug**
12 |
13 | A clear and concise description of what the bug is.
14 |
15 | **The buggy code**
16 |
17 | Please add any relevant code that is giving you unexpected results.
18 |
19 | Preferably the smallest amount of code to reproduce the issue.
20 |
21 | **Expected behavior**
22 |
23 | A clear and concise description of what you expected to happen.
24 |
25 | **Error Trace (if any)**
26 |
27 | Put the error trace below if there's any error thrown.
28 | ```
29 | # Error Trace Here
30 | ```
31 |
32 | **Desktop (please complete the following information):**
33 | - OS: [e.g. Windows 10]
34 | - Lesson #: [e.g. Lesson 1]
35 |
36 | **Additional context**
37 |
38 | Add any other context about the problem here.
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/information_correction.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Incorrect Information
3 | about: Report incorrect information in a lesson so it can be fixed in the readme
4 | title: "[INCORRECT] - Lesson #"
5 | labels: incorrect
6 | assignees: ''
7 |
8 | ---
9 |
10 | **Note:** Videos themselves will **not** be updated, if there are any corrections needed they will be posted under the videos link in the lesson's readme.
11 |
12 | **Where is there incorrect information**
13 | Please give a lesson number and an exact quote
14 |
15 | > your quote here
16 |
17 | **Explain how this is incorrect** (provide sources)
18 |
19 | Please explain how the section you quoted above is incorrect, and please provide any sources (articles, interviews of authorities on the subject, etc)
20 |
21 |
22 | - [ ] I have checked that this is not already in the corrections section.
23 |
24 | Consider creating a PR for your change and referencing this issue number in it.
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/topic_request.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Request A New Topic/Lesson
3 | about: What other topics do you want in this course?
4 | title: "[TOPIC REQUEST] - YOUR TOPIC HERE"
5 | labels: topic-request
6 | assignees: ''
7 |
8 | ---
9 |
10 | **Describe the topic you'd like covered**
11 |
12 | Write what you'd like covered here
13 |
14 | **Links to any resources that might be helpful with this topic**
15 | * Ex: https://twitter.com/david_teather (could include any articles about it or whatever)
16 |
17 | **Are you an authority on this topic?**: (yes/no)
18 | * **Note:** If you are an authority I'd love to collaborate on this lesson and at least run a draft or two by you before publishing it.
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | __pycache__
2 | .vscode
3 | .pytest_cache
4 | .DS_Store
--------------------------------------------------------------------------------
/000-introduction/README.md:
--------------------------------------------------------------------------------
1 | # Introduction
2 |
3 | Welcome to the course! Glad you're here :)
4 |
5 | ### Supporting The Project
6 | * Star the repo 😎
7 | * Maybe share it with some people new to web-scraping?
8 | * Consider [sponsoring](https://github.com/sponsors/davidteather) me on GitHub
9 | * Send me an email or a [LinkedIn](https://www.linkedin.com/in/davidteather/) message telling me what you enjoy in the course (and maybe what else you want to see in the future)
10 | * Submit PRs for suggestions/issues :)
11 |
12 | ## Table Of Contents
13 | 1. [Welcome!](#welcome)
14 | 1. [What I'm Known For](#what-im-known-for)
15 | 2. [Learning Objectives](#learning-objectives)
16 | 3. [How You Will Learn](#how-you-will-learn)
17 | 4. [How To Learn Effectively](#how-to-learn-effectively)
18 | 5. [Course Topics](#course-topics)
19 | 3. [Getting Started](#getting-started)
20 | 1. [Prerequisites](#prerequisites)
21 | 2. [Tools Required](#tools-required)
22 |
23 | ## Video For The Lesson
24 | Consider checking out the video for this introduction [here](https://www.youtube.com/watch?v=KY3E-6wVOqA&list=PLmRtxHvzkEE8Ofiy4hnnXSoxw7gs4HOHt), this video just provides the [slides](./slides.pdf) with commentary, later lessons are more high quality.
25 |
26 | ### Video Corrections
27 | None so far
28 |
29 | ## Welcome
30 |
31 | I'm David Teather and I work as a software engineer and my specialty is data extraction.
32 |
33 | If you'd like a more visual experience check out the introduction video on [YouTube](https://www.youtube.com/watch?v=KY3E-6wVOqA&list=PLmRtxHvzkEE8Ofiy4hnnXSoxw7gs4HOHt), or pull up the introduction [slides](./slides.pdf)
34 | ### What I'm Known For
35 | * [My research](https://theresponsetimes.com/yikyak-is-exposing-user-locations/) on YikYak (a social media app) that was featured in [Vice](https://www.vice.com/en/article/7kbnna/anonymous-social-media-app-yik-yak-exposed-users-precise-locations) and [The Verge](https://www.theverge.com/2022/5/13/23070696/yik-yak-anonymous-app-precise-locations-revealed)
36 | * Creating various data extraction tools
37 | * My most popular is [TikTokApi](https://github.com/davidteather/TikTok-Api)
38 | * 600K+ Downloads
39 | * 2.3K+ Stars
40 |
41 | ## Course Introduction
42 | ### Learning Objectives
43 | * Learners will understand the many different ways websites prevent web scraping
44 | * Learners will be able to reverse engineer a real-world website for data extraction
45 |
46 | ### How You Will Learn
47 | * Real website examples
48 | * Although these websites might change over time and the lesson becomes broken
49 | * Websites I've created for this course
50 | * Will not change to ensure that these lessons don't break
51 | * Each lesson will have a hands on activity
52 | * In addition most modules will have a `submission.py` file that you can create functions related to the lesson concept and run it against a test suite
53 | * These will primarily be focused on extracting data from the websites created for this course
54 |
55 | ### How To Learn Effectively
56 | * Everybody learns differently, so these are only guidelines
57 | * Take notes from the slides presented in the [videos](https://youtube.com/playlist?list=PLmRtxHvzkEE8Ofiy4hnnXSoxw7gs4HOHt)
58 | * These will revolve around general concepts
59 | * Will be accompanied by programs to write
60 | * Try the activities before watching the solution in the video
61 | * Treat the website folder as a black box, like you would a real website, you can figure out everything through the website itself
62 |
63 | ### Course Topics
64 | * Forging API requests
65 | * Proxies
66 | * Captchas
67 | * Storing data at scale
68 | * Emulating human behavior
69 | * And more
70 | * Feel free to [tweet at me](https://twitter.com/david_teather) or file an issue with the `lesson-request` label with what you'd like to see
71 |
72 | ## Getting Started
73 |
74 | Learn how to get started learning with this course!
75 | ### Prerequisites
76 | * A basic understanding of programming
77 | * Recommended
78 | * Some python experience
79 | * We probably won't do much complex python
80 |
81 | ### Tools Required
82 | * [Docker](https://www.docker.com/)
83 | * And docker-compose (should be bundled)
84 | * [Python](https://www.python.org/)
85 | * I'll be using 3.10
86 | * A web browser
87 | * I'll be using [Brave](https://brave.com/) (chromium based)
88 | * Doesn't really matter which as long as you can view network traffic
89 | * And the files in this git repo, so be sure to download it! (and maybe give it a star 😉)
90 |
91 |
92 | Hope you'll enjoy the content in this course! You can either get started with [lesson 1](../001-introduction-to-forging-api-requests/), or check out the [course catalogue](../README.md#course-catalogue)
--------------------------------------------------------------------------------
/000-introduction/slides.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/davidteather/everything-web-scraping/6afdda03e1b84d64a8e460f6e7265a60a87bdbdc/000-introduction/slides.pdf
--------------------------------------------------------------------------------
/001-introduction-to-forging-api-requests/README.md:
--------------------------------------------------------------------------------
1 | # Lesson 1 - Introduction To Forging API Requests
2 |
3 | This lesson is designed to teach you how data is sent between websites and servers and how we can exploit this to extract data.
4 |
5 | ### Supporting The Project
6 | * Star the repo 😎
7 | * Maybe share it with some people new to web-scraping?
8 | * Consider [sponsoring](https://github.com/sponsors/davidteather) me on GitHub
9 | * Send me an email or a [LinkedIn](https://www.linkedin.com/in/davidteather/) message telling me what you enjoy in the course (and maybe what else you want to see in the future)
10 | * Submit PRs for suggestions/issues :)
11 |
12 | ## Learning Objectives
13 | * Learners will understand how data is sent between a client and a server.
14 | * Learners will forge API requests to a mock website.
15 |
16 |
17 | ## Table of Contents
18 | * [Lesson Video](#lesson-video)
19 | * [Video Corrections](#video-corrections)
20 | * [How Do Websites Get Data](#how-do-websites-get-data)
21 | * [Popular Ways Websites Get Data](#popular-ways-websites-get-data)
22 | * [How Do We Exploit This?](#how-do-we-exploit-this)
23 | * [Lesson Activity](#activity)
24 | * [Description](#brief-description)
25 | * [Testing](#testing)
26 | * [Solutions](#solutions)
27 |
28 | ## Lesson Video
29 |
30 | [![Lesson 1 video thumbnail](./thumbnail.png)](https://www.youtube.com/watch?v=8GZPQUjd7pk&list=PLmRtxHvzkEE8Ofiy4hnnXSoxw7gs4HOHt)
31 |
32 | [Watch Here](https://www.youtube.com/watch?v=8GZPQUjd7pk&list=PLmRtxHvzkEE8Ofiy4hnnXSoxw7gs4HOHt)
33 |
34 | ### Video Corrections
35 | None so far
36 |
37 | ## How Do Websites Get Data?
38 |
39 | Watch this section on [YouTube](https://www.youtube.com/watch?v=8GZPQUjd7pk&list=PLmRtxHvzkEE8Ofiy4hnnXSoxw7gs4HOHt) and/or pull up the [slides](./slides.pdf)
40 |
41 | ### Popular Ways Websites Get Data
42 | * Server Side Rendering (SSR)
43 | * Data is sent as part of the HTML response to the requester
44 | * Each request for new data usually requires a page reload
45 | * AJAX
46 | * Takes a client (ex: web browser) and server approach
47 | * When the client needs new data it requests it from the server
48 | * This allows the client to update the data on the page without refreshing the page itself
49 | * Leads to a more fluid and responsive user experience
50 | * This type is the focus of this lesson
51 |
52 | Visualizations of how the data flows available in the [video](https://www.youtube.com/watch?v=8GZPQUjd7pk&list=PLmRtxHvzkEE8Ofiy4hnnXSoxw7gs4HOHt) and [slides](./slides.pdf)
53 |
54 | ### How Do We Exploit This?
55 |
56 | If we're able to emulate the requests that a legitimate client makes then we can extract data from the server without ever interacting with the client itself. This technique is generally referred to as **forging requests**.
57 |
58 | * Advantages
59 | * These APIs can be easier to scrape at scale than trying to do it through a client
60 | * They may contain extra information you can't see in the HTML itself
61 | * Similar to Missouri accidentally exposing their teachers' SSNs [The Verge](https://www.theverge.com/2021/10/14/22726866/missouri-governor-department-elementary-secondary-education-ssn-vulnerability-disclosure)
62 | * Less data returned means quicker requests (and less data transfer fees)
63 | * Excess HTML, CSS, etc isn't usually returned from the server, just pure data
64 | * Disadvantages
65 | * Some websites frequently update their APIs
66 | * Extra work has to be done to keep up with these changes compared to just scraping HTML
67 | * Might change endpoints, the schema of the data returned, etc
68 | * Can be hard to emulate human behavior to avoid captchas and other blocking mechanisms
69 | * Can be difficult to figure out how the website is generating user sessions and other security parameters to prevent web scraping
70 |
71 | ## Activity
72 |
73 | In this activity you'll be looking at a mock website and writing a python script to extract data from it. To get started you should run `docker-compose up` in this directory. If you don't know what docker is or are new to it check out the [docker section of the readme](../README.md#how-to-start-the-mock-websites)
74 |
75 |
76 | ### Brief Description
77 |
78 | Our goal is to extract as much data as possible from the website by looking at the network inspector tab of the browser when visiting the mock website. We want to make the same requests that the website (client) makes to the server.
79 |
80 | Open `activity.py`, you will be modifying the existing function to do what the comments tell you to do. I recommend using the [requests](https://requests.readthedocs.io/en/latest/user/quickstart/) package, although feel free to use whatever you want.
81 |
82 | **Do not** change the method names, however feel free to call those methods if you want to test them out in the `if __name__ == "__main__"` section.
83 |
84 | ### Testing
85 |
86 | To check if your implementation is correct run `python test.py` this will import the functions you made. It will tell you what tests failed if any, and will show a success message if all tests passed.
87 |
88 | ### Solutions
89 |
90 | You can find the solutions in the [video](https://youtu.be/8GZPQUjd7pk?list=PLmRtxHvzkEE8Ofiy4hnnXSoxw7gs4HOHt), or use the timestamps here
91 | * [extract_feed()](https://youtu.be/8GZPQUjd7pk?list=PLmRtxHvzkEE8Ofiy4hnnXSoxw7gs4HOHt&t=174)
92 | * [extract_emails()](https://youtu.be/8GZPQUjd7pk?list=PLmRtxHvzkEE8Ofiy4hnnXSoxw7gs4HOHt&t=240)
93 | * [username_exists()](https://youtu.be/8GZPQUjd7pk?list=PLmRtxHvzkEE8Ofiy4hnnXSoxw7gs4HOHt&t=258)
--------------------------------------------------------------------------------
/001-introduction-to-forging-api-requests/activity.py:
--------------------------------------------------------------------------------
1 | """
2 | To check if your implementation is correct run test.py
3 |
4 | *NOTE: Don't change the method names, as that's what's used in the tester,
5 | but feel free to add anything else to test and debug your code.
6 | """
7 |
def extract_feed():
    """
    Return a list of every post object shown on the feed page.

    Activity stub: implement this by forging the same request the
    website's client makes to the feed endpoint.
    """
    posts = []
    return posts
14 |
def extract_emails():
    """
    Return a list of every email shown on the discover page.

    Activity stub: implement this by forging the same request the
    website's client makes to the discover endpoint.
    """
    emails = []
    return emails
21 |
def username_exists(username):
    """
    Report whether the given username is already taken on the mock website.

    username - the handle to look up, without a leading @
               (ex: username="davidteather")

    Returns True if the username exists, False otherwise.
    """
    exists = False
    return exists
29 |
30 | if __name__ == "__main__":
31 | # Optional: You can call your methods here if you want to test them without running the tester
32 | # print(extract_feed())
33 | pass
--------------------------------------------------------------------------------
/001-introduction-to-forging-api-requests/docker-compose.yml:
--------------------------------------------------------------------------------
# Mock "social media" website for Lesson 001: a postgres database,
# a React client, and a Node API server the client fetches data from.
version: '3.9'

services:
  # Database backing the mock site. Host port 5434 is used to avoid
  # clashing with a locally-installed postgres on 5432.
  db:
    image: postgres:latest
    environment:
      POSTGRES_DB: lesson_1
      POSTGRES_PASSWORD: postgres
      POSTGRES_USER: postgres
    ports:
      - 5434:5432

  # React front-end served on port 3000; talks to the server on 8080.
  client:
    build:
      context: ./website/client
    container_name: lesson-001-client
    depends_on:
      - server
      - db
    environment:
      REACT_APP_WEBSITE_NAME: Social
      REACT_APP_BACKEND_HOST: localhost
      REACT_APP_BACKEND_PORT: 8080
      # Passed through from GitHub Codespaces so the client can build
      # the forwarded-port URL when not running locally.
      REACT_APP_CODESPACE_NAME: ${CODESPACE_NAME}
      REACT_APP_GITHUB_CODESPACES_PORT_FORWARDING_DOMAIN: ${GITHUB_CODESPACES_PORT_FORWARDING_DOMAIN}
    ports:
      - 3000:3000

  # Node API server — the endpoint learners forge requests against.
  server:
    build:
      context: ./website/server
    depends_on:
      - db
    environment:
      PORT: 8080
      POSTGRES_DB: lesson_1
      POSTGRES_PASSWORD: postgres
      POSTGRES_USER: postgres
      # Connects to the db service over the compose network, so the
      # container-internal port 5432 is used (not the 5434 host mapping).
      POSTGRES_PORT: 5432
      POSTGRES_HOST: db
    links:
      - db
    ports:
      - "8080:8080"
    container_name: lesson-001-server
--------------------------------------------------------------------------------
/001-introduction-to-forging-api-requests/slides.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/davidteather/everything-web-scraping/6afdda03e1b84d64a8e460f6e7265a60a87bdbdc/001-introduction-to-forging-api-requests/slides.pdf
--------------------------------------------------------------------------------
/001-introduction-to-forging-api-requests/test.py:
--------------------------------------------------------------------------------
1 | # Grades activity.py on the test cases
2 | # Don't look inside this if you haven't passed the tests yet
3 |
4 | from activity import extract_feed, extract_emails, username_exists
5 | import json
6 |
def test_extract_feed(posts):
    """
    Grade extract_feed() against the seeded posts.

    posts - the "posts" list from initial_data.json

    Prints a pass/fail line and returns True on pass, False on fail.
    """
    # Image URLs uniquely identify posts in the seed data, so grading
    # reduces to a multiset comparison on image_url.
    expected_images = [post["image_url"] for post in posts]

    for post in extract_feed():
        image = post["image_url"]
        if image not in expected_images:
            print("extract_feed(): ❌\n\tReturned a post that was not in the database (or returned multiple instances of a single post)")
            return False
        expected_images.remove(image)

    # Anything left over was never returned by the learner's function.
    if expected_images:
        print("extract_feed(): ❌\n\tDidn't return all posts in the database")
        return False

    print("extract_feed(): ✅")
    return True
26 |
def test_extract_emails(profiles):
    """
    Grade extract_emails() against the seeded profiles.

    profiles - the "profiles" list from initial_data.json

    Prints a pass/fail line and returns True on pass, False on fail.
    """
    # Emails uniquely identify profiles in the seed data.
    expected_emails = [profile["email"] for profile in profiles]

    for email in extract_emails():
        if email not in expected_emails:
            print("extract_emails(): ❌\n\tReturned an email that was not in the database (or returned multiple instances of a single email)")
            return False
        expected_emails.remove(email)

    # Anything left over was never returned by the learner's function.
    if expected_emails:
        print("extract_emails(): ❌\n\tDidn't return all emails in the database")
        return False

    print("extract_emails(): ✅")
    return True
46 |
def test_username_exists(profiles):
    """
    Grade username_exists() against seeded usernames and known-fake ones.

    profiles - the "profiles" list from initial_data.json

    Prints a pass/fail line and returns True on pass, False on fail.
    """
    fake_usernames = ["orange", "apple", "bruh", "davidteather", "subscribe", "contact.davidteather@gmail.com"]

    for profile in profiles:
        username = profile['username']

        # A seeded username can't also serve as a "fake" one.
        if username in fake_usernames:
            fake_usernames.remove(username)

        if not username_exists(username):
            print("username_exists(): ❌\n\tReturned False for a username that exists")
            return False

    for candidate in fake_usernames:
        if username_exists(candidate):
            print("username_exists(): ❌\n\tReturned True for a username that doesn't exist")
            return False

    print("username_exists(): ✅")
    return True
67 |
68 | if __name__ == "__main__":
69 | with open("website/server/db_seeding/initial_data.json", "r", encoding='utf-8') as init_data:
70 | data = json.loads(init_data.read())
71 |
72 | profiles = data["profiles"]
73 | posts = data["posts"]
74 |
75 | passed_extract_feed = test_extract_feed(posts)
76 | passed_extract_emails = test_extract_emails(profiles)
77 | passed_username_exists = test_username_exists(profiles)
78 |
79 | if passed_extract_feed and passed_username_exists and passed_extract_emails:
80 | print(f"All tests: ✅")
--------------------------------------------------------------------------------
/001-introduction-to-forging-api-requests/thumbnail.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/davidteather/everything-web-scraping/6afdda03e1b84d64a8e460f6e7265a60a87bdbdc/001-introduction-to-forging-api-requests/thumbnail.png
--------------------------------------------------------------------------------
/001-introduction-to-forging-api-requests/website/client/.gitignore:
--------------------------------------------------------------------------------
1 | # See https://help.github.com/articles/ignoring-files/ for more about ignoring files.
2 |
3 | # dependencies
4 | /node_modules
5 | /.pnp
6 | .pnp.js
7 |
8 | # testing
9 | /coverage
10 |
11 | # production
12 | /build
13 |
14 | # misc
15 | .DS_Store
16 | .env.local
17 | .env.development.local
18 | .env.test.local
19 | .env.production.local
20 |
21 | npm-debug.log*
22 | yarn-debug.log*
23 | yarn-error.log*
24 |
--------------------------------------------------------------------------------
/001-introduction-to-forging-api-requests/website/client/Dockerfile:
--------------------------------------------------------------------------------
# Builds the Lesson 001 React client (Create React App dev server on :3000).
FROM node:13.12.0-alpine

LABEL "lesson.number"=1

# add `/app/node_modules/.bin` to $PATH
# NOTE(review): no WORKDIR is set, so everything below lands in the image
# root and node_modules ends up at /node_modules — the /app path here looks
# vestigial. `npm start` still works because npm resolves the local
# node_modules next to package.json; consider adding `WORKDIR /app` — verify.
ENV PATH /app/node_modules/.bin:$PATH

# install app dependencies
# Dependencies are copied and installed before the source so this layer
# is cached when only application code changes.
COPY package.json ./
COPY package-lock.json ./
RUN npm install --silent

# add app
COPY . ./

# start app
CMD ["npm", "start"]
--------------------------------------------------------------------------------
/001-introduction-to-forging-api-requests/website/client/README.md:
--------------------------------------------------------------------------------
1 | # Getting Started with Create React App
2 |
3 | This project was bootstrapped with [Create React App](https://github.com/facebook/create-react-app).
4 |
5 | ## Available Scripts
6 |
7 | In the project directory, you can run:
8 |
9 | ### `npm start`
10 |
11 | Runs the app in the development mode.\
12 | Open [http://localhost:3000](http://localhost:3000) to view it in your browser.
13 |
14 | The page will reload when you make changes.\
15 | You may also see any lint errors in the console.
16 |
17 | ### `npm test`
18 |
19 | Launches the test runner in the interactive watch mode.\
20 | See the section about [running tests](https://facebook.github.io/create-react-app/docs/running-tests) for more information.
21 |
22 | ### `npm run build`
23 |
24 | Builds the app for production to the `build` folder.\
25 | It correctly bundles React in production mode and optimizes the build for the best performance.
26 |
27 | The build is minified and the filenames include the hashes.\
28 | Your app is ready to be deployed!
29 |
30 | See the section about [deployment](https://facebook.github.io/create-react-app/docs/deployment) for more information.
31 |
32 | ### `npm run eject`
33 |
34 | **Note: this is a one-way operation. Once you `eject`, you can't go back!**
35 |
36 | If you aren't satisfied with the build tool and configuration choices, you can `eject` at any time. This command will remove the single build dependency from your project.
37 |
38 | Instead, it will copy all the configuration files and the transitive dependencies (webpack, Babel, ESLint, etc) right into your project so you have full control over them. All of the commands except `eject` will still work, but they will point to the copied scripts so you can tweak them. At this point you're on your own.
39 |
40 | You don't have to ever use `eject`. The curated feature set is suitable for small and middle deployments, and you shouldn't feel obligated to use this feature. However we understand that this tool wouldn't be useful if you couldn't customize it when you are ready for it.
41 |
42 | ## Learn More
43 |
44 | You can learn more in the [Create React App documentation](https://facebook.github.io/create-react-app/docs/getting-started).
45 |
46 | To learn React, check out the [React documentation](https://reactjs.org/).
47 |
48 | ### Code Splitting
49 |
50 | This section has moved here: [https://facebook.github.io/create-react-app/docs/code-splitting](https://facebook.github.io/create-react-app/docs/code-splitting)
51 |
52 | ### Analyzing the Bundle Size
53 |
54 | This section has moved here: [https://facebook.github.io/create-react-app/docs/analyzing-the-bundle-size](https://facebook.github.io/create-react-app/docs/analyzing-the-bundle-size)
55 |
56 | ### Making a Progressive Web App
57 |
58 | This section has moved here: [https://facebook.github.io/create-react-app/docs/making-a-progressive-web-app](https://facebook.github.io/create-react-app/docs/making-a-progressive-web-app)
59 |
60 | ### Advanced Configuration
61 |
62 | This section has moved here: [https://facebook.github.io/create-react-app/docs/advanced-configuration](https://facebook.github.io/create-react-app/docs/advanced-configuration)
63 |
64 | ### Deployment
65 |
66 | This section has moved here: [https://facebook.github.io/create-react-app/docs/deployment](https://facebook.github.io/create-react-app/docs/deployment)
67 |
68 | ### `npm run build` fails to minify
69 |
70 | This section has moved here: [https://facebook.github.io/create-react-app/docs/troubleshooting#npm-run-build-fails-to-minify](https://facebook.github.io/create-react-app/docs/troubleshooting#npm-run-build-fails-to-minify)
71 |
--------------------------------------------------------------------------------
/001-introduction-to-forging-api-requests/website/client/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "client",
3 | "version": "0.1.0",
4 | "private": true,
5 | "dependencies": {
6 | "@testing-library/jest-dom": "^5.16.4",
7 | "@testing-library/react": "^13.3.0",
8 | "@testing-library/user-event": "^13.5.0",
9 | "axios": "^0.27.2",
10 | "bootstrap": "^5.1.3",
11 | "react": "^18.1.0",
12 | "react-bootstrap": "^2.4.0",
13 | "react-dom": "^18.1.0",
14 | "react-scripts": "5.0.1",
15 | "styled-components": "^5.3.5",
16 | "web-vitals": "^2.1.4"
17 | },
18 | "scripts": {
19 | "start": "react-scripts start",
20 | "build": "react-scripts build",
21 | "test": "react-scripts test",
22 | "eject": "react-scripts eject"
23 | },
24 | "eslintConfig": {
25 | "extends": [
26 | "react-app",
27 | "react-app/jest"
28 | ]
29 | },
30 | "browserslist": {
31 | "production": [
32 | ">0.2%",
33 | "not dead",
34 | "not op_mini all"
35 | ],
36 | "development": [
37 | "last 1 chrome version",
38 | "last 1 firefox version",
39 | "last 1 safari version"
40 | ]
41 | },
42 | "devDependencies": {
43 | "react-router-dom": "^6.3.0"
44 | }
45 | }
46 |
--------------------------------------------------------------------------------
/001-introduction-to-forging-api-requests/website/client/public/favicon.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/davidteather/everything-web-scraping/6afdda03e1b84d64a8e460f6e7265a60a87bdbdc/001-introduction-to-forging-api-requests/website/client/public/favicon.ico
--------------------------------------------------------------------------------
/001-introduction-to-forging-api-requests/website/client/public/index.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
70 | >
71 | )
72 | }
73 | }
74 |
75 | export default ProfileFeed
--------------------------------------------------------------------------------
/001-introduction-to-forging-api-requests/website/client/src/reportWebVitals.js:
--------------------------------------------------------------------------------
// Lazily load the web-vitals library and forward each Core Web Vitals metric
// (CLS, FID, FCP, LCP, TTFB) to the supplied callback. Does nothing unless a
// function is actually provided.
const reportWebVitals = onPerfEntry => {
  if (onPerfEntry && onPerfEntry instanceof Function) {
    import('web-vitals').then(vitals => {
      const { getCLS, getFID, getFCP, getLCP, getTTFB } = vitals;
      // Same registration order as upstream CRA boilerplate.
      [getCLS, getFID, getFCP, getLCP, getTTFB].forEach(register => register(onPerfEntry));
    });
  }
};
12 |
13 | export default reportWebVitals;
14 |
--------------------------------------------------------------------------------
/001-introduction-to-forging-api-requests/website/client/src/services/DiscoveryService.js:
--------------------------------------------------------------------------------
// True when running inside GitHub Codespaces.
// BUG FIX: unset CRA env vars are `undefined`, not "", and `undefined != ""`
// is true under loose comparison — the old check made isCodespaces true even
// when neither variable was set. Check truthiness instead.
const isCodespaces = Boolean(
  process.env.REACT_APP_CODESPACE_NAME &&
  process.env.REACT_APP_GITHUB_CODESPACES_PORT_FORWARDING_DOMAIN
);

// Base URL of the backend API.
// NOTE(review): the non-Codespaces branch reads BACKEND_HOST/BACKEND_PORT,
// which CRA does not inline into the browser bundle (only REACT_APP_* vars
// are) — these are likely undefined at runtime; confirm against docker-compose.
const BACKEND_URL = isCodespaces
  ? `https://${process.env.REACT_APP_CODESPACE_NAME}-${process.env.REACT_APP_BACKEND_PORT}.${process.env.REACT_APP_GITHUB_CODESPACES_PORT_FORWARDING_DOMAIN}`
  : `http://${process.env.BACKEND_HOST}:${process.env.BACKEND_PORT}`;
6 |
/**
 * Fetch one page of discoverable profiles from the backend.
 *
 * @param {number|string} page zero-based page index appended to the URL
 * @returns {Promise<Object|Array>} parsed JSON body on success, or [] when the
 *   request or JSON parsing fails (best-effort: callers render an empty list)
 */
export async function getProfiles(page) {
  try {
    const response = await fetch(`${BACKEND_URL}/discover/profiles/${page}`);
    // BUG FIX: `return await` (not a bare `return`) so a rejected .json()
    // promise is still routed through the catch below instead of escaping
    // to the caller as an unhandled rejection.
    return await response.json();
  } catch (error) {
    // Swallow network/parse errors and fall back to an empty result,
    // preserving the original best-effort behavior.
    return [];
  }
}
--------------------------------------------------------------------------------
/001-introduction-to-forging-api-requests/website/client/src/services/FeedService.js:
--------------------------------------------------------------------------------
// True when running inside GitHub Codespaces.
// BUG FIX: unset CRA env vars are `undefined`, not "", and `undefined != ""`
// is true under loose comparison — the old check made isCodespaces true even
// when neither variable was set. Check truthiness instead.
const isCodespaces = Boolean(
  process.env.REACT_APP_CODESPACE_NAME &&
  process.env.REACT_APP_GITHUB_CODESPACES_PORT_FORWARDING_DOMAIN
);

// Base URL of the backend API.
// NOTE(review): the non-Codespaces branch reads BACKEND_HOST/BACKEND_PORT,
// which CRA does not inline into the browser bundle (only REACT_APP_* vars
// are) — these are likely undefined at runtime; confirm against docker-compose.
const BACKEND_URL = isCodespaces
  ? `https://${process.env.REACT_APP_CODESPACE_NAME}-${process.env.REACT_APP_BACKEND_PORT}.${process.env.REACT_APP_GITHUB_CODESPACES_PORT_FORWARDING_DOMAIN}`
  : `http://${process.env.BACKEND_HOST}:${process.env.BACKEND_PORT}`;
6 |
/**
 * Fetch one page of the global feed (server orders by like count).
 *
 * @param {number|string} page zero-based page index appended to the URL
 * @returns {Promise<Object|Array>} parsed JSON body on success, or [] when
 *   the request or JSON parsing fails
 */
export async function getFeed(page) {
  try {
    const response = await fetch(`${BACKEND_URL}/feed/${page}`);
    // BUG FIX: `return await` keeps a rejected .json() inside this
    // try/catch instead of leaking the rejection to the caller.
    return await response.json();
  } catch (error) {
    // Best-effort: callers render an empty feed on failure.
    return [];
  }
}
18 |
19 |
/**
 * Fetch one page of a single user's feed.
 *
 * @param {string} username profile whose posts are requested
 * @param {number|string} page zero-based page index appended to the URL
 * @returns {Promise<Object|Array>} parsed JSON body on success, or [] when
 *   the request or JSON parsing fails
 */
export async function getProfileFeed(username, page) {
  try {
    const response = await fetch(`${BACKEND_URL}/profile/${username}/feed/${page}`);
    // BUG FIX: `return await` keeps a rejected .json() inside this
    // try/catch instead of leaking the rejection to the caller.
    return await response.json();
  } catch (error) {
    // Best-effort: callers render an empty feed on failure.
    return [];
  }
}
--------------------------------------------------------------------------------
/001-introduction-to-forging-api-requests/website/client/src/setupTests.js:
--------------------------------------------------------------------------------
1 | // jest-dom adds custom jest matchers for asserting on DOM nodes.
2 | // allows you to do things like:
3 | // expect(element).toHaveTextContent(/react/i)
4 | // learn more: https://github.com/testing-library/jest-dom
5 | import '@testing-library/jest-dom';
6 |
--------------------------------------------------------------------------------
/001-introduction-to-forging-api-requests/website/server/.gitignore:
--------------------------------------------------------------------------------
1 | node_modules
--------------------------------------------------------------------------------
/001-introduction-to-forging-api-requests/website/server/Dockerfile:
--------------------------------------------------------------------------------
# Image for the lesson-1 Express API server.
FROM node:16

LABEL "lesson.number"=1

# Copy only the manifests and install first so Docker layer caching skips
# `npm install` when just the application source changes.
COPY package.json ./
COPY package-lock.json ./
RUN npm install --silent

# Copy the rest of the application source.
COPY . ./

# NOTE(review): server.js listens on process.env.PORT — presumably set to
# 8080 by docker-compose; confirm the two stay in sync.
EXPOSE 8080

CMD ["npm", "run", "start"]
--------------------------------------------------------------------------------
/001-introduction-to-forging-api-requests/website/server/db_seeding/create_data.py:
--------------------------------------------------------------------------------
"""Generate fake seed data (profiles and posts) for the lesson-1 demo site.

Profiles come from Faker; post images are pulled from Unsplash's internal
search endpoint. The combined result is written to ``initial_data.json``,
which ``seedDatabase.js`` loads into Postgres on server startup.
"""
from faker import Faker
# NOTE(review): only `profile` is registered below; address/color/geo/person
# (and `time`) appear unused in this script.
from faker.providers import address, color, geo, person, profile
import time
import json
import random
import requests

fake = Faker()
fake.add_provider(profile)

# Accumulates everything dumped to initial_data.json at the end.
data = {"profiles": [], "posts": []}

# Profile Generation
# Palette of colors assigned randomly to each generated profile.
profile_colors = [
    "#8ECAE6",
    "#219EBC",
    "#D82F2F",
    "#FB8500",
    "#FFB703",
    "#CBF3F0",
    "#2EC4B6",
    "#FFBF69",
    "#FF9F1C",
    "#DCEDFF",
    "#94B0DA",
    "#419D78",
    "#E0A458",
    "#FFDBB5",
    "#C7F0BD",
    "#9E768F",
    "#9FA4C4",
    "#B47EB3",
    "#92D1C3",
    "#67AAF9",
    "#B95F89",
    "#8884FF",
]
for x in range(50):  # make 50 profiles
    while True:
        p = fake.profile()
        new_profile = {
            "job": p["job"],
            "company": p["company"],
            "username": p["username"],
            "name": p["name"],
            "email": p["mail"],
            "birthday": p["birthdate"].strftime("%m-%d-%Y"),
            "profile_color": random.choice(profile_colors),
        }
        # Re-roll any profile whose job title contains an apostrophe: the JS
        # seeding script builds SQL by string interpolation, and a ' would
        # break the statement. NOTE(review): other interpolated fields
        # (name, company, email, ...) are not checked the same way.
        if "'" not in p["job"]:
            break

    data["profiles"].append(new_profile)


# Post Generation
POSTS_TO_CREATE = 500
POSTS_PER_PAGE = 50  # results requested per Unsplash search query

# Pool of candidate photos gathered from every query below.
photo_options = []
queries = [
    "dancing",
    "technology",
    "programming",
    "birds",
    "dogs",
    "cats",
    "social",
    "vibes",
    "sunset",
    "cars",
    "landscape",
    "mountain",
    "snow",
    "river",
    "stream",
    "reading",
    "bookstore",
    "nighttime",
    "stars",
    "astronomy",
    "coffee",
]
for query in queries:
    # Unsplash's internal ("napi") search endpoint; a browser User-Agent is
    # sent because this is not an official public API.
    r = requests.get(
        f"https://unsplash.com/napi/search?query={query}&per_page={POSTS_PER_PAGE}",
        headers={
            "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.74 Safari/537.36"
        },
    )
    d = r.json()
    for photo in d["photos"]["results"]:
        photo_url = photo["urls"]["full"]
        photo_options.append(
            {
                "url": photo_url,
                "unsplash": photo["links"]["html"],
                "likes": photo["likes"],
            }
        )

# Unsplash page URLs already attached to a post; the same photo can show up
# under several search queries, so this prevents duplicate posts.
used_urls = []
for x in range(POSTS_TO_CREATE):
    poster = random.choice(data["profiles"])
    long_text = fake.text()
    # Draw (and permanently remove) candidates until an unused photo appears.
    # NOTE(review): if POSTS_TO_CREATE exceeds the number of unique photos
    # fetched, random.choice raises IndexError on the emptied list.
    while True:
        photo = random.choice(photo_options)
        photo_options.remove(photo)

        if photo["unsplash"] not in used_urls:
            break

    used_urls.append(photo["unsplash"])
    new_post = {
        "image_url": photo["url"],
        "image_unsplash_url": photo["unsplash"],
        "likes_count": photo["likes"],
        # Cap the caption at 200 whitespace-separated words.
        "caption": " ".join(long_text.split(" ")[0:200]),
        "author_username": poster["username"],
    }

    data["posts"].append(new_post)


with open("initial_data.json", "w+", encoding="utf-8") as o:
    json.dump(data, o)
--------------------------------------------------------------------------------
/001-introduction-to-forging-api-requests/website/server/db_seeding/seedDatabase.js:
--------------------------------------------------------------------------------
1 |
const getClient = require('../get-client')

const initialData = require('./initial_data.json')

// Create the `profiles` and `posts` tables (if missing) and bulk-insert the
// generated seed data from initial_data.json. Both INSERTs use
// `ON CONFLICT DO NOTHING`, so re-running against an already-seeded
// database is effectively a no-op.
//
// SECURITY NOTE(review): both INSERT statements are built by string
// interpolation of seed-data fields; only `birthday` is parameterized. Any
// quote in the data breaks the statement (the generator only filters
// apostrophes out of the `job` field) — prefer full parameterization.
//
// NOTE(review): `sql` and `cur` below are assigned without declaration and
// become implicit globals (would throw in strict mode).
module.exports.seedDatabase = async () => {
    const client = await getClient.getClient()

    // Create + seed the profiles table; inserts only run once the CREATE
    // callback reports success.
    await client.query(`CREATE TABLE IF NOT EXISTS profiles (
        id serial PRIMARY KEY,
        job text NOT NULL,
        company text NOT NULL,
        username text NOT NULL UNIQUE,
        name text NOT NULL,
        email text NOT NULL,
        birthday timestamptz NOT NULL,
        profile_color text NOT NULL
    )`, (err, result) => {
        if (err) {
            console.log(err)
        } else {
            sql = "INSERT INTO profiles (job, company, username, name, email, profile_color, birthday)\nVALUES "
            var value_list = "";
            var time_list = [];  // birthdays passed as $1..$n placeholders
            for (let i = 0; i < initialData.profiles.length; i++) {
                cur = initialData.profiles[i]
                value_list = value_list + `('${cur.job}', '${cur.company}', '${cur.username}', '${cur.name}', '${cur.email}', '${cur.profile_color}', $${i+1})`

                time_list.push(cur.birthday)
                // Comma-separate every row tuple except the last.
                if (i != initialData.profiles.length - 1) {
                    value_list += ",\n"
                }
            }

            var full_sql = `${sql}${value_list} ON CONFLICT DO NOTHING`

            client.query(full_sql, time_list, (err, result) => {
                if (err) {
                    console.log(err)
                } else {
                    console.log("Seeded profiles!")
                }
            })
        }
    })

    // Create + seed the posts table (fully string-interpolated — see the
    // security note above).
    await client.query(`CREATE TABLE IF NOT EXISTS posts (
        id serial PRIMARY KEY,
        image_url text NOT NULL,
        image_unsplash_url text NOT NULL UNIQUE,
        likes_count int NOT NULL,
        caption text NOT NULL,
        author_username text NOT NULL
    )`, (err, result) => {
        if (err) {
            console.log(err)
        } else {
            sql = "INSERT INTO posts (image_url, image_unsplash_url, likes_count, caption, author_username)\nVALUES "
            var value_list = "";
            for (let i = 0; i < initialData.posts.length; i++) {
                cur = initialData.posts[i]
                value_list = value_list + `('${cur.image_url}', '${cur.image_unsplash_url}', ${cur.likes_count}, '${cur.caption}', '${cur.author_username}')`

                if (i != initialData.posts.length - 1) {
                    value_list += ",\n"
                }
            }

            var full_sql = `${sql}${value_list} ON CONFLICT DO NOTHING`

            client.query(full_sql, (err, result) => {
                if (err) {
                    console.log(err)
                } else {
                    console.log("Seeded posts!")
                }
            })
        }
    })
}
--------------------------------------------------------------------------------
/001-introduction-to-forging-api-requests/website/server/get-client.js:
--------------------------------------------------------------------------------
const { Client } = require('pg');

// Maximum connection attempts before giving up (~30s at 1s per retry).
const MAX_RETRIES = 30;

const delay = ms => new Promise(resolve => setTimeout(resolve, ms))

/**
 * Connect to Postgres using the POSTGRES_* environment variables, retrying
 * once per second until connected or MAX_RETRIES attempts are exhausted
 * (covers the window where the database container is still starting).
 *
 * @returns {Promise<Client>} a connected pg Client; if every attempt failed,
 *   the last (unconnected) Client is still returned, matching prior behavior.
 */
module.exports.getClient = async () => {
    let tries = 0

    var client = null

    var connected = false
    while (tries < MAX_RETRIES) {
        try {
            client = new Client({
                connectionString: `postgres://${process.env.POSTGRES_USER}:${process.env.POSTGRES_PASSWORD}@${process.env.POSTGRES_HOST}:${process.env.POSTGRES_PORT}/${process.env.POSTGRES_DB}`
            });
            await client.connect()
            connected = true
        } catch (e) {
            await delay(1000) // Wait 1 second to retry
        }
        tries++

        if (connected)
            break
    }

    // BUG FIX: this previously checked `connected == null`, which can never
    // be true (`connected` is always a boolean), so the failure hint below
    // was unreachable. Check the boolean directly.
    if (!connected) {
        console.log("Could not connect to database, try running \"docker-compose up\" again.")
    }

    return client;
};
--------------------------------------------------------------------------------
/001-introduction-to-forging-api-requests/website/server/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "server",
3 | "version": "1.0.0",
4 | "description": "",
5 | "main": "server.js",
6 | "scripts": {
7 | "test": "echo \"Error: no test specified\" && exit 1",
8 | "start": "node ./server.js",
9 | "dev": "nodemon ./server.js localhost 8080"
10 | },
11 | "author": "David Teather",
12 | "license": "MIT",
13 | "dependencies": {
14 | "dateformat": "^5.0.3",
15 | "express": "^4.18.1",
16 | "pg": "^8.7.3"
17 | }
18 | }
19 |
--------------------------------------------------------------------------------
/001-introduction-to-forging-api-requests/website/server/server.js:
--------------------------------------------------------------------------------
const express = require("express");
const { seedDatabase } = require("./db_seeding/seedDatabase");
const app = express();
const getClient = require("./get-client")

// Rows returned per page by every paginated endpoint below.
const PAGE_SIZE = 10

// Allow cors
app.use((req, res, next) => {
  res.header("Access-Control-Allow-Origin", "*");
  res.header("Access-Control-Allow-Headers", "Origin, X-Requested-With, Content-Type, Accept");
  next();
});

// Smoke-test route.
app.get('/', (req, res) => {
  res.send("Test")
})

// Paginated list of all profiles.
// NOTE(review): `page` is a string; `PAGE_SIZE * page` relies on JS numeric
// coercion — a non-numeric page yields NaN and a 500 from the query.
app.get('/discover/profiles/:page', async (req, res) => {
  let page = req.params.page
  const client = await getClient.getClient()

  // NOTE(review): `await` on a callback-style client.query is a no-op — pg
  // only returns a promise when no callback is supplied.
  await client.query(
    {
      text: `SELECT * FROM profiles LIMIT $1 OFFSET $2`,
      values: [PAGE_SIZE, PAGE_SIZE * page]
    }, (err, result) => {
      if (err) {
        console.log(err)
        res.sendStatus(500)
        client.end()
      } else {
        res.json({ profiles: result.rows })
        client.end()
      }
    })
})

// Paginated global feed, most-liked posts first.
app.get('/feed/:page', async (req, res) => {
  let page = req.params.page
  const client = await getClient.getClient()

  await client.query(
    {
      text: "SELECT * FROM posts ORDER BY likes_count DESC LIMIT $1 OFFSET $2",
      values: [PAGE_SIZE, PAGE_SIZE * page]
    }, (err, result) => {
      if (err) {
        console.log(err)
        res.sendStatus(500)
        client.end()
      } else {
        res.json({ posts: result.rows })
        client.end()
      }
    })
})

// Paginated feed for one user; verifies the profile exists first so a
// missing user returns an explicit error payload instead of an empty feed.
app.get('/profile/:username/feed/:page', async (req, res) => {
  let page = req.params.page
  let username = req.params.username
  const client = await getClient.getClient()

  await client.query({
    text: "SELECT * FROM profiles WHERE username=$1",
    values: [username]
  }, async (err, result) => {
    if (err) {
      console.log(err)
      res.sendStatus(500)
      client.end()
    } else {
      if (result.rowCount == 0) {
        // User doesn't exist
        res.json({ posts: [], error: "USER_DOES_NOT_EXIST"})
        client.end()
      } else {
        await client.query(
          {
            text: "SELECT * FROM posts WHERE author_username=$1 ORDER BY likes_count DESC LIMIT $2 OFFSET $3",
            values: [username, PAGE_SIZE, PAGE_SIZE * page]
          }, (err, result) => {
            if (err) {
              console.log(err)
              res.sendStatus(500)
              client.end()
            } else {
              res.json({ posts: result.rows })
              client.end()
            }
          })
      }
    }
  })

})

app.listen(process.env.PORT, () => {
  console.log(`Listening on port ${process.env.PORT}`);
  // NOTE(review): fire-and-forget — requests arriving before seeding
  // finishes may see empty (or missing) tables.
  seedDatabase();
});
--------------------------------------------------------------------------------
/002-proxies/README.md:
--------------------------------------------------------------------------------
1 | # Lesson 2 - Proxies
2 |
3 | This lesson is designed to teach you about what proxies are, how they're helpful in web scraping, the different kinds of proxies, and how to use them in Python!
4 |
5 | **Note:** No activity in this lesson, I couldn't figure out a way that was still challenging but actually possible to implement. If you have ideas on how to do this feel free to file an issue or submit a PR :)
6 |
7 | ### Supporting The Project
8 | * Star the repo 😎
9 | * Maybe share it with some people new to web-scraping?
10 | * Consider [sponsoring](https://github.com/sponsors/davidteather) me on GitHub
11 | * Send me an email or a [LinkedIn](https://www.linkedin.com/in/davidteather/) message telling me what you enjoy in the course (and maybe what else you want to see in the future)
12 | * Submit PRs for suggestions/issues :)
13 |
14 | ## Learning Objectives
15 | * Learners will know how proxies work
16 | * Learners will understand how proxies are helpful in web scraping
17 | * Learners will be able to compare the different tradeoffs of the most common proxy types
18 | * Learners will use proxies in python
19 |
20 | ## Table of Contents
21 | * [Lesson Video](#lesson-video)
22 | * [Video Corrections](#video-corrections)
23 | * [What Are Proxies?](#what-are-proxies)
24 | * [Why Use Proxies?](#why-use-proxies)
25 | * [How To Get Proxies](#how-to-get-proxies)
26 | * [The Different Types of Proxies](#the-different-types-of-proxies)
27 | * [What Type Of Proxy Should You Use?](#what-type-of-proxy-should-you-use)
28 | * [How To Use Proxies In Python?](#how-to-use-proxies-in-python)
29 | * [Conclusion](#conclusion)
30 |
31 | ## Lesson Video
32 |
33 | [](https://www.youtube.com/watch?v=X0FG2JaaWOY&list=PLmRtxHvzkEE8Ofiy4hnnXSoxw7gs4HOHt)
34 |
35 | [Watch Here](https://www.youtube.com/watch?v=X0FG2JaaWOY&list=PLmRtxHvzkEE8Ofiy4hnnXSoxw7gs4HOHt)
36 |
37 | ### Video Corrections
38 | None so far
39 |
40 | ## What Are Proxies?
41 | Note: All of this information is covered with visuals in the video linked in this readme. If you watched the video you're done with the section.
42 |
43 | Without using a proxy, requests from your computer go directly to the server hosting the website you’re scraping. The server then has access to your IP making that request, which isn’t always a problem but there’s some techniques to stop web scraping that do use IP detection.
44 |
45 |
46 |
47 |
48 |
49 | Proxies are like pass-through computers for accessing the internet, your computer makes a request to a proxy, then that proxy makes the real request to the website returning its information to your computer.
50 |
51 |
52 |
53 |
54 |
55 | This way the website only has access to the IP from the proxy (which is easier to switch), not your real IP.
56 |
57 | This way of disguising who is making the real request to a website is **critical for web scraping**.
58 |
59 | ## Why Use Proxies?
60 |
61 | Websites typically want to stop web scraping, and implement a rate limit, which is a technique to ensure that a single IP hasn’t made over some number of requests in a given time period, for example 10 requests in a minute.
62 |
63 | If an IP has made more than the permitted amount, then no data is returned thus preventing web scraping. This threshold is designed so that normal users of the website won’t ever hit this rate limit, however careless bots trying to extract data will hit this limit.
64 |
65 | We can use proxies to circumvent this rate limit: since rate limiters are typically tied to a specific IP, we can make requests from different IPs. Using a single proxy does not increase your effective rate limit, however if you use a combination of a bunch of different proxies, then the effective rate limit is multiplied by the number of proxies you're using.
66 |
67 |
68 |
69 |
70 |
71 | To the server it looks like there are 5 different clients making requests.
72 |
73 |
74 |
75 |
76 |
77 | In reality it's just one computer controlling 5 different proxies.
78 |
79 | ## How To Get Proxies
80 |
81 | There are many sites that offer proxies as a service, although it’s entirely possible to host your own. It’s typically much easier and cheaper to pay a company that specializes in this.
82 |
83 | I've personally used both. I'm not sponsored by either of them while writing this, but I do have affiliate links. Feel free to use them — it supports this project 😀
84 | * [Bright Data](https://brightdata.grsm.io/u10xm7thq4ci) (affiliate link)
85 | * [non-affiliate link](https://brightdata.com/)
86 | * [Webshare.io](https://www.webshare.io/?referral_code=3x5812idzzzp) (affiliate link)
87 | * [non-affiliate link](https://www.webshare.io/)
88 | * Has a free tier of up to 1gb/month with 10 proxies
89 | * I've found this useful for small projects that I don't justify buying proxies for
90 |
91 | ## The Different Types Of Proxies
92 | Typically companies that offer proxies as a service have various types of different proxies. I've covered the most common types of proxies that I've seen below.
93 |
94 | * Data Center
95 | * Hosted on data centers like AWS, GCP, Azure, etc
96 | * Advantages
97 | * Cheapest variant
98 | * Most accessible — 100% of proxy services should have this
99 | * Disadvantages
100 | * Easiest proxy to detect as most real end users will not be connecting from a data center
101 | * Residential
102 | * Tied to physical locations typically from ISPs like Xfinity, Spectrum, AT&T, etc
103 | * Advantages
104 | * Hard to detect as most real end-users will be connecting from an ISP
105 | * Disadvantages
106 | * Fairly expensive as you're indirectly paying for an internet subscription
107 | * Mobile
108 | * Use cell providers like Verizon, AT&T, Sprint, etc
109 | * Advantages
110 | * The hardest to detect
111 | * Companies that offer these typically let you select specific cities to make requests from which can be advantageous depending on your needs
112 | * Disadvantages
113 | * Extremely expensive ($40/gb Bright Data July 2022)
114 |
115 |
116 |
117 |
118 |
119 | In addition to these different proxy types companies also typically offer two types of proxy pools.
120 | * Static IPs
121 | * Fixed number of IPs that do not change
122 | * Advantages
123 | * Usually pretty cheap
124 | * Neutral
125 | * Fixed pricing per IP/month
126 | * Rotating IPs
127 | * These typically distribute your request across all of the available proxies that proxy provider owns
128 | * Advantages
129 | * Potentially access to thousands of IPs
130 | * Neutral
131 | * Usually no fixed monthly pricing
132 | * Disadvantages
133 | * If you need to log in as a user you'll be making requests from hundreds of IPs which is a huge red flag that you're a bot
134 |
135 |
136 |
137 |
138 |
139 | ## What Type Of Proxy Should You Use?
140 |
141 | The type of proxy you should use is highly dependent on the website you’re trying to scrape, my best advice is to experiment to find the cheapest type that works reliably for you.
142 |
143 | There's a lot of changing variables that go into this, a few: websites updating their bot detection, more traffic from your proxy provider to the website from other users, your proxy provider might restrict some websites. For these reasons, I recommend re-evaluating your proxy provider and proxy type if you start having issues.
144 |
145 | ## How To Use Proxies In Python?
146 | I'll be using [webshare.io](https://www.webshare.io/?referral_code=3x5812idzzzp) (affiliate link) here but other proxy providers have really similar interfaces.
147 |
148 | On the side bar if you go to proxy -> list
149 |
150 |
151 |
152 |
153 |
154 | Then you should see something that looks like the following
155 |
156 |
157 |
158 |
159 |
160 | If we move these credentials into a python script we get something like
161 | ```py
162 | PROXY_ADDRESS = "127.0.0.1"
163 | PROXY_PORT = 8080
164 | PROXY_USERNAME = "subscribe"
165 | PROXY_PASS = "on_youtube"
166 | ```
167 |
168 | I'll be using the requests python package to make HTTP requests. So let's import that
169 | ```py
170 | PROXY_ADDRESS = "127.0.0.1"
171 | PROXY_PORT = 8080
172 | PROXY_USERNAME = "subscribe"
173 | PROXY_PASS = "on_youtube"
174 |
175 | import requests # run "pip install requests" to install this package
176 | ```
177 |
178 | Next we can make a dictionary that contains the credentials to our proxy in a URL format.
179 | ```py
180 | PROXY_ADDRESS = "127.0.0.1"
181 | PROXY_PORT = 8080
182 | PROXY_USERNAME = "subscribe"
183 | PROXY_PASS = "on_youtube"
184 |
185 | import requests # run "pip install requests" to install this package
186 |
187 | proxy = {
188 |     "http": f"http://{PROXY_USERNAME}:{PROXY_PASS}@{PROXY_ADDRESS}:{PROXY_PORT}",
189 |     "https": f"http://{PROXY_USERNAME}:{PROXY_PASS}@{PROXY_ADDRESS}:{PROXY_PORT}",
190 | }
191 | ```
192 |
193 | Finally to make a request with the proxy we can just pass in the proxy dictionary into a requests package method
194 |
195 | ```py
196 | PROXY_ADDRESS = "127.0.0.1"
197 | PROXY_PORT = 8080
198 | PROXY_USERNAME = "subscribe"
199 | PROXY_PASS = "on_youtube"
200 |
201 | import requests # run "pip install requests" to install this package
202 |
203 | proxy = {
204 |     "http": f"http://{PROXY_USERNAME}:{PROXY_PASS}@{PROXY_ADDRESS}:{PROXY_PORT}",
205 |     "https": f"http://{PROXY_USERNAME}:{PROXY_PASS}@{PROXY_ADDRESS}:{PROXY_PORT}",
206 | }
207 |
208 | requests.get("https://github.com/davidteather/everything-web-scraping/stargazers", proxies=proxy)
209 | # You could also be on the stargazers list if you star this repo 😎
210 | ```
211 |
212 | You could define multiple proxies and pick which one you wanted to use based on some logic. Maybe you want to randomly select a proxy to send the request through you could define a ton of proxy dictionaries and do something like the following
213 | ```py
214 | import random
215 |
216 | requests.get("https://github.com/davidteather/everything-web-scraping/stargazers", proxies=random.choice([proxy_1, proxy_2]))
217 | ```
218 |
219 | It's better to abstract this random proxy usage to either just read in a file that contains all of your proxies, or you could use what's called a **rotating proxy**. [webshare.io](https://www.webshare.io/?referral_code=3x5812idzzzp) (affiliate link) offers a free one that distributes your requests across all of your proxies.
220 |
221 |
222 |
223 |
224 |
225 |
226 |
227 |
228 |
229 | Note: fake credentials
230 |
231 | If we plug these credentials into the proxy dictionary it'll distribute our requests across all of the proxies that we have on the site.
232 |
233 | ```py
234 | PROXY_ADDRESS = "p.webshare.io"
235 | PROXY_PORT = 80
236 | PROXY_USERNAME = "rotating-username"
237 | PROXY_PASS = "subscribe"
238 |
239 | import requests # run "pip install requests" to install this package
240 |
241 | proxy = {
242 |     "http": f"http://{PROXY_USERNAME}:{PROXY_PASS}@{PROXY_ADDRESS}:{PROXY_PORT}",
243 |     "https": f"http://{PROXY_USERNAME}:{PROXY_PASS}@{PROXY_ADDRESS}:{PROXY_PORT}",
244 | }
245 |
246 | requests.get("https://github.com/davidteather/everything-web-scraping/stargazers", proxies=proxy)
247 | # You could also be on the stargazers list if you star this repo 😎
248 | ```
249 |
250 | ## Conclusion
251 |
252 | Congrats you've finished another lesson :)
253 |
254 | If you liked this lesson please consider giving the repository a star and if you have any suggestions I'd love to hear them on [YouTube](https://youtube.com/davidteathercodes), [Twitter](https://twitter.com/david_teather), or file an issue with the label suggestion!
--------------------------------------------------------------------------------
/002-proxies/assets/5-req-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/davidteather/everything-web-scraping/6afdda03e1b84d64a8e460f6e7265a60a87bdbdc/002-proxies/assets/5-req-2.png
--------------------------------------------------------------------------------
/002-proxies/assets/5-req.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/davidteather/everything-web-scraping/6afdda03e1b84d64a8e460f6e7265a60a87bdbdc/002-proxies/assets/5-req.png
--------------------------------------------------------------------------------
/002-proxies/assets/pool-comp.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/davidteather/everything-web-scraping/6afdda03e1b84d64a8e460f6e7265a60a87bdbdc/002-proxies/assets/pool-comp.png
--------------------------------------------------------------------------------
/002-proxies/assets/proxy-list.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/davidteather/everything-web-scraping/6afdda03e1b84d64a8e460f6e7265a60a87bdbdc/002-proxies/assets/proxy-list.png
--------------------------------------------------------------------------------
/002-proxies/assets/rotating.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/davidteather/everything-web-scraping/6afdda03e1b84d64a8e460f6e7265a60a87bdbdc/002-proxies/assets/rotating.png
--------------------------------------------------------------------------------
/002-proxies/assets/thumbnail.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/davidteather/everything-web-scraping/6afdda03e1b84d64a8e460f6e7265a60a87bdbdc/002-proxies/assets/thumbnail.png
--------------------------------------------------------------------------------
/002-proxies/assets/type-comp.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/davidteather/everything-web-scraping/6afdda03e1b84d64a8e460f6e7265a60a87bdbdc/002-proxies/assets/type-comp.png
--------------------------------------------------------------------------------
/002-proxies/assets/webshare-rotating.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/davidteather/everything-web-scraping/6afdda03e1b84d64a8e460f6e7265a60a87bdbdc/002-proxies/assets/webshare-rotating.png
--------------------------------------------------------------------------------
/002-proxies/assets/webshare-tab.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/davidteather/everything-web-scraping/6afdda03e1b84d64a8e460f6e7265a60a87bdbdc/002-proxies/assets/webshare-tab.png
--------------------------------------------------------------------------------
/002-proxies/assets/with-proxy.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/davidteather/everything-web-scraping/6afdda03e1b84d64a8e460f6e7265a60a87bdbdc/002-proxies/assets/with-proxy.png
--------------------------------------------------------------------------------
/002-proxies/assets/without-proxy.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/davidteather/everything-web-scraping/6afdda03e1b84d64a8e460f6e7265a60a87bdbdc/002-proxies/assets/without-proxy.png
--------------------------------------------------------------------------------
/003-beautiful-soup-with-static-site-and-server-side-rendered-web-scraping/README.md:
--------------------------------------------------------------------------------
1 | # Lesson 3 - BeautifulSoup With Static Site & Server Side Rendered Web Scraping
2 |
3 | This lesson is designed to teach you about how to extract data from static websites and websites that are server side rendered (SSR). We'll be using the python package BeautifulSoup to extract data from the HTML.
4 |
5 | ### Supporting The Project
6 | * Star the repo 😎
7 | * Share it with someone new to web-scraping
8 | * Consider [sponsoring](https://github.com/sponsors/davidteather) me on GitHub
9 | * Send me an email or a [LinkedIn](https://www.linkedin.com/in/davidteather/) message telling me what you enjoy in the course (and what else you want to see in the future)
10 | * Submit PRs for suggestions/issues :)
11 |
12 | ## Learning Objectives
13 | * Learners will be able to compare and contrast static sites and server side rendered (SSR) sites, and explain how and why we're able to treat them the same as web scrapers
14 | * Learners will be able to explain basic HTML structure
15 | * Learners will be able to use beautiful soup to extract data from a static site
16 | * Learners will be able to identify if their desired data is statically or dynamically rendered
17 |
18 | ## Table of Contents
19 |
20 | * [Lesson Video](#lesson-video)
21 | * [Static Sites vs Server Side Rendered Sites (SSR)](#static-sites-vs-server-side-rendered-sites-ssr)
22 | * [Static Sites](#static-sites)
23 | * [Server Side Rendering (SSR)](#server-side-rendering-ssr)
24 | * [How We Can Web Scrape These Sites?](#how-we-can-web-scrape-these-sites)
25 | * [Basic HTML Structure](#basic-html-structure)
26 | * [Tags](#tags)
27 | * [Attributes](#attributes)
28 | * [Classes](#classes)
29 | * [Extracting Data Using BeautifulSoup](#extracting-data-using-beautifulsoup)
30 | * [Running The Website](#running-the-website)
31 | * [Installing BeautifulSoup](#installing-beautifulsoup)
32 | * [Getting HTTP From A Website](#getting-http-from-a-website)
33 | * [Parsing HTML With BeautifulSoup](#parsing-html-with-beautifulsoup)
34 | * [Extracting Prices From Homepage](#extracting-prices-from-homepage)
35 | * [Using Developer Tools](#using-developer-tools)
36 | * [Extracting The First Price](#extracting-the-first-price)
37 | * [Extracting All Prices](#extracting-all-prices)
38 | * [Full Code](#full-code)
39 | * [Activities](#activities)
40 |
41 |
42 | ## Lesson Video
43 |
44 | [](https://www.youtube.com/watch?v=_Ptvvjm15EA&list=PLmRtxHvzkEE8Ofiy4hnnXSoxw7gs4HOHt&index=4)
45 |
46 | [Watch Here](https://www.youtube.com/watch?v=_Ptvvjm15EA&list=PLmRtxHvzkEE8Ofiy4hnnXSoxw7gs4HOHt&index=4)
47 |
48 | ### Video Corrections
49 | None so far
50 |
51 | ## Static Sites vs Server Side Rendered Sites (SSR)
52 |
53 | ### Static Sites
54 |
55 | Static websites are just normal HTML files that are served to the user exactly as they're stored on the server, this means user-specific information is not included. Sites that don't require content to change are typically: portfolio websites, blogs, landing pages, documentation, etc. A concrete example is my security blog [The Response Times](https://theresponsetimes.com/) which is fully static.
56 |
57 |
58 |
59 |
60 |
61 | > Note: Many websites are static HTML files but use JavaScript to make requests for additional data, this type of site is out of scope for this lesson, but check out [001 - Introduction To Forging API Requests](../001-introduction-to-forging-api-requests/README.md) to learn more.
62 |
63 | > The easiest way to tell if a website is static or not is to view the source code of the website, on most browsers you can right click the page and click "View Page Source". If the data you want is in the source code, then it's statically rendered.
64 |
65 | ### Server Side Rendering (SSR)
66 |
67 | In SSR a server will inject data into HTML before sending it to the client. This is a very widely used approach, but the data injected could be: current trending topics, your messages with another user, or the current weather, and much more.
68 |
69 | Since SSR returns HTML that the browser can render without doing extra work, it's popular because it decreases the page load time for the end user especially if they're viewing the website on a computationally weak device.
70 |
71 |
72 |
73 |
74 |
75 | ### How We Can Web Scrape These Sites?
76 |
77 | Since both of these sites return HTML with the data that we want in the file, we just need a way to parse HTML and select the data we want. Luckily, there are tons of Python packages that allow us to do this, in this video we'll be using BeautifulSoup.
78 |
79 | ## Basic HTML Structure
80 |
81 | But first, we need to learn the basic syntax and structure of HTML so that we can tell BeautifulSoup how to extract our data.
82 |
83 | ### Tags
84 |
85 | The most important parts of HTML are called tags. Tags build up the structure of the website and each of them has a different purpose and is usually rendered differently by the web browser. All tags start with `<` and end with `>`, an example is the `<p>` opening tag which represents a paragraph of text in HTML.
86 |
87 | Tags also must be closed using a closing tag like `</p>`. All content between the opening tag and the closing tag is the content within the element.
88 |
89 | A completed example
90 | ```html
91 |
92 | Here's my HTML paragraph using the p tag!
93 |
94 | ```
95 |
96 | An example of how different tags are rendered differently by browsers is the header tags: they change the font size and range from `<h1>` being the largest to `<h6>` being the smallest.
97 |
98 | You can find a list of all supported tags [from Mozilla](https://developer.mozilla.org/en-US/docs/Web/HTML/Element)
99 |
100 |
101 | Another thing to note is that tags also can be nested
102 | ```html
103 |
104 |
Larger Header Font Size!
105 |
Paragraph text
106 |
107 | ```
108 |
109 | ### Attributes
110 |
111 | The default behavior of tags can be modified by adding attributes to opening tags.
112 |
113 | For example the `<img>` tag is used to show an image and has multiple attributes that can be modified, which change how the image is displayed.
114 | ```html
115 |
116 | ```
117 |
118 | ### Classes
119 |
120 | Classes apply user defined style sheets known as CSS that change how an element looks. In well designed websites, this is one of the best ways to select what parts you want to extract data from.
121 |
122 | You don't need to know CSS for this tutorial, but here's a basic example.
123 | ```html
124 |
This text is green
125 |
This background is blue
126 |
This text is green and background blue
127 |
128 |
129 |
137 | ```
138 |
139 | ## Extracting Data Using BeautifulSoup
140 |
141 | Enough HTML! Let's start extracting data from a website using BeautifulSoup.
142 |
143 | ### Running The Website
144 |
145 | Visit [Running The Websites](../README.md#how-to-start-the-mock-websites)
146 |
147 | ### Installing BeautifulSoup
148 |
149 | If it's not already installed, run `pip install -r requirements.txt` in this directory. Or do `pip install beautifulsoup4`
150 |
151 | ### Getting HTTP From A Website
152 |
153 | We'll be using the `requests` package to get HTML from the website, you can install it with `pip install requests`
154 |
155 | ```python
156 | import requests
157 |
158 | WEBSITE_URL = "http://localhost:3000"
159 |
160 | r = requests.get(WEBSITE_URL)
161 | print(r.text) # print out the HTML
162 | ```
163 |
164 | ### Parsing HTML With BeautifulSoup
165 |
166 | Now that we have the HTML in python, we can use BeautifulSoup to parse it.
167 |
168 | ```python
169 | from bs4 import BeautifulSoup
170 | import requests
171 |
172 | WEBSITE_URL = "http://localhost:3000"
173 |
174 | r = requests.get(WEBSITE_URL)
175 | soup = BeautifulSoup(r.text, "html.parser") # parse the HTML
176 | print(soup)
177 | ```
178 |
179 | ### Extracting Prices From Homepage
180 |
181 | To extract the prices from the homepage, we first have to look at the HTML and figure out how to select the right elements.
182 |
183 | I personally prefer to use developer tools, but you can also view the page source directly in the browser, after right clicking the page.
184 |
185 | #### Using Developer Tools
186 |
187 | * Find the element you want to extract data from
188 | * Right click on the element and click `Inspect`
189 | * Then you can see the price $3.13 is in a `<p>` tag in the browser
190 |
191 | *(Step by step in video)*
192 |
193 | #### Extracting The First Price
194 |
195 | However you choose to view the HTML, you should see something like this
196 | ```html
197 |
230 | Now we want to select the `<p>` tag since that contains the price of the item, it's a child of `product_detail` so we can select it like so.
231 | ```python
232 | print(product_detail.p)
233 | ```
234 |
235 | Which returns
236 | ```html
237 |
240 | However, there are multiple `<p>` tags that are children of `product_detail`. We need to select the right one. We can select all children of an element with `find_all()` or `findChildren()`
241 | ```python
242 | # We can make another find_all attribute to find the children elements of a given object
243 | for child in product_detail.find_all('p'):
244 | print(child)
245 |
246 | # Or we can use findChildren()
247 | # If we want to see it in an array format
248 | children = product_detail.findChildren('p')
249 | print(children)
250 | ```
251 |
252 | ```html
253 |
Sacha’s elegant antlers have never been se…
254 |
$3.13
255 | [
Sacha’s elegant antlers have never been se…
,
$3.13
]
256 | ```
257 |
258 | Now we can select the price by selecting the second `<p>` element
267 | ```
268 |
269 | Last step is stripping the HTML tags from the price with `get_text()`
270 | ```python
271 | price = children[1].get_text()
272 | print(price)
273 | ```
274 |
275 | ```
276 | $3.13
277 | ```
278 |
279 | We got the first price! 🤠
280 |
281 | #### Extracting All Prices
282 |
283 | In our previous code we were only getting the first price, but we want to get all the prices. To do this we can use `find_all()` to get all the `<div>` tags with `class="product-details"`.
284 |
285 | Then we can use the same code as before to get the price for each product
286 | ```python
287 | for product in soup.find_all('div', attrs={'class': 'product-details'}):
288 | # We know it's the second child we want
289 | children = product.findChildren('p')
290 |
291 | # Get rid of the HTML tags
292 | price = children[1].get_text()
293 | print(price)
294 | ```
295 |
296 | #### Full Code
297 |
298 | ```python
299 | from bs4 import BeautifulSoup
300 | import requests
301 |
302 | WEBSITE_URL = "http://localhost:3000"
303 |
304 | r = requests.get(WEBSITE_URL)
305 | soup = BeautifulSoup(r.text, "html.parser") # parse the HTML
306 |
307 | for product in soup.find_all('div', attrs={'class': 'product-details'}):
308 | # We know it's the second child we want
309 | children = product.findChildren('p')
310 |
311 | # Get rid of the HTML tags
312 | price = children[1].get_text()
313 | print(price)
314 | ```
315 |
316 | ## Activities
317 |
318 | Here's some additional activities to help you practice web scraping. After trying for a bit, use the hints to help you out. If you're still stuck, the solutions are in the video!
319 |
320 | Modify `activities.py` and complete the functions.
321 |
322 | **Do not** change the method names, however feel free to call those methods if you want to test them out in the `if __name__ == "__main__"` section.
323 |
324 | ### Testing
325 |
326 | To check if your implementations are correct run `python test.py` this will import the functions you made. It will tell you what tests failed if any, and will show a success message if all tests passed.
327 |
328 | > Note: Bonuses are not tested, so you'll have to check those yourself.
329 |
330 | ### 1: Product Title & Prices
331 | * Return a list of the product titles and prices
332 | * Ex: `["Sacha the Deer ($3.13)", ...]`
333 | *
334 | Hint
335 | You'll need to select the parent of all product features which is all of the <li> elements under the <ul class="product-list"> element
336 |
337 |
338 | ### 2: Get All Colors Available For Each Product
339 | * Return each product's title and color options as a list of strings
340 | * Ex: `["Sacha the Deer (#000000, #39589e, #9c5145, #dfd3c2)", ...]`
341 | *
342 | Hint: Extracting Attributes
343 | You can access attributes of an element with get('attribute_name')
344 |
345 | Ex: `product.find('a').get('href')`
346 |
347 |
348 | ### 3: Get Every Product's Material
349 | * This is visible when you click into a product's page
350 | * Return each product's title and material as a list of strings
351 | * Ex: `["Bumble the Elephant made of 70% Cotton, 30% Nylon", ...]`
352 | *
353 | Hint
354 | You'll need to make an additional HTML request for each product, and a new BeautifulSoup object for each product page.
355 |
356 |
357 | ### 4: Filter all the products from highest reviewed to lowest reviewed
358 | * Return a list of the products sorted by the star count
359 | ```
360 | [('Scar the Lion', 5),
361 | ('Gerald the Giraffe', 4),
362 | ('Gavin the Tiger', 4),
363 | ('Sacha the Deer', 3),
364 | ('Bumble the Elephant', 3),
365 | ('Todd the Hedgehog', 2)]
366 | ```
367 |
368 | ### 5: Product Availability
369 | * Not all products are available, look at `Gerald the Giraffe`
370 | * Return a list of strings of all products and their availability
371 | * Ex: `["Sacha the Deer is available: True", ...]`
372 | *
373 | Bonus
374 | Add some logic to check products every X minutes, so you can be notified when products come back in stock.
375 |
376 | Note: This website won't have items come back into stock, but if it was a real website you could have a Discord bot or something notify you.
377 | - Example: Old commissioned project I made [here](https://github.com/davidteather/Hotukdeals-Discord-Notifier)
378 |
379 |
380 | ### 6: Scrape Reviews For Each Product
381 | * Return a dictionary with structure `{"product_title": [{"rating": "5", "review_title": "Great!", "review_full": "I love it"}, ...], ...}`
382 | * Ex: `{"Sacha the Deer": [{'rating': '5', 'review_title': 'V neck', 'review_full': 'Great shirt. love the detail in back. feminine and different than the average t'}]}`
383 | *
384 | Bonus
385 | Try and do sentiment analysis on product reviews and sort by ones with the best average sentiment.
386 |
387 | You might find [this article](https://realpython.com/python-nltk-sentiment-analysis/#using-nltks-pre-trained-sentiment-analyzer) helpful
388 |
389 |
390 | ### Solutions
391 |
392 | * [1: Product Title & Prices](https://youtu.be/_Ptvvjm15EA?list=PLmRtxHvzkEE8Ofiy4hnnXSoxw7gs4HOHt&t=506)
393 | * [2: Get All Colors Available For Each Product](https://youtu.be/_Ptvvjm15EA?list=PLmRtxHvzkEE8Ofiy4hnnXSoxw7gs4HOHt&t=547)
394 | * [3: Get Every Product's Material](https://youtu.be/_Ptvvjm15EA?list=PLmRtxHvzkEE8Ofiy4hnnXSoxw7gs4HOHt&t=620)
395 | * [4: Filter all the products from highest reviewed to lowest reviewed](https://youtu.be/_Ptvvjm15EA?list=PLmRtxHvzkEE8Ofiy4hnnXSoxw7gs4HOHt&t=679)
396 | * [5: Product Availability](https://youtu.be/_Ptvvjm15EA?list=PLmRtxHvzkEE8Ofiy4hnnXSoxw7gs4HOHt&t=710)
397 | * [6: Scrape Reviews For Each Product](https://youtu.be/_Ptvvjm15EA?list=PLmRtxHvzkEE8Ofiy4hnnXSoxw7gs4HOHt&t=744)
398 |
399 | ## Conclusion
400 |
401 | Congrats you've finished another lesson :)
402 |
403 | If you liked this lesson please consider giving the repository a star and if you have any suggestions I'd love to hear them on [YouTube](https://youtube.com/davidteathercodes), [Twitter](https://twitter.com/david_teather), or file an issue with the label suggestion!
--------------------------------------------------------------------------------
/003-beautiful-soup-with-static-site-and-server-side-rendered-web-scraping/activities.py:
--------------------------------------------------------------------------------
1 | import requests
2 |
3 | # Note: the tester relies on this variable, update it if you are running the server on a different port
4 | WEBSITE_BASE_URL = "http://localhost:3000"
5 |
6 | # Activity 1: Product Titles & Prices
7 | # Return a list of the product titles and prices
8 | # Ex: ["Sacha the Deer ($3.13)", ...]
9 | def title_and_prices():
10 | return ["Sacha the Deer ($3.13)"]
11 |
12 | # Activity 2: Get All Colors Available For Each Product
13 | # Return each product's title and color options as a list of strings
14 | # Ex: ["Sacha the Deer (#000000, #39589e, #9c5145, #dfd3c2)", ...]
15 | def product_colors():
16 | return ["Sacha the Deer (#000000, #39589e, #9c5145, #dfd3c2)"]
17 |
18 | # Activity 3: Get All Product's Material
19 | # Return each product's title and material as a list of strings
20 | # Ex: ["Bumble the Elephant made of 70% Cotton, 30% Nylon", ...]
21 | def product_materials():
22 | return ["Bumble the Elephant made of 70% Cotton, 30% Nylon"]
23 |
24 | # Activity 4: Filter all the products from highest reviewed to lowest reviewed
25 | # Return a list of the product titles and average rating as a tuple
26 | # Ex: [('Scar the Lion', 5), ...]
27 | def highest_reviewed():
28 | return [("Scar the Lion", 5), ("Sacha the Deer", 5)]
29 |
30 | # Activity 5: Product Availability
31 | # Not all products are available, look at `Gerald the Giraffe`
32 | # Return a list of strings of all products and their availability
33 | # Ex: ["Sacha the Deer is available: True", ...]
34 | def product_availability():
35 | return ["Sacha the Deer is available: True"]
36 |
37 | # Activity 6: Scrape Reviews For Each Product
38 | # Return a dictionary with structure {"product_title": [{"rating": "5", "review_title": "Great!", "review_full": "I love it"}, ...], ...}
39 | # Ex: {"Sacha the Deer": [{'rating': '5', 'review_title': 'V neck', 'review_full': 'Great shirt. love the detail in back. feminine and different than the average t'}, ...]}
40 | def product_reviews():
41 | return {"Sacha the Deer": [{'rating': '5', 'review_title': 'V neck', 'review_full': 'Great shirt. love the detail in back. feminine and different than the average t'}]}
42 |
43 | if __name__ == "__main__":
44 | # Optional: You can call your methods here if you want to test them without running the tester
45 | # print(title_and_prices())
46 | pass
--------------------------------------------------------------------------------
/003-beautiful-soup-with-static-site-and-server-side-rendered-web-scraping/assets/ssr-sites.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/davidteather/everything-web-scraping/6afdda03e1b84d64a8e460f6e7265a60a87bdbdc/003-beautiful-soup-with-static-site-and-server-side-rendered-web-scraping/assets/ssr-sites.png
--------------------------------------------------------------------------------
/003-beautiful-soup-with-static-site-and-server-side-rendered-web-scraping/assets/static-sites.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/davidteather/everything-web-scraping/6afdda03e1b84d64a8e460f6e7265a60a87bdbdc/003-beautiful-soup-with-static-site-and-server-side-rendered-web-scraping/assets/static-sites.png
--------------------------------------------------------------------------------
/003-beautiful-soup-with-static-site-and-server-side-rendered-web-scraping/assets/thumbnail.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/davidteather/everything-web-scraping/6afdda03e1b84d64a8e460f6e7265a60a87bdbdc/003-beautiful-soup-with-static-site-and-server-side-rendered-web-scraping/assets/thumbnail.png
--------------------------------------------------------------------------------
/003-beautiful-soup-with-static-site-and-server-side-rendered-web-scraping/docker-compose.yml:
--------------------------------------------------------------------------------
1 | version: '3.9'
2 |
3 | services:
4 | website:
5 | build:
6 | context: ./website
7 | ports:
8 | - 3000:3000
--------------------------------------------------------------------------------
/003-beautiful-soup-with-static-site-and-server-side-rendered-web-scraping/requirements.txt:
--------------------------------------------------------------------------------
1 | beautifulsoup4==4.12.2
2 | requests==2.31.0
3 | pytest==7.4.0
--------------------------------------------------------------------------------
/003-beautiful-soup-with-static-site-and-server-side-rendered-web-scraping/test.py:
--------------------------------------------------------------------------------
1 | # Grades activities.py on the test cases
2 | # Don't look inside this if you haven't passed the tests yet
3 |
4 | from activities import WEBSITE_BASE_URL, title_and_prices, product_colors, product_materials, highest_reviewed, product_availability, product_reviews
5 | import requests
6 | from bs4 import BeautifulSoup
7 | import pytest
8 |
9 | def test_title_and_prices():
10 | r = requests.get(WEBSITE_BASE_URL)
11 | soup = BeautifulSoup(r.text, 'html.parser')
12 |
13 | prices = []
14 | for product in soup.find_all('div', attrs={'class': 'product-details'}):
15 | children = product.findChildren('p')
16 | price = children[1].get_text()
17 | title = product.h4.get_text()
18 | prices.append(f"{title} ({price})")
19 |
20 | actual = title_and_prices()
21 | assert prices == actual, f'Got {actual} but expected {prices}'
22 |
23 | def test_product_colors():
24 | r = requests.get(WEBSITE_BASE_URL)
25 | soup = BeautifulSoup(r.text, 'html.parser')
26 |
27 | product_parent = soup.find('ul', attrs={'class': 'product-list'})
28 | result = []
29 | for product in product_parent.find_all('li'):
30 | style_picker = product.find('div', attrs={'class': 'style-picker'})
31 |
32 | # To get the hex codes we want to look at the style attribute
33 | # We can use the get method to get the style attribute
34 | hex_codes = []
35 |
36 | if style_picker is None:
37 | # The site has some extra elements for spacing that aren't products
38 | continue
39 |
40 | for color in style_picker.find_all('div'):
41 | style = color.get('style')
42 | hex_code = style.split(": ")[1]
43 | hex_codes.append(hex_code)
44 |
45 | product_title = product.h4.get_text()
46 | result.append(f"{product_title} ({', '.join(hex_codes)})")
47 |
48 | actual = product_colors()
49 | assert result == actual, f"Got {actual} but expected {result}"
50 |
51 | def test_product_materials():
52 | r = requests.get(WEBSITE_BASE_URL)
53 | soup = BeautifulSoup(r.text, 'html.parser')
54 |
55 | result = []
56 | product_parent = soup.find('ul', attrs={'class': 'product-list'})
57 | for product in product_parent.find_all('li'):
58 | a_tag = product.a
59 |
60 | if a_tag is None:
61 | # Again, has extra <li> elements for spacing that aren't products
62 | continue
63 |
64 | product_link = product.a.get('href')
65 | product_link = WEBSITE_BASE_URL + product_link
66 |
67 | # We can now use the product_link to get the specific product's page which contains information on
68 | # the product's material
69 | product_page = requests.get(product_link)
70 | product_soup = BeautifulSoup(product_page.text, 'html.parser')
71 |
72 | # The material is in a <p> tag with id of "material"
73 | material = product_soup.find('p', attrs={'id': 'material'}).get_text()
74 |
75 | # Get the product title for printing
76 | product_title = product.h4.get_text()
77 | result.append(f"{product_title} made of {material}")
78 |
79 | actual = product_materials()
80 | assert result == actual, f"Got {actual} but expected {result}"
81 |
82 | def test_highest_reviewed():
83 | r = requests.get(WEBSITE_BASE_URL)
84 | soup = BeautifulSoup(r.text, 'html.parser')
85 |
86 | product_ratings_list = []
87 | product_parent = soup.find('ul', attrs={'class': 'product-list'})
88 | for product in product_parent.find_all('li'):
89 | a_tag = product.a
90 |
91 | if a_tag is None:
92 | # Again, has extra <li> elements for spacing that aren't products
93 | continue
94 |
95 | product_link = product.a.get('href')
96 | product_link = WEBSITE_BASE_URL + product_link
97 |
98 | product_page = requests.get(product_link)
99 | product_soup = BeautifulSoup(product_page.text, 'html.parser')
100 |
101 | # Get the star container
102 | star_container = product_soup.find('div', attrs={'class': 'star-rating'})
103 |
104 | # A star is marked as a full star if it has the class "checked"
105 | # We can use this to count the number of full stars
106 | full_stars_list = star_container.find_all('span', attrs={'class': 'checked'})
107 | full_stars = len(full_stars_list)
108 |
109 | product_title = product.h4.get_text()
110 |
111 | product_ratings_list.append((product_title, full_stars))
112 |
113 | # Sort product_ratings_list by the number of stars
114 | product_ratings_list.sort(key=lambda x: x[1], reverse=True)
115 | actual = highest_reviewed()
116 | assert product_ratings_list == actual, f"Got {actual} but expected {product_ratings_list}"
117 |
118 | def test_product_availability():
119 | r = requests.get(WEBSITE_BASE_URL)
120 | soup = BeautifulSoup(r.text, 'html.parser')
121 |
122 | result = []
123 | product_parent = soup.find('ul', attrs={'class': 'product-list'})
124 | for product in product_parent.find_all('li'):
125 | a_tag = product.a
126 |
127 | if a_tag is None:
128 | # Again, has extra <li> elements for spacing that aren't products
129 | continue
130 |
131 | product_link = product.a.get('href')
132 | product_link = WEBSITE_BASE_URL + product_link
133 |
134 | product_page = requests.get(product_link)
135 | product_soup = BeautifulSoup(product_page.text, 'html.parser')
136 |
137 | # Button has
51 | {{ review.rating }}
52 | {% for i in (1..5) %}
53 | {% if i <= review.rating %}
54 |
55 | {% else %}
56 |
57 | {% endif %}
58 | {% endfor %}
59 |
60 |
{{ review.title }}
61 |
{{ review.review }}
62 |
63 | {% endfor %}
64 |
65 |
66 |
67 |
68 |
69 |
70 |
71 |
72 |
--------------------------------------------------------------------------------
/003-beautiful-soup-with-static-site-and-server-side-rendered-web-scraping/website/_products/elephant.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Bumble the Elephant
3 | description_markdown: >-
4 | Bumble the humble elephant is your shining star. He will always remember who
5 | you are and why you are here.
6 | material: 70% Cotton, 30% Nylon
7 | garment_type:
8 | price: '6.08'
9 | sku: elephant
10 | stock: 10
11 | sizes:
12 | - Small
13 | - Medium
14 | - Large
15 | - XL
16 | styles:
17 | - name: Cream
18 | color: '#dfd3c2'
19 | image: /images/products/elephant/cream.jpg
20 | - name: Green
21 | color: '#67aa79'
22 | image: /images/products/elephant/green.jpg
23 | in_stock: true
24 | avg_rating: 3
25 | reviews:
26 | - rating: 5
27 | title: Adorable and flattering
28 | review: This top is very cute. got it in the lighter color. the fit is great and it will go with many things. if size medium were not out of stock in the blue color i would have purchased that one also.
29 | - rating: 2
30 | title: Way too large everywhere
31 | review: I ordered a medium in this and it definitely fits more like a large. i liked the color & the sleeves, but tying the tie at the neck looks silly on me and leaving it open shows too much cleavage. overall, it is just shapeless and too much fabric on me. i'm sending this one back.
32 | - rating: 5
33 | title: I love this!
34 | review: I was pleasantly surprised with this cardigan! i get so many compliments- i can easily see me carrying this into my fall wardrobe!
35 | - rating: 5
36 | title: Great summer top
37 | review: I tried on this top in store & was a bit wishy washy on it at first. however i am so glad i bought it and have worn it 3x in a week (shh!). it is incredibly lightweight & breathable which is great for hot texas summers. i also love that it has interesting back detail. the other reviewers mentioned it was see through but i don't agree - i wear a nude bra with it and have no issues. it can be worn casually with shorts or dressed up with white denim & fancier accessories. either way it looks best w
38 | - rating: 5
39 | title: Lovely top
40 | review: This top is lovely and flowy! it fits true to size, and the lace detail at the neck is perfect to make this top stand out. i felt like it was a little sheer, but it wasn't problematic enough for me to take away a star -)
41 | - rating: 5
42 | title: Sold on reviews
43 | review: After reading many positive reviews i ordered. sure glad i did. i love the light, airy fabric. it hangs nicely. i am mostly a 12, sometimes a 14. i ordered both sizes and the 14 was a perfect fit. i am very pleased with this purchase. thought it might be too girly and/or young looking on me, but not so at all. i got the white and i certainly like the red detail at the button holes too.
44 | - rating: 5
45 | title: Simply love
46 | review: This sweater is substantial, well-made, fabric is thick and comfortable. the length is on the shorter side, not cropped but shorter, perhaps if they had petite, ti would be cropped. the sleeves are perfect length too on me, it looks shorter on the model, they hit at mys wrists (i do usually need petite length). it seems a simple top, but it is quite beautiful in person. i strongly recommend it, even for the price...xs and i am 115 lbs, 30dd chest, 26.5 in waist
47 | - rating: 3
48 | title: Beauty and the beast
49 | review: I love the designs of tiny blouses. but i wish they could make them more durable. i have owned several blouses in the past where i have to use a little pin to secure the button line from pulling at the breast area, or that after a few delicate washings have come apart at a seam. and ironing or steaming them is a must. this blouse was no exception. it went back because of pulling at the bust. i'm willing to take some extra care of my clothing out of my busy schedule, but i shouldn't have to! 5'7"
50 | - rating: 5
51 | title: Super cute shirt
52 | review: I just bought this shirt on sale. i'd been eyeing it for a bit and when it went on sale i had to get it. its really cute and comfortable and fits really nicely. it's not too short or boxy and the detail on the sleeves is great.
53 | - rating: 4
54 | title: Give it a try
55 | review: I'm between a s & m. on most swing tops usually s which i thought i'd be in this one but not the case. a m fit but i thought i could probably even wear a l which i never wear. but it is gorgeous. this is one of those shirts that keep me coming back to retailer. this is a perfect fall top! great tan color the beading is so so pretty and will really look great under sweaters. this top looked so great with my louis vuitton bag it was just too good to pass up so go to your local store, find th
56 | - rating: 5
57 | title: Gorgeous with a great fit
58 | review: I love this flattering and comfortable jacket- it has a nice stretch and the color is out of this world. i think it will pack a big punch this fall.
59 | - rating: 1
60 | title: Shrinks
61 | review: I loved this blouse when i got it and wore it before washing. it fit really well and was flattering. the only time i laundered it i hand- washed it in cold water and hung it to dry. the blouse shrunk at least a full size and in awkward places. the sleeves are now tight and way too short. the overall length shortened by at least 2 inches. the top is so tight in the bust now that the buttons popped open. i'm very disappointed and surprised.i will be returning it as it is unwearable as is.
62 | - rating: 5
63 | title: I love this!
64 | review: I actually like my shirt more than the other reviewers it seems. i am little (5'1" and 103) and i don't think it's too boxy at all. i like the way it hangs. and i'll say i like it much more in person than on the model. it's somehow cozier than it looks in the picture. it's a real boosh (that mean's comfy.) i wouldn't have picked it from the online pic, but in person, it really caught my eye.
65 | - rating: 5
66 | title: Flexible sweatering
67 | review: Love this sweater! it is easy to wear and goes well with multiple outfits. it is a great, flexible add to my wardrobe. it fits well, even in the sleeves!
68 | - rating: 5
69 | title: Light and lovely.
70 | review: I recently purchased this tunic on sale. the cut of this top is flattering on my curvy figure as it has a cinched in waist. the embroidery is lovely and very bright with several shades of colors. i wore this with black leggings,boots and a green cardigan and received many compliments.
71 | - rating: 4
72 | title: Very cute, runs large
73 | review: I like the casual, yet detailed look of this piece. it looks great with crops. the casual tie string makes the piece feel very laid back, but the details are so pretty. i have it in lavender. it looks way better in person. saw it at the store and grabbed it immediately.
74 | - rating: 4
75 | title: Fun arm detail
76 | review: Really enjoy this pullover's arm detail. i got the "purple" which was somewhere between aubergine and garnet. i think this will easily go with black pants to wear to work or a fun skirt/jeans.
77 | - rating: 4
78 | title: Cute!
79 | review: This top is cute! it adds a pop of color to my day and seems to be of high quality. the colors seem more vibrant in person and the material is quite soft and stretchy. as another reviewer pointed out, the peplum is only in the front! it sounds really weird but i don't really mind the look because i'll be wearing a jacket over it. i'm usually a size 12/14 with a 36dd bust and the large fits beautifully.
80 | - rating: 3
81 | title: Nice try...
82 | review: This top was so pretty online! unfortunately it did not work for me- the front was a bit more shapeless in person, though the back was gorgeous, with a button gather at center. the color appeared yellow online, but was a bit more green in person. also, the armholes were very low, so it would need a camisole or no bra.
83 | - rating: 5
84 | title: Details and versatility
85 | review: This top is easy to dress up with a blazer or wear casually with jeans and trainers. so many nice details and a great fit!
86 | - rating: 2
87 | title: Sandstripe vest
88 | review: As a beach cover-up this would be great, but i'm surprised retailer calls this a dress and doesn't show on the model how thin it is. it's pretty sheer and i couldn't get away without layering it.
89 | - rating: 4
90 | title: Pretty details
91 | review: I have this shirt in the gray and in the orange and i just love the details on front.
92 | - rating: 5
93 | title: Watercolor look
94 | review: Tshirt is beautiful. looks like watercolor. the fabric is a thin soft cotton that is really nice. i am 5'6" and very thin. xs fit well and was not too short. s would have been fine too. i would choose a size by your shoulder width. i bought my mother the medium.
95 | - rating: 5
96 | title: Perfect summer basic
97 | review: I tried this on in the store on a whim. it is so flattering, soft, and pretty. i generally wear a size 12 and bought a l although the m might have fit, also it's got plenty of room. the chiffon yoke is not particularly sheer and a nude bra strap is basically invisible, the jersey is adequately opaque so no camisole needed. my dimensions are about 40" bust x 33" waist.
98 | - rating: 4
99 | title: Beautiful print, but runs very large
100 | review: This print is beautiful and the design is unique and great for work. my only complaint is that it runs huge and the torso fit is extremely boxy. i'm normally in between a size xs and s at retailer, bought the xs in this shirt and it still almost too big.
101 | - rating: 4
102 | title: Love the look for the holidays
103 | review: This velvet t-shirt is perfect for the holidays! i usually wear a size 12 top, or a l, but i ordered up and got a size xl. it is very tight around my chest (36c) and armpits, and the v-neck sometimes spreads apart a lot if i move my arms around a lot. but other than that, it looks great, just runs small. if you are a size 14 or larger, i don't think that this will fit.
104 | - rating: 4
105 | title: Pretty cropped cardigan.
106 | review: This cardigan is nice and the lace at the back is pretty. just be aware that it's definitely a cropped length, which is good for wearing over a dress that you don't want to cover up too much. size up if you want room to layer. the one i tried on at the store had the top button almost falling off, so i didn't buy it.
107 | - rating: 1
108 | title: Enormous
109 | review: I was really excited about this sweater, but the fit was terrible. the sleeves were the right size, but the body of the sweater was a tent - absolutely no shape. they must have clipped the sweater on the model. for reference, i'm 5'4 and 120 pounds and ordered a size small.
110 | - rating: 5
111 | title: Great casual top
112 | review: Love this top for casual days. fits perfectly, fabric feels quality and color is bright but not too bright. good length, just touches top of my thighs.
113 | - rating: 5
114 | title: Perfect shirt
115 | review: This shirt is so cute alone with jeans or dressed up with nice jewelry, a scarf or cardi. its just the right weight, true to size, drapes nicely and its very flattering. i"m sorry i didn't order more when i had the chance. its already sold out in the colors and sizes i wanted. excellent quality as usual -- thanks again retailer!
116 | - rating: 2
117 | title: Cute but won't last
118 | review: Wore it a couple of times, it is cool and comfortable. washed it and the raw edges are starting to fray. i might get one or two more wears out of it. hand washed and hung dry.
119 | - rating: 5
120 | title: Great find on sale!
121 | review: I can't believe this top hasn't sold out. it is a great casual top that will go well with anything. i agree with previous reviewer that talked about sizing up to cover straps. i did size up to a large but it didn't get huge around the midsection. i got the red and it will go great with pilcro khakis or denim.
122 | - rating: 4
123 | title: Lovely fabric, not boxy
124 | review: I've bought similar blouses from retailer before that i didn't love as much as this. it's not boxy or wide on me. it's a great length and i find the 3/4 bell sleeves to be elegant. i bought the purple one on sale and went back for the red/cream. my only complaint would be that the stitching could be of better quality. the fabric is very nice -- love the print.
125 | - rating: 4
126 | title: Perfect...but
127 | review: It's an adorable take on the "casual" sweatshirt. i love the layers, but i wish they were just a little different. they hang a little strange. perhaps if the bottom one was a smidge longer? i'm not sure. or if the sweatshirt layer was a little longer and the blouse bit was therefore longer. it all hangs just a little...off? but, it's not so off that it looks bad. i'm petite, so it's cute on, but, i think if you were tall, it might hang a little weird. a warning though, i ordered a small, and the
128 | - rating: 5
129 | title: So lovely i bought two?????
130 | review: This top fits tts as shown on model. loose, flowy but the cut/details/flounce still give some nice shape. i usually take a s or m at retailer depending on the style/make. went with the small in this. i am 5'5",narrow shoulders, ruler shape 34dd. no gaping in the front. fits gracefully flows with details that are classic and add the wow factor to this social or work appropriate top. so complimentary, it would flatter many body types. the neckline is juuuuussst right, i fasten the top button to make
131 | - rating: 4
132 | title: Cute but short
133 | review: Tiny collar and short bodice length. lovely fabric, soft and silky.
134 | - rating: 4
135 | title: Easy top
136 | review: I like this top. another reviewer mentioned that it was a great top for a busty girl...i agree. i'm a 32g and could have sized down to a small, but i kept the medium because i wanted the extra length to have the option of wearing it as a dress. the small may have caused slight button pulling, but since the fabric has stretch and the top has a generous cut, it wouldn?t look insane. my only issue is the fabric pills. i wore the top once, and the fabric near the armpit pilled a lot, especially for
137 | - rating: 5
138 | title: Cozy!
139 | review: I am 5'8" and about 145 lbs - curvy through hips, small up top and i got a small and it fit great. i got it in the wine and it is a pretty color for fall into winter. the fabric is super soft and cozy!
140 | - rating: 5
141 | title: Conversationalist button-down with cats
142 | review: I love this shirt, very comfortable and has a nice drape. i originally bought the size i normally wear but found it to be a little tight across my chest, so i returned for the next size up.very well made and not sheer so did not have to wear a tank underneath.
143 | - rating: 5
144 | title: Summer sweater
145 | review: This is a thick material, but it is also airy. i got the white and don't have to worry about it being see through. it is a little boxy, but the hem hits a good length and doesn't bother me. really great for work in the summer and going between really hot and really cold temperatures. a good basic piece that can be dressed up or down that is definitely worth getting.
146 | - rating: 4
147 | title: Fall dots
148 | review: Great transition into fall for layering. easy breezy dress up or down-is thin so it does need a cami
149 | - rating: 4
150 | title: Pretty but boxy
151 | review: This looked more fitted on the model. the xs was still boxy on me, but i loved the eyelet hem so much i kept it!
152 | - rating: 4
153 | title: Pretty but...
154 | review: I really had high hopes for this sweater however, unfortunately it is not as nice as i expected it to be. i own a few angel of the north sweaters and i really don't feel like this particular sweater meets their usual standard. first of all the yarn feels very cheap and is not very soft. secondly, the actual knit also appears to be very cheap. i ordered the white sweater in my usual size medium and while it fits, i really can't see how they were able to get the neck of the sweater to go off the m
155 | - rating: 5
156 | title: Cardigan love
157 | review: This cardigan runs slightly large as it is boxy, which i love. the color is exactly as pictured and it is soooo soft! can't wait to wear it with a t-shirt and jeans!
158 | - rating: 5
159 | title: I love it!!
160 | review: I knew i would keep it as soon as i put it on -- comfortable, beautiful fabric, cute gold buttons! as well, due to the cut, it feels roomy, but has a slender fit around the arms. the "denim" color has beautiful detail, but i don't think i would like the maroon solid. i wish there was another nonsolid color -- i would buy another one!
161 | - rating: 2
162 | title: Short but wide
163 | review: This is a crop top but super boxy and wide. i expected it to be more fitted in the waist. since it sits out away a from the body a lot, it was not flattering. i also felt the stitching to be a bit matronly for me. too bad as i am having such a hard time finding tops and i'm willing to pay full price for something that works.
164 | - rating: 4
165 | title: Old retailer
166 | review: Ok, so yes, it is true that the top part where the sleeves are can be snug a little, especially if you work out, but overall i found the top to run true to my regular size (i only tried on the 0, didn't try bigger). the length was great for me being short, i didn't think i need the petite (i usually do for pants and skirts/dresses, tops on and off). the look of the shirt is also classic, flowy and it looked great with e half tuck. the embroidery is also nice though one part of it seemed loose. a
167 | - rating: 5
168 | title: Love the style
169 | review: I was very happy to receive this as a birthday gift. i usually wear an xs or s in clothes i buy from retailer. i was given this in a s, but it wasn't even close to meeting in the front, so i returned it for a m. it doesn't button anyway, but i didn't want it to look like it wouldn't meet at all. i tried on a size s sweater in this brand while i was in the store, and it fit perfectly, so this one is just running small. overall, i love the lace detail on the front and the softness of the sweater.
170 | - rating: 5
171 | title: Perfect and at the right price!
172 | review: In my opinion, the quality and uniqueness of this piece far definitely warrants a full price purchase. adorable cut and style, comfortable and cute. it's the perfect flexible top to wear to work with a skirt or more casually with your favorite pair of jeans. love this!
173 | - rating: 5
174 | title: Cozy casual - perfect for fall
175 | review: The color is perfect for fall and into winter. only the inside collar on the photo shows the subtle plaid lining. the lining was an unexpected bonus, and adds even more dimension to the shirt if the sleeves are turned up.the fit is true to size and the 'stressed' velvet fabric is current for this season.i'm going to enjoy wearing this shirt.
176 | - rating: 5
177 | title: Beautiful blouse- does run large
178 | review: Saw this in the store yesterday and had my daughter try it on. it was really cute on her and she loved it. in fairness to the other reviewers she tried on a size 0 which was perfect for her petite frame and i can see how this blouse would run really big in the larger sizes. but the fabric and detail is so pretty and it was on sale.
179 | - rating: 5
180 | title: Comfy top
181 | review: I bought the black and love it. i bought both the medium and large but am going to keep the medium. it was plenty roomy. i'm 5'2" and about fluctuate between 125-130 lbs. i usually wear a medium or large in retailer-wear but i like things roomy. have to hide a little extra these days!
182 |
183 | ---
--------------------------------------------------------------------------------
/003-beautiful-soup-with-static-site-and-server-side-rendered-web-scraping/website/_sass/cloudcannon.scss:
--------------------------------------------------------------------------------
// CloudCannon "edit this page" link. Hidden by default; revealed by the
// .cms-editor-active rules below when the CMS editor is running.
.editor-link {
  display: none !important;
  margin-top: 0;

  .btn {
    border: 0;
    border-radius: 2px;
    width: 100%;
    max-width: 500px;
    box-sizing: border-box;
    text-decoration: none;
    padding: 10px 15px;
    margin: 0;
    // Fixed: a dead `font-size: 2rem;` earlier in this rule was always
    // overridden by this later declaration of equal specificity.
    font-size: 18px;
  }

  // Shared button look for the nav-embedded link (`nav &`) and the
  // in-page `.btn` variant.
  nav &, .btn {
    cursor: pointer;
    background-color: #f7e064;
    color: #333;
    box-shadow: 1px 1px 5px 0 rgba(0, 0, 0, 0.2);
    border-radius: 4px;
    line-height: 1;

    &:hover {
      background-color: #f4d525;
      color: #333;
    }
  }
}
32 |
// Reveal the editor link when the page carries .cms-editor-active
// (presumably toggled by the CloudCannon editor — verify against the theme docs).
.cms-editor-active .editor-link {
  display: block !important;
}

// Inside the nav the link flows inline with the other nav items.
.cms-editor-active nav .editor-link {
  display: inline !important;
}
40 |
--------------------------------------------------------------------------------
/003-beautiful-soup-with-static-site-and-server-side-rendered-web-scraping/website/_sass/contact.scss:
--------------------------------------------------------------------------------
// Embedded map panel on the contact page.
.map {
  height: 499px;
  border: 1px solid #DDDDDD;
  border-radius: 3px;
}
6 |
// Contact page layout: form and details sit side by side on desktop
// (details first via `order`), stacked full-width on narrow screens.
.contact-box {
  @extend %flexbox;
  @include flex-flow(wrap);
  max-width: 1000px;
  margin: 0 auto 20px auto;
  padding: 0 0 50px 0;

  form {
    // Fixed: declaration was previously unterminated (no semicolon),
    // which only worked because it was the last one in the rule.
    width: 100%;
  }

  h3 {
    margin: 0;
    font-size: 1.8rem;
  }

  .contact-form, .contact-details {
    padding: 20px;
    width: 100%;
    box-sizing: border-box;
    @media #{$desktop} {
      -webkit-flex: 1;
      flex: 1;
      order: 2;
    }
  }

  .contact-details {
    font-size: .8em;
    @media #{$desktop} {
      // Details column moves ahead of the form on desktop.
      order: 1;
    }

    a svg {
      position: relative;
      top: 5px;
    }

    a {
      // Long links/addresses truncate with an ellipsis instead of wrapping.
      white-space: nowrap;
      overflow: hidden;
      text-overflow: ellipsis;
      display: block;
    }
  }
}
53 |
--------------------------------------------------------------------------------
/003-beautiful-soup-with-static-site-and-server-side-rendered-web-scraping/website/_sass/elements.scss:
--------------------------------------------------------------------------------
// Base element resets.
html, body {
  margin: 0;
  padding: 0;
}

body {
  // Fixed: the stack previously ended without a generic family, so a
  // system with none of the named fonts fell back to the browser default
  // (often a serif). `sans-serif` guarantees a sans face everywhere.
  font-family: "San Francisco", "Helvetica Neue", "Helvetica", "Arial", sans-serif;
  -webkit-font-smoothing: antialiased;
}

a {
  color: $brand-color;
  text-decoration: none;
}

a:hover {
  text-decoration: underline;
}

img {
  // Images never overflow their container.
  max-width: 100%;
}
23 |
// Headings are light by default; only <strong> spans inside h1/h2 go bold.
h1 strong, h2 strong {
  font-weight: 700;
}

h1 {
  font-weight: 300;
  font-size: 2.3em;
  margin: 0;
}

h2 {
  font-weight: 300;
  font-size: 2.2em;
  margin: 0;
}

h3 {
  font-size: 2rem;
  margin: 20px 0 10px 0;
}

h4 {
  font-size: 1.4rem;

  a {
    color: #000;
  }
}

// Shared body-copy styling for text-bearing elements.
p, address, label, ul {
  font-size: 1.2rem;
  color: #666;
  margin-bottom: 20px;
  line-height: 1.4em;
}

ul {
  padding-left: 1em;
}
--------------------------------------------------------------------------------
/003-beautiful-soup-with-static-site-and-server-side-rendered-web-scraping/website/_sass/footer.scss:
--------------------------------------------------------------------------------
footer {
  padding: 50px 0 50px 0;
  // Fixed: was `1.1.1rem`, an invalid length. Browsers drop the whole
  // declaration, so the footer silently inherited the body font size.
  font-size: 1.1rem;
  position: relative;
  border: 1px solid #eee;

  .copyright {
    font-size: .9rem;
    margin: 20px auto 0;
  }

  h2 {
    font-size: 1.4rem;
    margin: 30px 0;
  }

  // Footer link columns: flex row above the $mid-point breakpoint,
  // naturally stacked below it.
  .footer-columns {
    @media #{$mid-point} {
      display: flex;
    }
    @include flex-flow(wrap);
    list-style: none;
    padding: 0;

    @media #{$mid-point} {
      // Negative side margins cancel the per-column padding at the edges.
      margin: -10px -60px 10px -60px;
    }

    & > li {
      @media #{$mid-point} {
        flex: 1;
        padding: 0 60px;
      }

      box-sizing: border-box;

      &.footer-nav {
        @media #{$mid-point} {
          // Nav column shrinks to its content width.
          flex: 0;
        }
      }
      ul {
        padding: 0;
        list-style: none;
        li {
          font-size: 1.1rem;
          margin: 11px 0;
          a {
            white-space: nowrap;
            color: #999;
          }
        }
      }
    }

    .about {
      svg path {
        fill: $brand-color;
      }
    }

    p {
      font-size: 1.1rem;
      color: #999;
    }

    .email {
      margin: 0;
    }

    h4 {
      margin: 0 0 1.1rem 0;
      font-size: .9em;
    }
  }

  a {
    text-decoration: none;
  }
}
81 |
// Round social-media buttons; each network modifier sets its brand colour.
.social-icons {
  svg {
    height: 100%;
    width: 100%;

    // Icon artwork renders white against the coloured disc.
    &, path {
      fill: #fff;
    }
  }

  a {
    border-radius: 50px;
    display: inline-block;
    height: 20px;
    line-height: 0;
    margin: 0 10px 20px 0;
    padding: 8px;
    width: 20px;

    // Brand colours, one modifier per network (alphabetical).
    &.email { background-color: #ff9d09; }
    &.facebook { background-color: #3b5998; }
    &.google-plus { background-color: #db4437; }
    &.instagram { background-color: #f167f5; }
    &.linkedin { background-color: #0077b5; }
    &.pinterest { background-color: #C92228; }
    &.twitter { background-color: #55acee; }
    &.youtube { background-color: #cd201f; }
  }
}
133 |
--------------------------------------------------------------------------------
/003-beautiful-soup-with-static-site-and-server-side-rendered-web-scraping/website/_sass/forms.scss:
--------------------------------------------------------------------------------
// Primary action buttons and submit inputs share one appearance.
.button a,
.button button,
input[type=submit] {
  color: #fff;
  text-decoration: none;
  padding: 15px 30px;
  background: $brand-color;
  border-radius: 3px;
  border: 1px solid lighten($brand-color, 10%);
  -webkit-appearance: none;
}

.button a:hover,
.button button:hover,
input[type=submit]:hover {
  background: darken($brand-color, 10%);
  cursor: pointer;
}

// Pressed state: the transparent 3px top border nudges the label down,
// giving a subtle "pushed in" effect.
.button a:active,
.button button:active,
input[type=submit]:active {
  border-width: 1px;
  // Fixed: declaration was previously unterminated (no semicolon).
  border-top: 3px solid transparent;
}

// Translucent "ghost" variant for coloured hero backgrounds.
.button.alt a {
  background: rgba(255,255,255,0.15);
  border-radius: 3px;
  border: 1px solid rgba(255, 255, 255, 0.3);
  padding: 16px 50px;
}

.button.alt a:hover {
  background: #fff;
  color: $brand-color;
}
38 |
// Form controls inherit the page font rather than the UA default.
textarea, input, button, select { font-family: inherit; font-size: inherit; }

input[type=submit] {
  margin: 20px 0 0 0;
}

// Controls are full-width block elements by default; the .radio-input
// rule below opts radio/checkbox inputs back out of this.
label, input, textarea, select {
  display: block;
  width: 100%;
  box-sizing: border-box;
  border-radius: 3px;
}

.radio-input {
  line-height: 1;
  margin: 20px 0;
  cursor: pointer;

  input {
    // Override the full-width block default so the control sits
    // inline next to its label text.
    display: inline-block;
    width: auto;
    cursor: pointer;
  }
}

textarea {
  resize: vertical;
  height: 150px;
}

label {
  margin: 20px 0 5px 0;
}

label:first-child {
  margin: 0 0 5px 0;
}

label:last-child, input:last-child {
  margin-bottom: 0;
}

input, textarea, select {
  padding: 10px;
  font-size: 1em;
}

input, textarea {
  outline: none;
  border: 1px solid #DDDDDD;
}

// Brand-coloured glow replaces the suppressed native focus outline above.
input[type=text]:focus, input[type=email]:focus, input[type=password]:focus, textarea:focus {
  box-shadow: 0 0 5px $brand-color;
  border: 1px solid $brand-color;
}
95 |
--------------------------------------------------------------------------------
/003-beautiful-soup-with-static-site-and-server-side-rendered-web-scraping/website/_sass/layout.scss:
--------------------------------------------------------------------------------
// Centered page wrappers with a shared horizontal gutter.
.container, .text-container {
  margin: 0 auto;
  padding: 0 20px;
  position: relative;
}

// Narrow measure for long-form text.
.text-container {
  max-width: 750px;
}

.container {
  max-width: 1140px;

  // Full-bleed variant: no max-width cap, no gutter.
  &.max-container {
    max-width: 100%;
    padding: 0;
  }
}

// Hero banner: centered white text layered above the page background.
.hero {
  color: #fff;
  position: relative;
  text-align: center;
  z-index: 9;

  h2 {
    font-size: 3.2rem;
    font-weight: bold;
    line-height: 1.5;
  }

  @media (min-width: 500px) {
    h2 {
      font-size: 3.5rem;
    }
  }

  p {
    color: #fff;
  }
}
41 |
// White page body layered above the hero/background.
.content {
  background: #fff;
  padding: 1px 0 0 0; // 1px presumably prevents child top margins collapsing through the edge — TODO confirm
  position: relative;
}

// Logo mark + site name in the header.
.company-name {
  display: inline-block;
  position: relative;
  top: 4px;
  a {
    display: block;
  }

  svg {
    display: inline-block;
    height: 25px;
    width: 25px;
  }

  span {
    // Nudge the text to align with the 25px logo mark.
    font-size: 16px;
    position: relative;
    top: -6px;
    left: 3px;
  }
}

// Screenshot framed with a grey "browser chrome" bar (top padding).
.screenshot{
  height: auto;
  display: block;
  margin: 0 auto;
  border-radius: 2px;
  padding: 20px 0 0;
  // NOTE(review): the data URI below is empty — the inline SVG payload
  // appears to have been stripped at some point; verify against the
  // original theme source.
  background: #DDD url('data:image/svg+xml;utf8,') 4px 4px no-repeat;
  box-shadow: 0px 0px 50px rgba(0,0,0,.2);
}
79 |
section {
  padding: 100px 0;
}

// Adjacent sections share a single gap instead of doubling it.
section + section {
  padding-top: 0;
}

.subtext {
  margin-top: 10px;
  text-align: center;
}


.cta {
  margin: 60px 0;
}

.page h2 {
  text-align: center;
}

blockquote {
  padding: 18px 25px;
  margin: 0 auto;
  // Quote pairs consumed by open-quote/close-quote below:
  // curly double quotes (\201C \201D) then curly singles (\2018 \2019)
  // for nested quotations.
  quotes: "\201C""\201D""\2018""\2019";
  font-style: italic;
  line-height: 2.5;
  font-size: 1.1em;
  max-width: 900px;

  .author {
    display: block;
    font-weight: bold;
    margin: 20px 0 0 0;
    font-size: 1.1em;
    font-style: normal;
    text-align: right;
  }

  p {
    display: inline;
  }
}

// Oversized decorative quotation marks around the quoted text.
blockquote .quote:before,
blockquote .quote:after {
  color: #ccc;
  content: open-quote;
  font-size: 4em;
  line-height: 0.1em;
  margin-right: 0.1em;
  vertical-align: -0.4em;
}

blockquote .quote:after {
  content: close-quote;
}

.page {
  margin-bottom: 0;
  padding-bottom: 80px;
}

.center-text {
  text-align: center;
}

// Three-up feature grid; items wrap and never shrink below 200px.
.thirds {
  display: flex;
  margin: 0 -20px 0 -40px;
  flex-wrap: wrap;
}

.thirds div {
  flex: 1 1 200px;
  padding: 100px 40px 0 40px;
}
158 |
--------------------------------------------------------------------------------
/003-beautiful-soup-with-static-site-and-server-side-rendered-web-scraping/website/_sass/mixins/columns.scss:
--------------------------------------------------------------------------------
// Cross-browser `columns` shorthand.
// Fixed: the unprefixed standard property was emitted FIRST, so in a
// browser supporting both forms a vendor-prefixed fallback (emitted
// later) would win. Prefixes now come first and the standard property
// last — matching the convention used by the flexbox mixins in this repo.
@mixin columns($value) {
  -webkit-columns: $value;
  -moz-columns: $value;
  columns: $value;
}
6 |
--------------------------------------------------------------------------------
/003-beautiful-soup-with-static-site-and-server-side-rendered-web-scraping/website/_sass/mixins/flexbox.scss:
--------------------------------------------------------------------------------
1 | // Flexbox Mixins
2 | // http://philipwalton.github.io/solved-by-flexbox/
3 | // https://github.com/philipwalton/solved-by-flexbox
4 | //
5 | // Copyright (c) 2013 Brian Franco
6 | //
7 | // Permission is hereby granted, free of charge, to any person obtaining a
8 | // copy of this software and associated documentation files (the
9 | // "Software"), to deal in the Software without restriction, including
10 | // without limitation the rights to use, copy, modify, merge, publish,
11 | // distribute, sublicense, and/or sell copies of the Software, and to
12 | // permit persons to whom the Software is furnished to do so, subject to
13 | // the following conditions:
14 | // The above copyright notice and this permission notice shall be included
15 | // in all copies or substantial portions of the Software.
16 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17 | // OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 | // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
19 | // IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
20 | // CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21 | // TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22 | // SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23 | //
24 | // This is a set of mixins for those who want to mess around with flexbox
25 | // using the native support of current browsers. For full support table
26 | // check: http://caniuse.com/flexbox
27 | //
28 | // Basically this will use:
29 | //
30 | // * Fallback, old syntax (IE10, mobile webkit browsers - no wrapping)
31 | // * Final standards syntax (FF, Safari, Chrome, IE11, Opera)
32 | //
33 | // This was inspired by:
34 | //
35 | // * http://dev.opera.com/articles/view/advanced-cross-browser-flexbox/
36 | //
37 | // With help from:
38 | //
39 | // * http://w3.org/tr/css3-flexbox/
40 | // * http://the-echoplex.net/flexyboxes/
41 | // * http://msdn.microsoft.com/en-us/library/ie/hh772069(v=vs.85).aspx
42 | // * http://css-tricks.com/using-flexbox/
43 | // * http://dev.opera.com/articles/view/advanced-cross-browser-flexbox/
44 | // * https://developer.mozilla.org/en-us/docs/web/guide/css/flexible_boxes
45 |
46 | //----------------------------------------------------------------------
47 |
48 | // Flexbox Containers
49 | //
50 | // The 'flex' value causes an element to generate a block-level flex
51 | // container box.
52 | //
53 | // The 'inline-flex' value causes an element to generate a inline-level
54 | // flex container box.
55 | //
56 | // display: flex | inline-flex
57 | //
58 | // http://w3.org/tr/css3-flexbox/#flex-containers
59 | //
60 | // (Placeholder selectors for each type, for those who rather @extend)
61 |
@mixin flexbox {
  // Fallbacks first, standard syntax last so it wins where supported
  // (old box model, then prefixed flex, then -ms- for IE10).
  display: -webkit-box;
  display: -webkit-flex;
  display: -moz-flex;
  display: -ms-flexbox;
  display: flex;
}

// Placeholder selector for callers that prefer @extend over @include.
%flexbox { @include flexbox; }
71 |
72 | //----------------------------------
73 |
@mixin inline-flex {
  // Same fallback cascade as `flexbox`, for inline-level containers.
  display: -webkit-inline-box;
  display: -webkit-inline-flex;
  display: -moz-inline-flex;
  display: -ms-inline-flexbox;
  display: inline-flex;
}

%inline-flex { @include inline-flex; }
83 |
84 | //----------------------------------------------------------------------
85 |
86 | // Flexbox Direction
87 | //
88 | // The 'flex-direction' property specifies how flex items are placed in
89 | // the flex container, by setting the direction of the flex container's
90 | // main axis. This determines the direction that flex items are laid out in.
91 | //
92 | // Values: row | row-reverse | column | column-reverse
93 | // Default: row
94 | //
95 | // http://w3.org/tr/css3-flexbox/#flex-direction-property
96 |
@mixin flex-direction($value: row) {
  // The old -webkit-box model has no single direction property; each
  // value maps to an orient (horizontal/vertical) + direction
  // (normal/reverse) pair.
  @if $value == row-reverse {
    -webkit-box-direction: reverse;
    -webkit-box-orient: horizontal;
  } @else if $value == column {
    -webkit-box-direction: normal;
    -webkit-box-orient: vertical;
  } @else if $value == column-reverse {
    -webkit-box-direction: reverse;
    -webkit-box-orient: vertical;
  } @else {
    // `row` (the default) and any other value fall through to here.
    -webkit-box-direction: normal;
    -webkit-box-orient: horizontal;
  }
  -webkit-flex-direction: $value;
  -moz-flex-direction: $value;
  -ms-flex-direction: $value;
  flex-direction: $value;
}
// Shorter version:
@mixin flex-dir($args...) { @include flex-direction($args...); }
118 |
119 | //----------------------------------------------------------------------
120 |
121 | // Flexbox Wrap
122 | //
123 | // The 'flex-wrap' property controls whether the flex container is single-line
124 | // or multi-line, and the direction of the cross-axis, which determines
125 | // the direction new lines are stacked in.
126 | //
127 | // Values: nowrap | wrap | wrap-reverse
128 | // Default: nowrap
129 | //
130 | // http://w3.org/tr/css3-flexbox/#flex-wrap-property
131 |
@mixin flex-wrap($value: nowrap) {
  // No Webkit Box fallback.
  -webkit-flex-wrap: $value;
  -moz-flex-wrap: $value;
  @if $value == nowrap {
    // The -ms- draft syntax used "none" where the standard says "nowrap".
    -ms-flex-wrap: none;
  } @else {
    -ms-flex-wrap: $value;
  }
  flex-wrap: $value;
}
143 |
144 | //----------------------------------------------------------------------
145 |
146 | // Flexbox Flow (shorthand)
147 | //
148 | // The 'flex-flow' property is a shorthand for setting the 'flex-direction'
149 | // and 'flex-wrap' properties, which together define the flex container's
150 | // main and cross axes.
151 | //
152 | // Values: |
153 | // Default: row nowrap
154 | //
155 | // http://w3.org/tr/css3-flexbox/#flex-flow-property
156 |
// $values is a space-separated (direction wrap) pair, e.g. (row wrap).
@mixin flex-flow($values: (row nowrap)) {
  // No Webkit Box fallback.
  -webkit-flex-flow: $values;
  -moz-flex-flow: $values;
  -ms-flex-flow: $values;
  flex-flow: $values;
}
164 |
165 | //----------------------------------------------------------------------
166 |
167 | // Flexbox Order
168 | //
169 | // The 'order' property controls the order in which flex items appear within
170 | // their flex container, by assigning them to ordinal groups.
171 | //
172 | // Default: 0
173 | //
174 | // http://w3.org/tr/css3-flexbox/#order-property
175 |
@mixin order($int: 0) {
  // Old box-model ordinal groups are 1-based, hence the +1 offset.
  -webkit-box-ordinal-group: $int + 1;
  -webkit-order: $int;
  -moz-order: $int;
  -ms-flex-order: $int;
  order: $int;
}
183 |
184 | //----------------------------------------------------------------------
185 |
186 | // Flexbox Grow
187 | //
188 | // The 'flex-grow' property sets the flex grow factor. Negative numbers
189 | // are invalid.
190 | //
191 | // Default: 0
192 | //
193 | // http://w3.org/tr/css3-flexbox/#flex-grow-property
194 |
@mixin flex-grow($int: 0) {
  -webkit-box-flex: $int;
  -webkit-flex-grow: $int;
  -moz-flex-grow: $int;
  -ms-flex-positive: $int; // IE10 draft name for flex-grow
  flex-grow: $int;
}
202 |
203 | //----------------------------------------------------------------------
204 |
205 | // Flexbox Shrink
206 | //
207 | // The 'flex-shrink' property sets the flex shrink factor. Negative numbers
208 | // are invalid.
209 | //
210 | // Default: 1
211 | //
212 | // http://w3.org/tr/css3-flexbox/#flex-shrink-property
213 |
214 | @mixin flex-shrink($int: 1) { // no 2009 box fallback: old spec has no shrink concept
215 | -webkit-flex-shrink: $int;
216 | -moz-flex-shrink: $int;
217 | -ms-flex-negative: $int; // 2012 MS name for flex-shrink
218 | flex-shrink: $int;
219 | }
220 |
221 | //----------------------------------------------------------------------
222 |
223 | // Flexbox Basis
224 | //
225 | // The 'flex-basis' property sets the flex basis. Negative lengths are invalid.
226 | //
227 | // Values: Like "width"
228 | // Default: auto
229 | //
230 | // http://www.w3.org/TR/css3-flexbox/#flex-basis-property
231 |
232 | @mixin flex-basis($value: auto) {
233 | -webkit-flex-basis: $value;
234 | -moz-flex-basis: $value;
235 | -ms-flex-preferred-size: $value; // 2012 MS name for flex-basis
236 | flex-basis: $value;
237 | }
238 |
239 | //----------------------------------------------------------------------
240 |
241 | // Flexbox "Flex" (shorthand)
242 | //
243 | // The 'flex' property specifies the components of a flexible length: the
244 | // flex grow factor and flex shrink factor, and the flex basis. When an
245 | // element is a flex item, 'flex' is consulted instead of the main size
246 | // property to determine the main size of the element. If an element is
247 | // not a flex item, 'flex' has no effect.
248 | //
249 | // Values: none | [ <flex-grow> <flex-shrink>? || <flex-basis> ]
250 | // Default: See individual properties (1 1 0).
251 | //
252 | // http://w3.org/tr/css3-flexbox/#flex-property
253 |
254 | @mixin flex($fg: 1, $fs: null, $fb: null) {
255 | 
256 | // Set a variable to be used by box-flex properties
257 | $fg-boxflex: $fg;
258 | 
259 | // Box-Flex only supports a flex-grow value so let's grab the
260 | // first item in the list and just return that.
261 | @if type-of($fg) == 'list' {
262 | $fg-boxflex: nth($fg, 1);
263 | }
264 | 
265 | -webkit-box-flex: $fg-boxflex;
266 | -webkit-flex: $fg $fs $fb; // Sass omits null items from the list, so flex(1) emits `flex: 1`
267 | -moz-box-flex: $fg-boxflex;
268 | -moz-flex: $fg $fs $fb;
269 | -ms-flex: $fg $fs $fb;
270 | flex: $fg $fs $fb;
271 | }
272 |
273 | //----------------------------------------------------------------------
274 |
275 | // Flexbox Justify Content
276 | //
277 | // The 'justify-content' property aligns flex items along the main axis
278 | // of the current line of the flex container. This is done after any flexible
279 | // lengths and any auto margins have been resolved. Typically it helps distribute
280 | // extra free space leftover when either all the flex items on a line are
281 | // inflexible, or are flexible but have reached their maximum size. It also
282 | // exerts some control over the alignment of items when they overflow the line.
283 | //
284 | // Note: 'space-*' values not supported in older syntaxes.
285 | //
286 | // Values: flex-start | flex-end | center | space-between | space-around
287 | // Default: flex-start
288 | //
289 | // http://w3.org/tr/css3-flexbox/#justify-content-property
290 |
291 | @mixin justify-content($value: flex-start) {
292 | @if $value == flex-start {
293 | -webkit-box-pack: start; // 2009/2012 syntaxes drop the 'flex-' prefix
294 | -ms-flex-pack: start;
295 | } @else if $value == flex-end {
296 | -webkit-box-pack: end;
297 | -ms-flex-pack: end;
298 | } @else if $value == space-between {
299 | -webkit-box-pack: justify; // old keyword for space-between
300 | -ms-flex-pack: justify;
301 | } @else if $value == space-around {
302 | -ms-flex-pack: distribute; // 2009 box spec has no space-around equivalent
303 | } @else {
304 | -webkit-box-pack: $value;
305 | -ms-flex-pack: $value;
306 | }
307 | -webkit-justify-content: $value;
308 | -moz-justify-content: $value;
309 | justify-content: $value;
310 | }
311 | // Shorter version:
312 | @mixin flex-just($args...) { @include justify-content($args...); }
313 |
314 | //----------------------------------------------------------------------
315 |
316 | // Flexbox Align Items
317 | //
318 | // Flex items can be aligned in the cross axis of the current line of the
319 | // flex container, similar to 'justify-content' but in the perpendicular
320 | // direction. 'align-items' sets the default alignment for all of the flex
321 | // container's items, including anonymous flex items. 'align-self' allows
322 | // this default alignment to be overridden for individual flex items. (For
323 | // anonymous flex items, 'align-self' always matches the value of 'align-items'
324 | // on their associated flex container.)
325 | //
326 | // Values: flex-start | flex-end | center | baseline | stretch
327 | // Default: stretch
328 | //
329 | // http://w3.org/tr/css3-flexbox/#align-items-property
330 |
331 | @mixin align-items($value: stretch) {
332 | @if $value == flex-start {
333 | -webkit-box-align: start; // old syntaxes drop the 'flex-' prefix
334 | -ms-flex-align: start;
335 | } @else if $value == flex-end {
336 | -webkit-box-align: end;
337 | -ms-flex-align: end;
338 | } @else {
339 | -webkit-box-align: $value;
340 | -ms-flex-align: $value;
341 | }
342 | -webkit-align-items: $value;
343 | -moz-align-items: $value;
344 | align-items: $value;
345 | }
346 |
347 | //----------------------------------
348 |
349 | // Flexbox Align Self
350 | //
351 | // Values: auto | flex-start | flex-end | center | baseline | stretch
352 | // Default: auto
353 |
354 | @mixin align-self($value: auto) {
355 | // No Webkit Box Fallback.
356 | -webkit-align-self: $value;
357 | -moz-align-self: $value;
358 | @if $value == flex-start {
359 | -ms-flex-item-align: start; // 2012 MS syntax drops the 'flex-' prefix
360 | } @else if $value == flex-end {
361 | -ms-flex-item-align: end;
362 | } @else {
363 | -ms-flex-item-align: $value;
364 | }
365 | align-self: $value;
366 | }
367 |
368 | //----------------------------------------------------------------------
369 |
370 | // Flexbox Align Content
371 | //
372 | // The 'align-content' property aligns a flex container's lines within the
373 | // flex container when there is extra space in the cross-axis, similar to
374 | // how 'justify-content' aligns individual items within the main-axis. Note,
375 | // this property has no effect when the flexbox has only a single line.
376 | //
377 | // Values: flex-start | flex-end | center | space-between | space-around | stretch
378 | // Default: stretch
379 | //
380 | // http://w3.org/tr/css3-flexbox/#align-content-property
381 |
382 | @mixin align-content($value: stretch) {
383 | // No Webkit Box Fallback.
384 | -webkit-align-content: $value;
385 | -moz-align-content: $value;
386 | @if $value == flex-start {
387 | -ms-flex-line-pack: start; // 2012 MS syntax drops the 'flex-' prefix
388 | } @else if $value == flex-end {
389 | -ms-flex-line-pack: end;
390 | } @else {
391 | -ms-flex-line-pack: $value;
392 | }
393 | align-content: $value;
394 | }
395 |
--------------------------------------------------------------------------------
/003-beautiful-soup-with-static-site-and-server-side-rendered-web-scraping/website/_sass/navigation.scss:
--------------------------------------------------------------------------------
1 | @media #{$below-mid-point} { // small screens: reveal nav links when an ancestor has .nav-open
2 | .nav-open {
3 | 
4 | nav {
5 | border: 0;
6 | z-index: 2;
7 | 
8 | a {
9 | display: block; // overrides the `display: none` that `nav a` sets below the mid-point
10 | }
11 | }
12 | }
13 | }
14 |
15 | header {
16 | color: #fff;
17 | padding: 20px 0 59px 0;
18 | background: linear-gradient(135deg, $brand-color 0%, $brand-color-2 100%);
19 | position: relative; // positioning context for the :before/:after decorative shapes
20 | 
21 | p {
22 | margin: 0;
23 | }
24 | @media #{$mid-point} {
25 | &:before, &:after {
26 | content: "";
27 | width: 25%;
28 | display: block;
29 | position: absolute;
30 | border-radius: 50% 50% 50% 50% / 60% 60% 40% 40%;
31 | transform: translate(-50%, 0); // NOTE(review): overridden by the skewX() transforms below — confirm intent
32 | 
33 | }
34 | 
35 | &:before {
36 | transform: skewX(-15deg);
37 | box-shadow: 99px 29px 170px -24px rgba(0,0,0,.05);
38 | top: -254px;
39 | bottom: 50%;
40 | left: 43%; // fixed: added missing trailing semicolon for consistency with the rest of the file
41 | }
42 | 
43 | &:after {
44 | box-shadow: inset 92px -41px 104px -28px rgba(0,0,0,.05);
45 | transform: skewX(-12deg);
46 | top: 47%;
47 | left: 40%;
48 | bottom: -121px;
49 | }
50 | }
51 | 
52 | a {
53 | color: #fff;
54 | text-decoration: none;
55 | z-index: 1;
56 | position: relative;
57 | 
58 | &:hover {
59 | text-decoration: none;
60 | }
61 | }
62 | }
63 |
64 | nav {
65 | width: 100%;
66 | padding: 20px 0 0 0;
67 | 
68 | @media #{$mid-point} {
69 | display: inline-block;
70 | vertical-align: top;
71 | width: auto;
72 | padding: 0 0 0 40px;
73 | }
74 | 
75 | a {
76 | margin: 0 3px;
77 | padding: 20px 10px;
78 | border-bottom: 1px solid rgba(255,255,255,0);
79 | color: rgba(255,255,255,.6);
80 | transition: 200ms ease-in color;
81 | display: none; // hidden on small screens; revealed by the .nav-open rule above or the mid-point media query below
82 | 
83 | @media #{$mid-point} {
84 | display: inline-block;
85 | padding: 10px;
86 | }
87 | 
88 | 
89 | &.nav-toggle { // nav toggle link: only shown below the mid-point
90 | display: inline;
91 | position: absolute;
92 | right: 0;
93 | top: -22px;
94 | font-size: 1.9em;
95 | border: 0;
96 | z-index: 2;
97 | padding: 20px;
98 | 
99 | @media #{$mid-point} {
100 | display: none;
101 | }
102 | 
103 | &:hover {
104 | border: 0;
105 | }
106 | }
107 | }
108 | 
109 | a:hover {
110 | color: #fff;
111 | }
112 | 
113 | 
114 | a.cart {
115 | line-height: 1;
116 | 
117 | i {
118 | font-size: 22px;
119 | line-height: 1;
120 | display: inline-block;
121 | vertical-align: middle;
122 | padding: 0 6px;
123 | }
124 | 
125 | span {
126 | display: inline-block;
127 | vertical-align: middle;
128 | position: relative;
129 | top: 2px;
130 | 
131 | }
132 | }
133 | 
134 | @media #{$mid-point} {
135 | a.cart { // pill-shaped cart link pinned to the top right on wider screens
136 | border-radius: 43px;
137 | color: #fff;
138 | border: 1px solid rgba(255,255,255, .4);
139 | position: absolute;
140 | right: 20px;
141 | top: -2px;
142 | font-size: .8em;
143 | transition: border .3s ease;
144 | 
145 | 
146 | &:hover {
147 | border-color: #fff;
148 | }
149 | }
150 | }
151 | 
152 | a.active {
153 | color: #fff;
154 | }
155 | 
156 | }
157 |
--------------------------------------------------------------------------------
/003-beautiful-soup-with-static-site-and-server-side-rendered-web-scraping/website/_sass/products.scss:
--------------------------------------------------------------------------------
1 | .star-rating .fa-star.unchecked {
2 | color: #ddd; // grey for empty stars
3 | outline: none;
4 | }
5 | 
6 | .product-rating {
7 | display: flex;
8 | flex-direction: column;
9 | margin-bottom: 20px;
10 | }
11 | 
12 | .rating {
13 | display: flex;
14 | align-items: center;
15 | margin-bottom: 10px;
16 | }
17 | 
18 | .rating-number {
19 | font-size: 24px;
20 | font-weight: bold;
21 | margin-right: 10px;
22 | }
23 | 
24 | .stars {
25 | color: #ffc107; // amber for filled stars
26 | font-size: 20px;
27 | }
28 | 
29 | .fa-star {
30 | margin-right: 5px;
31 | }
32 | 
33 | .checked {
34 | color: #ffc107;
35 | }
36 | 
37 | .rating-title {
38 | font-size: 18px;
39 | font-weight: bold;
40 | margin-bottom: 5px;
41 | }
42 | 
43 | .rating-review {
44 | font-size: 16px;
45 | line-height: 1.5;
46 | }
47 |
48 |
49 | .styles {
50 | width: 100%;
51 | background: #eee;
52 | position: relative;
53 | padding: 20px;
54 | box-sizing: border-box;
55 | border-radius: 3px;
56 | 
57 | .style-picker {
58 | z-index: 2;
59 | display: block;
60 | line-height: 0;
61 | position: absolute;
62 | bottom: 20px;
63 | right: 20px;
64 | 
65 | > div { // one round swatch per style option
66 | width: 20px;
67 | height: 20px;
68 | border: 1px solid #000;
69 | display: inline-block;
70 | margin: 0 5px 0 0;
71 | border-radius: 20px;
72 | cursor: pointer;
73 | }
74 | }
75 | 
76 | > div + div { // every direct-child div after the first is hidden
77 | display: none;
78 | }
79 | 
80 | i {
81 | font-size: 14rem;
82 | color: #fff;
83 | }
84 | }
85 |
86 | .product-container {
87 | display: flex;
88 | flex-wrap: wrap;
89 | margin-left: -40px; // offsets the 40px margin-left gutter on each column below
90 | 
91 | .styles {
92 | width: auto;
93 | flex: 1 1 300px;
94 | margin-left: 40px;
95 | }
96 | 
97 | .product-details {
98 | flex: 2 1 500px;
99 | text-align: left;
100 | margin-left: 40px;
101 | 
102 | h4 {
103 | color: $brand-color;
104 | }
105 | }
106 | 
107 | form {
108 | border: 1px solid #444;
109 | padding: 20px;
110 | border-radius: 2px;
111 | 
112 | label:first-child {
113 | margin-top: 0;
114 | }
115 | }
116 | }
117 |
118 | .product-list {
119 | display: flex;
120 | flex-flow: wrap;
121 | margin: 0 -20px; // offsets the 20px margins on non-empty items below
122 | 
123 | &, > li {
124 | list-style: none;
125 | padding: 0;
126 | }
127 | 
128 | > li {
129 | flex: 1 1 300px;
130 | 
131 | &:not(:empty) {
132 | margin: 20px;
133 | }
134 | }
135 | 
136 | h4 {
137 | margin: 10px 0 0 0;
138 | padding-right: 100px; // presumably leaves room for the floated .snipcart-add-item button — verify
139 | }
140 | 
141 | p {
142 | line-height: 1;
143 | margin: 10px 0 0 0;
144 | }
145 | 
146 | .product-description p {
147 | font-size: 1.1rem;
148 | }
149 | 
150 | .snipcart-add-item { // add-to-cart button styled as a plain text link
151 | -webkit-appearance: none;
152 | color: #222;
153 | text-decoration: none;
154 | border: 0;
155 | background: none;
156 | font-weight: bold;
157 | font-size: 1.2em;
158 | padding: 10px 15px 15px;
159 | float: right;
160 | cursor: pointer;
161 | line-height: 1;
162 | margin-top: 10px;
163 | 
164 | &:hover {
165 | background: #f5f5f5;
166 | color: #000;
167 | }
168 | }
169 | }
--------------------------------------------------------------------------------
/003-beautiful-soup-with-static-site-and-server-side-rendered-web-scraping/website/_sass/staff.scss:
--------------------------------------------------------------------------------
1 |
2 | .staff-image {
3 | margin: 25px auto 0 auto;
4 | background: no-repeat center center;
5 | background-size: cover;
6 | border-radius: 300px; // oversized radius renders the image as a circle
7 | width: 70%;
8 | padding-bottom: 70%; // percentage-padding trick: height tracks width, keeping the element square
9 | }
10 | 
11 | .staff {
12 | padding: 0;
13 | list-style: none;
14 | @extend %flexbox;
15 | @include flex-flow(wrap);
16 | text-align: center;
17 | margin: -175px 0 0 0; // negative top margin pulls the cards up over the section above
18 | justify-content: center;
19 | 
20 | li {
21 | padding: 30px 20px;
22 | box-sizing: border-box;
23 | flex: 0 1 300px;
24 | background: #fff;
25 | margin: 0 60px;
26 | border-radius: 3px;
27 | box-shadow: 0 10px 100px 0 rgba(0,0,0,0.1);
28 | }
29 | 
30 | .square-image {
31 | width: 400px;
32 | height: 400px;
33 | 
34 | img {
35 | border-radius: 400px;
36 | }
37 | }
38 | 
39 | .name {
40 | font-size: 1.2rem;
41 | margin-top: 20px;
42 | }
43 | 
44 | .position {
45 | font-size: 1.3rem;
46 | color: #666;
47 | }
48 | }
49 | 
50 | 
51 | .story-points {
52 | h3 {
53 | font-size: 1.3rem;
54 | margin: 0;
55 | color: $brand-color;
56 | }
57 | }
58 |
--------------------------------------------------------------------------------
/003-beautiful-soup-with-static-site-and-server-side-rendered-web-scraping/website/_sass/variables.scss:
--------------------------------------------------------------------------------
1 | $brand-color: #7d59f2;
2 | $brand-color-2: #e371f6;
3 | $default-hero-image: "/images/hero/box-top.jpg";
4 | 
5 | // Breakpoints (interpolated into @media queries as #{$name})
6 | $tablet: "(min-width: 450px)";
7 | $above-tablet: "(min-width: 550px)";
8 | $mid-point: "(min-width: 850px)";
9 | $below-mid-point: "(max-width: 849px)"; // exact complement of $mid-point (no overlap at 849/850)
10 | $desktop: "(min-width: 768px)";
11 |
--------------------------------------------------------------------------------
/003-beautiful-soup-with-static-site-and-server-side-rendered-web-scraping/website/_staff_members/ava.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Ava Sandler
3 | image_path: https://unsplash.it/600/800?image=1062
4 | ---
--------------------------------------------------------------------------------
/003-beautiful-soup-with-static-site-and-server-side-rendered-web-scraping/website/_staff_members/steph.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Steph Poco
3 | image_path: https://unsplash.it/600/800?image=823
4 | ---
5 |
--------------------------------------------------------------------------------
/003-beautiful-soup-with-static-site-and-server-side-rendered-web-scraping/website/about.html:
--------------------------------------------------------------------------------
1 | ---
2 | title: Our Story
3 | hero: Our story
4 | ---
5 |
6 |
What more could you want from life than drawing animals and bringing happiness to others? We hope this passion shows through in our work.
21 |
22 |
23 |
Animal
24 |
It's easy to forget that we're all part of the animal kingdom. Fur is a brand that helps you get in touch with your inner animal.
25 |
26 |
27 |
Style
28 |
We like to keep things plain and simple around here to help the animals speak for themselves.
29 |
30 |
31 |
--------------------------------------------------------------------------------
/003-beautiful-soup-with-static-site-and-server-side-rendered-web-scraping/website/cloudcannon.config.yml:
--------------------------------------------------------------------------------
1 | _select_data:
2 | garment_types:
3 | - hat
4 | - top
5 |
6 | _inputs:
7 | map:
8 | comment: "Update the map location and display settings."
9 | latitude:
10 | comment: "Coordinates for the center marker on the map."
11 | longitude:
12 | comment: "Coordinates for the center marker on the map."
13 | zoom:
14 | comment: "The zoom level for the map."
15 | pricing_table:
16 | comment: "Update the information in the pricing tables."
17 | highlight:
18 | comment: "Emphasize the text"
19 | color:
20 | comment: "The background color used in the plan name and call to action."
21 | new_window:
22 | comment: "Open link in new window"
23 | description:
24 | comment: "This is used for facebook shares of the page and google indexing"
25 |
26 | collections_config:
27 | staff_members:
28 | _enabled_editors:
29 | - data
30 | icon: account_circle
31 | schemas:
32 | default:
33 | path: schemas/staff_members.md
34 | products:
35 | _enabled_editors:
36 | - data
37 | output: true
38 | icon: shopping_cart
39 | schemas:
40 | default:
41 | path: schemas/products.md
--------------------------------------------------------------------------------
/003-beautiful-soup-with-static-site-and-server-side-rendered-web-scraping/website/contact-success.html:
--------------------------------------------------------------------------------
1 | ---
2 | title: We'll be in touch
3 | hero: We'll be in touch
4 | ---
5 |
6 |
In retail, a return is the process of a customer taking previously purchased merchandise back to the retailer, and in turn receiving a refund in the original form of payment, exchange for another item (identical or different), or a store credit.
9 |
10 |
Many retailers will accept returns provided that the customer has a receipt as a proof of purchase, and that certain other conditions, which depend on the retailer's policies, are met. These may include the merchandise being in a certain condition (usually resellable if not defective), no more than a certain amount of time having passed since the purchase, and sometimes that identification be provided (though usually only if a receipt is not provided). In some cases, only exchanges or store credit are offered, again usually only without a receipt, or after an initial refund period has passed. Some retailers charge a restocking fee for non-defective returned merchandise, but typically only if the packaging has been opened.
11 |
12 |
While retailers are not usually required to accept returns, laws in many places require retailers to post their return policy in a place where it would be visible to the customer prior to purchase.
13 |
14 |
In certain countries, such as Australia, consumer rights dictate that under certain situations consumers have a right to demand a refund. These situations include sales that relied on false or misleading claims, defective goods, and undisclosed conditions of sale.
15 |
16 |
There are various reasons why customers may wish to return merchandise. These include a change of one's mind (buyer's remorse), quality of the merchandise, personal dissatisfaction, or a mistaken purchase of the wrong product. For clothing or other sized items, it may be a lack of a correct fit. Sometimes, there may be a product recall in which the manufacturer has requested (or been ordered) that the merchandise be brought back to the store. Also, gift receipts are offered sometimes when an item is purchased for another person, and the recipient can exchange this item for another item of comparable value, or for store credit, often on a gift card.
Please review the below information to find answers to any of your questions regarding our policies.
9 |
10 |
Shipping Services
11 |
Fur provides the below shipping services. Actual delivery time frames may vary due to weather, service delays, holidays etc.
12 |
13 |
Free Standard shipping is available for apparel only on orders that have an after-discount subtotal value of $1.00 or more. Domestic shipping to Hawaii, Alaska, or Puerto Rico or any other location outside the Continental United States may take longer than expected due to their locations.
14 |
15 |
Standard Shipping (6-10 business days)
16 |
17 |
Standard shipping is our least expensive option and has an average delivery window of 3-10 days depending on the final delivery location zip code.
18 | Tracking is available.
19 |
Final delivery is made by USPS.
20 |
Select Standard shipping for orders being shipped to a P.O. Box.
21 |
Standard shipping may take a few more days for locations like Hawaii, Alaska, Puerto Rico or any orders shipped outside the United States. This service is available for APO, DPO and FPO addresses and non-contiguous U.S. states and territories.
22 |
23 |
24 |
Express Shipping (2 Business Days)
25 |
26 |
Express shipping is an expedited option that offers full door-to-door tracking.
27 |
Express shipping is an available shipping option throughout all 50 states.
28 |
Express shipping will arrive by 4:30PM to a business or 7:00PM to a residence.
29 |
This service is not available for P.O. Boxes or APO, DPO and FPO addresses.
30 |
31 |
32 |
Overnight (Next Business Day)
33 |
34 |
Overnight is an expedited shipping option that offers full door-to-door tracking.
35 |
Overnight is an available shipping option throughout all 50 states.
36 |
Overnight will arrive the next business day by 10:30 a.m. to most U.S. addresses; by noon, 4:30 p.m. or 5 p.m. in remote areas; by noon, 1:30 p.m. or 4:30 p.m. on Saturdays.
37 |
This service is not available for P.O. Boxes or APO, DPO and FPO addresses.
38 |
39 |
--------------------------------------------------------------------------------
/003-beautiful-soup-with-static-site-and-server-side-rendered-web-scraping/website/siteicon.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/davidteather/everything-web-scraping/6afdda03e1b84d64a8e460f6e7265a60a87bdbdc/003-beautiful-soup-with-static-site-and-server-side-rendered-web-scraping/website/siteicon.png
--------------------------------------------------------------------------------
/003-beautiful-soup-with-static-site-and-server-side-rendered-web-scraping/website/touch-icon.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/davidteather/everything-web-scraping/6afdda03e1b84d64a8e460f6e7265a60a87bdbdc/003-beautiful-soup-with-static-site-and-server-side-rendered-web-scraping/website/touch-icon.png
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Everything Web Scraping
2 | Learn everything web scraping by [David Teather](https://twitter.com/david_teather) find the video series on [YouTube](https://youtube.com/playlist?list=PLmRtxHvzkEE8Ofiy4hnnXSoxw7gs4HOHt).
3 |
4 | [](https://www.linkedin.com/in/davidteather/) [](https://github.com/sponsors/davidteather) [](https://discord.gg/yyPhbfma6f)  [](https://twitter.com/david_teather)
5 |
6 | ## Table Of Contents
7 | 1. [Course Catalogue](#course-catalogue)
8 | 2. [How To Start The Mock Websites](#how-to-start-the-mock-websites)
9 |
10 | **Please** consider giving [Course Feedback](https://forms.gle/LjRwHoR34vD1knGa6)
11 |
12 | ## Welcome!
13 |
14 | Glad you're here! If it's your first time check out the [introduction](./000-introduction/README.md), if not welcome back!
15 |
16 | Consider [sponsoring me](https://github.com/sponsors/davidteather) on GitHub to make work like this possible
17 |
18 | ### Supporting The Project
19 | * Star the repo 😎
20 | * Maybe share it with some people new to web-scraping?
21 | * Consider [sponsoring](https://github.com/sponsors/davidteather) me on GitHub
22 | * Send me an email or a [LinkedIn](https://www.linkedin.com/in/davidteather/) message telling me what you enjoy in the course (and maybe what else you want to see in the future)
23 | * Submit PRs for suggestions/issues :)
24 | ## Course Catalogue
25 | 0. [Introduction To The Course](/000-introduction/)
26 | 1. [Introduction To Forging API Requests](/001-introduction-to-forging-api-requests/)
27 | 2. [Proxies](/002-proxies)
28 | 3. [Beautiful Soup Scraping With Static and Server Side Rendered Sites](/003-beautiful-soup-with-static-site-and-server-side-rendered-web-scraping)
29 |
30 |
31 | ## How To Start The Mock Websites
32 |
33 | [Video Walkthrough](https://youtu.be/WmeDXK7KRKE)
34 |
35 | ### With GitHub Codespaces (Recommended)
36 |
37 | If you don't want to deal with installing and configuring software, I've set up this repository so that a GitHub Codespace can do all of that for you.
38 |
39 | > Note: A free GitHub account comes with 60 hours of Codespaces free each month, and if you're a student you can get 90 hours free each month with GitHub Pro through the [GitHub Student Developer Pack](https://education.github.com/pack) ([source](https://github.com/features/codespaces))
40 |
41 | #### Creating A Codespace
42 | > If you want to save your solutions, [create a fork](https://github.com/davidteather/everything-web-scraping/fork) then create a Codespace from your own repo, then you'll be able to use git to save your changes as normal.
43 |
44 | Create a Codespace using the instructions below or [here](https://github.com/codespaces/new)
45 |
46 |
47 |
Select Code -> Codespaces Tab -> The + Icon -> New With Options
Select the configuration of the lesson you're on, and after hitting create a Codespace
54 |
VS Code editor will open in the browser and start all programs needed for the activity!
55 |
56 |
57 | #### Cleaning Up
58 |
59 | After finishing each lesson you can visit the [GitHub Codespaces](https://github.com/codespaces) menu and delete the Codespace so you don't get charged while you're not using it.
60 |
61 |
62 |
63 |
Delete a Codespace with the 3 dots -> Delete
64 |
This will delete any changes you've made
65 |
66 |
67 | > Note: If you enjoy GitHub Codespaces consider checking out my ~30 minute [LinkedIn Learning Course](https://www.linkedin.com/learning/github-codespaces-for-students/why-use-github-codespaces) on Codespaces, you can get free 24h access through [my LinkedIn post](https://www.linkedin.com/feed/update/urn:li:activity:7069391759281147905/) and feel free to send a connection request while you're over there 🤠
68 |
69 | ### With Docker
70 |
71 | Run `docker-compose up` while in a lesson directory, when it says development server started open `localhost:3000` in your browser to check that it's working properly.
72 |
73 | When done with this lesson you can `control + c` to shut down your docker containers.
74 |
75 | #### Cleaning Up
76 | ##### With Docker Desktop
77 |
78 | 1. Navigate to the containers tab on the side, find the lesson you want to delete and click the trashcan icon to remove it.
79 | 2. Navigate to the images tab on the side, find the images starting with the course name to delete and hit the trash can.
80 |
81 | ##### With Command line
82 |
83 | 1. To remove containers, `docker rm $(docker ps -a -q --filter name=XXX)`, where XXX is the lesson number you want removed (ex: 001).
84 | 2. To remove images, `docker rmi $(docker images --filter label=lesson.number=X -a -q)`, where X is the number you want removed (ex: 1, ex: 10)
85 |
--------------------------------------------------------------------------------
/assets/codespaces-new-with-options.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/davidteather/everything-web-scraping/6afdda03e1b84d64a8e460f6e7265a60a87bdbdc/assets/codespaces-new-with-options.png
--------------------------------------------------------------------------------
/assets/codespaces-select-configuration.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/davidteather/everything-web-scraping/6afdda03e1b84d64a8e460f6e7265a60a87bdbdc/assets/codespaces-select-configuration.png
--------------------------------------------------------------------------------
/assets/delete-codespace.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/davidteather/everything-web-scraping/6afdda03e1b84d64a8e460f6e7265a60a87bdbdc/assets/delete-codespace.png
--------------------------------------------------------------------------------