├── .devcontainer
│   ├── 000-introduction
│   │   └── devcontainer.json
│   ├── 001-introduction-to-forging-api-requests
│   │   └── devcontainer.json
│   ├── 002-proxies
│   │   └── devcontainer.json
│   └── 003-beautiful-soup-static
│       └── devcontainer.json
├── .gitattributes
├── .github
│   ├── FUNDING.yml
│   └── ISSUE_TEMPLATE
│       ├── bug_report.md
│       ├── information_correction.md
│       └── topic_request.md
├── .gitignore
├── 000-introduction
│   ├── README.md
│   └── slides.pdf
├── 001-introduction-to-forging-api-requests
│   ├── README.md
│   ├── activity.py
│   ├── docker-compose.yml
│   ├── slides.pdf
│   ├── test.py
│   ├── thumbnail.png
│   └── website
│       ├── client
│       │   ├── .gitignore
│       │   ├── Dockerfile
│       │   ├── README.md
│       │   ├── package-lock.json
│       │   ├── package.json
│       │   ├── public
│       │   │   ├── favicon.ico
│       │   │   ├── index.html
│       │   │   ├── manifest.json
│       │   │   └── robots.txt
│       │   └── src
│       │       ├── components
│       │       │   ├── Footer.js
│       │       │   ├── Header.js
│       │       │   └── Post.js
│       │       ├── images
│       │       │   └── heart.png
│       │       ├── index.css
│       │       ├── index.js
│       │       ├── pages
│       │       │   ├── Discover.js
│       │       │   ├── Feed.js
│       │       │   └── ProfileFeed.js
│       │       ├── reportWebVitals.js
│       │       ├── services
│       │       │   ├── DiscoveryService.js
│       │       │   └── FeedService.js
│       │       └── setupTests.js
│       └── server
│           ├── .gitignore
│           ├── Dockerfile
│           ├── db_seeding
│           │   ├── create_data.py
│           │   ├── initial_data.json
│           │   └── seedDatabase.js
│           ├── get-client.js
│           ├── package-lock.json
│           ├── package.json
│           └── server.js
├── 002-proxies
│   ├── README.md
│   └── assets
│       ├── 5-req-2.png
│       ├── 5-req.png
│       ├── pool-comp.png
│       ├── proxy-list.png
│       ├── rotating.png
│       ├── thumbnail.png
│       ├── type-comp.png
│       ├── webshare-rotating.png
│       ├── webshare-tab.png
│       ├── with-proxy.png
│       └── without-proxy.png
├── 003-beautiful-soup-with-static-site-and-server-side-rendered-web-scraping
│   ├── README.md
│   ├── activities.py
│   ├── assets
│   │   ├── ssr-sites.png
│   │   ├── static-sites.png
│   │   └── thumbnail.png
│   ├── docker-compose.yml
│   ├── requirements.txt
│   ├── test.py
│   └── website
│       ├── 301.txt
│       ├── 404.html
│       ├── Dockerfile
│       ├── Gemfile
│       ├── Gemfile.lock
│       ├── LICENSE
│       ├── Makefile
│       ├── README.md
│       ├── TODO
│       ├── _config.yml
│       ├── _data
│       │   └── navigation.yml
│       ├── _includes
│       │   ├── logo.html
│       │   ├── navigation.html
│       │   ├── product-styles.html
│       │   ├── relative-src.html
│       │   ├── snipcart-button.html
│       │   └── social-icon.html
│       ├── _layouts
│       │   ├── default.html
│       │   ├── page.html
│       │   └── product.html
│       ├── _products
│       │   ├── deer.md
│       │   ├── elephant.md
│       │   ├── giraffe.md
│       │   ├── hog.md
│       │   ├── lion.md
│       │   └── tiger.md
│       ├── _sass
│       │   ├── cloudcannon.scss
│       │   ├── contact.scss
│       │   ├── elements.scss
│       │   ├── footer.scss
│       │   ├── forms.scss
│       │   ├── layout.scss
│       │   ├── mixins
│       │   │   ├── columns.scss
│       │   │   └── flexbox.scss
│       │   ├── navigation.scss
│       │   ├── products.scss
│       │   ├── staff.scss
│       │   └── variables.scss
│       ├── _staff_members
│       │   ├── ava.md
│       │   └── steph.md
│       ├── about.html
│       ├── cloudcannon.config.yml
│       ├── contact-success.html
│       ├── contact.html
│       ├── css
│       │   └── screen.scss
│       ├── favicon.png
│       ├── images
│       │   ├── _screenshot.png
│       │   └── products
│       │       ├── deer
│       │       │   ├── black.jpg
│       │       │   ├── blue.jpg
│       │       │   ├── clay.jpg
│       │       │   └── cream.jpg
│       │       ├── elephant
│       │       │   ├── cream.jpg
│       │       │   └── green.jpg
│       │       ├── giraffe
│       │       │   └── green.jpg
│       │       ├── hog
│       │       │   ├── blue.jpg
│       │       │   ├── clay.jpg
│       │       │   └── cream.jpg
│       │       ├── lion
│       │       │   └── blue.jpg
│       │       └── tiger
│       │           ├── black.jpg
│       │           ├── blue.jpg
│       │           ├── clay.jpg
│       │           ├── cream.jpg
│       │           └── green.jpg
│       ├── index.html
│       ├── js
│       │   ├── _style-picker.js
│       │   └── application.js
│       ├── returns.html
│       ├── robots.txt
│       ├── schemas
│       │   ├── products.md
│       │   └── staff_members.md
│       ├── shipping.html
│       ├── siteicon.png
│       └── touch-icon.png
├── LICENSE
├── README.md
└── assets
    ├── codespaces-new-with-options.png
    ├── codespaces-select-configuration.png
    └── delete-codespace.png

--------------------------------------------------------------------------------
/.devcontainer/000-introduction/devcontainer.json:
--------------------------------------------------------------------------------
1 | {
2 |   "name": "000 Introduction",
3 |   "workspaceMount": "source=${localWorkspaceFolder},target=/workspace,type=bind,consistency=cached",
4 |   "workspaceFolder": "/workspace/000-introduction",
5 |   "image": "mcr.microsoft.com/devcontainers/universal"
6 | }
--------------------------------------------------------------------------------
/.devcontainer/001-introduction-to-forging-api-requests/devcontainer.json:
--------------------------------------------------------------------------------
1 | {
2 |   "name": "001 Introduction to Forging API Requests",
3 |   "workspaceMount": "source=${localWorkspaceFolder},target=/workspace,type=bind,consistency=cached",
4 |   "workspaceFolder": "/workspace/001-introduction-to-forging-api-requests",
5 |   "image": "mcr.microsoft.com/devcontainers/universal",
6 |   "postStartCommand": "docker compose up -d && gh codespace ports visibility 8080:public -c $CODESPACE_NAME",
7 |   "forwardPorts": [5434, 3000, 8080],
8 |   "portsAttributes": {
9 |     "5434": {
10 |       "label": "postgres",
11 |       "onAutoForward": "silent"
12 |     },
13 |     "3000": {
14 |       "label": "Website",
15 |       "onAutoForward": "openBrowser"
16 |     },
17 |     "8080": {
18 |       "label": "Backend",
19 |       "onAutoForward": "silent"
20 |     }
21 |   }
22 | }
--------------------------------------------------------------------------------
/.devcontainer/002-proxies/devcontainer.json:
--------------------------------------------------------------------------------
1 | {
2 |   "name": "002 Proxies",
3 |   "workspaceMount": "source=${localWorkspaceFolder},target=/workspace,type=bind,consistency=cached",
4 |   "workspaceFolder": "/workspace/002-proxies",
5 |   "image": "mcr.microsoft.com/devcontainers/universal"
6 | }
--------------------------------------------------------------------------------
/.devcontainer/003-beautiful-soup-static/devcontainer.json:
--------------------------------------------------------------------------------
1 | {
2 |   "name": "003 Beautiful Soup with Static & SSR Web Scraping",
3 |   "workspaceMount": "source=${localWorkspaceFolder},target=/workspace,type=bind,consistency=cached",
4 |   "workspaceFolder": "/workspace/003-beautiful-soup-with-static-site-and-server-side-rendered-web-scraping",
5 |   "image": "mcr.microsoft.com/devcontainers/universal",
6 |   "postStartCommand": "docker compose up -d && python3 -m pip install -r requirements.txt",
7 |   "forwardPorts": [3000],
8 |   "portsAttributes": {
9 |     "3000": {
10 |       "label": "Website",
11 |       "onAutoForward": "openBrowser"
12 |     }
13 |   }
14 | }
--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------
1 | # Auto detect text files and perform LF normalization
2 | * text=auto
3 | 
--------------------------------------------------------------------------------
/.github/FUNDING.yml:
--------------------------------------------------------------------------------
1 | custom: ['https://www.paypal.me/dteather']
2 | github: davidteather
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/bug_report.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Bug report
3 | about: Create a report to help us improve
4 | title: "[BUG] - Your Error Here"
5 | labels: bug
6 | assignees: ''
7 | 
8 | ---
9 | Fill Out the template :)
10 | 
11 | **Describe the bug**
12 | 
13 | A clear and concise description of what the bug is.
14 | 
15 | **The buggy code**
16 | 
17 | Please add any relevant code that is giving you unexpected results.
18 | 
19 | Preferably the smallest amount of code to reproduce the issue.
20 | 
21 | **Expected behavior**
22 | 
23 | A clear and concise description of what you expected to happen.
24 | 
25 | **Error Trace (if any)**
26 | 
27 | Put the error trace below if there's any error thrown.
28 | ```
29 | # Error Trace Here
30 | ```
31 | 
32 | **Desktop (please complete the following information):**
33 | - OS: [e.g. Windows 10]
34 | - Lesson #: [e.g. Lesson 1]
35 | 
36 | **Additional context**
37 | 
38 | Add any other context about the problem here.
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/information_correction.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Incorrect Information
3 | about: 
4 | title: "[INCORRECT] - Lesson #"
5 | labels: incorrect
6 | assignees: ''
7 | 
8 | ---
9 | 
10 | **Note:** Videos themselves will **not** be updated; if there are any corrections needed, they will be posted under the video link in the lesson's readme.
11 | 
12 | **Where is the incorrect information?**
13 | Please give a lesson number and an exact quote
14 | 
15 | > your quote here
16 | 
17 | **Explain how this is incorrect** (provide sources)
18 | 
19 | Please explain how the section you quoted above is incorrect, and please provide any sources (articles, interviews with authorities on the subject, etc.)
20 | 
21 | 
22 | - [ ] I have checked that this is not already in the corrections section.
23 | 
24 | Consider creating a PR for your change and referencing this issue number in it.
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/topic_request.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Request A New Topic/Lesson
3 | about: What other topics do you want in this course?
4 | title: "[TOPIC REQUEST] - YOUR TOPIC HERE"
5 | labels: topic-request
6 | assignees: ''
7 | 
8 | ---
9 | 
10 | **Describe the topic you'd like covered**
11 | 
12 | Write what you'd like covered here
13 | 
14 | **Links to any resources that might be helpful with this topic**
15 | * Ex: https://twitter.com/david_teather (could include any articles about it or whatever)
16 | 
17 | **Are you an authority on this topic?**: (yes/no)
18 | * **Note:** If you are an authority I'd love to collaborate on this lesson and at least run a draft or two by you before publishing it.
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | __pycache__
2 | .vscode
3 | .pytest_cache
4 | .DS_Store
--------------------------------------------------------------------------------
/000-introduction/README.md:
--------------------------------------------------------------------------------
1 | # Introduction
2 | 
3 | Welcome to the course! Glad you're here :)
4 | 
5 | ### Supporting The Project
6 | * Star the repo 😎
7 | * Maybe share it with some people new to web-scraping?
8 | * Consider [sponsoring](https://github.com/sponsors/davidteather) me on GitHub
9 | * Send me an email or a [LinkedIn](https://www.linkedin.com/in/davidteather/) message telling me what you enjoy in the course (and maybe what else you want to see in the future)
10 | * Submit PRs for suggestions/issues :)
11 | 
12 | ## Table Of Contents
13 | 1. [Welcome!](#welcome)
14 |     1. [What I'm Known For](#what-im-known-for)
15 | 2. [Learning Objectives](#learning-objectives)
16 | 3. [How You Will Learn](#how-you-will-learn)
17 | 4. [How To Learn Effectively](#how-to-learn-effectively)
18 | 5. [Course Topics](#course-topics)
19 | 6. [Getting Started](#getting-started)
20 |     1. [Prerequisites](#prerequisites)
21 |     2. [Tools Required](#tools-required)
22 | 
23 | ## Video For The Lesson
24 | Consider checking out the video for this introduction [here](https://www.youtube.com/watch?v=KY3E-6wVOqA&list=PLmRtxHvzkEE8Ofiy4hnnXSoxw7gs4HOHt); it just provides the [slides](./slides.pdf) with commentary, and later lessons are higher quality.
25 | 
26 | ### Video Corrections
27 | None so far
28 | 
29 | ## Welcome
30 | 
31 | I'm David Teather. I work as a software engineer, and my specialty is data extraction.
32 | 
33 | If you'd like a more visual experience, check out the introduction video on [YouTube](https://www.youtube.com/watch?v=KY3E-6wVOqA&list=PLmRtxHvzkEE8Ofiy4hnnXSoxw7gs4HOHt), or pull up the introduction [slides](./slides.pdf)
34 | ### What I'm Known For
35 | * [My research](https://theresponsetimes.com/yikyak-is-exposing-user-locations/) on YikYak (a social media app) that was featured in [Vice](https://www.vice.com/en/article/7kbnna/anonymous-social-media-app-yik-yak-exposed-users-precise-locations) and [The Verge](https://www.theverge.com/2022/5/13/23070696/yik-yak-anonymous-app-precise-locations-revealed)
36 | * Creating various data extraction tools
37 |     * My most popular is [TikTokApi](https://github.com/davidteather/TikTok-Api)
38 |         * 600K+ Downloads
39 |         * 2.3K+ Stars
40 | 
41 | ## Course Introduction
42 | ### Learning Objectives
43 | * Learners will understand the many different ways websites prevent web scraping
44 | * Learners will be able to reverse engineer a real-world website for data extraction
45 | 
46 | ### How You Will Learn
47 | * Real website examples
48 |     * Although these websites might change over time, breaking the lesson
49 | * Websites I've created for this course
50 |     * Will not change, to ensure that these lessons don't break
51 | * Each lesson will have a hands-on activity
52 |     * In addition, most modules will have a `submission.py` file where you can create functions related to the lesson concept and run them against a test suite
53 |     * These will be primarily focused on extracting data from the websites created for this course
54 | 
55 | ### How To Learn Effectively
56 | * Everybody learns differently, so these are guidelines
57 | * Take notes from the slides presented in the [videos](https://youtube.com/playlist?list=PLmRtxHvzkEE8Ofiy4hnnXSoxw7gs4HOHt)
58 |     * These will revolve around general concepts
59 |     * Will be accompanied by programs to write
60 | * Try the activities before watching the solution in the video
61 | * Treat the website folder as a black box, like you would a real website; you can figure out everything through the website itself
62 | 
63 | ### Course Topics
64 | * Forging API requests
65 | * Proxies
66 | * Captchas
67 | * Storing data at scale
68 | * Emulating human behavior
69 | * And more
70 |     * Feel free to [tweet at
me](https://twitter.com/david_teather) or file an issue with the `topic-request` label with what you'd like to see
71 | 
72 | ## Getting Started
73 | 
74 | Learn how to get started with this course!
75 | ### Prerequisites
76 | * A basic understanding of programming
77 | * Recommended
78 |     * Some Python experience
79 |         * We probably won't do much complex Python
80 | 
81 | ### Tools Required
82 | * [Docker](https://www.docker.com/)
83 |     * And docker-compose (should be bundled)
84 | * [Python](https://www.python.org/)
85 |     * I'll be using 3.10
86 | * A web browser
87 |     * I'll be using [Brave](https://brave.com/) (Chromium-based)
88 |     * Doesn't really matter which, as long as you can view network traffic
89 | * And the files in this git repo, so be sure to download it! (and maybe give it a star 😉)
90 | 
91 | 
92 | Hope you'll enjoy the content in this course! You can either get started with [lesson 1](../001-introduction-to-forging-api-requests/), or check out the [course catalogue](../README.md#course-catalogue)
--------------------------------------------------------------------------------
/000-introduction/slides.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/davidteather/everything-web-scraping/6afdda03e1b84d64a8e460f6e7265a60a87bdbdc/000-introduction/slides.pdf
--------------------------------------------------------------------------------
/001-introduction-to-forging-api-requests/README.md:
--------------------------------------------------------------------------------
1 | # Lesson 1 - Introduction To Forging API Requests
2 | 
3 | This lesson is designed to teach you how data is sent between websites and servers, and how we can exploit this to extract data.
4 | 
5 | ### Supporting The Project
6 | * Star the repo 😎
7 | * Maybe share it with some people new to web-scraping?
8 | * Consider [sponsoring](https://github.com/sponsors/davidteather) me on GitHub
9 | * Send me an email or a [LinkedIn](https://www.linkedin.com/in/davidteather/) message telling me what you enjoy in the course (and maybe what else you want to see in the future)
10 | * Submit PRs for suggestions/issues :)
11 | 
12 | ## Learning Objectives
13 | * Learners will understand how data is sent between a client and a server.
14 | * Learners will forge API requests to a mock website.
15 | 
16 | 
17 | ## Table of Contents
18 | * [Lesson Video](#lesson-video)
19 |     * [Video Corrections](#video-corrections)
20 | * [How Do Websites Get Data](#how-do-websites-get-data)
21 |     * [Popular Ways Websites Get Data](#popular-ways-websites-get-data)
22 |     * [How Do We Exploit This?](#how-do-we-exploit-this)
23 | * [Lesson Activity](#activity)
24 |     * [Description](#brief-description)
25 |     * [Testing](#testing)
26 |     * [Solutions](#solutions)
27 | 
28 | ## Lesson Video
29 | 
30 | [![](./thumbnail.png?raw=true)](https://www.youtube.com/watch?v=8GZPQUjd7pk&list=PLmRtxHvzkEE8Ofiy4hnnXSoxw7gs4HOHt)
31 | 
32 | [Watch Here](https://www.youtube.com/watch?v=8GZPQUjd7pk&list=PLmRtxHvzkEE8Ofiy4hnnXSoxw7gs4HOHt)
33 | 
34 | ### Video Corrections
35 | None so far
36 | 
37 | ## How Do Websites Get Data?
38 | 
39 | Watch this section on [YouTube](https://www.youtube.com/watch?v=8GZPQUjd7pk&list=PLmRtxHvzkEE8Ofiy4hnnXSoxw7gs4HOHt) and/or pull up the [slides](./slides.pdf)
40 | 
41 | ### Popular Ways Websites Get Data
42 | * Server Side Rendering (SSR)
43 |     * Data is sent as part of the HTML response to the requester
44 |     * Each request for new data usually requires a page reload
45 | * AJAX
46 |     * Takes a client (ex: web browser) and server approach
47 |     * When the client needs new data, it requests it from the server
48 |     * This allows the client to update the data on the page without refreshing the page itself
49 |     * Leads to a more fluid and responsive user experience
50 |     * This type is the focus of this lesson
51 | 
52 | Visualizations of how the data flows are available in the [video](https://www.youtube.com/watch?v=8GZPQUjd7pk&list=PLmRtxHvzkEE8Ofiy4hnnXSoxw7gs4HOHt) and [slides](./slides.pdf)
53 | 
54 | ### How Do We Exploit This?
55 | 
56 | If we're able to emulate the requests that a legitimate client makes, then we can extract data from the server without ever interacting with the client itself. This technique is generally referred to as **forging requests**.
57 | 
58 | * Advantages
59 |     * These APIs can be easier to scrape at scale than trying to do it through a client
60 |     * They may contain extra information you can't see in the HTML itself
61 |         * Similar to Missouri accidentally exposing their teachers' SSNs: [The Verge](https://www.theverge.com/2021/10/14/22726866/missouri-governor-department-elementary-secondary-education-ssn-vulnerability-disclosure)
62 |     * Less data returned means quicker requests (and lower data transfer fees)
63 |         * Excess HTML, CSS, etc. isn't usually returned from the server, just pure data
64 | * Disadvantages
65 |     * Some websites frequently update their APIs
66 |         * Extra work has to be done to keep up with these changes compared to just scraping HTML
67 |         * Might change endpoints, the schema of the data returned, etc.
68 |     * Can be hard to emulate human behavior to avoid captchas and other blocking mechanisms
69 |     * Can be difficult to figure out how the website is generating user sessions and other security parameters to prevent web scraping
70 | 
71 | ## Activity
72 | 
73 | In this activity you'll be looking at a mock website and writing a Python script to extract data from it. To get started, run `docker-compose up` in this directory. If you don't know what Docker is or are new to it, check out the [docker section of the readme](../README.md#how-to-start-the-mock-websites)
74 | 
75 | 
76 | ### Brief Description
77 | 
78 | Our goal is to extract as much data as possible from the website by looking at the network inspector tab of the browser when visiting the mock website. We want to make the same requests that the website (client) makes to the server.
79 | 
80 | Open `activity.py`; you will be modifying the existing functions to do what the comments tell you to do. I recommend using the [requests](https://requests.readthedocs.io/en/latest/user/quickstart/) package, although feel free to use whatever you want.
81 | 
82 | **Do not** change the method names; however, feel free to call those methods if you want to test them out in the `if __name__ == "__main__"` section.
83 | 
84 | ### Testing
85 | 
86 | To check if your implementation is correct, run `python test.py`; this will import the functions you made. It will tell you which tests failed, if any, and will show a success message if all tests passed.
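If you get stuck on the general shape of a forged request, here's a minimal hedged sketch in Python using the requests package. The base URL, endpoint name, and response key below are assumptions for illustration; the real ones are exactly what you're meant to discover in your browser's network inspector.

```python
import requests

BASE_URL = "http://localhost:8080"  # assumed backend address; verify it in the network tab

def fetch_pages(endpoint):
    # Forge the same paginated GET requests the client makes,
    # stopping once the server returns an empty page.
    page = 0
    results = []
    while True:
        response = requests.get(f"{BASE_URL}/{endpoint}/{page}")
        response.raise_for_status()
        items = response.json().get("posts", [])  # "posts" is an assumed response key
        if not items:
            break
        results.extend(items)
        page += 1
    return results

if __name__ == "__main__":
    print(len(fetch_pages("feed")))  # "feed" is an assumed endpoint name
```

The same loop works for any paginated endpoint you find; only the path and the response key change.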
87 | 
88 | ### Solutions
89 | 
90 | You can find the solutions in the [video](https://youtu.be/8GZPQUjd7pk?list=PLmRtxHvzkEE8Ofiy4hnnXSoxw7gs4HOHt), or use the timestamps here
91 | * [extract_feed()](https://youtu.be/8GZPQUjd7pk?list=PLmRtxHvzkEE8Ofiy4hnnXSoxw7gs4HOHt&t=174)
92 | * [extract_emails()](https://youtu.be/8GZPQUjd7pk?list=PLmRtxHvzkEE8Ofiy4hnnXSoxw7gs4HOHt&t=240)
93 | * [username_exists()](https://youtu.be/8GZPQUjd7pk?list=PLmRtxHvzkEE8Ofiy4hnnXSoxw7gs4HOHt&t=258)
--------------------------------------------------------------------------------
/001-introduction-to-forging-api-requests/activity.py:
--------------------------------------------------------------------------------
1 | """
2 | To check if your implementation is correct run test.py
3 | 
4 | *NOTE: Don't change the method names, as that's what's used in the tester,
5 | but feel free to add anything else to test and debug your code.
6 | """
7 | 
8 | def extract_feed():
9 |     """
10 |     Return an array of all the post objects on the feed page.
11 |     """
12 | 
13 |     return []
14 | 
15 | def extract_emails():
16 |     """
17 |     Return an array of all the emails on the discover page.
18 |     """
19 | 
20 |     return []
21 | 
22 | def username_exists(username):
23 |     """
24 |     username - The username to check if it exists, without the @ (ex: username="davidteather")
25 |     This function will return True if the provided username already exists, and False if it doesn't
26 |     """
27 | 
28 |     return False
29 | 
30 | if __name__ == "__main__":
31 |     # Optional: You can call your methods here if you want to test them without running the tester
32 |     # print(extract_feed())
33 |     pass
--------------------------------------------------------------------------------
/001-introduction-to-forging-api-requests/docker-compose.yml:
--------------------------------------------------------------------------------
1 | version: '3.9'
2 | 
3 | services:
4 |   db:
5 |     image: postgres:latest
6 |     environment:
7 |       POSTGRES_DB: lesson_1
8 |       POSTGRES_PASSWORD: postgres
9 |       POSTGRES_USER: postgres
10 |     ports:
11 |       - 5434:5432
12 | 
13 |   client:
14 |     build:
15 |       context: ./website/client
16 |     container_name: lesson-001-client
17 |     depends_on:
18 |       - server
19 |       - db
20 |     environment:
21 |       REACT_APP_WEBSITE_NAME: Social
22 |       REACT_APP_BACKEND_HOST: localhost
23 |       REACT_APP_BACKEND_PORT: 8080
24 |       REACT_APP_CODESPACE_NAME: ${CODESPACE_NAME}
25 |       REACT_APP_GITHUB_CODESPACES_PORT_FORWARDING_DOMAIN: ${GITHUB_CODESPACES_PORT_FORWARDING_DOMAIN}
26 |     ports:
27 |       - 3000:3000
28 | 
29 |   server:
30 |     build:
31 |       context: ./website/server
32 |     depends_on:
33 |       - db
34 |     environment:
35 |       PORT: 8080
36 |       POSTGRES_DB: lesson_1
37 |       POSTGRES_PASSWORD: postgres
38 |       POSTGRES_USER: postgres
39 |       POSTGRES_PORT: 5432
40 |       POSTGRES_HOST: db
41 |     links:
42 |       - db
43 |     ports:
44 |       - "8080:8080"
45 |     container_name: lesson-001-server
--------------------------------------------------------------------------------
/001-introduction-to-forging-api-requests/slides.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/davidteather/everything-web-scraping/6afdda03e1b84d64a8e460f6e7265a60a87bdbdc/001-introduction-to-forging-api-requests/slides.pdf
--------------------------------------------------------------------------------
/001-introduction-to-forging-api-requests/test.py:
--------------------------------------------------------------------------------
1 | # Grades activity.py on the test cases
2 | # Don't look inside this if you haven't passed the tests yet
3 | 
4 | from activity import extract_feed, extract_emails, username_exists
5 | import json
6 | 
7 | def test_extract_feed(posts):
8 |     images = []
9 |     for post in posts:
10 |         images.append(post["image_url"]) # Image urls are unique
11 | 
12 |     feed = extract_feed()
13 |     for post in feed:
14 |         if post["image_url"] not in images:
15 |             print(f"extract_feed(): ❌\n\tReturned a post that was not in the database (or returned multiple instances of a single post)")
16 |             return False
17 | 
18 |         images.remove(post["image_url"])
19 | 
20 |     if len(images) != 0:
21 |         print(f"extract_feed(): ❌\n\tDidn't return all posts in the database")
22 |         return False
23 | 
24 |     print(f"extract_feed(): ✅")
25 |     return True
26 | 
27 | def test_extract_emails(profiles):
28 |     emails = []
29 |     for profile in profiles:
30 |         emails.append(profile["email"]) # Emails are unique
31 | 
32 |     feed = extract_emails()
33 |     for email in feed:
34 |         if email not in emails:
35 |             print(f"extract_emails(): ❌\n\tReturned an email that was not in the database (or returned multiple instances of a single email)")
36 |             return False
37 | 
38 |         emails.remove(email)
39 | 
40 |     if len(emails) != 0:
41 |         print(f"extract_emails(): ❌\n\tDidn't return all emails in the database")
42 |         return False
43 | 
44 |     print(f"extract_emails(): ✅")
45 |     return True
46 | 
47 | def test_username_exists(profiles):
48 |     fake_usernames = ["orange", "apple", "bruh", "davidteather", "subscribe", "contact.davidteather@gmail.com"]
49 |     for profile in profiles:
50 |         u = profile['username']
51 | 
52 |         # Ensure fake_usernames actually don't exist
53 |         if u in fake_usernames:
54 |             fake_usernames.remove(u)
55 | 
56 |         if not username_exists(u):
57 |             print(f"username_exists(): ❌\n\tReturned False for a username that exists")
58 |             return False
59 | 
60 |     for fake_username in fake_usernames:
61 |         if username_exists(fake_username):
62 |             print(f"username_exists(): ❌\n\tReturned True for a username that doesn't exist")
63 |             return False
64 | 
65 |     print(f"username_exists(): ✅")
66 |     return True
67 | 
68 | if __name__ == "__main__":
69 |     with open("website/server/db_seeding/initial_data.json", "r", encoding='utf-8') as init_data:
70 |         data = json.loads(init_data.read())
71 | 
72 |     profiles = data["profiles"]
73 |     posts = data["posts"]
74 | 
75 |     passed_extract_feed = test_extract_feed(posts)
76 |     passed_extract_emails = test_extract_emails(profiles)
77 |     passed_username_exists = test_username_exists(profiles)
78 | 
79 |     if passed_extract_feed and passed_username_exists and passed_extract_emails:
80 |         print(f"All tests: ✅")
--------------------------------------------------------------------------------
/001-introduction-to-forging-api-requests/thumbnail.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/davidteather/everything-web-scraping/6afdda03e1b84d64a8e460f6e7265a60a87bdbdc/001-introduction-to-forging-api-requests/thumbnail.png
--------------------------------------------------------------------------------
/001-introduction-to-forging-api-requests/website/client/.gitignore:
--------------------------------------------------------------------------------
1 | # See https://help.github.com/articles/ignoring-files/ for more about ignoring files.
2 | 3 | # dependencies 4 | /node_modules 5 | /.pnp 6 | .pnp.js 7 | 8 | # testing 9 | /coverage 10 | 11 | # production 12 | /build 13 | 14 | # misc 15 | .DS_Store 16 | .env.local 17 | .env.development.local 18 | .env.test.local 19 | .env.production.local 20 | 21 | npm-debug.log* 22 | yarn-debug.log* 23 | yarn-error.log* 24 | -------------------------------------------------------------------------------- /001-introduction-to-forging-api-requests/website/client/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM node:13.12.0-alpine 2 | 3 | LABEL "lesson.number"=1 4 | 5 | # add `/app/node_modules/.bin` to $PATH 6 | ENV PATH /app/node_modules/.bin:$PATH 7 | 8 | # install app dependencies 9 | COPY package.json ./ 10 | COPY package-lock.json ./ 11 | RUN npm install --silent 12 | 13 | # add app 14 | COPY . ./ 15 | 16 | # start app 17 | CMD ["npm", "start"] -------------------------------------------------------------------------------- /001-introduction-to-forging-api-requests/website/client/README.md: -------------------------------------------------------------------------------- 1 | # Getting Started with Create React App 2 | 3 | This project was bootstrapped with [Create React App](https://github.com/facebook/create-react-app). 4 | 5 | ## Available Scripts 6 | 7 | In the project directory, you can run: 8 | 9 | ### `npm start` 10 | 11 | Runs the app in the development mode.\ 12 | Open [http://localhost:3000](http://localhost:3000) to view it in your browser. 13 | 14 | The page will reload when you make changes.\ 15 | You may also see any lint errors in the console. 16 | 17 | ### `npm test` 18 | 19 | Launches the test runner in the interactive watch mode.\ 20 | See the section about [running tests](https://facebook.github.io/create-react-app/docs/running-tests) for more information. 21 | 22 | ### `npm run build` 23 | 24 | Builds the app for production to the `build` folder.\ 25 | It correctly bundles React in production mode and optimizes the build for the best performance. 26 | 27 | The build is minified and the filenames include the hashes.\ 28 | Your app is ready to be deployed! 29 | 30 | See the section about [deployment](https://facebook.github.io/create-react-app/docs/deployment) for more information. 31 | 32 | ### `npm run eject` 33 | 34 | **Note: this is a one-way operation. Once you `eject`, you can't go back!** 35 | 36 | If you aren't satisfied with the build tool and configuration choices, you can `eject` at any time. This command will remove the single build dependency from your project. 37 | 38 | Instead, it will copy all the configuration files and the transitive dependencies (webpack, Babel, ESLint, etc) right into your project so you have full control over them. All of the commands except `eject` will still work, but they will point to the copied scripts so you can tweak them. At this point you're on your own. 39 | 40 | You don't have to ever use `eject`. The curated feature set is suitable for small and middle deployments, and you shouldn't feel obligated to use this feature. However we understand that this tool wouldn't be useful if you couldn't customize it when you are ready for it. 41 | 42 | ## Learn More 43 | 44 | You can learn more in the [Create React App documentation](https://facebook.github.io/create-react-app/docs/getting-started). 45 | 46 | To learn React, check out the [React documentation](https://reactjs.org/). 
47 | 48 | ### Code Splitting 49 | 50 | This section has moved here: [https://facebook.github.io/create-react-app/docs/code-splitting](https://facebook.github.io/create-react-app/docs/code-splitting) 51 | 52 | ### Analyzing the Bundle Size 53 | 54 | This section has moved here: [https://facebook.github.io/create-react-app/docs/analyzing-the-bundle-size](https://facebook.github.io/create-react-app/docs/analyzing-the-bundle-size) 55 | 56 | ### Making a Progressive Web App 57 | 58 | This section has moved here: [https://facebook.github.io/create-react-app/docs/making-a-progressive-web-app](https://facebook.github.io/create-react-app/docs/making-a-progressive-web-app) 59 | 60 | ### Advanced Configuration 61 | 62 | This section has moved here: [https://facebook.github.io/create-react-app/docs/advanced-configuration](https://facebook.github.io/create-react-app/docs/advanced-configuration) 63 | 64 | ### Deployment 65 | 66 | This section has moved here: [https://facebook.github.io/create-react-app/docs/deployment](https://facebook.github.io/create-react-app/docs/deployment) 67 | 68 | ### `npm run build` fails to minify 69 | 70 | This section has moved here: [https://facebook.github.io/create-react-app/docs/troubleshooting#npm-run-build-fails-to-minify](https://facebook.github.io/create-react-app/docs/troubleshooting#npm-run-build-fails-to-minify) 71 | -------------------------------------------------------------------------------- /001-introduction-to-forging-api-requests/website/client/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "client", 3 | "version": "0.1.0", 4 | "private": true, 5 | "dependencies": { 6 | "@testing-library/jest-dom": "^5.16.4", 7 | "@testing-library/react": "^13.3.0", 8 | "@testing-library/user-event": "^13.5.0", 9 | "axios": "^0.27.2", 10 | "bootstrap": "^5.1.3", 11 | "react": "^18.1.0", 12 | "react-bootstrap": "^2.4.0", 13 | "react-dom": "^18.1.0", 14 | "react-scripts": "5.0.1", 15 | "styled-components": "^5.3.5", 16 | "web-vitals": "^2.1.4" 17 | }, 18 | "scripts": { 19 | "start": "react-scripts start", 20 | "build": "react-scripts build", 21 | "test": "react-scripts test", 22 | "eject": "react-scripts eject" 23 | }, 24 | "eslintConfig": { 25 | "extends": [ 26 | "react-app", 27 | "react-app/jest" 28 | ] 29 | }, 30 | "browserslist": { 31 | "production": [ 32 | ">0.2%", 33 | "not dead", 34 | "not op_mini all" 35 | ], 36 | "development": [ 37 | "last 1 chrome version", 38 | "last 1 firefox version", 39 | "last 1 safari version" 40 | ] 41 | }, 42 | "devDependencies": { 43 | "react-router-dom": "^6.3.0" 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /001-introduction-to-forging-api-requests/website/client/public/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/davidteather/everything-web-scraping/6afdda03e1b84d64a8e460f6e7265a60a87bdbdc/001-introduction-to-forging-api-requests/website/client/public/favicon.ico -------------------------------------------------------------------------------- /001-introduction-to-forging-api-requests/website/client/public/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 12 | 13 | 17 | 18 | 27 | %REACT_APP_WEBSITE_NAME% 28 | 29 | 30 | 31 |
32 | 42 | 43 | 44 | -------------------------------------------------------------------------------- /001-introduction-to-forging-api-requests/website/client/public/manifest.json: -------------------------------------------------------------------------------- 1 | { 2 | "short_name": "React App", 3 | "name": "Create React App Sample", 4 | "icons": [ 5 | { 6 | "src": "favicon.ico", 7 | "sizes": "64x64 32x32 24x24 16x16", 8 | "type": "image/x-icon" 9 | } 10 | ], 11 | "start_url": ".", 12 | "display": "standalone", 13 | "theme_color": "#000000", 14 | "background_color": "#ffffff" 15 | } 16 | -------------------------------------------------------------------------------- /001-introduction-to-forging-api-requests/website/client/public/robots.txt: -------------------------------------------------------------------------------- 1 | # https://www.robotstxt.org/robotstxt.html 2 | User-agent: * 3 | Disallow: 4 | -------------------------------------------------------------------------------- /001-introduction-to-forging-api-requests/website/client/src/components/Footer.js: -------------------------------------------------------------------------------- 1 | const Footer = () => { 2 | return ( 3 | <> 4 | 5 | ) 6 | }; 7 | 8 | export default Footer; -------------------------------------------------------------------------------- /001-introduction-to-forging-api-requests/website/client/src/components/Header.js: -------------------------------------------------------------------------------- 1 | import { Link } from "react-router-dom"; 2 | import {Navbar, Nav, Container} from 'react-bootstrap'; 3 | 4 | const Header = () => { 5 | return ( 6 | <> 7 | 8 | 9 | {process.env.REACT_APP_WEBSITE_NAME} 10 | 11 | 12 | 16 | 17 | 18 | 19 | 20 | ) 21 | }; 22 | 23 | export default Header; -------------------------------------------------------------------------------- /001-introduction-to-forging-api-requests/website/client/src/components/Post.js: -------------------------------------------------------------------------------- 1 | import React, { Component } from 'react'; 2 | import { Link } from 'react-router-dom'; 3 | import heart from '../images/heart.png' 4 | 5 | class Post extends Component { 6 | render() { 7 | return ( 8 |
9 |
10 | {this.props.post.caption} 11 |
12 |
13 | Heart icon {this.props.post.likes_count} Likes 14 |
15 | @{this.props.post.author_username}: {this.props.post.caption} 16 |
17 |
18 | Find it on Unsplash 19 |
20 |
21 | ); 22 | } 23 | } 24 | 25 | export default Post; -------------------------------------------------------------------------------- /001-introduction-to-forging-api-requests/website/client/src/images/heart.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/davidteather/everything-web-scraping/6afdda03e1b84d64a8e460f6e7265a60a87bdbdc/001-introduction-to-forging-api-requests/website/client/src/images/heart.png -------------------------------------------------------------------------------- /001-introduction-to-forging-api-requests/website/client/src/index.css: -------------------------------------------------------------------------------- 1 | body { 2 | margin: 0; 3 | font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', 'Roboto', 'Oxygen', 4 | 'Ubuntu', 'Cantarell', 'Fira Sans', 'Droid Sans', 'Helvetica Neue', 5 | sans-serif; 6 | -webkit-font-smoothing: antialiased; 7 | -moz-osx-font-smoothing: grayscale; 8 | } 9 | 10 | code { 11 | font-family: source-code-pro, Menlo, Monaco, Consolas, 'Courier New', 12 | monospace; 13 | } 14 | 15 | .unstyle_link { 16 | color: inherit; 17 | text-decoration: inherit; 18 | } -------------------------------------------------------------------------------- /001-introduction-to-forging-api-requests/website/client/src/index.js: -------------------------------------------------------------------------------- 1 | import ReactDOM from "react-dom/client"; 2 | import { BrowserRouter, Routes, Route } from "react-router-dom"; 3 | import Feed from "./pages/Feed"; 4 | import Discover from './pages/Discover'; 5 | import 'bootstrap/dist/css/bootstrap.min.css'; 6 | import './index.css' 7 | import ProfileFeed from "./pages/ProfileFeed"; 8 | 9 | export default function App() { 10 | return ( 11 |
12 | 13 | 14 | } /> 15 | } /> 16 | } /> 17 | 18 | 19 |
20 | ); 21 | } 22 | 23 | const root = ReactDOM.createRoot(document.getElementById('root')); 24 | root.render(); -------------------------------------------------------------------------------- /001-introduction-to-forging-api-requests/website/client/src/pages/Discover.js: -------------------------------------------------------------------------------- 1 | 2 | import { useState, useEffect } from "react"; 3 | import Header from "../components/Header" 4 | import { getProfiles } from "../services/DiscoveryService" 5 | 6 | 7 | 8 | const Discover = () => { 9 | const [profiles, setProfiles] = useState([]) 10 | const [currentPage, setCurrentPage] = useState(0) 11 | const [requestInProgress, setRequestInProgress] = useState(false) 12 | const [isMoreData, setIsMoreData] = useState(true) 13 | 14 | 15 | 16 | 17 | useEffect(() => { 18 | setRequestInProgress(true) 19 | 20 | if (isMoreData) { 21 | getProfiles(currentPage).then((data) => { 22 | setProfiles(p => p.concat(data.profiles)) 23 | if (data.profiles.length === 0) { 24 | setIsMoreData(false) 25 | } 26 | }).finally(() => { 27 | setRequestInProgress(false) 28 | }); 29 | } 30 | 31 | const handleScroll = () => { 32 | if (window.innerHeight + document.documentElement.scrollTop !== document.documentElement.offsetHeight) return; 33 | if (!requestInProgress) { 34 | setCurrentPage(p => p+1) 35 | } 36 | } 37 | 38 | window.addEventListener('scroll', handleScroll); 39 | return () => window.removeEventListener("scroll", handleScroll); 40 | }, [currentPage, isMoreData]); 41 | 42 | return ( 43 | <> 44 |
45 |
46 |

Discover Page

47 | { 48 | profiles.map((profile, i) => { 49 | return ( 50 |
51 |
{profile.name[0]}
52 |
53 | {profile.name} 54 |
Is working as a {profile.job} at {profile.company}
55 |
56 |
57 | 58 |
59 |
60 | 61 | ) 62 | }) 63 | } 64 |
65 | 66 | 67 | ) 68 | 69 | 70 | } 71 | 72 | export default Discover -------------------------------------------------------------------------------- /001-introduction-to-forging-api-requests/website/client/src/pages/Feed.js: -------------------------------------------------------------------------------- 1 | import { useState, useEffect } from "react"; 2 | import Header from "../components/Header" 3 | import Post from "../components/Post"; 4 | import { getFeed } from "../services/FeedService"; 5 | 6 | const Feed = () => { 7 | const [posts, setPosts] = useState([]) 8 | const [currentPage, setCurrentPage] = useState(0) 9 | const [requestInProgress, setRequestInProgress] = useState(false) 10 | const [isMoreData, setIsMoreData] = useState(true) 11 | 12 | useEffect(() => { 13 | setRequestInProgress(true) 14 | 15 | if (isMoreData) { 16 | getFeed(currentPage).then((data) => { 17 | setPosts(p => p.concat(data.posts)) 18 | if (data.posts.length === 0) { 19 | setIsMoreData(false) 20 | } 21 | }).finally(() => { 22 | setRequestInProgress(false) 23 | }); 24 | } 25 | 26 | const handleScroll = () => { 27 | if (window.innerHeight + document.documentElement.scrollTop !== document.documentElement.offsetHeight) return; 28 | if (!requestInProgress) { 29 | setCurrentPage(p => p+1) 30 | } 31 | } 32 | 33 | window.addEventListener('scroll', handleScroll); 34 | return () => window.removeEventListener("scroll", handleScroll); 35 | }, [currentPage, isMoreData]); 36 | 37 | return ( 38 | <> 39 |
40 |
41 | { 42 | posts.map((post, i) => { 43 | return ( 44 | 45 | 46 | ) 47 | }) 48 | } 49 |
50 | 51 | ) 52 | 53 | } 54 | 55 | export default Feed -------------------------------------------------------------------------------- /001-introduction-to-forging-api-requests/website/client/src/pages/ProfileFeed.js: -------------------------------------------------------------------------------- 1 | import { useState, useEffect } from "react"; 2 | import Header from "../components/Header" 3 | import Post from "../components/Post"; 4 | import { getProfileFeed } from "../services/FeedService"; 5 | import { useParams } from 'react-router-dom' 6 | 7 | const ProfileFeed = () => { 8 | const [posts, setPosts] = useState([]) 9 | const [userExists, setUserExists] = useState(true) 10 | const [currentPage, setCurrentPage] = useState(0) 11 | const [requestInProgress, setRequestInProgress] = useState(false) 12 | const [isMoreData, setIsMoreData] = useState(true) 13 | const { username } = useParams() 14 | 15 | useEffect(() => { 16 | setRequestInProgress(true) 17 | 18 | if (isMoreData) { 19 | getProfileFeed(username, currentPage).then((data) => { 20 | setPosts(p => p.concat(data.posts)) 21 | if (data.posts.length === 0) { 22 | if (data.error === "USER_DOES_NOT_EXIST") { 23 | setUserExists(false) 24 | } 25 | setIsMoreData(false) 26 | } 27 | }).finally(() => { 28 | setRequestInProgress(false) 29 | }); 30 | } 31 | 32 | const handleScroll = () => { 33 | if (window.innerHeight + document.documentElement.scrollTop !== document.documentElement.offsetHeight) return; 34 | if (!requestInProgress) { 35 | setCurrentPage(p => p+1) 36 | } 37 | } 38 | 39 | window.addEventListener('scroll', handleScroll); 40 | return () => window.removeEventListener("scroll", handleScroll); 41 | }, [currentPage, isMoreData]); 42 | 43 | if (userExists) { 44 | return ( 45 | <> 46 |
47 |
48 | { 49 | posts.map((post, i) => { 50 | return ( 51 | 52 | 53 | ) 54 | }) 55 | } 56 |
57 | 58 | ) 59 | } else { 60 | return ( 61 | <> 62 |
63 |
64 |
65 |

66 | user @{username} doesn't exist 67 |

68 |
69 |
70 | 71 | ) 72 | } 73 | } 74 | 75 | export default ProfileFeed -------------------------------------------------------------------------------- /001-introduction-to-forging-api-requests/website/client/src/reportWebVitals.js: -------------------------------------------------------------------------------- 1 | const reportWebVitals = onPerfEntry => { 2 | if (onPerfEntry && onPerfEntry instanceof Function) { 3 | import('web-vitals').then(({ getCLS, getFID, getFCP, getLCP, getTTFB }) => { 4 | getCLS(onPerfEntry); 5 | getFID(onPerfEntry); 6 | getFCP(onPerfEntry); 7 | getLCP(onPerfEntry); 8 | getTTFB(onPerfEntry); 9 | }); 10 | } 11 | }; 12 | 13 | export default reportWebVitals; 14 | -------------------------------------------------------------------------------- /001-introduction-to-forging-api-requests/website/client/src/services/DiscoveryService.js: -------------------------------------------------------------------------------- 1 | const isCodespaces = process.env.REACT_APP_CODESPACE_NAME != "" && process.env.REACT_APP_GITHUB_CODESPACES_PORT_FORWARDING_DOMAIN != ""; 2 | 3 | const BACKEND_URL = isCodespaces 4 | ? `https://${process.env.REACT_APP_CODESPACE_NAME}-${process.env.REACT_APP_BACKEND_PORT}.${process.env.REACT_APP_GITHUB_CODESPACES_PORT_FORWARDING_DOMAIN}` 5 | : `http://${process.env.BACKEND_HOST}:${process.env.BACKEND_PORT}`; 6 | 7 | export async function getProfiles(page) { 8 | try{ 9 | const response = await fetch(`${BACKEND_URL}/discover/profiles/${page}`); 10 | return response.json(); 11 | }catch(error) { 12 | return new Promise((res, rej) => { 13 | res([]) 14 | }); 15 | } 16 | 17 | } -------------------------------------------------------------------------------- /001-introduction-to-forging-api-requests/website/client/src/services/FeedService.js: -------------------------------------------------------------------------------- 1 | const isCodespaces = process.env.REACT_APP_CODESPACE_NAME != "" && process.env.REACT_APP_GITHUB_CODESPACES_PORT_FORWARDING_DOMAIN != ""; 2 | 3 | const BACKEND_URL = isCodespaces 4 | ? `https://${process.env.REACT_APP_CODESPACE_NAME}-${process.env.REACT_APP_BACKEND_PORT}.${process.env.REACT_APP_GITHUB_CODESPACES_PORT_FORWARDING_DOMAIN}` 5 | : `http://${process.env.BACKEND_HOST}:${process.env.BACKEND_PORT}`; 6 | 7 | export async function getFeed(page) { 8 | try{ 9 | const response = await fetch(`${BACKEND_URL}/feed/${page}`); 10 | return response.json(); 11 | }catch(error) { 12 | return new Promise((res, rej) => { 13 | res([]) 14 | }); 15 | } 16 | 17 | } 18 | 19 | 20 | export async function getProfileFeed(username, page) { 21 | try{ 22 | const response = await fetch(`${BACKEND_URL}/profile/${username}/feed/${page}`); 23 | return response.json(); 24 | }catch(error) { 25 | return new Promise((res, rej) => { 26 | res([]) 27 | }); 28 | } 29 | 30 | } -------------------------------------------------------------------------------- /001-introduction-to-forging-api-requests/website/client/src/setupTests.js: -------------------------------------------------------------------------------- 1 | // jest-dom adds custom jest matchers for asserting on DOM nodes. 
2 | // allows you to do things like: 3 | // expect(element).toHaveTextContent(/react/i) 4 | // learn more: https://github.com/testing-library/jest-dom 5 | import '@testing-library/jest-dom'; 6 | -------------------------------------------------------------------------------- /001-introduction-to-forging-api-requests/website/server/.gitignore: -------------------------------------------------------------------------------- 1 | node_modules -------------------------------------------------------------------------------- /001-introduction-to-forging-api-requests/website/server/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM node:16 2 | 3 | LABEL "lesson.number"=1 4 | 5 | COPY package.json ./ 6 | COPY package-lock.json ./ 7 | RUN npm install --silent 8 | 9 | COPY . ./ 10 | 11 | EXPOSE 8080 12 | 13 | CMD ["npm", "run", "start"] -------------------------------------------------------------------------------- /001-introduction-to-forging-api-requests/website/server/db_seeding/create_data.py: -------------------------------------------------------------------------------- 1 | from faker import Faker 2 | from faker.providers import address, color, geo, person, profile 3 | import time 4 | import json 5 | import random 6 | import requests 7 | 8 | fake = Faker() 9 | fake.add_provider(profile) 10 | 11 | data = {"profiles": [], "posts": []} 12 | 13 | # Profile Generation 14 | profile_colors = [ 15 | "#8ECAE6", 16 | "#219EBC", 17 | "#D82F2F", 18 | "#FB8500", 19 | "#FFB703", 20 | "#CBF3F0", 21 | "#2EC4B6", 22 | "#FFBF69", 23 | "#FF9F1C", 24 | "#DCEDFF", 25 | "#94B0DA", 26 | "#419D78", 27 | "#E0A458", 28 | "#FFDBB5", 29 | "#C7F0BD", 30 | "#9E768F", 31 | "#9FA4C4", 32 | "#B47EB3", 33 | "#92D1C3", 34 | "#67AAF9", 35 | "#B95F89", 36 | "#8884FF", 37 | ] 38 | for x in range(50): # make 50 profiles 39 | while True: 40 | p = fake.profile() 41 | new_profile = { 42 | "job": p["job"], 43 | "company": p["company"], 44 | "username": p["username"], 45 | "name": p["name"], 46 | "email": p["mail"], 47 | "birthday": p["birthdate"].strftime("%m-%d-%Y"), 48 | "profile_color": random.choice(profile_colors), 49 | } 50 | if "'" not in p["job"]: 51 | break 52 | 53 | data["profiles"].append(new_profile) 54 | 55 | 56 | # Post Generation 57 | POSTS_TO_CREATE = 500 58 | POSTS_PER_PAGE = 50 59 | 60 | photo_options = [] 61 | queries = [ 62 | "dancing", 63 | "technology", 64 | "programming", 65 | "birds", 66 | "dogs", 67 | "cats", 68 | "social", 69 | "vibes", 70 | "sunset", 71 | "cars", 72 | "landscape", 73 | "mountain", 74 | "snow", 75 | "river", 76 | "stream", 77 | "reading", 78 | "bookstore", 79 | "nighttime", 80 | "stars", 81 | "astronomy", 82 | "coffee", 83 | ] 84 | for query in queries: 85 | r = requests.get( 86 | f"https://unsplash.com/napi/search?query={query}&per_page={POSTS_PER_PAGE}", 87 | headers={ 88 | "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.74 Safari/537.36" 89 | }, 90 | ) 91 | d = r.json() 92 | for photo in d["photos"]["results"]: 93 | photo_url = photo["urls"]["full"] 94 | photo_options.append( 95 | { 96 | "url": photo_url, 97 | "unsplash": photo["links"]["html"], 98 | "likes": photo["likes"], 99 | } 100 | ) 101 | 102 | used_urls = [] 103 | for x in range(POSTS_TO_CREATE): 104 | poster = random.choice(data["profiles"]) 105 | long_text = fake.text() 106 | while True: 107 | photo = random.choice(photo_options) 108 | photo_options.remove(photo) 109 | 110 | if photo["unsplash"] not in used_urls: 
111 | break 112 | 113 | used_urls.append(photo["unsplash"]) 114 | new_post = { 115 | "image_url": photo["url"], 116 | "image_unsplash_url": photo["unsplash"], 117 | "likes_count": photo["likes"], 118 | "caption": " ".join(long_text.split(" ")[0:200]), 119 | "author_username": poster["username"], 120 | } 121 | 122 | data["posts"].append(new_post) 123 | 124 | 125 | with open("initial_data.json", "w+", encoding="utf-8") as o: 126 | json.dump(data, o) 127 | -------------------------------------------------------------------------------- /001-introduction-to-forging-api-requests/website/server/db_seeding/seedDatabase.js: -------------------------------------------------------------------------------- 1 | 2 | const getClient = require('../get-client') 3 | 4 | const initialData = require('./initial_data.json') 5 | 6 | module.exports.seedDatabase = async () => { 7 | const client = await getClient.getClient() 8 | 9 | await client.query(`CREATE TABLE IF NOT EXISTS profiles ( 10 | id serial PRIMARY KEY, 11 | job text NOT NULL, 12 | company text NOT NULL, 13 | username text NOT NULL UNIQUE, 14 | name text NOT NULL, 15 | email text NOT NULL, 16 | birthday timestamptz NOT NULL, 17 | profile_color text NOT NULL 18 | )`, (err, result) => { 19 | if (err) { 20 | console.log(err) 21 | } else { 22 | sql = "INSERT INTO profiles (job, company, username, name, email, profile_color, birthday)\nVALUES " 23 | var value_list = ""; 24 | var time_list = []; 25 | for (let i = 0; i < initialData.profiles.length; i++) { 26 | cur = initialData.profiles[i] 27 | value_list = value_list + `('${cur.job}', '${cur.company}', '${cur.username}', '${cur.name}', '${cur.email}', '${cur.profile_color}', $${i+1})` 28 | 29 | time_list.push(cur.birthday) 30 | if (i != initialData.profiles.length - 1) { 31 | value_list += ",\n" 32 | } 33 | } 34 | 35 | var full_sql = `${sql}${value_list} ON CONFLICT DO NOTHING` 36 | 37 | client.query(full_sql, time_list, (err, result) => { 38 | if (err) { 39 | console.log(err) 40 | } else { 41 | console.log("Seeded profiles!") 42 | } 43 | }) 44 | } 45 | }) 46 | 47 | await client.query(`CREATE TABLE IF NOT EXISTS posts ( 48 | id serial PRIMARY KEY, 49 | image_url text NOT NULL, 50 | image_unsplash_url text NOT NULL UNIQUE, 51 | likes_count int NOT NULL, 52 | caption text NOT NULL, 53 | author_username text NOT NULL 54 | )`, (err, result) => { 55 | if (err) { 56 | console.log(err) 57 | } else { 58 | sql = "INSERT INTO posts (image_url, image_unsplash_url, likes_count, caption, author_username)\nVALUES " 59 | var value_list = ""; 60 | for (let i = 0; i < initialData.posts.length; i++) { 61 | cur = initialData.posts[i] 62 | value_list = value_list + `('${cur.image_url}', '${cur.image_unsplash_url}', ${cur.likes_count}, '${cur.caption}', '${cur.author_username}')` 63 | 64 | if (i != initialData.posts.length - 1) { 65 | value_list += ",\n" 66 | } 67 | } 68 | 69 | var full_sql = `${sql}${value_list} ON CONFLICT DO NOTHING` 70 | 71 | client.query(full_sql, (err, result) => { 72 | if (err) { 73 | console.log(err) 74 | } else { 75 | console.log("Seeded posts!") 76 | } 77 | }) 78 | } 79 | }) 80 | } -------------------------------------------------------------------------------- /001-introduction-to-forging-api-requests/website/server/get-client.js: -------------------------------------------------------------------------------- 1 | const { Client } = require('pg'); 2 | 3 | const MAX_RETRIES = 30; 4 | 5 | const delay = ms => new Promise(resolve => setTimeout(resolve, ms)) 6 | 7 | module.exports.getClient = async () 
=> {
8 |     let tries = 0
9 | 
10 |     var client = null
11 | 
12 |     var connected = false
13 |     while (tries < MAX_RETRIES) {
14 |         try {
15 |             client = new Client({
16 |                 connectionString: `postgres://${process.env.POSTGRES_USER}:${process.env.POSTGRES_PASSWORD}@${process.env.POSTGRES_HOST}:${process.env.POSTGRES_PORT}/${process.env.POSTGRES_DB}`
17 |             });
18 |             await client.connect()
19 |             connected = true
20 |         } catch (e) {
21 |             await delay(1000) // Wait 1 second to retry
22 |         }
23 |         tries++
24 | 
25 |         if (connected)
26 |             break
27 |     }
28 | 
29 |     if (!connected) {
30 |         console.log("Could not connect to database, try running \"docker-compose up\" again.")
31 |     }
32 | 
33 |     return client;
34 | };
--------------------------------------------------------------------------------
/001-introduction-to-forging-api-requests/website/server/package.json:
--------------------------------------------------------------------------------
1 | {
2 |   "name": "server",
3 |   "version": "1.0.0",
4 |   "description": "",
5 |   "main": "server.js",
6 |   "scripts": {
7 |     "test": "echo \"Error: no test specified\" && exit 1",
8 |     "start": "node ./server.js",
9 |     "dev": "nodemon ./server.js localhost 8080"
10 |   },
11 |   "author": "David Teather",
12 |   "license": "MIT",
13 |   "dependencies": {
14 |     "dateformat": "^5.0.3",
15 |     "express": "^4.18.1",
16 |     "pg": "^8.7.3"
17 |   }
18 | }
19 | 
--------------------------------------------------------------------------------
/001-introduction-to-forging-api-requests/website/server/server.js:
--------------------------------------------------------------------------------
1 | const express = require("express");
2 | const { seedDatabase } = require("./db_seeding/seedDatabase");
3 | const app = express();
4 | const getClient = require("./get-client")
5 | 
6 | const PAGE_SIZE = 10
7 | 
8 | // Allow cors
9 | app.use((req, res, next) => {
10 |     res.header("Access-Control-Allow-Origin", "*");
11 |     res.header("Access-Control-Allow-Headers", "Origin, X-Requested-With, Content-Type, Accept");
12 |     next();
13 | });
14 | 
15 | app.get('/', (req, res) => {
16 |     res.send("Test")
17 | })
18 | 
19 | app.get('/discover/profiles/:page', async (req, res) => {
20 |     let page = req.params.page
21 |     const client = await getClient.getClient()
22 | 
23 |     await client.query(
24 |         {
25 |             text: `SELECT * FROM profiles LIMIT $1 OFFSET $2`,
26 |             values: [PAGE_SIZE, PAGE_SIZE * page]
27 |         }, (err, result) => {
28 |             if (err) {
29 |                 console.log(err)
30 |                 res.sendStatus(500)
31 |                 client.end()
32 |             } else {
33 |                 res.json({ profiles: result.rows })
34 |                 client.end()
35 |             }
36 |         })
37 | })
38 | 
39 | app.get('/feed/:page', async (req, res) => {
40 |     let page = req.params.page
41 |     const client = await getClient.getClient()
42 | 
43 |     await client.query(
44 |         {
45 |             text: "SELECT * FROM posts ORDER BY likes_count DESC LIMIT $1 OFFSET $2",
46 |             values: [PAGE_SIZE, PAGE_SIZE * page]
47 |         }, (err, result) => {
48 |             if (err) {
49 |                 console.log(err)
50 |                 res.sendStatus(500)
51 |                 client.end()
52 |             } else {
53 |                 res.json({ posts: result.rows })
54 |                 client.end()
55 |             }
56 |         })
57 | })
58 | 
59 | app.get('/profile/:username/feed/:page', async (req, res) => {
60 |     let page = req.params.page
61 |     let username = req.params.username
62 |     const client = await getClient.getClient()
63 | 
64 |     await client.query({
65 |         text: "SELECT * FROM profiles WHERE username=$1",
66 |         values: [username]
67 |     }, async (err, result) => {
68 |         if (err) {
69 |             console.log(err)
70 |             res.sendStatus(500)
71 |             client.end()
72 |         } else {
73 |             if (result.rowCount == 0) {
// User doesn't exist 75 | res.json({ posts: [], error: "USER_DOES_NOT_EXIST"}) 76 | client.end() 77 | } else { 78 | await client.query( 79 | { 80 | text: "SELECT * FROM posts WHERE author_username=$1 ORDER BY likes_count DESC LIMIT $2 OFFSET $3", 81 | values: [username, PAGE_SIZE, PAGE_SIZE * page] 82 | }, (err, result) => { 83 | if (err) { 84 | console.log(err) 85 | res.sendStatus(500) 86 | client.end() 87 | } else { 88 | res.json({ posts: result.rows }) 89 | client.end() 90 | } 91 | }) 92 | } 93 | } 94 | }) 95 | 96 | }) 97 | 98 | app.listen(process.env.PORT, () => { 99 | console.log(`Listening on port ${process.env.PORT}`); 100 | seedDatabase(); 101 | }); -------------------------------------------------------------------------------- /002-proxies/README.md: -------------------------------------------------------------------------------- 1 | # Lesson 2 - Proxies 2 | 3 | This lesson is designed to teach you what proxies are, how they're helpful in web scraping, the different kinds of proxies, and how to use them in Python! 4 | 5 | **Note:** There's no activity in this lesson; I couldn't figure out a way that was still challenging but actually possible to implement. If you have ideas on how to do this feel free to file an issue or submit a PR :) 6 | 7 | ### Supporting The Project 8 | * Star the repo 😎 9 | * Maybe share it with some people new to web-scraping? 10 | * Consider [sponsoring](https://github.com/sponsors/davidteather) me on GitHub 11 | * Send me an email or a [LinkedIn](https://www.linkedin.com/in/davidteather/) message telling me what you enjoy in the course (and maybe what else you want to see in the future) 12 | * Submit PRs for suggestions/issues :) 13 | 14 | ## Learning Objectives 15 | * Learners will know how proxies work 16 | * Learners will understand how proxies are helpful in web scraping 17 | * Learners will be able to compare the different tradeoffs of the most common proxy types 18 | * Learners will use proxies in Python 19 | 20 | ## Table of Contents 21 | * [Lesson Video](#lesson-video) 22 | * [Video Corrections](#video-corrections) 23 | * [What Are Proxies?](#what-are-proxies) 24 | * [Why Use Proxies?](#why-use-proxies) 25 | * [How To Get Proxies](#how-to-get-proxies) 26 | * [The Different Types of Proxies](#the-different-types-of-proxies) 27 | * [What Type Of Proxy Should You Use?](#what-type-of-proxy-should-you-use) 28 | * [How To Use Proxies In Python?](#how-to-use-proxies-in-python) 29 | * [Conclusion](#conclusion) 30 | 31 | ## Lesson Video 32 | 33 | [![](./assets/thumbnail.png?raw=true)](https://www.youtube.com/watch?v=X0FG2JaaWOY&list=PLmRtxHvzkEE8Ofiy4hnnXSoxw7gs4HOHt) 34 | 35 | [Watch Here](https://www.youtube.com/watch?v=X0FG2JaaWOY&list=PLmRtxHvzkEE8Ofiy4hnnXSoxw7gs4HOHt) 36 | 37 | ### Video Corrections 38 | None so far 39 | 40 | ## What Are Proxies? 41 | Note: All of this information is covered with visuals in the video linked in this readme. If you watched the video, you're done with this section. 42 | 43 | Without using a proxy, requests from your computer go directly to the server hosting the website you’re scraping. The server then has access to the IP making that request, which isn’t always a problem, but there are some techniques to stop web scraping that do use IP detection. 44 | 45 |
46 | 47 |
48 | 49 | Proxies are like pass-through computers for accessing the internet: your computer makes a request to a proxy, then that proxy makes the real request to the website, returning its information to your computer. 50 | 51 |
52 | 53 |
54 | 55 | This way the website only has access to the IP from the proxy (which is easier to switch), not your real IP. 56 | 57 | This way of disguising who is making the real request to a website is **critical for web scraping**. 58 | 59 | ## Why Use Proxies? 60 | 61 | Websites typically want to stop web scraping, so they implement a rate limit: a technique to ensure that a single IP hasn't made more than some number of requests in a given time period, for example 10 requests in a minute. 62 | 63 | If an IP has made more than the permitted amount, then no data is returned, thus preventing web scraping. This threshold is designed so that normal users of the website won't ever hit this rate limit; however, careless bots trying to extract data will hit this limit. 64 | 65 | We can use proxies to circumvent this rate limit: since rate limiters are typically tied to a specific IP, we can use proxies to make requests from different IPs. Using a single proxy does not increase your effective rate limit, but if you use a combination of a bunch of different proxies, then the effective rate limit is multiplied by the number of proxies you're using. 66 | 67 |
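To make the server's side of this concrete, here's a minimal sketch of what a per-IP rate limiter might look like (illustrative only; the 10-requests-per-60-seconds limit is a made-up example, and real sites use more sophisticated, often distributed, versions):

```py
import time
from collections import defaultdict, deque

MAX_REQUESTS = 10    # hypothetical limit: 10 requests...
WINDOW_SECONDS = 60  # ...per minute, per IP

recent_requests = defaultdict(deque)  # ip -> timestamps of that IP's recent requests

def is_rate_limited(ip):
    now = time.time()
    timestamps = recent_requests[ip]
    # Forget requests that fell outside the window
    while timestamps and now - timestamps[0] > WINDOW_SECONDS:
        timestamps.popleft()
    if len(timestamps) >= MAX_REQUESTS:
        return True  # this IP already used up its budget for the window
    timestamps.append(now)
    return False
```

Since the counter is keyed on the requesting IP, every additional proxy IP you control effectively buys you another `MAX_REQUESTS` per window, which is exactly the multiplication described above.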
68 | 69 |
70 | 71 | To this server it looks like there's 5 different clients making requests. 72 | 73 |
74 | 75 |
75 | 76 | 77 | In reality it's just one computer controlling 5 different proxies. 78 | 79 | ## How To Get Proxies 80 | 81 | There are many sites that offer proxies as a service, although it’s entirely possible to host your own. It’s typically much easier and cheaper to pay a company that specializes in this. 82 | 83 | I've personally used both of the providers below. I'm not sponsored by either of them while writing this, but I do have affiliate links. Feel free to use them; it supports this project 😀 84 | * [Bright Data](https://brightdata.grsm.io/u10xm7thq4ci) (affiliate link) 85 | * [non-affiliate link](https://brightdata.com/) 86 | * [Webshare.io](https://www.webshare.io/?referral_code=3x5812idzzzp) (affiliate link) 87 | * [non-affiliate link](https://www.webshare.io/) 88 | * Has a free tier of up to 1gb/month with 10 proxies 89 | * I've found this useful for small projects that I can't justify buying proxies for 90 | 91 | ## The Different Types Of Proxies 92 | Typically companies that offer proxies as a service have various types of different proxies. I've covered the most common types of proxies that I've seen below. 93 | 94 | * Data Center 95 | * Hosted on data centers like AWS, GCP, Azure, etc 96 | * Advantages 97 | * Cheapest variant 98 | * Most accessible: 100% of proxy services should have this 99 | * Disadvantages 100 | * Easiest proxy to detect, as most real end users will not be connecting from a data center 101 | * Residential 102 | * Tied to physical locations, typically from ISPs like Xfinity, Spectrum, AT&T, etc 103 | * Advantages 104 | * Hard to detect, as most real end-users will be connecting from an ISP 105 | * Disadvantages 106 | * Fairly expensive, as you're indirectly paying for an internet subscription 107 | * Mobile 108 | * Use cell providers like Verizon, AT&T, Sprint, etc 109 | * Advantages 110 | * The hardest to detect 111 | * Companies that offer these typically let you select specific cities to make requests from, which can be advantageous depending on your needs 112 | * Disadvantages 113 | * Extremely expensive ($40/gb, Bright Data, July 2022) 114 |
116 | 117 |
117 | 118 | In addition to these different proxy types, companies also typically offer two types of proxy pools. 119 | * Static IPs 120 | * Fixed number of IPs that do not change 121 | * Advantages 122 | * Usually pretty cheap 123 | * Neutral 124 | * Fixed pricing per IP/month 125 | * Rotating IPs 126 | * These typically distribute your request across all of the available proxies that the proxy provider owns 127 | * Advantages 128 | * Potentially access to thousands of IPs 129 | * Neutral 130 | * Usually no fixed monthly pricing 131 | * Disadvantages 132 | * If you need to log in as a user you'll be making requests from hundreds of IPs, which is a huge red flag that you're a bot 133 | 134 |
136 | 137 |
137 | 138 | 139 | ## What Type Of Proxy Should You Use? 140 | 141 | The type of proxy you should use is highly dependent on the website you're trying to scrape; my best advice is to experiment to find the cheapest type that works reliably for you. 142 | 143 | There are a lot of changing variables that go into this, a few: websites updating their bot detection, more traffic from your proxy provider to the website from other users, or your proxy provider restricting some websites. For these reasons, I recommend re-evaluating your proxy provider and proxy type if you start having issues. 144 | 145 | ## How To Use Proxies In Python? 146 | I'll be using [webshare.io](https://www.webshare.io/?referral_code=3x5812idzzzp) (affiliate link) here, but other proxy providers have really similar interfaces. 147 | 148 | On the sidebar, if you go to proxy -> list 149 | 150 |
151 | 152 |
153 | 154 | Then you should see something that looks like the following 155 | 156 |
157 | 158 |
159 | 160 | If we move these credentials into a Python script we get something like 161 | ```py 162 | PROXY_ADDRESS = "127.0.0.1" 163 | PROXY_PORT = 8080 164 | PROXY_USERNAME = "subscribe" 165 | PROXY_PASS = "on_youtube" 166 | ``` 167 | 168 | I'll be using the `requests` Python package to make HTTP requests. So let's import that 169 | ```py 170 | PROXY_ADDRESS = "127.0.0.1" 171 | PROXY_PORT = 8080 172 | PROXY_USERNAME = "subscribe" 173 | PROXY_PASS = "on_youtube" 174 | 175 | import requests # run "pip install requests" to install this package 176 | ``` 177 | 178 | Next we can make a dictionary that contains the credentials to our proxy in a URL format. Note the comma between the two entries, and that we reuse the `PROXY_PASS` variable defined above. 179 | ```py 180 | PROXY_ADDRESS = "127.0.0.1" 181 | PROXY_PORT = 8080 182 | PROXY_USERNAME = "subscribe" 183 | PROXY_PASS = "on_youtube" 184 | 185 | import requests # run "pip install requests" to install this package 186 | 187 | proxy = { 188 | "http": f"http://{PROXY_USERNAME}:{PROXY_PASS}@{PROXY_ADDRESS}:{PROXY_PORT}", 189 | "https": f"http://{PROXY_USERNAME}:{PROXY_PASS}@{PROXY_ADDRESS}:{PROXY_PORT}" 190 | } 191 | ``` 192 | 193 | Finally, to make a request with the proxy we can just pass the proxy dictionary into a requests method 194 | 195 | ```py 196 | PROXY_ADDRESS = "127.0.0.1" 197 | PROXY_PORT = 8080 198 | PROXY_USERNAME = "subscribe" 199 | PROXY_PASS = "on_youtube" 200 | 201 | import requests # run "pip install requests" to install this package 202 | 203 | proxy = { 204 | "http": f"http://{PROXY_USERNAME}:{PROXY_PASS}@{PROXY_ADDRESS}:{PROXY_PORT}", 205 | "https": f"http://{PROXY_USERNAME}:{PROXY_PASS}@{PROXY_ADDRESS}:{PROXY_PORT}" 206 | } 207 | 208 | requests.get("https://github.com/davidteather/everything-web-scraping/stargazers", proxies=proxy) 209 | # You could also be on the stargazers list if you star this repo 😎 210 | ``` 211 | 212 | You could define multiple proxies and pick which one you want to use based on some logic. Maybe you want to randomly select a proxy to send the request through; you could define a bunch of proxy dictionaries and do something like the following 213 | ```py 214 | import random 215 | 216 | requests.get("https://github.com/davidteather/everything-web-scraping/stargazers", proxies=random.choice([proxy_1, proxy_2])) 217 | ``` 218 | 219 | It's better to abstract this random proxy usage to either just read in a file that contains all of your proxies, or you could use what's called a **rotating proxy**. [webshare.io](https://www.webshare.io/?referral_code=3x5812idzzzp) (affiliate link) offers a free one that distributes your requests across all of your proxies. 220 |
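For example, a small helper along these lines reads one proxy per line from a hypothetical `proxies.txt` file (the `address:port:username:password` line format is an assumption for this sketch, not something a provider mandates) and picks one at random for each request:

```py
import random

import requests

def load_proxies(path="proxies.txt"):
    # Each line is assumed to look like: address:port:username:password
    proxies = []
    with open(path) as f:
        for line in f:
            address, port, username, password = line.strip().split(":")
            url = f"http://{username}:{password}@{address}:{port}"
            proxies.append({"http": url, "https": url})
    return proxies

proxy_pool = load_proxies()
r = requests.get(
    "https://github.com/davidteather/everything-web-scraping/stargazers",
    proxies=random.choice(proxy_pool),
)
```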
222 | 223 |
224 | 225 |
226 | 227 |
228 | 229 | Note: fake credentials 230 | 231 | If we plug these credentials into the proxy dictionary, it'll distribute our requests across all of the proxies that we have on the site. 232 | 233 | ```py 234 | PROXY_ADDRESS = "p.webshare.io" 235 | PROXY_PORT = 80 236 | PROXY_USERNAME = "rotating-username" 237 | PROXY_PASS = "subscribe" 238 | 239 | import requests # run "pip install requests" to install this package 240 | 241 | proxy = { 242 | "http": f"http://{PROXY_USERNAME}:{PROXY_PASS}@{PROXY_ADDRESS}:{PROXY_PORT}", 243 | "https": f"http://{PROXY_USERNAME}:{PROXY_PASS}@{PROXY_ADDRESS}:{PROXY_PORT}" 244 | } 245 | 246 | requests.get("https://github.com/davidteather/everything-web-scraping/stargazers", proxies=proxy) 247 | # You could also be on the stargazers list if you star this repo 😎 248 | ``` 249 | 250 | ## Conclusion 251 | 252 | Congrats, you've finished another lesson :) 253 | 254 | If you liked this lesson please consider giving the repository a star, and if you have any suggestions I'd love to hear them on [YouTube](https://youtube.com/davidteathercodes), [Twitter](https://twitter.com/david_teather), or file an issue with the label suggestion! -------------------------------------------------------------------------------- /002-proxies/assets/5-req-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/davidteather/everything-web-scraping/6afdda03e1b84d64a8e460f6e7265a60a87bdbdc/002-proxies/assets/5-req-2.png -------------------------------------------------------------------------------- /002-proxies/assets/5-req.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/davidteather/everything-web-scraping/6afdda03e1b84d64a8e460f6e7265a60a87bdbdc/002-proxies/assets/5-req.png -------------------------------------------------------------------------------- /002-proxies/assets/pool-comp.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/davidteather/everything-web-scraping/6afdda03e1b84d64a8e460f6e7265a60a87bdbdc/002-proxies/assets/pool-comp.png -------------------------------------------------------------------------------- /002-proxies/assets/proxy-list.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/davidteather/everything-web-scraping/6afdda03e1b84d64a8e460f6e7265a60a87bdbdc/002-proxies/assets/proxy-list.png -------------------------------------------------------------------------------- /002-proxies/assets/rotating.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/davidteather/everything-web-scraping/6afdda03e1b84d64a8e460f6e7265a60a87bdbdc/002-proxies/assets/rotating.png -------------------------------------------------------------------------------- /002-proxies/assets/thumbnail.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/davidteather/everything-web-scraping/6afdda03e1b84d64a8e460f6e7265a60a87bdbdc/002-proxies/assets/thumbnail.png -------------------------------------------------------------------------------- /002-proxies/assets/type-comp.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/davidteather/everything-web-scraping/6afdda03e1b84d64a8e460f6e7265a60a87bdbdc/002-proxies/assets/type-comp.png -------------------------------------------------------------------------------- /002-proxies/assets/webshare-rotating.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/davidteather/everything-web-scraping/6afdda03e1b84d64a8e460f6e7265a60a87bdbdc/002-proxies/assets/webshare-rotating.png -------------------------------------------------------------------------------- /002-proxies/assets/webshare-tab.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/davidteather/everything-web-scraping/6afdda03e1b84d64a8e460f6e7265a60a87bdbdc/002-proxies/assets/webshare-tab.png -------------------------------------------------------------------------------- /002-proxies/assets/with-proxy.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/davidteather/everything-web-scraping/6afdda03e1b84d64a8e460f6e7265a60a87bdbdc/002-proxies/assets/with-proxy.png -------------------------------------------------------------------------------- /002-proxies/assets/without-proxy.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/davidteather/everything-web-scraping/6afdda03e1b84d64a8e460f6e7265a60a87bdbdc/002-proxies/assets/without-proxy.png -------------------------------------------------------------------------------- /003-beautiful-soup-with-static-site-and-server-side-rendered-web-scraping/README.md: -------------------------------------------------------------------------------- 1 | # Lesson 3 - BeautifulSoup With Static Site & Server Side Rendered Web Scraping 2 | 3 | This lesson is designed to teach you about how to extract data from static websites and websites that are server side rendered (SSR). We'll be using the python package BeautifulSoup to extract data from the HTML. 
4 | 5 | ### Supporting The Project 6 | * Star the repo 😎 7 | * Share it with someone new to web-scraping 8 | * Consider [sponsoring](https://github.com/sponsors/davidteather) me on GitHub 9 | * Send me an email or a [LinkedIn](https://www.linkedin.com/in/davidteather/) message telling me what you enjoy in the course (and what else you want to see in the future) 10 | * Submit PRs for suggestions/issues :) 11 | 12 | ## Learning Objectives 13 | * Learners will be able to compare and contrast static sites and server side rendered (SSR) sites, and explain how and why we're able to treat them the same as web scrapers 14 | * Learners will be able to explain basic HTML structure 15 | * Learners will be able to use Beautiful Soup to extract data from a static site 16 | * Learners will be able to identify if their desired data is statically or dynamically rendered 17 | 18 | ## Table of Contents 19 | 20 | * [Lesson Video](#lesson-video) 21 | * [Static Sites vs Server Side Rendered Sites (SSR)](#static-sites-vs-server-side-rendered-sites-ssr) 22 | * [Static Sites](#static-sites) 23 | * [Server Side Rendering (SSR)](#server-side-rendering-ssr) 24 | * [How We Can Web Scrape These Sites?](#how-we-can-web-scrape-these-sites) 25 | * [Basic HTML Structure](#basic-html-structure) 26 | * [Tags](#tags) 27 | * [Attributes](#attributes) 28 | * [Classes](#classes) 29 | * [Extracting Data Using BeautifulSoup](#extracting-data-using-beautifulsoup) 30 | * [Running The Website](#running-the-website) 31 | * [Installing BeautifulSoup](#installing-beautifulsoup) 32 | * [Getting HTML From A Website](#getting-html-from-a-website) 33 | * [Parsing HTML With BeautifulSoup](#parsing-html-with-beautifulsoup) 34 | * [Extracting Prices From Homepage](#extracting-prices-from-homepage) 35 | * [Using Developer Tools](#using-developer-tools) 36 | * [Extracting The First Price](#extracting-the-first-price) 37 | * [Extracting All Prices](#extracting-all-prices) 38 | * [Full Code](#full-code) 39 | * [Activities](#activities) 40 | 41 | 42 | ## Lesson Video 43 | 44 | [![](./assets/thumbnail.png?raw=true)](https://www.youtube.com/watch?v=_Ptvvjm15EA&list=PLmRtxHvzkEE8Ofiy4hnnXSoxw7gs4HOHt&index=4) 45 | 46 | [Watch Here](https://www.youtube.com/watch?v=_Ptvvjm15EA&list=PLmRtxHvzkEE8Ofiy4hnnXSoxw7gs4HOHt&index=4) 47 | 48 | ### Video Corrections 49 | None so far 50 | 51 | ## Static Sites vs Server Side Rendered Sites (SSR) 52 | 53 | ### Static Sites 54 | 55 | Static websites are just normal HTML files that are served to the user exactly as they're stored on the server, which means user-specific information is not included. Sites that don't require content to change are typically: portfolio websites, blogs, landing pages, documentation, etc. A concrete example is my security blog [The Response Times](https://theresponsetimes.com/), which is fully static. 56 |
58 | 59 |
60 | 61 | > Note: Many websites are static HTML files but use JavaScript to make requests for additional data, this type of site is out of scope for this lesson, but checkout [001 - Introduction To Forging API Requests](../001-introduction-to-forging-api-requests/README.md) to learn more. 62 | 63 | > The easiest way to tell if a website is static or not is to view the source code of the website, on most browsers you can right click the page and click "View Page Source". If the data you want is in the source code, then it's statically rendered. 64 | 65 | ### Server Side Rendering (SSR) 66 | 67 | In SSR a server will inject data into HTML before sending it to the client. This is a very widely used approach, but the data injected could be: current trending topics, your messages with another user, or the current weather, and much more. 68 | 69 | Since SSR returns HTML that the browser can render without doing extra work, it's popular because it decreases the page load time for the end user especially if they're viewing the website on a computationally weak device. 70 | 71 |
72 | 73 |
73 | 74 | ### How We Can Web Scrape These Sites? 75 | 76 | Since both of these sites return HTML with the data that we want in the file, we just need a way to parse HTML and select the data we want. Luckily, there are tons of Python packages that allow us to do this; in this video we'll be using BeautifulSoup. 77 | 78 | ## Basic HTML Structure 79 | 80 | But first, we need to learn the basic syntax and structure of HTML so that we can tell BeautifulSoup how to extract our data. 81 | 82 | ### Tags 83 | 84 | 85 | The most important parts of HTML are called tags. Tags build up the structure of the website, and each of them has a different purpose and is usually rendered differently by the web browser. All tags start with `<` and end with `>`; an example is the `<p>` opening tag, which represents a paragraph of text in HTML. 86 | 87 | Tags also must be closed using a closing tag like `</p>`. All content between the opening tag and the closing tag is the content within the element. 88 | 89 | A completed example 90 | ```html 91 | <p> 92 | Here's my HTML paragraph using the p tag! 93 | </p> 94 | ``` 95 | 96 | An example of how different tags are rendered differently on browsers is the header tags: they change the font size and range from `<h1>` being the largest to `<h6>` being the smallest. 97 | 98 | You can find a list of all supported tags [from Mozilla](https://developer.mozilla.org/en-US/docs/Web/HTML/Element) 99 | 100 | 101 | Another thing to note is that tags also can be nested 102 | ```html 103 | <div>
104 |     <h1>Larger Header Font Size!</h1> 105 |     <p>Paragraph text</p> 106 | </div>
107 | ``` 108 | 109 | ### Attributes 110 | 111 | The default behavior of tags can be modified by adding attributes to opening tags. 112 | 113 | For example the `<img>` tag is used to show an image and has multiple attributes that can be modified, which change how the image is displayed. 114 | ```html 115 | <img src="image.png" alt="Visual Description" width="300"> 116 | ``` 117 | 118 | ### Classes 119 | 120 | Classes apply user defined style sheets known as CSS that change how an element looks. In well designed websites, this is one of the best ways to select what parts you want to extract data from. 121 | 122 | You don't need to know CSS for this tutorial, but here's a basic example. 123 | ```html 124 |
<p class="green-text">This text is green</p> 125 | <p class="blue-background">This background is blue</p> 126 | <p class="green-text blue-background">This text is green and background blue</p>
127 | 128 | 129 | <style> 130 | .green-text { 131 | color: green; 132 | } 133 | .blue-background { 134 | background-color: blue; 135 | } 136 | </style> 137 | ``` 138 | 139 | ## Extracting Data Using BeautifulSoup 140 | 141 | Enough HTML! Let's start extracting data from a website using BeautifulSoup. 142 | 143 | ### Running The Website 144 | 145 | Visit [Running The Websites](../README.md#how-to-start-the-mock-websites) 146 | 147 | ### Installing BeautifulSoup 148 | 149 | If it's not already installed, run `pip install -r requirements.txt` in this directory, or do `pip install beautifulsoup4` 150 | 151 | ### Getting HTML From A Website 152 | 153 | We'll be using the `requests` package to get HTML from the website; you can install it with `pip install requests` 154 | 155 | ```python 156 | import requests 157 | 158 | WEBSITE_URL = "http://localhost:3000" 159 | 160 | r = requests.get(WEBSITE_URL) 161 | print(r.text) # print out the HTML 162 | ``` 163 | 164 | ### Parsing HTML With BeautifulSoup 165 | 166 | Now that we have the HTML in Python, we can use BeautifulSoup to parse it. 167 | 168 | ```python 169 | from bs4 import BeautifulSoup 170 | import requests 171 | 172 | WEBSITE_URL = "http://localhost:3000" 173 | 174 | r = requests.get(WEBSITE_URL) 175 | soup = BeautifulSoup(r.text, "html.parser") # parse the HTML 176 | print(soup) 177 | ``` 178 |
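Before extracting anything specific, it can help to poke around the parsed `soup` object a bit. A few illustrative one-liners (not part of the lesson's main flow):

```python
from bs4 import BeautifulSoup
import requests

WEBSITE_URL = "http://localhost:3000"

r = requests.get(WEBSITE_URL)
soup = BeautifulSoup(r.text, "html.parser")

print(soup.title)               # the page's <title> element
print(soup.find("h4"))          # the first <h4> element, or None if there isn't one
print(len(soup.find_all("p")))  # how many <p> elements the page has
```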
179 | ### Extracting Prices From Homepage 180 | 181 | To extract the prices from the homepage, we first have to look at the HTML and figure out how to select the right elements. 182 | 183 | I personally prefer to use developer tools, but you can also view the page source directly in the browser after right clicking the page. 184 | 185 | #### Using Developer Tools 186 | 187 | * Find the element you want to extract data from 188 | * Right click on the element and click `Inspect` 189 | * Then you can see the price $3.13 is in a `<p>` tag in the browser 190 | 191 | *(Step by step in video)* 192 | 193 | #### Extracting The First Price 194 | 195 | However you choose to view the HTML, you should see something like this 196 | ```html 197 | <div class="product-details"> 198 |     <h4>Sacha the Deer</h4> 199 | 200 |     <p>Sacha’s elegant antlers have never been se…</p> 201 | 202 |     <p>$3.13</p> 203 | </div> 204 | ``` 205 | 206 | We see the price is in a `<p>` tag without a class or id; this makes it a little bit more difficult, especially since there are multiple `<p>` tags, so we need to find a way to select the right one. 207 | 208 | To do this, we can first select the parent `<div>` with `class="product-details"`. 209 | 210 | ```python 211 | soup = BeautifulSoup(r.text, "html.parser") 212 | 213 | # select the parent div 214 | product_detail = soup.find("div", {"class": "product-details"}) # you can select by any attribute with this syntax 215 | 216 | print(product_detail) 217 | ``` 218 | 219 | Results in 220 | ```html 221 | <div class="product-details"> 222 |     <h4>Sacha the Deer</h4> 223 | 224 |     <p>Sacha’s elegant antlers have never been se…</p> 225 | 226 |     <p>$3.13</p> 227 | </div> 228 | ``` 229 | 230 | We need to select the `<p>` tag since that contains the price of the item; it's a child of `product_detail`, so we can select it like so. 231 | ```python 232 | print(product_detail.p) 233 | ``` 234 | 235 | Which returns 236 | ```html 237 | <p>Sacha’s elegant antlers have never been se…</p> 238 | ``` 239 | 240 | But, since there are multiple `<p>` tags that are children of `product_detail`, we need to select the right one. We can select all children of an element with `find_all()` or `findChildren()` 241 | ```python 242 | # We can make another find_all call to find the children elements of a given object 243 | for child in product_detail.find_all('p'): 244 |     print(child) 245 | 246 | # Or we can use findChildren() 247 | # If we want to see it in an array format 248 | children = product_detail.findChildren('p') 249 | print(children) 250 | ``` 251 | 252 | ```html 253 | <p>Sacha’s elegant antlers have never been se…</p> 254 | <p>$3.13</p> 255 | [<p>Sacha’s elegant antlers have never been se…</p>, <p>$3.13</p>] 256 | ``` 257 | 258 | Now we can select the price by selecting the second `<p>` tag 259 | ```python 260 | children = product_detail.findChildren('p') 261 | price = children[1] 262 | print(price) 263 | ``` 264 | 265 | ```html 266 | <p>$3.13</p> 267 | ``` 268 | 269 | Last step is stripping the HTML tags from the price with `get_text()` 270 | ```python 271 | price = children[1].get_text() 272 | print(price) 273 | ``` 274 | 275 | ``` 276 | $3.13 277 | ``` 278 | 279 | We got the first price! 🤠 280 | 281 | #### Extracting All Prices 282 | 283 | In our previous code we were only getting the first price, but we want to get all the prices. To do this we can use `find_all()` to get all the
`<div>` tags with `class="product-details"`. 284 | 285 | Then we can use the same code as before to get the price for each product 286 | ```python 287 | for product in soup.find_all('div', attrs={'class': 'product-details'}): 288 |     # We know it's the second child we want 289 |     children = product.findChildren('p') 290 | 291 |     # Get rid of the HTML tags 292 |     price = children[1].get_text() 293 |     print(price) 294 | ``` 295 | 296 | #### Full Code 297 | 298 | ```python 299 | from bs4 import BeautifulSoup 300 | import requests 301 | 302 | WEBSITE_URL = "http://localhost:3000" 303 | 304 | r = requests.get(WEBSITE_URL) 305 | soup = BeautifulSoup(r.text, "html.parser") # parse the HTML 306 | 307 | for product in soup.find_all('div', attrs={'class': 'product-details'}): 308 |     # We know it's the second child we want 309 |     children = product.findChildren('p') 310 | 311 |     # Get rid of the HTML tags 312 |     price = children[1].get_text() 313 |     print(price) 314 | ``` 315 | 316 | ## Activities 317 | 318 | Here are some additional activities to help you practice web scraping. After trying for a bit, use the hints to help you out. If you're still stuck, the solutions are in the video! 319 | 320 | Modify `activities.py` and complete the functions. 321 | 322 | **Do not** change the method names; however, feel free to call those methods if you want to test them out in the `if __name__ == "__main__"` section. 323 | 324 | ### Testing 325 | 326 | To check if your implementations are correct, run `python test.py`; this will import the functions you made. It will tell you what tests failed if any, and will show a success message if all tests passed. 327 | 328 | > Note: Bonuses are not tested, so you'll have to check those yourself. 329 | 330 | ### 1: Product Title & Prices 331 | * Return a list of the product titles and prices 332 | * Ex: `["Sacha the Deer ($3.13)", ...]` 333 | * <details>
334 | <summary>Hint</summary> 335 | You'll need to select the parent of all product features, which is all of the <li> elements under the <ul class="product-list"> element 336 | </details>
337 | 338 | ### 2: Get All Colors Available For Each Product 339 | * Return each product's title and color options as a list of strings 340 | * Ex: `["Sacha the Deer (#000000, #39589e, #9c5145, #dfd3c2)", ...]` 341 | * <details>
342 | <summary>Hint: Extracting Attributes</summary> 343 | You can access attributes of an element with get('attribute_name') 344 | 345 | Ex: `product.find('a').get('href')` 346 | </details>
347 | 348 | ### 3: Get Every Product's Material 349 | * This is visible when you click into a product's page 350 | * Return each product's title and material as a list of strings 351 | * Ex: `["Bumble the Elephant made of 70% Cotton, 30% Nylon", ...]` 352 | * <details>
353 | <summary>Hint</summary> 354 | You'll need to make an additional HTML request for each product, and a new BeautifulSoup object for each product page. 355 | </details>
356 | 357 | ### 4: Filter all the products from highest reviewed to lowest reviewed 358 | * Return a list of the products sorted by the star count 359 | ``` 360 | [('Scar the Lion', 5), 361 | ('Gerald the Giraffe', 4), 362 | ('Gavin the Tiger', 4), 363 | ('Sacha the Deer', 3), 364 | ('Bumble the Elephant', 3), 365 | ('Todd the Hedgehog', 2)] 366 | ``` 367 | 368 | ### 5: Product Availability 369 | * Not all products are available, look at `Gerald the Giraffe` 370 | * Return a list of strings of all products and their availability 371 | * Ex: `["Sacha the Deer is available: True", ...]` 372 | * <details>
373 | <summary>Bonus</summary> 374 | Add some logic to check products every X minutes, so you can be notified when products come back in stock. 375 | 376 | Note: This website won't have items come back into stock, but if it was a real website you could have a Discord bot or something notify you. 377 | - Example: Old commissioned project I made [here](https://github.com/davidteather/Hotukdeals-Discord-Notifier) 378 | </details>
379 | 380 | ### 6: Scrape Reviews For Each Product 381 | * Return a dictionary with structure `{"product_title": [{"rating": "5", "review_title": "Great!", "review_full": "I love it"}, ...], ...}` 382 | * Ex: `{"Sacha the Deer": [{'rating': '5', 'review_title': 'V neck', 'review_full': 'Great shirt. love the detail in back. feminine and different than the average t'}]}` 383 | * <details>
384 | <summary>Bonus</summary> 385 | Try and do sentiment analysis on product reviews and sort by ones with the best average sentiment. 386 | 387 | You might find [this article](https://realpython.com/python-nltk-sentiment-analysis/#using-nltks-pre-trained-sentiment-analyzer) helpful 388 | </details>
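If you want a head start on that bonus, here's a minimal sketch using NLTK's pre-trained VADER analyzer; it assumes you've run `pip install nltk` and that your `product_reviews()` from `activities.py` is working:

```python
import nltk
from nltk.sentiment import SentimentIntensityAnalyzer

from activities import product_reviews

nltk.download("vader_lexicon")  # one-time download of VADER's lexicon
sia = SentimentIntensityAnalyzer()

# Average the compound sentiment score of each product's reviews
averages = {
    product: sum(sia.polarity_scores(r["review_full"])["compound"] for r in reviews) / len(reviews)
    for product, reviews in product_reviews().items()
}

for product, score in sorted(averages.items(), key=lambda kv: kv[1], reverse=True):
    print(f"{product}: {score:.3f}")
```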
389 | 390 | ### Solutions 391 | 392 | * [1: Product Title & Prices](https://youtu.be/_Ptvvjm15EA?list=PLmRtxHvzkEE8Ofiy4hnnXSoxw7gs4HOHt&t=506) 393 | * [2: Get All Colors Available For Each Product](https://youtu.be/_Ptvvjm15EA?list=PLmRtxHvzkEE8Ofiy4hnnXSoxw7gs4HOHt&t=547) 394 | * [3: Get Every Product's Material](https://youtu.be/_Ptvvjm15EA?list=PLmRtxHvzkEE8Ofiy4hnnXSoxw7gs4HOHt&t=620) 395 | * [4: Filter all the products from highest reviewed to lowest reviewed](https://youtu.be/_Ptvvjm15EA?list=PLmRtxHvzkEE8Ofiy4hnnXSoxw7gs4HOHt&t=679) 396 | * [5: Product Availability](https://youtu.be/_Ptvvjm15EA?list=PLmRtxHvzkEE8Ofiy4hnnXSoxw7gs4HOHt&t=710) 397 | * [6: Scrape Reviews For Each Product](https://youtu.be/_Ptvvjm15EA?list=PLmRtxHvzkEE8Ofiy4hnnXSoxw7gs4HOHt&t=744) 398 | 399 | ## Conclusion 400 | 401 | Congrats, you've finished another lesson :) 402 | 403 | If you liked this lesson please consider giving the repository a star, and if you have any suggestions I'd love to hear them on [YouTube](https://youtube.com/davidteathercodes), [Twitter](https://twitter.com/david_teather), or file an issue with the label suggestion! -------------------------------------------------------------------------------- /003-beautiful-soup-with-static-site-and-server-side-rendered-web-scraping/activities.py: -------------------------------------------------------------------------------- 1 | import requests 2 | 3 | # Note: the tester relies on this variable, update it if you are running the server on a different port 4 | WEBSITE_BASE_URL = "http://localhost:3000" 5 | 6 | # Activity 1: Product Titles & Prices 7 | # Return a list of the product titles and prices 8 | # Ex: ["Sacha the Deer ($3.13)", ...] 9 | def title_and_prices(): 10 | return ["Sacha the Deer ($3.13)"] 11 | 12 | # Activity 2: Get All Colors Available For Each Product 13 | # Return each product's title and color options as a list of strings 14 | # Ex: ["Sacha the Deer (#000000, #39589e, #9c5145, #dfd3c2)", ...] 15 | def product_colors(): 16 | return ["Sacha the Deer (#000000, #39589e, #9c5145, #dfd3c2)"] 17 | 18 | # Activity 3: Get All Product's Material 19 | # Return each product's title and material as a list of strings 20 | # Ex: ["Bumble the Elephant made of 70% Cotton, 30% Nylon", ...] 21 | def product_materials(): 22 | return ["Bumble the Elephant made of 70% Cotton, 30% Nylon"] 23 | 24 | # Activity 4: Filter all the products from highest reviewed to lowest reviewed 25 | # Return a list of the product titles and average rating as a tuple 26 | # Ex: [('Scar the Lion', 5), ...] 27 | def highest_reviewed(): 28 | return [("Scar the Lion", 5), ("Sacha the Deer", 5)] 29 | 30 | # Activity 5: Product Availability 31 | # Not all products are available, look at `Gerald the Giraffe` 32 | # Return a list of strings of all products and their availability 33 | # Ex: ["Sacha the Deer is available: True", ...] 34 | def product_availability(): 35 | return ["Sacha the Deer is available: True"] 36 | 37 | # Activity 6: Scrape Reviews For Each Product 38 | # Return a dictionary with structure {"product_title": [{"rating": "5", "review_title": "Great!", "review_full": "I love it"}, ...], ...} 39 | # Ex: {"Sacha the Deer": [{'rating': '5', 'review_title': 'V neck', 'review_full': 'Great shirt. love the detail in back. feminine and different than the average t'}, ...]} 40 | def product_reviews(): 41 | return {"Sacha the Deer": [{'rating': '5', 'review_title': 'V neck', 'review_full': 'Great shirt. love the detail in back.
feminine and different than the average t'}]} 42 | 43 | if __name__ == "__main__": 44 | # Optional: You can call your methods here if you want to test them without running the tester 45 | # print(title_and_prices()) 46 | pass -------------------------------------------------------------------------------- /003-beautiful-soup-with-static-site-and-server-side-rendered-web-scraping/assets/ssr-sites.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/davidteather/everything-web-scraping/6afdda03e1b84d64a8e460f6e7265a60a87bdbdc/003-beautiful-soup-with-static-site-and-server-side-rendered-web-scraping/assets/ssr-sites.png -------------------------------------------------------------------------------- /003-beautiful-soup-with-static-site-and-server-side-rendered-web-scraping/assets/static-sites.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/davidteather/everything-web-scraping/6afdda03e1b84d64a8e460f6e7265a60a87bdbdc/003-beautiful-soup-with-static-site-and-server-side-rendered-web-scraping/assets/static-sites.png -------------------------------------------------------------------------------- /003-beautiful-soup-with-static-site-and-server-side-rendered-web-scraping/assets/thumbnail.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/davidteather/everything-web-scraping/6afdda03e1b84d64a8e460f6e7265a60a87bdbdc/003-beautiful-soup-with-static-site-and-server-side-rendered-web-scraping/assets/thumbnail.png -------------------------------------------------------------------------------- /003-beautiful-soup-with-static-site-and-server-side-rendered-web-scraping/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '3.9' 2 | 3 | services: 4 | website: 5 | build: 6 | context: ./website 7 | ports: 8 | - 3000:3000 -------------------------------------------------------------------------------- /003-beautiful-soup-with-static-site-and-server-side-rendered-web-scraping/requirements.txt: -------------------------------------------------------------------------------- 1 | beautifulsoup4==4.12.2 2 | requests==2.31.0 3 | pytest==7.4.0 -------------------------------------------------------------------------------- /003-beautiful-soup-with-static-site-and-server-side-rendered-web-scraping/test.py: -------------------------------------------------------------------------------- 1 | # Grades submission.py on the test cases 2 | # Don't look inside this if you haven't passed the tests yet 3 | 4 | from activities import WEBSITE_BASE_URL, title_and_prices, product_colors, product_materials, highest_reviewed, product_availability, product_reviews 5 | import requests 6 | from bs4 import BeautifulSoup 7 | import pytest 8 | 9 | def test_title_and_prices(): 10 | r = requests.get(WEBSITE_BASE_URL) 11 | soup = BeautifulSoup(r.text, 'html.parser') 12 | 13 | prices = [] 14 | for product in soup.find_all('div', attrs={'class': 'product-details'}): 15 | children = product.findChildren('p') 16 | price = children[1].get_text() 17 | title = product.h4.get_text() 18 | prices.append(f"{title} ({price})") 19 | 20 | actual = title_and_prices() 21 | assert prices == actual, f'Got {actual} but expected {prices}' 22 | 23 | def test_product_colors(): 24 | r = requests.get(WEBSITE_BASE_URL) 25 | soup = BeautifulSoup(r.text, 'html.parser') 26 | 27 | product_parent = 
soup.find('ul', attrs={'class': 'product-list'}) 28 | result = [] 29 | for product in product_parent.find_all('li'): 30 | style_picker = product.find('div', attrs={'class': 'style-picker'}) 31 | 32 | # To get the hex codes we want to look at the style attribute 33 | # We can use the get method to get the style attribute 34 | hex_codes = [] 35 | 36 | if style_picker is None: 37 | # The site has some extra
<li> elements for spacing that aren't products 38 | continue 39 | 40 | for color in style_picker.find_all('div'): 41 | style = color.get('style') 42 | hex_code = style.split(": ")[1] 43 | hex_codes.append(hex_code) 44 | 45 | product_title = product.h4.get_text() 46 | result.append(f"{product_title} ({', '.join(hex_codes)})") 47 | 48 | actual = product_colors() 49 | assert result == actual, f"Got {actual} but expected {result}" 50 | 51 | def test_product_materials(): 52 | r = requests.get(WEBSITE_BASE_URL) 53 | soup = BeautifulSoup(r.text, 'html.parser') 54 | 55 | result = [] 56 | product_parent = soup.find('ul', attrs={'class': 'product-list'}) 57 | for product in product_parent.find_all('li'): 58 | a_tag = product.a 59 | 60 | if a_tag is None: 61 | # Again, has extra <li> for spacing that aren't products 62 | continue 63 | 64 | product_link = product.a.get('href') 65 | product_link = WEBSITE_BASE_URL + product_link 66 | 67 | # We can now use the product_link to get the specific product's page which contains information on 68 | # the product's material 69 | product_page = requests.get(product_link) 70 | product_soup = BeautifulSoup(product_page.text, 'html.parser') 71 | 72 | # The material is in a <p> tag with id of "material" 73 | material = product_soup.find('p', attrs={'id': 'material'}).get_text() 74 | 75 | # Get the product title for printing 76 | product_title = product.h4.get_text() 77 | result.append(f"{product_title} made of {material}") 78 | 79 | actual = product_materials() 80 | assert result == actual, f"Got {actual} but expected {result}" 81 | 82 | def test_highest_reviewed(): 83 | r = requests.get(WEBSITE_BASE_URL) 84 | soup = BeautifulSoup(r.text, 'html.parser') 85 | 86 | product_ratings_list = [] 87 | product_parent = soup.find('ul', attrs={'class': 'product-list'}) 88 | for product in product_parent.find_all('li'): 89 | a_tag = product.a 90 | 91 | if a_tag is None: 92 | # Again, has extra <li> for spacing that aren't products 93 | continue 94 | 95 | product_link = product.a.get('href') 96 | product_link = WEBSITE_BASE_URL + product_link 97 | 98 | product_page = requests.get(product_link) 99 | product_soup = BeautifulSoup(product_page.text, 'html.parser') 100 | 101 | # Get the star container 102 | star_container = product_soup.find('div', attrs={'class': 'star-rating'}) 103 | 104 | # A star is marked as a full star if it has the class "checked" 105 | # We can use this to count the number of full stars 106 | full_stars_list = star_container.find_all('span', attrs={'class': 'checked'}) 107 | full_stars = len(full_stars_list) 108 | 109 | product_title = product.h4.get_text() 110 | 111 | product_ratings_list.append((product_title, full_stars)) 112 | 113 | # Sort product_ratings_list by the number of stars 114 | product_ratings_list.sort(key=lambda x: x[1], reverse=True) 115 | actual = highest_reviewed() 116 | assert product_ratings_list == actual, f"Got {actual} but expected {product_ratings_list}" 117 | 118 | def test_product_availability(): 119 | r = requests.get(WEBSITE_BASE_URL) 120 | soup = BeautifulSoup(r.text, 'html.parser') 121 | 122 | result = [] 123 | product_parent = soup.find('ul', attrs={'class': 'product-list'}) 124 | for product in product_parent.find_all('li'): 125 | a_tag = product.a 126 | 127 | if a_tag is None: 128 | # Again, has extra <li> for spacing that aren't products 129 | continue 130 | 131 | product_link = product.a.get('href') 132 | product_link = WEBSITE_BASE_URL + product_link 133 | 134 | product_page = requests.get(product_link) 135 | product_soup = BeautifulSoup(product_page.text, 'html.parser') 136 | 137 | # Button is a <div> with class "button" 138 | button = product_soup.find('div', attrs={'class': 'button'}) 139 | button_text = button.get_text() 140 | 141 | is_available = True 142 | if button_text == "Out of Stock": 143 | is_available = False 144 | 145 | product_title = product.h4.get_text() 146 | 147 | result.append(f"{product_title} is available: {is_available}") 148 | 149 | actual = product_availability() 150 | assert result == actual, f"Got {actual} but expected {result}" 151 | 152 | def test_product_reviews(): 153 | r = requests.get(WEBSITE_BASE_URL) 154 | soup = BeautifulSoup(r.text, 'html.parser') 155 | 156 | expected_product_reviews = {} 157 | product_parent = soup.find('ul', attrs={'class': 'product-list'}) 158 | for product in product_parent.find_all('li'): 159 | a_tag = product.a 160 | 161 | if a_tag is None: 162 | # Again, has extra <li> for spacing that aren't products 163 | continue 164 | 165 | product_link = product.a.get('href') 166 | product_link = WEBSITE_BASE_URL + product_link 167 | 168 | product_page = requests.get(product_link) 169 | product_soup = BeautifulSoup(product_page.text, 'html.parser') 170 | 171 | reviews_main = product_soup.find('div', attrs={'class': 'product-ratings'}) 172 | reviews = reviews_main.find_all('div', attrs={'class': 'product-rating'}) 173 | 174 | product_title = product.h4.get_text() 175 | 176 | for review in reviews: 177 | r = review.find_all('span', attrs={'class': 'checked'}) 178 | 179 | review_rating = review.find('span', attrs={'class': 'rating-number'}).get_text() 180 | review_title = review.find('div', attrs={'class': 'rating-title'}).get_text() 181 | review_full = review.find('div', attrs={'class': 'rating-review'}).get_text() 182 | 183 | review = { 184 | 'rating': review_rating, 185 | 'review_title': review_title, 186 | 'review_full': review_full 187 | } 188 | 189 | if product_title in expected_product_reviews: 190 | expected_product_reviews[product_title].append(review) 191 | else: 192 | expected_product_reviews[product_title] = [review] 193 | 194 | actual = product_reviews() 195 | assert expected_product_reviews == actual, f"Got {actual} but expected {expected_product_reviews}" 196 | 197 | if __name__ == "__main__": 198 | pytest.main([__file__]) -------------------------------------------------------------------------------- /003-beautiful-soup-with-static-site-and-server-side-rendered-web-scraping/website/301.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/davidteather/everything-web-scraping/6afdda03e1b84d64a8e460f6e7265a60a87bdbdc/003-beautiful-soup-with-static-site-and-server-side-rendered-web-scraping/website/301.txt -------------------------------------------------------------------------------- /003-beautiful-soup-with-static-site-and-server-side-rendered-web-scraping/website/404.html: -------------------------------------------------------------------------------- 1 | --- 2 | title: Not Found 3 | hero: This does not exist 4 | permalink: /404.html 5 | sitemap: false 6 | --- 7 | 8 |
<p>This does not exist</p>
    9 | -------------------------------------------------------------------------------- /003-beautiful-soup-with-static-site-and-server-side-rendered-web-scraping/website/Dockerfile: -------------------------------------------------------------------------------- 1 | # Use the official Ruby image from the Docker Hub 2 | FROM ruby:3.0.0 3 | 4 | # Install the necessary libraries 5 | RUN apt-get update -qq && apt-get install -y build-essential libpq-dev 6 | 7 | # Install Jekyll and Bundler 8 | RUN gem install jekyll 9 | RUN gem install bundler:1.17.3 10 | 11 | 12 | # Create a new directory for your Jekyll site 13 | RUN mkdir /usr/src/app 14 | 15 | # Change to the new directory 16 | WORKDIR /usr/src/app 17 | 18 | # Copy Gemfile and Gemfile.lock 19 | COPY Gemfile* ./ 20 | 21 | # Install the Gems 22 | RUN bundle install 23 | 24 | # Copy the rest of your Jekyll site to the image 25 | COPY . . 26 | 27 | # Make port 4000 available to the world outside this container 28 | EXPOSE 3000 29 | 30 | # Execute Jekyll serve command 31 | CMD ["bundle", "exec", "jekyll", "serve", "--host", "0.0.0.0", "--port", "3000"] 32 | -------------------------------------------------------------------------------- /003-beautiful-soup-with-static-site-and-server-side-rendered-web-scraping/website/Gemfile: -------------------------------------------------------------------------------- 1 | source 'https://rubygems.org' 2 | 3 | gem 'jekyll', '4.2.1' 4 | 5 | group :jekyll_plugins do 6 | gem 'jekyll-seo-tag', '2.6.1' 7 | gem 'jekyll-sitemap', '1.3.1' 8 | end 9 | 10 | gem 'webrick', '1.8.1' 11 | -------------------------------------------------------------------------------- /003-beautiful-soup-with-static-site-and-server-side-rendered-web-scraping/website/Gemfile.lock: -------------------------------------------------------------------------------- 1 | GEM 2 | remote: https://rubygems.org/ 3 | specs: 4 | addressable (2.8.0) 5 | public_suffix (>= 2.0.2, < 5.0) 6 | colorator (1.1.0) 7 | concurrent-ruby (1.1.9) 8 | em-websocket (0.5.3) 9 | eventmachine (>= 0.12.9) 10 | http_parser.rb (~> 0) 11 | eventmachine (1.2.7) 12 | eventmachine (1.2.7-x64-mingw32) 13 | ffi (1.15.5) 14 | ffi (1.15.5-x64-mingw32) 15 | forwardable-extended (2.6.0) 16 | http_parser.rb (0.8.0) 17 | i18n (1.8.11) 18 | concurrent-ruby (~> 1.0) 19 | jekyll (4.2.1) 20 | addressable (~> 2.4) 21 | colorator (~> 1.0) 22 | em-websocket (~> 0.5) 23 | i18n (~> 1.0) 24 | jekyll-sass-converter (~> 2.0) 25 | jekyll-watch (~> 2.0) 26 | kramdown (~> 2.3) 27 | kramdown-parser-gfm (~> 1.0) 28 | liquid (~> 4.0) 29 | mercenary (~> 0.4.0) 30 | pathutil (~> 0.9) 31 | rouge (~> 3.0) 32 | safe_yaml (~> 1.0) 33 | terminal-table (~> 2.0) 34 | jekyll-sass-converter (2.1.0) 35 | sassc (> 2.0.1, < 3.0) 36 | jekyll-seo-tag (2.6.1) 37 | jekyll (>= 3.3, < 5.0) 38 | jekyll-sitemap (1.3.1) 39 | jekyll (>= 3.7, < 5.0) 40 | jekyll-watch (2.2.1) 41 | listen (~> 3.0) 42 | kramdown (2.3.1) 43 | rexml 44 | kramdown-parser-gfm (1.1.0) 45 | kramdown (~> 2.0) 46 | liquid (4.0.3) 47 | listen (3.7.1) 48 | rb-fsevent (~> 0.10, >= 0.10.3) 49 | rb-inotify (~> 0.9, >= 0.9.10) 50 | mercenary (0.4.0) 51 | pathutil (0.16.2) 52 | forwardable-extended (~> 2.6) 53 | public_suffix (4.0.6) 54 | rb-fsevent (0.11.0) 55 | rb-inotify (0.10.1) 56 | ffi (~> 1.0) 57 | rexml (3.2.5) 58 | rouge (3.27.0) 59 | safe_yaml (1.0.5) 60 | sassc (2.4.0) 61 | ffi (~> 1.9) 62 | sassc (2.4.0-x64-mingw32) 63 | ffi (~> 1.9) 64 | terminal-table (2.0.0) 65 | unicode-display_width (~> 1.1, >= 1.1.1) 66 | unicode-display_width (1.8.0) 67 | 
webrick (1.8.1) 68 | 69 | PLATFORMS 70 | ruby 71 | x64-mingw32 72 | 73 | DEPENDENCIES 74 | jekyll (= 4.2.1) 75 | jekyll-seo-tag (= 2.6.1) 76 | jekyll-sitemap (= 1.3.1) 77 | webrick (= 1.8.1) 78 | 79 | BUNDLED WITH 80 | 1.17.3 81 | -------------------------------------------------------------------------------- /003-beautiful-soup-with-static-site-and-server-side-rendered-web-scraping/website/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2016 CloudCannon 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /003-beautiful-soup-with-static-site-and-server-side-rendered-web-scraping/website/Makefile: -------------------------------------------------------------------------------- 1 | dev: 2 | bundle exec jekyll serve --drafts --livereload 3 | 4 | build: 5 | bundle exec jekyll build 6 | 7 | install: 8 | gem install bundler jekyll && bundle update -------------------------------------------------------------------------------- /003-beautiful-soup-with-static-site-and-server-side-rendered-web-scraping/website/README.md: -------------------------------------------------------------------------------- 1 | # Web Scraping Workshop Website Madhacks 2023 2 | 3 | This is heavily based upon [CloudCannon/fur-jekyll-template](https://github.com/CloudCannon/fur-jekyll-template) modified for use for this workshop! This does **not** have a database or any backend to it at all, the main purpose of this website in the workshop is to teach static site web scraping with Beautiful Soup. 
4 | 5 | Reviews from [this dataset](https://www.kaggle.com/datasets/nicapotato/womens-ecommerce-clothing-reviews) -------------------------------------------------------------------------------- /003-beautiful-soup-with-static-site-and-server-side-rendered-web-scraping/website/TODO: -------------------------------------------------------------------------------- 1 | * Fix search to actually work 2 | * Categorical search (category.html) with sliders and such would be nice if filtering there also worked -------------------------------------------------------------------------------- /003-beautiful-soup-with-static-site-and-server-side-rendered-web-scraping/website/_config.yml: -------------------------------------------------------------------------------- 1 | # ---- 2 | # Site 3 | 4 | title: Fur 5 | google_analytics_key: 6 | google_maps_javascript_api_key: 7 | 8 | # Values for the jekyll-seo-tag gem (https://github.com/jekyll/jekyll-seo-tag) 9 | logo: /siteicon.png 10 | description: Fur t-shirt store 11 | author: 12 | name: fur 13 | email: fur@example.com 14 | twitter: fur # twitter username without the @ symbol 15 | phone: "+1 23456789" 16 | social: 17 | name: Fur Template 18 | links: 19 | - https://github.com/CloudCannon/fur-jekyll-template 20 | 21 | # ----- 22 | # Build 23 | timezone: Etc/UTC 24 | 25 | collections: 26 | staff_members: 27 | output: false 28 | products: 29 | output: true 30 | 31 | permalink: pretty 32 | 33 | defaults: 34 | - 35 | scope: 36 | path: "" 37 | type: "products" 38 | values: 39 | layout: "product" 40 | - 41 | scope: 42 | path: "" 43 | values: 44 | layout: "page" 45 | - 46 | scope: 47 | path: "index.html" 48 | values: 49 | layout: "default" 50 | 51 | jekyll-archives: 52 | enabled: 53 | - categories 54 | 55 | plugins: 56 | - jekyll-sitemap 57 | - jekyll-seo-tag 58 | 59 | exclude: 60 | - Gemfile 61 | - Gemfile.lock 62 | - README.md 63 | - LICENCE 64 | 65 | social_icons: 66 | - Email 67 | - Facebook 68 | - Google Plus 69 | - Instagram 70 | - LinkedIn 71 | - Pinterest 72 | - Tumblr 73 | - Twitter 74 | - YouTube 75 | - RSS 76 | -------------------------------------------------------------------------------- /003-beautiful-soup-with-static-site-and-server-side-rendered-web-scraping/website/_data/navigation.yml: -------------------------------------------------------------------------------- 1 | - name: Products 2 | link: / 3 | new_window: false 4 | highlight: false 5 | - name: Our Story 6 | link: /about/ 7 | new_window: false 8 | highlight: false 9 | - name: Contact 10 | link: /contact/ 11 | new_window: false 12 | highlight: false -------------------------------------------------------------------------------- /003-beautiful-soup-with-static-site-and-server-side-rendered-web-scraping/website/_includes/logo.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | -------------------------------------------------------------------------------- /003-beautiful-soup-with-static-site-and-server-side-rendered-web-scraping/website/_includes/navigation.html: -------------------------------------------------------------------------------- 1 | 17 | -------------------------------------------------------------------------------- /003-beautiful-soup-with-static-site-and-server-side-rendered-web-scraping/website/_includes/product-styles.html: 
-------------------------------------------------------------------------------- 1 |
    2 | {% for style in include.product.styles %} 3 |
    4 | 5 | 6 | 7 |
    8 | {% endfor %} 9 | 10 |
    11 | {% for style in include.product.styles %} 12 |
    13 | {% endfor %} 14 |
    15 |
    16 | -------------------------------------------------------------------------------- /003-beautiful-soup-with-static-site-and-server-side-rendered-web-scraping/website/_includes/relative-src.html: -------------------------------------------------------------------------------- 1 | {% assign prefix = include.src | slice: 0, 2 %}{% assign protocol = include.src | slice: 0, 4 %}{% unless protocol == 'http' or prefix == "//" %}{{ site.baseurl }}{% endunless %}{{ include.src }} 2 | -------------------------------------------------------------------------------- /003-beautiful-soup-with-static-site-and-server-side-rendered-web-scraping/website/_includes/snipcart-button.html: -------------------------------------------------------------------------------- 1 | {% assign colors = "" %} 2 | {% for style in include.product.styles %} 3 | {% assign colors = colors | append: '|' | append: style.name %} 4 | {% endfor %} 5 | {% assign colors = colors | remove_first: '|' %} 6 | 20 | -------------------------------------------------------------------------------- /003-beautiful-soup-with-static-site-and-server-side-rendered-web-scraping/website/_includes/social-icon.html: -------------------------------------------------------------------------------- 1 | {% case include.icon %} 2 | {% when "Email" %} 3 | 4 | {% when "Facebook" %} 5 | 6 | {% when "Facebook2" %} 7 | 8 | {% when "Instagram" %} 9 | 10 | {% when "LinkedIn" %} 11 | 12 | {% when "Pinterest" %} 13 | 14 | {% when "Tumblr" %} 15 | 16 | {% when "Twitter" %} 17 | 18 | {% when "YouTube" %} 19 | 20 | {% when "RSS" %} 21 | 22 | {% endcase %} 23 | -------------------------------------------------------------------------------- /003-beautiful-soup-with-static-site-and-server-side-rendered-web-scraping/website/_layouts/default.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | {% seo %} 9 | 10 | 11 | 12 | 13 | 14 | 15 | {% if jekyll.environment == 'production' and site.google_analytics_key != '' %} 16 | 21 | 22 | {% endif %} 23 | 24 | 25 | 26 | 27 | 28 | {% if site.snipcart_key %} 29 | 30 | 35 | {% endif %} 36 | 37 | 38 | 39 |
    40 |
    41 | 42 | {% include navigation.html %} 43 |
    44 | 45 |
    46 |
    47 | {% if page.hero %} 48 |

    {{ page.hero }}

    49 | {% elsif page.name %} 50 |

    {{ page.name }}

    51 | {% endif %} 52 | 53 | {% if page.subtitle %} 54 |

    {{ page.subtitle }}

    55 | {% endif %} 56 |
    57 |
    58 |
    59 | {{ content }} 60 | 102 | 108 | 109 | 110 | 111 | -------------------------------------------------------------------------------- /003-beautiful-soup-with-static-site-and-server-side-rendered-web-scraping/website/_layouts/page.html: -------------------------------------------------------------------------------- 1 | --- 2 | layout: default 3 | --- 4 |
    5 |
    6 |
    7 | {{ content }} 8 |
    9 |
    10 |
    11 | -------------------------------------------------------------------------------- /003-beautiful-soup-with-static-site-and-server-side-rendered-web-scraping/website/_layouts/product.html: -------------------------------------------------------------------------------- 1 | --- 2 | layout: default 3 | --- 4 |
    5 |
    6 |
    7 |
    8 | {% include product-styles.html product=page %} 9 |
    10 |

    {{ page.name }}

    11 |
    12 |
    13 | {% for i in (1..5) %} 14 | {% if i <= page.avg_rating %} 15 | 16 | {% else %} 17 | 18 | {% endif %} 19 | {% endfor %} 20 |
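{% comment %}
Editor's note: this loop draws five stars for the product average; stars with
index i <= page.avg_rating use the checked style from _sass/products.scss and
the rest render unchecked (the icon tags themselves were stripped from this
dump). The per-review loop further down repeats the pattern with review.rating.
{% endcomment %}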
    21 |
    ({{ page.reviews.size }} ratings)
    22 |
    23 | 24 | 25 | {{ page.description_markdown | markdownify }} 26 | 27 |

    {{ page.material }}

    28 | 29 |

    ${{ page.price }}

    30 | 31 | {% if page.in_stock %} 32 | 33 |
    {% include snipcart-button.html product=page %}
    34 | 35 | {% else %} 36 |
    Out of Stock
    37 | {% endif %} 38 | 39 | 40 |
    41 | 42 | 43 |
    44 |

    Reviews

    45 | 46 | 47 |
    48 | {% for review in page.reviews %} 49 |
    50 |
    51 | {{ review.rating }} 52 | {% for i in (1..5) %} 53 | {% if i <= review.rating %} 54 | 55 | {% else %} 56 | 57 | {% endif %} 58 | {% endfor %} 59 |
    60 |
    {{ review.title }}
    61 |
    {{ review.review }}
    62 |
    63 | {% endfor %} 64 |
    65 |
    66 |
    67 |
    68 |
    69 |
    70 | 71 | 72 | -------------------------------------------------------------------------------- /003-beautiful-soup-with-static-site-and-server-side-rendered-web-scraping/website/_products/elephant.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bumble the Elephant 3 | description_markdown: >- 4 | Bumble the humble elephant is your shining star. He will always remember who 5 | you are and why you are here. 6 | material: 70% Cotton, 30% Nylon 7 | garment_type: 8 | price: '6.08' 9 | sku: elephant 10 | stock: 10 11 | sizes: 12 | - Small 13 | - Medium 14 | - Large 15 | - XL 16 | styles: 17 | - name: Cream 18 | color: '#dfd3c2' 19 | image: /images/products/elephant/cream.jpg 20 | - name: Green 21 | color: '#67aa79' 22 | image: /images/products/elephant/green.jpg 23 | in_stock: true 24 | avg_rating: 3 25 | reviews: 26 | - rating: 5 27 | title: Adorable and flattering 28 | review: This top is very cute. got it in the lighter color. the fit is great and it will go with many things. if size medium were not out of stock in the blue color i would have purchased that one also. 29 | - rating: 2 30 | title: Way too large everywhere 31 | review: I ordered a medium in this and it definitely fits more like a large. i liked the color & the sleeves, but tying the tie at the neck looks silly on me and leaving it open shows too much cleavage. overall, it is just shapeless and too much fabric on me. i'm sending this one back. 32 | - rating: 5 33 | title: I love this! 34 | review: I was pleasantly surprised with this cardigan! i get so many compliments- i can easily see me carrying this into my fall wardrobe! 35 | - rating: 5 36 | title: Great summer top 37 | review: I tried on this top in store & was a bit wishy washy on it at first. however i am so glad i bought it and have worn it 3x in a week (shh!). it is incredibly lightweight & breathable which is great for hot texas summers. i also love that it has interesting back detail. the other reviewers mentioned it was see through but i don't agree - i wear a nude bra with it and have no issues. it can be worn casually with shorts or dressed up with white denim & fancier accessories. either way it looks best w 38 | - rating: 5 39 | title: Lovely top 40 | review: This top is lovely and flowy! it fits true to size, and the lace detail at the neck is perfect to make this top stand out. i felt like it was a little sheer, but it wasn't problematic enough for me to take away a star -) 41 | - rating: 5 42 | title: Sold on reviews 43 | review: After reading many positive reviews i ordered. sure glad i did. i love the light, airy fabric. it hangs nicely. i am mostly a 12, sometimes a 14. i ordered both sizes and the 14 was a perfect fit. i am very pleased with this purchase. thought it might be too girly and/or young looking on me, but not so at all. i got the white and i certainly like the red detail at the button holes too. 44 | - rating: 5 45 | title: Simply love 46 | review: This sweater is substantial, well-made, fabric is thick and comfortable. the length is on the shorter side, not cropped but shorter, perhaps if they had petite, ti would be cropped. the sleeves are perfect length too on me, it looks shorter on the model, they hit at mys wrists (i do usually need petite length). it seems a simple top, but it is quite beautiful in person. 
i strongly recommend it, even for the price...xs and i am 115 lbs, 30dd chest, 26.5 in waist 47 | - rating: 3 48 | title: Beauty and the beast 49 | review: I love the designs of tiny blouses. but i wish they could make them more durable. i have owned several blouses in the past where i have to use a little pin to secure the button line from pulling at the breast area, or that after a few delicate washings have come apart at a seam. and ironing or steaming them is a must. this blouse was no exception. it went back because of pulling at the bust. i'm willing to take some extra care of my clothing out of my busy schedule, but i shouldn't have to! 5'7" 50 | - rating: 5 51 | title: Super cute shirt 52 | review: I just bought this shirt on sale. i'd been eyeing it for a bit and when it went on sale i had to get it. its really cute and comfortable and fits really nicely. it's not too short or boxy and the detail on the sleeves is great. 53 | - rating: 4 54 | title: Give it a try 55 | review: I'm between a s & m. on most swing tops usually s which i thought i'd be in this one but not the case. a m fit but i thought i could probably even wear a l which i never wear. but it is gorgeous. this is one of those shirts that keep me coming back to retailer. this is a perfect fall top! great tan color the beading is so so pretty and will really look great under sweaters. this top looked so great with my louis vuitton bag it was just too good to pass up so go to your local store, find th 56 | - rating: 5 57 | title: Gorgeous with a great fit 58 | review: I love this flattering and comfortable jacket- it has a nice stretch and the color is out of this world. i think it will pack a big punch this fall. 59 | - rating: 1 60 | title: Shrinks 61 | review: I loved this blouse when i got it and wore it before washing. it fit really well and was flattering. the only time i laundered it i hand- washed it in cold water and hung it to dry. the blouse shrunk at least a full size and in awkward places. the sleeves are now tight and way too short. the overall length shortened by at least 2 inches. the top is so tight in the bust now that the buttons popped open. i'm very disappointed and surprised.i will be returning it as it is unwearable as is. 62 | - rating: 5 63 | title: I love this! 64 | review: I actually like my shirt more than the other reviewers it seems. i am little (5'1" and 103) and i don't think it's too boxy at all. i like the way it hangs. and i'll say i like it much more in person than on the model. it's somehow cozier than it looks in the picture. it's a real boosh (that mean's comfy.) i wouldn't have picked it from the online pic, but in person, it really caught my eye. 65 | - rating: 5 66 | title: Flexible sweatering 67 | review: Love this sweater! it is easy to wear and goes well with multiple outfits. it is a great, flexible add to my wardrobe. it fits well, even in the sleeves! 68 | - rating: 5 69 | title: Light and lovely. 70 | review: I recently purchased this tunic on sale. the cut of this top is flattering on my curvy figure as it has a cinched in waist. the embroidery is lovely and very bright with several shades of colors. i wore this with black leggings,boots and a green cardigan and received many compliments. 71 | - rating: 4 72 | title: Very cute, runs large 73 | review: I like the casual, yet detailed look of this piece. it looks great with crops. the casual tie string makes the piece feel very laid back, but the details are so pretty. i have it in lavender. it looks way better in person. 
saw it at the store and grabbed it immediately. 74 | - rating: 4 75 | title: Fun arm detail 76 | review: Really enjoy this pullover's arm detail. i got the "purple" which was somewhere between aubergine and garnet. i think this will easily go with black pants to wear to work or a fun skirt/jeans. 77 | - rating: 4 78 | title: Cute! 79 | review: This top is cute! it adds a pop of color to my day and seems to be of high quality. the colors seem more vibrant in person and the material is quite soft and stretchy. as another reviewer pointed out, the peplum is only in the front! it sounds really weird but i don't really mind the look because i'll be wearing a jacket over it. i'm usually a size 12/14 with a 36dd bust and the large fits beautifully. 80 | - rating: 3 81 | title: Nice try... 82 | review: This top was so pretty online! unfortunately it did not work for me- the front was a bit more shapeless in person, though the back was gorgeous, with a button gather at center. the color appeared yellow online, but was a bit more green in person. also, the armholes were very low, so it would need a camisole or no bra. 83 | - rating: 5 84 | title: Details and versatility 85 | review: This top is easy to dress up with a blazer or wear casually with jeans and trainers. so many nice details and a great fit! 86 | - rating: 2 87 | title: Sandstripe vest 88 | review: As a beach cover-up this would be great, but i'm surprised retailer calls this a dress and doesn't show on the model how thin it is. it's pretty sheer and i couldn't get away without layering it. 89 | - rating: 4 90 | title: Pretty details 91 | review: I have this shirt in the gray and in the orange and i just love the details on front. 92 | - rating: 5 93 | title: Watercolor look 94 | review: Tshirt is beautiful. looks like watercolor. the fabric is a thin soft cotton that is really nice. i am 5'6" and very thin. xs fit well and was not too short. s would have been fine too. i would choose a size by your shoulder width. i bought my mother the medium. 95 | - rating: 5 96 | title: Perfect summer basic 97 | review: I tried this on in the store on a whim. it is so flattering, soft, and pretty. i generally wear a size 12 and bought a l although the m might have fit, also it's got plenty of room. the chiffon yoke is not particularly sheer and a nude bra strap is basically invisible, the jersey is adequately opaque so no camisole needed. my dimensions are about 40" bust x 33" waist. 98 | - rating: 4 99 | title: Beautiful print, but runs very large 100 | review: This print is beautiful and the design is unique and great for work. my only complaint is that it runs huge and the torso fit is extremely boxy. i'm normally in between a size xs and s at retailer, bought the xs in this shirt and it still almost too big. 101 | - rating: 4 102 | title: Love the look for the holidays 103 | review: This velvet t-shirt is perfect for the holidays! i usually wear a size 12 top, or a l, but i ordered up and got a size xl. it is very tight around my chest (36c) and armpits, and the v-neck sometimes spreads apart a lot if i move my arms around a lot. but other than that, it looks great, just runs small. if you are a size 14 or larger, i don't think that this will fit. 104 | - rating: 4 105 | title: Pretty cropped cardigan. 106 | review: This cardigan is nice and the lace at the back is pretty. just be aware that it's definitely a cropped length, which is good for wearing over a dress that you don't want to cover up too much. size up if you want room to layer. 
the one i tried on at the store had the top button almost falling off, so i didn't buy it. 107 | - rating: 1 108 | title: Enormous 109 | review: I was really excited about this sweater, but the fit was terrible. the sleeves were the right size, but the body of the sweater was a tent - absolutely no shape. they must have clipped the sweater on the model. for reference, i'm 5'4 and 120 pounds and ordered a size small. 110 | - rating: 5 111 | title: Great casual top 112 | review: Love this top for casual days. fits perfectly, fabric feels quality and color is bright but not too bright. good length, just touches top of my thighs. 113 | - rating: 5 114 | title: Perfect shirt 115 | review: This shirt is so cute alone with jeans or dressed up with nice jewelry, a scarf or cardi. its just the right weight, true to size, drapes nicely and its very flattering. i"m sorry i didn't order more when i had the chance. its already sold out in the colors and sizes i wanted. excellent quality as usual -- thanks again retailer! 116 | - rating: 2 117 | title: Cute but won't last 118 | review: Wore it a couple of times, it is cool and comfortable. washed it and the raw edges are starting to fray. i might get one or two more wears out of it. hand washed and hung dry. 119 | - rating: 5 120 | title: Great find on sale! 121 | review: I can't believe this top hasn't sold out. it is a great casual top that will go well with anything. i agree with previous reviewer that talked about sizing up to cover straps. i did size up to a large but it didn't get huge around the midsection. i got the red and it will go great with pilcro khakis or denim. 122 | - rating: 4 123 | title: Lovely fabric, not boxy 124 | review: I've bought similar blouses from retailer before that i didn't love as much as this. it's not boxy or wide on me. it's a great length and i find the 3/4 bell sleeves to be elegant. i bought the purple one on sale and went back for the red/cream. my only complaint would be that the stitching could be of better quality. the fabric is very nice -- love the print. 125 | - rating: 4 126 | title: Perfect...but 127 | review: It's an adorable take on the "casual" sweatshirt. i love the layers, but i wish they were just a little different. they hang a little strange. perhaps if the bottom one was a smidge longer? i'm not sure. or if the sweatshirt layer was a little longer and the blouse bit was therefore longer. it all hangs just a little...off? but, it's not so off that it looks bad. i'm petite, so it's cute on, but, i think if you were tall, it might hang a little weird. a warning though, i ordered a small, and the 128 | - rating: 5 129 | title: So lovely i bought two????? 130 | review: This top fits tts as shown on model. loose, flowy but the cut/details/flounce still give some nice shape. i usually take a s or m at retailer depending on the style/make. went with the small in this. i am 5'5",narrow shoulders, ruler shape 34dd. no gaping in the front. fits gracefully flows with details that are classic and add the wow factor to this social or work appropriate top. so complimentary, it would flatter many body types. the neckline is juuuuussst right, i fasten the top button to make 131 | - rating: 4 132 | title: Cute but short 133 | review: Tiny collar and short bodice length. lovely fabric, soft and silky. 134 | - rating: 4 135 | title: Easy top 136 | review: I like this top. another reviewer mentioned that it was a great top for a busty girl...i agree. 
i'm a 32g and could have sized down to a small, but i kept the medium because i wanted the extra length to have the option of wearing it as a dress. the small may have caused slight button pulling, but since the fabric has stretch and the top has a generous cut, it wouldn?t look insane. my only issue is the fabric pills. i wore the top once, and the fabric near the armpit pilled a lot, especially for 137 | - rating: 5 138 | title: Cozy! 139 | review: I am 5'8" and about 145 lbs - curvy through hips, small up top and i got a small and it fit great. i got it in the wine and it is a pretty color for fall into winter. the fabric is super soft and cozy! 140 | - rating: 5 141 | title: Conversationalist button-down with cats 142 | review: I love this shirt, very comfortable and has a nice drape. i originally bought the size i normally wear but found it to be a little tight across my chest, so i returned for the next size up.very well made and not sheer so did not have to wear a tank underneath. 143 | - rating: 5 144 | title: Summer sweater 145 | review: This is a thick material, but it is also airy. i got the white and don't have to worry about it being see through. it is a little boxy, but the hem hits a good length and doesn't bother me. really great for work in the summer and going between really hot and really cold temperatures. a good basic piece that can be dressed up or down that is definitely worth getting. 146 | - rating: 4 147 | title: Fall dots 148 | review: Great transition into fall for layering. easy breezy dress up or down-is thin so it does need a cami 149 | - rating: 4 150 | title: Pretty but boxy 151 | review: This looked more fitted on the model. the xs was still boxy on me, but i loved the eyelet hem so much i kept it! 152 | - rating: 4 153 | title: Pretty but... 154 | review: I really had high hopes for this sweater however, unfortunately it is not as nice as i expected it to be. i own a few angel of the north sweaters and i really don't feel like this particular sweater meets their usual standard. first of all the yarn feels very cheap and is not very soft. secondly, the actual knit also appears to be very cheap. i ordered the white sweater in my usual size medium and while it fits, i really can't see how they were able to get the neck of the sweater to go off the m 155 | - rating: 5 156 | title: Cardigan love 157 | review: This cardigan runs slightly large as it is boxy, which i love. the color is exactly as pictured and it is soooo soft! can't wait to wear it with a t-shirt and jeans! 158 | - rating: 5 159 | title: I love it!! 160 | review: I knew i would keep it as soon as i put it on -- comfortable, beautiful fabric, cute gold buttons! as well, due to the cut, it feels roomy, but has a slender fit around the arms. the "denim" color has beautiful detail, but i don't think i would like the maroon solid. i wish there was another nonsolid color -- i would buy another one! 161 | - rating: 2 162 | title: Short but wide 163 | review: This is a crop top but super boxy and wide. i expected it to be more fitted in the waist. since it sits out away a from the body a lot, it was not flattering. i also felt the stitching to be a bit matronly for me. too bad as i am having such a hard time finding tops and i'm willing to pay full price for something that works. 
164 | - rating: 4 165 | title: Old retailer 166 | review: Ok, so yes, it is true that the top part where the sleeves are can be snug a little, especially if you work out, but overall i found the top to run true to my regular size (i only tried on the 0, didn't try bigger). the length was great for me being short, i didn't think i need the petite (i usually do for pants and skirts/dresses, tops on and off). the look of the shirt is also classic, flowy and it looked great with e half tuck. the embroidery is also nice though one part of it seemed loose. a 167 | - rating: 5 168 | title: Love the style 169 | review: I was very happy to receive this as a birthday gift. i usually wear an xs or s in clothes i buy from retailer. i was given this in a s, but it wasn't even close to meeting in the front, so i returned it for a m. it doesn't button anyway, but i didn't want it to look like it wouldn't meet at all. i tried on a size s sweater in this brand while i was in the store, and it fit perfectly, so this one is just running small. overall, i love the lace detail on the front and the softness of the sweater. 170 | - rating: 5 171 | title: Perfect and at the right price! 172 | review: In my opinion, the quality and uniqueness of this piece far definitely warrants a full price purchase. adorable cut and style, comfortable and cute. it's the perfect flexible top to wear to work with a skirt or more casually with your favorite pair of jeans. love this! 173 | - rating: 5 174 | title: Cozy casual - perfect for fall 175 | review: The color is perfect for fall and into winter. only the inside collar on the photo shows the subtle plaid lining. the lining was an unexpected bonus, and adds even more dimension to the shirt if the sleeves are turned up.the fit is true to size and the 'stressed' velvet fabric is current for this season.i'm going to enjoy wearing this shirt. 176 | - rating: 5 177 | title: Beautiful blouse- does run large 178 | review: Saw this in the store yesterday and had my daughter try it on. it was really cute on her and she loved it. in fairness to the other reviewers she tried on a size 0 which was perfect for her petite frame and i can see how this blouse would run really big in the larger sizes. but the fabric and detail is so pretty and it was on sale. 179 | - rating: 5 180 | title: Comfy top 181 | review: I bought the black and love it. i bought both the medium and large but am going to keep the medium. it was plenty roomy. i'm 5'2" and about fluctuate between 125-130 lbs. i usually wear a medium or large in retailer-wear but i like things roomy. have to hide a little extra these days! 
182 | 183 | --- -------------------------------------------------------------------------------- /003-beautiful-soup-with-static-site-and-server-side-rendered-web-scraping/website/_sass/cloudcannon.scss: -------------------------------------------------------------------------------- 1 | .editor-link { 2 | display: none !important; 3 | margin-top: 0; 4 | 5 | .btn { 6 | border: 0; 7 | border-radius: 2px; 8 | width: 100%; 9 | max-width: 500px; 10 | box-sizing: border-box; 11 | font-size: 2rem; 12 | text-decoration: none; 13 | padding: 10px 15px; 14 | margin: 0; 15 | font-size: 18px; 16 | } 17 | 18 | nav &, .btn { 19 | cursor: pointer; 20 | background-color: #f7e064; 21 | color: #333; 22 | box-shadow: 1px 1px 5px 0 rgba(0, 0, 0, 0.2); 23 | border-radius: 4px; 24 | line-height: 1; 25 | 26 | &:hover { 27 | background-color: #f4d525; 28 | color: #333; 29 | } 30 | } 31 | } 32 | 33 | .cms-editor-active .editor-link { 34 | display: block !important; 35 | } 36 | 37 | .cms-editor-active nav .editor-link { 38 | display: inline !important; 39 | } 40 | -------------------------------------------------------------------------------- /003-beautiful-soup-with-static-site-and-server-side-rendered-web-scraping/website/_sass/contact.scss: -------------------------------------------------------------------------------- 1 | .map { 2 | height: 499px; 3 | border: 1px solid #DDDDDD; 4 | border-radius: 3px; 5 | } 6 | 7 | .contact-box { 8 | @extend %flexbox; 9 | @include flex-flow(wrap); 10 | max-width: 1000px; 11 | margin: 0 auto 20px auto; 12 | padding: 0 0 50px 0; 13 | 14 | form { 15 | width: 100% 16 | } 17 | 18 | h3 { 19 | margin: 0; 20 | font-size: 1.8rem; 21 | } 22 | 23 | .contact-form, .contact-details { 24 | padding: 20px; 25 | width: 100%; 26 | box-sizing: border-box; 27 | @media #{$desktop} { 28 | -webkit-flex: 1; 29 | flex: 1; 30 | order: 2; 31 | } 32 | } 33 | 34 | .contact-details { 35 | font-size: .8em; 36 | @media #{$desktop} { 37 | order: 1; 38 | } 39 | 40 | a svg { 41 | position: relative; 42 | top: 5px; 43 | } 44 | 45 | a { 46 | white-space: nowrap; 47 | overflow: hidden; 48 | text-overflow: ellipsis; 49 | display: block; 50 | } 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /003-beautiful-soup-with-static-site-and-server-side-rendered-web-scraping/website/_sass/elements.scss: -------------------------------------------------------------------------------- 1 | html, body { 2 | margin: 0; 3 | padding: 0; 4 | } 5 | 6 | body { 7 | font-family: "San Francisco", "Helvetica Neue", "Helvetica", "Arial"; 8 | -webkit-font-smoothing: antialiased; 9 | } 10 | 11 | a { 12 | color: $brand-color; 13 | text-decoration: none; 14 | } 15 | 16 | a:hover { 17 | text-decoration: underline; 18 | } 19 | 20 | img { 21 | max-width: 100%; 22 | } 23 | 24 | h1 strong, h2 strong { 25 | font-weight: 700; 26 | } 27 | 28 | h1 { 29 | font-weight: 300; 30 | font-size: 2.3em; 31 | margin: 0; 32 | } 33 | 34 | h2 { 35 | font-weight: 300; 36 | font-size: 2.2em; 37 | margin: 0; 38 | } 39 | 40 | h3 { 41 | font-size: 2rem; 42 | margin: 20px 0 10px 0; 43 | } 44 | 45 | h4 { 46 | font-size: 1.4rem; 47 | 48 | a { 49 | color: #000; 50 | } 51 | } 52 | 53 | p, address, label, ul { 54 | font-size: 1.2rem; 55 | color: #666; 56 | margin-bottom: 20px; 57 | line-height: 1.4em; 58 | } 59 | 60 | ul { 61 | padding-left: 1em; 62 | } -------------------------------------------------------------------------------- 
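// Editor's note (assumption): the Sass partials above and below rely on
// $brand-color, the breakpoint variables from _sass/variables.scss, and the
// flexbox mixins, so css/screen.scss imports "mixins/flexbox" and "variables"
// ahead of them (see the import list near the end of this dump). A minimal
// usage sketch, mirroring contact.scss:
//
//   .contact-box {
//     @extend %flexbox;          // placeholder selector from mixins/flexbox.scss
//     @include flex-flow(wrap);  // vendor-prefixed flex-flow shorthand
//   }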
/003-beautiful-soup-with-static-site-and-server-side-rendered-web-scraping/website/_sass/footer.scss: -------------------------------------------------------------------------------- 1 | footer { 2 | padding: 50px 0 50px 0; 3 | font-size: 1.1rem; 4 | position: relative; 5 | border: 1px solid #eee; 6 | 7 | .copyright { 8 | font-size: .9rem; 9 | margin: 20px auto 0; 10 | } 11 | 12 | h2 { 13 | font-size: 1.4rem; 14 | margin: 30px 0; 15 | } 16 | 17 | .footer-columns { 18 | @media #{$mid-point} { 19 | display: flex; 20 | } 21 | @include flex-flow(wrap); 22 | list-style: none; 23 | padding: 0; 24 | 25 | @media #{$mid-point} { 26 | margin: -10px -60px 10px -60px; 27 | } 28 | 29 | & > li { 30 | @media #{$mid-point} { 31 | flex: 1; 32 | padding: 0 60px; 33 | } 34 | 35 | box-sizing: border-box; 36 | 37 | &.footer-nav { 38 | @media #{$mid-point} { 39 | flex: 0; 40 | } 41 | } 42 | ul { 43 | padding: 0; 44 | list-style: none; 45 | li { 46 | font-size: 1.1rem; 47 | margin: 11px 0; 48 | a { 49 | white-space: nowrap; 50 | color: #999; 51 | } 52 | } 53 | } 54 | } 55 | 56 | .about { 57 | svg path { 58 | fill: $brand-color; 59 | } 60 | } 61 | 62 | p { 63 | font-size: 1.1rem; 64 | color: #999; 65 | } 66 | 67 | .email { 68 | margin: 0; 69 | } 70 | 71 | h4 { 72 | margin: 0 0 1.1rem 0; 73 | font-size: .9em; 74 | } 75 | } 76 | 77 | a { 78 | text-decoration: none; 79 | } 80 | } 81 | 82 | .social-icons { 83 | svg { 84 | width: 100%; 85 | height: 100%; 86 | 87 | &, path { 88 | fill: #fff; 89 | } 90 | } 91 | 92 | a { 93 | display: inline-block; 94 | width: 20px; 95 | height: 20px; 96 | padding: 8px; 97 | line-height: 0; 98 | margin: 0 10px 20px 0; 99 | border-radius: 50px; 100 | &.email { 101 | background-color: #ff9d09; 102 | } 103 | 104 | &.facebook { 105 | background-color: #3b5998; 106 | } 107 | 108 | &.twitter { 109 | background-color: #55acee; 110 | } 111 | 112 | &.google-plus { 113 | background-color: #db4437; 114 | } 115 | 116 | &.youtube { 117 | background-color: #cd201f; 118 | } 119 | 120 | &.instagram { 121 | background-color: #f167f5; 122 | } 123 | 124 | &.linkedin { 125 | background-color: #0077b5; 126 | } 127 | 128 | &.pinterest { 129 | background-color: #C92228; 130 | } 131 | } 132 | } 133 | -------------------------------------------------------------------------------- /003-beautiful-soup-with-static-site-and-server-side-rendered-web-scraping/website/_sass/forms.scss: -------------------------------------------------------------------------------- 1 | .button a, 2 | .button button, 3 | input[type=submit] { 4 | color: #fff; 5 | text-decoration: none; 6 | padding: 15px 30px; 7 | background: $brand-color; 8 | border-radius: 3px; 9 | border: 1px solid lighten($brand-color, 10%); 10 | -webkit-appearance: none; 11 | } 12 | 13 | .button a:hover, 14 | .button button:hover, 15 | input[type=submit]:hover { 16 | background: darken($brand-color, 10%); 17 | cursor: pointer; 18 | } 19 | 20 | .button a:active, 21 | .button button:active, 22 | input[type=submit]:active { 23 | border-width: 1px; 24 | border-top: 3px solid transparent 25 | } 26 | 27 | .button.alt a { 28 | background: rgba(255,255,255,0.15); 29 | border-radius: 3px; 30 | border: 1px solid rgba(255, 255, 255, 0.3); 31 | padding: 16px 50px; 32 | } 33 | 34 | .button.alt a:hover { 35 | background: #fff; 36 | color: $brand-color; 37 | } 38 | 39 | textarea, input, button, select { font-family: inherit; font-size: inherit; } 40 | 41 | input[type=submit] { 42 | margin: 20px 0 0 0; 43 | } 44 | 45 | label, input, textarea, select { 46 | display: block; 47 |
width: 100%; 48 | box-sizing: border-box; 49 | border-radius: 3px; 50 | } 51 | 52 | .radio-input { 53 | line-height: 1; 54 | margin: 20px 0; 55 | cursor: pointer; 56 | 57 | input { 58 | display: inline-block; 59 | width: auto; 60 | cursor: pointer; 61 | } 62 | } 63 | 64 | textarea { 65 | resize: vertical; 66 | height: 150px; 67 | } 68 | 69 | label { 70 | margin: 20px 0 5px 0; 71 | } 72 | 73 | label:first-child { 74 | margin: 0 0 5px 0; 75 | } 76 | 77 | label:last-child, input:last-child { 78 | margin-bottom: 0; 79 | } 80 | 81 | input, textarea, select { 82 | padding: 10px; 83 | font-size: 1em; 84 | } 85 | 86 | input, textarea { 87 | outline: none; 88 | border: 1px solid #DDDDDD; 89 | } 90 | 91 | input[type=text]:focus, input[type=email]:focus, input[type=password]:focus, textarea:focus { 92 | box-shadow: 0 0 5px $brand-color; 93 | border: 1px solid $brand-color; 94 | } 95 | -------------------------------------------------------------------------------- /003-beautiful-soup-with-static-site-and-server-side-rendered-web-scraping/website/_sass/layout.scss: -------------------------------------------------------------------------------- 1 | .container, .text-container { 2 | margin: 0 auto; 3 | position: relative; 4 | padding: 0 20px; 5 | } 6 | 7 | .text-container { 8 | max-width: 750px; 9 | } 10 | 11 | .container { 12 | max-width: 1140px; 13 | 14 | &.max-container { 15 | max-width: 100%; 16 | padding: 0; 17 | } 18 | } 19 | 20 | .hero { 21 | color: #ffffff; 22 | text-align: center; 23 | position: relative; 24 | z-index: 9; 25 | h2 { 26 | font-size: 3.2rem; 27 | line-height: 1.5; 28 | font-weight: bold; 29 | } 30 | 31 | @media (min-width: 500px) { 32 | h2 { 33 | font-size: 3.5rem; 34 | } 35 | } 36 | 37 | p { 38 | color: #fff; 39 | } 40 | } 41 | 42 | .content { 43 | background: #fff; 44 | padding: 1px 0 0 0; 45 | position: relative; 46 | } 47 | 48 | .company-name { 49 | display: inline-block; 50 | position: relative; 51 | top: 4px; 52 | a { 53 | display: block; 54 | } 55 | 56 | svg { 57 | display: inline-block; 58 | height: 25px; 59 | width: 25px; 60 | } 61 | 62 | span { 63 | font-size: 16px; 64 | position: relative; 65 | top: -6px; 66 | left: 3px; 67 | } 68 | } 69 | 70 | .screenshot{ 71 | height: auto; 72 | display: block; 73 | margin: 0 auto; 74 | border-radius: 2px; 75 | padding: 20px 0 0; 76 | background: #DDD url('data:image/svg+xml;utf8,') 4px 4px no-repeat; 77 | box-shadow: 0px 0px 50px rgba(0,0,0,.2); 78 | } 79 | 80 | section { 81 | padding: 100px 0; 82 | } 83 | 84 | section + section { 85 | padding-top: 0; 86 | } 87 | 88 | .subtext { 89 | margin-top: 10px; 90 | text-align: center; 91 | } 92 | 93 | 94 | .cta { 95 | margin: 60px 0; 96 | } 97 | 98 | .page h2 { 99 | text-align: center; 100 | } 101 | 102 | blockquote { 103 | padding: 18px 25px; 104 | margin: 0 auto; 105 | quotes: "\201C""\201D""\2018""\2019"; 106 | font-style: italic; 107 | line-height: 2.5; 108 | font-size: 1.1em; 109 | max-width: 900px; 110 | 111 | .author { 112 | display: block; 113 | font-weight: bold; 114 | margin: 20px 0 0 0; 115 | font-size: 1.1em; 116 | font-style: normal; 117 | text-align: right; 118 | } 119 | 120 | p { 121 | display: inline; 122 | } 123 | } 124 | 125 | blockquote .quote:before, 126 | blockquote .quote:after { 127 | color: #ccc; 128 | content: open-quote; 129 | font-size: 4em; 130 | line-height: 0.1em; 131 | margin-right: 0.1em; 132 | vertical-align: -0.4em; 133 | } 134 | 135 | blockquote .quote:after { 136 | content: close-quote; 137 | } 138 | 139 | .page { 140 | margin-bottom: 0; 141 | 
padding-bottom: 80px; 142 | } 143 | 144 | .center-text { 145 | text-align: center; 146 | } 147 | 148 | .thirds { 149 | display: flex; 150 | margin: 0 -20px 0 -40px; 151 | flex-wrap: wrap; 152 | } 153 | 154 | .thirds div { 155 | flex: 1 1 200px; 156 | padding: 100px 40px 0 40px; 157 | } 158 | -------------------------------------------------------------------------------- /003-beautiful-soup-with-static-site-and-server-side-rendered-web-scraping/website/_sass/mixins/columns.scss: -------------------------------------------------------------------------------- 1 | @mixin columns($value) { 2 | columns: $value; 3 | -webkit-columns: $value; 4 | -moz-columns: $value; 5 | } 6 | -------------------------------------------------------------------------------- /003-beautiful-soup-with-static-site-and-server-side-rendered-web-scraping/website/_sass/mixins/flexbox.scss: -------------------------------------------------------------------------------- 1 | // Flexbox Mixins 2 | // http://philipwalton.github.io/solved-by-flexbox/ 3 | // https://github.com/philipwalton/solved-by-flexbox 4 | // 5 | // Copyright (c) 2013 Brian Franco 6 | // 7 | // Permission is hereby granted, free of charge, to any person obtaining a 8 | // copy of this software and associated documentation files (the 9 | // "Software"), to deal in the Software without restriction, including 10 | // without limitation the rights to use, copy, modify, merge, publish, 11 | // distribute, sublicense, and/or sell copies of the Software, and to 12 | // permit persons to whom the Software is furnished to do so, subject to 13 | // the following conditions: 14 | // The above copyright notice and this permission notice shall be included 15 | // in all copies or substantial portions of the Software. 16 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 17 | // OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 18 | // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 19 | // IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 20 | // CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 21 | // TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 22 | // SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 23 | // 24 | // This is a set of mixins for those who want to mess around with flexbox 25 | // using the native support of current browsers. For full support table 26 | // check: http://caniuse.com/flexbox 27 | // 28 | // Basically this will use: 29 | // 30 | // * Fallback, old syntax (IE10, mobile webkit browsers - no wrapping) 31 | // * Final standards syntax (FF, Safari, Chrome, IE11, Opera) 32 | // 33 | // This was inspired by: 34 | // 35 | // * http://dev.opera.com/articles/view/advanced-cross-browser-flexbox/ 36 | // 37 | // With help from: 38 | // 39 | // * http://w3.org/tr/css3-flexbox/ 40 | // * http://the-echoplex.net/flexyboxes/ 41 | // * http://msdn.microsoft.com/en-us/library/ie/hh772069(v=vs.85).aspx 42 | // * http://css-tricks.com/using-flexbox/ 43 | // * http://dev.opera.com/articles/view/advanced-cross-browser-flexbox/ 44 | // * https://developer.mozilla.org/en-us/docs/web/guide/css/flexible_boxes 45 | 46 | //---------------------------------------------------------------------- 47 | 48 | // Flexbox Containers 49 | // 50 | // The 'flex' value causes an element to generate a block-level flex 51 | // container box. 52 | // 53 | // The 'inline-flex' value causes an element to generate a inline-level 54 | // flex container box. 
55 | // 56 | // display: flex | inline-flex 57 | // 58 | // http://w3.org/tr/css3-flexbox/#flex-containers 59 | // 60 | // (Placeholder selectors for each type, for those who rather @extend) 61 | 62 | @mixin flexbox { 63 | display: -webkit-box; 64 | display: -webkit-flex; 65 | display: -moz-flex; 66 | display: -ms-flexbox; 67 | display: flex; 68 | } 69 | 70 | %flexbox { @include flexbox; } 71 | 72 | //---------------------------------- 73 | 74 | @mixin inline-flex { 75 | display: -webkit-inline-box; 76 | display: -webkit-inline-flex; 77 | display: -moz-inline-flex; 78 | display: -ms-inline-flexbox; 79 | display: inline-flex; 80 | } 81 | 82 | %inline-flex { @include inline-flex; } 83 | 84 | //---------------------------------------------------------------------- 85 | 86 | // Flexbox Direction 87 | // 88 | // The 'flex-direction' property specifies how flex items are placed in 89 | // the flex container, by setting the direction of the flex container's 90 | // main axis. This determines the direction that flex items are laid out in. 91 | // 92 | // Values: row | row-reverse | column | column-reverse 93 | // Default: row 94 | // 95 | // http://w3.org/tr/css3-flexbox/#flex-direction-property 96 | 97 | @mixin flex-direction($value: row) { 98 | @if $value == row-reverse { 99 | -webkit-box-direction: reverse; 100 | -webkit-box-orient: horizontal; 101 | } @else if $value == column { 102 | -webkit-box-direction: normal; 103 | -webkit-box-orient: vertical; 104 | } @else if $value == column-reverse { 105 | -webkit-box-direction: reverse; 106 | -webkit-box-orient: vertical; 107 | } @else { 108 | -webkit-box-direction: normal; 109 | -webkit-box-orient: horizontal; 110 | } 111 | -webkit-flex-direction: $value; 112 | -moz-flex-direction: $value; 113 | -ms-flex-direction: $value; 114 | flex-direction: $value; 115 | } 116 | // Shorter version: 117 | @mixin flex-dir($args...) { @include flex-direction($args...); } 118 | 119 | //---------------------------------------------------------------------- 120 | 121 | // Flexbox Wrap 122 | // 123 | // The 'flex-wrap' property controls whether the flex container is single-line 124 | // or multi-line, and the direction of the cross-axis, which determines 125 | // the direction new lines are stacked in. 126 | // 127 | // Values: nowrap | wrap | wrap-reverse 128 | // Default: nowrap 129 | // 130 | // http://w3.org/tr/css3-flexbox/#flex-wrap-property 131 | 132 | @mixin flex-wrap($value: nowrap) { 133 | // No Webkit Box fallback. 134 | -webkit-flex-wrap: $value; 135 | -moz-flex-wrap: $value; 136 | @if $value == nowrap { 137 | -ms-flex-wrap: none; 138 | } @else { 139 | -ms-flex-wrap: $value; 140 | } 141 | flex-wrap: $value; 142 | } 143 | 144 | //---------------------------------------------------------------------- 145 | 146 | // Flexbox Flow (shorthand) 147 | // 148 | // The 'flex-flow' property is a shorthand for setting the 'flex-direction' 149 | // and 'flex-wrap' properties, which together define the flex container's 150 | // main and cross axes. 151 | // 152 | // Values: | 153 | // Default: row nowrap 154 | // 155 | // http://w3.org/tr/css3-flexbox/#flex-flow-property 156 | 157 | @mixin flex-flow($values: (row nowrap)) { 158 | // No Webkit Box fallback. 
159 | -webkit-flex-flow: $values; 160 | -moz-flex-flow: $values; 161 | -ms-flex-flow: $values; 162 | flex-flow: $values; 163 | } 164 | 165 | //---------------------------------------------------------------------- 166 | 167 | // Flexbox Order 168 | // 169 | // The 'order' property controls the order in which flex items appear within 170 | // their flex container, by assigning them to ordinal groups. 171 | // 172 | // Default: 0 173 | // 174 | // http://w3.org/tr/css3-flexbox/#order-property 175 | 176 | @mixin order($int: 0) { 177 | -webkit-box-ordinal-group: $int + 1; 178 | -webkit-order: $int; 179 | -moz-order: $int; 180 | -ms-flex-order: $int; 181 | order: $int; 182 | } 183 | 184 | //---------------------------------------------------------------------- 185 | 186 | // Flexbox Grow 187 | // 188 | // The 'flex-grow' property sets the flex grow factor. Negative numbers 189 | // are invalid. 190 | // 191 | // Default: 0 192 | // 193 | // http://w3.org/tr/css3-flexbox/#flex-grow-property 194 | 195 | @mixin flex-grow($int: 0) { 196 | -webkit-box-flex: $int; 197 | -webkit-flex-grow: $int; 198 | -moz-flex-grow: $int; 199 | -ms-flex-positive: $int; 200 | flex-grow: $int; 201 | } 202 | 203 | //---------------------------------------------------------------------- 204 | 205 | // Flexbox Shrink 206 | // 207 | // The 'flex-shrink' property sets the flex shrink factor. Negative numbers 208 | // are invalid. 209 | // 210 | // Default: 1 211 | // 212 | // http://w3.org/tr/css3-flexbox/#flex-shrink-property 213 | 214 | @mixin flex-shrink($int: 1) { 215 | -webkit-flex-shrink: $int; 216 | -moz-flex-shrink: $int; 217 | -ms-flex-negative: $int; 218 | flex-shrink: $int; 219 | } 220 | 221 | //---------------------------------------------------------------------- 222 | 223 | // Flexbox Basis 224 | // 225 | // The 'flex-basis' property sets the flex basis. Negative lengths are invalid. 226 | // 227 | // Values: Like "width" 228 | // Default: auto 229 | // 230 | // http://www.w3.org/TR/css3-flexbox/#flex-basis-property 231 | 232 | @mixin flex-basis($value: auto) { 233 | -webkit-flex-basis: $value; 234 | -moz-flex-basis: $value; 235 | -ms-flex-preferred-size: $value; 236 | flex-basis: $value; 237 | } 238 | 239 | //---------------------------------------------------------------------- 240 | 241 | // Flexbox "Flex" (shorthand) 242 | // 243 | // The 'flex' property specifies the components of a flexible length: the 244 | // flex grow factor and flex shrink factor, and the flex basis. When an 245 | // element is a flex item, 'flex' is consulted instead of the main size 246 | // property to determine the main size of the element. If an element is 247 | // not a flex item, 'flex' has no effect. 248 | // 249 | // Values: none | || 250 | // Default: See individual properties (1 1 0). 251 | // 252 | // http://w3.org/tr/css3-flexbox/#flex-property 253 | 254 | @mixin flex($fg: 1, $fs: null, $fb: null) { 255 | 256 | // Set a variable to be used by box-flex properties 257 | $fg-boxflex: $fg; 258 | 259 | // Box-Flex only supports a flex-grow value so let's grab the 260 | // first item in the list and just return that. 
261 | @if type-of($fg) == 'list' { 262 | $fg-boxflex: nth($fg, 1); 263 | } 264 | 265 | -webkit-box-flex: $fg-boxflex; 266 | -webkit-flex: $fg $fs $fb; 267 | -moz-box-flex: $fg-boxflex; 268 | -moz-flex: $fg $fs $fb; 269 | -ms-flex: $fg $fs $fb; 270 | flex: $fg $fs $fb; 271 | } 272 | 273 | //---------------------------------------------------------------------- 274 | 275 | // Flexbox Justify Content 276 | // 277 | // The 'justify-content' property aligns flex items along the main axis 278 | // of the current line of the flex container. This is done after any flexible 279 | // lengths and any auto margins have been resolved. Typically it helps distribute 280 | // extra free space leftover when either all the flex items on a line are 281 | // inflexible, or are flexible but have reached their maximum size. It also 282 | // exerts some control over the alignment of items when they overflow the line. 283 | // 284 | // Note: 'space-*' values not supported in older syntaxes. 285 | // 286 | // Values: flex-start | flex-end | center | space-between | space-around 287 | // Default: flex-start 288 | // 289 | // http://w3.org/tr/css3-flexbox/#justify-content-property 290 | 291 | @mixin justify-content($value: flex-start) { 292 | @if $value == flex-start { 293 | -webkit-box-pack: start; 294 | -ms-flex-pack: start; 295 | } @else if $value == flex-end { 296 | -webkit-box-pack: end; 297 | -ms-flex-pack: end; 298 | } @else if $value == space-between { 299 | -webkit-box-pack: justify; 300 | -ms-flex-pack: justify; 301 | } @else if $value == space-around { 302 | -ms-flex-pack: distribute; 303 | } @else { 304 | -webkit-box-pack: $value; 305 | -ms-flex-pack: $value; 306 | } 307 | -webkit-justify-content: $value; 308 | -moz-justify-content: $value; 309 | justify-content: $value; 310 | } 311 | // Shorter version: 312 | @mixin flex-just($args...) { @include justify-content($args...); } 313 | 314 | //---------------------------------------------------------------------- 315 | 316 | // Flexbox Align Items 317 | // 318 | // Flex items can be aligned in the cross axis of the current line of the 319 | // flex container, similar to 'justify-content' but in the perpendicular 320 | // direction. 'align-items' sets the default alignment for all of the flex 321 | // container's items, including anonymous flex items. 'align-self' allows 322 | // this default alignment to be overridden for individual flex items. (For 323 | // anonymous flex items, 'align-self' always matches the value of 'align-items' 324 | // on their associated flex container.) 325 | // 326 | // Values: flex-start | flex-end | center | baseline | stretch 327 | // Default: stretch 328 | // 329 | // http://w3.org/tr/css3-flexbox/#align-items-property 330 | 331 | @mixin align-items($value: stretch) { 332 | @if $value == flex-start { 333 | -webkit-box-align: start; 334 | -ms-flex-align: start; 335 | } @else if $value == flex-end { 336 | -webkit-box-align: end; 337 | -ms-flex-align: end; 338 | } @else { 339 | -webkit-box-align: $value; 340 | -ms-flex-align: $value; 341 | } 342 | -webkit-align-items: $value; 343 | -moz-align-items: $value; 344 | align-items: $value; 345 | } 346 | 347 | //---------------------------------- 348 | 349 | // Flexbox Align Self 350 | // 351 | // Values: auto | flex-start | flex-end | center | baseline | stretch 352 | // Default: auto 353 | 354 | @mixin align-self($value: auto) { 355 | // No Webkit Box Fallback. 
356 | -webkit-align-self: $value; 357 | -moz-align-self: $value; 358 | @if $value == flex-start { 359 | -ms-flex-item-align: start; 360 | } @else if $value == flex-end { 361 | -ms-flex-item-align: end; 362 | } @else { 363 | -ms-flex-item-align: $value; 364 | } 365 | align-self: $value; 366 | } 367 | 368 | //---------------------------------------------------------------------- 369 | 370 | // Flexbox Align Content 371 | // 372 | // The 'align-content' property aligns a flex container's lines within the 373 | // flex container when there is extra space in the cross-axis, similar to 374 | // how 'justify-content' aligns individual items within the main-axis. Note, 375 | // this property has no effect when the flexbox has only a single line. 376 | // 377 | // Values: flex-start | flex-end | center | space-between | space-around | stretch 378 | // Default: stretch 379 | // 380 | // http://w3.org/tr/css3-flexbox/#align-content-property 381 | 382 | @mixin align-content($value: stretch) { 383 | // No Webkit Box Fallback. 384 | -webkit-align-content: $value; 385 | -moz-align-content: $value; 386 | @if $value == flex-start { 387 | -ms-flex-line-pack: start; 388 | } @else if $value == flex-end { 389 | -ms-flex-line-pack: end; 390 | } @else { 391 | -ms-flex-line-pack: $value; 392 | } 393 | align-content: $value; 394 | } 395 | -------------------------------------------------------------------------------- /003-beautiful-soup-with-static-site-and-server-side-rendered-web-scraping/website/_sass/navigation.scss: -------------------------------------------------------------------------------- 1 | @media #{$below-mid-point} { 2 | .nav-open { 3 | 4 | nav { 5 | border: 0; 6 | z-index: 2; 7 | 8 | a { 9 | display: block; 10 | } 11 | } 12 | } 13 | } 14 | 15 | header { 16 | color: #fff; 17 | padding: 20px 0 59px 0; 18 | background: linear-gradient(135deg, $brand-color 0%, $brand-color-2 100%); 19 | position: relative; 20 | 21 | p { 22 | margin: 0; 23 | } 24 | @media #{$mid-point} { 25 | &:before, &:after { 26 | content: ""; 27 | width: 25%; 28 | display: block; 29 | position: absolute; 30 | border-radius: 50% 50% 50% 50% / 60% 60% 40% 40%; 31 | transform: translate(-50%, 0); 32 | 33 | } 34 | 35 | &:before { 36 | transform: skewX(-15deg); 37 | box-shadow: 99px 29px 170px -24px rgba(0,0,0,.05); 38 | top: -254px; 39 | bottom: 50%; 40 | left: 43% 41 | } 42 | 43 | &:after { 44 | box-shadow: inset 92px -41px 104px -28px rgba(0,0,0,.05); 45 | transform: skewX(-12deg); 46 | top: 47%; 47 | left: 40%; 48 | bottom: -121px; 49 | } 50 | } 51 | 52 | a { 53 | color: #fff; 54 | text-decoration: none; 55 | z-index: 1; 56 | position: relative; 57 | 58 | &:hover { 59 | text-decoration: none; 60 | } 61 | } 62 | } 63 | 64 | nav { 65 | width: 100%; 66 | padding: 20px 0 0 0; 67 | 68 | @media #{$mid-point} { 69 | display: inline-block; 70 | vertical-align: top; 71 | width: auto; 72 | padding: 0 0 0 40px; 73 | } 74 | 75 | a { 76 | margin: 0 3px; 77 | padding: 20px 10px; 78 | border-bottom: 1px solid rgba(255,255,255,0); 79 | color: rgba(255,255,255,.6); 80 | transition: 200ms ease-in color; 81 | display: none; 82 | 83 | @media #{$mid-point} { 84 | display: inline-block; 85 | padding: 10px; 86 | } 87 | 88 | 89 | &.nav-toggle { 90 | display: inline; 91 | position: absolute; 92 | right: 0; 93 | top: -22px; 94 | font-size: 1.9em; 95 | border: 0; 96 | z-index: 2; 97 | padding: 20px; 98 | 99 | @media #{$mid-point} { 100 | display: none; 101 | } 102 | 103 | &:hover { 104 | border: 0; 105 | } 106 | } 107 | } 108 | 109 | a:hover { 110 | color: 
#fff; 111 | } 112 | 113 | 114 | a.cart { 115 | line-height: 1; 116 | 117 | i { 118 | font-size: 22px; 119 | line-height: 1; 120 | display: inline-block; 121 | vertical-align: middle; 122 | padding: 0 6px; 123 | } 124 | 125 | span { 126 | display: inline-block; 127 | vertical-align: middle; 128 | position: relative; 129 | top: 2px; 130 | 131 | } 132 | } 133 | 134 | @media #{$mid-point} { 135 | a.cart { 136 | border-radius: 43px; 137 | color: #fff; 138 | border: 1px solid rgba(255,255,255, .4); 139 | position: absolute; 140 | right: 20px; 141 | top: -2px; 142 | font-size: .8em; 143 | transition: border .3s ease; 144 | 145 | 146 | &:hover { 147 | border-color: #fff; 148 | } 149 | } 150 | } 151 | 152 | a.active { 153 | color: #fff; 154 | } 155 | 156 | } 157 | -------------------------------------------------------------------------------- /003-beautiful-soup-with-static-site-and-server-side-rendered-web-scraping/website/_sass/products.scss: -------------------------------------------------------------------------------- 1 | .star-rating .fa-star.unchecked { 2 | color: #ddd; 3 | outline: none; 4 | } 5 | 6 | .product-rating { 7 | display: flex; 8 | flex-direction: column; 9 | margin-bottom: 20px; 10 | } 11 | 12 | .rating { 13 | display: flex; 14 | align-items: center; 15 | margin-bottom: 10px; 16 | } 17 | 18 | .rating-number { 19 | font-size: 24px; 20 | font-weight: bold; 21 | margin-right: 10px; 22 | } 23 | 24 | .stars { 25 | color: #ffc107; 26 | font-size: 20px; 27 | } 28 | 29 | .fa-star { 30 | margin-right: 5px; 31 | } 32 | 33 | .checked { 34 | color: #ffc107; 35 | } 36 | 37 | .rating-title { 38 | font-size: 18px; 39 | font-weight: bold; 40 | margin-bottom: 5px; 41 | } 42 | 43 | .rating-review { 44 | font-size: 16px; 45 | line-height: 1.5; 46 | } 47 | 48 | 49 | .styles { 50 | width: 100%; 51 | background: #eee; 52 | position: relative; 53 | padding: 20px; 54 | box-sizing: border-box; 55 | border-radius: 3px; 56 | 57 | .style-picker { 58 | z-index: 2; 59 | display: block; 60 | line-height: 0; 61 | position: absolute; 62 | bottom: 20px; 63 | right: 20px; 64 | 65 | > div { 66 | width: 20px; 67 | height: 20px; 68 | border: 1px solid #000; 69 | display: inline-block; 70 | margin: 0 5px 0 0; 71 | border-radius: 20px; 72 | cursor: pointer; 73 | } 74 | } 75 | 76 | > div + div { 77 | display: none; 78 | } 79 | 80 | i { 81 | font-size: 14rem; 82 | color: #fff; 83 | } 84 | } 85 | 86 | .product-container { 87 | display: flex; 88 | flex-wrap: wrap; 89 | margin-left: -40px; 90 | 91 | .styles { 92 | width: auto; 93 | flex: 1 1 300px; 94 | margin-left: 40px; 95 | } 96 | 97 | .product-details { 98 | flex: 2 1 500px; 99 | text-align: left; 100 | margin-left: 40px; 101 | 102 | h4 { 103 | color: $brand-color; 104 | } 105 | } 106 | 107 | form { 108 | border: 1px solid #444; 109 | padding: 20px; 110 | border-radius: 2px; 111 | 112 | label:first-child { 113 | margin-top: 0; 114 | } 115 | } 116 | } 117 | 118 | .product-list { 119 | display: flex; 120 | flex-flow: wrap; 121 | margin: 0 -20px; 122 | 123 | &, > li { 124 | list-style: none; 125 | padding: 0; 126 | } 127 | 128 | > li { 129 | flex: 1 1 300px; 130 | 131 | &:not(:empty) { 132 | margin: 20px; 133 | } 134 | } 135 | 136 | h4 { 137 | margin: 10px 0 0 0; 138 | padding-right: 100px; 139 | } 140 | 141 | p { 142 | line-height: 1; 143 | margin: 10px 0 0 0; 144 | } 145 | 146 | .product-description p { 147 | font-size: 1.1rem; 148 | } 149 | 150 | .snipcart-add-item { 151 | -webkit-appearance: none; 152 | color: #222; 153 | text-decoration: none; 154 | border: 0; 155 | 
background: none; 156 | font-weight: bold; 157 | font-size: 1.2em; 158 | padding: 10px 15px 15px; 159 | float: right; 160 | cursor: pointer; 161 | line-height: 1; 162 | margin-top: 10px; 163 | 164 | &:hover { 165 | background: #f5f5f5; 166 | color: #000; 167 | } 168 | } 169 | } -------------------------------------------------------------------------------- /003-beautiful-soup-with-static-site-and-server-side-rendered-web-scraping/website/_sass/staff.scss: -------------------------------------------------------------------------------- 1 | 2 | .staff-image { 3 | margin: 25px auto 0 auto; 4 | background: no-repeat center center; 5 | background-size: cover; 6 | border-radius: 300px; 7 | width: 70%; 8 | padding-bottom: 70%; 9 | } 10 | 11 | .staff { 12 | padding: 0; 13 | list-style: none; 14 | @extend %flexbox; 15 | @include flex-flow(wrap); 16 | text-align: center; 17 | margin: -175px 0 0 0; 18 | justify-content: center; 19 | 20 | li { 21 | padding: 30px 20px; 22 | box-sizing: border-box; 23 | flex: 0 1 300px; 24 | background: #fff; 25 | margin: 0 60px; 26 | border-radius: 3px; 27 | box-shadow: 0 10px 100px 0 rgba(0,0,0,0.1); 28 | } 29 | 30 | .square-image { 31 | width: 400px; 32 | height: 400px; 33 | 34 | img { 35 | border-radius: 400px; 36 | } 37 | } 38 | 39 | .name { 40 | font-size: 1.2rem; 41 | margin-top: 20px; 42 | } 43 | 44 | .position { 45 | font-size: 1.3rem; 46 | color: #666; 47 | } 48 | } 49 | 50 | 51 | .story-points { 52 | h3 { 53 | font-size: 1.3rem; 54 | margin: 0; 55 | color: $brand-color; 56 | } 57 | } 58 | -------------------------------------------------------------------------------- /003-beautiful-soup-with-static-site-and-server-side-rendered-web-scraping/website/_sass/variables.scss: -------------------------------------------------------------------------------- 1 | $brand-color: #7d59f2; 2 | $brand-color-2: #e371f6; 3 | $default-hero-image: "/images/hero/box-top.jpg"; 4 | 5 | // Breakpoints 6 | $tablet: "(min-width: 450px)"; 7 | $above-tablet: "(min-width: 550px)"; 8 | $mid-point: "(min-width: 850px)"; 9 | $below-mid-point: "(max-width: 849px)"; 10 | $desktop: "(min-width: 768px)"; 11 | -------------------------------------------------------------------------------- /003-beautiful-soup-with-static-site-and-server-side-rendered-web-scraping/website/_staff_members/ava.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Ava Sandler 3 | image_path: https://unsplash.it/600/800?image=1062 4 | --- -------------------------------------------------------------------------------- /003-beautiful-soup-with-static-site-and-server-side-rendered-web-scraping/website/_staff_members/steph.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Steph Poco 3 | image_path: https://unsplash.it/600/800?image=823 4 | --- 5 | -------------------------------------------------------------------------------- /003-beautiful-soup-with-static-site-and-server-side-rendered-web-scraping/website/about.html: -------------------------------------------------------------------------------- 1 | --- 2 | title: Our Story 3 | hero: Our story 4 | --- 5 | 6 | 8 |
      9 | {% for person in site.staff_members %} 10 |
    • 11 |
      12 |
      {{ person.name }}
      13 |
    • 14 | {% endfor %} 15 |
    16 | 17 |
    18 |
    19 |

    Passion

    20 |

What more could you want from life than drawing animals and bringing happiness to others? We hope this passion shows through in our work.

    21 |
    22 |
    23 |

    Animal

    24 |

    It's easy to forget that we're all part of the animal kingdom. Fur is a brand that helps you get in touch with your inner animal.

    25 |
    26 |
    27 |

    Style

    28 |

    We like to keep things plain and simple around here to help the animals speak for themselves.

    29 |
    30 |
31 | -------------------------------------------------------------------------------- /003-beautiful-soup-with-static-site-and-server-side-rendered-web-scraping/website/cloudcannon.config.yml: -------------------------------------------------------------------------------- 1 | _select_data: 2 | garment_types: 3 | - hat 4 | - top 5 | 6 | _inputs: 7 | map: 8 | comment: "Update the map location and display settings." 9 | latitude: 10 | comment: "Coordinates for the center marker on the map." 11 | longitude: 12 | comment: "Coordinates for the center marker on the map." 13 | zoom: 14 | comment: "The zoom level for the map." 15 | pricing_table: 16 | comment: "Update the information in the pricing tables." 17 | highlight: 18 | comment: "Emphasize the text" 19 | color: 20 | comment: "The background color used in the plan name and call to action." 21 | new_window: 22 | comment: "Open link in new window" 23 | description: 24 | comment: "This is used for Facebook shares of the page and Google indexing" 25 | 26 | collections_config: 27 | staff_members: 28 | _enabled_editors: 29 | - data 30 | icon: account_circle 31 | schemas: 32 | default: 33 | path: schemas/staff_members.md 34 | products: 35 | _enabled_editors: 36 | - data 37 | output: true 38 | icon: shopping_cart 39 | schemas: 40 | default: 41 | path: schemas/products.md -------------------------------------------------------------------------------- /003-beautiful-soup-with-static-site-and-server-side-rendered-web-scraping/website/contact-success.html: -------------------------------------------------------------------------------- 1 | --- 2 | title: We'll be in touch 3 | hero: We'll be in touch 4 | --- 5 | 6 |

Success

Your message is sent. We'll be in touch soon.

    8 | -------------------------------------------------------------------------------- /003-beautiful-soup-with-static-site-and-server-side-rendered-web-scraping/website/contact.html: -------------------------------------------------------------------------------- 1 | --- 2 | title: Contact 3 | description: Get in touch 4 | email: fur@example.com 5 | hero: Get in touch 6 | map: 7 | latitude: -45.873511 8 | longitude: 170.503704 9 | address: 'Dunedin, New Zealand' 10 | zoom: 13 11 | --- 12 | 13 |
Send us a message

[contact form and map]
    33 | 34 | 61 | 62 | -------------------------------------------------------------------------------- /003-beautiful-soup-with-static-site-and-server-side-rendered-web-scraping/website/css/screen.scss: -------------------------------------------------------------------------------- 1 | --- 2 | --- 3 | @import "mixins/flexbox"; 4 | @import "mixins/columns"; 5 | @import "variables"; 6 | @import "elements"; 7 | @import "layout"; 8 | @import "products"; 9 | @import "staff"; 10 | @import "contact"; 11 | @import "forms"; 12 | @import "navigation"; 13 | @import "footer"; 14 | @import "cloudcannon"; 15 | -------------------------------------------------------------------------------- /003-beautiful-soup-with-static-site-and-server-side-rendered-web-scraping/website/favicon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/davidteather/everything-web-scraping/6afdda03e1b84d64a8e460f6e7265a60a87bdbdc/003-beautiful-soup-with-static-site-and-server-side-rendered-web-scraping/website/favicon.png -------------------------------------------------------------------------------- /003-beautiful-soup-with-static-site-and-server-side-rendered-web-scraping/website/images/_screenshot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/davidteather/everything-web-scraping/6afdda03e1b84d64a8e460f6e7265a60a87bdbdc/003-beautiful-soup-with-static-site-and-server-side-rendered-web-scraping/website/images/_screenshot.png -------------------------------------------------------------------------------- /003-beautiful-soup-with-static-site-and-server-side-rendered-web-scraping/website/images/products/deer/black.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/davidteather/everything-web-scraping/6afdda03e1b84d64a8e460f6e7265a60a87bdbdc/003-beautiful-soup-with-static-site-and-server-side-rendered-web-scraping/website/images/products/deer/black.jpg -------------------------------------------------------------------------------- /003-beautiful-soup-with-static-site-and-server-side-rendered-web-scraping/website/images/products/deer/blue.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/davidteather/everything-web-scraping/6afdda03e1b84d64a8e460f6e7265a60a87bdbdc/003-beautiful-soup-with-static-site-and-server-side-rendered-web-scraping/website/images/products/deer/blue.jpg -------------------------------------------------------------------------------- /003-beautiful-soup-with-static-site-and-server-side-rendered-web-scraping/website/images/products/deer/clay.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/davidteather/everything-web-scraping/6afdda03e1b84d64a8e460f6e7265a60a87bdbdc/003-beautiful-soup-with-static-site-and-server-side-rendered-web-scraping/website/images/products/deer/clay.jpg -------------------------------------------------------------------------------- /003-beautiful-soup-with-static-site-and-server-side-rendered-web-scraping/website/images/products/deer/cream.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/davidteather/everything-web-scraping/6afdda03e1b84d64a8e460f6e7265a60a87bdbdc/003-beautiful-soup-with-static-site-and-server-side-rendered-web-scraping/website/images/products/deer/cream.jpg -------------------------------------------------------------------------------- /003-beautiful-soup-with-static-site-and-server-side-rendered-web-scraping/website/images/products/elephant/cream.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/davidteather/everything-web-scraping/6afdda03e1b84d64a8e460f6e7265a60a87bdbdc/003-beautiful-soup-with-static-site-and-server-side-rendered-web-scraping/website/images/products/elephant/cream.jpg -------------------------------------------------------------------------------- /003-beautiful-soup-with-static-site-and-server-side-rendered-web-scraping/website/images/products/elephant/green.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/davidteather/everything-web-scraping/6afdda03e1b84d64a8e460f6e7265a60a87bdbdc/003-beautiful-soup-with-static-site-and-server-side-rendered-web-scraping/website/images/products/elephant/green.jpg -------------------------------------------------------------------------------- /003-beautiful-soup-with-static-site-and-server-side-rendered-web-scraping/website/images/products/giraffe/green.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/davidteather/everything-web-scraping/6afdda03e1b84d64a8e460f6e7265a60a87bdbdc/003-beautiful-soup-with-static-site-and-server-side-rendered-web-scraping/website/images/products/giraffe/green.jpg -------------------------------------------------------------------------------- /003-beautiful-soup-with-static-site-and-server-side-rendered-web-scraping/website/images/products/hog/blue.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/davidteather/everything-web-scraping/6afdda03e1b84d64a8e460f6e7265a60a87bdbdc/003-beautiful-soup-with-static-site-and-server-side-rendered-web-scraping/website/images/products/hog/blue.jpg -------------------------------------------------------------------------------- /003-beautiful-soup-with-static-site-and-server-side-rendered-web-scraping/website/images/products/hog/clay.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/davidteather/everything-web-scraping/6afdda03e1b84d64a8e460f6e7265a60a87bdbdc/003-beautiful-soup-with-static-site-and-server-side-rendered-web-scraping/website/images/products/hog/clay.jpg -------------------------------------------------------------------------------- /003-beautiful-soup-with-static-site-and-server-side-rendered-web-scraping/website/images/products/hog/cream.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/davidteather/everything-web-scraping/6afdda03e1b84d64a8e460f6e7265a60a87bdbdc/003-beautiful-soup-with-static-site-and-server-side-rendered-web-scraping/website/images/products/hog/cream.jpg -------------------------------------------------------------------------------- /003-beautiful-soup-with-static-site-and-server-side-rendered-web-scraping/website/images/products/lion/blue.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/davidteather/everything-web-scraping/6afdda03e1b84d64a8e460f6e7265a60a87bdbdc/003-beautiful-soup-with-static-site-and-server-side-rendered-web-scraping/website/images/products/lion/blue.jpg -------------------------------------------------------------------------------- /003-beautiful-soup-with-static-site-and-server-side-rendered-web-scraping/website/images/products/tiger/black.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/davidteather/everything-web-scraping/6afdda03e1b84d64a8e460f6e7265a60a87bdbdc/003-beautiful-soup-with-static-site-and-server-side-rendered-web-scraping/website/images/products/tiger/black.jpg -------------------------------------------------------------------------------- /003-beautiful-soup-with-static-site-and-server-side-rendered-web-scraping/website/images/products/tiger/blue.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/davidteather/everything-web-scraping/6afdda03e1b84d64a8e460f6e7265a60a87bdbdc/003-beautiful-soup-with-static-site-and-server-side-rendered-web-scraping/website/images/products/tiger/blue.jpg -------------------------------------------------------------------------------- /003-beautiful-soup-with-static-site-and-server-side-rendered-web-scraping/website/images/products/tiger/clay.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/davidteather/everything-web-scraping/6afdda03e1b84d64a8e460f6e7265a60a87bdbdc/003-beautiful-soup-with-static-site-and-server-side-rendered-web-scraping/website/images/products/tiger/clay.jpg -------------------------------------------------------------------------------- /003-beautiful-soup-with-static-site-and-server-side-rendered-web-scraping/website/images/products/tiger/cream.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/davidteather/everything-web-scraping/6afdda03e1b84d64a8e460f6e7265a60a87bdbdc/003-beautiful-soup-with-static-site-and-server-side-rendered-web-scraping/website/images/products/tiger/cream.jpg -------------------------------------------------------------------------------- /003-beautiful-soup-with-static-site-and-server-side-rendered-web-scraping/website/images/products/tiger/green.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/davidteather/everything-web-scraping/6afdda03e1b84d64a8e460f6e7265a60a87bdbdc/003-beautiful-soup-with-static-site-and-server-side-rendered-web-scraping/website/images/products/tiger/green.jpg -------------------------------------------------------------------------------- /003-beautiful-soup-with-static-site-and-server-side-rendered-web-scraping/website/index.html: -------------------------------------------------------------------------------- 1 | --- 2 | title: Find Your Spirit Animal 3 | hero: Find your spirit animal 4 | subtitle: The animal friendly clothing company 5 | --- 6 | 7 |
{% for product in site.products %}
  {% include product-styles.html product=product %}
  {{ product.name }}
  {{ product.description_markdown | truncate: 45 | markdownify }}
  ${{ product.price }}
{% endfor %}
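Once Jekyll renders this loop, each product becomes a plain HTML card carrying a name and a dollar-prefixed price, which is exactly the kind of server-side-rendered markup the lesson 003 exercises parse with Beautiful Soup. A minimal sketch of such a scrape, assuming the mock site is running locally on port 3000 (see the README later in this repository) and using placeholder selectors, since this capture doesn't preserve the rendered tags or class names:

```python
# Hypothetical sketch -- not the course's actual solution code.
import requests
from bs4 import BeautifulSoup

resp = requests.get("http://localhost:3000/")  # assumed local mock-site URL
resp.raise_for_status()
soup = BeautifulSoup(resp.text, "html.parser")

# Each {% for product in site.products %} iteration renders one card with
# the product's name and a dollar-prefixed price.
for card in soup.select("li"):  # placeholder selector for a product card
    heading = card.find(["h2", "h3"])  # placeholder for the name element
    price = card.find(string=lambda s: s and s.strip().startswith("$"))
    if heading and price:
        print(heading.get_text(strip=True), price.strip())
```

Matching on the `$` prefix keeps the sketch independent of the template's real class names, which static-site scrapes often can't rely on anyway.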
    31 | -------------------------------------------------------------------------------- /003-beautiful-soup-with-static-site-and-server-side-rendered-web-scraping/website/js/_style-picker.js: -------------------------------------------------------------------------------- 1 | (function() { 2 | "use strict"; 3 | 4 | $(".style-picker").on("click", "> div", function (e) { 5 | var $el = $(e.currentTarget), 6 | id = $el.data("itemId"), 7 | $parent = $el.closest(".styles"); 8 | 9 | $parent.children(".style").hide(); 10 | $parent.children(".style[data-item-id=" + id + "]").show(); 11 | }); 12 | })(); 13 | -------------------------------------------------------------------------------- /003-beautiful-soup-with-static-site-and-server-side-rendered-web-scraping/website/js/application.js: -------------------------------------------------------------------------------- 1 | --- 2 | layout: null 3 | --- 4 | 5 | {% include_relative _style-picker.js %} -------------------------------------------------------------------------------- /003-beautiful-soup-with-static-site-and-server-side-rendered-web-scraping/website/returns.html: -------------------------------------------------------------------------------- 1 | --- 2 | title: Returns 3 | hero: Return policy 4 | --- 5 | 6 |
In retail, a return is the process of a customer taking previously purchased merchandise back to the retailer and, in turn, receiving a refund in the original form of payment, an exchange for another item (identical or different), or a store credit.

Many retailers will accept returns provided that the customer has a receipt as proof of purchase and that certain other conditions, which depend on the retailer's policies, are met. These may include the merchandise being in a certain condition (usually resellable if not defective), no more than a certain amount of time having passed since the purchase, and sometimes that identification be provided (though usually only if a receipt is not provided). In some cases, only exchanges or store credit are offered, again usually only without a receipt, or after an initial refund period has passed. Some retailers charge a restocking fee for non-defective returned merchandise, but typically only if the packaging has been opened.

While retailers are not usually required to accept returns, laws in many places require retailers to post their return policy in a place where it is visible to the customer prior to purchase.

In certain countries, such as Australia, consumer rights dictate that under certain situations consumers have a right to demand a refund. These situations include sales that relied on false or misleading claims, defective goods, and undisclosed conditions of sale.

There are various reasons why customers may wish to return merchandise. These include a change of one's mind (buyer's remorse), quality of the merchandise, personal dissatisfaction, or a mistaken purchase of the wrong product. For clothing or other sized items, it may be a lack of a correct fit. Sometimes, there may be a product recall in which the manufacturer has requested (or been ordered) that the merchandise be brought back to the store. Also, gift receipts are sometimes offered when an item is purchased for another person, and the recipient can exchange this item for another item of comparable value, or for store credit, often on a gift card.

Source: Wikipedia
    -------------------------------------------------------------------------------- /003-beautiful-soup-with-static-site-and-server-side-rendered-web-scraping/website/robots.txt: -------------------------------------------------------------------------------- 1 | --- 2 | layout: null 3 | sitemap: false 4 | --- 5 | User-agent: * 6 | Sitemap: {{ site.url }}/sitemap.xml 7 | -------------------------------------------------------------------------------- /003-beautiful-soup-with-static-site-and-server-side-rendered-web-scraping/website/schemas/products.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: 3 | garment_type: top 4 | price: 5 | sku: 6 | description: 7 | sizes: 8 | - XS 9 | - Small 10 | - Medium 11 | - Large 12 | - XL 13 | styles: 14 | - name: 15 | color: 16 | image: 17 | --- 18 | -------------------------------------------------------------------------------- /003-beautiful-soup-with-static-site-and-server-side-rendered-web-scraping/website/schemas/staff_members.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: 3 | position: 4 | image_path: 5 | twitter: 6 | blurb: 7 | --- 8 | -------------------------------------------------------------------------------- /003-beautiful-soup-with-static-site-and-server-side-rendered-web-scraping/website/shipping.html: -------------------------------------------------------------------------------- 1 | --- 2 | title: Shipping / Delivery 3 | hero: Delivery Details 4 | --- 5 | 6 |
Please review the information below to find answers to any questions regarding our policies.

Shipping Services

Fur provides the shipping services below. Actual delivery time frames may vary due to weather, service delays, holidays, etc.

Free Standard shipping is available for apparel only on orders that have an after-discount subtotal value of $1.00 or more. Domestic shipping to Hawaii, Alaska, Puerto Rico, or any other location outside the Continental United States may take longer than expected due to their locations.

Standard Shipping (6-10 business days)
• Standard shipping is our least expensive option and has an average delivery window of 6-10 business days depending on the final delivery location zip code. Tracking is available.
• Final delivery is made by USPS.
• Select Standard shipping for orders being shipped to a P.O. Box.
• Standard shipping may take a few more days for locations like Hawaii, Alaska, or Puerto Rico, or any orders shipped outside the United States. This service is available for APO, DPO, and FPO addresses and non-contiguous U.S. states and territories.

Express Shipping (2 Business Days)
• Express shipping is an expedited option that offers full door-to-door tracking.
• Express shipping is an available shipping option throughout all 50 states.
• Express shipping will arrive by 4:30 PM to a business or 7:00 PM to a residence.
• This service is not available for P.O. Boxes or APO, DPO, and FPO addresses.

Overnight (Next Business Day)
• Overnight is an expedited shipping option that offers full door-to-door tracking.
• Overnight is an available shipping option throughout all 50 states.
• Overnight will arrive the next business day by 10:30 a.m. to most U.S. addresses; by noon, 4:30 p.m., or 5 p.m. in remote areas; and by noon, 1:30 p.m., or 4:30 p.m. on Saturdays.
• This service is not available for P.O. Boxes or APO, DPO, and FPO addresses.
-------------------------------------------------------------------------------- /003-beautiful-soup-with-static-site-and-server-side-rendered-web-scraping/website/siteicon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/davidteather/everything-web-scraping/6afdda03e1b84d64a8e460f6e7265a60a87bdbdc/003-beautiful-soup-with-static-site-and-server-side-rendered-web-scraping/website/siteicon.png -------------------------------------------------------------------------------- /003-beautiful-soup-with-static-site-and-server-side-rendered-web-scraping/website/touch-icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/davidteather/everything-web-scraping/6afdda03e1b84d64a8e460f6e7265a60a87bdbdc/003-beautiful-soup-with-static-site-and-server-side-rendered-web-scraping/website/touch-icon.png -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Everything Web Scraping 2 | Learn everything web scraping by [David Teather](https://twitter.com/david_teather); find the video series on [YouTube](https://youtube.com/playlist?list=PLmRtxHvzkEE8Ofiy4hnnXSoxw7gs4HOHt). 3 | 4 | [![LinkedIn](https://img.shields.io/badge/LinkedIn-0077B5?style=for-the-badge&logo=linkedin&logoColor=white&style=flat-square)](https://www.linkedin.com/in/davidteather/) [![Sponsor Me](https://img.shields.io/static/v1?label=Sponsor&message=%E2%9D%A4&logo=GitHub)](https://github.com/sponsors/davidteather) [![Discord Server](https://img.shields.io/discord/783108952111579166.svg?color=7289da&logo=discord&style=flat-square)](https://discord.gg/yyPhbfma6f) ![](https://visitor-badge.laobi.icu/badge?page_id=davidteather.web-scraping-by-reverse-engineering) [![Twitter URL](https://img.shields.io/twitter/url?style=social&url=https%3A%2F%2Ftwitter.com%2Fdavid_teather)](https://twitter.com/david_teather) 5 | 6 | ## Table Of Contents 7 | 1. [Course Catalogue](#course-catalogue) 8 | 2. [How To Start The Mock Websites](#how-to-start-the-mock-websites) 9 | 10 | **Please** consider giving [Course Feedback](https://forms.gle/LjRwHoR34vD1knGa6) 11 | 12 | ## Welcome! 13 | 14 | Glad you're here! If it's your first time, check out the [introduction](./000-introduction/README.md); if not, welcome back! 15 | 16 | Consider [sponsoring me](https://github.com/sponsors/davidteather) on GitHub to make work like this possible. 17 | 18 | ### Supporting The Project 19 | * Star the repo 😎 20 | * Maybe share it with some people new to web scraping? 21 | * Consider [sponsoring](https://github.com/sponsors/davidteather) me on GitHub 22 | * Send me an email or a [LinkedIn](https://www.linkedin.com/in/davidteather/) message telling me what you enjoy in the course (and maybe what else you want to see in the future) 23 | * Submit PRs for suggestions/issues :) 24 | ## Course Catalogue 25 | 0. [Introduction To The Course](/000-introduction/) 26 | 1. [Introduction To Forging API Requests](/001-introduction-to-forging-api-requests/) 27 | 2. [Proxies](/002-proxies) 28 | 3.
[Beautiful Soup Scraping With Static and Server Side Rendered Sites](/003-beautiful-soup-with-static-site-and-server-side-rendered-web-scraping) 29 | 30 | 31 | ## How To Start The Mock Websites 32 | 33 | [Video Walkthrough](https://youtu.be/WmeDXK7KRKE) 34 | 35 | ### With GitHub Codespaces (Recommended) 36 | 37 | If you don't want to deal with installing and configuring software, I've set up this repository so that a GitHub Codespace can do all of that for you. 38 | 39 | > Note: A free GitHub account comes with 60 hours of Codespaces free each month, and if you're a student you can get 90 hours free each month with GitHub Pro through the [GitHub Student Developer Pack](https://education.github.com/pack) ([source](https://github.com/features/codespaces)) 40 | 41 | #### Creating A Codespace 42 | > If you want to save your solutions, [create a fork](https://github.com/davidteather/everything-web-scraping/fork), then create a Codespace from your own repo; you'll then be able to use git to save your changes as normal. 43 | 44 | Create a Codespace using the instructions below or [here](https://github.com/codespaces/new) 45 |
![Create a new Codespace with options](assets/codespaces-new-with-options.png)

Select Code -> Codespaces Tab -> The + Icon -> New With Options

Or click [here](https://github.com/codespaces/new)

![Select a Codespace configuration](assets/codespaces-select-configuration.png)

Select the configuration of the lesson you're on, and after hitting create a Codespace, the VS Code editor will open in the browser and start all programs needed for the activity!
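If you'd rather skip the UI, the GitHub CLI can create the Codespace as well; a minimal sketch, assuming `gh` is installed and authenticated, and with the fork name and lesson folder as placeholders:

```sh
# Create a codespace on your fork, pointing at the lesson's devcontainer.
gh codespace create \
  -R your-username/everything-web-scraping \
  --devcontainer-path .devcontainer/<lesson-folder>/devcontainer.json
```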
56 | 57 | #### Cleaning Up 58 | 59 | After finishing each lesson, you can visit the [GitHub Codespaces](https://github.com/codespaces) menu and delete the Codespace so you don't get charged while you're not using it. 60 | 61 |
![Delete a Codespace](assets/delete-codespace.png)

Delete a Codespace with the 3 dots -> Delete

This will delete any changes you've made
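If you prefer the terminal, the GitHub CLI can handle this cleanup too; a minimal sketch, assuming `gh` is installed and authenticated (the codespace name is a placeholder copied from the list output):

```sh
# See which codespaces exist on your account.
gh codespace list

# Delete a single codespace by name...
gh codespace delete -c your-codespace-name

# ...or delete every codespace on the account at once.
gh codespace delete --all
```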
66 | 67 | > Note: If you enjoy GitHub Codespaces, consider checking out my ~30-minute [LinkedIn Learning Course](https://www.linkedin.com/learning/github-codespaces-for-students/why-use-github-codespaces) on Codespaces; you can get free 24h access through [my LinkedIn post](https://www.linkedin.com/feed/update/urn:li:activity:7069391759281147905/), and feel free to send a connection request while you're over there 🤠 68 | 69 | ### With Docker 70 | 71 | Run `docker-compose up` while in a lesson directory; when it says the development server has started, open `localhost:3000` in your browser to check that it's working properly. 72 | 73 | When done with the lesson, you can press `control + c` to shut down your Docker containers. 74 | 75 | #### Cleaning Up 76 | ##### With Docker Desktop 77 | 78 | 1. Navigate to the containers tab on the side, find the lesson you want to delete, and click the trashcan icon to remove it. 79 | 2. Navigate to the images tab on the side, find the images starting with the course name to delete, and hit the trash can. 80 | 81 | ##### With Command Line 82 | 83 | 1. To remove containers, run `docker rm $(docker ps -a -q --filter name=XXX)`, where XXX is the lesson number you want removed (ex: 001). 84 | 2. To remove images, run `docker rmi $(docker images --filter label=lesson.number=X -a -q)`, where X is the number you want removed (ex: 1, ex: 10). 85 | -------------------------------------------------------------------------------- /assets/codespaces-new-with-options.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/davidteather/everything-web-scraping/6afdda03e1b84d64a8e460f6e7265a60a87bdbdc/assets/codespaces-new-with-options.png -------------------------------------------------------------------------------- /assets/codespaces-select-configuration.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/davidteather/everything-web-scraping/6afdda03e1b84d64a8e460f6e7265a60a87bdbdc/assets/codespaces-select-configuration.png -------------------------------------------------------------------------------- /assets/delete-codespace.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/davidteather/everything-web-scraping/6afdda03e1b84d64a8e460f6e7265a60a87bdbdc/assets/delete-codespace.png --------------------------------------------------------------------------------