├── Facebook-post-scraper.py
├── LICENSE.md
├── Promo-post-winner.py
├── README.md
└── secrets.py

/Facebook-post-scraper.py:
--------------------------------------------------------------------------------
import requests
from bs4 import BeautifulSoup

# Facebook login credentials, defined in the local secrets.py.
from secrets import username, password


class FaceBookBot():
    login_basic_url = 'https://mbasic.facebook.com/login'
    login_mobile_url = 'https://m.facebook.com/login'
    payload = {
        'email': username,
        'pass': password
    }
    post_ID = ""

    def parse_html(self, request_url):
        # Log in via mbasic.facebook.com, then fetch the requested page
        # with the authenticated session.
        with requests.Session() as session:
            post = session.post(self.login_basic_url, data=self.payload)
            parsed_html = session.get(request_url)
            return parsed_html

    def post_content(self):
        REQUEST_URL = f'https://mbasic.facebook.com/story.php?story_fbid={self.post_ID}&id=415518858611168'

        soup = BeautifulSoup(self.parse_html(REQUEST_URL).content, "html.parser")
        content = soup.find_all('p')
        post_content = []
        for lines in content:
            post_content.append(lines.text)

        post_content = ' '.join(post_content)
        return post_content

    def date_posted(self):
        REQUEST_URL = f'https://mbasic.facebook.com/story.php?story_fbid={self.post_ID}&id=415518858611168'

        soup = BeautifulSoup(self.parse_html(REQUEST_URL).content, "html.parser")
        date_posted = soup.find('abbr')
        return date_posted.text

    def post_likes(self):
        limit = 200
        REQUEST_URL = f'https://mbasic.facebook.com/ufi/reaction/profile/browser/fetch/?limit={limit}&total_count=17&ft_ent_identifier={self.post_ID}'

        soup = BeautifulSoup(self.parse_html(REQUEST_URL).content, "html.parser")
        names = soup.find_all('h3')
        people_who_liked = []
        for name in names:
            people_who_liked.append(name.text)
        # Drop empty strings picked up from <h3> tags with no text.
        people_who_liked = [i for i in people_who_liked if i]
        return people_who_liked

    def post_shares(self):
        REQUEST_URL = f'https://m.facebook.com/browse/shares?id={self.post_ID}'

        # The shares browser lives on m.facebook.com, so log in there instead of mbasic.
        with requests.Session() as session:
            post = session.post(self.login_mobile_url, data=self.payload)
            parsed_html = session.get(REQUEST_URL)

        soup = BeautifulSoup(parsed_html.content, "html.parser")
        names = soup.find_all('span')
        people_who_shared = []
        for name in names:
            people_who_shared.append(name.text)
        return people_who_shared

--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
The MIT License (MIT)

Copyright (c) 2020 [Adriaan van Niekerk](https://github.com/adriaan90)

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
--------------------------------------------------------------------------------
/Promo-post-winner.py:
--------------------------------------------------------------------------------
import requests
import random

from time import sleep
from selenium import webdriver
from bs4 import BeautifulSoup

from secrets import username, password


class FaceBookBot():

    def __init__(self):
        # Start Chrome with notification pop-ups disabled.
        options = webdriver.ChromeOptions()
        options.add_argument('--disable-notifications')
        self.driver = webdriver.Chrome(options=options)

    def login(self, username, password):
        # Log in through the full desktop site so Selenium can drive the page settings UI.
        self.driver.get("https://www.facebook.com/login")

        sleep(2)

        email_in = self.driver.find_element_by_xpath('//*[@id="email"]')
        email_in.send_keys(username)

        password_in = self.driver.find_element_by_xpath('//*[@id="pass"]')
        password_in.send_keys(password)

        login_btn = self.driver.find_element_by_xpath('//*[@id="loginbutton"]')
        login_btn.click()

        sleep(2)

    def log_in_basic(self):
        POST_LOGIN_URL = 'https://mbasic.facebook.com/login'

        payload = {
            'email': username,
            'pass': password
        }

        with requests.Session() as session:
            post = session.post(POST_LOGIN_URL, data=payload)

    def post_likes(self):
        # This URL is the one the login form points to with its "action" tag.
        POST_LOGIN_URL = 'https://mbasic.facebook.com/login'

        # This URL is the page you actually want to pull down with requests.
        post_ID = 'the-post-ID'
        limit = 200
        REQUEST_URL = f'https://mbasic.facebook.com/ufi/reaction/profile/browser/fetch/?limit={limit}&total_count=17&ft_ent_identifier={post_ID}'

        payload = {
            'email': username,
            'pass': password
        }

        with requests.Session() as session:
            post = session.post(POST_LOGIN_URL, data=payload)
            r = session.get(REQUEST_URL)
            soup = BeautifulSoup(r.content, "html.parser")
            names = soup.find_all('h3', class_='be')
            people_who_liked = []
            for name in names:
                people_who_liked.append(name.text)

            return people_who_liked

    def post_shares(self):
        # This URL is the one the login form points to with its "action" tag.
        POST_LOGIN_URL = 'https://mbasic.facebook.com/login'

        post_ID = 'the-post-ID'
        # This URL is the page you actually want to pull down with requests.
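        # Note: unlike the other endpoints in this file, the list of sharers is
        # served from m.facebook.com rather than mbasic.facebook.com; the names
        # are read out of the <span> tags on that page further down.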
        REQUEST_URL = f'https://m.facebook.com/browse/shares?id={post_ID}'

        payload = {
            'email': username,
            'pass': password
        }

        with requests.Session() as session:
            post = session.post(POST_LOGIN_URL, data=payload)
            r = session.get(REQUEST_URL)
            soup = BeautifulSoup(r.content, "html.parser")
            names = soup.find_all('span')
            people_who_shared = []
            for name in names:
                people_who_shared.append(name.text)

            return people_who_shared

    def page_likes(self):
        self.login(username, password)

        page_name = "your-page-name"
        # This URL is the page you actually want to pull down with requests.
        REQUEST_URL = f'https://www.facebook.com/{page_name}/settings/?tab=people_and_other_pages&ref=page_edit'

        self.driver.get(REQUEST_URL)

        sleep(2)

        # Scroll down repeatedly so the full list of followers gets loaded.
        for i in range(1, 15):
            self.driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
            sleep(3)

        page = self.driver.page_source
        soup = BeautifulSoup(page, "html.parser")
        names = soup.find_all('a', class_='_3cb8')
        people_who_liked_page = []
        for name in names:
            people_who_liked_page.append(name.text)

        return people_who_liked_page

    def select_winner(self, list_A, list_B, list_C):
        # Return the names that appear in all three lists; the actual winner
        # is drawn at random from this pool below.
        eligible_to_win = []
        for name in list_A:
            if name in list_B and name in list_C:
                eligible_to_win.append(name)
        return eligible_to_win

bot = FaceBookBot()
people_who_follow = bot.page_likes()
people_who_liked = bot.post_likes()
people_who_shared = bot.post_shares()

eligible = bot.select_winner(people_who_liked, people_who_follow, people_who_shared)
winner = random.choice(eligible)
print(winner)

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Facebook Web Scraper

[![forthebadge](https://forthebadge.com/images/badges/made-with-python.svg)](https://forthebadge.com) [![forthebadge](https://forthebadge.com/images/badges/built-with-love.svg)](https://forthebadge.com)

[![PRs Welcome](https://img.shields.io/badge/PRs-welcome-brightgreen.svg?style=flat-square)](http://makeapullrequest.com) [![Donate](https://img.shields.io/badge/Donate-PayPal-green.svg)](https://www.paypal.com/donate?hosted_button_id=NS2E6R9YAGHYY)

**Facebook Web Scraper** is a collection of tools that you can use to collect data from Facebook. Currently it can:

* Determine a promotional post winner from the people who liked the post, shared it, and follow your page.
* Scrape general information from a post:
  * Post content
  * Date posted
  * Post likes
  * Post shares

# **How to Contribute**

1. Clone the repo and create a new branch: `$ git clone https://github.com/adriaan90/Facebook-web-scraper.git`, then `$ git checkout -b name_for_new_branch`.
2. Make your changes and test them.
3. Submit a pull request with a comprehensive description of the changes.

# **Donations**

This is free, open-source software. If you'd like to support the development of future projects, or say thanks for this one, you can donate by clicking on the **donate** button below.

[![Donate](https://img.shields.io/badge/Donate-PayPal-green.svg)](https://www.paypal.com/donate?hosted_button_id=NS2E6R9YAGHYY)

# **License**

The MIT License (MIT) 2020 - [Adriaan van Niekerk](https://github.com/adriaan90/). Please have a look at [LICENSE.md](LICENSE.md) for more details.
--------------------------------------------------------------------------------
/secrets.py:
--------------------------------------------------------------------------------
# Replace these with your own Facebook login credentials; both scripts import this file.
username = "your-username"
password = "your-password"
--------------------------------------------------------------------------------
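
For reference, a minimal usage sketch for the scraper class in Facebook-post-scraper.py — the kind of snippet you might append to the bottom of that file, since the class is defined but never instantiated there. The post ID below is a hypothetical placeholder, real credentials are assumed to be present in secrets.py, and the output depends entirely on the HTML that Facebook serves for your account:

```python
# Hypothetical usage of the FaceBookBot defined in Facebook-post-scraper.py,
# e.g. appended to the bottom of that file.
bot = FaceBookBot()
bot.post_ID = "1234567890"  # placeholder: numeric ID of the post to scrape

print(bot.post_content())   # concatenated text of the post's <p> tags
print(bot.date_posted())    # text of the post's <abbr> timestamp
print(len(bot.post_likes()), "reactions scraped")
print(len(bot.post_shares()), "names found on the shares page")
```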