"""Scrape Google search-result links for a query and save them to links.csv.

Drives a real Chrome browser via splinter: types the query into Google's
search bar, clicks the search button, collects the result links, and writes
(title, link) pairs to a CSV with pandas.

NOTE(review): the XPaths below target Google's DOM as of when this was
written; Google changes its markup frequently, so they may need updating.
"""
from splinter import Browser
import pandas as pd

url = "https://www.google.com"


def main():
    browser = Browser('chrome')  # open a chrome browser
    try:
        browser.visit(url)  # goes to the url

        search_bar_xpath = '//*[@id="lst-ib"]'
        # find_by_xpath returns a list of matches, so take the first
        search_bar = browser.find_by_xpath(search_bar_xpath)[0]
        search_bar.fill("CodingStartups.com")  # simulate typing

        search_button_xpath = '//*[@id="tsf"]/div[2]/div[3]/center/input[1]'
        search_button = browser.find_by_xpath(search_button_xpath)[0]
        search_button.click()  # simulate clicking

        search_results_xpath = '//h3[@class="r"]/a'
        # list of <a> link elements inside result headings
        search_results = browser.find_by_xpath(search_results_xpath)

        # Collect (title, link) pairs. Keep the title as str: encoding to
        # bytes here would make pandas write b'...' literals into the CSV.
        scraped_data = [
            (search_result.text, search_result["href"])
            for search_result in search_results
        ]
    finally:
        # Always close the browser, even if a selector fails mid-scrape.
        browser.quit()

    # put all the data into a pandas dataframe; index=False keeps the CSV
    # to exactly the two named columns.
    df = pd.DataFrame(data=scraped_data, columns=["title", "link"])
    df.to_csv("links.csv", index=False)  # export to csv


if __name__ == "__main__":
    main()