"""Scrape Google search-result links for a query and save them to links.csv.

Drives a real Chrome browser via splinter: types the query into Google's
search bar, clicks the search button, collects the result links, and writes
(title, link) pairs to a CSV with pandas.

NOTE(review): the XPaths below target Google's DOM as of when this was
written; Google changes its markup frequently, so they may need updating.
"""
from splinter import Browser
import pandas as pd

url = "https://www.google.com"


def main():
    browser = Browser('chrome')  # open a chrome browser
    try:
        browser.visit(url)  # goes to the url

        search_bar_xpath = '//*[@id="lst-ib"]'
        # find_by_xpath returns a list of matches, so take the first
        search_bar = browser.find_by_xpath(search_bar_xpath)[0]
        search_bar.fill("CodingStartups.com")  # simulate typing

        search_button_xpath = '//*[@id="tsf"]/div[2]/div[3]/center/input[1]'
        search_button = browser.find_by_xpath(search_button_xpath)[0]
        search_button.click()  # simulate clicking

        search_results_xpath = '//h3[@class="r"]/a'
        # list of <a> link elements inside result headings
        search_results = browser.find_by_xpath(search_results_xpath)

        # Collect (title, link) pairs. Keep the title as str: encoding to
        # bytes here would make pandas write b'...' literals into the CSV.
        scraped_data = [
            (search_result.text, search_result["href"])
            for search_result in search_results
        ]
    finally:
        # Always close the browser, even if a selector fails mid-scrape.
        browser.quit()

    # put all the data into a pandas dataframe; index=False keeps the CSV
    # to exactly the two named columns.
    df = pd.DataFrame(data=scraped_data, columns=["title", "link"])
    df.to_csv("links.csv", index=False)  # export to csv


if __name__ == "__main__":
    main()