"""findJob.py -- scrape Indeed job-search result pages into a CSV file.

Running this module launches Chrome through Selenium, walks the first few
result pages of an Indeed search, and writes the scraped fields to
``jobs.csv``.
"""

from time import sleep
from urllib.parse import urlencode

import pandas as pd
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException  # noqa: F401
from selenium.webdriver.common.by import By

# Base URL of the search endpoint; query parameters are appended per page.
_SEARCH_URL = "https://www.indeed.com/jobs"

# Column order of the generated CSV.
_COLUMNS = ["Title", "Location", "Company", "Salary", "Description"]

# The ``start`` query parameter advances by this amount from page to page.
_RESULTS_PER_PAGE = 10

# Placeholder written when a field is absent from the page.
_MISSING = "None"


class FindJob():
    """Scrape job postings from Indeed and save them as a CSV file.

    All work happens in the constructor: the browser is started, the result
    pages are visited, the CSV is written, and the browser is closed again.
    """

    def __init__(self, query="data science", location="United States",
                 pages=3, output_path="jobs.csv", page_load_wait=10):
        """Run the scrape.

        Args:
            query: Search terms sent as the ``q`` parameter.
            location: Location filter sent as the ``l`` parameter.
            pages: Number of result pages to visit.
            output_path: Destination of the CSV file.
            page_load_wait: Seconds to sleep after requesting each page so
                the results have time to render.

        The defaults reproduce the original hard-coded run: three pages of
        "data science" jobs in the United States, written to ``jobs.csv``.
        """
        options = webdriver.ChromeOptions()
        options.add_argument("--start-maximized")
        # ``options=`` replaces the ``chrome_options=`` keyword, which newer
        # Selenium releases no longer accept.
        self.driver = webdriver.Chrome(options=options)

        rows = []
        try:
            for page in range(pages):
                start = page * _RESULTS_PER_PAGE
                rows.extend(
                    self._scrape_page(query, location, start, page_load_wait))
        finally:
            # Always release the browser and driver process, even when a
            # page fails half-way through.
            self.driver.quit()

        # Build the frame once instead of appending row by row:
        # ``DataFrame.append`` no longer exists in pandas 2.x and copied the
        # whole frame on every call.
        pd.DataFrame(rows, columns=_COLUMNS).to_csv(output_path, index=False)

    def _scrape_page(self, query, location, start, page_load_wait):
        """Load one result page and return a list of row dicts."""
        params = urlencode({"q": query, "l": location, "start": start})
        self.driver.get(_SEARCH_URL + "?" + params)
        sleep(page_load_wait)

        rows = []
        # NOTE(review): ``resultsBody`` looks like the container of the whole
        # result list rather than a single job card, so each page probably
        # yields only its first posting -- confirm against the live markup.
        for container in self.driver.find_elements(By.ID, "resultsBody"):
            row = self._parse_job(container.get_attribute("innerHTML") or "")
            print(row["Title"], row["Location"], row["Company"],
                  row["Salary"])
            rows.append(row)

            # Click the first summary, as the original did; nothing is read
            # back from the page afterwards. Guarded so a page without any
            # summary element no longer raises IndexError.
            summaries = container.find_elements(By.CLASS_NAME, "summary")
            if summaries:
                summaries[0].click()
                sleep(1)
        return rows

    @staticmethod
    def _parse_job(html):
        """Extract the CSV fields from the HTML of one result element."""
        soup = BeautifulSoup(html, "html.parser")
        return {
            "Title": FindJob._clean_text(soup.find("a", class_="jobtitle")),
            "Location": FindJob._clean_text(soup.find(class_="location")),
            "Company": FindJob._clean_text(soup.find(class_="company")),
            "Salary": FindJob._clean_text(soup.find(class_="salary")),
            # Description scraping is not implemented yet.
            "Description": _MISSING,
        }

    @staticmethod
    def _clean_text(node):
        """Return the node's text without newlines, or the placeholder."""
        if node is None:
            return _MISSING
        return node.text.replace("\n", "").strip()


# NOTE: instantiating at module level means importing this file starts the
# scrape; kept as-is so running the script behaves as before.
f = FindJob()