└── findJob.py


/findJob.py:
--------------------------------------------------------------------------------
 1 | from selenium import webdriver
 2 | import pandas as pd
 3 | from time import sleep
 4 | from bs4 import BeautifulSoup
 5 | from selenium.common.exceptions import NoSuchElementException
 6 | 
 7 | 
 8 | class FindJob():
 9 |     def __init__(self):
10 |         options = webdriver.ChromeOptions()
11 |         options.add_argument("--start-maximized")
12 |         self.driver = webdriver.Chrome(chrome_options=options)
13 |         dataframe = pd.DataFrame(
14 |             columns=["Title", "Location", "Company", "Salary", "Description"])
15 |         # self.driver.minimize_window()
16 |         for cnt in range(0, 30, 10):
17 |             self.driver.get(
18 |                 "https://www.indeed.com/jobs?q=data+science&l=United+States&start=" + str(cnt))
19 | 
20 |             sleep(10)
21 | 
22 |             # try:
23 |             pop_up = 'None'
24 | 
25 |             jobs = self.driver.find_elements_by_id('resultsBody')
26 | 
27 |             for job in jobs:
28 |                 result = job.get_attribute('innerHTML')
29 |                 soup = BeautifulSoup(result, 'html.parser')
30 |                 print(soup.attrs)
31 | 
32 |                 title = soup.find(
33 |                     "a", class_="jobtitle").text.replace('\n', '')
34 |                 location = soup.find(class_="location").text
35 |                 employer = soup.find(
36 |                     class_="company").text.replace('\n', '').strip()
37 |                 try:
38 |                     salary = soup.find(class_="salary").text.replace(
39 |                         '\n', '').strip()
40 |                 except:
41 |                     salary = 'None'
42 | 
43 |                 print(title, location, employer, salary)
44 | 
45 |                 summ = job.find_elements_by_class_name("summary")[0]
46 |                 summ.click()
47 |                 sleep(1)
48 |                 """ try:
49 |                     whole_job = self.driver.find_element_by_xpath('/html/body/div[1]/div[1]/div/div/div/div[1]')
50 |                     job_desc = whole_job.find_element_by_xpath('/html/body/div[1]/div[1]/div/div/div/div[1]/div/div[3]/div[2]/div[2]')
51 |                     print(job_desc)
52 |                 except NoSuchElementException:
53 |                     job_desc = 'None' """
54 | 
55 |                 dataframe = dataframe.append(
56 |                     {'Title': title, 'Location': location, 'Employer': employer}, ignore_index=True)
57 |             # except:
58 |             #     pop_up = self.driver.find_element_by_xpath(
59 |             #         '/html/body/div[5]/div[2]/div[1]')
60 |             #     pop_up.click()
61 |             dataframe.to_csv("jobs.csv", index=False)
62 | 
63 | 
64 | 
 65 | f = FindJob()  # Runs the scrape immediately: FindJob.__init__ drives the browser and writes jobs.csv.
66 | 


--------------------------------------------------------------------------------