├── requirements.txt ├── LICENSE ├── README.md └── indeedapiwrapper.py /requirements.txt: -------------------------------------------------------------------------------- 1 | requests>=2.20.0 2 | beautifulsoup4==4.6.1 3 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Kashif Aziz 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # indeed-python-wrapper 2 | A Python wrapper for Indeed Job Search API. 
3 | 4 | ### Prerequisites 5 | 6 | - Indeed Publisher API ID, available for free from [here](https://ads.indeed.com/jobroll/xmlfeed?target=_blank) 7 | - Python 3.x 8 | - BeautifulSoup and Requests libraries. Use the enclosed requirements.txt to install them. 9 | 10 | 11 | ### Usage 12 | 13 | Open indeedapiwrapper.py and set the following parameters to fetch job listings through the Indeed Job Search API: 14 | - Publisher Id is required. 15 | - To search jobs, provide either a query string or a combination of location and country code. 16 | 17 | ```python 18 | params = { 19 | 'publisher': "", # publisher ID (Required) 20 | 'q': "", # Job search query 21 | 'l': "", # location (city / state) 22 | 'co': "", # Country Code 23 | 'sort': "", # Sort order, date or relevance 24 | 'days': "" # number of days to fetch jobs, maximum is 7 days 25 | } 26 | ``` 27 | 28 | ### Output 29 | 30 | The list of jobs will be saved in a CSV file "indeedjobs.csv" in the current working directory (the directory the script is run from). 31 | 32 | ### More Information 33 | More details in the [blog post here](http://www.kashifaziz.me/indeed-api-python-wrapper.html/). 
#!/usr/bin/env python3
'''
A Python based wrapper over Indeed Job Search API
By: Kashif Aziz (http://www.kashifaziz.me)
'''

import csv
import json
import os
import os.path
import time
from datetime import datetime
from random import uniform, choice
from urllib.parse import urlencode

# Third-party packages are imported defensively so that the pure helpers
# (URL building, result parsing, CSV writing) stay importable without them.
# BeautifulSoup is not used by this module; the name is kept for importers.
try:
    from bs4 import BeautifulSoup
except ImportError:
    BeautifulSoup = None

try:
    import requests
except ImportError:
    requests = None


API_URL = "http://api.indeed.com/ads/apisearch"
USER_AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_2)"
JOBS_CSV = "indeedjobs.csv"
CSV_HEADING = ["Employer", "City", "State", "Zipcode", "Jobtitle", "Joblink", "Jobdate", "Description"]
PAGE_SIZE = 25          # results requested per API call
MAX_DAYS = 7            # largest accepted value for the "days" parameter
REQUEST_TIMEOUT = 30    # seconds; without it a stalled connection hangs forever


def writerows(rows, filename, heading=None):
    '''
    append rows to the csv file `filename`
    when `heading` is given it is written first, followed by `rows`
    (pass an empty `rows` to write the heading only)
    '''
    # newline='' is what the csv module asks for; it emits its own line endings
    with open(filename, 'a', encoding='utf-8', newline='') as toWrite:
        writer = csv.writer(toWrite)
        if heading:
            writer.writerow(heading)
        if rows:
            writer.writerows(rows)


def get_indeed_jobs(**params):
    '''
    get jobs from indeed using the API and save them to indeedjobs.csv
    accepts the keyword arguments documented on prepare_params
    returns a status message describing the outcome
    '''

    # Set CSV file name and server path
    base_path = ""
    jobs_csv = os.path.join(base_path, JOBS_CSV)

    # remove csv file, if it exists, so every run starts from an empty file
    if os.path.exists(jobs_csv):
        os.remove(jobs_csv)

    indeed_url = prepare_params(**params)

    # get total jobs found for number of days and location
    total_jobs = get_total_jobs(indeed_url)
    if total_jobs:
        print("total jobs {}".format(total_jobs))

    # build api links with start and end range
    api_links = build_api_links(indeed_url, total_jobs) if total_jobs else []
    if not api_links:
        return "Jobs not found. Please use a valid Publisher Id along with query keyword or location."

    # the same path is used for removal, heading and rows
    writerows([], jobs_csv, heading=CSV_HEADING)
    for link in api_links:
        get_indeed_job_listings(link, jobs_csv)

    return "Jobs fetched successfully."


def _normalise_days(days):
    # return the "days" parameter as a string, falling back to "1" when it is
    # empty, not a number, or larger than MAX_DAYS
    if not days:
        return "1"
    try:
        days = int(days)
    except (TypeError, ValueError):
        return "1"
    if days > MAX_DAYS:
        return "1"
    return str(days)


def prepare_params(**params):
    '''
    prepare parameters for Indeed API, returns the prepared url
    recognised keys: publisher, q, l, co, sort, days (all optional here;
    missing or empty values are sent as empty strings)
    '''
    query = params.get("q") or ""
    location = params.get("l") or ""
    country = params.get("co") or ""

    # if value for query is not available, location and country must be present, else set default values
    if not query and not (location and country):
        location = "Karachi"
        country = "pk"

    indeed_api_params = [
        ('publisher', params.get("publisher") or ""),
        ('q', query),
        ('l', location),
        ('co', country),
        ('sort', params.get("sort") or ""),
        ('format', "json"),
        ('fromage', _normalise_days(params.get("days"))),
        ('v', "2"),
        ('useragent', USER_AGENT),
    ]

    # urlencode escapes spaces, '&', ';', '+' etc. that would otherwise corrupt
    # the query string, and accepts non-string values such as days=3
    return API_URL + "?" + urlencode(indeed_api_params)


def _fetch_json(url):
    # GET url and return the decoded JSON body, or None on a non-200 status
    if requests is None:
        raise RuntimeError("The 'requests' package is required to call the Indeed API.")
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    if response.status_code != 200:
        return None
    return response.json()


def get_total_jobs(indeed_url):
    '''
    return the "totalResults" value reported by the API for indeed_url
    returns 0 when the request fails; the error is printed, not raised
    '''
    total_jobs = 0
    try:
        json_data = _fetch_json(indeed_url)
        if json_data is not None:
            total_jobs = json_data["totalResults"]
    except Exception as e:
        print("Error getting total jobs! {}".format(e))

    return total_jobs


def build_api_links(indeed_url, total_jobs):
    '''
    build one paginated api link per PAGE_SIZE results
    each link is wrapped in a one-element list, the shape
    get_indeed_job_listings has always been called with
    '''
    return [
        [indeed_url + "&start=" + str(start) + "&limit=" + str(PAGE_SIZE)]
        for start in range(0, int(total_jobs or 0), PAGE_SIZE)
    ]


def _text(result, key):
    # value of result[key] as a string; "" when the key is missing or empty
    value = result.get(key)
    return "" if value is None else str(value)


def _parse_job(result):
    # turn one API result dict into a csv row matching CSV_HEADING
    # location is split as "City, ST 12345": two-letter state, then zipcode
    location_parts = [part.strip() for part in _text(result, "formattedLocationFull").split(",")]
    city = location_parts[0]
    state_and_zip = location_parts[1] if len(location_parts) > 1 else ""
    state = state_and_zip[:2]
    zipcode = state_and_zip[2:].strip()

    # keep only the part of the link before the first '&'
    joblink = _text(result, "url").split("&")[0]

    # keep the 12 characters following the first comma of the date string
    date_parts = _text(result, "date").split(",")
    jobdate = date_parts[1][:12].strip() if len(date_parts) > 1 else ""

    return [
        _text(result, "company"), city, state, zipcode,
        _text(result, "jobtitle"), joblink, jobdate, _text(result, "snippet"),
    ]


def get_indeed_job_listings(indeed_url, filename=JOBS_CSV):
    '''
    get jobs from indeed api link and save to csv
    indeed_url may be a url string or the one-element list produced by
    build_api_links; filename is the csv file the rows are appended to
    errors are printed, not raised, so one bad page does not stop the run
    '''
    if isinstance(indeed_url, (list, tuple)):
        indeed_url = indeed_url[0]

    try:
        json_data = _fetch_json(indeed_url)
        if json_data is not None:
            jobs = []
            for result in json_data["results"]:
                job = _parse_job(result)
                print(job[4])
                jobs.append(job)

            writerows(jobs, filename)

    except Exception as e:
        print("Error! {}".format(e))

    # take a break between api calls .. make this random
    time.sleep(uniform(0.5, 2.0))

    return


if __name__ == "__main__":
    # Parameters to fetch job listings through Indeed API
    # Publisher Id is required
    # To search jobs, either provide query string or combination of location and country code.

    params = {
        'publisher': "",    # publisher ID (Required)
        'q': "",            # Job search query
        'l': "",            # location (city / state)
        'co': "",           # Country Code
        'sort': "",         # Sort order, date or relevance
        'days': ""          # number of days to fetch jobs, maximum is 7 days
    }

    get_jobs = get_indeed_jobs(**params)
    print(get_jobs)