├── .gitignore
├── uber-jobs.Rproj
├── simple.categories.csv
└── analyze.R


/.gitignore:
--------------------------------------------------------------------------------
 1 | .Rproj.user
 2 | .Rhistory
 3 | .RData
 4 | .Ruserdata
 5 | .DS_Store
 6 | *.csv
 7 | *.xlsx
 8 | *.png
 9 | uber-jobs.Rproj
10 | 


--------------------------------------------------------------------------------
/uber-jobs.Rproj:
--------------------------------------------------------------------------------
 1 | Version: 1.0
 2 | 
 3 | RestoreWorkspace: Default
 4 | SaveWorkspace: Default
 5 | AlwaysSaveHistory: Default
 6 | 
 7 | EnableCodeIndexing: Yes
 8 | UseSpacesForTab: Yes
 9 | NumSpacesForTab: 2
10 | Encoding: UTF-8
11 | 
12 | RnwWeave: Sweave
13 | LaTeX: pdfLaTeX
14 | 


--------------------------------------------------------------------------------
/simple.categories.csv:
--------------------------------------------------------------------------------
 1 | category,simple
 2 | "City Operations, Operations & Launch",Operations
 3 | "Community Operations, Global Community Operations",Operations
 4 | Engineering,Engineering
 5 | "City Marketing, Local Marketing",Marketing
 6 | "Finance & Strategy, Finance & Accounting",Finance
 7 | Growth Marketing,Marketing
 8 | Product,Product
 9 | "Central Marketing, Marketing",Marketing
10 | Design,Design
11 | Legal,Legal
12 | "People, People & Places",Human Resources
13 | "Local Marketing, Marketing",Marketing
14 | "Data Science, Engineering",Engineering
15 | Communications,Communications
16 | "Tax & Treasury, Finance & Accounting",Finance
17 | "Business Development, Business",Business
18 | "Accounting, Finance & Accounting",Finance
19 | Public Policy,Communications
20 | "Recruiting, People & Places",Human Resources
21 | "Sales, Business & Sales",Business
22 | "Advanced Technologies Center, Engineering",Engineering
23 | "People, People Operations",Human Resources
24 | "Tech Services, Engineering",Engineering
25 | "Workplace, People & Places",Human Resources
26 | "Engineering Security, Safety & Security",Engineering
27 | "Xchange Leasing, Rentals & Leasing",Leasing
28 | Business,Business
29 | "Recruiting, People Operations",Human Resources
30 | NA,Unknown
31 | "Investigations & Intelligence, Safety & Security",Safety & Security
32 | "Maps, Engineering",Engineering
33 | "Business Development, Business & Sales",Business
34 | "Advanced Technologies, Engineering",Engineering
35 | "Procurement, Finance & Accounting",Finance
36 | "Trust & Safety, Safety & Security",Safety & Security
37 | "Insurance, Finance & Accounting",Finance
38 | "Workplace, People Operations",Human Resources
39 | "Physical Security, Safety & Security",Safety & Security
40 | "Launch, Operations & Launch",Operations
41 | "Business, Business & Sales",Business
42 | "Finance Technology, Finance & Accounting",Finance
43 | "Data Infrastructure, Engineering",Engineering
44 | "Design, University",Other
45 | "Engineering, University",Other
46 | "U4B, Business",Leasing
47 | "City Rentals, Rentals & Leasing",Other
48 | "Operations, University",Other
49 | "Data Science, University",Other
50 | "Engineering Security, University",Other


--------------------------------------------------------------------------------
/analyze.R:
--------------------------------------------------------------------------------
  1 | library(readxl)
  2 | library(dplyr)
  3 | library(tidyr)
  4 | library(ggplot2)
  5 | library(readr)
  6 | 
  7 | # LOADING
  8 | 
  9 | # Original data: the spreadsheet of job-listing observations
 10 | jobs <- read_excel("uber-job-listings-thinknum-all.xlsx")
 11 | 
 12 | # Keep only the columns used below, renamed to dotted lower-case names
 13 | jobs.clean <- jobs %>%
 14 |   select(
 15 |     unique.id = `Unique ID`,
 16 |     listing.id = `Listing ID`,
 17 |     as.of.date = `As Of Date`,
 18 |     title = Title, category = Category,
 19 |     city = City, state = State, country = Country,
 20 |     posted.date = `Posted Date`
 21 |   ) %>%
 22 |   # Convert discrete labels to factors
 23 |   mutate(
 24 |     category = as.factor(category),
 25 |     city = as.factor(city),
 26 |     state = as.factor(state),
 27 |     country = as.factor(country)
 28 |   )
 29 | 
 30 | # VALIDATION
 31 | 
 32 | # There are jobs that change category: listings observed with more than
 33 | # one distinct category value
 34 | jobs.clean %>%
 35 |   group_by(listing.id) %>%
 36 |   summarise(count = n_distinct(category)) %>%
 37 |   filter(count > 1)
 38 | 
 39 | # And many jobs that change post date...
 40 | # (listings observed with more than one distinct posted.date value)
 41 | # Example of a job that is re-opened: https://careers-uber.icims.com/jobs/28368/job/login
 42 | # Starts 2/22/17, re-opened 3/8/17, last seen 3/24/17
 43 | jobs.clean %>%
 44 |   group_by(listing.id) %>%
 45 |   summarise(count = n_distinct(posted.date)) %>%
 46 |   filter(count > 1)
 47 | 
 48 | # SIMPLIFICATION
 49 | 
 50 | # Keep one row per job opening, using its most recent observation (as.of.date)
 51 | jobs.reduced <- jobs.clean %>%
 52 |   filter(!is.na(posted.date), !is.na(as.of.date)) %>%
 53 |   group_by(listing.id) %>%
 54 |   summarise(
 55 |     title = last(title, order_by = as.of.date),  # value at latest as.of.date
 56 |     category = last(category, order_by = as.of.date),
 57 |     city = last(city, order_by = as.of.date),
 58 |     country = last(country, order_by = as.of.date),
 59 |     first.posted = min(posted.date),  # earliest posting across re-openings
 60 |     last.seen = max(as.of.date)
 61 |   )
 62 | 
 63 | # EXPORT USEFUL SUBSETS
 64 | 
 65 | # Country: listings per country, largest first
 66 | jobs.by.country <- jobs.reduced %>%
 67 |   group_by(country) %>%
 68 |   summarise(count = n()) %>%
 69 |   arrange(desc(count))
 70 | 
 71 | write.csv(jobs.by.country, file = "jobs.by.country.csv", row.names = TRUE)
 72 | 
 73 | # City: listings per country/city pair, largest first
 74 | jobs.by.city <- jobs.reduced %>%
 75 |   group_by(country, city) %>%
 76 |   summarise(count = n()) %>%
 77 |   arrange(desc(count))
 78 | 
 79 | write.csv(jobs.by.city, file = "jobs.by.city.csv", row.names = TRUE)
 80 | 
 81 | # Category: listings per category, largest first
 82 | jobs.by.category <- jobs.reduced %>%
 83 |   group_by(category) %>%
 84 |   summarise(count = n()) %>%
 85 |   arrange(desc(count))
 86 | 
 87 | write.csv(jobs.by.category, file = "jobs.by.category.csv", row.names = TRUE)
 88 | 
 89 | # Title: listings per title, largest first
 90 | jobs.by.title <- jobs.reduced %>%
 91 |   group_by(title) %>%
 92 |   summarise(count = n()) %>%
 93 |   arrange(desc(count))
 94 | 
 95 | write.csv(jobs.by.title, file = "jobs.by.title.csv", row.names = TRUE)
 96 | 
 97 | # ANALYZE JOB TURNOVER
 98 | 
 99 | # Build a counter over jobs.data: the returned function takes one date d
100 | # and gives the number of rows with first.posted <= d <= last.seen.
101 | MakeJobCounter <- function(jobs.data) {
102 |   function(d) {
103 |     is.open <- d >= jobs.data$first.posted & d <= jobs.data$last.seen
104 | 
105 |     # Rows whose comparison is NA are skipped, matching dplyr::filter()
106 |     sum(is.open, na.rm = TRUE)
107 |   }
108 | }
109 | 
110 | # Daily sequence from the earliest to the latest last.seen value
111 | dates <- seq(
112 |   min(jobs.reduced$last.seen), max(jobs.reduced$last.seen), by = "day"
113 | )
114 | 
115 | # Open jobs: count per date; vapply() guarantees an integer vector
116 | jobs.open <- tibble(
117 |   date = dates,
118 |   open = vapply(dates, MakeJobCounter(jobs.reduced), integer(1))
119 | )
120 | 
121 | # Opened (new) jobs per first.posted value after the first date in range
122 | jobs.opened <- jobs.reduced %>%
123 |   group_by(first.posted) %>%
124 |   summarise(opened = n()) %>%
125 |   filter(first.posted > dates[1])
126 | 
127 | # Closed jobs per last.seen value before the final date in range
128 | jobs.closed <- jobs.reduced %>%
129 |   group_by(last.seen) %>%
130 |   summarise(closed = n()) %>%
131 |   filter(last.seen < dates[length(dates)])
132 | 
133 | # Full outer joins on date; the count columns to stack are opened and closed
134 | jobs.counts <- merge(x=jobs.open, y=jobs.opened, by.x="date", by.y="first.posted", all=TRUE)
135 | jobs.counts <- merge(x=jobs.counts, y=jobs.closed, by.x="date", by.y="last.seen", all=TRUE)
136 | jobs.turnover <- gather(jobs.counts, jobs, count, opened:closed)
137 | 
138 | ggplot(jobs.open, aes(x = date, y = open)) +
139 |   geom_line() +
140 |   ggtitle("Open Uber jobs by date") +
141 |   labs(x = "Date", y = "Open jobs")
142 | 
143 | # ANALYZE JOB CATEGORIES
144 | 
145 | # Lookup table mapping each raw category label to a simplified group
146 | simple.categories <- read.csv("simple.categories.csv")
147 | mapped <- simple.categories$simple[
148 |   match(jobs.reduced$category, simple.categories$category)
149 | ]
150 | jobs.with.simple <- mutate(jobs.reduced, simple.category = mapped)
151 | 
152 | jobs.with.simple %>%
153 |   group_by(simple.category) %>%
154 |   summarise(count = n()) %>%
155 |   arrange(desc(count))
156 | 
157 | # CountOpenJobsByCategory <- function(d) {
158 | #   dJobs <- jobs.reduced %>%
159 | #     filter(d >= first.posted, d <= last.seen) %>%
160 | #     group_by(category) %>%
161 | #     summarise(open=n()) %>%
162 | #     mutate(date=d)
163 | #   
164 | #   return(dJobs)
165 | # }
166 | 
167 | # jobs.open.by.category <- do.call(rbind, sapply(dates, CountOpenJobsByCategory, simplify=FALSE))
168 | # 
169 | # ggplot(jobs.open.by.category, aes(x=date, y=open, color=category)) +
170 | #   geom_line() +
171 | #   ggtitle("Uber jobs") +
172 | #   labs(x="Date", y="Jobs", color="Series")
173 | 
174 | # ANALYZE JOB FILL SPEED
175 | 
176 | # Listings last seen before the final date, longest time-to-close first
177 | jobs.closed <- jobs.reduced %>%
178 |   filter(last.seen < dates[length(dates)]) %>%
179 |   mutate(days.to.close = as.numeric(last.seen - first.posted, units = "days")) %>%
180 |   arrange(desc(days.to.close))
181 | write.csv(jobs.closed, file = "jobs.closed.csv", row.names = FALSE)
182 | 
183 | # ANALYZE PITTSBURGH (SELF-DRIVING) JOBS
184 | 
185 | # Listings whose city is Pittsburgh, exported as-is
186 | jobs.pittsburgh <- filter(jobs.reduced, city == "Pittsburgh")
187 | write_csv(jobs.pittsburgh, "jobs.pittsburgh.csv")
188 | 
189 | # Open Pittsburgh listings per date; vapply() pins the result to integer
190 | jobs.pittsburgh.open <- tibble(
191 |   date = dates,
192 |   open = vapply(dates, MakeJobCounter(jobs.pittsburgh), integer(1))
193 | )
194 | write_csv(jobs.pittsburgh.open, "jobs.pittsburgh.open.csv")
195 | 
196 | # Plot the daily open-listing count for Pittsburgh
197 | ggplot(jobs.pittsburgh.open, aes(x = date, y = open)) +
198 |   geom_line() +
199 |   ggtitle("Open Uber jobs in Pittsburgh by date") +
200 |   labs(x = "Date", y = "Open jobs")
201 | 
202 | # ANALYZE LEASING / XCHANGE JOBS
203 | 
204 | # Listings whose title mentions "xchange" or "leasing" (case-insensitive)
205 | jobs.leasing <- jobs.reduced %>%
206 |   filter(grepl("xchange|leasing", title, ignore.case = TRUE))
207 | 
208 | jobs.leasing.open <- tibble(
209 |   date = dates, open = vapply(dates, MakeJobCounter(jobs.leasing), integer(1))
210 | )
211 | 
212 | ggplot(jobs.leasing.open, aes(x = date, y = open)) +
213 |   geom_line() +
214 |   ggtitle("Open Uber jobs with either 'xchange' or 'leasing' in title") +
215 |   labs(x = "Date", y = "Open jobs")
216 | 


--------------------------------------------------------------------------------