├── .gitignore ├── uber-jobs.Rproj ├── simple.categories.csv └── analyze.R /.gitignore: -------------------------------------------------------------------------------- 1 | .Rproj.user 2 | .Rhistory 3 | .RData 4 | .Ruserdata 5 | .DS_Store 6 | *.csv 7 | *.xlsx 8 | *.png 9 | uber-jobs.Rproj 10 | -------------------------------------------------------------------------------- /uber-jobs.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: Default 4 | SaveWorkspace: Default 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: pdfLaTeX 14 | -------------------------------------------------------------------------------- /simple.categories.csv: -------------------------------------------------------------------------------- 1 | category,simple "City Operations, Operations & Launch",Operations "Community Operations, Global Community Operations",Operations Engineering,Engineering "City Marketing, Local Marketing",Marketing "Finance & Strategy, Finance & Accounting",Finance Growth Marketing,Marketing Product,Product "Central Marketing, Marketing",Marketing Design,Design Legal,Legal "People, People & Places",Human Resources "Local Marketing, Marketing",Marketing "Data Science, Engineering",Engineering Communications,Communications "Tax & Treasury, Finance & Accounting",Finance "Business Development, Business",Business "Accounting, Finance & Accounting",Finance Public Policy,Communications "Recruiting, People & Places",Human Resources "Sales, Business & Sales",Business "Advanced Technologies Center, Engineering",Engineering "People, People Operations",Human Resources "Tech Services, Engineering",Engineering "Workplace, People & Places",Human Resources "Engineering Security, Safety & Security",Engineering "Xchange Leasing, Rentals & Leasing",Leasing Business,Business "Recruiting, 
People Operations",Human Resources NA,Unknown "Investigations & Intelligence, Safety & Security",Safety & Security "Maps, Engineering",Engineering "Business Development, Business & Sales",Business "Advanced Technologies, Engineering",Engineering "Procurement, Finance & Accounting",Finance "Trust & Safety, Safety & Security",Safety & Security "Insurance, Finance & Accounting",Finance "Workplace, People Operations",Human Resources "Physical Security, Safety & Security",Safety & Security "Launch, Operations & Launch",Operations "Business, Business & Sales",Business "Finance Technology, Finance & Accounting",Finance "Data Infrastructure, Engineering",Engineering "Design, University",Other "Engineering, University",Other "U4B, Business",Leasing "City Rentals, Rentals & Leasing",Other "Operations, University",Other "Data Science, University",Other "Engineering Security, University",Other -------------------------------------------------------------------------------- /analyze.R: -------------------------------------------------------------------------------- 1 | library(readxl) 2 | library(dplyr) 3 | library(tidyr) 4 | library(ggplot2) 5 | library(readr) 6 | 7 | # LOADING 8 | 9 | # Original data 10 | jobs <- read_excel("uber-job-listings-thinknum-all.xlsx") 11 | 12 | # Remove unneeded columns and clean up column names 13 | jobs.clean <- select(jobs, 14 | unique.id = `Unique ID`, 15 | listing.id = `Listing ID`, 16 | as.of.date = `As Of Date`, 17 | title = Title, 18 | category = Category, 19 | city = City, 20 | state = State, 21 | country = Country, 22 | posted.date = `Posted Date`) 23 | 24 | # Convert discrete labels to factors 25 | jobs.clean$category <- as.factor(jobs.clean$category) 26 | jobs.clean$city <- as.factor(jobs.clean$city) 27 | jobs.clean$state <- as.factor(jobs.clean$state) 28 | jobs.clean$country <- as.factor(jobs.clean$country) 29 | 30 | # VALIDATION 31 | 32 | # There are jobs that change category... 
jobs.clean %>%
  group_by(listing.id) %>%
  distinct(category) %>%
  summarise(count = n()) %>%
  filter(count > 1)

# And many jobs that change post date...
# Example of a job that is re-opened: https://careers-uber.icims.com/jobs/28368/job/login
# Starts 2/22/17, re-opened 3/8/17, last seen 3/24/17
jobs.clean %>%
  group_by(listing.id) %>%
  distinct(posted.date) %>%
  summarise(count = n()) %>%
  filter(count > 1)

# SIMPLIFICATION

# Collapse to one row per listing, describing it by its most recent
# observation. Rows are sorted newest-first before grouping so that first()
# picks the latest title/category/city/country regardless of the row order in
# the spreadsheet (listings are known to change category over time, see the
# validation queries above).
jobs.reduced <- jobs.clean %>%
  filter(!is.na(posted.date), !is.na(as.of.date)) %>%
  arrange(desc(as.of.date)) %>%
  group_by(listing.id) %>%
  summarise(
    title = first(title),
    category = first(category),
    city = first(city),
    country = first(country),
    first.posted = min(posted.date),  # earliest posting date ever reported
    last.seen = max(as.of.date)       # last snapshot that contained the job
  )

# EXPORT USEFUL SUBSETS

# Country
jobs.by.country <- jobs.reduced %>%
  group_by(country) %>%
  summarise(count = n()) %>%
  arrange(desc(count))

write.csv(jobs.by.country, "jobs.by.country.csv")

# City
jobs.by.city <- jobs.reduced %>%
  group_by(country, city) %>%
  summarise(count = n()) %>%
  ungroup() %>%  # summarise() leaves the result grouped by country
  arrange(desc(count))

write.csv(jobs.by.city, "jobs.by.city.csv")

# Category
jobs.by.category <- jobs.reduced %>%
  group_by(category) %>%
  summarise(count = n()) %>%
  arrange(desc(count))

write.csv(jobs.by.category, "jobs.by.category.csv")

# Title
jobs.by.title <- jobs.reduced %>%
  group_by(title) %>%
  summarise(count = n()) %>%
  arrange(desc(count))

write.csv(jobs.by.title, "jobs.by.title.csv")

# ANALYZE JOB TURNOVER

# Build a counting function bound to one set of listings.
#
# jobs.data: data frame with `first.posted` and `last.seen` columns.
# Returns a function of a single date `d` that gives the number of rows of
# `jobs.data` with first.posted <= d <= last.seen (bounds inclusive).
MakeJobCounter <- function(jobs.data) {
  CountJobs <- function(d) {
    open.on.day <- jobs.data %>%
      filter(d >= first.posted, d <= last.seen)

    nrow(open.on.day)
  }

  CountJobs
}

# One entry per day between the earliest and the latest `last.seen` value.
dates <- seq(
  min(jobs.reduced$last.seen),
  max(jobs.reduced$last.seen),
  by = "day"
)

# Open jobs
# vapply() pins the result to one integer per date, even for empty input.
jobs.open <- tibble(
  date = dates,
  open = vapply(dates, MakeJobCounter(jobs.reduced), integer(1))
)

# Opened (new) jobs
# Postings dated on or before the first day of the window are excluded.
jobs.opened <- jobs.reduced %>%
  group_by(first.posted) %>%
  summarise(opened = n()) %>%
  filter(first.posted > dates[1])

# Closed jobs
# Jobs still listed on the final day are not counted as closed.
jobs.closed <- jobs.reduced %>%
  group_by(last.seen) %>%
  summarise(closed = n()) %>%
  filter(last.seen < dates[length(dates)])

jobs.counts <- merge(
  x = jobs.open, y = jobs.opened,
  by.x = "date", by.y = "first.posted", all = TRUE
)
jobs.counts <- merge(
  x = jobs.counts, y = jobs.closed,
  by.x = "date", by.y = "last.seen", all = TRUE
)

# Long format with one row per date and series. The merged frame has the
# columns date, open, opened and closed, so the gathered range ends at
# `closed` (there is no `filled` column).
jobs.turnover <- gather(jobs.counts, jobs, count, opened:closed)

ggplot(jobs.open, aes(x = date, y = open)) +
  geom_line() +
  ggtitle("Open Uber jobs by date") +
  labs(x = "Date", y = "Open jobs")

# ANALYZE JOB CATEGORIES

simple.categories <- read.csv("simple.categories.csv")

# Simplified category for every listing; NA where the raw category has no
# entry in the lookup table.
mapped <- simple.categories$simple[
  match(jobs.reduced$category, simple.categories$category)
]

jobs.with.simple <- jobs.reduced %>%
  mutate(simple.category = mapped)

jobs.with.simple %>%
  group_by(simple.category) %>%
  summarise(count = n()) %>%
  arrange(desc(count))

# CountOpenJobsByCategory <- function(d) {
#   dJobs <- jobs.reduced %>%
#     filter(d >= first.posted, d <= last.seen) %>%
#     group_by(category) %>%
#     summarise(open=n()) %>%
#     mutate(date=d)
#
#   return(dJobs)
# }

# jobs.open.by.category <- do.call(rbind, sapply(dates, CountOpenJobsByCategory, simplify=FALSE))
#
# ggplot(jobs.open.by.category, aes(x=date, y=open, color=category)) +
#   geom_line() +
#   ggtitle("Uber jobs") +
#   labs(x="Date", y="Jobs", color="Series")

# ANALYZE JOB FILL SPEED

# NOTE: this overwrites the per-day `jobs.closed` summary built in the
# turnover section with a per-listing table of jobs that are no longer listed.
jobs.closed <- jobs.reduced %>%
  filter(last.seen < dates[length(dates)]) %>%
  mutate(
    days.to.close = as.numeric(last.seen - first.posted, units = "days")
  ) %>%
  arrange(desc(days.to.close))

write.csv(jobs.closed, "jobs.closed.csv", row.names = FALSE)

# ANALYZE PITTSBURGH (SELF-DRIVING) JOBS

jobs.pittsburgh <- jobs.reduced %>%
  filter(city == "Pittsburgh")

write_csv(jobs.pittsburgh, "jobs.pittsburgh.csv")

jobs.pittsburgh.open <- tibble(
  date = dates,
  open = vapply(dates, MakeJobCounter(jobs.pittsburgh), integer(1))
)

write_csv(jobs.pittsburgh.open, "jobs.pittsburgh.open.csv")

ggplot(jobs.pittsburgh.open, aes(x = date, y = open)) +
  geom_line() +
  ggtitle("Open Uber jobs in Pittsburgh by date") +
  labs(x = "Date", y = "Open jobs")

# ANALYZE LEASING / XCHANGE JOBS

# Case-insensitive match on either keyword anywhere in the title.
jobs.leasing <- jobs.reduced %>%
  filter(grepl("xchange|leasing", title, ignore.case = TRUE))

jobs.leasing.open <- tibble(
  date = dates,
  open = vapply(dates, MakeJobCounter(jobs.leasing), integer(1))
)

ggplot(jobs.leasing.open, aes(x = date, y = open)) +
  geom_line() +
  ggtitle("Open Uber jobs with either 'xchange' or 'leasing' in title") +
  labs(x = "Date", y = "Open jobs")