├── HR Predictive Analytics Tutorial 1-4.r ├── HR_comma_sep.csv ├── README.md └── currentEmployees.csv /HR Predictive Analytics Tutorial 1-4.r: --------------------------------------------------------------------------------

# Full description of the code and analysis:
# https://www.littlemissdata.com/blog/predictive-analytics-tutorial-part-1

# Local R install: download the file from
# https://raw.githubusercontent.com/lgellis/Predictive_Analytics_Tutorial
# and import it with:
# hr = read.table("/filepath/HR_comma_sep.csv", head=TRUE, sep=",")

# OR pull it straight from the GitHub repo:
# install.packages("data.table")
# library(data.table)
# hr= fread('https://raw.githubusercontent.com/lgellis/Predictive_Analytics_Tutorial/master/HR_comma_sep.csv')

# If you are using DSX, place your cursor in a cell and "insert to code" per:
# https://www.littlemissdata.com/blog/predictive-analytics-tutorial-part-1
# (DSX injects the imported data as df.data.2)
head(df.data.2)

# Give the data frame the short name used throughout the rest of the script
hr <- df.data.2

# List the column names
names(hr)

# Peek at the first few rows
head(hr)

# Number of rows and columns
dim(hr)

# Structure of each column: type plus sample values
str(hr)

# summary() reports per-column statistics: min, max, median, mean and
# quartiles for numeric columns; level counts for factors.
# Per-column summary statistics (numeric: min/max/median/mean/quartiles;
# factors: counts of each level)
summary(hr)

# Install packages - do this one time.
# Fixed: "leaps" was listed twice in the original.
install.packages("data.table")
install.packages("corrplot")
install.packages("ggplot2")
install.packages("gcookbook")
install.packages("caret")
install.packages("hexbin")
install.packages("leaps")
install.packages("plyr")
install.packages("plotly")
install.packages("waffle")
install.packages("dummies")
install.packages("caTools")
install.packages("wesanderson")
install.packages("visreg")
install.packages("car")
install.packages("MASS")

# Load the relevant libraries - do this every time.
# Fixed: duplicate library(leaps) removed; library(rpart) removed because
# rpart was never installed above and is never used in this script.
library(data.table)
library(corrplot)
library(ggplot2)
library(gcookbook)
library(caret)
library(hexbin)
library(leaps)
library(plyr)
library(plotly)
library(waffle)
library(dummies)
library(caTools)
library(wesanderson)
library(visreg)
library(car)
library(MASS)

# attach() lets us reference columns by bare name.
# NOTE(review): attach() is fragile (name masking across data frames); passing
# data= arguments is the safer pattern, kept here to match the tutorial.
attach(hr)

# Pearson correlations of the numeric columns (first 8)
corMatrix <- cor(hr[1:8], use="complete.obs", method="pearson")
# Round to two decimals for readability
round(corMatrix, 2)

# A tour of corrplot display styles on the same matrix
corrplot(corMatrix, method="circle")
corrplot(corMatrix, method="square")
corrplot(corMatrix, method="number")
corrplot(corMatrix, method="shade")
corrplot(corMatrix, type = "upper")
corrplot.mixed(corMatrix)

# Rename the columns to short labels
colNames <- c("satLevel", "lastEval", "numProj", "avgHrs", "timeCpny",
              "wrkAcdnt", "left", "fiveYrPrmo", "job", "salary")
setnames(hr, colNames)
names(hr)
attach(hr)

# NOTE(review): corMatrix was computed before the rename, so these two plots
# still display the original column labels.
corrplot(corMatrix, order = "hclust", addrect = 2, col = heat.colors(100), tl.col = "black")

# Upper triangle with a lightblue background
corrplot(corMatrix, type = "upper", order = "hclust", col = c("black", "white"),
         bg = "lightblue", tl.col = "black")

# Histograms for the numeric variables to understand their distributions.
# avgHrs is monthly, so dividing by 4 approximates weekly hours.
hist(avgHrs/4, main="Distribution of Average Hours per Week", xlab="Avg Hours", breaks=7, col="lightblue")
hist(satLevel, main="Distribution of Satisfaction Level", xlab="Satisfaction Level", breaks=7, col="lightblue")
hist(lastEval, main="Distribution of Last Evaluations", xlab="Last Eval", breaks=7, col="lightblue")
hist(numProj, main="Distribution of Number of Projects", xlab="Number of Projects", breaks=7, col="lightblue")

# Side-by-side bar chart: avg hours by project count, split by salary band
ggplot(data=hr, aes(x=numProj, y=avgHrs, fill=salary)) +
  geom_bar(position="dodge", color="black", stat="identity") +
  scale_fill_brewer(palette = "Pastel1")

# Density of approximate weekly hours by salary band
qplot(avgHrs/4, data=hr, geom="density", fill=salary, alpha=I(.5),
      main="Avg Weekly Hours by Salary Category", xlab="Average Weekly Hours",
      ylab="Density")

attach(hr)

# Readable factor versions of the 0/1 indicator columns
hr$leftFactor <- factor(left, levels=c(0,1),
                        labels=c("Did Not Leave Company","Left Company"))

hr$promoFactor <- factor(fiveYrPrmo, levels=c(0,1),
                         labels=c("Did Not Get Promoted","Did Get Promoted"))

hr$wrkAcdntFactor <- factor(wrkAcdnt, levels=c(0,1),
                            labels=c("No Accident","Accident"))

attach(hr)

# Density of approximate weekly hours by retention status
qplot(avgHrs/4, data=hr, geom="density", fill=leftFactor, alpha=I(.5),
      main="Avg Weekly Hours by Retention", xlab="Average Weekly Hours",
      ylab="Density")

# Boxplot of avg hours by job title
boxplot(avgHrs~job, data=hr, main="HR Data",
        xlab="Job Title", ylab="Avg Hours", col="lightblue")

# Violin plot of the same relationship
hrBox <- ggplot(hr, aes(y=avgHrs, x=job))
hrBox + geom_violin(trim=FALSE, fill="lightblue")

# Many-dimension charts
# Facetted scatter: weekly hours vs tenure, shape = retention, colour = salary,
# facetted by project count x promotion status
qplot(avgHrs/4, timeCpny, data=hr, shape=leftFactor, color=salary, facets=numProj~promoFactor, size=I(3),
      xlab="avg hrs", ylab="time at company")

# Weekly hours vs satisfaction level
hrScat <- ggplot(hr, aes(x=avgHrs/4, y=satLevel))
hrScat + geom_point()
# Make the points more transparent so that it's less intense
hrScat + geom_point(alpha=.01)
# Fixed: stat_bin2d must be called -- adding the bare function object
# (`hrScat + stat_bin2d`) is an error in ggplot2.
hrScat + stat_bin2d()

hrScat + stat_binhex()
hrScat + stat_binhex() + scale_fill_gradient(low="lightblue", high="red")

# Last evaluation vs satisfaction level
LEvSL <- ggplot(hr, aes(x=satLevel, y=lastEval))
LEvSL + geom_point()
LEvSL + stat_binhex() + scale_fill_gradient(low="lightblue", high="red")

# Example of a Bagplot
#install.packages("aplpack")
library(aplpack)

bagplot(avgHrs/4, satLevel, xlab="Avg Weekly Hrs", ylab="SatLevel",
        main="Bagplot Avg Hours by Sat Level")

attach(hr)
# fread() returns a data.table ("list"); flatten to a plain data frame
typeof(hr)
hr <- as.data.frame(hr)
typeof(hr)

# Dummy-code job and salary: all model inputs must be numeric
hr2 <- cbind(hr, dummy(job), dummy(salary))

# Ensure dummy variables are created
names(hr2)
head(hr2)

# Log transforms for: satLevel, lastEval
hr2$satLevelLog <- log(satLevel)
hr2$lastEvalLog <- log(lastEval)

# SQRT transforms for: satLevel, lastEval
hr2$satLevelSqrt <- sqrt(satLevel)
hr2$lastEvalSqrt <- sqrt(lastEval)

# Scale (center/standardize) transforms for: satLevel, lastEval
hr2$satLevelScale <- scale(satLevel)
hr2$lastEvalScale <- scale(lastEval)

# Peek at new columns
head(hr2)

# Ensure all columns present
names(hr2)

# Binary flag for employees rated highly but reporting low satisfaction
hr2$greatEvalLowSat <- ifelse(lastEval>0.8 & satLevel <0.2, 1, 0)

# Visualize effects that it has on average hours
attach(hr2)
# Boxplot of avg hours split by the greatEvalLowSat flag
x <- ggplot(hr2, aes(factor(greatEvalLowSat), avgHrs))
x <- x + geom_boxplot(aes(fill=factor(greatEvalLowSat)), outlier.color="black", outlier.size=1)
x <- x + coord_flip()
x + scale_fill_manual(values=wes_palette(n=2, name="GrandBudapest2"))

# Visualize distribution by job.
# Fixed: plot from hr2, which actually contains greatEvalLowSat; the original
# used hr and only worked via the attached copy of hr2.
ggplot(hr2, aes(avgHrs, job, z= greatEvalLowSat)) + geom_tile(aes(fill = greatEvalLowSat)) + theme_bw() + scale_fill_gradient(low="#98afc7", high="#646d7e")

# Distribution table of the flag.
# Fixed: plyr::count() takes the data frame as its first argument; the
# original passed the avgHrs vector, which has no greatEvalLowSat column.
# Also renamed the result so it no longer masks base::summary().
flagCounts <- count(hr2, 'greatEvalLowSat')

flagCounts

#Starting code credit to: https://stackoverflow.com/questions/17200114/how-to-split-data-into-training-testing-sets-using-sample-function
# Fixed: caTools::sample.split() expects a vector (typically the outcome),
# not a whole data frame -- passing hr2 recycled a column-length mask and
# produced a patterned, non-random split. Split on the outcome column.
set.seed(101)
sample <- sample.split(hr2$avgHrs, SplitRatio = .75)
train <- subset(hr2, sample == TRUE)
test <- subset(hr2, sample == FALSE)

# See if the test/train sets have been split appropriately
dim(train)
dim(test)

# Ensure data characteristics are roughly the same
summary(train)
summary(test)

#1) Throw everything in!
# Baseline: regress avgHrs on every available predictor
model.lr1 <- lm(avgHrs ~ ., train)
# Adjusted R-squared: higher is better (duplicate summary() call removed)
summary(model.lr1)

# AIC, BIC: lower is better
aic <- AIC(model.lr1)
bic <- BIC(model.lr1)
# Fixed: take adjusted R-squared from the fit instead of hand-copying it
adjR2 <- summary(model.lr1)$adj.r.squared
pred.model.lr1 <- predict(model.lr1, newdata = test) # validation predictions
# Mean squared prediction error and its standard error: lower is better.
# Fixed: the standard error of a mean is sd/sqrt(n), not sd/n.
sqErr <- (test$avgHrs - pred.model.lr1)^2
meanPred <- mean(sqErr)                           # mean prediction error
stdError <- sd(sqErr)/sqrt(length(test$avgHrs))   # std error

str(summary(model.lr1))

### Create a data frame to store the model metrics.
# Note stringsAsFactors = FALSE. The row-append via c() coerces every value
# to character, so the numeric columns are stored as strings.
modelMetrics <- data.frame( "Model" = character(), "adjRsq" = integer(), "AIC"= integer(), "BIC"= integer(), "Mean Prediction Error"= integer(), "Standard Error"= integer(), stringsAsFactors=FALSE)

# Append a row (adjRsq now computed, not the hard-coded 0.279)
modelMetrics[nrow(modelMetrics) + 1, ] <- c("model.lr1", adjR2, aic, bic, meanPred, stdError)
modelMetrics

#2) Backward selection
step <- stepAIC(model.lr1, direction="backward")
step$anova # display results

# Refit with the variables backward selection retained
model.bkwd.lr1 <- lm(avgHrs ~ satLevel + lastEval + numProj + timeCpny + left + satLevelLog +
                       lastEvalLog + satLevelSqrt + lastEvalSqrt, train)
summary(model.bkwd.lr1)

# Metrics for the backward-selected model (same conventions as above)
aic <- AIC(model.bkwd.lr1)
bic <- BIC(model.bkwd.lr1)
adjR2 <- summary(model.bkwd.lr1)$adj.r.squared
pred.model.bkwd.lr1 <- predict(model.bkwd.lr1, newdata = test) # validation predictions
sqErr <- (test$avgHrs - pred.model.bkwd.lr1)^2
meanPred <- mean(sqErr)
stdError <- sd(sqErr)/sqrt(length(test$avgHrs))

# Append a row to our modelMetrics
modelMetrics[nrow(modelMetrics) + 1, ] <- c("model.bkwd.lr1", adjR2, aic, bic, meanPred, stdError)
modelMetrics

#3) Create a model with the most highly correlated predictors.
# Remove the non-numeric columns before correlating.
corData <- train[ -c(9:13) ]
head(corData)

corMatrix <- cor(corData, use="complete.obs", method="pearson")
corrplot(corMatrix, type = "upper", order = "hclust", col = c("black", "white"), bg = "lightblue", tl.col = "black")
# Among the most correlated with avgHrs: numProj, lastEvalLog, timeCpny, left, greatEvalLowSat

model.lr2 <- lm(avgHrs ~ numProj + lastEvalLog + timeCpny + left + greatEvalLowSat, train)
summary(model.lr2)

# Metrics for model.lr2
aic <- AIC(model.lr2)
bic <- BIC(model.lr2)
adjR2 <- summary(model.lr2)$adj.r.squared
pred.model.lr2 <- predict(model.lr2, newdata = test) # validation predictions
sqErr <- (test$avgHrs - pred.model.lr2)^2
meanPred <- mean(sqErr)
stdError <- sd(sqErr)/sqrt(length(test$avgHrs))

# Append a row to our modelMetrics
modelMetrics[nrow(modelMetrics) + 1, ] <- c("model.lr2", adjR2, aic, bic, meanPred, stdError)
modelMetrics

#4) Best subsets
model.bestSub <- regsubsets(avgHrs ~ ., train, nvmax = 25)

summary(model.bestSub)

reg.summary <- summary(model.bestSub)

which.min(reg.summary$bic)
which.max(reg.summary$adjr2)  # just for fun

# Plot BIC by number of variables and highlight the 6-variable model.
# Fixed: col=" red" (leading space) is not a valid colour name.
plot(reg.summary$bic, xlab=" Number of Variables ", ylab=" BIC", type="l")
points(6, reg.summary$bic[6], col="red", cex=2, pch=20)

coef(model.bestSub, 6)
# Fit the 6-variable model chosen by best subsets. (The odd NA* names are the
# dummy-column names produced by dummy() on the attached vectors.)
bestSubModel <- lm(avgHrs ~ satLevel + numProj + timeCpny + NAsupport + NAtechnical + NAhigh, data=train)

bestSubModel
summary(bestSubModel)

# Adj R-squared: higher is better; AIC, BIC: lower is better
aic <- AIC(bestSubModel)
bic <- BIC(bestSubModel)
# Fixed: adjusted R-squared taken from the fit instead of hand-copied
adjR2 <- summary(bestSubModel)$adj.r.squared
pred.bestSubModel <- predict(bestSubModel, newdata = test) # validation predictions
# Mean squared prediction error and its standard error: lower is better.
# Fixed: the standard error of a mean is sd/sqrt(n), not sd/n.
sqErr <- (test$avgHrs - pred.bestSubModel)^2
meanPred <- mean(sqErr)                           # mean prediction error
stdError <- sd(sqErr)/sqrt(length(test$avgHrs))   # std error

# Append a row to our modelMetrics
modelMetrics[nrow(modelMetrics) + 1, ] <- c("bestSubModel", adjR2, aic, bic, meanPred, stdError)
modelMetrics

# Standard diagnostic plots for the backward-selected model
par(mfrow = c(2, 2)) # Split the plotting panel into a 2 x 2 grid
plot(model.bkwd.lr1)

par(mfrow = c(1, 1))
# QQ plots: base R and car's studentized version
qqnorm(resid(model.bkwd.lr1))
qqPlot(model.bkwd.lr1)

# Cook's distance, flagging points above the conventional 4/(n-k-2) cutoff
cutoff <- 4/((nrow(train)-length(model.bkwd.lr1$coefficients)-2))
plot(model.bkwd.lr1, which=4, cook.levels=cutoff)

# Visualize the fitted surface over numProj x lastEvalLog
visreg2d(model.bkwd.lr1, "numProj", "lastEvalLog", plot.type="persp" )

visreg2d(model.bkwd.lr1, "numProj", "lastEvalLog", plot.type="image" )

visreg(model.bkwd.lr1, "numProj")

# Import the current-employee data set to be scored
currentEmployees <- fread('https://raw.githubusercontent.com/lgellis/Predictive_Analytics_Tutorial/master/currentEmployees.csv')
currentEmployees
typeof(currentEmployees)
# Convert to dataframe
currentEmployees <- as.data.frame(currentEmployees)

# Apply the training-data column names.
# Note: there is no avgHrs column here -- that is what we will predict.
colNames <- c("satLevel", "lastEval", "numProj", "timeCpny", "wrkAcdnt", "left", "fiveYrPrmo", "job", "salary")
setnames(currentEmployees, colNames)
attach(currentEmployees)
currentEmployees

# Readable factor versions of the 0/1 indicators (same as the training data).
# NOTE(review): attach() masking -- satLevel/left/etc. should now resolve to
# currentEmployees because it was attached most recently; verify if reordering.
currentEmployees$leftFactor <- factor(left, levels=c(0,1),
                                      labels=c("Did Not Leave Company","Left Company"))

currentEmployees$promoFactor <- factor(fiveYrPrmo, levels=c(0,1),
                                       labels=c("Did Not Get Promoted","Did Get Promoted"))

currentEmployees$wrkAcdntFactor <- factor(wrkAcdnt, levels=c(0,1),
                                          labels=c("No Accident","Accident"))

attach(currentEmployees)

# Same dummy coding as the training data
currentEmployees <- cbind(currentEmployees, dummy(job), dummy(salary))

# Same transforms as the training data: log, sqrt, scale.
# NOTE(review): scale() recenters on the *scoring* data's mean/sd rather than
# the training parameters; harmless for model.bkwd.lr1 (it does not use the
# Scale columns) but confirm before reusing them in another model.
currentEmployees$satLevelLog <- log(satLevel)
currentEmployees$lastEvalLog <- log(lastEval)

currentEmployees$satLevelSqrt <- sqrt(satLevel)
currentEmployees$lastEvalSqrt <- sqrt(lastEval)

currentEmployees$satLevelScale <- scale(satLevel)
currentEmployees$lastEvalScale <- scale(lastEval)

# Highly rated but unsatisfied flag, as in training
currentEmployees$greatEvalLowSat <- ifelse(lastEval>0.8 & satLevel <0.2, 1, 0)

# Score the current employees with the backward-selected model
currentEmployees$Predictions <- predict(model.bkwd.lr1, currentEmployees) # test predictions

head(currentEmployees)

# Predicted overage hours beyond a 160-hour month
currentEmployees$predictedOverageHours <- currentEmployees$Predictions - 160

# In case an employee is predicted under 160 hours per month, clamp the
# negative overage to 0 -- no overtime is owed.
currentEmployees$netPredictedOverageHours <- currentEmployees$predictedOverageHours
currentEmployees$netPredictedOverageHours[currentEmployees$netPredictedOverageHours<0] <- 0

head(currentEmployees)

# Total overtime hours by salary band.
# Fixed: sum the clamped netPredictedOverageHours -- the original summed the
# raw predictedOverageHours, letting negative values offset real overtime.
lowWageTotal <- sum(currentEmployees[currentEmployees$salary=='low',]$netPredictedOverageHours)
mediumWageTotal <- sum(currentEmployees[currentEmployees$salary=='medium',]$netPredictedOverageHours)
highWageTotal <- sum(currentEmployees[currentEmployees$salary=='high',]$netPredictedOverageHours)

# Assume low salary makes $10/hr, medium $25/hr and high $50/hr, and that
# overtime is paid at time and a half.
# (Comment fixed: the original text said 10/20/40 while the code uses 10/25/50.)
TotalPaidPerMonth <- lowWageTotal*10*1.5 + mediumWageTotal*25*1.5 + highWageTotal*50*1.5
TotalPaidPerMonth
TotalPaidPerYear <- TotalPaidPerMonth * 12
TotalPaidPerYear
-------------------------------------------------------------------------------- /README.md: --------------------------------------------------------------------------------

# HR Predictive Analytics Tutorial

Follow along to learn how to solve your business problems using predictive analytics. Technologies used include R, Data Science Experience and IBM Cloud Lite.
4 | 5 | Instructions start here: https://www.littlemissdata.com/blog/predictive-analytics-goes-mainstream 6 | 7 | - [Tutorial 1: Define the Problem and Set Up](https://www.littlemissdata.com/blog/predictive-analytics-tutorial-part-1) 8 | 9 | - [Tutorial 2: Exploratory Data Analysis or EDA](https://www.littlemissdata.com/blog/predictive-analytics-tutorial-part-2) 10 | 11 | - [Tutorial 3: Transform ](https://www.littlemissdata.com/blog/predictive-analytics-tutorial-part-3) 12 | 13 | - [Tutorial 4: Model, Assess and Implement - Coming Soon!](https://www.littlemissdata.com/blog/predictive-analytics-tutorial-part-4) 14 | 15 | Data set sourced from Kaggle - https://www.kaggle.com/ludobenistant/hr-analytics 16 | -------------------------------------------------------------------------------- /currentEmployees.csv: -------------------------------------------------------------------------------- 1 | satisfaction_level,last_evaluation,number_project,time_spend_company,Work_accident,left,promotion_last_5years,sales,salary 2 | 0.152,0.53,2,3,0,0,1,sales,low 3 | 1.6,0.86,4,3,0,0,0,sales,medium 4 | 0.22,0.88,4,3,1,0,0,sales,medium 5 | 1.44,0.87,6,4,0,0,0,sales,low 6 | 0.5,0.52,6,3,0,0,0,sales,low 7 | 0.82,0.5,5,3,0,0,0,sales,low 8 | 0.6,0.77,2,3,0,0,0,sales,low 9 | 1.84,0.85,2,6,0,0,0,management,low 10 | 1.78,1,6,4,0,0,0,sales,low 11 | 0.84,0.53,2,3,0,0,0,technical,low 12 | 0.9,0.54,5,3,0,0,0,sales,low 13 | 0.044,0.81,2,4,0,0,0,sales,high 14 | 0.336,0.92,2,5,1,0,0,sales,high 15 | 0.164,0.55,6,3,0,0,0,sales,high 16 | 0.144,0.56,2,3,0,0,0,sales,medium 17 | 0.152,0.54,6,3,0,0,1,sales,high 18 | 0.18,0.47,6,3,0,0,0,management,high 19 | 0.312,0.99,6,6,0,0,0,sales,low 20 | 0.18,0.51,5,3,0,0,0,sales,low 21 | 0.304,0.89,5,5,0,0,0,sales,low 22 | 0.044,0.83,6,4,1,0,0,sales,medium 23 | 0.152,0.55,2,3,0,0,0,sales,low 24 | 0.036,0.95,6,4,0,0,0,management,low 25 | 0.184,0.57,2,3,0,0,0,sales,low 26 | 0.16,0.53,2,3,0,0,0,sales,low 27 | 0.356,0.92,5,5,0,0,0,management,low 28 | 
0.328,0.87,4,5,0,0,0,sales,low 29 | 0.16,0.49,2,3,0,0,0,sales,low 30 | 0.164,0.46,2,3,0,0,0,accounting,low 31 | 0.152,0.5,2,3,0,0,0,accounting,low 32 | 0.036,0.62,6,4,0,0,0,management,low 33 | 0.9,0.57,2,3,0,0,0,hr,medium 34 | 0.8,0.51,2,3,0,0,1,hr,medium 35 | 0.9,0.55,2,3,0,0,0,hr,low 36 | 0.3,0.87,4,6,1,0,0,management,low 37 | 0.2,0.94,6,4,0,0,0,technical,low 38 | 0.76,0.46,2,3,0,0,0,technical,low 39 | 0.9,0.5,2,3,0,0,0,technical,low 40 | 0.22,0.89,6,4,0,0,0,product_mng,low 41 | 0.82,0.54,2,3,0,0,0,technical,low 42 | 0.5,0.88,5,5,0,0,0,technical,low 43 | 0.9,0.48,2,3,0,0,0,technical,high 44 | 0.8,0.46,2,3,0,0,0,technical,low 45 | 0.2,0.8,7,4,0,0,0,technical,low 46 | 0.18,0.54,6,4,0,0,0,technical,low 47 | 0.5,0.47,3,4,0,0,0,IT,low 48 | 0.56,0.78,2,3,0,0,0,support,low 49 | 0.56,0.55,3,5,0,0,0,support,low 50 | 0.54,0.55,2,4,0,0,0,support,low 51 | 0.49,0.54,2,3,0,0,0,support,low 52 | 0.59,0.92,6,4,0,0,0,support,low 53 | 0.85,0.55,4,3,0,0,0,support,low 54 | 0.76,0.88,2,3,0,0,0,support,low 55 | 0.78,0.87,5,5,0,0,0,support,low 56 | 0.54,0.52,6,4,0,0,0,support,low 57 | 0.47,0.5,2,3,0,0,0,support,low 58 | 0.78,0.77,6,4,0,0,0,support,low 59 | 0.55,0.85,2,4,0,0,0,technical,low 60 | 0.55,1,2,3,0,0,0,technical,high 61 | 0.54,0.53,5,5,0,0,0,technical,low 62 | 0.92,0.91,4,5,0,0,0,management,low 63 | 0.46,0.93,2,4,0,0,0,IT,low 64 | 0.94,0.95,2,5,0,0,0,IT,low 65 | 0.144,0.56,2,3,0,0,0,IT,low 66 | 0.044,0.94,6,4,0,0,0,IT,low 67 | 0.324,0.7,2,4,0,0,0,IT,medium 68 | 0.172,0.54,2,3,0,0,0,product_mng,medium 69 | 0.36,0.98,2,6,0,0,0,product_mng,medium 70 | 0.304,0.86,4,5,0,0,0,product_mng,medium 71 | 0.5,0.5,5,3,0,0,0,product_mng,low 72 | 0.89,0.99,5,3,0,0,0,IT,low 73 | 0.54,0.77,2,4,0,0,0,product_mng,low 74 | 0.88,0.1,2,3,0,0,0,product_mng,high 75 | 0.48,0.36,6,4,0,0,0,product_mng,low 76 | 0.92,0.11,4,4,0,0,0,product_mng,medium 77 | 0.55,0.81,7,4,0,0,0,product_mng,medium 78 | 0.55,0.43,6,4,0,0,0,product_mng,medium 79 | 0.54,0.9,2,3,0,0,0,marketing,medium 80 | 
0.92,0.76,2,3,0,0,0,sales,low 81 | 0.46,0.43,2,3,0,0,0,accounting,low 82 | 0.94,0.74,2,3,0,0,0,support,low 83 | 0.81,0.09,2,3,0,0,0,technical,low 84 | 0.54,0.45,4,3,0,0,1,management,low 85 | 1,0.09,4,6,0,0,0,marketing,low 86 | 0.91,0.11,6,4,0,0,0,marketing,low 87 | 0.93,0.11,6,5,0,0,0,marketing,low 88 | 0.84,0.1,5,3,0,0,0,sales,medium 89 | 0.1,0.4,2,3,0,0,0,sales,medium 90 | 0.38,0.43,2,4,0,0,0,sales,medium 91 | 0.45,0.79,6,5,0,0,1,sales,medium 92 | 0.11,0.5,2,5,0,0,0,IT,high 93 | 0.41,1,5,3,0,0,0,IT,high 94 | 0.87,0.48,2,3,0,0,0,IT,high 95 | 0.45,0.55,2,4,0,0,0,IT,medium 96 | 0.4,0.83,6,5,0,0,0,IT,high 97 | 0.1,0.51,2,3,0,0,0,IT,high 98 | 0.09,0.77,6,3,0,0,0,IT,low 99 | 0.84,0.84,6,3,0,0,0,IT,low 100 | 0.4,0.97,6,3,0,0,0,IT,low 101 | 0.9,1,5,6,0,0,0,IT,medium --------------------------------------------------------------------------------