├── HR Predictive Analytics Tutorial 1-4.r ├── HR_comma_sep.csv ├── README.md └── currentEmployees.csv /HR Predictive Analytics Tutorial 1-4.r: --------------------------------------------------------------------------------

# Full description of the code and analysis:
# https://www.littlemissdata.com/blog/predictive-analytics-tutorial-part-1

# Local R install: download the file from
# https://raw.githubusercontent.com/lgellis/Predictive_Analytics_Tutorial
# and import it with:
# hr = read.table("/filepath/HR_comma_sep.csv", head=TRUE, sep=",")

# OR pull it straight from the GitHub repo:
# install.packages("data.table")
# library(data.table)
# hr= fread('https://raw.githubusercontent.com/lgellis/Predictive_Analytics_Tutorial/master/HR_comma_sep.csv')

# If you are using DSX, place your cursor in a cell and "insert to code" per:
# https://www.littlemissdata.com/blog/predictive-analytics-tutorial-part-1
# (DSX injects the imported data as df.data.2)
head(df.data.2)

# Give the data frame the short name used throughout the rest of the script
hr <- df.data.2

# List the column names
names(hr)

# Peek at the first few rows
head(hr)

# Number of rows and columns
dim(hr)

# Structure of each column: type plus sample values
str(hr)

# summary() reports per-column statistics: min, max, median, mean and
# quartiles for numeric columns; level counts for factors.
# Per-column summary statistics (numeric: min/max/median/mean/quartiles;
# factors: counts of each level)
summary(hr)

# Install packages - do this one time.
# Fixed: "leaps" was listed twice in the original.
install.packages("data.table")
install.packages("corrplot")
install.packages("ggplot2")
install.packages("gcookbook")
install.packages("caret")
install.packages("hexbin")
install.packages("leaps")
install.packages("plyr")
install.packages("plotly")
install.packages("waffle")
install.packages("dummies")
install.packages("caTools")
install.packages("wesanderson")
install.packages("visreg")
install.packages("car")
install.packages("MASS")

# Load the relevant libraries - do this every time.
# Fixed: duplicate library(leaps) removed; library(rpart) removed because
# rpart was never installed above and is never used in this script.
library(data.table)
library(corrplot)
library(ggplot2)
library(gcookbook)
library(caret)
library(hexbin)
library(leaps)
library(plyr)
library(plotly)
library(waffle)
library(dummies)
library(caTools)
library(wesanderson)
library(visreg)
library(car)
library(MASS)

# attach() lets us reference columns by bare name.
# NOTE(review): attach() is fragile (name masking across data frames); passing
# data= arguments is the safer pattern, kept here to match the tutorial.
attach(hr)

# Pearson correlations of the numeric columns (first 8)
corMatrix <- cor(hr[1:8], use="complete.obs", method="pearson")
# Round to two decimals for readability
round(corMatrix, 2)

# A tour of corrplot display styles on the same matrix
corrplot(corMatrix, method="circle")
corrplot(corMatrix, method="square")
corrplot(corMatrix, method="number")
corrplot(corMatrix, method="shade")
corrplot(corMatrix, type = "upper")
corrplot.mixed(corMatrix)

# Rename the columns to short labels
colNames <- c("satLevel", "lastEval", "numProj", "avgHrs", "timeCpny",
              "wrkAcdnt", "left", "fiveYrPrmo", "job", "salary")
setnames(hr, colNames)
names(hr)
attach(hr)

# NOTE(review): corMatrix was computed before the rename, so these two plots
# still display the original column labels.
corrplot(corMatrix, order = "hclust", addrect = 2, col = heat.colors(100), tl.col = "black")

# Upper triangle with a lightblue background
corrplot(corMatrix, type = "upper", order = "hclust", col = c("black", "white"),
         bg = "lightblue", tl.col = "black")

# Histograms for the numeric variables to understand their distributions.
# avgHrs is monthly, so dividing by 4 approximates weekly hours.
hist(avgHrs/4, main="Distribution of Average Hours per Week", xlab="Avg Hours", breaks=7, col="lightblue")
hist(satLevel, main="Distribution of Satisfaction Level", xlab="Satisfaction Level", breaks=7, col="lightblue")
hist(lastEval, main="Distribution of Last Evaluations", xlab="Last Eval", breaks=7, col="lightblue")
hist(numProj, main="Distribution of Number of Projects", xlab="Number of Projects", breaks=7, col="lightblue")

# Side-by-side bar chart: avg hours by project count, split by salary band
ggplot(data=hr, aes(x=numProj, y=avgHrs, fill=salary)) +
  geom_bar(position="dodge", color="black", stat="identity") +
  scale_fill_brewer(palette = "Pastel1")

# Density of approximate weekly hours by salary band
qplot(avgHrs/4, data=hr, geom="density", fill=salary, alpha=I(.5),
      main="Avg Weekly Hours by Salary Category", xlab="Average Weekly Hours",
      ylab="Density")

attach(hr)

# Readable factor versions of the 0/1 indicator columns
hr$leftFactor <- factor(left, levels=c(0,1),
                        labels=c("Did Not Leave Company","Left Company"))

hr$promoFactor <- factor(fiveYrPrmo, levels=c(0,1),
                         labels=c("Did Not Get Promoted","Did Get Promoted"))

hr$wrkAcdntFactor <- factor(wrkAcdnt, levels=c(0,1),
                            labels=c("No Accident","Accident"))

attach(hr)

# Density of approximate weekly hours by retention status
qplot(avgHrs/4, data=hr, geom="density", fill=leftFactor, alpha=I(.5),
      main="Avg Weekly Hours by Retention", xlab="Average Weekly Hours",
      ylab="Density")

# Boxplot of avg hours by job title
boxplot(avgHrs~job, data=hr, main="HR Data",
        xlab="Job Title", ylab="Avg Hours", col="lightblue")

# Violin plot of the same relationship
hrBox <- ggplot(hr, aes(y=avgHrs, x=job))
hrBox + geom_violin(trim=FALSE, fill="lightblue")

# Many-dimension charts
# Facetted scatter: weekly hours vs tenure, shape = retention, colour = salary,
# facetted by project count x promotion status
qplot(avgHrs/4, timeCpny, data=hr, shape=leftFactor, color=salary, facets=numProj~promoFactor, size=I(3),
      xlab="avg hrs", ylab="time at company")

# Weekly hours vs satisfaction level
hrScat <- ggplot(hr, aes(x=avgHrs/4, y=satLevel))
hrScat + geom_point()
# Make the points more transparent so that it's less intense
hrScat + geom_point(alpha=.01)
# Fixed: stat_bin2d must be called -- adding the bare function object
# (`hrScat + stat_bin2d`) is an error in ggplot2.
hrScat + stat_bin2d()

hrScat + stat_binhex()
hrScat + stat_binhex() + scale_fill_gradient(low="lightblue", high="red")

# Last evaluation vs satisfaction level
LEvSL <- ggplot(hr, aes(x=satLevel, y=lastEval))
LEvSL + geom_point()
LEvSL + stat_binhex() + scale_fill_gradient(low="lightblue", high="red")

# Example of a Bagplot
#install.packages("aplpack")
library(aplpack)

bagplot(avgHrs/4, satLevel, xlab="Avg Weekly Hrs", ylab="SatLevel",
        main="Bagplot Avg Hours by Sat Level")

attach(hr)
# fread() returns a data.table ("list"); flatten to a plain data frame
typeof(hr)
hr <- as.data.frame(hr)
typeof(hr)

# Dummy-code job and salary: all model inputs must be numeric
hr2 <- cbind(hr, dummy(job), dummy(salary))

# Ensure dummy variables are created
names(hr2)
head(hr2)

# Log transforms for: satLevel, lastEval
hr2$satLevelLog <- log(satLevel)
hr2$lastEvalLog <- log(lastEval)

# SQRT transforms for: satLevel, lastEval
hr2$satLevelSqrt <- sqrt(satLevel)
hr2$lastEvalSqrt <- sqrt(lastEval)

# Scale (center/standardize) transforms for: satLevel, lastEval
hr2$satLevelScale <- scale(satLevel)
hr2$lastEvalScale <- scale(lastEval)

# Peek at new columns
head(hr2)

# Ensure all columns present
names(hr2)

# Binary flag for employees rated highly but reporting low satisfaction
hr2$greatEvalLowSat <- ifelse(lastEval>0.8 & satLevel <0.2, 1, 0)

# Visualize effects that it has on average hours
attach(hr2)
# Boxplot of avg hours split by the greatEvalLowSat flag
x <- ggplot(hr2, aes(factor(greatEvalLowSat), avgHrs))
x <- x + geom_boxplot(aes(fill=factor(greatEvalLowSat)), outlier.color="black", outlier.size=1)
x <- x + coord_flip()
x + scale_fill_manual(values=wes_palette(n=2, name="GrandBudapest2"))

# Visualize distribution by job.
# Fixed: plot from hr2, which actually contains greatEvalLowSat; the original
# used hr and only worked via the attached copy of hr2.
ggplot(hr2, aes(avgHrs, job, z= greatEvalLowSat)) + geom_tile(aes(fill = greatEvalLowSat)) + theme_bw() + scale_fill_gradient(low="#98afc7", high="#646d7e")

# Distribution table of the flag.
# Fixed: plyr::count() takes the data frame as its first argument; the
# original passed the avgHrs vector, which has no greatEvalLowSat column.
# Also renamed the result so it no longer masks base::summary().
flagCounts <- count(hr2, 'greatEvalLowSat')

flagCounts

#Starting code credit to: https://stackoverflow.com/questions/17200114/how-to-split-data-into-training-testing-sets-using-sample-function
# Fixed: caTools::sample.split() expects a vector (typically the outcome),
# not a whole data frame -- passing hr2 recycled a column-length mask and
# produced a patterned, non-random split. Split on the outcome column.
set.seed(101)
sample <- sample.split(hr2$avgHrs, SplitRatio = .75)
train <- subset(hr2, sample == TRUE)
test <- subset(hr2, sample == FALSE)

# See if the test/train sets have been split appropriately
dim(train)
dim(test)

# Ensure data characteristics are roughly the same
summary(train)
summary(test)

#1) Throw everything in!
# Baseline: regress avgHrs on every available predictor
model.lr1 <- lm(avgHrs ~ ., train)
# Adjusted R-squared: higher is better (duplicate summary() call removed)
summary(model.lr1)

# AIC, BIC: lower is better
aic <- AIC(model.lr1)
bic <- BIC(model.lr1)
# Fixed: take adjusted R-squared from the fit instead of hand-copying it
adjR2 <- summary(model.lr1)$adj.r.squared
pred.model.lr1 <- predict(model.lr1, newdata = test) # validation predictions
# Mean squared prediction error and its standard error: lower is better.
# Fixed: the standard error of a mean is sd/sqrt(n), not sd/n.
sqErr <- (test$avgHrs - pred.model.lr1)^2
meanPred <- mean(sqErr)                           # mean prediction error
stdError <- sd(sqErr)/sqrt(length(test$avgHrs))   # std error

str(summary(model.lr1))

### Create a data frame to store the model metrics.
# Note stringsAsFactors = FALSE. The row-append via c() coerces every value
# to character, so the numeric columns are stored as strings.
modelMetrics <- data.frame( "Model" = character(), "adjRsq" = integer(), "AIC"= integer(), "BIC"= integer(), "Mean Prediction Error"= integer(), "Standard Error"= integer(), stringsAsFactors=FALSE)

# Append a row (adjRsq now computed, not the hard-coded 0.279)
modelMetrics[nrow(modelMetrics) + 1, ] <- c("model.lr1", adjR2, aic, bic, meanPred, stdError)
modelMetrics

#2) Backward selection
step <- stepAIC(model.lr1, direction="backward")
step$anova # display results

# Refit with the variables backward selection retained
model.bkwd.lr1 <- lm(avgHrs ~ satLevel + lastEval + numProj + timeCpny + left + satLevelLog +
                       lastEvalLog + satLevelSqrt + lastEvalSqrt, train)
summary(model.bkwd.lr1)

# Metrics for the backward-selected model (same conventions as above)
aic <- AIC(model.bkwd.lr1)
bic <- BIC(model.bkwd.lr1)
adjR2 <- summary(model.bkwd.lr1)$adj.r.squared
pred.model.bkwd.lr1 <- predict(model.bkwd.lr1, newdata = test) # validation predictions
sqErr <- (test$avgHrs - pred.model.bkwd.lr1)^2
meanPred <- mean(sqErr)
stdError <- sd(sqErr)/sqrt(length(test$avgHrs))

# Append a row to our modelMetrics
modelMetrics[nrow(modelMetrics) + 1, ] <- c("model.bkwd.lr1", adjR2, aic, bic, meanPred, stdError)
modelMetrics

#3) Create a model with the most highly correlated predictors.
# Remove the non-numeric columns before correlating.
corData <- train[ -c(9:13) ]
head(corData)

corMatrix <- cor(corData, use="complete.obs", method="pearson")
corrplot(corMatrix, type = "upper", order = "hclust", col = c("black", "white"), bg = "lightblue", tl.col = "black")
# Among the most correlated with avgHrs: numProj, lastEvalLog, timeCpny, left, greatEvalLowSat

model.lr2 <- lm(avgHrs ~ numProj + lastEvalLog + timeCpny + left + greatEvalLowSat, train)
summary(model.lr2)

# Metrics for model.lr2
aic <- AIC(model.lr2)
bic <- BIC(model.lr2)
adjR2 <- summary(model.lr2)$adj.r.squared
pred.model.lr2 <- predict(model.lr2, newdata = test) # validation predictions
sqErr <- (test$avgHrs - pred.model.lr2)^2
meanPred <- mean(sqErr)
stdError <- sd(sqErr)/sqrt(length(test$avgHrs))

# Append a row to our modelMetrics
modelMetrics[nrow(modelMetrics) + 1, ] <- c("model.lr2", adjR2, aic, bic, meanPred, stdError)
modelMetrics

#4) Best subsets
model.bestSub <- regsubsets(avgHrs ~ ., train, nvmax = 25)

summary(model.bestSub)

reg.summary <- summary(model.bestSub)

which.min(reg.summary$bic)
which.max(reg.summary$adjr2)  # just for fun

# Plot BIC by number of variables and highlight the 6-variable model.
# Fixed: col=" red" (leading space) is not a valid colour name.
plot(reg.summary$bic, xlab=" Number of Variables ", ylab=" BIC", type="l")
points(6, reg.summary$bic[6], col="red", cex=2, pch=20)

coef(model.bestSub, 6)
# Fit the 6-variable model chosen by best subsets. (The odd NA* names are the
# dummy-column names produced by dummy() on the attached vectors.)
bestSubModel <- lm(avgHrs ~ satLevel + numProj + timeCpny + NAsupport + NAtechnical + NAhigh, data=train)

bestSubModel
summary(bestSubModel)

# Adj R-squared: higher is better; AIC, BIC: lower is better
aic <- AIC(bestSubModel)
bic <- BIC(bestSubModel)
# Fixed: adjusted R-squared taken from the fit instead of hand-copied
adjR2 <- summary(bestSubModel)$adj.r.squared
pred.bestSubModel <- predict(bestSubModel, newdata = test) # validation predictions
# Mean squared prediction error and its standard error: lower is better.
# Fixed: the standard error of a mean is sd/sqrt(n), not sd/n.
sqErr <- (test$avgHrs - pred.bestSubModel)^2
meanPred <- mean(sqErr)                           # mean prediction error
stdError <- sd(sqErr)/sqrt(length(test$avgHrs))   # std error

# Append a row to our modelMetrics
modelMetrics[nrow(modelMetrics) + 1, ] <- c("bestSubModel", adjR2, aic, bic, meanPred, stdError)
modelMetrics

# Standard diagnostic plots for the backward-selected model
par(mfrow = c(2, 2)) # Split the plotting panel into a 2 x 2 grid
plot(model.bkwd.lr1)

par(mfrow = c(1, 1))
# QQ plots: base R and car's studentized version
qqnorm(resid(model.bkwd.lr1))
qqPlot(model.bkwd.lr1)

# Cook's distance, flagging points above the conventional 4/(n-k-2) cutoff
cutoff <- 4/((nrow(train)-length(model.bkwd.lr1$coefficients)-2))
plot(model.bkwd.lr1, which=4, cook.levels=cutoff)

# Visualize the fitted surface over numProj x lastEvalLog
visreg2d(model.bkwd.lr1, "numProj", "lastEvalLog", plot.type="persp" )

visreg2d(model.bkwd.lr1, "numProj", "lastEvalLog", plot.type="image" )

visreg(model.bkwd.lr1, "numProj")

# Import the current-employee data set to be scored
currentEmployees <- fread('https://raw.githubusercontent.com/lgellis/Predictive_Analytics_Tutorial/master/currentEmployees.csv')
currentEmployees
typeof(currentEmployees)
# Convert to dataframe
currentEmployees <- as.data.frame(currentEmployees)

# Apply the training-data column names.
# Note: there is no avgHrs column here -- that is what we will predict.
colNames <- c("satLevel", "lastEval", "numProj", "timeCpny", "wrkAcdnt", "left", "fiveYrPrmo", "job", "salary")
setnames(currentEmployees, colNames)
attach(currentEmployees)
currentEmployees

# Readable factor versions of the 0/1 indicators (same as the training data).
# NOTE(review): attach() masking -- satLevel/left/etc. should now resolve to
# currentEmployees because it was attached most recently; verify if reordering.
currentEmployees$leftFactor <- factor(left, levels=c(0,1),
                                      labels=c("Did Not Leave Company","Left Company"))

currentEmployees$promoFactor <- factor(fiveYrPrmo, levels=c(0,1),
                                       labels=c("Did Not Get Promoted","Did Get Promoted"))

currentEmployees$wrkAcdntFactor <- factor(wrkAcdnt, levels=c(0,1),
                                          labels=c("No Accident","Accident"))

attach(currentEmployees)

# Same dummy coding as the training data
currentEmployees <- cbind(currentEmployees, dummy(job), dummy(salary))

# Same transforms as the training data: log, sqrt, scale.
# NOTE(review): scale() recenters on the *scoring* data's mean/sd rather than
# the training parameters; harmless for model.bkwd.lr1 (it does not use the
# Scale columns) but confirm before reusing them in another model.
currentEmployees$satLevelLog <- log(satLevel)
currentEmployees$lastEvalLog <- log(lastEval)

currentEmployees$satLevelSqrt <- sqrt(satLevel)
currentEmployees$lastEvalSqrt <- sqrt(lastEval)

currentEmployees$satLevelScale <- scale(satLevel)
currentEmployees$lastEvalScale <- scale(lastEval)

# Highly rated but unsatisfied flag, as in training
currentEmployees$greatEvalLowSat <- ifelse(lastEval>0.8 & satLevel <0.2, 1, 0)

# Score the current employees with the backward-selected model
currentEmployees$Predictions <- predict(model.bkwd.lr1, currentEmployees) # test predictions

head(currentEmployees)

# Predicted overage hours beyond a 160-hour month
currentEmployees$predictedOverageHours <- currentEmployees$Predictions - 160

# In case an employee is predicted under 160 hours per month, clamp the
# negative overage to 0 -- no overtime is owed.
currentEmployees$netPredictedOverageHours <- currentEmployees$predictedOverageHours
currentEmployees$netPredictedOverageHours[currentEmployees$netPredictedOverageHours<0] <- 0

head(currentEmployees)

# Total overtime hours by salary band.
# Fixed: sum the clamped netPredictedOverageHours -- the original summed the
# raw predictedOverageHours, letting negative values offset real overtime.
lowWageTotal <- sum(currentEmployees[currentEmployees$salary=='low',]$netPredictedOverageHours)
mediumWageTotal <- sum(currentEmployees[currentEmployees$salary=='medium',]$netPredictedOverageHours)
highWageTotal <- sum(currentEmployees[currentEmployees$salary=='high',]$netPredictedOverageHours)

# Assume low salary makes $10/hr, medium $25/hr and high $50/hr, and that
# overtime is paid at time and a half.
# (Comment fixed: the original text said 10/20/40 while the code uses 10/25/50.)
TotalPaidPerMonth <- lowWageTotal*10*1.5 + mediumWageTotal*25*1.5 + highWageTotal*50*1.5
TotalPaidPerMonth
TotalPaidPerYear <- TotalPaidPerMonth * 12
TotalPaidPerYear
-------------------------------------------------------------------------------- /README.md: --------------------------------------------------------------------------------

# HR Predictive Analytics Tutorial

Follow along to learn how to solve your business problems using predictive analytics. Technologies used include R, Data Science Experience and IBM Cloud Lite.
4 | 5 | Instructions start here: https://www.littlemissdata.com/blog/predictive-analytics-goes-mainstream 6 | 7 | - [Tutorial 1: Define the Problem and Set Up](https://www.littlemissdata.com/blog/predictive-analytics-tutorial-part-1) 8 | 9 | - [Tutorial 2: Exploratory Data Analysis or EDA](https://www.littlemissdata.com/blog/predictive-analytics-tutorial-part-2) 10 | 11 | - [Tutorial 3: Transform ](https://www.littlemissdata.com/blog/predictive-analytics-tutorial-part-3) 12 | 13 | - [Tutorial 4: Model, Assess and Implement - Coming Soon!](https://www.littlemissdata.com/blog/predictive-analytics-tutorial-part-4) 14 | 15 | Data set sourced from Kaggle - https://www.kaggle.com/ludobenistant/hr-analytics 16 | -------------------------------------------------------------------------------- /currentEmployees.csv: -------------------------------------------------------------------------------- 1 | satisfaction_level,last_evaluation,number_project,time_spend_company,Work_accident,left,promotion_last_5years,sales,salary 2 | 0.152,0.53,2,3,0,0,1,sales,low 3 | 1.6,0.86,4,3,0,0,0,sales,medium 4 | 0.22,0.88,4,3,1,0,0,sales,medium 5 | 1.44,0.87,6,4,0,0,0,sales,low 6 | 0.5,0.52,6,3,0,0,0,sales,low 7 | 0.82,0.5,5,3,0,0,0,sales,low 8 | 0.6,0.77,2,3,0,0,0,sales,low 9 | 1.84,0.85,2,6,0,0,0,management,low 10 | 1.78,1,6,4,0,0,0,sales,low 11 | 0.84,0.53,2,3,0,0,0,technical,low 12 | 0.9,0.54,5,3,0,0,0,sales,low 13 | 0.044,0.81,2,4,0,0,0,sales,high 14 | 0.336,0.92,2,5,1,0,0,sales,high 15 | 0.164,0.55,6,3,0,0,0,sales,high 16 | 0.144,0.56,2,3,0,0,0,sales,medium 17 | 0.152,0.54,6,3,0,0,1,sales,high 18 | 0.18,0.47,6,3,0,0,0,management,high 19 | 0.312,0.99,6,6,0,0,0,sales,low 20 | 0.18,0.51,5,3,0,0,0,sales,low 21 | 0.304,0.89,5,5,0,0,0,sales,low 22 | 0.044,0.83,6,4,1,0,0,sales,medium 23 | 0.152,0.55,2,3,0,0,0,sales,low 24 | 0.036,0.95,6,4,0,0,0,management,low 25 | 0.184,0.57,2,3,0,0,0,sales,low 26 | 0.16,0.53,2,3,0,0,0,sales,low 27 | 0.356,0.92,5,5,0,0,0,management,low 28 | 
0.328,0.87,4,5,0,0,0,sales,low 29 | 0.16,0.49,2,3,0,0,0,sales,low 30 | 0.164,0.46,2,3,0,0,0,accounting,low 31 | 0.152,0.5,2,3,0,0,0,accounting,low 32 | 0.036,0.62,6,4,0,0,0,management,low 33 | 0.9,0.57,2,3,0,0,0,hr,medium 34 | 0.8,0.51,2,3,0,0,1,hr,medium 35 | 0.9,0.55,2,3,0,0,0,hr,low 36 | 0.3,0.87,4,6,1,0,0,management,low 37 | 0.2,0.94,6,4,0,0,0,technical,low 38 | 0.76,0.46,2,3,0,0,0,technical,low 39 | 0.9,0.5,2,3,0,0,0,technical,low 40 | 0.22,0.89,6,4,0,0,0,product_mng,low 41 | 0.82,0.54,2,3,0,0,0,technical,low 42 | 0.5,0.88,5,5,0,0,0,technical,low 43 | 0.9,0.48,2,3,0,0,0,technical,high 44 | 0.8,0.46,2,3,0,0,0,technical,low 45 | 0.2,0.8,7,4,0,0,0,technical,low 46 | 0.18,0.54,6,4,0,0,0,technical,low 47 | 0.5,0.47,3,4,0,0,0,IT,low 48 | 0.56,0.78,2,3,0,0,0,support,low 49 | 0.56,0.55,3,5,0,0,0,support,low 50 | 0.54,0.55,2,4,0,0,0,support,low 51 | 0.49,0.54,2,3,0,0,0,support,low 52 | 0.59,0.92,6,4,0,0,0,support,low 53 | 0.85,0.55,4,3,0,0,0,support,low 54 | 0.76,0.88,2,3,0,0,0,support,low 55 | 0.78,0.87,5,5,0,0,0,support,low 56 | 0.54,0.52,6,4,0,0,0,support,low 57 | 0.47,0.5,2,3,0,0,0,support,low 58 | 0.78,0.77,6,4,0,0,0,support,low 59 | 0.55,0.85,2,4,0,0,0,technical,low 60 | 0.55,1,2,3,0,0,0,technical,high 61 | 0.54,0.53,5,5,0,0,0,technical,low 62 | 0.92,0.91,4,5,0,0,0,management,low 63 | 0.46,0.93,2,4,0,0,0,IT,low 64 | 0.94,0.95,2,5,0,0,0,IT,low 65 | 0.144,0.56,2,3,0,0,0,IT,low 66 | 0.044,0.94,6,4,0,0,0,IT,low 67 | 0.324,0.7,2,4,0,0,0,IT,medium 68 | 0.172,0.54,2,3,0,0,0,product_mng,medium 69 | 0.36,0.98,2,6,0,0,0,product_mng,medium 70 | 0.304,0.86,4,5,0,0,0,product_mng,medium 71 | 0.5,0.5,5,3,0,0,0,product_mng,low 72 | 0.89,0.99,5,3,0,0,0,IT,low 73 | 0.54,0.77,2,4,0,0,0,product_mng,low 74 | 0.88,0.1,2,3,0,0,0,product_mng,high 75 | 0.48,0.36,6,4,0,0,0,product_mng,low 76 | 0.92,0.11,4,4,0,0,0,product_mng,medium 77 | 0.55,0.81,7,4,0,0,0,product_mng,medium 78 | 0.55,0.43,6,4,0,0,0,product_mng,medium 79 | 0.54,0.9,2,3,0,0,0,marketing,medium 80 | 
0.92,0.76,2,3,0,0,0,sales,low 81 | 0.46,0.43,2,3,0,0,0,accounting,low 82 | 0.94,0.74,2,3,0,0,0,support,low 83 | 0.81,0.09,2,3,0,0,0,technical,low 84 | 0.54,0.45,4,3,0,0,1,management,low 85 | 1,0.09,4,6,0,0,0,marketing,low 86 | 0.91,0.11,6,4,0,0,0,marketing,low 87 | 0.93,0.11,6,5,0,0,0,marketing,low 88 | 0.84,0.1,5,3,0,0,0,sales,medium 89 | 0.1,0.4,2,3,0,0,0,sales,medium 90 | 0.38,0.43,2,4,0,0,0,sales,medium 91 | 0.45,0.79,6,5,0,0,1,sales,medium 92 | 0.11,0.5,2,5,0,0,0,IT,high 93 | 0.41,1,5,3,0,0,0,IT,high 94 | 0.87,0.48,2,3,0,0,0,IT,high 95 | 0.45,0.55,2,4,0,0,0,IT,medium 96 | 0.4,0.83,6,5,0,0,0,IT,high 97 | 0.1,0.51,2,3,0,0,0,IT,high 98 | 0.09,0.77,6,3,0,0,0,IT,low 99 | 0.84,0.84,6,3,0,0,0,IT,low 100 | 0.4,0.97,6,3,0,0,0,IT,low 101 | 0.9,1,5,6,0,0,0,IT,medium --------------------------------------------------------------------------------