├── .gitattributes
├── HandOnExercise1
├── Hands on Ex 4
├── Exercise 6
├── Ex 2
├── Exercise 3
└── Exercise5.r

/.gitattributes:
--------------------------------------------------------------------------------
# Auto detect text files and perform LF normalization
* text=auto
--------------------------------------------------------------------------------

/HandOnExercise1:
--------------------------------------------------------------------------------
install.packages("data.table")
library(datasets)   # datasets ships with base R, so it only needs to be loaded
testDF <- women     # Copying the women data frame into another object so the original fields are not affected
View(testDF)

id <- 0
for (i in 1:nrow(testDF))
{
  if (testDF$height[i] > 65.0 & testDF$weight[i] < 136.7)
  {
    id <- id + 1
  }
}
print("Number of women satisfying the criteria:")
print(id)

# Rough space
testDF$height
nrow(testDF)
--------------------------------------------------------------------------------

/Hands on Ex 4:
--------------------------------------------------------------------------------
getwd()
setwd("D:\\MICA\\AMMA\\data_2017")
std_per <- read.csv(file = "students.csv", header = TRUE)
View(std_per)

# Since the original dataset is unavailable, a similar replacement dataset is used
# Finding the average age of the class using a for loop
sumAge <- 0
for (i in 1:nrow(std_per))
{
  sumAge <- sumAge + std_per$Age[i]
}
averageAge <- sumAge / nrow(std_per)
print("The average age of the class is:")
print(averageAge)
--------------------------------------------------------------------------------

/Exercise 6:
--------------------------------------------------------------------------------
install.packages("rvest")
library(rvest)
getwd()
setwd("D:\\MICA\\AMMA\\data_2017")

theurl <- "https://en.wikipedia.org/wiki/India%E2%80%93Pakistan_cricket_rivalry"
file <- read_html(theurl)
tables <- html_nodes(file, "table")
table1 <- html_table(tables[7], fill = TRUE)
View(table1)
DF1 <- as.data.frame(table1)
View(DF1)
DF1$Tests
top5 <- DF1[rev(order(DF1$Tests)), "Top 5 highest number of test wins"][1:5]
View(top5)
# Visualising the data using a plot

install.packages("psych")
library(psych)
library(help = psych)

# histogram of the Age column from the students data (Hands on Ex 4), assumed to still be in memory
hist(std_per$Age)
?hist
hist(std_per$Age, breaks = 50)
matchesWon <- 0   # Initiating the tracking counter
--------------------------------------------------------------------------------

/Ex 2:
--------------------------------------------------------------------------------
library(rvest)
theurl <- "https://en.wikipedia.org/wiki/India%E2%80%93Pakistan_cricket_rivalry"
file <- read_html(theurl)
tables <- html_nodes(file, "table")
table1 <- html_table(tables[3], fill = TRUE)
View(table1)
table2 <- as.data.frame(table1)
View(table2)
matchesWon <- 0   # Initiating the tracking counter
for (i in 1:nrow(table2))
{
  matchesWon <- matchesWon + table2$India[i]
}
print(paste('Total no. of ODIs won by India:', matchesWon))

# ALTERNATE WAY TO DO THE SAME PROBLEM

install.packages("rvest")
# The table is first saved as a csv file and then imported
input_wiki <- read.csv(file = "C:\\YYYYYY\\AMMA 2017\\Data\\Wiki_Matches.csv")
matchesWon <- 0   # Initiating the tracking counter
for (i in 1:nrow(input_wiki))
{
  matchesWon <- matchesWon + input_wiki$India[i]
}
print(paste('Total no. of ODIs won by India:', matchesWon))
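
# Not part of the original exercise: a minimal vectorized sketch of the same count.
# sum() over the India column replaces the explicit for loop. It assumes the scraped
# column is numeric; as.character/as.numeric coerce it in case it arrives as text.
india_wins <- sum(as.numeric(as.character(table2$India)), na.rm = TRUE)
print(paste('Total no. of ODIs won by India (vectorized):', india_wins))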
--------------------------------------------------------------------------------

/Exercise 3:
--------------------------------------------------------------------------------
Name <- c('angad', 'ayush', 'rajesh', 'sonu', 'pakka')
Age <- c(22, 23, 24, 25, 26)
MathMark <- c(24, 35, 34, 34, 34)
ScienceMark <- c(65, 67, 56, 78, 23)
TotalMark <- c(152, 153, 161, 143, 182)
Bigdata <- data.frame(Name, Age, MathMark, ScienceMark, TotalMark)
Bigdata
View(Bigdata)
Bigdata$TotalMark <- Bigdata$MathMark + Bigdata$ScienceMark
View(Bigdata)

Bigdata$pct_math <- round(100 * Bigdata$MathMark / Bigdata$TotalMark)
View(Bigdata)
# Column subsets are assigned to a separate object so the full Bigdata frame stays intact
Bigdata_sub <- Bigdata[, c(2, 6)]   # Age and pct_math only
View(Bigdata_sub)
Bigdata_sub <- Bigdata[, c(2:6)]    # every column except Name
names_Bigdata <- names(Bigdata_sub)
names(Bigdata_sub) <- c('a', 'b', 'c', 'd', 'e')
View(Bigdata_sub)
names(Bigdata_sub) <- names_Bigdata
View(Bigdata_sub)
vector1 <- seq(1, 10, by = 2)
vector1[3] <- 100

v_col <- names(Bigdata)
v_col[3] <- "NewMaths"
names(Bigdata) <- v_col   # apply the edited names back to the data frame
View(Bigdata)

Bigdata$Age <- exp(Bigdata$Age)

View(Bigdata)
class(Bigdata$Name)
Bigdata$Name <- as.character(Bigdata$Name)
class(Bigdata$Name)

s1 <- Bigdata$Age >= 23

Bigdata <- Bigdata[s1, ]
View(Bigdata)

?sample

sample_Bigdata <- sample(1:nrow(Bigdata), 3, replace = F)
sample_Bigdata
--------------------------------------------------------------------------------

/Exercise5.r:
--------------------------------------------------------------------------------
# Exercise5 using two alternate approaches
install.packages("rvest")   # rvest can be used to harvest data easily
# Setting wd to where the employee csv file is located
setwd("D://MICA/AMMA/Code&Data")
empDF <- read.csv("emp.csv")
depDF <- read.csv("dept.csv")

mergedDF <- merge(empDF, depDF, by = "departmentNO", all = TRUE)

# Approach 1: mean salary per location, computed with explicit loops
N_employeeLoc <- rep(0, nrow(depDF))   # employee count per location
salaryLoc <- rep(0, nrow(depDF))       # salary total per location
for (i in 1:nrow(depDF))
{
  for (j in 1:nrow(mergedDF))
  {
    # count the employee and add their salary when the merged row falls in location i
    if (!is.na(mergedDF$LOC[j]) & mergedDF$LOC[j] == depDF$LOC[i])
    {
      N_employeeLoc[i] <- N_employeeLoc[i] + 1
      salaryLoc[i] <- salaryLoc[i] + mergedDF$SAL[j]
    }
  }
}

mean_salary_loc <- rep(0, nrow(depDF))
for (i in 1:nrow(depDF))
{
  mean_salary_loc[i] <- salaryLoc[i] / N_employeeLoc[i]
}
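
# Not part of the original exercise: a compact sketch of the same per-location average,
# assuming mergedDF has the LOC and SAL columns used above.
mean_salary_loc_alt <- tapply(mergedDF$SAL, mergedDF$LOC, mean, na.rm = TRUE)
mean_salary_loc_alt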

# Approach 2: job-wise salary hikes
employee_csv.df <- read.csv("employee.csv", header = TRUE)
department_csv.df <- read.csv("department.csv", header = TRUE)
employee_department_merged.df <- merge(employee_csv.df, department_csv.df, by = c("departmentNO"), all = TRUE)
employee_department_merged_hike.df <- employee_department_merged.df

# 15% hike for analysts
for (i in 1:nrow(employee_department_merged.df))
{
  if (!is.na(employee_department_merged.df$JOB[i]))
  {
    if (employee_department_merged.df$JOB[i] == "ANALYST")
    {
      employee_department_merged_hike.df$SAL[i] <- employee_department_merged.df$SAL[i] + (0.15 * employee_department_merged.df$SAL[i])
    }
  }
}

# 12.5% hike for clerks
for (i in 1:nrow(employee_department_merged.df))
{
  if (!is.na(employee_department_merged.df$JOB[i]))
  {
    if (employee_department_merged.df$JOB[i] == "CLERK")
    {
      employee_department_merged_hike.df$SAL[i] <- employee_department_merged.df$SAL[i] + (0.125 * employee_department_merged.df$SAL[i])
    }
  }
}

# 10.2% hike for managers
for (i in 1:nrow(employee_department_merged.df))
{
  if (!is.na(employee_department_merged.df$JOB[i]))
  {
    if (employee_department_merged.df$JOB[i] == "MANAGER")
    {
      employee_department_merged_hike.df$SAL[i] <- employee_department_merged.df$SAL[i] + (0.102 * employee_department_merged.df$SAL[i])
    }
  }
}

# 5.7% hike for the president
for (i in 1:nrow(employee_department_merged.df))
{
  if (!is.na(employee_department_merged.df$JOB[i]))
  {
    if (employee_department_merged.df$JOB[i] == "PRESIDENT")
    {
      employee_department_merged_hike.df$SAL[i] <- employee_department_merged.df$SAL[i] + (0.057 * employee_department_merged.df$SAL[i])
    }
  }
}

# 13.3% hike for salesmen
for (i in 1:nrow(employee_department_merged.df))
{
  if (!is.na(employee_department_merged.df$JOB[i]))
  {
    if (employee_department_merged.df$JOB[i] == "SALESMAN")
    {
      employee_department_merged_hike.df$SAL[i] <- employee_department_merged.df$SAL[i] + (0.133 * employee_department_merged.df$SAL[i])
    }
  }
}

## Increased Salaries DataFrame
View(employee_department_merged_hike.df)
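
# Not part of the original script: a compact sketch equivalent to the five loops above,
# using a named vector of hike rates. The job names and the JOB/SAL columns are the ones
# already used in the merged data frame.
hike_rate <- c(ANALYST = 0.15, CLERK = 0.125, MANAGER = 0.102,
               PRESIDENT = 0.057, SALESMAN = 0.133)
rate <- hike_rate[as.character(employee_department_merged.df$JOB)]
rate[is.na(rate)] <- 0   # jobs without a listed hike (or with NA JOB) keep their salary
employee_department_merged_hike.df$SAL <- employee_department_merged.df$SAL * (1 + rate)
--------------------------------------------------------------------------------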