├── .gitattributes
├── HandOnExercise1
├── Hands on Ex 4
├── Exercise 6
├── Ex 2
├── Exercise 3
└── Exercise5.r

/.gitattributes:
--------------------------------------------------------------------------------
# Auto detect text files and perform LF normalization
* text=auto
--------------------------------------------------------------------------------

/HandOnExercise1:
--------------------------------------------------------------------------------
install.packages("data.table")
library(datasets)   # datasets ships with base R, so it only needs to be loaded
testDF <- women     # Copying the women data frame into another object so the original fields are not affected
View(testDF)

id <- 0
for (i in 1:nrow(testDF))
{
  if (testDF$height[i] > 65.0 & testDF$weight[i] < 136.7)
  {
    id <- id + 1
  }
}
print("Number of women satisfying the criteria:")
print(id)

# Rough space
testDF$height
nrow(testDF)
--------------------------------------------------------------------------------

/Hands on Ex 4:
--------------------------------------------------------------------------------
getwd()
setwd("D:\\MICA\\AMMA\\data_2017")
std_per <- read.csv(file = "students.csv", header = TRUE)
View(std_per)

# Since the original dataset is unavailable, a similar replacement dataset is used
# Finding the average age of the class using a for loop
sumAge <- 0
for (i in 1:nrow(std_per))
{
  sumAge <- sumAge + std_per$Age[i]
}
averageAge <- sumAge / nrow(std_per)
print("The average age of the class is:")
print(averageAge)
--------------------------------------------------------------------------------

/Exercise 6:
--------------------------------------------------------------------------------
install.packages("rvest")
library(rvest)
getwd()
setwd("D:\\MICA\\AMMA\\data_2017")

theurl <- "https://en.wikipedia.org/wiki/India%E2%80%93Pakistan_cricket_rivalry"
file <- read_html(theurl)
tables <- html_nodes(file, "table")
table1 <- html_table(tables[7], fill = TRUE)
View(table1)
DF1 <- as.data.frame(table1)
View(DF1)
DF1$Tests
top5 <- DF1[rev(order(DF1$Tests)), "Top 5 highest number of test wins"][1:5]
View(top5)
# Visualising the data using a plot

install.packages("psych")
library(psych)
library(help = psych)

# histogram of the Age column from the students data (Hands on Ex 4), assumed to still be in memory
hist(std_per$Age)
?hist
hist(std_per$Age, breaks = 50)
matchesWon <- 0   # Initiating the tracking counter
--------------------------------------------------------------------------------

/Ex 2:
--------------------------------------------------------------------------------
library(rvest)
theurl <- "https://en.wikipedia.org/wiki/India%E2%80%93Pakistan_cricket_rivalry"
file <- read_html(theurl)
tables <- html_nodes(file, "table")
table1 <- html_table(tables[3], fill = TRUE)
View(table1)
table2 <- as.data.frame(table1)
View(table2)
matchesWon <- 0   # Initiating the tracking counter
for (i in 1:nrow(table2))
{
  matchesWon <- matchesWon + table2$India[i]
}
print(paste('Total no. of ODIs won by India:', matchesWon))

# ALTERNATE WAY TO DO THE SAME PROBLEM

install.packages("rvest")
# The table is first saved as a csv file and then imported
input_wiki <- read.csv(file = "C:\\YYYYYY\\AMMA 2017\\Data\\Wiki_Matches.csv")
matchesWon <- 0   # Initiating the tracking counter
for (i in 1:nrow(input_wiki))
{
  matchesWon <- matchesWon + input_wiki$India[i]
}
print(paste('Total no. of ODIs won by India:', matchesWon))
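
# Not part of the original exercise: a minimal vectorized sketch of the same count.
# sum() over the India column replaces the explicit for loop. It assumes the scraped
# column is numeric; as.character/as.numeric coerce it in case it arrives as text.
india_wins <- sum(as.numeric(as.character(table2$India)), na.rm = TRUE)
print(paste('Total no. of ODIs won by India (vectorized):', india_wins))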
--------------------------------------------------------------------------------

/Exercise 3:
--------------------------------------------------------------------------------
Name <- c('angad', 'ayush', 'rajesh', 'sonu', 'pakka')
Age <- c(22, 23, 24, 25, 26)
MathMark <- c(24, 35, 34, 34, 34)
ScienceMark <- c(65, 67, 56, 78, 23)
TotalMark <- c(152, 153, 161, 143, 182)
Bigdata <- data.frame(Name, Age, MathMark, ScienceMark, TotalMark)
Bigdata
View(Bigdata)
Bigdata$TotalMark <- Bigdata$MathMark + Bigdata$ScienceMark
View(Bigdata)

Bigdata$pct_math <- round(100 * Bigdata$MathMark / Bigdata$TotalMark)
View(Bigdata)
# Column subsets are assigned to a separate object so the full Bigdata frame stays intact
Bigdata_sub <- Bigdata[, c(2, 6)]   # Age and pct_math only
View(Bigdata_sub)
Bigdata_sub <- Bigdata[, c(2:6)]    # every column except Name
names_Bigdata <- names(Bigdata_sub)
names(Bigdata_sub) <- c('a', 'b', 'c', 'd', 'e')
View(Bigdata_sub)
names(Bigdata_sub) <- names_Bigdata
View(Bigdata_sub)
vector1 <- seq(1, 10, by = 2)
vector1[3] <- 100

v_col <- names(Bigdata)
v_col[3] <- "NewMaths"
names(Bigdata) <- v_col   # apply the edited names back to the data frame
View(Bigdata)

Bigdata$Age <- exp(Bigdata$Age)

View(Bigdata)
class(Bigdata$Name)
Bigdata$Name <- as.character(Bigdata$Name)
class(Bigdata$Name)

s1 <- Bigdata$Age >= 23

Bigdata <- Bigdata[s1, ]
View(Bigdata)

?sample

sample_Bigdata <- sample(1:nrow(Bigdata), 3, replace = F)
sample_Bigdata
--------------------------------------------------------------------------------

/Exercise5.r:
--------------------------------------------------------------------------------
# Exercise5 using two alternate approaches
install.packages("rvest")   # rvest can be used to harvest data easily
# Setting wd to where the employee csv file is located
setwd("D://MICA/AMMA/Code&Data")
empDF <- read.csv("emp.csv")
depDF <- read.csv("dept.csv")

mergedDF <- merge(empDF, depDF, by = "departmentNO", all = TRUE)

# Approach 1: mean salary per location, computed with explicit loops
N_employeeLoc <- rep(0, nrow(depDF))   # employee count per location
salaryLoc <- rep(0, nrow(depDF))       # salary total per location
for (i in 1:nrow(depDF))
{
  for (j in 1:nrow(mergedDF))
  {
    # count the employee and add their salary when the merged row falls in location i
    if (!is.na(mergedDF$LOC[j]) & mergedDF$LOC[j] == depDF$LOC[i])
    {
      N_employeeLoc[i] <- N_employeeLoc[i] + 1
      salaryLoc[i] <- salaryLoc[i] + mergedDF$SAL[j]
    }
  }
}

mean_salary_loc <- rep(0, nrow(depDF))
for (i in 1:nrow(depDF))
{
  mean_salary_loc[i] <- salaryLoc[i] / N_employeeLoc[i]
}
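
# Not part of the original exercise: a compact sketch of the same per-location average,
# assuming mergedDF has the LOC and SAL columns used above.
mean_salary_loc_alt <- tapply(mergedDF$SAL, mergedDF$LOC, mean, na.rm = TRUE)
mean_salary_loc_alt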

# Approach 2: job-wise salary hikes
employee_csv.df <- read.csv("employee.csv", header = TRUE)
department_csv.df <- read.csv("department.csv", header = TRUE)
employee_department_merged.df <- merge(employee_csv.df, department_csv.df, by = c("departmentNO"), all = TRUE)
employee_department_merged_hike.df <- employee_department_merged.df

# 15% hike for analysts
for (i in 1:nrow(employee_department_merged.df))
{
  if (!is.na(employee_department_merged.df$JOB[i]))
  {
    if (employee_department_merged.df$JOB[i] == "ANALYST")
    {
      employee_department_merged_hike.df$SAL[i] <- employee_department_merged.df$SAL[i] + (0.15 * employee_department_merged.df$SAL[i])
    }
  }
}

# 12.5% hike for clerks
for (i in 1:nrow(employee_department_merged.df))
{
  if (!is.na(employee_department_merged.df$JOB[i]))
  {
    if (employee_department_merged.df$JOB[i] == "CLERK")
    {
      employee_department_merged_hike.df$SAL[i] <- employee_department_merged.df$SAL[i] + (0.125 * employee_department_merged.df$SAL[i])
    }
  }
}

# 10.2% hike for managers
for (i in 1:nrow(employee_department_merged.df))
{
  if (!is.na(employee_department_merged.df$JOB[i]))
  {
    if (employee_department_merged.df$JOB[i] == "MANAGER")
    {
      employee_department_merged_hike.df$SAL[i] <- employee_department_merged.df$SAL[i] + (0.102 * employee_department_merged.df$SAL[i])
    }
  }
}

# 5.7% hike for the president
for (i in 1:nrow(employee_department_merged.df))
{
  if (!is.na(employee_department_merged.df$JOB[i]))
  {
    if (employee_department_merged.df$JOB[i] == "PRESIDENT")
    {
      employee_department_merged_hike.df$SAL[i] <- employee_department_merged.df$SAL[i] + (0.057 * employee_department_merged.df$SAL[i])
    }
  }
}

# 13.3% hike for salesmen
for (i in 1:nrow(employee_department_merged.df))
{
  if (!is.na(employee_department_merged.df$JOB[i]))
  {
    if (employee_department_merged.df$JOB[i] == "SALESMAN")
    {
      employee_department_merged_hike.df$SAL[i] <- employee_department_merged.df$SAL[i] + (0.133 * employee_department_merged.df$SAL[i])
    }
  }
}

## Increased Salaries DataFrame
View(employee_department_merged_hike.df)
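
# Not part of the original script: a compact sketch equivalent to the five loops above,
# using a named vector of hike rates. The job names and the JOB/SAL columns are the ones
# already used in the merged data frame.
hike_rate <- c(ANALYST = 0.15, CLERK = 0.125, MANAGER = 0.102,
               PRESIDENT = 0.057, SALESMAN = 0.133)
rate <- hike_rate[as.character(employee_department_merged.df$JOB)]
rate[is.na(rate)] <- 0   # jobs without a listed hike (or with NA JOB) keep their salary
employee_department_merged_hike.df$SAL <- employee_department_merged.df$SAL * (1 + rate)
--------------------------------------------------------------------------------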