################## Reading Data #########################
#
# Class-3 teaching script ("Reading Data").
#
# Walks through reading data into R (typed in the editor, CSV files, other
# delimited files, the web, Facebook) and then writing data back out.
# The script is meant to be stepped through interactively. Several steps
# depend on files that live on the author's machine; adjust the paths below
# before running it elsewhere.

# Folder holding binary.csv / binary_withdate.csv on the author's machine.
data_dir <- "/Users/swastik/Desktop/AMMA 2017/Data"


## Reading data from program editor

## Create a matrix
swas_matrix <- matrix(1:100, ncol = 5)
## See the data
swas_matrix # a matrix with 5 columns and 20 rows has been formed

# Uniform random numbers.
# runif() draws strictly between 0 and 1, so 1000 * runif() is a double
# between 0 and 1000, and as.integer() truncates it to a whole number
# from 0 to 999.
ss <- 1000 * runif(2)
class(ss)

ss <- as.integer(1000 * runif(2))
class(ss)
mode(ss)

unif <- as.integer(1000 * runif(200000))
plot(unif)
head(unif)
hist(unif)

# Scenario
#
# Application: 300,000
# Selection:    60,000
Prob <- 60000 / 300000

Appl.num <- seq_len(300000)
head(Appl.num)

# Draw 60,000 distinct application numbers.
selected <- sample(Appl.num, 60000, replace = FALSE)
selected.df <- data.frame(selected)

LETTERS[1:4]
sample(letters[1:4], 10, replace = TRUE)

## Create a Data Frame
# Value repeats 1..5 twice; the repetition is spelled out instead of relying
# on data.frame() silently recycling a shorter column.
input.df <- data.frame(
  ID = 1:10,
  Class = sample(letters[1:4], 10, replace = TRUE),
  Value = rep(1:5, length.out = 10)
)
## View the data frame (the viewer is only available interactively)
if (interactive()) {
  View(input.df)
}
# The data frame has 3 columns named ID, Class, and Value
# and 10 rows of data.

## See the column names
colnames(input.df)
names(input.df)

row.names(input.df)
## See the dimensions
dim(input.df)

## Check the current working directory
getwd()

## Reading data from Comma separated file (csv)
input_csv.df <- read.csv(file = file.path(data_dir, "binary.csv"))

# file.path() joins path components with "/", which avoids mixing "\\" into
# a POSIX path.
# NOTE(review): assumes a Data_2017 sub-folder with its own binary.csv
# exists -- confirm on the target machine.
input_csv.df <- read.csv(file = file.path(data_dir, "Data_2017", "binary.csv"))

# Reset the working directory and read the file by its bare name.
# Every relative path used from here on is resolved against data_dir.
setwd(data_dir)
input_csv.df <- read.csv("binary.csv", header = TRUE)

getwd()

## See the structure of the data
str(input_csv.df)

## Validate the data that was read
sum(input_csv.df$gre)

## Reading Date Values
input_wthdt.df <- read.csv("binary_withdate.csv")
str(input_wthdt.df)

tab <- data.frame(table(input_wthdt.df$application_date))
sum(tab$Freq)
# In R < 4.0 the date field is read as a factor; from R 4.0 on, strings are
# read as character by default. Either way it is not yet a Date.

# Read the date column as character, explicitly (not as a factor)
input_wthdt.df <- read.csv("binary_withdate.csv", stringsAsFactors = FALSE)
str(input_wthdt.df)

## Working with dates
d <- "2004-12-03"

d
class(d)
d1 <- as.Date(d)
class(d1)
mode(d1)
## Find the system date
s <- Sys.Date()
s
## Current date and time; date() returns a character string
c <- date()
c

# Reading dates in a format other than the default.
# Note: month names (%b, %B) are matched in the current locale.
# %d - day of month, e.g. 1, 2
# %m - month as a number
# %b - abbreviated month name (Jan, Feb)
# %B - full month name (January)
# %y - 2-digit year
# %Y - 4-digit year

d2 <- as.Date("12-January-2012", format = "%d-%B-%Y")

d2
class(d2)

format(d2, "%B")

d3 <- as.Date("12-February-2012", format = "%d-%B-%Y")
d3 <- as.Date("12-February-12", format = "%d-%B-%y")

d4 <- as.Date("12-12-12", format = "%d-%m-%y")

d2

dd <- format(d2, "%d/%B/%Y")

## Calculate age
dob <- as.Date("12-Jan-1983", format = "%d-%b-%Y")
dob
age <- difftime(Sys.Date(), dob, units = "days")
# 365.25 days per year accounts for leap years; dividing by 365 drifts by
# about one day every four years.
as.integer(as.numeric(age) / 365.25)

## Correct the date format
input_wthdt.df$application_date1 <- as.Date(
  input_wthdt.df$application_date,
  format = "%m/%d/%Y"
)

str(input_wthdt.df)

## Read the data specifying the class of a column
# read.csv() can only honour a custom colClasses entry when a coercion from
# "character" to that class is registered, so "myDate" is declared here
# with the same m/d/Y format used for the conversion above.
setClass("myDate")
setAs(
  "character", "myDate",
  function(from) as.Date(from, format = "%m/%d/%Y")
)
input_wthdt.df1 <- read.csv(
  "binary_withdate.csv",
  colClasses = c(application_date = "myDate")
)
str(input_wthdt.df1)


## Read data from the web
## A file can be read directly from the internet by specifying its URL
input_webdata.df <- read.table(
  "http://www.stats.ox.ac.uk/pub/datasets/csb/ch11b.dat"
)
str(input_webdata.df)

## Read the first few records of a dataset
head(input_webdata.df)
## The first 6 rows of data have been displayed

## Ques 1 : What if we need to display the first 10 rows instead?
## Ques 2 : What if we want to display the last few rows?

## Read a csv file with read.table by naming the separator
csv_file <- read.table(
  file = "C:\\Ram\\General 20150804 v1\\Trainings\\R Programming for Data Science\\data\\binary.csv",
  header = TRUE,
  sep = ","
)

## Read a Tab Delimited file
input_tabdlmtd.df <- read.table(
  file = "C:\\Ram\\R for Data Science\\data\\tab_delimited_data.txt",
  header = TRUE,
  sep = "\t"
)

head(input_tabdlmtd.df)

## We can use the same functions as with a csv file
## to read dates and modify formats with a tab-delimited file as well

## Read a file that uses "$" as the delimiter
input_dollar.df <- read.table(
  file = "C:\\Ram\\R for Data Science\\data\\dollar_delimited_data.txt",
  header = TRUE,
  sep = "$"
)


## Read Data From Facebook

# Install only the packages that are not already available.
fb_pkgs <- c("Rfacebook", "RCurl", "rjson")
fb_missing <- fb_pkgs[
  !vapply(fb_pkgs, requireNamespace, logical(1), quietly = TRUE)
]
if (length(fb_missing) > 0) {
  install.packages(fb_missing)
}

library(Rfacebook)
library(RCurl)
library(rjson)

library(help = Rfacebook)

# Connecting to Facebook
# A token can be generated at https://developers.facebook.com/tools/explorer
# Access tokens are credentials: keep them out of source files and supply
# one through the FACEBOOK_ACCESS_TOKEN environment variable instead.
accessToken <- Sys.getenv("FACEBOOK_ACCESS_TOKEN")

# Get data from a company page (take the page name from the page's link)
if (nzchar(accessToken)) {
  flipkartPage <- getPage(page = "flipkart", token = accessToken, n = 10)

  flipkartPage <- getPage(page = "flipkart", token = accessToken, n = 150)
} else {
  message("FACEBOOK_ACCESS_TOKEN is not set; skipping the Facebook download.")
}

## Read an HTML table from a web page
library(XML)
# Fill in the address of the page to scrape; nothing is fetched while it is
# empty.
scrape_url <- ""
if (nzchar(scrape_url)) {
  pages <- getURL(scrape_url)
  # which = 2 picks the second table on the page.
  # NOTE(review): the call used to end with a bare `string` argument,
  # presumably an unfinished stringsAsFactors setting -- confirm the
  # intended value.
  overall_matces <- readHTMLTable(pages, header = TRUE, which = 2)
}

################## Writing Data #########################

# NOTE(review): bank_ins is not created anywhere in this script; this section
# assumes it already exists in the session and is skipped otherwise.
if (exists("bank_ins")) {
  # Third column of bank_ins
  one_row <- bank_ins[, 3]

  # First 1000 rows (fewer if bank_ins is shorter)
  bank_ins.smpl <- head(bank_ins, 1000)
  ## Save the R object
  # The object saved is the full bank_ins, although the file name mentions
  # the sample.
  save(bank_ins, file = "bank_ins.smpl.Rda")
  # Remove both objects from the workspace
  rm(bank_ins.smpl)
  rm(bank_ins)
  ## Load the data back, then confirm it is available again
  load("bank_ins.smpl.Rda")
  print(names(bank_ins))
}

## Load the datasets package (it ships with R, so there is nothing to install)
library(datasets)

library(help = "datasets")

## Save the data as a csv file
tt <- mtcars

names(tt)

write.csv(mtcars, "mtcars.csv")

# Move the car model out of the row names into a regular column
tt$carmodel <- row.names(mtcars)
row.names(tt) <- NULL

write.csv(tt, file.path(data_dir, "mtcars.csv"), row.names = FALSE)

## Save the data as a delimited text file, using "%" as the separator
write.table(mtcars, "mtcars.txt", sep = "%", quote = FALSE, row.names = FALSE)

## Write to file without column names
write.table(
  mtcars,
  "mtcars_noheader.txt",
  sep = "%",
  quote = FALSE,
  row.names = FALSE,
  col.names = FALSE
)

# write.csv() always writes the header row and ignores col.names (with a
# warning), so this file still has a header.
write.csv(
  mtcars,
  "mtcars_noheader1.txt",
  quote = FALSE,
  row.names = FALSE
)

# To get a comma-separated file without a header, use write.table().
write.table(
  mtcars,
  "mtcars_noheadercsv.csv",
  sep = ",",
  quote = FALSE,
  row.names = FALSE,
  col.names = FALSE
)

## Referring to the columns of a data frame
# NOTE(review): `male` is not created anywhere in this script; this section
# assumes it already exists in the session and is skipped otherwise.
# Columns are accessed explicitly (male$Age, with()) rather than through
# attach(), which changes the search path for everything that follows.
if (exists("male")) {
  print(names(male))
  print(summary(male$Age))
  print(mean(male$Age))
  print(with(male, mean(Age)))
}

## Build a small data frame from inline text
myfile <- read.table(text = "MyName Age
Swastik 25
Jodu 20", header = TRUE)

write.csv(myfile, file = "myfile.csv")

write.table(
  myfile,
  "myfile.txt",
  sep = "$",
  row.names = FALSE,
  col.names = FALSE
)

students <- data.frame(
  Name = c("Gopu", "Roma", "Mitali", "Kona"),
  Gender = c("Male", "Male", "Female", "Female"),
  Age = rnorm(4, 50, 20)
)
if (interactive()) {
  View(students)
}

write.csv(students, file = "students.csv", row.names = FALSE)
getwd()

# ----------------- Reference --------------------------
# http://dni-institute.in/blogs/read-large-files-into-r/