├── README.md
└── Reading Data


/README.md:
--------------------------------------------------------------------------------
1 | # Class-3


--------------------------------------------------------------------------------
/Reading Data:
--------------------------------------------------------------------------------
  1 | 
  2 | ##################  Reading Data  #########################
  3 | 
  4 | 
  5 | ## Reading data from program editor
  6 | 
  7 | ## Create a matrix
  8 | swas_matrix <- matrix(1:100, ncol = 5)
  9 | ## See the data
 10 | swas_matrix         # we see that a matrix with 5 columns and 20 rows has been formed
 11 | 
 12 | # Uniform Random Numbers between 1 to 1000
 13 | 
 14 | ss <-1000*runif(2)
 15 | 
 16 | class(ss)
 17 | 
 18 | ss<-as.integer(1000*runif(2))
 19 | 
 20 | 
 21 | 
 22 | class(ss)
 23 | mode(ss)
 24 | 
 25 | unif <- as.integer(1000*runif(200000))
 26 | 
 27 | plot(unif)
 28 | 
 29 | head(unif)
 30 | 
 31 | hist(unif)
 32 | 
 33 | # Scenario 
 34 | 
 35 | # Application: 300,000
 36 | # Selection: 60,000
 37 | 
 38 | Prob <- 60000/300000
 39 | 
 40 | Appl.num <- seq(1,300000,1)
 41 | 
 42 | head(Appl.num)
 43 | 
 44 | selected <- sample(Appl.num,60000,replace = F)
 45 | 
 46 | selected.df <- data.frame(selected)
 47 | 
 48 | 
 49 | LETTERS[1:4]
 50 | 
 51 | sample(letters[1:4], 10, replace = T)
 52 | 
 53 | ## Create a Data Frame
 54 | input.df <- data.frame(ID = 1:10, 
 55 |                        Class = sample(letters[1:4], 10, replace = TRUE),
 56 |                        Value = seq(1:5))
 57 | ## View the data frame
 58 | View(input.df)
 59 | # we see that the data frame has 3 columns named ID, Class, and Value
 60 | # The data frame also has 10 rows of data
 61 | 
 62 | ## See the column names
 63 | colnames(input.df)
 64 | names(input.df)
 65 | 
 66 | row.names(input.df)
 67 | ## See the dimensions
 68 | dim(input.df)
 69 | 
 70 | ## Set the Working Directory
 71 | getwd()
 72 | 
 73 | ## Reading data from Comma separated file (csv)
 74 | input_csv.df <- read.csv(file="/Users/swastik/Desktop/AMMA 2017/Data/binary.csv")
 75 | 
 76 | input_csv.df <- read.csv(file="/Users/swastik/Desktop/AMMA 2017/Data/Data_2017\\binary.csv")
 77 | # reset directory and read file
 78 | setwd("/Users/swastik/Desktop/AMMA 2017/Data")
 79 | input_csv.df <- read.csv("binary.csv",header = T)
 80 | 
 81 | getwd()
 82 | 
 83 | 
 84 | ## See the structure of the data
 85 | str(input_csv.df)
 86 | ## reading the file to validate the data 
 87 | 
 88 | sum(input_csv.df$gre)
 89 | 
 90 | ## Reading Date Values
 91 | input_wthdt.df <- read.csv("binary_withdate.csv")
 92 | str(input_wthdt.df)
 93 | 
 94 | tab <- data.frame(table(input_wthdt.df$application_date))
 95 | sum(tab$Freq)
 96 | # we see the date field has been read as factor
 97 | 
 98 | # read Date as Factor
 99 | input_wthdt.df <- read.csv("binary_withdate.csv", stringsAsFactors = F)
100 | str(input_wthdt.df)
101 | 
## working with dates
d <- "2004-12-03"

d
class(d)           # "character": so far this is only text
# as.Date() parses the default yyyy-mm-dd layout without a format string
d1 <- as.Date(d)
class(d1)          # "Date"
mode(d1)           # "numeric": a Date is stored as a number of days
## find system date
s <- Sys.Date()
s
## Current date and time (date() returns a character string)
# NOTE(review): this variable is named `c`. Calls such as c(1, 2) keep
# working because R skips non-function objects when it looks up a function,
# but a more descriptive name would be clearer.
c <- date()
c

# reading dates in formats other than the default
# %d - day of the month, e.g. 01, 02 etc
# %m - month as a number
# %b - abbreviated month name (Jan, Feb)
# %B - full month name (January)
# %y - 2 digit year
# %Y - 4 digit year
# Month names are matched in the current locale.

d2 <- as.Date("12-January-2012", format = "%d-%B-%Y")

d2
class(d2)

# format() turns a Date back into text; here only the month name
format(d2, "%B")


d3 <- as.Date("12-February-2012", format = "%d-%B-%Y")
d3 <- as.Date("12-February-12", format = "%d-%B-%y")

d4 <- as.Date("12-12-12", format = "%d-%m-%y")

d2

# The result of format() is a character string, not a Date
dd <- format(d2, "%d/%B/%Y")


## Calculate age
dob <- as.Date("12-Jan-1983", format = "%d-%b-%Y")
dob
# Elapsed time since the date of birth, in days
age <- difftime(Sys.Date(), dob, units = "days")

# Completed years between a date of birth and a reference date.
#   dob : Date (vector) of birth dates
#   on  : Date on which the age is evaluated; defaults to today
# Returns an integer vector. Comparing calendar fields avoids the error of
# dividing elapsed days by 365, which ignores leap days and therefore
# reports the next age a few days before the birthday.
age_in_years <- function(dob, on = Sys.Date()) {
  year_gap <- as.integer(format(on, "%Y")) - as.integer(format(dob, "%Y"))
  # "mmdd" strings compare correctly as text because they are zero padded
  birthday_reached <- format(on, "%m%d") >= format(dob, "%m%d")
  year_gap - as.integer(!birthday_reached)
}

age_in_years(dob)
148 | 
## Correct the date format
# The dates are parsed as month/day/year text and stored in a new Date column
input_wthdt.df$application_date1 <- as.Date(input_wthdt.df$application_date,
                                            format = "%m/%d/%Y")

str(input_wthdt.df)

## Read the data specifying the Class of the data
# read.csv() converts a column to a non-standard class named in colClasses
# through methods::as(), so the class "myDate" and a character -> myDate
# coercion have to exist before the file is read.
methods::setClass("myDate")
methods::setAs("character", "myDate",
               function(from) as.Date(from, format = "%m/%d/%Y"))
input_wthdt.df1 <- read.csv("binary_withdate.csv",
                            colClasses = c(application_date = "myDate"))
str(input_wthdt.df1)
157 | 
158 | 
## Read data from the web
## A file can be read straight from the internet by passing its URL
input_webdata.df <- read.table("http://www.stats.ox.ac.uk/pub/datasets/csb/ch11b.dat")
str(input_webdata.df)

## Read the first few records of a dataset
head(input_webdata.df)
## The first 6 rows of data have been displayed

## Ques 1 : What if we need to display the first 10 rows instead?
## Ques 2 : What if we want to display the last few rows?
172 | 
## Comma separated file, read through the generic read.table() interface
csv_file <- read.table(
  "C:\\Ram\\General 20150804 v1\\Trainings\\R Programming for Data Science\\data\\binary.csv",
  sep = ",",
  header = TRUE
)

## Read a Tab Delimited file
input_tabdlmtd.df <- read.table(
  "C:\\Ram\\R for Data Science\\data\\tab_delimited_data.txt",
  sep = "\t",
  header = TRUE
)

head(input_tabdlmtd.df)

## The functions used with a csv file to read dates and modify formats
## work with a tab-delimited file as well


## Read a file whose fields are separated by the '$' character
input_dollar.df <- read.table(
  "C:\\Ram\\R for Data Science\\data\\dollar_delimited_data.txt",
  sep = "$",
  header = TRUE
)
191 | 
192 | 
193 | 
194 | 
## Read Data From Facebook

# Install the required packages only when they are not available yet, so
# that re-running the script does not reinstall them every time.
fb_packages <- c("Rfacebook", "RCurl", "rjson")
fb_installed <- vapply(fb_packages, requireNamespace, logical(1),
                       quietly = TRUE)
if (!all(fb_installed)) {
  install.packages(fb_packages[!fb_installed])
}

library(Rfacebook)
library(RCurl)
library(rjson)
library(XML)

# Overview of the functions offered by Rfacebook
library(help = Rfacebook)
# connecting to Facebook

# A temporary access token can be generated at
# https://developers.facebook.com/tools/explorer
# The token is a credential, so it is read from the environment instead of
# being stored in the script.
accessToken <- Sys.getenv("FACEBOOK_ACCESS_TOKEN")
if (!nzchar(accessToken)) {
  stop("Set the FACEBOOK_ACCESS_TOKEN environment variable before running ",
       "this section.", call. = FALSE)
}

# Get data from a company page, extract data from a company's page on facebook
# `page` is the page name, taken from the link of the page
flipkartPage <- getPage(page = "flipkart",
                        token = accessToken,
                        n = 10)

# Same request with a larger number of posts
flipkartPage <- getPage(page = "flipkart",
                        token = accessToken,
                        n = 150)

## Read an HTML table from a web page
# TODO: fill in the address of the page to read; the URL was left empty.
page_url <- ""
if (nzchar(page_url)) {
  pages <- getURL(page_url)
  # which = 2 selects the second table found in the page.
  # NOTE(review): an undefined `string` argument was passed here before;
  # presumably stringsAsFactors was meant - confirm.
  overall_matces <- readHTMLTable(pages, header = TRUE, which = 2,
                                  stringsAsFactors = FALSE)
}
222 | 
##################  Writing Data  #########################

# NOTE(review): `bank_ins` is not created in the visible part of this script;
# it has to exist in the workspace before this section runs.

# Third column of bank_ins (despite the variable name, [, 3] selects a column)
one_row <- bank_ins[, 3]

# The first 1000 rows as a smaller sample
bank_ins.smpl <- bank_ins[1:1000, ]
## Save the R object
# NOTE(review): the file name suggests that the sample was meant to be saved,
# but the full `bank_ins` object is what gets written - confirm the intent.
save(bank_ins, file = "bank_ins.smpl.Rda")
# remove
rm(bank_ins.smpl)
rm(bank_ins)
## Load the data back
load("bank_ins.smpl.Rda")

# bank_ins exists again only after load(); inspecting it between rm() and
# load() fails with "object 'bank_ins' not found".
names(bank_ins)
238 | 
## Load the datasets package
# datasets is one of R's base packages and is part of every installation,
# so it is only loaded, not installed. library() stops with an error if a
# package is missing, whereas require() would merely return FALSE.
library(datasets)

# List the data sets that the package provides
library(help = "datasets")
## Save the data as a csv file

tt <- mtcars

names(tt)

# By default write.csv() writes the row names (here the car models) as the
# first column
write.csv(mtcars, "mtcars.csv")

# Keep the car model as a regular column and drop the row names
tt$carmodel <- row.names(mtcars)
row.names(tt) <- NULL

write.csv(tt, "/Users/swastik/Desktop/AMMA 2017/Data/mtcars.csv",
          row.names = FALSE)
258 | 
259 | 
## Save the data as a delimited text file, using '%' as the field separator
write.table(mtcars, "mtcars.txt", sep = "%", quote = FALSE, row.names = FALSE)

## write to file without column names

write.table(mtcars,
            "mtcars_noheader.txt",
            sep = "%",
            quote = FALSE,
            row.names = FALSE,
            col.names = FALSE)

# write.csv() ignores col.names (with a warning) and always writes a header
# line, so a comma separated file without header is written with
# write.table() and sep = ",".
write.table(mtcars,
            "mtcars_noheader1.txt",
            sep = ",",
            quote = FALSE,
            row.names = FALSE,
            col.names = FALSE)

write.table(mtcars,
            "mtcars_noheadercsv.csv",
            sep = ",",
            quote = FALSE,
            row.names = FALSE,
            col.names = FALSE)
284 | 
285 | 
# NOTE(review): `male` is not created in the visible part of this script; a
# data frame with an `Age` column is assumed - confirm.

names(male)

# Columns are referenced through the data frame instead of attach(): an
# attached copy lives on the search path, cannot be removed with rm(Age),
# and can be masked by other objects of the same name. Calling attach()
# without an argument is an error.
summary(male$Age)

mean(male$Age)

# with() evaluates the expression with the columns of `male` in scope and
# leaves the search path untouched
with(male, mean(Age))
301 | 
# Build a small data frame from inline text: the fields are separated by
# whitespace and the first line supplies the column names.
myfile <- read.table(text="MyName Age
                           Swastik 25
                           Jodu 20", header = TRUE)

# csv copy; the row names are written as the first column
write.csv(myfile, file = "myfile.csv")

# '$'-delimited copy without row names and without a header line
write.table(myfile,
            "myfile.txt",
            sep = "$",
            row.names = FALSE,
            col.names = FALSE)

# Age is drawn from a normal distribution (mean 50, sd 20), so the values
# differ on every run and are not whole numbers.
students <- data.frame(Name = c("Gopu", "Roma", "Mitali", "Kona"),
                       Gender = c("Male", "Male", "Female", "Female"),
                       Age = rnorm(4, 50, 20))
# View() is meant for interactive use, so it is skipped in batch runs
if (interactive()) {
  View(students)
}

write.csv(students,
          file = "students.csv",
          row.names = FALSE)
# The files above were written relative to this directory
getwd()
323 | # ----------------- Reference --------------------------
324 | # http://dni-institute.in/blogs/read-large-files-into-r/
325 | 
326 | 
327 | 
328 | 
329 | 


--------------------------------------------------------------------------------