├── README.md
├── Time Series Plot
├── Boxplot
├── Histogram
├── Histogram using ggplot
├── Stacked and Grouped column charts
├── Scatterplot using ggplot
├── Scatter Plot
├── Pie Chart and Proportions
└── Barplot


/README.md:
--------------------------------------------------------------------------------
1 | # Visualisation
2 | Some visualisation code samples that we have
3 |


--------------------------------------------------------------------------------
/Time Series Plot:
--------------------------------------------------------------------------------
 1 | # Time series plot of the `nottem` series from the datasets package.
 2 | library(help = "datasets")
 3 | data(nottem, package = "datasets")
 4 |
 5 | # Peek at the data and check its class before plotting.
 6 | head(nottem)
 7 | class(nottem)
 8 |
 9 | # Default plot.
10 | plot(nottem)
11 |
12 | nottem
13 |
14 | # Labelled line plot. `pch` is dropped: type = "l" draws no point symbols.
15 | plot(nottem,
16 |      xlab = "Years",
17 |      ylab = "Avg Monthly Temp",
18 |      main = "Temp across years",
19 |      col = "blue",
20 |      type = "l")
21 |


--------------------------------------------------------------------------------
/Boxplot:
--------------------------------------------------------------------------------
 1 | # Box plots of `USmelanoma$mortality`, overall and split by `ocean`.
 2 | # NOTE(review): `USmelanoma` is never loaded in this script; it presumably
 3 | # comes from an HSAUR package, e.g. data("USmelanoma", package = "HSAUR3")
 4 | # -- confirm and load it before running.
 5 |
 6 | # Value-axis limits: the observed range with the lower bound scaled by 0.9
 7 | # and the upper bound by 1.1. `xr` was previously used without being defined.
 8 | xr <- range(USmelanoma$mortality) * c(0.9, 1.1)
 9 |
10 | boxplot(USmelanoma$mortality,
11 |         ylim = xr,
12 |         horizontal = TRUE,
13 |         xlab = "Mortality")
14 | quantile(USmelanoma$mortality, probs = c(0, 0.25, 0.5, 0.75, 1))
15 |
16 | table(USmelanoma$ocean)
17 |
18 | # One box per level of `ocean`.
19 | boxplot(mortality ~ ocean,
20 |         data = USmelanoma,
21 |         xlab = "Contiguity to an ocean",
22 |         ylab = "Mortality")
23 |


--------------------------------------------------------------------------------
/Histogram:
--------------------------------------------------------------------------------
 1 | # Histograms of simulated Age data.
 2 |
 3 | # Generate Age data
 4 | ## 10,000 normal draws (mean 55, sd 15) truncated to whole numbers.
 5 | ## `mean` is spelled out instead of relying on partial matching of `m`.
 6 | Age <- as.integer(rnorm(10000, mean = 55, sd = 15))
 7 |
 8 | # Default histogram, the help page, then a finer histogram (`breaks` is a
 9 | # suggested number of cells).
10 | hist(Age)
11 | ?hist
12 | hist(Age, breaks = 50)
13 |
14 | # Draw a fresh sample and repeat both histograms.
15 | Age <- as.integer(rnorm(10000, mean = 55, sd = 15))
16 | hist(Age)
17 | hist(Age, breaks = 50)
18 |
19 | # Styled histogram with explicit colours, axis labels and title.
20 | hist(Age,
21 |      breaks = 30,
22 |      col = "green",
23 |      border = "white",
24 |      xlab = "Age",
25 |      ylab = "Counts",
26 |      main = "Histogram:Age")
27 |
--------------------------------------------------------------------------------
/Histogram using ggplot:
--------------------------------------------------------------------------------
 1 | # Histogram of `Home.Value` from housingUS.csv, drawn with ggplot2.
 2 | # ggplot2 was never attached, so ggplot() could not be found.
 3 | library(ggplot2)
 4 |
 5 | housingUS <- read.csv(file = "housingUS.csv")
 6 | names(housingUS)
 7 |
 8 | head(housingUS)
 9 |
10 | # Strip "$" and "," so the money columns can be converted to numbers.
11 | housingUS$Home.Value <- gsub("$", "", housingUS$Home.Value, fixed = TRUE)
12 | housingUS$Home.Value <- as.numeric(gsub(",", "", housingUS$Home.Value, fixed = TRUE))
13 |
14 | housingUS$Structure.Cost <- gsub("$", "", housingUS$Structure.Cost, fixed = TRUE)
15 | housingUS$Structure.Cost <- as.numeric(gsub(",", "", housingUS$Structure.Cost, fixed = TRUE))
16 |
17 | # bins = 30 is ggplot2's default, stated explicitly to avoid the message.
18 | ggplot(housingUS, aes(x = Home.Value)) +
19 |   geom_histogram(bins = 30)
20 |


--------------------------------------------------------------------------------
/Stacked and Grouped column charts:
--------------------------------------------------------------------------------
 1 | # Stacked and grouped bar charts of `mtcars` counts by `vs` and `gear`.
 2 | library(help = datasets)
 3 | head(mtcars)
 4 |
 5 | # Cross-tabulation: rows are `vs` levels, columns are `gear` levels.
 6 | # Both charts use the same table, so it is computed once.
 7 | counts <- table(mtcars$vs, mtcars$gear)
 8 |
 9 | # Stacked
10 | # `legend.text` is spelled out; `legend =` only worked via partial matching.
11 | barplot(counts,
12 |         main = "Car Distribution by Gears and V/S",
13 |         xlab = "Number of Gears",
14 |         col = c("darkblue", "red"),
15 |         legend.text = rownames(counts))
16 |
17 | # Grouped: same table, bars side by side.
18 | barplot(counts,
19 |         main = "Car Distribution by Gears and V/S",
20 |         xlab = "Number of Gears",
21 |         col = c("darkblue", "red"),
22 |         legend.text = rownames(counts),
23 |         beside = TRUE)
24 |


--------------------------------------------------------------------------------
/Scatterplot using ggplot:
--------------------------------------------------------------------------------
 1 | # Scatter plot of `Home.Value` against `Date` for two states, with ggplot2.
 2 | # ggplot2 was never attached, so ggplot() could not be found.
 3 | library(ggplot2)
 4 |
 5 | housingUS <- read.csv(file = "housingUS.csv")
 6 | names(housingUS)
 7 |
 8 | head(housingUS)
 9 |
10 | # Strip "$" and "," so the money columns can be converted to numbers.
11 | housingUS$Home.Value <- gsub("$", "", housingUS$Home.Value, fixed = TRUE)
12 | housingUS$Home.Value <- as.numeric(gsub(",", "", housingUS$Home.Value, fixed = TRUE))
13 |
14 | housingUS$Structure.Cost <- gsub("$", "", housingUS$Structure.Cost, fixed = TRUE)
15 | housingUS$Structure.Cost <- as.numeric(gsub(",", "", housingUS$Structure.Cost, fixed = TRUE))
16 |
17 | # Keep only the MA and TX rows and colour the points by state.
18 | ggplot(subset(housingUS, STATE %in% c("MA", "TX")),
19 |        aes(x = Date,
20 |            y = Home.Value,
21 |            color = STATE)) +
22 |   geom_point()
23 |


--------------------------------------------------------------------------------
/Scatter Plot:
--------------------------------------------------------------------------------
 1 | # Base-graphics scatter plots of the `galton` parent/child heights.
 2 |
 3 | # Install psych only when it is missing instead of on every run.
 4 | if (!requireNamespace("psych", quietly = TRUE)) {
 5 |   install.packages("psych")
 6 | }
 7 | library(psych)
 8 | library(help = psych)
 9 |
10 | # NOTE(review): recent psych releases may ship `galton` in the psychTools
11 | # package instead -- confirm against the installed version.
12 | data(galton, package = "psych")
13 |
14 | head(galton)
15 |
16 | # Bare scatter plot.
17 | plot(galton$parent,
18 |      galton$child)
19 |
20 | # With axis labels and a title.
21 | plot(galton$parent,
22 |      galton$child,
23 |      xlab = "Height of Parent",
24 |      ylab = "Height of Children",
25 |      main = " Relationship between Parent and Children Heights")
26 |
27 | # With a custom point symbol and colour.
28 | plot(galton$parent,
29 |      galton$child,
30 |      xlab = "Height of Parent",
31 |      ylab = "Height of Children",
32 |      main = " Relationship between Parent and Children Heights",
33 |      pch = 17,
34 |      col = "red")
35 |
36 | # Overlay the least-squares line. The plot has x = parent and y = child, so
37 | # the model must be child ~ parent; the formula was previously reversed.
38 | abline(lm(child ~ parent, data = galton),
39 |        col = "blue",
40 |        lwd = 4,
41 |        lty = 5)
42 |


--------------------------------------------------------------------------------
/Pie Chart and Proportions:
--------------------------------------------------------------------------------
 1 | # Pie charts of counts per age group, then of their percentage shares.
 2 |
 3 | Label <- c("0-20", "20-30", "30-40", "40-50", "50-60")
 4 | Count <- c(16, 395, 315, 161, 68)
 5 | age <- data.frame(Label, Count)
 6 |
 7 | # Bare pie chart.
 8 | pie(age$Count)
 9 |
10 | # Labelled and coloured. `labels` and `TRUE` are spelled out in full.
11 | pie(age$Count,
12 |     labels = age$Label,
13 |     radius = 1,
14 |     main = "Customers by Age Group",
15 |     col = c("red", "blue", "orange", "green", "black"),
16 |     border = "white",
17 |     clockwise = TRUE)
18 |
19 | # Share of each group, reusing `Count` instead of repeating the literal.
20 | # round() replaces as.integer(): truncation made the labels sum to 97.
21 | Count.pct <- Count / sum(Count)
22 | Count.pct.label <- round(Count.pct * 100)
23 | pie(Count.pct,
24 |     labels = Count.pct.label,
25 |     radius = 1,
26 |     main = "Customers by Age Group",
27 |     col = c("red", "blue", "orange", "green", "black"),
28 |     border = "white",
29 |     clockwise = TRUE)
30 |
31 | # 3D pie chart. Install plotrix only when it is missing.
32 | if (!requireNamespace("plotrix", quietly = TRUE)) {
33 |   install.packages("plotrix")
34 | }
35 | library(plotrix)
36 | slices <- c(10, 12, 4, 16, 8)
37 | lbls <- c("US", "UK", "Australia", "Germany", "France")
38 | pie3D(slices,
39 |       labels = lbls,
40 |       explode = 0.1,
41 |       main = "Pie Chart of Countries ")
42 |


--------------------------------------------------------------------------------
/Barplot:
--------------------------------------------------------------------------------
 1 | # Bar plots: average spend per product, and animation movies per year.
 2 | library(sqldf)
 3 |
 4 | # Directory holding prod_spend.csv and movies.tab. Files are read through
 5 | # file.path() rather than setwd(), so the working directory is left alone.
 6 | data_dir <- "C:\\Ram\\General 20150804 v1\\Trainings\\R Programming for Data Science\\data"
 7 |
 8 | prd_spend <- read.csv(file = file.path(data_dir, "prod_spend.csv"))
 9 |
10 | names(prd_spend)
11 |
12 | # Average spend by product
13 | avg.spend.prd <- aggregate(prd_spend$Spend_Value,
14 |                            by = list(prd_spend$Prod_Code),
15 |                            FUN = mean)
16 | names(avg.spend.prd)
17 | names(avg.spend.prd) <- c("Product", "Avg.Spend")
18 |
19 | # Horizontal bars: the value axis (spend) is x, the products are on y.
20 | barplot(height = avg.spend.prd$Avg.Spend,
21 |         names.arg = avg.spend.prd$Product,
22 |         xlab = "$ Spend",
23 |         ylab = "Product",
24 |         main = "Spend by Product",
25 |         col = "blue",
26 |         border = "white",
27 |         horiz = TRUE)
28 | box()
29 |
30 | ?barplot
31 |
32 | # `comment.char` is spelled out; `comment =` relied on partial matching.
33 | movies_data <- read.table(file.path(data_dir, "movies.tab"),
34 |                           sep = "\t",
35 |                           header = TRUE,
36 |                           quote = "",
37 |                           comment.char = "")
38 |
39 | # Number of rows with Animation = 1 for each year.
40 | yearwise_anime <- sqldf("select year, count(*) as Num_movies
41 |                        from movies_data where Animation=1
42 |                        group by year")
43 |
44 | # Sanity check: the two totals should agree if Animation is coded 0/1
45 | # (TODO confirm the coding).
46 | sum(movies_data$Animation)
47 | sum(yearwise_anime$Num_movies)
48 |
49 | barplot(height = yearwise_anime$Num_movies,
50 |         col = "deeppink4",
51 |         names.arg = yearwise_anime$year,
52 |         xlab = "year",
53 |         ylab = "Count of Movies",
54 |         main = "Count of Movies across years")
55 |


--------------------------------------------------------------------------------