├── .gitattributes ├── 9781484258286.jpg ├── Chapter01 ├── Listing1-11.R ├── Listing1-13.R ├── Listing1-14.R ├── Listing1-15.R ├── Listing1-16.R ├── Listing1-17.R ├── Listing1-18.R └── VisualCodeTemplate.R ├── Chapter02 ├── BubbleChart │ ├── BubbleChartCode.R │ ├── FakeData.xlsx │ ├── FakeFootballLeagueData.csv │ ├── PBI_FakeFootballLeague.pbix │ └── input.csv ├── Callout │ ├── CalloutChart.pbix │ ├── CalloutChartCode.R │ ├── calloutChartData.csv │ └── input.csv ├── Forecast │ ├── Forecast.pbix │ ├── ForecastCode.R │ ├── dwpageviews-20150701-20191231.csv │ ├── input.csv │ └── ljpageviews-20160618-20191231.csv ├── LineChartWithShade │ ├── GDP.csv │ ├── LineChartWithShadePBI - Copy.pbix │ ├── LineChartWithShadePBI.pbix │ ├── LineChartWithShadePBICode.R │ ├── PoliticalInfoWithGDP.csv │ ├── PoliticalInfoWithGDP.xlsx │ └── input.csv ├── Map │ ├── DataWrangling.xlsx │ ├── MapExample.pbix │ ├── chartdata.csv │ ├── input.csv │ └── stateMapCode.R ├── QuadChart │ ├── QuadChart.pbix │ ├── chartData.csv │ ├── createChartData.R │ ├── input.csv │ └── quadChartCode.R └── RegressionLine │ ├── RegressionLine.R │ ├── RegressionLine.pbix │ ├── input.csv │ └── women.csv ├── Chapter03 ├── Chapter01_README.md ├── Python_Code │ ├── CodeToCreateModel.py │ ├── CombineRollingMonths.py │ ├── Data │ │ ├── BostonHousingInfo.csv │ │ ├── EmailAddresses.csv │ │ └── SalesData │ │ │ ├── 2010-12-01.csv │ │ │ ├── 2011-01-01.csv │ │ │ ├── 2011-02-01.csv │ │ │ ├── 2011-03-01.csv │ │ │ ├── 2011-04-01.csv │ │ │ ├── 2011-05-01.csv │ │ │ ├── 2011-06-01.csv │ │ │ ├── 2011-07-01.csv │ │ │ ├── 2011-08-01.csv │ │ │ ├── 2011-09-01.csv │ │ │ ├── 2011-10-01.csv │ │ │ ├── 2011-11-01.csv │ │ │ ├── 2011-12-01.csv │ │ │ ├── 2012-01-01.csv │ │ │ ├── 2012-02-01.csv │ │ │ ├── 2012-03-01.csv │ │ │ ├── 2012-04-01.csv │ │ │ ├── 2012-05-01.csv │ │ │ ├── 2012-06-01.csv │ │ │ ├── 2012-07-01.csv │ │ │ ├── 2012-08-01.csv │ │ │ ├── 2012-09-01.csv │ │ │ ├── 2012-10-01.csv │ │ │ ├── 2012-11-01.csv │ │ │ ├── 2012-12-01.csv │ │ │ 
├── 2013-01-01.csv │ │ │ ├── 2013-02-01.csv │ │ │ ├── 2013-03-01.csv │ │ │ ├── 2013-04-01.csv │ │ │ ├── 2013-05-01.csv │ │ │ ├── 2013-06-01.csv │ │ │ ├── 2013-07-01.csv │ │ │ ├── 2013-08-01.csv │ │ │ ├── 2013-09-01.csv │ │ │ ├── 2013-10-01.csv │ │ │ ├── 2013-11-01.csv │ │ │ ├── 2013-12-01.csv │ │ │ └── 2014-01-01.csv │ ├── Models │ │ └── model │ ├── ScoreData.py │ ├── UseRegularExpressionToFilterBadEmailAddresses.py │ └── ch01__Python.pbix └── R_Code │ ├── CombineRollingMonths.R │ ├── Data │ ├── BostonHousingInfo.csv │ ├── EmailAddresses.csv │ └── SalesData │ │ ├── 2010-12-01.csv │ │ ├── 2011-01-01.csv │ │ ├── 2011-02-01.csv │ │ ├── 2011-03-01.csv │ │ ├── 2011-04-01.csv │ │ ├── 2011-05-01.csv │ │ ├── 2011-06-01.csv │ │ ├── 2011-07-01.csv │ │ ├── 2011-08-01.csv │ │ ├── 2011-09-01.csv │ │ ├── 2011-10-01.csv │ │ ├── 2011-11-01.csv │ │ ├── 2011-12-01.csv │ │ ├── 2012-01-01.csv │ │ ├── 2012-02-01.csv │ │ ├── 2012-03-01.csv │ │ ├── 2012-04-01.csv │ │ ├── 2012-05-01.csv │ │ ├── 2012-06-01.csv │ │ ├── 2012-07-01.csv │ │ ├── 2012-08-01.csv │ │ ├── 2012-09-01.csv │ │ ├── 2012-10-01.csv │ │ ├── 2012-11-01.csv │ │ ├── 2012-12-01.csv │ │ ├── 2013-01-01.csv │ │ ├── 2013-02-01.csv │ │ ├── 2013-03-01.csv │ │ ├── 2013-04-01.csv │ │ ├── 2013-05-01.csv │ │ ├── 2013-06-01.csv │ │ ├── 2013-07-01.csv │ │ ├── 2013-08-01.csv │ │ ├── 2013-09-01.csv │ │ ├── 2013-10-01.csv │ │ ├── 2013-11-01.csv │ │ ├── 2013-12-01.csv │ │ └── 2014-01-01.csv │ ├── Models │ └── model.rds │ ├── ScoreData.R │ ├── UseRegularExpressionToFilterBadEmailAddresses.R │ └── ch01_R.pbix ├── Chapter05 ├── Chapter03_README.md ├── Data │ ├── AdventureWorksDW_StarSchema.zip │ ├── CreateLoadHistoryLog.sql │ └── SQL_to_Populate_database.sql ├── Python │ ├── ReadLog_DimDate.py │ ├── ReadLog_DimProduct.py │ ├── ReadLog_DimPromotion.py │ ├── ReadLog_DimSalesTerritory.py │ ├── ReadLog_FactInternetSales.py │ └── ch03_Python.pbix └── R │ ├── ReadLog_DimDate.R │ ├── ReadLog_DimProduct.R │ ├── ReadLog_DimPromotion.R │ ├── 
ReadLog_DimSalesTerritory.R │ ├── ReadLog_FactInternetSales.R │ └── ch03_R.pbix ├── Chapter06 ├── Python │ ├── GetVariableInfo.py │ ├── PythonScript.py │ └── ch04_Python.pbix ├── R │ ├── CodeSnippets.R │ ├── GetData.R │ ├── RScript.R │ ├── RScript_Old.R │ ├── RandomSnippets.R │ ├── ch04_R.pbix │ ├── population_rawdata.csv │ ├── race.csv │ └── v17.csv └── README.md ├── Chapter07 ├── Data │ ├── Comments.csv │ ├── DimEmployee.csv │ ├── Production_Grouped.csv │ └── yelp_training_set_review_sample.csv ├── Python │ ├── CleanComments.py │ ├── CountWords.py │ ├── ScrubNameScript.py │ └── monitoredProducts.py └── R │ ├── CleanComments.R │ ├── CountWordAndSentences.R │ ├── ScrubNameScript.R │ └── monitoredProducts.R ├── Chapter08 ├── Data │ └── EmployeeList.csv ├── Python │ ├── DistanceCalculation_Custom.py │ ├── DistanceCalculation_HaversineFunction.py │ └── GeocodeAddress.py └── R │ ├── DistanceCalculation_Custom.R │ ├── DistanceCalculation_distHaversine.R │ └── GeocodeAddresses.R ├── Chapter09 ├── Data │ ├── BostonHousingInfo.csv │ ├── SentimentAndToneAnalysisData.csv │ └── yelp_sample_goes_hear.txt ├── Models │ ├── model_Python.pkl │ └── model_R.rds ├── Python │ ├── GetYelpReviewSample.py │ ├── ScoreData.py │ ├── Sentiment.py │ └── Tone.py └── R │ └── ScoreData.R ├── Chapter10 ├── Data │ ├── Delete.txt │ └── GetYelpReviewSample.py ├── Databases │ ├── BostonHousingData.bak │ ├── CalculateDistance.bak │ └── Delete.txt ├── Python │ ├── AddModelToDatabase.py │ ├── Models │ │ └── model.pkl │ ├── calcDistance_Python.sql │ ├── getPythonLibraries.sql │ ├── getSentiments_Python.sql │ └── uspPredictHousePrices_Python.sql └── R │ ├── AddModelStoredProcedure.sql │ ├── AddModelToDatabase.R │ ├── Models │ └── delete.txt │ ├── calcDistinace_R.sql │ ├── getRPackages.sql │ ├── getSentiment_R.sql │ └── uspPredictHousePrices_R.sql ├── Contributing.md ├── LICENSE.txt ├── ReadMe.md └── _Introduction └── pbi.yml /.gitattributes: 
--------------------------------------------------------------------------------
1 | # Auto detect text files and perform LF normalization
2 | * text=auto
3 |
--------------------------------------------------------------------------------
/9781484258286.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Apress/adv-analytics-in-power-bi-w-r-and-python/1047beaa566a4aa7866fa41fd51dcc5e151b4d58/9781484258286.jpg
--------------------------------------------------------------------------------
/Chapter01/Listing1-11.R:
--------------------------------------------------------------------------------
1 | # Listing 1-11: scatter plot of price against carat, colored by clarity, with a
2 | # title, subtitle, caption, named axes and dollar-formatted y-axis labels.
3 | # NOTE(review): this listing attaches no packages and does not create plot.data;
4 | # presumably it is run after code such as Listing1-13.R has done both - confirm.
5 | ggplot(plot.data, aes(carat, price)) +
6 |   geom_point(aes(color = clarity)) +
7 |   labs(
8 |     title = "Chart Title",
9 |     subtitle = "Chart Subtitle",
10 |     caption = "Chart Caption"
11 |   ) +
12 |   scale_x_continuous(name = "Size in Carats") +
13 |   scale_y_continuous(
14 |     name = "Price ($)",
15 |     labels = dollar_format()
16 |   ) +
17 |   theme_minimal() +
18 |   # hjust = 0.5 centers the title and subtitle; hjust = 1 right-aligns the caption
19 |   theme(
20 |     plot.title = element_text(hjust = 0.5),
21 |     plot.subtitle = element_text(hjust = 0.5),
22 |     plot.caption = element_text(hjust = 1)
23 |   )
--------------------------------------------------------------------------------
/Chapter01/Listing1-13.R:
--------------------------------------------------------------------------------
1 | # Listing 1-13: scatter plot of 200 randomly sampled color "D" diamonds.
2 | # No seed is set in this listing, so sample_n() draws a different sample per run.
3 | library(tidyverse)
4 | library(scales)
5 | plot.data <-
6 |   diamonds %>%
7 |   filter(color == "D") %>%
8 |   sample_n(200)
9 |
10 | ggplot(plot.data, aes(carat, price)) +
11 |   # color = "blue" sits inside aes(), so ggplot2 maps it as a data value instead
12 |   # of setting the point color; Listing1-14.R passes it outside aes().
13 |   geom_point(aes(color = "blue")) +
14 |   labs(
15 |     title = "Chart Title",
16 |     subtitle = "Chart Subtitle",
17 |     caption = "Chart Caption"
18 |   ) +
19 |   scale_x_continuous(name = "Size in Carats") +
20 |   scale_y_continuous(
21 |     name = "Price ($)",
22 |     labels = dollar_format()
23 |   ) +
24 |   theme_minimal() +
25 |   theme(
26 |     plot.title = element_text(hjust = 0.5),
27 |     plot.subtitle = element_text(hjust = 0.5),
28 |     plot.caption = element_text(hjust = 1)
29 |   )
--------------------------------------------------------------------------------
/Chapter01/Listing1-14.R:
--------------------------------------------------------------------------------
1 | # Listing 1-14: same chart as Listing1-13.R, but color = "blue" is passed to
2 | # geom_point() directly (outside aes()), i.e. as a fixed setting for all points.
3 | library(tidyverse)
4 | library(scales)
5 |
6 | plot.data <-
7 |   diamonds %>%
8 |   filter(color == "D") %>%
9 |   sample_n(200)
10 |
11 | ggplot(plot.data, aes(carat, price)) +
12 |   geom_point(color = "blue") +
13 |   labs(
14 |     title = "Chart Title",
15 |     subtitle = "Chart Subtitle",
16 |     caption = "Chart Caption"
17 |   ) +
18 |   scale_x_continuous(name = "Size in Carats") +
19 |   scale_y_continuous(
20 |     name = "Price ($)",
21 |     labels = dollar_format()
22 |   ) +
23 |   theme_minimal() +
24 |   theme(
25 |     plot.title = element_text(hjust = 0.5),
26 |     plot.subtitle = element_text(hjust = 0.5),
27 |     plot.caption = element_text(hjust = 1)
28 |   )
--------------------------------------------------------------------------------
/Chapter01/Listing1-15.R:
--------------------------------------------------------------------------------
1 | # Listing 1-15: bar chart of the summed Freq per Class of the Titanic data.
2 | library(tidyverse)
3 |
4 | # Titanic is converted to a data frame, then Freq is totalled for each Class
5 | plot.data <-
6 |   data.frame(Titanic) %>%
7 |   group_by(Class) %>%
8 |   summarize(`Total Freq` = sum(Freq))
9 |
10 | ggplot(plot.data, aes(x = Class, y = `Total Freq`)) +
11 |   # stat = "identity": bar heights come from the y values, not from row counts
12 |   geom_bar(stat = "identity") +
13 |   labs(title = "geom_bar") +
14 |   theme_minimal()
--------------------------------------------------------------------------------
/Chapter01/Listing1-16.R:
--------------------------------------------------------------------------------
1 | # Listing 1-16: histogram of 100 simulated weights, one per name in `names`.
2 | library(tidyverse)
3 | # Fixed seed so the sample() call below returns the same draw on every run
4 | set.seed(50)
5 | # 100 sampling probabilities, positionally matched to the 100 values of `weights`
6 | probs <- c(0.0033, 0.0033, 0.0033, 0.0033, 0.0033, 0.0033, 0.0033, 0.0033,
7 |   0.0033, 0.0033, 0.0033, 0.0033, 0.0033, 0.0033, 0.0033, 0.01, 0.01, 0.01,
8 |   0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.015, 0.015, 0.015, 0.015,
9 |   0.015, 0.015, 0.015, 0.015, 0.015, 0.015, 0.0133, 0.0133, 0.0133, 0.0133,
10 |   0.0133, 0.0133, 0.0133, 0.0133, 0.0133, 0.0133, 0.0133, 0.0133, 0.0133,
11 |   0.0133, 0.0133, 0.0133, 0.0133, 0.0133, 0.0133, 0.0133, 0.0133, 0.0133,
12 |   0.0133, 0.0133, 0.0133, 0.0133, 0.0133, 0.0133, 0.0133, 0.0133, 0.017,
13 |   0.015, 0.015, 0.015, 0.015, 0.015, 0.015, 0.015, 0.015, 0.015, 0.01, 0.01,
14 |   0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.0033, 0.0033, 0.0033,
15 |   0.0033, 0.0033, 0.0033, 0.0033, 0.0033, 0.0033, 0.0033, 0.0033, 0.0033,
16 |   0.0033, 0.0033, 0.0033)
17 | # Candidate weights 265, 266, ..., 364
18 | weights <- 265:364
19 | # 100 names; they become the `names` column of plot.data
20 | names <- c("Liam Galles", "Noah Raymond", "William Hammontree",
21 |   "James Zaremba", "Oliver Zurcher", "Benjamin Hilker", "Elijah Loken",
22 |   "Lucas Lewter", "Mason Straus", "Logan Work", "Alexander Jarret",
23 |   "Ethan Wey", "Jacob Adolphsen", "Michael Solt", "Daniel Welcome",
24 |   "Henry Portman", "Jackson Tichenor", "Sebastian Free", "Aiden Papp",
25 |   "Matthew Lenzi", "Samuel Rinaldo", "David Goines", "Joseph Asuncion",
26 |   "Carter Philhower", "Owen Freeborn", "Wyatt Ice",
27 |   "John Mcguckin", "Jack Soden", "Luke Humfeld", "Jayden Natera",
28 |   "Dylan Galles", "Grayson Raymond", "Levi Hammontree", "Isaac Zaremba",
29 |   "Gabriel Zurcher", "Julian Hilker", "Mateo Loken", "Anthony Lewter",
30 |   "Jaxon Straus", "Lincoln Work", "Joshua Jarret", "Christopher Wey",
31 |   "Andrew Adolphsen", "Theodore Solt", "Caleb Welcome",
32 |   "Ryan Portman", "Asher Tichenor", "Nathan Free", "Thomas Papp",
33 |   "Leo Lenzi", "Isaiah Rinaldo", "Charles Goines", "Josiah Asuncion",
34 |   "Hudson Philhower", "Christian Freeborn", "Hunter Ice",
35 |   "Connor Mcguckin", "Eli Soden", "Ezra Humfeld", "Aaron Natera",
36 |   "Landon Galles", "Adrian Raymond", "Jonathan Hammontree",
37 |   "Nolan Zaremba", "Jeremiah Zurcher", "Easton Hilker", "Elias Loken",
38 |   "Colton Lewter", "Cameron Straus", "Carson Work", "Robert Jarret",
39 |   "Angel Wey", "Maverick Adolphsen", "Nicholas Solt", "Dominic Welcome",
40 |   "Jaxson Portman", "Greyson Tichenor", "Adam Free", "Ian Papp",
41 |   "Austin Lenzi", "Santiago Rinaldo", "Jordan Goines", "Cooper Asuncion",
42 |   "Brayden Philhower", "Roman Freeborn", "Evan Ice", "Ezekiel Mcguckin",
43 |   "Xavier Soden", "Jose Humfeld", "Jace Natera", "Jameson Adolphsen",
44 |   "Leonardo Solt", "Bryson Welcome", "Axel Portman", "Everett Tichenor",
45 |   "Parker Free", "Kayden Papp", "Miles Lenzi", "Sawyer Rinaldo", "Jason Goines"
46 | )
47 |
48 | # Draw 100 weights with replacement, weighted by probs
49 | linemen_weights <- sample(weights, 100, replace = TRUE, prob= probs)
50 | plot.data <- data.frame(names, linemen_weights)
51 |
52 | ggplot(plot.data, aes(linemen_weights)) +
53 |   geom_histogram(binwidth = 20) +
54 |   labs(title = "geom_histogram") +
55 |   theme_minimal()
--------------------------------------------------------------------------------
/Chapter01/Listing1-17.R:
--------------------------------------------------------------------------------
1 | # Listing 1-17: line chart of points per date for three players, one line each.
2 | library(tidyverse)
3 | library(lubridate)
4 |
5 | players <- c("Kobe Bryant", "Pau Gasol", "Lamar Odom")
6 | # NOTE(review): `lakers` is not created here; presumably it is the data set
7 | # that ships with lubridate - confirm.
8 | # Keep made shots by the three LAL players, parse `date` with ymd(), then total
9 | # the points for every player/date pair.
10 | plot.data <- lakers %>%
11 |   filter(result == "made" & team == "LAL" & player %in% players) %>%
12 |   mutate(date = ymd(date)) %>%
13 |   group_by(player, date) %>%
14 |   summarize(points = sum(points))
15 |
16 | ggplot(plot.data, aes(x=date,y=points,color=player)) +
17 |   geom_line() +
18 |   labs(title = "geom_line") +
19 |   theme_minimal()
--------------------------------------------------------------------------------
/Chapter01/Listing1-18.R:
--------------------------------------------------------------------------------
1 | # Listing 1-18: scatter plot of price against carat with a fitted linear-model
2 | # line (geom_smooth(method = "lm")) drawn on top of the points.
3 | library(tidyverse)
4 |
5 | # Fixed seed so sample_n() returns the same 200 rows on every run
6 | set.seed(1)
7 |
8 | plot.data <-
9 |   diamonds %>%
10 |   filter(color == "D") %>%
11 |   sample_n(200)
12 |
13 | ggplot(plot.data, aes(x=carat, y=price)) +
14 |   geom_point() +
15 |   geom_smooth(method ="lm") +
16 |   labs(title = "geom_point & geom_smooth") +
17 |   theme_minimal()
--------------------------------------------------------------------------------
/Chapter01/VisualCodeTemplate.R:
--------------------------------------------------------------------------------
1 | # Skeleton for an R visual: run the chart code when the data test passes,
2 | # otherwise draw an empty plot that carries only a title.
3 | # <"data test"> and <"Code for R Visual"> are placeholders to be replaced; the
4 | # file is not runnable R until they are.
5 | if (<"data test">) {
6 |
7 |   #<"Code for R Visual">
8 |
9 | } else {
10 |
11 |   # Fallback: blank canvas; put the message for the report user in `main`
12 |   plot.new()
13 |   title(main = "")
14 |
15 | }
--------------------------------------------------------------------------------
/Chapter02/BubbleChart/BubbleChartCode.R:
--------------------------------------------------------------------------------
1 | # Input load. Please do not change #
2 | # The folder below exists only on the author's machine. Switch to it when it is
3 | # present; otherwise input.csv is read from the current working directory.
4 | inputDir <- "C:/Users/ryanw/OneDrive - Diesel Analytics/Professional/Clients/Apress/Book/Part2_ShapingData/Chapter09/BubbleChart"
5 | if (dir.exists(inputDir)) setwd(inputDir)
6 | `dataset` = read.csv('input.csv', check.names = FALSE, encoding = "UTF-8", blank.lines.skip = FALSE);
7 | # Original Script. Please update your script content here and once completed copy below section back to the original editing window #
8 | # The following code to create a dataframe and remove duplicated rows is always executed and acts as a preamble for your script:
9 |
10 | # dataset <- data.frame(ID, Year, FN, LN, Weight, Height, Rusing Yards, Division, Conference)
11 | # dataset <- unique(dataset)
12 |
13 | # Paste or type your script code here:
14 | # Bubble chart for a single report year: Weight on x, Height on y, bubble size =
15 | # `Rusing Yards`, outline color = Conference, fill color = Division.
16 | # "Rusing Yards" is spelled exactly as the column is named in input.csv.
17 | library(tidyverse)
18 | library(ggrepel)
19 | library(ggthemes)
20 |
21 | requiredColumns <- c("Conference", "Division", "FN", "Height", "ID", "LN", "Rusing Yards", "Weight", "Year")
22 | # Sort both sides so the check does not depend on the literal above being kept
23 | # in sorted order.
24 | columnTest <- identical(sort(colnames(dataset)), sort(requiredColumns))
25 | reportYear <- unique(dataset$Year)
26 |
27 | # The chart is only drawn for the expected columns and exactly one Year.
28 | # && short-circuits and always yields a single TRUE/FALSE for if().
29 | if (columnTest && length(reportYear) == 1) {
30 |
31 |   # Bubble label: first initial of FN, a period and a space, then LN
32 |   chartData <-
33 |     dataset %>%
34 |     mutate(Name = paste0(substring(FN, 1, 1), ". ", LN))
35 |
36 |   divisionColors <- c("East"="#56B4E9", "West"="#33FAFF", "North"="#F0E442", "South"="#8B8D8D")
37 |   conferenceColors <- c("USL"="#000000", "AL"="#FC4E07")
38 |   # NOTE(review): the title says "Quad Chart" although this script lives in
39 |   # the BubbleChart folder - confirm the intended wording before changing it.
40 |   chartTitle <- paste("Runningback Quad Chart for", reportYear, sep = " ")
41 |
42 |   p <- ggplot(
43 |     chartData,
44 |     aes(
45 |       x = Weight, y = Height, size = `Rusing Yards`,
46 |       color = Conference, fill = Division, stroke = 2,
47 |       label = Name
48 |     )
49 |   ) +
50 |     # shape 21 is a fillable circle: `color` draws the outline, `fill` the inside
51 |     geom_point(shape = 21) +
52 |     geom_text_repel(size = 5) +
53 |     scale_color_manual(values = conferenceColors) +
54 |     scale_fill_manual(values = divisionColors) +
55 |     ggtitle(chartTitle) +
56 |     theme_few()
57 |   # Print explicitly so the chart is also drawn when the script is source()d
58 |   print(p)
59 |
60 | } else {
61 |
62 |   # Fallback: empty canvas carrying only an explanatory message
63 |   plot.new()
64 |   title("The data supplied did not meet the requirements of the chart.")
65 |
66 | }
--------------------------------------------------------------------------------
/Chapter02/BubbleChart/FakeData.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Apress/adv-analytics-in-power-bi-w-r-and-python/1047beaa566a4aa7866fa41fd51dcc5e151b4d58/Chapter02/BubbleChart/FakeData.xlsx
--------------------------------------------------------------------------------
/Chapter02/BubbleChart/FakeFootballLeagueData.csv:
--------------------------------------------------------------------------------
1 | ID,Year,Key,FN,LN,Weight,Height,Rusing Yards,Division,Conference
2 | 1,2016,Bradford_Pena,Bradford,Pena,197,71,898,East,AL
3 | 2,2016,Adrian_Ellis,Adrian,Ellis,237,74,1375,West,USL
4 | 3,2016,Stuart_Vaughn,Stuart,Vaughn,201,67,1539,East,USL
5 | 4,2016,Bryon_Hardy,Bryon,Hardy,223,75,1645,East,USL
6 | 5,2016,Darius_Curry,Darius,Curry,212,72,1576,West,AL
7 | 6,2016,Clint_Mcdonald,Clint,Mcdonald,225,71,959,West,USL
8 | 7,2016,Johnnie_Mcdaniel,Johnnie,Mcdaniel,203,74,981,East,USL
9 | 8,2016,Archer_Lamble,Archer,Lamble,233,74,1542,West,USL
10 | 9,2016,Wayne_Tucker,Wayne,Tucker,224,68,1260,East,USL
11 | 
10,2016,Damian_Beck,Damian,Beck,209,71,636,North,USL 12 | 11,2016,Anthony_Gonzales,Anthony,Gonzales,243,75,1470,West,USL 13 | 12,2016,Hudson_Hollinworth,Hudson,Hollinworth,209,68,642,West,USL 14 | 13,2016,Jed_Hammond,Jed,Hammond,232,74,1681,West,USL 15 | 14,2016,Hudson_Onslow,Hudson,Onslow,188,69,1465,East,AL 16 | 15,2016,Lamont_Walton,Lamont,Walton,191,70,1031,East,USL 17 | 16,2016,Jack_Potter,Jack,Potter,238,72,937,West,USL 18 | 17,2016,Jarod_Norman,Jarod,Norman,212,74,1464,South,USL 19 | 18,2016,Mitchell_Boyd,Mitchell,Boyd,215,73,729,West,USL 20 | 19,2016,Beau_Hunt,Beau,Hunt,230,69,903,North,AL 21 | 20,2016,Casey_Graham,Casey,Graham,194,70,1151,West,AL 22 | 21,2016,Barry_Morales,Barry,Morales,196,73,1483,East,AL 23 | 22,2016,Mike_Maxwell,Mike,Maxwell,214,73,1404,West,AL 24 | 23,2016,Darren_Burns,Darren,Burns,231,72,1063,West,AL 25 | 24,2016,Jai_Lopez,Jai,Lopez,197,72,1220,East,AL 26 | 25,2016,Elliott_Barker,Elliott,Barker,236,73,1258,East,AL 27 | 26,2016,Piper_Garcia,Piper,Garcia,215,73,1432,East,AL 28 | 27,2016,Ricky_Gonzales,Ricky,Gonzales,216,71,1484,East,USL 29 | 28,2016,Willie_Wells,Willie,Wells,213,74,700,West,USL 30 | 29,2016,Danny_Simpson,Danny,Simpson,200,70,912,East,USL 31 | 30,2016,Javier_Malone,Javier,Malone,235,75,1377,East,USL 32 | 31,2016,Russell_Sullivan,Russell,Sullivan,228,73,1079,West,USL 33 | 32,2016,Caleb_Ortiz,Caleb,Ortiz,216,70,1185,West,AL 34 | 33,2017,Archer_Lamble,Archer,Lamble,233,74,1542,West,AL 35 | 34,2017,Wayne_Tucker,Wayne,Tucker,224,68,1260,East,USL 36 | 35,2017,Damian_Beck,Damian,Beck,209,71,636,West,USL 37 | 36,2017,Anthony_Gonzales,Anthony,Gonzales,243,75,1470,East,USL 38 | 37,2017,Hudson_Hollinworth,Hudson,Hollinworth,209,68,642,East,AL 39 | 38,2017,Jed_Hammond,Jed,Hammond,232,74,1681,West,USL 40 | 39,2017,Hudson_Onslow,Hudson,Onslow,188,69,1465,East,AL 41 | 40,2017,Lamont_Walton,Lamont,Walton,191,70,1031,East,USL 42 | 41,2017,Jack_Potter,Jack,Potter,238,72,937,West,USL 43 | 
42,2017,Jarod_Norman,Jarod,Norman,212,74,1464,East,AL 44 | 43,2017,Mitchell_Boyd,Mitchell,Boyd,215,73,729,West,USL 45 | 44,2017,Beau_Hunt,Beau,Hunt,230,69,903,West,USL 46 | 45,2017,Casey_Graham,Casey,Graham,194,70,1151,West,USL 47 | 46,2017,Barry_Morales,Barry,Morales,196,73,1483,East,AL 48 | 47,2017,Mike_Maxwell,Mike,Maxwell,214,73,1404,West,AL 49 | 48,2017,Darren_Burns,Darren,Burns,231,72,1063,West,USL 50 | 49,2017,Jai_Lopez,Jai,Lopez,197,72,1220,East,AL 51 | 50,2017,Elliott_Barker,Elliott,Barker,236,73,1258,North,AL 52 | 51,2017,Piper_Garcia,Piper,Garcia,215,73,1432,East,AL 53 | 52,2017,Ricky_Gonzales,Ricky,Gonzales,216,71,1484,East,USL 54 | 53,2017,Willie_Wells,Willie,Wells,213,74,700,West,USL 55 | 54,2017,Danny_Simpson,Danny,Simpson,200,70,912,North,AL 56 | 55,2017,Javier_Malone,Javier,Malone,235,75,1377,East,USL 57 | 56,2017,Russell_Sullivan,Russell,Sullivan,228,73,1079,West,USL 58 | 57,2017,Caleb_Ortiz,Caleb,Ortiz,216,70,1185,West,AL 59 | 58,2017,Ian_Griffin,Ian,Griffin,229,70,1498,West,AL 60 | 59,2017,Taj_Shand,Taj,Shand,203,75,1509,East,USL 61 | 60,2017,Garry_Guzman,Garry,Guzman,215,70,1057,South,AL 62 | 61,2017,Logan_Mendez,Logan,Mendez,198,72,1638,West,USL 63 | 62,2017,Nathanial_Hampton,Nathanial,Hampton,222,73,1348,East,USL 64 | 63,2017,Carl_Myers,Carl,Myers,231,72,713,West,USL 65 | 64,2017,Johnathon_Moody,Johnathon,Moody,235,74,888,West,USL 66 | 65,2018,Jed_Hammond,Jed,Hammond,232,74,1681,North,USL 67 | 66,2018,Hudson_Onslow,Hudson,Onslow,188,69,1465,East,AL 68 | 67,2018,Lamont_Walton,Lamont,Walton,191,70,1031,East,USL 69 | 68,2018,Jack_Potter,Jack,Potter,238,72,937,South,USL 70 | 69,2018,Jarod_Norman,Jarod,Norman,212,74,1464,East,USL 71 | 70,2018,Mitchell_Boyd,Mitchell,Boyd,215,73,729,East,USL 72 | 71,2018,Beau_Hunt,Beau,Hunt,230,69,903,West,USL 73 | 72,2018,Casey_Graham,Casey,Graham,194,70,1151,North,AL 74 | 73,2018,Barry_Morales,Barry,Morales,196,73,1483,East,AL 75 | 74,2018,Mike_Maxwell,Mike,Maxwell,214,73,1404,West,AL 76 | 
75,2018,Darren_Burns,Darren,Burns,231,72,1063,West,AL 77 | 76,2018,Jai_Lopez,Jai,Lopez,197,72,1220,East,AL 78 | 77,2018,Elliott_Barker,Elliott,Barker,236,73,1258,East,AL 79 | 78,2018,Piper_Garcia,Piper,Garcia,215,73,1432,East,AL 80 | 79,2018,Ricky_Gonzales,Ricky,Gonzales,216,71,1484,East,USL 81 | 80,2018,Willie_Wells,Willie,Wells,213,74,700,West,USL 82 | 81,2018,Danny_Simpson,Danny,Simpson,200,70,912,East,USL 83 | 82,2018,Javier_Malone,Javier,Malone,235,75,1377,East,USL 84 | 83,2018,Russell_Sullivan,Russell,Sullivan,228,73,1079,West,USL 85 | 84,2018,Caleb_Ortiz,Caleb,Ortiz,216,70,1185,West,USL 86 | 85,2018,Ian_Griffin,Ian,Griffin,229,70,1498,West,USL 87 | 86,2018,Taj_Shand,Taj,Shand,203,75,1509,East,USL 88 | 87,2018,Garry_Guzman,Garry,Guzman,215,70,1057,East,AL 89 | 88,2018,Logan_Mendez,Logan,Mendez,198,72,1638,East,USL 90 | 89,2018,Nathanial_Hampton,Nathanial,Hampton,222,73,1348,East,AL 91 | 90,2018,Carl_Myers,Carl,Myers,231,72,713,West,AL 92 | 91,2018,Johnathon_Moody,Johnathon,Moody,235,74,888,West,USL 93 | 92,2018,Brett_Long,Brett,Long,196,69,1127,East,AL 94 | 93,2018,Solomon_Joseph,Solomon,Joseph,209,75,832,East,AL 95 | 94,2018,Gabriel_West,Gabriel,West,218,67,978,West,USL 96 | 95,2018,Curtis_Powell,Curtis,Powell,191,71,1589,East,USL 97 | 96,2018,Francisco_Conner,Francisco,Conner,197,70,1303,West,AL 98 | 97,2019,Mike_Maxwell,Mike,Maxwell,214,73,1404,West,AL 99 | 98,2019,Darren_Burns,Darren,Burns,231,72,1063,West,AL 100 | 99,2019,Jai_Lopez,Jai,Lopez,197,72,1220,East,AL 101 | 100,2019,Elliott_Barker,Elliott,Barker,236,73,1258,East,AL 102 | 101,2019,Piper_Garcia,Piper,Garcia,215,73,1432,East,AL 103 | 102,2019,Ricky_Gonzales,Ricky,Gonzales,216,71,1484,East,USL 104 | 103,2019,Willie_Wells,Willie,Wells,213,74,700,West,USL 105 | 104,2019,Danny_Simpson,Danny,Simpson,200,70,912,East,USL 106 | 105,2019,Javier_Malone,Javier,Malone,235,75,1377,North,USL 107 | 106,2019,Russell_Sullivan,Russell,Sullivan,228,73,1079,West,USL 108 | 
107,2019,Caleb_Ortiz,Caleb,Ortiz,216,70,1185,West,AL 109 | 108,2019,Ian_Griffin,Ian,Griffin,229,70,1498,West,AL 110 | 109,2019,Taj_Shand,Taj,Shand,203,75,1509,East,AL 111 | 110,2019,Garry_Guzman,Garry,Guzman,215,70,1057,East,USL 112 | 111,2019,Logan_Mendez,Logan,Mendez,198,72,1638,East,USL 113 | 112,2019,Nathanial_Hampton,Nathanial,Hampton,222,73,1348,East,AL 114 | 113,2019,Carl_Myers,Carl,Myers,231,72,713,West,USL 115 | 114,2019,Johnathon_Moody,Johnathon,Moody,235,74,888,West,USL 116 | 115,2019,Brett_Long,Brett,Long,196,69,1127,East,AL 117 | 116,2019,Solomon_Joseph,Solomon,Joseph,209,75,832,East,USL 118 | 117,2019,Gabriel_West,Gabriel,West,218,67,978,West,USL 119 | 118,2019,Curtis_Powell,Curtis,Powell,191,71,1589,East,USL 120 | 119,2019,Francisco_Conner,Francisco,Conner,197,70,1303,North,AL 121 | 120,2019,Cecil_Love,Cecil,Love,218,69,1578,West,USL 122 | 121,2019,Kristopher_Woods,Kristopher,Woods,208,74,1050,South,USL 123 | 122,2019,Jonathon_Alexander,Jonathon,Alexander,194,70,1439,South,AL 124 | 123,2019,Bobby_Jordan,Bobby,Jordan,220,73,1007,South,USL 125 | 124,2019,Lorenzo_Blake,Lorenzo,Blake,187,68,1493,South,USL 126 | 125,2019,Philip_Washington,Philip,Washington,219,67,770,South,AL 127 | 126,2019,Henry_Forlonge,Henry,Forlonge,236,75,1558,South,AL 128 | 127,2019,Jon_Simmons,Jon,Simmons,212,67,1112,South,USL 129 | 128,2019,Oliver_Patton,Oliver,Patton,220,74,1260,South,AL 130 | -------------------------------------------------------------------------------- /Chapter02/BubbleChart/PBI_FakeFootballLeague.pbix: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Apress/adv-analytics-in-power-bi-w-r-and-python/1047beaa566a4aa7866fa41fd51dcc5e151b4d58/Chapter02/BubbleChart/PBI_FakeFootballLeague.pbix -------------------------------------------------------------------------------- /Chapter02/BubbleChart/input.csv: -------------------------------------------------------------------------------- 1 | 
ID,Year,FN,LN,Weight,Height,Rusing Yards,Division,Conference 2 | 1,2016,Bradford,Pena,197,71,898,East,AL 3 | 2,2016,Adrian,Ellis,237,74,1375,West,USL 4 | 3,2016,Stuart,Vaughn,201,67,1539,East,USL 5 | 4,2016,Bryon,Hardy,223,75,1645,East,USL 6 | 5,2016,Darius,Curry,212,72,1576,West,AL 7 | 6,2016,Clint,Mcdonald,225,71,959,West,USL 8 | 7,2016,Johnnie,Mcdaniel,203,74,981,East,USL 9 | 8,2016,Archer,Lamble,233,74,1542,West,USL 10 | 9,2016,Wayne,Tucker,224,68,1260,East,USL 11 | 10,2016,Damian,Beck,209,71,636,North,USL 12 | 11,2016,Anthony,Gonzales,243,75,1470,West,USL 13 | 12,2016,Hudson,Hollinworth,209,68,642,West,USL 14 | 13,2016,Jed,Hammond,232,74,1681,West,USL 15 | 14,2016,Hudson,Onslow,188,69,1465,East,AL 16 | 15,2016,Lamont,Walton,191,70,1031,East,USL 17 | 16,2016,Jack,Potter,238,72,937,West,USL 18 | 17,2016,Jarod,Norman,212,74,1464,South,USL 19 | 18,2016,Mitchell,Boyd,215,73,729,West,USL 20 | 19,2016,Beau,Hunt,230,69,903,North,AL 21 | 20,2016,Casey,Graham,194,70,1151,West,AL 22 | 21,2016,Barry,Morales,196,73,1483,East,AL 23 | 22,2016,Mike,Maxwell,214,73,1404,West,AL 24 | 23,2016,Darren,Burns,231,72,1063,West,AL 25 | 24,2016,Jai,Lopez,197,72,1220,East,AL 26 | 25,2016,Elliott,Barker,236,73,1258,East,AL 27 | 26,2016,Piper,Garcia,215,73,1432,East,AL 28 | 27,2016,Ricky,Gonzales,216,71,1484,East,USL 29 | 28,2016,Willie,Wells,213,74,700,West,USL 30 | 29,2016,Danny,Simpson,200,70,912,East,USL 31 | 30,2016,Javier,Malone,235,75,1377,East,USL 32 | 31,2016,Russell,Sullivan,228,73,1079,West,USL 33 | 32,2016,Caleb,Ortiz,216,70,1185,West,AL 34 | -------------------------------------------------------------------------------- /Chapter02/Callout/CalloutChart.pbix: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Apress/adv-analytics-in-power-bi-w-r-and-python/1047beaa566a4aa7866fa41fd51dcc5e151b4d58/Chapter02/Callout/CalloutChart.pbix -------------------------------------------------------------------------------- 
/Chapter02/Callout/CalloutChartCode.R:
--------------------------------------------------------------------------------
1 | # Input load. Please do not change #
2 | # setwd("") stops the script with "cannot change working directory", so the
3 | # directory is only changed when a non-empty, existing folder is configured.
4 | # With the empty default, input.csv is read from the current working directory.
5 | inputDir <- ""
6 | if (nzchar(inputDir) && dir.exists(inputDir)) setwd(inputDir)
7 | `dataset` = read.csv('input.csv', check.names = FALSE, encoding = "UTF-8", blank.lines.skip = FALSE);
8 | # Original Script. Please update your script content here and once completed copy below section back to the original editing window #
9 | # The following code to create a dataframe and remove duplicated rows is always executed and acts as a preamble for your script:
10 |
11 | # dataset <- data.frame(Country, Total Expend %, Year)
12 | # dataset <- unique(dataset)
13 |
14 | # Paste or type your script code here:
15 | # Horizontal bar chart of the countries ranked 1-7 by `Total Expend %` for a
16 | # single report year. The rank-1 bar gets its own fill and a text callout that
17 | # compares the largest and smallest plotted values.
18 | library(tidyverse)
19 | library(scales)
20 | library(ggthemes)
21 |
22 | requiredColumns <- c("Country", "Total Expend %", "Year")
23 | # Sort both sides so the check does not depend on the literal above being kept
24 | # in sorted order.
25 | columnTest <- identical(sort(colnames(dataset)), sort(requiredColumns))
26 | reportYear <- unique(dataset$Year)
27 |
28 | # The chart is only drawn for the expected columns and exactly one Year.
29 | # && short-circuits and always yields a single TRUE/FALSE for if().
30 | if (columnTest && length(reportYear) == 1) {
31 |
32 |   # dense_rank() gives tied values the same rank, so more than 7 rows can pass
33 |   # the rank <= 7 filter when there are ties.
34 |   plotdata <-
35 |     dataset %>%
36 |     mutate(
37 |       rank = dense_rank(desc(`Total Expend %`)),
38 |       `Country` = fct_reorder(`Country`, rank, .desc = TRUE),
39 |       callout = rank == 1
40 |     ) %>%
41 |     filter(rank <= 7)
42 |
43 |   countryToAnnotate <- plotdata$`Country`[plotdata$rank == 1]
44 |   # Smallest and largest value among the plotted rows (not the whole data set)
45 |   minExpenditure <- min(plotdata$`Total Expend %`)
46 |   maxExpenditure <- max(plotdata$`Total Expend %`)
47 |   # Countries sharing the extreme value are joined with " & "
48 |   minCountry <-
49 |     plotdata$`Country`[
50 |       which(plotdata$`Total Expend %` == minExpenditure)]
51 |   minCountry <- paste(minCountry, collapse = " & ")
52 |   maxCountry <-
53 |     plotdata$`Country`[
54 |       which(plotdata$`Total Expend %` == maxExpenditure)]
55 |   maxCountry <- paste(maxCountry, collapse = " & ")
56 |   mainTitle <- "% of Government Expenditure on Education"
57 |   subTitle <-
58 |     paste(
59 |       "The top 7 ranked countries in",
60 |       reportYear,
61 |       sep = " ")
62 |   caption <- "Source: https://databank.worldbank.org/source/world-development-indicators"
63 |
64 |   # Callout text: relative difference between the largest and smallest plotted
65 |   # value, wrapped at 25 characters.
66 |   label_val <-
67 |     str_wrap(
68 |       paste(maxCountry, "spent",
69 |             percent((maxExpenditure/minExpenditure-1)),
70 |             "more than", minCountry, sep = " "),
71 |       width = 25
72 |     )
73 |
74 |   p <-
75 |     ggplot(
76 |       data = plotdata,
77 |       aes(x = `Country`, y = `Total Expend %`, fill = callout,
78 |           label = percent(`Total Expend %`))
79 |     ) +
80 |     # geom_col() draws bars whose height is the y value; fill comes from ggplot()
81 |     geom_col() +
82 |     geom_text(nudge_y = -0.05) +
83 |     scale_y_continuous(
84 |       limits = c(0,0.75), labels = NULL, breaks = NULL) +
85 |     coord_flip() +
86 |     # If several countries tie for rank 1, the callout is placed at the first
87 |     annotate(
88 |       "text",
89 |       label = label_val,
90 |       x = countryToAnnotate[1],
91 |       y = maxExpenditure + 0.1) +
92 |     labs(
93 |       title = mainTitle,
94 |       subtitle = subTitle,
95 |       caption = caption
96 |     ) +
97 |     xlab(label = NULL) +
98 |     ylab(label = NULL) +
99 |     # "none" hides the fill legend; guides(fill = FALSE) is deprecated
100 |     guides(fill = "none") +
101 |     theme_few() +
102 |     theme(
103 |       plot.title = element_text(hjust = 0.5),
104 |       plot.subtitle = element_text(hjust = 0.5))
105 |   # Print explicitly so the chart is also drawn when the script is source()d
106 |   print(p)
107 |
108 | } else {
109 |   # Fallback: empty canvas carrying only an explanatory message
110 |   plot.new()
111 |   title("The data supplied did not meet the requirements of the chart.")
112 | }
113 |
--------------------------------------------------------------------------------
/Chapter02/Callout/input.csv:
--------------------------------------------------------------------------------
1 | Country,Total Expend %,Year
2 | Afghanistan,0.141,2013
3 | Albania,0.1212,2013
4 | Andorra,0.0804,2013
5 | Argentina,0.1446,2013
6 | Armenia,0.1114,2013
7 | Aruba,0.2187,2013
8 | Australia,0.1404,2013
9 | Austria,0.1074,2013
10 | Azerbaijan,0.0646,2013
11 | Bahrain,0.0722,2013
12 | Bangladesh,0.1382,2013
13 | Barbados,0.1484,2013
14 | Belarus,0.1227,2013
15 | Belgium,0.1189,2013
16 | Belize,0.2224,2013
17 | Benin,0.2234,2013
18 | Bhutan,0.1617,2013
19 | Bolivia,0.1628,2013
20 | Brazil,0.1559,2013
21 | Bulgaria,0.1144,2013
22 | Burkina Faso,0.1617,2013
23 | Burundi,0.1724,2013
24 | Cabo Verde,0.1478,2013
25 | Cambodia,0.096,2013
26 | 
Cameroon,0.1382,2013 27 | Chad,0.1246,2013 28 | Chile,0.1965,2013 29 | Colombia,0.1691,2013 30 | Comoros,0.1607,2013 31 | "Congo, Dem. Rep.",0.1627,2013 32 | Costa Rica,0.2283,2013 33 | Cote d'Ivoire,0.2157,2013 34 | Croatia,0.0949,2013 35 | Cyprus,0.1536,2013 36 | Czech Republic,0.096,2013 37 | Denmark,0.1522,2013 38 | Ecuador,0.1144,2013 39 | El Salvador,0.1556,2013 40 | Estonia,0.1258,2013 41 | Ethiopia,0.2702,2013 42 | Fiji,0.143,2013 43 | Finland,0.1245,2013 44 | France,0.0961,2013 45 | Gabon,0.0874,2013 46 | "Gambia, The",0.1029,2013 47 | Germany,0.1104,2013 48 | Ghana,0.2122,2013 49 | Guatemala,0.2064,2013 50 | Guinea,0.1413,2013 51 | Guinea-Bissau,0.1619,2013 52 | Honduras,0.1986,2013 53 | "Hong Kong SAR, China",0.203,2013 54 | Hungary,0.0853,2013 55 | Iceland,0.1767,2013 56 | India,0.1405,2013 57 | Indonesia,0.176,2013 58 | "Iran, Islamic Rep.",0.2167,2013 59 | Ireland,0.1321,2013 60 | Israel,0.1459,2013 61 | Italy,0.0816,2013 62 | Jamaica,0.2057,2013 63 | Japan,0.0931,2013 64 | Kenya,0.1914,2013 65 | Kyrgyz Republic,0.1777,2013 66 | Lao PDR,0.1281,2013 67 | Latvia,0.1874,2013 68 | Lebanon,0.0858,2013 69 | Liberia,0.0467,2013 70 | Lithuania,0.1329,2013 71 | "Macao SAR, China",0.1259,2013 72 | Madagascar,0.1399,2013 73 | Malawi,0.2042,2013 74 | Malaysia,0.1945,2013 75 | Maldives,0.1391,2013 76 | Mali,0.1662,2013 77 | Malta,0.1878,2013 78 | Mauritania,0.1141,2013 79 | Mauritius,0.1479,2013 80 | Mexico,0.1691,2013 81 | Monaco,0.0706,2013 82 | Mongolia,0.1232,2013 83 | Mozambique,0.1898,2013 84 | Nepal,0.1757,2013 85 | Netherlands,0.1209,2013 86 | New Zealand,0.1655,2013 87 | Niger,0.1808,2013 88 | Norway,0.1727,2013 89 | Oman,0.1108,2013 90 | Pakistan,0.1151,2013 91 | Peru,0.1522,2013 92 | Poland,0.116,2013 93 | Portugal,0.1057,2013 94 | Qatar,0.1315,2013 95 | Russian Federation,0.1086,2013 96 | Rwanda,0.1686,2013 97 | Sao Tome and Principe,0.1883,2013 98 | Senegal,0.2574,2013 99 | Seychelles,0.0955,2013 100 | Sierra Leone,0.152,2013 101 | 
Singapore,0.1996,2013 102 | Slovak Republic,0.0986,2013 103 | Slovenia,0.1,2013 104 | South Africa,0.1917,2013 105 | South Sudan,0.0323,2013 106 | Spain,0.095,2013 107 | Sri Lanka,0.0942,2013 108 | St. Lucia,0.1385,2013 109 | Sweden,0.1514,2013 110 | Switzerland,0.1521,2013 111 | Thailand,0.1913,2013 112 | Timor-Leste,0.0589,2013 113 | Togo,0.1659,2013 114 | Uganda,0.1174,2013 115 | Ukraine,0.1387,2013 116 | United Kingdom,0.1292,2013 117 | United States,0.1339,2013 118 | Uzbekistan,0.2332,2013 119 | Vietnam,0.1852,2013 120 | Zimbabwe,0.2866,2013 121 | -------------------------------------------------------------------------------- /Chapter02/Forecast/Forecast.pbix: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Apress/adv-analytics-in-power-bi-w-r-and-python/1047beaa566a4aa7866fa41fd51dcc5e151b4d58/Chapter02/Forecast/Forecast.pbix -------------------------------------------------------------------------------- /Chapter02/Forecast/ForecastCode.R: --------------------------------------------------------------------------------
# Input load. Please do not change #
setwd("C:/Users/ryanw/OneDrive - Diesel Analytics/Professional/Clients/Apress/Book/Part2_ShapingData/Chapter09/Forecast")
`dataset` = read.csv('input.csv', check.names = FALSE, encoding = "UTF-8", blank.lines.skip = FALSE);
# Original Script. Please update your script content here and once completed copy below section back to the original editing window #
# The following code to create a dataframe and remove duplicated rows is always executed and acts as a preamble for your script:

# dataset <- data.frame(Year, Quarter, Month, Day, Lamar Jackson)
# dataset <- unique(dataset)

# Paste or type your script code here:
# Fits a prophet model to one quarterback's log10 page views and draws the
# forecast stacked above the first three component plots of that model.
library(tidyverse)
library(prophet)
library(ggthemes)
library(gridExtra)
library(lubridate)
library(rlang)

# I needed to load rlang. Look into this package.

# Column names the visual expects, written in the order sort() is expected
# to return them.
expectedColumns <- c("Date", "Page Views", "Page Views (Log10)", "Quarterback")
hasExpectedColumns <- isTRUE(all.equal(sort(colnames(dataset)), expectedColumns))

# The title names a single player, so exactly one quarterback must be present.
quarterbacks <- unique(dataset$Quarterback)
inputIsUsable <- length(quarterbacks) == 1 && hasExpectedColumns

if (!inputIsUsable) {

  plot.new()
  title("The data supplied did not meet the requirements of the chart.")

} else {

  forecastTitle <- paste0(quarterbacks, "'s Wikipedia page views forecast analysis")

  # prophet() is fed the parsed date as `ds` and the log10 page views as `y`.
  modelInput <- rename(
    mutate(dataset, Date = ymd(Date)),
    ds = Date,
    y = `Page Views (Log10)`
  )

  model <- prophet(modelInput, yearly.seasonality = TRUE)

  # Extend the history by 365 periods and predict over the whole range.
  horizon <- make_future_dataframe(model, periods = 365)
  predicted <- predict(model, horizon)

  forecastPlot <- plot(model, predicted) + ggtitle(forecastTitle) + theme_few()

  componentPlots <- prophet_plot_components(model, predicted)
  firstComponent <- componentPlots[[1]] + theme_few()
  secondComponent <- componentPlots[[2]] + theme_few()
  thirdComponent <- componentPlots[[3]] + theme_few()

  # One column, four rows: forecast on top, components underneath.
  stackedPlots <- grid.arrange(
    forecastPlot, firstComponent, secondComponent, thirdComponent,
    nrow = 4
  )
  stackedPlots

}
-------------------------------------------------------------------------------- /Chapter02/LineChartWithShade/GDP.csv: -------------------------------------------------------------------------------- 1 | Year,GDP 2 | 1947,259.745 3 | 1948,280.366 4 | 1949,270.627 5 | 1950,319.945 6 | 1951,356.178 7 | 1952,380.812 8 | 1953,385.97 9 | 1954,399.734 10 | 1955,437.092 11 | 1956,460.463 12 | 1957,474.864 13 | 1958,499.555 14 | 1959,528.6 15 | 1960,540.197 16 | 1961,580.612 17 | 1962,612.28 18 | 1963,653.938 19 | 1964,697.319 20 | 1965,771.857 21 | 1966,833.302 22 | 1967,881.439 23 | 1968,968.03 24 | 1969,1038.147 25 | 1970,1088.608 26 | 1971,1190.297 27 | 1972,1328.904 28 | 1973,1476.289 29 | 1974,1599.679 30 | 1975,1761.831 31 | 1976,1934.273 32 | 1977,2164.27 33 | 1978,2476.949 34 | 1979,2723.883 35 | 1980,2985.557 36 | 1981,3280.818 37 | 1982,3402.561 38 | 1983,3794.706 39 |
1984,4148.551 40 | 1985,4444.094 41 | 1986,4657.627 42 | 1987,5007.994 43 | 1988,5399.509 44 | 1989,5747.237 45 | 1990,6004.733 46 | 1991,6264.54 47 | 1992,6680.803 48 | 1993,7013.738 49 | 1994,7455.288 50 | 1995,7772.586 51 | 1996,8259.771 52 | 1997,8765.907 53 | 1998,9293.991 54 | 1999,9899.378 55 | 2000,10439.025 56 | 2001,10660.294 57 | 2002,11071.463 58 | 2003,11769.275 59 | 2004,12522.425 60 | 2005,13332.316 61 | 2006,14037.228 62 | 2007,14681.501 63 | 2008,14559.543 64 | 2009,14628.021 65 | 2010,15240.843 66 | 2011,15796.46 67 | 2012,16358.863 68 | 2013,17083.137 69 | 2014,17849.912 70 | 2015,18350.825 71 | 2016,18991.883 72 | 2017,19918.91 73 | 2018,20897.804 74 | -------------------------------------------------------------------------------- /Chapter02/LineChartWithShade/LineChartWithShadePBI - Copy.pbix: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Apress/adv-analytics-in-power-bi-w-r-and-python/1047beaa566a4aa7866fa41fd51dcc5e151b4d58/Chapter02/LineChartWithShade/LineChartWithShadePBI - Copy.pbix -------------------------------------------------------------------------------- /Chapter02/LineChartWithShade/LineChartWithShadePBI.pbix: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Apress/adv-analytics-in-power-bi-w-r-and-python/1047beaa566a4aa7866fa41fd51dcc5e151b4d58/Chapter02/LineChartWithShade/LineChartWithShadePBI.pbix -------------------------------------------------------------------------------- /Chapter02/LineChartWithShade/LineChartWithShadePBICode.R: --------------------------------------------------------------------------------
# Input load. Please do not change #
setwd("C:/Users/ryanw/OneDrive - Diesel Analytics/Professional/Clients/Apress/Book/Part2_ShapingData/Chapter09/LineChartWithShade")
`dataset` = read.csv('input.csv', check.names = FALSE, encoding = "UTF-8", blank.lines.skip = FALSE);
# Original Script. Please update your script content here and once completed copy below section back to the original editing window #
# The following code to create a dataframe and remove duplicated rows is always executed and acts as a preamble for your script:

# dataset <- data.frame(DATE, GetPoliticalLevel, GetPoliticalLevelName)
# dataset <- unique(dataset)

# Paste or type your script code here:
# Line chart of a GDP statistic by year, drawn over background bands that are
# coloured by the party value in GetPoliticalLevel.
library(tidyverse)
library(ggthemes)
library(scales)

# The visual only renders when exactly these columns are supplied.
currentColumns <- sort(colnames(dataset))
requiredColumns <- c("GetGDPStat", "GetGDPStatName", "GetPoliticalLevel", "GetPoliticalLevelName", "Year")
columnTest <- isTRUE(all.equal(currentColumns, requiredColumns))

# Titles are built from these names, so each must have a single value.
politicalLevelName <- unique(dataset$GetPoliticalLevelName)
gdpStatName <- unique(dataset$GetGDPStatName)

if (length(politicalLevelName) == 1 && length(gdpStatName) == 1 && columnTest) {

  # The run detection below works on consecutive rows, so put the rows in
  # year order first instead of relying on the order they arrive in.
  dfPI <- dataset %>% arrange(Year)

  # Variables for dynamic portions of the chart
  isActualGDP <- isTRUE(gdpStatName == "Actual GDP")
  yAxisName <- paste(gdpStatName, if (isActualGDP) "(in Billions)" else "", sep = " ")
  chartTitle <- paste(gdpStatName, "Analysis", sep = " ")
  chartSubtitle <- paste(politicalLevelName, "view", sep = " ")

  # Choosing an axis formatter is a scalar decision between two functions,
  # so use if/else rather than the vectorised ifelse().
  yAxisLabels <- if (isActualGDP) comma_format() else percent_format()

  # Give every run of consecutive years with the same party its own id
  dfPI$GetPoliticalLevel <- as.character(dfPI$GetPoliticalLevel)
  runs <- rle(dfPI$GetPoliticalLevel)
  group_id <- rep(seq_along(runs$lengths), runs$lengths)

  # Create the data set for shade layer: one row per run with its first and
  # last year (the end is pushed out by 0.99 so the band covers the last year)
  dfShadeInfo <-
    cbind(dfPI, group_id) %>%
    transmute(group_id, Year, Party = GetPoliticalLevel) %>%
    group_by(group_id, Party) %>%
    summarize(start = min(Year), end = max(Year) + 0.99) %>%
    ungroup()

  # Create the data set for line chart layer
  dfLineChartInfo <-
    dfPI %>%
    transmute(Year, GetGDPStat)

  # Define the shade colors
  partyColors <- c("Republican" = "red", "Democrat" = "blue", "Tie" = "white")

  # Create the visualization
  p <- ggplot() +
    geom_rect(
      data = dfShadeInfo,
      aes(xmin = start, xmax = end, fill = Party),
      ymin = -Inf, ymax = Inf, alpha = 0.4, color = NA
    ) +
    geom_line(data = dfLineChartInfo, aes(x = Year, y = GetGDPStat)) +
    scale_fill_manual(values = partyColors) +
    scale_y_continuous(labels = yAxisLabels) +
    ylab(yAxisName) +
    xlab("Year") +
    ggtitle(chartTitle, subtitle = chartSubtitle) +
    theme_economist()

  p

} else {

  plot.new()
  title("The data supplied did not meet the requirements of the chart.")

}
-------------------------------------------------------------------------------- /Chapter02/LineChartWithShade/PoliticalInfoWithGDP.csv: -------------------------------------------------------------------------------- 1 | Index,Year,GDP,% GDP Change,Senate Majority,House Majority,President's Party,Overall Majority 2 | 0,1947,259.745,,Republican,Republican,Democrat,Republican 3 | 1,1948,280.366,0.079389401,Republican,Republican,Democrat,Republican 4 | 2,1949,270.627,-0.034736737,Republican,Republican,Democrat,Republican 5 | 3,1950,319.945,0.182236067,Democrat,Democrat,Democrat,Democrat 6 | 4,1951,356.178,0.113247589,Democrat,Democrat,Democrat,Democrat 7 | 5,1952,380.812,0.069162048,Democrat,Democrat,Democrat,Democrat 8 | 6,1953,385.97,0.013544741,Democrat,Democrat,Democrat,Democrat 9 | 7,1954,399.734,0.035660803,Republican,Republican,Republican,Republican 10 | 8,1955,437.092,0.093457149,Republican,Republican,Republican,Republican 11 |
9,1956,460.463,0.053469293,Democrat,Democrat,Republican,Democrat 12 | 10,1957,474.864,0.031275043,Democrat,Democrat,Republican,Democrat 13 | 11,1958,499.555,0.05199594,Democrat,Democrat,Republican,Democrat 14 | 12,1959,528.6,0.058141746,Democrat,Democrat,Republican,Democrat 15 | 13,1960,540.197,0.021939084,Democrat,Democrat,Republican,Democrat 16 | 14,1961,580.612,0.074815299,Democrat,Democrat,Republican,Democrat 17 | 15,1962,612.28,0.054542448,Democrat,Democrat,Democrat,Democrat 18 | 16,1963,653.938,0.068037499,Democrat,Democrat,Democrat,Democrat 19 | 17,1964,697.319,0.066338093,Democrat,Democrat,Democrat,Democrat 20 | 18,1965,771.857,0.106892254,Democrat,Democrat,Democrat,Democrat 21 | 19,1966,833.302,0.079606715,Democrat,Democrat,Democrat,Democrat 22 | 20,1967,881.439,0.057766572,Democrat,Democrat,Democrat,Democrat 23 | 21,1968,968.03,0.098238222,Democrat,Democrat,Democrat,Democrat 24 | 22,1969,1038.147,0.072432673,Democrat,Democrat,Democrat,Democrat 25 | 23,1970,1088.608,0.048606797,Democrat,Democrat,Republican,Democrat 26 | 24,1971,1190.297,0.093411954,Democrat,Democrat,Republican,Democrat 27 | 25,1972,1328.904,0.116447408,Democrat,Democrat,Republican,Democrat 28 | 26,1973,1476.289,0.110907184,Democrat,Democrat,Republican,Democrat 29 | 27,1974,1599.679,0.083581196,Democrat,Democrat,Republican,Democrat 30 | 28,1975,1761.831,0.101365336,Democrat,Democrat,Republican,Democrat 31 | 29,1976,1934.273,0.097876584,Democrat,Democrat,Republican,Democrat 32 | 30,1977,2164.27,0.118906173,Democrat,Democrat,Republican,Democrat 33 | 31,1978,2476.949,0.144473194,Democrat,Democrat,Democrat,Democrat 34 | 32,1979,2723.883,0.099692808,Democrat,Democrat,Democrat,Democrat 35 | 33,1980,2985.557,0.096066534,Democrat,Democrat,Democrat,Democrat 36 | 34,1981,3280.818,0.098896454,Democrat,Democrat,Democrat,Democrat 37 | 35,1982,3402.561,0.037107514,Republican,Democrat,Republican,Republican 38 | 36,1983,3794.706,0.115249954,Republican,Democrat,Republican,Republican 39 | 
37,1984,4148.551,0.093247013,Republican,Democrat,Republican,Republican 40 | 38,1985,4444.094,0.071240055,Republican,Democrat,Republican,Republican 41 | 39,1986,4657.627,0.048048714,Republican,Democrat,Republican,Republican 42 | 40,1987,5007.994,0.075224358,Republican,Democrat,Republican,Republican 43 | 41,1988,5399.509,0.078178009,Democrat,Democrat,Republican,Democrat 44 | 42,1989,5747.237,0.06439993,Democrat,Democrat,Republican,Democrat 45 | 43,1990,6004.733,0.044803442,Democrat,Democrat,Republican,Democrat 46 | 44,1991,6264.54,0.043267036,Democrat,Democrat,Republican,Democrat 47 | 45,1992,6680.803,0.066447497,Democrat,Democrat,Republican,Democrat 48 | 46,1993,7013.738,0.049834578,Democrat,Democrat,Republican,Democrat 49 | 47,1994,7455.288,0.062955018,Democrat,Democrat,Democrat,Democrat 50 | 48,1995,7772.586,0.042560126,Democrat,Democrat,Democrat,Democrat 51 | 49,1996,8259.771,0.062679911,Republican,Republican,Democrat,Republican 52 | 50,1997,8765.907,0.061277244,Republican,Republican,Democrat,Republican 53 | 51,1998,9293.991,0.060242939,Republican,Republican,Democrat,Republican 54 | 52,1999,9899.378,0.065137464,Republican,Republican,Democrat,Republican 55 | 53,2000,10439.025,0.054513223,Republican,Republican,Democrat,Republican 56 | 54,2001,10660.294,0.021196328,Republican,Republican,Democrat,Republican 57 | 55,2002,11071.463,0.038570137,Tie,Republican,Republican,Republican 58 | 56,2003,11769.275,0.063027985,Tie,Republican,Republican,Republican 59 | 57,2004,12522.425,0.063992897,Republican,Republican,Republican,Republican 60 | 58,2005,13332.316,0.064675253,Republican,Republican,Republican,Republican 61 | 59,2006,14037.228,0.052872434,Republican,Republican,Republican,Republican 62 | 60,2007,14681.501,0.045897452,Republican,Republican,Republican,Republican 63 | 61,2008,14559.543,-0.008306916,Tie,Democrat,Republican,Democrat 64 | 62,2009,14628.021,0.004703307,Tie,Democrat,Republican,Democrat 65 | 63,2010,15240.843,0.041893705,Democrat,Democrat,Democrat,Democrat 66 | 
64,2011,15796.46,0.036455792,Democrat,Democrat,Democrat,Democrat 67 | 65,2012,16358.863,0.035603103,Democrat,Republican,Democrat,Democrat 68 | 66,2013,17083.137,0.044274104,Democrat,Republican,Democrat,Democrat 69 | 67,2014,17849.912,0.044884906,Democrat,Republican,Democrat,Democrat 70 | 68,2015,18350.825,0.028062491,Democrat,Republican,Democrat,Democrat 71 | 69,2016,18991.883,0.03493347,Republican,Republican,Democrat,Republican 72 | 70,2017,19918.91,0.048811748,Republican,Republican,Democrat,Republican 73 | 71,2018,20897.804,0.049143954,Republican,Republican,Republican,Republican 74 | -------------------------------------------------------------------------------- /Chapter02/LineChartWithShade/PoliticalInfoWithGDP.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Apress/adv-analytics-in-power-bi-w-r-and-python/1047beaa566a4aa7866fa41fd51dcc5e151b4d58/Chapter02/LineChartWithShade/PoliticalInfoWithGDP.xlsx -------------------------------------------------------------------------------- /Chapter02/LineChartWithShade/input.csv: -------------------------------------------------------------------------------- 1 | Year,GetPoliticalLevel,GetPoliticalLevelName,GetGDPStat,GetGDPStatName 2 | 1947,Republican,Overall,259.745,Actual GDP 3 | 1948,Republican,Overall,280.366,Actual GDP 4 | 1949,Republican,Overall,270.627,Actual GDP 5 | 1950,Democrat,Overall,319.945,Actual GDP 6 | 1951,Democrat,Overall,356.178,Actual GDP 7 | 1952,Democrat,Overall,380.812,Actual GDP 8 | 1953,Democrat,Overall,385.97,Actual GDP 9 | 1954,Republican,Overall,399.734,Actual GDP 10 | 1955,Republican,Overall,437.092,Actual GDP 11 | 1956,Democrat,Overall,460.463,Actual GDP 12 | 1957,Democrat,Overall,474.864,Actual GDP 13 | 1958,Democrat,Overall,499.555,Actual GDP 14 | 1959,Democrat,Overall,528.6,Actual GDP 15 | 1960,Democrat,Overall,540.197,Actual GDP 16 | 1961,Democrat,Overall,580.612,Actual GDP 17 | 1962,Democrat,Overall,612.28,Actual 
GDP 18 | 1963,Democrat,Overall,653.938,Actual GDP 19 | 1964,Democrat,Overall,697.319,Actual GDP 20 | 1965,Democrat,Overall,771.857,Actual GDP 21 | 1966,Democrat,Overall,833.302,Actual GDP 22 | 1967,Democrat,Overall,881.439,Actual GDP 23 | 1968,Democrat,Overall,968.03,Actual GDP 24 | 1969,Democrat,Overall,1038.147,Actual GDP 25 | 1970,Democrat,Overall,1088.608,Actual GDP 26 | 1971,Democrat,Overall,1190.297,Actual GDP 27 | 1972,Democrat,Overall,1328.904,Actual GDP 28 | 1973,Democrat,Overall,1476.289,Actual GDP 29 | 1974,Democrat,Overall,1599.679,Actual GDP 30 | 1975,Democrat,Overall,1761.831,Actual GDP 31 | 1976,Democrat,Overall,1934.273,Actual GDP 32 | 1977,Democrat,Overall,2164.27,Actual GDP 33 | 1978,Democrat,Overall,2476.949,Actual GDP 34 | 1979,Democrat,Overall,2723.883,Actual GDP 35 | 1980,Democrat,Overall,2985.557,Actual GDP 36 | 1981,Democrat,Overall,3280.818,Actual GDP 37 | 1982,Republican,Overall,3402.561,Actual GDP 38 | 1983,Republican,Overall,3794.706,Actual GDP 39 | 1984,Republican,Overall,4148.551,Actual GDP 40 | 1985,Republican,Overall,4444.094,Actual GDP 41 | 1986,Republican,Overall,4657.627,Actual GDP 42 | 1987,Republican,Overall,5007.994,Actual GDP 43 | 1988,Democrat,Overall,5399.509,Actual GDP 44 | 1989,Democrat,Overall,5747.237,Actual GDP 45 | 1990,Democrat,Overall,6004.733,Actual GDP 46 | 1991,Democrat,Overall,6264.54,Actual GDP 47 | 1992,Democrat,Overall,6680.803,Actual GDP 48 | 1993,Democrat,Overall,7013.738,Actual GDP 49 | 1994,Democrat,Overall,7455.288,Actual GDP 50 | 1995,Democrat,Overall,7772.586,Actual GDP 51 | 1996,Republican,Overall,8259.771,Actual GDP 52 | 1997,Republican,Overall,8765.907,Actual GDP 53 | 1998,Republican,Overall,9293.991,Actual GDP 54 | 1999,Republican,Overall,9899.378,Actual GDP 55 | 2000,Republican,Overall,10439.025,Actual GDP 56 | 2001,Republican,Overall,10660.294,Actual GDP 57 | 2002,Republican,Overall,11071.463,Actual GDP 58 | 2003,Republican,Overall,11769.275,Actual GDP 59 | 2004,Republican,Overall,12522.425,Actual 
GDP 60 | 2005,Republican,Overall,13332.316,Actual GDP 61 | 2006,Republican,Overall,14037.228,Actual GDP 62 | 2007,Republican,Overall,14681.501,Actual GDP 63 | 2008,Democrat,Overall,14559.543,Actual GDP 64 | 2009,Democrat,Overall,14628.021,Actual GDP 65 | 2010,Democrat,Overall,15240.843,Actual GDP 66 | 2011,Democrat,Overall,15796.46,Actual GDP 67 | 2012,Democrat,Overall,16358.863,Actual GDP 68 | 2013,Democrat,Overall,17083.137,Actual GDP 69 | 2014,Democrat,Overall,17849.912,Actual GDP 70 | 2015,Democrat,Overall,18350.825,Actual GDP 71 | 2016,Republican,Overall,18991.883,Actual GDP 72 | 2017,Republican,Overall,19918.91,Actual GDP 73 | 2018,Republican,Overall,20897.804,Actual GDP 74 | -------------------------------------------------------------------------------- /Chapter02/Map/DataWrangling.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Apress/adv-analytics-in-power-bi-w-r-and-python/1047beaa566a4aa7866fa41fd51dcc5e151b4d58/Chapter02/Map/DataWrangling.xlsx -------------------------------------------------------------------------------- /Chapter02/Map/MapExample.pbix: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Apress/adv-analytics-in-power-bi-w-r-and-python/1047beaa566a4aa7866fa41fd51dcc5e151b4d58/Chapter02/Map/MapExample.pbix -------------------------------------------------------------------------------- /Chapter02/Map/stateMapCode.R: --------------------------------------------------------------------------------
# Input load. Please do not change #
setwd("C:/Users/ryanw/OneDrive - Diesel Analytics/Professional/Clients/Apress/Book/Part2_ShapingData/Chapter09/Map")
`dataset` = read.csv('input.csv', check.names = FALSE, encoding = "UTF-8", blank.lines.skip = FALSE);
# Original Script. Please update your script content here and once completed copy below section back to the original editing window #
# The following code to create a dataframe and remove duplicated rows is always executed and acts as a preamble for your script:

# dataset <- data.frame(Index, lat, long, County, Total Population)
# dataset <- unique(dataset)

# Paste or type your script code here:
# County map of a single state, filled by population quintile.
library(tidyverse)
library(ggthemes)

# sort() orders strings with the current locale's collation, and these names
# mix upper- and lower-case initials ("lat", "long" vs "State"). Sorting both
# sides keeps the comparison independent of the locale the script runs in.
currentColumns <- sort(colnames(dataset))
requiredColumns <- sort(c("County", "Index", "lat", "long", "State", "Total Population"))
columnTest <- isTRUE(all.equal(currentColumns, requiredColumns))

# The title names one state, so the data must contain exactly one.
state <- str_to_title(unique(dataset$State))

if (length(state) == 1 && columnTest) {

  chartTitle <- paste0(state, "'s County Population Analysis")
  subTitle <- "(the darker shades the higher the population)"

  # Bucket the rows into five equally sized groups by population.
  chartdata <-
    dataset %>%
    mutate(quintile = factor(ntile(`Total Population`, 5)))

  # http://www.stat.columbia.edu/~tzheng/files/Rcolor.pdf
  quintileColors <-
    c(
      "1" = "dodgerblue",
      "2" = "dodgerblue1",
      "3" = "dodgerblue2",
      "4" = "dodgerblue3",
      "5" = "dodgerblue4"
    )

  # One polygon per county; axes carry no information on a map, so hide them.
  ggplot(chartdata, aes(long, lat, group = County, fill = quintile)) +
    geom_polygon(show.legend = FALSE, color = "black") +
    scale_x_continuous(name = NULL, labels = NULL, breaks = NULL) +
    scale_y_continuous(name = NULL, labels = NULL, breaks = NULL) +
    scale_fill_manual(values = quintileColors) +
    coord_quickmap() +
    ggtitle(chartTitle, subtitle = subTitle) +
    theme_map()

} else {

  plot.new()
  title("The data supplied did not meet the requirements of the chart.")

}
-------------------------------------------------------------------------------- /Chapter02/QuadChart/QuadChart.pbix: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/Apress/adv-analytics-in-power-bi-w-r-and-python/1047beaa566a4aa7866fa41fd51dcc5e151b4d58/Chapter02/QuadChart/QuadChart.pbix -------------------------------------------------------------------------------- /Chapter02/QuadChart/chartData.csv: -------------------------------------------------------------------------------- 1 | Game Type,Period,Player,Total Points,Rebounds 2 | away,1,Andrew Bynum,77,32 3 | away,1,D.J. Mbenga,0,1 4 | away,1,Derek Fisher,116,26 5 | away,1,Jordan Farmar,16,7 6 | away,1,Josh Powell,12,16 7 | away,1,Kobe Bryant,281,77 8 | away,1,Lamar Odom,115,90 9 | away,1,Luke Walton,41,29 10 | away,1,Pau Gasol,260,116 11 | away,1,Sasha Vujacic,7,7 12 | away,1,Shannon Brown,2,1 13 | away,1,Trevor Ariza,74,31 14 | away,1,Vladimir Radmanovic,38,16 15 | away,2,Andrew Bynum,75,38 16 | away,2,Chris Mihm,9,6 17 | away,2,D.J. Mbenga,19,13 18 | away,2,Derek Fisher,92,19 19 | away,2,Jordan Farmar,78,16 20 | away,2,Josh Powell,47,27 21 | away,2,Kobe Bryant,232,33 22 | away,2,Lamar Odom,104,56 23 | away,2,Luke Walton,45,20 24 | away,2,Pau Gasol,153,74 25 | away,2,Sasha Vujacic,68,21 26 | away,2,Shannon Brown,7,1 27 | away,2,Trevor Ariza,91,51 28 | away,2,Vladimir Radmanovic,29,10 29 | away,3,Andrew Bynum,84,42 30 | away,3,Derek Fisher,123,23 31 | away,3,Jordan Farmar,18,9 32 | away,3,Josh Powell,18,10 33 | away,3,Kobe Bryant,315,56 34 | away,3,Lamar Odom,124,84 35 | away,3,Luke Walton,39,20 36 | away,3,Pau Gasol,196,114 37 | away,3,Sasha Vujacic,17,4 38 | away,3,Shannon Brown,2,0 39 | away,3,Trevor Ariza,61,33 40 | away,3,Vladimir Radmanovic,37,15 41 | away,4,Adam Morrison,4,1 42 | away,4,Andrew Bynum,32,17 43 | away,4,Chris Mihm,10,8 44 | away,4,D.J. 
Mbenga,8,5 45 | away,4,Derek Fisher,49,15 46 | away,4,Jordan Farmar,72,16 47 | away,4,Josh Powell,47,32 48 | away,4,Kobe Bryant,287,46 49 | away,4,Lamar Odom,104,55 50 | away,4,Luke Walton,42,21 51 | away,4,Pau Gasol,153,84 52 | away,4,Sasha Vujacic,65,30 53 | away,4,Shannon Brown,19,4 54 | away,4,Trevor Ariza,101,46 55 | away,4,Vladimir Radmanovic,9,11 56 | away,5,Kobe Bryant,0,1 57 | away,5,Lamar Odom,4,2 58 | away,5,Pau Gasol,5,1 59 | home,1,Andrew Bynum,105,78 60 | home,1,D.J. Mbenga,2,1 61 | home,1,Derek Fisher,115,30 62 | home,1,Jordan Farmar,36,10 63 | home,1,Josh Powell,16,4 64 | home,1,Kobe Bryant,257,57 65 | home,1,Lamar Odom,89,87 66 | home,1,Luke Walton,43,17 67 | home,1,Pau Gasol,246,121 68 | home,1,Sasha Vujacic,29,7 69 | home,1,Shannon Brown,6,2 70 | home,1,Trevor Ariza,63,22 71 | home,1,Vladimir Radmanovic,54,22 72 | home,2,Adam Morrison,2,2 73 | home,2,Andrew Bynum,108,57 74 | home,2,Chris Mihm,0,1 75 | home,2,D.J. Mbenga,14,8 76 | home,2,Derek Fisher,65,17 77 | home,2,Jordan Farmar,62,17 78 | home,2,Josh Powell,37,34 79 | home,2,Kobe Bryant,201,36 80 | home,2,Lamar Odom,108,88 81 | home,2,Luke Walton,30,21 82 | home,2,Pau Gasol,145,55 83 | home,2,Sasha Vujacic,108,20 84 | home,2,Shannon Brown,11,3 85 | home,2,Trevor Ariza,113,64 86 | home,2,Vladimir Radmanovic,24,10 87 | home,3,Andrew Bynum,104,70 88 | home,3,Chris Mihm,2,1 89 | home,3,Derek Fisher,130,38 90 | home,3,Jordan Farmar,22,10 91 | home,3,Josh Powell,6,8 92 | home,3,Kobe Bryant,310,68 93 | home,3,Lamar Odom,99,61 94 | home,3,Luke Walton,46,25 95 | home,3,Pau Gasol,198,112 96 | home,3,Sasha Vujacic,25,7 97 | home,3,Shannon Brown,6,2 98 | home,3,Trevor Ariza,73,32 99 | home,3,Vladimir Radmanovic,46,21 100 | home,4,Adam Morrison,4,5 101 | home,4,Andrew Bynum,78,32 102 | home,4,Chris Mihm,15,18 103 | home,4,D.J. 
Mbenga,18,3 104 | home,4,Derek Fisher,54,11 105 | home,4,Jordan Farmar,95,25 106 | home,4,Josh Powell,58,39 107 | home,4,Kobe Bryant,181,35 108 | home,4,Lamar Odom,99,74 109 | home,4,Luke Walton,24,18 110 | home,4,Pau Gasol,117,74 111 | home,4,Sasha Vujacic,107,32 112 | home,4,Shannon Brown,5,7 113 | home,4,Trevor Ariza,127,55 114 | home,4,Vladimir Radmanovic,27,7 115 | home,4,Yue Sun,4,0 116 | home,5,Derek Fisher,4,1 117 | home,5,Kobe Bryant,11,1 118 | home,5,Lamar Odom,2,2 119 | home,5,Luke Walton,0,1 120 | home,5,Pau Gasol,0,1 121 | -------------------------------------------------------------------------------- /Chapter02/QuadChart/createChartData.R: --------------------------------------------------------------------------------
# Builds chartData.csv: points and rebounds per game type, period and player
# for LAL rows of the `lakers` data set.
library(tidyverse)
library(lubridate)

setwd("C:/Users/ryanw/OneDrive - Diesel Analytics/Professional/Clients/Apress/Book/Part2_ShapingData/Chapter09/QuadChart")

chartData <-
  lakers %>%
  # Keep named LAL players' made shots / free throws and their rebounds.
  filter(
    player != "" &
      team == "LAL" &
      (
        (result == "made" & etype %in% c("shot", "free throw")) |
          etype == "rebound"
      )
  ) %>%
  # Flag rebound rows so they can be counted with sum().
  mutate(
    rebound = ifelse(etype == "rebound", 1, 0)
  ) %>%
  group_by(game_type, period, player) %>%
  summarize(
    `Total Points` = sum(points),
    Rebounds = sum(rebound)
  ) %>%
  rename(
    `Game Type` = game_type,
    Period = period,
    Player = player
  )

# R is case-sensitive: the object built above is `chartData`, not `chartdata`.
write_csv(chartData, "chartData.csv")
-------------------------------------------------------------------------------- /Chapter02/QuadChart/input.csv: -------------------------------------------------------------------------------- 1 | Player,Rebounds,Total Points 2 | Adam Morrison,8,10 3 | Andrew Bynum,366,663 4 | Chris Mihm,34,36 5 | D.J.
Mbenga,31,61 6 | Derek Fisher,180,748 7 | Jordan Farmar,110,399 8 | Josh Powell,170,241 9 | Kobe Bryant,410,2075 10 | Lamar Odom,599,848 11 | Luke Walton,172,310 12 | Pau Gasol,752,1473 13 | Sasha Vujacic,128,426 14 | Shannon Brown,20,58 15 | Trevor Ariza,334,703 16 | Vladimir Radmanovic,112,264 17 | Yue Sun,0,4 18 | -------------------------------------------------------------------------------- /Chapter02/QuadChart/quadChartCode.R: --------------------------------------------------------------------------------
# Input load. Please do not change #
setwd("C:/Users/ryanw/OneDrive - Diesel Analytics/Professional/Clients/Apress/Book/Part2_ShapingData/Chapter09/QuadChart")
`dataset` = read.csv('input.csv', check.names = FALSE, encoding = "UTF-8", blank.lines.skip = FALSE);
# Original Script. Please update your script content here and once completed copy below section back to the original editing window
##############################

# Quadrant chart: every player is placed by rebounds (x) and total points (y),
# both rescaled to [-10, 10] so the axes cross at the middle of each range.
library(tidyverse)
library(ggrepel)
library(ggthemes)
library(scales)
library(stringi)

# Each player may appear only once, and exactly these columns must be present.
noPlayerDups <- length(unique(dataset$Player)) == length(dataset$Player)
currentColumns <- sort(colnames(dataset))
requiredColumns <- c("Player", "Rebounds", "Total Points")
columnTest <- isTRUE(all.equal(currentColumns, requiredColumns))

if (noPlayerDups && columnTest) {

  chart.title <- "LA Lakers player comparison"
  chartsubtitle <- "(2008 - 2009 Season)"
  chart_source <- "Source: lubridate package"

  # Map both measures onto the same -10..10 scale, rounded to one decimal.
  graph_data <-
    dataset %>%
    mutate(
      Scaled.Rebounds =
        round(rescale(Rebounds, to = c(-10, 10)), 1)
      , Scaled.TotalPoints =
        round(rescale(`Total Points`, to = c(-10, 10)), 1)
    )

  p <- ggplot(graph_data, aes(x = Scaled.Rebounds, y = Scaled.TotalPoints)) +
    geom_point() +
    geom_label_repel(aes(label = Player), size = 4, show.legend = FALSE) +
    geom_hline(yintercept = 0) +
    geom_vline(xintercept = 0) +

    # quad labels
    annotate("text", x = -5, y = -11, label = "Average", alpha = 0.2, size = 6) +
    annotate("text", x = -5, y = 11, label = "High Scorer", alpha = 0.2, size = 6) +
    annotate("text", x = 5, y = -11, label = "High Rebounder", alpha = 0.2, size = 6) +
    annotate("text", x = 5, y = 11, label = "Beast Mode", alpha = 0.2, size = 6) +

    # Squares for quad labels
    annotate("rect", xmin = -3.5, xmax = -6.5, ymin = -11.5, ymax = -10.5, alpha = .2) +
    annotate("rect", xmin = -3.5, xmax = -6.5, ymin = 10.5, ymax = 11.5, alpha = .2) +
    annotate("rect", xmin = 3.5, xmax = 6.5, ymin = -11.5, ymax = -10.5, alpha = .2) +
    annotate("rect", xmin = 3.5, xmax = 6.5, ymin = 10.5, ymax = 11.5, alpha = .2) +

    # Shade lower left quadrant
    annotate("rect", xmin = -Inf, xmax = 0.0, ymin = -Inf, ymax = 0, alpha = 0.1, fill = "lightskyblue") +

    # Shade upper right quadrant
    annotate("rect", xmin = 0.0, xmax = Inf, ymin = 0.0, ymax = Inf, alpha = 0.1, fill = "lightskyblue") +

    # Titles (axis label spelling corrected: "Rebounding")
    xlab(bquote("Rebounding" ~ symbol('\256'))) +
    ylab(bquote("Scoring" ~ symbol('\256'))) +
    labs(title = chart.title, subtitle = chartsubtitle, caption = chart_source) +

    # Prettying things up
    theme_tufte() +
    theme(
      plot.title = element_text(hjust = 0.5, size = 25)
      , plot.subtitle = element_text(hjust = 0.5, size = 20)
      , panel.border = element_rect(colour = "black", size = 2, fill = NA)
      , axis.title.x = element_text(hjust = 0.1, size = 18)
      , axis.title.y = element_text(hjust = 0.1, size = 18)
    ) +
    scale_x_continuous(labels = NULL, breaks = NULL) +
    scale_y_continuous(labels = NULL, breaks = NULL)

  p

} else {

  plot.new()
  title("The data supplied did not meet the requirements of the chart.")

}
-------------------------------------------------------------------------------- /Chapter02/RegressionLine/RegressionLine.R: --------------------------------------------------------------------------------
# Input load. Please do not change #
setwd("C:/Users/ryanw/OneDrive - Diesel Analytics/Professional/Clients/Apress/Book/Part2_ShapingData/Chapter09/RegressionLine/")
`dataset` = read.csv('input.csv', check.names = FALSE, encoding = "UTF-8", blank.lines.skip = FALSE);
# Original Script. Please update your script content here and once completed copy below section back to the original editing window #
# The following code to create a dataframe and remove duplicated rows is always executed and acts as a preamble for your script:

# dataset <- data.frame(height, weight)
# dataset <- unique(dataset)

# Paste or type your script code here:
# Scatter plot of height against weight with a fitted linear-model line.
library(tidyverse)
library(ggthemes)

# The visual needs exactly the two columns below and nothing else.
expectedColumns <- c("height", "weight")
hasExpectedColumns <- isTRUE(all.equal(sort(colnames(dataset)), expectedColumns))

if (!hasExpectedColumns) {

  plot.new()
  title("The data supplied did not meet the requirements of the chart.")

} else {

  # Text sizes for the title block and the axis titles.
  textSizes <- theme(
    axis.title = element_text(size = 20),
    plot.title = element_text(size = 30),
    plot.subtitle = element_text(size = 20)
  )

  regressionPlot <-
    ggplot(dataset, aes(x = weight, y = height)) +
    geom_point() +
    geom_smooth(method='lm') +
    ggtitle(
      "Average Weight by Heights for American Women",
      subtitle = "(with added regression line and confidence interval)"
    ) +
    theme_tufte() +
    textSizes

  regressionPlot

}
-------------------------------------------------------------------------------- /Chapter02/RegressionLine/RegressionLine.pbix: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/Apress/adv-analytics-in-power-bi-w-r-and-python/1047beaa566a4aa7866fa41fd51dcc5e151b4d58/Chapter02/RegressionLine/RegressionLine.pbix -------------------------------------------------------------------------------- /Chapter02/RegressionLine/input.csv: -------------------------------------------------------------------------------- 1 | height,weight 2 | 58,115 3 | 59,117 4 | 60,120 5 | 61,123 6 | 62,126 7 | 63,129 8 | 64,132 9 | 65,135 10 | 66,139 11 | 67,142 12 | 68,146 13 | 69,150 14 | 70,154 15 | 71,159 16 | 72,164 17 | -------------------------------------------------------------------------------- /Chapter02/RegressionLine/women.csv: -------------------------------------------------------------------------------- 1 | height,weight 2 | 58,115 3 | 59,117 4 | 60,120 5 | 61,123 6 | 62,126 7 | 63,129 8 | 64,132 9 | 65,135 10 | 66,139 11 | 67,142 12 | 68,146 13 | 69,150 14 | 70,154 15 | 71,159 16 | 72,164 17 | -------------------------------------------------------------------------------- /Chapter03/Chapter01_README.md: -------------------------------------------------------------------------------- 1 | Put chapter description here -------------------------------------------------------------------------------- /Chapter03/Python_Code/CodeToCreateModel.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pandas as pd 3 | import joblib 4 | from sklearn import datasets, linear_model 5 | from sklearn.metrics import mean_squared_error, r2_score 6 | from sklearn.model_selection import train_test_split 7 | 8 | os.chdir("..") 9 | housing = pd.read_csv("./Data/Boston_ModelData.csv") 10 | 11 | X = housing[["crim","rm","tax","lstat"]] 12 | y = housing["medv"] 13 | 14 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25) 15 | 16 | model = linear_model.LinearRegression() 17 | model.fit(X_train, y_train) 18 | 19 | y_pred = model.predict(X_test) 20 | 
print(r2_score(y_test, y_pred)) #At run produced a 0.73 R^2 21 | 22 | joblib.dump(model,"model.pkl") -------------------------------------------------------------------------------- /Chapter03/Python_Code/CombineRollingMonths.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pandas as pd 3 | from dateutil.relativedelta import relativedelta 4 | 5 | os.chdir("") 6 | monthly_reports = os.listdir("./Data/SalesData") 7 | 8 | d = {'monthly_reports': monthly_reports} 9 | df = pd.DataFrame(d) 10 | df["date_format"] = pd.to_datetime( 11 | df.monthly_reports.str.replace(".csv","")) 12 | 13 | min_month = df.date_format.max() - relativedelta(months=23) 14 | 15 | reports_to_read = df[df["date_format"]>=min_month] 16 | 17 | df_output = pd.concat( 18 | map( 19 | pd.read_csv, 20 | "./Data/SalesData/"+reports_to_read["monthly_reports"])) -------------------------------------------------------------------------------- /Chapter03/Python_Code/Data/BostonHousingInfo.csv: -------------------------------------------------------------------------------- 1 | id,crim,zn,indus,chas,nox,rm,age,dis,rad,tax,ptratio,black,lstat,medv 2 | 1,0.31533,0,6.2,0,0.504,8.266,78.3,2.8944,8,307,17.4,385.05,4.14,44.8 3 | 2,0.01301,35,1.52,0,0.442,7.241,49.3,7.0379,1,284,15.5,394.74,5.49,32.7 4 | 3,1.34284,0,19.58,0,0.605,6.066,100,1.7573,5,403,14.7,353.89,6.43,24.3 5 | 4,18.0846,0,18.1,0,0.679,6.434,100,1.8347,24,666,20.2,27.25,29.05,7.2 6 | 5,5.82115,0,18.1,0,0.713,6.513,89.9,2.8016,24,666,20.2,393.82,10.29,20.2 7 | 6,13.0751,0,18.1,0,0.58,5.713,56.7,2.8237,24,666,20.2,396.9,14.76,20.1 8 | 7,0.07013,0,13.89,0,0.55,6.642,85.1,3.4211,5,276,16.4,392.78,9.69,28.7 9 | 8,12.2472,0,18.1,0,0.584,5.837,59.7,1.9976,24,666,20.2,24.65,15.69,10.2 10 | 9,0.06417,0,5.96,0,0.499,5.933,68.2,3.3603,5,279,19.2,396.9,9.68,18.9 11 | 10,17.8667,0,18.1,0,0.671,6.223,100,1.3861,24,666,20.2,393.74,21.78,10.2 12 | 
11,4.66883,0,18.1,0,0.713,5.976,87.9,2.5806,24,666,20.2,10.48,19.01,12.7 13 | 12,0.10008,0,2.46,0,0.488,6.563,95.6,2.847,3,193,17.8,396.9,5.68,32.5 14 | 13,19.6091,0,18.1,0,0.671,7.313,97.9,1.3163,24,666,20.2,396.9,13.44,15 15 | 14,5.87205,0,18.1,0,0.693,6.405,96,1.6768,24,666,20.2,396.9,19.37,12.5 16 | 15,0.16211,20,6.96,0,0.464,6.24,16.3,4.429,3,223,18.6,396.9,6.59,25.2 17 | 16,8.64476,0,18.1,0,0.693,6.193,92.6,1.7912,24,666,20.2,396.9,15.17,13.8 18 | 17,0.01381,80,0.46,0,0.422,7.875,32,5.6484,4,255,14.4,394.23,2.97,50 19 | 18,10.6718,0,18.1,0,0.74,6.459,94.8,1.9879,24,666,20.2,43.06,23.98,11.8 20 | 19,5.58107,0,18.1,0,0.713,6.436,87.9,2.3158,24,666,20.2,100.19,16.22,14.3 21 | 20,5.66637,0,18.1,0,0.74,6.219,100,2.0048,24,666,20.2,395.69,16.59,18.4 22 | 21,3.32105,0,19.58,1,0.871,5.403,100,1.3216,5,403,14.7,396.9,26.82,13.4 23 | 22,0.14866,0,8.56,0,0.52,6.727,79.9,2.7778,5,384,20.9,394.76,9.42,27.5 24 | 23,0.05059,0,4.49,0,0.449,6.389,48,4.7794,3,247,18.5,396.9,9.62,23.9 25 | 24,0.01538,90,3.75,0,0.394,7.454,34.2,6.3361,3,244,15.9,386.34,3.11,44 26 | 25,8.24809,0,18.1,0,0.713,7.393,99.3,2.4527,24,666,20.2,375.87,16.74,17.8 27 | 26,5.73116,0,18.1,0,0.532,7.061,77,3.4106,24,666,20.2,395.28,7.01,25 28 | 27,13.6781,0,18.1,0,0.74,5.935,87.9,1.8206,24,666,20.2,68.95,34.02,8.4 29 | 28,0.03466,35,6.06,0,0.4379,6.031,23.3,6.6407,1,304,16.9,362.25,7.83,19.4 30 | 29,0.17783,0,9.69,0,0.585,5.569,73.5,2.3999,6,391,19.2,395.77,15.1,17.5 31 | 30,1.15172,0,8.14,0,0.538,5.701,95,3.7872,4,307,21,358.77,18.35,13.1 32 | 31,0.62976,0,8.14,0,0.538,5.949,61.8,4.7075,4,307,21,396.9,8.26,20.4 33 | 32,0.54452,0,21.89,0,0.624,6.151,97.9,1.6687,4,437,21.2,396.9,18.46,17.8 34 | 33,0.26838,0,9.69,0,0.585,5.794,70.6,2.8927,6,391,19.2,396.9,14.1,18.3 35 | 34,0.14932,25,5.13,0,0.453,5.741,66.2,7.2254,8,284,19.7,395.11,13.15,18.7 36 | 35,0.0795,60,1.69,0,0.411,6.579,35.9,10.7103,4,411,18.3,370.78,5.49,24.1 37 | 36,0.08187,0,2.89,0,0.445,7.82,36.9,3.4952,2,276,18,393.53,3.57,43.8 38 | 
37,0.3494,0,9.9,0,0.544,5.972,76.7,3.1025,4,304,18.4,396.24,9.97,20.3 39 | 38,0.10959,0,11.93,0,0.573,6.794,89.3,2.3889,1,273,21,393.45,6.48,22 40 | 39,22.0511,0,18.1,0,0.74,5.818,92.4,1.8662,24,666,20.2,391.45,22.11,10.5 41 | 40,37.6619,0,18.1,0,0.679,6.202,78.7,1.8629,24,666,20.2,18.82,14.52,10.9 42 | 41,0.52014,20,3.97,0,0.647,8.398,91.5,2.2885,5,264,13,386.86,5.91,48.8 43 | 42,1.12658,0,19.58,1,0.871,5.012,88,1.6102,5,403,14.7,343.28,12.12,15.3 44 | 43,6.28807,0,18.1,0,0.74,6.341,96.4,2.072,24,666,20.2,318.01,17.79,14.9 45 | 44,0.04544,0,3.24,0,0.46,6.144,32.2,5.8736,4,430,16.9,368.57,9.09,19.8 46 | 45,2.14918,0,19.58,0,0.871,5.709,98.5,1.6232,5,403,14.7,261.95,15.79,19.4 47 | 46,8.26725,0,18.1,1,0.668,5.875,89.6,1.1296,24,666,20.2,347.88,8.88,50 48 | 47,0.06047,0,2.46,0,0.488,6.153,68.8,3.2797,3,193,17.8,387.11,13.15,29.6 49 | 48,0.16902,0,25.65,0,0.581,5.986,88.4,1.9929,2,188,19.1,385.02,14.81,21.4 50 | 49,0.38214,0,6.2,0,0.504,8.04,86.5,3.2157,8,307,17.4,387.38,3.13,37.6 51 | 50,0.17134,0,10.01,0,0.547,5.928,88.2,2.4631,6,432,17.8,344.91,15.76,18.3 52 | 51,13.3598,0,18.1,0,0.693,5.887,94.7,1.7821,24,666,20.2,396.9,16.35,12.7 53 | 52,0.20608,22,5.86,0,0.431,5.593,76.5,7.9549,7,330,19.1,372.49,12.5,17.6 54 | 53,0.2498,0,21.89,0,0.624,5.857,98.2,1.6686,4,437,21.2,392.04,21.32,13.3 55 | 54,0.08244,30,4.93,0,0.428,6.481,18.5,6.1899,6,300,16.6,379.41,6.36,23.7 56 | 55,0.06466,70,2.24,0,0.4,6.345,20.1,7.8278,5,358,14.8,368.24,4.97,22.5 57 | 56,5.69175,0,18.1,0,0.583,6.114,79.8,3.5459,24,666,20.2,392.68,14.98,19.1 58 | 57,0.34109,0,7.38,0,0.493,6.415,40.1,4.7211,5,287,19.6,396.9,6.12,25 59 | 58,0.55007,20,3.97,0,0.647,7.206,91.6,1.9301,5,264,13,387.89,8.1,36.5 60 | 59,0.05789,12.5,6.07,0,0.409,5.878,21.4,6.498,4,345,18.9,396.21,8.1,22 61 | 60,8.71675,0,18.1,0,0.693,6.471,98.8,1.7257,24,666,20.2,391.98,17.12,13.1 62 | 61,15.288,0,18.1,0,0.671,6.649,93.3,1.3449,24,666,20.2,363.02,23.24,13.9 63 | 62,0.15038,0,25.65,0,0.581,5.856,97,1.9444,2,188,19.1,370.31,25.41,17.3 
64 | 63,0.13158,0,10.01,0,0.547,6.176,72.5,2.7301,6,432,17.8,393.3,12.04,21.2 65 | 64,4.64689,0,18.1,0,0.614,6.98,67.6,2.5329,24,666,20.2,374.68,11.66,29.8 66 | 65,0.05302,0,3.41,0,0.489,7.079,63.1,3.4145,2,270,17.8,396.06,5.7,28.7 67 | 66,0.04684,0,3.41,0,0.489,6.417,66.1,3.0923,2,270,17.8,392.18,8.81,22.6 68 | 67,0.27957,0,9.69,0,0.585,5.926,42.6,2.3817,6,391,19.2,396.9,13.59,24.5 69 | 68,9.18702,0,18.1,0,0.7,5.536,100,1.5804,24,666,20.2,396.9,23.6,11.3 70 | 69,0.29916,20,6.96,0,0.464,5.856,42.1,4.429,3,223,18.6,388.65,13,21.1 71 | 70,0.10574,0,27.74,0,0.609,5.983,98.8,1.8681,4,711,20.1,390.11,18.07,13.6 72 | 71,0.22438,0,9.69,0,0.585,6.027,79.7,2.4982,6,391,19.2,396.9,14.33,16.8 73 | 72,4.0974,0,19.58,0,0.871,5.468,100,1.4118,5,403,14.7,396.9,26.42,15.6 74 | 73,12.0482,0,18.1,0,0.614,5.648,87.6,1.9512,24,666,20.2,291.55,14.1,20.8 75 | 74,0.12269,0,6.91,0,0.448,6.069,40,5.7209,3,233,17.9,389.39,9.55,21.2 76 | 75,0.10793,0,8.56,0,0.52,6.195,54.4,2.7778,5,384,20.9,393.49,13,21.7 77 | 76,0.40771,0,6.2,1,0.507,6.164,91.3,3.048,8,307,17.4,395.24,21.46,21.7 78 | 77,0.1415,0,6.91,0,0.448,6.169,6.6,5.7209,3,233,17.9,383.37,5.81,25.3 79 | 78,0.03584,80,3.37,0,0.398,6.29,17.8,6.6115,4,337,16.1,396.9,4.67,23.5 80 | 79,4.75237,0,18.1,0,0.713,6.525,86.5,2.4358,24,666,20.2,50.92,18.13,14.1 81 | 80,0.3692,0,9.9,0,0.544,6.567,87.3,3.6023,4,304,18.4,395.69,9.28,23.8 82 | 81,41.5292,0,18.1,0,0.693,5.531,85.4,1.6074,24,666,20.2,329.46,27.38,8.5 83 | 82,0.0136,75,4,0,0.41,5.888,47.6,7.3197,3,469,21.1,396.9,14.8,18.9 84 | 83,0.07165,0,25.65,0,0.581,6.004,84.1,2.1974,2,188,19.1,377.67,14.27,20.3 85 | 84,7.99248,0,18.1,0,0.7,5.52,100,1.5331,24,666,20.2,396.9,24.56,12.3 86 | 85,3.83684,0,18.1,0,0.77,6.251,91.1,2.2955,24,666,20.2,350.65,14.19,19.9 87 | 86,0.10469,40,6.41,1,0.447,7.267,49,4.7872,4,254,17.6,389.25,6.05,33.2 88 | 87,0.11504,0,2.89,0,0.445,6.163,69.6,3.4952,2,276,18,391.83,11.34,21.4 89 | 88,0.22188,20,6.96,1,0.464,7.691,51.8,4.3665,3,223,18.6,390.77,6.58,35.2 90 | 
89,0.08873,21,5.64,0,0.439,5.963,45.7,6.8147,4,243,16.8,395.56,13.45,19.7 91 | 90,0.03237,0,2.18,0,0.458,6.998,45.8,6.0622,3,222,18.7,394.63,2.94,33.4 92 | 91,0.04113,25,4.86,0,0.426,6.727,33.5,5.4007,4,281,19,396.9,5.29,28 93 | 92,20.0849,0,18.1,0,0.7,4.368,91.2,1.4395,24,666,20.2,285.83,30.63,8.8 94 | 93,0.32264,0,21.89,0,0.624,5.942,93.5,1.9669,4,437,21.2,378.25,16.9,17.4 95 | 94,0.02177,82.5,2.03,0,0.415,7.61,15.7,6.27,2,348,14.7,395.38,3.11,42.3 96 | 95,0.13642,0,10.59,0,0.489,5.891,22.3,3.9454,4,277,18.6,396.9,10.87,22.6 97 | 96,0.0187,85,4.15,0,0.429,6.516,27.7,8.5353,4,351,17.9,392.43,6.36,23.1 98 | 97,11.0874,0,18.1,0,0.718,6.411,100,1.8589,24,666,20.2,318.75,15.02,16.7 99 | 98,0.05561,70,2.24,0,0.4,7.041,10,7.8278,5,358,14.8,371.58,4.74,29 100 | 99,0.12204,0,2.89,0,0.445,6.625,57.8,3.4952,2,276,18,357.98,6.65,28.4 101 | 100,0.1146,20,6.96,0,0.464,6.538,58.7,3.9175,3,223,18.6,394.96,7.73,24.4 102 | 101,7.52601,0,18.1,0,0.713,6.417,98.3,2.185,24,666,20.2,304.21,19.31,13 103 | 102,0.06129,20,3.33,1,0.4429,7.645,49.7,5.2119,5,216,14.9,377.07,3.01,46 104 | 103,0.10612,30,4.93,0,0.428,6.095,65.1,6.3361,6,300,16.6,394.62,12.4,20.1 105 | 104,0.09065,20,6.96,1,0.464,5.92,61.5,3.9175,3,223,18.6,391.34,13.65,20.7 106 | 105,0.04379,80,3.37,0,0.398,5.787,31.1,6.6115,4,337,16.1,396.9,10.24,19.4 107 | 106,23.6482,0,18.1,0,0.671,6.38,96.2,1.3861,24,666,20.2,396.9,23.69,13.1 108 | -------------------------------------------------------------------------------- /Chapter03/Python_Code/Data/SalesData/2010-12-01.csv: -------------------------------------------------------------------------------- 1 | Category,OrderDate,DueDate,ShipDate,OrderQuantity,UnitPrice,SalesAmount,TaxAmt,Order Month 2 | Bikes,2010-12-29T05:00:00Z,2011-01-10T05:00:00Z,2011-01-05T05:00:00Z,1,3578.27,3578.27,286.2616,12/1/2010 3 | Bikes,2010-12-29T05:00:00Z,2011-01-10T05:00:00Z,2011-01-05T05:00:00Z,1,3399.99,3399.99,271.9992,12/1/2010 4 | 
Bikes,2010-12-29T05:00:00Z,2011-01-10T05:00:00Z,2011-01-05T05:00:00Z,1,3399.99,3399.99,271.9992,12/1/2010 5 | Bikes,2010-12-29T05:00:00Z,2011-01-10T05:00:00Z,2011-01-05T05:00:00Z,1,699.0982,699.0982,55.9279,12/1/2010 6 | Bikes,2010-12-29T05:00:00Z,2011-01-10T05:00:00Z,2011-01-05T05:00:00Z,1,3399.99,3399.99,271.9992,12/1/2010 7 | Bikes,2010-12-30T05:00:00Z,2011-01-11T05:00:00Z,2011-01-06T05:00:00Z,1,3578.27,3578.27,286.2616,12/1/2010 8 | Bikes,2010-12-30T05:00:00Z,2011-01-11T05:00:00Z,2011-01-06T05:00:00Z,1,3578.27,3578.27,286.2616,12/1/2010 9 | Bikes,2010-12-30T05:00:00Z,2011-01-11T05:00:00Z,2011-01-06T05:00:00Z,1,3374.99,3374.99,269.9992,12/1/2010 10 | Bikes,2010-12-30T05:00:00Z,2011-01-11T05:00:00Z,2011-01-06T05:00:00Z,1,3399.99,3399.99,271.9992,12/1/2010 11 | Bikes,2010-12-31T05:00:00Z,2011-01-12T05:00:00Z,2011-01-07T05:00:00Z,1,3578.27,3578.27,286.2616,12/1/2010 12 | Bikes,2010-12-31T05:00:00Z,2011-01-12T05:00:00Z,2011-01-07T05:00:00Z,1,3578.27,3578.27,286.2616,12/1/2010 13 | Bikes,2010-12-31T05:00:00Z,2011-01-12T05:00:00Z,2011-01-07T05:00:00Z,1,699.0982,699.0982,55.9279,12/1/2010 14 | Bikes,2010-12-31T05:00:00Z,2011-01-12T05:00:00Z,2011-01-07T05:00:00Z,1,3578.27,3578.27,286.2616,12/1/2010 15 | Bikes,2010-12-31T05:00:00Z,2011-01-12T05:00:00Z,2011-01-07T05:00:00Z,1,3578.27,3578.27,286.2616,12/1/2010 16 | -------------------------------------------------------------------------------- /Chapter03/Python_Code/Data/SalesData/2011-01-01.csv: -------------------------------------------------------------------------------- 1 | Category,OrderDate,DueDate,ShipDate,OrderQuantity,UnitPrice,SalesAmount,TaxAmt,Order Month 2 | Bikes,2011-01-01T05:00:00Z,2011-01-13T05:00:00Z,2011-01-08T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 3 | Bikes,2011-01-01T05:00:00Z,2011-01-13T05:00:00Z,2011-01-08T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 4 | Bikes,2011-01-02T05:00:00Z,2011-01-14T05:00:00Z,2011-01-09T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 5 | 
Bikes,2011-01-02T05:00:00Z,2011-01-14T05:00:00Z,2011-01-09T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 6 | Bikes,2011-01-02T05:00:00Z,2011-01-14T05:00:00Z,2011-01-09T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 7 | Bikes,2011-01-02T05:00:00Z,2011-01-14T05:00:00Z,2011-01-09T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 8 | Bikes,2011-01-02T05:00:00Z,2011-01-14T05:00:00Z,2011-01-09T05:00:00Z,1,699.0982,699.0982,55.9279,1/1/2011 9 | Bikes,2011-01-03T05:00:00Z,2011-01-15T05:00:00Z,2011-01-10T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 10 | Bikes,2011-01-03T05:00:00Z,2011-01-15T05:00:00Z,2011-01-10T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 11 | Bikes,2011-01-03T05:00:00Z,2011-01-15T05:00:00Z,2011-01-10T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 12 | Bikes,2011-01-03T05:00:00Z,2011-01-15T05:00:00Z,2011-01-10T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 13 | Bikes,2011-01-04T05:00:00Z,2011-01-16T05:00:00Z,2011-01-11T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 14 | Bikes,2011-01-04T05:00:00Z,2011-01-16T05:00:00Z,2011-01-11T05:00:00Z,1,699.0982,699.0982,55.9279,1/1/2011 15 | Bikes,2011-01-04T05:00:00Z,2011-01-16T05:00:00Z,2011-01-11T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 16 | Bikes,2011-01-05T05:00:00Z,2011-01-17T05:00:00Z,2011-01-12T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 17 | Bikes,2011-01-05T05:00:00Z,2011-01-17T05:00:00Z,2011-01-12T05:00:00Z,1,699.0982,699.0982,55.9279,1/1/2011 18 | Bikes,2011-01-05T05:00:00Z,2011-01-17T05:00:00Z,2011-01-12T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 19 | Bikes,2011-01-06T05:00:00Z,2011-01-18T05:00:00Z,2011-01-13T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 20 | Bikes,2011-01-06T05:00:00Z,2011-01-18T05:00:00Z,2011-01-13T05:00:00Z,1,3399.99,3399.99,271.9992,1/1/2011 21 | Bikes,2011-01-06T05:00:00Z,2011-01-18T05:00:00Z,2011-01-13T05:00:00Z,1,3399.99,3399.99,271.9992,1/1/2011 22 | Bikes,2011-01-06T05:00:00Z,2011-01-18T05:00:00Z,2011-01-13T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 23 | 
Bikes,2011-01-06T05:00:00Z,2011-01-18T05:00:00Z,2011-01-13T05:00:00Z,1,3374.99,3374.99,269.9992,1/1/2011 24 | Bikes,2011-01-06T05:00:00Z,2011-01-18T05:00:00Z,2011-01-13T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 25 | Bikes,2011-01-07T05:00:00Z,2011-01-19T05:00:00Z,2011-01-14T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 26 | Bikes,2011-01-07T05:00:00Z,2011-01-19T05:00:00Z,2011-01-14T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 27 | Bikes,2011-01-07T05:00:00Z,2011-01-19T05:00:00Z,2011-01-14T05:00:00Z,1,3399.99,3399.99,271.9992,1/1/2011 28 | Bikes,2011-01-08T05:00:00Z,2011-01-20T05:00:00Z,2011-01-15T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 29 | Bikes,2011-01-08T05:00:00Z,2011-01-20T05:00:00Z,2011-01-15T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 30 | Bikes,2011-01-08T05:00:00Z,2011-01-20T05:00:00Z,2011-01-15T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 31 | Bikes,2011-01-08T05:00:00Z,2011-01-20T05:00:00Z,2011-01-15T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 32 | Bikes,2011-01-09T05:00:00Z,2011-01-21T05:00:00Z,2011-01-16T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 33 | Bikes,2011-01-09T05:00:00Z,2011-01-21T05:00:00Z,2011-01-16T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 34 | Bikes,2011-01-09T05:00:00Z,2011-01-21T05:00:00Z,2011-01-16T05:00:00Z,1,3399.99,3399.99,271.9992,1/1/2011 35 | Bikes,2011-01-09T05:00:00Z,2011-01-21T05:00:00Z,2011-01-16T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 36 | Bikes,2011-01-10T05:00:00Z,2011-01-22T05:00:00Z,2011-01-17T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 37 | Bikes,2011-01-10T05:00:00Z,2011-01-22T05:00:00Z,2011-01-17T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 38 | Bikes,2011-01-11T05:00:00Z,2011-01-23T05:00:00Z,2011-01-18T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 39 | Bikes,2011-01-11T05:00:00Z,2011-01-23T05:00:00Z,2011-01-18T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 40 | Bikes,2011-01-11T05:00:00Z,2011-01-23T05:00:00Z,2011-01-18T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 41 | 
Bikes,2011-01-11T05:00:00Z,2011-01-23T05:00:00Z,2011-01-18T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 42 | Bikes,2011-01-11T05:00:00Z,2011-01-23T05:00:00Z,2011-01-18T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 43 | Bikes,2011-01-11T05:00:00Z,2011-01-23T05:00:00Z,2011-01-18T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 44 | Bikes,2011-01-11T05:00:00Z,2011-01-23T05:00:00Z,2011-01-18T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 45 | Bikes,2011-01-12T05:00:00Z,2011-01-24T05:00:00Z,2011-01-19T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 46 | Bikes,2011-01-12T05:00:00Z,2011-01-24T05:00:00Z,2011-01-19T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 47 | Bikes,2011-01-12T05:00:00Z,2011-01-24T05:00:00Z,2011-01-19T05:00:00Z,1,699.0982,699.0982,55.9279,1/1/2011 48 | Bikes,2011-01-12T05:00:00Z,2011-01-24T05:00:00Z,2011-01-19T05:00:00Z,1,3374.99,3374.99,269.9992,1/1/2011 49 | Bikes,2011-01-13T05:00:00Z,2011-01-25T05:00:00Z,2011-01-20T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 50 | Bikes,2011-01-13T05:00:00Z,2011-01-25T05:00:00Z,2011-01-20T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 51 | Bikes,2011-01-13T05:00:00Z,2011-01-25T05:00:00Z,2011-01-20T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 52 | Bikes,2011-01-13T05:00:00Z,2011-01-25T05:00:00Z,2011-01-20T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 53 | Bikes,2011-01-14T05:00:00Z,2011-01-26T05:00:00Z,2011-01-21T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 54 | Bikes,2011-01-14T05:00:00Z,2011-01-26T05:00:00Z,2011-01-21T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 55 | Bikes,2011-01-14T05:00:00Z,2011-01-26T05:00:00Z,2011-01-21T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 56 | Bikes,2011-01-14T05:00:00Z,2011-01-26T05:00:00Z,2011-01-21T05:00:00Z,1,3399.99,3399.99,271.9992,1/1/2011 57 | Bikes,2011-01-15T05:00:00Z,2011-01-27T05:00:00Z,2011-01-22T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 58 | Bikes,2011-01-15T05:00:00Z,2011-01-27T05:00:00Z,2011-01-22T05:00:00Z,1,3374.99,3374.99,269.9992,1/1/2011 59 | 
Bikes,2011-01-16T05:00:00Z,2011-01-28T05:00:00Z,2011-01-23T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 60 | Bikes,2011-01-16T05:00:00Z,2011-01-28T05:00:00Z,2011-01-23T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 61 | Bikes,2011-01-16T05:00:00Z,2011-01-28T05:00:00Z,2011-01-23T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 62 | Bikes,2011-01-16T05:00:00Z,2011-01-28T05:00:00Z,2011-01-23T05:00:00Z,1,699.0982,699.0982,55.9279,1/1/2011 63 | Bikes,2011-01-16T05:00:00Z,2011-01-28T05:00:00Z,2011-01-23T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 64 | Bikes,2011-01-16T05:00:00Z,2011-01-28T05:00:00Z,2011-01-23T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 65 | Bikes,2011-01-16T05:00:00Z,2011-01-28T05:00:00Z,2011-01-23T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 66 | Bikes,2011-01-16T05:00:00Z,2011-01-28T05:00:00Z,2011-01-23T05:00:00Z,1,3399.99,3399.99,271.9992,1/1/2011 67 | Bikes,2011-01-17T05:00:00Z,2011-01-29T05:00:00Z,2011-01-24T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 68 | Bikes,2011-01-17T05:00:00Z,2011-01-29T05:00:00Z,2011-01-24T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 69 | Bikes,2011-01-17T05:00:00Z,2011-01-29T05:00:00Z,2011-01-24T05:00:00Z,1,3399.99,3399.99,271.9992,1/1/2011 70 | Bikes,2011-01-17T05:00:00Z,2011-01-29T05:00:00Z,2011-01-24T05:00:00Z,1,699.0982,699.0982,55.9279,1/1/2011 71 | Bikes,2011-01-18T05:00:00Z,2011-01-30T05:00:00Z,2011-01-25T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 72 | Bikes,2011-01-18T05:00:00Z,2011-01-30T05:00:00Z,2011-01-25T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 73 | Bikes,2011-01-18T05:00:00Z,2011-01-30T05:00:00Z,2011-01-25T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 74 | Bikes,2011-01-18T05:00:00Z,2011-01-30T05:00:00Z,2011-01-25T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 75 | Bikes,2011-01-19T05:00:00Z,2011-01-31T05:00:00Z,2011-01-26T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 76 | Bikes,2011-01-19T05:00:00Z,2011-01-31T05:00:00Z,2011-01-26T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 77 | 
Bikes,2011-01-19T05:00:00Z,2011-01-31T05:00:00Z,2011-01-26T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 78 | Bikes,2011-01-19T05:00:00Z,2011-01-31T05:00:00Z,2011-01-26T05:00:00Z,1,3399.99,3399.99,271.9992,1/1/2011 79 | Bikes,2011-01-19T05:00:00Z,2011-01-31T05:00:00Z,2011-01-26T05:00:00Z,1,3374.99,3374.99,269.9992,1/1/2011 80 | Bikes,2011-01-19T05:00:00Z,2011-01-31T05:00:00Z,2011-01-26T05:00:00Z,1,3374.99,3374.99,269.9992,1/1/2011 81 | Bikes,2011-01-19T05:00:00Z,2011-01-31T05:00:00Z,2011-01-26T05:00:00Z,1,3399.99,3399.99,271.9992,1/1/2011 82 | Bikes,2011-01-19T05:00:00Z,2011-01-31T05:00:00Z,2011-01-26T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 83 | Bikes,2011-01-19T05:00:00Z,2011-01-31T05:00:00Z,2011-01-26T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 84 | Bikes,2011-01-19T05:00:00Z,2011-01-31T05:00:00Z,2011-01-26T05:00:00Z,1,3399.99,3399.99,271.9992,1/1/2011 85 | Bikes,2011-01-19T05:00:00Z,2011-01-31T05:00:00Z,2011-01-26T05:00:00Z,1,3399.99,3399.99,271.9992,1/1/2011 86 | Bikes,2011-01-20T05:00:00Z,2011-02-01T05:00:00Z,2011-01-27T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 87 | Bikes,2011-01-20T05:00:00Z,2011-02-01T05:00:00Z,2011-01-27T05:00:00Z,1,699.0982,699.0982,55.9279,1/1/2011 88 | Bikes,2011-01-20T05:00:00Z,2011-02-01T05:00:00Z,2011-01-27T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 89 | Bikes,2011-01-20T05:00:00Z,2011-02-01T05:00:00Z,2011-01-27T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 90 | Bikes,2011-01-20T05:00:00Z,2011-02-01T05:00:00Z,2011-01-27T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 91 | Bikes,2011-01-21T05:00:00Z,2011-02-02T05:00:00Z,2011-01-28T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 92 | Bikes,2011-01-21T05:00:00Z,2011-02-02T05:00:00Z,2011-01-28T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 93 | Bikes,2011-01-21T05:00:00Z,2011-02-02T05:00:00Z,2011-01-28T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 94 | Bikes,2011-01-22T05:00:00Z,2011-02-03T05:00:00Z,2011-01-29T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 95 | 
Bikes,2011-01-22T05:00:00Z,2011-02-03T05:00:00Z,2011-01-29T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 96 | Bikes,2011-01-22T05:00:00Z,2011-02-03T05:00:00Z,2011-01-29T05:00:00Z,1,699.0982,699.0982,55.9279,1/1/2011 97 | Bikes,2011-01-22T05:00:00Z,2011-02-03T05:00:00Z,2011-01-29T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 98 | Bikes,2011-01-23T05:00:00Z,2011-02-04T05:00:00Z,2011-01-30T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 99 | Bikes,2011-01-23T05:00:00Z,2011-02-04T05:00:00Z,2011-01-30T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 100 | Bikes,2011-01-23T05:00:00Z,2011-02-04T05:00:00Z,2011-01-30T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 101 | Bikes,2011-01-23T05:00:00Z,2011-02-04T05:00:00Z,2011-01-30T05:00:00Z,1,3399.99,3399.99,271.9992,1/1/2011 102 | Bikes,2011-01-23T05:00:00Z,2011-02-04T05:00:00Z,2011-01-30T05:00:00Z,1,3399.99,3399.99,271.9992,1/1/2011 103 | Bikes,2011-01-24T05:00:00Z,2011-02-05T05:00:00Z,2011-01-31T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 104 | Bikes,2011-01-24T05:00:00Z,2011-02-05T05:00:00Z,2011-01-31T05:00:00Z,1,3374.99,3374.99,269.9992,1/1/2011 105 | Bikes,2011-01-24T05:00:00Z,2011-02-05T05:00:00Z,2011-01-31T05:00:00Z,1,3374.99,3374.99,269.9992,1/1/2011 106 | Bikes,2011-01-24T05:00:00Z,2011-02-05T05:00:00Z,2011-01-31T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 107 | Bikes,2011-01-24T05:00:00Z,2011-02-05T05:00:00Z,2011-01-31T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 108 | Bikes,2011-01-24T05:00:00Z,2011-02-05T05:00:00Z,2011-01-31T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 109 | Bikes,2011-01-24T05:00:00Z,2011-02-05T05:00:00Z,2011-01-31T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 110 | Bikes,2011-01-24T05:00:00Z,2011-02-05T05:00:00Z,2011-01-31T05:00:00Z,1,3399.99,3399.99,271.9992,1/1/2011 111 | Bikes,2011-01-25T05:00:00Z,2011-02-06T05:00:00Z,2011-02-01T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 112 | Bikes,2011-01-25T05:00:00Z,2011-02-06T05:00:00Z,2011-02-01T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 113 | 
Bikes,2011-01-25T05:00:00Z,2011-02-06T05:00:00Z,2011-02-01T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 114 | Bikes,2011-01-25T05:00:00Z,2011-02-06T05:00:00Z,2011-02-01T05:00:00Z,1,699.0982,699.0982,55.9279,1/1/2011 115 | Bikes,2011-01-25T05:00:00Z,2011-02-06T05:00:00Z,2011-02-01T05:00:00Z,1,699.0982,699.0982,55.9279,1/1/2011 116 | Bikes,2011-01-25T05:00:00Z,2011-02-06T05:00:00Z,2011-02-01T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 117 | Bikes,2011-01-25T05:00:00Z,2011-02-06T05:00:00Z,2011-02-01T05:00:00Z,1,3374.99,3374.99,269.9992,1/1/2011 118 | Bikes,2011-01-25T05:00:00Z,2011-02-06T05:00:00Z,2011-02-01T05:00:00Z,1,699.0982,699.0982,55.9279,1/1/2011 119 | Bikes,2011-01-26T05:00:00Z,2011-02-07T05:00:00Z,2011-02-02T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 120 | Bikes,2011-01-26T05:00:00Z,2011-02-07T05:00:00Z,2011-02-02T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 121 | Bikes,2011-01-26T05:00:00Z,2011-02-07T05:00:00Z,2011-02-02T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 122 | Bikes,2011-01-26T05:00:00Z,2011-02-07T05:00:00Z,2011-02-02T05:00:00Z,1,3374.99,3374.99,269.9992,1/1/2011 123 | Bikes,2011-01-26T05:00:00Z,2011-02-07T05:00:00Z,2011-02-02T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 124 | Bikes,2011-01-27T05:00:00Z,2011-02-08T05:00:00Z,2011-02-03T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 125 | Bikes,2011-01-27T05:00:00Z,2011-02-08T05:00:00Z,2011-02-03T05:00:00Z,1,3374.99,3374.99,269.9992,1/1/2011 126 | Bikes,2011-01-27T05:00:00Z,2011-02-08T05:00:00Z,2011-02-03T05:00:00Z,1,3374.99,3374.99,269.9992,1/1/2011 127 | Bikes,2011-01-27T05:00:00Z,2011-02-08T05:00:00Z,2011-02-03T05:00:00Z,1,699.0982,699.0982,55.9279,1/1/2011 128 | Bikes,2011-01-27T05:00:00Z,2011-02-08T05:00:00Z,2011-02-03T05:00:00Z,1,3374.99,3374.99,269.9992,1/1/2011 129 | Bikes,2011-01-28T05:00:00Z,2011-02-09T05:00:00Z,2011-02-04T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 130 | Bikes,2011-01-28T05:00:00Z,2011-02-09T05:00:00Z,2011-02-04T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 131 
| Bikes,2011-01-28T05:00:00Z,2011-02-09T05:00:00Z,2011-02-04T05:00:00Z,1,699.0982,699.0982,55.9279,1/1/2011 132 | Bikes,2011-01-28T05:00:00Z,2011-02-09T05:00:00Z,2011-02-04T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 133 | Bikes,2011-01-28T05:00:00Z,2011-02-09T05:00:00Z,2011-02-04T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 134 | Bikes,2011-01-29T05:00:00Z,2011-02-10T05:00:00Z,2011-02-05T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 135 | Bikes,2011-01-29T05:00:00Z,2011-02-10T05:00:00Z,2011-02-05T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 136 | Bikes,2011-01-29T05:00:00Z,2011-02-10T05:00:00Z,2011-02-05T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 137 | Bikes,2011-01-29T05:00:00Z,2011-02-10T05:00:00Z,2011-02-05T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 138 | Bikes,2011-01-29T05:00:00Z,2011-02-10T05:00:00Z,2011-02-05T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 139 | Bikes,2011-01-30T05:00:00Z,2011-02-11T05:00:00Z,2011-02-06T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 140 | Bikes,2011-01-30T05:00:00Z,2011-02-11T05:00:00Z,2011-02-06T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 141 | Bikes,2011-01-30T05:00:00Z,2011-02-11T05:00:00Z,2011-02-06T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 142 | Bikes,2011-01-31T05:00:00Z,2011-02-12T05:00:00Z,2011-02-07T05:00:00Z,1,3374.99,3374.99,269.9992,1/1/2011 143 | Bikes,2011-01-31T05:00:00Z,2011-02-12T05:00:00Z,2011-02-07T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 144 | Bikes,2011-01-31T05:00:00Z,2011-02-12T05:00:00Z,2011-02-07T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 145 | Bikes,2011-01-31T05:00:00Z,2011-02-12T05:00:00Z,2011-02-07T05:00:00Z,1,699.0982,699.0982,55.9279,1/1/2011 146 | -------------------------------------------------------------------------------- /Chapter03/Python_Code/Data/SalesData/2011-02-01.csv: -------------------------------------------------------------------------------- 1 | Category,OrderDate,DueDate,ShipDate,OrderQuantity,UnitPrice,SalesAmount,TaxAmt,Order Month 2 | 
Bikes,2011-02-01T05:00:00Z,2011-02-13T05:00:00Z,2011-02-08T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 3 | Bikes,2011-02-01T05:00:00Z,2011-02-13T05:00:00Z,2011-02-08T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 4 | Bikes,2011-02-01T05:00:00Z,2011-02-13T05:00:00Z,2011-02-08T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 5 | Bikes,2011-02-01T05:00:00Z,2011-02-13T05:00:00Z,2011-02-08T05:00:00Z,1,3399.99,3399.99,271.9992,2/1/2011 6 | Bikes,2011-02-01T05:00:00Z,2011-02-13T05:00:00Z,2011-02-08T05:00:00Z,1,3399.99,3399.99,271.9992,2/1/2011 7 | Bikes,2011-02-02T05:00:00Z,2011-02-14T05:00:00Z,2011-02-09T05:00:00Z,1,699.0982,699.0982,55.9279,2/1/2011 8 | Bikes,2011-02-02T05:00:00Z,2011-02-14T05:00:00Z,2011-02-09T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 9 | Bikes,2011-02-02T05:00:00Z,2011-02-14T05:00:00Z,2011-02-09T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 10 | Bikes,2011-02-02T05:00:00Z,2011-02-14T05:00:00Z,2011-02-09T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 11 | Bikes,2011-02-02T05:00:00Z,2011-02-14T05:00:00Z,2011-02-09T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 12 | Bikes,2011-02-02T05:00:00Z,2011-02-14T05:00:00Z,2011-02-09T05:00:00Z,1,699.0982,699.0982,55.9279,2/1/2011 13 | Bikes,2011-02-03T05:00:00Z,2011-02-15T05:00:00Z,2011-02-10T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 14 | Bikes,2011-02-03T05:00:00Z,2011-02-15T05:00:00Z,2011-02-10T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 15 | Bikes,2011-02-03T05:00:00Z,2011-02-15T05:00:00Z,2011-02-10T05:00:00Z,1,699.0982,699.0982,55.9279,2/1/2011 16 | Bikes,2011-02-03T05:00:00Z,2011-02-15T05:00:00Z,2011-02-10T05:00:00Z,1,3399.99,3399.99,271.9992,2/1/2011 17 | Bikes,2011-02-03T05:00:00Z,2011-02-15T05:00:00Z,2011-02-10T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 18 | Bikes,2011-02-03T05:00:00Z,2011-02-15T05:00:00Z,2011-02-10T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 19 | Bikes,2011-02-03T05:00:00Z,2011-02-15T05:00:00Z,2011-02-10T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 20 | 
Bikes,2011-02-03T05:00:00Z,2011-02-15T05:00:00Z,2011-02-10T05:00:00Z,1,3399.99,3399.99,271.9992,2/1/2011 21 | Bikes,2011-02-04T05:00:00Z,2011-02-16T05:00:00Z,2011-02-11T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 22 | Bikes,2011-02-04T05:00:00Z,2011-02-16T05:00:00Z,2011-02-11T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 23 | Bikes,2011-02-04T05:00:00Z,2011-02-16T05:00:00Z,2011-02-11T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 24 | Bikes,2011-02-04T05:00:00Z,2011-02-16T05:00:00Z,2011-02-11T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 25 | Bikes,2011-02-05T05:00:00Z,2011-02-17T05:00:00Z,2011-02-12T05:00:00Z,1,699.0982,699.0982,55.9279,2/1/2011 26 | Bikes,2011-02-05T05:00:00Z,2011-02-17T05:00:00Z,2011-02-12T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 27 | Bikes,2011-02-05T05:00:00Z,2011-02-17T05:00:00Z,2011-02-12T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 28 | Bikes,2011-02-05T05:00:00Z,2011-02-17T05:00:00Z,2011-02-12T05:00:00Z,1,3399.99,3399.99,271.9992,2/1/2011 29 | Bikes,2011-02-06T05:00:00Z,2011-02-18T05:00:00Z,2011-02-13T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 30 | Bikes,2011-02-06T05:00:00Z,2011-02-18T05:00:00Z,2011-02-13T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 31 | Bikes,2011-02-06T05:00:00Z,2011-02-18T05:00:00Z,2011-02-13T05:00:00Z,1,3374.99,3374.99,269.9992,2/1/2011 32 | Bikes,2011-02-06T05:00:00Z,2011-02-18T05:00:00Z,2011-02-13T05:00:00Z,1,3374.99,3374.99,269.9992,2/1/2011 33 | Bikes,2011-02-07T05:00:00Z,2011-02-19T05:00:00Z,2011-02-14T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 34 | Bikes,2011-02-07T05:00:00Z,2011-02-19T05:00:00Z,2011-02-14T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 35 | Bikes,2011-02-07T05:00:00Z,2011-02-19T05:00:00Z,2011-02-14T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 36 | Bikes,2011-02-07T05:00:00Z,2011-02-19T05:00:00Z,2011-02-14T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 37 | Bikes,2011-02-07T05:00:00Z,2011-02-19T05:00:00Z,2011-02-14T05:00:00Z,1,3374.99,3374.99,269.9992,2/1/2011 38 | 
Bikes,2011-02-07T05:00:00Z,2011-02-19T05:00:00Z,2011-02-14T05:00:00Z,1,3399.99,3399.99,271.9992,2/1/2011 39 | Bikes,2011-02-08T05:00:00Z,2011-02-20T05:00:00Z,2011-02-15T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 40 | Bikes,2011-02-08T05:00:00Z,2011-02-20T05:00:00Z,2011-02-15T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 41 | Bikes,2011-02-08T05:00:00Z,2011-02-20T05:00:00Z,2011-02-15T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 42 | Bikes,2011-02-08T05:00:00Z,2011-02-20T05:00:00Z,2011-02-15T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 43 | Bikes,2011-02-08T05:00:00Z,2011-02-20T05:00:00Z,2011-02-15T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 44 | Bikes,2011-02-09T05:00:00Z,2011-02-21T05:00:00Z,2011-02-16T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 45 | Bikes,2011-02-09T05:00:00Z,2011-02-21T05:00:00Z,2011-02-16T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 46 | Bikes,2011-02-09T05:00:00Z,2011-02-21T05:00:00Z,2011-02-16T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 47 | Bikes,2011-02-09T05:00:00Z,2011-02-21T05:00:00Z,2011-02-16T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 48 | Bikes,2011-02-09T05:00:00Z,2011-02-21T05:00:00Z,2011-02-16T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 49 | Bikes,2011-02-09T05:00:00Z,2011-02-21T05:00:00Z,2011-02-16T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 50 | Bikes,2011-02-10T05:00:00Z,2011-02-22T05:00:00Z,2011-02-17T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 51 | Bikes,2011-02-10T05:00:00Z,2011-02-22T05:00:00Z,2011-02-17T05:00:00Z,1,699.0982,699.0982,55.9279,2/1/2011 52 | Bikes,2011-02-10T05:00:00Z,2011-02-22T05:00:00Z,2011-02-17T05:00:00Z,1,3374.99,3374.99,269.9992,2/1/2011 53 | Bikes,2011-02-10T05:00:00Z,2011-02-22T05:00:00Z,2011-02-17T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 54 | Bikes,2011-02-10T05:00:00Z,2011-02-22T05:00:00Z,2011-02-17T05:00:00Z,1,699.0982,699.0982,55.9279,2/1/2011 55 | Bikes,2011-02-11T05:00:00Z,2011-02-23T05:00:00Z,2011-02-18T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 56 | 
Bikes,2011-02-11T05:00:00Z,2011-02-23T05:00:00Z,2011-02-18T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 57 | Bikes,2011-02-11T05:00:00Z,2011-02-23T05:00:00Z,2011-02-18T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 58 | Bikes,2011-02-11T05:00:00Z,2011-02-23T05:00:00Z,2011-02-18T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 59 | Bikes,2011-02-12T05:00:00Z,2011-02-24T05:00:00Z,2011-02-19T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 60 | Bikes,2011-02-12T05:00:00Z,2011-02-24T05:00:00Z,2011-02-19T05:00:00Z,1,3399.99,3399.99,271.9992,2/1/2011 61 | Bikes,2011-02-12T05:00:00Z,2011-02-24T05:00:00Z,2011-02-19T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 62 | Bikes,2011-02-12T05:00:00Z,2011-02-24T05:00:00Z,2011-02-19T05:00:00Z,1,3399.99,3399.99,271.9992,2/1/2011 63 | Bikes,2011-02-12T05:00:00Z,2011-02-24T05:00:00Z,2011-02-19T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 64 | Bikes,2011-02-12T05:00:00Z,2011-02-24T05:00:00Z,2011-02-19T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 65 | Bikes,2011-02-14T05:00:00Z,2011-02-26T05:00:00Z,2011-02-21T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 66 | Bikes,2011-02-15T05:00:00Z,2011-02-27T05:00:00Z,2011-02-22T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 67 | Bikes,2011-02-15T05:00:00Z,2011-02-27T05:00:00Z,2011-02-22T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 68 | Bikes,2011-02-15T05:00:00Z,2011-02-27T05:00:00Z,2011-02-22T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 69 | Bikes,2011-02-15T05:00:00Z,2011-02-27T05:00:00Z,2011-02-22T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 70 | Bikes,2011-02-15T05:00:00Z,2011-02-27T05:00:00Z,2011-02-22T05:00:00Z,1,3374.99,3374.99,269.9992,2/1/2011 71 | Bikes,2011-02-15T05:00:00Z,2011-02-27T05:00:00Z,2011-02-22T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 72 | Bikes,2011-02-16T05:00:00Z,2011-02-28T05:00:00Z,2011-02-23T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 73 | Bikes,2011-02-16T05:00:00Z,2011-02-28T05:00:00Z,2011-02-23T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 74 | 
Bikes,2011-02-16T05:00:00Z,2011-02-28T05:00:00Z,2011-02-23T05:00:00Z,1,699.0982,699.0982,55.9279,2/1/2011 75 | Bikes,2011-02-16T05:00:00Z,2011-02-28T05:00:00Z,2011-02-23T05:00:00Z,1,699.0982,699.0982,55.9279,2/1/2011 76 | Bikes,2011-02-16T05:00:00Z,2011-02-28T05:00:00Z,2011-02-23T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 77 | Bikes,2011-02-16T05:00:00Z,2011-02-28T05:00:00Z,2011-02-23T05:00:00Z,1,699.0982,699.0982,55.9279,2/1/2011 78 | Bikes,2011-02-16T05:00:00Z,2011-02-28T05:00:00Z,2011-02-23T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 79 | Bikes,2011-02-16T05:00:00Z,2011-02-28T05:00:00Z,2011-02-23T05:00:00Z,1,699.0982,699.0982,55.9279,2/1/2011 80 | Bikes,2011-02-17T05:00:00Z,2011-03-01T05:00:00Z,2011-02-24T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 81 | Bikes,2011-02-17T05:00:00Z,2011-03-01T05:00:00Z,2011-02-24T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 82 | Bikes,2011-02-17T05:00:00Z,2011-03-01T05:00:00Z,2011-02-24T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 83 | Bikes,2011-02-17T05:00:00Z,2011-03-01T05:00:00Z,2011-02-24T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 84 | Bikes,2011-02-17T05:00:00Z,2011-03-01T05:00:00Z,2011-02-24T05:00:00Z,1,3374.99,3374.99,269.9992,2/1/2011 85 | Bikes,2011-02-17T05:00:00Z,2011-03-01T05:00:00Z,2011-02-24T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 86 | Bikes,2011-02-18T05:00:00Z,2011-03-02T05:00:00Z,2011-02-25T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 87 | Bikes,2011-02-18T05:00:00Z,2011-03-02T05:00:00Z,2011-02-25T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 88 | Bikes,2011-02-18T05:00:00Z,2011-03-02T05:00:00Z,2011-02-25T05:00:00Z,1,3374.99,3374.99,269.9992,2/1/2011 89 | Bikes,2011-02-18T05:00:00Z,2011-03-02T05:00:00Z,2011-02-25T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 90 | Bikes,2011-02-18T05:00:00Z,2011-03-02T05:00:00Z,2011-02-25T05:00:00Z,1,3374.99,3374.99,269.9992,2/1/2011 91 | Bikes,2011-02-18T05:00:00Z,2011-03-02T05:00:00Z,2011-02-25T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 92 | 
Bikes,2011-02-18T05:00:00Z,2011-03-02T05:00:00Z,2011-02-25T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 93 | Bikes,2011-02-18T05:00:00Z,2011-03-02T05:00:00Z,2011-02-25T05:00:00Z,1,3374.99,3374.99,269.9992,2/1/2011 94 | Bikes,2011-02-19T05:00:00Z,2011-03-03T05:00:00Z,2011-02-26T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 95 | Bikes,2011-02-19T05:00:00Z,2011-03-03T05:00:00Z,2011-02-26T05:00:00Z,1,3374.99,3374.99,269.9992,2/1/2011 96 | Bikes,2011-02-19T05:00:00Z,2011-03-03T05:00:00Z,2011-02-26T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 97 | Bikes,2011-02-19T05:00:00Z,2011-03-03T05:00:00Z,2011-02-26T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 98 | Bikes,2011-02-19T05:00:00Z,2011-03-03T05:00:00Z,2011-02-26T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 99 | Bikes,2011-02-20T05:00:00Z,2011-03-04T05:00:00Z,2011-02-27T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 100 | Bikes,2011-02-20T05:00:00Z,2011-03-04T05:00:00Z,2011-02-27T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 101 | Bikes,2011-02-20T05:00:00Z,2011-03-04T05:00:00Z,2011-02-27T05:00:00Z,1,3374.99,3374.99,269.9992,2/1/2011 102 | Bikes,2011-02-21T05:00:00Z,2011-03-05T05:00:00Z,2011-02-28T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 103 | Bikes,2011-02-21T05:00:00Z,2011-03-05T05:00:00Z,2011-02-28T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 104 | Bikes,2011-02-21T05:00:00Z,2011-03-05T05:00:00Z,2011-02-28T05:00:00Z,1,3399.99,3399.99,271.9992,2/1/2011 105 | Bikes,2011-02-21T05:00:00Z,2011-03-05T05:00:00Z,2011-02-28T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 106 | Bikes,2011-02-21T05:00:00Z,2011-03-05T05:00:00Z,2011-02-28T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 107 | Bikes,2011-02-21T05:00:00Z,2011-03-05T05:00:00Z,2011-02-28T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 108 | Bikes,2011-02-21T05:00:00Z,2011-03-05T05:00:00Z,2011-02-28T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 109 | Bikes,2011-02-21T05:00:00Z,2011-03-05T05:00:00Z,2011-02-28T05:00:00Z,1,699.0982,699.0982,55.9279,2/1/2011 110 | 
Bikes,2011-02-21T05:00:00Z,2011-03-05T05:00:00Z,2011-02-28T05:00:00Z,1,3399.99,3399.99,271.9992,2/1/2011 111 | Bikes,2011-02-22T05:00:00Z,2011-03-06T05:00:00Z,2011-03-01T05:00:00Z,1,699.0982,699.0982,55.9279,2/1/2011 112 | Bikes,2011-02-22T05:00:00Z,2011-03-06T05:00:00Z,2011-03-01T05:00:00Z,1,3374.99,3374.99,269.9992,2/1/2011 113 | Bikes,2011-02-22T05:00:00Z,2011-03-06T05:00:00Z,2011-03-01T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 114 | Bikes,2011-02-23T05:00:00Z,2011-03-07T05:00:00Z,2011-03-02T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 115 | Bikes,2011-02-23T05:00:00Z,2011-03-07T05:00:00Z,2011-03-02T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 116 | Bikes,2011-02-23T05:00:00Z,2011-03-07T05:00:00Z,2011-03-02T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 117 | Bikes,2011-02-23T05:00:00Z,2011-03-07T05:00:00Z,2011-03-02T05:00:00Z,1,3399.99,3399.99,271.9992,2/1/2011 118 | Bikes,2011-02-23T05:00:00Z,2011-03-07T05:00:00Z,2011-03-02T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 119 | Bikes,2011-02-23T05:00:00Z,2011-03-07T05:00:00Z,2011-03-02T05:00:00Z,1,3399.99,3399.99,271.9992,2/1/2011 120 | Bikes,2011-02-23T05:00:00Z,2011-03-07T05:00:00Z,2011-03-02T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 121 | Bikes,2011-02-24T05:00:00Z,2011-03-08T05:00:00Z,2011-03-03T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 122 | Bikes,2011-02-24T05:00:00Z,2011-03-08T05:00:00Z,2011-03-03T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 123 | Bikes,2011-02-25T05:00:00Z,2011-03-09T05:00:00Z,2011-03-04T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 124 | Bikes,2011-02-25T05:00:00Z,2011-03-09T05:00:00Z,2011-03-04T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 125 | Bikes,2011-02-26T05:00:00Z,2011-03-10T05:00:00Z,2011-03-05T05:00:00Z,1,699.0982,699.0982,55.9279,2/1/2011 126 | Bikes,2011-02-26T05:00:00Z,2011-03-10T05:00:00Z,2011-03-05T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 127 | Bikes,2011-02-26T05:00:00Z,2011-03-10T05:00:00Z,2011-03-05T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 128 | 
Bikes,2011-02-26T05:00:00Z,2011-03-10T05:00:00Z,2011-03-05T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 129 | Bikes,2011-02-26T05:00:00Z,2011-03-10T05:00:00Z,2011-03-05T05:00:00Z,1,699.0982,699.0982,55.9279,2/1/2011 130 | Bikes,2011-02-26T05:00:00Z,2011-03-10T05:00:00Z,2011-03-05T05:00:00Z,1,3374.99,3374.99,269.9992,2/1/2011 131 | Bikes,2011-02-26T05:00:00Z,2011-03-10T05:00:00Z,2011-03-05T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 132 | Bikes,2011-02-26T05:00:00Z,2011-03-10T05:00:00Z,2011-03-05T05:00:00Z,1,699.0982,699.0982,55.9279,2/1/2011 133 | Bikes,2011-02-27T05:00:00Z,2011-03-11T05:00:00Z,2011-03-06T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 134 | Bikes,2011-02-27T05:00:00Z,2011-03-11T05:00:00Z,2011-03-06T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 135 | Bikes,2011-02-27T05:00:00Z,2011-03-11T05:00:00Z,2011-03-06T05:00:00Z,1,3399.99,3399.99,271.9992,2/1/2011 136 | Bikes,2011-02-27T05:00:00Z,2011-03-11T05:00:00Z,2011-03-06T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 137 | Bikes,2011-02-27T05:00:00Z,2011-03-11T05:00:00Z,2011-03-06T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 138 | Bikes,2011-02-27T05:00:00Z,2011-03-11T05:00:00Z,2011-03-06T05:00:00Z,1,3399.99,3399.99,271.9992,2/1/2011 139 | Bikes,2011-02-27T05:00:00Z,2011-03-11T05:00:00Z,2011-03-06T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 140 | Bikes,2011-02-28T05:00:00Z,2011-03-12T05:00:00Z,2011-03-07T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 141 | Bikes,2011-02-28T05:00:00Z,2011-03-12T05:00:00Z,2011-03-07T05:00:00Z,1,3374.99,3374.99,269.9992,2/1/2011 142 | Bikes,2011-02-28T05:00:00Z,2011-03-12T05:00:00Z,2011-03-07T05:00:00Z,1,3374.99,3374.99,269.9992,2/1/2011 143 | Bikes,2011-02-28T05:00:00Z,2011-03-12T05:00:00Z,2011-03-07T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 144 | Bikes,2011-02-28T05:00:00Z,2011-03-12T05:00:00Z,2011-03-07T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 145 | Bikes,2011-02-28T05:00:00Z,2011-03-12T05:00:00Z,2011-03-07T05:00:00Z,1,3374.99,3374.99,269.9992,2/1/2011 146 | 
-------------------------------------------------------------------------------- /Chapter03/Python_Code/Models/model: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Apress/adv-analytics-in-power-bi-w-r-and-python/1047beaa566a4aa7866fa41fd51dcc5e151b4d58/Chapter03/Python_Code/Models/model -------------------------------------------------------------------------------- /Chapter03/Python_Code/ScoreData.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pandas as pd 3 | from sklearn.externals import joblib 4 | from sklearn import linear_model 5 | 6 | model = joblib.load("./Models/model") 7 | boston_housing = pd.read_csv("./Data/BostonHousingInfo.csv") 8 | 9 | model_data = boston_housing[["crim","rm","tax","lstat"]] 10 | 11 | pred_medv = model.predict(model_data) 12 | 13 | final_output = model_data 14 | final_output["pred_mev"] = pred_medv -------------------------------------------------------------------------------- /Chapter03/Python_Code/UseRegularExpressionToFilterBadEmailAddresses.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pandas as pd 3 | 4 | os.chdir("C:/Users/ryanwade44/Downloads/Advanced Analytics in Power BI with R and Python/Chapterx03/R_Code") 5 | goodemails_raw = pd.read_csv("./Data/EmailAddresses.csv") 6 | 7 | email_pattern = "^([a-zA-Z][\\w\\_]{4,20})\\@([a-zA-Z0-9.-]+)\\.([a-zA-Z]{2,3})$" 8 | 9 | goodemails = goodemails_raw[ 10 | goodemails_raw["Email"].str.match(email_pattern)] -------------------------------------------------------------------------------- /Chapter03/Python_Code/ch01__Python.pbix: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Apress/adv-analytics-in-power-bi-w-r-and-python/1047beaa566a4aa7866fa41fd51dcc5e151b4d58/Chapter03/Python_Code/ch01__Python.pbix 
--------------------------------------------------------------------------------
/Chapter03/R_Code/CombineRollingMonths.R:
--------------------------------------------------------------------------------
1 | # Combines the most recent monthly sales reports into one data frame,
2 | # `df_output`. Each report is a CSV file named after a date (<yyyy-mm-dd>.csv).
3 | library(tidyverse)
4 | library(lubridate)
5 | library(stringr)
6 | 
7 | # All file names below are relative to the report folder.
8 | setwd("./Data/SalesData")
9 | 
10 | # Only pick up CSV files; any other file in the folder would fail to parse as
11 | # a date and turn the month comparison below into NA.
12 | monthly_reports <- list.files(".", pattern = "\\.csv$")
13 | 
14 | # Strip the literal ".csv" extension (escaped and anchored -- an unescaped "."
15 | # is a regex wildcard) and parse what is left as a year-month-day date.
16 | date_format <- stringr::str_replace(monthly_reports, "\\.csv$", "")
17 | date_format <- lubridate::ymd(date_format)
18 | 
19 | # tibble() replaces the deprecated data_frame(); column names are unchanged.
20 | df <- tibble(monthly_reports, date_format)
21 | 
22 | # Window: from 12 months before the newest report up to the newest report.
23 | # NOTE(review): the comparison is inclusive, so with one file per month this
24 | # keeps 13 reports (the newest month plus the 12 before it) -- confirm that
25 | # this is the intended rolling window.
26 | max_month <- max(df$date_format)
27 | min_month <- max_month %m+% months(-12)
28 | 
29 | reports_to_read <-
30 |   df$monthly_reports[df$date_format >= min_month]
31 | 
32 | # Read every selected report and row-bind the results.
33 | df_output <- purrr::map_df(reports_to_read, read_csv)
34 | 
--------------------------------------------------------------------------------
/Chapter03/R_Code/Data/BostonHousingInfo.csv:
--------------------------------------------------------------------------------
1 | id,crim,zn,indus,chas,nox,rm,age,dis,rad,tax,ptratio,black,lstat,medv
2 | 1,0.31533,0,6.2,0,0.504,8.266,78.3,2.8944,8,307,17.4,385.05,4.14,44.8
3 | 2,0.01301,35,1.52,0,0.442,7.241,49.3,7.0379,1,284,15.5,394.74,5.49,32.7
4 | 3,1.34284,0,19.58,0,0.605,6.066,100,1.7573,5,403,14.7,353.89,6.43,24.3
5 | 4,18.0846,0,18.1,0,0.679,6.434,100,1.8347,24,666,20.2,27.25,29.05,7.2
6 | 5,5.82115,0,18.1,0,0.713,6.513,89.9,2.8016,24,666,20.2,393.82,10.29,20.2
7 | 6,13.0751,0,18.1,0,0.58,5.713,56.7,2.8237,24,666,20.2,396.9,14.76,20.1
8 | 7,0.07013,0,13.89,0,0.55,6.642,85.1,3.4211,5,276,16.4,392.78,9.69,28.7
9 | 8,12.2472,0,18.1,0,0.584,5.837,59.7,1.9976,24,666,20.2,24.65,15.69,10.2
10 | 9,0.06417,0,5.96,0,0.499,5.933,68.2,3.3603,5,279,19.2,396.9,9.68,18.9
11 | 10,17.8667,0,18.1,0,0.671,6.223,100,1.3861,24,666,20.2,393.74,21.78,10.2
12 | 11,4.66883,0,18.1,0,0.713,5.976,87.9,2.5806,24,666,20.2,10.48,19.01,12.7
13 | 12,0.10008,0,2.46,0,0.488,6.563,95.6,2.847,3,193,17.8,396.9,5.68,32.5
14 | 
13,19.6091,0,18.1,0,0.671,7.313,97.9,1.3163,24,666,20.2,396.9,13.44,15 15 | 14,5.87205,0,18.1,0,0.693,6.405,96,1.6768,24,666,20.2,396.9,19.37,12.5 16 | 15,0.16211,20,6.96,0,0.464,6.24,16.3,4.429,3,223,18.6,396.9,6.59,25.2 17 | 16,8.64476,0,18.1,0,0.693,6.193,92.6,1.7912,24,666,20.2,396.9,15.17,13.8 18 | 17,0.01381,80,0.46,0,0.422,7.875,32,5.6484,4,255,14.4,394.23,2.97,50 19 | 18,10.6718,0,18.1,0,0.74,6.459,94.8,1.9879,24,666,20.2,43.06,23.98,11.8 20 | 19,5.58107,0,18.1,0,0.713,6.436,87.9,2.3158,24,666,20.2,100.19,16.22,14.3 21 | 20,5.66637,0,18.1,0,0.74,6.219,100,2.0048,24,666,20.2,395.69,16.59,18.4 22 | 21,3.32105,0,19.58,1,0.871,5.403,100,1.3216,5,403,14.7,396.9,26.82,13.4 23 | 22,0.14866,0,8.56,0,0.52,6.727,79.9,2.7778,5,384,20.9,394.76,9.42,27.5 24 | 23,0.05059,0,4.49,0,0.449,6.389,48,4.7794,3,247,18.5,396.9,9.62,23.9 25 | 24,0.01538,90,3.75,0,0.394,7.454,34.2,6.3361,3,244,15.9,386.34,3.11,44 26 | 25,8.24809,0,18.1,0,0.713,7.393,99.3,2.4527,24,666,20.2,375.87,16.74,17.8 27 | 26,5.73116,0,18.1,0,0.532,7.061,77,3.4106,24,666,20.2,395.28,7.01,25 28 | 27,13.6781,0,18.1,0,0.74,5.935,87.9,1.8206,24,666,20.2,68.95,34.02,8.4 29 | 28,0.03466,35,6.06,0,0.4379,6.031,23.3,6.6407,1,304,16.9,362.25,7.83,19.4 30 | 29,0.17783,0,9.69,0,0.585,5.569,73.5,2.3999,6,391,19.2,395.77,15.1,17.5 31 | 30,1.15172,0,8.14,0,0.538,5.701,95,3.7872,4,307,21,358.77,18.35,13.1 32 | 31,0.62976,0,8.14,0,0.538,5.949,61.8,4.7075,4,307,21,396.9,8.26,20.4 33 | 32,0.54452,0,21.89,0,0.624,6.151,97.9,1.6687,4,437,21.2,396.9,18.46,17.8 34 | 33,0.26838,0,9.69,0,0.585,5.794,70.6,2.8927,6,391,19.2,396.9,14.1,18.3 35 | 34,0.14932,25,5.13,0,0.453,5.741,66.2,7.2254,8,284,19.7,395.11,13.15,18.7 36 | 35,0.0795,60,1.69,0,0.411,6.579,35.9,10.7103,4,411,18.3,370.78,5.49,24.1 37 | 36,0.08187,0,2.89,0,0.445,7.82,36.9,3.4952,2,276,18,393.53,3.57,43.8 38 | 37,0.3494,0,9.9,0,0.544,5.972,76.7,3.1025,4,304,18.4,396.24,9.97,20.3 39 | 38,0.10959,0,11.93,0,0.573,6.794,89.3,2.3889,1,273,21,393.45,6.48,22 40 | 
39,22.0511,0,18.1,0,0.74,5.818,92.4,1.8662,24,666,20.2,391.45,22.11,10.5 41 | 40,37.6619,0,18.1,0,0.679,6.202,78.7,1.8629,24,666,20.2,18.82,14.52,10.9 42 | 41,0.52014,20,3.97,0,0.647,8.398,91.5,2.2885,5,264,13,386.86,5.91,48.8 43 | 42,1.12658,0,19.58,1,0.871,5.012,88,1.6102,5,403,14.7,343.28,12.12,15.3 44 | 43,6.28807,0,18.1,0,0.74,6.341,96.4,2.072,24,666,20.2,318.01,17.79,14.9 45 | 44,0.04544,0,3.24,0,0.46,6.144,32.2,5.8736,4,430,16.9,368.57,9.09,19.8 46 | 45,2.14918,0,19.58,0,0.871,5.709,98.5,1.6232,5,403,14.7,261.95,15.79,19.4 47 | 46,8.26725,0,18.1,1,0.668,5.875,89.6,1.1296,24,666,20.2,347.88,8.88,50 48 | 47,0.06047,0,2.46,0,0.488,6.153,68.8,3.2797,3,193,17.8,387.11,13.15,29.6 49 | 48,0.16902,0,25.65,0,0.581,5.986,88.4,1.9929,2,188,19.1,385.02,14.81,21.4 50 | 49,0.38214,0,6.2,0,0.504,8.04,86.5,3.2157,8,307,17.4,387.38,3.13,37.6 51 | 50,0.17134,0,10.01,0,0.547,5.928,88.2,2.4631,6,432,17.8,344.91,15.76,18.3 52 | 51,13.3598,0,18.1,0,0.693,5.887,94.7,1.7821,24,666,20.2,396.9,16.35,12.7 53 | 52,0.20608,22,5.86,0,0.431,5.593,76.5,7.9549,7,330,19.1,372.49,12.5,17.6 54 | 53,0.2498,0,21.89,0,0.624,5.857,98.2,1.6686,4,437,21.2,392.04,21.32,13.3 55 | 54,0.08244,30,4.93,0,0.428,6.481,18.5,6.1899,6,300,16.6,379.41,6.36,23.7 56 | 55,0.06466,70,2.24,0,0.4,6.345,20.1,7.8278,5,358,14.8,368.24,4.97,22.5 57 | 56,5.69175,0,18.1,0,0.583,6.114,79.8,3.5459,24,666,20.2,392.68,14.98,19.1 58 | 57,0.34109,0,7.38,0,0.493,6.415,40.1,4.7211,5,287,19.6,396.9,6.12,25 59 | 58,0.55007,20,3.97,0,0.647,7.206,91.6,1.9301,5,264,13,387.89,8.1,36.5 60 | 59,0.05789,12.5,6.07,0,0.409,5.878,21.4,6.498,4,345,18.9,396.21,8.1,22 61 | 60,8.71675,0,18.1,0,0.693,6.471,98.8,1.7257,24,666,20.2,391.98,17.12,13.1 62 | 61,15.288,0,18.1,0,0.671,6.649,93.3,1.3449,24,666,20.2,363.02,23.24,13.9 63 | 62,0.15038,0,25.65,0,0.581,5.856,97,1.9444,2,188,19.1,370.31,25.41,17.3 64 | 63,0.13158,0,10.01,0,0.547,6.176,72.5,2.7301,6,432,17.8,393.3,12.04,21.2 65 | 
64,4.64689,0,18.1,0,0.614,6.98,67.6,2.5329,24,666,20.2,374.68,11.66,29.8 66 | 65,0.05302,0,3.41,0,0.489,7.079,63.1,3.4145,2,270,17.8,396.06,5.7,28.7 67 | 66,0.04684,0,3.41,0,0.489,6.417,66.1,3.0923,2,270,17.8,392.18,8.81,22.6 68 | 67,0.27957,0,9.69,0,0.585,5.926,42.6,2.3817,6,391,19.2,396.9,13.59,24.5 69 | 68,9.18702,0,18.1,0,0.7,5.536,100,1.5804,24,666,20.2,396.9,23.6,11.3 70 | 69,0.29916,20,6.96,0,0.464,5.856,42.1,4.429,3,223,18.6,388.65,13,21.1 71 | 70,0.10574,0,27.74,0,0.609,5.983,98.8,1.8681,4,711,20.1,390.11,18.07,13.6 72 | 71,0.22438,0,9.69,0,0.585,6.027,79.7,2.4982,6,391,19.2,396.9,14.33,16.8 73 | 72,4.0974,0,19.58,0,0.871,5.468,100,1.4118,5,403,14.7,396.9,26.42,15.6 74 | 73,12.0482,0,18.1,0,0.614,5.648,87.6,1.9512,24,666,20.2,291.55,14.1,20.8 75 | 74,0.12269,0,6.91,0,0.448,6.069,40,5.7209,3,233,17.9,389.39,9.55,21.2 76 | 75,0.10793,0,8.56,0,0.52,6.195,54.4,2.7778,5,384,20.9,393.49,13,21.7 77 | 76,0.40771,0,6.2,1,0.507,6.164,91.3,3.048,8,307,17.4,395.24,21.46,21.7 78 | 77,0.1415,0,6.91,0,0.448,6.169,6.6,5.7209,3,233,17.9,383.37,5.81,25.3 79 | 78,0.03584,80,3.37,0,0.398,6.29,17.8,6.6115,4,337,16.1,396.9,4.67,23.5 80 | 79,4.75237,0,18.1,0,0.713,6.525,86.5,2.4358,24,666,20.2,50.92,18.13,14.1 81 | 80,0.3692,0,9.9,0,0.544,6.567,87.3,3.6023,4,304,18.4,395.69,9.28,23.8 82 | 81,41.5292,0,18.1,0,0.693,5.531,85.4,1.6074,24,666,20.2,329.46,27.38,8.5 83 | 82,0.0136,75,4,0,0.41,5.888,47.6,7.3197,3,469,21.1,396.9,14.8,18.9 84 | 83,0.07165,0,25.65,0,0.581,6.004,84.1,2.1974,2,188,19.1,377.67,14.27,20.3 85 | 84,7.99248,0,18.1,0,0.7,5.52,100,1.5331,24,666,20.2,396.9,24.56,12.3 86 | 85,3.83684,0,18.1,0,0.77,6.251,91.1,2.2955,24,666,20.2,350.65,14.19,19.9 87 | 86,0.10469,40,6.41,1,0.447,7.267,49,4.7872,4,254,17.6,389.25,6.05,33.2 88 | 87,0.11504,0,2.89,0,0.445,6.163,69.6,3.4952,2,276,18,391.83,11.34,21.4 89 | 88,0.22188,20,6.96,1,0.464,7.691,51.8,4.3665,3,223,18.6,390.77,6.58,35.2 90 | 89,0.08873,21,5.64,0,0.439,5.963,45.7,6.8147,4,243,16.8,395.56,13.45,19.7 91 | 
90,0.03237,0,2.18,0,0.458,6.998,45.8,6.0622,3,222,18.7,394.63,2.94,33.4 92 | 91,0.04113,25,4.86,0,0.426,6.727,33.5,5.4007,4,281,19,396.9,5.29,28 93 | 92,20.0849,0,18.1,0,0.7,4.368,91.2,1.4395,24,666,20.2,285.83,30.63,8.8 94 | 93,0.32264,0,21.89,0,0.624,5.942,93.5,1.9669,4,437,21.2,378.25,16.9,17.4 95 | 94,0.02177,82.5,2.03,0,0.415,7.61,15.7,6.27,2,348,14.7,395.38,3.11,42.3 96 | 95,0.13642,0,10.59,0,0.489,5.891,22.3,3.9454,4,277,18.6,396.9,10.87,22.6 97 | 96,0.0187,85,4.15,0,0.429,6.516,27.7,8.5353,4,351,17.9,392.43,6.36,23.1 98 | 97,11.0874,0,18.1,0,0.718,6.411,100,1.8589,24,666,20.2,318.75,15.02,16.7 99 | 98,0.05561,70,2.24,0,0.4,7.041,10,7.8278,5,358,14.8,371.58,4.74,29 100 | 99,0.12204,0,2.89,0,0.445,6.625,57.8,3.4952,2,276,18,357.98,6.65,28.4 101 | 100,0.1146,20,6.96,0,0.464,6.538,58.7,3.9175,3,223,18.6,394.96,7.73,24.4 102 | 101,7.52601,0,18.1,0,0.713,6.417,98.3,2.185,24,666,20.2,304.21,19.31,13 103 | 102,0.06129,20,3.33,1,0.4429,7.645,49.7,5.2119,5,216,14.9,377.07,3.01,46 104 | 103,0.10612,30,4.93,0,0.428,6.095,65.1,6.3361,6,300,16.6,394.62,12.4,20.1 105 | 104,0.09065,20,6.96,1,0.464,5.92,61.5,3.9175,3,223,18.6,391.34,13.65,20.7 106 | 105,0.04379,80,3.37,0,0.398,5.787,31.1,6.6115,4,337,16.1,396.9,10.24,19.4 107 | 106,23.6482,0,18.1,0,0.671,6.38,96.2,1.3861,24,666,20.2,396.9,23.69,13.1 108 | -------------------------------------------------------------------------------- /Chapter03/R_Code/Data/SalesData/2010-12-01.csv: -------------------------------------------------------------------------------- 1 | Category,OrderDate,DueDate,ShipDate,OrderQuantity,UnitPrice,SalesAmount,TaxAmt,Order Month 2 | Bikes,2010-12-29T05:00:00Z,2011-01-10T05:00:00Z,2011-01-05T05:00:00Z,1,3578.27,3578.27,286.2616,12/1/2010 3 | Bikes,2010-12-29T05:00:00Z,2011-01-10T05:00:00Z,2011-01-05T05:00:00Z,1,3399.99,3399.99,271.9992,12/1/2010 4 | Bikes,2010-12-29T05:00:00Z,2011-01-10T05:00:00Z,2011-01-05T05:00:00Z,1,3399.99,3399.99,271.9992,12/1/2010 5 | 
Bikes,2010-12-29T05:00:00Z,2011-01-10T05:00:00Z,2011-01-05T05:00:00Z,1,699.0982,699.0982,55.9279,12/1/2010 6 | Bikes,2010-12-29T05:00:00Z,2011-01-10T05:00:00Z,2011-01-05T05:00:00Z,1,3399.99,3399.99,271.9992,12/1/2010 7 | Bikes,2010-12-30T05:00:00Z,2011-01-11T05:00:00Z,2011-01-06T05:00:00Z,1,3578.27,3578.27,286.2616,12/1/2010 8 | Bikes,2010-12-30T05:00:00Z,2011-01-11T05:00:00Z,2011-01-06T05:00:00Z,1,3578.27,3578.27,286.2616,12/1/2010 9 | Bikes,2010-12-30T05:00:00Z,2011-01-11T05:00:00Z,2011-01-06T05:00:00Z,1,3374.99,3374.99,269.9992,12/1/2010 10 | Bikes,2010-12-30T05:00:00Z,2011-01-11T05:00:00Z,2011-01-06T05:00:00Z,1,3399.99,3399.99,271.9992,12/1/2010 11 | Bikes,2010-12-31T05:00:00Z,2011-01-12T05:00:00Z,2011-01-07T05:00:00Z,1,3578.27,3578.27,286.2616,12/1/2010 12 | Bikes,2010-12-31T05:00:00Z,2011-01-12T05:00:00Z,2011-01-07T05:00:00Z,1,3578.27,3578.27,286.2616,12/1/2010 13 | Bikes,2010-12-31T05:00:00Z,2011-01-12T05:00:00Z,2011-01-07T05:00:00Z,1,699.0982,699.0982,55.9279,12/1/2010 14 | Bikes,2010-12-31T05:00:00Z,2011-01-12T05:00:00Z,2011-01-07T05:00:00Z,1,3578.27,3578.27,286.2616,12/1/2010 15 | Bikes,2010-12-31T05:00:00Z,2011-01-12T05:00:00Z,2011-01-07T05:00:00Z,1,3578.27,3578.27,286.2616,12/1/2010 16 | -------------------------------------------------------------------------------- /Chapter03/R_Code/Data/SalesData/2011-01-01.csv: -------------------------------------------------------------------------------- 1 | Category,OrderDate,DueDate,ShipDate,OrderQuantity,UnitPrice,SalesAmount,TaxAmt,Order Month 2 | Bikes,2011-01-01T05:00:00Z,2011-01-13T05:00:00Z,2011-01-08T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 3 | Bikes,2011-01-01T05:00:00Z,2011-01-13T05:00:00Z,2011-01-08T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 4 | Bikes,2011-01-02T05:00:00Z,2011-01-14T05:00:00Z,2011-01-09T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 5 | Bikes,2011-01-02T05:00:00Z,2011-01-14T05:00:00Z,2011-01-09T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 6 | 
Bikes,2011-01-02T05:00:00Z,2011-01-14T05:00:00Z,2011-01-09T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 7 | Bikes,2011-01-02T05:00:00Z,2011-01-14T05:00:00Z,2011-01-09T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 8 | Bikes,2011-01-02T05:00:00Z,2011-01-14T05:00:00Z,2011-01-09T05:00:00Z,1,699.0982,699.0982,55.9279,1/1/2011 9 | Bikes,2011-01-03T05:00:00Z,2011-01-15T05:00:00Z,2011-01-10T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 10 | Bikes,2011-01-03T05:00:00Z,2011-01-15T05:00:00Z,2011-01-10T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 11 | Bikes,2011-01-03T05:00:00Z,2011-01-15T05:00:00Z,2011-01-10T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 12 | Bikes,2011-01-03T05:00:00Z,2011-01-15T05:00:00Z,2011-01-10T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 13 | Bikes,2011-01-04T05:00:00Z,2011-01-16T05:00:00Z,2011-01-11T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 14 | Bikes,2011-01-04T05:00:00Z,2011-01-16T05:00:00Z,2011-01-11T05:00:00Z,1,699.0982,699.0982,55.9279,1/1/2011 15 | Bikes,2011-01-04T05:00:00Z,2011-01-16T05:00:00Z,2011-01-11T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 16 | Bikes,2011-01-05T05:00:00Z,2011-01-17T05:00:00Z,2011-01-12T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 17 | Bikes,2011-01-05T05:00:00Z,2011-01-17T05:00:00Z,2011-01-12T05:00:00Z,1,699.0982,699.0982,55.9279,1/1/2011 18 | Bikes,2011-01-05T05:00:00Z,2011-01-17T05:00:00Z,2011-01-12T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 19 | Bikes,2011-01-06T05:00:00Z,2011-01-18T05:00:00Z,2011-01-13T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 20 | Bikes,2011-01-06T05:00:00Z,2011-01-18T05:00:00Z,2011-01-13T05:00:00Z,1,3399.99,3399.99,271.9992,1/1/2011 21 | Bikes,2011-01-06T05:00:00Z,2011-01-18T05:00:00Z,2011-01-13T05:00:00Z,1,3399.99,3399.99,271.9992,1/1/2011 22 | Bikes,2011-01-06T05:00:00Z,2011-01-18T05:00:00Z,2011-01-13T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 23 | Bikes,2011-01-06T05:00:00Z,2011-01-18T05:00:00Z,2011-01-13T05:00:00Z,1,3374.99,3374.99,269.9992,1/1/2011 24 | 
Bikes,2011-01-06T05:00:00Z,2011-01-18T05:00:00Z,2011-01-13T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 25 | Bikes,2011-01-07T05:00:00Z,2011-01-19T05:00:00Z,2011-01-14T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 26 | Bikes,2011-01-07T05:00:00Z,2011-01-19T05:00:00Z,2011-01-14T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 27 | Bikes,2011-01-07T05:00:00Z,2011-01-19T05:00:00Z,2011-01-14T05:00:00Z,1,3399.99,3399.99,271.9992,1/1/2011 28 | Bikes,2011-01-08T05:00:00Z,2011-01-20T05:00:00Z,2011-01-15T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 29 | Bikes,2011-01-08T05:00:00Z,2011-01-20T05:00:00Z,2011-01-15T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 30 | Bikes,2011-01-08T05:00:00Z,2011-01-20T05:00:00Z,2011-01-15T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 31 | Bikes,2011-01-08T05:00:00Z,2011-01-20T05:00:00Z,2011-01-15T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 32 | Bikes,2011-01-09T05:00:00Z,2011-01-21T05:00:00Z,2011-01-16T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 33 | Bikes,2011-01-09T05:00:00Z,2011-01-21T05:00:00Z,2011-01-16T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 34 | Bikes,2011-01-09T05:00:00Z,2011-01-21T05:00:00Z,2011-01-16T05:00:00Z,1,3399.99,3399.99,271.9992,1/1/2011 35 | Bikes,2011-01-09T05:00:00Z,2011-01-21T05:00:00Z,2011-01-16T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 36 | Bikes,2011-01-10T05:00:00Z,2011-01-22T05:00:00Z,2011-01-17T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 37 | Bikes,2011-01-10T05:00:00Z,2011-01-22T05:00:00Z,2011-01-17T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 38 | Bikes,2011-01-11T05:00:00Z,2011-01-23T05:00:00Z,2011-01-18T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 39 | Bikes,2011-01-11T05:00:00Z,2011-01-23T05:00:00Z,2011-01-18T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 40 | Bikes,2011-01-11T05:00:00Z,2011-01-23T05:00:00Z,2011-01-18T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 41 | Bikes,2011-01-11T05:00:00Z,2011-01-23T05:00:00Z,2011-01-18T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 42 | 
Bikes,2011-01-11T05:00:00Z,2011-01-23T05:00:00Z,2011-01-18T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 43 | Bikes,2011-01-11T05:00:00Z,2011-01-23T05:00:00Z,2011-01-18T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 44 | Bikes,2011-01-11T05:00:00Z,2011-01-23T05:00:00Z,2011-01-18T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 45 | Bikes,2011-01-12T05:00:00Z,2011-01-24T05:00:00Z,2011-01-19T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 46 | Bikes,2011-01-12T05:00:00Z,2011-01-24T05:00:00Z,2011-01-19T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 47 | Bikes,2011-01-12T05:00:00Z,2011-01-24T05:00:00Z,2011-01-19T05:00:00Z,1,699.0982,699.0982,55.9279,1/1/2011 48 | Bikes,2011-01-12T05:00:00Z,2011-01-24T05:00:00Z,2011-01-19T05:00:00Z,1,3374.99,3374.99,269.9992,1/1/2011 49 | Bikes,2011-01-13T05:00:00Z,2011-01-25T05:00:00Z,2011-01-20T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 50 | Bikes,2011-01-13T05:00:00Z,2011-01-25T05:00:00Z,2011-01-20T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 51 | Bikes,2011-01-13T05:00:00Z,2011-01-25T05:00:00Z,2011-01-20T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 52 | Bikes,2011-01-13T05:00:00Z,2011-01-25T05:00:00Z,2011-01-20T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 53 | Bikes,2011-01-14T05:00:00Z,2011-01-26T05:00:00Z,2011-01-21T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 54 | Bikes,2011-01-14T05:00:00Z,2011-01-26T05:00:00Z,2011-01-21T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 55 | Bikes,2011-01-14T05:00:00Z,2011-01-26T05:00:00Z,2011-01-21T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 56 | Bikes,2011-01-14T05:00:00Z,2011-01-26T05:00:00Z,2011-01-21T05:00:00Z,1,3399.99,3399.99,271.9992,1/1/2011 57 | Bikes,2011-01-15T05:00:00Z,2011-01-27T05:00:00Z,2011-01-22T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 58 | Bikes,2011-01-15T05:00:00Z,2011-01-27T05:00:00Z,2011-01-22T05:00:00Z,1,3374.99,3374.99,269.9992,1/1/2011 59 | Bikes,2011-01-16T05:00:00Z,2011-01-28T05:00:00Z,2011-01-23T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 60 | 
Bikes,2011-01-16T05:00:00Z,2011-01-28T05:00:00Z,2011-01-23T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 61 | Bikes,2011-01-16T05:00:00Z,2011-01-28T05:00:00Z,2011-01-23T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 62 | Bikes,2011-01-16T05:00:00Z,2011-01-28T05:00:00Z,2011-01-23T05:00:00Z,1,699.0982,699.0982,55.9279,1/1/2011 63 | Bikes,2011-01-16T05:00:00Z,2011-01-28T05:00:00Z,2011-01-23T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 64 | Bikes,2011-01-16T05:00:00Z,2011-01-28T05:00:00Z,2011-01-23T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 65 | Bikes,2011-01-16T05:00:00Z,2011-01-28T05:00:00Z,2011-01-23T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 66 | Bikes,2011-01-16T05:00:00Z,2011-01-28T05:00:00Z,2011-01-23T05:00:00Z,1,3399.99,3399.99,271.9992,1/1/2011 67 | Bikes,2011-01-17T05:00:00Z,2011-01-29T05:00:00Z,2011-01-24T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 68 | Bikes,2011-01-17T05:00:00Z,2011-01-29T05:00:00Z,2011-01-24T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 69 | Bikes,2011-01-17T05:00:00Z,2011-01-29T05:00:00Z,2011-01-24T05:00:00Z,1,3399.99,3399.99,271.9992,1/1/2011 70 | Bikes,2011-01-17T05:00:00Z,2011-01-29T05:00:00Z,2011-01-24T05:00:00Z,1,699.0982,699.0982,55.9279,1/1/2011 71 | Bikes,2011-01-18T05:00:00Z,2011-01-30T05:00:00Z,2011-01-25T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 72 | Bikes,2011-01-18T05:00:00Z,2011-01-30T05:00:00Z,2011-01-25T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 73 | Bikes,2011-01-18T05:00:00Z,2011-01-30T05:00:00Z,2011-01-25T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 74 | Bikes,2011-01-18T05:00:00Z,2011-01-30T05:00:00Z,2011-01-25T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 75 | Bikes,2011-01-19T05:00:00Z,2011-01-31T05:00:00Z,2011-01-26T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 76 | Bikes,2011-01-19T05:00:00Z,2011-01-31T05:00:00Z,2011-01-26T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 77 | Bikes,2011-01-19T05:00:00Z,2011-01-31T05:00:00Z,2011-01-26T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 78 | 
Bikes,2011-01-19T05:00:00Z,2011-01-31T05:00:00Z,2011-01-26T05:00:00Z,1,3399.99,3399.99,271.9992,1/1/2011 79 | Bikes,2011-01-19T05:00:00Z,2011-01-31T05:00:00Z,2011-01-26T05:00:00Z,1,3374.99,3374.99,269.9992,1/1/2011 80 | Bikes,2011-01-19T05:00:00Z,2011-01-31T05:00:00Z,2011-01-26T05:00:00Z,1,3374.99,3374.99,269.9992,1/1/2011 81 | Bikes,2011-01-19T05:00:00Z,2011-01-31T05:00:00Z,2011-01-26T05:00:00Z,1,3399.99,3399.99,271.9992,1/1/2011 82 | Bikes,2011-01-19T05:00:00Z,2011-01-31T05:00:00Z,2011-01-26T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 83 | Bikes,2011-01-19T05:00:00Z,2011-01-31T05:00:00Z,2011-01-26T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 84 | Bikes,2011-01-19T05:00:00Z,2011-01-31T05:00:00Z,2011-01-26T05:00:00Z,1,3399.99,3399.99,271.9992,1/1/2011 85 | Bikes,2011-01-19T05:00:00Z,2011-01-31T05:00:00Z,2011-01-26T05:00:00Z,1,3399.99,3399.99,271.9992,1/1/2011 86 | Bikes,2011-01-20T05:00:00Z,2011-02-01T05:00:00Z,2011-01-27T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 87 | Bikes,2011-01-20T05:00:00Z,2011-02-01T05:00:00Z,2011-01-27T05:00:00Z,1,699.0982,699.0982,55.9279,1/1/2011 88 | Bikes,2011-01-20T05:00:00Z,2011-02-01T05:00:00Z,2011-01-27T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 89 | Bikes,2011-01-20T05:00:00Z,2011-02-01T05:00:00Z,2011-01-27T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 90 | Bikes,2011-01-20T05:00:00Z,2011-02-01T05:00:00Z,2011-01-27T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 91 | Bikes,2011-01-21T05:00:00Z,2011-02-02T05:00:00Z,2011-01-28T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 92 | Bikes,2011-01-21T05:00:00Z,2011-02-02T05:00:00Z,2011-01-28T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 93 | Bikes,2011-01-21T05:00:00Z,2011-02-02T05:00:00Z,2011-01-28T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 94 | Bikes,2011-01-22T05:00:00Z,2011-02-03T05:00:00Z,2011-01-29T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 95 | Bikes,2011-01-22T05:00:00Z,2011-02-03T05:00:00Z,2011-01-29T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 96 | 
Bikes,2011-01-22T05:00:00Z,2011-02-03T05:00:00Z,2011-01-29T05:00:00Z,1,699.0982,699.0982,55.9279,1/1/2011 97 | Bikes,2011-01-22T05:00:00Z,2011-02-03T05:00:00Z,2011-01-29T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 98 | Bikes,2011-01-23T05:00:00Z,2011-02-04T05:00:00Z,2011-01-30T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 99 | Bikes,2011-01-23T05:00:00Z,2011-02-04T05:00:00Z,2011-01-30T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 100 | Bikes,2011-01-23T05:00:00Z,2011-02-04T05:00:00Z,2011-01-30T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 101 | Bikes,2011-01-23T05:00:00Z,2011-02-04T05:00:00Z,2011-01-30T05:00:00Z,1,3399.99,3399.99,271.9992,1/1/2011 102 | Bikes,2011-01-23T05:00:00Z,2011-02-04T05:00:00Z,2011-01-30T05:00:00Z,1,3399.99,3399.99,271.9992,1/1/2011 103 | Bikes,2011-01-24T05:00:00Z,2011-02-05T05:00:00Z,2011-01-31T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 104 | Bikes,2011-01-24T05:00:00Z,2011-02-05T05:00:00Z,2011-01-31T05:00:00Z,1,3374.99,3374.99,269.9992,1/1/2011 105 | Bikes,2011-01-24T05:00:00Z,2011-02-05T05:00:00Z,2011-01-31T05:00:00Z,1,3374.99,3374.99,269.9992,1/1/2011 106 | Bikes,2011-01-24T05:00:00Z,2011-02-05T05:00:00Z,2011-01-31T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 107 | Bikes,2011-01-24T05:00:00Z,2011-02-05T05:00:00Z,2011-01-31T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 108 | Bikes,2011-01-24T05:00:00Z,2011-02-05T05:00:00Z,2011-01-31T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 109 | Bikes,2011-01-24T05:00:00Z,2011-02-05T05:00:00Z,2011-01-31T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 110 | Bikes,2011-01-24T05:00:00Z,2011-02-05T05:00:00Z,2011-01-31T05:00:00Z,1,3399.99,3399.99,271.9992,1/1/2011 111 | Bikes,2011-01-25T05:00:00Z,2011-02-06T05:00:00Z,2011-02-01T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 112 | Bikes,2011-01-25T05:00:00Z,2011-02-06T05:00:00Z,2011-02-01T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 113 | Bikes,2011-01-25T05:00:00Z,2011-02-06T05:00:00Z,2011-02-01T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 114 | 
Bikes,2011-01-25T05:00:00Z,2011-02-06T05:00:00Z,2011-02-01T05:00:00Z,1,699.0982,699.0982,55.9279,1/1/2011 115 | Bikes,2011-01-25T05:00:00Z,2011-02-06T05:00:00Z,2011-02-01T05:00:00Z,1,699.0982,699.0982,55.9279,1/1/2011 116 | Bikes,2011-01-25T05:00:00Z,2011-02-06T05:00:00Z,2011-02-01T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 117 | Bikes,2011-01-25T05:00:00Z,2011-02-06T05:00:00Z,2011-02-01T05:00:00Z,1,3374.99,3374.99,269.9992,1/1/2011 118 | Bikes,2011-01-25T05:00:00Z,2011-02-06T05:00:00Z,2011-02-01T05:00:00Z,1,699.0982,699.0982,55.9279,1/1/2011 119 | Bikes,2011-01-26T05:00:00Z,2011-02-07T05:00:00Z,2011-02-02T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 120 | Bikes,2011-01-26T05:00:00Z,2011-02-07T05:00:00Z,2011-02-02T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 121 | Bikes,2011-01-26T05:00:00Z,2011-02-07T05:00:00Z,2011-02-02T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 122 | Bikes,2011-01-26T05:00:00Z,2011-02-07T05:00:00Z,2011-02-02T05:00:00Z,1,3374.99,3374.99,269.9992,1/1/2011 123 | Bikes,2011-01-26T05:00:00Z,2011-02-07T05:00:00Z,2011-02-02T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 124 | Bikes,2011-01-27T05:00:00Z,2011-02-08T05:00:00Z,2011-02-03T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 125 | Bikes,2011-01-27T05:00:00Z,2011-02-08T05:00:00Z,2011-02-03T05:00:00Z,1,3374.99,3374.99,269.9992,1/1/2011 126 | Bikes,2011-01-27T05:00:00Z,2011-02-08T05:00:00Z,2011-02-03T05:00:00Z,1,3374.99,3374.99,269.9992,1/1/2011 127 | Bikes,2011-01-27T05:00:00Z,2011-02-08T05:00:00Z,2011-02-03T05:00:00Z,1,699.0982,699.0982,55.9279,1/1/2011 128 | Bikes,2011-01-27T05:00:00Z,2011-02-08T05:00:00Z,2011-02-03T05:00:00Z,1,3374.99,3374.99,269.9992,1/1/2011 129 | Bikes,2011-01-28T05:00:00Z,2011-02-09T05:00:00Z,2011-02-04T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 130 | Bikes,2011-01-28T05:00:00Z,2011-02-09T05:00:00Z,2011-02-04T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 131 | Bikes,2011-01-28T05:00:00Z,2011-02-09T05:00:00Z,2011-02-04T05:00:00Z,1,699.0982,699.0982,55.9279,1/1/2011 
132 | Bikes,2011-01-28T05:00:00Z,2011-02-09T05:00:00Z,2011-02-04T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 133 | Bikes,2011-01-28T05:00:00Z,2011-02-09T05:00:00Z,2011-02-04T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 134 | Bikes,2011-01-29T05:00:00Z,2011-02-10T05:00:00Z,2011-02-05T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 135 | Bikes,2011-01-29T05:00:00Z,2011-02-10T05:00:00Z,2011-02-05T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 136 | Bikes,2011-01-29T05:00:00Z,2011-02-10T05:00:00Z,2011-02-05T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 137 | Bikes,2011-01-29T05:00:00Z,2011-02-10T05:00:00Z,2011-02-05T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 138 | Bikes,2011-01-29T05:00:00Z,2011-02-10T05:00:00Z,2011-02-05T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 139 | Bikes,2011-01-30T05:00:00Z,2011-02-11T05:00:00Z,2011-02-06T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 140 | Bikes,2011-01-30T05:00:00Z,2011-02-11T05:00:00Z,2011-02-06T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 141 | Bikes,2011-01-30T05:00:00Z,2011-02-11T05:00:00Z,2011-02-06T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 142 | Bikes,2011-01-31T05:00:00Z,2011-02-12T05:00:00Z,2011-02-07T05:00:00Z,1,3374.99,3374.99,269.9992,1/1/2011 143 | Bikes,2011-01-31T05:00:00Z,2011-02-12T05:00:00Z,2011-02-07T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 144 | Bikes,2011-01-31T05:00:00Z,2011-02-12T05:00:00Z,2011-02-07T05:00:00Z,1,3578.27,3578.27,286.2616,1/1/2011 145 | Bikes,2011-01-31T05:00:00Z,2011-02-12T05:00:00Z,2011-02-07T05:00:00Z,1,699.0982,699.0982,55.9279,1/1/2011 146 | -------------------------------------------------------------------------------- /Chapter03/R_Code/Data/SalesData/2011-02-01.csv: -------------------------------------------------------------------------------- 1 | Category,OrderDate,DueDate,ShipDate,OrderQuantity,UnitPrice,SalesAmount,TaxAmt,Order Month 2 | Bikes,2011-02-01T05:00:00Z,2011-02-13T05:00:00Z,2011-02-08T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 3 | 
Bikes,2011-02-01T05:00:00Z,2011-02-13T05:00:00Z,2011-02-08T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 4 | Bikes,2011-02-01T05:00:00Z,2011-02-13T05:00:00Z,2011-02-08T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 5 | Bikes,2011-02-01T05:00:00Z,2011-02-13T05:00:00Z,2011-02-08T05:00:00Z,1,3399.99,3399.99,271.9992,2/1/2011 6 | Bikes,2011-02-01T05:00:00Z,2011-02-13T05:00:00Z,2011-02-08T05:00:00Z,1,3399.99,3399.99,271.9992,2/1/2011 7 | Bikes,2011-02-02T05:00:00Z,2011-02-14T05:00:00Z,2011-02-09T05:00:00Z,1,699.0982,699.0982,55.9279,2/1/2011 8 | Bikes,2011-02-02T05:00:00Z,2011-02-14T05:00:00Z,2011-02-09T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 9 | Bikes,2011-02-02T05:00:00Z,2011-02-14T05:00:00Z,2011-02-09T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 10 | Bikes,2011-02-02T05:00:00Z,2011-02-14T05:00:00Z,2011-02-09T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 11 | Bikes,2011-02-02T05:00:00Z,2011-02-14T05:00:00Z,2011-02-09T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 12 | Bikes,2011-02-02T05:00:00Z,2011-02-14T05:00:00Z,2011-02-09T05:00:00Z,1,699.0982,699.0982,55.9279,2/1/2011 13 | Bikes,2011-02-03T05:00:00Z,2011-02-15T05:00:00Z,2011-02-10T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 14 | Bikes,2011-02-03T05:00:00Z,2011-02-15T05:00:00Z,2011-02-10T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 15 | Bikes,2011-02-03T05:00:00Z,2011-02-15T05:00:00Z,2011-02-10T05:00:00Z,1,699.0982,699.0982,55.9279,2/1/2011 16 | Bikes,2011-02-03T05:00:00Z,2011-02-15T05:00:00Z,2011-02-10T05:00:00Z,1,3399.99,3399.99,271.9992,2/1/2011 17 | Bikes,2011-02-03T05:00:00Z,2011-02-15T05:00:00Z,2011-02-10T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 18 | Bikes,2011-02-03T05:00:00Z,2011-02-15T05:00:00Z,2011-02-10T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 19 | Bikes,2011-02-03T05:00:00Z,2011-02-15T05:00:00Z,2011-02-10T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 20 | Bikes,2011-02-03T05:00:00Z,2011-02-15T05:00:00Z,2011-02-10T05:00:00Z,1,3399.99,3399.99,271.9992,2/1/2011 21 | 
Bikes,2011-02-04T05:00:00Z,2011-02-16T05:00:00Z,2011-02-11T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 22 | Bikes,2011-02-04T05:00:00Z,2011-02-16T05:00:00Z,2011-02-11T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 23 | Bikes,2011-02-04T05:00:00Z,2011-02-16T05:00:00Z,2011-02-11T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 24 | Bikes,2011-02-04T05:00:00Z,2011-02-16T05:00:00Z,2011-02-11T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 25 | Bikes,2011-02-05T05:00:00Z,2011-02-17T05:00:00Z,2011-02-12T05:00:00Z,1,699.0982,699.0982,55.9279,2/1/2011 26 | Bikes,2011-02-05T05:00:00Z,2011-02-17T05:00:00Z,2011-02-12T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 27 | Bikes,2011-02-05T05:00:00Z,2011-02-17T05:00:00Z,2011-02-12T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 28 | Bikes,2011-02-05T05:00:00Z,2011-02-17T05:00:00Z,2011-02-12T05:00:00Z,1,3399.99,3399.99,271.9992,2/1/2011 29 | Bikes,2011-02-06T05:00:00Z,2011-02-18T05:00:00Z,2011-02-13T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 30 | Bikes,2011-02-06T05:00:00Z,2011-02-18T05:00:00Z,2011-02-13T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 31 | Bikes,2011-02-06T05:00:00Z,2011-02-18T05:00:00Z,2011-02-13T05:00:00Z,1,3374.99,3374.99,269.9992,2/1/2011 32 | Bikes,2011-02-06T05:00:00Z,2011-02-18T05:00:00Z,2011-02-13T05:00:00Z,1,3374.99,3374.99,269.9992,2/1/2011 33 | Bikes,2011-02-07T05:00:00Z,2011-02-19T05:00:00Z,2011-02-14T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 34 | Bikes,2011-02-07T05:00:00Z,2011-02-19T05:00:00Z,2011-02-14T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 35 | Bikes,2011-02-07T05:00:00Z,2011-02-19T05:00:00Z,2011-02-14T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 36 | Bikes,2011-02-07T05:00:00Z,2011-02-19T05:00:00Z,2011-02-14T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 37 | Bikes,2011-02-07T05:00:00Z,2011-02-19T05:00:00Z,2011-02-14T05:00:00Z,1,3374.99,3374.99,269.9992,2/1/2011 38 | Bikes,2011-02-07T05:00:00Z,2011-02-19T05:00:00Z,2011-02-14T05:00:00Z,1,3399.99,3399.99,271.9992,2/1/2011 39 | 
Bikes,2011-02-08T05:00:00Z,2011-02-20T05:00:00Z,2011-02-15T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 40 | Bikes,2011-02-08T05:00:00Z,2011-02-20T05:00:00Z,2011-02-15T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 41 | Bikes,2011-02-08T05:00:00Z,2011-02-20T05:00:00Z,2011-02-15T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 42 | Bikes,2011-02-08T05:00:00Z,2011-02-20T05:00:00Z,2011-02-15T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 43 | Bikes,2011-02-08T05:00:00Z,2011-02-20T05:00:00Z,2011-02-15T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 44 | Bikes,2011-02-09T05:00:00Z,2011-02-21T05:00:00Z,2011-02-16T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 45 | Bikes,2011-02-09T05:00:00Z,2011-02-21T05:00:00Z,2011-02-16T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 46 | Bikes,2011-02-09T05:00:00Z,2011-02-21T05:00:00Z,2011-02-16T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 47 | Bikes,2011-02-09T05:00:00Z,2011-02-21T05:00:00Z,2011-02-16T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 48 | Bikes,2011-02-09T05:00:00Z,2011-02-21T05:00:00Z,2011-02-16T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 49 | Bikes,2011-02-09T05:00:00Z,2011-02-21T05:00:00Z,2011-02-16T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 50 | Bikes,2011-02-10T05:00:00Z,2011-02-22T05:00:00Z,2011-02-17T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 51 | Bikes,2011-02-10T05:00:00Z,2011-02-22T05:00:00Z,2011-02-17T05:00:00Z,1,699.0982,699.0982,55.9279,2/1/2011 52 | Bikes,2011-02-10T05:00:00Z,2011-02-22T05:00:00Z,2011-02-17T05:00:00Z,1,3374.99,3374.99,269.9992,2/1/2011 53 | Bikes,2011-02-10T05:00:00Z,2011-02-22T05:00:00Z,2011-02-17T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 54 | Bikes,2011-02-10T05:00:00Z,2011-02-22T05:00:00Z,2011-02-17T05:00:00Z,1,699.0982,699.0982,55.9279,2/1/2011 55 | Bikes,2011-02-11T05:00:00Z,2011-02-23T05:00:00Z,2011-02-18T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 56 | Bikes,2011-02-11T05:00:00Z,2011-02-23T05:00:00Z,2011-02-18T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 57 | 
Bikes,2011-02-11T05:00:00Z,2011-02-23T05:00:00Z,2011-02-18T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 58 | Bikes,2011-02-11T05:00:00Z,2011-02-23T05:00:00Z,2011-02-18T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 59 | Bikes,2011-02-12T05:00:00Z,2011-02-24T05:00:00Z,2011-02-19T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 60 | Bikes,2011-02-12T05:00:00Z,2011-02-24T05:00:00Z,2011-02-19T05:00:00Z,1,3399.99,3399.99,271.9992,2/1/2011 61 | Bikes,2011-02-12T05:00:00Z,2011-02-24T05:00:00Z,2011-02-19T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 62 | Bikes,2011-02-12T05:00:00Z,2011-02-24T05:00:00Z,2011-02-19T05:00:00Z,1,3399.99,3399.99,271.9992,2/1/2011 63 | Bikes,2011-02-12T05:00:00Z,2011-02-24T05:00:00Z,2011-02-19T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 64 | Bikes,2011-02-12T05:00:00Z,2011-02-24T05:00:00Z,2011-02-19T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 65 | Bikes,2011-02-14T05:00:00Z,2011-02-26T05:00:00Z,2011-02-21T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 66 | Bikes,2011-02-15T05:00:00Z,2011-02-27T05:00:00Z,2011-02-22T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 67 | Bikes,2011-02-15T05:00:00Z,2011-02-27T05:00:00Z,2011-02-22T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 68 | Bikes,2011-02-15T05:00:00Z,2011-02-27T05:00:00Z,2011-02-22T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 69 | Bikes,2011-02-15T05:00:00Z,2011-02-27T05:00:00Z,2011-02-22T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 70 | Bikes,2011-02-15T05:00:00Z,2011-02-27T05:00:00Z,2011-02-22T05:00:00Z,1,3374.99,3374.99,269.9992,2/1/2011 71 | Bikes,2011-02-15T05:00:00Z,2011-02-27T05:00:00Z,2011-02-22T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 72 | Bikes,2011-02-16T05:00:00Z,2011-02-28T05:00:00Z,2011-02-23T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 73 | Bikes,2011-02-16T05:00:00Z,2011-02-28T05:00:00Z,2011-02-23T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 74 | Bikes,2011-02-16T05:00:00Z,2011-02-28T05:00:00Z,2011-02-23T05:00:00Z,1,699.0982,699.0982,55.9279,2/1/2011 75 | 
Bikes,2011-02-16T05:00:00Z,2011-02-28T05:00:00Z,2011-02-23T05:00:00Z,1,699.0982,699.0982,55.9279,2/1/2011 76 | Bikes,2011-02-16T05:00:00Z,2011-02-28T05:00:00Z,2011-02-23T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 77 | Bikes,2011-02-16T05:00:00Z,2011-02-28T05:00:00Z,2011-02-23T05:00:00Z,1,699.0982,699.0982,55.9279,2/1/2011 78 | Bikes,2011-02-16T05:00:00Z,2011-02-28T05:00:00Z,2011-02-23T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 79 | Bikes,2011-02-16T05:00:00Z,2011-02-28T05:00:00Z,2011-02-23T05:00:00Z,1,699.0982,699.0982,55.9279,2/1/2011 80 | Bikes,2011-02-17T05:00:00Z,2011-03-01T05:00:00Z,2011-02-24T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 81 | Bikes,2011-02-17T05:00:00Z,2011-03-01T05:00:00Z,2011-02-24T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 82 | Bikes,2011-02-17T05:00:00Z,2011-03-01T05:00:00Z,2011-02-24T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 83 | Bikes,2011-02-17T05:00:00Z,2011-03-01T05:00:00Z,2011-02-24T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 84 | Bikes,2011-02-17T05:00:00Z,2011-03-01T05:00:00Z,2011-02-24T05:00:00Z,1,3374.99,3374.99,269.9992,2/1/2011 85 | Bikes,2011-02-17T05:00:00Z,2011-03-01T05:00:00Z,2011-02-24T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 86 | Bikes,2011-02-18T05:00:00Z,2011-03-02T05:00:00Z,2011-02-25T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 87 | Bikes,2011-02-18T05:00:00Z,2011-03-02T05:00:00Z,2011-02-25T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 88 | Bikes,2011-02-18T05:00:00Z,2011-03-02T05:00:00Z,2011-02-25T05:00:00Z,1,3374.99,3374.99,269.9992,2/1/2011 89 | Bikes,2011-02-18T05:00:00Z,2011-03-02T05:00:00Z,2011-02-25T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 90 | Bikes,2011-02-18T05:00:00Z,2011-03-02T05:00:00Z,2011-02-25T05:00:00Z,1,3374.99,3374.99,269.9992,2/1/2011 91 | Bikes,2011-02-18T05:00:00Z,2011-03-02T05:00:00Z,2011-02-25T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 92 | Bikes,2011-02-18T05:00:00Z,2011-03-02T05:00:00Z,2011-02-25T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 93 | 
Bikes,2011-02-18T05:00:00Z,2011-03-02T05:00:00Z,2011-02-25T05:00:00Z,1,3374.99,3374.99,269.9992,2/1/2011 94 | Bikes,2011-02-19T05:00:00Z,2011-03-03T05:00:00Z,2011-02-26T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 95 | Bikes,2011-02-19T05:00:00Z,2011-03-03T05:00:00Z,2011-02-26T05:00:00Z,1,3374.99,3374.99,269.9992,2/1/2011 96 | Bikes,2011-02-19T05:00:00Z,2011-03-03T05:00:00Z,2011-02-26T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 97 | Bikes,2011-02-19T05:00:00Z,2011-03-03T05:00:00Z,2011-02-26T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 98 | Bikes,2011-02-19T05:00:00Z,2011-03-03T05:00:00Z,2011-02-26T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 99 | Bikes,2011-02-20T05:00:00Z,2011-03-04T05:00:00Z,2011-02-27T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 100 | Bikes,2011-02-20T05:00:00Z,2011-03-04T05:00:00Z,2011-02-27T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 101 | Bikes,2011-02-20T05:00:00Z,2011-03-04T05:00:00Z,2011-02-27T05:00:00Z,1,3374.99,3374.99,269.9992,2/1/2011 102 | Bikes,2011-02-21T05:00:00Z,2011-03-05T05:00:00Z,2011-02-28T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 103 | Bikes,2011-02-21T05:00:00Z,2011-03-05T05:00:00Z,2011-02-28T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 104 | Bikes,2011-02-21T05:00:00Z,2011-03-05T05:00:00Z,2011-02-28T05:00:00Z,1,3399.99,3399.99,271.9992,2/1/2011 105 | Bikes,2011-02-21T05:00:00Z,2011-03-05T05:00:00Z,2011-02-28T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 106 | Bikes,2011-02-21T05:00:00Z,2011-03-05T05:00:00Z,2011-02-28T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 107 | Bikes,2011-02-21T05:00:00Z,2011-03-05T05:00:00Z,2011-02-28T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 108 | Bikes,2011-02-21T05:00:00Z,2011-03-05T05:00:00Z,2011-02-28T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 109 | Bikes,2011-02-21T05:00:00Z,2011-03-05T05:00:00Z,2011-02-28T05:00:00Z,1,699.0982,699.0982,55.9279,2/1/2011 110 | Bikes,2011-02-21T05:00:00Z,2011-03-05T05:00:00Z,2011-02-28T05:00:00Z,1,3399.99,3399.99,271.9992,2/1/2011 111 | 
Bikes,2011-02-22T05:00:00Z,2011-03-06T05:00:00Z,2011-03-01T05:00:00Z,1,699.0982,699.0982,55.9279,2/1/2011 112 | Bikes,2011-02-22T05:00:00Z,2011-03-06T05:00:00Z,2011-03-01T05:00:00Z,1,3374.99,3374.99,269.9992,2/1/2011 113 | Bikes,2011-02-22T05:00:00Z,2011-03-06T05:00:00Z,2011-03-01T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 114 | Bikes,2011-02-23T05:00:00Z,2011-03-07T05:00:00Z,2011-03-02T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 115 | Bikes,2011-02-23T05:00:00Z,2011-03-07T05:00:00Z,2011-03-02T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 116 | Bikes,2011-02-23T05:00:00Z,2011-03-07T05:00:00Z,2011-03-02T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 117 | Bikes,2011-02-23T05:00:00Z,2011-03-07T05:00:00Z,2011-03-02T05:00:00Z,1,3399.99,3399.99,271.9992,2/1/2011 118 | Bikes,2011-02-23T05:00:00Z,2011-03-07T05:00:00Z,2011-03-02T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 119 | Bikes,2011-02-23T05:00:00Z,2011-03-07T05:00:00Z,2011-03-02T05:00:00Z,1,3399.99,3399.99,271.9992,2/1/2011 120 | Bikes,2011-02-23T05:00:00Z,2011-03-07T05:00:00Z,2011-03-02T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 121 | Bikes,2011-02-24T05:00:00Z,2011-03-08T05:00:00Z,2011-03-03T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 122 | Bikes,2011-02-24T05:00:00Z,2011-03-08T05:00:00Z,2011-03-03T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 123 | Bikes,2011-02-25T05:00:00Z,2011-03-09T05:00:00Z,2011-03-04T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 124 | Bikes,2011-02-25T05:00:00Z,2011-03-09T05:00:00Z,2011-03-04T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 125 | Bikes,2011-02-26T05:00:00Z,2011-03-10T05:00:00Z,2011-03-05T05:00:00Z,1,699.0982,699.0982,55.9279,2/1/2011 126 | Bikes,2011-02-26T05:00:00Z,2011-03-10T05:00:00Z,2011-03-05T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 127 | Bikes,2011-02-26T05:00:00Z,2011-03-10T05:00:00Z,2011-03-05T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 128 | Bikes,2011-02-26T05:00:00Z,2011-03-10T05:00:00Z,2011-03-05T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 129 | 
Bikes,2011-02-26T05:00:00Z,2011-03-10T05:00:00Z,2011-03-05T05:00:00Z,1,699.0982,699.0982,55.9279,2/1/2011 130 | Bikes,2011-02-26T05:00:00Z,2011-03-10T05:00:00Z,2011-03-05T05:00:00Z,1,3374.99,3374.99,269.9992,2/1/2011 131 | Bikes,2011-02-26T05:00:00Z,2011-03-10T05:00:00Z,2011-03-05T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 132 | Bikes,2011-02-26T05:00:00Z,2011-03-10T05:00:00Z,2011-03-05T05:00:00Z,1,699.0982,699.0982,55.9279,2/1/2011 133 | Bikes,2011-02-27T05:00:00Z,2011-03-11T05:00:00Z,2011-03-06T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 134 | Bikes,2011-02-27T05:00:00Z,2011-03-11T05:00:00Z,2011-03-06T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 135 | Bikes,2011-02-27T05:00:00Z,2011-03-11T05:00:00Z,2011-03-06T05:00:00Z,1,3399.99,3399.99,271.9992,2/1/2011 136 | Bikes,2011-02-27T05:00:00Z,2011-03-11T05:00:00Z,2011-03-06T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 137 | Bikes,2011-02-27T05:00:00Z,2011-03-11T05:00:00Z,2011-03-06T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 138 | Bikes,2011-02-27T05:00:00Z,2011-03-11T05:00:00Z,2011-03-06T05:00:00Z,1,3399.99,3399.99,271.9992,2/1/2011 139 | Bikes,2011-02-27T05:00:00Z,2011-03-11T05:00:00Z,2011-03-06T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 140 | Bikes,2011-02-28T05:00:00Z,2011-03-12T05:00:00Z,2011-03-07T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 141 | Bikes,2011-02-28T05:00:00Z,2011-03-12T05:00:00Z,2011-03-07T05:00:00Z,1,3374.99,3374.99,269.9992,2/1/2011 142 | Bikes,2011-02-28T05:00:00Z,2011-03-12T05:00:00Z,2011-03-07T05:00:00Z,1,3374.99,3374.99,269.9992,2/1/2011 143 | Bikes,2011-02-28T05:00:00Z,2011-03-12T05:00:00Z,2011-03-07T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 144 | Bikes,2011-02-28T05:00:00Z,2011-03-12T05:00:00Z,2011-03-07T05:00:00Z,1,3578.27,3578.27,286.2616,2/1/2011 145 | Bikes,2011-02-28T05:00:00Z,2011-03-12T05:00:00Z,2011-03-07T05:00:00Z,1,3374.99,3374.99,269.9992,2/1/2011 146 | -------------------------------------------------------------------------------- 
/Chapter03/R_Code/Models/model.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Apress/adv-analytics-in-power-bi-w-r-and-python/1047beaa566a4aa7866fa41fd51dcc5e151b4d58/Chapter03/R_Code/Models/model.rds -------------------------------------------------------------------------------- /Chapter03/R_Code/ScoreData.R: -------------------------------------------------------------------------------- 1 | library(tidyverse) 2 | 3 | # Load the saved model and the housing data (both paths are relative to the working directory). 4 | model <- readRDS("./Models/model.rds") 5 | boston_housing <- read_csv("./Data/BostonHousingInfo.csv") 6 | 7 | # Keep only the four columns that are passed to predict(). 8 | model_data <- boston_housing[, c("crim","rm","tax","lstat")] 9 | pred_medv <- predict(model, model_data) 10 | 11 | # Model inputs and their predictions side by side. 12 | final_output <- cbind(model_data, pred_medv) -------------------------------------------------------------------------------- /Chapter03/R_Code/UseRegularExpressionToFilterBadEmailAddresses.R: -------------------------------------------------------------------------------- 1 | library(tidyverse) 2 | library(stringr) 3 | 4 | # setwd("") stops the script with "cannot change working directory", so the call is left as a placeholder: 5 | # uncomment it and fill in the folder that contains ./Data if the working directory is not already set. 6 | # setwd("<path-to-R_Code-folder>") 7 | goodemails_raw <- read_csv("./Data/EmailAddresses.csv") 8 | 9 | # Accepted form: a letter, then 4-20 word characters, "@", a domain of letters/digits/dots/hyphens, and a 2-3 letter suffix. 10 | email_pattern <- "^([a-zA-Z][\\w\\_]{4,20})\\@([a-zA-Z0-9.-]+)\\.([a-zA-Z]{2,3})$" 11 | 12 | # str_detect() already returns a logical vector, so it is used directly as the filter condition. 13 | goodemails <- 14 | goodemails_raw %>% 15 | filter(str_detect(Email, email_pattern)) 16 | -------------------------------------------------------------------------------- /Chapter03/R_Code/ch01_R.pbix: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Apress/adv-analytics-in-power-bi-w-r-and-python/1047beaa566a4aa7866fa41fd51dcc5e151b4d58/Chapter03/R_Code/ch01_R.pbix -------------------------------------------------------------------------------- /Chapter05/Chapter03_README.md: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/Apress/adv-analytics-in-power-bi-w-r-and-python/1047beaa566a4aa7866fa41fd51dcc5e151b4d58/Chapter05/Chapter03_README.md -------------------------------------------------------------------------------- /Chapter05/Data/AdventureWorksDW_StarSchema.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Apress/adv-analytics-in-power-bi-w-r-and-python/1047beaa566a4aa7866fa41fd51dcc5e151b4d58/Chapter05/Data/AdventureWorksDW_StarSchema.zip -------------------------------------------------------------------------------- /Chapter05/Data/CreateLoadHistoryLog.sql: -------------------------------------------------------------------------------- 1 | -- Creates the three-column log table in AdventureWorksDW_StarSchema. 2 | USE AdventureWorksDW_StarSchema 3 | 4 | CREATE TABLE [dbo].[LoadHistoryLog] 5 | ( 6 | DATESTAMP DATETIME, 7 | TABLENAME VARCHAR(25), 8 | NUM_RECORDS INT 9 | ) -------------------------------------------------------------------------------- /Chapter05/Data/SQL_to_Populate_database.sql: -------------------------------------------------------------------------------- 1 | -- Each statement copies one table from the current database's dbo schema into AdventureWorksDW_StarSchema with SELECT ... INTO. 2 | 3 | -- DimPromotion: the English* columns are copied under shorter names. 4 | SELECT 5 | [PromotionKey], 6 | EnglishPromotionName AS [PromotionName], 7 | [DiscountPct], 8 | [EnglishPromotionType] AS [PromotionType], 9 | [EnglishPromotionCategory] AS [PromotionCategory], 10 | [StartDate], 11 | [EndDate], 12 | [MinQty], 13 | [MaxQty] 14 | INTO [AdventureWorksDW_StarSchema].[dbo].[DimPromotion] 15 | FROM [dbo].[DimPromotion]; 16 | 17 | 18 | -- DimSalesTerritory: straight copy of four columns. 19 | SELECT 20 | [SalesTerritoryKey], 21 | [SalesTerritoryRegion], 22 | [SalesTerritoryCountry], 23 | [SalesTerritoryGroup] 24 | INTO [AdventureWorksDW_StarSchema].[dbo].[DimSalesTerritory] 25 | FROM [dbo].[DimSalesTerritory]; 26 | 27 | 28 | -- DimProduct: subcategory and category names are joined in; the inner joins leave out products with no matching subcategory or category. 29 | SELECT 30 | p.[ProductKey], 31 | p.[EnglishProductName] AS [ProductName], 32 | sc.EnglishProductSubcategoryName AS [ProductSubcategoryName], 33 | c.EnglishProductCategoryName AS [ProductCategoryName], 34 | p.StandardCost, 35 | p.Color, 36 | p.ListPrice, 37 | p.SizeRange, 38 | p.ModelName, 39 | p.Class, 40 | p.Style, 41 | p.StartDate, 42 | p.EndDate 43 | INTO [AdventureWorksDW_StarSchema].[dbo].[DimProduct] 44 | FROM [dbo].[DimProduct] AS p 45 | INNER JOIN [dbo].[DimProductSubcategory] AS sc 46 | ON sc.ProductSubcategoryKey = p.ProductSubcategoryKey 47 | INNER JOIN [dbo].[DimProductCategory] AS c 48 | ON c.ProductCategoryKey = sc.ProductCategoryKey; 49 | 50 | 51 | -- FactInternetSales: copy of the listed columns. 52 | SELECT 53 | [ProductKey], 54 | [OrderDateKey], 55 | [DueDateKey], 56 | [ShipDateKey], 57 | [CustomerKey], 58 | [PromotionKey], 59 | [SalesTerritoryKey], 60 | [OrderQuantity], 61 | [SalesAmount], 62 | [TaxAmt] 63 | INTO [AdventureWorksDW_StarSchema].[dbo].[FactInternetSales] 64 | FROM [dbo].[FactInternetSales]; 65 | 66 | 
67 | -- DimCustomer: EnglishEducation is copied as Education; the other columns keep their names. 68 | SELECT 69 | [CustomerKey], 70 | [Title], 71 | [FirstName], 72 | [MiddleName], 73 | [LastName], 74 | [BirthDate], 75 | [MaritalStatus], 76 | [Suffix], 77 | [Gender], 78 | [EmailAddress], 79 | [YearlyIncome], 80 | [TotalChildren], 81 | [NumberChildrenAtHome], 82 | [EnglishEducation] AS [Education], 83 | [HouseOwnerFlag], 84 | [NumberCarsOwned], 85 | [AddressLine1], 86 | [AddressLine2], 87 | [Phone], 88 | [DateFirstPurchase], 89 | [CommuteDistance] 90 | INTO [AdventureWorksDW_StarSchema].[dbo].[DimCustomer] 91 | FROM [dbo].[DimCustomer]; 92 | 93 | 94 | -- DimDate: FullDateAlternateKey and the English day/month name columns are copied under shorter names. 95 | SELECT 96 | [DateKey], 97 | [FullDateAlternateKey] AS [Date], 98 | [DayNumberOfWeek], 99 | [EnglishDayNameOfWeek] AS [WeekdayName], 100 | [DayNumberOfMonth], 101 | [DayNumberOfYear], 102 | [WeekNumberOfYear], 103 | [EnglishMonthName] AS [MonthName], 104 | [MonthNumberOfYear], 105 | [CalendarQuarter], 106 | [CalendarYear], 107 | [CalendarSemester] 108 | INTO [AdventureWorksDW_StarSchema].[dbo].[DimDate] 109 | FROM [dbo].[DimDate]; -------------------------------------------------------------------------------- /Chapter05/Python/ReadLog_DimDate.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | from sqlalchemy import create_engine 3 | from datetime import datetime 4 | 5 | tablename = "DimDate" 6 | 7 | conn =
create_engine("mssql+pyodbc://SQLServer2017") 8 | 9 | df_read = pd.read_sql_table(tablename, conn) 10 | 11 | datestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S') 12 | num_records = df_read.shape[0] 13 | 14 | dict_insert = { 15 | "DATESTAMP":[datestamp], 16 | "TABLENAME":[tablename], 17 | "NUM_RECORDS":[num_records]} 18 | 19 | df_insert = pd.DataFrame.from_dict(dict_insert) 20 | 21 | df_insert.to_sql( 22 | name='LoadHistoryLog', 23 | con=conn, 24 | index=False, 25 | if_exists = 'append' 26 | ) -------------------------------------------------------------------------------- /Chapter05/Python/ReadLog_DimProduct.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | from sqlalchemy import create_engine 3 | from datetime import datetime 4 | 5 | tablename = "DimProduct" 6 | 7 | conn = create_engine("mssql+pyodbc://SQLServer2017") 8 | 9 | df_read = pd.read_sql_table(tablename, conn) 10 | 11 | datestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S') 12 | num_records = df_read.shape[0] 13 | 14 | dict_insert = { 15 | "DATESTAMP":[datestamp], 16 | "TABLENAME":[tablename], 17 | "NUM_RECORDS":[num_records]} 18 | df_insert = pd.DataFrame.from_dict(dict_insert) 19 | 20 | df_insert.to_sql( 21 | name='LoadHistoryLog', 22 | con=conn, 23 | index=False, 24 | if_exists = 'append' 25 | ) -------------------------------------------------------------------------------- /Chapter05/Python/ReadLog_DimPromotion.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | from sqlalchemy import create_engine 3 | from datetime import datetime 4 | 5 | tablename = "DimPromotion" 6 | 7 | conn = create_engine("mssql+pyodbc://SQLServer2017") 8 | 9 | df_read = pd.read_sql_table(tablename, conn) 10 | 11 | datestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S') 12 | num_records = df_read.shape[0] 13 | 14 | dict_insert = { 15 | "DATESTAMP":[datestamp], 16 | "TABLENAME":[tablename], 17 | 
"NUM_RECORDS":[num_records]} 18 | df_insert = pd.DataFrame.from_dict(dict_insert) 19 | 20 | df_insert.to_sql( 21 | name='LoadHistoryLog', 22 | con=conn, 23 | index=False, 24 | if_exists = 'append' 25 | ) -------------------------------------------------------------------------------- /Chapter05/Python/ReadLog_DimSalesTerritory.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | from sqlalchemy import create_engine 3 | from datetime import datetime 4 | 5 | tablename = "DimSalesTerritory" 6 | 7 | conn = create_engine("mssql+pyodbc://SQLServer2017") 8 | 9 | df_read = pd.read_sql_table(tablename, conn) 10 | 11 | datestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S') 12 | num_records = df_read.shape[0] 13 | 14 | dict_insert = { 15 | "DATESTAMP":[datestamp], 16 | "TABLENAME":[tablename], 17 | "NUM_RECORDS":[num_records]} 18 | df_insert = pd.DataFrame.from_dict(dict_insert) 19 | 20 | df_insert.to_sql( 21 | name='LoadHistoryLog', 22 | con=conn, 23 | index=False, 24 | if_exists = 'append' 25 | ) -------------------------------------------------------------------------------- /Chapter05/Python/ReadLog_FactInternetSales.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | from sqlalchemy import create_engine 3 | from datetime import datetime 4 | 5 | tablename = "FactInternetSales" 6 | 7 | conn = create_engine("mssql+pyodbc://SQLServer2017") 8 | 9 | df_read = pd.read_sql_table(tablename, conn) 10 | 11 | datestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S') 12 | num_records = df_read.shape[0] 13 | 14 | dict_insert = { 15 | "DATESTAMP":[datestamp], 16 | "TABLENAME":[tablename], 17 | "NUM_RECORDS":[num_records]} 18 | df_insert = pd.DataFrame.from_dict(dict_insert) 19 | 20 | df_insert.to_sql( 21 | name='LoadHistoryLog', 22 | con=conn, 23 | index=False, 24 | if_exists = 'append' 25 | ) 
-------------------------------------------------------------------------------- /Chapter05/Python/ch03_Python.pbix: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Apress/adv-analytics-in-power-bi-w-r-and-python/1047beaa566a4aa7866fa41fd51dcc5e151b4d58/Chapter05/Python/ch03_Python.pbix -------------------------------------------------------------------------------- /Chapter05/R/ReadLog_DimDate.R: -------------------------------------------------------------------------------- 1 | library(tidyverse) 2 | library(RODBC) 3 | library(lubridate) 4 | 5 | table_name <- "DimDate" 6 | sql <- paste0( 7 | "SELECT * FROM dbo.", 8 | table_name) 9 | 10 | conn <- odbcConnect(dsn = "SQLServer2017") 11 | df_read <- sqlQuery(channel = conn, query = sql) 12 | 13 | datestamp <- now() 14 | num_records <- nrow(df_read) 15 | 16 | list_insert <- list( 17 | "DATESTAMP" = datestamp, 18 | "TABLENAME" = table_name, 19 | "NUM_RECORDS" = num_records) 20 | 21 | df_insert <- as_data_frame(list_insert) 22 | 23 | sqlSave( 24 | channel = conn, 25 | dat = df_insert, 26 | tablename = "LoadHistoryLog", 27 | append = TRUE, 28 | rownames=FALSE) 29 | 30 | odbcClose(conn) -------------------------------------------------------------------------------- /Chapter05/R/ReadLog_DimProduct.R: -------------------------------------------------------------------------------- 1 | library(tidyverse) 2 | library(RODBC) 3 | library(lubridate) 4 | 5 | table_name <- "DimProduct" 6 | sql <- paste0( 7 | "SELECT * FROM dbo.", 8 | table_name) 9 | 10 | conn <- odbcConnect(dsn = "SQLServer2017") 11 | df_read <- sqlQuery(channel = conn, query = sql) 12 | 13 | datestamp <- now() 14 | num_records <- nrow(df_read) 15 | 16 | list_insert <- list( 17 | "DATESTAMP" = datestamp, 18 | "TABLENAME" = table_name, 19 | "NUM_RECORDS" = num_records) 20 | 21 | df_insert <- as_data_frame(list_insert) 22 | 23 | sqlSave( 24 | channel = conn, 25 | dat = df_insert, 26 | tablename = 
"LoadHistoryLog", 27 | append = TRUE, 28 | rownames=FALSE) 29 | 30 | odbcClose(conn) -------------------------------------------------------------------------------- /Chapter05/R/ReadLog_DimPromotion.R: -------------------------------------------------------------------------------- 1 | library(tidyverse) 2 | library(RODBC) 3 | library(lubridate) 4 | 5 | table_name <- "DimPromotion" 6 | sql <- paste0( 7 | "SELECT * FROM dbo.", 8 | table_name) 9 | 10 | conn <- odbcConnect(dsn = "SQLServer2017") 11 | df_read <- sqlQuery(channel = conn, query = sql) 12 | 13 | datestamp <- now() 14 | num_records <- nrow(df_read) 15 | 16 | list_insert <- list( 17 | "DATESTAMP" = datestamp, 18 | "TABLENAME" = table_name, 19 | "NUM_RECORDS" = num_records) 20 | 21 | df_insert <- as_data_frame(list_insert) 22 | 23 | sqlSave( 24 | channel = conn, 25 | dat = df_insert, 26 | tablename = "LoadHistoryLog", 27 | append = TRUE, 28 | rownames=FALSE) 29 | 30 | odbcClose(conn) -------------------------------------------------------------------------------- /Chapter05/R/ReadLog_DimSalesTerritory.R: -------------------------------------------------------------------------------- 1 | library(tidyverse) 2 | library(RODBC) 3 | library(lubridate) 4 | 5 | table_name <- "DimSalesTerritory" 6 | sql <- paste0( 7 | "SELECT * FROM dbo.", 8 | table_name) 9 | 10 | conn <- odbcConnect(dsn = "SQLServer2017") 11 | df_read <- sqlQuery(channel = conn, query = sql) 12 | 13 | datestamp <- now() 14 | num_records <- nrow(df_read) 15 | 16 | list_insert <- list( 17 | "DATESTAMP" = datestamp, 18 | "TABLENAME" = table_name, 19 | "NUM_RECORDS" = num_records) 20 | 21 | df_insert <- as_data_frame(list_insert) 22 | 23 | sqlSave( 24 | channel = conn, 25 | dat = df_insert, 26 | tablename = "LoadHistoryLog", 27 | append = TRUE, 28 | rownames=FALSE) 29 | 30 | odbcClose(conn) -------------------------------------------------------------------------------- /Chapter05/R/ReadLog_FactInternetSales.R: 
-------------------------------------------------------------------------------- 1 | library(tidyverse) 2 | library(RODBC) 3 | library(lubridate) 4 | 5 | table_name <- "FactInternetSales" 6 | sql <- paste0( 7 | "SELECT * FROM dbo.", 8 | table_name) 9 | 10 | conn <- odbcConnect(dsn = "SQLServer2017") 11 | df_read <- sqlQuery(channel = conn, query = sql) 12 | 13 | datestamp <- now() 14 | num_records <- nrow(df_read) 15 | 16 | list_insert <- list( 17 | "DATESTAMP" = datestamp, 18 | "TABLENAME" = table_name, 19 | "NUM_RECORDS" = num_records) 20 | 21 | df_insert <- as_data_frame(list_insert) 22 | 23 | sqlSave( 24 | channel = conn, 25 | dat = df_insert, 26 | tablename = "LoadHistoryLog", 27 | append = TRUE, 28 | rownames=FALSE) 29 | 30 | odbcClose(conn) -------------------------------------------------------------------------------- /Chapter05/R/ch03_R.pbix: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Apress/adv-analytics-in-power-bi-w-r-and-python/1047beaa566a4aa7866fa41fd51dcc5e151b4d58/Chapter05/R/ch03_R.pbix -------------------------------------------------------------------------------- /Chapter06/Python/GetVariableInfo.py: -------------------------------------------------------------------------------- 1 | 2 | # Retrieve a variable information based on the 3 | # parameters provided. 
The data is returned as 4 | # a list of tuples 5 | v15 = censusdata.search( 6 | 'acs5',2015,'concept', 7 | 'PLACE OF BIRTH BY AGE IN THE UNITED STATES') 8 | 9 | # Converts the list of tuples to a data frame 10 | df_V15 = pd.DataFrame( 11 | v15, columns = ['Name', 'Concept', 'Label']) 12 | 13 | # Saves the data frame as a csv file 14 | df_V15.to_csv("Test.csv") -------------------------------------------------------------------------------- /Chapter06/Python/PythonScript.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import censusdata 3 | 4 | cns_vars = [ 5 | "B06001_001E","B06001_002E","B06001_003E","B06001_004E", 6 | "B06001_005E","B06001_006E","B06001_007E","B06001_008E", 7 | "B06001_009E","B06001_010E","B06001_011E","B06001_012E" 8 | ] 9 | 10 | geographies = [('state', '18'),('county', '*')] 11 | 12 | IN_POP_BY_COUNTY_BY_AGE = censusdata.download( 13 | 'acs5', 2015, censusdata.censusgeo(geographies), 14 | cns_vars) 15 | 16 | IN_POP_BY_COUNTY_BY_AGE = IN_POP_BY_COUNTY_BY_AGE.reset_index() 17 | 18 | new_names = { 19 | 'index':'Geography','B06001_001E':'Total', 20 | 'B06001_002E':'Under 5','B06001_003E':'5 to 17', 21 | 'B06001_004E':'18 to 24','B06001_005E':'25 to 34', 22 | 'B06001_006E':'35 to 44','B06001_007E':'45 to 54', 23 | 'B06001_008E':'55 to 59','B06001_009E':'60 and 61', 24 | 'B06001_010E':'62 to 64','B06001_011E':'65 to 74', 25 | 'B06001_012E':'75+' 26 | } 27 | 28 | IN_POP_BY_COUNTY_BY_AGE = 29 | IN_POP_BY_COUNTY_BY_AGE.rename(columns=new_names) 30 | -------------------------------------------------------------------------------- /Chapter06/Python/ch04_Python.pbix: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Apress/adv-analytics-in-power-bi-w-r-and-python/1047beaa566a4aa7866fa41fd51dcc5e151b4d58/Chapter06/Python/ch04_Python.pbix -------------------------------------------------------------------------------- 
/Chapter06/R/CodeSnippets.R: -------------------------------------------------------------------------------- 1 | library(tidyverse) 2 | library(scales) 3 | 4 | #https://dplyr.tidyverse.org/reference/select.html 5 | # Talk about Data types 6 | 7 | 8 | setwd("C:/Users/ryanw/Downloads/Advanced Analytics in Power BI with R and Python (3)/Chapter06/R") 9 | population_data <- read_csv("population_rawdata.csv") 10 | 11 | # Verify that E stands for estimates and M stands for margin of error 12 | 13 | popdata_estimates <- 14 | select(population_data, County, State, B02001_002E, 15 | B02001_003E, B02001_004E, B02001_005E, B02001_006E, 16 | B02001_007E, B02001_008E) 17 | 18 | popdata_estimates <- select(population_data, County, State, ends_with("E")) 19 | 20 | 21 | popdata_rename <- 22 | rename( 23 | popdata_estimates, 24 | `White` = B02001_002E, 25 | `Black` = B02001_003E, 26 | `American Indian` = B02001_004E, 27 | `Asian` = B02001_005E, 28 | `Pacific Islander` = B02001_006E, 29 | `Other Race` = B02001_007E, 30 | `Two or More Races` = B02001_008E 31 | ) 32 | 33 | 34 | # Calcuated Columns 35 | popdata_rowtotals <- 36 | mutate(popdata_rename, 37 | All = White + Black + `American Indian` + Asian + `Pacific Islander` + `Other Race` + `Two or More Races`) 38 | 39 | popdata_rowtotals <- 40 | mutate(popdata_rename, 41 | All = rowSums(popdata_rename[3:9])) 42 | View(popdata_rowtotals) 43 | 44 | popdata_percents <- 45 | mutate(popdata_rename, 46 | `Percent White` = White / (White + Black + `American Indian` + Asian + `Pacific Islander` + `Other Race` + `Two or More Races`), 47 | `Percent Black` = Black / (White + Black + `American Indian` + Asian + `Pacific Islander` + `Other Race` + `Two or More Races`), 48 | `Percent American Indian` = `American Indian` / (White + Black + `American Indian` + Asian + `Pacific Islander` + `Other Race` + `Two or More Races`), 49 | `Percent Asian` = Asian / (White + Black + `American Indian` + Asian + `Pacific Islander` + `Other Race` + `Two or More 
Races`), 50 | `Percent Pacific Islander` = `Pacific Islander` / (White + Black + `American Indian` + Asian + `Pacific Islander` + `Other Race` + `Two or More Races`), 51 | `Percent Other Race` = `Other Race` / (White + Black + `American Indian` + Asian + `Pacific Islander` + `Other Race` + `Two or More Races`), 52 | `Percent Two or More Races` = `Two or More Races` / (White + Black + `American Indian` + Asian + `Pacific Islander` + `Other Race` + `Two or More Races`) 53 | ) 54 | 55 | popdata_percents <- 56 | mutate(popdata_rename, 57 | `Percent White` = White / rowSums(popdata_rename[3:9]), 58 | `Percent Black` = Black / rowSums(popdata_rename[3:9]), 59 | `Percent American Indian` = `American Indian` / rowSums(popdata_rename[3:9]), 60 | `Percent Asian` = Asian / rowSums(popdata_rename[3:9]), 61 | `Percent Pacific Islander` = `Pacific Islander` / rowSums(popdata_rename[3:9]), 62 | `Percent Other Race` = `Other Race` / rowSums(popdata_rename[3:9]), 63 | `Percent Two or More Races` = `Two or More Races` / rowSums(popdata_rename[3:9]) 64 | ) 65 | 66 | popdata_percents_transumte <- 67 | transmute(popdata_rename, 68 | County, 69 | State, 70 | `Percent White` = White / rowSums(popdata_rename[3:9]), 71 | `Percent Black` = Black / rowSums(popdata_rename[3:9]), 72 | `Percent American Indian` = `American Indian` / rowSums(popdata_rename[3:9]), 73 | `Percent Asian` = Asian / rowSums(popdata_rename[3:9]), 74 | `Percent Pacific Islander` = `Pacific Islander` / rowSums(popdata_rename[3:9]), 75 | `Percent Other Race` = `Other Race` / rowSums(popdata_rename[3:9]), 76 | `Percent Two or More Races` = `Two or More Races` / rowSums(popdata_rename[3:9]) 77 | ) 78 | View(popdata_percents_transumte) 79 | 80 | popdata_percents_transumte <- 81 | transmute(popdata_rename, 82 | County, 83 | State, 84 | `Percent White` = percent(White / rowSums(popdata_rename[3:9])), 85 | `Percent Black` = percent(Black / rowSums(popdata_rename[3:9])), 86 | `Percent American Indian` = percent(`American Indian` 
/ rowSums(popdata_rename[3:9])), 87 | `Percent Asian` = percent(Asian / rowSums(popdata_rename[3:9])), 88 | `Percent Pacific Islander` = percent(`Pacific Islander` / rowSums(popdata_rename[3:9])), 89 | `Percent Other Race` = percent(`Other Race` / rowSums(popdata_rename[3:9])), 90 | `Percent Two or More Races` = percent(`Two or More Races` / rowSums(popdata_rename[3:9])) 91 | ) 92 | View(popdata_percents_transumte) 93 | 94 | 95 | popdata_bw <- 96 | transmute(popdata_rename, 97 | County, 98 | State, 99 | White, 100 | Black, 101 | `Black and White` = rowSums(popdata_rename[,c(3,4)]) 102 | ) 103 | View(popdata_bw) 104 | 105 | popdata_bw <- 106 | transmute(popdata_rename, 107 | County, 108 | State, 109 | White, 110 | Black, 111 | `Black and White` = rowSums(popdata_rename[,c("White","Black")]) 112 | ) 113 | View(popdata_bw) 114 | 115 | # Talk about chaining here 116 | 117 | 118 | popdata_group <- 119 | popdata_rename %>% 120 | group_by(State) %>% 121 | summarize( 122 | White = sum(White, na.rm=TRUE), 123 | Black = sum(Black, na.rm=TRUE), 124 | `American Indian`= sum(`American Indian`, na.rm=TRUE), 125 | Asian = sum(Asian, na.rm=TRUE), 126 | `Pacific Islander`= sum(`Pacific Islander`, na.rm=TRUE), 127 | `Other Race` = sum(`Other Race`, na.rm=TRUE), 128 | `Two or More Races` = sum(`Two or More Races`, na.rm=TRUE), 129 | `Number of Counties`=n()) 130 | View(popdata_group) 131 | 132 | popdata_group <- 133 | popdata_rename %>% 134 | group_by(State) %>% 135 | summarize_if(is.numeric, sum, na.rm = TRUE) 136 | View(popdata_group) 137 | 138 | 139 | popdata_unpivot <- 140 | popdata_group %>% 141 | pivot_longer( 142 | !State, 143 | names_to = "Race", 144 | values_to = "Population" 145 | ) 146 | View(popdata_unpivot) 147 | 148 | 149 | chart_data <- 150 | popdata_unpivot %>% 151 | filter(State == "Michigan") %>% 152 | transmute(Race=fct_reorder(Race, Population), Population) 153 | 154 | ggplot(data = chart_data, aes(x=Race, y=Population, label=comma(Population))) + 155 | 
geom_bar(stat="Identity", fill="red") + 156 | geom_label() + 157 | coord_flip() + 158 | scale_y_continuous(labels=NULL) + 159 | theme_minimal() + 160 | labs( 161 | title = "Main Title" 162 | ,subtitle = "Sub Title" 163 | ,caption = "My Caption" 164 | ) -------------------------------------------------------------------------------- /Chapter06/R/GetData.R: -------------------------------------------------------------------------------- 1 | library(tidycensus) 2 | library(tidyverse) 3 | # The Census API key is read from the CENSUS_API_KEY environment variable so no real key is committed. 4 | census_api_key(Sys.getenv("CENSUS_API_KEY"), overwrite = FALSE, install = FALSE) 5 | cns_vars <- c("B02001_002","B02001_003","B02001_004","B02001_005","B02001_006","B02001_007","B02001_008") 6 | 7 | population_rawdata <- 8 | suppressMessages( 9 | get_acs( 10 | geography = "county", 11 | variables = cns_vars, 12 | survey = "acs5", 13 | year = 2017, 14 | output = "wide" 15 | ) 16 | ) 17 | 18 | write_csv(population_rawdata,"population_rawdata.csv") 19 | -------------------------------------------------------------------------------- /Chapter06/R/RScript.R: -------------------------------------------------------------------------------- 1 | library(tidycensus) 2 | library(tidyverse) 3 | 4 | census_api_key("", overwrite = FALSE, install = FALSE) 5 | cns_vars <- c("B06001_001E","B06001_002E","B06001_003E","B06001_004E","B06001_005E","B06001_006E","B06001_007E","B06001_008E","B06001_009E","B06001_010E","B06001_011E","B06001_012E") 6 | 7 | IN_POP_BY_COUNTY_BY_AGE <- 8 | suppressMessages( 9 | get_acs( 10 | geography = "county", 11 | state = "IN", 12 | variables = cns_vars, 13 | survey = "acs1", 14 | year = 2015, 15 | output = "wide" 16 | ) 17 | ) 18 | 19 | IN_POP_BY_COUNTY_BY_AGE = 20 | rename( 21 | IN_POP_BY_COUNTY_BY_AGE, 22 | `Total`=B06001_001E,`Under 5`=B06001_002E, 23 | `5 to 17`=B06001_003E, 24 | `18 to 24`=B06001_004E, 25 | `25 to 34`=B06001_005E, 26 | `35 to 44`=B06001_006E, 27 | `45 to 54`=B06001_007E, 28 | `55 to 59`=B06001_008E, 29 | `60 and 61`=B06001_009E, 30 |
`62 to 64`=B06001_010E, 31 | `65 to 74`=B06001_011E, 32 | `75+`=B06001_012E 33 | ) -------------------------------------------------------------------------------- /Chapter06/R/RScript_Old.R: -------------------------------------------------------------------------------- 1 | library(tidycensus) 2 | library(tidyverse) 3 | 4 | census_api_key("aXXXXXXXXXXXXXXXXXXXf") 5 | 6 | cns_vars <- c(`Total`="B06001_001E",`Under 5`="B06001_002E",`5 to 17`="B06001_003E",`18 to 24`="B06001_004E",`25 to 34`="B06001_005E",`35 to 44`="B06001_006E",`45 to 54`="B06001_007E",`55 to 59`="B06001_008E",`60 and 61`="B06001_009E",`62 to 64`="B06001_010E",`65 to 74`="B06001_011E",`75+`="B06001_012E") 7 | 8 | get_data <- function(x) { 9 | df <- suppressMessages(get_acs( 10 | geography = "county", 11 | state = "IN", 12 | variables = cns_vars, 13 | survey = "acs1", 14 | year = x, 15 | output = "wide" 16 | ))%>% 17 | mutate(Year = x) 18 | } 19 | 20 | years <- 2015:2017 21 | 22 | IN_POP_BY_COUNTY_BY_AGE <- map_df(years, get_data) -------------------------------------------------------------------------------- /Chapter06/R/RandomSnippets.R: -------------------------------------------------------------------------------- 1 | library(tidycensus) 2 | library(tidyverse) 3 | 4 | census_api_key("", overwrite = FALSE, install = FALSE) 5 | 6 | v17 <- load_variables(2017, "acs5", cache = TRUE) 7 | 8 | race_vars <- filter(v17, concept == "RACE") 9 | -------------------------------------------------------------------------------- /Chapter06/R/ch04_R.pbix: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Apress/adv-analytics-in-power-bi-w-r-and-python/1047beaa566a4aa7866fa41fd51dcc5e151b4d58/Chapter06/R/ch04_R.pbix -------------------------------------------------------------------------------- /Chapter06/R/race.csv: -------------------------------------------------------------------------------- 1 | name,label,concept 2 | 
B02001_001,Estimate!!Total,RACE 3 | B02001_002,Estimate!!Total!!White alone,RACE 4 | B02001_003,Estimate!!Total!!Black or African American alone,RACE 5 | B02001_004,Estimate!!Total!!American Indian and Alaska Native alone,RACE 6 | B02001_005,Estimate!!Total!!Asian alone,RACE 7 | B02001_006,Estimate!!Total!!Native Hawaiian and Other Pacific Islander alone,RACE 8 | B02001_007,Estimate!!Total!!Some other race alone,RACE 9 | B02001_008,Estimate!!Total!!Two or more races,RACE 10 | B02001_009,Estimate!!Total!!Two or more races!!Two races including Some other race,RACE 11 | B02001_010,Estimate!!Total!!Two or more races!!Two races excluding Some other race and three or more races,RACE 12 | -------------------------------------------------------------------------------- /Chapter06/README.md: -------------------------------------------------------------------------------- 1 | # Chapter 4 README -------------------------------------------------------------------------------- /Chapter07/Data/Comments.csv: -------------------------------------------------------------------------------- 1 | Debtor ID,Date,Comment 2 | 120307,9/5/2019,Debtor's new cell phone number is 317-555-5555. Can call after 6:00 p.m. 3 | 130939,9/5/2019,Corrected SSN number to 435-55-5555 4 | 126783,9/5/2019,New cell phone number is 678-555-5555 and new work number is 404-555-5555 5 | 137561,9/5/2019,Through skip tracing found that possible new number is 202-555-5555 6 | 139214,9/5/2019,Original SSN was incorrect. Correct one is 312-55-5555. 7 | 146995,9/5/2019,Christina's new phone number is 433-444-4444 8 | 154539,9/5/2019,Called John 3 times on 8/1/2019 @ 311-111-1234 but the number was disconnected 9 | 115511,9/5/2019,333-33-1234 is not the correct Luke's correct SSNs. 
Debtor has been put in the skip tracing que 10 | 193710,9/5/2019,Phone number 223-443-5555 and SSN 443-23-1234 for Tina have been identified as incorrect 11 | 188565,9/5/2019,Ivan's number was updated from 345-432-5555 to 345-467-5555 12 | -------------------------------------------------------------------------------- /Chapter07/Data/DimEmployee.csv: -------------------------------------------------------------------------------- 1 | Employee ID,Name,Gender,DOB,Employment Begin Date 2 | 78,Christy Wood,F,2/3/1974,10/23/2016 3 | 80,Tonya D Ross,F,1/12/1965,12/26/2015 4 | 85,Curtis E Powell,M,2/22/1967,9/13/2013 5 | 86,Brett O Long,M,5/16/1976,10/23/2014 6 | 89,Alison P Flores,F,12/19/1974,10/9/2013 7 | 90,Philip L Washington,M,3/20/1972,7/12/2013 8 | 92,Jon L Simmons,M,3/4/1975,5/8/2015 9 | 94,Ricky O Gonzales,M,9/18/1968,1/9/2015 10 | 95,Erika S,F,10/16/1971,1/29/2016 11 | 96,Jonathon K,M,3/18/1967,2/20/2013 12 | 98,Ian O Griffin,M,1/22/1976,5/18/2015 13 | 99,Maria G Diaz,F,6/20/1976,11/2/2015 14 | 100,Mindy B Hayes,F,3/24/1976,1/7/2016 15 | 101,Carl R Myers,M,9/21/1966,6/13/2014 16 | 102,Pamela B Ford,F,4/13/1971,4/23/2015 17 | 103,Vanessa Hamilton,F,2/11/1970,6/10/2016 18 | 104,Casey T Graham,M,10/16/1972,12/3/2016 19 | 105,Russell V Sullivan,M,1/12/1967,11/20/2013 20 | 106,Casey N Wallace,F,2/4/1975,10/3/2013 21 | 107,Kristopher Q Woods,M,12/26/1975,5/17/2013 22 | 108,Valerie P Cole,F,11/14/1975,6/25/2015 23 | 109,Gabriel M West,M,8/11/1967,2/22/2014 24 | 110,Bobby I Jordan,M,12/6/1971,4/10/2015 25 | 111,Angel E Owens,F,11/19/1973,9/26/2013 26 | 112,Michele M Reynolds,F,9/1/1967,9/2/2015 27 | 113,Shawna V Fisher,F,4/26/1971,7/29/2015 28 | 114,Adrian Ellis,M,9/10/1971,2/6/2015 29 | 117,Clint M Mcdonald,M,10/23/1975,6/4/2014 30 | 118,Trisha Cruz,F,8/7/1966,11/13/2014 31 | 119,Latoya Marshall,F,3/24/1971,4/11/2014 32 | 120,Caleb Ortiz,M,11/24/1972,6/5/2016 33 | 121,Jaime I Gomez,F,1/5/1970,1/28/2015 34 | 123,Sabrina Q Freeman,F,4/19/1972,7/13/2013 35 | 124,Willie 
N Wells,M,1/14/1969,7/13/2016 36 | 125,Rebekah F Webb,F,10/20/1965,9/23/2016 37 | 126,Danny Simpson,M,6/8/1974,8/14/2014 38 | 127,Candice N Stevens,F,11/4/1974,1/27/2016 39 | 128,Wayne,M,9/16/1975,8/30/2013 40 | 132,Tabitha C Crawford,F,10/13/1975,10/25/2013 41 | 134,Mitchell K Boyd,M,12/17/1972,2/9/2016 42 | 136,Barry,M,5/17/1967,6/27/2014 43 | 139,Bridget Dixon,F,12/14/1965,1/23/2014 44 | 140,Kendra J Ramos,F,3/13/1971,12/17/2015 45 | 142,Darren F,M,10/14/1965,5/9/2013 46 | 143,Tia J Gordon,F,12/11/1972,1/1/2016 47 | 145,Linda J Holmes,F,6/16/1973,3/5/2015 48 | 146,Cara U Rice,F,3/3/1965,11/18/2015 49 | 147,Abby Robertson,F,9/7/1974,1/17/2014 50 | 148,Beau A Hunt,M,11/24/1975,6/4/2015 51 | 149,Latasha,F,10/18/1974,11/15/2013 52 | 298,Bradford Q Pena,M,10/15/1974,8/13/2013 53 | 299,Damian K Beck,M,10/17/1975,6/4/2013 54 | 302,Johnnie K Mcdaniel,M,12/5/1967,4/16/2014 55 | 303,Logan F Mendez,M,2/7/1974,12/4/2013 56 | 305,Stuart J Vaughn,M,4/21/1976,10/30/2015 57 | 307,Candy V Dawson,F,9/12/1965,1/21/2016 58 | 308,Chandra Santiago,F,4/21/1976,4/28/2016 59 | 309,Charlene G Norris,F,11/11/1973,7/15/2016 60 | 310,Bryon A Hardy,M,10/9/1970,3/18/2016 61 | 311,Cecil Love,M,1/14/1973,8/20/2015 62 | 313,Darius A Curry,M,5/23/1974,5/8/2016 63 | 314,Helen M Powers,F,4/8/1973,11/7/2013 64 | 315,Jessie,F,5/23/1967,6/16/2015 65 | 316,Elliott Barker,M,3/3/1973,1/23/2013 66 | 317,Garry N Guzman,M,4/24/1968,5/28/2015 67 | 319,Rita A Munoz,F,3/2/1973,7/28/2015 68 | 320,Ryan N Ball,F,6/1/1968,1/15/2015 69 | 321,Tiana Keller,F,1/21/1965,2/20/2014 70 | 322,Tiffanie J Chandler,F,1/11/1965,7/21/2015 71 | 378,Lamont M Walton,M,6/13/1975,7/7/2015 72 | 380,Nathanial Hampton,M,1/15/1970,3/20/2014 73 | 382,Oliver Patton,M,4/25/1965,4/30/2015 74 | 383,Kami Swanson,F,4/24/1976,4/12/2013 75 | 384,Solomon J,M,12/10/1974,11/11/2015 76 | 385,Maranda Francis,F,12/25/1975,6/26/2015 77 | 386,Maureen B Goodman,F,2/17/1970,8/17/2013 78 | 388,Misti V Yates,F,8/22/1965,2/28/2013 79 | 390,Priscilla 
E,F,2/6/1976,4/18/2013 80 | 391,Rhiannon Hodges,F,7/26/1973,4/29/2014 81 | 392,Tami E Rios,F,5/9/1967,12/26/2014 82 | 393,Francisco K Conner,M,7/6/1968,3/19/2016 83 | 395,Camille S Webster,F,2/5/1975,10/16/2015 84 | 396,Jarod B Norman,M,10/17/1974,2/23/2016 85 | 397,Javier U Malone,M,7/20/1974,11/26/2016 86 | 398,Jed B Hammond,M,9/14/1966,3/26/2013 87 | 400,Gwendolyn K Cobb,F,10/23/1969,10/3/2013 88 | 401,Johnathon P Moody,M,3/12/1965,5/11/2016 89 | 403,Lorenzo Blake,M,10/25/1969,9/5/2014 90 | 404,Mike A Maxwell,M,1/26/1971,6/30/2016 91 | 405,Kirsten B Pope,F,10/8/1971,8/13/2014 92 | 408,Lily C,F,8/9/1968,7/9/2016 93 | 410,Isabella L Rupp,F,4/22/1965,1/26/2014 94 | 413,Ethan K Onslow,F,3/21/1965,8/10/2015 95 | 416,Amy A Trefl,F,7/27/1975,3/24/2013 96 | 419,Jai I Lopez,M,3/29/1970,7/29/2015 97 | 422,Anthony V Gonzales,M,7/2/1965,10/9/2015 98 | 424,Taj R Shand,M,7/14/1976,3/15/2015 99 | 428,Hudson R Hollinworth,M,2/4/1975,4/21/2014 100 | 432,Jack Potter,M,2/25/1976,9/3/2015 101 | 434,Piper K Garcia,M,4/2/1970,8/30/2015 102 | 436,Hudson A Onslow,M,11/23/1965,5/25/2014 103 | 440,Sophia M,F,3/12/1976,6/24/2015 104 | 444,Henry G Forlonge,M,10/29/1973,6/21/2013 105 | 446,Stella F Rosenhain,F,6/29/1967,1/1/2016 106 | 448,Kayla Woodcock,F,10/2/1968,9/21/2015 107 | 452,Katie J Darwin,F,4/19/1975,11/9/2013 108 | 454,Alica L Fatnowna,F,12/31/1974,5/21/2013 109 | 458,Eva F Muirden,F,8/27/1974,6/24/2013 110 | 462,Archer Lamble,M,10/14/1966,7/19/2014 111 | -------------------------------------------------------------------------------- /Chapter07/Python/CleanComments.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import re 3 | import os 4 | 5 | def mask_text (unmask_text): 6 | phone_pattern = \ 7 | r"([2-9][0-9]{2})[- .]([0-9]{3})[- .]([0-9]{4})" 8 | ssn_pattern = r"\d{3}-\d{2}-\d{4}" 9 | cleanned_text = re.sub( 10 | phone_pattern, "XXX-XXX-XXXX", unmask_text) 11 | cleanned_text = re.sub( 12 | ssn_pattern, 
"XXX-XX-XXXX", cleanned_text) 13 | return cleanned_text 14 | 15 | os.chdir("") 5 | 6 | df = pd.read_csv("yelp_training_set_review_sample.csv") 7 | df["word_count"] = df["text"].str.split().apply(len) -------------------------------------------------------------------------------- /Chapter07/Python/ScrubNameScript.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import re 3 | import os 4 | 5 | os.chdir(r"") 6 | df = pd.read_csv("DimEmployee.csv") 7 | 8 | namepattern = \ 9 | r'([A-Z][a-z]{1,10}\s[a-zA-Z]\s[A-Z][a-z]{1,10})|' \ 10 | r'([A-Z][a-z]{1,10}\s[A-Z][a-z]{1,10})' 11 | 12 | nameWithOrWithoutMiddleInitial = re.compile(namepattern) 13 | 14 | def scrubName(name): 15 | m = nameWithOrWithoutMiddleInitial.fullmatch(name) 16 | if m: 17 | return m.group(0) 18 | else: 19 | return None 20 | 21 | df["Name"] = df.Name.apply(scrubName) -------------------------------------------------------------------------------- /Chapter07/Python/monitoredProducts.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import re 3 | import os 4 | 5 | os.chdir("") 6 | pattern = re.compile(r"^(561769|561394)(?=(01|22|39)(A|B)\d{1}$)") 7 | 8 | def monitoredProducts(sku): 9 | 10 | m = pattern.match(sku) 11 | if m == True: 12 | return m.group(0) 13 | else: 14 | return "Not Moinitored" 15 | 16 | df = pd.read_csv("ProductionOrders.csv") 17 | 18 | df["Monitored Products"] = df["SKU"].apply(monitoredProducts) 19 | -------------------------------------------------------------------------------- /Chapter07/R/CleanComments.R: -------------------------------------------------------------------------------- 1 | library(tidyverse) 2 | library(stringr) 3 | 4 | setwd("") 5 | 6 | mask_text <- function(unmask_text){ 7 | phone_pattern = "([2-9][0-9]{2})[- .]([0-9]{3})[- .]([0-9]{4})" 8 | ssn_pattern = "\\d{3}-\\d{2}-\\d{4}" 9 | 10 | cleanned_text <- str_replace( 11 | unmask_text, pattern 
= phone_pattern, 12 | replacement = "XXX-XXX-XXXX") 13 | 14 | cleanned_text <- str_replace( 15 | cleanned_text, pattern = ssn_pattern, 16 | replacement = "XXX-XX-XXXX") 17 | 18 | return(cleanned_text) 19 | } 20 | 21 | df <- read_csv("Comments.csv") 22 | 23 | df <- 24 | df %>% 25 | mutate(Comment = mask_text(Comment)) 26 | -------------------------------------------------------------------------------- /Chapter07/R/CountWordAndSentences.R: -------------------------------------------------------------------------------- 1 | library(tidyverse) 2 | library(stringr) 3 | 4 | setwd("") 5 | 6 | df <- read_csv("yelp_training_set_review_sample.csv") 7 | 8 | df <- 9 | df %>% 10 | select(id, business_categories, business_review_count, business_stars, business_state,business_type,stars,text) %>% 11 | mutate(word_count = str_count(text, boundary("word")), sent_count = str_count(text, boundary("sentence"))) 12 | -------------------------------------------------------------------------------- /Chapter07/R/ScrubNameScript.R: -------------------------------------------------------------------------------- 1 | library(tidyverse) 2 | library(stringr) 3 | 4 | setwd("") 5 | 6 | nameWithOrWithoutMiddleInitial = "(^[A-Z][a-z]{1,10}\\s[a-zA-Z]\\s[A-Z][a-z]{1,10}$)|(^[A-Z][a-z]{1,10}\\s[A-Z][a-z]{1,10}$)" 7 | 8 | df <- read_csv("DimEmployee.csv") 9 | 10 | df <- 11 | df %>% 12 | mutate(Name = str_extract(Name, nameWithOrWithoutMiddleInitial)) -------------------------------------------------------------------------------- /Chapter07/R/monitoredProducts.R: -------------------------------------------------------------------------------- 1 | library(tidyverse) 2 | library(stringr) 3 | 4 | setwd("C:/Users/ryanw/OneDrive - Diesel Analytics/Professional/Clients/Apress/Book/Part2_ShapingData/Chapter5/Misc") 5 | 6 | monitoredProducts <- function(x){ 7 | pattern = "^(561769|561394)(?=(01|22|39)(A|B))" 8 | mp = str_extract(x, pattern = pattern) 9 | return_value = ifelse(is.na(mp),"Not Monitored", mp) 
10 | return(return_value) 11 | } 12 | 13 | df <- read_csv("ProductionOrders.csv") 14 | 15 | df <- 16 | df %>% 17 | mutate(`Monitored Products` = monitoredProducts(SKU)) 18 | -------------------------------------------------------------------------------- /Chapter08/Data/EmployeeList.csv: -------------------------------------------------------------------------------- 1 | Employee ID,Name,DOH,DOB,SSN,EmployeeAddress,TerminalAddress,lon_EmployeeAddress,lat_EmployeeAddress,lon_TerminalAddress,lat_TerminalAddress,Geosphere Function 2 | 1,Christy F Wood,2016-10-23,1985-02-03,999-79-8630,"500 Linwood Dr Greencastle, IN 46135","99 Crestview Dr Greenwood, IN 46143",-86.8568406,39.6513699,-86.09771289999999,39.6274365,40.5 3 | 2,Tonya F Ross,2015-12-26,1965-01-12,999-55-8000,"4976 W 600 N Fairland, IN 46126","99 Crestview Dr Greenwood, IN 46143",-85.87572829999999,39.6113165,-86.09771289999999,39.6274365,11.9 4 | 3,Curtis M Powell,2013-09-13,1967-02-22,999-47-8489,"7525 Wellingshire Blvd Indianapolis, IN 46217","340 Stadium Dr Brownsburg, IN 46112",-86.2052077,39.6546561,-86.3898542,39.83719019999999,16 5 | 4,Brett M Long,2014-10-23,1976-05-16,999-19-2979,"11259 N 300 W Alexandria, IN 46001","900 Sawmill Rd New Whiteland, IN 46184",-85.7282439,40.2670985,-86.1028011,39.563263,52.6 6 | 5,Alison F Flores,2013-10-09,1986-12-19,999-59-9092,"389 E Jackson St Martinsville, IN 46151","900 Sawmill Rd New Whiteland, IN 46184",-86.4240296,39.4255873,-86.1028011,39.563263,19.6 7 | 6,Philip M Washington,2013-07-12,1972-03-20,999-69-8323,"2200 W 22nd St Anderson, IN 46016","2301 N Park Ave Indianapolis, IN 46205",-85.70743709999999,40.094985200000004,-86.1472958,39.7996407,31 8 | -------------------------------------------------------------------------------- /Chapter08/Python/DistanceCalculation_Custom.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | import os 4 | from math import cos, sin, atan2, 
pi, sqrt, pow 5 | 6 | def ComputeDist(row): 7 | R = 6371 / 1.609344 #radius in mile 8 | 9 | delta_lat = row["lat_TerminalAddressGC"] - 10 | row["lat_EmployeeAddress"] 11 | 12 | delta_lon = row["lon_TerminalAddress"] - 13 | row["lon_EmployeeAddress"] 14 | 15 | degrees_to_radians = pi / 180.0 16 | a1 = sin(delta_lat / 2 * degrees_to_radians) 17 | a2 = pow(a1,2) 18 | a3 = cos(row["lat_EmployeeAddress"] * degrees_to_radians) 19 | a4 = cos(row["lat_TerminalAddress"] * degrees_to_radians) 20 | a5 = sin(delta_lon / 2 * degrees_to_radians) 21 | a6 = pow(a5,2) 22 | a = a2 + a3 * a4 * a6 23 | c = 2 * atan2(sqrt(a), sqrt(1 - a)) 24 | d = R * c 25 | return d 26 | 27 | os.chdir("") 28 | EmployeeList = pd.read_csv("EmployeeList_Python.csv") 29 | 30 | EmployeeList["Custom Function"] = \ 31 | EmployeeList.apply(lambda row: ComputeDist(row), axis=1) 32 | -------------------------------------------------------------------------------- /Chapter08/Python/DistanceCalculation_HaversineFunction.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import os 3 | from haversine import haversine 4 | 5 | os.chdir(r"") 6 | EmployeeList = pd.read_csv("EmployeeList.csv") 7 | 8 | def useHaversine(row): 9 | point_one = \ 10 | (row["lat_EmployeeAddress"], row["lon_EmployeeAddress"]) 11 | 12 | point_two = \ 13 | (row["lat_TerminalAddress"], row["lon_TerminalAddress"]) 14 | 15 | return haversine(point_one, point_two, unit="mi") 16 | 17 | EmployeeList["Haversine Function Result"] = \ 18 | EmployeeList.apply(lambda row: useHaversine(row), axis=1) 19 | 20 | -------------------------------------------------------------------------------- /Chapter08/Python/GeocodeAddress.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import os 3 | from geopy import geocoders 4 | 5 | g_api_key = "" 6 | g = geocoders.GoogleV3(g_api_key) 7 | 8 | os.chdir("") 9 | EmployeeList = 
pd.read_csv("EmployeeList.csv") 10 | 11 | EmployeeList["EmployeeAddressGC"] = \ 12 | EmployeeList["EmployeeAddress"].apply(g.geocode) 13 | 14 | EmployeeList["lat_EmployeeAddress"] = \ 15 | EmployeeList["EmployeeAddressGC"].apply(lambda x: x.latitude) 16 | 17 | EmployeeList["lon_EmployeeAddress"] = \ 18 | EmployeeList["EmployeeAddressGC"].apply(lambda x: x.longitude) 19 | 20 | EmployeeList["TerminalAddressGC"] = \ 21 | EmployeeList["TerminalAddress"].apply(g.geocode) 22 | 23 | EmployeeList["lat_TerminalAddressGC"] = \ 24 | EmployeeList["TerminalAddressGC"].apply(lambda x: x.latitude) 25 | 26 | EmployeeList["lon_TerminalAddress"] = \ 27 | EmployeeList["TerminalAddressGC"].apply(lambda x: x.longitude) 28 | 29 | cols = ["EmployeeAddressGC", "TerminalAddressGC"] 30 | EmployeeList.drop(cols, inplace=True) 31 | 32 | EmployeeList.to_csv("EmployeeList.csv", index = False) 33 | 34 | -------------------------------------------------------------------------------- /Chapter08/R/DistanceCalculation_Custom.R: -------------------------------------------------------------------------------- 1 | library(tidyverse) 2 | 3 | ComputeDist <- 4 | function(pickup_long, pickup_lat, dropoff_long, dropoff_lat) { 5 | R <- 6371 / 1.609344 6 | delta_lat <- dropoff_lat - pickup_lat 7 | delta_long <- dropoff_long - pickup_long 8 | degrees_to_radians = pi / 180.0 9 | a1 <- sin(delta_lat / 2 * degrees_to_radians) 10 | a2 <- as.numeric(a1) ^ 2 11 | a3 <- cos(pickup_lat * degrees_to_radians) 12 | a4 <- cos(dropoff_lat * degrees_to_radians) 13 | a5 <- sin(delta_long / 2 * degrees_to_radians) 14 | a6 <- as.numeric(a5) ^ 2 15 | a <- a2 + a3 * a4 * a6 16 | c <- 2 * atan2(sqrt(a), sqrt(1 - a)) 17 | d <- R * c 18 | return(d) 19 | } 20 | 21 | setwd("") 22 | EmployeeList <- read_csv("EmployeeList.csv") 23 | 24 | EmployeeList <- 25 | EmployeeList %>% 26 | mutate( 27 | `Custom Distance Function Results` = 28 | round( 29 | ComputeDist( 30 | lon_EmployeeAddress, 31 | lat_EmployeeAddress, 32 | lon_TerminalAddress, 
33 | lat_TerminalAddress 34 | ),1 35 | ) 36 | ) 37 | -------------------------------------------------------------------------------- /Chapter08/R/DistanceCalculation_distHaversine.R: -------------------------------------------------------------------------------- 1 | library(tidyverse) 2 | library(geosphere) 3 | 4 | setwd("") 5 | EmployeeList <- read_csv("EmployeeList.csv") 6 | 7 | EmployeeList <- 8 | EmployeeList %>% 9 | rowwise() %>% 10 | mutate( 11 | `Geosphere Function` = 12 | distHaversine( 13 | c(lon_EmployeeAddress, lat_EmployeeAddress), 14 | c(lon_TerminalAddress, lat_TerminalAddress) 15 | ) / 1609.34 16 | ) -------------------------------------------------------------------------------- /Chapter08/R/GeocodeAddresses.R: -------------------------------------------------------------------------------- 1 | library(tidyverse) 2 | library(ggmap) 3 | 4 | register_google(key = "") 5 | 6 | setwd("") 7 | EmployeeList <- read_csv("EmployeeList.csv") 8 | 9 | EmployeeList <- 10 | EmployeeList %>% 11 | mutate_geocode(EmployeeAddress, sensor = FALSE) %>% 12 | rename(lon_EmployeeAddress = lon, lat_EmployeeAddress = lat) %>% 13 | mutate_geocode(TerminalAddress, sensor = FALSE) %>% 14 | rename(lon_TerminalAddress = lon, lat_TerminalAddress = lat) 15 | 16 | write_csv(EmployeeList, "EmployeeList.csv") 17 | -------------------------------------------------------------------------------- /Chapter09/Data/BostonHousingInfo.csv: -------------------------------------------------------------------------------- 1 | id,crim,zn,indus,chas,nox,rm,age,dis,rad,tax,ptratio,black,lstat,medv 2 | 1,0.31533,0,6.2,0,0.504,8.266,78.3,2.8944,8,307,17.4,385.05,4.14,44.8 3 | 2,0.01301,35,1.52,0,0.442,7.241,49.3,7.0379,1,284,15.5,394.74,5.49,32.7 4 | 3,1.34284,0,19.58,0,0.605,6.066,100,1.7573,5,403,14.7,353.89,6.43,24.3 5 | 4,18.0846,0,18.1,0,0.679,6.434,100,1.8347,24,666,20.2,27.25,29.05,7.2 6 | 5,5.82115,0,18.1,0,0.713,6.513,89.9,2.8016,24,666,20.2,393.82,10.29,20.2 7 | 
6,13.0751,0,18.1,0,0.58,5.713,56.7,2.8237,24,666,20.2,396.9,14.76,20.1 8 | 7,0.07013,0,13.89,0,0.55,6.642,85.1,3.4211,5,276,16.4,392.78,9.69,28.7 9 | 8,12.2472,0,18.1,0,0.584,5.837,59.7,1.9976,24,666,20.2,24.65,15.69,10.2 10 | 9,0.06417,0,5.96,0,0.499,5.933,68.2,3.3603,5,279,19.2,396.9,9.68,18.9 11 | 10,17.8667,0,18.1,0,0.671,6.223,100,1.3861,24,666,20.2,393.74,21.78,10.2 12 | 11,4.66883,0,18.1,0,0.713,5.976,87.9,2.5806,24,666,20.2,10.48,19.01,12.7 13 | 12,0.10008,0,2.46,0,0.488,6.563,95.6,2.847,3,193,17.8,396.9,5.68,32.5 14 | 13,19.6091,0,18.1,0,0.671,7.313,97.9,1.3163,24,666,20.2,396.9,13.44,15 15 | 14,5.87205,0,18.1,0,0.693,6.405,96,1.6768,24,666,20.2,396.9,19.37,12.5 16 | 15,0.16211,20,6.96,0,0.464,6.24,16.3,4.429,3,223,18.6,396.9,6.59,25.2 17 | 16,8.64476,0,18.1,0,0.693,6.193,92.6,1.7912,24,666,20.2,396.9,15.17,13.8 18 | 17,0.01381,80,0.46,0,0.422,7.875,32,5.6484,4,255,14.4,394.23,2.97,50 19 | 18,10.6718,0,18.1,0,0.74,6.459,94.8,1.9879,24,666,20.2,43.06,23.98,11.8 20 | 19,5.58107,0,18.1,0,0.713,6.436,87.9,2.3158,24,666,20.2,100.19,16.22,14.3 21 | 20,5.66637,0,18.1,0,0.74,6.219,100,2.0048,24,666,20.2,395.69,16.59,18.4 22 | 21,3.32105,0,19.58,1,0.871,5.403,100,1.3216,5,403,14.7,396.9,26.82,13.4 23 | 22,0.14866,0,8.56,0,0.52,6.727,79.9,2.7778,5,384,20.9,394.76,9.42,27.5 24 | 23,0.05059,0,4.49,0,0.449,6.389,48,4.7794,3,247,18.5,396.9,9.62,23.9 25 | 24,0.01538,90,3.75,0,0.394,7.454,34.2,6.3361,3,244,15.9,386.34,3.11,44 26 | 25,8.24809,0,18.1,0,0.713,7.393,99.3,2.4527,24,666,20.2,375.87,16.74,17.8 27 | 26,5.73116,0,18.1,0,0.532,7.061,77,3.4106,24,666,20.2,395.28,7.01,25 28 | 27,13.6781,0,18.1,0,0.74,5.935,87.9,1.8206,24,666,20.2,68.95,34.02,8.4 29 | 28,0.03466,35,6.06,0,0.4379,6.031,23.3,6.6407,1,304,16.9,362.25,7.83,19.4 30 | 29,0.17783,0,9.69,0,0.585,5.569,73.5,2.3999,6,391,19.2,395.77,15.1,17.5 31 | 30,1.15172,0,8.14,0,0.538,5.701,95,3.7872,4,307,21,358.77,18.35,13.1 32 | 31,0.62976,0,8.14,0,0.538,5.949,61.8,4.7075,4,307,21,396.9,8.26,20.4 33 | 
32,0.54452,0,21.89,0,0.624,6.151,97.9,1.6687,4,437,21.2,396.9,18.46,17.8 34 | 33,0.26838,0,9.69,0,0.585,5.794,70.6,2.8927,6,391,19.2,396.9,14.1,18.3 35 | 34,0.14932,25,5.13,0,0.453,5.741,66.2,7.2254,8,284,19.7,395.11,13.15,18.7 36 | 35,0.0795,60,1.69,0,0.411,6.579,35.9,10.7103,4,411,18.3,370.78,5.49,24.1 37 | 36,0.08187,0,2.89,0,0.445,7.82,36.9,3.4952,2,276,18,393.53,3.57,43.8 38 | 37,0.3494,0,9.9,0,0.544,5.972,76.7,3.1025,4,304,18.4,396.24,9.97,20.3 39 | 38,0.10959,0,11.93,0,0.573,6.794,89.3,2.3889,1,273,21,393.45,6.48,22 40 | 39,22.0511,0,18.1,0,0.74,5.818,92.4,1.8662,24,666,20.2,391.45,22.11,10.5 41 | 40,37.6619,0,18.1,0,0.679,6.202,78.7,1.8629,24,666,20.2,18.82,14.52,10.9 42 | 41,0.52014,20,3.97,0,0.647,8.398,91.5,2.2885,5,264,13,386.86,5.91,48.8 43 | 42,1.12658,0,19.58,1,0.871,5.012,88,1.6102,5,403,14.7,343.28,12.12,15.3 44 | 43,6.28807,0,18.1,0,0.74,6.341,96.4,2.072,24,666,20.2,318.01,17.79,14.9 45 | 44,0.04544,0,3.24,0,0.46,6.144,32.2,5.8736,4,430,16.9,368.57,9.09,19.8 46 | 45,2.14918,0,19.58,0,0.871,5.709,98.5,1.6232,5,403,14.7,261.95,15.79,19.4 47 | 46,8.26725,0,18.1,1,0.668,5.875,89.6,1.1296,24,666,20.2,347.88,8.88,50 48 | 47,0.06047,0,2.46,0,0.488,6.153,68.8,3.2797,3,193,17.8,387.11,13.15,29.6 49 | 48,0.16902,0,25.65,0,0.581,5.986,88.4,1.9929,2,188,19.1,385.02,14.81,21.4 50 | 49,0.38214,0,6.2,0,0.504,8.04,86.5,3.2157,8,307,17.4,387.38,3.13,37.6 51 | 50,0.17134,0,10.01,0,0.547,5.928,88.2,2.4631,6,432,17.8,344.91,15.76,18.3 52 | 51,13.3598,0,18.1,0,0.693,5.887,94.7,1.7821,24,666,20.2,396.9,16.35,12.7 53 | 52,0.20608,22,5.86,0,0.431,5.593,76.5,7.9549,7,330,19.1,372.49,12.5,17.6 54 | 53,0.2498,0,21.89,0,0.624,5.857,98.2,1.6686,4,437,21.2,392.04,21.32,13.3 55 | 54,0.08244,30,4.93,0,0.428,6.481,18.5,6.1899,6,300,16.6,379.41,6.36,23.7 56 | 55,0.06466,70,2.24,0,0.4,6.345,20.1,7.8278,5,358,14.8,368.24,4.97,22.5 57 | 56,5.69175,0,18.1,0,0.583,6.114,79.8,3.5459,24,666,20.2,392.68,14.98,19.1 58 | 57,0.34109,0,7.38,0,0.493,6.415,40.1,4.7211,5,287,19.6,396.9,6.12,25 
59 | 58,0.55007,20,3.97,0,0.647,7.206,91.6,1.9301,5,264,13,387.89,8.1,36.5 60 | 59,0.05789,12.5,6.07,0,0.409,5.878,21.4,6.498,4,345,18.9,396.21,8.1,22 61 | 60,8.71675,0,18.1,0,0.693,6.471,98.8,1.7257,24,666,20.2,391.98,17.12,13.1 62 | 61,15.288,0,18.1,0,0.671,6.649,93.3,1.3449,24,666,20.2,363.02,23.24,13.9 63 | 62,0.15038,0,25.65,0,0.581,5.856,97,1.9444,2,188,19.1,370.31,25.41,17.3 64 | 63,0.13158,0,10.01,0,0.547,6.176,72.5,2.7301,6,432,17.8,393.3,12.04,21.2 65 | 64,4.64689,0,18.1,0,0.614,6.98,67.6,2.5329,24,666,20.2,374.68,11.66,29.8 66 | 65,0.05302,0,3.41,0,0.489,7.079,63.1,3.4145,2,270,17.8,396.06,5.7,28.7 67 | 66,0.04684,0,3.41,0,0.489,6.417,66.1,3.0923,2,270,17.8,392.18,8.81,22.6 68 | 67,0.27957,0,9.69,0,0.585,5.926,42.6,2.3817,6,391,19.2,396.9,13.59,24.5 69 | 68,9.18702,0,18.1,0,0.7,5.536,100,1.5804,24,666,20.2,396.9,23.6,11.3 70 | 69,0.29916,20,6.96,0,0.464,5.856,42.1,4.429,3,223,18.6,388.65,13,21.1 71 | 70,0.10574,0,27.74,0,0.609,5.983,98.8,1.8681,4,711,20.1,390.11,18.07,13.6 72 | 71,0.22438,0,9.69,0,0.585,6.027,79.7,2.4982,6,391,19.2,396.9,14.33,16.8 73 | 72,4.0974,0,19.58,0,0.871,5.468,100,1.4118,5,403,14.7,396.9,26.42,15.6 74 | 73,12.0482,0,18.1,0,0.614,5.648,87.6,1.9512,24,666,20.2,291.55,14.1,20.8 75 | 74,0.12269,0,6.91,0,0.448,6.069,40,5.7209,3,233,17.9,389.39,9.55,21.2 76 | 75,0.10793,0,8.56,0,0.52,6.195,54.4,2.7778,5,384,20.9,393.49,13,21.7 77 | 76,0.40771,0,6.2,1,0.507,6.164,91.3,3.048,8,307,17.4,395.24,21.46,21.7 78 | 77,0.1415,0,6.91,0,0.448,6.169,6.6,5.7209,3,233,17.9,383.37,5.81,25.3 79 | 78,0.03584,80,3.37,0,0.398,6.29,17.8,6.6115,4,337,16.1,396.9,4.67,23.5 80 | 79,4.75237,0,18.1,0,0.713,6.525,86.5,2.4358,24,666,20.2,50.92,18.13,14.1 81 | 80,0.3692,0,9.9,0,0.544,6.567,87.3,3.6023,4,304,18.4,395.69,9.28,23.8 82 | 81,41.5292,0,18.1,0,0.693,5.531,85.4,1.6074,24,666,20.2,329.46,27.38,8.5 83 | 82,0.0136,75,4,0,0.41,5.888,47.6,7.3197,3,469,21.1,396.9,14.8,18.9 84 | 83,0.07165,0,25.65,0,0.581,6.004,84.1,2.1974,2,188,19.1,377.67,14.27,20.3 85 | 
84,7.99248,0,18.1,0,0.7,5.52,100,1.5331,24,666,20.2,396.9,24.56,12.3 86 | 85,3.83684,0,18.1,0,0.77,6.251,91.1,2.2955,24,666,20.2,350.65,14.19,19.9 87 | 86,0.10469,40,6.41,1,0.447,7.267,49,4.7872,4,254,17.6,389.25,6.05,33.2 88 | 87,0.11504,0,2.89,0,0.445,6.163,69.6,3.4952,2,276,18,391.83,11.34,21.4 89 | 88,0.22188,20,6.96,1,0.464,7.691,51.8,4.3665,3,223,18.6,390.77,6.58,35.2 90 | 89,0.08873,21,5.64,0,0.439,5.963,45.7,6.8147,4,243,16.8,395.56,13.45,19.7 91 | 90,0.03237,0,2.18,0,0.458,6.998,45.8,6.0622,3,222,18.7,394.63,2.94,33.4 92 | 91,0.04113,25,4.86,0,0.426,6.727,33.5,5.4007,4,281,19,396.9,5.29,28 93 | 92,20.0849,0,18.1,0,0.7,4.368,91.2,1.4395,24,666,20.2,285.83,30.63,8.8 94 | 93,0.32264,0,21.89,0,0.624,5.942,93.5,1.9669,4,437,21.2,378.25,16.9,17.4 95 | 94,0.02177,82.5,2.03,0,0.415,7.61,15.7,6.27,2,348,14.7,395.38,3.11,42.3 96 | 95,0.13642,0,10.59,0,0.489,5.891,22.3,3.9454,4,277,18.6,396.9,10.87,22.6 97 | 96,0.0187,85,4.15,0,0.429,6.516,27.7,8.5353,4,351,17.9,392.43,6.36,23.1 98 | 97,11.0874,0,18.1,0,0.718,6.411,100,1.8589,24,666,20.2,318.75,15.02,16.7 99 | 98,0.05561,70,2.24,0,0.4,7.041,10,7.8278,5,358,14.8,371.58,4.74,29 100 | 99,0.12204,0,2.89,0,0.445,6.625,57.8,3.4952,2,276,18,357.98,6.65,28.4 101 | 100,0.1146,20,6.96,0,0.464,6.538,58.7,3.9175,3,223,18.6,394.96,7.73,24.4 102 | 101,7.52601,0,18.1,0,0.713,6.417,98.3,2.185,24,666,20.2,304.21,19.31,13 103 | 102,0.06129,20,3.33,1,0.4429,7.645,49.7,5.2119,5,216,14.9,377.07,3.01,46 104 | 103,0.10612,30,4.93,0,0.428,6.095,65.1,6.3361,6,300,16.6,394.62,12.4,20.1 105 | 104,0.09065,20,6.96,1,0.464,5.92,61.5,3.9175,3,223,18.6,391.34,13.65,20.7 106 | 105,0.04379,80,3.37,0,0.398,5.787,31.1,6.6115,4,337,16.1,396.9,10.24,19.4 107 | 106,23.6482,0,18.1,0,0.671,6.38,96.2,1.3861,24,666,20.2,396.9,23.69,13.1 108 | -------------------------------------------------------------------------------- /Chapter09/Data/SentimentAndToneAnalysisData.csv: -------------------------------------------------------------------------------- 1 | 
id,text 2 | 0,"But seriously, the Goodtimes is one of the best sitcoms ever!" 3 | 1,Starwars is horrible.. 4 | 2,"Man, I love the movie Shaft." 5 | 3,The Color Purple was really depressing. 6 | 4,"Anyway, I loved the Turkey Hut" 7 | 5,Red and pink together looks stupid 8 | 6,My mom hates when I don't clean my room 9 | 7,"Sweet potato pie is awesome, I don't care if anyone says differently!" 10 | 8,I love the show the office. 11 | 9,I do not liketo play basketball 12 | 10,I like lifting weights. 13 | 11,Ice cream is one of my favorite deserts 14 | 12,The rain outside made me feel sad. 15 | 13,I get really upset about unnecessary violence 16 | 14,The clothes he had on looks cheap. 17 | 15,I heard Starwars sucks. 18 | 16,Al Bundy awesome!! 19 | 17,The game is horrible.. 20 | 18,The dog scared the little kid. 21 | 19,The cheeseburger tastes great! 22 | 20,McDonald's is over rated. 23 | 21,The movie was long and depressing. 24 | 22,The home cook meal made me feel good inside. 25 | 23,The call the referree made caused my blood to boil 26 | 24,I hate the way she sang the song. 27 | 25,I love when people help eachother. 28 | 26,The ending of the movie Five Heartbeats made me happy. 29 | 27,The Galaxy 10 is a great phone 30 | 28,The Galaxy 10 is much better than the iphone. 31 | 29,The bully took the pencil from the other student 32 | 30,The kid helped his mother carry the groceries 33 | 31,The child was very happy to get a piece of candy 34 | 32,The kid was sad when his dog got sick 35 | 33,The young girl was frustrated that she could not braid her hair. 36 | 34,The woman was was appreciative of the purse her husband got her. 37 | 35,The wife felt very fortunate to have a loving husband 38 | 36,The husband felt very fortunate to have a loving wife. 39 | 37,The brother was happy to help his sister with her homework. 40 | 38,Charlie did not want to run in his brand new shoes 41 | 39,"Ok, why can't you get the phone to work." 42 | 40,I can't get your product to work. 
43 | 41,Can you help me understand how to use your product? 44 | 42,The computer will not boot up no matter what I do:-( 45 | 43,I can't get anything done today:-( 46 | 44,Sorry to hear that you had a hard time writing the script 47 | 45,Sorry to hear the the DAX measure was hard to write 48 | 46,Sorry hear that you have to choose between Python and R 49 | 47,Both Python and R are great programming languages 50 | 48,SQL is a must learn for data science 51 | 49,The SQL developer worked hard to teach the newbie how to write the SELECT statement 52 | 50,The doctor was very helpful in making diet recommendation 53 | 51,This quiz was hard because the material was not covered during class 54 | 52,He helped the other students with their homework 55 | 53,Microsoft is kicking the competition's butt! 56 | 54,I prefer Power BI cost a lot less than Tableau 57 | 55,I would rather use Azure than AWS 58 | 56,Microsoft importance of Excel are misunderstood by IT 59 | 57,The customer yelled at the operator was unsatified with the installation 60 | 58,The man tackled the guy who tried to steal the lady's purse. 61 | 59,Sorry to hear that the burgular took your valuable items. 
62 | -------------------------------------------------------------------------------- /Chapter09/Data/yelp_sample_goes_hear.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Apress/adv-analytics-in-power-bi-w-r-and-python/1047beaa566a4aa7866fa41fd51dcc5e151b4d58/Chapter09/Data/yelp_sample_goes_hear.txt -------------------------------------------------------------------------------- /Chapter09/Models/model_Python.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Apress/adv-analytics-in-power-bi-w-r-and-python/1047beaa566a4aa7866fa41fd51dcc5e151b4d58/Chapter09/Models/model_Python.pkl -------------------------------------------------------------------------------- /Chapter09/Models/model_R.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Apress/adv-analytics-in-power-bi-w-r-and-python/1047beaa566a4aa7866fa41fd51dcc5e151b4d58/Chapter09/Models/model_R.rds -------------------------------------------------------------------------------- /Chapter09/Python/GetYelpReviewSample.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | import os 4 | 5 | os.chdir("") 6 | 7 | dfYelpReviews = pd.read_csv("yelp_review.csv") 8 | dfYelpReviewsSample = dfYelpReviews.sample(5, random_state = 1) 9 | dfYelpReviewsSample["id"] = np.arange(len(dfYelpReviewsSample)) 10 | dfYelpReviewsSample = dfYelpReviewsSample[["id","text"]] 11 | dfYelpReviewsSample.to_csv("yelp_review_sample.csv", index = False) -------------------------------------------------------------------------------- /Chapter09/Python/ScoreData.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pandas as pd 3 | import joblib 4 | from sklearn import preprocessing 5 | 6 | model = 
joblib.load("../Models/model_Python.pkl") 7 | boston_housing = pd.read_csv("../Data/BostonHousingInfo.csv") 8 | 9 | model_data = preprocessing.scale(boston_housing[["crim","rm","tax","lstat"]]) 10 | 11 | pred_medv = model.predict(model_data) 12 | 13 | final_output = boston_housing.loc[:,["crim","rm","tax","lstat"]] 14 | final_output["pred_medv"] = pred_medv -------------------------------------------------------------------------------- /Chapter09/Python/Sentiment.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import json 3 | import ast 4 | from pandas.io.json import json_normalize 5 | from azure.cognitiveservices.language.textanalytics import TextAnalyticsClient 6 | from msrest.authentication import CognitiveServicesCredentials 7 | 8 | # Sets the necessary variables that are needed for the script 9 | api_key = "" 10 | endpoint = "https://rwtextanalyticsapi.cognitiveservices.azure.com/" 11 | credentials = CognitiveServicesCredentials(api_key) 12 | text_analytics = TextAnalyticsClient(endpoint=endpoint, credentials=credentials) 13 | 14 | # Write code to read in data that you want to do sentiment analysis on in a pandas data frame 15 | df = pd.read_csv("") 16 | df["language"] = "en" 17 | 18 | # Convert pandas to json string 19 | documents = df.to_json(orient='records') 20 | documents = ast.literal_eval(documents) 21 | response = text_analytics.sentiment(documents=documents, raw = False) 22 | 23 | # Creates an empty list and populates it with the sentiment scores 24 | listSentiments = [] 25 | for document in response.documents: 26 | id = document.id 27 | score = document.score 28 | listSentiments.append([id, score]) 29 | 30 | # Converts the list created above into a data frame 31 | dfSentiments = pd.DataFrame(listSentiments, columns=['ID','Score']) 32 | dfSentiments -------------------------------------------------------------------------------- /Chapter09/Python/Tone.py: 
-------------------------------------------------------------------------------- 1 | import json 2 | import pandas as pd 3 | import ast 4 | import os 5 | from ibm_watson import ToneAnalyzerV3 6 | from ibm_watson.tone_analyzer_v3 import ToneInput 7 | from ibm_cloud_sdk_core.authenticators import IAMAuthenticator 8 | 9 | authenticator = IAMAuthenticator("") 10 | 11 | service = ToneAnalyzerV3( 12 | version='2019-12-22', 13 | authenticator=authenticator) 14 | 15 | service.set_service_url( 16 | "https://gateway.watsonplatform.net/tone-analyzer/api") 17 | 18 | dfDocuments = pd.read_csv( 19 | "") 20 | 21 | listReturnedUtterance = [] 22 | for index, row in dfDocuments.iterrows(): 23 | submissionText = row["text"].replace("'","") 24 | submissionText = submissionText[0:500] 25 | PythonExpression = "[{'text': '" + submissionText + "'}]" 26 | Submission = ast.literal_eval(PythonExpression) 27 | tone_chat = service.tone_chat(Submission).get_result() 28 | 29 | utterances = tone_chat["utterances_tone"] 30 | 31 | sad, frustrated, satisfied, excited, \ 32 | polite, impolite, sympathetic, unknown = \ 33 | 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 34 | 35 | tones = utterances[0]["tones"] 36 | for tone in tones: 37 | toneid = tone["tone_id"] 38 | if toneid == "sad": 39 | sad = tone["score"] 40 | elif toneid == "frustrated": 41 | frustrated = tone["score"] 42 | elif toneid == "satisfied": 43 | satisfied = tone["score"] 44 | elif toneid == "excited": 45 | excited = tone["score"] 46 | elif toneid == "polite": 47 | polite = tone["score"] 48 | elif toneid == "impolite": 49 | impolite = tone["score"] 50 | elif toneid == "sympathetic": 51 | sympathetic = tone["score"] 52 | else: 53 | unknown = tone["score"] 54 | 55 | listReturnedUtterance.append( 56 | [index, sad, frustrated, satisfied, excited, 57 | polite, impolite, sympathetic, unknown]) 58 | 59 | colnames = ["index", "sad", "frustrated", "satisfied", "excited", 60 | "polite", "impolite", "sympathetic", "unknown"] 61 | dfReturnedUtterance = 
pd.DataFrame( 62 | listReturnedUtterance, columns=colnames) 63 | 64 | dfOutput = pd.merge( 65 | dfDocuments, dfReturnedUtterance, 66 | how='inner', left_on = 'id', right_on = 'index') 67 | 68 | dfOutput = dfOutput[ 69 | ['id', 'text', 'sad', 'frustrated', 'satisfied', 70 | 'excited', 'polite', 'impolite', 'sympathetic', 'unknown']] -------------------------------------------------------------------------------- /Chapter09/R/ScoreData.R: -------------------------------------------------------------------------------- 1 | library(tidyverse) 2 | 3 | model <- readRDS("./Models/model.rds") 4 | boston_housing <- read_csv("./Data/BostonHousingInfo.csv") 5 | 6 | model_data <- boston_housing[, c("crim","rm","tax","lstat")] 7 | pred_medv <- predict(model, model_data) 8 | 9 | final_output <- cbind(model_data, pred_medv) -------------------------------------------------------------------------------- /Chapter10/Data/Delete.txt: -------------------------------------------------------------------------------- 1 | Write code to the boston data source. 2 | 3 | Include code to get the other data sources. 
-------------------------------------------------------------------------------- /Chapter10/Data/GetYelpReviewSample.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | import os 4 | 5 | os.chdir("") 6 | 7 | dfYelpReviews = pd.read_csv("yelp_review.csv") 8 | dfYelpReviewsSample = dfYelpReviews.sample(200, random_state = 1) 9 | dfYelpReviewsSample["id"] = np.arange(len(dfYelpReviewsSample)) 10 | dfYelpReviewsSample = dfYelpReviewsSample[["id","text"]] 11 | dfYelpReviewsSample.to_csv("yelp_review_sample.csv", index = False) -------------------------------------------------------------------------------- /Chapter10/Databases/BostonHousingData.bak: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Apress/adv-analytics-in-power-bi-w-r-and-python/1047beaa566a4aa7866fa41fd51dcc5e151b4d58/Chapter10/Databases/BostonHousingData.bak -------------------------------------------------------------------------------- /Chapter10/Databases/CalculateDistance.bak: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Apress/adv-analytics-in-power-bi-w-r-and-python/1047beaa566a4aa7866fa41fd51dcc5e151b4d58/Chapter10/Databases/CalculateDistance.bak -------------------------------------------------------------------------------- /Chapter10/Databases/Delete.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Apress/adv-analytics-in-power-bi-w-r-and-python/1047beaa566a4aa7866fa41fd51dcc5e151b4d58/Chapter10/Databases/Delete.txt -------------------------------------------------------------------------------- /Chapter10/Python/AddModelToDatabase.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pyodbc 3 | import pickle 4 | 5 | server = 'DSVM2019' 6 | database = 
'BostonHousingData' 7 | con = pyodbc.connect( 8 | 'Trusted_Connection=yes', 9 | driver = '{SQL Server}', 10 | server = server, 11 | database = database 12 | ) 13 | 14 | cursor = con.cursor() 15 | 16 | model = pickle.load(open("./Models/model.pkl", "rb")) 17 | modelstr = pickle.dumps(model) 18 | 19 | cursor.execute( 20 | "INSERT INTO [dbo].[Models](ModelName, Model) VALUES (?, ?)", 21 | "Python Model", modelstr 22 | ) 23 | 24 | con.commit() 25 | con.close() -------------------------------------------------------------------------------- /Chapter10/Python/Models/model.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Apress/adv-analytics-in-power-bi-w-r-and-python/1047beaa566a4aa7866fa41fd51dcc5e151b4d58/Chapter10/Python/Models/model.pkl -------------------------------------------------------------------------------- /Chapter10/Python/calcDistance_Python.sql: -------------------------------------------------------------------------------- 1 | USE [SentimentAnalysisDB] 2 | GO 3 | 4 | /****** Object: StoredProcedure [dbo].[calcDistance_Python] Script Date: 2/12/2020 7:14:53 AM ******/ 5 | SET ANSI_NULLS ON 6 | GO 7 | 8 | SET QUOTED_IDENTIFIER ON 9 | GO 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | CREATE PROCEDURE [dbo].[calcDistance_Python] 19 | AS 20 | 21 | BEGIN 22 | 23 | DECLARE @PythonScript nvarchar(max); 24 | DECLARE @Query nvarchar(max); 25 | DECLARE @InputDFName nvarchar(128) = 'dfInput'; 26 | DECLARE @OutputDFName nvarchar(128) = 'dfOutput'; 27 | 28 | SET @Query = 'SELECT 29 | ID 30 | ,d.AddressA 31 | ,d.AddressB 32 | ,lon_AddressA = CAST(d.lon_AddressA AS FLOAT) 33 | ,lat_AddressA = CAST(d.lat_AddressA AS FLOAT) 34 | ,lon_AddressB = CAST(d.lon_AddressB AS FLOAT) 35 | ,lat_AddressB = CAST(d.lat_AddressB AS FLOAT) 36 | FROM [dbo].[Destinations] d' 37 | 38 | SET @PythonScript = ' 39 | import pandas as pd 40 | import numpy as np 41 | from math import cos, sin, atan2, pi, sqrt, pow 42 | 43 | def 
ComputeDist(row): 44 | R = 6371 / 1.609344 #radius in mile 45 | delta_lat = row["lat_AddressA"] - row["lat_AddressB"] 46 | delta_lon = row["lon_AddressA"] - row["lon_AddressB"] 47 | degrees_to_radians = pi / 180.0 48 | a1 = sin(delta_lat / 2 * degrees_to_radians) 49 | a2 = pow(a1,2) 50 | a3 = cos(row["lat_AddressB"] * degrees_to_radians) 51 | a4 = cos(row["lat_AddressA"] * degrees_to_radians) 52 | a5 = sin(delta_lon / 2 * degrees_to_radians) 53 | a6 = pow(a5,2) 54 | a = a2 + a3 * a4 * a6 55 | c = 2 * atan2(sqrt(a), sqrt(1 - a)) 56 | d = R * c 57 | 58 | return d 59 | 60 | dfOutput = dfInput 61 | 62 | dfOutput["Distance"] = dfOutput.apply(lambda row: ComputeDist(row), axis=1) 63 | ' 64 | 65 | EXEC sp_execute_external_script 66 | @language = N'Python' 67 | ,@input_data_1 = @Query 68 | ,@input_data_1_name = @InputDFName 69 | ,@output_data_1_name = @OutputDFName 70 | ,@script = @PythonScript 71 | 72 | WITH RESULT SETS ( 73 | ( 74 | [ID] [bigint], 75 | [AddressA] [varchar](50), 76 | [AddressB] [varchar](50), 77 | [lon_AddressA] [FLOAT], 78 | [lat_AddressA] [FLOAT], 79 | [lon_AddressB] [FLOAT], 80 | [lat_AddressB] [FLOAT], 81 | [Distance] [FLOAT] 82 | ) 83 | ) 84 | 85 | END 86 | GO 87 | 88 | 89 | -------------------------------------------------------------------------------- /Chapter10/Python/getPythonLibraries.sql: -------------------------------------------------------------------------------- 1 | EXECUTE sp_execute_external_script 2 | @language = N'Python' 3 | ,@script = N' 4 | import pkg_resources 5 | import pandas as pd 6 | installed_packages = pkg_resources.working_set 7 | installed_packages_list = sorted( 8 | ["%s==%s" % (i.key, i.version) for i in installed_packages]) 9 | 10 | df = pd.DataFrame(installed_packages_list) 11 | OutputDataSet = df' 12 | 13 | WITH RESULT SETS (( PackageVersion nvarchar (150) )) 14 | -------------------------------------------------------------------------------- /Chapter10/Python/getSentiments_Python.sql: 
-------------------------------------------------------------------------------- 1 | USE [SentimentAnalysisDB] 2 | GO 3 | 4 | /****** Object: StoredProcedure [dbo].[getSentiments_Python] Script Date: 2/12/2020 7:25:54 AM ******/ 5 | SET ANSI_NULLS ON 6 | GO 7 | 8 | SET QUOTED_IDENTIFIER ON 9 | GO 10 | 11 | 12 | 13 | 14 | CREATE PROCEDURE [dbo].[getSentiments_Python] 15 | AS 16 | 17 | BEGIN 18 | 19 | DECLARE @PythonScript nvarchar(max); 20 | DECLARE @Query nvarchar(max); 21 | DECLARE @InputDFName nvarchar(128) = 'dfInput'; 22 | DECLARE @OutputDFName nvarchar(128) = 'dfOutput'; 23 | 24 | SET @Query = 'SELECT ID, [text] FROM dbo.SentimentData' 25 | 26 | SET @PythonScript = ' 27 | import pandas as pd 28 | from microsoftml import rx_featurize, get_sentiment 29 | 30 | sentiment_scores = rx_featurize( 31 | data=dfInput, 32 | ml_transforms=[get_sentiment(cols=dict(scores="text"))]) 33 | 34 | dfOutput = sentiment_scores 35 | ' 36 | 37 | EXEC sp_execute_external_script 38 | @language = N'Python' 39 | ,@input_data_1 = @Query 40 | ,@input_data_1_name = @InputDFName 41 | ,@output_data_1_name = @OutputDFName 42 | ,@script = @PythonScript 43 | 44 | WITH RESULT SETS ( 45 | ( 46 | [ID] [bigint], 47 | [text] [varchar](8000), 48 | [score] [float] 49 | ) 50 | ) 51 | 52 | END 53 | GO 54 | -------------------------------------------------------------------------------- /Chapter10/Python/uspPredictHousePrices_Python.sql: -------------------------------------------------------------------------------- 1 | CREATE PROCEDURE [dbo].[uspPredictHousePrices_Python] 2 | AS 3 | BEGIN 4 | 5 | DECLARE @model VARBINARY(max) = 6 | (SELECT MODEL 7 | FROM [dbo].[Models] 8 | WHERE ModelName = 'Python Model'); 9 | DECLARE @PythonScript nvarchar(max); 10 | DECLARE @Query nvarchar(max); 11 | DECLARE @InputDFName nvarchar(25); 12 | DECLARE @OutputDFName nvarchar(25); 13 | 14 | -- Define source data 15 | SET @Query='SELECT [crim], [rm], [tax], [lstat] 16 | FROM [dbo].[BostonHousingInfo]' 17 | 18 | -- Python 
script to score data 19 | SET @PythonScript = N' 20 | import pickle 21 | from sklearn import linear_model 22 | 23 | bhmodel_deserialized = pickle.loads(bhmodel_serialized) 24 | pred_medv = bhmodel_deserialized.predict(dfInputData) 25 | dfOutputData = dfInputData 26 | dfOutputData["pred_medv"] = pred_medv 27 | ' 28 | SET @InputDFName = 'dfInputData' 29 | SET @OutputDFName = 'dfOutputData' 30 | 31 | EXECUTE sp_execute_external_script 32 | @language = N'Python' 33 | ,@script = @PythonScript 34 | ,@input_data_1 = @Query 35 | ,@input_data_1_name = @InputDFName 36 | ,@output_data_1_name = @OutputDFName 37 | ,@params = N'@bhmodel_serialized varbinary(max)' 38 | ,@bhmodel_serialized = @model 39 | WITH RESULT SETS(( 40 | [crim] float 41 | ,[rm] float 42 | ,[tax] float 43 | ,[lstat] float 44 | ,[pred_medv] float 45 | )); 46 | END; -------------------------------------------------------------------------------- /Chapter10/R/AddModelStoredProcedure.sql: -------------------------------------------------------------------------------- 1 | CREATE PROCEDURE [dbo].[AddModel_R] 2 | @ModelName varchar(50), 3 | @Model_Serialized nvarchar(max) 4 | 5 | AS 6 | BEGIN 7 | 8 | SET NOCOUNT ON; 9 | 10 | -- Declare the variables 11 | DECLARE @Model_Binary varbinary(max); 12 | 13 | -- Converts the model to a varbinary data type. 
14 | SELECT @Model_Binary = convert(varbinary(max),@Model_Serialized,2) 15 | 16 | -- Inserts the values that were passed to the stored procedure and the variables that were 17 | -- created above to a record in the Models table 18 | INSERT INTO [dbo].[Models](ModelName, MODEL) 19 | VALUES (@Modelname, @Model_Binary) 20 | 21 | END -------------------------------------------------------------------------------- /Chapter10/R/AddModelToDatabase.R: -------------------------------------------------------------------------------- 1 | library(RODBC) 2 | 3 | # Load model into our R session 4 | model <- readRDS("./Models/Model.rds") 5 | 6 | # Connects to the database 7 | server.name = "DSVM2019" 8 | db.name = "BostonHousingData" 9 | connection.string = paste( 10 | "driver={SQL Server}", ";", 11 | "server=", server.name, ";", 12 | "database=", db.name, ";", 13 | "trusted_connection=true", sep = "") 14 | conn <- odbcDriverConnect(connection.string) 15 | 16 | # Define parameters 17 | model_name <- "R Model" 18 | modelbin <- serialize(model, NULL) 19 | modelbinstr = paste(modelbin, collapse = "") 20 | 21 | # Builds the SQL Statement and execute it 22 | # using the sqlQuery command 23 | sql_code <- paste0( 24 | "EXEC AddModel_R ", 25 | "@ModelName='", 26 | model_name, "', ", 27 | "@Model_Serialized='", 28 | modelbinstr, "'") 29 | sqlQuery(conn, sql_code) 30 | 31 | odbcClose(conn) -------------------------------------------------------------------------------- /Chapter10/R/Models/delete.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Apress/adv-analytics-in-power-bi-w-r-and-python/1047beaa566a4aa7866fa41fd51dcc5e151b4d58/Chapter10/R/Models/delete.txt -------------------------------------------------------------------------------- /Chapter10/R/calcDistinace_R.sql: -------------------------------------------------------------------------------- 1 | USE [SentimentAnalysisDB] 2 | GO 3 | 4 | /****** Object: 
StoredProcedure [dbo].[calcDistance_R] Script Date: 2/12/2020 7:15:02 AM ******/
5 | SET ANSI_NULLS ON
6 | GO
7 |
8 | SET QUOTED_IDENTIFIER ON
9 | GO
10 |
11 |
12 | -- Returns every row of [dbo].[Destinations] plus a Distance column: the
13 | -- great-circle (haversine) distance in miles between address A and address B,
14 | -- rounded to one decimal place. The calculation itself runs in R.
15 | CREATE PROCEDURE [dbo].[calcDistance_R]
16 | AS
17 |
18 | BEGIN
19 |
20 |     DECLARE @RScript nvarchar(max);
21 |     DECLARE @Query nvarchar(max);
22 |     DECLARE @InputDFName nvarchar(128) = 'dfInput';
23 |     DECLARE @OutputDFName nvarchar(128) = 'dfOutput';
24 |
25 |     -- Rows handed to R as the data frame named by @InputDFName
26 |     SET @Query = 'SELECT ID, AddressA, AddressB, lon_AddressA, lat_AddressA, lon_AddressB, lat_AddressB
27 | FROM [dbo].[Destinations]'
28 |
29 |     SET @RScript = '
30 | library(tidyverse)
31 |
32 | # Haversine distance in miles; all four arguments are in degrees
33 | ComputeDist <-
34 |   function(addressA_long, addressA_lat, addressB_long, addressB_lat) {
35 |     R <- 6371 / 1.609344 #radius in mile
36 |     delta_lat <- addressB_lat - addressA_lat
37 |     delta_long <- addressB_long - addressA_long
38 |     degrees_to_radians = pi / 180.0
39 |     a1 <- sin(delta_lat / 2 * degrees_to_radians)
40 |     a2 <- as.numeric(a1) ^ 2
41 |     a3 <- cos(addressA_lat * degrees_to_radians)
42 |     a4 <- cos(addressB_lat * degrees_to_radians)
43 |     a5 <- sin(delta_long / 2 * degrees_to_radians)
44 |     a6 <- as.numeric(a5) ^ 2
45 |     a <- a2 + a3 * a4 * a6
46 |     # Rounding can push a marginally outside [0, 1]; clamp it so that
47 |     # sqrt(1 - a) never produces NaN
48 |     a <- pmin(1, pmax(0, a))
49 |     c <- 2 * atan2(sqrt(a), sqrt(1 - a))
50 |     d <- R * c
51 |     return(d)
52 |   }
53 |
54 | dfOutput <-
55 |   dfInput %>%
56 |   mutate(
57 |     Distance =
58 |       round(ComputeDist(lon_AddressA, lat_AddressA, lon_AddressB, lat_AddressB), 1)
59 |   )
60 | '
61 |
62 |     EXEC sp_execute_external_script
63 |          @language = N'R'
64 |         ,@input_data_1 = @Query
65 |         ,@input_data_1_name = @InputDFName
66 |         ,@output_data_1_name = @OutputDFName
67 |         ,@script = @RScript
68 |
69 |     -- Longitudes need three integer digits (up to 180), so they use
70 |     -- decimal(11,8); decimal(10,8) overflows for any |longitude| >= 100.
71 |     -- Distance uses decimal(6,1) because decimal(4,1) tops out at 999.9 miles.
72 |     WITH RESULT SETS (
73 |         (
74 |             [ID] [bigint],
75 |             [AddressA] [varchar](50),
76 |             [AddressB] [varchar](50),
77 |             [lon_AddressA] [decimal](11,8),
78 |             [lat_AddressA] [decimal](10,8),
79 |             [lon_AddressB] [decimal](11,8),
80 |             [lat_AddressB] [decimal](10,8),
81 |             [Distance] [decimal](6,1)
82 |         )
83 |     )
84 |
85 | END
86 | GO
--------------------------------------------------------------------------------
/Chapter10/R/getRPackages.sql:
--------------------------------------------------------------------------------
1 | -- Lists the R packages visible to the SQL Server R runtime.
2 | -- The R script takes the first and third columns of installed.packages() and
3 | -- returns them as OutputDataSet; the result set exposes them as
4 | -- PackageName and PackageVersion.
5 | DECLARE @RScript nvarchar(max) = N'
6 | packagematrix <- installed.packages();
7 | Name <- packagematrix[,1];
8 | Version <- packagematrix[,3];
9 | OutputDataSet <- data.frame(Name, Version);';
10 |
11 | EXEC sp_execute_external_script
12 |      @script = @RScript
13 |     ,@language = N'R'
14 | WITH RESULT SETS ((PackageName nvarchar(250), PackageVersion nvarchar(max)))
--------------------------------------------------------------------------------
/Chapter10/R/getSentiment_R.sql:
--------------------------------------------------------------------------------
1 | USE [SentimentAnalysisDB]
2 | GO
3 |
4 | /****** Object: StoredProcedure [dbo].[getSentiments_R] Script Date: 2/12/2020 7:26:00 AM ******/
5 | SET ANSI_NULLS ON
6 | GO
7 |
8 | SET QUOTED_IDENTIFIER ON
9 | GO
10 |
11 |
12 |
13 | -- Reads ID and [text] from dbo.SentimentData, passes them to R as dfInput,
14 | -- featurizes the text with getSentiment() through rxFeaturize(), and returns
15 | -- dfOutput as (ID, text, score).
16 | CREATE PROCEDURE [dbo].[getSentiments_R]
17 | AS
18 |
19 | BEGIN
20 |
21 |     -- Names under which the input and output data frames appear inside R
22 |     DECLARE @InputDFName nvarchar(128) = 'dfInput';
23 |     DECLARE @OutputDFName nvarchar(128) = 'dfOutput';
24 |
25 |     -- Source rows for the R script
26 |     DECLARE @Query nvarchar(max) = 'SELECT ID, [text] FROM dbo.SentimentData';
27 |
28 |     -- NOTE(review): if rxFeaturize() echoes the ID column as well as text,
29 |     -- the cbind() below yields ID twice and four columns, while the result
30 |     -- set declares three -- confirm against the MicrosoftML documentation.
31 |     DECLARE @RScript nvarchar(max) = '
32 | dfInput$text = as.character(dfInput$text)
33 | sentimentScores <- rxFeaturize(
34 | data = dfInput,
35 | mlTransforms = getSentiment(
36 | vars = list(SentimentScore = "text"))
37 | )
38 |
39 | sentimentScores$text <- NULL
40 | dfOutput <- cbind(dfInput, sentimentScores)';
41 |
42 |     EXECUTE sp_execute_external_script
43 |          @script = @RScript
44 |         ,@language = N'R'
45 |         ,@input_data_1_name = @InputDFName
46 |         ,@input_data_1 = @Query
47 |         ,@output_data_1_name = @OutputDFName
48 |     WITH RESULT SETS ((
49 |         [ID] bigint,
50 |         [text] varchar(8000),
51 |         [score] float
52 |     ))
53 |
54 | END
55 | GO
--------------------------------------------------------------------------------
/Chapter10/R/uspPredictHousePrices_R.sql:
--------------------------------------------------------------------------------
1 | -- Scores every row of [dbo].[BostonHousingInfo] with the R model stored in
2 | -- [dbo].[Models] under the name 'R Model' and returns the four predictor
3 | -- columns together with the prediction (pred_medv).
4 | CREATE PROCEDURE [dbo].[uspPredictHousePrices_R]
5 |
6 | AS
7 |
8 | BEGIN
9 |
10 |     -- Define variables
11 |     -- NOTE(review): this scalar subquery raises an error if more than one row
12 |     -- is named 'R Model'; confirm ModelName is unique in [dbo].[Models].
13 |     DECLARE @model varbinary(max) =
14 |         (SELECT MODEL
15 |          FROM [dbo].[Models]
16 |          WHERE ModelName = 'R Model');
17 |     DECLARE @RScript nvarchar(max);
18 |     DECLARE @Query nvarchar(max);
19 |     DECLARE @InputDFName nvarchar(25);
20 |     DECLARE @OutputDFName nvarchar(25);
21 |
22 |     -- Stop with a clear message when no model was found, instead of letting
23 |     -- the R script fail while trying to unserialize a NULL value
24 |     IF @model IS NULL
25 |     BEGIN
26 |         THROW 50001, N'No model named ''R Model'' was found in [dbo].[Models].', 1;
27 |     END
28 |
29 |     -- Define source data
30 |     SET @Query='
31 | SELECT [crim], [rm], [tax], [lstat]
32 | FROM [dbo].[BostonHousingInfo]'
33 |
34 |     -- R script to score data
35 |     SET @RScript = N'
36 | bhmodel_deserialized <-
37 | unserialize(as.raw(bhmodel_serialized));
38 | model_data <- dfInputData
39 | pred_medv <-
40 | predict(bhmodel_deserialized, model_data)
41 |
42 | dfOutputData <-
43 | cbind(model_data, pred_medv)'
44 |
45 |     SET @InputDFName = 'dfInputData'
46 |     SET @OutputDFName = 'dfOutputData'
47 |
48 |     -- @params declares the extra R variable bhmodel_serialized, which receives
49 |     -- the bytes held in @model
50 |     EXEC sp_execute_external_script
51 |          @language = N'R'
52 |         ,@script = @RScript
53 |         ,@input_data_1 = @Query
54 |         ,@input_data_1_name = @InputDFName
55 |         ,@output_data_1_name = @OutputDFName
56 |         ,@params = N'@bhmodel_serialized varbinary(max)'
57 |         ,@bhmodel_serialized = @model
58 |     WITH RESULT SETS((
59 |          [crim] float
60 |         ,[rm] float
61 |         ,[tax] float
62 |         ,[lstat] float
63 |         ,[pred_medv] float
64 |     ));
65 |
66 | END
--------------------------------------------------------------------------------
/Contributing.md:
--------------------------------------------------------------------------------
1 | # Contributing to Apress Source Code
2 |
3 | Copyright for Apress source code belongs to the author(s). However, under fair use you are encouraged to fork and contribute minor corrections and updates for the benefit of the author(s) and other readers.
4 |
5 | ## How to Contribute
6 |
7 | 1. Make sure you have a GitHub account.
8 | 2. Fork the repository for the relevant book.
9 | 3. Create a new branch on which to make your change, e.g. 10 | `git checkout -b my_code_contribution` 11 | 4. Commit your change. Include a commit message describing the correction. Please note that if your commit message is not clear, the correction will not be accepted. 12 | 5. Submit a pull request. 13 | 14 | Thank you for your contribution! -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | Freeware License, some rights reserved 2 | 3 | Copyright (c) 2020 Ryan Wade 4 | 5 | Permission is hereby granted, free of charge, to anyone obtaining a copy 6 | of this software and associated documentation files (the "Software"), 7 | to work with the Software within the limits of freeware distribution and fair use. 8 | This includes the rights to use, copy, and modify the Software for personal use. 9 | Users are also allowed and encouraged to submit corrections and modifications 10 | to the Software for the benefit of other users. 11 | 12 | It is not allowed to reuse, modify, or redistribute the Software for 13 | commercial use in any way, or for a user’s educational materials such as books 14 | or blog articles without prior permission from the copyright holder. 15 | 16 | The above copyright notice and this permission notice need to be included 17 | in all copies or substantial portions of the software. 18 | 19 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 20 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 21 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 22 | AUTHORS OR COPYRIGHT HOLDERS OR APRESS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 23 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 24 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 25 | SOFTWARE. 
26 | 27 | 28 | -------------------------------------------------------------------------------- /ReadMe.md: -------------------------------------------------------------------------------- 1 | # Apress Source Code 2 | 3 | This repository accompanies [*Advanced Analytics in Power BI with R and Python*](https://www.apress.com/9781484258286) by Ryan Wade (Apress, 2020). 4 | 5 | [comment]: #cover 6 | ![Cover image](9781484258286.jpg) 7 | 8 | Download the files as a zip using the green button, or clone the repository to your machine using Git. 9 | 10 | ## Releases 11 | 12 | Release v1.0 corresponds to the code in the published book, without corrections or updates. 13 | 14 | ## Contributions 15 | 16 | See the file Contributing.md for more information on how you can contribute to this repository. -------------------------------------------------------------------------------- /_Introduction/pbi.yml: -------------------------------------------------------------------------------- 1 | name: pbi 2 | channels: 3 | - anaconda 4 | - defaults 5 | dependencies: 6 | - astroid=2.3.3=py37_0 7 | - blas=1.0=mkl 8 | - ca-certificates=2020.1.1=0 9 | - certifi=2020.4.5.1=py37_0 10 | - colorama=0.4.3=py_0 11 | - icc_rt=2019.0.0=h0cc432a_1 12 | - intel-openmp=2020.0=166 13 | - isort=4.3.21=py37_0 14 | - joblib=0.15.1=py_0 15 | - lazy-object-proxy=1.4.3=py37he774522_0 16 | - mccabe=0.6.1=py37_1 17 | - mkl=2020.0=166 18 | - mkl-service=2.3.0=py37hb782905_0 19 | - mkl_fft=1.0.15=py37h14836fe_0 20 | - mkl_random=1.1.0=py37h675688f_0 21 | - numpy=1.18.1=py37h93ca92e_0 22 | - numpy-base=1.18.1=py37hc3f5095_1 23 | - openssl=1.1.1g=he774522_0 24 | - pandas=1.0.3=py37h47e9c7a_0 25 | - pip=20.0.2=py37_1 26 | - pylint=2.4.4=py37_0 27 | - python=3.7.7=h60c2a47_2 28 | - python-dateutil=2.8.1=py_0 29 | - pytz=2019.3=py_0 30 | - scikit-learn=0.22.1=py37h6288b17_0 31 | - scipy=1.4.1=py37h9439919_0 32 | - setuptools=46.1.3=py37_0 33 | - six=1.14.0=py37_0 34 | - sqlite=3.31.1=he774522_0 35 | - 
vc=14.1=h0510ff6_4 36 | - vs2015_runtime=14.16.27012=hf0eaf9b_1 37 | - wheel=0.34.2=py37_0 38 | - wincertstore=0.2=py37_0 39 | - wrapt=1.12.1=py37he774522_1 40 | - pip: 41 | - censusdata==1.7 42 | - chardet==3.0.4 43 | - ibm-cloud-sdk-core==1.5.1 44 | - ibm-watson==4.4.1 45 | - idna==2.9 46 | - pyjwt==1.7.1 47 | - requests==2.23.0 48 | - urllib3==1.25.9 49 | - websocket-client==0.48.0 50 | prefix: C:\Miniconda\envs\pbi 51 | 52 | --------------------------------------------------------------------------------