├── .gitignore
├── README.md
├── blog.R
├── diagnostics.R
├── documentation.R
├── downloads-sites-search.R
├── oauth-key.json
├── other-dimensions.R
├── overview.R
├── r4ds.R
├── search-terms.R
├── search-trends.R
├── searches.R
├── utils
│   ├── analytics.R
│   ├── auth.R
│   └── search-console.R
└── website-analytics.Rproj

/.gitignore:
--------------------------------------------------------------------------------
.httr-oauth
sc.oauth
.Rhistory
.RData
.Rproj.user

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# website-analytics

Website analytics for the tidyverse, r-lib, and a few other sites. To run this
code, you'll need read permission for the tidyverse Google Analytics account
(id 115082821) and the tidyverse Search Console account, which is usually only
available to members of the tidyverse team at RStudio.

--------------------------------------------------------------------------------
/blog.R:
--------------------------------------------------------------------------------
library(tidyverse)
library(lubridate)
source("utils/analytics.R")
source("utils/search-console.R")

filter_blog <- filter_or(
  googleAnalyticsR::dim_filter("pagePath", "BEGINS_WITH", "/blog/"),
  googleAnalyticsR::dim_filter("pagePath", "BEGINS_WITH", "/articles/")
)

search_queries("www.tidyverse.org/articles", duration = "1 year")

by_page <- analytics(c("hostname", "pagePath"), dim_filters = filter_blog)

by_page %>%
  filter(hostname == "www.tidyverse.org") %>%
  arrange(desc(sessions)) %>%
  print(n = 20)

# The blog shows a fundamentally different temporal pattern:
by_week <- analytics_weekly("hostname", dim_filters = filter_blog)
by_week %>%
  filter(hostname == "www.tidyverse.org") %>%
  ggplot(aes(week, sessions)) +
  geom_line()

# Because the vast majority of blog posts decay exponentially after release
by_week_page <- analytics_weekly(c("hostname", "pagePath"), dim_filters = filter_blog)

posts <- by_week_page %>%
  filter(hostname == "www.tidyverse.org") %>%
  filter(!str_detect(pagePath, fixed("?"))) %>%
  separate(pagePath, c(NA, NA, "year", "month", "title"), "/", remove = FALSE) %>%
  filter(title != "") %>%
  mutate(label = str_glue("{title}\n{year}-{month}"))

top_posts <- posts %>%
  group_by(pagePath) %>%
  filter(n() > 4) %>%
  summarise(
    sum = sum(sessions),
    mean = mean(sessions),
    max = max(sessions),
    sd = sd(sessions),
    n = n(),
    deriv = max(abs(diff(sessions) / diff(as.numeric(week))))
  )

# Max
posts %>%
  semi_join(top_posts %>% arrange(desc(max)) %>% head(12), by = "pagePath") %>%
  mutate(label = fct_reorder(label, sessions, max)) %>%
  ggplot(aes(week, sessions, group = pagePath)) +
  geom_line() +
  facet_wrap(~ label)

# Sum
posts %>%
  semi_join(top_posts %>% arrange(desc(sum)) %>% head(12), by = "pagePath") %>%
  mutate(label = fct_reorder(label, sessions, sum)) %>%
  ggplot(aes(week, sessions, group = pagePath)) +
  geom_line() +
  facet_wrap(~ label)

# Not sure what we want here? Maybe top articles for last month?
# Top articles in last year?
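
# One rough answer (a sketch, not settled analysis): `posts` already covers
# roughly the last year, so total sessions per post gives "top articles in the
# last year"; the same idea with a 4-week filter gives "last month". The
# top-10 cutoff is arbitrary.
posts %>%
  count(label, wt = sessions, sort = TRUE) %>%
  head(10)

posts %>%
  filter(week >= max(week) - weeks(4)) %>%
  count(label, wt = sessions, sort = TRUE) %>%
  head(10)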

search_queries("www.tidyverse.org/articles/2017/12/workflow-vs-script/", duration = "1 year")
# call `rlang::last_error()` to see a backtrace
search_queries("www.tidyverse.org/articles/2018/10/rlang-0-3-0/", duration = "1 year")
# scales.r-lib.org is not doing well on search
search_queries("www.tidyverse.org/articles/2018/08/scales-1-0-0/", duration = "1 year")

--------------------------------------------------------------------------------
/diagnostics.R:
--------------------------------------------------------------------------------
library(tidyverse)
library(lubridate)
source("utils/analytics.R")

# Diagnostics to check if we have accidentally broken google analytics
# on any existing sites

by_site <- analytics_weekly(
  from = today() - weeks(12),
  dimensions = "hostname"
)

# sanity check the is_tidyverse() function
by_site %>%
  count(hostname, wt = sessions, sort = TRUE) %>%
  filter(!is_tidyverse(hostname))

by_site %>%
  filter(is_tidyverse(hostname)) %>%
  count(week) %>%
  ggplot(aes(week, n)) +
  geom_line() +
  geom_point() +
  labs(
    title = "GA usage",
    x = NULL,
    y = "Sites"
  )

missing_records <- by_site %>%
  filter(is_tidyverse(hostname)) %>%
  complete(week, hostname) %>%
  group_by(hostname) %>%
  summarise(n_missing = sum(is.na(users))) %>%
  filter(n_missing > 0)

# Which sites have changed their inclusion status?
by_site %>%
  semi_join(missing_records) %>%
  ggplot(aes(week, fct_rev(fct_infreq(hostname)))) +
  geom_tile() +
  scale_x_date(NULL, expand = c(0, 0), minor_breaks = "week") +
  scale_y_discrete("Site", expand = c(0, 0)) +
  labs(
    title = "Usage by site",
    x = NULL,
    y = NULL
  )

daily <- analytics(c("hostname", "date"))
daily %>%
  filter(is_tidyverse(hostname)) %>%
  semi_join(missing_records, by = "hostname") %>%
  complete(hostname, date, fill = list(users = 0, sessions = 0)) %>%
  ggplot(aes(date, users)) +
  geom_line() +
  facet_wrap(~ hostname, scales = "free_y")

--------------------------------------------------------------------------------
/documentation.R:
--------------------------------------------------------------------------------
library(tidyverse)
library(lubridate)
library(shiny)
source("utils/analytics.R")
source("utils/search-console.R")

# Documentation and articles -------------------------------------------------

filter_docs <- filter_or(
  googleAnalyticsR::dim_filter("pagePath", "BEGINS_WITH", "/reference/"),
  googleAnalyticsR::dim_filter("pagePath", "BEGINS_WITH", "/articles/")
)

topics_raw <- analytics(c("hostname", "pagePath"), dim_filters = filter_docs)
topics <- topics_raw %>%
  filter(is_tidyverse(hostname)) %>%
  mutate(package = package_name(hostname), hostname = NULL) %>%
  filter(package != "www") %>%
  select(package, pagePath, sessions, users) %>%
  arrange(desc(sessions))

topics

topics %>%
  filter(package == "dplyr", sessions > 500) %>%
  print(n = 20)

search_queries("dplyr.tidyverse.org/reference/join.html") %>% group_terms()

topics %>%
  filter(sessions > 100) %>%
  group_by(package) %>%
  filter(row_number() <= 10) %>%
  arrange(.by_group = TRUE) %>%
  print(n = Inf)
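
# Sketch: which pkgdown sites carry the most documentation traffic overall?
# Uses `topics` from above; the bar-chart styling is just a first guess.
topics %>%
  count(package, wt = sessions) %>%
  mutate(package = fct_reorder(package, n)) %>%
  ggplot(aes(package, n)) +
  geom_col() +
  coord_flip() +
  scale_y_continuous(labels = scales::comma) +
  labs(x = NULL, y = "Sessions (last month)")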

# Weekly trends ---------------------------------------------------------------
# Not really sure what we should be looking for here

by_week <- analytics_weekly(c("hostname", "pagePath"), dim_filters = filter_docs)
pages <- by_week %>%
  filter(is_tidyverse(hostname)) %>%
  mutate(package = package_name(hostname), hostname = NULL) %>%
  filter(package != "www") %>%
  extract(pagePath, c("type", "topic"), "/(.*)/(.*)\\.html", remove = FALSE) %>%
  mutate(label = str_glue("{package}::{topic}")) %>%
  filter(!is.na(type)) %>%
  group_by(label, week) %>%
  summarise(sessions = sum(sessions), users = sum(users)) %>%
  filter(mean(sessions) > 100) %>%
  ungroup() %>%
  complete(label, week, fill = list(sessions = 1, users = 1))

ui <- fluidPage(
  sidebarLayout(
    sidebarPanel(
      selectInput("docs", "docs", choices = unique(pages$label), multiple = TRUE)
    ),
    mainPanel(
      plotOutput("trend", height = "600px")
    )
  )
)
server <- function(input, output, session) {
  selected <- reactive({
    req(input$docs)
    filter(pages, label %in% input$docs)
  })

  output$trend <- renderPlot({
    selected() %>%
      mutate(label = fct_reorder2(label, week, sessions)) %>%
      ggplot(aes(week, sessions, colour = label)) +
      geom_line() +
      scale_y_log10() +
      theme_grey(16)
  })
}
shinyApp(ui, server)

--------------------------------------------------------------------------------
/downloads-sites-search.R:
--------------------------------------------------------------------------------
library(tidyverse)
library(lubridate)
source("api-analytics.R")
library(cranlogs)

date_end <- today() - days(3)
date_start <- date_end - days(30)

# Google analytics --------------------------------------------------------

daily <- ga_get(
  from = date_start,
  to = date_end,
  metrics = c("hits", "sessions", "users"),
  dimensions = c("date", "hostname")
)

sites <- daily %>%
  extract(hostname, "package", "^(.*)\\.tidyverse\\.org$", remove = FALSE) %>%
  distinct(hostname, package) %>%
  filter(!is.na(package))

package_hits <- daily %>%
  inner_join(sites, by = "hostname") %>%
  select(date, package, site_hits = hits, site_users = users, site_sessions = sessions)

# Google searches ---------------------------------------------------------

site_searches <- function(url) {
  filter <- paste0("page~~", url)
  sc_get(dimension = "date", filter = filter, from = date_start, to = date_end)
}

searches_raw <- sites$hostname %>% set_names() %>% map_dfr(site_searches, .id = "hostname")

package_searches <- searches_raw %>%
  inner_join(sites, by = "hostname") %>%
  select(date, package, search_clicks = clicks, search_views = impressions)

# Package downloads -------------------------------------------------------

downloads <- cran_downloads(sites$package, from = date_start, to = date_end)

package_downloads <- downloads %>%
  as_tibble() %>%
  select(date, package, downloads = count)

# All together ------------------------------------------------------------

anti_join(package_hits, package_downloads)
anti_join(package_downloads, package_hits)

package_info <- package_hits %>%
  left_join(package_searches, by = c("date", "package")) %>%
  left_join(package_downloads, by = c("date", "package"))

package_sum <- package_info %>%
  group_by(package) %>%
  summarise_if(is.numeric, sum) %>%
  filter(downloads != 0, package != "blob") %>%
  mutate(
    prop_from_search = search_clicks / site_hits,
    users_per_download = site_users / downloads
  )

View(package_sum)

package_sum %>%
  filter(package != "googledrive") %>%
  ggplot(aes(downloads, site_users)) +
  geom_point() +
  scale_x_log10() +
  scale_y_log10()

package_sum %>% filter(downloads < 20000) %>% pull(package)

package_sum %>%
  arrange(desc(prop_from_search)) %>%
  select(package, search_clicks, site_hits, prop_from_search) %>%
  print(n = Inf)

sc_get_queries("rvest.tidyverse.org")
sc_get_queries("glue.tidyverse.org")

--------------------------------------------------------------------------------
/oauth-key.json:
--------------------------------------------------------------------------------
{"installed":{"client_id":"274865854847-10ebkukmiuoelv4j9e26lkn1sj0ij86m.apps.googleusercontent.com","project_id":"tidyverse-analysis","auth_uri":"https://accounts.google.com/o/oauth2/auth","token_uri":"https://oauth2.googleapis.com/token","auth_provider_x509_cert_url":"https://www.googleapis.com/oauth2/v1/certs","client_secret":"HyMLe1btTvDZoUUMdGULMrtS","redirect_uris":["urn:ietf:wg:oauth:2.0:oob","http://localhost"]}}

--------------------------------------------------------------------------------
/other-dimensions.R:
--------------------------------------------------------------------------------
library(tidyverse)
library(lubridate)
source("utils/analytics.R")

os <- analytics_weekly("operatingSystem")
os %>%
  group_by(week, os = fct_lump(operatingSystem, 5, w = sessions)) %>%
  summarise(users = sum(users), sessions = sum(sessions)) %>%
  mutate(prop = users / sum(users)) %>%
  ggplot(aes(week, prop, colour = fct_reorder2(os, week, prop))) +
  geom_line() +
  labs(colour = "OS")

country <- analytics_weekly("country")
country %>%
  group_by(week, country = fct_lump(country, 8, w = sessions)) %>%
  summarise(users = sum(users), sessions = sum(sessions)) %>%
  mutate(prop = users / sum(users)) %>%
  ggplot(aes(week, sessions, colour = fct_reorder2(country, week, sessions))) +
  geom_line() +
  labs(colour = NULL) +
  scale_y_log10(labels = scales::comma) +
  scale_colour_brewer(palette = "Set1")

language <- analytics_weekly("language")
language %>%
  group_by(week, language = fct_lump(language, 8, w = sessions)) %>%
  summarise(users = sum(users), sessions = sum(sessions)) %>%
  mutate(prop = users / sum(users)) %>%
  ggplot(aes(week, sessions, colour = fct_reorder2(language, week, sessions))) +
  geom_line() +
  labs(colour = NULL) +
  scale_y_log10(labels = scales::comma) +
  scale_colour_brewer(palette = "Set1")

gender <- analytics_weekly("userGender")

--------------------------------------------------------------------------------
/overview.R:
--------------------------------------------------------------------------------
library(tidyverse)
library(lubridate)
source("utils/analytics.R")

# Rough overview of all traffic, broken down by date and site

# Total -------------------------------------------------------------------

analytics()
analytics(from = today() - 365, to = today())

# Weekly users/sessions ---------------------------------------------------

weekly <- analytics_weekly()
weekly %>%
  head(-1) %>%
  pivot_longer(users:sessions, names_to = "var", values_to = "n") %>%
  ggplot(aes(week, n, colour = var)) +
  scale_y_continuous(labels = scales::comma) +
  geom_line() +
  labs(
    title = "Weekly tidyverse website stats",
    x = NULL,
    y = NULL,
    colour = NULL
  ) +
  theme(legend.position = "bottom")

# Dangerous to read too much into this plot as it's taken some time to
# get all major sites using google analytics.
# * tidytemplate accidentally conditioned out analytics on Sep 3, and
#   wasn't fully restored until Nov 10.

# By site -----------------------------------------------------------------

by_site <- analytics("hostname")
by_site %>%
  filter(is_tidyverse(hostname)) %>%
  ggplot(aes(fct_reorder(hostname, users), users)) +
  geom_point() +
  scale_y_log10(
    breaks = scales::log_breaks(n = 6),
    labels = scales::comma, minor_breaks = 10
  ) +
  coord_flip() +
  labs(
    title = "Last month",
    x = NULL,
    y = "Total users (log scale)"
  )

site_weekly <- analytics_weekly("hostname")

# Weekly sessions for top sites
site_weekly %>%
  filter(is_tidyverse(hostname)) %>%
  lump_var(hostname) %>%
  ggplot(aes(week, sessions, colour = hostname)) +
  geom_line(size = 1) +
  labs(title = "Weekly sessions", x = NULL, y = "Sessions", colour = NULL) +
  scale_y_continuous(labels = scales::comma) +
  scale_colour_brewer(palette = "Set1") +
  theme(legend.position = "right", legend.justification = "top")

site_sessions <- function(hostname) {
  site_weekly %>%
    filter(.data$hostname %in% c(.env$hostname)) %>%
    ggplot(aes(week, sessions, colour = fct_reorder2(hostname, week, sessions))) +
    geom_line(size = 1) +
    scale_x_date(NULL, date_breaks = "1 month", date_labels = "%b") +
    scale_y_continuous(labels = scales::comma) +
    labs(
      title = "Sessions per week",
      x = NULL,
      y = "Sessions",
      colour = NULL
    ) +
    scale_colour_brewer(palette = "Set1") +
    theme(legend.position = "bottom")
}
site_sessions(c("dplyr.tidyverse.org", "r4ds.had.co.nz", "ggplot2.tidyverse.org"))
site_sessions(c("readxl.tidyverse.org", "readr.tidyverse.org", "haven.tidyverse.org", "vroom.r-lib.org"))
site_sessions(c("tidyr.tidyverse.org", "stringr.tidyverse.org", "purrr.tidyverse.org"))
site_sessions(c("lubridate.tidyverse.org", "forcats.tidyverse.org", "tibble.tidyverse.org"))
site_sessions(c("httr.r-lib.org", "rvest.tidyverse.org", "xml2.r-lib.org"))
site_sessions(c("magrittr.tidyverse.org", "glue.tidyverse.org", "reprex.tidyverse.org"))
site_sessions(c("usethis.r-lib.org", "roxygen2.r-lib.org", "testthat.r-lib.org", "devtools.r-lib.org", "pkgdown.r-lib.org"))
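
# Sketch: an org-level rollup of the same data, splitting weekly sessions by
# domain. The tidyverse.org / r-lib.org / other grouping is ad hoc, not
# something the rest of this repo relies on.
site_weekly %>%
  filter(is_tidyverse(hostname)) %>%
  mutate(domain = case_when(
    str_detect(hostname, "tidyverse\\.org$") ~ "tidyverse.org",
    str_detect(hostname, "r-lib\\.org$") ~ "r-lib.org",
    TRUE ~ "other"
  )) %>%
  count(week, domain, wt = sessions) %>%
  ggplot(aes(week, n, colour = domain)) +
  geom_line() +
  scale_y_continuous(labels = scales::comma) +
  labs(y = "Sessions", colour = NULL)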

--------------------------------------------------------------------------------
/r4ds.R:
--------------------------------------------------------------------------------
library(tidyverse)
library(lubridate)
source("utils/analytics.R")

filter_r4ds <- filter_or(
  googleAnalyticsR::dim_filter("hostname", "EXACT", "r4ds.had.co.nz")
)

analytics(dim_filters = filter_r4ds)

by_chapter <- analytics("pagePath", dim_filters = filter_r4ds)
by_chapter %>%
  arrange(desc(sessions)) %>%
  print(n = 20)

# Trimming anchors and query strings doesn't change overall numbers that much
collapsed <- by_chapter %>%
  mutate(pagePath = str_replace(pagePath, "\\?.*$", "")) %>%
  mutate(pagePath = str_replace(pagePath, "\\#.*$", "")) %>%
  mutate(pagePath = str_replace(pagePath, "\\.html?$", "")) %>%
  mutate(pagePath = str_replace(pagePath, "/index$", "/")) %>%
  count(pagePath, wt = users, sort = TRUE)

# Look at patterns over time:
# there's not much going on (unsurprisingly)
by_chapter_week <- analytics_weekly("pagePath", dim_filters = filter_r4ds)
by_chapter_week %>%
  lump_var(pagePath, n = 15) %>%
  ggplot(aes(week, sessions)) +
  geom_line() +
  scale_y_log10() +
  facet_wrap(~ pagePath)

--------------------------------------------------------------------------------
/search-terms.R:
--------------------------------------------------------------------------------
library(tidyverse)
library(lubridate)
source("utils/search-console.R")

readxl <- search_queries("readxl.tidyverse.org") %>%
  filter(clicks > 0) %>%
  group_terms()
readxl

readxl %>%
  ggplot(aes(ctr, weight = impressions)) +
  geom_histogram(binwidth = 0.02)

# Code searches tend to have high ctr?
readxl %>% filter(str_detect(query_a, "_"))

# See original queries
readxl %>%
  filter(query_a == "package readxl [r]") %>%
  pull(query) %>%
  .[[1]]

# Other sites -------------------------------------------------------------

readr <- search_queries("readr.tidyverse.org") %>%
  filter(clicks > 0) %>%
  group_terms()

readr
readr %>% filter(str_detect(query_a, "_"))
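
# Does the hunch above hold? A sketch comparing click-through rates for
# "code-like" queries (containing an underscore) against everything else,
# using the readxl terms. The underscore test is just a stand-in for "code".
readxl %>%
  mutate(code_query = str_detect(query_a, "_")) %>%
  group_by(code_query) %>%
  summarise(
    impressions = sum(impressions),
    clicks = sum(clicks),
    ctr = clicks / impressions
  )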

--------------------------------------------------------------------------------
/search-trends.R:
--------------------------------------------------------------------------------
library(tidyverse)
# remotes::install_github("josiahparry/gtrendsR", "interest_refactor")
library(gtrendsR)

ob <- gtrends(c("ggplot2", "dplyr", "tidyverse"))
df <- as_tibble(ob$interest_over_time)
df %>%
  filter(hits > 0) %>%
  ggplot(aes(date, hits, colour = keyword, group = keyword)) +
  geom_line(colour = "grey80") +
  geom_smooth(se = FALSE) +
  scale_y_log10() +
  labs(
    title = "Google search trends",
    x = NULL,
    y = "Relative hits",
    colour = "Search term"
  )

--------------------------------------------------------------------------------
/searches.R:
--------------------------------------------------------------------------------
library(lubridate)
library(tidyverse)
source("utils/search-console.R")

sc_get("query")
sc_get("date")
sc_get("page")

# How many people are finding tidyverse sites from google over the last year?
over_time <- sc_get("date", duration = "1 year")
over_time %>%
  summarise(
    impressions = sum(impressions),
    clicks = sum(clicks)
  ) %>%
  mutate(
    ctr = clicks / impressions
  )

over_time %>%
  mutate(
    weekend = ifelse(wday(date, label = TRUE) %in% c("Sat", "Sun"), "weekend", "weekday")
  ) %>%
  filter(position < 20) %>%
  ggplot(aes(date, position)) +
  geom_hline(yintercept = 12, size = 2, colour = "white") +
  geom_smooth(se = FALSE, size = 2, colour = "grey70") +
  geom_point() +
  facet_wrap(~ weekend, ncol = 1) +
  scale_y_reverse() +
  labs(
    title = "Search ranking across all tidyverse sites",
    y = "Position in search results",
    x = NULL
  )

over_time %>%
  group_by(date = floor_date(date, "week")) %>%
  summarise(clicks = sum(clicks), days = n()) %>%
  filter(days == 7) %>%
  ggplot(aes(date, clicks)) +
  geom_line() +
  geom_smooth(se = FALSE) +
  scale_y_continuous(labels = scales::comma)

# How are people finding us? ----------------------------------------------

search_queries("stringr.tidyverse.org")
search_queries("ggplot2.tidyverse.org")
search_queries("stringr.tidyverse.org")
search_queries("dplyr.tidyverse.org")

search_queries("www.tidyverse.org/articles/")
search_queries("www.tidyverse.org/articles/2017/12/workflow-vs-script/")

search_queries("ggplot2.tidyverse.org/reference/geom_bar.html")
search_pages("geom_bar")
search_pages("ggplot2 bar chart")
geom_bar <- search_queries("ggplot2.tidyverse.org/reference/geom_bar.html")

geom_bar %>%
  mutate(geom_bar = str_detect(query, "geom_bar|geom bar")) %>%
  count(geom_bar, wt = clicks) %>%
  mutate(prop = n / sum(n))

geom_smooth <- search_queries("ggplot2.tidyverse.org/reference/geom_smooth.html")

geom_smooth %>%
  mutate(geom_smooth = str_detect(query, "geom_smooth|geom smooth")) %>%
  count(geom_smooth, wt = clicks) %>%
  mutate(prop = n / sum(n))
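
# The two blocks above repeat the same computation, so here's a small helper
# (a sketch, not part of the original workflow). `pattern` is whatever counts
# as a "branded" query for the page in question; the geom_histogram URL below
# is just an illustrative choice.
branded_share <- function(url, pattern) {
  search_queries(url) %>%
    mutate(branded = str_detect(query, pattern)) %>%
    count(branded, wt = clicks) %>%
    mutate(prop = n / sum(n))
}
branded_share("ggplot2.tidyverse.org/reference/geom_histogram.html", "geom_histogram|geom histogram")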

--------------------------------------------------------------------------------
/utils/analytics.R:
--------------------------------------------------------------------------------
source("utils/auth.R")

is_tidyverse <- function(x) {
  str_detect(x, "\\.(tidyverse\\.org|r-lib\\.org|r-dbi\\.org|had\\.co\\.nz)$")
}

package_name <- function(x) {
  str_match(x, "(.*)\\.(tidyverse|r-lib|r-dbi)\\.org$")[, 2]
}

filter_or <- function(...) {
  googleAnalyticsR::filter_clause_ga4(operator = "OR", list(...))
}
filter_and <- function(...) {
  googleAnalyticsR::filter_clause_ga4(operator = "AND", list(...))
}

# google analytics --------------------------------------------------------

analytics <- function(
  dimensions = c(), ...,
  metrics = c("users", "sessions"),
  from = "31daysAgo",
  to = "yesterday") {
  out <- googleAnalyticsR::google_analytics(
    dimensions = dimensions,
    viewId = 170811407,
    metrics = metrics,
    ...,
    date_range = c(from, to),
    max = -1
  )
  tibble::as_tibble(out)
}

# Get the last year of data, aligned to Google Analytics weeks (Sunday-Saturday).
analytics_weekly <- function(dimensions = c(), ..., from = NULL, to = today() - 1L) {
  # google analytics weeks start on a Sunday
  sunday <- function(x) x - wday(x) + 1

  to <- sunday(to) - 1L # so the range ends on a Saturday
  if (is.null(from)) {
    from <- to - weeks(52)
  } else {
    from <- sunday(from)
  }

  dimensions <- c(dimensions, "year", "week")
  df <- analytics(
    ...,
    from = from,
    to = to,
    dimensions = dimensions
  )
  df %>%
    mutate(
      week = make_date(as.numeric(year)) + weeks(as.numeric(week) - 1) - days(1),
      year = NULL
    )
}

lump_var <- function(df, var, n = 8) {
  df %>%
    mutate(
      {{ var }} := fct_reorder2(fct_lump({{ var }}, n = n, w = sessions), week, sessions)
    ) %>%
    group_by(week, {{ var }}) %>%
    summarise(sessions = sum(sessions), users = sum(users)) %>%
    ungroup()
}

--------------------------------------------------------------------------------
/utils/auth.R:
--------------------------------------------------------------------------------
scopes <- c(
  "https://www.googleapis.com/auth/analytics",
  "https://www.googleapis.com/auth/webmasters"
)
googleAuthR::gar_auth_configure(path = "oauth-key.json")
googleAuthR::gar_auth(email = TRUE, scopes = scopes)

--------------------------------------------------------------------------------
/utils/search-console.R:
--------------------------------------------------------------------------------
source("utils/auth.R")

# suppress broken check
assignInNamespace("check.Url", function(url, ...) url, "searchConsoleR")

# "today" in search console time is 3 days ago

sc_get <- function(dimension, filter = NULL, to = today() - 3, duration = "1 month", row_limit = 100) {
  # dimension <- rlang::arg_match(dimension, c("query", "date", "country", "device", "page"))

  from <- to - as.period(duration)

  out <- suppressMessages(searchConsoleR::search_analytics(
    "sc-domain:tidyverse.org",
    dimensions = dimension,
    dimensionFilterExp = filter,
    startDate = from,
    endDate = to,
    rowLimit = row_limit
  ))

  tibble::as_tibble(out)
}

search_queries <- function(url, to = today() - 3, duration = "1 month") {
  filter <- paste0("page~~", url)
  sc_get(dimension = "query", filter = filter, to = to, duration = duration)
}

search_pages <- function(query, to = today() - 3, duration = "1 month") {
  filter <- paste0("query==", query)
  sc_get(dimension = "page", filter = filter, to = to, duration = duration)
}

alphabetise <- function(x) {
  r_match <- "\\b(in )?r\\b"
  has_r <- str_detect(x, r_match)

  out <- x %>%
    str_replace_all(r_match, "") %>%
    str_squish() %>%
    str_split(" ") %>%
    map(str_sort) %>%
    map_chr(str_flatten, " ")

  out[has_r] <- paste0(out[has_r], " [r]")
  out
}

group_terms <- function(df) {
  df %>%
    mutate(query_a = alphabetise(query)) %>%
    group_by(query_a) %>%
    summarise(
      query = list(query),
      clicks = sum(clicks),
      position = weighted.mean(position, impressions),
      impressions = sum(impressions)
    ) %>%
    select(query_a, query, clicks, impressions, position) %>%
    mutate(ctr = clicks / impressions) %>%
    arrange(desc(clicks))
}
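
# Illustration (not from the original file) of what alphabetise() does:
#   alphabetise(c("read excel in r", "r read_excel", "excel read"))
#   #> "excel read [r]" "read_excel [r]" "excel read"
# group_terms() then collapses queries that alphabetise to the same string,
# so "read excel in r" and "r excel read" count as one term.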

--------------------------------------------------------------------------------
/website-analytics.Rproj:
--------------------------------------------------------------------------------
Version: 1.0

RestoreWorkspace: Default
SaveWorkspace: Default
AlwaysSaveHistory: Default

EnableCodeIndexing: Yes
UseSpacesForTab: Yes
NumSpacesForTab: 2
Encoding: UTF-8

RnwWeave: knitr
LaTeX: XeLaTeX

AutoAppendNewline: Yes
StripTrailingWhitespace: Yes
--------------------------------------------------------------------------------