├── config.txt
├── logs
└── .gitignore
├── run.sh
├── .gitignore
├── favicon.png
├── world_map.png
├── functions
├── strip.R
├── form.R
├── plot.R
├── crawl.R
└── get.R
├── LICENSE
├── sandbox.R
├── _ui.R
├── README.md
├── _server.R
├── _daemon.R
├── analysis.R
└── style.css
/config.txt:
--------------------------------------------------------------------------------
1 | port = 3030
--------------------------------------------------------------------------------
/logs/.gitignore:
--------------------------------------------------------------------------------
1 | *
2 | !.gitignore
--------------------------------------------------------------------------------
/run.sh:
--------------------------------------------------------------------------------
1 | Rscript --vanilla _daemon.R
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .DS_Store
2 | maps/
3 | tables/
4 | graphs/
5 | _secret.R
--------------------------------------------------------------------------------
/favicon.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hxrts/spider/HEAD/favicon.png
--------------------------------------------------------------------------------
/world_map.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hxrts/spider/HEAD/world_map.png
--------------------------------------------------------------------------------
/functions/strip.R:
--------------------------------------------------------------------------------
1 |
2 |
3 | message(' - strip')
4 |
5 |
6 | strip <- function(string) { gsub('^\\s+|\\s+$', '', string) }
7 |
8 |
--------------------------------------------------------------------------------
/functions/form.R:
--------------------------------------------------------------------------------
1 |
2 |
3 | message(' - form')
4 |
5 |
6 | FormArrows <- function(reply) { # format reply as source -> target table
7 |
8 | reply %>%
9 | filter(hierarchy != 'identity') %>%
10 | mutate(source = ifelse(hierarchy == 'child', query, slug)) %>%
11 | mutate(target = ifelse(hierarchy == 'child', slug, query)) %>%
12 | select(source, target) %>%
13 | unique
14 |
15 | }
16 |
17 |
18 | FormObjects <- function(reply) { # format reply as channel metadata table
19 |
20 | reply %>%
21 | select(title, slug, user.name, user.slug, length, type = status)
22 |
23 | }
24 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2016 Sam Hart
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/sandbox.R:
--------------------------------------------------------------------------------
1 |
2 |
3 | Server <- shinyServer(function(input, output, session) {
4 |
5 | # vis function
6 | Net <- function() { visNetwork(
7 | nodes = data.frame(id = 1:3),
8 | edges = data.frame(from = c(1,2), to = c(1,3))
9 | )}
10 |
11 | # output
12 | output$network_proxy <- renderVisNetwork({ Net() })
13 |
14 |
15 | # # observe
16 | # observer <- observe({
17 |
18 | # input$getAll
19 |
20 | # visNetworkProxy("network_proxy") %>%
21 | # visGetEdges()
22 |
23 | # visNetworkProxy("network_proxy") %>%
24 | # visGetNodes()
25 |
26 | # print(input$network_proxy_edges)
27 |
28 | # print(input$network_proxy_nodes)
29 |
30 | # })
31 |
32 |
33 | # # output
34 | # output$edges_data_from_shiny <- renderPrint({
35 | # input$network_proxy_edges
36 | # })
37 |
38 | # output$nodes_data_from_shiny <- renderPrint({
39 | # input$network_proxy_nodes
40 | # })
41 |
42 |
43 | # # cleanup
44 | # session$onSessionEnded(function() {
45 | # observer$suspend()
46 | # })
47 |
48 |
49 | })
50 |
51 |
52 |
53 | UI <- shinyUI(fluidPage(
54 | actionButton("getAll", "fetch"),
55 | visNetworkOutput("network_proxy", height = "400px"),
56 | verbatimTextOutput("edges_data_from_shiny"),
57 | verbatimTextOutput("nodes_data_from_shiny")
58 | ))
59 |
60 |
61 |
62 | Test <- function(){ shinyApp(
63 | UI, Server
64 | )}
65 |
66 |
67 |
68 |
69 | #'test' %>% str_c('https://api.are.na/v2/channels/', ., '/thumb') %>% fromJSON
--------------------------------------------------------------------------------
/_ui.R:
--------------------------------------------------------------------------------
1 |
2 |
3 | message('|-- ui')
4 |
5 |
6 | UI <- function(origin, pop, direction, up.initial, depth, type) {
7 |
8 | fluidPage(
9 |
10 | includeCSS('style.css'),
11 | headerPanel('spider'),
12 | fluidRow(
13 |
14 | tags$head(tags$link(rel = 'shortcut icon', href = 'https://rawgit.com/hxrts/spider/master/favicon.png')),
15 |
16 | column(6, wellPanel(
17 | textInput(
18 | inputId = 'origin',
19 | label = 'Origin channel ID · Are.na/sam-hart/research-tactics',
20 | value = origin,
21 | placeholder = origin))),
22 |
23 | column(6, wellPanel(textInput(
24 | inputId = 'pop',
25 | label = 'Prune channels · IDs comma separated',
26 | value = pop,
27 | placeholder = pop))),
28 |
29 | column(2, wellPanel(selectInput(
30 | inputId = 'direction',
31 | label = 'Direction',
32 | choices = c('down', 'up', 'down & up'),
33 | multiple = FALSE))),
34 |
35 | column(2, wellPanel(selectInput(
36 | inputId = 'up.initial',
37 | label = 'Origin crawl up',
38 | choices = c('yes', 'no'),
39 | multiple = FALSE))),
40 |
41 | column(2, wellPanel(selectInput(
42 | inputId = 'depth',
43 | label = 'Depth', choices = 1:4,
44 | multiple = FALSE))),
45 |
46 | column(2, wellPanel(selectInput(
47 | inputId = 'type',
48 | label = 'Type',
49 | choices = c('all', 'public', 'closed'),
50 | multiple = FALSE))),
51 |
52 | column(2, wellPanel(selectInput(
53 | inputId = 'private',
54 | label = 'Include private',
55 | choices = c('yes', 'no'),
56 | multiple = FALSE))),
57 |
58 | column(1, wellPanel(actionButton('buildGraph', 'Build'))),
59 |
60 | column(1, wellPanel(actionButton('clearGraph', 'Clear'))),
61 |
62 | column(12, visNetworkOutput('network_proxy', width = '100%', height = '1200px'))
63 | )
64 | )
65 | }
66 |
67 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Are.na Spider
2 | Crawl connected Are.na channels and visualize the resulting network.
3 |
4 |
5 |
6 | ## Setup
7 |
8 | Working prototype, but there are a couple setup steps for first time use:
9 |
10 | - install [XQuartz](https://www.xquartz.org/) if you don't yet have it
11 | - install [R](https://www.r-project.org/)
12 | - clone the repo, move into app directory, set permissions for run script, and start R environment
13 |
14 | ```> git clone git@github.com:hxrts/spider.git && cd spider && chmod +x run.sh && R```
15 |
16 | - from R, install the pacman package manager and import remaining code to install libraries as needed, exit upon completion
17 |
18 | ```> install.packages('pacman') ; source('_daemon.R') ; q(save = 'no')```
19 |
20 | - in the root directory create a file called ```_secret.R```
21 | - register your app on [dev.are.na](https://dev.are.na/oauth/applications/new) and copy your personal access token.
22 | - edit ```_secret.R``` by pasting ```token = 'personal-access-token'``` and adding the token from [dev.are.na](https://dev.are.na)
23 |
24 |
25 | ## Run
26 |
27 | - [optional] specify local port in config.txt file
28 | - from spider directory run startup script ```> sh run.sh```
29 | - point your browser to the address displayed
30 |
31 | ## Usage
32 |
33 | Pick an origin channel and choose crawling parameters as desired. Clear the window after your search or add a new query to the existing graph.
34 |
35 | Note: The scraping is rather slow, especially graphs of high degree, you can watch crawling progrss in the command prompt. Please be patient.
36 |
37 |
38 | ## Stop
39 |
40 | - ```ctrl + c``` halts app and returns command prompt
41 |
42 | ## World Maps
43 |
44 | Add interesting results to the channel [World Maps](https://www.are.na/sam-hart/world-maps) with some information about the parameters used to generate the plot.
45 |
46 | ## Known Bugs
47 |
48 | I've run into issues building graphs with overlapping edges without clearing the previous build, especially working from the initial graph. If the app encounters an error the screen will becomed greyed out, refreshing the window should re-initialize.
49 |
--------------------------------------------------------------------------------
/functions/plot.R:
--------------------------------------------------------------------------------
1 |
2 |
3 | message(' - plot')
4 |
5 |
6 | PlotGraph <- function(objects, arrows, origin) {
7 | visNetwork(
8 | objects %>% unique,
9 | arrows %>% unique,
10 | # main = list(text = 'Are.na / ',
11 | # style = 'display: inline-block;
12 | # font-family: Arial;
13 | # color: #9d9d9d;
14 | # font-weight: bold;
15 | # font-size: 20px;
16 | # text-align: left'),
17 | # submain = list(text = str_c('',
19 | # objects %>% filter(id == origin) %$% label,
20 | # '
'),
21 | # style = 'margin-left: 0.3em;
22 | # display: inline;
23 | # font-family: Arial;
24 | # font-weight: bold;
25 | # font-size: 20px;
26 | # text-align: left'),
27 | width = '100%',
28 | height = '800px'
29 | ) %>%
30 | visLayout(improvedLayout = TRUE) %>%
31 | # visIgraphLayout(
32 | # #layout = 'layout_nicely',
33 | # layout = 'layout_with_fr',
34 | # physics = FALSE,
35 | # smooth = FALSE,
36 | # type = 'full',
37 | # randomSeed = NULL,
38 | # layoutMatrix = NULL
39 | # ) %>%
40 | visNodes(
41 | font = '20px arial #333',
42 | shape = 'square',
43 | shadow = FALSE,
44 | size = 10,
45 | scaling = list(min = 3, max = 30),
46 | borderWidthSelected = 0,
47 | labelHighlightBold = TRUE
48 | ) %>%
49 | visEdges(
50 | shadow = FALSE,
51 | arrows = list(to = list(enabled = TRUE, scaleFactor = 0.5)),
52 | selectionWidth = 1,
53 | hoverWidth = 1
54 | ) %>%
55 | visOptions(
56 | selectedBy = list(variable = 'user'),
57 | highlightNearest = list(enabled = TRUE,
58 | degree = 1,
59 | hover = TRUE),
60 | nodesIdSelection = list(useLabels = TRUE)
61 | ) %>%
62 | visInteraction(
63 | tooltipStyle =
64 | 'position: fixed;
65 | visibility: hidden;
66 | padding: 8px 12px 1px 12px;
67 | white-space: nowrap;
68 | font-family: Arial;
69 | font-size: 14px;
70 | color: #333;
71 | background-color: white;
72 | webkit-border-radius: 0;
73 | border-radius: 0;
74 | border: solid 2px #cbcbcb',
75 | hover = TRUE)
76 | }
77 |
78 |
--------------------------------------------------------------------------------
/_server.R:
--------------------------------------------------------------------------------
1 |
2 |
3 | message('|-- server')
4 |
5 |
6 | Server <- shiny::shinyServer(function(input, output, session) {
7 |
8 | # initialize variables
9 | old.nodes = old.edges = NULL
10 |
11 | # system time
12 | log.file <- str_c('logs/record_', format(Sys.time(), "%d-%m-%y"), '.txt')
13 |
14 |
15 | # output
16 | output$network_proxy <- renderVisNetwork({
17 |
18 | map <- Crawl(origin, depth, direction, type, private = NULL, up.initial, pop)
19 | PlotGraph(map$objects, map$arrows, map$origin)
20 |
21 | })
22 |
23 |
24 | # observe
25 | build <- observe({
26 |
27 | # listener
28 | input$buildGraph
29 |
30 | isolate(if(input$direction == 'down') {
31 | direction = 1
32 | } else if(isolate(input$direction) == 'down & up') {
33 | direction = 0
34 | } else {
35 | direction = -1
36 | })
37 |
38 | isolate(if(input$private == 'yes' & input$type == 'all') {
39 | private = token
40 | } else {
41 | private = NULL
42 | })
43 |
44 | isolate(if(isolate(input$up.initial) == 'yes') {
45 | up.initial = 1
46 | } else {
47 | up.initial = 0
48 | })
49 |
50 | depth <- isolate(as.numeric(input$depth))
51 |
52 |
53 | isolate(if(input$buildGraph) {
54 |
55 | map <- isolate(Crawl(input$origin, depth, direction, input$type, private, up.initial, input$pop))
56 |
57 | visNetworkProxy('network_proxy') %>%
58 | visUpdateNodes(map$objects %>% unique) %>%
59 | visUpdateEdges(map$arrows %>% unique) %>%
60 | visLayout(improvedLayout = TRUE)
61 |
62 | })
63 |
64 | visNetworkProxy("network_proxy") %>%
65 | visGetEdges()
66 |
67 | visNetworkProxy("network_proxy") %>%
68 | visGetNodes()
69 |
70 | isolate(cat(input$origin, '\n', file = log.file, append = TRUE))
71 |
72 | })
73 |
74 | clear <- observe({
75 |
76 | input$clearGraph
77 |
78 | isolate(if(input$clearGraph) {
79 |
80 | if(!is.null(input$network_proxy_edges) & !is.null(input$network_proxy_nodes)) {
81 |
82 | old.edges <-
83 | input$network_proxy_edges %>%
84 | map(~ { dplyr::as_data_frame(rbind(unlist(.x))) }) %>%
85 | bind_rows
86 |
87 | old.nodes <-
88 | input$network_proxy_nodes %>%
89 | map(~ { dplyr::as_data_frame(rbind(unlist(.x))) }) %>%
90 | bind_rows
91 |
92 | visNetworkProxy('network_proxy') %>%
93 | visRemoveNodes(old.nodes$id) %>%
94 | visRemoveEdges(old.edges$id)
95 |
96 | }
97 |
98 | })
99 |
100 | })
101 |
102 | # cleanup
103 | session$onSessionEnded(function() {
104 | build$suspend()
105 | clear$suspend()
106 | unlink(log.file)
107 | })
108 |
109 | })
110 |
--------------------------------------------------------------------------------
/_daemon.R:
--------------------------------------------------------------------------------
1 |
2 |
3 | SourceDir <- function (path, pattern = '\\.[rR]$', env = NULL, chdir = TRUE)
4 |
5 | message('\n|-- functions')
6 |
7 | source('functions/strip.R')
8 | source('functions/crawl.R')
9 | source('functions/form.R')
10 | source('functions/get.R')
11 | source('functions/plot.R')
12 |
13 | if(file.exists('_secret.R')) { source('_secret.R') }
14 | source('_server.R')
15 | source('_ui.R')
16 |
17 |
18 | #-----------------------
19 | message('|-- libraries')
20 | #-----------------------
21 |
22 |
23 | if(!'shiny' %in% rownames(installed.packages())) {
24 | install.packages(shiny)
25 | }
26 |
27 | pacman::p_load(dplyr, readr, tidyr, stringr, magrittr, purrr, rlist, httr, jsonlite, visNetwork, igraph, shiny)
28 |
29 |
30 | #--------------------
31 | message('|-- daemon')
32 | #--------------------
33 |
34 |
35 | Spider <- function(port = 3030){ shinyApp(
36 |
37 | UI(origin, pop, direction, up.initial, depth, type),
38 | Server,
39 | options = list(port = port)
40 |
41 | )}
42 |
43 |
44 | #------------------------------
45 | message('|-- initialization\n')
46 | #------------------------------
47 |
48 |
49 | # start parameters
50 | origin = 'research-tactics'
51 | direction = 1
52 | depth = 1
53 | type = 'all'
54 | seed = 0
55 | up.initial = 1
56 | pop = 'superchannels, channels-categorizing-other-channels, root, looseleaf, dhr-quick-reference, primer'
57 |
58 | set.seed(seed)
59 |
60 |
61 | # palette
62 | colors <- c(
63 | '#d99aac', '#ffa9c0', '#e2979f', '#ffc6bf', '#ffb18f',
64 | '#d3a088', '#efb17b', '#ffcc95', '#bba98d', '#ffe2a8',
65 | '#fff9d3', '#cfce81', '#a1b279', '#dcf3a7', '#ebffc3',
66 | '#b0d791', '#ccffd5', '#80b88e', '#b7ffd1', '#56bd9c',
67 | '#7deac8', '#74b8a7', '#3ec1ad', '#53d4c1', '#92ffef',
68 | '#affff3', '#6ef4e8', '#d1fef9', '#85f8ff', '#a5d1d6',
69 | '#47e2f6', '#02c0d8', '#a5f0ff', '#6ab6cb', '#4fdcff',
70 | '#94bcc9', '#82deff', '#b2d5e8', '#76b2dc', '#98d2ff',
71 | '#8ac7ff', '#75b1e8', '#b6a2d9', '#e3cdff', '#ffdeff',
72 | '#ca9dc8', '#edabe8', '#f09ccc', '#ffb7de', '#e4bfcb')
73 |
74 |
75 | # read in config
76 | config <- suppressMessages(read_delim('config.txt', delim = '=', col_names = FALSE))
77 |
78 |
79 | if(nrow(config) == 0) {
80 |
81 | message('config file empty, trying default port: 3030')
82 | port <- 3030
83 |
84 | } else {
85 | # parse config file
86 | params <-
87 | config %>%
88 | set_names(c('param', 'value')) %>%
89 | mutate_each(funs(strip))
90 |
91 | # get port value
92 | port <-
93 | params %>%
94 | filter(param == 'port') %>%
95 | slice(1) %$%
96 | value %>%
97 | as.numeric
98 |
99 | message(str_c('using specified port: ', port))
100 | }
101 |
102 | # run spider
103 | Spider(port = port)
104 |
105 |
--------------------------------------------------------------------------------
/functions/crawl.R:
--------------------------------------------------------------------------------
1 |
2 |
3 | message(' - crawl')
4 |
5 |
6 | Crawl <- function(origin, depth = 1, direction = 0, type = 'all', private = NULL, up.initial = TRUE, pop = '') { # main crawling loop
7 |
8 | pool = list(origin)
9 | spent = list()
10 | arrows = list()
11 | objects = list()
12 | exit = 0
13 |
14 | pop %<>% strsplit(',') %>% map(~{ strip(.x) }) %>% unlist %>% list.filter(!. %in% origin)
15 |
16 | if(length(pop) == 0) {
17 | pop = ''
18 | }
19 |
20 | # crawl recursion
21 | for(level in 1:depth) {
22 |
23 | level.msg <- str_c('\n[[ level ', level, ' ]]\n')
24 | message(level.msg)
25 |
26 | if(level == 1) {
27 | if(is.null(private)) {
28 | channel.status <- str_c('https://are.na/', origin[1]) %>% GET %>% http_status %$% category
29 | } else {
30 | channel.status <- str_c('https://api.are.na/v2/channels/', origin[1], '/contents') %>% GET(add_headers(Authorization = str_c('bearer ', private))) %>% http_status %$% category
31 | }
32 | if(channel.status != 'Success' | origin[1] %in% c('', 'explore', 'feed', 'tools', 'about', 'blog', 'pricing', 'import')) {
33 | message(str_c('no channel with the slug "', origin[1], '"'))
34 | pool = list('failure')
35 | depth = 1
36 | type = 'all'
37 | up.initial = FALSE
38 | }
39 | }
40 |
41 | if(level <= length(pool)) {
42 | if(pool[[level]] %>% length > 0) {
43 | if(up.initial == FALSE & depth == 1 & length(origin) == 1) {
44 |
45 | reply <-
46 | pool[[level]] %>%
47 | list.map(GetChannel(., 1, type, private)) %>%
48 | bind_rows %>%
49 | mutate(level = level)
50 |
51 | } else {
52 |
53 | queries <- split(pool[[level]], ceiling(seq_along(pool[[level]])/100))
54 |
55 | reply <- queries %>%
56 | map( ~ {
57 | sub.reply <-
58 | .x %>%
59 | list.map(GetChannel(., direction, type, private)) %>%
60 | bind_rows %>%
61 | mutate(level = level)
62 |
63 | if(level > 2) {
64 | message('\n-- pause --\n') ; Sys.sleep(5)
65 | }
66 |
67 | sub.reply
68 | }) %>%
69 | bind_rows
70 | }
71 |
72 | if(!is.null(pop)) {
73 | reply %<>% filter(!query %in% pop)
74 | }
75 |
76 | arrows[[level]] <- FormArrows(reply)
77 |
78 | objects[[level]] <- FormObjects(reply)
79 |
80 | spent[[level]] <- pool[[level]]
81 |
82 | pool[[level + 1]] <-
83 | arrows[[level]] %>%
84 | unlist %>%
85 | unname %>%
86 | unique %>%
87 | sort %>%
88 | list.filter(! . %in% unlist(spent))
89 | }
90 | }
91 | }
92 |
93 | web <- list(arrows = arrows %>% bind_rows %>% unique,
94 | objects = objects %>% bind_rows %>% unique)
95 |
96 | objects <-
97 | data_frame(id = web$objects$slug %>% unique) %>%
98 | left_join(web$objects, by = c('id' = 'slug')) %>%
99 | rename(label = title) %>%
100 | unique %>%
101 | group_by(id) %>%
102 | arrange(desc(length)) %>%
103 | slice(1) %>%
104 | ungroup %>%
105 | arrange(label)
106 |
107 | arrows <-
108 | web$arrows %>%
109 | mutate(id = str_c(source, '_', target)) %>%
110 | rename(from = source, to = target) %>%
111 | unique
112 |
113 | graph <- graph_from_data_frame(arrows, directed = TRUE, vertices = objects)
114 |
115 | objects$group <- walktrap.community(graph, steps = 6, modularity = TRUE) %>% membership
116 | objects$value <- betweenness(graph, directed = FALSE, normalized = TRUE)
117 |
118 | objects <-
119 | objects %>%
120 | mutate(color = sample(colors, 50)[objects$group]) %>%
121 | rename(user = user.name) %>%
122 | mutate(title = str_c(
123 | '
user: ', user, '
124 | channel: ', label, '
125 | id: ', id,'
126 | type: ', type,'
127 | blocks: ', length,'
128 | betweeness: ', round(value, 2),'