├── README_files └── figure-gfm │ ├── main_plot-1.png │ └── pride_plot-1.png ├── README.md ├── scripts └── the_emotions_of_jane_austen.R └── README.Rmd /README_files/figure-gfm/main_plot-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/filmicaesthetic/JaneAustenStreamgraphs/HEAD/README_files/figure-gfm/main_plot-1.png -------------------------------------------------------------------------------- /README_files/figure-gfm/pride_plot-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/filmicaesthetic/JaneAustenStreamgraphs/HEAD/README_files/figure-gfm/pride_plot-1.png -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | The Emotions of Jane Austen 2 | ================ 3 | 4 | This personal project explores the literature of Jane Austen novels, 5 | provided by Julia Silge’s 6 | janeaustenr 7 | package in R, using the 8 | NRC 9 | Word-Emotion Association Lexicon by Saif Mohammad. The lexicon is 10 | used to identify words associated with a range of 8 emotions: joy, 11 | trust, surprise, anticipation, sadness, fear, anger, and disgust. 12 | 13 | The project aims to capture a visual interpretation of the emotional 14 | journey in each novel with a single visualisation. 15 | 16 | ![](README_files/figure-gfm/main_plot-1.png) 17 | 18 | ## Files 19 | 20 | ‘the_emotions_of_jane_austen.R’ : 21 | view 22 | code 23 | 24 | ## Process 25 | 26 | - Load the NRC Word-Emotion Association Lexicon and Jane Austen novels 27 | datasets and prepare for further analysis. 28 | - Count occurrences of words with an emotional association for each 29 | line of each book. 30 | - Group lines approximately into pages (30 lines) to help reduce noise 31 | in the final visualisation. 
- Identify the two emotions with strongest association for each group.
- Plot the results for each novel in a streamgraph.
- Create grid of all plots and bookshelf-style legend.

--------------------------------------------------------------------------------
/scripts/the_emotions_of_jane_austen.R:
--------------------------------------------------------------------------------
###########
#
# Analysis of the emotion of language used in Jane Austen novels,
# using the NRC Emotion Lexicon.
#
# Pipeline: tokenise the novels -> flag tokens that the lexicon associates
# with each of eight emotions -> count flags per "page" (block of lines) ->
# keep the strongest emotions per page -> draw one streamgraph per novel ->
# stack the streamgraphs under a bookshelf-style legend and save as PNG.
#
###########

# load packages
# (plyr is attached before dplyr so that dplyr's verbs take precedence)
library(plyr)
library(stringr)
library(data.table)
library(ggplot2)
library(ggstream)
library(tidytext)
library(janeaustenr)
library(extrafont)
library(showtext)
library(dplyr)
library(gridExtra)
library(grid)
library(cowplot)

# Settings ----

# number of text lines merged into one "page" to help remove noise
lines_per_page <- 30

# custom colour palette; the order of its names is also used as the order of
# the emotion factor levels further down
pal <- c("joy" = "#6b9a3e",
         "trust" = "#25654d",
         "surprise" = "#efd041",
         "anticipation" = "#34478b",
         "sadness" = "#4175a5",
         "fear" = "#ed9953",
         "anger" = "#9b262c",
         "disgust" = "#807094")
emotions <- names(pal)

# Data preparation ----

# format the data from {janeaustenr} for analysis: one row per word, keeping
# the line number and a running chapter count within each book
tidy_books <- austen_books() %>%
  group_by(book) %>%
  mutate(
    linenumber = row_number(),
    chapter = cumsum(str_detect(
      text,
      regex("^chapter [\\divxlc]", ignore_case = TRUE)
    ))
  ) %>%
  ungroup() %>%
  unnest_tokens(word, text)

# import NRC Word-Emotion Association Sentiment Lexicon
nrc_all <- get_sentiments("nrc")

# every category present in the lexicon (kept for inspection; not used below)
sentiments <- unique(nrc_all$sentiment)

# identify words that appear in the emotion lexicon: one 0/1 column per
# emotion, in the order given by `emotions`
all_books <- tidy_books
all_books[emotions] <- lapply(emotions, function(emotion) {
  emotion_words <- nrc_all$word[nrc_all$sentiment == emotion]
  as.numeric(tidy_books$word %in% emotion_words)
})

# reshape the data frame: one row per word and emotion
all_books_long <- melt(
  as.data.table(all_books),
  id.vars = c("book", "linenumber", "chapter", "word"),
  variable.name = "emotion"
)

# round line numbers up to the next multiple of `lines_per_page`
# (approximately 1 page), count emotion words per page, and drop emotions that
# do not occur on a page
all_books_byline <- all_books_long %>%
  mutate(linenumber = ceiling(linenumber / lines_per_page) * lines_per_page) %>%
  group_by(book, linenumber, emotion) %>%
  summarise(value = sum(value), .groups = "drop") %>%
  filter(value > 0) %>%
  # running row index within each book; used as the x position of the plots.
  # It is assigned before the selection below, so the kept rows keep their
  # original positions.
  group_by(book) %>%
  mutate(id = row_number()) %>%
  ungroup()

# identify the most common emotions on each page: keep every row whose count
# is the largest or second-largest distinct count of its page (ties are kept).
# dense_rank() also copes with pages that have a single distinct count, where
# "the maximum of everything below the maximum" would be taken over an empty
# set.
# (the `_dt` suffix is historical; the object is used as a plain data frame)
all_books_byline_dt <- all_books_byline %>%
  group_by(book, linenumber) %>%
  filter(dense_rank(desc(value)) <= 2) %>%
  ungroup() %>%
  # apply an equal value to these emotions
  mutate(value = 1) %>%
  # y value of the streamgraph: since `value` is 1, this is the number of
  # kept rows for the emotion within the book
  group_by(book, emotion) %>%
  mutate(test = n() / value) %>%
  ungroup()

# Plot helpers ----

# load Google font for visualisations
font_add_google("Noto Serif", "Noto Serif")
showtext_auto()

# Shared look of every panel: flat background, no grid, axes or legend,
# bold serif text.
#   paper: background colour
#   ink:   text colour
# Legend and strip styling is intentionally absent: the legend is switched
# off and none of the plots is facetted.
austen_theme <- function(paper = "#f2f0ed", ink = "#69543f") {
  theme(
    panel.background = element_rect(fill = paper, color = paper),
    plot.background = element_rect(fill = paper, color = paper),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    axis.text = element_blank(),
    axis.title = element_blank(),
    axis.ticks = element_blank(),
    legend.position = "none",
    plot.title = element_text(color = ink, size = 18, hjust = 0.5),
    text = element_text(family = "Noto Serif", face = "bold", color = ink)
  )
}

# Streamgraph of the emotions of one novel.
#   data:      data frame with columns book, id, test and emotion
#   book_name: value of `book` to plot
#   title:     plot title
#   caption:   optional caption (NULL for none)
#   palette:   named vector of fill colours, one per emotion
#   n_grid:    passed to geom_stream() as `n_grid`
# Returns a ggplot object.
plot_book <- function(data, book_name, title, caption = NULL,
                      palette = pal, n_grid = 16561) {
  data %>%
    filter(book == book_name) %>%
    ggplot(aes(x = id, y = test)) +
    geom_stream(aes(fill = emotion), bw = 0.6, n_grid = n_grid, alpha = 0.96) +
    scale_fill_manual(values = palette) +
    labs(title = title, caption = caption) +
    austen_theme()
}

# Plots ----

# create plot for Sense & Sensibility
sense_p <- plot_book(all_books_byline_dt, "Sense & Sensibility",
                     "S E N S E & S E N S I B I L I T Y")

# create plot for Emma
emma_p <- plot_book(all_books_byline_dt, "Emma", "E M M A")

# create plot for Northanger Abbey
north_p <- plot_book(all_books_byline_dt, "Northanger Abbey",
                     "N O R T H A N G E R A B B E Y")

# create plot for Mansfield Park
mansfield_p <- plot_book(all_books_byline_dt, "Mansfield Park",
                         "M A N S F I E L D P A R K")

# create plot for Pride & Prejudice (the bottom panel carries the credits)
pride_p <- plot_book(
  all_books_byline_dt, "Pride & Prejudice",
  "P R I D E & P R E J U D I C E",
  caption = "Sentiment Analysis Data: NRC Word-Emotion Association Lexicon / saifmohammad.com | Visualisation: @filmicaesthetic"
)

# build styled legend with a bar chart: one full-height bar ("book spine")
# per emotion, in palette order
leg <- data.frame(EMOTION = factor(emotions, levels = emotions), value = 1)

# plot legend; every label is drawn twice with a tiny vertical offset and two
# colours, the second copy on top of the first
hist <- ggplot(leg, aes(x = EMOTION, y = value, fill = EMOTION)) +
  geom_col() +
  geom_text(aes(y = 0.502, label = toupper(EMOTION)),
            angle = 90, family = "Noto Serif", size = 8,
            color = c("#b07e13", "#f2ddb1", "#b07e13", "#f2ddb1",
                      "#b07e13", "#b07e13", "#f2ddb1", "#b07e13")) +
  geom_text(aes(y = 0.498, label = toupper(EMOTION)),
            angle = 90, family = "Noto Serif", size = 8,
            color = c("#403020", "#b07e13", "#403020", "#b07e13",
                      "#403020", "#403020", "#b07e13", "#403020")) +
  geom_text(aes(y = 0.95, label = "JANE AUSTEN"),
            family = "Noto Serif", color = "#f2ddb1", size = 1.5) +
  geom_text(aes(y = 0.95001, label = "JANE AUSTEN"),
            family = "Noto Serif", color = "#291d12", size = 1.5) +
  geom_text(aes(y = 0.05, label = "SIMON & SCHUSTER"),
            family = "Noto Serif", color = "#f2ddb1", size = 1.1) +
  geom_text(aes(y = 0.05001, label = "SIMON & SCHUSTER"),
            family = "Noto Serif", color = "#291d12", size = 1.1) +
  scale_fill_manual(values = pal) +
  ggtitle(toupper("The Emotions of Jane Austen"),
          subtitle = "A visual interpretation of the emotion of language used in Jane Austen novels.") +
  austen_theme() +
  # legend-specific additions on top of the shared theme
  theme(
    plot.margin = margin(r = 4, l = 4, unit = "cm"),
    plot.subtitle = element_text(hjust = 0.5, size = 18,
                                 margin = margin(t = 5, b = 5, unit = "pt")),
    plot.title = element_text(angle = 0, family = "Noto Serif",
                              color = "#69543f", size = 24, face = "bold",
                              hjust = 0.5,
                              margin = margin(t = 12, unit = "pt"))
  )

# combine all the plots into grid (legend on top, slightly taller)
plot <- plot_grid(hist, sense_p, emma_p, north_p, mansfield_p, pride_p,
                  nrow = 6, rel_heights = c(1.2, 1, 1, 1, 1, 1))

# save the plot
ggsave("JaneAusten.png", plot = plot, dpi = 300,
       width = 13, height = 20, units = "cm")

--------------------------------------------------------------------------------
/README.Rmd:
--------------------------------------------------------------------------------
---
title: "The Emotions of Jane Austen"
output:
  rmarkdown::github_document:
    theme: paper
---

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)

# load packages
library(plyr)
library(stringr)
library(data.table)
library(ggplot2)
library(ggstream)
library(tidytext)
library(janeaustenr)
library(extrafont)
library(showtext)
library(dplyr)
library(gridExtra)
library(grid)
library(cowplot)

# load Google font for visualisations
font_add_google("Noto Serif", "Noto Serif")
showtext_auto()

# format the data from {janeaustenr} for analysis
tidy_books <- austen_books() %>%
  group_by(book) %>%
  mutate(
    linenumber = row_number(),
    chapter = cumsum(str_detect(text,
                                regex("^chapter [\\divxlc]",
                                      ignore_case = TRUE)))) %>%
  ungroup() %>%
  unnest_tokens(word, text)

# import NRC Emotion Lexicon
nrc_all <- get_sentiments("nrc")

# list of unique emotions
sentiments <- unique(nrc_all$sentiment)

# number of text lines merged into one "page" to help remove noise
lines_per_page <- 30

# custom colour palette; the order of its names is also used as the order of
# the emotion factor levels further down
pal <- c("joy" = "#6b9a3e",
         "trust" = "#25654d",
         "surprise" = "#efd041",
         "anticipation" = "#34478b",
         "sadness" = "#4175a5",
         "fear" = "#ed9953",
         "anger" = "#9b262c",
         "disgust" = "#807094")
emotions <- names(pal)

# identify words that appear in the emotion lexicon: one 0/1 column per
# emotion, in the order given by `emotions`
all_books <- tidy_books
all_books[emotions] <- lapply(emotions, function(emotion) {
  emotion_words <- nrc_all$word[nrc_all$sentiment == emotion]
  as.numeric(tidy_books$word %in% emotion_words)
})

# reshape the data frame: one row per word and emotion
all_books_long <- melt(
  as.data.table(all_books),
  id.vars = c("book", "linenumber", "chapter", "word"),
  variable.name = "emotion"
)

# round line numbers up to the next multiple of `lines_per_page`
# (approximately 1 page), count emotion words per page, and drop emotions that
# do not occur on a page
all_books_byline <- all_books_long %>%
  mutate(linenumber = ceiling(linenumber / lines_per_page) * lines_per_page) %>%
  group_by(book, linenumber, emotion) %>%
  summarise(value = sum(value), .groups = "drop") %>%
  filter(value > 0) %>%
  # running row index within each book; used as the x position of the plots.
  # It is assigned before the selection below, so the kept rows keep their
  # original positions.
  group_by(book) %>%
  mutate(id = row_number()) %>%
  ungroup()

# identify the most common emotions on each page: keep every row whose count
# is the largest or second-largest distinct count of its page (ties are kept).
# dense_rank() also copes with pages that have a single distinct count, where
# "the maximum of everything below the maximum" would be taken over an empty
# set.
# (the `_dt` suffix is historical; the object is used as a plain data frame)
all_books_byline_dt <- all_books_byline %>%
  group_by(book, linenumber) %>%
  filter(dense_rank(desc(value)) <= 2) %>%
  ungroup() %>%
  # apply an equal value to these emotions
  mutate(value = 1) %>%
  # y value of the streamgraph: since `value` is 1, this is the number of
  # kept rows for the emotion within the book
  group_by(book, emotion) %>%
  mutate(test = n() / value) %>%
  ungroup()

# Shared look of every panel: flat background, no grid, axes or legend,
# bold serif text.
#   paper: background colour
#   ink:   text colour
# Legend and strip styling is intentionally absent: the legend is switched
# off and none of the plots is facetted.
austen_theme <- function(paper = "#f2f0ed", ink = "#69543f") {
  theme(
    panel.background = element_rect(fill = paper, color = paper),
    plot.background = element_rect(fill = paper, color = paper),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    axis.text = element_blank(),
    axis.title = element_blank(),
    axis.ticks = element_blank(),
    legend.position = "none",
    plot.title = element_text(color = ink, size = 18, hjust = 0.5),
    text = element_text(family = "Noto Serif", face = "bold", color = ink)
  )
}

# Streamgraph of the emotions of one novel.
#   data:      data frame with columns book, id, test and emotion
#   book_name: value of `book` to plot
#   title:     plot title
#   caption:   optional caption (NULL for none)
#   palette:   named vector of fill colours, one per emotion
#   n_grid:    passed to geom_stream() as `n_grid`
# Returns a ggplot object.
plot_book <- function(data, book_name, title, caption = NULL,
                      palette = pal, n_grid = 16561) {
  data %>%
    filter(book == book_name) %>%
    ggplot(aes(x = id, y = test)) +
    geom_stream(aes(fill = emotion), bw = 0.6, n_grid = n_grid, alpha = 0.96) +
    scale_fill_manual(values = palette) +
    labs(title = title, caption = caption) +
    austen_theme()
}

# create plot for Sense & Sensibility
sense_p <- plot_book(all_books_byline_dt, "Sense & Sensibility",
                     "S E N S E & S E N S I B I L I T Y")

# create plot for Emma
emma_p <- plot_book(all_books_byline_dt, "Emma", "E M M A")

# create plot for Northanger Abbey
north_p <- plot_book(all_books_byline_dt, "Northanger Abbey",
                     "N O R T H A N G E R A B B E Y")

# create plot for Mansfield Park
mansfield_p <- plot_book(all_books_byline_dt, "Mansfield Park",
                         "M A N S F I E L D P A R K")

# create plot for Pride & Prejudice (the bottom panel carries the credits)
pride_p <- plot_book(
  all_books_byline_dt, "Pride & Prejudice",
  "P R I D E & P R E J U D I C E",
  caption = "Sentiment Analysis Data: NRC Word-Emotion Association Lexicon / saifmohammad.com | Visualisation: @filmicaesthetic"
)

# build styled legend with a bar chart: one full-height bar ("book spine")
# per emotion, in palette order
leg <- data.frame(EMOTION = factor(emotions, levels = emotions), value = 1)

# plot legend; every label is drawn twice with a tiny vertical offset and two
# colours, the second copy on top of the first
hist <- ggplot(leg, aes(x = EMOTION, y = value, fill = EMOTION)) +
  geom_col() +
  geom_text(aes(y = 0.502, label = toupper(EMOTION)),
            angle = 90, family = "Noto Serif", size = 8,
            color = c("#b07e13", "#f2ddb1", "#b07e13", "#f2ddb1",
                      "#b07e13", "#b07e13", "#f2ddb1", "#b07e13")) +
  geom_text(aes(y = 0.498, label = toupper(EMOTION)),
            angle = 90, family = "Noto Serif", size = 8,
            color = c("#403020", "#b07e13", "#403020", "#b07e13",
                      "#403020", "#403020", "#b07e13", "#403020")) +
  geom_text(aes(y = 0.95, label = "JANE AUSTEN"),
            family = "Noto Serif", color = "#f2ddb1", size = 1.5) +
  geom_text(aes(y = 0.95001, label = "JANE AUSTEN"),
            family = "Noto Serif", color = "#291d12", size = 1.5) +
  geom_text(aes(y = 0.05, label = "SIMON & SCHUSTER"),
            family = "Noto Serif", color = "#f2ddb1", size = 1.1) +
  geom_text(aes(y = 0.05001, label = "SIMON & SCHUSTER"),
            family = "Noto Serif", color = "#291d12", size = 1.1) +
  scale_fill_manual(values = pal) +
  ggtitle(toupper("The Emotions of Jane Austen"),
          subtitle = "A visual interpretation of the emotion of language used in Jane Austen novels.") +
  austen_theme() +
  # legend-specific additions on top of the shared theme
  theme(
    plot.margin = margin(r = 4, l = 4, unit = "cm"),
    plot.subtitle = element_text(hjust = 0.5, size = 18,
                                 margin = margin(t = 5, b = 5, unit = "pt")),
    plot.title = element_text(angle = 0, family = "Noto Serif",
                              color = "#69543f", size = 24, face = "bold",
                              hjust = 0.5,
                              margin = margin(t = 12, unit = "pt"))
  )

# combine all the plots into one (legend on top, slightly taller)
plot <- plot_grid(hist, sense_p, emma_p, north_p, mansfield_p, pride_p,
                  nrow = 6, rel_heights = c(1.2, 1, 1, 1, 1, 1))

```

This personal project explores the literature of Jane Austen novels, provided by Julia Silge's janeaustenr package in R, using the NRC Word-Emotion Association Lexicon by Saif Mohammad. The lexicon is used to identify words associated with a range of 8 emotions: joy, trust, surprise, anticipation, sadness, fear, anger, and disgust.

The project aims to capture a visual interpretation of the emotional journey in each novel with a single visualisation.

``` {r main_plot, echo = FALSE, warning = FALSE, message = FALSE, fig.width = 13, fig.height = 20}
plot
```

## Files

'the_emotions_of_jane_austen.R' : view code

## Process

* Load the NRC Word-Emotion Association Lexicon and Jane Austen novels datasets and prepare for further analysis.
* Count occurrences of words with an emotional association for each line of each book.
* Group lines approximately into pages (30 lines) to help reduce noise in the final visualisation.
* Identify the two emotions with strongest association for each group.
* Plot the results for each novel in a streamgraph.
* Create grid of all plots and bookshelf-style legend.
--------------------------------------------------------------------------------