├── README_files
│   └── figure-gfm
│       ├── main_plot-1.png
│       └── pride_plot-1.png
├── README.md
├── scripts
│   └── the_emotions_of_jane_austen.R
└── README.Rmd
/README_files/figure-gfm/main_plot-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/filmicaesthetic/JaneAustenStreamgraphs/HEAD/README_files/figure-gfm/main_plot-1.png
--------------------------------------------------------------------------------
/README_files/figure-gfm/pride_plot-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/filmicaesthetic/JaneAustenStreamgraphs/HEAD/README_files/figure-gfm/pride_plot-1.png
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | The Emotions of Jane Austen
2 | ================
3 |
4 | This personal project explores the literature of Jane Austen novels,
5 | provided by Julia Silge’s
6 | janeaustenr
7 | package in R, using the
8 | NRC
9 | Word-Emotion Association Lexicon by Saif Mohammad. The lexicon is
10 | used to identify words associated with a range of 8 emotions: joy,
11 | trust, surprise, anticipation, sadness, fear, anger, and disgust.
12 |
13 | The project aims to capture a visual interpretation of the emotional
14 | journey in each novel with a single visualisation.
15 |
16 | ![](README_files/figure-gfm/main_plot-1.png)
17 |
18 | ## Files
19 |
20 | ‘the_emotions_of_jane_austen.R’ :
21 | view
22 | code
23 |
24 | ## Process
25 |
26 | - Load the NRC Word-Emotion Association Lexicon and Jane Austen novels
27 | datasets and prepare for further analysis.
28 | - Count occurrences of words with an emotional association for each
29 | line of each book.
30 | - Group lines approximately into pages (30 lines) to help reduce noise
31 | in the final visualisation.
32 | - Identify the two emotions with strongest association for each group.
33 | - Plot the results for each novel in a streamgraph.
34 | - Create grid of all plots and bookshelf-style legend.
35 |
--------------------------------------------------------------------------------
/scripts/the_emotions_of_jane_austen.R:
--------------------------------------------------------------------------------
1 | ###########
2 | #
3 | # Analysis of the emotion of language used in Jane Austen novels,
4 | # using the NRC Emotion Lexicon.
5 | #
6 | ###########
7 |
8 | # load packages
9 | library(plyr)
10 | library(stringr)
11 | library(data.table)
12 | library(ggplot2)
13 | library(ggstream)
14 | library(tidytext)
15 | library(janeaustenr)
16 | library(extrafont)
17 | library(showtext)
18 | library(dplyr)
19 | library(gridExtra)
20 | library(grid)
21 | library(cowplot)
22 |
23 | # format the data from {janeaustenr} for analysis: number lines and chapters within each book, then split the text into one word per row
24 | tidy_books <- austen_books() %>%
25 | group_by(book) %>%
26 | mutate(
27 | linenumber = row_number(),  # line index within each book
28 | chapter = cumsum(str_detect(text,  # running count of lines that match the chapter-heading pattern
29 | regex("^chapter [\\divxlc]",
30 | ignore_case = TRUE)))) %>%
31 | ungroup() %>%
32 | unnest_tokens(word, text)  # tokenise the text column into a word column
33 |
34 | # import NRC Word-Emotion Association Sentiment Lexicon. NOTE(review): tidytext loads this lexicon through {textdata}, which may ask to download it on first use - confirm before running non-interactively
35 | nrc_all <- get_sentiments("nrc")
36 |
37 | # list of unique sentiment labels in the lexicon. NOTE(review): `sentiments` is not referenced again in this script
38 | sentiments <- unique(nrc_all$sentiment)
39 |
40 | # flag each word with a 0/1 indicator per emotion: 1 when the word is listed under that emotion in the lexicon
41 | all_books <- tidy_books %>%
42 | filter() %>%  # NOTE(review): filter() has no conditions here, so no rows are removed
43 | mutate(joy = as.numeric(word %in% nrc_all$word[nrc_all$sentiment == "joy"]),
44 | trust = as.numeric(word %in% nrc_all$word[nrc_all$sentiment == "trust"]),
45 | surprise = as.numeric(word %in% nrc_all$word[nrc_all$sentiment == "surprise"]),
46 | anticipation = as.numeric(word %in% nrc_all$word[nrc_all$sentiment == "anticipation"]),
47 | sadness = as.numeric(word %in% nrc_all$word[nrc_all$sentiment == "sadness"]),
48 | fear = as.numeric(word %in% nrc_all$word[nrc_all$sentiment == "fear"]),
49 | anger = as.numeric(word %in% nrc_all$word[nrc_all$sentiment == "anger"]),
50 | disgust = as.numeric(word %in% nrc_all$word[nrc_all$sentiment == "disgust"]))
51 |
52 | # reshape to long format: the eight emotion columns become an `emotion` column, with the 0/1 flag in `value`
53 | all_books_long <- melt(setDT(all_books), id.vars = c("book","linenumber","chapter","word"), variable.name = "emotion")
54 | # round line numbers up to the next multiple of 30 (approximately 1 page) to help remove noise. NOTE(review): the bin width is 30 despite the `_10` suffix in the variable name
55 | all_books_long_10 <- all_books_long %>% mutate(linenumber = round_any(linenumber, 30, f = ceiling))
56 |
57 | # aggregate by "linenumber" groups: count emotion-word occurrences per book, 30-line group and emotion
58 | all_books_byline <- all_books_long_10 %>%
59 | group_by(book, linenumber, emotion) %>%
60 | summarise(value = sum(value)) %>%  # number of flagged words for the emotion in this group
61 | filter(value > 0) %>%  # drop emotions that do not occur in the group
62 | group_by(book, linenumber) %>%
63 | mutate(test = value / sum(value) * n())  # share of the group's total times the number of emotions present. NOTE(review): `test` is overwritten further down before it is plotted
64 |
65 | # add ids for each book: a running row index within each book, later used as the x position in the plots
66 | all_books_byline <- all_books_byline %>% group_by(book) %>% mutate(id = row_number())
67 |
68 | # add unique identifier combining book and linenumber group, then convert to a data.table
69 | all_books_byline_dt <- all_books_byline %>% mutate(book_line = paste0(book, linenumber))
70 | all_books_byline_dt <- as.data.table(all_books_byline_dt)
71 |
72 | # identify the 2 most common emotions in each linenumber group: keep every row whose count reaches the second-highest distinct count (ties included); when a group has a single distinct count all of its rows are kept, without calling max() on an empty vector
73 | all_books_byline_dt <- all_books_byline_dt[all_books_byline_dt[, .I[value >= min(head(sort(unique(value), decreasing = TRUE), 2))], by=book_line]$V1]
74 |
75 | # apply an equal value to these emotions: every retained row gets the same weight of 1
76 | all_books_byline_dt$value <- 1
77 |
78 | # apply calculation to adjust appearance of the streamgraph and remove extreme spikes: because value is 1, test becomes the number of retained rows for that emotion in the book (constant per book and emotion)
79 | all_books_byline_dt <- all_books_byline_dt %>%
80 | group_by(book, emotion) %>%
81 | mutate(test = n() / value)
82 |
83 | # load Google font for visualisations and switch on showtext rendering. NOTE(review): font_add_google() presumably needs network access to fetch the font - confirm
84 | font_add_google("Noto Serif", "Noto Serif")
85 | showtext_auto()
86 |
87 | # create custom colour palette: one named hex colour per emotion, passed to scale_fill_manual() in every plot
88 | pal <- c("joy" = "#6b9a3e",
89 | "trust" = "#25654d",
90 | "surprise" = "#efd041",
91 | "anticipation" = "#34478b",
92 | "sadness" = "#4175a5",
93 | "fear" = "#ed9953",
94 | "anger" = "#9b262c",
95 | "disgust" = "#807094")
96 |
97 | # create plot for Sense & Sensibility: streamgraph of the per-emotion heights (test) against the row index (id)
98 | sense_p <- all_books_byline_dt %>% filter(book == "Sense & Sensibility") %>%
99 | ggplot(aes(x = id, y = test)) +
100 | geom_stream(bw = 0.6, n_grid = 16561, aes(fill = emotion), alpha = 0.96) +  # one stream per emotion. NOTE(review): bw and n_grid are ggstream smoothing/grid settings - see ?geom_stream
101 | scale_fill_manual(values = pal) +
102 | ggtitle("S E N S E & S E N S I B I L I T Y") +
103 | theme(panel.background = element_rect(fill = "#f2f0ed", color = "#f2f0ed"),
104 | plot.background = element_rect(fill = "#f2f0ed", color = "#f2f0ed"),
105 | panel.grid.major = element_blank(),
106 | legend.background = element_blank(),
107 | legend.key = element_blank(),
108 | legend.text = element_text(size = 24),
109 | legend.title = element_blank(),
110 | legend.position = "none",  # legend hidden; a separate bar-chart legend is built later in the script
111 | legend.box.margin = margin(r = 0, l = 0),
112 | panel.grid.minor = element_blank(),
113 | plot.title = element_text(color = "#69543f", size = 18, hjust = 0.5),
114 | axis.text = element_blank(),
115 | strip.background = element_blank(),
116 | strip.text = element_text(color = "#69543f"),
117 | axis.title = element_blank(),
118 | axis.ticks = element_blank(),
119 | text=element_text(family = "Noto Serif", face = "bold", color = "#69543f"))
120 |
121 | # create plot for Emma: streamgraph of the per-emotion heights (test) against the row index (id)
122 | emma_p <- all_books_byline_dt %>% filter(book == "Emma") %>%
123 | ggplot(aes(x = id, y = test)) +
124 | geom_stream(bw = 0.6, n_grid = 16561, aes(fill = emotion), alpha = 0.96) +  # one stream per emotion. NOTE(review): bw and n_grid are ggstream smoothing/grid settings - see ?geom_stream
125 | scale_fill_manual(values = pal) +
126 | ggtitle("E M M A") +
127 | theme(panel.background = element_rect(fill = "#f2f0ed", color = "#f2f0ed"),
128 | plot.background = element_rect(fill = "#f2f0ed", color = "#f2f0ed"),
129 | panel.grid.major = element_blank(),
130 | legend.background = element_blank(),
131 | legend.text = element_text(size = 24),
132 | legend.key = element_blank(),
133 | legend.title = element_blank(),
134 | legend.position = "none",  # legend hidden; a separate bar-chart legend is built later in the script
135 | legend.box.margin = margin(r = 0, l = 0),
136 | panel.grid.minor = element_blank(),
137 | plot.title = element_text(color = "#69543f", size = 18, hjust = 0.5),
138 | axis.text = element_blank(),
139 | strip.background = element_blank(),
140 | strip.text = element_text(color = "#69543f"),
141 | axis.title = element_blank(),
142 | axis.ticks = element_blank(),
143 | text=element_text(family = "Noto Serif", face = "bold", color = "#69543f"))
144 |
145 | # create plot for Northanger Abbey: streamgraph of the per-emotion heights (test) against the row index (id)
146 | north_p <- all_books_byline_dt %>% filter(book == "Northanger Abbey") %>%
147 | ggplot(aes(x = id, y = test)) +
148 | geom_stream(bw = 0.6, n_grid = 16561, aes(fill = emotion), alpha = 0.96) +  # one stream per emotion. NOTE(review): bw and n_grid are ggstream smoothing/grid settings - see ?geom_stream
149 | scale_fill_manual(values = pal) +
150 | ggtitle("N O R T H A N G E R A B B E Y") +
151 | theme(panel.background = element_rect(fill = "#f2f0ed", color = "#f2f0ed"),
152 | plot.background = element_rect(fill = "#f2f0ed", color = "#f2f0ed"),
153 | panel.grid.major = element_blank(),
154 | legend.background = element_blank(),
155 | legend.text = element_text(size = 24),
156 | legend.key = element_blank(),
157 | legend.title = element_blank(),
158 | legend.position = "none",  # legend hidden; a separate bar-chart legend is built later in the script
159 | legend.box.margin = margin(r = 0, l = 0),
160 | panel.grid.minor = element_blank(),
161 | plot.title = element_text(color = "#69543f", size = 18, hjust = 0.5),
162 | axis.text = element_blank(),
163 | strip.background = element_blank(),
164 | strip.text = element_text(color = "#69543f"),
165 | axis.title = element_blank(),
166 | axis.ticks = element_blank(),
167 | text=element_text(family = "Noto Serif", face = "bold", color = "#69543f"))
168 |
169 | # create plot for Mansfield Park: streamgraph of the per-emotion heights (test) against the row index (id)
170 | mansfield_p <- all_books_byline_dt %>% filter(book == "Mansfield Park") %>%
171 | ggplot(aes(x = id, y = test)) +
172 | geom_stream(bw = 0.6, n_grid = 16561, aes(fill = emotion), alpha = 0.96) +  # one stream per emotion. NOTE(review): bw and n_grid are ggstream smoothing/grid settings - see ?geom_stream
173 | scale_fill_manual(values = pal) +
174 | ggtitle("M A N S F I E L D P A R K") +
175 | theme(panel.background = element_rect(fill = "#f2f0ed", color = "#f2f0ed"),
176 | plot.background = element_rect(fill = "#f2f0ed", color = "#f2f0ed"),
177 | panel.grid.major = element_blank(),
178 | legend.background = element_blank(),
179 | legend.key = element_blank(),
180 | legend.text = element_text(size = 24),
181 | legend.title = element_blank(),
182 | legend.position = "none",  # legend hidden; a separate bar-chart legend is built later in the script
183 | legend.box.margin = margin(r = 0, l = 0),
184 | panel.grid.minor = element_blank(),
185 | plot.title = element_text(color = "#69543f", size = 18, hjust = 0.5),
186 | axis.text = element_blank(),
187 | strip.background = element_blank(),
188 | strip.text = element_text(color = "#69543f"),
189 | axis.title = element_blank(),
190 | axis.ticks = element_blank(),
191 | text=element_text(family = "Noto Serif", face = "bold", color = "#69543f"))
192 |
193 | # create plot for Pride & Prejudice: streamgraph of the per-emotion heights (test) against the row index (id); this plot also carries the credits caption
194 | pride_p <- all_books_byline_dt %>% filter(book == "Pride & Prejudice") %>%
195 | ggplot(aes(x = id, y = test)) +
196 | geom_stream(bw = 0.6, n_grid = 16561, aes(fill = emotion), alpha = 0.96) +  # one stream per emotion. NOTE(review): bw and n_grid are ggstream smoothing/grid settings - see ?geom_stream
197 | scale_fill_manual(values = pal) +
198 | ggtitle("P R I D E & P R E J U D I C E") +
199 | labs(caption = "Sentiment Analysis Data: NRC Word-Emotion Association Lexicon / saifmohammad.com | Visualisation: @filmicaesthetic") +
200 | theme(panel.background = element_rect(fill = "#f2f0ed", color = "#f2f0ed"),
201 | plot.background = element_rect(fill = "#f2f0ed", color = "#f2f0ed"),
202 | panel.grid.major = element_blank(),
203 | legend.background = element_blank(),
204 | legend.key = element_blank(),
205 | legend.text = element_text(size = 18),
206 | legend.title = element_blank(),
207 | legend.position = "none",  # legend hidden; a separate bar-chart legend is built later in the script
208 | legend.box.margin = margin(r = 0, l = 0),
209 | panel.grid.minor = element_blank(),
210 | plot.title = element_text(color = "#69543f", size = 18, hjust = 0.5),
211 | axis.text = element_blank(),
212 | strip.background = element_blank(),
213 | strip.text = element_text(color = "#69543f"),
214 | axis.title = element_blank(),
215 | axis.ticks = element_blank(),
216 | text=element_text(family = "Noto Serif", face = "bold", color = "#69543f"))
217 |
218 | # build styled legend with a bar chart: one equal-height bar (value = 1) per emotion
219 | leg <- data.frame(EMOTION = as.factor(c("joy", "trust", "surprise", "anticipation", "sadness", "fear", "anger", "disgust")), value = c(1, 1, 1, 1, 1, 1, 1, 1))
220 | leg$EMOTION <- factor(leg$EMOTION, levels = leg$EMOTION)  # order the levels as listed above so the bars follow that order
221 |
222 | # plot legend: each bar is filled with its emotion colour and labelled with the emotion name rotated 90 degrees; the text layers come in pairs at slightly different y positions (presumably for an embossed look - confirm)
223 | hist <- ggplot(leg, aes(x = EMOTION, y = value, fill = EMOTION)) +
224 | geom_bar(stat = "identity") +
225 | geom_text(aes(y = 0.502, label = toupper(EMOTION)), angle = 90, family = "Noto Serif", color = c("#b07e13", "#f2ddb1", "#b07e13", "#f2ddb1", "#b07e13", "#b07e13", "#f2ddb1", "#b07e13"), size = 8) +
226 | geom_text(aes(y = 0.498, label = toupper(EMOTION)), angle = 90, family = "Noto Serif", color = c("#403020", "#b07e13", "#403020", "#b07e13", "#403020", "#403020", "#b07e13", "#403020"), size = 8) +
227 | geom_text(aes(y = 0.95, label = "JANE AUSTEN"), family = "Noto Serif", color = "#f2ddb1", size = 1.5) +
228 | geom_text(aes(y = 0.95001, label = "JANE AUSTEN"), family = "Noto Serif", color = "#291d12", size = 1.5) +
229 | geom_text(aes(y = 0.05, label = "SIMON & SCHUSTER"), family = "Noto Serif", color = "#f2ddb1", size = 1.1) +
230 | geom_text(aes(y = 0.05001, label = "SIMON & SCHUSTER"), family = "Noto Serif", color = "#291d12", size = 1.1) +
231 | scale_fill_manual(values = pal) +
232 | ggtitle(toupper("The Emotions of Jane Austen"), subtitle = "A visual interpretation of the emotion of language used in Jane Austen novels.") +
233 | theme(panel.background = element_rect(fill = "#f2f0ed", color = "#f2f0ed"),
234 | plot.background = element_rect(fill = "#f2f0ed", color = "#f2f0ed"),
235 | panel.grid.major = element_blank(),
236 | legend.background = element_blank(),
237 | legend.key = element_blank(),
238 | plot.margin = margin(r = 4, l = 4, unit = "cm"),
239 | legend.position = "none",
240 | legend.box.margin = margin(r = 0, l = 0),
241 | panel.grid.minor = element_blank(),
242 | plot.subtitle = element_text(hjust = 0.5, size= 18, margin = margin(t = 5, b = 5, unit = "pt")),
243 | plot.title = element_text(angle = 0, family = "Noto Serif", color = "#69543f", size = 24, face="bold", hjust = 0.5, margin = margin(t = 12, unit = "pt")),
244 | axis.text = element_blank(),
245 | strip.background = element_blank(),
246 | strip.text = element_text(color = "#80664d"),
247 | axis.title = element_blank(),
248 | axis.ticks = element_blank(),
249 | text=element_text(family="Noto Serif", face = "bold", color = "#69543f"))
250 |
251 | # combine all the plots into grid: the legend and the five novel plots stacked in one column, with the legend row 1.2x the height of each plot row. NOTE(review): only five novels are plotted - confirm that leaving out any other book in austen_books() is intentional
252 | plot <- plot_grid(hist, sense_p, emma_p, north_p, mansfield_p, pride_p, nrow = 6, rel_heights = c(1.2, 1, 1, 1, 1, 1))
253 |
254 | # save the plot as a 13 x 20 cm PNG at 300 dpi (relative path, so it is written to the current working directory)
255 | ggsave("JaneAusten.png", plot = plot, dpi = 300, width = 13, height = 20, units = "cm")
256 |
--------------------------------------------------------------------------------
/README.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "The Emotions of Jane Austen"
3 | output:
4 | rmarkdown::github_document:
5 | theme: paper
6 | ---
7 |
8 | ```{r setup, include=FALSE}
9 | knitr::opts_chunk$set(echo = TRUE)
10 |
11 | # load packages
12 | library(plyr)
13 | library(stringr)
14 | library(data.table)
15 | library(ggplot2)
16 | library(ggstream)
17 | library(tidytext)
18 | library(janeaustenr)
19 | library(extrafont)
20 | library(showtext)
21 | library(dplyr)
22 | library(gridExtra)
23 | library(grid)
24 | library(cowplot)
25 |
26 | # load Google font for visualisations and switch on showtext rendering. NOTE(review): font_add_google() presumably needs network access to fetch the font - confirm
27 | font_add_google("Noto Serif", "Noto Serif")
28 | showtext_auto()
29 |
30 | # format the data from {janeaustenr} for analysis: number lines and chapters within each book, then split the text into one word per row
31 | tidy_books <- austen_books() %>%
32 | group_by(book) %>%
33 | mutate(
34 | linenumber = row_number(),  # line index within each book
35 | chapter = cumsum(str_detect(text,  # running count of lines that match the chapter-heading pattern
36 | regex("^chapter [\\divxlc]",
37 | ignore_case = TRUE)))) %>%
38 | ungroup() %>%
39 | unnest_tokens(word, text)  # tokenise the text column into a word column
40 |
41 | # import NRC Emotion Lexicon. NOTE(review): tidytext loads this lexicon through {textdata}, which may ask to download it on first use - confirm before knitting non-interactively
42 | nrc_all <- get_sentiments("nrc")
43 |
44 | # list of unique sentiment labels in the lexicon. NOTE(review): `sentiments` is not referenced again in this document
45 | sentiments <- unique(nrc_all$sentiment)
46 |
47 | # flag each word with a 0/1 indicator per emotion: 1 when the word is listed under that emotion in the lexicon
48 | all_books <- tidy_books %>%
49 | filter() %>%  # NOTE(review): filter() has no conditions here, so no rows are removed
50 | mutate(joy = as.numeric(word %in% nrc_all$word[nrc_all$sentiment == "joy"]),
51 | trust = as.numeric(word %in% nrc_all$word[nrc_all$sentiment == "trust"]),
52 | surprise = as.numeric(word %in% nrc_all$word[nrc_all$sentiment == "surprise"]),
53 | anticipation = as.numeric(word %in% nrc_all$word[nrc_all$sentiment == "anticipation"]),
54 | sadness = as.numeric(word %in% nrc_all$word[nrc_all$sentiment == "sadness"]),
55 | fear = as.numeric(word %in% nrc_all$word[nrc_all$sentiment == "fear"]),
56 | anger = as.numeric(word %in% nrc_all$word[nrc_all$sentiment == "anger"]),
57 | disgust = as.numeric(word %in% nrc_all$word[nrc_all$sentiment == "disgust"]))
58 |
59 | # reshape to long format: the eight emotion columns become an `emotion` column, with the 0/1 flag in `value`
60 | all_books_long <- melt(setDT(all_books), id.vars = c("book","linenumber","chapter","word"), variable.name = "emotion")
61 | # round line numbers up to the next multiple of 30 (approximately 1 page) to help remove noise. NOTE(review): the bin width is 30 despite the `_10` suffix in the variable name
62 | all_books_long_10 <- all_books_long %>% mutate(linenumber = round_any(linenumber, 30, f = ceiling))
63 |
64 | # aggregate by "linenumber" groups: count emotion-word occurrences per book, 30-line group and emotion
65 | all_books_byline <- all_books_long_10 %>%
66 | group_by(book, linenumber, emotion) %>%
67 | summarise(value = sum(value)) %>%  # number of flagged words for the emotion in this group
68 | filter(value > 0) %>%  # drop emotions that do not occur in the group
69 | group_by(book, linenumber) %>%
70 | mutate(test = value / sum(value) * n())  # share of the group's total times the number of emotions present. NOTE(review): `test` is overwritten further down before it is plotted
71 |
72 | # add ids for each book: a running row index within each book, later used as the x position in the plots
73 | all_books_byline <- all_books_byline %>% group_by(book) %>% mutate(id = row_number())
74 |
75 | # add unique identifier combining book and linenumber group, then convert to a data.table
76 | all_books_byline_dt <- all_books_byline %>% mutate(book_line = paste0(book, linenumber))
77 | all_books_byline_dt <- as.data.table(all_books_byline_dt)
78 |
79 | # identify the 2 most common emotions in each linenumber group: keep every row whose count reaches the second-highest distinct count (ties included); when a group has a single distinct count all of its rows are kept, without calling max() on an empty vector
80 | all_books_byline_dt <- all_books_byline_dt[all_books_byline_dt[, .I[value >= min(head(sort(unique(value), decreasing = TRUE), 2))], by=book_line]$V1]
81 |
82 | # apply an equal value to these emotions: every retained row gets the same weight of 1
83 | all_books_byline_dt$value <- 1
84 |
85 | # apply calculation to adjust appearance of the streamgraph and remove extreme spikes: because value is 1, test becomes the number of retained rows for that emotion in the book (constant per book and emotion)
86 | all_books_byline_dt <- all_books_byline_dt %>%
87 | group_by(book, emotion) %>%
88 | mutate(test = n() / value)
89 |
90 | # create custom colour palette: one named hex colour per emotion, passed to scale_fill_manual() in every plot
91 | pal <- c("joy" = "#6b9a3e",
92 | "trust" = "#25654d",
93 | "surprise" = "#efd041",
94 | "anticipation" = "#34478b",
95 | "sadness" = "#4175a5",
96 | "fear" = "#ed9953",
97 | "anger" = "#9b262c",
98 | "disgust" = "#807094")
99 |
100 | # create plot for Sense & Sensibility: streamgraph of the per-emotion heights (test) against the row index (id)
101 | sense_p <- all_books_byline_dt %>% filter(book == "Sense & Sensibility") %>%
102 | ggplot(aes(x = id, y = test)) +
103 | geom_stream(bw = 0.6, n_grid = 16561, aes(fill = emotion), alpha = 0.96) +  # one stream per emotion. NOTE(review): bw and n_grid are ggstream smoothing/grid settings - see ?geom_stream
104 | scale_fill_manual(values = pal) +
105 | ggtitle("S E N S E & S E N S I B I L I T Y") +
106 | theme(panel.background = element_rect(fill = "#f2f0ed", color = "#f2f0ed"),
107 | plot.background = element_rect(fill = "#f2f0ed", color = "#f2f0ed"),
108 | panel.grid.major = element_blank(),
109 | legend.background = element_blank(),
110 | legend.key = element_blank(),
111 | legend.text = element_text(size = 24),
112 | legend.title = element_blank(),
113 | legend.position = "none",  # legend hidden; a separate bar-chart legend is built later in this chunk
114 | legend.box.margin = margin(r = 0, l = 0),
115 | panel.grid.minor = element_blank(),
116 | plot.title = element_text(color = "#69543f", size = 18, hjust = 0.5),
117 | axis.text = element_blank(),
118 | strip.background = element_blank(),
119 | strip.text = element_text(color = "#69543f"),
120 | axis.title = element_blank(),
121 | axis.ticks = element_blank(),
122 | text=element_text(family = "Noto Serif", face = "bold", color = "#69543f"))
123 |
124 | # create plot for Emma: streamgraph of the per-emotion heights (test) against the row index (id)
125 | emma_p <- all_books_byline_dt %>% filter(book == "Emma") %>%
126 | ggplot(aes(x = id, y = test)) +
127 | geom_stream(bw = 0.6, n_grid = 16561, aes(fill = emotion), alpha = 0.96) +  # one stream per emotion. NOTE(review): bw and n_grid are ggstream smoothing/grid settings - see ?geom_stream
128 | scale_fill_manual(values = pal) +
129 | ggtitle("E M M A") +
130 | theme(panel.background = element_rect(fill = "#f2f0ed", color = "#f2f0ed"),
131 | plot.background = element_rect(fill = "#f2f0ed", color = "#f2f0ed"),
132 | panel.grid.major = element_blank(),
133 | legend.background = element_blank(),
134 | legend.text = element_text(size = 24),
135 | legend.key = element_blank(),
136 | legend.title = element_blank(),
137 | legend.position = "none",  # legend hidden; a separate bar-chart legend is built later in this chunk
138 | legend.box.margin = margin(r = 0, l = 0),
139 | panel.grid.minor = element_blank(),
140 | plot.title = element_text(color = "#69543f", size = 18, hjust = 0.5),
141 | axis.text = element_blank(),
142 | strip.background = element_blank(),
143 | strip.text = element_text(color = "#69543f"),
144 | axis.title = element_blank(),
145 | axis.ticks = element_blank(),
146 | text=element_text(family = "Noto Serif", face = "bold", color = "#69543f"))
147 |
148 | # create plot for Northanger Abbey: streamgraph of the per-emotion heights (test) against the row index (id)
149 | north_p <- all_books_byline_dt %>% filter(book == "Northanger Abbey") %>%
150 | ggplot(aes(x = id, y = test)) +
151 | geom_stream(bw = 0.6, n_grid = 16561, aes(fill = emotion), alpha = 0.96) +  # one stream per emotion. NOTE(review): bw and n_grid are ggstream smoothing/grid settings - see ?geom_stream
152 | scale_fill_manual(values = pal) +
153 | ggtitle("N O R T H A N G E R A B B E Y") +
154 | theme(panel.background = element_rect(fill = "#f2f0ed", color = "#f2f0ed"),
155 | plot.background = element_rect(fill = "#f2f0ed", color = "#f2f0ed"),
156 | panel.grid.major = element_blank(),
157 | legend.background = element_blank(),
158 | legend.text = element_text(size = 24),
159 | legend.key = element_blank(),
160 | legend.title = element_blank(),
161 | legend.position = "none",  # legend hidden; a separate bar-chart legend is built later in this chunk
162 | legend.box.margin = margin(r = 0, l = 0),
163 | panel.grid.minor = element_blank(),
164 | plot.title = element_text(color = "#69543f", size = 18, hjust = 0.5),
165 | axis.text = element_blank(),
166 | strip.background = element_blank(),
167 | strip.text = element_text(color = "#69543f"),
168 | axis.title = element_blank(),
169 | axis.ticks = element_blank(),
170 | text=element_text(family = "Noto Serif", face = "bold", color = "#69543f"))
171 |
172 | # create plot for Mansfield Park: streamgraph of the per-emotion heights (test) against the row index (id)
173 | mansfield_p <- all_books_byline_dt %>% filter(book == "Mansfield Park") %>%
174 | ggplot(aes(x = id, y = test)) +
175 | geom_stream(bw = 0.6, n_grid = 16561, aes(fill = emotion), alpha = 0.96) +  # one stream per emotion. NOTE(review): bw and n_grid are ggstream smoothing/grid settings - see ?geom_stream
176 | scale_fill_manual(values = pal) +
177 | ggtitle("M A N S F I E L D P A R K") +
178 | theme(panel.background = element_rect(fill = "#f2f0ed", color = "#f2f0ed"),
179 | plot.background = element_rect(fill = "#f2f0ed", color = "#f2f0ed"),
180 | panel.grid.major = element_blank(),
181 | legend.background = element_blank(),
182 | legend.key = element_blank(),
183 | legend.text = element_text(size = 24),
184 | legend.title = element_blank(),
185 | legend.position = "none",  # legend hidden; a separate bar-chart legend is built later in this chunk
186 | legend.box.margin = margin(r = 0, l = 0),
187 | panel.grid.minor = element_blank(),
188 | plot.title = element_text(color = "#69543f", size = 18, hjust = 0.5),
189 | axis.text = element_blank(),
190 | strip.background = element_blank(),
191 | strip.text = element_text(color = "#69543f"),
192 | axis.title = element_blank(),
193 | axis.ticks = element_blank(),
194 | text=element_text(family = "Noto Serif", face = "bold", color = "#69543f"))
195 |
196 | # create plot for Pride & Prejudice: streamgraph of the per-emotion heights (test) against the row index (id); this plot also carries the credits caption
197 | pride_p <- all_books_byline_dt %>% filter(book == "Pride & Prejudice") %>%
198 | ggplot(aes(x = id, y = test)) +
199 | geom_stream(bw = 0.6, n_grid = 16561, aes(fill = emotion), alpha = 0.96) +  # one stream per emotion. NOTE(review): bw and n_grid are ggstream smoothing/grid settings - see ?geom_stream
200 | scale_fill_manual(values = pal) +
201 | ggtitle("P R I D E & P R E J U D I C E") +
202 | labs(caption = "Sentiment Analysis Data: NRC Word-Emotion Association Lexicon / saifmohammad.com | Visualisation: @filmicaesthetic") +
203 | theme(panel.background = element_rect(fill = "#f2f0ed", color = "#f2f0ed"),
204 | plot.background = element_rect(fill = "#f2f0ed", color = "#f2f0ed"),
205 | panel.grid.major = element_blank(),
206 | legend.background = element_blank(),
207 | legend.key = element_blank(),
208 | legend.text = element_text(size = 18),
209 | legend.title = element_blank(),
210 | legend.position = "none",  # legend hidden; a separate bar-chart legend is built later in this chunk
211 | legend.box.margin = margin(r = 0, l = 0),
212 | panel.grid.minor = element_blank(),
213 | plot.title = element_text(color = "#69543f", size = 18, hjust = 0.5),
214 | axis.text = element_blank(),
215 | strip.background = element_blank(),
216 | strip.text = element_text(color = "#69543f"),
217 | axis.title = element_blank(),
218 | axis.ticks = element_blank(),
219 | text=element_text(family = "Noto Serif", face = "bold", color = "#69543f"))
220 |
221 | # build styled legend with a bar chart: one equal-height bar (value = 1) per emotion
222 | leg <- data.frame(EMOTION = as.factor(c("joy", "trust", "surprise", "anticipation", "sadness", "fear", "anger", "disgust")), value = c(1, 1, 1, 1, 1, 1, 1, 1))
223 | leg$EMOTION <- factor(leg$EMOTION, levels = leg$EMOTION)  # order the levels as listed above so the bars follow that order
224 |
225 | # plot legend: each bar is filled with its emotion colour and labelled with the emotion name rotated 90 degrees; the text layers come in pairs at slightly different y positions (presumably for an embossed look - confirm)
226 | hist <- ggplot(leg, aes(x = EMOTION, y = value, fill = EMOTION)) +
227 | geom_bar(stat = "identity") +
228 | geom_text(aes(y = 0.502, label = toupper(EMOTION)), angle = 90, family = "Noto Serif", color = c("#b07e13", "#f2ddb1", "#b07e13", "#f2ddb1", "#b07e13", "#b07e13", "#f2ddb1", "#b07e13"), size = 8) +
229 | geom_text(aes(y = 0.498, label = toupper(EMOTION)), angle = 90, family = "Noto Serif", color = c("#403020", "#b07e13", "#403020", "#b07e13", "#403020", "#403020", "#b07e13", "#403020"), size = 8) +
230 | geom_text(aes(y = 0.95, label = "JANE AUSTEN"), family = "Noto Serif", color = "#f2ddb1", size = 1.5) +
231 | geom_text(aes(y = 0.95001, label = "JANE AUSTEN"), family = "Noto Serif", color = "#291d12", size = 1.5) +
232 | geom_text(aes(y = 0.05, label = "SIMON & SCHUSTER"), family = "Noto Serif", color = "#f2ddb1", size = 1.1) +
233 | geom_text(aes(y = 0.05001, label = "SIMON & SCHUSTER"), family = "Noto Serif", color = "#291d12", size = 1.1) +
234 | scale_fill_manual(values = pal) +
235 | ggtitle(toupper("The Emotions of Jane Austen"), subtitle = "A visual interpretation of the emotion of language used in Jane Austen novels.") +
236 | theme(panel.background = element_rect(fill = "#f2f0ed", color = "#f2f0ed"),
237 | plot.background = element_rect(fill = "#f2f0ed", color = "#f2f0ed"),
238 | panel.grid.major = element_blank(),
239 | legend.background = element_blank(),
240 | legend.key = element_blank(),
241 | plot.margin = margin(r = 4, l = 4, unit = "cm"),
242 | legend.position = "none",
243 | legend.box.margin = margin(r = 0, l = 0),
244 | panel.grid.minor = element_blank(),
245 | plot.subtitle = element_text(hjust = 0.5, size= 18, margin = margin(t = 5, b = 5, unit = "pt")),
246 | plot.title = element_text(angle = 0, family = "Noto Serif", color = "#69543f", size = 24, face="bold", hjust = 0.5, margin = margin(t = 12, unit = "pt")),
247 | axis.text = element_blank(),
248 | strip.background = element_blank(),
249 | strip.text = element_text(color = "#80664d"),
250 | axis.title = element_blank(),
251 | axis.ticks = element_blank(),
252 | text=element_text(family="Noto Serif", face = "bold", color = "#69543f"))
253 |
254 | # combine all the plots into one: the legend and the five novel plots stacked in one column, with the legend row 1.2x the height of each plot row; the result is printed by the main_plot chunk
255 | plot <- plot_grid(hist, sense_p, emma_p, north_p, mansfield_p, pride_p, nrow = 6, rel_heights = c(1.2, 1, 1, 1, 1, 1))
256 |
257 | ```
258 |
259 | This personal project explores the literature of Jane Austen novels, provided by Julia Silge's janeaustenr package in R, using the NRC Word-Emotion Association Lexicon by Saif Mohammad. The lexicon is used to identify words associated with a range of 8 emotions: joy, trust, surprise, anticipation, sadness, fear, anger, and disgust.
260 |
261 | The project aims to capture a visual interpretation of the emotional journey in each novel with a single visualisation.
262 |
263 | ``` {r main_plot, echo = FALSE, warning = FALSE, message = FALSE, fig.width = 13, fig.height = 20}
264 | plot  # print the combined grid built in the setup chunk
265 | ```
266 |
267 | ## Files
268 |
269 | 'the_emotions_of_jane_austen.R' : view code
270 |
271 | ## Process
272 |
273 | * Load the NRC Word-Emotion Association Lexicon and Jane Austen novels datasets and prepare for further analysis.
274 | * Count occurrences of words with an emotional association for each line of each book.
275 | * Group lines approximately into pages (30 lines) to help reduce noise in the final visualisation.
276 | * Identify the two emotions with strongest association for each group.
277 | * Plot the results for each novel in a streamgraph.
278 | * Create grid of all plots and bookshelf-style legend.
--------------------------------------------------------------------------------