# Install the packages this analysis needs, skipping any that are already
# available so re-running the script does not download them again.
required_pkgs <- c(
  "tidyverse", "tidytext", "pdftools", "tesseract",
  "tidyr", "stringr", "ggplot2"
)
# requireNamespace() returns FALSE (instead of erroring) for a missing package.
pkg_available <- vapply(
  required_pkgs, requireNamespace, logical(1),
  quietly = TRUE
)
if (!all(pkg_available)) {
  install.packages(required_pkgs[!pkg_available])
}
WARNING: Rtools is required to build R packages but is not currently installed. Please download and install the appropriate version of Rtools before proceeding:
https://cran.rstudio.com/bin/windows/Rtools/
Installing packages into ‘C:/Users/juand/Documents/R/win-library/4.0’
(as ‘lib’ is unspecified)
trying URL 'https://cran.rstudio.com/bin/windows/contrib/4.0/tidyverse_1.3.1.zip'
Content type 'application/zip' length 430106 bytes (420 KB)
downloaded 420 KB
trying URL 'https://cran.rstudio.com/bin/windows/contrib/4.0/tidytext_0.3.2.zip'
Content type 'application/zip' length 3050589 bytes (2.9 MB)
downloaded 2.9 MB
trying URL 'https://cran.rstudio.com/bin/windows/contrib/4.0/pdftools_3.0.1.zip'
Content type 'application/zip' length 10745827 bytes (10.2 MB)
downloaded 10.2 MB
trying URL 'https://cran.rstudio.com/bin/windows/contrib/4.0/tesseract_4.1.2.zip'
Content type 'application/zip' length 13177160 bytes (12.6 MB)
downloaded 12.6 MB
trying URL 'https://cran.rstudio.com/bin/windows/contrib/4.0/tidyr_1.1.4.zip'
Content type 'application/zip' length 1075509 bytes (1.0 MB)
downloaded 1.0 MB
trying URL 'https://cran.rstudio.com/bin/windows/contrib/4.0/stringr_1.4.0.zip'
Content type 'application/zip' length 216799 bytes (211 KB)
downloaded 211 KB
trying URL 'https://cran.rstudio.com/bin/windows/contrib/4.0/ggplot2_3.3.5.zip'
Content type 'application/zip' length 4129116 bytes (3.9 MB)
downloaded 3.9 MB
package ‘tidyverse’ successfully unpacked and MD5 sums checked
package ‘tidytext’ successfully unpacked and MD5 sums checked
package ‘pdftools’ successfully unpacked and MD5 sums checked
package ‘tesseract’ successfully unpacked and MD5 sums checked
package ‘tidyr’ successfully unpacked and MD5 sums checked
package ‘stringr’ successfully unpacked and MD5 sums checked
package ‘ggplot2’ successfully unpacked and MD5 sums checked
The downloaded binary packages are in
C:\Users\juand\AppData\Local\Temp\RtmpQfKyBS\downloaded_packages
# Install tidyr only when it is not already available, so re-running the
# script does not download it again.
if (!requireNamespace("tidyr", quietly = TRUE)) {
  install.packages("tidyr")
}
WARNING: Rtools is required to build R packages but is not currently installed. Please download and install the appropriate version of Rtools before proceeding:
https://cran.rstudio.com/bin/windows/Rtools/
Installing package into ‘C:/Users/juand/Documents/R/win-library/4.0’
(as ‘lib’ is unspecified)
trying URL 'https://cran.rstudio.com/bin/windows/contrib/4.0/tidyr_1.1.4.zip'
Content type 'application/zip' length 1075509 bytes (1.0 MB)
downloaded 1.0 MB
package ‘tidyr’ successfully unpacked and MD5 sums checked
The downloaded binary packages are in
C:\Users\juand\AppData\Local\Temp\RtmpQfKyBS\downloaded_packages
# Install devtools only when it is not already available, so re-running the
# script does not download it again.
if (!requireNamespace("devtools", quietly = TRUE)) {
  install.packages("devtools")
}
WARNING: Rtools is required to build R packages but is not currently installed. Please download and install the appropriate version of Rtools before proceeding:
https://cran.rstudio.com/bin/windows/Rtools/
Installing package into ‘C:/Users/juand/Documents/R/win-library/4.0’
(as ‘lib’ is unspecified)
trying URL 'https://cran.rstudio.com/bin/windows/contrib/4.0/devtools_2.4.2.zip'
Content type 'application/zip' length 397053 bytes (387 KB)
downloaded 387 KB
package ‘devtools’ successfully unpacked and MD5 sums checked
The downloaded binary packages are in
C:\Users\juand\AppData\Local\Temp\RtmpQfKyBS\downloaded_packages
# Install textdata only when it is not already available, so re-running the
# script does not download it again.
if (!requireNamespace("textdata", quietly = TRUE)) {
  install.packages("textdata")
}
WARNING: Rtools is required to build R packages but is not currently installed. Please download and install the appropriate version of Rtools before proceeding:
https://cran.rstudio.com/bin/windows/Rtools/
Installing package into ‘C:/Users/juand/Documents/R/win-library/4.0’
(as ‘lib’ is unspecified)
trying URL 'https://cran.rstudio.com/bin/windows/contrib/4.0/textdata_0.4.1.zip'
Content type 'application/zip' length 496672 bytes (485 KB)
downloaded 485 KB
package ‘textdata’ successfully unpacked and MD5 sums checked
The downloaded binary packages are in
C:\Users\juand\AppData\Local\Temp\RtmpQfKyBS\downloaded_packages
# Install plotly only when it is not already available, so re-running the
# script does not download it again.
if (!requireNamespace("plotly", quietly = TRUE)) {
  install.packages("plotly")
}
WARNING: Rtools is required to build R packages but is not currently installed. Please download and install the appropriate version of Rtools before proceeding:
https://cran.rstudio.com/bin/windows/Rtools/
Installing package into ‘C:/Users/juand/Documents/R/win-library/4.0’
(as ‘lib’ is unspecified)
trying URL 'https://cran.rstudio.com/bin/windows/contrib/4.0/plotly_4.10.0.zip'
Content type 'application/zip' length 3176069 bytes (3.0 MB)
downloaded 3.0 MB
package ‘plotly’ successfully unpacked and MD5 sums checked
The downloaded binary packages are in
C:\Users\juand\AppData\Local\Temp\RtmpQfKyBS\downloaded_packages
library(tidyverse)
package ‘tidyverse’ was built under R version 4.0.5
Registered S3 methods overwritten by 'dbplyr':
method from
print.tbl_lazy
print.tbl_sql
-- Attaching packages ------------------------------------------------------------------------------------------------------------------------ tidyverse 1.3.1 --
v ggplot2 3.3.5 v purrr 0.3.4
v tibble 3.1.4 v dplyr 1.0.7
v tidyr 1.1.4 v stringr 1.4.0
v readr 2.0.1 v forcats 0.5.1
package ‘ggplot2’ was built under R version 4.0.5
package ‘tibble’ was built under R version 4.0.5
package ‘tidyr’ was built under R version 4.0.5
package ‘readr’ was built under R version 4.0.5
package ‘purrr’ was built under R version 4.0.5
package ‘dplyr’ was built under R version 4.0.5
package ‘stringr’ was built under R version 4.0.5
package ‘forcats’ was built under R version 4.0.5
-- Conflicts --------------------------------------------------------------------------------------------------------------------------- tidyverse_conflicts() --
x dplyr::filter() masks stats::filter()
x dplyr::lag() masks stats::lag()
library(pdftools)
package ‘pdftools’ was built under R version 4.0.5
Using poppler version 21.04.0
library(tesseract)
package ‘tesseract’ was built under R version 4.0.5
library(tidyr)
library(devtools)
package ‘devtools’ was built under R version 4.0.5
Loading required package: usethis
package ‘usethis’ was built under R version 4.0.5
library(textdata)
package ‘textdata’ was built under R version 4.0.5
library(tidytext)
package ‘tidytext’ was built under R version 4.0.5
library(dplyr)
library(stringr)
library(ggplot2)
library(plotly)
package ‘plotly’ was built under R version 4.0.5
Registered S3 method overwritten by 'data.table':
method from
print.data.table
Registered S3 method overwritten by 'htmlwidgets':
method from
print.htmlwidget tools:rstudio
Attaching package: ‘plotly’
The following object is masked from ‘package:ggplot2’:
last_plot
The following object is masked from ‘package:stats’:
filter
The following object is masked from ‘package:graphics’:
layout
# Build a per-episode table of NRC sentiment counts.
#
# Reads one plain-text file per episode ("<episode>.txt" inside `episode_dir`)
# and produces:
#   all_episodes - one row per episode, one integer column per NRC sentiment
#                  category (0 when a category never occurs in that episode)
#   nwords       - total number of words per episode, counted BEFORE stop
#                  words are removed
episodes <- as.character(1:14)

# The trailing separator is kept so the file name can be appended directly.
episode_dir <- "C:\\Users\\juand\\Downloads\\sequential-20211116T115107Z-001\\sequential\\"

# Load the lexicon once instead of once per episode.
nrc_lexicon <- get_sentiments("nrc")
sentiment_levels <- sort(unique(nrc_lexicon$sentiment))

# Preallocate the results rather than growing them inside the loop.
nwords <- integer(length(episodes))
episode_rows <- vector("list", length(episodes))

for (i in seq_along(episodes)) {
  txt <- read_file(paste0(episode_dir, episodes[[i]], ".txt"))

  # Sentences first (numbered in reading order), then one row per word.
  # row_number() is safe for an empty transcript, unlike 1:nrow(...).
  tidy_text <- tibble(text = txt) %>%
    unnest_tokens(sentence, text, token = "sentences") %>%
    mutate(linenumber = row_number(), .before = sentence) %>%
    unnest_tokens(word, sentence)

  nwords[[i]] <- nrow(tidy_text)

  # Drop stop words, then keep only the words the NRC lexicon knows.
  # Explicit `by` documents the join key and silences the "Joining, by"
  # message.
  tidy_text <- tidy_text %>%
    anti_join(stop_words, by = "word")
  text_sentiment <- tidy_text %>%
    inner_join(nrc_lexicon, by = "word")

  # Counting against fixed factor levels gives every episode the same
  # columns, so the rows always bind, and keeps the counts as integers.
  sentiment_counts <- table(
    factor(text_sentiment$sentiment, levels = sentiment_levels)
  )
  episode_rows[[i]] <- as.data.frame(as.list(sentiment_counts))
}

all_episodes <- do.call(rbind, episode_rows)
Joining, by = "word"
Joining, by = "word"
Joining, by = "word"
Joining, by = "word"
Joining, by = "word"
Joining, by = "word"
Joining, by = "word"
Joining, by = "word"
Joining, by = "word"
Joining, by = "word"
Joining, by = "word"
Joining, by = "word"
Joining, by = "word"
Joining, by = "word"
Joining, by = "word"
Joining, by = "word"
Joining, by = "word"
Joining, by = "word"
Joining, by = "word"
Joining, by = "word"
Joining, by = "word"
Joining, by = "word"
Joining, by = "word"
Joining, by = "word"
Joining, by = "word"
Joining, by = "word"
Joining, by = "word"
Joining, by = "word"
# Attach the episode number and the total word count to every row.
all_episodes[["episode"]] <- as.integer(episodes)
all_episodes[["words"]] <- nwords
Transforming raw counts into ratios of the total number of words.
# Replace the raw counts of the four plotted sentiments with their share of
# each episode's total word count. as.integer() is applied first because the
# counts may be stored as text.
ratio_cols <- c("anger", "joy", "fear", "sadness")
all_episodes[ratio_cols] <- lapply(
  all_episodes[ratio_cols],
  function(counts) as.integer(counts) / all_episodes$words
)
Graphing RR sentiments
# Line chart of four sentiment ratios across episodes.
# Each sentiment has a fixed colour, shared by its line and its points.

# One connecting line per sentiment.
trend_lines <- list(
  geom_line(aes(y = anger), color = "coral1"),
  geom_line(aes(y = joy), color = "darkgoldenrod1"),
  geom_line(aes(y = fear), color = "darkgreen"),
  geom_line(aes(y = sadness), color = "cornflowerblue")
)

# One marker per episode and sentiment, drawn on top of the lines.
episode_markers <- list(
  geom_point(aes(y = anger), color = "coral1", size = 1.5),
  geom_point(aes(y = joy), color = "darkgoldenrod1", size = 1.5),
  geom_point(aes(y = fear), color = "darkgreen", size = 1.5),
  geom_point(aes(y = sadness), color = "cornflowerblue", size = 1.5)
)

# group = 1 puts all rows in a single group so each line connects the
# episodes in order.
p <- ggplot(all_episodes, aes(x = episode, group = 1)) +
  trend_lines +
  ylab("ratio") +
  episode_markers +
  theme_bw() +
  scale_x_continuous(breaks = seq(1, 14, 1))

p
Color key: joy = yellow, anger = coral, fear = green, sadness = blue.