O código desenvolvido e aplicado às análises de texto pode ser consultado abaixo, sob forma comentada. Em alternativa, os scripts podem também ser diretamente descarregados em ficheiro R a partir dos seguintes links:
↓ Visão Panorâmica (Bird’s-Eye View)
↓ Reinos Animal, Vegetal e Mineral (Kingdoms of Nature)
↓ Quatro Elementos (The Four Elements)
A versão do código que aqui se publica em acesso aberto, para inspeção e reuso, corresponde àquela utilizada para gerar o datasets-bundle-2 e respetivas visualizações [release 1, junho 2024].
SUMMARY
RSTUDIO SETTINGS
## List of required packages
### tm - general use
### tidyverse (package collection) - general use
### stopwords - stopwords removal
### qdap - stopwords removal
### textstem - lemmatization process
### quanteda - lemmatization process
## Installing required packages
### only the packages that are not installed yet are fetched, so re-running the
### script neither reinstalls everything nor needs network access every time
required_packages <- c("tm", "tidyverse", "stopwords", "qdap", "textstem", "quanteda")
missing_packages <- setdiff(required_packages, rownames(installed.packages()))
if (length(missing_packages) > 0) {
  install.packages(missing_packages)
}
## Loading installed packages
library(tm)
library(tidyverse)
library(stopwords)
library(qdap)
library(textstem)
library(quanteda)
DATA PREPROCESSING
## 1st - Importing Data
### folder that holds the plain-text files of the corpus
arrcorpus <- "/Users/patriciareina/Desktop/FAT/BASE DE DADOS/Obra Completa TXT/TXT-livros/OP-all"
### listing the files found in that folder
arrfiles <- list.files(path = arrcorpus)
arrfiles #files correctly ordered
### building corpus arrangement: [[ ]] file, [ ] verse (strings)
#### full path of every file: folder + "/" + file name
corpuslist <- paste0(arrcorpus, "/", arrfiles)
corpuslist
typeof(corpuslist) #character
#### every file is read line by line into one element of the list
corpus.list <- lapply(X = corpuslist, FUN = function(txt_path) readLines(txt_path))
corpus.list[[1]]
typeof(corpus.list) #list
## 2nd - Data cleaning
### joining the lines of each file into one single string
corpus.list.line <- lapply(corpus.list, function(file_lines) paste(file_lines, collapse = " "))
corpus.list.line[[78]]
typeof(corpus.list.line) #list
### lower-casing the words
corpus.list.line.lower <- tolower(x = corpus.list.line)
corpus.list.line.lower[[78]]
typeof(corpus.list.line.lower) #character
### tokenizing with text/file separation
#### each text is split at every non-word character (regex "\\W")
corpus.list.line.clean <- strsplit(x = corpus.list.line.lower, split = "\\W")
corpus.list.line.clean[[78]]
typeof(corpus.list.line.clean) #list
typeof(corpus.list.line.clean[[78]][985]) #character
## 3rd - Corpus
### making a Simple Corpus: lists of vectors
#### the tokenized texts are wrapped in a VectorSource and turned into a tm corpus
#### NOTE(review): the input is a list of token vectors (one vector per file), not
#### one string per file - confirm how each element is coerced into document text
corpus.as.list <- Corpus(VectorSource(as.vector(corpus.list.line.clean)))
corpus.as.list
#### type checks kept from the original workflow
typeof(arrcorpus) #character
typeof(corpus.as.list) #list
#### looking at the second document
inspect(corpus.as.list[[2]])
## 4th - Removing stopwords
### adapted list assignment (based on "stopwords-iso")
#### the file is read with newline as separator, i.e. one entry per line
allstops_iso_alt <- scan(
  file = "/Users/patriciareina/Desktop/FAT/LISTAS/stopwords/stopwords-pt_txt_alterado_pr_24-01-24.txt",
  what = "character",
  sep = "\n"
)
allstops_iso_alt
### stopwords removal
#### every entry of the adapted list is removed from every document
corpus.as.list <- tm_map(corpus.as.list, removeWords, allstops_iso_alt)
#### spot checks on two documents
inspect(corpus.as.list[[1]])
inspect(corpus.as.list[[48]])
## 5th - More Data Cleaning
### removing numbers
corpus.as.list <- tm_map(
  corpus.as.list,
  content_transformer(removeNumbers)
)
inspect(corpus.as.list[[48]])
### removing punctuation
corpus.as.list <- tm_map(
  corpus.as.list,
  content_transformer(removePunctuation)
)
inspect(corpus.as.list[48])
### removing white spaces
corpus.as.list <- tm_map(
  corpus.as.list,
  content_transformer(stripWhitespace)
)
typeof(corpus.as.list[48]) #list
## 6th - Lemmatization
### working on a copy, so the non-lemmatized corpus stays available
corpus.as.list.lemma <- corpus.as.list
### setting a list/dictionary to proceed lemmatization (based on "lemmatization-pt" from Global Glossary Project)
#### the file has no header row; its two columns are named lemma and term below
lemma_dic <- read.delim(file = "/Users/patriciareina/Desktop/FAT/LISTAS/lematização/lemmatization-pt_rev_06-06-24.txt", header = FALSE, stringsAsFactors = FALSE)
names(lemma_dic) <- c("lemma", "term")
#### head() shows at most 1000 rows and does not pad a shorter dictionary with NA rows
head(lemma_dic, 1000)
typeof(lemma_dic) #list
### fixing duplicate terms, keeping only first occurrences
lemma_dic_unique <- lemma_dic[!duplicated(lemma_dic$term), ]
### list of control for removed duplicated terms [external output]
#### plain assignment: wrapping it in print() dumped the whole term column to the console
term_doc <- lemma_dic$term
term_doc[duplicated(term_doc)]
#### max.print is raised only while the control list is written; the value that was
#### active before is restored afterwards (instead of a hard-coded one)
old_max_print <- options(max.print = 99999)
capture.output(term_doc[duplicated(term_doc)], file = "duplicados_lemma_06-06-24.txt")
options(old_max_print)
### reordering dictionary disposition for term-lemma instead of lemma-term
lemma_dic_unique_term_lemma <- select(lemma_dic_unique, term, lemma)
lemma_dic_unique_term_lemma[1:10, ]
### lemmatization
#### the first element of every document (presumably its text content - TODO confirm
#### for this corpus class) is replaced by its lemmatized version;
#### seq_len() keeps the loop from running on indices 1 and 0 if the corpus is empty
for (i in seq_len(length(corpus.as.list.lemma))) {
  corpus.as.list.lemma[[i]][[1]] <- lemmatize_strings(
    corpus.as.list.lemma[[i]][[1]],
    dictionary = lemma_dic_unique_term_lemma
  )
}
SETTING DATA ANALYSIS
corpus.as.list.lemma # still a Simple Corpus
### making a DocumentTermMatrix
#### wordLengths = c(2, Inf): terms with fewer than 2 characters are left out
corpuslist_DTM_lemma <- DocumentTermMatrix(corpus.as.list.lemma, control = list(wordLengths = c(2, Inf)))
inspect(corpuslist_DTM_lemma)
### word frequency in corpus, publication division
#### transposed matrix: one row per term, one column per book
corpuslist_DTM_lemma.inv <- t(corpuslist_DTM_lemma)
corpuslist_DTM_lemma.matrix <- as.matrix(corpuslist_DTM_lemma.inv)
#### NOTE(review): corpuslist_DTM_lemma.freq and corpuslist_DTM_lemma.freq.order are
#### used by the analyses below but their definitions are missing from this listing;
#### they are reconstructed here as the per-term totals over all books and the same
#### totals sorted in decreasing order - confirm against the original script
corpuslist_DTM_lemma.freq <- rowSums(corpuslist_DTM_lemma.matrix)
corpuslist_DTM_lemma.freq.order <- corpuslist_DTM_lemma.freq[order(corpuslist_DTM_lemma.freq, decreasing = TRUE)]
### finding in how many books each word in the corpus occurs
#### a cell > 0 means the term occurs in that book; the column sums count the books
corpuslist_DTM_lemma.freq.doc <- colSums(as.matrix(corpuslist_DTM_lemma) > 0) #alphabetical order
corpuslist_DTM_lemma.freq.doc.order <- corpuslist_DTM_lemma.freq.doc[order(corpuslist_DTM_lemma.freq.doc, decreasing = TRUE)] #sort by decreasing order
BIRD’S EYE VIEW ANALYSIS
### setting directory for the external outputs
getwd()
setwd(dir = "/Users/patriciareina/Desktop/FAT/RAWGRAPHS")
### the 10 more frequent words in corpus [absolute frequency]
corpuslist_DTM_lemma.freq.order[seq_len(10)]
### data preparation for 'the 10 more frequent words in corpus' viz [external output for RAWGraphs]
#### long format (value + name) of the whole ranking
corpuslist_DTM_lemma.freq.order.stack <- stack(corpuslist_DTM_lemma.freq.order)
corpuslist_DTM_lemma.freq.order.stack.df <- data.frame(corpuslist_DTM_lemma.freq.order.stack) #building a data frame
#### long format of the first ten entries only, with readable column names
corpuslist_DTM_lemma.freq.order.stack.top10words <- setNames(
  stack(corpuslist_DTM_lemma.freq.order[seq_len(10)]),
  c("frequency", "terms")
)
corpuslist_DTM_lemma.freq.order.stack.top10words
write.csv(
  corpuslist_DTM_lemma.freq.order.stack.top10words,
  file = "corpuslist_DTM_lemma.freq.order.stack.top10words.csv",
  row.names = FALSE
)
### the 10 more frequent words in each book [absolute frequency]
#### the row names of this data frame are the ten words themselves
corpuslist_DTM_lemma.top10words.df <- data.frame(corpuslist_DTM_lemma.freq.order[seq_len(10)])
top10words <- row.names(corpuslist_DTM_lemma.top10words.df)
corpuslist_DTM_lemma.matrix[top10words, ]
### data preparation for 'the 10 more frequent words in each book' viz [external output for RAWGraphs]
corpuslist_DTM_lemma.matrix.top10words.df <- data.frame(corpuslist_DTM_lemma.matrix[top10words, ]) #adjusting hierarchies
#### book titles, one per line of the text file
nomes.livros.arr <- scan(
  "/Users/patriciareina/Desktop/FAT/LISTAS/nomes_obras_poeticas_arr.txt",
  what = "character",
  sep = "\n"
)
colnames(corpuslist_DTM_lemma.matrix.top10words.df) <- nomes.livros.arr #the order of the books is replaced by their names
corpuslist_DTM_lemma.matrix.top10words.df #testing
#### the words move from the row names into a regular "terms" column before export
corpuslist_DTM_lemma.matrix.top10words.df.row <- rownames_to_column(
  corpuslist_DTM_lemma.matrix.top10words.df,
  var = "terms"
)
write.csv(
  corpuslist_DTM_lemma.matrix.top10words.df.row,
  file = "corpuslist_DTM_lemma.matrix.top10words.df.row.csv",
  row.names = FALSE
)
### which words are most frequent in most books?
corpuslist_DTM_lemma.freq.doc.order[seq_len(10)]
corpuslist_DTM_lemma.freq.doc.order[seq_len(15)] #testing if there are other words with 76 occurrences. YES, three more.
### data preparation for 'which words are most frequent in most books?' viz [external output for RAWGraphs]
#### first thirteen entries in long format
corpuslist_DTM_lemma.freq.order.doc.stack.top13doc <- stack(corpuslist_DTM_lemma.freq.doc.order[seq_len(13)])
#### stack() yields the columns values/ind; they are renamed and put in terms-first order
corpuslist_DTM_lemma.freq.order.doc.stack.top13doc <- select(
  corpuslist_DTM_lemma.freq.order.doc.stack.top13doc,
  terms = ind,
  `total of books` = values
)
corpuslist_DTM_lemma.freq.order.doc.stack.top13doc
write.csv(
  corpuslist_DTM_lemma.freq.order.doc.stack.top13doc,
  file = "corpuslist_DTM_lemma.freq.order.doc.stack.csv.top13doc.csv",
  row.names = FALSE
)
### how many times do these most frequent words occur in each of the 79 books?
#### the row names of this data frame are the thirteen words themselves
corpuslist_DTM_lemma.top13doc.df <- data.frame(corpuslist_DTM_lemma.freq.doc.order[seq_len(13)])
top13docs <- row.names(corpuslist_DTM_lemma.top13doc.df)
corpuslist_DTM_lemma.matrix[top13docs, ]
### data preparation for 'how many times do these most...?' viz [external output for RAWGraphs]
corpuslist_DTM_lemma.matrix.top13doc.df <- data.frame(corpuslist_DTM_lemma.matrix[top13docs, ])
#### column headers become the book titles
colnames(corpuslist_DTM_lemma.matrix.top13doc.df) <- nomes.livros.arr
corpuslist_DTM_lemma.matrix.top13doc.df
#### the words move from the row names into a regular "terms" column before export
corpuslist_DTM_lemma.matrix.top13doc.df.row <- rownames_to_column(
  corpuslist_DTM_lemma.matrix.top13doc.df,
  var = "terms"
)
write.csv(
  corpuslist_DTM_lemma.matrix.top13doc.df.row,
  file = "corpuslist_DTM_lemma.matrix.top13doc.df.row.csv",
  row.names = FALSE
)
KINGDOMS OF NATURE ANALYSIS
### setting directory for external outputs
getwd()
setwd(dir = "/Users/patriciareina/Desktop/FAT/RAWGRAPHS")
## Literal terms
### absolute frequency of the literal terms
animal1 <- corpuslist_DTM_lemma.freq["animal"]
vegetal1 <- corpuslist_DTM_lemma.freq["vegetal"]
mineral1 <- corpuslist_DTM_lemma.freq["mineral"]
### data preparation for 'absolute frequency of the literal terms ' [external output for RAWGraphs]
#### building data frame: one column per kingdom
reino.geral.freq.data <- data.frame(animal = animal1, vegetal = vegetal1, mineral = mineral1)
#### long format: one row per kingdom
reino.geral.freq.data.stack <- stack(reino.geral.freq.data)
#### improving data frame: stack() columns (values/ind) renamed, terms first
reino.geral.freq.data.stack <- select(reino.geral.freq.data.stack, terms = ind, frequency = values)
#### exporting data frame
write.csv(
  reino.geral.freq.data.stack,
  file = "reino.geral.freq.data.stack.csv",
  row.names = FALSE
)
### relative frequency of the literal terms
#### each literal term as a percentage of the sum of all term frequencies
reino.geral.freq.rel.data <- data.frame(
  animal = animal1 / sum(corpuslist_DTM_lemma.freq) * 100,
  vegetal = vegetal1 / sum(corpuslist_DTM_lemma.freq) * 100,
  mineral = mineral1 / sum(corpuslist_DTM_lemma.freq) * 100
)
### data preparation for 'relative frequency of the literal terms' [external output for RAWGraphs]
#### rounding to two decimals; the rounding is wrapped in a function because passing
#### extra arguments through across(..., round, 2) is deprecated in current dplyr
reino.geral.freq.rel.data <- reino.geral.freq.rel.data %>%
  mutate(across(1:3, function(x) round(x, digits = 2)))
reino.geral.freq.rel.data.stack <- stack(reino.geral.freq.rel.data)
names(reino.geral.freq.rel.data.stack) <- c("relative frequency", "terms")
reino.geral.freq.rel.data.stack <- select(reino.geral.freq.rel.data.stack, terms, `relative frequency`)
write.csv(reino.geral.freq.rel.data.stack, file = "reino.geral.freq.rel.data.stack.csv", row.names = FALSE)
### visualization optimization: putting together abs. and rel. freqs. in one viz
#### the relative-frequency column is appended to the absolute-frequency table
reino.geral.freq.abs.rel.data.stack <- cbind(reino.geral.freq.data.stack, reino.geral.freq.rel.data.stack[2])
write.csv(reino.geral.freq.abs.rel.data.stack, file = "reino.geral.freq.abs.rel.data.stack.csv", row.names = FALSE)
### in how many books do these literal terms occur?
corpuslist_DTM_lemma.freq.doc["animal"]
corpuslist_DTM_lemma.freq.doc["vegetal"]
corpuslist_DTM_lemma.freq.doc["mineral"]
### in which books do these literal terms occur?
corpuslist_DTM_lemma.matrix["animal", ]
corpuslist_DTM_lemma.matrix["vegetal", ]
corpuslist_DTM_lemma.matrix["mineral", ]
### data preparation for 'in which books do these literal terms occur?' [external output for RAWGraphs]
#### one column per literal term, one row per book
reino.geral.freq.per.livro.data <- data.frame(
  animal = corpuslist_DTM_lemma.matrix["animal", ],
  vegetal = corpuslist_DTM_lemma.matrix["vegetal", ],
  mineral = corpuslist_DTM_lemma.matrix["mineral", ]
)
#### transposed, so that the terms are the rows and the books the columns
reino.geral.freq.per.livro.data <- t(reino.geral.freq.per.livro.data)
colnames(reino.geral.freq.per.livro.data) <- nomes.livros.arr #the order of the books is replaced by their names
reino.geral.freq.per.livro.data <- as.data.frame(reino.geral.freq.per.livro.data)
reino.geral.freq.per.livro.data.row <- rownames_to_column(
  reino.geral.freq.per.livro.data,
  var = "terms"
)
write.csv(
  reino.geral.freq.per.livro.data.row,
  file = "reino.geral.freq.per.livro.data.row.csv",
  row.names = FALSE
)
## Generic terms
### retrieving data from lists - generic terms
#### in each set below: names() replaces rownames(as.table()), which stops with an
#### error when none of the listed words occurs in the corpus; drop = FALSE keeps the
#### matrix shape when only one listed word occurs
#### animal
reino.animal.tipo.lexico <- scan("/Users/patriciareina/Desktop/FAT/LISTAS/Listas Bruno 12-04 /animais_tipo.txt", what = "character", sep = "\n")
reino.animal.tipo.lexico
##### absolute frequency of the set (words absent from the corpus come back as NA)
corpuslist_DTM_lemma.freq.animal.tipo <- corpuslist_DTM_lemma.freq[reino.animal.tipo.lexico]
animal2 <- corpuslist_DTM_lemma.freq.animal.tipo
##### total of occurrences of the set in books
corpuslist_DTM_lemma.freq.animal.tipo.values <- na.omit(corpuslist_DTM_lemma.freq.animal.tipo) #suppression of null/NA values
animal.tipo.lexico.values <- names(corpuslist_DTM_lemma.freq.animal.tipo.values) #only considers words that have more than zero occurrences
corpuslist_DTM_lemma.matrix[animal.tipo.lexico.values, , drop = FALSE]
#### vegetal
reino.vegetal.tipo.lexico <- scan("/Users/patriciareina/Desktop/FAT/LISTAS/Listas Bruno 12-04 /vegetais_tipo.txt", what = "character", sep = "\n")
reino.vegetal.tipo.lexico
##### absolute frequency of the set
corpuslist_DTM_lemma.freq.vegetal.tipo <- corpuslist_DTM_lemma.freq[reino.vegetal.tipo.lexico]
vegetal2 <- corpuslist_DTM_lemma.freq.vegetal.tipo
##### total of occurrences of the set in books
corpuslist_DTM_lemma.freq.vegetal.tipo.values <- na.omit(corpuslist_DTM_lemma.freq.vegetal.tipo)
vegetal.tipo.lexico.values <- names(corpuslist_DTM_lemma.freq.vegetal.tipo.values)
corpuslist_DTM_lemma.matrix[vegetal.tipo.lexico.values, , drop = FALSE]
#### mineral
reino.mineral.tipo.lexico <- scan("/Users/patriciareina/Desktop/FAT/LISTAS/Listas Bruno 12-04 /minerais_tipo.txt", what = "character", sep = "\n")
reino.mineral.tipo.lexico
##### absolute frequency of the set
corpuslist_DTM_lemma.freq.mineral.tipo <- corpuslist_DTM_lemma.freq[reino.mineral.tipo.lexico]
mineral2 <- corpuslist_DTM_lemma.freq.mineral.tipo
##### total of occurrences of the set in books
corpuslist_DTM_lemma.freq.mineral.tipo.values <- na.omit(corpuslist_DTM_lemma.freq.mineral.tipo)
mineral.tipo.lexico.values <- names(corpuslist_DTM_lemma.freq.mineral.tipo.values)
corpuslist_DTM_lemma.matrix[mineral.tipo.lexico.values, , drop = FALSE]
### absolute frequency of the generic terms
#### the three generic lexicons put together, and their frequencies
reino.tipo <- c(
  reino.animal.tipo.lexico,
  reino.vegetal.tipo.lexico,
  reino.mineral.tipo.lexico
)
corpuslist_DTM_lemma.freq[reino.tipo]
#### one total per kingdom
reino.tipo.freq.data <- data.frame(
  animal = sum(corpuslist_DTM_lemma.freq.animal.tipo.values),
  vegetal = sum(corpuslist_DTM_lemma.freq.vegetal.tipo.values),
  mineral = sum(corpuslist_DTM_lemma.freq.mineral.tipo.values)
)
### data preparation for 'absolute frequency of the generic terms' viz [external output for RAWGraphs]
#### long format, rows with NA dropped, stack() columns (values/ind) renamed
reino.tipo.freq.data.stack <- na.omit(stack(reino.tipo.freq.data))
reino.tipo.freq.data.stack <- select(
  reino.tipo.freq.data.stack,
  `general terms` = ind,
  frequency = values
)
write.csv(
  reino.tipo.freq.data.stack,
  file = "reino.tipo.freq.data.stack.csv",
  row.names = FALSE
)
### relative frequency of the generic terms
#### each kingdom's generic-term total as a percentage of the sum of all term frequencies
reino.tipo.freq.rel.data <- data.frame(
  animal = sum(corpuslist_DTM_lemma.freq.animal.tipo.values) / sum(corpuslist_DTM_lemma.freq) * 100,
  vegetal = sum(corpuslist_DTM_lemma.freq.vegetal.tipo.values) / sum(corpuslist_DTM_lemma.freq) * 100,
  mineral = sum(corpuslist_DTM_lemma.freq.mineral.tipo.values) / sum(corpuslist_DTM_lemma.freq) * 100
)
### data preparation for 'relative frequency of the generic terms' viz [external output for RAWGraphs]
#### rounding to two decimals; the rounding is wrapped in a function because passing
#### extra arguments through across(..., round, 2) is deprecated in current dplyr
reino.tipo.freq.rel.data <- reino.tipo.freq.rel.data %>%
  mutate(across(1:3, function(x) round(x, digits = 2)))
reino.tipo.freq.rel.data.stack <- stack(reino.tipo.freq.rel.data)
names(reino.tipo.freq.rel.data.stack) <- c("relative frequency", "general terms")
reino.tipo.freq.rel.data.stack <- select(reino.tipo.freq.rel.data.stack, `general terms`, `relative frequency`)
write.csv(reino.tipo.freq.rel.data.stack, file = "reino.tipo.freq.rel.data.stack.csv", row.names = FALSE)
### visualization optimization: putting together abs. and rel. freqs. in one viz
#### the relative-frequency column is appended to the absolute-frequency table
reino.tipo.freq.abs.rel.data.stack <- cbind(reino.tipo.freq.data.stack, reino.tipo.freq.rel.data.stack[2])
write.csv(reino.tipo.freq.abs.rel.data.stack, file = "reino.tipo.freq.abs.rel.data.stack.csv", row.names = FALSE)
### visualization optimization: adding the related kingdom for each term
#### for each kingdom: long format, a constant kingdom label, rows with NA dropped,
#### then the three columns get their final names
#### animal
animal2.stack <- stack(animal2)
animal2.stack[["kingdom"]] <- "animal"
animal2.stack <- na.omit(animal2.stack)
names(animal2.stack) <- c("frequency", "terms", "kingdom")
#### vegetal
vegetal2.stack <- stack(vegetal2)
vegetal2.stack[["kingdom"]] <- "vegetal"
vegetal2.stack <- na.omit(vegetal2.stack)
names(vegetal2.stack) <- c("frequency", "terms", "kingdom")
#### mineral
mineral2.stack <- stack(mineral2)
mineral2.stack[["kingdom"]] <- "mineral"
mineral2.stack <- na.omit(mineral2.stack)
names(mineral2.stack) <- c("frequency", "terms", "kingdom")
#### merging the data, in order
reino.tipo.anim.veg.min.stack <- rbind(
  animal2.stack,
  vegetal2.stack,
  mineral2.stack
)
write.csv(
  reino.tipo.anim.veg.min.stack,
  file = "reino.tipo.anim.veg.min.stack.csv",
  row.names = FALSE
)
### occurrences of generic terms per book
#### total of occurrences in just one column in matrix
#### drop = FALSE keeps the selection a matrix even when a kingdom has exactly one
#### word; without it colSums() fails on the resulting plain vector
colSums(corpuslist_DTM_lemma.matrix[animal.tipo.lexico.values, , drop = FALSE])
colSums(corpuslist_DTM_lemma.matrix[vegetal.tipo.lexico.values, , drop = FALSE])
colSums(corpuslist_DTM_lemma.matrix[mineral.tipo.lexico.values, , drop = FALSE])
#### building data frame: one column per kingdom, one row per book
reino.tipo.freq.per.livro.data <- data.frame(
  animal = colSums(corpuslist_DTM_lemma.matrix[animal.tipo.lexico.values, , drop = FALSE]),
  vegetal = colSums(corpuslist_DTM_lemma.matrix[vegetal.tipo.lexico.values, , drop = FALSE]),
  mineral = colSums(corpuslist_DTM_lemma.matrix[mineral.tipo.lexico.values, , drop = FALSE])
)
### data preparation for 'occurrences of generic terms per book' viz [external output for RAWGraphs]
#### transposed, so that the kingdoms are the rows and the books the columns
reino.tipo.freq.per.livro.data <- t(reino.tipo.freq.per.livro.data)
colnames(reino.tipo.freq.per.livro.data) <- c(nomes.livros.arr)
reino.tipo.freq.per.livro.data <- as.data.frame(reino.tipo.freq.per.livro.data)
reino.tipo.freq.per.livro.data.row <- rownames_to_column(reino.tipo.freq.per.livro.data, var = "reinos")
write.csv(reino.tipo.freq.per.livro.data.row, file = "reino.tipo.freq.per.livro.data.row.csv", row.names = FALSE)
## Specific terms
### retrieving data from lists - specific terms
#### in each set below: names() replaces rownames(as.table()), which stops with an
#### error when none of the listed words occurs in the corpus; drop = FALSE keeps the
#### matrix shape when only one listed word occurs
#### animal
reino.animal.especie.lexico <- scan("/Users/patriciareina/Desktop/FAT/LISTAS/Listas Bruno 12-04 /animais_especie.txt", what = "character", sep = "\n")
reino.animal.especie.lexico
##### absolute frequency of the set (words absent from the corpus come back as NA)
corpuslist_DTM_lemma.freq.animal.especie <- corpuslist_DTM_lemma.freq[reino.animal.especie.lexico]
animal3 <- corpuslist_DTM_lemma.freq.animal.especie
##### total of occurrences of the set in books
corpuslist_DTM_lemma.freq.animal.especie.values <- na.omit(corpuslist_DTM_lemma.freq.animal.especie)
animal.especie.lexico.values <- names(corpuslist_DTM_lemma.freq.animal.especie.values)
corpuslist_DTM_lemma.matrix[animal.especie.lexico.values, , drop = FALSE]
#### vegetal
reino.vegetal.especie.lexico <- scan("/Users/patriciareina/Desktop/FAT/LISTAS/Listas Bruno 12-04 /vegetais_especie.txt", what = "character", sep = "\n")
reino.vegetal.especie.lexico
##### absolute frequency of the set
corpuslist_DTM_lemma.freq.vegetal.especie <- corpuslist_DTM_lemma.freq[reino.vegetal.especie.lexico]
vegetal3 <- corpuslist_DTM_lemma.freq.vegetal.especie
##### total of occurrences of the set in books
corpuslist_DTM_lemma.freq.vegetal.especie.values <- na.omit(corpuslist_DTM_lemma.freq.vegetal.especie)
vegetal.especie.lexico.values <- names(corpuslist_DTM_lemma.freq.vegetal.especie.values)
corpuslist_DTM_lemma.matrix[vegetal.especie.lexico.values, , drop = FALSE]
#### mineral
reino.mineral.especie.lexico <- scan("/Users/patriciareina/Desktop/FAT/LISTAS/Listas Bruno 12-04 /minerais_especie.txt", what = "character", sep = "\n")
reino.mineral.especie.lexico
##### absolute frequency of the set
corpuslist_DTM_lemma.freq.mineral.especie <- corpuslist_DTM_lemma.freq[reino.mineral.especie.lexico]
mineral3 <- corpuslist_DTM_lemma.freq.mineral.especie
##### total of occurrences of the set in books
corpuslist_DTM_lemma.freq.mineral.especie.values <- na.omit(corpuslist_DTM_lemma.freq.mineral.especie)
mineral.especie.lexico.values <- names(corpuslist_DTM_lemma.freq.mineral.especie.values)
corpuslist_DTM_lemma.matrix[mineral.especie.lexico.values, , drop = FALSE]
### absolute frequency of the specific terms (totals over the corpus)
#### the three specific lexicons put together, and their frequencies
reino.especie <- c(
  reino.animal.especie.lexico,
  reino.vegetal.especie.lexico,
  reino.mineral.especie.lexico
)
corpuslist_DTM_lemma.freq[reino.especie]
#### one total per kingdom
reino.especie.freq.data <- data.frame(
  animal = sum(corpuslist_DTM_lemma.freq.animal.especie.values),
  vegetal = sum(corpuslist_DTM_lemma.freq.vegetal.especie.values),
  mineral = sum(corpuslist_DTM_lemma.freq.mineral.especie.values)
)
### data preparation for 'absolute frequency of specific terms' [external output for RAWGraphs]
#### long format, rows with NA dropped, stack() columns (values/ind) renamed
reino.especie.freq.data.stack <- na.omit(stack(reino.especie.freq.data))
reino.especie.freq.data.stack <- select(
  reino.especie.freq.data.stack,
  `specific terms` = ind,
  frequency = values
)
write.csv(
  reino.especie.freq.data.stack,
  file = "reino.especie.freq.data.stack.csv",
  row.names = FALSE
)
### relative frequency of specific terms
#### each kingdom's specific-term total as a percentage of the sum of all term frequencies
reino.especie.freq.rel.data <- data.frame(
  animal = sum(corpuslist_DTM_lemma.freq.animal.especie.values) / sum(corpuslist_DTM_lemma.freq) * 100,
  vegetal = sum(corpuslist_DTM_lemma.freq.vegetal.especie.values) / sum(corpuslist_DTM_lemma.freq) * 100,
  mineral = sum(corpuslist_DTM_lemma.freq.mineral.especie.values) / sum(corpuslist_DTM_lemma.freq) * 100
)
### data preparation for 'relative frequency of specific terms' [external output for RAWGraphs]
#### rounding to two decimals; the rounding is wrapped in a function because passing
#### extra arguments through across(..., round, 2) is deprecated in current dplyr
reino.especie.freq.rel.data <- reino.especie.freq.rel.data %>%
  mutate(across(1:3, function(x) round(x, digits = 2)))
reino.especie.freq.rel.data.stack <- stack(reino.especie.freq.rel.data)
names(reino.especie.freq.rel.data.stack) <- c("relative frequency", "specific terms")
reino.especie.freq.rel.data.stack <- select(reino.especie.freq.rel.data.stack, `specific terms`, `relative frequency`)
write.csv(reino.especie.freq.rel.data.stack, file = "reino.especie.freq.rel.data.stack.csv", row.names = FALSE)
### visualization optimization: putting together abs. and rel. freqs. in one viz
#### the relative-frequency column is appended to the absolute-frequency table
reino.especie.freq.abs.rel.data.stack <- cbind(reino.especie.freq.data.stack, reino.especie.freq.rel.data.stack[2])
write.csv(reino.especie.freq.abs.rel.data.stack, file = "reino.especie.freq.abs.rel.data.stack.csv", row.names = FALSE)
### visualization optimization: adding the related kingdom for each term
#### for each kingdom: long format, a constant kingdom label, rows with NA dropped,
#### then the three columns get their final names
#### animal
animal3.stack <- stack(animal3)
animal3.stack[["kingdom"]] <- "animal"
animal3.stack <- na.omit(animal3.stack)
names(animal3.stack) <- c("frequency", "terms", "kingdom")
#### vegetal
vegetal3.stack <- stack(vegetal3)
vegetal3.stack[["kingdom"]] <- "vegetal"
vegetal3.stack <- na.omit(vegetal3.stack)
names(vegetal3.stack) <- c("frequency", "terms", "kingdom")
#### mineral
mineral3.stack <- stack(mineral3)
mineral3.stack[["kingdom"]] <- "mineral"
mineral3.stack <- na.omit(mineral3.stack)
names(mineral3.stack) <- c("frequency", "terms", "kingdom")
#### merging the data, in order
reino.especie.anim.veg.min.stack <- rbind(
  animal3.stack,
  vegetal3.stack,
  mineral3.stack
)
write.csv(
  reino.especie.anim.veg.min.stack,
  file = "reino.especie.anim.veg.min.stack.csv",
  row.names = FALSE
)
### occurrences of specific terms per book
#### total of occurrences in just one column in matrix
#### drop = FALSE keeps the selection a matrix even when a kingdom has exactly one
#### word; without it colSums() fails on the resulting plain vector
colSums(corpuslist_DTM_lemma.matrix[animal.especie.lexico.values, , drop = FALSE])
colSums(corpuslist_DTM_lemma.matrix[vegetal.especie.lexico.values, , drop = FALSE])
colSums(corpuslist_DTM_lemma.matrix[mineral.especie.lexico.values, , drop = FALSE])
#### building data frame: one column per kingdom, one row per book
reino.especie.freq.per.livro.data <- data.frame(
  animal = colSums(corpuslist_DTM_lemma.matrix[animal.especie.lexico.values, , drop = FALSE]),
  vegetal = colSums(corpuslist_DTM_lemma.matrix[vegetal.especie.lexico.values, , drop = FALSE]),
  mineral = colSums(corpuslist_DTM_lemma.matrix[mineral.especie.lexico.values, , drop = FALSE])
)
### data preparation for 'occurrences of specific terms per book' [external output for RAWGraphs]
#### transposed, so that the kingdoms are the rows and the books the columns
reino.especie.freq.per.livro.data <- t(reino.especie.freq.per.livro.data)
colnames(reino.especie.freq.per.livro.data) <- c(nomes.livros.arr)
reino.especie.freq.per.livro.data <- as.data.frame(reino.especie.freq.per.livro.data)
reino.especie.freq.per.livro.data.row <- rownames_to_column(reino.especie.freq.per.livro.data, var = "reinos")
write.csv(reino.especie.freq.per.livro.data.row, file = "reino.especie.freq.per.livro.data.row.csv", row.names = FALSE)
## All together: literal, generic and specific terms
### absolute frequency all the terms
#### for each kingdom: literal + generic + specific frequencies in one vector,
#### NA entries dropped, then summed
#### animal
animal.total <- c(animal1, animal2, animal3)
animal.total.clean <- na.omit(animal.total)
animal.total.sum <- sum(animal.total.clean)
#### vegetal
vegetal.total <- c(vegetal1, vegetal2, vegetal3)
vegetal.total.clean <- na.omit(vegetal.total)
vegetal.total.sum <- sum(vegetal.total.clean)
#### mineral
mineral.total <- c(mineral1, mineral2, mineral3)
mineral.total.clean <- na.omit(mineral.total)
mineral.total.sum <- sum(mineral.total.clean)
#### building data frame: one column per kingdom
reino.total.freq.data <- data.frame(
  animal = animal.total.sum,
  vegetal = vegetal.total.sum,
  mineral = mineral.total.sum
)
### data preparation for 'absolute frequency all the terms' [external output for RAWGraphs]
#### long format, rows with NA dropped, stack() columns (values/ind) renamed
reino.total.freq.data.stack <- na.omit(stack(reino.total.freq.data))
reino.total.freq.data.stack <- select(
  reino.total.freq.data.stack,
  `total terms` = ind,
  frequency = values
)
write.csv(
  reino.total.freq.data.stack,
  file = "reino.total.freq.data.stack.csv",
  row.names = FALSE
)
### relative frequency all the terms
#### each kingdom's overall total as a percentage of the sum of all term frequencies
reino.total.freq.rel.data <- data.frame(
  animal = animal.total.sum / sum(corpuslist_DTM_lemma.freq) * 100,
  vegetal = vegetal.total.sum / sum(corpuslist_DTM_lemma.freq) * 100,
  mineral = mineral.total.sum / sum(corpuslist_DTM_lemma.freq) * 100
)
### data preparation for 'relative frequency all the terms' [external output for RAWGraphs]
#### rounding to two decimals; the rounding is wrapped in a function because passing
#### extra arguments through across(..., round, 2) is deprecated in current dplyr
reino.total.freq.rel.data <- reino.total.freq.rel.data %>%
  mutate(across(1:3, function(x) round(x, digits = 2)))
reino.total.freq.rel.data.stack <- stack(reino.total.freq.rel.data)
names(reino.total.freq.rel.data.stack) <- c("relative frequency", "total terms")
reino.total.freq.rel.data.stack <- select(reino.total.freq.rel.data.stack, `total terms`, `relative frequency`)
write.csv(reino.total.freq.rel.data.stack, file = "reino.total.freq.rel.data.stack.csv", row.names = FALSE)
### visualization optimization: putting together abs. and rel. freqs. in one viz
#### the relative-frequency column is appended to the absolute-frequency table
reino.total.freq.abs.rel.data.stack <- cbind(reino.total.freq.data.stack, reino.total.freq.rel.data.stack[2])
write.csv(reino.total.freq.abs.rel.data.stack, file = "reino.total.freq.abs.rel.data.stack.csv", row.names = FALSE)
### occurrences per books all the terms
#### merging the groups: literal term + generic words + specific words of each kingdom
#### NOTE(review): the literal terms are used as row names below, so each of them is
#### assumed to occur in the corpus - confirm
animal.total.lexico.values <- c("animal", animal.tipo.lexico.values, animal.especie.lexico.values)
vegetal.total.lexico.values <- c("vegetal", vegetal.tipo.lexico.values, vegetal.especie.lexico.values)
mineral.total.lexico.values <- c("mineral", mineral.tipo.lexico.values, mineral.especie.lexico.values)
#### total of occurrences in just one column in matrix
#### drop = FALSE keeps the selection a matrix even when a group holds only its
#### literal term; without it colSums() fails on the resulting plain vector
colSums(corpuslist_DTM_lemma.matrix[animal.total.lexico.values, , drop = FALSE])
colSums(corpuslist_DTM_lemma.matrix[vegetal.total.lexico.values, , drop = FALSE])
colSums(corpuslist_DTM_lemma.matrix[mineral.total.lexico.values, , drop = FALSE])
#### building data frame: one column per kingdom, one row per book
reino.total.freq.per.livro.data <- data.frame(
  animal = colSums(corpuslist_DTM_lemma.matrix[animal.total.lexico.values, , drop = FALSE]),
  vegetal = colSums(corpuslist_DTM_lemma.matrix[vegetal.total.lexico.values, , drop = FALSE]),
  mineral = colSums(corpuslist_DTM_lemma.matrix[mineral.total.lexico.values, , drop = FALSE])
)
### data preparation for 'occurrences per books all the terms' [external output for RAWGraphs]
#### transposed, so that the kingdoms are the rows and the books the columns
reino.total.freq.per.livro.data <- t(reino.total.freq.per.livro.data)
colnames(reino.total.freq.per.livro.data) <- c(nomes.livros.arr)
reino.total.freq.per.livro.data <- as.data.frame(reino.total.freq.per.livro.data)
reino.total.freq.per.livro.data.row <- rownames_to_column(reino.total.freq.per.livro.data, var = "reinos")
write.csv(reino.total.freq.per.livro.data.row, file = "reino.total.freq.per.livro.data.row.csv", row.names = FALSE)
FOUR ELEMENTS ANALYSIS
### setting directory for external outputs
getwd()
setwd(dir = "/Users/patriciareina/Desktop/FAT/RAWGRAPHS")
## The four elements
### absolute frequency of the four elements
corpuslist_DTM_lemma.freq["terra"]
corpuslist_DTM_lemma.freq["água"]
corpuslist_DTM_lemma.freq["ar"]
corpuslist_DTM_lemma.freq["fogo"]
#### making sets
quatro.elementos <- c("terra", "água", "ar", "fogo")
corpuslist_DTM_lemma.freq[quatro.elementos]
### data preparation for 'absolute frequency of the four elements' [external output for RAWGraphs]
#### long format; stack() columns (values/ind) renamed, terms first
corpuslist_DTM_lemma.freq.quatro.elementos.stack <- stack(corpuslist_DTM_lemma.freq[quatro.elementos])
corpuslist_DTM_lemma.freq.quatro.elementos.stack <- select(
  corpuslist_DTM_lemma.freq.quatro.elementos.stack,
  terms = ind,
  frequency = values
)
write.csv(
  corpuslist_DTM_lemma.freq.quatro.elementos.stack,
  file = "corpuslist_DTM_lemma.freq.quatro.elementos.stack.csv",
  row.names = FALSE
)
### relative frequency of the four elements
#### each element as a percentage of the sum of all term frequencies
quatro.elementos.uni.data <- data.frame(
  terra = corpuslist_DTM_lemma.freq["terra"] * 100 / sum(corpuslist_DTM_lemma.freq),
  água = corpuslist_DTM_lemma.freq["água"] * 100 / sum(corpuslist_DTM_lemma.freq),
  ar = corpuslist_DTM_lemma.freq["ar"] * 100 / sum(corpuslist_DTM_lemma.freq),
  fogo = corpuslist_DTM_lemma.freq["fogo"] * 100 / sum(corpuslist_DTM_lemma.freq)
)
sum(corpuslist_DTM_lemma.freq) #total of words
#### uniforming percentage values
#### rounding to two decimals; the rounding is wrapped in a function because passing
#### extra arguments through across(..., round, 2) is deprecated in current dplyr
quatro.elementos.uni.data <- quatro.elementos.uni.data %>%
  mutate(across(1:4, function(x) round(x, digits = 2)))
### data preparation for 'relative frequency of the four elements' [external output for RAWGraphs]
quatro.elementos.uni.data.stack <- stack(quatro.elementos.uni.data)
names(quatro.elementos.uni.data.stack) <- c("relative frequency", "terms")
quatro.elementos.uni.data.stack <- select(quatro.elementos.uni.data.stack, terms, `relative frequency`)
write.csv(quatro.elementos.uni.data.stack, file = "quatro.elementos.uni.data.stack.csv", row.names = FALSE)
### visualization optimization: putting together abs. and rel. freqs. in one viz
#### the relative-frequency column is appended to the absolute-frequency table
quatro.elementos.freq.abs.rel.stack <- cbind(corpuslist_DTM_lemma.freq.quatro.elementos.stack, quatro.elementos.uni.data.stack[2])
write.csv(quatro.elementos.freq.abs.rel.stack, file = "quatro.elementos.freq.abs.rel.stack.csv", row.names = FALSE)
### in how many books do these four elements' terms occur?
corpuslist_DTM_lemma.freq.doc[quatro.elementos]
### data preparation for 'in how many books...' [external output for RAWGraphs]
#### long format; stack() columns (values/ind) renamed, terms first
corpuslist_DTM_lemma.freq.doc.quatro.elem.stack <- stack(corpuslist_DTM_lemma.freq.doc[quatro.elementos])
corpuslist_DTM_lemma.freq.doc.quatro.elem.stack <- select(
  corpuslist_DTM_lemma.freq.doc.quatro.elem.stack,
  terms = ind,
  `number of books` = values
)
write.csv(
  corpuslist_DTM_lemma.freq.doc.quatro.elem.stack,
  file = "corpuslist_DTM_lemma.freq.doc.quatro.elem.stack.csv",
  row.names = FALSE
)
### in which books do these four elements' terms occur?
corpuslist_DTM_lemma.matrix[quatro.elementos, ]
### data preparation for 'in which books do these...' [external output for RAWGraphs]
corpuslist_DTM_lemma.matrix.quatro.elem.df <- data.frame(corpuslist_DTM_lemma.matrix[quatro.elementos, ]) #adjusting hierarchies
colnames(corpuslist_DTM_lemma.matrix.quatro.elem.df) <- nomes.livros.arr #the order of the books is replaced by their names
corpuslist_DTM_lemma.matrix.quatro.elem.df
#### the terms move from the row names into a regular "terms" column before export
corpuslist_DTM_lemma.matrix.quatro.elem.df.row <- rownames_to_column(
  corpuslist_DTM_lemma.matrix.quatro.elem.df,
  var = "terms"
)
write.csv(
  corpuslist_DTM_lemma.matrix.quatro.elem.df.row,
  file = "corpuslist_DTM_lemma.matrix.quatro.elem.df.row.csv",
  row.names = FALSE
)
## Terms associated with the four elements
### retrieving data from lists - associated terms
#### set of terms associated with the "terra" element (earth)
# One lexicon entry per line of the list file (scan splits on newlines only).
terra.lexico <- scan("/Users/patriciareina/Desktop/FAT/LISTAS/Listas Helena 12-04/ARR_terra.txt", what = "character", sep = "\n")
##### absolute frequency of the set
# Lookup by name: entries without a match in the frequency vector yield NA
# (assumes corpuslist_DTM_lemma.freq is a vector named by lemma).
corpuslist_DTM_lemma.freq.terra <- corpuslist_DTM_lemma.freq[terra.lexico]
corpuslist_DTM_lemma.freq.terra
##### total of occurrences of the set in books
corpuslist_DTM_lemma.freq.terra.values <- na.omit(corpuslist_DTM_lemma.freq.terra) # suppression of null/NA values
# Terms that survived the NA removal, i.e. those found in the corpus.
# names() also copes with the case where none is found (empty result, no error).
terra.lexico.values <- names(corpuslist_DTM_lemma.freq.terra.values)
# drop = FALSE keeps the matrix layout even when only one term remains.
corpuslist_DTM_lemma.matrix[terra.lexico.values, , drop = FALSE]
##### in how many books does the set occur?
corpuslist_DTM_lemma.freq.doc[terra.lexico.values]
#### set of terms associated with the "água" element (water)
# One lexicon entry per line of the list file (scan splits on newlines only).
agua.lexico <- scan("/Users/patriciareina/Desktop/FAT/LISTAS/Listas Helena 12-04/ARR_Agua.txt", what = "character", sep = "\n")
##### absolute frequency of the set
# Lookup by name: entries without a match in the frequency vector yield NA
# (assumes corpuslist_DTM_lemma.freq is a vector named by lemma).
corpuslist_DTM_lemma.freq.agua <- corpuslist_DTM_lemma.freq[agua.lexico]
corpuslist_DTM_lemma.freq.agua
##### total of occurrences of the set in books
corpuslist_DTM_lemma.freq.agua.values <- na.omit(corpuslist_DTM_lemma.freq.agua) # suppression of null/NA values
# Terms that survived the NA removal, i.e. those found in the corpus.
# names() also copes with the case where none is found (empty result, no error).
agua.lexico.values <- names(corpuslist_DTM_lemma.freq.agua.values)
# drop = FALSE keeps the matrix layout even when only one term remains.
corpuslist_DTM_lemma.matrix[agua.lexico.values, , drop = FALSE]
##### in how many books does the set occur?
corpuslist_DTM_lemma.freq.doc[agua.lexico.values]
#### set of terms associated with the "ar" element (air)
# One lexicon entry per line of the list file (scan splits on newlines only).
ar.lexico <- scan("/Users/patriciareina/Desktop/FAT/LISTAS/Listas Helena 12-04/ARR_ar.txt", what = "character", sep = "\n")
##### absolute frequency of the set
# Lookup by name: entries without a match in the frequency vector yield NA
# (assumes corpuslist_DTM_lemma.freq is a vector named by lemma).
corpuslist_DTM_lemma.freq.ar <- corpuslist_DTM_lemma.freq[ar.lexico]
corpuslist_DTM_lemma.freq.ar
##### total of occurrences of the set in books
corpuslist_DTM_lemma.freq.ar.values <- na.omit(corpuslist_DTM_lemma.freq.ar) # suppression of null/NA values
# Terms that survived the NA removal, i.e. those found in the corpus.
# names() also copes with the case where none is found (empty result, no error).
ar.lexico.values <- names(corpuslist_DTM_lemma.freq.ar.values)
# drop = FALSE keeps the matrix layout even when only one term remains.
corpuslist_DTM_lemma.matrix[ar.lexico.values, , drop = FALSE]
##### in how many books does the set occur?
corpuslist_DTM_lemma.freq.doc[ar.lexico.values]
#### set of terms associated with the "fogo" element (fire)
# One lexicon entry per line of the list file (scan splits on newlines only).
fogo.lexico <- scan("/Users/patriciareina/Desktop/FAT/LISTAS/Listas Helena 12-04/ARR_fogo.txt", what = "character", sep = "\n")
##### absolute frequency of the set
# Lookup by name: entries without a match in the frequency vector yield NA
# (assumes corpuslist_DTM_lemma.freq is a vector named by lemma).
corpuslist_DTM_lemma.freq.fogo <- corpuslist_DTM_lemma.freq[fogo.lexico]
corpuslist_DTM_lemma.freq.fogo
##### total of occurrences of the set in books
corpuslist_DTM_lemma.freq.fogo.values <- na.omit(corpuslist_DTM_lemma.freq.fogo) # suppression of null/NA values
# Terms that survived the NA removal, i.e. those found in the corpus.
# names() also copes with the case where none is found (empty result, no error).
fogo.lexico.values <- names(corpuslist_DTM_lemma.freq.fogo.values)
# drop = FALSE keeps the matrix layout even when only one term remains.
corpuslist_DTM_lemma.matrix[fogo.lexico.values, , drop = FALSE]
##### in how many books does the set occur?
corpuslist_DTM_lemma.freq.doc[fogo.lexico.values]
### absolute frequency of all the associated terms (literal terms included), considering each element as a category
# Per element: join the frequencies of the associated terms with the frequency
# of the literal term, drop the NA entries, then add up what is left.
# NOTE(review): c() does not de-duplicate, so a literal term that is also
# listed in its own lexicon file would be counted twice - confirm against the lists.
#### terra
total.terra <- c(corpuslist_DTM_lemma.freq.terra, corpuslist_DTM_lemma.freq["terra"])
total.terra.clean <- na.omit(total.terra)
total.terra.clean.sum <- sum(total.terra.clean)
#### ar
total.ar <- c(corpuslist_DTM_lemma.freq.ar, corpuslist_DTM_lemma.freq["ar"])
total.ar.clean <- na.omit(total.ar)
total.ar.clean.sum <- sum(total.ar.clean)
#### água
total.agua <- c(corpuslist_DTM_lemma.freq.agua, corpuslist_DTM_lemma.freq["água"])
total.agua.clean <- na.omit(total.agua)
total.agua.clean.sum <- sum(total.agua.clean)
#### fogo
total.fogo <- c(corpuslist_DTM_lemma.freq.fogo, corpuslist_DTM_lemma.freq["fogo"])
total.fogo.clean <- na.omit(total.fogo)
total.fogo.clean.sum <- sum(total.fogo.clean)
#### comparing all the frequencies
# One-row table with the summed frequency of each element category.
quatro.elem.data <- data.frame(
  terra = total.terra.clean.sum,
  ar = total.ar.clean.sum,
  água = total.agua.clean.sum,
  fogo = total.fogo.clean.sum
)
quatro.elem.data
### data preparation for 'absolute frequency of all the...' [external output for RAWGraphs]
# Long format: one row per category, with the category label first.
quatro.elem.data.stack <- quatro.elem.data %>%
  stack() %>%
  setNames(c("frequency", "related terms")) %>%
  select(`related terms`, frequency)
write.csv(
  quatro.elem.data.stack,
  file = "quatro.elem.data.stack.csv",
  row.names = FALSE
)
### relative frequency of all the associated terms (literal terms included), considering each element as a category
# quatro.elem.data already holds the four category totals, so the percentages
# are derived from it in one step; the corpus total is computed only once.
quatro.elem.data.per <- quatro.elem.data / sum(corpuslist_DTM_lemma.freq) * 100
quatro.elem.data.per
#### rounding the percentage values to two decimal places
# An explicit function is passed to across(): handing extra arguments
# (round, 2) through `...` is deprecated since dplyr 1.1.0.
quatro.elem.data.per <- quatro.elem.data.per %>%
  mutate(across(1:4, function(x) round(x, 2)))
### data preparation for 'relative frequency of all the...' [external output for RAWGraphs]
# Long format: one row per category, with the category label first.
quatro.elem.data.per.stack <- stack(quatro.elem.data.per)
names(quatro.elem.data.per.stack) <- c("relative frequency", "related terms")
quatro.elem.data.per.stack <- select(quatro.elem.data.per.stack, `related terms`, `relative frequency`)
write.csv(quatro.elem.data.per.stack, file = "quatro.elem.data.per.stack.csv", row.names = FALSE)
### visualization optimization: putting together abs. and rel. freqs. in one viz
# Only the percentage column (second column) is appended to the absolute table.
quatro.elem.abs.rel.data.stack <- cbind(quatro.elem.data.stack, quatro.elem.data.per.stack[2])
write.csv(quatro.elem.abs.rel.data.stack, file = "quatro.elem.abs.rel.data.stack.csv", row.names = FALSE)
### absolute frequency of all the associated terms (literal terms included), discriminating each term and specifying its category
# Each section turns one category's frequencies into a long table and tags
# every row with the name of the category it belongs to.
#### terra
total.termos.ass.terra.stack <- stack(total.terra.clean)
total.termos.ass.terra.stack[["element"]] <- "terra"
colnames(total.termos.ass.terra.stack) <- c("frequency", "related terms", "element")
#### ar
total.termos.ass.ar.stack <- stack(total.ar.clean)
total.termos.ass.ar.stack[["element"]] <- "ar"
colnames(total.termos.ass.ar.stack) <- c("frequency", "related terms", "element")
#### água
total.termos.ass.agua.stack <- stack(total.agua.clean)
total.termos.ass.agua.stack[["element"]] <- "água"
colnames(total.termos.ass.agua.stack) <- c("frequency", "related terms", "element")
#### fogo
total.termos.ass.fogo.stack <- stack(total.fogo.clean)
total.termos.ass.fogo.stack[["element"]] <- "fogo"
colnames(total.termos.ass.fogo.stack) <- c("frequency", "related terms", "element")
### data preparation for 'absolute frequency of all the... discriminating each term...' [external output for RAWGraphs]
# The four tables share the same columns, so they are simply appended.
total.termos.ass.todos.elem.stack <- rbind(
  total.termos.ass.terra.stack,
  total.termos.ass.ar.stack,
  total.termos.ass.agua.stack,
  total.termos.ass.fogo.stack
)
write.csv(
  total.termos.ass.todos.elem.stack,
  file = "total.termos.ass.todos.elem.stack.csv",
  row.names = FALSE
)
### occurrences in each book of both associated terms and literal terms of the four elements
# Per element: the literal term comes first, followed by its associated terms.
total.livros.terra <- c(
  corpuslist_DTM_lemma.freq.doc["terra"],
  corpuslist_DTM_lemma.freq.doc[terra.lexico.values]
)
total.livros.ar <- c(
  corpuslist_DTM_lemma.freq.doc["ar"],
  corpuslist_DTM_lemma.freq.doc[ar.lexico.values]
)
total.livros.agua <- c(
  corpuslist_DTM_lemma.freq.doc["água"],
  corpuslist_DTM_lemma.freq.doc[agua.lexico.values]
)
total.livros.fogo <- c(
  corpuslist_DTM_lemma.freq.doc["fogo"],
  corpuslist_DTM_lemma.freq.doc[fogo.lexico.values]
)
total.livros <- c(total.livros.terra, total.livros.ar, total.livros.agua, total.livros.fogo)
### data preparation for 'occurrences in each book of both associated...' [external output for RAWGraphs]
# Long format with the term column first; rows holding NA are removed last.
total.livros.quatro.elementos.relacionados.stack <- total.livros %>%
  stack() %>%
  setNames(c("frequency", "related terms")) %>%
  select(`related terms`, frequency) %>%
  na.omit()
write.csv(
  total.livros.quatro.elementos.relacionados.stack,
  file = "total.livros.quatro.elementos.relacionados.stack.csv",
  row.names = FALSE
)
### visualization optimization: identifying each associated term with one of the four elements as a category
# Per element: long table, category tag on every row, rows with NA removed,
# then the final column labels.
#### terra
total.livros.terra.stack <- stack(total.livros.terra)
total.livros.terra.stack[["element"]] <- "terra"
total.livros.terra.stack <- setNames(
  na.omit(total.livros.terra.stack),
  c("frequency", "related terms", "element")
)
#### ar
total.livros.ar.stack <- stack(total.livros.ar)
total.livros.ar.stack[["element"]] <- "ar"
total.livros.ar.stack <- setNames(
  na.omit(total.livros.ar.stack),
  c("frequency", "related terms", "element")
)
#### água
total.livros.agua.stack <- stack(total.livros.agua)
total.livros.agua.stack[["element"]] <- "água"
total.livros.agua.stack <- setNames(
  na.omit(total.livros.agua.stack),
  c("frequency", "related terms", "element")
)
#### fogo
total.livros.fogo.stack <- stack(total.livros.fogo)
total.livros.fogo.stack[["element"]] <- "fogo"
total.livros.fogo.stack <- setNames(
  na.omit(total.livros.fogo.stack),
  c("frequency", "related terms", "element")
)
#### merging data
total.livro.todos.elem.uni.rel.stack <- rbind(
  total.livros.terra.stack,
  total.livros.ar.stack,
  total.livros.agua.stack,
  total.livros.fogo.stack
)
write.csv(
  total.livro.todos.elem.uni.rel.stack,
  file = "total.livro.todos.elem.uni.rel.stack.csv",
  row.names = FALSE
)
### comparing all the four elements categories by their occurrences in each book
#### combining associated and literal terms
terra.rel.lexico.values <- c("terra", terra.lexico.values)
ar.rel.lexico.values <- c("ar", ar.lexico.values)
agua.rel.lexico.values <- c("água", agua.lexico.values)
fogo.rel.lexico.values <- c("fogo", fogo.lexico.values)
#### summing up occurrences in one column
# drop = FALSE keeps a one-row selection as a matrix; without it a category
# whose only term is the literal one collapses to a plain vector and
# colSums() stops with an error.
colSums(corpuslist_DTM_lemma.matrix[terra.rel.lexico.values, , drop = FALSE])
colSums(corpuslist_DTM_lemma.matrix[ar.rel.lexico.values, , drop = FALSE])
colSums(corpuslist_DTM_lemma.matrix[agua.rel.lexico.values, , drop = FALSE])
colSums(corpuslist_DTM_lemma.matrix[fogo.rel.lexico.values, , drop = FALSE])
#### building data frame
# One column per element category, one row per column of the term matrix.
quatro.elementos.rel.lexico.data <- data.frame(
  terra = colSums(corpuslist_DTM_lemma.matrix[terra.rel.lexico.values, , drop = FALSE]),
  ar = colSums(corpuslist_DTM_lemma.matrix[ar.rel.lexico.values, , drop = FALSE]),
  água = colSums(corpuslist_DTM_lemma.matrix[agua.rel.lexico.values, , drop = FALSE]),
  fogo = colSums(corpuslist_DTM_lemma.matrix[fogo.rel.lexico.values, , drop = FALSE])
)
### data preparation for 'comparing all the four elements categories...' [external output for RAWGraphs]
# Transpose so that each element category becomes a row, then label the
# columns with the book names.
quatro.elementos.rel.lexico.data <- t(quatro.elementos.rel.lexico.data)
colnames(quatro.elementos.rel.lexico.data) <- c(nomes.livros.arr)
# t() returns a matrix; convert back so the row names can become a column.
quatro.elementos.rel.lexico.data <- as.data.frame(quatro.elementos.rel.lexico.data)
quatro.elementos.rel.lexico.data.row <- rownames_to_column(quatro.elementos.rel.lexico.data, var = "terms")
write.csv(quatro.elementos.rel.lexico.data.row, file = "quatro.elementos.rel.lexico.data.row.csv", row.names = FALSE)