Code and Scripts

The code developed for text analysis is presented below, complete with comments and annotations. Alternatively, the scripts can be downloaded as R files from the following links:

Visão Panorâmica (Bird’s-Eye View)

Reinos Animal, Vegetal e Mineral (Kingdoms of Nature)

Quatro Elementos (The Four Elements)

The code version shared here in open access, intended for inspection and reuse, corresponds to that used to generate datasets-bundle-2 and its visualizations [release 1, June 2024].


SUMMARY

RSTUDIO SETTINGS

DATA PREPROCESSING

SETTING DATA ANALYSIS

BIRD’S EYE VIEW ANALYSIS

KINGDOMS OF NATURE ANALYSIS

FOUR ELEMENTS ANALYSIS


RSTUDIO SETTINGS

## List of required packages 

### tm - general use
### tidyverse (package collection) - general use
### stopwords - stopwords removal
### qdap - stopwords removal
### textstem - lemmatization process
### quanteda - lemmatization process

## Installing required packages

### Only the packages that are not installed yet are fetched, so re-running the
### script does not reinstall (and possibly upgrade) everything each time.
required_packages <- c("tm",
                       "tidyverse",
                       "stopwords",
                       "qdap",
                       "textstem",
                       "quanteda")
missing_packages <- setdiff(required_packages, rownames(installed.packages()))
if (length(missing_packages) > 0) {
  install.packages(missing_packages)
}
 
## Loading installed packages

library(tm)       
library(tidyverse)
library(stopwords)
library(qdap)
library(textstem)
library(quanteda)

DATA PREPROCESSING

## 1st - Importing Data

### directory assignment
arrcorpus <- "/Users/patriciareina/Desktop/FAT/BASE DE DADOS/Obra Completa TXT/TXT-livros/OP-all"

### verifying the files
arrfiles <- list.files(path = arrcorpus)
arrfiles #files correctly ordered
### stopping early when the directory is missing or empty: pasting the
### directory with an empty file list would yield the bare "directory/" path,
### and reading it would fail with a confusing error
if (length(arrfiles) == 0) {
  stop("No corpus files found in: ", arrcorpus, call. = FALSE)
}

### building corpus arrangement: [[ ]] file, [ ] verse (strings)
#### full path of every file in the corpus directory
corpuslist <- file.path(arrcorpus, arrfiles)
corpuslist
typeof(corpuslist) #character

#### one list element per file, holding that file's lines
corpus.list <- lapply(corpuslist, FUN = readLines)
corpus.list[[1]]
typeof(corpus.list) #list

## 2nd - Data cleaning

### collapsing the lines of each file into one single string
corpus.list.line <- lapply(
  corpus.list,
  function(file_lines) paste(file_lines, collapse = " ")
)
corpus.list.line[[78]]
typeof(corpus.list.line) #list

### de-capitalizing the words
corpus.list.line.lower <- tolower(corpus.list.line)
corpus.list.line.lower[[78]]
typeof(corpus.list.line.lower) #character

### tokenizing on non-word characters, keeping the text/file separation
corpus.list.line.clean <- strsplit(corpus.list.line.lower, split = "\\W")
corpus.list.line.clean[[78]]
typeof(corpus.list.line.clean) #list
typeof(corpus.list.line.clean[[78]][985]) #character

## 3rd - Corpus 

### making a Simple Corpus: lists of vectors
### (the per-file token vectors built in the previous step are wrapped in a
### tm VectorSource and turned into a corpus)
corpus.as.list <- Corpus(VectorSource(as.vector(corpus.list.line.clean))) 
### printing the corpus summary and checking storage types
corpus.as.list 
### NOTE(review): the next line checks the directory path variable, not the
### corpus -- possibly a leftover; confirm before removing
typeof(arrcorpus) #character
typeof(corpus.as.list) #list
### inspecting the second document of the corpus
inspect(corpus.as.list[[2]]) 

## 4th - Removing stopwords

### custom stopword list, one entry per line (adapted from "stopwords-iso")
allstops_iso_alt <- scan(
  file = "/Users/patriciareina/Desktop/FAT/LISTAS/stopwords/stopwords-pt_txt_alterado_pr_24-01-24.txt",
  what = "character",
  sep = "\n"
)
allstops_iso_alt

### dropping every listed stopword from the corpus
corpus.as.list <- tm_map(corpus.as.list, removeWords, allstops_iso_alt)
inspect(corpus.as.list[[1]])
inspect(corpus.as.list[[48]])

## 5th - More Data Cleaning

### stripping digits
corpus.as.list <- tm_map(
  corpus.as.list,
  content_transformer(removeNumbers)
)
inspect(corpus.as.list[[48]])

### stripping punctuation marks
corpus.as.list <- tm_map(
  corpus.as.list,
  content_transformer(removePunctuation)
)
inspect(corpus.as.list[48])

### collapsing runs of white space
corpus.as.list <- tm_map(
  corpus.as.list,
  content_transformer(stripWhitespace)
)
typeof(corpus.as.list[48]) #list

## 6th - Lemmatization

corpus.as.list.lemma <- corpus.as.list

### loading the list/dictionary used for lemmatization (based on "lemmatization-pt" from Global Glossary Project)
### the file is read without a header row; its two columns are named lemma and term
lemma_dic <- read.delim(file = "/Users/patriciareina/Desktop/FAT/LISTAS/lematização/lemmatization-pt_rev_06-06-24.txt", header = FALSE, stringsAsFactors = FALSE)
names(lemma_dic) <- c("lemma", "term")
head(lemma_dic, 1000)
typeof(lemma_dic) #list

### fixing duplicate terms, keeping only first occurrences
lemma_dic_unique <- lemma_dic[!duplicated(lemma_dic$term), ]

### list of control for removed duplicated terms [external output]
#### plain assignment: wrapping it in print() would dump the whole dictionary to the console
term_doc <- lemma_dic$term
term_doc[duplicated(term_doc)]
#### max.print is raised only while the duplicates are written out; the value
#### that was in effect before is restored afterwards, even if the write fails
old_options <- options(max.print = 99999)
tryCatch(
  capture.output(term_doc[duplicated(term_doc)], file = "duplicados_lemma_06-06-24.txt"),
  finally = options(old_options)
)

### reordering dictionary disposition for term-lemma instead of lemma-term
lemma_dic_unique_term_lemma <- select(lemma_dic_unique, term, lemma)
head(lemma_dic_unique_term_lemma, 10)

### lemmatization
#### each document's content is replaced by its lemmatized version;
#### seq_len() keeps the loop from running when the corpus is empty
for (i in seq_len(length(corpus.as.list.lemma))) {
  corpus.as.list.lemma[[i]][[1]] <- lemmatize_strings(
    corpus.as.list.lemma[[i]][[1]],
    dictionary = lemma_dic_unique_term_lemma
  )
}

SETTING DATA ANALYSIS

corpus.as.list.lemma # still a Simple Corpus

### making a DocumentTermMatrix (only terms with at least 2 characters are kept)
corpuslist_DTM_lemma <- DocumentTermMatrix(corpus.as.list.lemma, control = list(wordLengths = c(2, Inf)))
inspect(corpuslist_DTM_lemma)

### word frequency in corpus, publication division
#### transposed so that terms are the rows and books are the columns
corpuslist_DTM_lemma.inv <- t(corpuslist_DTM_lemma)
corpuslist_DTM_lemma.matrix <- as.matrix(corpuslist_DTM_lemma.inv)

### absolute frequency of each word in the whole corpus
#### the analyses below read corpuslist_DTM_lemma.freq and
#### corpuslist_DTM_lemma.freq.order, so both are defined here
corpuslist_DTM_lemma.freq <- rowSums(corpuslist_DTM_lemma.matrix) #alphabetical order
corpuslist_DTM_lemma.freq.order <- corpuslist_DTM_lemma.freq[order(corpuslist_DTM_lemma.freq, decreasing = TRUE)] #sort by decreasing order

### finding in how many books each word in the corpus occurs
#### counted on the dense term-by-book matrix built above, which avoids
#### converting the DocumentTermMatrix a second time
corpuslist_DTM_lemma.freq.doc <- rowSums(corpuslist_DTM_lemma.matrix > 0) #alphabetical order
corpuslist_DTM_lemma.freq.doc.order <- corpuslist_DTM_lemma.freq.doc[order(corpuslist_DTM_lemma.freq.doc, decreasing = TRUE)] #sort by decreasing order

BIRD’S EYE VIEW ANALYSIS

### output directory for the exported CSV files
getwd()
setwd("/Users/patriciareina/Desktop/FAT/RAWGRAPHS")

### the 10 most frequent words in corpus [absolute frequency]
corpuslist_DTM_lemma.freq.order[1:10]
### data preparation for 'the 10 more frequent words in corpus' viz [external output for RAWGraphs]
#### long format of the full ranking, also kept as a data frame
corpuslist_DTM_lemma.freq.order.stack <- stack(corpuslist_DTM_lemma.freq.order)
corpuslist_DTM_lemma.freq.order.stack.df <- data.frame(corpuslist_DTM_lemma.freq.order.stack)
#### long format of the top 10 only, with readable column names
corpuslist_DTM_lemma.freq.order.stack.top10words <- setNames(
  stack(corpuslist_DTM_lemma.freq.order[1:10]),
  c("frequency", "terms")
)
corpuslist_DTM_lemma.freq.order.stack.top10words
write.csv(
  corpuslist_DTM_lemma.freq.order.stack.top10words,
  file = "corpuslist_DTM_lemma.freq.order.stack.top10words.csv",
  row.names = FALSE
)

### the 10 most frequent words of the corpus, counted in each book [absolute frequency]
corpuslist_DTM_lemma.top10words.df <- data.frame(corpuslist_DTM_lemma.freq.order[1:10])
top10words <- rownames(corpuslist_DTM_lemma.top10words.df)
corpuslist_DTM_lemma.matrix[top10words, ]
### data preparation for 'the 10 more frequent words in each book' viz [external output for RAWGraphs]
#### rows = the top-10 terms, columns = books
corpuslist_DTM_lemma.matrix.top10words.df <- data.frame(corpuslist_DTM_lemma.matrix[top10words, ])
#### book names, read one per line
nomes.livros.arr <- scan(
  "/Users/patriciareina/Desktop/FAT/LISTAS/nomes_obras_poeticas_arr.txt",
  what = "character",
  sep = "\n"
)
#### the column headers are replaced by the book names
colnames(corpuslist_DTM_lemma.matrix.top10words.df) <- nomes.livros.arr
corpuslist_DTM_lemma.matrix.top10words.df #testing
#### the terms move from row names into a regular "terms" column before export
corpuslist_DTM_lemma.matrix.top10words.df.row <- rownames_to_column(
  corpuslist_DTM_lemma.matrix.top10words.df,
  var = "terms"
)
write.csv(
  corpuslist_DTM_lemma.matrix.top10words.df.row,
  file = "corpuslist_DTM_lemma.matrix.top10words.df.row.csv",
  row.names = FALSE
)

### which words occur in the largest number of books?
corpuslist_DTM_lemma.freq.doc.order[1:10]
corpuslist_DTM_lemma.freq.doc.order[1:15] #testing if there are other words with 76 occurrences. YES, three more.
### data preparation for 'which words are most frequent in most books?' viz [external output for RAWGraphs]
#### long format of the 13 leading words, renamed and with the terms column first
corpuslist_DTM_lemma.freq.order.doc.stack.top13doc <- stack(corpuslist_DTM_lemma.freq.doc.order[1:13]) %>%
  setNames(c("total of books", "terms")) %>%
  select(terms, `total of books`)
corpuslist_DTM_lemma.freq.order.doc.stack.top13doc
write.csv(
  corpuslist_DTM_lemma.freq.order.doc.stack.top13doc,
  file = "corpuslist_DTM_lemma.freq.order.doc.stack.csv.top13doc.csv",
  row.names = FALSE
)

### how often does each of these widespread words occur in each of the 79 books?
corpuslist_DTM_lemma.top13doc.df <- data.frame(corpuslist_DTM_lemma.freq.doc.order[1:13])
top13docs <- rownames(corpuslist_DTM_lemma.top13doc.df)
corpuslist_DTM_lemma.matrix[top13docs, ]
### data preparation for 'how many times do these most...?' viz [external output for RAWGraphs]
#### rows = the 13 terms, columns = books (headers replaced by the book names)
corpuslist_DTM_lemma.matrix.top13doc.df <- data.frame(corpuslist_DTM_lemma.matrix[top13docs, ])
colnames(corpuslist_DTM_lemma.matrix.top13doc.df) <- nomes.livros.arr
corpuslist_DTM_lemma.matrix.top13doc.df
#### the terms move from row names into a regular "terms" column before export
corpuslist_DTM_lemma.matrix.top13doc.df.row <- rownames_to_column(
  corpuslist_DTM_lemma.matrix.top13doc.df,
  var = "terms"
)
write.csv(
  corpuslist_DTM_lemma.matrix.top13doc.df.row,
  file = "corpuslist_DTM_lemma.matrix.top13doc.df.row.csv",
  row.names = FALSE
)

KINGDOMS OF NATURE ANALYSIS

### output directory for the exported CSV files
getwd()
setwd("/Users/patriciareina/Desktop/FAT/RAWGRAPHS")

## Literal terms

### absolute frequency of the literal terms "animal", "vegetal" and "mineral"
animal1 <- corpuslist_DTM_lemma.freq["animal"]
vegetal1 <- corpuslist_DTM_lemma.freq["vegetal"]
mineral1 <- corpuslist_DTM_lemma.freq["mineral"]
### data preparation for 'absolute frequency of the literal terms ' [external output for RAWGraphs]
#### one column per kingdom
reino.geral.freq.data <- data.frame(animal = animal1, vegetal = vegetal1, mineral = mineral1)
#### long format, renamed, with the terms column first
reino.geral.freq.data.stack <- stack(reino.geral.freq.data) %>%
  setNames(c("frequency", "terms")) %>%
  select(terms, frequency)
#### export
write.csv(
  reino.geral.freq.data.stack,
  file = "reino.geral.freq.data.stack.csv",
  row.names = FALSE
)

### relative frequency of the literal terms (percentage of all word occurrences in the corpus)
reino.geral.freq.rel.data <- data.frame(
  animal = animal1 / sum(corpuslist_DTM_lemma.freq) * 100,
  vegetal = vegetal1 / sum(corpuslist_DTM_lemma.freq) * 100,
  mineral = mineral1 / sum(corpuslist_DTM_lemma.freq) * 100
)
### data preparation for 'relative frequency of the literal terms' [external output for RAWGraphs]
#### rounding to two decimals; round() is wrapped in a function because passing
#### extra arguments through across() is deprecated since dplyr 1.1.0
reino.geral.freq.rel.data <- reino.geral.freq.rel.data %>%
  mutate(across(1:3, function(x) round(x, 2)))
#### long format, renamed, with the terms column first
reino.geral.freq.rel.data.stack <- stack(reino.geral.freq.rel.data)
names(reino.geral.freq.rel.data.stack) <- c("relative frequency", "terms")
reino.geral.freq.rel.data.stack <- select(reino.geral.freq.rel.data.stack, terms, `relative frequency`)
write.csv(reino.geral.freq.rel.data.stack, file = "reino.geral.freq.rel.data.stack.csv", row.names = FALSE)

### visualization optimization: putting together abs. and rel. freqs. in one viz
#### the relative-frequency column is appended to the absolute-frequency table
reino.geral.freq.abs.rel.data.stack <- cbind(reino.geral.freq.data.stack, reino.geral.freq.rel.data.stack[2])
write.csv(reino.geral.freq.abs.rel.data.stack, file = "reino.geral.freq.abs.rel.data.stack.csv", row.names = FALSE)

### number of books in which each literal term occurs
corpuslist_DTM_lemma.freq.doc["animal"]
corpuslist_DTM_lemma.freq.doc["vegetal"]
corpuslist_DTM_lemma.freq.doc["mineral"]

### occurrences of each literal term, book by book
corpuslist_DTM_lemma.matrix["animal", ]
corpuslist_DTM_lemma.matrix["vegetal", ]
corpuslist_DTM_lemma.matrix["mineral", ]
### data preparation for 'in which books do these literal terms occur?' [external output for RAWGraphs]
#### one column per term, one row per book
reino.geral.freq.per.livro.data <- data.frame(
  animal = corpuslist_DTM_lemma.matrix["animal", ],
  vegetal = corpuslist_DTM_lemma.matrix["vegetal", ],
  mineral = corpuslist_DTM_lemma.matrix["mineral", ]
)
#### transposed so that the terms are the rows and the books are the columns
reino.geral.freq.per.livro.data <- t(reino.geral.freq.per.livro.data)
#### the column headers are replaced by the book names
colnames(reino.geral.freq.per.livro.data) <- nomes.livros.arr
reino.geral.freq.per.livro.data <- as.data.frame(reino.geral.freq.per.livro.data)
#### the terms move from row names into a regular "terms" column before export
reino.geral.freq.per.livro.data.row <- rownames_to_column(
  reino.geral.freq.per.livro.data,
  var = "terms"
)
write.csv(
  reino.geral.freq.per.livro.data.row,
  file = "reino.geral.freq.per.livro.data.row.csv",
  row.names = FALSE
)

## Generic terms

### retrieving data from lists - generic terms
### For each kingdom: the term list is read (one term per line), looked up in
### the corpus frequencies (terms absent from the corpus come back as NA), and
### the NA entries are dropped. names() is used to collect the surviving terms
### because it also works when no term of the list occurs in the corpus, and
### drop = FALSE keeps the matrix shape when exactly one term occurs.
#### animal
reino.animal.tipo.lexico <- scan("/Users/patriciareina/Desktop/FAT/LISTAS/Listas Bruno 12-04 /animais_tipo.txt", what="character", sep="\n")
reino.animal.tipo.lexico
##### absolute frequency of the set
corpuslist_DTM_lemma.freq.animal.tipo <- corpuslist_DTM_lemma.freq[reino.animal.tipo.lexico]
animal2 <- corpuslist_DTM_lemma.freq.animal.tipo
##### total of occurrences of the set in books
corpuslist_DTM_lemma.freq.animal.tipo.values <- na.omit(corpuslist_DTM_lemma.freq.animal.tipo) #suppression of null/NA values
animal.tipo.lexico.values <- names(corpuslist_DTM_lemma.freq.animal.tipo.values) #only considers words that occur in the corpus
corpuslist_DTM_lemma.matrix[animal.tipo.lexico.values, , drop = FALSE]
#### vegetal
reino.vegetal.tipo.lexico <- scan("/Users/patriciareina/Desktop/FAT/LISTAS/Listas Bruno 12-04 /vegetais_tipo.txt", what="character", sep="\n")
reino.vegetal.tipo.lexico
##### absolute frequency of the set
corpuslist_DTM_lemma.freq.vegetal.tipo <- corpuslist_DTM_lemma.freq[reino.vegetal.tipo.lexico]
vegetal2 <- corpuslist_DTM_lemma.freq.vegetal.tipo
##### total of occurrences of the set in books
corpuslist_DTM_lemma.freq.vegetal.tipo.values <- na.omit(corpuslist_DTM_lemma.freq.vegetal.tipo)
vegetal.tipo.lexico.values <- names(corpuslist_DTM_lemma.freq.vegetal.tipo.values)
corpuslist_DTM_lemma.matrix[vegetal.tipo.lexico.values, , drop = FALSE]
#### mineral
reino.mineral.tipo.lexico <- scan("/Users/patriciareina/Desktop/FAT/LISTAS/Listas Bruno 12-04 /minerais_tipo.txt", what="character", sep="\n")
reino.mineral.tipo.lexico
##### absolute frequency of the set
corpuslist_DTM_lemma.freq.mineral.tipo <- corpuslist_DTM_lemma.freq[reino.mineral.tipo.lexico]
mineral2 <- corpuslist_DTM_lemma.freq.mineral.tipo
##### total of occurrences of the set in books
corpuslist_DTM_lemma.freq.mineral.tipo.values <- na.omit(corpuslist_DTM_lemma.freq.mineral.tipo)
mineral.tipo.lexico.values <- names(corpuslist_DTM_lemma.freq.mineral.tipo.values)
corpuslist_DTM_lemma.matrix[mineral.tipo.lexico.values, , drop = FALSE]

### absolute frequency of the generic terms, summed per kingdom
reino.tipo <- c(
  reino.animal.tipo.lexico,
  reino.vegetal.tipo.lexico,
  reino.mineral.tipo.lexico
)
corpuslist_DTM_lemma.freq[reino.tipo]
#### one column per kingdom, holding the total of its generic terms
reino.tipo.freq.data <- data.frame(
  animal = sum(corpuslist_DTM_lemma.freq.animal.tipo.values),
  vegetal = sum(corpuslist_DTM_lemma.freq.vegetal.tipo.values),
  mineral = sum(corpuslist_DTM_lemma.freq.mineral.tipo.values)
)
### data preparation for 'absolute frequency of the generic terms' viz [external output for RAWGraphs]
#### long format, NA rows dropped, renamed, with the terms column first
reino.tipo.freq.data.stack <- reino.tipo.freq.data %>%
  stack() %>%
  na.omit() %>%
  setNames(c("frequency", "general terms")) %>%
  select(`general terms`, frequency)
write.csv(
  reino.tipo.freq.data.stack,
  file = "reino.tipo.freq.data.stack.csv",
  row.names = FALSE
)

### relative frequency of the generic terms (percentage of all word occurrences in the corpus)
reino.tipo.freq.rel.data <- data.frame(
  animal = sum(corpuslist_DTM_lemma.freq.animal.tipo.values) / sum(corpuslist_DTM_lemma.freq) * 100,
  vegetal = sum(corpuslist_DTM_lemma.freq.vegetal.tipo.values) / sum(corpuslist_DTM_lemma.freq) * 100,
  mineral = sum(corpuslist_DTM_lemma.freq.mineral.tipo.values) / sum(corpuslist_DTM_lemma.freq) * 100
)
### data preparation for 'relative frequency of the generic terms' viz [external output for RAWGraphs]
#### rounding to two decimals; round() is wrapped in a function because passing
#### extra arguments through across() is deprecated since dplyr 1.1.0
reino.tipo.freq.rel.data <- reino.tipo.freq.rel.data %>%
  mutate(across(1:3, function(x) round(x, 2)))
#### long format, renamed, with the terms column first
reino.tipo.freq.rel.data.stack <- stack(reino.tipo.freq.rel.data)
names(reino.tipo.freq.rel.data.stack) <- c("relative frequency", "general terms")
reino.tipo.freq.rel.data.stack <- select(reino.tipo.freq.rel.data.stack, `general terms`, `relative frequency`)
write.csv(reino.tipo.freq.rel.data.stack, file = "reino.tipo.freq.rel.data.stack.csv", row.names = FALSE)

### visualization optimization: putting together abs. and rel. freqs. in one viz
#### the relative-frequency column is appended to the absolute-frequency table
reino.tipo.freq.abs.rel.data.stack <- cbind(reino.tipo.freq.data.stack, reino.tipo.freq.rel.data.stack[2])
write.csv(reino.tipo.freq.abs.rel.data.stack, file = "reino.tipo.freq.abs.rel.data.stack.csv", row.names = FALSE)

### visualization optimization: tagging every generic term with its kingdom
### Each per-term frequency vector is turned into a long table, given a
### constant "kingdom" column, stripped of NA rows and renamed.
table_columns <- c("frequency", "terms", "kingdom")
#### animal
animal2.stack <- stack(animal2)
animal2.stack$kingdom <- "animal"
animal2.stack <- setNames(na.omit(animal2.stack), table_columns)
#### vegetal
vegetal2.stack <- stack(vegetal2)
vegetal2.stack$kingdom <- "vegetal"
vegetal2.stack <- setNames(na.omit(vegetal2.stack), table_columns)
#### mineral
mineral2.stack <- stack(mineral2)
mineral2.stack$kingdom <- "mineral"
mineral2.stack <- setNames(na.omit(mineral2.stack), table_columns)
#### the three tables are stacked in the order animal, vegetal, mineral
reino.tipo.anim.veg.min.stack <- rbind(animal2.stack, vegetal2.stack, mineral2.stack)
write.csv(
  reino.tipo.anim.veg.min.stack,
  file = "reino.tipo.anim.veg.min.stack.csv",
  row.names = FALSE
)

### occurrences of generic terms per book
#### per-book total of each kingdom's generic terms;
#### drop = FALSE keeps the selection a matrix even when a kingdom has a single
#### matching term, which colSums() requires
colSums(corpuslist_DTM_lemma.matrix[animal.tipo.lexico.values, , drop = FALSE])
colSums(corpuslist_DTM_lemma.matrix[vegetal.tipo.lexico.values, , drop = FALSE])
colSums(corpuslist_DTM_lemma.matrix[mineral.tipo.lexico.values, , drop = FALSE])
#### building data frame: one column per kingdom, one row per book
reino.tipo.freq.per.livro.data <- data.frame(
  animal = colSums(corpuslist_DTM_lemma.matrix[animal.tipo.lexico.values, , drop = FALSE]),
  vegetal = colSums(corpuslist_DTM_lemma.matrix[vegetal.tipo.lexico.values, , drop = FALSE]),
  mineral = colSums(corpuslist_DTM_lemma.matrix[mineral.tipo.lexico.values, , drop = FALSE])
)
### data preparation for 'occurrences of generic terms per book' viz [external output for RAWGraphs]
#### transposed so that kingdoms are the rows and books are the columns,
#### with the column headers replaced by the book names
reino.tipo.freq.per.livro.data <- t(reino.tipo.freq.per.livro.data)
colnames(reino.tipo.freq.per.livro.data) <- c(nomes.livros.arr)
reino.tipo.freq.per.livro.data <- as.data.frame(reino.tipo.freq.per.livro.data)
reino.tipo.freq.per.livro.data.row <- rownames_to_column(reino.tipo.freq.per.livro.data, var="reinos")
write.csv(reino.tipo.freq.per.livro.data.row, file="reino.tipo.freq.per.livro.data.row.csv", row.names = FALSE)

## Specific terms

### retrieving data from lists - specific terms
### For each kingdom: the term list is read (one term per line), looked up in
### the corpus frequencies (terms absent from the corpus come back as NA), and
### the NA entries are dropped. names() is used to collect the surviving terms
### because it also works when no term of the list occurs in the corpus, and
### drop = FALSE keeps the matrix shape when exactly one term occurs.
#### animal
reino.animal.especie.lexico <- scan("/Users/patriciareina/Desktop/FAT/LISTAS/Listas Bruno 12-04 /animais_especie.txt", what="character", sep="\n")
reino.animal.especie.lexico
##### absolute frequency of the set
corpuslist_DTM_lemma.freq.animal.especie <- corpuslist_DTM_lemma.freq[reino.animal.especie.lexico]
animal3 <- corpuslist_DTM_lemma.freq.animal.especie
##### total of occurrences of the set in books
corpuslist_DTM_lemma.freq.animal.especie.values <- na.omit(corpuslist_DTM_lemma.freq.animal.especie)
animal.especie.lexico.values <- names(corpuslist_DTM_lemma.freq.animal.especie.values)
corpuslist_DTM_lemma.matrix[animal.especie.lexico.values, , drop = FALSE]
#### vegetal
reino.vegetal.especie.lexico <- scan("/Users/patriciareina/Desktop/FAT/LISTAS/Listas Bruno 12-04 /vegetais_especie.txt", what="character", sep="\n")
reino.vegetal.especie.lexico
##### absolute frequency of the set
corpuslist_DTM_lemma.freq.vegetal.especie <- corpuslist_DTM_lemma.freq[reino.vegetal.especie.lexico]
vegetal3 <- corpuslist_DTM_lemma.freq.vegetal.especie
##### total of occurrences of the set in books
corpuslist_DTM_lemma.freq.vegetal.especie.values <- na.omit(corpuslist_DTM_lemma.freq.vegetal.especie)
vegetal.especie.lexico.values <- names(corpuslist_DTM_lemma.freq.vegetal.especie.values)
corpuslist_DTM_lemma.matrix[vegetal.especie.lexico.values, , drop = FALSE]
#### mineral
reino.mineral.especie.lexico <- scan("/Users/patriciareina/Desktop/FAT/LISTAS/Listas Bruno 12-04 /minerais_especie.txt", what="character", sep="\n")
reino.mineral.especie.lexico
##### absolute frequency of the set
corpuslist_DTM_lemma.freq.mineral.especie <- corpuslist_DTM_lemma.freq[reino.mineral.especie.lexico]
mineral3 <- corpuslist_DTM_lemma.freq.mineral.especie
##### total of occurrences of the set in books
corpuslist_DTM_lemma.freq.mineral.especie.values <- na.omit(corpuslist_DTM_lemma.freq.mineral.especie)
mineral.especie.lexico.values <- names(corpuslist_DTM_lemma.freq.mineral.especie.values)
corpuslist_DTM_lemma.matrix[mineral.especie.lexico.values, , drop = FALSE]

### absolute frequency of the specific terms, summed per kingdom
reino.especie <- c(
  reino.animal.especie.lexico,
  reino.vegetal.especie.lexico,
  reino.mineral.especie.lexico
)
corpuslist_DTM_lemma.freq[reino.especie]
#### one column per kingdom, holding the total of its specific terms
reino.especie.freq.data <- data.frame(
  animal = sum(corpuslist_DTM_lemma.freq.animal.especie.values),
  vegetal = sum(corpuslist_DTM_lemma.freq.vegetal.especie.values),
  mineral = sum(corpuslist_DTM_lemma.freq.mineral.especie.values)
)
### data preparation for 'absolute frequency of specific terms per book' [external output for RAWGraphs]
#### long format, NA rows dropped, renamed, with the terms column first
reino.especie.freq.data.stack <- reino.especie.freq.data %>%
  stack() %>%
  na.omit() %>%
  setNames(c("frequency", "specific terms")) %>%
  select(`specific terms`, frequency)
write.csv(
  reino.especie.freq.data.stack,
  file = "reino.especie.freq.data.stack.csv",
  row.names = FALSE
)

### relative frequency of the specific terms (percentage of all word occurrences in the corpus)
reino.especie.freq.rel.data <- data.frame(
  animal = sum(corpuslist_DTM_lemma.freq.animal.especie.values) / sum(corpuslist_DTM_lemma.freq) * 100,
  vegetal = sum(corpuslist_DTM_lemma.freq.vegetal.especie.values) / sum(corpuslist_DTM_lemma.freq) * 100,
  mineral = sum(corpuslist_DTM_lemma.freq.mineral.especie.values) / sum(corpuslist_DTM_lemma.freq) * 100
)
### data preparation for 'relative frequency of specific terms per book' [external output for RAWGraphs]
#### rounding to two decimals; round() is wrapped in a function because passing
#### extra arguments through across() is deprecated since dplyr 1.1.0
reino.especie.freq.rel.data <- reino.especie.freq.rel.data %>%
  mutate(across(1:3, function(x) round(x, 2)))
#### long format, renamed, with the terms column first
reino.especie.freq.rel.data.stack <- stack(reino.especie.freq.rel.data)
names(reino.especie.freq.rel.data.stack) <- c("relative frequency", "specific terms")
reino.especie.freq.rel.data.stack <- select(reino.especie.freq.rel.data.stack, `specific terms`, `relative frequency`)
write.csv(reino.especie.freq.rel.data.stack, file = "reino.especie.freq.rel.data.stack.csv", row.names = FALSE)

### visualization optimization: putting together abs. and rel. freqs. in one viz
#### the relative-frequency column is appended to the absolute-frequency table
reino.especie.freq.abs.rel.data.stack <- cbind(reino.especie.freq.data.stack, reino.especie.freq.rel.data.stack[2])
write.csv(reino.especie.freq.abs.rel.data.stack, file = "reino.especie.freq.abs.rel.data.stack.csv", row.names = FALSE)

### visualization optimization: tagging every specific term with its kingdom
### Each per-term frequency vector is turned into a long table, given a
### constant "kingdom" column, stripped of NA rows and renamed.
table_columns <- c("frequency", "terms", "kingdom")
#### animal
animal3.stack <- stack(animal3)
animal3.stack$kingdom <- "animal"
animal3.stack <- setNames(na.omit(animal3.stack), table_columns)
#### vegetal
vegetal3.stack <- stack(vegetal3)
vegetal3.stack$kingdom <- "vegetal"
vegetal3.stack <- setNames(na.omit(vegetal3.stack), table_columns)
#### mineral
mineral3.stack <- stack(mineral3)
mineral3.stack$kingdom <- "mineral"
mineral3.stack <- setNames(na.omit(mineral3.stack), table_columns)
#### the three tables are stacked in the order animal, vegetal, mineral
reino.especie.anim.veg.min.stack <- rbind(animal3.stack, vegetal3.stack, mineral3.stack)
write.csv(
  reino.especie.anim.veg.min.stack,
  file = "reino.especie.anim.veg.min.stack.csv",
  row.names = FALSE
)

### occurrences of specific terms per book
#### per-book total of each kingdom's specific terms;
#### drop = FALSE keeps the selection a matrix even when a kingdom has a single
#### matching term, which colSums() requires
colSums(corpuslist_DTM_lemma.matrix[animal.especie.lexico.values, , drop = FALSE])
colSums(corpuslist_DTM_lemma.matrix[vegetal.especie.lexico.values, , drop = FALSE])
colSums(corpuslist_DTM_lemma.matrix[mineral.especie.lexico.values, , drop = FALSE])
#### building data frame: one column per kingdom, one row per book
reino.especie.freq.per.livro.data <- data.frame(
  animal = colSums(corpuslist_DTM_lemma.matrix[animal.especie.lexico.values, , drop = FALSE]),
  vegetal = colSums(corpuslist_DTM_lemma.matrix[vegetal.especie.lexico.values, , drop = FALSE]),
  mineral = colSums(corpuslist_DTM_lemma.matrix[mineral.especie.lexico.values, , drop = FALSE])
)
### data preparation for 'occurrences of specific terms per book' [external output for RAWGraphs]
#### transposed so that kingdoms are the rows and books are the columns,
#### with the column headers replaced by the book names
reino.especie.freq.per.livro.data <- t(reino.especie.freq.per.livro.data)
colnames(reino.especie.freq.per.livro.data) <- c(nomes.livros.arr)
reino.especie.freq.per.livro.data <- as.data.frame(reino.especie.freq.per.livro.data)
reino.especie.freq.per.livro.data.row <- rownames_to_column(reino.especie.freq.per.livro.data, var="reinos")
write.csv(reino.especie.freq.per.livro.data.row, file="reino.especie.freq.per.livro.data.row.csv", row.names = FALSE)

## All together: literal, generic and specific terms

### absolute frequency of all the terms
### For each kingdom the literal, generic and specific frequencies are joined,
### the NA entries are removed, and the remainder is summed.
#### animal
animal.total <- c(animal1, animal2, animal3)
animal.total.clean <- na.omit(animal.total)
animal.total.sum <- sum(animal.total.clean)
#### vegetal
vegetal.total <- c(vegetal1, vegetal2, vegetal3)
vegetal.total.clean <- na.omit(vegetal.total)
vegetal.total.sum <- sum(vegetal.total.clean)
#### mineral
mineral.total <- c(mineral1, mineral2, mineral3)
mineral.total.clean <- na.omit(mineral.total)
mineral.total.sum <- sum(mineral.total.clean)
#### one column per kingdom
reino.total.freq.data <- data.frame(animal = animal.total.sum, vegetal = vegetal.total.sum, mineral = mineral.total.sum)
### data preparation for 'absolute frequency all the terms' [external output for RAWGraphs]
#### long format, NA rows dropped, renamed, with the terms column first
reino.total.freq.data.stack <- reino.total.freq.data %>%
  stack() %>%
  na.omit() %>%
  setNames(c("frequency", "total terms")) %>%
  select(`total terms`, frequency)
write.csv(
  reino.total.freq.data.stack,
  file = "reino.total.freq.data.stack.csv",
  row.names = FALSE
)

### relative frequency of all the terms (percentage of all word occurrences in the corpus)
reino.total.freq.rel.data <- data.frame(
  animal = animal.total.sum / sum(corpuslist_DTM_lemma.freq) * 100,
  vegetal = vegetal.total.sum / sum(corpuslist_DTM_lemma.freq) * 100,
  mineral = mineral.total.sum / sum(corpuslist_DTM_lemma.freq) * 100
)
### data preparation for 'relative frequency all the terms' [external output for RAWGraphs]
#### rounding to two decimals; round() is wrapped in a function because passing
#### extra arguments through across() is deprecated since dplyr 1.1.0
reino.total.freq.rel.data <- reino.total.freq.rel.data %>%
  mutate(across(1:3, function(x) round(x, 2)))
#### long format, renamed, with the terms column first
reino.total.freq.rel.data.stack <- stack(reino.total.freq.rel.data)
names(reino.total.freq.rel.data.stack) <- c("relative frequency", "total terms")
reino.total.freq.rel.data.stack <- select(reino.total.freq.rel.data.stack, `total terms`, `relative frequency`)
write.csv(reino.total.freq.rel.data.stack, file = "reino.total.freq.rel.data.stack.csv", row.names = FALSE)

### visualization optimization: putting together abs. and rel. freqs. in one viz
#### the relative-frequency column is appended to the absolute-frequency table
reino.total.freq.abs.rel.data.stack <- cbind(reino.total.freq.data.stack, reino.total.freq.rel.data.stack[2])
write.csv(reino.total.freq.abs.rel.data.stack, file = "reino.total.freq.abs.rel.data.stack.csv", row.names = FALSE)

### occurrences per book of all the terms
#### merging the groups
#### The literal term is included only when it is a row of the matrix, so a
#### kingdom whose literal term never occurs does not abort the lookup; this
#### matches the absolute totals, which skip missing (NA) frequencies.
animal.total.lexico.values <- c(
  intersect("animal", rownames(corpuslist_DTM_lemma.matrix)),
  animal.tipo.lexico.values,
  animal.especie.lexico.values
)
vegetal.total.lexico.values <- c(
  intersect("vegetal", rownames(corpuslist_DTM_lemma.matrix)),
  vegetal.tipo.lexico.values,
  vegetal.especie.lexico.values
)
mineral.total.lexico.values <- c(
  intersect("mineral", rownames(corpuslist_DTM_lemma.matrix)),
  mineral.tipo.lexico.values,
  mineral.especie.lexico.values
)
#### per-book total of each kingdom; drop = FALSE keeps the selection a matrix
#### even when a single term is selected, which colSums() requires
colSums(corpuslist_DTM_lemma.matrix[animal.total.lexico.values, , drop = FALSE])
colSums(corpuslist_DTM_lemma.matrix[vegetal.total.lexico.values, , drop = FALSE])
colSums(corpuslist_DTM_lemma.matrix[mineral.total.lexico.values, , drop = FALSE])
#### building data frame: one column per kingdom, one row per book
reino.total.freq.per.livro.data <- data.frame(
  animal = colSums(corpuslist_DTM_lemma.matrix[animal.total.lexico.values, , drop = FALSE]),
  vegetal = colSums(corpuslist_DTM_lemma.matrix[vegetal.total.lexico.values, , drop = FALSE]),
  mineral = colSums(corpuslist_DTM_lemma.matrix[mineral.total.lexico.values, , drop = FALSE])
)
### data preparation for 'occurrences per books all the terms' [external output for RAWGraphs]
#### transposed so that kingdoms are the rows and books are the columns,
#### with the column headers replaced by the book names
reino.total.freq.per.livro.data <- t(reino.total.freq.per.livro.data)
colnames(reino.total.freq.per.livro.data) <- c(nomes.livros.arr)
reino.total.freq.per.livro.data <- as.data.frame(reino.total.freq.per.livro.data)
reino.total.freq.per.livro.data.row <- rownames_to_column(reino.total.freq.per.livro.data, var="reinos")
write.csv(reino.total.freq.per.livro.data.row, file="reino.total.freq.per.livro.data.row.csv", row.names = FALSE)

FOUR ELEMENTS ANALYSIS

### output directory for the exported CSV files
getwd()
setwd("/Users/patriciareina/Desktop/FAT/RAWGRAPHS")

## The four elements

### absolute frequency of the four elements, one term at a time
corpuslist_DTM_lemma.freq["terra"]
corpuslist_DTM_lemma.freq["água"]
corpuslist_DTM_lemma.freq["ar"]
corpuslist_DTM_lemma.freq["fogo"]
#### the same four terms gathered in one set
quatro.elementos <- c("terra", "água", "ar", "fogo")
corpuslist_DTM_lemma.freq[quatro.elementos]
### data preparation for 'absolute frequency of the four elements' [external output for RAWGraphs]
#### long format, renamed, with the terms column first
corpuslist_DTM_lemma.freq.quatro.elementos.stack <- stack(corpuslist_DTM_lemma.freq[quatro.elementos]) %>%
  setNames(c("frequency", "terms")) %>%
  select(terms, frequency)
write.csv(
  corpuslist_DTM_lemma.freq.quatro.elementos.stack,
  file = "corpuslist_DTM_lemma.freq.quatro.elementos.stack.csv",
  row.names = FALSE
)

### relative frequency of the four elements (percentage of all word occurrences in the corpus)
quatro.elementos.uni.data <- data.frame(
  terra = corpuslist_DTM_lemma.freq["terra"] * 100 / sum(corpuslist_DTM_lemma.freq),
  água = corpuslist_DTM_lemma.freq["água"] * 100 / sum(corpuslist_DTM_lemma.freq),
  ar = corpuslist_DTM_lemma.freq["ar"] * 100 / sum(corpuslist_DTM_lemma.freq),
  fogo = corpuslist_DTM_lemma.freq["fogo"] * 100 / sum(corpuslist_DTM_lemma.freq)
)
sum(corpuslist_DTM_lemma.freq) #total of words
#### uniforming percentage values: rounding to two decimals; round() is wrapped
#### in a function because passing extra arguments through across() is
#### deprecated since dplyr 1.1.0
quatro.elementos.uni.data <- quatro.elementos.uni.data %>%
  mutate(across(1:4, function(x) round(x, 2)))
### data preparation for 'relative frequency of the four elements' [external output for RAWGraphs]
#### long format, renamed, with the terms column first
quatro.elementos.uni.data.stack <- stack(quatro.elementos.uni.data)
names(quatro.elementos.uni.data.stack) <- c("relative frequency", "terms")
quatro.elementos.uni.data.stack <- select(quatro.elementos.uni.data.stack, terms, `relative frequency`)
write.csv(quatro.elementos.uni.data.stack, file = "quatro.elementos.uni.data.stack.csv", row.names = FALSE)

### visualization optimization: putting together abs. and rel. freqs. in one viz
#### the relative-frequency column is appended to the absolute-frequency table
quatro.elementos.freq.abs.rel.stack <- cbind(corpuslist_DTM_lemma.freq.quatro.elementos.stack, quatro.elementos.uni.data.stack[2])
write.csv(quatro.elementos.freq.abs.rel.stack, file = "quatro.elementos.freq.abs.rel.stack.csv", row.names = FALSE)

### number of books in which each of the four elements' terms occurs
corpuslist_DTM_lemma.freq.doc[quatro.elementos]
### data preparation for 'in how many books...' [external output for RAWGraphs]
#### long format, renamed, with the terms column first
corpuslist_DTM_lemma.freq.doc.quatro.elem.stack <- stack(corpuslist_DTM_lemma.freq.doc[quatro.elementos]) %>%
  setNames(c("number of books", "terms")) %>%
  select(terms, `number of books`)
write.csv(
  corpuslist_DTM_lemma.freq.doc.quatro.elem.stack,
  file = "corpuslist_DTM_lemma.freq.doc.quatro.elem.stack.csv",
  row.names = FALSE
)

### occurrences of the four elements' terms, book by book
corpuslist_DTM_lemma.matrix[quatro.elementos, ]
### data preparation for 'in which books do these...' [external output for RAWGraphs]
#### rows = the four terms, columns = books
corpuslist_DTM_lemma.matrix.quatro.elem.df <- data.frame(corpuslist_DTM_lemma.matrix[quatro.elementos, ])
#### the column headers are replaced by the book names
colnames(corpuslist_DTM_lemma.matrix.quatro.elem.df) <- nomes.livros.arr
corpuslist_DTM_lemma.matrix.quatro.elem.df
#### the terms move from row names into a regular "terms" column before export
corpuslist_DTM_lemma.matrix.quatro.elem.df.row <- rownames_to_column(
  corpuslist_DTM_lemma.matrix.quatro.elem.df,
  var = "terms"
)
write.csv(
  corpuslist_DTM_lemma.matrix.quatro.elem.df.row,
  file = "corpuslist_DTM_lemma.matrix.quatro.elem.df.row.csv",
  row.names = FALSE
)

## Terms associated with the four elements  

### retrieving data from lists - associated terms
#### set of terms associated with the "terra" element (earth)
##### the list is read with one entry per line
terra.lexico <- scan("/Users/patriciareina/Desktop/FAT/LISTAS/Listas Helena 12-04/ARR_terra.txt", what="character", sep="\n")
##### absolute frequency of the set (list entries with no match in the frequency vector come back as NA)
corpuslist_DTM_lemma.freq.terra <- corpuslist_DTM_lemma.freq[terra.lexico]
corpuslist_DTM_lemma.freq.terra
##### total of occurrences of the set in books
corpuslist_DTM_lemma.freq.terra.values <- na.omit(corpuslist_DTM_lemma.freq.terra) # suppression of null/NA values
##### only the terms that survived the NA removal are kept;
##### names() is used instead of rownames(as.table()) because as.table()
##### stops with "cannot coerce to a table" when none of the listed terms is found
terra.lexico.values <- names(corpuslist_DTM_lemma.freq.terra.values)
corpuslist_DTM_lemma.matrix[terra.lexico.values, ]
##### in how many books does each term of the set occur?
corpuslist_DTM_lemma.freq.doc[terra.lexico.values]
#### set of terms associated with the "água" element (water)
##### the list is read with one entry per line
agua.lexico <- scan("/Users/patriciareina/Desktop/FAT/LISTAS/Listas Helena 12-04/ARR_Agua.txt", what="character", sep="\n")
##### absolute frequency of the set (list entries with no match in the frequency vector come back as NA)
corpuslist_DTM_lemma.freq.agua <- corpuslist_DTM_lemma.freq[agua.lexico]
corpuslist_DTM_lemma.freq.agua
##### total of occurrences of the set in books
corpuslist_DTM_lemma.freq.agua.values <- na.omit(corpuslist_DTM_lemma.freq.agua) # suppression of null/NA values
##### only the terms that survived the NA removal are kept;
##### names() is used instead of rownames(as.table()) because as.table()
##### stops with "cannot coerce to a table" when none of the listed terms is found
agua.lexico.values <- names(corpuslist_DTM_lemma.freq.agua.values)
corpuslist_DTM_lemma.matrix[agua.lexico.values, ]
##### in how many books does each term of the set occur?
corpuslist_DTM_lemma.freq.doc[agua.lexico.values]
#### set of terms associated with the "ar" element (air)
##### the list is read with one entry per line
ar.lexico <- scan("/Users/patriciareina/Desktop/FAT/LISTAS/Listas Helena 12-04/ARR_ar.txt", what="character", sep="\n")
##### absolute frequency of the set (list entries with no match in the frequency vector come back as NA)
corpuslist_DTM_lemma.freq.ar <- corpuslist_DTM_lemma.freq[ar.lexico]
corpuslist_DTM_lemma.freq.ar
##### total of occurrences of the set in books
corpuslist_DTM_lemma.freq.ar.values <- na.omit(corpuslist_DTM_lemma.freq.ar) # suppression of null/NA values
##### only the terms that survived the NA removal are kept;
##### names() is used instead of rownames(as.table()) because as.table()
##### stops with "cannot coerce to a table" when none of the listed terms is found
ar.lexico.values <- names(corpuslist_DTM_lemma.freq.ar.values)
corpuslist_DTM_lemma.matrix[ar.lexico.values, ]
##### in how many books does each term of the set occur?
corpuslist_DTM_lemma.freq.doc[ar.lexico.values]
#### set of terms associated with the "fogo" element (fire)
##### the list is read with one entry per line
fogo.lexico <- scan("/Users/patriciareina/Desktop/FAT/LISTAS/Listas Helena 12-04/ARR_fogo.txt", what="character", sep="\n")
##### absolute frequency of the set (list entries with no match in the frequency vector come back as NA)
corpuslist_DTM_lemma.freq.fogo <- corpuslist_DTM_lemma.freq[fogo.lexico]
corpuslist_DTM_lemma.freq.fogo
##### total of occurrences of the set in books
corpuslist_DTM_lemma.freq.fogo.values <- na.omit(corpuslist_DTM_lemma.freq.fogo) # suppression of null/NA values
##### only the terms that survived the NA removal are kept;
##### names() is used instead of rownames(as.table()) because as.table()
##### stops with "cannot coerce to a table" when none of the listed terms is found
fogo.lexico.values <- names(corpuslist_DTM_lemma.freq.fogo.values)
corpuslist_DTM_lemma.matrix[fogo.lexico.values, ]
##### in how many books does each term of the set occur?
corpuslist_DTM_lemma.freq.doc[fogo.lexico.values]

### absolute frequency of all the associated terms (literal terms included), treating each element as one category
#### for each element: the literal term is appended to its associated terms,
#### NA entries are dropped, and the remaining frequencies are added up
#### NOTE(review): if a list file also contains the literal term itself, that term is counted twice here - confirm against the lists
##### terra
total.terra <- c(corpuslist_DTM_lemma.freq.terra, corpuslist_DTM_lemma.freq["terra"])
total.terra.clean <- na.omit(total.terra)
total.terra.clean.sum <- sum(total.terra.clean)
##### ar
total.ar <- c(corpuslist_DTM_lemma.freq.ar, corpuslist_DTM_lemma.freq["ar"])
total.ar.clean <- na.omit(total.ar)
total.ar.clean.sum <- sum(total.ar.clean)
##### água
total.agua <- c(corpuslist_DTM_lemma.freq.agua, corpuslist_DTM_lemma.freq["água"])
total.agua.clean <- na.omit(total.agua)
total.agua.clean.sum <- sum(total.agua.clean)
##### fogo
total.fogo <- c(corpuslist_DTM_lemma.freq.fogo, corpuslist_DTM_lemma.freq["fogo"])
total.fogo.clean <- na.omit(total.fogo)
total.fogo.clean.sum <- sum(total.fogo.clean)
#### one-row table with the four category totals, printed for comparison
quatro.elem.data <- data.frame(terra = total.terra.clean.sum,
                               ar = total.ar.clean.sum,
                               água = total.agua.clean.sum,
                               fogo = total.fogo.clean.sum)
quatro.elem.data
### data preparation for 'absolute frequency of all the...' [external output for RAWGraphs]
#### long format (one row per element), columns renamed, category column first
quatro.elem.data.stack <- quatro.elem.data %>%
  stack() %>%
  setNames(c("frequency", "related terms")) %>%
  select(`related terms`, frequency)
write.csv(
  quatro.elem.data.stack,
  file = "quatro.elem.data.stack.csv",
  row.names = FALSE
)

### relative frequency of all the associated terms (literal terms included), considering each element as a category
#### each category total is expressed as a percentage of the sum of corpuslist_DTM_lemma.freq
quatro.elem.data.per <- data.frame(
  terra = total.terra.clean.sum / sum(corpuslist_DTM_lemma.freq) * 100,
  ar = total.ar.clean.sum / sum(corpuslist_DTM_lemma.freq) * 100,
  água = total.agua.clean.sum / sum(corpuslist_DTM_lemma.freq) * 100,
  fogo = total.fogo.clean.sum / sum(corpuslist_DTM_lemma.freq) * 100
)
quatro.elem.data.per
#### rounding percentage values to two decimal places
#### (round() is wrapped in an explicit function because passing extra
#### arguments to across() through `...` is deprecated since dplyr 1.1.0)
quatro.elem.data.per <- quatro.elem.data.per %>%
  mutate(across(1:4, function(x) round(x, digits = 2)))
### data preparation for 'relative frequency of all the...' [external output for RAWGraphs]
#### stack() turns the one-row table into long format: one row per element
quatro.elem.data.per.stack <- stack(quatro.elem.data.per)
names(quatro.elem.data.per.stack) <- c("relative frequency", "related terms")
#### putting the category column first
quatro.elem.data.per.stack <- select(quatro.elem.data.per.stack, `related terms`, `relative frequency`)
write.csv(quatro.elem.data.per.stack, file = "quatro.elem.data.per.stack.csv", row.names = FALSE)

### visualization optimization: absolute and relative frequencies side by side in a single table
#### the second column of the stacked relative-frequency table is appended to the absolute-frequency table (rows paired by position)
quatro.elem.abs.rel.data.stack <- cbind(
  quatro.elem.data.stack,
  quatro.elem.data.per.stack[2]
)
write.csv(
  quatro.elem.abs.rel.data.stack,
  file = "quatro.elem.abs.rel.data.stack.csv",
  row.names = FALSE
)

### absolute frequency of all the associated terms (literal terms included), listing each term and labelling it with its category
#### for each element: long format (one row per term), plus a constant "element" column naming the category
#### terra
total.termos.ass.terra.stack <- stack(total.terra.clean)
total.termos.ass.terra.stack$element <- "terra"
colnames(total.termos.ass.terra.stack) <- c("frequency", "related terms", "element")
#### ar
total.termos.ass.ar.stack <- stack(total.ar.clean)
total.termos.ass.ar.stack$element <- "ar"
colnames(total.termos.ass.ar.stack) <- c("frequency", "related terms", "element")
#### água
total.termos.ass.agua.stack <- stack(total.agua.clean)
total.termos.ass.agua.stack$element <- "água"
colnames(total.termos.ass.agua.stack) <- c("frequency", "related terms", "element")
#### fogo
total.termos.ass.fogo.stack <- stack(total.fogo.clean)
total.termos.ass.fogo.stack$element <- "fogo"
colnames(total.termos.ass.fogo.stack) <- c("frequency", "related terms", "element")
### data preparation for 'absolute frequency of all the... discriminating each term...' [external output for RAWGraphs]
#### the four per-element tables are appended one below the other
total.termos.ass.todos.elem.stack <- rbind(
  total.termos.ass.terra.stack,
  total.termos.ass.ar.stack,
  total.termos.ass.agua.stack,
  total.termos.ass.fogo.stack
)
write.csv(
  total.termos.ass.todos.elem.stack,
  file = "total.termos.ass.todos.elem.stack.csv",
  row.names = FALSE
)

### book counts (taken from corpuslist_DTM_lemma.freq.doc) for both the literal terms and the associated terms of the four elements
#### for each element the literal term comes first, followed by its associated terms found in the corpus
total.livros.terra <- c(
  corpuslist_DTM_lemma.freq.doc["terra"],
  corpuslist_DTM_lemma.freq.doc[terra.lexico.values]
)
total.livros.ar <- c(
  corpuslist_DTM_lemma.freq.doc["ar"],
  corpuslist_DTM_lemma.freq.doc[ar.lexico.values]
)
total.livros.agua <- c(
  corpuslist_DTM_lemma.freq.doc["água"],
  corpuslist_DTM_lemma.freq.doc[agua.lexico.values]
)
total.livros.fogo <- c(
  corpuslist_DTM_lemma.freq.doc["fogo"],
  corpuslist_DTM_lemma.freq.doc[fogo.lexico.values]
)
#### all four elements in a single vector
total.livros <- c(total.livros.terra, total.livros.ar, total.livros.agua, total.livros.fogo)
### data preparation for 'occurrences in each book of both associated...' [external output for RAWGraphs]
#### long format, columns renamed (the value column is exported under the label "frequency"),
#### term column first, rows with NA removed
total.livros.quatro.elementos.relacionados.stack <- total.livros %>%
  stack() %>%
  setNames(c("frequency", "related terms")) %>%
  select(`related terms`, frequency) %>%
  na.omit()
write.csv(
  total.livros.quatro.elementos.relacionados.stack,
  file = "total.livros.quatro.elementos.relacionados.stack.csv",
  row.names = FALSE
)

### visualization optimization: labelling each term with the element (category) it belongs to
#### for each element: long format, a constant "element" column, rows with NA removed, columns renamed
#### terra
total.livros.terra.stack <- stack(total.livros.terra)
total.livros.terra.stack$element <- "terra"
total.livros.terra.stack <- na.omit(total.livros.terra.stack)
colnames(total.livros.terra.stack) <- c("frequency", "related terms", "element")
#### ar
total.livros.ar.stack <- stack(total.livros.ar)
total.livros.ar.stack$element <- "ar"
total.livros.ar.stack <- na.omit(total.livros.ar.stack)
colnames(total.livros.ar.stack) <- c("frequency", "related terms", "element")
#### água
total.livros.agua.stack <- stack(total.livros.agua)
total.livros.agua.stack$element <- "água"
total.livros.agua.stack <- na.omit(total.livros.agua.stack)
colnames(total.livros.agua.stack) <- c("frequency", "related terms", "element")
#### fogo
total.livros.fogo.stack <- stack(total.livros.fogo)
total.livros.fogo.stack$element <- "fogo"
total.livros.fogo.stack <- na.omit(total.livros.fogo.stack)
colnames(total.livros.fogo.stack) <- c("frequency", "related terms", "element")
#### merging data: the four per-element tables are appended one below the other
total.livro.todos.elem.uni.rel.stack <- rbind(
  total.livros.terra.stack,
  total.livros.ar.stack,
  total.livros.agua.stack,
  total.livros.fogo.stack
)
write.csv(
  total.livro.todos.elem.uni.rel.stack,
  file = "total.livro.todos.elem.uni.rel.stack.csv",
  row.names = FALSE
)

### comparing the four element categories by their occurrences in each book
#### combining associated and literal terms (the literal term comes first)
terra.rel.lexico.values <- c("terra", terra.lexico.values)
ar.rel.lexico.values <- c("ar", ar.lexico.values)
agua.rel.lexico.values <- c("água", agua.lexico.values)
fogo.rel.lexico.values <- c("fogo", fogo.lexico.values)
#### summing up occurrences per book: the rows of each category are selected and added up column by column
#### drop = FALSE keeps the selection a matrix even when a category holds a single term;
#### without it the selection collapses to a plain vector and colSums() fails
colSums(corpuslist_DTM_lemma.matrix[terra.rel.lexico.values, , drop = FALSE])
colSums(corpuslist_DTM_lemma.matrix[ar.rel.lexico.values, , drop = FALSE])
colSums(corpuslist_DTM_lemma.matrix[agua.rel.lexico.values, , drop = FALSE])
colSums(corpuslist_DTM_lemma.matrix[fogo.rel.lexico.values, , drop = FALSE])
#### building data frame: one column per element, one row per book
quatro.elementos.rel.lexico.data <- data.frame(
  terra = colSums(corpuslist_DTM_lemma.matrix[terra.rel.lexico.values, , drop = FALSE]),
  ar = colSums(corpuslist_DTM_lemma.matrix[ar.rel.lexico.values, , drop = FALSE]),
  água = colSums(corpuslist_DTM_lemma.matrix[agua.rel.lexico.values, , drop = FALSE]),
  fogo = colSums(corpuslist_DTM_lemma.matrix[fogo.rel.lexico.values, , drop = FALSE])
)
### data preparation for 'comparing all the four elements categories...' [external output for RAWGraphs]
#### transposing so that elements become rows and books become columns
quatro.elementos.rel.lexico.data <- t(quatro.elementos.rel.lexico.data)
#### the book columns are relabelled with the book names
colnames(quatro.elementos.rel.lexico.data) <- c(nomes.livros.arr)
#### t() returns a matrix, so it is converted back to a data frame
quatro.elementos.rel.lexico.data <- as.data.frame(quatro.elementos.rel.lexico.data)
#### row names (the elements) become a regular "terms" column so they are kept in the CSV
quatro.elementos.rel.lexico.data.row <- rownames_to_column(quatro.elementos.rel.lexico.data, var = "terms")
write.csv(quatro.elementos.rel.lexico.data.row, file = "quatro.elementos.rel.lexico.data.row.csv", row.names = FALSE)