The code developed for text analysis is presented below, complete with comments and annotations. Alternatively, the scripts can be downloaded as R files from the following links:
↓ Visão Panorâmica (Bird’s-Eye View)
↓ Reinos Animal, Vegetal e Mineral (Kingdoms of Nature)
↓ Quatro Elementos (The Four Elements)
The code version shared here in open access, intended for inspection and reuse, corresponds to that used to generate datasets-bundle-2 and its visualizations [release 1, June 2024].
SUMMARY
RSTUDIO SETTINGS
## RStudio settings: install (only when missing) and attach the packages
## used by the preprocessing and analysis sections below.

## List of required packages
### tm - general use
### tidyverse (package collection) - general use
### stopwords - stopwords removal
### qdap - stopwords removal
### textstem - lemmatization process
### quanteda - lemmatization process
required_packages <- c("tm", "tidyverse", "stopwords", "qdap", "textstem", "quanteda")

## Installing required packages
### only packages that are not installed yet are fetched, so re-running the
### script does not reinstall (and possibly upgrade) everything each time
missing_packages <- setdiff(required_packages, rownames(installed.packages()))
if (length(missing_packages) > 0) {
  install.packages(missing_packages)
}

## Loading installed packages
### the attach order is kept as in the original script, because it decides
### which package wins when two of them export the same function name
library(tm)
library(tidyverse)
library(stopwords)
library(qdap)
library(textstem)
library(quanteda)
DATA PREPROCESSING
## Data preprocessing: reads every file of the corpus folder, lower-cases and
## tokenizes the text, builds a tm corpus, removes stopwords, numbers,
## punctuation and extra white space, and finally lemmatizes each document
## with a term -> lemma dictionary. The result is `corpus.as.list.lemma`.

## 1st - Importing Data
### directory assignment
arrcorpus <- "/Users/patriciareina/Desktop/FAT/BASE DE DADOS/Obra Completa TXT/TXT-livros/OP-all"

### verifying the files
arrfiles <- list.files(path = arrcorpus)
arrfiles # files correctly ordered

### building corpus arrangement: [[ ]] file, [ ] verse (strings)
corpuslist <- file.path(arrcorpus, arrfiles) # full path of every file
corpuslist
typeof(corpuslist) # character
corpus.list <- lapply(corpuslist, FUN = readLines)
corpus.list[[1]]
typeof(corpus.list) # list

## 2nd - Data cleaning
### converting into one string
corpus.list.line <- lapply(corpus.list, FUN = paste, collapse = " ")
corpus.list.line[[78]]
typeof(corpus.list.line) # list

### de-capitalizing the words
corpus.list.line.lower <- tolower(corpus.list.line)
corpus.list.line.lower[[78]]
typeof(corpus.list.line.lower) # character

### tokenizing with text/file separation
### (splits on every non-word character; one character vector per file)
corpus.list.line.clean <- strsplit(corpus.list.line.lower, "\\W")
corpus.list.line.clean[[78]]
typeof(corpus.list.line.clean) # list
typeof(corpus.list.line.clean[[78]][985]) # character

## 3rd - Corpus
### making a Simple Corpus: lists of vectors
corpus.as.list <- Corpus(VectorSource(as.vector(corpus.list.line.clean)))
corpus.as.list
typeof(arrcorpus) # character
typeof(corpus.as.list) # list
inspect(corpus.as.list[[2]])

## 4th - Removing stopwords
### adapted list assignment (based on "stopwords-iso")
allstops_iso_alt <- scan(
  "/Users/patriciareina/Desktop/FAT/LISTAS/stopwords/stopwords-pt_txt_alterado_pr_24-01-24.txt",
  what = "character",
  sep = "\n"
)
allstops_iso_alt

### stopwords removal
corpus.as.list <- tm_map(corpus.as.list, removeWords, c(allstops_iso_alt))
inspect(corpus.as.list[[1]])
inspect(corpus.as.list[[48]])

## 5th - More Data Cleaning
### removing numbers
corpus.as.list <- tm_map(corpus.as.list, content_transformer(removeNumbers))
inspect(corpus.as.list[[48]])

### removing punctuation
corpus.as.list <- tm_map(corpus.as.list, content_transformer(removePunctuation))
inspect(corpus.as.list[48])

### removing white spaces
corpus.as.list <- tm_map(corpus.as.list, content_transformer(stripWhitespace))
typeof(corpus.as.list[48]) # list

## 6th - Lemmatization
corpus.as.list.lemma <- corpus.as.list

### setting a list/dictionary to proceed lemmatization
### (based on "lemmatization-pt" from Global Glossary Project)
lemma_dic <- read.delim(
  file = "/Users/patriciareina/Desktop/FAT/LISTAS/lematização/lemmatization-pt_rev_06-06-24.txt",
  header = FALSE,
  stringsAsFactors = FALSE
)
names(lemma_dic) <- c("lemma", "term")
lemma_dic[1:1000, ]
typeof(lemma_dic) # list

### fixing duplicate terms, keeping only first occurrences
lemma_dic_unique <- lemma_dic[!duplicated(lemma_dic$term), ]

### list of control for removed duplicated terms [external output]
### (plain assignment: wrapping it in print() only dumped the whole
### dictionary column to the console)
term_doc <- lemma_dic$term
term_doc[duplicated(term_doc)]
options(max.print = 99999) # raised so the captured listing is not truncated
capture.output(term_doc[duplicated(term_doc)], file = "duplicados_lemma_06-06-24.txt")
options(max.print = 9999)

### reordering dictionary disposition for term-lemma instead of lemma-term
lemma_dic_unique_term_lemma <- select(lemma_dic_unique, term, lemma)
lemma_dic_unique_term_lemma[1:10, ]

### lemmatization
### seq_len() instead of 1:length(): with an empty corpus the loop body is
### skipped instead of running for i = 1 and i = 0
for (i in seq_len(length(corpus.as.list.lemma))) {
  corpus.as.list.lemma[[i]][[1]] <- lemmatize_strings(
    corpus.as.list.lemma[[i]][[1]],
    dictionary = lemma_dic_unique_term_lemma
  )
}
SETTING UP DATA ANALYSIS
## Builds the document-term matrix of the lemmatized corpus and the frequency
## vectors that the analysis sections rely on:
##   corpuslist_DTM_lemma.matrix          terms (rows) x books (columns)
##   corpuslist_DTM_lemma.freq            occurrences of each word in the corpus
##   corpuslist_DTM_lemma.freq.order      the same, sorted by decreasing frequency
##   corpuslist_DTM_lemma.freq.doc        number of books in which each word occurs
##   corpuslist_DTM_lemma.freq.doc.order  the same, sorted by decreasing count

corpus.as.list.lemma # still a Simple Corpus

### making a DocumentTermMatrix
corpuslist_DTM_lemma <- DocumentTermMatrix(
  corpus.as.list.lemma,
  control = list(wordLengths = c(2, Inf))
)
inspect(corpuslist_DTM_lemma)

### word frequency in corpus, publication division
corpuslist_DTM_lemma.inv <- t(corpuslist_DTM_lemma)
corpuslist_DTM_lemma.matrix <- as.matrix(corpuslist_DTM_lemma.inv)

### absolute frequency of each word in the whole corpus
### (these two vectors are used by every analysis section but were never
### assigned in this script; each row of the matrix is one term, so the row
### sums are the corpus-wide counts)
corpuslist_DTM_lemma.freq <- rowSums(corpuslist_DTM_lemma.matrix) # alphabetical order
corpuslist_DTM_lemma.freq.order <- corpuslist_DTM_lemma.freq[
  order(corpuslist_DTM_lemma.freq, decreasing = TRUE)
] # sort by decreasing order

### finding in how many books each word in the corpus occurs
corpuslist_DTM_lemma.freq.doc <- colSums(as.matrix(corpuslist_DTM_lemma) > 0) # alphabetical order
corpuslist_DTM_lemma.freq.doc.order <- corpuslist_DTM_lemma.freq.doc[
  order(corpuslist_DTM_lemma.freq.doc, decreasing = TRUE)
] # sort by decreasing order
BIRD’S-EYE VIEW ANALYSIS
## Bird's-eye view: exports, as CSV files for RAWGraphs, the most frequent
## words of the corpus, their counts per book, the words present in the
## largest number of books, and the per-book counts of those words.

### setting directory for the external outputs
getwd()
setwd("/Users/patriciareina/Desktop/FAT/RAWGRAPHS")

### the 10 more frequent words in corpus [absolute frequency]
corpuslist_DTM_lemma.freq.order[seq_len(10)]

### data preparation for 'the 10 more frequent words in corpus' viz
### [external output for RAWGraphs]
corpuslist_DTM_lemma.freq.order.stack <- stack(corpuslist_DTM_lemma.freq.order)
# building a data frame
corpuslist_DTM_lemma.freq.order.stack.df <- data.frame(corpuslist_DTM_lemma.freq.order.stack)
corpuslist_DTM_lemma.freq.order.stack.top10words <- setNames(
  stack(corpuslist_DTM_lemma.freq.order[seq_len(10)]),
  c("frequency", "terms")
)
corpuslist_DTM_lemma.freq.order.stack.top10words
write.csv(
  corpuslist_DTM_lemma.freq.order.stack.top10words,
  file = "corpuslist_DTM_lemma.freq.order.stack.top10words.csv",
  row.names = FALSE
)

### the 10 more frequent words in each book [absolute frequency]
corpuslist_DTM_lemma.top10words.df <- data.frame(corpuslist_DTM_lemma.freq.order[seq_len(10)])
top10words <- rownames(corpuslist_DTM_lemma.top10words.df)
corpuslist_DTM_lemma.matrix[top10words, ]

### data preparation for 'the 10 more frequent words in each book' viz
### [external output for RAWGraphs]
# adjusting hierarchies
corpuslist_DTM_lemma.matrix.top10words.df <- data.frame(corpuslist_DTM_lemma.matrix[top10words, ])
nomes.livros.arr <- scan(
  "/Users/patriciareina/Desktop/FAT/LISTAS/nomes_obras_poeticas_arr.txt",
  what = "character",
  sep = "\n"
)
# the order of the books is replaced by their names
colnames(corpuslist_DTM_lemma.matrix.top10words.df) <- c(nomes.livros.arr)
corpuslist_DTM_lemma.matrix.top10words.df # testing
corpuslist_DTM_lemma.matrix.top10words.df.row <- rownames_to_column(
  corpuslist_DTM_lemma.matrix.top10words.df,
  var = "terms"
)
write.csv(
  corpuslist_DTM_lemma.matrix.top10words.df.row,
  file = "corpuslist_DTM_lemma.matrix.top10words.df.row.csv",
  row.names = FALSE
)

### which words are most frequent in most books?
corpuslist_DTM_lemma.freq.doc.order[seq_len(10)]
# testing if there are other words with 76 occurrences. YES, three more.
corpuslist_DTM_lemma.freq.doc.order[seq_len(15)]

### data preparation for 'which words are most frequent in most books?' viz
### [external output for RAWGraphs]
corpuslist_DTM_lemma.freq.order.doc.stack.top13doc <- setNames(
  stack(corpuslist_DTM_lemma.freq.doc.order[seq_len(13)]),
  c("total of books", "terms")
)
corpuslist_DTM_lemma.freq.order.doc.stack.top13doc <- select(
  corpuslist_DTM_lemma.freq.order.doc.stack.top13doc,
  terms,
  `total of books`
)
corpuslist_DTM_lemma.freq.order.doc.stack.top13doc
write.csv(
  corpuslist_DTM_lemma.freq.order.doc.stack.top13doc,
  file = "corpuslist_DTM_lemma.freq.order.doc.stack.csv.top13doc.csv",
  row.names = FALSE
)

### how many times do these most frequent words occur in each of the 79 books?
corpuslist_DTM_lemma.top13doc.df <- data.frame(corpuslist_DTM_lemma.freq.doc.order[seq_len(13)])
top13docs <- rownames(corpuslist_DTM_lemma.top13doc.df)
corpuslist_DTM_lemma.matrix[top13docs, ]

### data preparation for 'how many times do these most...?' viz
### [external output for RAWGraphs]
corpuslist_DTM_lemma.matrix.top13doc.df <- data.frame(corpuslist_DTM_lemma.matrix[top13docs, ])
colnames(corpuslist_DTM_lemma.matrix.top13doc.df) <- c(nomes.livros.arr)
corpuslist_DTM_lemma.matrix.top13doc.df
corpuslist_DTM_lemma.matrix.top13doc.df.row <- rownames_to_column(
  corpuslist_DTM_lemma.matrix.top13doc.df,
  var = "terms"
)
write.csv(
  corpuslist_DTM_lemma.matrix.top13doc.df.row,
  file = "corpuslist_DTM_lemma.matrix.top13doc.df.row.csv",
  row.names = FALSE
)
KINGDOMS OF NATURE ANALYSIS
## Kingdoms of Nature: counts the literal terms ("animal", "vegetal",
## "mineral"), the generic terms and the specific terms of each kingdom
## (read from external word lists), in the whole corpus and per book, and
## exports every table as a CSV file for RAWGraphs.
## Reads corpuslist_DTM_lemma.freq, corpuslist_DTM_lemma.freq.doc,
## corpuslist_DTM_lemma.matrix and nomes.livros.arr, assigned earlier.

### setting directory for external outputs
getwd()
setwd("/Users/patriciareina/Desktop/FAT/RAWGRAPHS")

### total of words in the corpus (denominator of every relative frequency)
corpus.total.words <- sum(corpuslist_DTM_lemma.freq)

## Literal terms
### absolute frequency of the literal terms
animal1 <- corpuslist_DTM_lemma.freq["animal"]
vegetal1 <- corpuslist_DTM_lemma.freq["vegetal"]
mineral1 <- corpuslist_DTM_lemma.freq["mineral"]

### data preparation for 'absolute frequency of the literal terms ' [external output for RAWGraphs]
#### building data frame
reino.geral.freq.data <- data.frame(
  animal = animal1,
  vegetal = vegetal1,
  mineral = mineral1
)
reino.geral.freq.data.stack <- stack(reino.geral.freq.data)
#### improving data frame
names(reino.geral.freq.data.stack) <- c("frequency", "terms")
reino.geral.freq.data.stack <- select(reino.geral.freq.data.stack, terms, frequency)
#### exporting data frame
write.csv(reino.geral.freq.data.stack, file = "reino.geral.freq.data.stack.csv", row.names = FALSE)

### relative frequency of the literal terms
reino.geral.freq.rel.data <- data.frame(
  animal = animal1 / corpus.total.words * 100,
  vegetal = vegetal1 / corpus.total.words * 100,
  mineral = mineral1 / corpus.total.words * 100
)

### data preparation for 'relative frequency of the literal terms' [external output for RAWGraphs]
### (explicit function: passing extra arguments through across() is deprecated)
reino.geral.freq.rel.data <- reino.geral.freq.rel.data %>%
  mutate(across(1:3, function(x) round(x, 2)))
reino.geral.freq.rel.data.stack <- stack(reino.geral.freq.rel.data)
names(reino.geral.freq.rel.data.stack) <- c("relative frequency", "terms")
reino.geral.freq.rel.data.stack <- select(reino.geral.freq.rel.data.stack, terms, `relative frequency`)
write.csv(reino.geral.freq.rel.data.stack, file = "reino.geral.freq.rel.data.stack.csv", row.names = FALSE)

### visualization optimization: putting together abs. and rel. freqs. in one viz
reino.geral.freq.abs.rel.data.stack <- cbind(reino.geral.freq.data.stack, reino.geral.freq.rel.data.stack[2])
write.csv(reino.geral.freq.abs.rel.data.stack, file = "reino.geral.freq.abs.rel.data.stack.csv", row.names = FALSE)

### in how many books do these literal terms occur?
corpuslist_DTM_lemma.freq.doc["animal"]
corpuslist_DTM_lemma.freq.doc["vegetal"]
corpuslist_DTM_lemma.freq.doc["mineral"]

### in which books do these literal terms occur?
corpuslist_DTM_lemma.matrix["animal", ]
corpuslist_DTM_lemma.matrix["vegetal", ]
corpuslist_DTM_lemma.matrix["mineral", ]

### data preparation for 'in which books do these literal terms occur?' [external output for RAWGraphs]
reino.geral.freq.per.livro.data <- data.frame(
  animal = corpuslist_DTM_lemma.matrix["animal", ],
  vegetal = corpuslist_DTM_lemma.matrix["vegetal", ],
  mineral = corpuslist_DTM_lemma.matrix["mineral", ]
)
reino.geral.freq.per.livro.data <- t(reino.geral.freq.per.livro.data)
colnames(reino.geral.freq.per.livro.data) <- c(nomes.livros.arr) # the order of the books is replaced by their names
reino.geral.freq.per.livro.data <- as.data.frame(reino.geral.freq.per.livro.data)
reino.geral.freq.per.livro.data.row <- rownames_to_column(reino.geral.freq.per.livro.data, var = "terms")
write.csv(reino.geral.freq.per.livro.data.row, file = "reino.geral.freq.per.livro.data.row.csv", row.names = FALSE)

## Generic terms
### retrieving data from lists - generic terms
#### animal
reino.animal.tipo.lexico <- scan("/Users/patriciareina/Desktop/FAT/LISTAS/Listas Bruno 12-04 /animais_tipo.txt", what = "character", sep = "\n")
reino.animal.tipo.lexico
##### absolute frequency of the set
corpuslist_DTM_lemma.freq.animal.tipo <- corpuslist_DTM_lemma.freq[reino.animal.tipo.lexico]
animal2 <- corpuslist_DTM_lemma.freq.animal.tipo
##### total of occurrences of the set in books
corpuslist_DTM_lemma.freq.animal.tipo.values <- na.omit(corpuslist_DTM_lemma.freq.animal.tipo) # suppression of null/NA values
# only considers words that occur in the corpus; names() also works when no
# word of the list was found, where as.table() would stop with an error
animal.tipo.lexico.values <- names(corpuslist_DTM_lemma.freq.animal.tipo.values)
corpuslist_DTM_lemma.matrix[animal.tipo.lexico.values, ]

#### vegetal
reino.vegetal.tipo.lexico <- scan("/Users/patriciareina/Desktop/FAT/LISTAS/Listas Bruno 12-04 /vegetais_tipo.txt", what = "character", sep = "\n")
reino.vegetal.tipo.lexico
##### absolute frequency of the set
corpuslist_DTM_lemma.freq.vegetal.tipo <- corpuslist_DTM_lemma.freq[reino.vegetal.tipo.lexico]
vegetal2 <- corpuslist_DTM_lemma.freq.vegetal.tipo
##### total of occurrences of the set in books
corpuslist_DTM_lemma.freq.vegetal.tipo.values <- na.omit(corpuslist_DTM_lemma.freq.vegetal.tipo)
vegetal.tipo.lexico.values <- names(corpuslist_DTM_lemma.freq.vegetal.tipo.values)
corpuslist_DTM_lemma.matrix[vegetal.tipo.lexico.values, ]

#### mineral
reino.mineral.tipo.lexico <- scan("/Users/patriciareina/Desktop/FAT/LISTAS/Listas Bruno 12-04 /minerais_tipo.txt", what = "character", sep = "\n")
reino.mineral.tipo.lexico
##### absolute frequency of the set
corpuslist_DTM_lemma.freq.mineral.tipo <- corpuslist_DTM_lemma.freq[reino.mineral.tipo.lexico]
mineral2 <- corpuslist_DTM_lemma.freq.mineral.tipo
##### total of occurrences of the set in books
corpuslist_DTM_lemma.freq.mineral.tipo.values <- na.omit(corpuslist_DTM_lemma.freq.mineral.tipo)
mineral.tipo.lexico.values <- names(corpuslist_DTM_lemma.freq.mineral.tipo.values)
corpuslist_DTM_lemma.matrix[mineral.tipo.lexico.values, ]

### absolute frequency of the generic terms
reino.tipo <- c(reino.animal.tipo.lexico, reino.vegetal.tipo.lexico, reino.mineral.tipo.lexico)
corpuslist_DTM_lemma.freq[reino.tipo]
reino.tipo.freq.data <- data.frame(
  animal = sum(corpuslist_DTM_lemma.freq.animal.tipo.values),
  vegetal = sum(corpuslist_DTM_lemma.freq.vegetal.tipo.values),
  mineral = sum(corpuslist_DTM_lemma.freq.mineral.tipo.values)
)

### data preparation for 'absolute frequency of the generic terms' viz [external output for RAWGraphs]
reino.tipo.freq.data.stack <- stack(reino.tipo.freq.data)
reino.tipo.freq.data.stack <- na.omit(reino.tipo.freq.data.stack)
names(reino.tipo.freq.data.stack) <- c("frequency", "general terms")
reino.tipo.freq.data.stack <- select(reino.tipo.freq.data.stack, `general terms`, frequency)
write.csv(reino.tipo.freq.data.stack, file = "reino.tipo.freq.data.stack.csv", row.names = FALSE)

### relative frequency of the generic terms
reino.tipo.freq.rel.data <- data.frame(
  animal = sum(corpuslist_DTM_lemma.freq.animal.tipo.values) / corpus.total.words * 100,
  vegetal = sum(corpuslist_DTM_lemma.freq.vegetal.tipo.values) / corpus.total.words * 100,
  mineral = sum(corpuslist_DTM_lemma.freq.mineral.tipo.values) / corpus.total.words * 100
)

### data preparation for 'relative frequency of the generic terms' viz [external output for RAWGraphs]
reino.tipo.freq.rel.data <- reino.tipo.freq.rel.data %>%
  mutate(across(1:3, function(x) round(x, 2)))
reino.tipo.freq.rel.data.stack <- stack(reino.tipo.freq.rel.data)
names(reino.tipo.freq.rel.data.stack) <- c("relative frequency", "general terms")
reino.tipo.freq.rel.data.stack <- select(reino.tipo.freq.rel.data.stack, `general terms`, `relative frequency`)
write.csv(reino.tipo.freq.rel.data.stack, file = "reino.tipo.freq.rel.data.stack.csv", row.names = FALSE)

### visualization optimization: putting together abs. and rel. freqs. in one viz
reino.tipo.freq.abs.rel.data.stack <- cbind(reino.tipo.freq.data.stack, reino.tipo.freq.rel.data.stack[2])
write.csv(reino.tipo.freq.abs.rel.data.stack, file = "reino.tipo.freq.abs.rel.data.stack.csv", row.names = FALSE)

### visualization optimization: adding the related kingdom for each term
### (the label is repeated once per row, so an empty set does not fail)
#### animal
animal2.stack <- stack(animal2)
animal2.stack$kingdom <- rep("animal", nrow(animal2.stack))
animal2.stack <- na.omit(animal2.stack)
names(animal2.stack) <- c("frequency", "terms", "kingdom")
#### vegetal
vegetal2.stack <- stack(vegetal2)
vegetal2.stack$kingdom <- rep("vegetal", nrow(vegetal2.stack))
vegetal2.stack <- na.omit(vegetal2.stack)
names(vegetal2.stack) <- c("frequency", "terms", "kingdom")
#### mineral
mineral2.stack <- stack(mineral2)
mineral2.stack$kingdom <- rep("mineral", nrow(mineral2.stack))
mineral2.stack <- na.omit(mineral2.stack)
names(mineral2.stack) <- c("frequency", "terms", "kingdom")
#### merging the data, in order
reino.tipo.anim.veg.min.stack <- rbind(animal2.stack, vegetal2.stack, mineral2.stack)
write.csv(reino.tipo.anim.veg.min.stack, file = "reino.tipo.anim.veg.min.stack.csv", row.names = FALSE)

### occurrences of generic terms per book
#### total of occurrences in just one column in matrix
#### (drop = FALSE keeps a one-term selection as a matrix; without it the
#### selection becomes a plain vector and colSums() stops with an error)
colSums(corpuslist_DTM_lemma.matrix[animal.tipo.lexico.values, , drop = FALSE])
colSums(corpuslist_DTM_lemma.matrix[vegetal.tipo.lexico.values, , drop = FALSE])
colSums(corpuslist_DTM_lemma.matrix[mineral.tipo.lexico.values, , drop = FALSE])
#### building data frame
reino.tipo.freq.per.livro.data <- data.frame(
  animal = colSums(corpuslist_DTM_lemma.matrix[animal.tipo.lexico.values, , drop = FALSE]),
  vegetal = colSums(corpuslist_DTM_lemma.matrix[vegetal.tipo.lexico.values, , drop = FALSE]),
  mineral = colSums(corpuslist_DTM_lemma.matrix[mineral.tipo.lexico.values, , drop = FALSE])
)

### data preparation for 'occurrences of generic terms per book' viz [external output for RAWGraphs]
reino.tipo.freq.per.livro.data <- t(reino.tipo.freq.per.livro.data)
colnames(reino.tipo.freq.per.livro.data) <- c(nomes.livros.arr)
reino.tipo.freq.per.livro.data <- as.data.frame(reino.tipo.freq.per.livro.data)
reino.tipo.freq.per.livro.data.row <- rownames_to_column(reino.tipo.freq.per.livro.data, var = "reinos")
write.csv(reino.tipo.freq.per.livro.data.row, file = "reino.tipo.freq.per.livro.data.row.csv", row.names = FALSE)

## Specific terms
### retrieving data from lists - specific terms
#### animal
reino.animal.especie.lexico <- scan("/Users/patriciareina/Desktop/FAT/LISTAS/Listas Bruno 12-04 /animais_especie.txt", what = "character", sep = "\n")
reino.animal.especie.lexico
##### absolute frequency of the set
corpuslist_DTM_lemma.freq.animal.especie <- corpuslist_DTM_lemma.freq[reino.animal.especie.lexico]
animal3 <- corpuslist_DTM_lemma.freq.animal.especie
##### total of occurrences of the set in books
corpuslist_DTM_lemma.freq.animal.especie.values <- na.omit(corpuslist_DTM_lemma.freq.animal.especie)
animal.especie.lexico.values <- names(corpuslist_DTM_lemma.freq.animal.especie.values)
corpuslist_DTM_lemma.matrix[animal.especie.lexico.values, ]

#### vegetal
reino.vegetal.especie.lexico <- scan("/Users/patriciareina/Desktop/FAT/LISTAS/Listas Bruno 12-04 /vegetais_especie.txt", what = "character", sep = "\n")
reino.vegetal.especie.lexico
##### absolute frequency of the set
corpuslist_DTM_lemma.freq.vegetal.especie <- corpuslist_DTM_lemma.freq[reino.vegetal.especie.lexico]
vegetal3 <- corpuslist_DTM_lemma.freq.vegetal.especie
##### total of occurrences of the set in books
corpuslist_DTM_lemma.freq.vegetal.especie.values <- na.omit(corpuslist_DTM_lemma.freq.vegetal.especie)
vegetal.especie.lexico.values <- names(corpuslist_DTM_lemma.freq.vegetal.especie.values)
corpuslist_DTM_lemma.matrix[vegetal.especie.lexico.values, ]

#### mineral
reino.mineral.especie.lexico <- scan("/Users/patriciareina/Desktop/FAT/LISTAS/Listas Bruno 12-04 /minerais_especie.txt", what = "character", sep = "\n")
reino.mineral.especie.lexico
##### absolute frequency of the set
corpuslist_DTM_lemma.freq.mineral.especie <- corpuslist_DTM_lemma.freq[reino.mineral.especie.lexico]
mineral3 <- corpuslist_DTM_lemma.freq.mineral.especie
##### total of occurrences of the set in books
corpuslist_DTM_lemma.freq.mineral.especie.values <- na.omit(corpuslist_DTM_lemma.freq.mineral.especie)
mineral.especie.lexico.values <- names(corpuslist_DTM_lemma.freq.mineral.especie.values)
corpuslist_DTM_lemma.matrix[mineral.especie.lexico.values, ]

### absolute frequency of specific terms per book
reino.especie <- c(reino.animal.especie.lexico, reino.vegetal.especie.lexico, reino.mineral.especie.lexico)
corpuslist_DTM_lemma.freq[reino.especie]
reino.especie.freq.data <- data.frame(
  animal = sum(corpuslist_DTM_lemma.freq.animal.especie.values),
  vegetal = sum(corpuslist_DTM_lemma.freq.vegetal.especie.values),
  mineral = sum(corpuslist_DTM_lemma.freq.mineral.especie.values)
)

### data preparation for 'absolute frequency of specific terms per book' [external output for RAWGraphs]
reino.especie.freq.data.stack <- stack(reino.especie.freq.data)
reino.especie.freq.data.stack <- na.omit(reino.especie.freq.data.stack)
names(reino.especie.freq.data.stack) <- c("frequency", "specific terms")
reino.especie.freq.data.stack <- select(reino.especie.freq.data.stack, `specific terms`, frequency)
write.csv(reino.especie.freq.data.stack, file = "reino.especie.freq.data.stack.csv", row.names = FALSE)

### relative frequency of specific terms per book
reino.especie.freq.rel.data <- data.frame(
  animal = sum(corpuslist_DTM_lemma.freq.animal.especie.values) / corpus.total.words * 100,
  vegetal = sum(corpuslist_DTM_lemma.freq.vegetal.especie.values) / corpus.total.words * 100,
  mineral = sum(corpuslist_DTM_lemma.freq.mineral.especie.values) / corpus.total.words * 100
)

### data preparation for 'relative frequency of specific terms per book' [external output for RAWGraphs]
reino.especie.freq.rel.data <- reino.especie.freq.rel.data %>%
  mutate(across(1:3, function(x) round(x, 2)))
reino.especie.freq.rel.data.stack <- stack(reino.especie.freq.rel.data)
names(reino.especie.freq.rel.data.stack) <- c("relative frequency", "specific terms")
reino.especie.freq.rel.data.stack <- select(reino.especie.freq.rel.data.stack, `specific terms`, `relative frequency`)
write.csv(reino.especie.freq.rel.data.stack, file = "reino.especie.freq.rel.data.stack.csv", row.names = FALSE)

### visualization optimization: putting together abs. and rel. freqs. in one viz
reino.especie.freq.abs.rel.data.stack <- cbind(reino.especie.freq.data.stack, reino.especie.freq.rel.data.stack[2])
write.csv(reino.especie.freq.abs.rel.data.stack, file = "reino.especie.freq.abs.rel.data.stack.csv", row.names = FALSE)

### visualization optimization: adding the related kingdom for each term
#### animal
animal3.stack <- stack(animal3)
animal3.stack$kingdom <- rep("animal", nrow(animal3.stack))
animal3.stack <- na.omit(animal3.stack)
names(animal3.stack) <- c("frequency", "terms", "kingdom")
#### vegetal
vegetal3.stack <- stack(vegetal3)
vegetal3.stack$kingdom <- rep("vegetal", nrow(vegetal3.stack))
vegetal3.stack <- na.omit(vegetal3.stack)
names(vegetal3.stack) <- c("frequency", "terms", "kingdom")
#### mineral
mineral3.stack <- stack(mineral3)
mineral3.stack$kingdom <- rep("mineral", nrow(mineral3.stack))
mineral3.stack <- na.omit(mineral3.stack)
names(mineral3.stack) <- c("frequency", "terms", "kingdom")
#### merging the data, in order
reino.especie.anim.veg.min.stack <- rbind(animal3.stack, vegetal3.stack, mineral3.stack)
write.csv(reino.especie.anim.veg.min.stack, file = "reino.especie.anim.veg.min.stack.csv", row.names = FALSE)

### occurrences of specific terms per book
#### total of occurrences in just one column in matrix
colSums(corpuslist_DTM_lemma.matrix[animal.especie.lexico.values, , drop = FALSE])
colSums(corpuslist_DTM_lemma.matrix[vegetal.especie.lexico.values, , drop = FALSE])
colSums(corpuslist_DTM_lemma.matrix[mineral.especie.lexico.values, , drop = FALSE])
#### building data frame
reino.especie.freq.per.livro.data <- data.frame(
  animal = colSums(corpuslist_DTM_lemma.matrix[animal.especie.lexico.values, , drop = FALSE]),
  vegetal = colSums(corpuslist_DTM_lemma.matrix[vegetal.especie.lexico.values, , drop = FALSE]),
  mineral = colSums(corpuslist_DTM_lemma.matrix[mineral.especie.lexico.values, , drop = FALSE])
)

### data preparation for 'occurrences of specific terms per book' [external output for RAWGraphs]
reino.especie.freq.per.livro.data <- t(reino.especie.freq.per.livro.data)
colnames(reino.especie.freq.per.livro.data) <- c(nomes.livros.arr)
reino.especie.freq.per.livro.data <- as.data.frame(reino.especie.freq.per.livro.data)
reino.especie.freq.per.livro.data.row <- rownames_to_column(reino.especie.freq.per.livro.data, var = "reinos")
write.csv(reino.especie.freq.per.livro.data.row, file = "reino.especie.freq.per.livro.data.row.csv", row.names = FALSE)

## All together: literal, generic and specific terms
### absolute frequency all the terms
#### animal
animal.total <- c(animal1, animal2, animal3)
animal.total.clean <- na.omit(animal.total)
animal.total.sum <- sum(animal.total.clean)
#### vegetal
vegetal.total <- c(vegetal1, vegetal2, vegetal3)
vegetal.total.clean <- na.omit(vegetal.total)
vegetal.total.sum <- sum(vegetal.total.clean)
#### mineral
mineral.total <- c(mineral1, mineral2, mineral3)
mineral.total.clean <- na.omit(mineral.total)
mineral.total.sum <- sum(mineral.total.clean)
#### building data frame
reino.total.freq.data <- data.frame(
  animal = animal.total.sum,
  vegetal = vegetal.total.sum,
  mineral = mineral.total.sum
)

### data preparation for 'absolute frequency all the terms' [external output for RAWGraphs]
reino.total.freq.data.stack <- stack(reino.total.freq.data)
reino.total.freq.data.stack <- na.omit(reino.total.freq.data.stack)
names(reino.total.freq.data.stack) <- c("frequency", "total terms")
reino.total.freq.data.stack <- select(reino.total.freq.data.stack, `total terms`, frequency)
write.csv(reino.total.freq.data.stack, file = "reino.total.freq.data.stack.csv", row.names = FALSE)

### relative frequency all the terms
reino.total.freq.rel.data <- data.frame(
  animal = animal.total.sum / corpus.total.words * 100,
  vegetal = vegetal.total.sum / corpus.total.words * 100,
  mineral = mineral.total.sum / corpus.total.words * 100
)

### data preparation for 'relative frequency all the terms' [external output for RAWGraphs]
reino.total.freq.rel.data <- reino.total.freq.rel.data %>%
  mutate(across(1:3, function(x) round(x, 2)))
reino.total.freq.rel.data.stack <- stack(reino.total.freq.rel.data)
names(reino.total.freq.rel.data.stack) <- c("relative frequency", "total terms")
reino.total.freq.rel.data.stack <- select(reino.total.freq.rel.data.stack, `total terms`, `relative frequency`)
write.csv(reino.total.freq.rel.data.stack, file = "reino.total.freq.rel.data.stack.csv", row.names = FALSE)

### visualization optimization: putting together abs. and rel. freqs. in one viz
reino.total.freq.abs.rel.data.stack <- cbind(reino.total.freq.data.stack, reino.total.freq.rel.data.stack[2])
write.csv(reino.total.freq.abs.rel.data.stack, file = "reino.total.freq.abs.rel.data.stack.csv", row.names = FALSE)

### occurrences per books all the terms
#### merging the groups
animal.total.lexico.values <- c("animal", animal.tipo.lexico.values, animal.especie.lexico.values)
vegetal.total.lexico.values <- c("vegetal", vegetal.tipo.lexico.values, vegetal.especie.lexico.values)
mineral.total.lexico.values <- c("mineral", mineral.tipo.lexico.values, mineral.especie.lexico.values)
#### total of occurrences in just one column in matrix
colSums(corpuslist_DTM_lemma.matrix[animal.total.lexico.values, , drop = FALSE])
colSums(corpuslist_DTM_lemma.matrix[vegetal.total.lexico.values, , drop = FALSE])
colSums(corpuslist_DTM_lemma.matrix[mineral.total.lexico.values, , drop = FALSE])
#### building data frame
reino.total.freq.per.livro.data <- data.frame(
  animal = colSums(corpuslist_DTM_lemma.matrix[animal.total.lexico.values, , drop = FALSE]),
  vegetal = colSums(corpuslist_DTM_lemma.matrix[vegetal.total.lexico.values, , drop = FALSE]),
  mineral = colSums(corpuslist_DTM_lemma.matrix[mineral.total.lexico.values, , drop = FALSE])
)

### data preparation for 'occurrences per books all the terms' [external output for RAWGraphs]
reino.total.freq.per.livro.data <- t(reino.total.freq.per.livro.data)
colnames(reino.total.freq.per.livro.data) <- c(nomes.livros.arr)
reino.total.freq.per.livro.data <- as.data.frame(reino.total.freq.per.livro.data)
reino.total.freq.per.livro.data.row <- rownames_to_column(reino.total.freq.per.livro.data, var = "reinos")
write.csv(reino.total.freq.per.livro.data.row, file = "reino.total.freq.per.livro.data.row.csv", row.names = FALSE)
FOUR ELEMENTS ANALYSIS
### setting directory for external outputs getwd() setwd("/Users/patriciareina/Desktop/FAT/RAWGRAPHS") ## The four elements ### absolute frequency of the four elements corpuslist_DTM_lemma.freq ["terra"] corpuslist_DTM_lemma.freq ["água"] corpuslist_DTM_lemma.freq ["ar"] corpuslist_DTM_lemma.freq ["fogo"] #### making sets quatro.elementos <- c("terra", "água", "ar", "fogo") corpuslist_DTM_lemma.freq [quatro.elementos] ### data preparation for 'absolute frequency of the four elements' [external output for RAWGraphs] corpuslist_DTM_lemma.freq.quatro.elementos.stack <- stack(corpuslist_DTM_lemma.freq [quatro.elementos]) names(corpuslist_DTM_lemma.freq.quatro.elementos.stack) <- c("frequency","terms") corpuslist_DTM_lemma.freq.quatro.elementos.stack <- select(corpuslist_DTM_lemma.freq.quatro.elementos.stack, terms, frequency) write.csv(corpuslist_DTM_lemma.freq.quatro.elementos.stack, file="corpuslist_DTM_lemma.freq.quatro.elementos.stack.csv", row.names = FALSE) ### relative frequency of the four elements quatro.elementos.uni.data <- data.frame( terra = corpuslist_DTM_lemma.freq ["terra"]*100/ sum(corpuslist_DTM_lemma.freq), água = corpuslist_DTM_lemma.freq ["água"]*100/ sum(corpuslist_DTM_lemma.freq), ar = corpuslist_DTM_lemma.freq ["ar"]*100/ sum(corpuslist_DTM_lemma.freq), fogo = corpuslist_DTM_lemma.freq ["fogo"]*100/ sum(corpuslist_DTM_lemma.freq) ) sum(corpuslist_DTM_lemma.freq) #total of words #### uniforming percentage values quatro.elementos.uni.data <- quatro.elementos.uni.data %>% mutate(across(1:4, round, 2)) ### data preparation for 'relative frequency of the four elements' [external output for RAWGraphs] quatro.elementos.uni.data.stack <- stack(quatro.elementos.uni.data) names(quatro.elementos.uni.data.stack) <- c("relative frequency","terms") quatro.elementos.uni.data.stack <- select(quatro.elementos.uni.data.stack, terms,`relative frequency`) write.csv(quatro.elementos.uni.data.stack, file = "quatro.elementos.uni.data.stack.csv", row.names = FALSE) ### 
visualization optimization: putting together abs. and rel. freqs. in one viz quatro.elementos.freq.abs.rel.stack <- cbind(corpuslist_DTM_lemma.freq.quatro.elementos.stack, quatro.elementos.uni.data.stack[2]) write.csv(quatro.elementos.freq.abs.rel.stack, file = "quatro.elementos.freq.abs.rel.stack.csv", row.names = FALSE) ### in how many books do these four elements' terms occur? corpuslist_DTM_lemma.freq.doc [quatro.elementos] ### data preparation for 'in how many books...' [external output for RAWGraphs] corpuslist_DTM_lemma.freq.doc.quatro.elem.stack <- stack(corpuslist_DTM_lemma.freq.doc [quatro.elementos]) names(corpuslist_DTM_lemma.freq.doc.quatro.elem.stack) <- c("number of books","terms") corpuslist_DTM_lemma.freq.doc.quatro.elem.stack <- select(corpuslist_DTM_lemma.freq.doc.quatro.elem.stack, terms, `number of books`) write.csv(corpuslist_DTM_lemma.freq.doc.quatro.elem.stack, file="corpuslist_DTM_lemma.freq.doc.quatro.elem.stack.csv", row.names = FALSE) ### in which books do these four elements' terms occur? corpuslist_DTM_lemma.matrix [quatro.elementos,] ### data preparation for 'in which books do these...' 
[external output for RAWGraphs] corpuslist_DTM_lemma.matrix.quatro.elem.df <- data.frame(corpuslist_DTM_lemma.matrix [quatro.elementos,]) #adjusting hierarchies colnames(corpuslist_DTM_lemma.matrix.quatro.elem.df) <- c(nomes.livros.arr) #the order of the books is replaced by their names corpuslist_DTM_lemma.matrix.quatro.elem.df corpuslist_DTM_lemma.matrix.quatro.elem.df.row <- rownames_to_column(corpuslist_DTM_lemma.matrix.quatro.elem.df, var="terms") write.csv(corpuslist_DTM_lemma.matrix.quatro.elem.df.row, file="corpuslist_DTM_lemma.matrix.quatro.elem.df.row.csv", row.names = FALSE) ## Terms associated with the four elements ### retrieving data from lists - associated terms #### set of terms associated with the "terra" element (earth) terra.lexico <- scan("/Users/patriciareina/Desktop/FAT/LISTAS/Listas Helena 12-04/ARR_terra.txt", what="character", sep="\n") ##### absolute frequency of the set corpuslist_DTM_lemma.freq.terra <- corpuslist_DTM_lemma.freq [terra.lexico] corpuslist_DTM_lemma.freq.terra ##### total of occurrences of the set in books corpuslist_DTM_lemma.freq.terra.values <- na.omit(corpuslist_DTM_lemma.freq.terra) #suppression of null/NA values terra.lexico.values <- rownames(as.table(corpuslist_DTM_lemma.freq.terra.values)) #only considers words that have more than zero occurrences corpuslist_DTM_lemma.matrix [terra.lexico.values, ] ##### in how many books do the set occur? 
corpuslist_DTM_lemma.freq.doc[terra.lexico.values]

#### set of terms associated with the "água" element (water)
# The list file is read as character data, one entry per line (sep = "\n").
agua.lexico <- scan(
  "/Users/patriciareina/Desktop/FAT/LISTAS/Listas Helena 12-04/ARR_Agua.txt",
  what = "character",
  sep = "\n"
)

##### absolute frequency of the set
# Lexicon terms with no entry in the frequency vector come back as NA.
corpuslist_DTM_lemma.freq.agua <- corpuslist_DTM_lemma.freq[agua.lexico]
corpuslist_DTM_lemma.freq.agua

##### total of occurrences of the set in books
# suppression of null/NA values
corpuslist_DTM_lemma.freq.agua.values <- na.omit(corpuslist_DTM_lemma.freq.agua)
# names() replaces rownames(as.table(...)), which stopped with
# "cannot coerce to a table" when no lexicon term was found.
agua.lexico.values <- names(corpuslist_DTM_lemma.freq.agua.values)
# drop = FALSE keeps the term-by-book layout even when a single term remains.
corpuslist_DTM_lemma.matrix[agua.lexico.values, , drop = FALSE]

##### in how many books do the set occur?
corpuslist_DTM_lemma.freq.doc[agua.lexico.values]

#### set of terms associated with the "ar" element (air)
ar.lexico <- scan(
  "/Users/patriciareina/Desktop/FAT/LISTAS/Listas Helena 12-04/ARR_ar.txt",
  what = "character",
  sep = "\n"
)

##### absolute frequency of the set
corpuslist_DTM_lemma.freq.ar <- corpuslist_DTM_lemma.freq[ar.lexico]
corpuslist_DTM_lemma.freq.ar

##### total of occurrences of the set in books
# suppression of null/NA values
corpuslist_DTM_lemma.freq.ar.values <- na.omit(corpuslist_DTM_lemma.freq.ar)
# Same names()-based extraction as for the "água" set above.
ar.lexico.values <- names(corpuslist_DTM_lemma.freq.ar.values)
corpuslist_DTM_lemma.matrix[ar.lexico.values, , drop = FALSE]

##### in how many books do the set occur?
corpuslist_DTM_lemma.freq.doc[ar.lexico.values]

#### set of terms associated with the "fogo" element (fire)
# The list file is read as character data, one entry per line (sep = "\n").
fogo.lexico <- scan(
  "/Users/patriciareina/Desktop/FAT/LISTAS/Listas Helena 12-04/ARR_fogo.txt",
  what = "character",
  sep = "\n"
)

##### absolute frequency of the set
# Lexicon terms with no entry in the frequency vector come back as NA.
corpuslist_DTM_lemma.freq.fogo <- corpuslist_DTM_lemma.freq[fogo.lexico]
corpuslist_DTM_lemma.freq.fogo

##### total of occurrences of the set in books
# suppression of null/NA values
corpuslist_DTM_lemma.freq.fogo.values <- na.omit(corpuslist_DTM_lemma.freq.fogo)
# names() replaces rownames(as.table(...)), which stopped with
# "cannot coerce to a table" when no lexicon term was found.
fogo.lexico.values <- names(corpuslist_DTM_lemma.freq.fogo.values)
# drop = FALSE keeps the term-by-book layout even when a single term remains.
corpuslist_DTM_lemma.matrix[fogo.lexico.values, , drop = FALSE]

##### in how many books do the set occur?
corpuslist_DTM_lemma.freq.doc[fogo.lexico.values]

### absolute frequency of all the associated terms (literal terms included), considering each element as a category
#### putting together the four elements' (literal) terms and their associated terms
# NOTE(review): if a lexicon file already lists the literal element term, or
# repeats an entry, that term is counted more than once in the sums below;
# verify against the list files.
total.terra <- c(corpuslist_DTM_lemma.freq.terra, corpuslist_DTM_lemma.freq["terra"])
total.ar <- c(corpuslist_DTM_lemma.freq.ar, corpuslist_DTM_lemma.freq["ar"])
total.agua <- c(corpuslist_DTM_lemma.freq.agua, corpuslist_DTM_lemma.freq["água"])
total.fogo <- c(corpuslist_DTM_lemma.freq.fogo, corpuslist_DTM_lemma.freq["fogo"])

#### suppression of null/NA values
total.terra.clean <- na.omit(total.terra)
total.ar.clean <- na.omit(total.ar)
total.agua.clean <- na.omit(total.agua)
total.fogo.clean <- na.omit(total.fogo)

#### summing up all the frequencies
total.terra.clean.sum <- sum(total.terra.clean)
total.ar.clean.sum <- sum(total.ar.clean)
total.agua.clean.sum <- sum(total.agua.clean)
total.fogo.clean.sum <- sum(total.fogo.clean)

#### comparing all the frequencies
# One row, one column per element category.
quatro.elem.data <- data.frame(
  terra = total.terra.clean.sum,
  ar = total.ar.clean.sum,
  água = total.agua.clean.sum,
  fogo = total.fogo.clean.sum
)
quatro.elem.data

### data preparation for 'absolute frequency of all the...'
### [external output for RAWGraphs]
# stack() yields (values, ind); the columns are renamed and then reordered so
# the category comes first in the exported file.
quatro.elem.data.stack <- stack(quatro.elem.data)
names(quatro.elem.data.stack) <- c("frequency", "related terms")
quatro.elem.data.stack <- select(quatro.elem.data.stack, `related terms`, frequency)
write.csv(
  quatro.elem.data.stack,
  file = "quatro.elem.data.stack.csv",
  row.names = FALSE
)

### relative frequency of all the associated terms (literal terms included), considering each element as a category
# Each category total as a percentage of the sum of the whole frequency
# vector; the denominator is evaluated once instead of once per column.
quatro.elem.data.per <- data.frame(
  terra = total.terra.clean.sum,
  ar = total.ar.clean.sum,
  água = total.agua.clean.sum,
  fogo = total.fogo.clean.sum
) / sum(corpuslist_DTM_lemma.freq) * 100
quatro.elem.data.per

#### rounding percentage values to two decimals
# The rounding call is wrapped in a function because passing extra arguments
# through across(...) is deprecated since dplyr 1.1.0.
quatro.elem.data.per <- quatro.elem.data.per %>%
  mutate(across(1:4, function(x) round(x, digits = 2)))

### data preparation for 'relative frequency of all the...' [external output for RAWGraphs]
quatro.elem.data.per.stack <- stack(quatro.elem.data.per)
names(quatro.elem.data.per.stack) <- c("relative frequency", "related terms")
quatro.elem.data.per.stack <- select(
  quatro.elem.data.per.stack,
  `related terms`,
  `relative frequency`
)
write.csv(
  quatro.elem.data.per.stack,
  file = "quatro.elem.data.per.stack.csv",
  row.names = FALSE
)

### visualization optimization: putting together abs. and rel. freqs.
### in one viz
# Appends the second column of the relative-frequency stack to the
# absolute-frequency stack so both measures share one CSV.
quatro.elem.abs.rel.data.stack <- cbind(
  quatro.elem.data.stack,
  quatro.elem.data.per.stack[2]
)
write.csv(
  quatro.elem.abs.rel.data.stack,
  file = "quatro.elem.abs.rel.data.stack.csv",
  row.names = FALSE
)

### absolute frequency of all the associated terms (literal terms included), discriminating each term and specifying its category
# Stacks a named frequency vector into one row per term and tags every row
# with the element it belongs to. Returns a data frame with the columns
# "frequency", "related terms" and "element".
# The element label is a single string recycled over all rows, so the former
# rep(c("...")) wrapper, which repeated it exactly once, is not needed.
stack_terms_with_element <- function(freqs, element) {
  stacked <- stack(freqs)
  stacked$element <- element
  names(stacked) <- c("frequency", "related terms", "element")
  stacked
}

#### terra
total.termos.ass.terra.stack <- stack_terms_with_element(total.terra.clean, "terra")

#### ar
total.termos.ass.ar.stack <- stack_terms_with_element(total.ar.clean, "ar")

#### água
total.termos.ass.agua.stack <- stack_terms_with_element(total.agua.clean, "água")

#### fogo
total.termos.ass.fogo.stack <- stack_terms_with_element(total.fogo.clean, "fogo")

### data preparation for 'absolute frequency of all the... discriminating each term...'
### [external output for RAWGraphs]
# Row-binds the four per-element stacks (terra, ar, água, fogo, in that
# order) into a single table for export.
total.termos.ass.todos.elem.stack <- do.call(
  rbind,
  list(
    total.termos.ass.terra.stack,
    total.termos.ass.ar.stack,
    total.termos.ass.agua.stack,
    total.termos.ass.fogo.stack
  )
)
write.csv(
  total.termos.ass.todos.elem.stack,
  file = "total.termos.ass.todos.elem.stack.csv",
  row.names = FALSE
)

### occurrences in each book of both associated terms and literal terms of the four elements
# For each element: the entry for the literal term first, followed by the
# entries for its associated terms.
total.livros.terra <- c(
  corpuslist_DTM_lemma.freq.doc["terra"],
  corpuslist_DTM_lemma.freq.doc[terra.lexico.values]
)
total.livros.ar <- c(
  corpuslist_DTM_lemma.freq.doc["ar"],
  corpuslist_DTM_lemma.freq.doc[ar.lexico.values]
)
total.livros.agua <- c(
  corpuslist_DTM_lemma.freq.doc["água"],
  corpuslist_DTM_lemma.freq.doc[agua.lexico.values]
)
total.livros.fogo <- c(
  corpuslist_DTM_lemma.freq.doc["fogo"],
  corpuslist_DTM_lemma.freq.doc[fogo.lexico.values]
)
# All four elements concatenated into one vector.
total.livros <- c(
  total.livros.terra,
  total.livros.ar,
  total.livros.agua,
  total.livros.fogo
)

### data preparation for 'occurrences in each book of both associated...'
### [external output for RAWGraphs]
# stack() yields (values, ind); the columns are renamed, reordered so the
# term comes first, and rows containing NA are dropped before export.
total.livros.quatro.elementos.relacionados.stack <- stack(total.livros) %>%
  setNames(c("frequency", "related terms")) %>%
  select(`related terms`, frequency) %>%
  na.omit()
write.csv(
  total.livros.quatro.elementos.relacionados.stack,
  file = "total.livros.quatro.elementos.relacionados.stack.csv",
  row.names = FALSE
)

### visualization optimization: identifying each associated term with one of the four elements as a category
# Stacks a named vector into one row per term, tags every row with its
# element, drops rows containing NA and renames the columns to "frequency",
# "related terms" and "element". Replaces four copies of the same
# stack / label / na.omit / rename sequence.
stack_books_with_element <- function(counts, element) {
  stacked <- stack(counts)
  stacked$element <- element
  stacked <- na.omit(stacked)
  names(stacked) <- c("frequency", "related terms", "element")
  stacked
}

#### terra
total.livros.terra.stack <- stack_books_with_element(total.livros.terra, "terra")

#### ar
total.livros.ar.stack <- stack_books_with_element(total.livros.ar, "ar")

#### água
total.livros.agua.stack <- stack_books_with_element(total.livros.agua, "água")

#### fogo
total.livros.fogo.stack <- stack_books_with_element(total.livros.fogo, "fogo")

#### merging data
total.livro.todos.elem.uni.rel.stack <- rbind(
  total.livros.terra.stack,
  total.livros.ar.stack,
  total.livros.agua.stack,
  total.livros.fogo.stack
)
write.csv(
  total.livro.todos.elem.uni.rel.stack,
  file = "total.livro.todos.elem.uni.rel.stack.csv",
  row.names = FALSE
)

### comparing all the four elements categories
### by their occurrences in each book
#### combining associated and literal terms
terra.rel.lexico.values <- c("terra", terra.lexico.values)
ar.rel.lexico.values <- c("ar", ar.lexico.values)
agua.rel.lexico.values <- c("água", agua.lexico.values)
fogo.rel.lexico.values <- c("fogo", fogo.lexico.values)

#### summing up occurrences in one column
# Column sums over the rows selected by `terms`.
# drop = FALSE matters: when `terms` holds a single entry (an element with no
# associated term found), the plain subset collapses to a vector and colSums()
# stops with "'x' must be an array of at least two dimensions".
sum_terms_per_book <- function(term_matrix, terms) {
  colSums(term_matrix[terms, , drop = FALSE])
}
sum_terms_per_book(corpuslist_DTM_lemma.matrix, terra.rel.lexico.values)
sum_terms_per_book(corpuslist_DTM_lemma.matrix, ar.rel.lexico.values)
sum_terms_per_book(corpuslist_DTM_lemma.matrix, agua.rel.lexico.values)
sum_terms_per_book(corpuslist_DTM_lemma.matrix, fogo.rel.lexico.values)

#### building data frame
# One column per element category, one row per column of the matrix.
quatro.elementos.rel.lexico.data <- data.frame(
  terra = sum_terms_per_book(corpuslist_DTM_lemma.matrix, terra.rel.lexico.values),
  ar = sum_terms_per_book(corpuslist_DTM_lemma.matrix, ar.rel.lexico.values),
  água = sum_terms_per_book(corpuslist_DTM_lemma.matrix, agua.rel.lexico.values),
  fogo = sum_terms_per_book(corpuslist_DTM_lemma.matrix, fogo.rel.lexico.values)
)

### data preparation for 'comparing all the four elements categories...' [external output for RAWGraphs]
# Transpose so each element category becomes a row, label the columns with
# the book names, and move the row names into a "terms" column for export.
quatro.elementos.rel.lexico.data <- t(quatro.elementos.rel.lexico.data)
colnames(quatro.elementos.rel.lexico.data) <- c(nomes.livros.arr)
quatro.elementos.rel.lexico.data <- as.data.frame(quatro.elementos.rel.lexico.data)
quatro.elementos.rel.lexico.data.row <- rownames_to_column(
  quatro.elementos.rel.lexico.data,
  var = "terms"
)
write.csv(
  quatro.elementos.rel.lexico.data.row,
  file = "quatro.elementos.rel.lexico.data.row.csv",
  row.names = FALSE
)