---
title: "Analyse BD"
author: "François Pelletier"
date: "07/10/2019"
output: html_document
---
|
|
|
|
```{r setup, include=FALSE}
|
|
# Set `echo = TRUE` as the default chunk option for the rest of the document.
knitr::opts_chunk$set(echo = TRUE)
|
|
```
|
|
|
|
```{r}
|
|
library("sentometrics")
|
|
library("tidyverse")
|
|
library("plotly")
|
|
```
|
|
|
|
```{r}
|
|
# Read the three serialized inputs from relative paths: the site and country
# tables (presumably top-10 rankings, going by their names) and the corpus.
top_10_sites <- readRDS(file = "top_10_sites.RDS")
top_10_country <- readRDS(file = "top_10_country.RDS")
core_features_corpus.RDS <- readRDS(file = "core_features_corpus.RDS")

# Draw a 200-document sample of the corpus for the quick sentiment check.
# NOTE(review): no seed is set before sampling, so the sample (and anything
# printed from it) changes on every knit.
corpusSample <- quanteda::corpus_sample(
  x = core_features_corpus.RDS,
  size = 200
)
|
|
```
|
|
|
|
# Définition des lexiques
|
|
|
|
```{r}
|
|
# Load the lexicon and valence-shifter collections shipped with sentometrics.
data(
  list = c("list_valence_shifters", "list_lexicons"),
  package = "sentometrics"
)

# Keep only the "FEEL_en_tr" lexicon (as a one-element list) and the "en"
# valence shifters.
valIn <- list_valence_shifters[["en"]]
lexIn <- list_lexicons["FEEL_en_tr"]

# Combine lexicon and valence shifters into the object used by the
# sentiment computations below.
l1 <- sento_lexicons(lexIn, valIn)
|
|
```
|
|
|
|
# Calcul des sentiments
|
|
|
|
```{r}
|
|
# Compute sentiment on the sampled corpus with the prepared lexicons,
# using the "counts" method and 8 as the `nCore` setting.
c_sentiments_sample <- compute_sentiment(
  corpusSample,
  l1,
  nCore = 8,
  how = "counts"
)

# Display the result in the rendered document.
c_sentiments_sample
|
|
```
|
|
|
|
```{r}
|
|
# Aggregation control: daily frequency with lag = 7, "proportional" weighting
# within documents, and equal weights across documents and across time.
c_control_compute <- ctr_agg(
  by = "day",
  lag = 7,
  howWithin = "proportional",
  howDocs = "equal_weight",
  howTime = "equal_weight"
)
|
|
|
|
# Build the sentiment measures on the full corpus (not the sample), using
# the lexicons and the aggregation control defined above.
c_sentiments <- sento_measures(
  core_features_corpus.RDS,
  ctr = c_control_compute,
  lexicons = l1
)
|
|
```
|
|
|
|
```{r}
|
|
# Convert the sentiment measures object to a data.table; the plotting chunks
# below select its `date` column and the "FEEL_en_tr--*" columns.
c_measures <- as.data.table(x = c_sentiments)
|
|
```
|
|
|
|
```{r}
|
|
# Compute global measures from the sentiment measures object.
# NOTE(review): `c_measures_g` is not referenced again in this document.
c_measures_g <- measures_global(c_sentiments)
|
|
```
|
|
|
|
# Sentiment par site
|
|
|
|
```{r}
|
|
# Sentiment per site: reshape to long format, then plot one line per site.
#
# Keep `date` plus every column whose name starts with "FEEL_en_tr--site",
# relabel those columns with the site names, and melt to long format.
# `id.vars` is spelled out in full instead of relying on partial matching
# of `id`.
# NOTE(review): the relabelling is positional. It assumes the selected
# columns come in the same order and number as `top_10_sites$site`; confirm
# against how the features were built.
c_measures_melt <- c_measures %>%
  select(date, starts_with("FEEL_en_tr--site")) %>%
  `colnames<-`(c("date", top_10_sites$site)) %>%
  melt(id.vars = "date", variable.name = "site")

# One coloured line per site over time.
plot_site <- ggplot(
  data = c_measures_melt,
  aes(x = date, y = value, colour = site)
) +
  geom_line()

# Convert the ggplot object to an interactive plotly figure.
ggplotly(plot_site)
|
|
```
|
|
|
|
# Sentiment par pays
|
|
|
|
```{r}
|
|
# Sentiment per country: reshape to long format, then plot one line per
# country.
#
# Keep `date` plus every column whose name starts with "FEEL_en_tr--country",
# relabel those columns with the country names, and melt to long format.
# `id.vars` is spelled out in full instead of relying on partial matching
# of `id`.
# NOTE(review): the relabelling is positional. It assumes the selected
# columns come in the same order and number as `top_10_country$country`;
# confirm against how the features were built.
c_measures_melt <- c_measures %>%
  select(date, starts_with("FEEL_en_tr--country")) %>%
  `colnames<-`(c("date", top_10_country$country)) %>%
  melt(id.vars = "date", variable.name = "country")

# One coloured line per country over time.
plot_country <- ggplot(
  data = c_measures_melt,
  aes(x = date, y = value, colour = country)
) +
  geom_line()

# Convert the ggplot object to an interactive plotly figure.
ggplotly(plot_country)
|
|
```
|
|
|
|
# Sentiment par compteur d'entités
|
|
|
|
```{r}
|
|
# Sentiment per entity feature: reshape to long format, then plot one line
# per entity column.
#
# Keep `date` plus every column whose name starts with "FEEL_en_tr--entity"
# (column names are left as they are) and melt to long format.
# `id.vars` is spelled out in full instead of relying on partial matching
# of `id`.
c_measures_melt <- c_measures %>%
  select(date, starts_with("FEEL_en_tr--entity")) %>%
  melt(id.vars = "date", variable.name = "entity")

# One coloured line per entity column over time.
plot_entity <- ggplot(
  data = c_measures_melt,
  aes(x = date, y = value, colour = entity)
) +
  geom_line()

# Convert the ggplot object to an interactive plotly figure.
ggplotly(plot_entity)
|
|
``` |