Mise à jour Rmd
This commit is contained in:
parent
97443e863b
commit
120a075b61
2 changed files with 108 additions and 3 deletions
103
Analyse_BD.Rmd
Normal file
103
Analyse_BD.Rmd
Normal file
|
@ -0,0 +1,103 @@
|
|||
---
|
||||
title: "Analyse BD"
|
||||
author: "François Pelletier"
|
||||
date: "07/10/2019"
|
||||
output: html_document
|
||||
---
|
||||
|
||||
```{r setup, include=FALSE}
|
||||
# Set the default chunk option so that every chunk's source code is echoed.
knitr::opts_chunk$set(echo = TRUE)
|
||||
```
|
||||
|
||||
```{r}
|
||||
library("sentometrics")
|
||||
library("tidyverse")
|
||||
library("plotly")
|
||||
```
|
||||
|
||||
```{r}
|
||||
# Load the serialized inputs from the working directory: the full corpus and
# the top-10 country / site tables.
core_features_corpus.RDS <- readRDS("core_features_corpus.RDS")
top_10_country <- readRDS("top_10_country.RDS")
top_10_sites <- readRDS("top_10_sites.RDS")

# Draw a random subset of documents for quick experiments.
# - The seed is fixed so that re-knitting the report yields the same sample.
# - The size is capped at the number of documents, because sampling without
#   replacement fails when more documents are requested than are available.
set.seed(20191007)
corpusSample <- quanteda::corpus_sample(
  core_features_corpus.RDS,
  size = min(200, quanteda::ndoc(core_features_corpus.RDS))
)
|
||||
```
|
||||
|
||||
# Définition des lexiques
|
||||
|
||||
```{r}
|
||||
# Load the lexicons and valence shifters shipped with sentometrics.
data(
  list = c("list_valence_shifters", "list_lexicons"),
  package = "sentometrics"
)

# Keep only the "FEEL_en_tr" lexicon (single-bracket subsetting keeps it as a
# named list element) and the "en" valence shifters.
lexIn <- list_lexicons["FEEL_en_tr"]
valIn <- list_valence_shifters[["en"]]

# Combine lexicon and valence shifters into the object used by the sentiment
# computations below.
l1 <- sento_lexicons(lexiconsIn = lexIn, valenceIn = valIn)
|
||||
```
|
||||
|
||||
# Calcul des sentiments
|
||||
|
||||
```{r}
|
||||
# Score the sampled documents with the prepared lexicons, using
# how = "counts" and nCore = 8.
c_sentiments_sample <- compute_sentiment(
  corpusSample,
  lexicons = l1,
  how = "counts",
  nCore = 8
)

# Show the result in the rendered report.
c_sentiments_sample
|
||||
```
|
||||
|
||||
```{r}
|
||||
# Aggregation control: "proportional" within documents, "equal_weight" across
# documents and across time, lag of 7, aggregated by day.
c_control_compute <- ctr_agg(
  howWithin = "proportional",
  howDocs = "equal_weight",
  howTime = "equal_weight",
  by = "day",
  lag = 7
)

# Build the sentiment measures on the full corpus with the settings above.
c_sentiments <- sento_measures(
  core_features_corpus.RDS,
  lexicons = l1,
  ctr = c_control_compute
)
|
||||
```
|
||||
|
||||
```{r}
|
||||
# Convert the sentiment measures object to a data.table for the reshaping and
# plotting steps.
c_measures <- data.table::as.data.table(c_sentiments)
|
||||
```
|
||||
|
||||
```{r}
|
||||
# Compute the global measures from the sentiment measures object via
# sentometrics' measures_global().
c_measures_g <- sentometrics::measures_global(c_sentiments)
|
||||
```
|
||||
|
||||
# Sentiment par site
|
||||
|
||||
```{r}
|
||||
# Reshape the per-site sentiment columns to long format
# (columns: date, site, value).
c_measures_melt <- c_measures %>%
  select(date, starts_with("FEEL_en_tr--site")) %>%
  # Relabel the measure columns with the site names. This relies on the
  # selected columns being in the same order as top_10_sites$site.
  `colnames<-`(c("date", top_10_sites$site)) %>%
  # Spell out `id.vars` instead of relying on partial matching of `id`.
  melt(id.vars = "date", variable.name = "site")

# One line per site over time, rendered as an interactive plotly chart.
plot_site <- ggplot(
  data = c_measures_melt,
  aes(x = date, y = value, colour = site)
) +
  geom_line()
ggplotly(plot_site)
|
||||
```
|
||||
|
||||
# Sentiment par pays
|
||||
|
||||
```{r}
|
||||
# Reshape the per-country sentiment columns to long format
# (columns: date, country, value).
c_measures_melt <- c_measures %>%
  select(date, starts_with("FEEL_en_tr--country")) %>%
  # Relabel the measure columns with the country names. This relies on the
  # selected columns being in the same order as top_10_country$country.
  `colnames<-`(c("date", top_10_country$country)) %>%
  # Spell out `id.vars` instead of relying on partial matching of `id`.
  melt(id.vars = "date", variable.name = "country")

# One line per country over time, rendered as an interactive plotly chart.
plot_country <- ggplot(
  data = c_measures_melt,
  aes(x = date, y = value, colour = country)
) +
  geom_line()
ggplotly(plot_country)
|
||||
```
|
||||
|
||||
# Sentiment par compteur d'entités
|
||||
|
||||
```{r}
|
||||
# Reshape the per-entity sentiment columns to long format
# (columns: date, entity, value). The original measure column names are kept
# as the entity labels.
c_measures_melt <- c_measures %>%
  select(date, starts_with("FEEL_en_tr--entity")) %>%
  # Spell out `id.vars` instead of relying on partial matching of `id`.
  melt(id.vars = "date", variable.name = "entity")

# One line per entity measure over time, rendered as an interactive plotly
# chart.
plot_entity <- ggplot(
  data = c_measures_melt,
  aes(x = date, y = value, colour = entity)
) +
  geom_line()
ggplotly(plot_entity)
|
||||
```
|
|
@ -1,4 +1,4 @@
|
|||
---
|
||||
---
|
||||
title: "Formatage des données"
|
||||
author: "François Pelletier"
|
||||
date: "07/10/2019"
|
||||
|
@ -32,6 +32,7 @@ top_10_sites <- tbl(con,"core") %>%
|
|||
arrange(desc(n)) %>%
|
||||
head(10) %>%
|
||||
collect()
|
||||
saveRDS(top_10_sites,"top_10_sites.RDS")
|
||||
top_10_sites
|
||||
```
|
||||
|
||||
|
@ -44,6 +45,7 @@ top_10_country <- tbl(con,"core") %>%
|
|||
arrange(desc(n)) %>%
|
||||
head(10) %>%
|
||||
collect()
|
||||
saveRDS(top_10_country,"top_10_country.RDS")
|
||||
top_10_country
|
||||
```
|
||||
|
||||
|
@ -56,7 +58,7 @@ entities_count <- tbl(con,"entities") %>% group_by(uuid,entity_type) %>% count %
|
|||
```
|
||||
|
||||
```{r}
|
||||
entities_count_t <- entities_count %>% reshape2::dcast(uuid~entity_type,fun.aggregate = sum, value.var = "n")
|
||||
entities_count_t <- entities_count %>% reshape2::dcast(uuid~paste0("entity_",entity_type),fun.aggregate = sum, value.var = "n")
|
||||
entities_count_t %>% head(10) %>% glimpse()
|
||||
```
|
||||
|
||||
|
@ -73,7 +75,7 @@ core_features_corpus <- tbl(con,"core") %>% collect() %>%
|
|||
transmute(
|
||||
id=uuid,
|
||||
date=lubridate::as_datetime(published),
|
||||
texts=paste(title_full,text,sep = "\n") %>% substr(start = 1,stop = 1000),
|
||||
texts=paste(title_full,text,sep = "\n"),
|
||||
# Site
|
||||
site_01 = ifelse(site==top_10_sites$site[1],1,0),
|
||||
site_02 = ifelse(site==top_10_sites$site[2],1,0),
|
||||
|
|
Loading…
Reference in a new issue