diff --git a/.gitignore b/.gitignore index 758b29e..f536467 100644 --- a/.gitignore +++ b/.gitignore @@ -123,3 +123,5 @@ dmypy.json *.Rproj *.pdf +# fichiers latex +*.tex diff --git a/Analyse_Articles.ipynb b/Analyse_Articles.ipynb index d24f248..0a5775a 100644 --- a/Analyse_Articles.ipynb +++ b/Analyse_Articles.ipynb @@ -1,8 +1,19 @@ { "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Analyse des données pour le rapport\n", + "\n", + "## Lecture des fichiers de données et affichage d'un échantillon de données\n", + "\n", + "### Articles" + ] + }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -11,7 +22,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ @@ -22,16 +33,191 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " | media | \n", + "post_id | \n", + "text | \n", + "ner_dict | \n", + "pos_dict | \n", + "
---|---|---|---|---|---|
0 | \n", + "FIG | \n", + "5dc7ac7f359e2-10157143278136339 | \n", + "L'ancien international de football Vikash Dhor... | \n", + "{('Vikash', 'PERSON'): 2, ('Dhorasoo', 'PERSON... | \n", + "{('ancien', 'ADJ'): 3, ('international', 'NOUN... | \n", + "
1 | \n", + "FIG | \n", + "5dc7acd0d44b1-10157142962296339 | \n", + "Les personnes qui iront manifester dimanche 10... | \n", + "{('10', 'NUMBER'): 2, ('La', 'ORGANIZATION'): ... | \n", + "{('personnes', 'NOUN'): 2, ('iront', 'VERB'): ... | \n", + "
2 | \n", + "FIG | \n", + "5dc7adde8bd8e-10157142482251339 | \n", + "Selon Jason Farago, la Joconde prend le musée ... | \n", + "{('Jason', 'PERSON'): 8, ('Farago', 'PERSON'):... | \n", + "{('Jason', 'PROPN'): 8, ('Farago', 'PROPN'): 8... | \n", + "
3 | \n", + "FIG | \n", + "5dc7ab8df19a0-10157144491741339 | \n", + "We're just checking that you want to follow a ... | \n", + "{} | \n", + "{('We', 'PROPN'): 1, ('just', 'PROPN'): 1, ('c... | \n", + "
4 | \n", + "FIG | \n", + "5dc7ac188a6d6-10157143773291339 | \n", + "Les défections se sont enchaînées, et peu de p... | \n", + "{('Jean-Luc', 'PERSON'): 3, ('Mélenchon', 'PER... | \n", + "{('défections', 'NOUN'): 2, ('enchaînées', 'VE... | \n", + "
5 | \n", + "FIG | \n", + "5dc7ac51516dc-10157143472656339 | \n", + "We're just checking that you want to follow a ... | \n", + "{} | \n", + "{('We', 'PROPN'): 1, ('just', 'PROPN'): 1, ('c... | \n", + "
6 | \n", + "FIG | \n", + "5dc7ab9fe4530-10157144373586339 | \n", + "FIGAROVOX/TRIBUNE - Les derniers chiffres offi... | \n", + "{('Claude', 'PERSON'): 2, ('Goasguen', 'PERSON... | \n", + "{('FIGAROVOX', 'PROPN'): 1, ('TRIBUNE', 'NOUN'... | \n", + "
7 | \n", + "FIG | \n", + "5dc7ae3950eea-10157141592561339 | \n", + "La DGSI est chef de file de la lutte antiterro... | \n", + "{('France', 'LOCATION'): 1, ('1200', 'DATE'): ... | \n", + "{('DGSI', 'PROPN'): 2, ('est', 'VERB'): 2, ('c... | \n", + "
8 | \n", + "FIG | \n", + "5dc7ac9063012-10157143218116339 | \n", + "Le voyage en Chine est devenu en ce début de X... | \n", + "{('Chine', 'ORGANIZATION'): 1, ('New', 'LOCATI... | \n", + "{('voyage', 'NOUN'): 3, ('Chine', 'PROPN'): 1,... | \n", + "
9 | \n", + "FIG | \n", + "5dc7adf1bf8ff-10157142446816339 | \n", + "Les nouvelles habitudes de consommation font s... | \n", + "{('Carrefour', 'ORGANIZATION'): 2, ('Auchan', ... | \n", + "{('nouvelles', 'NOUN'): 1, ('habitudes', 'ADJ'... | \n", + "
\n", + " | comment_id | \n", + "nested_id | \n", + "name | \n", + "id | \n", + "date | \n", + "likes | \n", + "comment | \n", + "media | \n", + "post_id | \n", + "list_names | \n", + "auteurs_referes | \n", + "comment_clean | \n", + "ner_dict | \n", + "pos_dict | \n", + "emoji_dict | \n", + "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", + "1.0 | \n", + "0 | \n", + "Ycf Bullit | \n", + "ID: 100000615866313 | \n", + "2019-11-09 14:17:13 | \n", + "0 | \n", + "C'est une blague mdr 🤣🤣🤣🤣🤣 | \n", + "FIG | \n", + "5dc7ac7f359e2-10157143278136339 | \n", + "[Ycf Bullit] | \n", + "[] | \n", + "C'est une blague mdr 🤣🤣🤣🤣🤣 | \n", + "{} | \n", + "{('est', 'VERB'): 1, ('blague', 'NOUN'): 1, ('... | \n", + "{':rolling_on_the_floor_laughing:': [5, 6, 7]} | \n", + "
1 | \n", + "2.0 | \n", + "0 | \n", + "Steph Alcazar | \n", + "ID: 100001175077263 | \n", + "2019-11-09 14:17:34 | \n", + "0 | \n", + "La seule question c'est de savoir s'il fera pl... | \n", + "FIG | \n", + "5dc7ac7f359e2-10157143278136339 | \n", + "[Steph Alcazar] | \n", + "[] | \n", + "La seule question c'est de savoir s'il fera pl... | \n", + "{} | \n", + "{('seule', 'ADJ'): 1, ('question', 'NOUN'): 1,... | \n", + "{} | \n", + "
2 | \n", + "3.0 | \n", + "0 | \n", + "Töm Müstäine | \n", + "ID: 1365879404 | \n", + "2019-11-09 14:17:51 | \n", + "0 | \n", + "Romain Debrigode l info du jour qui fait plaise | \n", + "FIG | \n", + "5dc7ac7f359e2-10157143278136339 | \n", + "[Töm Müstäine] | \n", + "[] | \n", + "Romain Debrigode l info du jour qui fait plaise | \n", + "{('Romain', 'PERSON'): 1, ('Debrigode', 'PERSO... | \n", + "{('Romain', 'PROPN'): 1, ('Debrigode', 'PROPN'... | \n", + "{} | \n", + "
3 | \n", + "4.0 | \n", + "0 | \n", + "Pierre Crouzet | \n", + "ID: 100000270292007 | \n", + "2019-11-09 14:18:06 | \n", + "0 | \n", + "Vasanth Toure 😍 | \n", + "FIG | \n", + "5dc7ac7f359e2-10157143278136339 | \n", + "[Pierre Crouzet, Vasanth Toure] | \n", + "['Vasanth Toure'] | \n", + "😍 | \n", + "{} | \n", + "{} | \n", + "{} | \n", + "
4 | \n", + "4.0 | \n", + "1 | \n", + "Vasanth Toure | \n", + "ID: 100001494607801 | \n", + "2019-11-09 14:20:57 | \n", + "0 | \n", + "Pierre Crouzet Paris n'est pas prêt encore... | \n", + "FIG | \n", + "5dc7ac7f359e2-10157143278136339 | \n", + "[Pierre Crouzet, Vasanth Toure] | \n", + "['Pierre Crouzet'] | \n", + "Paris n'est pas prêt encore... | \n", + "{('Paris', 'LOCATION'): 1} | \n", + "{('Paris', 'PROPN'): 1, ('est', 'VERB'): 1, ('... | \n", + "{} | \n", + "
5 | \n", + "4.0 | \n", + "2 | \n", + "Pierre Crouzet | \n", + "ID: 100000270292007 | \n", + "2019-11-09 14:26:37 | \n", + "0 | \n", + "Vasanth Toure le prochain c’est Adrien Rabiot | \n", + "FIG | \n", + "5dc7ac7f359e2-10157143278136339 | \n", + "[Pierre Crouzet, Vasanth Toure] | \n", + "['Vasanth Toure'] | \n", + "le prochain c’est Adrien Rabiot | \n", + "{('Adrien', 'PERSON'): 1, ('Rabiot', 'PERSON')... | \n", + "{('prochain', 'ADJ'): 1, ('Adrien', 'PROPN'): ... | \n", + "{} | \n", + "
6 | \n", + "5.0 | \n", + "0 | \n", + "Stéphane Pirnaci | \n", + "ID: 100008541367302 | \n", + "2019-11-09 14:18:51 | \n", + "0 | \n", + "Mdr | \n", + "FIG | \n", + "5dc7ac7f359e2-10157143278136339 | \n", + "[Stéphane Pirnaci] | \n", + "[] | \n", + "Mdr | \n", + "{} | \n", + "{} | \n", + "{} | \n", + "
7 | \n", + "6.0 | \n", + "0 | \n", + "Adil Bennani | \n", + "ID: 100006432917292 | \n", + "2019-11-09 14:19:03 | \n", + "0 | \n", + "moi je propose mamadou sissoko | \n", + "FIG | \n", + "5dc7ac7f359e2-10157143278136339 | \n", + "[Adil Bennani] | \n", + "[] | \n", + "moi je propose mamadou sissoko | \n", + "{} | \n", + "{('propose', 'VERB'): 1, ('mamadou', 'NOUN'): ... | \n", + "{} | \n", + "
8 | \n", + "7.0 | \n", + "0 | \n", + "Hadrien De Cournon | \n", + "ID: 1131290552 | \n", + "2019-11-09 14:19:09 | \n", + "0 | \n", + "Louis Prt Corentin Corman Victor Mdv ah ouais? | \n", + "FIG | \n", + "5dc7ac7f359e2-10157143278136339 | \n", + "[Hadrien De Cournon] | \n", + "[] | \n", + "Louis Prt Corentin Corman Victor Mdv ah ouais? | \n", + "{('Louis', 'PERSON'): 1, ('Prt', 'PERSON'): 1,... | \n", + "{('Louis', 'PROPN'): 1, ('Prt', 'PROPN'): 1, (... | \n", + "{} | \n", + "
9 | \n", + "8.0 | \n", + "0 | \n", + "Marwa Larose | \n", + "ID: 100022577589611 | \n", + "2019-11-09 14:19:38 | \n", + "0 | \n", + "Marier le foot à la mairie est génial | \n", + "FIG | \n", + "5dc7ac7f359e2-10157143278136339 | \n", + "[Marwa Larose] | \n", + "[] | \n", + "Marier le foot à la mairie est génial | \n", + "{('Marier', 'PERSON'): 1} | \n", + "{('Marier', 'VERB'): 1, ('foot', 'NOUN'): 1, (... | \n", + "{} | \n", + "