corrections pour que ça roule encore
This commit is contained in:
parent
e507d1b5f7
commit
732138f916
7 changed files with 116 additions and 127 deletions
6
Makefile
6
Makefile
|
@ -1,7 +1,9 @@
|
|||
build: rapport.md analyse_articles.ipynb commentaires.ipynb textes_articles.ipynb traitement_articles.ipynb traitement_commentaires.ipynb
|
||||
pandoc --filter=pandoc-citeproc --dpi=180 -f markdown+raw_tex+latex_macros rapport.md -o rapport.pdf
|
||||
|
||||
jupyter nbconvert --to script analyse_articles.ipynb
|
||||
jupyter nbconvert --to script commentaires.ipynb
|
||||
jupyter nbconvert --to script textes_articles.ipynb
|
||||
jupyter nbconvert --to script traitement_articles.ipynb
|
||||
jupyter nbconvert --to script traitement_commentaires.ipynb
|
||||
jupyter nbconvert --to script traitement_commentaires.ipynb
|
||||
|
||||
pandoc --citeproc --dpi=180 -f markdown+raw_tex+latex_macros rapport.md -o rapport.pdf
|
||||
|
|
|
@ -22,6 +22,7 @@ pip install git+https://github.com/ClaudeCoulombe/FrenchLefffLemmatizer.git
|
|||
import nltk
|
||||
nltk.download('wordnet')
|
||||
nltk.download('omw')
|
||||
nltk.download('punkt')
|
||||
```
|
||||
|
||||
## Obtenir CoreNLP
|
||||
|
@ -38,7 +39,7 @@ https://stanfordnlp.github.io/CoreNLP/download.html
|
|||
Exécuter cette commande dans le répertoire de CoreNLP
|
||||
|
||||
```bash
|
||||
java -mx4g -cp "*" edu.stanford.nlp.pipeline.StanfordCoreNLPServer -serverProperties StanfordCoreNLP-french.properties -preload tokenize,ssplit,pos,lemma,ner,parse,depparse -status_port 9000 -port 9000 -timeout 15000 &
|
||||
java -mx8g -cp "*" edu.stanford.nlp.pipeline.StanfordCoreNLPServer -serverProperties StanfordCoreNLP-french.properties -preload tokenize,ssplit,pos,lemma,ner,parse,depparse -status_port 9000 -port 9000 -timeout 15000 &
|
||||
```
|
||||
|
||||
## Compilation du rapport en PDF (requiert pandoc et texlive, a installer depuis les paquets de la distribution linux utilisée):
|
||||
|
|
File diff suppressed because one or more lines are too long
|
@ -1,7 +1,7 @@
|
|||
---
|
||||
title: IFT7022 - TP 3 - Commentaires Facebook en lien avec la presse écrite. Revue de littérature et mesure de la pertinence.
|
||||
title: Commentaires Facebook en lien avec la presse écrite. Revue de littérature et mesure de la pertinence.
|
||||
author: François Pelletier
|
||||
date: \today
|
||||
date: Décembre 2019
|
||||
citation_package: natbib
|
||||
numbersections: true
|
||||
toc: yes
|
||||
|
@ -22,6 +22,10 @@ header-includes: |
|
|||
|
||||
\pagebreak
|
||||
|
||||
# Note
|
||||
|
||||
Ce document a été à l'origine produit comme travail de session dans le cadre du cours IFT-7022 Traitement du langage naturel donné par Luc Lamontagne à l'Université Laval en automne 2019.
|
||||
|
||||
# Introduction
|
||||
|
||||
Les commentaires extraits des fils de discussions sur des publications Facebook constituent une nouvelle forme de discours, complètement différent des textes formatés et normalisés qui proviennent de médias écrits. Leurs caractéristiques distinctes en tant que type littéraire à l'intersection de la critique et du dialogue vont demander des adaptations dans le traitement du langage naturel. Les attributs linguistiques, les entités nommées, la prise de position et les relations entre les commentaires seront abordés. Puis, on présente différentes approches de modélisation. Un exemple pratique permettra d'illustrer plusieurs concepts et de tirer quelques premières conclusions.
|
||||
|
|
|
@ -6,15 +6,18 @@
|
|||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"ename": "ModuleNotFoundError",
|
||||
"evalue": "No module named 'newspaper'",
|
||||
"ename": "AttributeError",
|
||||
"evalue": "module 'base64' has no attribute 'decodestring'",
|
||||
"output_type": "error",
|
||||
"traceback": [
|
||||
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
|
||||
"\u001b[0;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)",
|
||||
"\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)",
|
||||
"\u001b[0;32m<ipython-input-1-e35ec13ebf3b>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0;32mimport\u001b[0m \u001b[0mparsing_functions\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mpf\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mre\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mpandas\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mpd\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mtime\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
|
||||
"\u001b[0;32m~/nlp_a2019_tp3/parsing_functions.py\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mrequests\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0murllib\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mparse\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0munquote\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 6\u001b[0;31m \u001b[0;32mimport\u001b[0m \u001b[0mnewspaper\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 7\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 8\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mget_comments\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfile_path\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
|
||||
"\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'newspaper'"
|
||||
"\u001b[0;32m~/.local/lib/python3.9/site-packages/newspaper/__init__.py\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 8\u001b[0m \u001b[0m__copyright__\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m'Copyright 2014, Lucas Ou-Yang'\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 9\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 10\u001b[0;31m from .api import (build, build_article, fulltext, hot, languages,\n\u001b[0m\u001b[1;32m 11\u001b[0m popular_urls, Configuration as Config)\n\u001b[1;32m 12\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0;34m.\u001b[0m\u001b[0marticle\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mArticle\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mArticleException\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
|
||||
"\u001b[0;32m~/.local/lib/python3.9/site-packages/newspaper/api.py\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 10\u001b[0m \u001b[0m__copyright__\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m'Copyright 2014, Lucas Ou-Yang'\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 11\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 12\u001b[0;31m \u001b[0;32mimport\u001b[0m \u001b[0mfeedparser\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 13\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 14\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0;34m.\u001b[0m\u001b[0marticle\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mArticle\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
|
||||
"\u001b[0;32m~/anaconda3/envs/nlp_a2019_tp3/lib/python3.9/site-packages/feedparser.py\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 91\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 92\u001b[0m \u001b[0;31m# Python 3.1 deprecates decodestring in favor of decodebytes\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 93\u001b[0;31m \u001b[0m_base64decode\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mgetattr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mbase64\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'decodebytes'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbase64\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdecodestring\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 94\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 95\u001b[0m \u001b[0;31m# _s2bytes: convert a UTF-8 str to bytes if the interpreter is Python 3\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
|
||||
"\u001b[0;31mAttributeError\u001b[0m: module 'base64' has no attribute 'decodestring'"
|
||||
]
|
||||
}
|
||||
],
|
||||
|
@ -89,7 +92,7 @@
|
|||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.7.3"
|
||||
"version": "3.9.1"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
|
|
@ -14,7 +14,8 @@
|
|||
"import re\n",
|
||||
"import pickle\n",
|
||||
"import emoji\n",
|
||||
"import pretraitement as pr"
|
||||
"import pretraitement as pr\n",
|
||||
"import os"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -83,6 +84,15 @@
|
|||
"textes_articles_df['pos_dict']=[pr.aggreger_pos_tags(article) for article in article_pretraite]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"os.mkdir(\"pickle\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
|
@ -118,7 +128,7 @@
|
|||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.7.4"
|
||||
"version": "3.9.1"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
|
|
@ -235,20 +235,10 @@
|
|||
"commentaires_pretraite = []\n",
|
||||
"compteur=0\n",
|
||||
"for x in list(commentaires_df_names[\"comment_clean\"]):\n",
|
||||
" print(str(compteur)+\": \"+x)\n",
|
||||
" commentaires_pretraite.append(pr.pretraitement(x,tok,ner_tagger,pos_tagger))\n",
|
||||
" compteur += 1"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"commentaires_pretraite"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
|
@ -295,7 +285,7 @@
|
|||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.7.4"
|
||||
"version": "3.9.1"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
|
Loading…
Add table
Reference in a new issue