2019-12-16 01:31:38 +00:00
|
|
|
{
|
|
|
|
"cells": [
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
2019-12-16 23:25:47 +00:00
|
|
|
"execution_count": null,
|
2019-12-16 01:31:38 +00:00
|
|
|
"metadata": {},
|
|
|
|
"outputs": [],
|
|
|
|
"source": [
|
|
|
"import os\n",
"import re\n",
"import time\n",
"\n",
"import pandas as pd\n",
"\n",
"import parsing_functions as pf"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
2019-12-16 23:25:47 +00:00
|
|
|
"execution_count": null,
|
2019-12-16 01:31:38 +00:00
|
|
|
"metadata": {},
|
|
|
|
"outputs": [],
|
|
|
|
"source": [
|
|
|
"# Gather the input paths to process. pf.getListOfFiles is a project helper;\n",
"# presumably it returns the paths of the files found under \"data\" -- TODO confirm.\n",
"listOfFiles = pf.getListOfFiles(\"data\")"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
2019-12-16 23:25:47 +00:00
|
|
|
"execution_count": null,
|
2019-12-16 01:31:38 +00:00
|
|
|
"metadata": {},
|
|
|
|
"outputs": [],
|
|
|
|
"source": [
|
|
|
"# Paths are expected to look like data/<MEDIA>/comments<post-id>.xlsx, where\n",
"# <MEDIA> is upper-case letters and <post-id> is digits, lower-case letters or '-'.\n",
"COMMENT_PATH_RE = re.compile(r\"data/([A-Z]+)/comments([0-9a-z\\-]+)\\.xlsx\")\n",
"\n",
"commentaires = []\n",
"skipped_paths = []\n",
"\n",
"for xlpath in listOfFiles:\n",
"    # re.match returns None when a path does not follow the expected layout;\n",
"    # record and skip it rather than failing on None.groups().\n",
"    path_match = COMMENT_PATH_RE.match(xlpath)\n",
"    if path_match is None:\n",
"        skipped_paths.append(xlpath)\n",
"        continue\n",
"    media, post_id = path_match.groups()\n",
"    # pf.get_comments is a project helper; assumed to return a DataFrame -- TODO confirm.\n",
"    comments_df = pf.get_comments(xlpath)\n",
"    # Tag every row with the media and post id parsed from the file path.\n",
"    comments_df['media'] = media\n",
"    comments_df['post_id'] = post_id\n",
"    commentaires.append(comments_df)\n",
"\n",
"# Surface skipped inputs so missing data does not go unnoticed.\n",
"if skipped_paths:\n",
"    print(f\"Skipped {len(skipped_paths)} file(s) with an unexpected path: {skipped_paths}\")"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
2019-12-16 23:25:47 +00:00
|
|
|
"execution_count": null,
|
2019-12-16 01:31:38 +00:00
|
|
|
"metadata": {},
|
|
|
|
"outputs": [],
|
|
|
|
"source": [
|
|
|
"# Stack the per-file frames into one table; ignore_index=True discards the\n",
"# per-file indexes and numbers the combined rows from 0.\n",
"commentaires_df = pd.concat(objs=commentaires, ignore_index=True)"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
2019-12-16 23:25:47 +00:00
|
|
|
"execution_count": null,
|
2019-12-16 01:31:38 +00:00
|
|
|
"metadata": {},
|
|
|
|
"outputs": [],
|
|
|
|
"source": [
|
|
|
"# to_csv does not create missing parent directories, so make sure the output\n",
"# folder exists before writing (a no-op when it is already there).\n",
"os.makedirs(\"refined_data\", exist_ok=True)\n",
"commentaires_df.to_csv(\"refined_data/commentaires_df.csv\")"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
|
|
|
"execution_count": null,
|
|
|
|
"metadata": {},
|
|
|
|
"outputs": [],
|
|
|
|
"source": []
|
|
|
|
}
|
|
|
|
],
|
|
|
|
"metadata": {
|
|
|
|
"kernelspec": {
|
|
|
|
"display_name": "Python 3",
|
|
|
|
"language": "python",
|
|
|
|
"name": "python3"
|
|
|
|
},
|
|
|
|
"language_info": {
|
|
|
|
"codemirror_mode": {
|
|
|
|
"name": "ipython",
|
|
|
|
"version": 3
|
|
|
|
},
|
|
|
|
"file_extension": ".py",
|
|
|
|
"mimetype": "text/x-python",
|
|
|
|
"name": "python",
|
|
|
|
"nbconvert_exporter": "python",
|
|
|
|
"pygments_lexer": "ipython3",
|
|
|
|
"version": "3.7.3"
|
|
|
|
}
|
|
|
|
},
|
|
|
|
"nbformat": 4,
|
|
|
|
"nbformat_minor": 4
|
|
|
|
}
|