{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Build the combined comments table\n",
    "\n",
    "Reads every path returned by `pf.getListOfFiles(\"data\")`, loads it with `pf.get_comments`, adds the `media` and `post_id` values extracted from the file path, and writes the concatenated result to `refined_data/commentaires_df.csv`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import re\n",
    "import time\n",
    "from pathlib import Path\n",
    "\n",
    "import pandas as pd\n",
    "\n",
    "import parsing_functions as pf"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Configuration: everything a reader might want to tune lives here.\n",
    "DATA_DIR = \"data\"\n",
    "OUTPUT_CSV = \"refined_data/commentaires_df.csv\"\n",
    "\n",
    "# Expected path layout: data/<MEDIA>/comments<post_id>.xlsx\n",
    "# Group 1 is used as the media name, group 2 as the post id.\n",
    "# Compiled once here instead of on every loop iteration.\n",
    "COMMENT_PATH_RE = re.compile(r\"data/([A-Z]+)/comments([0-9a-z\\-]+)\\.xlsx\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Load and tag the comment files"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "listOfFiles = pf.getListOfFiles(DATA_DIR)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "commentaires = []\n",
    "\n",
    "for xlpath in listOfFiles:\n",
    "    path_match = COMMENT_PATH_RE.match(xlpath)\n",
    "    if path_match is None:\n",
    "        # Name the offending file instead of failing with an opaque\n",
    "        # AttributeError raised by calling .groups() on None.\n",
    "        raise ValueError(f\"Unexpected comment file path: {xlpath!r}\")\n",
    "    media, post_id = path_match.groups()\n",
    "\n",
    "    comments_df = pf.get_comments(xlpath)\n",
    "    # Record where each file's comments came from.\n",
    "    comments_df['media'] = media\n",
    "    comments_df['post_id'] = post_id\n",
    "    commentaires.append(comments_df)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Concatenate and export"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "commentaires_df = pd.concat(commentaires, ignore_index=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Create the output directory first so a fresh checkout can Run All.\n",
    "Path(OUTPUT_CSV).parent.mkdir(parents=True, exist_ok=True)\n",
    "commentaires_df.to_csv(OUTPUT_CSV)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}