libere-tes-chaine-de-mots/import_data/32_importation_linkedin_comments.py

import pandas as pd
import datetime

from utils.documents_to_database import documents_to_database

# In[ ]:
# Location of the LinkedIn comments export; also recorded on every row as `chemin`.
linkedin_data_path = "data/LinkedIn/comments/Comments-FIX.csv"

# In[ ]:
raw_comments_csv = pd.read_csv(linkedin_data_path, encoding='utf-8')
# pandas loads empty cells as NaN by default, so a bare `!= ""` comparison
# lets message-less rows through. Exclude missing AND empty messages here so
# such rows never reach the per-row date parsing below.
has_message = raw_comments_csv['MessageFix'].notna() & (raw_comments_csv['MessageFix'] != "")
raw_comments = raw_comments_csv[has_message].drop_duplicates()

# In[ ]:
# Constant metadata columns attached to every row. `assign` returns a new
# frame, which avoids writing into a filtered view of the CSV frame.
raw_comments = raw_comments.assign(
    index="rs_linkedin_comments",
    type="comments",
    network="LinkedIn",
    chemin=linkedin_data_path,
)

# In[ ]:
# Normalise `Date` into an ISO-8601 string stored as `datepublication`,
# then drop the original column.
raw_comments["datepublication"] = raw_comments["Date"].apply(
    lambda value: datetime.datetime.fromisoformat(str(value)).isoformat())
raw_comments = raw_comments.drop(columns="Date")

# In[ ]:
# Rename the source columns to the field names used downstream.
raw_comments = raw_comments.rename(columns={"Link": "uri", "MessageFix": "texte"})

# In[ ]:
# Replace any remaining missing values with empty strings.
raw_comments = raw_comments.fillna(value="")

# In[ ]:
# Keep a single row per (texte, datepublication) pair.
raw_comments = raw_comments.drop_duplicates(subset=['texte', 'datepublication'])

# In[ ]:
# Filter empty texte: `str.strip()` with no argument already removes newlines
# along with other surrounding whitespace.
raw_comments = raw_comments[raw_comments['texte'].str.strip().ne('')]

# In[ ]:
documents_to_database(raw_comments)
Version initiale pour importer les données 2024-07-19 00:04:51 +00:00			`import pandas as pd`
			`import datetime`

			`from utils.documents_to_database import documents_to_database`

			`# In[ ]:`
importation corrigée des commentaires LinkedIn 2024-07-19 01:54:15 +00:00			`linkedin_data_path = "data/LinkedIn/comments/Comments-FIX.csv"`
Version initiale pour importer les données 2024-07-19 00:04:51 +00:00
			`# In[ ]:`
importation corrigée des commentaires LinkedIn 2024-07-19 01:54:15 +00:00			`raw_comments_csv = pd.read_csv(linkedin_data_path, encoding='utf-8')`
			`raw_comments = raw_comments_csv[(raw_comments_csv['MessageFix'] != "")].drop_duplicates()`
Version initiale pour importer les données 2024-07-19 00:04:51 +00:00
			`# In[ ]:`
			`raw_comments['index'] = "rs_linkedin_comments"`
			`raw_comments['type'] = "comments"`
			`raw_comments['network'] = "LinkedIn"`
			`raw_comments['chemin'] = linkedin_data_path`

			`# In[ ]:`
			`raw_comments["datepublication"] = raw_comments["Date"].apply(`
			`lambda x: str(datetime.datetime.fromisoformat(str(x)).isoformat()))`
			`del raw_comments["Date"]`

			`# In[ ]:`
importation corrigée des commentaires LinkedIn 2024-07-19 01:54:15 +00:00			`raw_comments.rename(columns={"Link": "uri", "MessageFix": "texte"}, inplace=True)`
Version initiale pour importer les données 2024-07-19 00:04:51 +00:00
			`# In[ ]:`
			`raw_comments["chemin"] = linkedin_data_path`

			`# In[ ]:`
			`raw_comments.fillna(value="", inplace=True)`

			`# In[ ]:`
			`raw_comments.drop_duplicates(subset=['texte', 'datepublication'], inplace=True)`

			`# In[ ]:`
			`# Filter empty texte`
			`raw_comments = raw_comments[~raw_comments['texte'].str.strip('\n').str.strip().eq('')]`

			`# In[ ]:`
			`documents_to_database(raw_comments)`