50 lines
1.6 KiB
Python
50 lines
1.6 KiB
Python
|
import json
|
||
|
|
||
|
import requests
|
||
|
import bs4 as bs
|
||
|
import pandas as pd
|
||
|
|
||
|
# %% URL of the shared link
|
||
|
# Address of the shared conversation page; empty here, and passed to requests.get below.
url = ""
|
||
|
|
||
|
# %%
|
||
|
# Download the shared page. The timeout keeps the script from hanging forever
# on a stalled connection, and raise_for_status() stops on an HTTP error
# instead of parsing an error page as if it were the conversation.
response = requests.get(url, timeout=30)
response.raise_for_status()
r = response.content

# Parse the raw response bytes with the standard-library HTML parser.
b = bs.BeautifulSoup(r, "html.parser")
|
||
|
|
||
|
# %%
|
||
|
# The page state is read from the JSON text of <script id="__NEXT_DATA__">.
# find() returns the first match (or None), replacing the deprecated
# findAll(...)[0], which failed with a bare IndexError when the tag was absent.
next_data_tag = b.find("script", {"id": "__NEXT_DATA__"})
if next_data_tag is None or next_data_tag.string is None:
    raise ValueError(
        'No <script id="__NEXT_DATA__"> JSON payload found in the page fetched from `url`.'
    )
j = json.loads(next_data_tag.string)
|
||
|
|
||
|
# %%
|
||
|
# Pull the conversation identifiers out of the parsed page state, walking the
# nested dictionaries once instead of repeating the full key path each time.
page_props = j["props"]["pageProps"]
sharedConversationId = page_props['sharedConversationId']
conversation_data = page_props['serverResponse']['data']
title = conversation_data['title']
# Keys view over the mapping; the message loop iterates over it.
mapping_keys = conversation_data['mapping'].keys()
|
||
|
|
||
|
# %%
|
||
|
def _message_row(node, conversation_id, conversation_title):
    """Return the flat row for one mapping node, or None if it has no message.

    Raises KeyError/TypeError when the node lacks one of the expected fields,
    so the caller can decide to skip it.
    """
    msg = node["message"]
    if not msg:
        return None
    return {
        "sharedConversationId": conversation_id,
        "title": conversation_title,
        "id": msg["id"],
        "time": msg["create_time"],
        "role": msg["author"]["role"],
        "parts": msg['content']['parts'],
        "parent": node["parent"],
        "children": node["children"],
    }


# Flatten every mapping node that carries a message into one row per message.
# The mapping is looked up once rather than on every iteration.
mapping = j["props"]["pageProps"]['serverResponse']['data']['mapping']
messages = []
for key in mapping_keys:
    m = mapping[key]
    try:
        row = _message_row(m, sharedConversationId, title)
    except (KeyError, TypeError) as e:
        # Best effort: a node with an unexpected shape is skipped, but only
        # the lookup errors are tolerated and the offending node is named, so
        # genuine bugs are no longer swallowed by a blanket `except Exception`.
        print(f"Skipping mapping node {key}: {e!r}")
        continue
    if row is not None:
        messages.append(row)
|
||
|
# %%
|
||
|
# Build the table with explicit columns so that an empty `messages` list still
# yields a frame with a "time" column; without them sort_values(by=['time'])
# raises KeyError when no message was collected. For non-empty input the
# columns and their order are the same as the row dictionaries' keys.
message_columns = [
    "sharedConversationId",
    "title",
    "id",
    "time",
    "role",
    "parts",
    "parent",
    "children",
]
messages_df = (
    pd.DataFrame(messages, columns=message_columns)
    .sort_values(by=['time'])
    .reset_index(drop=True)
)
# One CSV per shared conversation, named after its id, without the index column.
messages_df.to_csv(f"messages_{sharedConversationId}.csv", index=False)
|