"""Export the messages of a shared conversation page to a CSV file.

The script downloads the page at ``url``, reads the JSON embedded in its
``__NEXT_DATA__`` script tag, turns every mapping node that carries a
message into one row, and writes the rows (sorted by creation time) to
``messages_<sharedConversationId>.csv``.
"""

import json

import bs4 as bs
import pandas as pd
import requests

# Upper bound (seconds) for the HTTP request so the script cannot hang forever.
REQUEST_TIMEOUT_SECONDS = 30

# Column order of the exported CSV. Declaring it explicitly keeps the "time"
# column present (and sortable) even when no message could be extracted.
MESSAGE_COLUMNS = [
    "sharedConversationId",
    "title",
    "id",
    "time",
    "role",
    "parts",
    "parent",
    "children",
]

# %% URL of the shared link
url = ""

# %% Download the page and parse the HTML
response = requests.get(url, timeout=REQUEST_TIMEOUT_SECONDS)
# Fail early on HTTP errors instead of trying to parse an error page.
response.raise_for_status()
r = response.content
b = bs.BeautifulSoup(r, "html.parser")

# %% Load the JSON payload embedded in the page
next_data_tag = b.find("script", {"id": "__NEXT_DATA__"})
if next_data_tag is None or next_data_tag.string is None:
    raise ValueError(
        f"No usable <script id=\"__NEXT_DATA__\"> tag found at {url!r}"
    )
j = json.loads(next_data_tag.string)

# %% Pull out the conversation-level fields
page_props = j["props"]["pageProps"]
conversation = page_props["serverResponse"]["data"]
sharedConversationId = page_props["sharedConversationId"]
title = conversation["title"]
mapping = conversation["mapping"]
mapping_keys = mapping.keys()

# %% Build one row per mapping node that carries a message
messages = []
for key in mapping_keys:
    m = mapping[key]
    try:
        node_message = m["message"]
        if not node_message:
            # Nodes with an empty/None message contribute no row.
            continue
        messages.append(
            {
                "sharedConversationId": sharedConversationId,
                "title": title,
                "id": node_message["id"],
                "time": node_message["create_time"],
                "role": node_message["author"]["role"],
                "parts": node_message["content"]["parts"],
                "parent": m["parent"],
                "children": m["children"],
            }
        )
    except (KeyError, TypeError) as e:
        # Best effort: a node that lacks one of the expected fields is
        # reported and skipped rather than aborting the whole export.
        print(f"Skipping mapping node {key!r}: {e!r}")

# %% Sort chronologically and write the CSV
messages_df = (
    pd.DataFrame(messages, columns=MESSAGE_COLUMNS)
    .sort_values(by=["time"])
    .reset_index(drop=True)
)
messages_df.to_csv(f"messages_{sharedConversationId}.csv", index=False)