Initial commit
This commit is contained in:
commit
922ee57e65
3 changed files with 56 additions and 0 deletions
3
.gitignore
vendored
Normal file
3
.gitignore
vendored
Normal file
|
@ -0,0 +1,3 @@
|
|||
/messages_a778dec5-8e80-43e0-a1e7-f1019ef4e5e8.csv
|
||||
/chatgpt-backup.iml
|
||||
/.idea/
|
50
extract.py
Normal file
50
extract.py
Normal file
|
@ -0,0 +1,50 @@
|
|||
import json
|
||||
|
||||
import requests
|
||||
import bs4 as bs
|
||||
import pandas as pd
|
||||
|
||||
# %% URL of the shared link
url = ""

# Seconds to wait for the HTTP response before giving up.
REQUEST_TIMEOUT = 30

# Column order of the exported CSV. Passing it to the DataFrame constructor
# keeps the "time" column present even when no message was collected, so the
# sort below cannot fail on an empty conversation.
MESSAGE_COLUMNS = [
    "sharedConversationId",
    "title",
    "id",
    "time",
    "role",
    "parts",
    "parent",
    "children",
]


# %%
def fetch_page_data(page_url, timeout=REQUEST_TIMEOUT):
    """Download the shared page and return its embedded ``__NEXT_DATA__`` JSON.

    Args:
        page_url: Address of the shared conversation page.
        timeout: Seconds to wait for the server before aborting.

    Returns:
        The decoded JSON payload of the ``<script id="__NEXT_DATA__">`` tag.

    Raises:
        ValueError: If ``page_url`` is empty or the page has no usable
            ``__NEXT_DATA__`` script tag.
        requests.HTTPError: If the server answers with an error status.
    """
    if not page_url:
        raise ValueError("`url` is empty: set it to the shared link first.")

    response = requests.get(page_url, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of parsing an error page.
    response.raise_for_status()

    soup = bs.BeautifulSoup(response.content, "html.parser")
    script = soup.find("script", {"id": "__NEXT_DATA__"})
    if script is None or not script.string:
        raise ValueError(f"No __NEXT_DATA__ script tag found at {page_url!r}")
    return json.loads(script.string)


# %%
def collect_messages(page_data):
    """Flatten the conversation mapping of ``page_data`` into a list of rows.

    Mapping nodes whose ``message`` is empty are ignored. Nodes lacking one of
    the expected fields are skipped (best effort) and reported on stdout.

    Args:
        page_data: Payload as returned by ``fetch_page_data``.

    Returns:
        A list of dicts keyed by ``MESSAGE_COLUMNS``, in mapping order.
    """
    page_props = page_data["props"]["pageProps"]
    conversation_id = page_props["sharedConversationId"]
    conversation = page_props["serverResponse"]["data"]
    title = conversation["title"]

    rows = []
    for node_id, node in conversation["mapping"].items():
        try:
            message = node["message"]
            if not message:
                continue
            row = {
                "sharedConversationId": conversation_id,
                "title": title,
                "id": message["id"],
                "time": message["create_time"],
                "role": message["author"]["role"],
                "parts": message["content"]["parts"],
                "parent": node["parent"],
                "children": node["children"],
            }
        except (KeyError, TypeError) as exc:
            # Only structural problems of a single node are tolerated; any
            # other exception is a real bug and must surface.
            print(f"Skipping mapping node {node_id}: missing field {exc}")
            continue
        rows.append(row)
    return rows


# %%
def build_messages_frame(rows):
    """Return ``rows`` as a DataFrame sorted by ``time`` with a fresh index."""
    frame = pd.DataFrame(rows, columns=MESSAGE_COLUMNS)
    return frame.sort_values(by=["time"]).reset_index(drop=True)


def export_conversation(page_url):
    """Fetch the shared conversation and write ``messages_<id>.csv``.

    Returns:
        The DataFrame that was written to disk.
    """
    page_data = fetch_page_data(page_url)
    conversation_id = page_data["props"]["pageProps"]["sharedConversationId"]
    frame = build_messages_frame(collect_messages(page_data))
    frame.to_csv(f"messages_{conversation_id}.csv", index=False)
    return frame


# %%
if __name__ == "__main__":
    messages_df = export_conversation(url)
|
3
requirements.txt
Normal file
3
requirements.txt
Normal file
|
@ -0,0 +1,3 @@
|
|||
requests~=2.31.0
|
||||
beautifulsoup4~=4.12.2
|
||||
pandas~=2.1.2
|
Loading…
Reference in a new issue