# Backup ThriveCart Learn

Apache Licence 2.0
Copyright 2023 François Pelletier

## Chargement des informations

In [None]:
import requests
from bs4 import BeautifulSoup
import os

In [None]:
from thrivecart_utils import extract_image_filename_thrivecart

In [None]:
# Set the URL of your course here, exactly as learners access it. It must include the trailing / at the end of the URL.

url = ''

In [None]:
# Set the User-Agent here; these headers are sent with every request below
headers = {
    'User-Agent': ''
}

In [None]:
# Put all the cookies here in a dictionary; change the names as needed.
cookies = {
    'thrivecart_v2': '',
    '__stripe_mid': '',
    'tc_account': '',
    'tcc_v1_23439': ''
}

## Page d'accueil

Dans cette section, on va sauvegarder la page d'accueil de la formation, visible une fois connecté à ThriveCart.

### Téléchargement de la page

In [None]:
# Fetch the course home page, sending the configured User-Agent and cookies
response_home = requests.get(url, headers=headers, cookies=cookies)
# Stop right away on an HTTP error status (4xx/5xx): every later step parses
# this page, so an error page must not be saved and crawled as the backup.
response_home.raise_for_status()
html_content_home = response_home.text

### Enregistrement de la page

In [None]:
# Write the home page to html/index.html, creating the folder if needed
current_dir = 'html'
os.makedirs(current_dir, exist_ok=True)
# Explicit UTF-8: without it open() uses the platform default encoding, which
# can fail or alter non-ASCII characters of the page on some systems.
with open(os.path.join(current_dir, 'index.html'), 'w', encoding='utf-8') as f:
    f.write(html_content_home)

### Soupe de la page

In [None]:
# Parse the downloaded home page markup with the 'html.parser' backend
home_soup = BeautifulSoup(markup=html_content_home, features='html.parser')

### Téléchargement des images de la page

In [None]:
# Collect the source URL of every <img> on the home page and make sure the
# folder that will receive the downloaded files exists.
home_images_dir = os.path.join(current_dir, 'images')
os.makedirs(home_images_dir, exist_ok=True)
home_images = []
for image in home_soup.find_all('img'):
    src = image.get('src')
    # get() returns None when the tag has no src attribute; such entries
    # cannot be downloaded, so they are left out of the list.
    if src:
        home_images.append(src)

In [None]:
# Download every home page image into home_images_dir. A single bad image must
# not abort the whole backup, so failures are reported and the loop goes on.
for home_image in home_images:
    if not home_image:
        # An <img> without a src attribute yields None: nothing to fetch
        continue
    try:
        response_home_image = requests.get(home_image, headers=headers, cookies=cookies)
    except requests.RequestException as error:
        # Home page image URLs are not filtered, so they may be relative or
        # use a scheme requests cannot fetch; network errors land here too.
        print(f"Failed to download the image {home_image}: {error}")
        continue
    if response_home_image.status_code != 200:
        print("Failed to download the image")
        continue
    image_filename = extract_image_filename_thrivecart(home_image)
    # Save the image to a file
    with open(os.path.join(home_images_dir, image_filename), 'wb') as file:
        file.write(response_home_image.content)
    print(f"Image downloaded and saved as {image_filename}")

## Modules

Dans cette section, on va sauvegarder les modules de la formation, accessibles via des liens sur la page d'accueil de la formation.

### Extraction des URLs des modules depuis la page d'accueil

In [None]:
# Collect the links of the home page that point inside the course (they start
# with the course URL) but are not the course home page itself.
module_urls = []
for link in home_soup.find_all('a'):
    href = link.get('href')
    if not href or not href.startswith(url) or href.endswith(url):
        continue
    # The same module can be linked several times on the page; keep one entry,
    # in order of first appearance, so it is only downloaded once.
    if href not in module_urls:
        module_urls.append(href)

In [None]:
# Download every module page and its images, and collect the lesson links
# found on each module page as (module folder name, lesson URL) pairs.
lessons_urls = []
# Root folder for all module backups; it does not change between modules
current_dir = 'html/modules/'
for module_url in module_urls:
    print(module_url)

    # Send a GET request to the URL with the user agent and cookies
    response_module = requests.get(module_url, headers=headers, cookies=cookies)
    if response_module.status_code != 200:
        # Do not store or crawl an error page as if it were the module
        print(f"Failed to download the module page: {module_url}")
        continue
    html_content_module = response_module.text

    # Save the page. The folder name is the second-to-last '/'-separated
    # piece of the URL, i.e. the last path segment when the URL ends with /.
    current_module = module_url.split('/')[-2]
    module_dir = os.path.join(current_dir, current_module)
    os.makedirs(module_dir, exist_ok=True)
    # Explicit UTF-8 so the page is written the same way on every platform
    with open(os.path.join(module_dir, 'index.html'), 'w', encoding='utf-8') as f:
        f.write(html_content_module)

    # Create a BeautifulSoup object with the HTML content
    module_soup = BeautifulSoup(html_content_module, 'html.parser')

    # Keep the images whose URL starts with the spark.thrivecart.com prefix.
    # get() returns None for an <img> without src, hence the truthiness test.
    module_images = []
    for image in module_soup.find_all('img'):
        src = image.get('src')
        if src and src.startswith("https://spark.thrivecart.com/"):
            module_images.append(src)

    # Download the selected images into the module's images folder
    module_images_dir = os.path.join(module_dir, 'images')
    os.makedirs(module_images_dir, exist_ok=True)
    for module_image in module_images:
        response_module_image = requests.get(module_image, headers=headers, cookies=cookies)
        if response_module_image.status_code != 200:
            print(f"Failed to download the image: {module_image}")
            continue
        image_filename = extract_image_filename_thrivecart(module_image)
        # Save the image to a file
        with open(os.path.join(module_images_dir, image_filename), 'wb') as file:
            file.write(response_module_image.content)

    # Lesson links are the links inside the course that are neither the course
    # home page nor one of the module pages.
    for link in module_soup.find_all('a'):
        lesson_url = link.get('href')
        if not lesson_url or not lesson_url.startswith(url) or lesson_url.endswith(url):
            continue
        if lesson_url in module_urls:
            continue
        lesson_entry = (current_module, lesson_url)
        # A lesson may be linked more than once on the same module page
        if lesson_entry not in lessons_urls:
            lessons_urls.append(lesson_entry)

## Leçons

Dans cette section, on va sauvegarder les leçons de la formation, accessibles via des liens dans chaque module de la formation.

### Téléchargement des pages et des images des leçons

In [None]:
# Download every lesson page and its images into the folder of its module.
# Each entry of lessons_urls is a (module folder name, lesson URL) pair.
for current_module, lesson_url in lessons_urls:
    # Send a GET request to the URL with the user agent and cookies
    response_lesson = requests.get(lesson_url, headers=headers, cookies=cookies)
    if response_lesson.status_code != 200:
        # Do not store an error page as if it were the lesson
        print(f'Échec du téléchargement: {lesson_url}')
        continue
    html_content_lesson = response_lesson.text

    # Save the page. The folder name is the second-to-last '/'-separated
    # piece of the URL, i.e. the last path segment when the URL ends with /.
    current_dir = os.path.join('html/modules/', current_module)
    current_lesson = lesson_url.split('/')[-2]
    lesson_dir = os.path.join(current_dir, current_lesson)
    os.makedirs(lesson_dir, exist_ok=True)
    # Explicit UTF-8 so the page is written the same way on every platform
    with open(os.path.join(lesson_dir, 'index.html'), 'w', encoding='utf-8') as f:
        f.write(html_content_lesson)
    print(f'Fichier enregistré: {current_lesson}/index.html')

    # Create a BeautifulSoup object with the HTML content
    lesson_soup = BeautifulSoup(html_content_lesson, 'html.parser')

    # Keep the images whose URL starts with the spark.thrivecart.com prefix.
    # get() returns None for an <img> without src, hence the truthiness test.
    lesson_images = []
    for image in lesson_soup.find_all('img'):
        src = image.get('src')
        if src and src.startswith("https://spark.thrivecart.com/"):
            lesson_images.append(src)

    # Download the selected images into the lesson's images folder
    lesson_images_dir = os.path.join(lesson_dir, 'images')
    os.makedirs(lesson_images_dir, exist_ok=True)
    for lesson_image in lesson_images:
        response_lesson_image = requests.get(lesson_image, headers=headers, cookies=cookies)
        if response_lesson_image.status_code != 200:
            # Same check as for the home page and module images: the body of
            # an error response must not be written out as an image file.
            print(f'Échec du téléchargement: {lesson_image}')
            continue
        image_filename = extract_image_filename_thrivecart(lesson_image)
        # Save the image to a file
        with open(os.path.join(lesson_images_dir, image_filename), 'wb') as file:
            file.write(response_lesson_image.content)
        print(f'Fichier enregistré: {image_filename}')