🚀 Add feature: Instagram and Facebook business posts conversion

This commit is contained in:
François Pelletier 2025-05-21 00:05:43 -04:00
parent 5ee50a0f0f
commit f9e5a6e013
8 changed files with 2357 additions and 33 deletions

View file

@ -1,30 +1,18 @@
import json
import datetime
from typing import Dict, Union, List
import pandas as pd
from app.config import logger
from app.models import ConversionResponse
from app.convert.utils.content_from_file import content_from_file
from app.convert.base_converter import BaseConverter
from app.convert.utils.convert_encoding_meta import convert_encoding_meta
from app.convert.utils.encode_utf8 import encode_utf8
class FacebookBusinessPostsConverter(BaseConverter):
def read_file(self) -> None:
json_file = content_from_file(self.content)
content = convert_encoding_meta(json_file.read())
content = encode_utf8(self.content)
self.datadict = json.loads(content)
def add_metadata(self) -> None:
self.df = self.df.assign(
index="facebook_business_posts",
type="posts",
network="FacebookBusiness"
)
def convert_columns(self) -> None:
posts_medias = []
for post in self.datadict:
data_post_items = post['data']
@ -45,6 +33,15 @@ class FacebookBusinessPostsConverter(BaseConverter):
})
self.df = pd.DataFrame(posts_medias).explode(['chemin'])
def add_metadata(self) -> None:
self.df = self.df.assign(
index="facebook_business_posts",
type="posts",
network="FacebookBusiness"
)
def convert_columns(self) -> None:
self.df['creation_timestamp'] = self.df['creation_timestamp'].astype(
int)

View file

@ -4,26 +4,14 @@ from typing import Union, List, Dict
import pandas as pd
from app.config import logger
from app.models import ConversionResponse
from app.convert.utils.content_from_file import content_from_file
from app.convert.base_converter import BaseConverter
from app.convert.utils.convert_encoding_meta import convert_encoding_meta
from app.convert.utils.encode_utf8 import encode_utf8
class InstagramPostsConverter(BaseConverter):
def read_file(self) -> None:
json_file = content_from_file(self.content)
content = convert_encoding_meta(json_file.read())
content = encode_utf8(self.content)
self.datadict = json.loads(content)
def add_metadata(self) -> None:
self.df = self.df.assign(
index="instagram_posts",
type="posts",
network="Instagram"
)
def convert_columns(self) -> None:
posts_medias = []
for post in self.datadict:
medias = post['media']
@ -45,15 +33,22 @@ class InstagramPostsConverter(BaseConverter):
})
self.df = pd.DataFrame(posts_medias).explode(['chemin'])
self.df['creation_timestamp'] = self.df['creation_timestamp'].astype(
int)
def add_metadata(self) -> None:
self.df = self.df.assign(
index="instagram_posts",
type="posts",
network="Instagram"
)
def convert_columns(self) -> None:
self.df['creation_timestamp'] = (self.df['creation_timestamp']
.astype(int))
def rename_columns(self) -> None:
# No column renaming needed for this converter
pass
self.df = self.df.rename(columns={})
def clean_data(self) -> None:
super().clean_data()
self.df['url'] = ""
self.df.fillna(value="", inplace=True)

View file

@ -0,0 +1,18 @@
import chardet
def encode_utf8(raw_data):
    """Re-encode raw bytes as UTF-8 based on the encoding chardet detects.

    Args:
        raw_data: The bytes to inspect and convert.

    Returns:
        The content of ``raw_data`` decoded with the detected encoding and
        encoded again as UTF-8 bytes.

    Raises:
        ValueError: If the detected encoding is not UTF-8, Latin-1 or ASCII
            (this includes the case where no encoding could be detected).
    """
    # Detect the encoding of the file
    encoding = chardet.detect(raw_data)['encoding']

    # chardet reports Latin-1 under the label "ISO-8859-1", so comparing
    # against the literal 'latin-1' alone never matches. Normalize the label
    # (case-insensitively) to the codec name used for decoding.
    supported_codecs = {
        'utf-8': 'utf-8',
        'latin-1': 'latin-1',
        'iso-8859-1': 'latin-1',
        'ascii': 'ascii',
    }
    codec = supported_codecs.get((encoding or '').lower())
    if codec is None:
        raise ValueError(f"Unsupported encoding: {encoding}")

    # Decode the file based on the detected encoding, then emit UTF-8 bytes
    return raw_data.decode(codec).encode('utf-8')

View file

@ -7,3 +7,4 @@ minio
python-dotenv
xmltodict
markdownify
chardet

View file

@ -28,4 +28,25 @@ Content-Type: application/json
"source_type": "wordpress",
"source_format": "xml",
"filename": "wordpress.xml"
}
### Convert Facebook Business Posts
POST {{baseUrl}}/convert
Content-Type: application/json
{
"source_type": "facebook_business_posts",
"source_format": "json",
"filename": "facebook_business_posts.json"
}
### Convert Instagram Posts
POST {{baseUrl}}/convert
Content-Type: application/json
{
"source_type": "instagram_posts",
"source_format": "json",
"filename": "instagram_posts.json"
}

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -5,6 +5,7 @@ MINIO_ALIAS="minio"
BUCKET_NAME="systeme-retro-testing"
FILE_PATH="$1"
# Check if the file exists
if [ ! -f "$FILE_PATH" ]; then
echo "Error: File $FILE_PATH not found."