🚀 Add feature: Instagram and Facebook business posts conversion
This commit is contained in:
parent
5ee50a0f0f
commit
f9e5a6e013
8 changed files with 2357 additions and 33 deletions
|
@ -1,30 +1,18 @@
|
|||
import json
|
||||
import datetime
|
||||
from typing import Dict, Union, List
|
||||
|
||||
import pandas as pd
|
||||
|
||||
from app.config import logger
|
||||
from app.models import ConversionResponse
|
||||
from app.convert.utils.content_from_file import content_from_file
|
||||
from app.convert.base_converter import BaseConverter
|
||||
from app.convert.utils.convert_encoding_meta import convert_encoding_meta
|
||||
from app.convert.utils.encode_utf8 import encode_utf8
|
||||
|
||||
|
||||
class FacebookBusinessPostsConverter(BaseConverter):
|
||||
def read_file(self) -> None:
|
||||
json_file = content_from_file(self.content)
|
||||
content = convert_encoding_meta(json_file.read())
|
||||
content = encode_utf8(self.content)
|
||||
self.datadict = json.loads(content)
|
||||
|
||||
def add_metadata(self) -> None:
|
||||
self.df = self.df.assign(
|
||||
index="facebook_business_posts",
|
||||
type="posts",
|
||||
network="FacebookBusiness"
|
||||
)
|
||||
|
||||
def convert_columns(self) -> None:
|
||||
posts_medias = []
|
||||
for post in self.datadict:
|
||||
data_post_items = post['data']
|
||||
|
@ -45,6 +33,15 @@ class FacebookBusinessPostsConverter(BaseConverter):
|
|||
})
|
||||
|
||||
self.df = pd.DataFrame(posts_medias).explode(['chemin'])
|
||||
|
||||
def add_metadata(self) -> None:
|
||||
self.df = self.df.assign(
|
||||
index="facebook_business_posts",
|
||||
type="posts",
|
||||
network="FacebookBusiness"
|
||||
)
|
||||
|
||||
def convert_columns(self) -> None:
|
||||
self.df['creation_timestamp'] = self.df['creation_timestamp'].astype(
|
||||
int)
|
||||
|
||||
|
|
|
@ -4,26 +4,14 @@ from typing import Union, List, Dict
|
|||
import pandas as pd
|
||||
|
||||
from app.config import logger
|
||||
from app.models import ConversionResponse
|
||||
from app.convert.utils.content_from_file import content_from_file
|
||||
from app.convert.base_converter import BaseConverter
|
||||
from app.convert.utils.convert_encoding_meta import convert_encoding_meta
|
||||
from app.convert.utils.encode_utf8 import encode_utf8
|
||||
|
||||
|
||||
class InstagramPostsConverter(BaseConverter):
|
||||
def read_file(self) -> None:
|
||||
json_file = content_from_file(self.content)
|
||||
content = convert_encoding_meta(json_file.read())
|
||||
content = encode_utf8(self.content)
|
||||
self.datadict = json.loads(content)
|
||||
|
||||
def add_metadata(self) -> None:
|
||||
self.df = self.df.assign(
|
||||
index="instagram_posts",
|
||||
type="posts",
|
||||
network="Instagram"
|
||||
)
|
||||
|
||||
def convert_columns(self) -> None:
|
||||
posts_medias = []
|
||||
for post in self.datadict:
|
||||
medias = post['media']
|
||||
|
@ -45,15 +33,22 @@ class InstagramPostsConverter(BaseConverter):
|
|||
})
|
||||
|
||||
self.df = pd.DataFrame(posts_medias).explode(['chemin'])
|
||||
self.df['creation_timestamp'] = self.df['creation_timestamp'].astype(
|
||||
int)
|
||||
|
||||
def add_metadata(self) -> None:
|
||||
self.df = self.df.assign(
|
||||
index="instagram_posts",
|
||||
type="posts",
|
||||
network="Instagram"
|
||||
)
|
||||
|
||||
def convert_columns(self) -> None:
|
||||
self.df['creation_timestamp'] = (self.df['creation_timestamp']
|
||||
.astype(int))
|
||||
|
||||
def rename_columns(self) -> None:
|
||||
# No column renaming needed for this converter
|
||||
pass
|
||||
self.df = self.df.rename(columns={})
|
||||
|
||||
def clean_data(self) -> None:
|
||||
super().clean_data()
|
||||
self.df['url'] = ""
|
||||
self.df.fillna(value="", inplace=True)
|
||||
|
||||
|
|
18
backend/app/convert/utils/encode_utf8.py
Normal file
18
backend/app/convert/utils/encode_utf8.py
Normal file
|
@ -0,0 +1,18 @@
|
|||
import chardet
|
||||
|
||||
|
||||
def encode_utf8(raw_data):
    """Return ``raw_data`` re-encoded as UTF-8 bytes.

    The source encoding is guessed with ``chardet.detect``. Only UTF-8,
    Latin-1 (ISO-8859-1) and ASCII input is accepted; anything else is
    rejected rather than decoded with a guess that may corrupt the text.

    Args:
        raw_data: The raw bytes to convert.

    Returns:
        The same text encoded as UTF-8 ``bytes``.

    Raises:
        ValueError: If no encoding could be detected, or the detected
            encoding is not one of the supported ones.
    """
    import codecs

    # Canonical Python codec names of the encodings we accept.
    supported_codecs = ('utf-8', 'iso8859-1', 'ascii')

    # Detect the encoding of the file
    encoding = chardet.detect(raw_data)['encoding']

    # chardet reports None when it cannot make a guess (e.g. empty input).
    if encoding is None:
        raise ValueError(f"Unsupported encoding: {encoding}")

    # Normalize the detected label to Python's canonical codec name.
    # chardet labels Latin-1 as "ISO-8859-1", so comparing the raw label
    # against the literal "latin-1" never matches; going through
    # codecs.lookup makes every alias and letter case resolve to the same
    # name.
    try:
        codec_name = codecs.lookup(encoding).name
    except LookupError:
        raise ValueError(f"Unsupported encoding: {encoding}") from None

    if codec_name not in supported_codecs:
        raise ValueError(f"Unsupported encoding: {encoding}")

    # Decode the file based on the detected encoding
    return raw_data.decode(codec_name).encode('utf-8')
|
|
@ -7,3 +7,4 @@ minio
|
|||
python-dotenv
|
||||
xmltodict
|
||||
markdownify
|
||||
chardet
|
Loading…
Add table
Add a link
Reference in a new issue