🚀 Add feature: Using MinIO as file storage for conversion both as input and output

This commit is contained in:
François Pelletier 2025-05-14 18:44:28 -04:00
parent afdfe1dbac
commit 579a3fe379
26 changed files with 16204 additions and 133 deletions

View file

@ -1,10 +1,10 @@
from app.config import logger
from app.models import ConversionResponse
from fastapi import UploadFile
def convert_bluesky_car(file: UploadFile):
def convert_bluesky_car(content):
# Implement conversion logic here
logger.info(file.headers)
logger.info(f"Starting conversion of {len(content)} bytes")
converted_data = {} # Example data
return ConversionResponse(converted_data=converted_data, status="success")
return ConversionResponse(converted_data=converted_data, status="success",
metadata={})

View file

@ -1,10 +1,10 @@
from app.config import logger
from app.models import ConversionResponse
from fastapi import UploadFile
def convert_export_txt(file: UploadFile):
def convert_export_txt(content):
# Implement conversion logic here
logger.info(file.headers)
logger.info(f"Starting conversion of {len(content)} bytes")
converted_data = {} # Example data
return ConversionResponse(converted_data=converted_data, status="success")
return ConversionResponse(converted_data=converted_data, status="success",
metadata={})

View file

@ -1,10 +1,10 @@
from app.config import logger
from app.models import ConversionResponse
from fastapi import UploadFile
def convert_facebook_comments_json(file: UploadFile):
def convert_facebook_comments_json(content):
# Implement conversion logic here
logger.info(file.headers)
logger.info(f"Starting conversion of {len(content)} bytes")
converted_data = {} # Example data
return ConversionResponse(converted_data=converted_data, status="success")
return ConversionResponse(converted_data=converted_data, status="success",
metadata={})

View file

@ -1,10 +1,10 @@
from app.config import logger
from app.models import ConversionResponse
from fastapi import UploadFile
def convert_facebook_posts_json(file: UploadFile):
def convert_facebook_posts_json(content):
# Implement conversion logic here
logger.info(file.headers)
logger.info(f"Starting conversion of {len(content)} bytes")
converted_data = {} # Example data
return ConversionResponse(converted_data=converted_data, status="success")
return ConversionResponse(converted_data=converted_data, status="success",
metadata={})

View file

@ -1,10 +1,10 @@
from app.config import logger
from app.models import ConversionResponse
from fastapi import UploadFile
def convert_instagram_comments_json(file: UploadFile):
def convert_instagram_comments_json(content):
# Implement conversion logic here
logger.info(file.headers)
logger.info(f"Starting conversion of {len(content)} bytes")
converted_data = {} # Example data
return ConversionResponse(converted_data=converted_data, status="success")
return ConversionResponse(converted_data=converted_data, status="success",
metadata={})

View file

@ -1,10 +1,10 @@
from app.config import logger
from app.models import ConversionResponse
from fastapi import UploadFile
def convert_instagram_posts_json(file: UploadFile):
def convert_instagram_posts_json(content):
# Implement conversion logic here
logger.info(file.headers)
logger.info(f"Starting conversion of {len(content)} bytes")
converted_data = {} # Example data
return ConversionResponse(converted_data=converted_data, status="success")
return ConversionResponse(converted_data=converted_data, status="success",
metadata={})

View file

@ -1,10 +1,10 @@
from app.config import logger
from app.models import ConversionResponse
from fastapi import UploadFile
def convert_instagram_reels_json(file: UploadFile):
def convert_instagram_reels_json(content):
# Implement conversion logic here
logger.info(file.headers)
logger.info(f"Starting conversion of {len(content)} bytes")
converted_data = {} # Example data
return ConversionResponse(converted_data=converted_data, status="success")
return ConversionResponse(converted_data=converted_data, status="success",
metadata={})

View file

@ -1,10 +1,10 @@
from app.config import logger
from app.models import ConversionResponse
from fastapi import UploadFile
def convert_instagram_reels_video(file: UploadFile):
def convert_instagram_reels_video(content):
# Implement conversion logic here
logger.info(file.headers)
logger.info(f"Starting conversion of {len(content)} bytes")
converted_data = {} # Example data
return ConversionResponse(converted_data=converted_data, status="success")
return ConversionResponse(converted_data=converted_data, status="success",
metadata={})

View file

@ -1,10 +1,10 @@
from app.config import logger
from app.models import ConversionResponse
from fastapi import UploadFile
def convert_instagram_stories_image(file: UploadFile):
def convert_instagram_stories_image(content):
# Implement conversion logic here
logger.info(file.headers)
logger.info(f"Starting conversion of {len(content)} bytes")
converted_data = {} # Example data
return ConversionResponse(converted_data=converted_data, status="success")
return ConversionResponse(converted_data=converted_data, status="success",
metadata={})

View file

@ -1,10 +1,10 @@
from app.config import logger
from app.models import ConversionResponse
from fastapi import UploadFile
def convert_instagram_stories_json(file: UploadFile):
def convert_instagram_stories_json(content):
# Implement conversion logic here
logger.info(file.headers)
logger.info(f"Starting conversion of {len(content)} bytes")
converted_data = {} # Example data
return ConversionResponse(converted_data=converted_data, status="success")
return ConversionResponse(converted_data=converted_data, status="success",
metadata={})

View file

@ -1,10 +1,10 @@
from app.config import logger
from app.models import ConversionResponse
from fastapi import UploadFile
def convert_linkedin_comments_csv(file: UploadFile):
def convert_linkedin_comments_csv(content):
# Implement conversion logic here
logger.info(file.headers)
logger.info(f"Starting conversion of {len(content)} bytes")
converted_data = {} # Example data
return ConversionResponse(converted_data=converted_data, status="success")
return ConversionResponse(converted_data=converted_data, status="success",
metadata={})

View file

@ -1,10 +1,106 @@
import datetime
from io import StringIO, BytesIO
from typing import Dict, Union
import pandas as pd
from app.config import logger
from app.models import ConversionResponse
from fastapi import UploadFile
def convert_linkedin_shares_csv(file: UploadFile):
# Implement conversion logic here
logger.info(file.headers)
converted_data = {} # Example data
return ConversionResponse(converted_data=converted_data, status="success")
def convert_linkedin_shares_csv(content: Union[str, bytes]) -> list:
    """
    Convert LinkedIn shares CSV content into a list of standardized records.

    Args:
        content: CSV content as string or bytes. Bytes are decoded as UTF-8;
            if decoding fails, the raw bytes are handed to pandas unchanged.

    Returns:
        List of dictionaries, one per share with a non-empty commentary.
        Each record carries the constant fields 'index', 'type' and
        'network', a 'creation_timestamp' (integer epoch seconds derived
        from the 'Date' column), 'uri' (renamed from 'ShareLink') and
        'texte' (renamed from 'ShareCommentary'). Any other CSV column is
        kept, with missing values replaced by empty strings.

    Raises:
        ValueError: If conversion fails for any reason (empty CSV, missing
            expected column, unsupported content type, unparsable date...).
            The original exception is chained as the cause.
    """
    try:
        # Handle content based on its type
        logger.info("Preparing to read CSV content")
        if isinstance(content, bytes):
            try:
                csv_file = StringIO(content.decode('utf-8'))
                logger.debug("Converted bytes content to string")
            except UnicodeDecodeError:
                # NOTE(review): pandas also decodes as UTF-8 by default, so
                # this fallback will most likely fail again inside read_csv
                # unless an explicit encoding is supplied -- confirm intent.
                csv_file = BytesIO(content)
                logger.debug("Using binary content with BytesIO")
        elif isinstance(content, str):
            csv_file = StringIO(content)
            logger.debug("Using string content with StringIO")
        else:
            # Caught by the generic handler below and surfaced as ValueError.
            raise TypeError(f"Unsupported content type: {type(content)}")

        # Read CSV into DataFrame
        raw_shares = pd.read_csv(csv_file)
        logger.info(f"Successfully read CSV with {len(raw_shares)} rows")

        # Add identification columns
        logger.info(
            "Adding identification columns: 'index', 'type', 'network'"
        )
        raw_shares = raw_shares.assign(
            index="linkedin_shares", type="posts", network="LinkedIn"
        )

        # Convert date to timestamp
        # NOTE(review): fromisoformat() yields naive datetimes for values
        # without an offset, and .timestamp() then interprets them in the
        # server's local timezone -- confirm which timezone the export uses.
        logger.info("Converting 'Date' column to timestamp")
        raw_shares["creation_timestamp"] = raw_shares["Date"].apply(
            lambda x: int(datetime.datetime.fromisoformat(x).timestamp())
        )
        del raw_shares["Date"]
        logger.info("Date column converted and deleted")

        # Rename columns
        logger.info("Renaming columns to standard format")
        raw_shares = raw_shares.rename(
            columns={"ShareLink": "uri", "ShareCommentary": "texte"}
        )

        # Ensure 'texte' has string type. Missing values must be filled
        # BEFORE the cast: casting NaN to str yields the literal "nan",
        # which would then slip through the empty-text filter below.
        logger.info("Ensuring 'texte' column is of type string")
        raw_shares["texte"] = raw_shares["texte"].fillna("").astype(str)

        # Fill missing values in the remaining columns
        logger.info("Filling missing values with empty strings")
        raw_shares = raw_shares.fillna("")

        # Remove duplicates
        logger.info(
            "Removing duplicates based on 'texte' and 'creation_timestamp'"
        )
        raw_shares = raw_shares.drop_duplicates(
            subset=["texte", "creation_timestamp"]
        )

        # Remove empty rows
        logger.info("Removing rows with empty 'texte'")
        raw_shares = raw_shares[raw_shares["texte"].str.strip() != ""]

        # Convert to a list of per-row dictionaries and return
        logger.info("Converting DataFrame to dictionary format")
        result = raw_shares.to_dict(orient="records")
        logger.info(
            f"Conversion completed successfully with {len(result)} records")
        return result
    except pd.errors.EmptyDataError as e:
        logger.error(f"CSV file is empty or malformed: {str(e)}")
        raise ValueError(f"CSV file is empty or malformed: {str(e)}") from e
    except KeyError as e:
        logger.error(f"Missing expected column in CSV: {str(e)}")
        raise ValueError(f"Missing expected column in CSV: {str(e)}") from e
    except Exception as e:
        logger.exception(f"Unexpected error during conversion: {str(e)}")
        raise ValueError(
            f"Unexpected error during conversion: {str(e)}"
        ) from e

View file

@ -1,10 +1,10 @@
from app.config import logger
from app.models import ConversionResponse
from fastapi import UploadFile
def convert_markdown_txt(file: UploadFile):
def convert_markdown_txt(content):
# Implement conversion logic here
logger.info(file.headers)
logger.info(f"Starting conversion of {len(content)} bytes")
converted_data = {} # Example data
return ConversionResponse(converted_data=converted_data, status="success")
return ConversionResponse(converted_data=converted_data, status="success",
metadata={})

View file

@ -1,10 +1,10 @@
from app.config import logger
from app.models import ConversionResponse
from fastapi import UploadFile
def convert_youtube_shorts_video(file: UploadFile):
def convert_youtube_shorts_video(content):
# Implement conversion logic here
logger.info(file.headers)
logger.info(f"Starting conversion of {len(content)} bytes")
converted_data = {} # Example data
return ConversionResponse(converted_data=converted_data, status="success")
return ConversionResponse(converted_data=converted_data, status="success",
metadata={})

View file

@ -1,10 +1,10 @@
from app.config import logger
from app.models import ConversionResponse
from fastapi import UploadFile
def convert_youtube_video_video(file: UploadFile):
def convert_youtube_video_video(content):
# Implement conversion logic here
logger.info(file.headers)
logger.info(f"Starting conversion of {len(content)} bytes")
converted_data = {} # Example data
return ConversionResponse(converted_data=converted_data, status="success")
return ConversionResponse(converted_data=converted_data, status="success",
metadata={})