🚀 Add feature: use MinIO as file storage for both conversion input and output
This commit is contained in:
parent
afdfe1dbac
commit
579a3fe379
26 changed files with 16204 additions and 133 deletions
|
@ -1,10 +1,10 @@
|
|||
from app.config import logger
|
||||
from app.models import ConversionResponse
|
||||
from fastapi import UploadFile
|
||||
|
||||
|
||||
def convert_bluesky_car(content):
    """Placeholder converter for the ``bluesky_car`` format.

    The real conversion logic is not implemented yet: the function only logs
    the size of ``content`` and returns an empty successful response.

    Args:
        content: Sized payload to convert. The log message reports its length
            as bytes; presumably raw bytes fetched from storage -- confirm
            against the caller.

    Returns:
        A ``ConversionResponse`` with empty ``converted_data``, status
        ``"success"`` and empty ``metadata``.
    """
    # Implement conversion logic here
    logger.info(f"Starting conversion of {len(content)} bytes")
    converted_data = {}  # Example data
    return ConversionResponse(
        converted_data=converted_data, status="success", metadata={}
    )
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
from app.config import logger
|
||||
from app.models import ConversionResponse
|
||||
from fastapi import UploadFile
|
||||
|
||||
|
||||
def convert_export_txt(content):
    """Placeholder converter for the ``export_txt`` format.

    The real conversion logic is not implemented yet: the function only logs
    the size of ``content`` and returns an empty successful response.

    Args:
        content: Sized payload to convert. The log message reports its length
            as bytes; presumably raw bytes fetched from storage -- confirm
            against the caller.

    Returns:
        A ``ConversionResponse`` with empty ``converted_data``, status
        ``"success"`` and empty ``metadata``.
    """
    # Implement conversion logic here
    logger.info(f"Starting conversion of {len(content)} bytes")
    converted_data = {}  # Example data
    return ConversionResponse(
        converted_data=converted_data, status="success", metadata={}
    )
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
from app.config import logger
|
||||
from app.models import ConversionResponse
|
||||
from fastapi import UploadFile
|
||||
|
||||
|
||||
def convert_facebook_comments_json(content):
    """Placeholder converter for the ``facebook_comments_json`` format.

    The real conversion logic is not implemented yet: the function only logs
    the size of ``content`` and returns an empty successful response.

    Args:
        content: Sized payload to convert. The log message reports its length
            as bytes; presumably raw bytes fetched from storage -- confirm
            against the caller.

    Returns:
        A ``ConversionResponse`` with empty ``converted_data``, status
        ``"success"`` and empty ``metadata``.
    """
    # Implement conversion logic here
    logger.info(f"Starting conversion of {len(content)} bytes")
    converted_data = {}  # Example data
    return ConversionResponse(
        converted_data=converted_data, status="success", metadata={}
    )
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
from app.config import logger
|
||||
from app.models import ConversionResponse
|
||||
from fastapi import UploadFile
|
||||
|
||||
|
||||
def convert_facebook_posts_json(content):
    """Placeholder converter for the ``facebook_posts_json`` format.

    The real conversion logic is not implemented yet: the function only logs
    the size of ``content`` and returns an empty successful response.

    Args:
        content: Sized payload to convert. The log message reports its length
            as bytes; presumably raw bytes fetched from storage -- confirm
            against the caller.

    Returns:
        A ``ConversionResponse`` with empty ``converted_data``, status
        ``"success"`` and empty ``metadata``.
    """
    # Implement conversion logic here
    logger.info(f"Starting conversion of {len(content)} bytes")
    converted_data = {}  # Example data
    return ConversionResponse(
        converted_data=converted_data, status="success", metadata={}
    )
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
from app.config import logger
|
||||
from app.models import ConversionResponse
|
||||
from fastapi import UploadFile
|
||||
|
||||
|
||||
def convert_instagram_comments_json(content):
    """Placeholder converter for the ``instagram_comments_json`` format.

    The real conversion logic is not implemented yet: the function only logs
    the size of ``content`` and returns an empty successful response.

    Args:
        content: Sized payload to convert. The log message reports its length
            as bytes; presumably raw bytes fetched from storage -- confirm
            against the caller.

    Returns:
        A ``ConversionResponse`` with empty ``converted_data``, status
        ``"success"`` and empty ``metadata``.
    """
    # Implement conversion logic here
    logger.info(f"Starting conversion of {len(content)} bytes")
    converted_data = {}  # Example data
    return ConversionResponse(
        converted_data=converted_data, status="success", metadata={}
    )
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
from app.config import logger
|
||||
from app.models import ConversionResponse
|
||||
from fastapi import UploadFile
|
||||
|
||||
|
||||
def convert_instagram_posts_json(content):
    """Placeholder converter for the ``instagram_posts_json`` format.

    The real conversion logic is not implemented yet: the function only logs
    the size of ``content`` and returns an empty successful response.

    Args:
        content: Sized payload to convert. The log message reports its length
            as bytes; presumably raw bytes fetched from storage -- confirm
            against the caller.

    Returns:
        A ``ConversionResponse`` with empty ``converted_data``, status
        ``"success"`` and empty ``metadata``.
    """
    # Implement conversion logic here
    logger.info(f"Starting conversion of {len(content)} bytes")
    converted_data = {}  # Example data
    return ConversionResponse(
        converted_data=converted_data, status="success", metadata={}
    )
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
from app.config import logger
|
||||
from app.models import ConversionResponse
|
||||
from fastapi import UploadFile
|
||||
|
||||
|
||||
def convert_instagram_reels_json(content):
    """Placeholder converter for the ``instagram_reels_json`` format.

    The real conversion logic is not implemented yet: the function only logs
    the size of ``content`` and returns an empty successful response.

    Args:
        content: Sized payload to convert. The log message reports its length
            as bytes; presumably raw bytes fetched from storage -- confirm
            against the caller.

    Returns:
        A ``ConversionResponse`` with empty ``converted_data``, status
        ``"success"`` and empty ``metadata``.
    """
    # Implement conversion logic here
    logger.info(f"Starting conversion of {len(content)} bytes")
    converted_data = {}  # Example data
    return ConversionResponse(
        converted_data=converted_data, status="success", metadata={}
    )
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
from app.config import logger
|
||||
from app.models import ConversionResponse
|
||||
from fastapi import UploadFile
|
||||
|
||||
|
||||
def convert_instagram_reels_video(content):
    """Placeholder converter for the ``instagram_reels_video`` format.

    The real conversion logic is not implemented yet: the function only logs
    the size of ``content`` and returns an empty successful response.

    Args:
        content: Sized payload to convert. The log message reports its length
            as bytes; presumably raw bytes fetched from storage -- confirm
            against the caller.

    Returns:
        A ``ConversionResponse`` with empty ``converted_data``, status
        ``"success"`` and empty ``metadata``.
    """
    # Implement conversion logic here
    logger.info(f"Starting conversion of {len(content)} bytes")
    converted_data = {}  # Example data
    return ConversionResponse(
        converted_data=converted_data, status="success", metadata={}
    )
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
from app.config import logger
|
||||
from app.models import ConversionResponse
|
||||
from fastapi import UploadFile
|
||||
|
||||
|
||||
def convert_instagram_stories_image(content):
    """Placeholder converter for the ``instagram_stories_image`` format.

    The real conversion logic is not implemented yet: the function only logs
    the size of ``content`` and returns an empty successful response.

    Args:
        content: Sized payload to convert. The log message reports its length
            as bytes; presumably raw bytes fetched from storage -- confirm
            against the caller.

    Returns:
        A ``ConversionResponse`` with empty ``converted_data``, status
        ``"success"`` and empty ``metadata``.
    """
    # Implement conversion logic here
    logger.info(f"Starting conversion of {len(content)} bytes")
    converted_data = {}  # Example data
    return ConversionResponse(
        converted_data=converted_data, status="success", metadata={}
    )
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
from app.config import logger
|
||||
from app.models import ConversionResponse
|
||||
from fastapi import UploadFile
|
||||
|
||||
|
||||
def convert_instagram_stories_json(content):
    """Placeholder converter for the ``instagram_stories_json`` format.

    The real conversion logic is not implemented yet: the function only logs
    the size of ``content`` and returns an empty successful response.

    Args:
        content: Sized payload to convert. The log message reports its length
            as bytes; presumably raw bytes fetched from storage -- confirm
            against the caller.

    Returns:
        A ``ConversionResponse`` with empty ``converted_data``, status
        ``"success"`` and empty ``metadata``.
    """
    # Implement conversion logic here
    logger.info(f"Starting conversion of {len(content)} bytes")
    converted_data = {}  # Example data
    return ConversionResponse(
        converted_data=converted_data, status="success", metadata={}
    )
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
from app.config import logger
|
||||
from app.models import ConversionResponse
|
||||
from fastapi import UploadFile
|
||||
|
||||
|
||||
def convert_linkedin_comments_csv(content):
    """Placeholder converter for the ``linkedin_comments_csv`` format.

    The real conversion logic is not implemented yet: the function only logs
    the size of ``content`` and returns an empty successful response.

    Args:
        content: Sized payload to convert. The log message reports its length
            as bytes; presumably raw bytes fetched from storage -- confirm
            against the caller.

    Returns:
        A ``ConversionResponse`` with empty ``converted_data``, status
        ``"success"`` and empty ``metadata``.
    """
    # Implement conversion logic here
    logger.info(f"Starting conversion of {len(content)} bytes")
    converted_data = {}  # Example data
    return ConversionResponse(
        converted_data=converted_data, status="success", metadata={}
    )
|
||||
|
|
|
@ -1,10 +1,106 @@
|
|||
import datetime
|
||||
from io import StringIO, BytesIO
|
||||
from typing import Dict, Union
|
||||
|
||||
import pandas as pd
|
||||
|
||||
from app.config import logger
|
||||
from app.models import ConversionResponse
|
||||
from fastapi import UploadFile
|
||||
|
||||
|
||||
def _decode_csv_content(content: Union[str, bytes]) -> str:
    """Return CSV ``content`` as text, decoding bytes when necessary."""
    if isinstance(content, str):
        logger.debug("Using string content with StringIO")
        return content
    if isinstance(content, bytes):
        try:
            # "utf-8-sig" also accepts plain UTF-8 and drops a leading BOM.
            text = content.decode("utf-8-sig")
            logger.debug("Converted bytes content to string")
        except UnicodeDecodeError:
            # Handing the raw bytes to pandas would just fail again on the
            # same UTF-8 decode; Latin-1 maps every byte to a character, so
            # this fallback always yields readable text.
            text = content.decode("latin-1")
            logger.debug("Content is not valid UTF-8; decoded as Latin-1")
        return text
    raise TypeError(f"Unsupported content type: {type(content)}")


def convert_linkedin_shares_csv(content: Union[str, bytes]) -> list:
    """Convert a LinkedIn shares CSV export into a list of standardized records.

    The CSV must contain ``Date`` and ``ShareCommentary`` columns;
    ``ShareLink`` is renamed to ``uri`` when present. Each record carries the
    constant fields ``index="linkedin_shares"``, ``type="posts"`` and
    ``network="LinkedIn"``, the share text under ``texte`` and the share date
    as an integer Unix timestamp under ``creation_timestamp``. Rows with
    missing or blank text and rows duplicating ``(texte, creation_timestamp)``
    are dropped.

    Args:
        content: CSV content as a string or as bytes. Bytes are decoded as
            UTF-8, falling back to Latin-1 when they are not valid UTF-8.

    Returns:
        A list of dictionaries, one per remaining share
        (``DataFrame.to_dict(orient="records")``).

    Raises:
        ValueError: If the CSV is empty, lacks an expected column, or any
            other error occurs during conversion (including an unsupported
            ``content`` type). The original exception is chained as the cause.
    """
    try:
        logger.info("Preparing to read CSV content")
        csv_file = StringIO(_decode_csv_content(content))

        # Read CSV into DataFrame
        raw_shares = pd.read_csv(csv_file)
        logger.info(f"Successfully read CSV with {len(raw_shares)} rows")

        # Add identification columns
        logger.info(
            "Adding identification columns: 'index', 'type', 'network'"
        )
        raw_shares = raw_shares.assign(
            index="linkedin_shares", type="posts", network="LinkedIn"
        )

        # Convert date to timestamp.
        # NOTE: values without a UTC offset are naive datetimes, which
        # ``timestamp()`` interprets in the process's local timezone.
        logger.info("Converting 'Date' column to timestamp")
        raw_shares["creation_timestamp"] = raw_shares["Date"].apply(
            lambda x: int(datetime.datetime.fromisoformat(x).timestamp())
        )
        del raw_shares["Date"]
        logger.info("Date column converted and deleted")

        # Rename columns
        logger.info("Renaming columns to standard format")
        raw_shares = raw_shares.rename(
            columns={"ShareLink": "uri", "ShareCommentary": "texte"}
        )

        # Fill missing values BEFORE casting to str: casting first would turn
        # NaN into the literal text "nan", which the empty-text filter below
        # would then keep as if it were a real share.
        logger.info("Filling missing values with empty strings")
        raw_shares = raw_shares.fillna("")

        # Ensure 'texte' has string type
        logger.info("Ensuring 'texte' column is of type string")
        raw_shares["texte"] = raw_shares["texte"].astype(str)

        # Remove duplicates
        logger.info(
            "Removing duplicates based on 'texte' and 'creation_timestamp'"
        )
        raw_shares = raw_shares.drop_duplicates(
            subset=["texte", "creation_timestamp"]
        )

        # Remove empty rows
        logger.info("Removing rows with empty 'texte'")
        raw_shares = raw_shares[raw_shares["texte"].str.strip() != ""]

        # Convert to a list of per-row dictionaries and return
        logger.info("Converting DataFrame to dictionary format")
        result = raw_shares.to_dict(orient="records")
        logger.info(
            f"Conversion completed successfully with {len(result)} records")
        return result

    except pd.errors.EmptyDataError as e:
        logger.error(f"CSV file is empty or malformed: {str(e)}")
        raise ValueError(f"CSV file is empty or malformed: {str(e)}") from e

    except KeyError as e:
        logger.error(f"Missing expected column in CSV: {str(e)}")
        raise ValueError(f"Missing expected column in CSV: {str(e)}") from e

    except Exception as e:
        logger.exception(f"Unexpected error during conversion: {str(e)}")
        raise ValueError(
            f"Unexpected error during conversion: {str(e)}"
        ) from e
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
from app.config import logger
|
||||
from app.models import ConversionResponse
|
||||
from fastapi import UploadFile
|
||||
|
||||
|
||||
def convert_markdown_txt(content):
    """Placeholder converter for the ``markdown_txt`` format.

    The real conversion logic is not implemented yet: the function only logs
    the size of ``content`` and returns an empty successful response.

    Args:
        content: Sized payload to convert. The log message reports its length
            as bytes; presumably raw bytes fetched from storage -- confirm
            against the caller.

    Returns:
        A ``ConversionResponse`` with empty ``converted_data``, status
        ``"success"`` and empty ``metadata``.
    """
    # Implement conversion logic here
    logger.info(f"Starting conversion of {len(content)} bytes")
    converted_data = {}  # Example data
    return ConversionResponse(
        converted_data=converted_data, status="success", metadata={}
    )
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
from app.config import logger
|
||||
from app.models import ConversionResponse
|
||||
from fastapi import UploadFile
|
||||
|
||||
|
||||
def convert_youtube_shorts_video(content):
    """Placeholder converter for the ``youtube_shorts_video`` format.

    The real conversion logic is not implemented yet: the function only logs
    the size of ``content`` and returns an empty successful response.

    Args:
        content: Sized payload to convert. The log message reports its length
            as bytes; presumably raw bytes fetched from storage -- confirm
            against the caller.

    Returns:
        A ``ConversionResponse`` with empty ``converted_data``, status
        ``"success"`` and empty ``metadata``.
    """
    # Implement conversion logic here
    logger.info(f"Starting conversion of {len(content)} bytes")
    converted_data = {}  # Example data
    return ConversionResponse(
        converted_data=converted_data, status="success", metadata={}
    )
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
from app.config import logger
|
||||
from app.models import ConversionResponse
|
||||
from fastapi import UploadFile
|
||||
|
||||
|
||||
def convert_youtube_video_video(content):
    """Placeholder converter for the ``youtube_video_video`` format.

    The real conversion logic is not implemented yet: the function only logs
    the size of ``content`` and returns an empty successful response.

    Args:
        content: Sized payload to convert. The log message reports its length
            as bytes; presumably raw bytes fetched from storage -- confirm
            against the caller.

    Returns:
        A ``ConversionResponse`` with empty ``converted_data``, status
        ``"success"`` and empty ``metadata``.
    """
    # Implement conversion logic here
    logger.info(f"Starting conversion of {len(content)} bytes")
    converted_data = {}  # Example data
    return ConversionResponse(
        converted_data=converted_data, status="success", metadata={}
    )
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue