🚀 Add feature: Conversion scripts

🚀 Refactor: Object model for conversions
This commit is contained in:
François Pelletier 2025-05-19 18:15:50 -04:00
parent 579a3fe379
commit f3dec3b49a
29 changed files with 23968 additions and 199 deletions

View file

@ -1,13 +1,15 @@
import datetime
import json
import os
from typing import Any, Dict, Union
from typing import Any, Dict, Union, List
from fastapi import APIRouter, HTTPException
from app.config import logger, minio_bucket_name, minio_client
from app.convert.convert_bluesky_car import convert_bluesky_car
from app.convert.convert_export_txt import convert_export_txt
from app.convert.convert_facebook_business_posts_json import \
convert_facebook_business_posts_json
from app.convert.convert_facebook_comments_json import (
convert_facebook_comments_json,
)
@ -54,6 +56,9 @@ CONVERTERS = {
"facebook_posts": {
"json": convert_facebook_posts_json
},
"facebook_business_posts": {
"json": convert_facebook_business_posts_json
},
"facebook_comments": {
"json": convert_facebook_comments_json
},
@ -89,6 +94,35 @@ CONVERTERS = {
}
def generate_temp_file(data: List[Dict], source_type: str) -> str:
    """
    Generate a unique filename and write data to a temporary JSON file.

    The file is created relative to the current working directory and is
    named ``<source_type>_<timestamp>.json``, where the timestamp is the
    current UTC time in ISO format with ":" replaced by "-".

    Args:
        data: The data to save; must be JSON-serializable
        source_type: The source type to use as the filename prefix

    Returns:
        The filename of the temporary file

    Raises:
        IOError: If the file cannot be written
        TypeError: If the data cannot be serialized to JSON
        ValueError: If the data contains a circular reference
    """
    timestamp = (
        datetime.datetime.now(tz=datetime.UTC).isoformat().replace(":", "-")
    )
    tmp_filename = f"{source_type}_{timestamp}.json"

    logger.info(f"Saving converted data to temporary file '{tmp_filename}'")

    # Serialize before opening the file: if the data is not serializable,
    # the error propagates without leaving an empty file behind.
    payload = json.dumps(data)

    try:
        with open(tmp_filename, "w", encoding="utf-8") as f:
            f.write(payload)
    except IOError as e:
        logger.error(f"Failed to write temporary file '{tmp_filename}': {e}")
        # Best-effort removal of a partially written file; the original
        # write error is the one that matters to the caller.
        try:
            os.remove(tmp_filename)
        except OSError:
            pass
        raise

    return tmp_filename
def read_content_from_minio(request: ConversionRequest) -> Union[str, bytes]:
"""
Read content from MinIO storage based on the request filename.
@ -137,13 +171,13 @@ def read_content_from_minio(request: ConversionRequest) -> Union[str, bytes]:
) from e
def save_to_minio(data: Dict[str, Any], source_name: str) -> str:
def save_to_minio(data: List[Dict], source_type: str) -> str:
"""
Save converted data to MinIO as a JSON file.
Args:
data: The data to save
source_name: The source name to use in the filename
source_type: The source name to use in the filename
Returns:
The filename of the saved file
@ -152,16 +186,7 @@ def save_to_minio(data: Dict[str, Any], source_name: str) -> str:
HTTPException: If the file cannot be saved
"""
try:
# Generate a unique filename with timestamp
timestamp = datetime.datetime.now(tz=datetime.UTC).isoformat().replace(
":", "-")
tmp_filename = f"{source_name}_{timestamp}.json"
logger.info(f"Saving converted data to temporary file '{tmp_filename}'")
# Write to temporary file
with open(tmp_filename, "w") as f:
json.dump(data, f)
tmp_filename = generate_temp_file(data, source_type)
# Upload to MinIO
logger.info(
@ -206,28 +231,28 @@ def convert_data(request: ConversionRequest):
"""
try:
logger.info(
f"Processing conversion request for {request.source_name} in {request.source_format} format")
f"Processing conversion request for {request.source_type} in {request.source_format} format")
# Read content from MinIO
content = read_content_from_minio(request)
# Check if source and format are supported
if request.source_name not in CONVERTERS:
error_msg = f"Unsupported source name: {request.source_name}"
if request.source_type not in CONVERTERS:
error_msg = f"Unsupported source name: {request.source_type}"
logger.error(error_msg)
raise HTTPException(status_code=400, detail=error_msg)
if request.source_format not in CONVERTERS[request.source_name]:
error_msg = f"Unsupported format '{request.source_format}' for source '{request.source_name}'"
if request.source_format not in CONVERTERS[request.source_type]:
error_msg = f"Unsupported format '{request.source_format}' for source '{request.source_type}'"
logger.error(error_msg)
raise HTTPException(status_code=400, detail=error_msg)
# Get the appropriate converter function
converter = CONVERTERS[request.source_name][request.source_format]
converter = CONVERTERS[request.source_type][request.source_format]
# Convert the content
logger.info(
f"Converting {request.source_name} data using {converter.__name__}")
f"Converting {request.source_type} data using {converter.__name__}")
try:
converted_data = converter(content)
logger.info(
@ -238,14 +263,13 @@ def convert_data(request: ConversionRequest):
raise HTTPException(status_code=500, detail=error_msg) from e
# Save the converted data to MinIO
saved_filename = save_to_minio(converted_data, request.source_name)
saved_filename = save_to_minio(converted_data, request.source_type)
# Return success response
return ConversionResponse(
converted_data={}, # Empty dict as per original implementation
status="ok",
metadata={
"source": request.source_name,
"source": request.source_type,
"format": request.source_format,
"records_count": len(converted_data) if isinstance(
converted_data, list) else 1,