🚀 Add feature: Conversion scripts
🚀 Refactor: Object model for conversions
This commit is contained in:
parent
579a3fe379
commit
f3dec3b49a
29 changed files with 23968 additions and 199 deletions
|
@ -1,13 +1,15 @@
|
|||
import datetime
|
||||
import json
|
||||
import os
|
||||
from typing import Any, Dict, Union
|
||||
from typing import Any, Dict, Union, List
|
||||
|
||||
from fastapi import APIRouter, HTTPException
|
||||
|
||||
from app.config import logger, minio_bucket_name, minio_client
|
||||
from app.convert.convert_bluesky_car import convert_bluesky_car
|
||||
from app.convert.convert_export_txt import convert_export_txt
|
||||
from app.convert.convert_facebook_business_posts_json import \
|
||||
convert_facebook_business_posts_json
|
||||
from app.convert.convert_facebook_comments_json import (
|
||||
convert_facebook_comments_json,
|
||||
)
|
||||
|
@ -54,6 +56,9 @@ CONVERTERS = {
|
|||
"facebook_posts": {
|
||||
"json": convert_facebook_posts_json
|
||||
},
|
||||
"facebook_business_posts": {
|
||||
"json": convert_facebook_business_posts_json
|
||||
},
|
||||
"facebook_comments": {
|
||||
"json": convert_facebook_comments_json
|
||||
},
|
||||
|
@ -89,6 +94,35 @@ CONVERTERS = {
|
|||
}
|
||||
|
||||
|
||||
def generate_temp_file(data: List[Dict], source_type: str) -> str:
    """
    Generate a timestamped filename and write data to it as JSON.

    The filename has the form ``<source_type>_<timestamp>.json``, where the
    timestamp is the current UTC time in ISO format with ":" replaced by "-".
    The name is relative, so the file is created relative to the current
    working directory.

    Args:
        data: The data to save; must be JSON-serializable
        source_type: The source type to use as the filename prefix

    Returns:
        The filename of the temporary file

    Raises:
        OSError: If the file cannot be written (IOError is an alias of
            OSError, so existing ``except IOError`` handlers still match)
        TypeError, ValueError: If the data cannot be serialized to JSON
    """
    timestamp = datetime.datetime.now(tz=datetime.UTC).isoformat().replace(":", "-")
    tmp_filename = f"{source_type}_{timestamp}.json"

    logger.info(f"Saving converted data to temporary file '{tmp_filename}'")

    # Serialize BEFORE opening the file: open(..., "w") creates the file
    # immediately, so a serialization error raised inside the `with` block
    # would leave an empty stray file behind.
    payload = json.dumps(data)

    try:
        # Explicit encoding keeps the output independent of the platform's
        # default locale encoding.
        with open(tmp_filename, "w", encoding="utf-8") as f:
            f.write(payload)
    except OSError as e:
        logger.error(f"Failed to write temporary file '{tmp_filename}': {e}")
        raise

    return tmp_filename
|
||||
|
||||
def read_content_from_minio(request: ConversionRequest) -> Union[str, bytes]:
|
||||
"""
|
||||
Read content from MinIO storage based on the request filename.
|
||||
|
@ -137,13 +171,13 @@ def read_content_from_minio(request: ConversionRequest) -> Union[str, bytes]:
|
|||
) from e
|
||||
|
||||
|
||||
def save_to_minio(data: Dict[str, Any], source_name: str) -> str:
|
||||
def save_to_minio(data: List[Dict], source_type: str) -> str:
|
||||
"""
|
||||
Save converted data to MinIO as a JSON file.
|
||||
|
||||
Args:
|
||||
data: The data to save
|
||||
source_name: The source name to use in the filename
|
||||
source_type: The source name to use in the filename
|
||||
|
||||
Returns:
|
||||
The filename of the saved file
|
||||
|
@ -152,16 +186,7 @@ def save_to_minio(data: Dict[str, Any], source_name: str) -> str:
|
|||
HTTPException: If the file cannot be saved
|
||||
"""
|
||||
try:
|
||||
# Generate a unique filename with timestamp
|
||||
timestamp = datetime.datetime.now(tz=datetime.UTC).isoformat().replace(
|
||||
":", "-")
|
||||
tmp_filename = f"{source_name}_{timestamp}.json"
|
||||
|
||||
logger.info(f"Saving converted data to temporary file '{tmp_filename}'")
|
||||
|
||||
# Write to temporary file
|
||||
with open(tmp_filename, "w") as f:
|
||||
json.dump(data, f)
|
||||
tmp_filename = generate_temp_file(data, source_type)
|
||||
|
||||
# Upload to MinIO
|
||||
logger.info(
|
||||
|
@ -206,28 +231,28 @@ def convert_data(request: ConversionRequest):
|
|||
"""
|
||||
try:
|
||||
logger.info(
|
||||
f"Processing conversion request for {request.source_name} in {request.source_format} format")
|
||||
f"Processing conversion request for {request.source_type} in {request.source_format} format")
|
||||
|
||||
# Read content from MinIO
|
||||
content = read_content_from_minio(request)
|
||||
|
||||
# Check if source and format are supported
|
||||
if request.source_name not in CONVERTERS:
|
||||
error_msg = f"Unsupported source name: {request.source_name}"
|
||||
if request.source_type not in CONVERTERS:
|
||||
error_msg = f"Unsupported source name: {request.source_type}"
|
||||
logger.error(error_msg)
|
||||
raise HTTPException(status_code=400, detail=error_msg)
|
||||
|
||||
if request.source_format not in CONVERTERS[request.source_name]:
|
||||
error_msg = f"Unsupported format '{request.source_format}' for source '{request.source_name}'"
|
||||
if request.source_format not in CONVERTERS[request.source_type]:
|
||||
error_msg = f"Unsupported format '{request.source_format}' for source '{request.source_type}'"
|
||||
logger.error(error_msg)
|
||||
raise HTTPException(status_code=400, detail=error_msg)
|
||||
|
||||
# Get the appropriate converter function
|
||||
converter = CONVERTERS[request.source_name][request.source_format]
|
||||
converter = CONVERTERS[request.source_type][request.source_format]
|
||||
|
||||
# Convert the content
|
||||
logger.info(
|
||||
f"Converting {request.source_name} data using {converter.__name__}")
|
||||
f"Converting {request.source_type} data using {converter.__name__}")
|
||||
try:
|
||||
converted_data = converter(content)
|
||||
logger.info(
|
||||
|
@ -238,14 +263,13 @@ def convert_data(request: ConversionRequest):
|
|||
raise HTTPException(status_code=500, detail=error_msg) from e
|
||||
|
||||
# Save the converted data to MinIO
|
||||
saved_filename = save_to_minio(converted_data, request.source_name)
|
||||
saved_filename = save_to_minio(converted_data, request.source_type)
|
||||
|
||||
# Return success response
|
||||
return ConversionResponse(
|
||||
converted_data={}, # Empty dict as per original implementation
|
||||
status="ok",
|
||||
metadata={
|
||||
"source": request.source_name,
|
||||
"source": request.source_type,
|
||||
"format": request.source_format,
|
||||
"records_count": len(converted_data) if isinstance(
|
||||
converted_data, list) else 1,
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue