🚀 Add feature: Loading data into Milvus is now working

This commit is contained in:
François Pelletier 2025-05-21 17:21:57 -04:00
parent f9e5a6e013
commit 64832e2989
23 changed files with 354 additions and 109 deletions

View file

@ -1,7 +1,7 @@
import datetime
import json
import os
from typing import Dict, Union, List
from typing import Dict, List
from fastapi import APIRouter, HTTPException
@ -43,6 +43,7 @@ from app.convert.convert_youtube_shorts_video import (
)
from app.convert.convert_youtube_video_video import convert_youtube_video_video
from app.models import ConversionRequest, ConversionResponse
from app.routers.utils.read_content_from_minio import read_content_from_minio
convert_router = APIRouter(prefix="/convert", tags=["Convert"])
@ -127,53 +128,6 @@ def generate_temp_file(data: List[Dict], source_type: str) -> str:
return tmp_filename
def read_content_from_minio(request: ConversionRequest) -> Union[str, bytes]:
    """
    Fetch the object named by ``request.filename`` from the MinIO bucket.

    Args:
        request: The conversion request; only its ``filename`` is used.

    Returns:
        UTF-8 decoded text when the object's content type starts with
        ``text/``, otherwise the raw bytes.

    Raises:
        HTTPException: 400 when the filename is empty, 500 when anything
            fails while the object is being read.
    """
    object_name = request.filename

    # Guard clause: nothing to fetch without an object name.
    if not object_name:
        logger.error("Filename is empty or invalid")
        raise HTTPException(status_code=400, detail="Filename is required")

    try:
        logger.info(
            f"Reading file '{object_name}' from MinIO bucket '{minio_bucket_name}'")
        with minio_client.get_object(
            bucket_name=minio_bucket_name, object_name=object_name
        ) as minio_object:
            mime_type = minio_object.headers.get("content-type", "")
            logger.debug(f"File content type: {mime_type}")

            is_text = mime_type.startswith("text/")
            payload = minio_object.read()
            if is_text:
                # Text objects are handed back as str, decoded as UTF-8.
                payload = payload.decode("utf-8")
                logger.debug(f"Read {len(payload)} characters from text file")
            else:
                # Everything else is returned untouched as bytes.
                logger.debug(f"Read {len(payload)} bytes from binary file")
        return payload
    except Exception as exc:
        # Any failure (missing object, network, decoding) surfaces as a 500.
        failure = f"Error reading file '{object_name}' from MinIO: {exc!s}"
        logger.error(failure)
        raise HTTPException(status_code=500, detail=failure) from exc
def save_to_minio(data: List[Dict], source_type: str) -> str:
"""
@ -197,7 +151,7 @@ def save_to_minio(data: List[Dict], source_type: str) -> str:
f"Uploading '{tmp_filename}' to MinIO bucket '{minio_bucket_name}'")
minio_client.fput_object(
bucket_name=minio_bucket_name,
object_name=tmp_filename,
object_name="output/" + tmp_filename,
file_path=tmp_filename
)
@ -238,7 +192,7 @@ def convert_data(request: ConversionRequest):
f"Processing conversion request for {request.source_type} in {request.source_format} format")
# Read content from MinIO
content = read_content_from_minio(request)
content = read_content_from_minio(request.filename)
# Check if source and format are supported
if request.source_type not in CONVERTERS:

View file

@ -1,26 +1,123 @@
from app.config import available_sources, logger
from app.models import AvailableSourcesResponse, ImportRequest, ImportResponse
from fastapi import APIRouter
import json
import traceback
from fastapi import APIRouter, HTTPException
from pymilvus import connections, Collection, FieldSchema, CollectionSchema, \
DataType, utility
from app.config import logger
from app.models import ImportRequest, ImportResponse, \
AvailableCollectionsResponse
from app.routers.utils.generate_embeddings import generate_embeddings
from app.routers.utils.read_content_from_minio import read_content_from_minio
import_router = APIRouter(prefix="/import", tags=["Import"])
@import_router.post("/", response_model=ImportResponse)
async def import_data(request: ImportRequest):
    """
    Import a JSON document stored in MinIO into a Milvus collection.

    The object ``request.object_name`` is read from MinIO and parsed as JSON.
    Each item gets an embedding from ``generate_embeddings`` and is inserted
    into the collection ``"<request.source_type>_collection"``, which is
    created first when it does not exist yet.

    Args:
        request: The import request; ``source_type`` and ``object_name``
            are the fields read here.

    Returns:
        An ``ImportResponse`` whose ``status`` is ``"success"`` or
        ``"error"``. Failures are reported through the response instead of
        being raised. Items that fail individually are counted and reported
        in ``message``; the status is ``"error"`` when no item at all could
        be inserted.
    """
    # NOTE(review): every call below (Milvus, MinIO, embedding HTTP request)
    # is blocking, so this coroutine holds the event loop for the whole
    # import. Consider a plain ``def`` endpoint or a threadpool offload.
    try:
        logger.info(f"Starting import process for {request.source_type}")

        # Check Milvus connection
        try:
            connections.connect("default", host="milvus", port="19530")
            logger.info("Successfully connected to Milvus")
        except Exception as e:
            logger.error(f"Failed to connect to Milvus: {str(e)}")
            return ImportResponse(status="error",
                                  message="Failed to connect to Milvus")

        # Fetch data from MinIO
        try:
            data = read_content_from_minio(request.object_name)
            logger.info(
                f"Successfully fetched data from MinIO: {request.object_name}")
        except Exception as e:
            logger.error(f"Failed to fetch data from MinIO: {str(e)}")
            return ImportResponse(status="error",
                                  message="Failed to fetch data from MinIO")

        # Process data
        processed_data = json.loads(data)
        if not isinstance(processed_data, list):
            # Iterating a dict or scalar would make every "item" fail below
            # while the endpoint still reported success.
            logger.error(
                f"Expected a JSON array of items, got {type(processed_data).__name__}")
            return ImportResponse(
                status="error",
                message="Imported file must contain a JSON array of items")
        logger.info("Data processed successfully")

        # Generate embeddings and insert into Milvus
        collection_name = f"{request.source_type}_collection"
        if not utility.has_collection(collection_name):
            create_collection(collection_name)

        collection = Collection(collection_name)

        total_items = len(processed_data)
        failed_items = 0
        for i, item in enumerate(processed_data, 1):
            try:
                # The embedding is computed from the whole item, before any
                # field filtering; the input item itself is left unmodified.
                embedding = generate_embeddings(item)
                filtered_item = {
                    "content": item.get("content", ""),
                    "embedding": embedding,
                    # ``or 0`` also covers an explicit null timestamp.
                    "creation_timestamp": int(
                        item.get("creation_timestamp") or 0),
                    "index": item.get("index", ""),
                    "type": item.get("type", ""),
                    "network": item.get("network", ""),
                    "url": item.get("url", ""),
                }
                collection.insert([filtered_item])
                logger.info(
                    f"Inserted item {i}/{total_items} into Milvus collection {collection_name}")
            except Exception as e:
                # Best effort: one bad item must not abort the whole import,
                # but it is counted so the caller learns about it.
                failed_items += 1
                logger.error(f"Failed to process item {i}: {str(e)}")

        logger.info(f"Import completed for {request.source_type}")

        if total_items and failed_items == total_items:
            return ImportResponse(
                status="error",
                message=f"Import failed: none of the {total_items} items could be inserted")
        if failed_items:
            return ImportResponse(
                status="success",
                message=f"Import completed with {failed_items}/{total_items} items failed")
        return ImportResponse(status="success",
                              message="Import completed successfully")

    except Exception as e:
        logger.error(f"Unexpected error during import: {str(e)}")
        logger.error(traceback.format_exc())
        return ImportResponse(status="error",
                              message=f"Unexpected error: {str(e)}")
@import_router.get("/available_sources",
                   response_model=AvailableSourcesResponse)
def get_available_sources():
    """
    Return the configured available sources.

    :return: The ``available_sources`` object imported from ``app.config``,
        serialised through ``AvailableSourcesResponse``
    """
    logger.info("Get available sources from database")
    sources = available_sources
    return sources
def create_collection(collection_name: str, dim: int = 1024):
    """
    Create a Milvus collection for embedded items and index its vector field.

    The schema holds an auto-generated INT64 primary key ``id``, the text
    ``content``, the ``embedding`` vector and the metadata fields
    ``creation_timestamp``, ``index``, ``type``, ``network`` and ``url``.
    An IVF_FLAT index with the L2 metric is created on ``embedding``.

    Args:
        collection_name: Name of the collection to create.
        dim: Dimension of the ``embedding`` vector field. Defaults to 1024,
            the value that was previously hard-coded; it has to match the
            length of the vectors that will be inserted.
    """
    fields = [
        FieldSchema(name="id", dtype=DataType.INT64, is_primary=True,
                    auto_id=True),
        FieldSchema(name="content", dtype=DataType.VARCHAR, max_length=65535),
        FieldSchema(name="embedding", dtype=DataType.FLOAT_VECTOR, dim=dim),
        FieldSchema(name="creation_timestamp", dtype=DataType.INT64),
        FieldSchema(name="index", dtype=DataType.VARCHAR, max_length=255),
        FieldSchema(name="type", dtype=DataType.VARCHAR, max_length=255),
        FieldSchema(name="network", dtype=DataType.VARCHAR, max_length=255),
        FieldSchema(name="url", dtype=DataType.VARCHAR, max_length=2083),
    ]
    schema = CollectionSchema(fields, "A collection for storing embeddings")
    # Instantiating Collection with a schema creates it on the server.
    collection = Collection(collection_name, schema)

    index_params = {
        "metric_type": "L2",
        "index_type": "IVF_FLAT",
        "params": {"nlist": 1024},
    }
    collection.create_index("embedding", index_params)
    logger.info(f"Created new collection: {collection_name}")
@import_router.get("/available_collections",
                   response_model=AvailableCollectionsResponse)
def get_available_collections():
    """
    List the names of the collections that exist in Milvus.

    Returns:
        An ``AvailableCollectionsResponse`` carrying the collection names.

    Raises:
        HTTPException: 500 when connecting to Milvus or listing the
            collections fails.
    """
    logger.info("Getting available collections from Milvus")
    try:
        # Check for the *connection* alias "default". The previous
        # utility.has_collection("default") looked for a collection of that
        # name and itself requires an open connection.
        if not connections.has_connection("default"):
            connections.connect("default", host="milvus", port="19530")

        collections = utility.list_collections()
        logger.info(f"Found {len(collections)} collections")
        return AvailableCollectionsResponse(collections=collections)
    except Exception as e:
        logger.error(f"Error getting collections from Milvus: {str(e)}")
        raise HTTPException(status_code=500,
                            detail=f"Error getting collections from Milvus: {str(e)}")

View file

View file

@ -0,0 +1,43 @@
import json
import requests
from app.config import ollama_url, embedding_model_name, logger
def generate_embeddings(content):
    """
    Request an embedding for ``content`` from the Ollama embed endpoint.

    Non-string content is serialised with ``json.dumps`` first, falling back
    to ``str()`` when it cannot be serialised. The text is POSTed to
    ``{ollama_url}/api/embed`` with the model ``embedding_model_name`` and
    the first entry of the ``embeddings`` list in the response is returned.

    Args:
        content: The text to embed, or any object convertible to text.

    Returns:
        The first element of the response's ``embeddings`` list.

    Raises:
        requests.RequestException: If the request fails or the server
            answers with an error status.
        ValueError: If the response body is not valid JSON or contains no
            usable embedding.
    """
    # Convert content to string if it's not already
    if not isinstance(content, str):
        try:
            content = json.dumps(content)
        except Exception as e:
            logger.error(
                f"Error converting content to string: {str(e)}. Defaulting to string.")
            content = str(content)

    logger.info(
        f"Generating embeddings for content: {content[:100]}...")  # Log first 100 chars

    try:
        response = requests.post(f"{ollama_url}/api/embed", json={
            "model": embedding_model_name,
            "input": content
        })
        response.raise_for_status()  # Raise an exception for bad status codes
    except requests.RequestException as e:
        logger.error(f"Error making request to Ollama API: {str(e)}")
        # A Response is falsy for 4xx/5xx statuses, so compare against None;
        # a truthiness test would hide the body of exactly the responses
        # worth logging.
        failed_response = getattr(e, "response", None)
        body = failed_response.text if failed_response is not None else 'No response'
        logger.error(f"Response content: {body}")
        raise

    try:
        payload = response.json()
    except ValueError:
        # JSON decode errors (json's and requests' own) derive from
        # ValueError; ``response`` is always bound here, unlike the
        # exception variable the old handler referenced.
        logger.error(f"Error decoding JSON response: {response.text}")
        raise

    # Guard against a missing key or an empty list instead of indexing blindly.
    vectors = payload.get('embeddings') if isinstance(payload, dict) else None
    embeddings = vectors[0] if vectors else None
    if not embeddings:
        logger.error(
            "Unexpected error generating embeddings: No embeddings found in response")
        raise ValueError("No embeddings found in response")

    logger.info(
        f"Successfully generated embeddings of length {len(embeddings)}")
    return embeddings

View file

@ -0,0 +1,54 @@
from typing import Union
from fastapi import HTTPException
from app.config import logger, minio_bucket_name, minio_client
def read_content_from_minio(filename: str) -> Union[str, bytes]:
    """
    Read an object from the configured MinIO bucket.

    Args:
        filename: Name of the object to read from the bucket.

    Returns:
        The object content as a string when its content type starts with
        ``text/`` (decoded as UTF-8), otherwise as bytes.

    Raises:
        HTTPException: 400 if ``filename`` is empty, 500 if reading the
            object fails for any reason.
    """
    # An empty name can never resolve to an object: reject it up front.
    if not filename:
        logger.error("Filename is empty or invalid")
        raise HTTPException(status_code=400, detail="Filename is required")

    try:
        logger.info(
            f"Reading file '{filename}' from MinIO bucket '{minio_bucket_name}'")
        with minio_client.get_object(
            bucket_name=minio_bucket_name, object_name=filename
        ) as stream:
            declared_type = stream.headers.get("content-type", "")
            logger.debug(f"File content type: {declared_type}")

            if not declared_type.startswith("text/"):
                # Binary payload: hand the bytes back untouched.
                raw = stream.read()
                logger.debug(f"Read {len(raw)} bytes from binary file")
                return raw

            # Text payload: decode as UTF-8.
            text = stream.read().decode("utf-8")
            logger.debug(f"Read {len(text)} characters from text file")
            return text
    except Exception as err:
        # Every read failure is reported to the client as a 500.
        detail = f"Error reading file '{filename}' from MinIO: {err!s}"
        logger.error(detail)
        raise HTTPException(status_code=500, detail=detail) from err