🚀 Add feature: Loading data into Milvus is now working
This commit is contained in:
parent
f9e5a6e013
commit
64832e2989
23 changed files with 354 additions and 109 deletions
|
@ -1,7 +1,7 @@
|
|||
import datetime
|
||||
import json
|
||||
import os
|
||||
from typing import Dict, Union, List
|
||||
from typing import Dict, List
|
||||
|
||||
from fastapi import APIRouter, HTTPException
|
||||
|
||||
|
@ -43,6 +43,7 @@ from app.convert.convert_youtube_shorts_video import (
|
|||
)
|
||||
from app.convert.convert_youtube_video_video import convert_youtube_video_video
|
||||
from app.models import ConversionRequest, ConversionResponse
|
||||
from app.routers.utils.read_content_from_minio import read_content_from_minio
|
||||
|
||||
convert_router = APIRouter(prefix="/convert", tags=["Convert"])
|
||||
|
||||
|
@ -127,53 +128,6 @@ def generate_temp_file(data: List[Dict], source_type: str) -> str:
|
|||
|
||||
return tmp_filename
|
||||
|
||||
def read_content_from_minio(request: ConversionRequest) -> Union[str, bytes]:
    """
    Read the object named by ``request.filename`` from MinIO storage.

    Args:
        request: The conversion request; only its ``filename`` attribute is
            read and it is used as the MinIO object name.

    Returns:
        The UTF-8 decoded text when the object's content type starts with
        ``text/``, otherwise the raw bytes.

    Raises:
        HTTPException: 400 when ``request.filename`` is empty, 500 when the
            object cannot be fetched, read or decoded.
    """
    # Reject empty filenames before talking to MinIO
    if not request.filename:
        logger.error("Filename is empty or invalid")
        raise HTTPException(
            status_code=400, detail="Filename is required"
        )

    try:
        logger.info(
            f"Reading file '{request.filename}' from MinIO bucket '{minio_bucket_name}'")
        response = minio_client.get_object(
            bucket_name=minio_bucket_name, object_name=request.filename
        )
        try:
            content_type = response.headers.get("content-type", "")
            logger.debug(f"File content type: {content_type}")

            if content_type.startswith("text/"):
                # Text objects are returned as str (UTF-8)
                content = response.read().decode("utf-8")
                logger.debug(f"Read {len(content)} characters from text file")
            else:
                # Everything else is returned untouched as bytes
                content = response.read()
                logger.debug(f"Read {len(content)} bytes from binary file")

            return content
        finally:
            # Closing alone does not hand the connection back to the pool;
            # the MinIO SDK documents close() followed by release_conn().
            response.close()
            response.release_conn()
    except Exception as e:
        error_msg = f"Error reading file '{request.filename}' from MinIO: {e!s}"
        logger.error(error_msg)
        raise HTTPException(
            status_code=500, detail=error_msg
        ) from e
|
||||
|
||||
|
||||
def save_to_minio(data: List[Dict], source_type: str) -> str:
|
||||
"""
|
||||
|
@ -197,7 +151,7 @@ def save_to_minio(data: List[Dict], source_type: str) -> str:
|
|||
f"Uploading '{tmp_filename}' to MinIO bucket '{minio_bucket_name}'")
|
||||
minio_client.fput_object(
|
||||
bucket_name=minio_bucket_name,
|
||||
object_name=tmp_filename,
|
||||
object_name="output/" + tmp_filename,
|
||||
file_path=tmp_filename
|
||||
)
|
||||
|
||||
|
@ -238,7 +192,7 @@ def convert_data(request: ConversionRequest):
|
|||
f"Processing conversion request for {request.source_type} in {request.source_format} format")
|
||||
|
||||
# Read content from MinIO
|
||||
content = read_content_from_minio(request)
|
||||
content = read_content_from_minio(request.filename)
|
||||
|
||||
# Check if source and format are supported
|
||||
if request.source_type not in CONVERTERS:
|
||||
|
|
|
@ -1,26 +1,123 @@
|
|||
from app.config import available_sources, logger
|
||||
from app.models import AvailableSourcesResponse, ImportRequest, ImportResponse
|
||||
from fastapi import APIRouter
|
||||
import json
|
||||
import traceback
|
||||
|
||||
from fastapi import APIRouter, HTTPException
|
||||
from pymilvus import connections, Collection, FieldSchema, CollectionSchema, \
|
||||
DataType, utility
|
||||
|
||||
from app.config import logger
|
||||
from app.models import ImportRequest, ImportResponse, \
|
||||
AvailableCollectionsResponse
|
||||
from app.routers.utils.generate_embeddings import generate_embeddings
|
||||
from app.routers.utils.read_content_from_minio import read_content_from_minio
|
||||
|
||||
import_router = APIRouter(prefix="/import", tags=["Import"])
|
||||
|
||||
|
||||
@import_router.post("/", response_model=ImportResponse)
def import_data(request: ImportRequest):
    """
    Handle an import request (e.g., text, files, or structured data).

    Only logs the request's ``type`` and returns the ``...`` placeholder;
    no import work is performed here.
    """
    message = f"Receiver importation request: {request.type}"
    logger.info(message)
    return Ellipsis
|
||||
async def import_data(request: ImportRequest):
    """
    Import a JSON document stored in MinIO into a Milvus collection.

    The handler connects to Milvus, reads ``request.object_name`` from MinIO,
    parses it as JSON, creates ``<source_type>_collection`` when it does not
    exist yet, then embeds and inserts the items one at a time.

    Args:
        request: The import request; its ``source_type`` and ``object_name``
            attributes are read.

    Returns:
        An ImportResponse whose ``status`` is "success" or "error". Failures
        are logged and reported through the response rather than raised.
    """
    # NOTE(review): the Milvus, MinIO and embedding calls below look
    # synchronous, so this coroutine would block the event loop while it
    # runs -- confirm whether a plain ``def`` endpoint is more appropriate.
    try:
        logger.info(f"Starting import process for {request.source_type}")

        # Check Milvus connection
        try:
            connections.connect("default", host="milvus", port="19530")
            logger.info("Successfully connected to Milvus")
        except Exception as e:
            logger.error(f"Failed to connect to Milvus: {str(e)}")
            return ImportResponse(status="error",
                                  message="Failed to connect to Milvus")

        # Fetch data from MinIO
        try:
            data = read_content_from_minio(request.object_name)
            logger.info(
                f"Successfully fetched data from MinIO: {request.object_name}")
        except Exception as e:
            logger.error(f"Failed to fetch data from MinIO: {str(e)}")
            return ImportResponse(status="error",
                                  message="Failed to fetch data from MinIO")

        # Parse the payload; a malformed document is a client-side problem
        # and gets its own message instead of the generic "unexpected" one.
        try:
            processed_data = json.loads(data)
        except (ValueError, TypeError) as e:
            logger.error(f"Failed to parse data as JSON: {str(e)}")
            return ImportResponse(status="error",
                                  message="Failed to parse data as JSON")

        # The loop below expects a sequence of dict-like items; iterating a
        # JSON object would walk its keys and fail on every single one.
        if not isinstance(processed_data, list):
            logger.error(
                f"Expected a JSON array, got {type(processed_data).__name__}")
            return ImportResponse(status="error",
                                  message="Imported data must be a JSON array")
        logger.info("Data processed successfully")

        # Generate embeddings and insert into Milvus
        collection_name = f"{request.source_type}_collection"
        if not utility.has_collection(collection_name):
            create_collection(collection_name)

        collection = Collection(collection_name)

        total_items = len(processed_data)
        failed_items = 0
        for i, item in enumerate(processed_data, 1):
            try:
                # The whole item is embedded; the input is left unmodified.
                embedding = generate_embeddings(item)
                filtered_item = {
                    "content": item.get("content", ""),
                    "embedding": embedding,
                    "creation_timestamp": int(
                        item.get("creation_timestamp", 0)),
                    "index": item.get("index", ""),
                    "type": item.get("type", ""),
                    "network": item.get("network", ""),
                    "url": item.get("url", "")
                }
                collection.insert([filtered_item])
                logger.info(
                    f"Inserted item {i}/{total_items} into Milvus collection {collection_name}")
            except Exception as e:
                # Best effort: one bad item must not abort the whole import,
                # but it is counted so the caller learns about it.
                failed_items += 1
                logger.error(f"Failed to process item {i}: {str(e)}")

        logger.info(f"Import completed for {request.source_type}")

        if failed_items == 0:
            return ImportResponse(status="success",
                                  message="Import completed successfully")

        inserted_items = total_items - failed_items
        if inserted_items == 0:
            # Nothing was stored: reporting success here would be misleading.
            return ImportResponse(
                status="error",
                message=f"Import failed: none of the {total_items} items could be inserted")
        return ImportResponse(
            status="success",
            message=f"Import completed with errors: {inserted_items}/{total_items} items inserted")

    except Exception as e:
        logger.error(f"Unexpected error during import: {str(e)}")
        logger.error(traceback.format_exc())
        return ImportResponse(status="error",
                              message=f"Unexpected error: {str(e)}")
|
||||
|
||||
|
||||
@import_router.get("/available_sources",
                   response_model=AvailableSourcesResponse)
def get_available_sources():
    """
    Return the available sources.

    :return: The ``available_sources`` object imported from ``app.config``,
        serialised by the route as an AvailableSourcesResponse
    """
    logger.info("Get available sources from database")
    sources = available_sources
    return sources
|
||||
def create_collection(collection_name: str, embedding_dim: int = 1024):
    """
    Create a Milvus collection for embedded content and index its vectors.

    The schema holds an auto-generated INT64 primary key ``id``, the text
    ``content``, the ``embedding`` vector, an INT64 ``creation_timestamp``
    and the VARCHAR metadata fields ``index``, ``type``, ``network`` and
    ``url``. An IVF_FLAT index with the L2 metric is created on
    ``embedding``.

    Args:
        collection_name: Name of the collection to create.
        embedding_dim: Dimension of the ``embedding`` vector field. Defaults
            to 1024, the value that used to be hard-coded; it has to match
            the length of the vectors inserted later.
    """
    fields = [
        FieldSchema(name="id", dtype=DataType.INT64, is_primary=True,
                    auto_id=True),
        FieldSchema(name="content", dtype=DataType.VARCHAR, max_length=65535),
        FieldSchema(name="embedding", dtype=DataType.FLOAT_VECTOR,
                    dim=embedding_dim),
        FieldSchema(name="creation_timestamp", dtype=DataType.INT64),
        FieldSchema(name="index", dtype=DataType.VARCHAR, max_length=255),
        FieldSchema(name="type", dtype=DataType.VARCHAR, max_length=255),
        FieldSchema(name="network", dtype=DataType.VARCHAR, max_length=255),
        FieldSchema(name="url", dtype=DataType.VARCHAR, max_length=2083),
    ]
    schema = CollectionSchema(fields, "A collection for storing embeddings")
    collection = Collection(collection_name, schema)

    index_params = {
        "metric_type": "L2",
        "index_type": "IVF_FLAT",
        "params": {"nlist": 1024}
    }
    collection.create_index("embedding", index_params)
    logger.info(f"Created new collection: {collection_name}")
|
||||
|
||||
|
||||
@import_router.get("/available_collections",
                   response_model=AvailableCollectionsResponse)
def get_available_collections():
    """
    List the names of the collections that exist in Milvus.

    Returns:
        An AvailableCollectionsResponse carrying the collection names.

    Raises:
        HTTPException: 500 when connecting to Milvus or listing fails.
    """
    logger.info("Getting available collections from Milvus")

    try:
        # "default" is a connection alias, not a collection name: asking
        # utility.has_collection() about it needs a live connection itself,
        # so it cannot be used to decide whether to connect.
        if not connections.has_connection("default"):
            connections.connect("default", host="milvus", port="19530")

        collections = utility.list_collections()

        logger.info(f"Found {len(collections)} collections")
        return AvailableCollectionsResponse(collections=collections)

    except Exception as e:
        logger.error(f"Error getting collections from Milvus: {str(e)}")
        raise HTTPException(status_code=500,
                            detail=f"Error getting collections from Milvus: {str(e)}")
|
||||
|
|
0
backend/app/routers/utils/__init__.py
Normal file
0
backend/app/routers/utils/__init__.py
Normal file
43
backend/app/routers/utils/generate_embeddings.py
Normal file
43
backend/app/routers/utils/generate_embeddings.py
Normal file
|
@ -0,0 +1,43 @@
|
|||
import json
|
||||
|
||||
import requests
|
||||
|
||||
from app.config import ollama_url, embedding_model_name, logger
|
||||
|
||||
|
||||
def generate_embeddings(content):
    """
    Return the embedding vector for *content* from the Ollama embed API.

    Args:
        content: Text to embed. A non-string value is serialised with
            ``json.dumps`` first, falling back to ``str()`` when that fails.

    Returns:
        The first entry of the ``embeddings`` array in the API response.

    Raises:
        requests.RequestException: The request failed or the API answered
            with an error status.
        ValueError: The response holds no embeddings, or (as
            ``json.JSONDecodeError``) is not valid JSON.
    """
    # Convert content to string if it's not already
    if not isinstance(content, str):
        try:
            content = json.dumps(content)
        except Exception as e:
            logger.error(
                f"Error converting content to string: {str(e)}. Defaulting to string.")
            content = str(content)

    logger.info(
        f"Generating embeddings for content: {content[:100]}...")  # Log first 100 chars

    response = None
    try:
        response = requests.post(f"{ollama_url}/api/embed", json={
            "model": embedding_model_name,
            "input": content
        })
        response.raise_for_status()  # Raise an exception for bad status codes

        # Check for a missing or empty list *before* indexing, so the caller
        # gets the intended ValueError instead of a TypeError/IndexError.
        embeddings_batch = response.json().get('embeddings')
        if not embeddings_batch or not embeddings_batch[0]:
            raise ValueError("No embeddings found in response")

        embeddings = embeddings_batch[0]
        logger.info(
            f"Successfully generated embeddings of length {len(embeddings)}")
        return embeddings
    except json.JSONDecodeError:
        # Listed first because recent requests versions raise a decode error
        # that is also a RequestException. ``response`` is always bound here:
        # only response.json() can raise this inside the try body.
        logger.error(f"Error decoding JSON response: {response.text}")
        raise
    except requests.RequestException as e:
        logger.error(f"Error making request to Ollama API: {str(e)}")
        # A Response is falsy for 4xx/5xx statuses, so test against None to
        # avoid hiding the error body exactly when it matters.
        failed_response = e.response if e.response is not None else response
        logger.error(
            f"Response content: {failed_response.text if failed_response is not None else 'No response'}")
        raise
    except Exception as e:
        logger.error(f"Unexpected error generating embeddings: {str(e)}")
        raise
|
54
backend/app/routers/utils/read_content_from_minio.py
Normal file
54
backend/app/routers/utils/read_content_from_minio.py
Normal file
|
@ -0,0 +1,54 @@
|
|||
from typing import Union
|
||||
|
||||
from fastapi import HTTPException
|
||||
|
||||
from app.config import logger, minio_bucket_name, minio_client
|
||||
|
||||
|
||||
def read_content_from_minio(filename: str) -> Union[str, bytes]:
    """
    Read the object named ``filename`` from MinIO storage.

    Args:
        filename: Name of the object to read from the configured bucket.

    Returns:
        The UTF-8 decoded text when the object's content type starts with
        ``text/``, otherwise the raw bytes.

    Raises:
        HTTPException: 400 when ``filename`` is empty, 500 when the object
            cannot be fetched, read or decoded.
    """
    # Reject empty filenames before talking to MinIO
    if not filename:
        logger.error("Filename is empty or invalid")
        raise HTTPException(
            status_code=400, detail="Filename is required"
        )

    try:
        logger.info(
            f"Reading file '{filename}' from MinIO bucket '{minio_bucket_name}'")
        response = minio_client.get_object(
            bucket_name=minio_bucket_name, object_name=filename
        )
        try:
            content_type = response.headers.get("content-type", "")
            logger.debug(f"File content type: {content_type}")

            if content_type.startswith("text/"):
                # Text objects are returned as str (UTF-8)
                content = response.read().decode("utf-8")
                logger.debug(f"Read {len(content)} characters from text file")
            else:
                # Everything else is returned untouched as bytes
                content = response.read()
                logger.debug(f"Read {len(content)} bytes from binary file")

            return content
        finally:
            # Closing alone does not hand the connection back to the pool;
            # the MinIO SDK documents close() followed by release_conn().
            response.close()
            response.release_conn()
    except Exception as e:
        error_msg = f"Error reading file '{filename}' from MinIO: {e!s}"
        logger.error(error_msg)
        raise HTTPException(
            status_code=500, detail=error_msg
        ) from e
|
Loading…
Add table
Add a link
Reference in a new issue