importation de documents dans typesense

This commit is contained in:
François Pelletier 2024-10-02 21:53:37 -04:00
parent f4acc32451
commit 7a74dbf413
24 changed files with 390 additions and 332 deletions

View file

@ -1,20 +1,11 @@
import pandas as pd
import requests
import tqdm
from utils.opensearch import opensearch_client
from .typesense_client import client
def documents_to_database(documents_list, os_client=opensearch_client):
    """Insert every row of ``documents_list`` into its OpenSearch index.

    All target indexes are validated before the first write, so a bad row
    cannot leave a partially imported batch behind.

    Args:
        documents_list: pandas DataFrame with one document per row. The
            ``index`` column names the target index; the remaining columns
            are sent as the document body.
        os_client: client used for every call; defaults to the shared
            ``opensearch_client``.

    Raises:
        requests.exceptions.ConnectionError: ``os_client.ping()`` is falsy.
        ValueError: a row has no ``index`` value (missing or empty).
        requests.exceptions.HTTPError: a referenced index does not exist.
    """
    # Check if opensearch is available
    if not os_client.ping():
        raise requests.exceptions.ConnectionError("Opensearch is not reachable")

    records = documents_list.to_dict(orient='records')
    if not records:
        # Nothing to import; avoids indexing into an empty frame.
        return

    # Validate every row up front. Each distinct index is looked up once.
    verified_indexes = set()
    for document in records:
        index_name = document.get('index')
        if not index_name:
            raise ValueError("Document must have an 'index' field")
        if index_name in verified_indexes:
            continue
        if not os_client.indices.exists(index=index_name):
            raise requests.exceptions.HTTPError(f"Index '{index_name}' does not exist")
        verified_indexes.add(index_name)

    # Insert each document into its opensearch index; the routing field is
    # removed so it is not stored as part of the document body.
    for document in records:
        index_name = document.pop('index')
        os_client.index(index=index_name,
                        body=document)
def documents_to_database(documents_list, os_client=client,
                          collection_name='social_media_posts'):
    """Insert every row of ``documents_list`` into a Typesense collection.

    Rows are created one at a time behind a tqdm progress bar. Errors are
    reported on stdout instead of being raised; the first failing document
    stops the import, and the report states how many documents had already
    been inserted at that point.

    Args:
        documents_list: pandas DataFrame; each row becomes one document.
        os_client: Typesense client used for the inserts; defaults to the
            shared ``client``.
        collection_name: name of the target collection. Defaults to
            ``'social_media_posts'``.
    """
    inserted = 0
    try:
        for document in tqdm.tqdm(documents_list.to_dict(orient='records')):
            os_client.collections[collection_name].documents.create(document)
            inserted += 1
        print(f"Successfully inserted {len(documents_list)} documents.")
    except Exception as e:
        # Best-effort import: report and return rather than propagate.
        print(f"Error inserting documents: {str(e)}")
        # Earlier rows are already stored; make the partial state visible.
        print(f"{inserted} document(s) were inserted before the failure.")

View file

@ -1,22 +0,0 @@
import os

import dotenv
from opensearchpy import OpenSearch

# Load environment variables from the .env file before any setting is read.
dotenv.load_dotenv()

# Connection settings for the OpenSearch node.
host = 'localhost'
port = 9200
# The admin password is read from the environment. For testing only: never
# store credentials in code.
auth = ('admin', os.environ.get("OPENSEARCH_INITIAL_ADMIN_PASSWORD"))

# SSL/TLS is enabled, but certificate verification and hostname checks are
# disabled and the corresponding warning is silenced.
opensearch_client = OpenSearch(
    hosts=[{'host': host, 'port': port}],
    http_compress=True,  # gzip compression for request bodies
    http_auth=auth,
    use_ssl=True,
    verify_certs=False,
    ssl_assert_hostname=False,
    ssl_show_warn=False,
)

View file

@ -11,4 +11,7 @@ reseau_social_data = [{"nom": "LinkedIn",
{"nom": "Facebook",
"repertoires": ["comments_and_reactions", "posts"]},
{"nom": "FacebookBusiness",
"repertoires": ["posts"]}]
"repertoires": ["posts"]},
{"nom": "Podcast",
"repertoires": ["shownotes", "audio"]}
]

View file

@ -0,0 +1,15 @@
import typesense
import os
from dotenv import load_dotenv
# Load variables from the .env file before TYPESENSE_API_KEY is read below.
load_dotenv()

# The single Typesense node this client talks to.
_node = {
    'host': 'localhost',
    'port': '8108',
    'protocol': 'http',
}

# Module-level client; the API key is taken from the environment.
client = typesense.Client({
    'nodes': [_node],
    'api_key': os.environ.get('TYPESENSE_API_KEY'),
    'connection_timeout_seconds': 2,
})