importation de documents dans typesense
This commit is contained in:
parent
f4acc32451
commit
7a74dbf413
24 changed files with 390 additions and 332 deletions
|
@ -1,20 +1,11 @@
|
|||
import pandas as pd
|
||||
import requests
|
||||
import tqdm
|
||||
|
||||
from utils.opensearch import opensearch_client
|
||||
from .typesense_client import client
|
||||
|
||||
|
||||
def documents_to_database(documents_list, os_client=opensearch_client):
    """Index every row of ``documents_list`` into OpenSearch.

    Each row is converted to a dict; its ``'index'`` entry names the target
    index and is removed from the document before the document is sent.

    Args:
        documents_list: DataFrame-like object with an ``'index'`` column.
        os_client: OpenSearch client used for the ping, the index-existence
            check and the inserts.

    Raises:
        requests.exceptions.ConnectionError: if ``os_client.ping()`` is falsy.
        requests.exceptions.HTTPError: if the index named in the first row
            does not exist.
        ValueError: if a row has no usable ``'index'`` value.
    """
    # Fail fast when the cluster cannot be reached.
    reachable = os_client.ping()
    if not reachable:
        raise requests.exceptions.ConnectionError("Opensearch is not reachable")

    # Only the index named in the first row is verified up front.
    first_index = documents_list['index'].iloc[0]
    if not os_client.indices.exists(index=first_index):
        raise requests.exceptions.HTTPError(f"Index '{first_index}' does not exist")

    # Send the rows one by one, each to the index it names.
    records = documents_list.to_dict(orient='records')
    for record in records:
        target_index = record.pop('index', None)
        if not target_index:
            raise ValueError("Document must have an 'index' field")
        os_client.index(index=target_index, body=record)
|
||||
def documents_to_database(documents_list, os_client=client,
                          collection_name='social_media_posts'):
    """Insert every row of ``documents_list`` into a Typesense collection.

    Rows are converted to dicts and created one at a time while a tqdm
    progress bar tracks the loop. This is a best-effort import: any exception
    stops the loop and is reported on stdout instead of being raised, so
    documents created before the failure remain in the collection.

    Args:
        documents_list: DataFrame-like object; ``to_dict(orient='records')``
            must yield one dict per document.
        os_client: Typesense client used for the inserts.
        collection_name: Name of the target collection. Defaults to
            ``'social_media_posts'``, the collection previously hard-coded
            here.
    """
    try:
        # Resolve the target collection once instead of on every iteration.
        target_documents = os_client.collections[collection_name].documents
        for document in tqdm.tqdm(documents_list.to_dict(orient='records')):
            target_documents.create(document)
        print(f"Successfully inserted {len(documents_list)} documents.")
    except Exception as e:
        # Deliberately broad: the import reports problems rather than
        # crashing the caller.
        print(f"Error inserting documents: {str(e)}")
|
|
@ -1,22 +0,0 @@
|
|||
import os
|
||||
import dotenv
|
||||
|
||||
# Load environment variables from the .env file.
dotenv.load_dotenv()

# Connect to OpenSearch using the credentials and hostname/port below.
from opensearchpy import OpenSearch

host = 'localhost'
port = 9200
# For testing only. Don't store credentials in code.
auth = ('admin', os.getenv("OPENSEARCH_INITIAL_ADMIN_PASSWORD"))

# Client options: SSL/TLS is enabled, while certificate verification and the
# hostname assertion are disabled and ssl_show_warn is turned off.
_client_options = dict(
    hosts=[{'host': host, 'port': port}],
    http_compress=True,  # enables gzip compression for request bodies
    http_auth=auth,
    use_ssl=True,
    verify_certs=False,
    ssl_assert_hostname=False,
    ssl_show_warn=False,
)

opensearch_client = OpenSearch(**_client_options)
|
|
@ -11,4 +11,7 @@ reseau_social_data = [{"nom": "LinkedIn",
|
|||
{"nom": "Facebook",
|
||||
"repertoires": ["comments_and_reactions", "posts"]},
|
||||
{"nom": "FacebookBusiness",
|
||||
"repertoires": ["posts"]}]
|
||||
"repertoires": ["posts"]},
|
||||
{"nom": "Podcast",
|
||||
"repertoires": ["shownotes", "audio"]}
|
||||
]
|
||||
|
|
15
import_data/utils/typesense_client.py
Normal file
15
import_data/utils/typesense_client.py
Normal file
|
@ -0,0 +1,15 @@
|
|||
import typesense
|
||||
import os
|
||||
from dotenv import load_dotenv
|
||||
|
||||
# Load variables from the .env file so TYPESENSE_API_KEY can be read below.
load_dotenv()

# The single Typesense node this client talks to: plain HTTP on localhost.
_local_node = {
    'host': 'localhost',
    'port': '8108',
    'protocol': 'http',
}

# Shared Typesense client; the API key is taken from the environment.
client = typesense.Client({
    'nodes': [_local_node],
    'api_key': os.getenv('TYPESENSE_API_KEY'),
    'connection_timeout_seconds': 2,
})
|
Loading…
Add table
Add a link
Reference in a new issue