Version initiale pour importer les données
This commit is contained in:
parent
3d337d064b
commit
aff201f6cf
22 changed files with 694 additions and 0 deletions
0
import_data/utils/__init__.py
Normal file
0
import_data/utils/__init__.py
Normal file
2
import_data/utils/config.py
Normal file
2
import_data/utils/config.py
Normal file
|
@ -0,0 +1,2 @@
|
|||
# Base URL of the API; utils/get_ids.py appends endpoint paths to it.
API_URL = "http://localhost:8000"
|
||||
WORDPRESS_NAMES = "jevalideca" # Separate multiple blog names with a comma
|
7
import_data/utils/convert_encoding_meta.py
Normal file
7
import_data/utils/convert_encoding_meta.py
Normal file
|
@ -0,0 +1,7 @@
|
|||
import re
|
||||
|
||||
|
||||
def convert_encoding_meta(text):
    """Repair UTF-8 text that was mistakenly decoded as Latin-1 (mojibake).

    Every run of characters whose code points look like one complete UTF-8
    byte sequence (a lead byte followed by exactly the number of
    continuation bytes it announces) is re-encoded as Latin-1 and decoded
    as UTF-8. Everything else is returned untouched.

    Args:
        text: The string to repair.

    Returns:
        The string with recognizable mojibake sequences replaced by the
        characters they were meant to be.
    """
    def _redecode(match):
        garbled = match.group(0)
        try:
            return garbled.encode('latin1').decode('utf8')
        except UnicodeDecodeError:
            # Right shape but not valid UTF-8 (overlong form, surrogate...):
            # it is genuine Latin-1 text, so keep it as it is.
            return garbled

    # Match exact sequence lengths instead of a greedy run of continuation
    # characters, so legitimate text such as "é»" or a valid sequence
    # followed by a stray continuation-range character no longer raises.
    return re.sub(
        r'[\xc2-\xdf][\x80-\xbf]'
        r'|[\xe0-\xef][\x80-\xbf]{2}'
        r'|[\xf0-\xf4][\x80-\xbf]{3}',
        _redecode, text)
|
20
import_data/utils/documents_to_database.py
Normal file
20
import_data/utils/documents_to_database.py
Normal file
|
@ -0,0 +1,20 @@
|
|||
import pandas as pd
|
||||
import requests
|
||||
|
||||
from utils.opensearch import opensearch_client
|
||||
|
||||
|
||||
def documents_to_database(documents_list, os_client=opensearch_client):
    """Insert every row of ``documents_list`` into its OpenSearch index.

    Each row is converted to a dict; its ``index`` entry names the target
    index and is removed from the document before the document is sent.
    All rows and all target indexes are validated before the first insert,
    so a bad row cannot leave a partially imported batch behind.

    Args:
        documents_list: DataFrame with one document per row and an
            ``index`` column naming the destination index.
        os_client: Client used to reach OpenSearch; defaults to the shared
            ``opensearch_client``.

    Raises:
        requests.exceptions.ConnectionError: If ``os_client.ping()`` fails.
        ValueError: If a row has no (or an empty) ``index`` value.
        requests.exceptions.HTTPError: If a referenced index does not exist.
    """
    # Check if opensearch is available
    if not os_client.ping():
        raise requests.exceptions.ConnectionError("Opensearch is not reachable")

    documents = documents_list.to_dict(orient='records')
    if not documents:
        # Nothing to insert; previously this crashed on ``.iloc[0]``.
        return

    # Separate the routing information from the document payloads
    index_names = []
    for document in documents:
        index_name = document.pop('index', None)
        if not index_name:
            raise ValueError("Document must have an 'index' field")
        index_names.append(index_name)

    # Check that every distinct target index exists, not only the first one
    for index_name in dict.fromkeys(index_names):
        if not os_client.indices.exists(index=index_name):
            raise requests.exceptions.HTTPError(f"Index '{index_name}' does not exist")

    # Insert each document into opensearch index(es)
    for index_name, document in zip(index_names, documents):
        os_client.index(index=index_name,
                        body=document)
|
16
import_data/utils/get_ids.py
Normal file
16
import_data/utils/get_ids.py
Normal file
|
@ -0,0 +1,16 @@
|
|||
import pandas as pd
|
||||
import requests
|
||||
|
||||
import utils.config as config
|
||||
|
||||
# Module-level alias of the configured API base URL; the default endpoint
# arguments of the functions below are built from it at definition time.
API_URL = config.API_URL
|
||||
|
||||
|
||||
def get_idreseausocial(nom, endpoint=f"{API_URL}/reseauxsociaux/"):
    """Return the id of the social network whose ``nom`` equals ``nom``.

    Args:
        nom: Name of the social network to look up.
        endpoint: URL returning the social networks as JSON records with
            at least ``nom`` and ``id`` fields.

    Returns:
        The ``id`` of the first matching record.

    Raises:
        requests.exceptions.HTTPError: If the API answers with an error
            status.
        IndexError: If no record has the requested name.
    """
    response = requests.get(endpoint)
    # Fail on HTTP errors instead of parsing an error payload as data
    response.raise_for_status()
    reseaux_sociaux = pd.DataFrame(response.json())
    matching_ids = reseaux_sociaux.loc[reseaux_sociaux["nom"] == nom, "id"]
    if matching_ids.empty:
        # Same exception type as before, but with an actionable message
        raise IndexError(f"No social network named '{nom}' found at {endpoint}")
    # list() yields plain Python scalars, exactly as the original lookup did
    return list(matching_ids)[0]
|
||||
|
||||
|
||||
def get_idtypedocument(nom, endpoint=f"{API_URL}/typedocuments/"):
    """Return the id of the document type whose ``nom`` equals ``nom``.

    Args:
        nom: Name of the document type to look up.
        endpoint: URL returning the document types as JSON records with
            at least ``nom`` and ``id`` fields.

    Returns:
        The ``id`` of the first matching record.

    Raises:
        requests.exceptions.HTTPError: If the API answers with an error
            status.
        IndexError: If no record has the requested name.
    """
    response = requests.get(endpoint)
    # Fail on HTTP errors instead of parsing an error payload as data
    response.raise_for_status()
    type_documents = pd.DataFrame(response.json())
    matching_ids = type_documents.loc[type_documents["nom"] == nom, "id"]
    if matching_ids.empty:
        # Same exception type as before, but with an actionable message
        raise IndexError(f"No document type named '{nom}' found at {endpoint}")
    # list() yields plain Python scalars, exactly as the original lookup did
    return list(matching_ids)[0]
|
22
import_data/utils/opensearch.py
Normal file
22
import_data/utils/opensearch.py
Normal file
|
@ -0,0 +1,22 @@
|
|||
import os
|
||||
import dotenv
|
||||
|
||||
# Load environment variables from the .env file
|
||||
dotenv.load_dotenv()
|
||||
|
||||
# Connect to OpenSearch using the provided credentials and hostname/port.
|
||||
from opensearchpy import OpenSearch
|
||||
|
||||
# Connection settings for the OpenSearch node.
host = 'localhost'
port = 9200
# For testing only: the admin password comes from the environment so that
# no credential is stored in the code.
auth = ('admin', os.getenv("OPENSEARCH_INITIAL_ADMIN_PASSWORD"))

# Shared client. SSL/TLS is enabled, but certificate verification and
# hostname checking are both switched off and the related warning is
# silenced.
opensearch_client = OpenSearch(
    hosts=[dict(host=host, port=port)],
    http_auth=auth,
    http_compress=True,  # enables gzip compression for request bodies
    use_ssl=True,
    verify_certs=False,
    ssl_assert_hostname=False,
    ssl_show_warn=False,
)
|
14
import_data/utils/reseau_social_data.py
Normal file
14
import_data/utils/reseau_social_data.py
Normal file
|
@ -0,0 +1,14 @@
|
|||
import utils.config as config
|
||||
|
||||
# Blog names come from a comma-separated setting. Strip surrounding
# whitespace and drop empty entries so that "a, b," yields ["a", "b"]
# rather than ["a", " b", ""].
wordpress_names = [
    name.strip()
    for name in config.WORDPRESS_NAMES.split(",")
    if name.strip()
]

# One entry per social network: its name ("nom") and its "repertoires"
# (presumably the export sub-directories to import - confirm against the
# importer). Wordpress uses the configured blog names.
reseau_social_data = [
    {"nom": "LinkedIn",
     "repertoires": ["comments", "shares"]},
    {"nom": "Wordpress",
     "repertoires": wordpress_names},
    {"nom": "Instagram",
     "repertoires": ["comments", "content", "threads"]},
    {"nom": "Facebook",
     "repertoires": ["comments_and_reactions", "posts"]},
    {"nom": "FacebookBusiness",
     "repertoires": ["posts"]},
]
|
Loading…
Add table
Add a link
Reference in a new issue