Changements environnement
This commit is contained in:
parent
fd0b15c9c4
commit
d4bf1607f7
5 changed files with 19 additions and 23 deletions
193
search_app_ui/streamlit_app.py
Normal file
193
search_app_ui/streamlit_app.py
Normal file
|
@ -0,0 +1,193 @@
|
|||
import streamlit as st
|
||||
import typesense
|
||||
from datetime import datetime, time
|
||||
import pandas as pd
|
||||
import plotly.express as px
|
||||
from dotenv import load_dotenv
|
||||
import os
|
||||
|
||||
# Use the full browser width. Streamlit expects the page config to be set
# before any other element is rendered, so keep this as the first st.* call.
st.set_page_config(layout="wide")

# Force a dark look by overriding widget colours with raw CSS.
# NOTE(review): Streamlit's supported way to do this is `[theme] base = "dark"`
# in .streamlit/config.toml; the selectors below depend on Streamlit's internal
# DOM and should be re-checked after every Streamlit upgrade.
st.markdown("""
<style>
    /* Main background */
    .stApp {
        background-color: #0e1117;
        color: #fafafa;
    }
    /* Sidebar: the data-testid selector is the stable hook; .css-1d391kg looks
       like a build-generated class name and is kept only for older builds */
    [data-testid="stSidebar"],
    .css-1d391kg {
        background-color: #262730;
    }
    /* Buttons */
    .stButton>button {
        color: #fafafa;
        background-color: #262730;
        border-color: #fafafa;
    }
    /* Text inputs */
    .stTextInput>div>div>input {
        color: #fafafa;
        background-color: #262730;
    }
    /* Selectbox */
    .stSelectbox>div>div>select {
        color: #fafafa;
        background-color: #262730;
    }
    /* Multiselect */
    .stMultiSelect>div>div>select {
        color: #fafafa;
        background-color: #262730;
    }
    /* Date input */
    .stDateInput>div>div>input {
        color: #fafafa;
        background-color: #262730;
    }
</style>
""", unsafe_allow_html=True)

# CSS class for a fixed-height, scrollable results container.
st.markdown("""
<style>
    .scrollable-results {
        height: 400px;
        overflow-y: scroll;
        border: 1px solid #ccc;
        padding: 10px;
        border-radius: 5px;
    }
</style>
""", unsafe_allow_html=True)
|
||||
|
||||
# Load variables from a local .env file (if any) into the process environment.
load_dotenv()

# The API key is the only setting without a sensible default. Without it the
# client would be built with api_key=None and fail only on the first search,
# so stop right away with an explicit message instead.
typesense_api_key = os.getenv('TYPESENSE_API_KEY')
if not typesense_api_key:
    st.error("La variable d'environnement TYPESENSE_API_KEY n'est pas définie.")
    st.stop()

# Initialise the Typesense client. Host, port and protocol may be overridden
# through the environment; the defaults are the original hard-coded values.
client = typesense.Client({
    'nodes': [{
        'host': os.getenv('TYPESENSE_HOST', 'localhost'),
        'port': os.getenv('TYPESENSE_PORT', '8108'),
        'protocol': os.getenv('TYPESENSE_PROTOCOL', 'http'),
    }],
    'api_key': typesense_api_key,
    'connection_timeout_seconds': 2,
})
|
||||
|
||||
def rechercher_documents(cette_requete, ces_filtres=None, facette_par=None):
    """Search the ``social_media_posts`` collection and collect every page of hits.

    Args:
        cette_requete: Query string, matched against the ``texte`` field.
        ces_filtres: Optional Typesense ``filter_by`` expression; omitted from
            the request when falsy.
        facette_par: Optional ``facet_by`` field name; omitted when falsy.

    Returns:
        The response dict of the last page fetched, with its ``'hits'`` entry
        replaced by the hits accumulated across all pages (sorted by
        ``creation_timestamp`` descending, as requested from the server).
    """
    parametres_recherche = {
        'q': cette_requete,
        'query_by': 'texte',
        'sort_by': 'creation_timestamp:desc',
        'per_page': 100,
        'page': 1,
    }

    if ces_filtres:
        parametres_recherche['filter_by'] = ces_filtres

    if facette_par:
        parametres_recherche['facet_by'] = facette_par

    documents = client.collections['social_media_posts'].documents
    all_results = []
    while True:
        results = documents.search(parametres_recherche)
        page_hits = results['hits']
        all_results.extend(page_hits)
        # Stop once everything advertised by 'found' has been collected, and
        # also as soon as a page comes back empty: if the server stops
        # returning hits before 'found' is reached, the loop would otherwise
        # request further pages forever.
        if not page_hits or len(all_results) >= results['found']:
            break
        parametres_recherche['page'] += 1

    results['hits'] = all_results
    return results
|
||||
|
||||
# Streamlit user interface
st.title('Recherche dans tes contenus publiés sur le web')

# Free-text search box
requete = st.text_input('Entrez votre requête de recherche')

# Date-range filter: start and end pickers rendered side by side
col1, col2 = st.columns(2)
with col1:
    date_debut = st.date_input('Date de début')
with col2:
    date_fin = st.date_input('Date de fin')

# Social-network filter
reseaux = [
    'Facebook',
    'Instagram',
    'Threads',
    'LinkedIn',
    'WordPress',
]
reseaux_selectionnes = st.multiselect(
    'Sélectionnez les réseaux sociaux',
    reseaux,
)
|
||||
|
||||
def _construire_filtres(debut, fin, reseaux_choisis):
    """Build the Typesense ``filter_by`` expression for the search.

    Args:
        debut: First day of the range (``datetime.date``), included from 00:00.
        fin: Last day of the range (``datetime.date``), included until the end
            of the day.
        reseaux_choisis: Network names to keep; an empty list adds no network
            clause.

    Returns:
        A filter string restricting ``creation_timestamp`` to the range and,
        when networks are given, ``network`` to any of the listed values.
    """
    debut_datetime = datetime.combine(debut, time.min)
    fin_datetime = datetime.combine(fin, time.max)
    filtre_date = (
        f"creation_timestamp:[{int(debut_datetime.timestamp())}"
        f"..{int(fin_datetime.timestamp())}]"
    )
    if not reseaux_choisis:
        return filtre_date
    # Typesense separates the values of a multi-value filter with commas;
    # a space-separated list would be read as one single value.
    filtre_reseau = f"network:[{','.join(reseaux_choisis)}]"
    return f"{filtre_date} && {filtre_reseau}"


def _formater_resultats(hits):
    """Render search hits as one text blob.

    Each hit becomes a header line (network and local timestamp), followed by
    its non-blank paragraphs and a ``---`` separator.
    """
    morceaux = []
    for hit in hits:
        document = hit['document']
        horodatage = datetime.fromtimestamp(document['creation_timestamp'])
        morceaux.append(
            f"**{document['network']}** - {horodatage.strftime('%Y-%m-%d %H:%M:%S')}\n\n"
        )
        # Skip blank lines so paragraphs are separated by exactly one empty line.
        morceaux.extend(
            f"{paragraphe}\n\n"
            for paragraphe in document['texte'].split('\n')
            if paragraphe.strip()
        )
        morceaux.append("---\n\n")
    return "".join(morceaux)


if st.button('Rechercher'):
    # An inverted range can never match anything; tell the user instead of
    # sending a meaningless filter to the server.
    if date_debut > date_fin:
        st.error("La date de début doit être antérieure ou égale à la date de fin.")
        st.stop()

    # Prepare the filters and fetch every matching document.
    filtres = _construire_filtres(date_debut, date_fin, reseaux_selectionnes)
    tous_resultats = rechercher_documents(requete, ces_filtres=filtres, facette_par='network')
    nombre_total_resultats = tous_resultats['found']
    hits = tous_resultats['hits']

    # Total number of results
    st.subheader(f"Trouvé {nombre_total_resultats} résultats")

    # Facets: pie chart of hits per network (skipped when there is nothing to plot)
    facettes = tous_resultats.get('facet_counts')
    if facettes and facettes[0]['counts']:
        facettes_reseau = {facette['value']: facette['count'] for facette in facettes[0]['counts']}
        st.subheader("Résultats par Réseau")
        fig = px.pie(
            values=list(facettes_reseau.values()),
            names=list(facettes_reseau.keys()),
            title="Distribution par Réseau",
        )
        st.plotly_chart(fig)

    # Monthly distribution per network. Gate on the hits actually retrieved:
    # the charts are built from them, and an empty frame has no datetime column.
    if hits:
        st.subheader("Résultats au fil du temps par réseau (agrégation mensuelle)")

        df_temporel = pd.DataFrame({
            'date': [datetime.fromtimestamp(hit['document']['creation_timestamp']) for hit in hits],
            'network': [hit['document']['network'] for hit in hits],
        })

        # Count posts per (month, network), then turn the period back into a
        # timestamp so plotly can draw a time axis.
        df_temporel['mois'] = df_temporel['date'].dt.to_period('M')
        df_temporel = df_temporel.groupby(['mois', 'network']).size().reset_index(name='count')
        df_temporel['mois'] = df_temporel['mois'].dt.to_timestamp()

        fig = px.line(df_temporel, x='mois', y='count', color='network',
                      title="Distribution temporelle par réseau (agrégation mensuelle)")
        fig.update_layout(xaxis_title="Mois", yaxis_title="Nombre de posts")
        fig.update_xaxes(tickformat="%B %Y")
        st.plotly_chart(fig)

        fig_bar = px.bar(df_temporel, x='mois', y='count', color='network',
                         title="Distribution temporelle par réseau (barres empilées, agrégation mensuelle)")
        fig_bar.update_layout(xaxis_title="Mois", yaxis_title="Nombre de posts")
        fig_bar.update_xaxes(tickformat="%B %Y")
        st.plotly_chart(fig_bar)

        st.subheader("Tableau récapitulatif mensuel")
        # fillna() leaves float columns; cast back so counts display as integers.
        df_pivot = (
            df_temporel
            .pivot(index='mois', columns='network', values='count')
            .fillna(0)
            .astype(int)
        )
        df_pivot['Total'] = df_pivot.sum(axis=1)
        df_pivot = df_pivot.reset_index()
        df_pivot['mois'] = df_pivot['mois'].dt.strftime('%B %Y')
        st.dataframe(df_pivot)

    # Display all results in a single text area
    st.text_area("Résultats de la recherche", _formater_resultats(hits), height=400)
|
||||
|
Loading…
Add table
Add a link
Reference in a new issue