Améliorations recherche

This commit is contained in:
François Pelletier 2025-01-02 23:04:35 -05:00
parent 731d9bce6d
commit ad0c34fcff
10 changed files with 317 additions and 305 deletions

View file

@ -0,0 +1,30 @@
import spacy
import subprocess
import sys
def download_spacy_model(model_name):
    """Download and install a spaCy model using the running interpreter.

    Runs ``<sys.executable> -m spacy download <model_name>`` so the model is
    installed by the same Python interpreter that executes this script.

    Args:
        model_name: Name of the spaCy pipeline package to install, for
            example ``"en_core_web_sm"``.

    Returns:
        ``True`` if the download command exited with status 0, ``False`` if
        it exited with a non-zero status. Earlier versions returned ``None``
        in both cases, so callers that ignore the result are unaffected.
    """
    print(f"Downloading and installing spaCy model: {model_name}")
    try:
        # An argument list (no shell) keeps model_name from being
        # interpreted by a shell.
        subprocess.check_call([sys.executable, "-m", "spacy", "download", model_name])
    except subprocess.CalledProcessError:
        print(f"Error installing {model_name}. Please make sure you have the necessary permissions.")
        return False
    # Only reached when check_call did not raise.
    print(f"Successfully installed {model_name}")
    return True
# Download and install the English model.
download_spacy_model("en_core_web_sm")
# Download and install the French model.
download_spacy_model("fr_core_news_sm")

# Load each model to verify that the installation is usable.
# `except Exception` replaces the former bare `except:` so that
# KeyboardInterrupt and SystemExit are no longer swallowed, while any load
# failure is still reported without aborting the script.
try:
    nlp_en = spacy.load("en_core_web_sm")
    print("English model loaded successfully")
except Exception:
    print("Error loading English model")
try:
    nlp_fr = spacy.load("fr_core_news_sm")
    print("French model loaded successfully")
except Exception:
    print("Error loading French model")

View file

@ -1,97 +0,0 @@
altair==5.5.0
annotated-types==0.7.0
attrs==24.2.0
av==13.1.0
beautifulsoup4==4.12.3
blinker==1.9.0
blis==1.0.1
cachetools==5.5.0
catalogue==2.0.10
certifi==2024.8.30
charset-normalizer==3.4.0
click==8.1.7
cloudpathlib==0.20.0
coloredlogs==15.0.1
confection==0.1.5
contourpy==1.3.1
ctranslate2==4.5.0
cycler==0.12.1
cymem==2.0.10
en_core_web_sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl#sha256=1932429db727d4bff3deed6b34cfc05df17794f4a52eeb26cf8928f7c1a0fb85
faster-whisper==1.1.0
filelock==3.16.1
flatbuffers==24.3.25
fonttools==4.55.0
fr_core_news_sm @ https://github.com/explosion/spacy-models/releases/download/fr_core_news_sm-3.8.0/fr_core_news_sm-3.8.0-py3-none-any.whl#sha256=7d6ad14cd5078e53147bfbf70fb9d433c6a3865b695fda2657140bbc59a27e29
fsspec==2024.10.0
gitdb==4.0.11
GitPython==3.1.43
huggingface-hub==0.26.3
humanfriendly==10.0
idna==3.10
Jinja2==3.1.4
jsonschema==4.23.0
jsonschema-specifications==2024.10.1
kiwisolver==1.4.7
langcodes==3.5.0
langdetect==1.0.9
language_data==1.3.0
marisa-trie==1.2.1
markdown-it-py==3.0.0
markdownify==0.11.6
MarkupSafe==3.0.2
matplotlib==3.9.3
mdurl==0.1.2
mpmath==1.3.0
murmurhash==1.0.11
narwhals==1.15.0
numpy==2.0.2
onnxruntime==1.20.1
packaging==24.2
pandas==2.2.3
pillow==11.0.0
plotly==5.24.1
preshed==3.0.9
protobuf==5.29.0
pyarrow==17.0.0
pydantic==2.10.2
pydantic_core==2.27.1
pydeck==0.9.1
Pygments==2.18.0
pyparsing==3.2.0
python-dateutil==2.9.0.post0
python-dotenv==1.0.1
pytz==2024.2
PyYAML==6.0.2
referencing==0.35.1
requests==2.31.0
rich==13.9.4
rpds-py==0.21.0
setuptools==75.6.0
shellingham==1.5.4
six==1.16.0
smart-open==7.0.5
smmap==5.0.1
soupsieve==2.6
spacy==3.8.2
spacy-language-detection==0.2.1
spacy-legacy==3.0.12
spacy-loggers==1.0.5
srsly==2.4.8
streamlit==1.40.2
sympy==1.13.3
tenacity==9.0.0
thinc==8.3.2
tokenizers==0.21.0
toml==0.10.2
tornado==6.4.2
tqdm==4.67.1
typer==0.14.0
typesense==0.21.0
typing_extensions==4.12.2
tzdata==2024.2
urllib3==2.2.3
wasabi==1.1.3
weasel==0.4.1
wrapt==1.17.0
xmltodict==0.13.0

View file

@ -1,4 +1,4 @@
import utils.config as config
from import_data.utils import config
# Split the comma-separated WORDPRESS_NAMES config value into a list of names.
wordpress_names = config.WORDPRESS_NAMES.split(",")
@ -13,5 +13,5 @@ reseau_social_data = [{"nom": "LinkedIn",
{"nom": "FacebookBusiness",
"repertoires": ["posts"]},
{"nom": "Podcast",
"repertoires": ["shownotes", "audio"]}
"repertoires": ["shownotes", "audio", "feeds"]}
]

View file

@ -6,10 +6,10 @@ load_dotenv()
# Build the Typesense client. This hunk appears to list the earlier
# hard-coded node settings next to their replacements, which read
# TYPESENSE_HOST, TYPESENSE_PORT and TYPESENSE_PROTOCOL from the environment
# and fall back to 'localhost', '8108' and 'http' when those are unset.
# NOTE(review): os.getenv('TYPESENSE_API_KEY') yields None when the variable
# is unset — confirm that a missing key is handled.
client = typesense.Client({
'nodes': [{
'host': 'localhost',
'port': '8108',
'protocol': 'http'
'host': os.getenv('TYPESENSE_HOST','localhost'),
'port': os.getenv('TYPESENSE_PORT','8108'),
'protocol': os.getenv('TYPESENSE_PROTOCOL','http'),
}],
'api_key': os.getenv('TYPESENSE_API_KEY'),
'connection_timeout_seconds': 2
'connection_timeout_seconds': 10
})