Améliorations recherche
This commit is contained in:
parent
731d9bce6d
commit
ad0c34fcff
10 changed files with 317 additions and 305 deletions
30
import_data/10_install_nlp_models.py
Normal file
30
import_data/10_install_nlp_models.py
Normal file
|
@ -0,0 +1,30 @@
|
|||
import spacy
|
||||
import subprocess
|
||||
import sys
|
||||
|
||||
def download_spacy_model(model_name):
    """Download and install a spaCy model using the running interpreter.

    Runs ``<sys.executable> -m spacy download <model_name>`` as a child
    process, so the model is installed for the same Python interpreter
    that executes this script.

    Args:
        model_name: Name of the spaCy model package, e.g. "en_core_web_sm".

    Returns:
        True if the download command exited with status 0, False otherwise.
        Failures are reported on stdout rather than raised, so callers that
        ignore the return value keep the previous best-effort behaviour.
    """
    print(f"Downloading and installing spaCy model: {model_name}")
    try:
        subprocess.check_call([sys.executable, "-m", "spacy", "download", model_name])
    except (subprocess.CalledProcessError, OSError):
        # CalledProcessError: the command ran but exited non-zero.
        # OSError: the interpreter itself could not be launched.
        print(f"Error installing {model_name}. Please make sure you have the necessary permissions.")
        return False
    print(f"Successfully installed {model_name}")
    return True
|
||||
|
||||
# Download and install English model
download_spacy_model("en_core_web_sm")

# Download and install French model
download_spacy_model("fr_core_news_sm")

# Load the models to verify installation.
# `except Exception` is used instead of a bare `except:` so that Ctrl-C
# (KeyboardInterrupt) and SystemExit are not swallowed by the check.
try:
    nlp_en = spacy.load("en_core_web_sm")
    print("English model loaded successfully")
except Exception:
    print("Error loading English model")

try:
    nlp_fr = spacy.load("fr_core_news_sm")
    print("French model loaded successfully")
except Exception:
    print("Error loading French model")
|
|
@ -1,97 +0,0 @@
|
|||
altair==5.5.0
|
||||
annotated-types==0.7.0
|
||||
attrs==24.2.0
|
||||
av==13.1.0
|
||||
beautifulsoup4==4.12.3
|
||||
blinker==1.9.0
|
||||
blis==1.0.1
|
||||
cachetools==5.5.0
|
||||
catalogue==2.0.10
|
||||
certifi==2024.8.30
|
||||
charset-normalizer==3.4.0
|
||||
click==8.1.7
|
||||
cloudpathlib==0.20.0
|
||||
coloredlogs==15.0.1
|
||||
confection==0.1.5
|
||||
contourpy==1.3.1
|
||||
ctranslate2==4.5.0
|
||||
cycler==0.12.1
|
||||
cymem==2.0.10
|
||||
en_core_web_sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl#sha256=1932429db727d4bff3deed6b34cfc05df17794f4a52eeb26cf8928f7c1a0fb85
|
||||
faster-whisper==1.1.0
|
||||
filelock==3.16.1
|
||||
flatbuffers==24.3.25
|
||||
fonttools==4.55.0
|
||||
fr_core_news_sm @ https://github.com/explosion/spacy-models/releases/download/fr_core_news_sm-3.8.0/fr_core_news_sm-3.8.0-py3-none-any.whl#sha256=7d6ad14cd5078e53147bfbf70fb9d433c6a3865b695fda2657140bbc59a27e29
|
||||
fsspec==2024.10.0
|
||||
gitdb==4.0.11
|
||||
GitPython==3.1.43
|
||||
huggingface-hub==0.26.3
|
||||
humanfriendly==10.0
|
||||
idna==3.10
|
||||
Jinja2==3.1.4
|
||||
jsonschema==4.23.0
|
||||
jsonschema-specifications==2024.10.1
|
||||
kiwisolver==1.4.7
|
||||
langcodes==3.5.0
|
||||
langdetect==1.0.9
|
||||
language_data==1.3.0
|
||||
marisa-trie==1.2.1
|
||||
markdown-it-py==3.0.0
|
||||
markdownify==0.11.6
|
||||
MarkupSafe==3.0.2
|
||||
matplotlib==3.9.3
|
||||
mdurl==0.1.2
|
||||
mpmath==1.3.0
|
||||
murmurhash==1.0.11
|
||||
narwhals==1.15.0
|
||||
numpy==2.0.2
|
||||
onnxruntime==1.20.1
|
||||
packaging==24.2
|
||||
pandas==2.2.3
|
||||
pillow==11.0.0
|
||||
plotly==5.24.1
|
||||
preshed==3.0.9
|
||||
protobuf==5.29.0
|
||||
pyarrow==17.0.0
|
||||
pydantic==2.10.2
|
||||
pydantic_core==2.27.1
|
||||
pydeck==0.9.1
|
||||
Pygments==2.18.0
|
||||
pyparsing==3.2.0
|
||||
python-dateutil==2.9.0.post0
|
||||
python-dotenv==1.0.1
|
||||
pytz==2024.2
|
||||
PyYAML==6.0.2
|
||||
referencing==0.35.1
|
||||
requests==2.31.0
|
||||
rich==13.9.4
|
||||
rpds-py==0.21.0
|
||||
setuptools==75.6.0
|
||||
shellingham==1.5.4
|
||||
six==1.16.0
|
||||
smart-open==7.0.5
|
||||
smmap==5.0.1
|
||||
soupsieve==2.6
|
||||
spacy==3.8.2
|
||||
spacy-language-detection==0.2.1
|
||||
spacy-legacy==3.0.12
|
||||
spacy-loggers==1.0.5
|
||||
srsly==2.4.8
|
||||
streamlit==1.40.2
|
||||
sympy==1.13.3
|
||||
tenacity==9.0.0
|
||||
thinc==8.3.2
|
||||
tokenizers==0.21.0
|
||||
toml==0.10.2
|
||||
tornado==6.4.2
|
||||
tqdm==4.67.1
|
||||
typer==0.14.0
|
||||
typesense==0.21.0
|
||||
typing_extensions==4.12.2
|
||||
tzdata==2024.2
|
||||
urllib3==2.2.3
|
||||
wasabi==1.1.3
|
||||
weasel==0.4.1
|
||||
wrapt==1.17.0
|
||||
xmltodict==0.13.0
|
|
@ -1,4 +1,4 @@
|
|||
import utils.config as config
|
||||
from import_data.utils import config
|
||||
|
||||
wordpress_names = config.WORDPRESS_NAMES.split(",")
|
||||
|
||||
|
@ -13,5 +13,5 @@ reseau_social_data = [{"nom": "LinkedIn",
|
|||
{"nom": "FacebookBusiness",
|
||||
"repertoires": ["posts"]},
|
||||
{"nom": "Podcast",
|
||||
"repertoires": ["shownotes", "audio"]}
|
||||
"repertoires": ["shownotes", "audio", "feeds"]}
|
||||
]
|
||||
|
|
|
@ -6,10 +6,10 @@ load_dotenv()
|
|||
|
||||
client = typesense.Client({
|
||||
'nodes': [{
|
||||
'host': 'localhost',
|
||||
'port': '8108',
|
||||
'protocol': 'http'
|
||||
'host': os.getenv('TYPESENSE_HOST','localhost'),
|
||||
'port': os.getenv('TYPESENSE_PORT','8108'),
|
||||
'protocol': os.getenv('TYPESENSE_PROTOCOL','http'),
|
||||
}],
|
||||
'api_key': os.getenv('TYPESENSE_API_KEY'),
|
||||
'connection_timeout_seconds': 2
|
||||
'connection_timeout_seconds': 10
|
||||
})
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue