From 29fbdb4ff3c4d0bb4bc50810df5d9194bfaf4e8f Mon Sep 17 00:00:00 2001 From: Francois Pelletier Date: Mon, 17 Apr 2023 19:03:52 -0400 Subject: [PATCH] correction demi-mot --- main.py | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/main.py b/main.py index cb7739d..cf9934d 100644 --- a/main.py +++ b/main.py @@ -23,15 +23,22 @@ def remove_space_before_punctuation(text): """ Removes space before punctuation. """ - text = re.sub(r"\s+\*\*([.;,?!])\*\*", "\1", text) + text = re.sub(r"(\s+)([.,;?!'])", r"\2", text) return text +def only_alphanumerics(text): + """ + Detects if the text contains only alphanumerics. + """ + return re.search(r"[\W_]", text) is None + + def make_bold_part(token): """ Makes bold text. """ - if len(token) <= 2: + if (not only_alphanumerics(token)) or len(token) < 2: return token else: mid_token = round(len(token)/2) @@ -46,6 +53,7 @@ def split_tokens(text, lang): new_paragraphs = [] for paragraph in paragraphs: tokens = nltk.word_tokenize(paragraph, language=lang) + tokens = [re.sub(r"\sn\'t", "n't", token) for token in tokens] tokens = [make_bold_part(token) for token in tokens] par = remove_space_before_punctuation(" ".join(tokens)) new_paragraphs.append(par) @@ -60,9 +68,10 @@ def write_app(): st.title("Texte Rapide") lang = st.sidebar.selectbox(label="Langue", options=["french", "english"], index=0) st.sidebar.text(f"Langue: {lang}") - text = st.text_area("Text", "Entre ton texte ici") - bouton = st.button("Envoyer") - if bouton: + st.text("Entre ton texte ici:") + text = st.text_area("Text", '', placeholder='entre ton texte ici') + submit = st.button("Convertir") + if submit: st.markdown(f"{split_tokens(text, lang)}") st.markdown("Développé avec ❤ par [François Pelletier](https://linktr.ee/jevalideca)")