correction demi-mot

2023-04-17 19:03:52 -04:00 · 2023-04-17 19:03:52 -04:00 · 29fbdb4ff3
commit 29fbdb4ff3
parent 5e8f941b67
1 changed file with 14 additions and 5 deletions
--- a/main.py
+++ b/main.py
@@ -23,15 +23,22 @@ def remove_space_before_punctuation(text):
    """
    Removes space before punctuation.
    """
-    text = re.sub(r"\s+\*\*([.;,?!])\*\*", "\1", text)
+    text = re.sub(r"(\s+)([.,;?!'])]", "\2", text)
    return text


+def only_alphanumerics(text):
+    """
+    Detects if the text contains only alphanumerics.
+    """
+    return re.search("[^a-zA-Z0-9]", text) is None
+
+
 def make_bold_part(token):
    """
    Makes bold text.
    """
-    if len(token) <= 2:
+    if (not only_alphanumerics(token)) or len(token) < 2:
        return token
    else:
        mid_token = round(len(token)/2)
@@ -46,6 +53,7 @@ def split_tokens(text, lang):
    new_paragraphs = []
    for paragraph in paragraphs:
        tokens = nltk.word_tokenize(paragraph, language=lang)
+        tokens = [re.sub(r"\sn\'t", "n't", token) for token in tokens]
        tokens = [make_bold_part(token) for token in tokens]
        par = remove_space_before_punctuation(" ".join(tokens))
        new_paragraphs.append(par)
@@ -60,9 +68,10 @@ def write_app():
    st.title("Texte Rapide")
    lang = st.sidebar.selectbox(label="Langue", options=["french", "english"], index=0)
    st.sidebar.text(f"Langue: {lang}")
-    text = st.text_area("Text", "Entre ton texte ici")
-    bouton = st.button("Envoyer")
-    if bouton:
+    st.text("Entre ton texte ici:")
+    text = st.text_area("Text", '', placeholder='entre ton texte ici')
+    submit = st.button("Convertir")
+    if submit:
        st.markdown(f"{split_tokens(text, lang)}")
    st.markdown("Développé avec ❤ par [François Pelletier](https://linktr.ee/jevalideca)")