correction demi-mot
This commit is contained in:
parent
5e8f941b67
commit
29fbdb4ff3
1 changed files with 14 additions and 5 deletions
19
main.py
19
main.py
|
@ -23,15 +23,22 @@ def remove_space_before_punctuation(text):
|
|||
"""
|
||||
Removes space before punctuation.
|
||||
"""
|
||||
text = re.sub(r"\s+\*\*([.;,?!])\*\*", "\1", text)
|
||||
text = re.sub(r"(\s+)([.,;?!'])]", "\2", text)
|
||||
return text
|
||||
|
||||
|
||||
def only_alphanumerics(text):
    """
    Detect whether the text contains only alphanumeric characters.

    Letters and digits from any script are accepted, so accented words
    such as "été" count as alphanumeric. Underscores, whitespace and
    punctuation do not. An empty string contains no offending character
    and therefore returns True.
    """
    # [\W_] is the Unicode-aware complement of "letter or digit"; \W alone
    # would let the underscore through, so it is listed explicitly.
    return re.search(r"[\W_]", text) is None
|
||||
|
||||
|
||||
def make_bold_part(token):
|
||||
"""
|
||||
Makes bold text.
|
||||
"""
|
||||
if len(token) <= 2:
|
||||
if (not only_alphanumerics(token)) or len(token) < 2:
|
||||
return token
|
||||
else:
|
||||
mid_token = round(len(token)/2)
|
||||
|
@ -46,6 +53,7 @@ def split_tokens(text, lang):
|
|||
new_paragraphs = []
|
||||
for paragraph in paragraphs:
|
||||
tokens = nltk.word_tokenize(paragraph, language=lang)
|
||||
tokens = [re.sub(r"\sn\'t", "n't", token) for token in tokens]
|
||||
tokens = [make_bold_part(token) for token in tokens]
|
||||
par = remove_space_before_punctuation(" ".join(tokens))
|
||||
new_paragraphs.append(par)
|
||||
|
@ -60,9 +68,10 @@ def write_app():
|
|||
st.title("Texte Rapide")
|
||||
lang = st.sidebar.selectbox(label="Langue", options=["french", "english"], index=0)
|
||||
st.sidebar.text(f"Langue: {lang}")
|
||||
text = st.text_area("Text", "Entre ton texte ici")
|
||||
bouton = st.button("Envoyer")
|
||||
if bouton:
|
||||
st.text("Entre ton texte ici:")
|
||||
text = st.text_area("Text", '', placeholder='entre ton texte ici')
|
||||
submit = st.button("Convertir")
|
||||
if submit:
|
||||
st.markdown(f"{split_tokens(text, lang)}")
|
||||
st.markdown("Développé avec ❤ par [François Pelletier](https://linktr.ee/jevalideca)")
|
||||
|
||||
|
|
Loading…
Reference in a new issue