#    Copyright (C) 2023 François Pelletier - Je valide ça, service-conseil
#
#    Licensed under the Apache License, Version 2.0 (the "License");
#    you may not use this file except in compliance with the License.
#    You may obtain a copy of the License at
#
#        http://www.apache.org/licenses/LICENSE-2.0
#
#    Unless required by applicable law or agreed to in writing, software
#    distributed under the License is distributed on an "AS IS" BASIS,
#    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#    See the License for the specific language governing permissions and
#    limitations under the License.

2023-03-16 04:20:38 +00:00
|
|
|
import re
|
|
|
|
|
|
|
|
import streamlit as st
|
|
|
|
import nltk
|
|
|
|
|
|
|
|
|
|
|
|
def remove_space_before_punctuation(text):
    """
    Remove the whitespace that precedes a punctuation mark.

    Any run of whitespace directly followed by one of ``. , ; ? ! '`` is
    dropped, so ``"Hello , world !"`` becomes ``"Hello, world!"``.

    :param text: the string to clean up.
    :return: the string with whitespace removed before punctuation.
    """
    # The replacement has to be a raw string: in a normal literal "\1" is a
    # control character, not a reference to the captured punctuation mark.
    return re.sub(r"\s+([.,;?!'])", r"\1", text)
|
|
|
|
|
|
|
|
|
2023-04-17 23:03:52 +00:00
|
|
|
def only_alphanumerics(text):
    """
    Tell whether the text is made only of letters and digits.

    Letters are matched in a Unicode-aware way, so accented words such as
    ``"été"`` count as alphanumeric. The empty string contains no offending
    character and therefore returns ``True``.

    :param text: the string to inspect.
    :return: ``True`` if no character other than a letter or a digit is
        found, ``False`` otherwise.
    """
    # ``\W`` is "not a word character"; the underscore is a word character
    # for the regex engine but not an alphanumeric, so it is rejected too.
    return re.search(r"[\W_]", text) is None
|
|
|
|
|
|
|
|
|
2023-04-17 22:19:21 +00:00
|
|
|
def make_bold_part(token):
    """
    Wrap the first half of a token in Markdown bold markers.

    Tokens shorter than two characters, or containing anything other than
    alphanumerics, are returned unchanged. For odd lengths the bold part
    takes the extra character (e.g. 3 -> 2, 5 -> 3).

    :param token: a single word-level token.
    :return: the token with its leading half surrounded by ``**``, or the
        token itself when it is not eligible.
    """
    if len(token) < 2 or not only_alphanumerics(token):
        return token
    # Ceiling division instead of round(): round() rounds halves to the
    # nearest even number, which made the split inconsistent for odd lengths
    # (5 letters -> 2 bold, but 7 letters -> 4 bold).
    mid_token = (len(token) + 1) // 2
    return f"**{token[:mid_token]}**{token[mid_token:]}"
|
2023-03-16 04:20:38 +00:00
|
|
|
|
|
|
|
|
2023-04-17 22:19:21 +00:00
|
|
|
def split_tokens(text, lang):
    """
    Tokenize a text and rebuild it with the first half of each word in bold.

    The text is split into paragraphs on blank lines. Each paragraph is
    tokenized with NLTK, every token goes through ``make_bold_part``, and the
    tokens are joined back with single spaces before the spacing around
    contractions and punctuation is cleaned up.

    :param text: the raw input text.
    :param lang: language name passed to ``nltk.word_tokenize``.
    :return: the Markdown-formatted text, paragraphs separated by blank lines.
    """
    new_paragraphs = []
    for paragraph in text.split("\n\n"):
        tokens = nltk.word_tokenize(paragraph, language=lang)
        joined = " ".join(make_bold_part(token) for token in tokens)
        # Re-attach the "n't" contraction on the joined string: a single
        # token never contains whitespace, so doing this per token (as
        # before) could not match anything.
        joined = re.sub(r"\sn't", "n't", joined)
        new_paragraphs.append(remove_space_before_punctuation(joined))
    return "\n\n".join(new_paragraphs)
|
|
|
|
|
|
|
|
|
|
|
|
def write_app():
    """
    Build the Streamlit page.

    The page shows a title, a language selector in the sidebar (echoed back
    as sidebar text), a text area for the input and a button. When the
    button is pressed, the converted text is rendered as Markdown. A credits
    line is rendered at the bottom.

    :return: None
    """
    st.title("Texte Rapide")

    language = st.sidebar.selectbox(
        label="Langue",
        options=["french", "english"],
        index=0,
    )
    st.sidebar.text(f"Langue: {language}")

    st.text("Entre ton texte ici:")
    user_text = st.text_area("Text", '', placeholder='entre ton texte ici')

    if st.button("Convertir"):
        st.markdown(split_tokens(user_text, language))

    # NOTE(review): indentation was lost in the reviewed source; the credits
    # line is assumed to be unconditional rather than part of the branch
    # above - confirm against the repository.
    st.markdown("Développé avec ❤ par [François Pelletier](https://linktr.ee/jevalideca)")
|
2023-03-16 04:20:38 +00:00
|
|
|
|
|
|
|
|
|
|
|
def main():
    """
    Entry point: render the Streamlit application.

    :return: None
    """
    write_app()
|
|
|
|
|
|
|
|
|
|
|
|
# Run the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|