split-reading-bold/main.py

89 lines
2.8 KiB
Python
Raw Normal View History

2023-03-16 04:42:38 +00:00
# Copyright (C) 2023 François Pelletier - Je valide ça, service-conseil
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
2023-03-16 04:20:38 +00:00
import re
import streamlit as st
import nltk
def remove_space_before_punctuation(text):
    """
    Remove any whitespace that directly precedes a punctuation mark.

    The punctuation marks handled are ``. , ; ? ! '``. A run of one or more
    whitespace characters immediately before one of them is dropped, so
    ``"Hello , world !"`` becomes ``"Hello, world!"``.

    :param text: the string to clean up
    :return: the string with whitespace before punctuation removed
    """
    # The replacement must be a raw string: a plain "\1" is the control
    # character U+0001, not a backreference to the captured punctuation.
    return re.sub(r"\s+([.,;?!'])", r"\1", text)
2023-04-17 23:03:52 +00:00
def only_alphanumerics(text):
    """
    Tell whether the text is made exclusively of regex "word" characters.

    A word character is whatever ``\\w`` matches: letters, digits and the
    underscore. The empty string contains no non-word character, so it
    yields True as well.

    :param text: the string to inspect
    :return: True when no non-word character is found, False otherwise
    """
    first_non_word = re.search(r"\W", text)
    return first_non_word is None
2023-04-17 23:03:52 +00:00
2023-04-17 22:19:21 +00:00
def make_bold_part(token):
    """
    Wrap the first half of a token in Markdown bold markers.

    Tokens shorter than two characters, and tokens for which
    ``only_alphanumerics`` returns False (punctuation, contractions, ...),
    are returned unchanged.

    For odd lengths the bold part gets the extra character, so the split is
    consistent across word lengths: 3 -> 2 bold, 5 -> 3 bold, 7 -> 4 bold.

    :param token: a single token of text
    :return: the token with its first half wrapped in ``**``, or the token
        itself when it is not eligible
    """
    if len(token) < 2 or not only_alphanumerics(token):
        return token
    # Ceiling division instead of round(): round() rounds halves to the
    # nearest even number, which bolds 2 letters of a 5-letter word but 4
    # letters of a 7-letter word.
    split_at = (len(token) + 1) // 2
    return f"**{token[:split_at]}**{token[split_at:]}"
2023-03-16 04:20:38 +00:00
2023-04-17 22:19:21 +00:00
def split_tokens(text, lang):
    """
    Convert a text to its "bionic" Markdown form, paragraph by paragraph.

    The text is split on blank lines (``"\\n\\n"``). Each paragraph is
    tokenized with ``nltk.word_tokenize``, every token goes through
    ``make_bold_part``, and the tokens are joined back with single spaces.
    The joined paragraph is then tidied up: a detached ``n't`` is glued back
    to the preceding word and the result is passed through
    ``remove_space_before_punctuation``.

    :param text: the full input text
    :param lang: language name forwarded to ``nltk.word_tokenize``
    :return: the converted paragraphs joined by blank lines
    """
    new_paragraphs = []
    for paragraph in text.split("\n\n"):
        tokens = nltk.word_tokenize(paragraph, language=lang)
        joined = " ".join(make_bold_part(token) for token in tokens)
        # Re-attach the contraction on the joined string: individual tokens
        # never contain whitespace, so a per-token substitution can never
        # match and "do n't" would stay split.
        joined = re.sub(r"\sn't", "n't", joined)
        new_paragraphs.append(remove_space_before_punctuation(joined))
    return "\n\n".join(new_paragraphs)
def write_app():
    """
    Build the Streamlit page.

    The page shows a title and a short introduction, a language selector in
    the sidebar, a text area for the input and a button. When the button is
    pressed, the converted text is rendered as Markdown. Two footer lines are
    rendered at the bottom of the page.
    :return: None
    """
    st.title("Lecture rapide 'bionique'")
    st.markdown(
        "La lecture bionique consiste à convertir la moitié des lettres d'un mot en caractères gras. C'est "
        "supposé augmenter la vitesse de lecture d'un texte."
    )

    # Language choice lives in the sidebar and is echoed back below the selector.
    language = st.sidebar.selectbox(label="Langue", options=["french", "english"], index=0)
    st.sidebar.text(f"Langue: {language}")

    st.text("Entre ton texte ici:")
    user_text = st.text_area("Text", '', placeholder='entre ton texte ici')
    if st.button("Convertir"):
        st.markdown(split_tokens(user_text, language))

    # NOTE(review): the footer is assumed to be unconditional (outside the
    # button branch) -- confirm against the original indentation.
    st.markdown("Développé avec ❤ par [François Pelletier](https://linktr.ee/jevalideca)")
    st.markdown(
        "Si tu veux, toi aussi, apprendre à créer tes propres apps, [Y'app pas d'problème]("
        "https://jevalide.ca/yapp). Inscris-toi sans aucun engagement ! Lancement en mai 2023"
    )
2023-03-16 04:20:38 +00:00
def main():
    """
    Entry point of the script: builds the Streamlit application.
    """
    write_app()


# Run the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()