# split-reading-bold/main.py


#  Copyright (C) 2023 François Pelletier - Je valide ça, service-conseil
#
#  Licensed under the Apache License, Version 2.0 (the "License");
#  you may not use this file except in compliance with the License.
#  You may obtain a copy of the License at
#
#       http://www.apache.org/licenses/LICENSE-2.0
#
#  Unless required by applicable law or agreed to in writing, software
#  distributed under the License is distributed on an "AS IS" BASIS,
#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#  See the License for the specific language governing permissions and
#  limitations under the License.

import re

import streamlit as st
import nltk


def remove_space_before_punctuation(text):
    """
    Remove the whitespace that precedes a punctuation mark.

    The punctuation marks handled are: . , ; ? ! and the apostrophe.

    :param text: string in which tokens were joined with spaces
    :return: the same string with any run of whitespace located directly
             before one of these punctuation marks removed
    """
    # The replacement must be a raw string: in a normal string literal "\1"
    # is the control character U+0001, not a reference to the captured group.
    return re.sub(r"\s+([.,;?!'])", r"\1", text)


def only_alphanumerics(text):
    """
    Tell whether the text is free of characters other than word characters,
    apostrophes and hyphens.

    Word characters are whatever the regex class \\w matches (letters, digits
    and the underscore). An empty string is accepted.

    :param text: string to inspect
    :return: True if no other character is present, False otherwise
    """
    forbidden_character = re.compile(r"[^\w\'\-]")
    if forbidden_character.search(text):
        return False
    return True


def make_bold_part(token):
    """
    Wrap the first half of a token in Markdown bold markers.

    A token is returned unchanged when it is shorter than two characters or
    when only_alphanumerics rejects it (e.g. punctuation tokens).
    For tokens with an odd number of characters, the bold part receives the
    extra character (3 -> 2, 5 -> 3, 7 -> 4, ...).

    :param token: a single token
    :return: the token with its first half surrounded by "**", or the token
             itself when it is not eligible
    """
    if len(token) < 2 or not only_alphanumerics(token):
        return token
    # Ceiling division instead of round(): round() rounds halves to the
    # nearest even integer, which bolded 2 letters for both 3- and 5-letter
    # words but 4 letters for 7-letter words.
    mid_token = (len(token) + 1) // 2
    return f"**{token[:mid_token]}**{token[mid_token:]}"


def split_tokens(text, lang):
    """
    Convert a text to Markdown where the first half of each word is bold.

    The text is split into paragraphs on blank lines ("\\n\\n"). Each paragraph
    is tokenized with nltk.word_tokenize, every token goes through
    make_bold_part, the tokens are joined with single spaces and the result is
    cleaned with remove_space_before_punctuation. Paragraphs are joined back
    with blank lines.

    :param text: input text
    :param lang: language name forwarded to nltk.word_tokenize
    :return: the converted text
    """
    new_paragraphs = []
    for paragraph in text.split("\n\n"):
        tokens = nltk.word_tokenize(paragraph, language=lang)
        # Re-attach the "n't" clitic to the word before it ("do" + "n't" ->
        # "don't"). This has to happen on the token list: a substitution that
        # looks for whitespace inside a single token can never match, because
        # tokens do not contain whitespace.
        words = []
        for token in tokens:
            if words and token.lower() == "n't":
                words[-1] += token
            else:
                words.append(token)
        bold_words = [make_bold_part(word) for word in words]
        new_paragraphs.append(remove_space_before_punctuation(" ".join(bold_words)))
    return "\n\n".join(new_paragraphs)


def write_app():
    """
    Render the Streamlit page of the 'bionic reading' converter.

    The page contains, in order: a title and a short description, a language
    selector in the sidebar (the selected value is echoed below it), a text
    area for the input and a "Convertir" button. When the button is pressed,
    the text is converted with split_tokens and rendered as Markdown. Two
    credit lines are always rendered at the bottom of the page.
    :return: None
    """
    description = (
        "La lecture bionique consiste à convertir la moitié des lettres d'un mot en caractères gras. C'est "
        "supposé augmenter la vitesse de lecture d'un texte."
    )
    credit = "Développé avec ❤ par [François Pelletier](https://linktr.ee/jevalideca)"
    promotion = (
        "Si tu veux, toi aussi, apprendre à créer tes propres apps, [Y'app pas d'problème]("
        "https://jevalide.ca/yapp). Inscris-toi sans aucun engagement ! Lancement en mai 2023"
    )
    languages = ["french", "english"]

    st.title("Lecture rapide 'bionique'")
    st.markdown(description)

    lang = st.sidebar.selectbox(label="Langue", options=languages, index=0)
    st.sidebar.text(f"Langue: {lang}")

    st.text("Entre ton texte ici:")
    text = st.text_area("Text", "", placeholder="entre ton texte ici")

    if st.button("Convertir"):
        converted = split_tokens(text, lang)
        st.markdown(f"{converted}")

    st.markdown(credit)
    st.markdown(promotion)


def main():
    """
    Entry point of the script: builds the page by calling write_app.
    """
    write_app()


# Build the page only when this file is run as the main module, not on import.
if __name__ == "__main__":
    main()