split-reading-bold/main.py


#  Copyright (C) 2023 François Pelletier - Je valide ça, service-conseil
#
#  Licensed under the Apache License, Version 2.0 (the "License");
#  you may not use this file except in compliance with the License.
#  You may obtain a copy of the License at
#
#       http://www.apache.org/licenses/LICENSE-2.0
#
#  Unless required by applicable law or agreed to in writing, software
#  distributed under the License is distributed on an "AS IS" BASIS,
#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#  See the License for the specific language governing permissions and
#  limitations under the License.

import re

import streamlit as st
import nltk


def remove_space_before_punctuation(text):
    """
    Remove whitespace that directly precedes a punctuation mark.

    Every run of whitespace followed by one of ``. , ; ? ! '`` is dropped and
    the punctuation mark itself is kept, so ``"Hello , world !"`` becomes
    ``"Hello, world!"``.

    :param text: the string to clean up.
    :return: the string with the whitespace before punctuation removed.
    """
    # The punctuation mark is captured and written back through a raw-string
    # backreference: without the raw prefix the replacement would be the
    # control character U+0002 instead of the captured group.
    return re.sub(r"\s+([.,;?!'])", r"\1", text)


def only_alphanumerics(text):
    """
    Tell whether the text is made only of letters and digits.

    Letters are matched in Unicode mode, so accented letters such as "é"
    count as alphanumerics. The underscore does not. An empty string yields
    True because it contains no disallowed character.

    :param text: the string to inspect.
    :return: True when no character other than a letter or digit is found.
    """
    # \W matches everything that is not a word character; the underscore is a
    # word character for the regex engine, so it is excluded explicitly.
    return re.search(r"[\W_]", text) is None


def make_bold_part(token):
    """
    Wrap the first half of a token in Markdown bold markers.

    Tokens shorter than two characters, or rejected by
    ``only_alphanumerics``, are returned unchanged. For tokens with an odd
    number of characters the bold half receives the extra character.

    :param token: a single token (word) as a string.
    :return: the token with its first half surrounded by ``**``, or the
        token itself when it is left untouched.
    """
    if len(token) < 2 or not only_alphanumerics(token):
        return token
    # Ceiling division keeps the split consistent for every odd length.
    # round() rounds halves to the nearest even integer, which bolded
    # 2 letters out of 3 but only 2 letters out of 5.
    mid_token = (len(token) + 1) // 2
    return f"**{token[:mid_token]}**{token[mid_token:]}"


def split_tokens(text, lang):
    """
    Rebuild a text with the first half of each word in Markdown bold.

    The text is split into paragraphs on blank lines ("\\n\\n"). Each
    paragraph is tokenized with ``nltk.word_tokenize``, every token goes
    through ``make_bold_part``, and the tokens are joined back with single
    spaces before the spacing around punctuation is cleaned up.

    :param text: the full input text.
    :param lang: language name passed to ``nltk.word_tokenize``.
    :return: the converted paragraphs joined again with blank lines.
    """
    new_paragraphs = []
    for paragraph in text.split("\n\n"):
        tokens = nltk.word_tokenize(paragraph, language=lang)
        joined = " ".join(make_bold_part(token) for token in tokens)
        # Re-attach a detached "n't" to the preceding word. This has to run
        # on the joined string: a single token never contains the whitespace
        # the pattern requires, so a per-token substitution cannot match.
        # NOTE(review): presumably targets the tokenizer splitting "don't"
        # into "do" + "n't" - confirm against the NLTK tokenizer in use.
        joined = re.sub(r"\sn't", "n't", joined)
        new_paragraphs.append(remove_space_before_punctuation(joined))
    return "\n\n".join(new_paragraphs)


def write_app():
    """
    Build the Streamlit page.

    The page shows a title, a language selector in the sidebar (echoed back
    as sidebar text), a text area for the input, and a button. When the
    button is pressed, the converted text is rendered as Markdown. A credit
    line is always rendered at the bottom.

    :return: None
    """
    st.title("Texte Rapide")

    # Sidebar: language choice, French selected by default (index 0).
    chosen_lang = st.sidebar.selectbox(
        label="Langue",
        options=["french", "english"],
        index=0,
    )
    st.sidebar.text(f"Langue: {chosen_lang}")

    # Main area: input box and conversion trigger.
    st.text("Entre ton texte ici:")
    user_text = st.text_area("Text", '', placeholder='entre ton texte ici')
    if st.button("Convertir"):
        converted = split_tokens(user_text, chosen_lang)
        st.markdown(converted)

    st.markdown("Développé avec ❤ par [François Pelletier](https://linktr.ee/jevalideca)")


def main():
    """
    Entry point of the script: render the Streamlit application.
    """
    write_app()


# Run the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
Ajout copyright et mention 2023-03-16 04:42:38 +00:00
			`# Copyright (C) 2023 François Pelletier - Je valide ça, service-conseil`
			`#`
			`# Licensed under the Apache License, Version 2.0 (the "License");`
			`# you may not use this file except in compliance with the License.`
			`# You may obtain a copy of the License at`
			`#`
			`# http://www.apache.org/licenses/LICENSE-2.0`
			`#`
			`# Unless required by applicable law or agreed to in writing, software`
			`# distributed under the License is distributed on an "AS IS" BASIS,`
			`# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.`
			`# See the License for the specific language governing permissions and`
			`# limitations under the License.`

Version initiale 2023-03-16 04:20:38 +00:00			`import re`

			`import streamlit as st`
			`import nltk`


			`def remove_space_before_punctuation(text):`
			`"""`
			`Removes space before punctuation.`
			`"""`
correction demi-mot 2023-04-17 23:03:52 +00:00			`text = re.sub(r"(\s+)([.,;?!'])]", "\2", text)`
Version initiale 2023-03-16 04:20:38 +00:00			`return text`


correction demi-mot 2023-04-17 23:03:52 +00:00			`def only_alphanumerics(text):`
			`"""`
			`Detects if the text contains only alphanumerics.`
			`"""`
			`return re.search("[^a-zA-Z0-9]", text) is None`


correction demi-mot 2023-04-17 22:19:21 +00:00			`def make_bold_part(token):`
Version initiale 2023-03-16 04:20:38 +00:00			`"""`
			`Makes bold text.`
			`"""`
correction demi-mot 2023-04-17 23:03:52 +00:00			`if (not only_alphanumerics(token)) or len(token) < 2:`
Version initiale 2023-03-16 04:20:38 +00:00			`return token`
			`else:`
correction demi-mot 2023-04-17 22:19:21 +00:00			`mid_token = round(len(token)/2)`
			`return f"{token[0:mid_token]}{token[mid_token:]}"`
Version initiale 2023-03-16 04:20:38 +00:00

correction demi-mot 2023-04-17 22:19:21 +00:00			`def split_tokens(text, lang):`
Version initiale 2023-03-16 04:20:38 +00:00			`"""`
			`Splits a text into tokens.`
			`"""`
			`paragraphs = text.split("\n\n")`
			`new_paragraphs = []`
			`for paragraph in paragraphs:`
			`tokens = nltk.word_tokenize(paragraph, language=lang)`
correction demi-mot 2023-04-17 23:03:52 +00:00			`tokens = [re.sub(r"\sn\'t", "n't", token) for token in tokens]`
correction demi-mot 2023-04-17 22:19:21 +00:00			`tokens = [make_bold_part(token) for token in tokens]`
Version initiale 2023-03-16 04:20:38 +00:00			`par = remove_space_before_punctuation(" ".join(tokens))`
			`new_paragraphs.append(par)`
			`return "\n\n".join(new_paragraphs)`


			`def write_app():`
			`"""`
			`Write a streamlit app with an input box, a numerical slider from 1 to 5 and an output box.`
			`:return:`
			`"""`
			`st.title("Texte Rapide")`
			`lang = st.sidebar.selectbox(label="Langue", options=["french", "english"], index=0)`
			`st.sidebar.text(f"Langue: {lang}")`
correction demi-mot 2023-04-17 23:03:52 +00:00			`st.text("Entre ton texte ici:")`
			`text = st.text_area("Text", '', placeholder='entre ton texte ici')`
			`submit = st.button("Convertir")`
			`if submit:`
correction demi-mot 2023-04-17 22:19:21 +00:00			`st.markdown(f"{split_tokens(text, lang)}")`
Ajout copyright et mention 2023-03-16 04:42:38 +00:00			`st.markdown("Développé avec ❤ par [François Pelletier](https://linktr.ee/jevalideca)")`
Version initiale 2023-03-16 04:20:38 +00:00

			`def main():`
			`write_app()`


			`if __name__ == "__main__":`
			`main()`