traitement des urls

This commit is contained in:
François Pelletier 2024-01-12 13:53:17 -05:00
parent 3317bb7de6
commit 9859ec4f63
3 changed files with 24 additions and 7 deletions

View file

@ -8,7 +8,7 @@ License:
import re
def replace_title_with_list(text: str) -> str:
def replace_title_with_list(text: str, depth=2) -> str:
r"""Replace all titles in the text with a numbered list.
The titles are defined by a hashtag symbol followed by one or more
@ -37,8 +37,8 @@ def replace_title_with_list(text: str) -> str:
matches = re.findall(pattern, text, re.MULTILINE)
for match in matches:
nb_hashtag = len(match)
if nb_hashtag > 1:
text = re.sub(match, ' ' * (nb_hashtag-1)+'-', text, count=1)
if nb_hashtag >= depth:
text = re.sub(match, ' ' * (nb_hashtag-depth)+'-', text, count=1)
text = re.sub('\n\n', '\n', text)
pattern_url = \
r"(?i)\b((?:https?://|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}/)(?:[^\s()<>]+|\(([^\s()<>]+|(\([^\s()<>]+\)))*\))+(?:\(([^\s()<>]+|(\([^\s()<>]+\)))*\)|[^\s`!()\[\]{};:'\".,<>?«»“”‘’]))"