From b1100db649dd4b5b0ed76ba0a6188bc5291d25f5 Mon Sep 17 00:00:00 2001
From: jkhsjdhjs
Date: Sat, 5 Aug 2023 13:42:46 +0000
Subject: [PATCH] urlinfo: make .* in front of the URL non-greedy

This is done because URLs may contain one or more other URLs that match
the url pattern (e.g. https://web.archive.org links). Because .* is
greedy by default, this caused only the last matching URL to be
captured, instead of the full URL.
---
 bot/urlinfo.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/bot/urlinfo.py b/bot/urlinfo.py
index 24b1661..f75349c 100644
--- a/bot/urlinfo.py
+++ b/bot/urlinfo.py
@@ -19,7 +19,7 @@ class URLInfo(Plugin):
     # set the size limit to 2 MB so we don't fully download too large resources
     SIZE_LIMIT = 2 * 1024 ** 2
 
-    @irc3.event(r'(?i)^:\S+ PRIVMSG (?P<target>\S+) :.*(?P<url>https?:\/\/\S+\.\S+).*')
+    @irc3.event(r'(?i)^:\S+ PRIVMSG (?P<target>\S+) :.*?(?P<url>https?:\/\/\S+\.\S+).*')
     def url_parser(self, target: str, url: str):
         for regex in self.BLACKLIST:
             if re.match(regex, url):