urlinfo: make .* in front of the URL non-greedy

This is done because a URL may itself contain one or more other URLs that match the URL pattern (e.g. https://web.archive.org links). Because .* is greedy by default, the leading .* consumed as much of the message as possible, so only the last matching URL embedded in the link was captured, instead of the full URL.
2023-08-05 13:42:46 +00:00
parent 1149417b56
commit b1100db649
1 changed file with 1 addition and 1 deletion
--- a/bot/urlinfo.py
+++ b/bot/urlinfo.py
@@ -19,7 +19,7 @@ class URLInfo(Plugin):
    # set the size limit to 2 MB so we don't fully download too large resources
    SIZE_LIMIT = 2 * 1024 ** 2
-    @irc3.event(r'(?i)^:\S+ PRIVMSG (?P<target>\S+) :.*(?P<url>https?:\/\/\S+\.\S+).*')
+    @irc3.event(r'(?i)^:\S+ PRIVMSG (?P<target>\S+) :.*?(?P<url>https?:\/\/\S+\.\S+).*')
    def url_parser(self, target: str, url: str):
        for regex in self.BLACKLIST:
            if re.match(regex, url):