urlinfo: make .* in front of the URL non-greedy

This is done because URLs may contain one or more other URLs that match
the URL pattern (e.g. https://web.archive.org links). Because the leading
.* is greedy by default, only the last matching URL was captured instead
of the full URL.
This commit is contained in:
jkhsjdhjs 2023-08-05 13:42:46 +00:00
parent 1149417b56
commit b1100db649

View File

@@ -19,7 +19,7 @@ class URLInfo(Plugin):
# set the size limit to 2 MB so we don't fully download too large resources # set the size limit to 2 MB so we don't fully download too large resources
SIZE_LIMIT = 2 * 1024 ** 2 SIZE_LIMIT = 2 * 1024 ** 2
@irc3.event(r'(?i)^:\S+ PRIVMSG (?P<target>\S+) :.*(?P<url>https?:\/\/\S+\.\S+).*') @irc3.event(r'(?i)^:\S+ PRIVMSG (?P<target>\S+) :.*?(?P<url>https?:\/\/\S+\.\S+).*')
def url_parser(self, target: str, url: str): def url_parser(self, target: str, url: str):
for regex in self.BLACKLIST: for regex in self.BLACKLIST:
if re.match(regex, url): if re.match(regex, url):