urlinfo: make .* in front of the URL non-greedy
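This is done because a URL may itself contain one or more other URLs that match the URL pattern (e.g. https://web.archive.org links). Because `.*` is greedy by default, the leading `.*` consumed as much of the message as possible, including the start of the outer URL, so only the last embedded URL was captured instead of the full URL. Making it non-greedy (`.*?`) lets the match begin at the first URL in the message.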
This commit is contained in:
parent
1149417b56
commit
b1100db649
|
@ -19,7 +19,7 @@ class URLInfo(Plugin):
|
||||||
# set the size limit to 2 MB so we don't fully download too large resources
|
# set the size limit to 2 MB so we don't fully download too large resources
|
||||||
SIZE_LIMIT = 2 * 1024 ** 2
|
SIZE_LIMIT = 2 * 1024 ** 2
|
||||||
|
|
||||||
@irc3.event(r'(?i)^:\S+ PRIVMSG (?P<target>\S+) :.*(?P<url>https?:\/\/\S+\.\S+).*')
|
@irc3.event(r'(?i)^:\S+ PRIVMSG (?P<target>\S+) :.*?(?P<url>https?:\/\/\S+\.\S+).*')
|
||||||
def url_parser(self, target: str, url: str):
|
def url_parser(self, target: str, url: str):
|
||||||
for regex in self.BLACKLIST:
|
for regex in self.BLACKLIST:
|
||||||
if re.match(regex, url):
|
if re.match(regex, url):
|
||||||
|
|
Loading…
Reference in New Issue
Block a user