urlinfo: limit download size + check content type
This commit is contained in:
parent
233cfc9089
commit
c4f66e5346
|
@ -15,16 +15,32 @@ class URLInfo(Plugin):
|
|||
"^https?:\/\/f0ck\.me"
|
||||
]
|
||||
|
||||
# Download cap in bytes (2 * 1024 ** 2 = 2 MiB): url_parser stops and stays
# silent once the streamed response body reaches this size.
SIZE_LIMIT = 2 * 1024 ** 2
|
||||
|
||||
@irc3.event(r'(?i)^:\S+ PRIVMSG (?P<target>\S+) :.*(?P<url>https?:\/\/\S+\.\S+).*')
def url_parser(self, target: str, url: str):
    """Announce the HTML ``<title>`` of a URL seen in a PRIVMSG.

    The method is best-effort: on any problem (blacklisted URL, network or
    HTTP error, non-HTML content, oversized body, unparsable or title-less
    document) it returns silently without sending anything.

    Args:
        target: Value of the ``target`` group of the event regex; the
            reply is sent there.
        url: Value of the ``url`` group of the event regex.
    """
    # Skip URLs matching any blacklist pattern.
    if any(re.match(regex, url) for regex in self.BLACKLIST):
        return

    bytes_io = io.BytesIO()
    try:
        # stream=True defers the body download so the headers can be
        # inspected and the size capped before reading everything.
        with requests.get(url, timeout=10, stream=True) as r:
            r.raise_for_status()

            # Reject non-HTML responses *before* downloading the body.
            # A missing Content-Type header is tolerated, as before.
            mime_type = r.headers.get("content-type")
            if mime_type is not None:
                # Media types are case-insensitive and may carry
                # parameters ("Text/HTML; charset=utf-8").
                if mime_type.split(";")[0].strip().lower() != "text/html":
                    return

            size = 0
            for chunk in r.iter_content(chunk_size=1024 ** 2):
                size += len(chunk)
                if size >= self.SIZE_LIMIT:
                    return
                bytes_io.write(chunk)
    except requests.exceptions.RequestException:
        # Base class of ConnectionError and HTTPError; also covers
        # timeouts, redirect loops, invalid URLs and broken chunked
        # transfers, which previously escaped this handler.
        return

    bytes_io.seek(0)
    try:
        tree = etree.parse(bytes_io, etree.HTMLParser()).getroot()
    except etree.XMLSyntaxError:
        # e.g. an empty response body
        return
    if tree is None:
        return

    title = tree.xpath("/html/head/title")
    # An empty <title></title> element has text None.
    if not title or title[0].text is None:
        return

    # Titles may span several lines in the markup; collapse all whitespace
    # so the reply is a single line.
    text = " ".join(title[0].text.split())
    if text:
        self.bot.privmsg(target, '\x02[URLInfo]\x02 ' + text)
|
||||
|
|
Loading…
Reference in New Issue
Block a user