urlinfo: check content-type before downloading + handle timeouts
improve code style and formatting
This commit is contained in:
parent
e7b89e7fdb
commit
9c3cd2f816
|
@ -16,6 +16,7 @@ class URLInfo(Plugin):
|
||||||
"^https?:\/\/(?:(?:vid|img|thumb)\.)?pr0gramm\.com"
|
"^https?:\/\/(?:(?:vid|img|thumb)\.)?pr0gramm\.com"
|
||||||
]
|
]
|
||||||
|
|
||||||
|
# set the size limit to 2 MB so we don't fully download overly large resources
|
||||||
SIZE_LIMIT = 2 * 1024 ** 2
|
SIZE_LIMIT = 2 * 1024 ** 2
|
||||||
|
|
||||||
@irc3.event(r'(?i)^:\S+ PRIVMSG (?P<target>\S+) :.*(?P<url>https?:\/\/\S+\.\S+).*')
|
@irc3.event(r'(?i)^:\S+ PRIVMSG (?P<target>\S+) :.*(?P<url>https?:\/\/\S+\.\S+).*')
|
||||||
|
@ -23,26 +24,27 @@ class URLInfo(Plugin):
|
||||||
for regex in self.BLACKLIST:
|
for regex in self.BLACKLIST:
|
||||||
if re.match(regex, url):
|
if re.match(regex, url):
|
||||||
return
|
return
|
||||||
|
|
||||||
bytes_io = io.BytesIO()
|
bytes_io = io.BytesIO()
|
||||||
|
|
||||||
try:
|
try:
|
||||||
with requests.get(url, timeout=10, stream=True) as r:
|
with requests.get(url, timeout=10, stream=True) as r:
|
||||||
r.raise_for_status()
|
r.raise_for_status()
|
||||||
|
mime_type = r.headers.get("content-type")
|
||||||
|
if mime_type is not None and mime_type.split(";")[0] != "text/html":
|
||||||
|
return
|
||||||
size = 0
|
size = 0
|
||||||
for chunk in r.iter_content(chunk_size=1024 ** 2):
|
for chunk in r.iter_content(chunk_size=1024 ** 2):
|
||||||
size += len(chunk)
|
size += len(chunk)
|
||||||
if size >= self.SIZE_LIMIT:
|
if size >= self.SIZE_LIMIT:
|
||||||
return
|
return
|
||||||
bytes_io.write(chunk)
|
bytes_io.write(chunk)
|
||||||
except (requests.exceptions.ConnectionError, requests.exceptions.HTTPError) as e:
|
except (requests.exceptions.ConnectionError, requests.exceptions.HTTPError, requests.exceptions.ReadTimeout):
|
||||||
return
|
return
|
||||||
|
|
||||||
mime_type = r.headers.get("content-type")
|
|
||||||
if mime_type is not None:
|
|
||||||
if mime_type.split(";")[0] != "text/html":
|
|
||||||
return
|
|
||||||
bytes_io.seek(0)
|
bytes_io.seek(0)
|
||||||
tree = etree.parse(bytes_io, etree.HTMLParser()).getroot()
|
tree = etree.parse(bytes_io, etree.HTMLParser()).getroot()
|
||||||
title = tree.xpath("/html/head/title")
|
title_elements = tree.xpath("/html/head/title")
|
||||||
if len(title) > 0:
|
if len(title_elements) > 0:
|
||||||
self.bot.privmsg(target, '\x02[URLInfo]\x02 ' + title[0].text.strip())
|
self.bot.privmsg(target, '\x02[URLInfo]\x02 ' + title_elements[0].text.strip())
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user