diff --git a/bot/urlinfo.py b/bot/urlinfo.py
index bf540d0..43aa774 100644
--- a/bot/urlinfo.py
+++ b/bot/urlinfo.py
@@ -15,16 +15,59 @@ class URLInfo(Plugin):
         "^https?:\/\/f0ck\.me"
     ]
 
+    # Upper bound, in bytes, on how much of a response body is downloaded.
+    SIZE_LIMIT = 2 * 1024 ** 2
+
     @irc3.event(r'(?i)^:\S+ PRIVMSG (?P<target>\S+) :.*(?P<url>https?:\/\/\S+\.\S+).*')
     def url_parser(self, target: str, url: str):
+        """Post the HTML title of a URL seen in a PRIVMSG back to its target.
+
+        URLs matching ``BLACKLIST`` are ignored. The response is streamed so
+        that non-HTML content is rejected before its body is read and bodies
+        of ``SIZE_LIMIT`` bytes or more are abandoned. Request failures and
+        pages without a usable title result in no message being sent.
+
+        :param target: PRIVMSG target captured from the IRC line; the title
+            is sent back to it.
+        :param url: URL captured from the message text.
+        """
         for regex in self.BLACKLIST:
             if re.match(regex, url):
                 return
+
+        bytes_io = io.BytesIO()
         try:
-            response = requests.get(url, timeout=10)
-        except requests.exceptions.ConnectionError:
+            with requests.get(url, timeout=10, stream=True) as r:
+                r.raise_for_status()
+                # Inspect the headers before pulling the body: nothing but HTML
+                # can yield a title, so other content is not downloaded at all.
+                mime_type = r.headers.get("content-type")
+                if mime_type is not None:
+                    if mime_type.split(";")[0].strip().lower() != "text/html":
+                        return
+                size = 0
+                for chunk in r.iter_content(chunk_size=1024 ** 2):
+                    size += len(chunk)
+                    if size >= self.SIZE_LIMIT:
+                        return
+                    bytes_io.write(chunk)
+        except requests.exceptions.RequestException:
+            # Base class of all requests errors: also covers read timeouts,
+            # redirect loops and malformed URLs, not just connection failures.
             return
-        tree = etree.parse(io.StringIO(response.text), etree.HTMLParser()).getroot()
+
+        if size == 0:
+            # Empty body: there is nothing to parse.
+            return
+        bytes_io.seek(0)
+        tree = etree.parse(bytes_io, etree.HTMLParser()).getroot()
+        if tree is None:
+            return
         title = tree.xpath("/html/head/title")
-        if len(title) > 0:
-            self.bot.privmsg(target, '\x02[URLInfo]\x02 ' + title[0].text.strip())
+        if len(title) == 0 or title[0].text is None:
+            return
+        # The title is untrusted: collapsing all whitespace keeps line breaks
+        # out of the outgoing IRC message.
+        title_text = ' '.join(title[0].text.split())
+        if title_text:
+            self.bot.privmsg(target, '\x02[URLInfo]\x02 ' + title_text)