urlinfo: check content-type before downloading + handle timeouts
improve code style and formatting
This commit is contained in:
		| @@ -16,6 +16,7 @@ class URLInfo(Plugin): | |||||||
|         "^https?:\/\/(?:(?:vid|img|thumb)\.)?pr0gramm\.com" |         "^https?:\/\/(?:(?:vid|img|thumb)\.)?pr0gramm\.com" | ||||||
|     ] |     ] | ||||||
|  |  | ||||||
|  |     # set the size limit to 2 MB so we don't fully download too large resources | ||||||
|     SIZE_LIMIT = 2 * 1024 ** 2 |     SIZE_LIMIT = 2 * 1024 ** 2 | ||||||
|  |  | ||||||
|     @irc3.event(r'(?i)^:\S+ PRIVMSG (?P<target>\S+) :.*(?P<url>https?:\/\/\S+\.\S+).*') |     @irc3.event(r'(?i)^:\S+ PRIVMSG (?P<target>\S+) :.*(?P<url>https?:\/\/\S+\.\S+).*') | ||||||
| @@ -23,26 +24,27 @@ class URLInfo(Plugin): | |||||||
|         for regex in self.BLACKLIST: |         for regex in self.BLACKLIST: | ||||||
|             if re.match(regex, url): |             if re.match(regex, url): | ||||||
|                 return |                 return | ||||||
|  |  | ||||||
|         bytes_io = io.BytesIO() |         bytes_io = io.BytesIO() | ||||||
|  |  | ||||||
|         try: |         try: | ||||||
|             with requests.get(url, timeout=10, stream=True) as r: |             with requests.get(url, timeout=10, stream=True) as r: | ||||||
|                 r.raise_for_status() |                 r.raise_for_status() | ||||||
|  |                 mime_type = r.headers.get("content-type") | ||||||
|  |                 if mime_type is not None and mime_type.split(";")[0] != "text/html": | ||||||
|  |                     return | ||||||
|                 size = 0 |                 size = 0 | ||||||
|                 for chunk in r.iter_content(chunk_size=1024 ** 2): |                 for chunk in r.iter_content(chunk_size=1024 ** 2): | ||||||
|                     size += len(chunk) |                     size += len(chunk) | ||||||
|                     if size >= self.SIZE_LIMIT: |                     if size >= self.SIZE_LIMIT: | ||||||
|                         return |                         return | ||||||
|                     bytes_io.write(chunk) |                     bytes_io.write(chunk) | ||||||
|         except (requests.exceptions.ConnectionError, requests.exceptions.HTTPError) as e: |         except (requests.exceptions.ConnectionError, requests.exceptions.HTTPError, requests.exceptions.ReadTimeout): | ||||||
|             return |             return | ||||||
|  |  | ||||||
|         mime_type = r.headers.get("content-type") |  | ||||||
|         if mime_type is not None: |  | ||||||
|             if mime_type.split(";")[0] != "text/html": |  | ||||||
|                 return |  | ||||||
|         bytes_io.seek(0) |         bytes_io.seek(0) | ||||||
|         tree = etree.parse(bytes_io, etree.HTMLParser()).getroot() |         tree = etree.parse(bytes_io, etree.HTMLParser()).getroot() | ||||||
|         title = tree.xpath("/html/head/title") |         title_elements = tree.xpath("/html/head/title") | ||||||
|         if len(title) > 0: |         if len(title_elements) > 0: | ||||||
|             self.bot.privmsg(target, '\x02[URLInfo]\x02 ' + title[0].text.strip()) |             self.bot.privmsg(target, '\x02[URLInfo]\x02 ' + title_elements[0].text.strip()) | ||||||
|  |  | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user