2021-05-14 19:20:19 +00:00
|
|
|
# -*- coding: utf-8 -*-
|
|
|
|
import re
|
|
|
|
import irc3
|
|
|
|
import requests
|
|
|
|
import io
|
|
|
|
from lxml import etree
|
|
|
|
|
|
|
|
from . import Plugin
|
|
|
|
|
|
|
|
class URLInfo(Plugin):
    """Post the HTML ``<title>`` of URLs seen in PRIVMSG lines back to the target."""

    # URL prefixes that are never fetched. Each entry is a regular expression
    # applied with ``re.match`` (anchored at the start of the URL).
    BLACKLIST = [
        r"^https?:\/\/(?:www\.)?youtube\.com",
        r"^https?:\/\/youtu\.be",
        r"^https?:\/\/w0bm\.com",
        r"^https?:\/\/f0ck\.me",
        r"^https?:\/\/(?:(?:vid|img|thumb)\.)?pr0gramm\.com",
    ]

    # set the size limit to 2 MB so we don't fully download too large resources
    SIZE_LIMIT = 2 * 1024 ** 2

    @irc3.event(r'(?i)^:\S+ PRIVMSG (?P<target>\S+) :.*?(?P<url>https?:\/\/\S+\.\S+).*')
    def url_parser(self, target: str, url: str):
        """Fetch ``url`` and send its page title to ``target``.

        Nothing is sent when the URL is blacklisted, the request fails, the
        response is not ``text/html``, the body reaches ``SIZE_LIMIT`` bytes,
        the document cannot be parsed, or no non-empty title is found.

        :param target: value of the PRIVMSG target captured by the event regex;
            the title is sent back to it.
        :param url: the http(s) URL captured from the message text.
        """
        if self._is_blacklisted(url):
            return

        body = self._fetch_html(url)
        if body is None:
            return

        title = self._extract_title(body)
        if title:
            self.bot.privmsg(target, '\x02[URLInfo]\x02 ' + title)

    def _is_blacklisted(self, url):
        """Return True when ``url`` matches any ``BLACKLIST`` pattern."""
        # The event regex is case-insensitive, so "HTTP://YouTube.com/..." can
        # reach this handler; match the blacklist case-insensitively as well.
        return any(re.match(regex, url, re.IGNORECASE) for regex in self.BLACKLIST)

    def _fetch_html(self, url):
        """Download ``url`` and return its body as bytes, or None to give up.

        None is returned on any ``requests`` error (including HTTP error
        statuses), on a Content-Type other than ``text/html``, and when the
        body reaches ``SIZE_LIMIT`` bytes.
        """
        chunks = []
        try:
            # stream=True defers the body download so the headers can be
            # checked and the transfer aborted early.
            with requests.get(url, timeout=10, stream=True) as r:
                r.raise_for_status()

                mime_type = r.headers.get("content-type")
                if mime_type is not None:
                    # Media types are case-insensitive and may be followed by
                    # whitespace before the ";parameter" part.
                    media_type = mime_type.split(";", 1)[0].strip().lower()
                    if media_type != "text/html":
                        return None

                size = 0
                for chunk in r.iter_content(chunk_size=1024 ** 2):
                    size += len(chunk)
                    if size >= self.SIZE_LIMIT:
                        return None
                    chunks.append(chunk)
        except requests.exceptions.RequestException:
            return None

        return b"".join(chunks)

    @staticmethod
    def _extract_title(body):
        """Return the whitespace-normalised ``<title>`` text of ``body``, or None."""
        try:
            tree = etree.parse(io.BytesIO(body), etree.HTMLParser()).getroot()
        except etree.LxmlError:
            # Unparseable or empty documents must not crash the event handler.
            return None
        if tree is None:
            return None

        title_elements = tree.xpath("/html/head/title")
        if not title_elements:
            return None

        title = title_elements[0].text
        if title is None:
            return None

        # Collapse internal newlines/tabs/runs of spaces so a multi-line title
        # is sent as a single clean line.
        return " ".join(title.split()) or None
|
2021-05-14 19:20:19 +00:00
|
|
|
|