Update plugin.py

This commit is contained in:
Gordon Shumway 2019-03-08 23:36:32 -05:00 committed by GitHub
parent 85bd8e21a2
commit 8dcbb7a195
1 changed files with 124 additions and 132 deletions

256
plugin.py
View File

@ -10,9 +10,16 @@ from supybot.commands import *
import supybot.ircmsgs as ircmsgs
import supybot.ircutils as ircutils
import supybot.callbacks as callbacks
import supybot.utils as utils
import humanize
import re
import requests
import sys
import urllib
try:
from urllib import urlencode
from urlparse import urlparse, parse_qsl
except ImportError:
from urllib.parse import urlencode, urlparse, parse_qsl
from bs4 import BeautifulSoup
import random
import json
@ -26,12 +33,6 @@ import supybot.ircdb as ircdb
import supybot.log as log
import pytz
if sys.version_info[0] >= 3:
from urllib.parse import urlencode, urlparse, parse_qsl
else:
from urllib import urlencode
from urlparse import urlparse, parse_qsl
try:
from supybot.i18n import PluginInternationalization
_ = PluginInternationalization("SpiffyTitles")
@ -45,7 +46,7 @@ class SpiffyTitles(callbacks.Plugin):
"""Displays link titles when posted in a channel"""
threaded = True
callBefore = ["Web"]
link_cache = []
link_cache = {}
handlers = {}
wall_clock_timeout = 8
max_request_retries = 3
@ -75,8 +76,6 @@ class SpiffyTitles(callbacks.Plugin):
def add_dailymotion_handlers(self):
self.handlers["www.dailymotion.com"] = self.handler_dailymotion
self.handlers["dailymotion.com"] = self.handler_dailymotion
self.handlers["dai.ly"] = self.handler_dailymotion
def add_vimeo_handlers(self):
self.handlers["vimeo.com"] = self.handler_vimeo
@ -102,12 +101,8 @@ class SpiffyTitles(callbacks.Plugin):
video_id = None
""" Get video ID """
if dailymotion_handler_enabled:
if "/video/" in info.path:
video_id = info.path.lstrip("/video/").split("_")[0]
if info.netloc == "dai.ly":
video_id = info.path.lstrip("/")
if dailymotion_handler_enabled and "/video/" in info.path:
video_id = info.path.lstrip("/video/").split("_")[0]
if video_id is not None:
fields = "id,title,owner.screenname,duration,views_total"
@ -127,8 +122,9 @@ class SpiffyTitles(callbacks.Plugin):
if response is not None and "title" in response:
video = response
dailymotion_template = self.get_template(
"dailymotionVideoTitleTemplate", channel)
dailymotion_template = \
Template(self.registryValue("dailymotionVideoTitleTemplate",
channel=channel))
video["views_total"] = "{:,}".format(int(video["views_total"]))
video["duration"] = self.get_duration_from_seconds(video["duration"])
video["ownerscreenname"] = video["owner.screenname"]
@ -181,7 +177,9 @@ class SpiffyTitles(callbacks.Plugin):
if response is not None and "title" in response[0]:
video = response[0]
vimeo_template = self.get_template("vimeoTitleTemplate", channel)
vimeo_template = Template(self.registryValue("vimeoTitleTemplate",
channel=channel))
"""
Some videos do not have this information available
"""
@ -245,7 +243,8 @@ class SpiffyTitles(callbacks.Plugin):
if response:
video = response
coub_template = self.get_template("coubTemplate", channel)
coub_template = Template(self.registryValue("coubTemplate"))
video["likes_count"] = "{:,}".format(int(video["likes_count"]))
video["recoubs_count"] = "{:,}".format(int(video["recoubs_count"]))
video["views_count"] = "{:,}".format(int(video["views_count"]))
@ -331,10 +330,12 @@ class SpiffyTitles(callbacks.Plugin):
"""
if is_ctcp and ignore_actions:
return
url = self.get_url_from_message(message)
if is_channel:
channel_is_allowed = self.is_channel_allowed(channel)
url = self.get_url_from_message(message)
#url = self.get_url_from_message(message)
ignore_match = self.message_matches_ignore_pattern(message)
if ignore_match:
@ -363,7 +364,7 @@ class SpiffyTitles(callbacks.Plugin):
url)
return
title = self.get_title_by_url(url, channel)
title = self.get_title_by_url(url, channel, origin_nick)
if title is not None and title:
ignore_match = self.title_matches_ignore_pattern(title, channel)
@ -371,7 +372,8 @@ class SpiffyTitles(callbacks.Plugin):
if ignore_match:
return
else:
irc.queueMsg(ircmsgs.privmsg(channel, title))
if not is_ignored:
irc.sendMsg(ircmsgs.privmsg(channel, title))
else:
if self.default_handler_enabled:
log.debug("SpiffyTitles: could not get a title for %s" % (url))
@ -379,19 +381,20 @@ class SpiffyTitles(callbacks.Plugin):
log.debug("SpiffyTitles: could not get a title for %s but default \
handler is disabled" % (url))
def get_title_by_url(self, url, channel):
def get_title_by_url(self, url, channel, origin_nick=None):
"""
Retrieves the title of a website based on the URL provided
"""
info = urlparse(url)
domain = str(info.netloc)
domain = info.netloc
title = None
"""
Check if we have this link cached according to the cache lifetime. If so, serve
link from the cache instead of calling handlers.
"""
cached_link = self.get_link_from_cache(url)
cached_link = self.get_link_from_cache(url, channel)
if cached_link is not None:
title = cached_link["title"]
@ -414,59 +417,44 @@ class SpiffyTitles(callbacks.Plugin):
# Update link cache
log.debug("SpiffyTitles: caching %s" % (url))
now = datetime.datetime.now()
self.link_cache.append({
if channel not in self.link_cache:
self.link_cache[channel] = []
self.link_cache[channel].append({
"url": url,
"timestamp": now,
"title": title
"title": title,
"from": origin_nick,
"channel": channel
})
return title
def t(self, irc, msg, args, query):
"""
Retrieves title for a URL on demand
"""
message = msg.args[1]
channel = msg.args[0]
is_channel = irc.isChannel(channel)
if not is_channel:
channel = msg.nick
url = self.get_url_from_message(query)
url = self.get_url_from_message(message)
title = None
error_message = self.registryValue("onDemandTitleError", channel=channel)
handled = False
err = ''
if url:
title = self.get_title_by_url(url, channel)
if title is not None and title:
if is_channel:
"""
This prevents the title being sent twice, when t()
is used in a channel, if it is handled by enabled
handlers already, t() will pass silently.
If t is requested in a /msg, the title acquired
using the enabled handlers will be replied back.
"""
handled = True
else:
"""
If the handlers are disabled, use the default handler
and attempt to get the title anyway.
"""
title = self.handler_default(url, channel, t_override=True)
if not handled:
if title is not None and title:
irc.queueMsg(ircmsgs.privmsg(channel, title))
else:
"""
Unable to find a title in a valid URL
"""
irc.queueMsg(ircmsgs.privmsg(channel, error_message))
else:
try:
if url:
title = self.get_title_by_url(query, channel)
except Exception as e:
pass
if title is not None and title:
irc.sendMsg(ircmsgs.privmsg(channel, title))
else:
irc.sendMsg(ircmsgs.privmsg(channel, error_message + " {}".format(err)))
t = wrap(t, ['text'])
def get_link_from_cache(self, url):
def get_link_from_cache(self, url, channel):
"""
Looks for a URL in the link cache and returns info about if it's not stale
according to the configured cache lifetime, or None.
@ -488,10 +476,11 @@ class SpiffyTitles(callbacks.Plugin):
stale = False
seconds = 0
for link in self.link_cache:
if link["url"] == url:
cached_link = link
break
if channel in self.link_cache:
for link in self.link_cache[channel]:
if link["url"] == url:
cached_link = link
break
# Found link, check timestamp
if cached_link is not None:
@ -632,7 +621,7 @@ class SpiffyTitles(callbacks.Plugin):
log.debug("SpiffyTitles: calling Youtube handler for %s" % (url))
video_id = self.get_video_id_from_url(url, domain)
yt_template = self.get_template("youtubeTitleTemplate", channel)
yt_template = Template(self.registryValue("youtubeTitleTemplate", channel=channel))
title = ""
if video_id:
@ -732,6 +721,7 @@ class SpiffyTitles(callbacks.Plugin):
return title
else:
log.debug("SpiffyTitles: falling back to default handler")
return self.handler_default(url, channel)
def get_duration_from_seconds(self, duration_seconds):
@ -743,6 +733,7 @@ class SpiffyTitles(callbacks.Plugin):
""" Only include hour if the video is at least 1 hour long """
if h > 0:
duration = "%02d:%s" % (h, duration)
return duration
def get_youtube_logo(self):
@ -752,6 +743,7 @@ class SpiffyTitles(callbacks.Plugin):
]
yt_logo = "".join(colored_letters)
return yt_logo
def get_total_seconds_from_duration(self, input):
@ -798,15 +790,14 @@ class SpiffyTitles(callbacks.Plugin):
else:
return ""
def handler_default(self, url, channel, t_override=False):
def handler_default(self, url, channel):
"""
Default handler for websites
"""
default_handler_enabled = self.registryValue("defaultHandlerEnabled", channel=channel)
if default_handler_enabled or t_override:
if default_handler_enabled:
log.debug("SpiffyTitles: calling default handler for %s" % (url))
default_template = self.get_template("defaultTitleTemplate", channel)
default_template = Template(self.registryValue("defaultTitleTemplate", channel=channel))
(html, is_redirect) = self.get_source_by_url(url)
if html is not None and html:
@ -840,7 +831,7 @@ class SpiffyTitles(callbacks.Plugin):
# We can only accommodate a specific format of URL here
if "/title/" in url:
imdb_id = url.split("/title/")[1].rstrip("/")
omdb_url = "http://www.omdbapi.com/?i=%s&plot=short&r=json&tomatoes=true" % (imdb_id)
omdb_url = "http://www.omdbapi.com/?i=%s&plot=short&r=json&tomatoes=true&apikey=7410c07d" % (imdb_id)
try:
request = requests.get(omdb_url, timeout=10, headers=headers)
@ -848,7 +839,7 @@ class SpiffyTitles(callbacks.Plugin):
if request.status_code == requests.codes.ok:
response = json.loads(request.text)
result = None
imdb_template = self.get_template("imdbTemplate", channel)
imdb_template = Template(self.registryValue("imdbTemplate"))
not_found = "Error" in response
unknown_error = response["Response"] != "True"
@ -870,6 +861,7 @@ class SpiffyTitles(callbacks.Plugin):
return result
else:
log.debug("SpiffyTitles: IMDB handler failed. calling default handler")
return self.handler_default(url, channel)
def handler_wikipedia(self, url, domain, channel):
@ -951,10 +943,12 @@ class SpiffyTitles(callbacks.Plugin):
max_chars = self.registryValue("wikipedia.maxChars", channel=channel)
if len(extract) > max_chars:
extract = extract[:max_chars - 3].rsplit(' ', 1)[0].rstrip(',.') + '...'
wikipedia_template = self.get_template("wikipedia.extractTemplate", channel)
extract_template = self.registryValue("wikipedia.extractTemplate", channel=channel)
wikipedia_template = Template(extract_template)
return wikipedia_template.render({"extract": extract})
else:
self.log.debug("SpiffyTitles: falling back to default handler")
return self.handler_default(url, channel)
def handler_reddit(self, url, domain, channel):
@ -974,7 +968,7 @@ class SpiffyTitles(callbacks.Plugin):
},
"comment": {
"pattern":
r"^/r/(?P<subreddit>[^/]+)/comments/(?P<thread>[^/]+)/[^/]+/(?P<comment>\w+)$",
r"^/r/(?P<subreddit>[^/]+)/comments/(?P<thread>[^/]+)/[^/]+/(?P<comment>\w+/?)$",
"url": "https://www.reddit.com/r/{subreddit}/comments/{thread}/x/{comment}.json"
},
"user": {
@ -1041,7 +1035,7 @@ class SpiffyTitles(callbacks.Plugin):
else:
age = '{}d'.format(age_days % 365)
if age_days > 365:
age = '{}y, '.format(age_days / 365) + age
age = '{}y, '.format(age_days // 365) + age
age = age + " ago"
if link_type == "thread":
link_type = "linkThread"
@ -1051,9 +1045,9 @@ class SpiffyTitles(callbacks.Plugin):
extract = data.get('selftext', '')
if link_type == "comment":
extract = data.get('body', '')
reddit_template = self.get_template(''.join(["reddit.",
link_type,
"Template"]), channel)
link_type_template = self.registryValue("reddit." + link_type + "Template",
channel=channel)
reddit_template = Template(link_type_template)
template_vars = {
"id": data.get('id', ''),
"user": data.get('name', ''),
@ -1092,6 +1086,7 @@ class SpiffyTitles(callbacks.Plugin):
Images, galleries, and albums all share their format in their identifier.
"""
match = re.match(r"[a-z0-9]+", input, re.IGNORECASE)
return match is not None
def handler_imgur(self, url, info, channel):
@ -1109,6 +1104,7 @@ class SpiffyTitles(callbacks.Plugin):
result = self.handler_imgur_album(url, info, channel)
else:
result = self.handler_default(url, channel)
return result
def handler_imgur_album(self, url, info, channel):
@ -1135,7 +1131,8 @@ class SpiffyTitles(callbacks.Plugin):
album = self.imgur_client.get_album(album_id)
if album:
imgur_album_template = self.get_template("imgurAlbumTemplate", channel)
album_template = self.registryValue("imgurAlbumTemplate", channel=channel)
imgur_album_template = Template(album_template)
compiled_template = imgur_album_template.render({
"title": album.title,
"section": album.section,
@ -1188,9 +1185,10 @@ class SpiffyTitles(callbacks.Plugin):
image = self.imgur_client.get_image(image_id)
if image:
imgur_image_template = self.get_template("imgurTemplate", channel)
channel_template = self.registryValue("imgurTemplate", channel=channel)
imgur_template = Template(channel_template)
readable_file_size = self.get_readable_file_size(image.size)
compiled_template = imgur_image_template.render({
compiled_template = imgur_template.render({
"title": image.title,
"type": image.type,
"nsfw": image.nsfw,
@ -1232,22 +1230,26 @@ class SpiffyTitles(callbacks.Plugin):
"""
use_bold = self.registryValue("useBold", channel=channel)
IP_pattern = r"((25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)([ (\[]?(\.|dot)[ )\]]?(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)){3})"
# Replace anywhere in string
title = title.replace("\n", " ")
title = title.replace("\t", " ")
title = re.sub(" +", " ", title)
title = re.sub(IP_pattern, "206.18.125.29", title)
if use_bold:
title = ircutils.bold(title)
title = title.strip()
return title
def get_title_from_html(self, html):
"""
Retrieves value of <title> tag from HTML
"""
soup = BeautifulSoup(html, "html5lib")
soup = BeautifulSoup(html, "lxml")
if soup is not None:
"""
@ -1263,18 +1265,18 @@ class SpiffyTitles(callbacks.Plugin):
if len(title):
return title
timeout_decorator.timeout(wall_clock_timeout)
def get_source_by_url(self, url, retries=1):
"""
Get the HTML of a website based on a URL
"""
max_retries = self.registryValue("maxRetries")
verify_ssl_certs = self.registryValue("verifySSL")
if retries is None:
retries = 1
if retries >= max_retries:
log.debug("SpiffyTitles: hit maximum retries for %s" % url)
return (None, False)
log.debug("SpiffyTitles: attempt #%s for %s" % (retries, url))
@ -1284,41 +1286,42 @@ class SpiffyTitles(callbacks.Plugin):
log.debug("SpiffyTitles: requesting %s" % (url))
request = requests.get(url, headers=headers, timeout=15, allow_redirects=True, verify=verify_ssl_certs)
with requests.get(url, headers=headers, timeout=10,
allow_redirects=True, stream=True) as request:
is_redirect = False
if request.history:
# check the top two domain levels
link_domain = self.get_base_domain(request.history[0].url)
real_domain = self.get_base_domain(request.url)
if link_domain != real_domain:
is_redirect = True
is_redirect = False
if request.history:
# check the top two domain levels
link_domain = self.get_base_domain(request.history[0].url)
real_domain = self.get_base_domain(request.url)
if link_domain != real_domain:
is_redirect = True
for redir in request.history:
log.debug("SpiffyTitles: Redirect %s from %s" % (redir.status_code, redir.url))
log.debug("SpiffyTitles: Final url %s" % (request.url))
for redir in request.history:
log.debug("SpiffyTitles: Redirect %s from %s" % (redir.status_code, redir.url))
log.debug("SpiffyTitles: Final url %s" % (request.url))
if request.status_code == requests.codes.ok:
# Check the content type which comes in the format: "text/html; charset=UTF-8"
content_type = request.headers.get("content-type").split(";")[0].strip()
acceptable_types = self.registryValue("mimeTypes")
if request.status_code == requests.codes.ok:
# Check the content type which comes in the format: "text/html; charset=UTF-8"
content_type = request.headers.get("content-type").split(";")[0].strip()
acceptable_types = self.registryValue("mimeTypes")
log.debug("SpiffyTitles: content type %s" % (content_type))
log.debug("SpiffyTitles: content type %s" % (content_type))
if content_type in acceptable_types:
text = request.content
if content_type in acceptable_types:
text = request.content
if text:
return (text, is_redirect)
else:
log.debug("SpiffyTitles: empty content from %s" % (url))
if text:
return (text, is_redirect)
else:
log.debug("SpiffyTitles: empty content from %s" % (url))
log.debug("SpiffyTitles: unacceptable mime type %s for url %s" %
(content_type, url))
else:
log.debug("SpiffyTitles: unacceptable mime type %s for url %s" %
(content_type, url))
else:
log.error("SpiffyTitles HTTP response code %s - %s" % (request.status_code,
request.content))
log.error("SpiffyTitles HTTP response code %s" % (request.status_code,))
#request.content))
return ('<html><head><title>nice link idiot</title></head><body></body></html>', is_redirect)
except timeout_decorator.TimeoutError:
log.error("SpiffyTitles: wall timeout!")
@ -1340,6 +1343,7 @@ class SpiffyTitles(callbacks.Plugin):
log.error("SpiffyTitles HTTPError: %s" % (str(e)))
except requests.exceptions.InvalidURL as e:
log.error("SpiffyTitles InvalidURL: %s" % (str(e)))
return (None, False)
def get_base_domain(self, url):
@ -1356,6 +1360,7 @@ class SpiffyTitles(callbacks.Plugin):
"User-Agent": agent,
"Accept-Language": ";".join((self.accept_language, "q=1.0"))
}
return headers
def get_user_agent(self):
@ -1363,6 +1368,7 @@ class SpiffyTitles(callbacks.Plugin):
Returns a random user agent from the ones available
"""
agents = self.registryValue("userAgents")
return random.choice(agents)
def message_matches_ignore_pattern(self, input):
@ -1375,6 +1381,7 @@ class SpiffyTitles(callbacks.Plugin):
if pattern:
match = re.search(pattern, input)
return match
def title_matches_ignore_pattern(self, input, channel):
@ -1391,6 +1398,7 @@ class SpiffyTitles(callbacks.Plugin):
if match:
log.debug("SpiffyTitles: title %s matches ignoredTitlePattern for %s" %
(input, channel))
return match
def get_url_from_message(self, input):
@ -1402,12 +1410,8 @@ class SpiffyTitles(callbacks.Plugin):
if match:
raw_url = match.group(0).strip()
if sys.version_info[0] >= 3:
url = self.remove_control_characters(
unicodedata.normalize('NFC', str(raw_url)))
else:
url = self.remove_control_characters(
unicodedata.normalize('NFC', unicode(raw_url)))
url = self.remove_control_characters(str(raw_url))
return url
def remove_control_characters(self, s):
@ -1421,23 +1425,11 @@ class SpiffyTitles(callbacks.Plugin):
has_cap = ircdb.checkCapability(mask, cap, ignoreDefaultAllow=True)
if has_cap:
log.debug("SpiffyTitles: %s has required capability '%s'" %
(mask, required_capability))
log.debug("SpiffyTitles: %s has required capability '%s'" % (mask, required_capability))
else:
log.debug("SpiffyTitles: %s does NOT have required capability '%s'" %
(mask, required_capability))
return has_cap
def get_template(self, handler_template, channel):
"""
Returns the requested template object.
"""
if sys.version_info[0] >= 3:
template = Template(self.registryValue(handler_template,
channel=channel))
else:
template = Template(self.registryValue(handler_template,
channel=channel).decode("utf-8"))
return template
Class = SpiffyTitles