From 49df8e850a6cfa77f0aaf3fc9684cd8684efeb33 Mon Sep 17 00:00:00 2001 From: oddluck <39967334+oddluck@users.noreply.github.com> Date: Wed, 4 Mar 2020 17:05:38 +0000 Subject: [PATCH] SpiffyTitles: file type/size, fake-useragent, badLinkText --- SpiffyTitles/README.md | 8 ++-- SpiffyTitles/config.py | 4 +- SpiffyTitles/plugin.py | 73 +++++++++++++++-------------------- SpiffyTitles/requirements.txt | 1 + 4 files changed, 38 insertions(+), 48 deletions(-) diff --git a/SpiffyTitles/README.md b/SpiffyTitles/README.md index bf98ff6..b6afd7a 100644 --- a/SpiffyTitles/README.md +++ b/SpiffyTitles/README.md @@ -285,12 +285,14 @@ improving performance. Default value: `60` `wallClockTimeoutInSeconds` - Timeout for total elapsed time when retrieving a title. If you set this value too high, the bot may time out. Default value: `8` (seconds). You must `!reload SpiffyTitles` for this setting to take effect. -`channelWhitelist` - a comma separated list of channels in which titles should be displayed. If `""`, +`channelWhitelist` - A comma separated list of channels in which titles should be displayed. If `""`, titles will be shown in all channels. Default value: `""` -`channelBlacklist` - a comma separated list of channels in which titles should never be displayed. If `""`, +`channelBlacklist` - A comma separated list of channels in which titles should never be displayed. If `""`, titles will be shown in all channels. Default value: `""` +`badLinkText` - The text to return when unable to retrieve a title from a URL. Default value: `Nice link idiot.` + ### About white/black lists - Channel names must be in lowercase - If `channelWhitelist` and `channelBlacklist` are empty, then titles will be displayed in every channel @@ -337,8 +339,6 @@ Ignore all links except youtube, imgur, and reddit !config supybot.plugins.SpiffyTitles.whitelistDomainPattern /(reddit\.com|youtube\.com|youtu\.be|imgur\.com)/ -`userAgents` - A comma separated list of strings of user agents randomly chosen when requesting. - `urlRegularExpression` - A regular expression used to match URLs. You shouldn't need to change this. `linkMessageIgnorePattern` - If a message matches this pattern, it will be ignored. This differs from `ignoredDomainPattern` in that it compares against the entire message rather than just the domain. diff --git a/SpiffyTitles/config.py b/SpiffyTitles/config.py index ea44937..e7411e2 100644 --- a/SpiffyTitles/config.py +++ b/SpiffyTitles/config.py @@ -69,8 +69,8 @@ conf.registerChannelValue(SpiffyTitles, 'useBold', registry.Boolean(False, _("""Use bold in titles"""))) # User agents -conf.registerGlobalValue(SpiffyTitles, 'userAgents', - registry.CommaSeparatedListOfStrings(["Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/42.0.2311.60 Safari/537.36", "Mozilla/5.0 (Windows NT 6.3; rv:36.0) Gecko/20100101 Firefox/36.0", "Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; AS; rv:11.0) like Gecko"], _("""Reported user agent when fetching links"""))) +conf.registerGlobalValue(SpiffyTitles, 'badLinkText', + registry.String("Nice link idiot.", _("""Title to return for bad/unsnarfable links."""))) # Mime Types conf.registerGlobalValue(SpiffyTitles, 'mimeTypes', diff --git a/SpiffyTitles/plugin.py b/SpiffyTitles/plugin.py index 3cc5a7a..dbafc3a 100644 --- a/SpiffyTitles/plugin.py +++ b/SpiffyTitles/plugin.py @@ -48,6 +48,7 @@ import unicodedata import supybot.ircdb as ircdb import supybot.log as log import pendulum +from fake_useragent import UserAgent try: from supybot.i18n import PluginInternationalization @@ -117,7 +118,7 @@ class SpiffyTitles(callbacks.Plugin): self.handlers["www.twitch.tv"] = self.handler_twitch self.handlers["go.twitch.tv"] = self.handler_twitch self.handlers["clips.twitch.tv"] = self.handler_twitch - + def add_imdb_handlers(self): """ Enables meta info about IMDB links through the OMDB API @@ -158,11 +159,7 @@ class SpiffyTitles(callbacks.Plugin): fields = "id,title,owner.screenname,duration,views_total" api_url = "https://api.dailymotion.com/video/%s?fields=%s" % (video_id, fields) log.debug("SpiffyTitles: looking up dailymotion info: %s", api_url) - agent = self.get_user_agent() - headers = { - "User-Agent": agent - } - + headers = self.get_headers() request = requests.get(api_url, headers=headers) ok = request.status_code == requests.codes.ok @@ -213,11 +210,7 @@ class SpiffyTitles(callbacks.Plugin): if video_id is not None: api_url = "https://vimeo.com/api/v2/video/%s.json" % video_id log.debug("SpiffyTitles: looking up vimeo info: %s", api_url) - agent = self.get_user_agent() - headers = { - "User-Agent": agent - } - + headers = self.get_headers() request = requests.get(api_url, headers=headers) ok = request.status_code == requests.codes.ok @@ -279,11 +272,7 @@ class SpiffyTitles(callbacks.Plugin): video_id = video_id.split("?")[0] api_url = "http://coub.com/api/v2/coubs/%s" % video_id - agent = self.get_user_agent() - headers = { - "User-Agent": agent - } - + headers = self.get_headers() request = requests.get(api_url, headers=headers) ok = request.status_code == requests.codes.ok @@ -491,7 +480,7 @@ class SpiffyTitles(callbacks.Plugin): except Exception as e: pass - if title is not None and title: + if title: irc.reply(title) else: irc.reply(error_message + " {}".format(err)) @@ -663,10 +652,7 @@ class SpiffyTitles(callbacks.Plugin): } encoded_options = urlencode(options) api_url = "https://www.googleapis.com/youtube/v3/videos?%s" % (encoded_options) - agent = self.get_user_agent() - headers = { - "User-Agent": agent - } + headers = self.get_headers() log.debug("SpiffyTitles: requesting %s" % (api_url)) @@ -886,10 +872,7 @@ class SpiffyTitles(callbacks.Plugin): if not match: self.log.debug("SpiffyTitles: twitch - no title found.") return self.handler_default(url, channel) - agent = self.get_user_agent() - headers = { - "Client-ID": twitch_client_id - } + headers = self.get_headers() self.log.debug("SpiffyTitles: twitch - requesting %s" % (data_url)) request = requests.get(data_url, timeout=10, headers=headers) ok = request.status_code == requests.codes.ok @@ -1212,11 +1195,7 @@ class SpiffyTitles(callbacks.Plugin): api_params.update(title_param) param_string = "&".join("%s=%s" % (key, val) for (key, val) in api_params.items()) api_url = "https://%s/w/api.php?%s" % (info.netloc, param_string) - - agent = self.get_user_agent() - headers = { - "User-Agent": agent - } + headers = self.get_headers() extract = "" self.log.debug("SpiffyTitles: requesting %s" % (api_url)) @@ -1292,10 +1271,7 @@ class SpiffyTitles(callbacks.Plugin): self.log.debug("SpiffyTitles: no title found.") return self.handler_default(url, channel) - agent = self.get_user_agent() - headers = { - "User-Agent": agent - } + headers = self.get_headers() self.log.debug("SpiffyTitles: requesting %s" % (data_url)) @@ -1621,14 +1597,30 @@ class SpiffyTitles(callbacks.Plugin): else: log.debug("SpiffyTitles: unacceptable mime type %s for url %s" % (content_type, url)) + suffixes = ['B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB'] + def humansize(nbytes): + i = 0 + while nbytes >= 1024 and i < len(suffixes)-1: + nbytes /= 1024. + i += 1 + f = ('%.2f' % nbytes).rstrip('0').rstrip('.') + return '%s %s' % (f, suffixes[i]) + size = request.headers.get("content-length") + if size: + size = humansize(int(size)) + text = "[{0}] ({1})".format(content_type, size) + else: + text = "[{0}]".format(content_type) + text = "