SpiffyTitles: Implement Wikipedia extracts
This commit is contained in:
parent
80ca56d870
commit
05b7ea1a46
24
README.md
24
README.md
|
|
@ -7,6 +7,7 @@ The ONLY gluten-free plugin for displaying link titles.
|
|||
- Configurable template so you can decide how titles are displayed and what they say
|
||||
- Additional information about [Youtube](https://youtube.com) videos
|
||||
- Additional information about [imgur](https://imgur.com) links
|
||||
- Article extracts from [Wikipedia](https://en.wikipedia.org) links
|
||||
- Rate limiting to mitigate abuse
|
||||
- Configurable white/black list to control where titles are disabled
|
||||
- Configurable list of user agents
|
||||
|
|
@ -95,7 +96,7 @@ Example output:
|
|||
|
||||
^ Snoop Dogg - Pump Pump feat. Lil Malik uploaded by GeorgeRDR3218 @ 00:45:: Duration: 04:41 :: 203,218 views :: 933 likes :: 40 dislikes :: 0 favorites :: 112 comments
|
||||
|
||||
### Available variable for the Youtube template ###
|
||||
### Available variables for the Youtube template ###
|
||||
|
||||
Variable | Description
|
||||
---------------|------------
|
||||
|
|
@ -184,6 +185,27 @@ Default value: `^ [{{ownerscreenname}}] {{title}} :: Duration: {{duration}} :: {
|
|||
|
||||
`dailymotionHandlerEnabled` - Whether to enable additional information about dailymotion videos.
|
||||
|
||||
### wikipedia handler
|
||||
|
||||
`wikipedia.enabled` - Whether to fetch extracts for Wikipedia articles.
|
||||
|
||||
`wikipedia.extractTemplate` - Wikipedia template.
|
||||
|
||||
Default value: "^ {{extract}}"
|
||||
|
||||
`wikipedia.maxChars` - Extract will be cut to this length (including '...').
|
||||
|
||||
Default value: 240
|
||||
|
||||
`wikipedia.removeParentheses` - Whether to remove parenthesized text from output.
|
||||
|
||||
`wikipedia.ignoreSectionLinks` - Whether to ignore links to specific article sections.
|
||||
|
||||
`wikipedia.apiParams` - Add or override API query parameters with a space-separated list of key=value pairs.
|
||||
|
||||
`wikipedia.titleParam` - The query parameter that will hold the page title from the URL.
|
||||
|
||||
|
||||
## Other options
|
||||
|
||||
`useBold` - Whether to bold the title. Default value: `False`
|
||||
|
|
|
|||
26
config.py
26
config.py
|
|
@ -152,3 +152,29 @@ conf.registerChannelValue(SpiffyTitles, 'requireCapability',
|
|||
|
||||
conf.registerChannelValue(SpiffyTitles, 'ignoredTitlePattern',
|
||||
registry.Regexp("", _("""Titles matching this pattern will be ignored.""")))
|
||||
|
||||
|
||||
conf.registerGroup(SpiffyTitles, 'wikipedia')
|
||||
|
||||
conf.registerChannelValue(SpiffyTitles.wikipedia, 'enabled',
|
||||
registry.Boolean(True, _("""Whether to fetch extracts for Wikipedia articles.""")))
|
||||
|
||||
conf.registerChannelValue(SpiffyTitles.wikipedia, 'apiParams',
|
||||
registry.SpaceSeparatedListOfStrings([], _("""Add or override API query parameters with a space-separated list of key=value pairs.""")))
|
||||
|
||||
conf.registerChannelValue(SpiffyTitles.wikipedia, 'titleParam',
|
||||
registry.String("titles", _("""The query parameter that will hold the page title from the URL.""")))
|
||||
|
||||
# Ideally, links to specific article sections would produce the relevant output for that section. This is not currently implemented.
|
||||
conf.registerChannelValue(SpiffyTitles.wikipedia, 'ignoreSectionLinks',
|
||||
registry.Boolean(True, _("""Ignore links to specific article sections.""")))
|
||||
|
||||
conf.registerChannelValue(SpiffyTitles.wikipedia, 'maxChars',
|
||||
registry.Integer(240, _("""Extract will be cut to this length (including '...').""")))
|
||||
|
||||
# Remove parenthesized text from output.
|
||||
conf.registerChannelValue(SpiffyTitles.wikipedia, 'removeParentheses',
|
||||
registry.Boolean(True, _("""Remove parenthesized text from output.""")))
|
||||
|
||||
conf.registerChannelValue(SpiffyTitles.wikipedia, 'extractTemplate',
|
||||
registry.String("^ {{extract}}", _("""Wikipedia template.""")))
|
||||
|
|
|
|||
87
plugin.py
87
plugin.py
|
|
@ -15,10 +15,10 @@ import supybot.callbacks as callbacks
|
|||
import re
|
||||
import requests
|
||||
try:
|
||||
from urlparse import urlparse
|
||||
from urllib import urlencode
|
||||
from urlparse import urlparse, parse_qsl
|
||||
except ImportError:
|
||||
from urllib.parse import urlencode, urlparse
|
||||
from urllib.parse import urlencode, urlparse, parse_qsl
|
||||
from bs4 import BeautifulSoup
|
||||
import random
|
||||
import json
|
||||
|
|
@ -68,6 +68,7 @@ class SpiffyTitles(callbacks.Plugin):
|
|||
self.add_coub_handlers()
|
||||
self.add_vimeo_handlers()
|
||||
self.add_dailymotion_handlers()
|
||||
self.add_wikipedia_handlers()
|
||||
|
||||
def add_dailymotion_handlers(self):
|
||||
self.handlers["www.dailymotion.com"] = self.handler_dailymotion
|
||||
|
|
@ -78,6 +79,9 @@ class SpiffyTitles(callbacks.Plugin):
|
|||
def add_coub_handlers(self):
|
||||
self.handlers["coub.com"] = self.handler_coub
|
||||
|
||||
def add_wikipedia_handlers(self):
    """Register the Wikipedia article handler for its domain."""
    self.handlers["en.wikipedia.org"] = self.handler_wikipedia
|
||||
|
||||
def handler_dailymotion(self, url, info, channel):
|
||||
"""
|
||||
Handles dailymotion links
|
||||
|
|
@ -821,6 +825,85 @@ class SpiffyTitles(callbacks.Plugin):
|
|||
|
||||
return self.handler_default(url, channel)
|
||||
|
||||
def handler_wikipedia(self, url, domain, channel):
    """
    Query the Wikipedia (MediaWiki) API for an article extract and
    render it through the configured template.

    url     -- full article URL being handled
    domain  -- link domain (unused; part of the common handler signature)
    channel -- channel the link was posted in, for per-channel config

    Falls back to handler_default() whenever the handler is disabled,
    the URL does not look like an article link, a section link is being
    ignored, or no extract could be fetched.
    """
    wikipedia_handler_enabled = self.registryValue("wikipedia.enabled", channel=channel)
    if not wikipedia_handler_enabled:
        return self.handler_default(url, channel)

    self.log.debug("SpiffyTitles: calling Wikipedia handler for %s" % (url))

    # Article links look like /wiki/<Page_title>; anything else
    # (e.g. /w/index.php?...) is left to the default handler.
    # NOTE: the original pattern r"/(?:w(?:iki))/..." collapses to
    # exactly "wiki", so this simplification is behavior-identical.
    pattern = r"/wiki/(?P<page>[^/]+)$"
    info = urlparse(url)
    match = re.search(pattern, info.path)
    if not match:
        self.log.debug("SpiffyTitles: no title found.")
        return self.handler_default(url, channel)
    elif info.fragment and self.registryValue("wikipedia.ignoreSectionLinks", channel=channel):
        # Section-aware output is not implemented, so links with a
        # fragment can optionally be ignored outright.
        self.log.debug("SpiffyTitles: ignoring section link.")
        return self.handler_default(url, channel)
    else:
        page_title = match.groupdict()['page']

    default_api_params = {
        "format": "json",
        "action": "query",
        "prop": "extracts",
        "exsentences": "2",
        "exlimit": "1",
        "exintro": "",
        "explaintext": ""
    }
    # User-supplied key=value pairs may add to or override the defaults.
    extra_params = dict(parse_qsl('&'.join(self.registryValue("wikipedia.apiParams", channel=channel))))
    title_param = {self.registryValue("wikipedia.titleParam", channel=channel): page_title}

    # Merge precedence: defaults < user overrides < the page title.
    api_params = default_api_params.copy()
    api_params.update(extra_params)
    api_params.update(title_param)
    # Join manually rather than via urlencode(): page_title comes out of
    # the URL already percent-encoded, and urlencode() would double-encode
    # it. Use items() — iteritems() does not exist on Python 3.
    api_url = "https://en.wikipedia.org/w/api.php?%s" % (
        '&'.join("%s=%s" % (key, val) for (key, val) in api_params.items()))

    agent = self.get_user_agent()
    headers = {
        "User-Agent": agent
    }
    extract = ""

    self.log.debug("SpiffyTitles: requesting %s" % (api_url))

    request = requests.get(api_url, headers=headers)
    ok = request.status_code == requests.codes.ok

    if ok:
        response = json.loads(request.text)

        if response:
            try:
                # The API keys the single result by page id; grab it
                # without knowing the id. list() is required on Python 3,
                # where dict views are not indexable.
                extract = list(response['query']['pages'].values())[0]['extract']
            except KeyError as e:
                self.log.error("SpiffyTitles: KeyError parsing Wikipedia API JSON response: %s" % (str(e)))
        else:
            self.log.error("SpiffyTitles: Error parsing Wikipedia API JSON response")
    else:
        self.log.error("SpiffyTitles: Wikipedia API HTTP %s: %s" % (request.status_code, request.text))

    if extract:
        # channel=channel added for consistency: removeParentheses is
        # registered as a per-channel value like the other options here.
        if self.registryValue("wikipedia.removeParentheses", channel=channel):
            extract = re.sub(r' ?\([^)]*\)', '', extract)
        max_chars = self.registryValue("wikipedia.maxChars", channel=channel)
        if len(extract) > max_chars:
            # Cut at a word boundary and strip trailing punctuation so
            # the ellipsis reads cleanly; the limit includes the '...'.
            extract = extract[:max_chars - 3].rsplit(' ', 1)[0].rstrip(',.') + '...'

        wikipedia_template = Template(self.registryValue("wikipedia.extractTemplate", channel=channel))
        return wikipedia_template.render({"extract": extract})
    else:
        self.log.debug("SpiffyTitles: falling back to default handler")

        return self.handler_default(url, channel)
|
||||
|
||||
|
||||
def is_valid_imgur_id(self, input):
|
||||
"""
|
||||
Tests if input matches the typical imgur id, which seems to be alphanumeric. Images, galleries,
|
||||
|
|
|
|||
Loading…
Reference in New Issue