157 lines
5.6 KiB
Python
157 lines
5.6 KiB
Python
###
|
||
# Copyright (c) 2019, Pedro de Oliveira
|
||
# All rights reserved.
|
||
#
|
||
# Redistribution and use in source and binary forms, with or without
|
||
# modification, are permitted provided that the following conditions are met:
|
||
#
|
||
# * Redistributions of source code must retain the above copyright notice,
|
||
# this list of conditions, and the following disclaimer.
|
||
# * Redistributions in binary form must reproduce the above copyright notice,
|
||
# this list of conditions, and the following disclaimer in the
|
||
# documentation and/or other materials provided with the distribution.
|
||
# * Neither the name of the author of this software nor the name of
|
||
# contributors to this software may be used to endorse or promote products
|
||
# derived from this software without specific prior written consent.
|
||
#
|
||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||
# POSSIBILITY OF SUCH DAMAGE.
|
||
|
||
###
|
||
|
||
from supybot import utils, plugins, ircutils, callbacks
|
||
from supybot.commands import *
|
||
try:
|
||
from supybot.i18n import PluginInternationalization
|
||
_ = PluginInternationalization('Priberam')
|
||
except ImportError:
|
||
# Placeholder that allows the plugin to run on a bot
|
||
# without the i18n module
|
||
_ = lambda x: x
|
||
import urllib.parse
|
||
from datetime import datetime
|
||
import re
|
||
import requests
|
||
import atoma
|
||
from bs4 import BeautifulSoup
|
||
|
||
|
||
class Priberam(callbacks.Plugin):
    """Priberam dictionary client"""
    # NOTE(review): presumably tells the bot framework to run this plugin's
    # commands in their own thread (they do blocking HTTP) -- confirm
    # against the supybot documentation.
    threaded = True

    # Shared HTTP settings for every request issued by this plugin.
    # The timeout (seconds) keeps a stalled server from blocking the
    # command forever; without it ``requests`` waits indefinitely.
    _HTTP_HEADERS = {'User-agent': 'Mozilla/5.0'}
    _HTTP_TIMEOUT = 10

    def __clean_str(self, text, clean_number=False):
        """Normalise a piece of text scraped from a definition page.

        Every run of whitespace is collapsed to a single space, every
        square-bracketed segment is removed, and leading whitespace is
        stripped.

        :param text: raw text taken from the parsed HTML.
        :param clean_number: when true, also drop a leading
            ``"<digits>. "`` numbering prefix.
        :returns: the cleaned string.
        """
        text = re.sub(r'\s+', ' ', text)
        text = re.sub(r'\[.*?\]', '', text)
        # Only plain spaces can remain at this point, so lstrip() is enough.
        text = text.lstrip()
        if clean_number:
            text = re.sub(r'^\d+\. +', '', text)
        return text

    def __get_definition(self, word, position):
        """Download the page for *word* and extract one definition.

        :param word: the word to look up; it is URL-quoted before use.
        :param position: 1-based index of the wanted definition.
        :returns: a dict with the keys

            * ``error`` -- True when no definition could be produced;
            * ``total`` -- number of definitions counted on the page
              (None on error);
            * ``value`` -- the cleaned definition text. On error it is the
              cleaned text of the page's ``alert alert-info`` box when
              that box is present, otherwise None;
            * ``extra`` -- text of the first ``span.def`` element when it
              starts with a lowercase letter, otherwise None.
        """
        response = requests.get(
            "https://dicionario.priberam.org/{}".format(
                urllib.parse.quote(word)),
            headers=self._HTTP_HEADERS,
            timeout=self._HTTP_TIMEOUT)
        # Name the parser explicitly so the result does not depend on which
        # optional parsers happen to be installed on the host.
        soup = BeautifulSoup(response.content.decode('utf-8'), 'html.parser')

        definition = {
            'error': False,
            'total': None,
            'value': None,
            'extra': None
        }

        # Check if word really exists
        error = soup.find('div', {'class': 'alert alert-info'})
        if error:
            definition['error'] = True
            definition['value'] = self.__clean_str(error.text)
            return definition

        # Get all definitions from page
        results = soup.find('div', {'id': 'resultados'})
        if results is None:
            # Unexpected page layout: report a plain error instead of
            # failing with AttributeError on None.
            definition['error'] = True
            return definition
        definitions = results.find_all('p')

        # Only count the definitions that begin with a number
        # Or the first, if there is only one, even without a number
        # NOTE(review): the ``idx > 1`` test counts the first *two*
        # paragraphs unconditionally; kept as in the original -- confirm
        # against the real page markup.
        total = 0
        for idx, paragraph in enumerate(definitions):
            starts_with_digit = self.__clean_str(paragraph.text)[:1].isdigit()
            if idx > 1 and not starts_with_digit:
                break
            total += 1

        # Reject positions below 1 as well: zero or a negative value would
        # otherwise silently index from the end of the list.
        if position < 1 or position > total:
            definition['error'] = True
            return definition

        label = soup.find('span', {'class': 'def'})
        extra = label.text if label is not None else ''

        definition['total'] = total
        definition['value'] = self.__clean_str(
            definitions[position - 1].text, True)
        # Slicing keeps an empty label from raising IndexError.
        if extra[:1].islower():
            definition['extra'] = extra

        return definition

    # The docstring below is left textually unchanged: the bot framework
    # appears to use command docstrings as user-visible help text.
    def find(self, irc, msg, args, word, position):
        """<word> [position]

        Returns the dictionary definition of a <word>, at [position].
        """
        definition = self.__get_definition(word, position)

        if definition['error']:
            # Only the "word not found" case carries a message to relay;
            # any other failure stays silent, as before.
            if definition['value'] is not None:
                irc.reply(definition['value'], prefixNick=False)
            return

        message = "[{}/{}] {}".format(position, definition['total'], word)

        # Compare by value: ``is`` on an int literal relies on an
        # interpreter implementation detail.
        if position == 1 and definition['extra'] is not None:
            message = "{} ({})".format(message, definition['extra'])

        message = "{} {}".format(message, definition['value'])
        irc.reply(message, prefixNick=False)
    find = wrap(find, ['anything', optional('int', default=1)])

    # The docstring below is left textually unchanged for the same reason
    # as in ``find``.
    def wotd(self, irc, msg, args, argument):
        """[argument]
        Returns the definition of the word of the day.
        If [argument] is "cron" then just return if the time is 00:00.
        """
        # In "cron" mode nothing is sent unless the local time is 00:00.
        if argument == 'cron':
            now = datetime.now()
            if now.hour != 0 or now.minute != 0:
                return

        response = requests.get(
            "https://dicionario.priberam.org/DoDiaRSS.aspx",
            headers=self._HTTP_HEADERS,
            timeout=self._HTTP_TIMEOUT
        )
        feed = atoma.parse_rss_bytes(response.content)
        if not feed.items:
            # Empty feed: nothing to announce.
            return

        word = feed.items[0].title
        definition = self.__get_definition(word, 1)

        if definition['error']:
            # Same policy as ``find``: relay the site's message when there
            # is one rather than printing "None" placeholders.
            if definition['value'] is not None:
                irc.reply(definition['value'], prefixNick=False)
            return

        message = "Palavra do dia: {}".format(word)
        if definition['extra'] is not None:
            message = "{} ({})".format(message, definition['extra'])
        message = "{} {}".format(message, definition['value'])
        irc.reply(message, prefixNick=False)
    wotd = wrap(wotd, [optional('anything')])
|
||
|
||
# Module-level alias for the plugin class. Presumably this is the name the
# bot's plugin loader looks up -- confirm against the supybot plugin docs.
Class = Priberam
|
||
|
||
|
||
# vim:set shiftwidth=4 softtabstop=4 expandtab textwidth=79:
|