# limnoria-plugins/Priberam/plugin.py

###
# Copyright (c) 2019, Pedro de Oliveira
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
#   * Redistributions of source code must retain the above copyright notice,
#     this list of conditions, and the following disclaimer.
#   * Redistributions in binary form must reproduce the above copyright notice,
#     this list of conditions, and the following disclaimer in the
#     documentation and/or other materials provided with the distribution.
#   * Neither the name of the author of this software nor the name of
#     contributors to this software may be used to endorse or promote products
#     derived from this software without specific prior written consent.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.

###

from supybot import utils, plugins, ircutils, callbacks
from supybot.commands import *
try:
    from supybot.i18n import PluginInternationalization
    _ = PluginInternationalization('Priberam')
except ImportError:
    # Placeholder that allows to run the plugin on a bot
    # without the i18n module
    _ = lambda x: x
import urllib.parse
from datetime import datetime
import re
import requests
import atoma
from bs4 import BeautifulSoup


class Priberam(callbacks.Plugin):
    """Priberam dictionary client"""
    threaded = True

    # HTTP headers sent with every request made by this plugin.
    _HEADERS = {'User-agent': 'Mozilla/5.0'}
    # Seconds to wait for the server; without a timeout a stalled connection
    # would block the command thread indefinitely.
    _TIMEOUT = 10

    def __clean_str(self, text, clean_number=False):
        """Normalise a chunk of text scraped from the dictionary page.

        Collapses every run of whitespace into a single space, removes
        bracketed annotations (``[...]``) and strips leading whitespace.

        :param text: raw text taken from an HTML element.
        :param clean_number: when true, also drop a leading ``"<n>. "``
            numbering prefix.
        :returns: the cleaned string.
        """
        text = re.sub(r'\s+', ' ', text)
        text = re.sub(r'\[.*?\]', '', text)
        text = re.sub(r'^\s+', '', text)
        if clean_number:
            text = re.sub(r'^\d+\. +', '', text)
        return text

    def __get_definition(self, word, position):
        """Fetch the page for *word* and extract the definition at *position*.

        :param word: the word to look up; it is URL-quoted before use.
        :param position: 1-based index of the wanted definition.
        :returns: a dict with the keys ``error`` (bool), ``total`` (number of
            definitions counted, or None), ``value`` (the cleaned definition,
            the cleaned text of the page's alert box when one is present,
            or None) and ``extra`` (text of the first ``span.def`` element
            when it starts with a lowercase letter, else None).
        :raises requests.RequestException: propagated from ``requests.get``
            on network failure or timeout.
        """
        definition = {
            'error': False,
            'total': None,
            'value': None,
            'extra': None
        }

        response = requests.get(
            "https://dicionario.priberam.org/{}".format(urllib.parse.quote(word)),
            headers=self._HEADERS,
            timeout=self._TIMEOUT)
        # Name the parser explicitly so the resulting tree does not depend on
        # which optional parsers happen to be installed.
        soup = BeautifulSoup(response.content.decode('utf-8'), 'html.parser')

        # Check if word really exists
        error = soup.find('div', {'class': 'alert alert-info'})
        if error is not None:
            definition['error'] = True
            definition['value'] = self.__clean_str(error.text)
            return definition

        # Get all definitions from page
        results = soup.find('div', {'id': 'resultados'})
        if results is None:
            # Unexpected page layout: report "nothing found" instead of
            # raising AttributeError on None.
            definition['error'] = True
            return definition
        definitions = results.find_all('p')

        total = 0
        # Only count the definitions that begin with a number
        # Or the first, if there is only one, even without a number
        # NOTE(review): ``idx > 1`` always counts the first *two* paragraphs,
        # which looks off-by-one against the comment above -- confirm against
        # the real page structure before changing it.
        for idx, paragraph in enumerate(definitions):
            if idx > 1 and not self.__clean_str(paragraph.text)[:1].isdigit():
                break
            total += 1

        # Reject positions below 1 as well: a zero or negative index would
        # silently pick a paragraph counted from the end of the list.
        if position < 1 or position > total:
            definition['error'] = True
            return definition

        # The span may be missing or empty; slicing keeps both cases safe.
        extra_tag = soup.find('span', {'class': 'def'})
        extra = extra_tag.text if extra_tag is not None else ''

        definition['total'] = total
        definition['value'] = self.__clean_str(definitions[position - 1].text, True)
        if extra[:1].islower():
            definition['extra'] = extra

        return definition

    def find(self, irc, msg, args, word, position):
        """<word> [position]

        Returns the dictionary definition of a <word>, at [position].
        """
        definition = self.__get_definition(word, position)

        if definition['error']:
            # Relay the site's own message when there is one; otherwise
            # (position out of range, unexpected page) stay silent.
            if definition['value'] is not None:
                irc.reply(definition['value'], prefixNick=False)
            return

        message = "[{}/{}] {}".format(
            position,
            definition['total'],
            word)

        # Compare by value: ``is`` tests identity, which is not guaranteed
        # for integers.
        if position == 1 and definition['extra'] is not None:
            message = "{} ({})".format(message, definition['extra'])

        message = "{} {}".format(message, definition['value'])
        irc.reply(message, prefixNick=False)
    find = wrap(find, ['anything', optional('int', default=1)])

    def wotd(self, irc, msg, args, argument):
        """[argument]
        Returns the definition of the word of the day.
        If [argument] is "cron" then just return if the time is 00:00.
        """

        if argument == 'cron':
            now = datetime.now()
            if now.hour != 0 or now.minute != 0:
                return

        response = requests.get(
            "https://dicionario.priberam.org/DoDiaRSS.aspx",
            headers=self._HEADERS,
            timeout=self._TIMEOUT)
        feed = atoma.parse_rss_bytes(response.content)

        # An empty feed or an item without a title gives nothing to look up.
        if not feed.items or not feed.items[0].title:
            return
        title = feed.items[0].title

        definition = self.__get_definition(title, 1)
        if definition['error']:
            # Same policy as ``find``: relay the site's message if any.
            if definition['value'] is not None:
                irc.reply(definition['value'], prefixNick=False)
            return

        message = "Palavra do dia: {}".format(title)
        # Leave out the parenthesised part rather than printing "(None)".
        if definition['extra'] is not None:
            message = "{} ({})".format(message, definition['extra'])
        message = "{} {}".format(message, definition['value'])
        irc.reply(message, prefixNick=False)
    wotd = wrap(wotd, [optional('anything')])

# Module-level alias for the plugin class.
# NOTE(review): presumably the name the bot's plugin loader looks up -- confirm.
Class = Priberam


# vim:set shiftwidth=4 softtabstop=4 expandtab textwidth=79: