#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Download the audio files of an RTP Play programme.

Run as ``<script> progId`` where ``progId`` is the numeric programme id.
Files are stored as ``<script dir>/<progId>/<title>.mp3``.

This is Python 2 code (``urllib2``); the ``print(...)`` calls use the
single-argument form, which behaves the same as the print statement there.
"""

from bs4 import BeautifulSoup
import urllib2
import re
import unicodedata
import os
import string
import subprocess
import sys

# Portuguese month abbreviations mapped to two-digit month numbers.
# (Not used in the visible part of the file; presumably used further down.)
months = {
    'Jan': '01',
    'Fev': '02',
    'Mar': '03',
    'Abr': '04',
    'Mai': '05',
    'Jun': '06',
    'Jul': '07',
    'Ago': '08',
    'Set': '09',
    'Out': '10',
    'Nov': '11',
    'Dez': '12'
}

# Directory that contains this script; downloads are stored below it.
scriptpath = os.path.dirname(os.path.realpath(__file__))

# Characters allowed to survive in a generated file name.
validFilenameChars = "-_. %s%s" % (string.ascii_letters, string.digits)


def removeDisallowedFilenameChars(filename):
    """Return *filename* reduced to the characters in ``validFilenameChars``.

    The text is NFKD-normalised and encoded to ASCII with non-encodable
    characters ignored, so accented letters lose their accents instead of
    being dropped entirely; every remaining character that is not in
    ``validFilenameChars`` is then removed.

    ``unicodedata.normalize`` needs a unicode string as input.
    """
    cleanedFilename = unicodedata.normalize('NFKD', filename).encode('ASCII', 'ignore')
    return ''.join(c for c in cleanedFilename if c in validFilenameChars)


def _fetch_with_wget(source, destfn):
    """Download *source* to *destfn* with wget; return True on success.

    The data is written to ``destfn + '.part'`` and only renamed to *destfn*
    once wget exits with status 0, so an interrupted or failed transfer never
    leaves a file that a later run would mistake for a finished download.
    """
    partial = destfn + '.part'
    devnull = open(os.devnull, 'w')
    try:
        # An argument list (no shell) keeps quotes, '$' or backticks in the
        # title or in the scraped path from being interpreted as shell syntax.
        status = subprocess.call(['wget', source, '-O', partial],
                                 stdout=devnull, stderr=devnull)
    except OSError:
        # wget could not be started at all (e.g. it is not installed).
        status = -1
    finally:
        devnull.close()

    if status != 0:
        if os.path.exists(partial):
            os.remove(partial)
        return False
    os.rename(partial, destfn)
    return True


def parseRTMP(url, title, progId):
    """Download the audio of one episode page into the programme directory.

    url     -- path of the episode page, relative to http://www.rtp.pt
    title   -- base name (without extension) for the saved .mp3 file
    progId  -- programme id (string); names the sub-directory of the script
               directory in which the file is stored

    The page is searched for a ``"file": "...//<path>"`` entry and
    ``http://rsspod.rtp.pt/podcasts/<path>`` is fetched with wget. Nothing is
    downloaded when the page has no such entry or when the destination file
    already exists. Returns None.
    """
    url = 'http://www.rtp.pt' + url
    programpath = scriptpath + "/" + progId
    if not os.path.isdir(programpath):
        os.makedirs(programpath)
    destfn = programpath + "/" + title + '.mp3'

    page = urllib2.urlopen(url)
    try:
        html = page.read()
    finally:
        # Release the HTTP connection even if reading fails.
        page.close()

    match = re.search('"file": ".*?//(.*?)"', html, re.MULTILINE)
    if not match:
        return

    if os.path.isfile(destfn):
        print("- Ja downloadada... a ignorar")
        return

    print("- A sacar...")
    source = 'http://rsspod.rtp.pt/podcasts/' + match.group(1)
    if _fetch_with_wget(source, destfn):
        print("- Done")
    else:
        print("- Falhou: o wget nao conseguiu sacar o ficheiro")


if len(sys.argv) != 2:
    sys.exit("Run with "+sys.argv[0]+" [progId]")

if sys.argv[1].isdigit():
    # The name `id` shadows the builtin; it is kept because code further down
    # the file (outside the visible part) may refer to it.
    id = sys.argv[1]
else:
    sys.exit("progId must be a number")

# get the total number of pages
url = "http://www.rtp.pt/play/browseprog/"+id+"/1/true"
page = urllib2.urlopen(url)
# NOTE(review): the visible source is cut off at this point, in the middle of
# the next statement, which begins with:  match = re.search(r'
# Its pattern and everything after it are not visible and are therefore not
# reproduced here; restore the remainder from the original file.