mirror of https://github.com/falsovsky/RTPapd.git
RTP changed the site again, fixed.
Also applied some better practices to the code.
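The "better practices" referred to are visible in the diff below: boolean results are compared with "is" instead of "==", and the variable "id", which shadows Python's built-in id(), is renamed to progId. A minimal sketch of the comparison change, using a made-up path for illustration; note that PEP 8 actually recommends plain truth testing over comparing against True/False with either operator:

    import os

    programpath = "/tmp/example"  # hypothetical path, for illustration only

    # Before this commit: equality comparison against a boolean literal.
    if os.path.isdir(programpath) == False:
        os.makedirs(programpath)

    # After this commit: identity comparison (True and False are singletons).
    if os.path.isdir(programpath) is False:
        os.makedirs(programpath)

    # What PEP 8 itself suggests: no comparison against a boolean at all.
    if not os.path.isdir(programpath):
        os.makedirs(programpath)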
This commit is contained in:
parent 8b9d315dd0
commit ef48fac7df
@@ -2,7 +2,6 @@
 # -*- coding: utf-8 -*-
 # vim: tabstop=8 expandtab shiftwidth=4 softtabstop=4
 
-from bs4 import BeautifulSoup
 import urllib2
 import re
 import unicodedata
@@ -10,6 +9,7 @@ import os
 import string
 import sys
 import time
+from bs4 import BeautifulSoup
 
 months = {
     'Jan': '01',
@@ -40,12 +40,12 @@ def parseRTMP(url,title,progId):
     episode_id = match.group(1)
 
     programpath = scriptpath+"/"+progId
-    if os.path.isdir(programpath) == False:
+    if os.path.isdir(programpath) is False:
         os.makedirs(programpath)
     destfn = programpath + "/" + title + "_" + episode_id + '.mp3'
     page = urllib2.urlopen(url)
 
-    match = re.search('"hls_url": "(.+?)",', page.read())
+    match = re.search('file: "(.+?)",', page.read())
     if match:
         if os.path.isfile(destfn):
             print "- Ja downloadada... a ignorar"
@@ -56,11 +56,12 @@
         print "- Done"
         return True
 
 if __name__ == "__main__":
     if len(sys.argv) != 2:
         sys.exit("Correr com "+sys.argv[0]+" [progId]")
 
     if sys.argv[1].isdigit():
-        id = sys.argv[1]
+        progId = sys.argv[1]
     else:
         sys.exit("progId tem de ser um numero")
+
@@ -68,12 +69,12 @@ exists = 0
 c = 1
 while True:
     print "--- Pagina " + str(c)
-    url = "http://www.rtp.pt/play/bg_l_ep/?stamp=" + str(int(time.time())) + "&listDate=&listQuery=&listProgram=" + str(id) + "&listcategory=&listchannel=&listtype=recent&page=" + str(c) + "&type=all"
+    url = "http://www.rtp.pt/play/bg_l_ep/?stamp=" + str(int(time.time())) + "&listDate=&listQuery=&listProgram=" + str(progId) + "&listcategory=&listchannel=&listtype=recent&page=" + str(c) + "&type=all"
 
     page = urllib2.urlopen(url)
     soup = BeautifulSoup(page.read(), "html.parser")
 
-    if (soup.find('div') == None):
+    if soup.find('div') is None:
         sys.exit("ultima pagina")
 
     # apanha todos os items da pagina
@@ -108,8 +109,8 @@ while True:
         print "-- " + dt, pt
 
         title = removeDisallowedFilenameChars(dt + "-" + pt)
-        if parseRTMP(link['href'],title,id) == False:
+        if parseRTMP(link['href'], title, progId) is False:
             exists = exists + 1
 
     c = c + 1
 
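For readers following the fix itself: parseRTMP() downloads an episode page and pulls the stream URL out of the embedded player configuration with a regular expression, and this commit switches the pattern from '"hls_url": "(.+?)",' to 'file: "(.+?)",' to track the markup change on rtp.pt. Below is a minimal Python 2 sketch of that extraction; the HTML excerpt and the RTPPlayer name are made up to stand in for the live page, which the real script fetches with urllib2.urlopen(url).read():

    # -*- coding: utf-8 -*-
    import re

    # Hypothetical excerpt of an episode page; the real script obtains
    # this markup from rtp.pt with urllib2.
    html = '''
    <script>
    var player = new RTPPlayer({
        file: "http://streaming.rtp.pt/example/episode_1234.mp3",
    });
    </script>
    '''

    # Same pattern the commit introduces: a non-greedy group between quotes.
    match = re.search('file: "(.+?)",', html)
    if match:
        print match.group(1)  # http://streaming.rtp.pt/example/episode_1234.mp3
    else:
        print "- player config not found"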