RTP changed its site again; fixed the stream URL pattern to match.

Also applied some better coding practices: `is` comparisons, a `__main__` guard, and renaming `id` to `progId`.
2017-01-29 16:40:45 +00:00 · 2017-01-29 16:40:45 +00:00 · ef48fac7df
parent 8b9d315dd0
commit ef48fac7df
1 changed files with 62 additions and 61 deletions
--- a/rtp.py
+++ b/rtp.py
@@ -2,7 +2,6 @@
 # -*- coding: utf-8 -*-
 # vim: tabstop=8 expandtab shiftwidth=4 softtabstop=4

-from bs4 import BeautifulSoup
 import urllib2
 import re
 import unicodedata
@@ -10,6 +9,7 @@ import os
 import string
 import sys
 import time
+from bs4 import BeautifulSoup

 months = {
    'Jan': '01',
@@ -40,12 +40,12 @@ def parseRTMP(url,title,progId):
    episode_id = match.group(1)

    programpath = scriptpath+"/"+progId
-    if os.path.isdir(programpath) == False:
+    if os.path.isdir(programpath) is False:
        os.makedirs(programpath)
    destfn = programpath + "/" + title + "_" + episode_id + '.mp3'
    page = urllib2.urlopen(url)

-    match = re.search('"hls_url": "(.+?)",', page.read())
+    match = re.search('file: "(.+?)",', page.read())
    if match:
        if os.path.isfile(destfn):
            print "- Ja downloadada... a ignorar"
@@ -56,11 +56,12 @@ def parseRTMP(url,title,progId):
        print "- Done"
        return True

+if __name__ == "__main__":
    if len(sys.argv) != 2:
        sys.exit("Correr com "+sys.argv[0]+" [progId]")

    if sys.argv[1].isdigit():
-    id = sys.argv[1]
+        progId = sys.argv[1]
    else:
        sys.exit("progId tem de ser um numero")

@@ -68,12 +69,12 @@ exists = 0
    c = 1
    while True:
        print "--- Pagina " + str(c)
-    url = "http://www.rtp.pt/play/bg_l_ep/?stamp=" + str(int(time.time())) + "&listDate=&listQuery=&listProgram=" + str(id) + "&listcategory=&listchannel=&listtype=recent&page=" + str(c) + "&type=all"
+        url = "http://www.rtp.pt/play/bg_l_ep/?stamp=" + str(int(time.time())) + "&listDate=&listQuery=&listProgram=" + str(progId) + "&listcategory=&listchannel=&listtype=recent&page=" + str(c) + "&type=all"

        page = urllib2.urlopen(url)
        soup = BeautifulSoup(page.read(), "html.parser")

-    if (soup.find('div') == None):
+        if soup.find('div') is None:
            sys.exit("ultima pagina")

        # apanha todos os items da pagina
@@ -108,8 +109,8 @@ while True:
            print "-- " +  dt, pt

            title = removeDisallowedFilenameChars(dt + "-" + pt)
-        if parseRTMP(link['href'],title,id) == False:
+
+            if parseRTMP(link['href'], title, progId) is False:
                exists = exists + 1

        c = c + 1
-