first commit
commit fa9e2f3ba6
@@ -0,0 +1,23 @@
Copyright (c) 2014, Pedro de Oliveira
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

* Redistributions of source code must retain the above copyright notice, this
  list of conditions and the following disclaimer.

* Redistributions in binary form must reproduce the above copyright notice,
  this list of conditions and the following disclaimer in the documentation
  and/or other materials provided with the distribution.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
@@ -0,0 +1,26 @@
sicradical
==========

SIC Radical program videos downloader

Requirements:
- Python 2 (the script uses urllib2)
- Beautiful Soup - http://www.crummy.com/software/BeautifulSoup/
- wget

Features:
- Downloads the full video archive
- Can be run from cron to download the latest videos (see the cron example after the usage example below)
- Exits after it finds 5 "already downloaded" files
- Uses a different directory per progName

Instructions:
- Extract the "bs4" directory from the Beautiful Soup package into the directory of the script.
- Run the script with the progName to download as its only argument.

Example:
- The URL for the "VERY TYPICAL" program is http://sicradical.sapo.pt/programas/very-typical, so our progName is very-typical.
- To download it, just run:
```
./sicradical.py very-typical
```
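
Cron example:
- The script skips videos it already has and stops after 5 repeats, so it is cheap to run periodically. A minimal sketch, assuming the repository is checked out at /opt/sicradical (adjust the path to your checkout) and the script is executable, of a crontab entry that checks for new very-typical videos every hour:
```
# assumes the script lives in /opt/sicradical and is executable
0 * * * * /opt/sicradical/sicradical.py very-typical
```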
@@ -0,0 +1,77 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# vim: tabstop=8 expandtab shiftwidth=4 softtabstop=4

import urllib2
import re
import os
import sys
from bs4 import BeautifulSoup

scriptpath = os.path.dirname(os.path.realpath(__file__))
agent = 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 ' \
        '(KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36'


def parsePage(url, progName):
    # The video slug after /videos/ in the URL becomes the local file name.
    match = re.search(r"/videos/([\w+|-]+)", url)
    title = match.group(1)

    # One directory per program, created next to the script.
    programpath = scriptpath + "/" + progName
    if not os.path.isdir(programpath):
        os.makedirs(programpath)
    destfn = programpath + "/" + title + '.mp4'

    if os.path.isfile(destfn):
        print "- Already downloaded... skipping"
        return False

    headers = {'User-Agent': agent}
    req = urllib2.Request(url, None, headers)
    html = urllib2.urlopen(req).read()

    soup = BeautifulSoup(html, "html.parser")

    videourl = soup.find('source')['src']
    if videourl:
        print "- Downloading: " + title
        cmd = 'wget "' + videourl + '" -O "' + destfn + '"'
        os.system(cmd + " > /dev/null 2>&1")
        print "- Done"
    return True


if __name__ == "__main__":
    if len(sys.argv) != 2:
        sys.exit("Run as " + sys.argv[0] + " [progName]")

    progName = sys.argv[1]

    exists = 0
    offset = ""
    while True:
        url = "http://sicradical.sapo.pt/api/molecule/category/programas/" + progName + "/videos?offset=" + offset

        headers = {'User-Agent': agent}
        req = urllib2.Request(url, None, headers)
        html = urllib2.urlopen(req).read()

        soup = BeautifulSoup(html, "html.parser")

        if soup.find('article') is None:
            sys.exit("last page")

        # grab all the items on the page
        items = soup.findAll('article')

        for item in items:
            if exists >= 5:
                sys.exit("Exiting after 5 misses, should have everything by now...")

            # video page URL
            link = item.find('a')['href']
            # publication date; also reused as the offset for the next API page
            dt = item.find('p', {'class': 'timeStamp'})['datetime']
            offset = dt

            if parsePage(link, progName) is False:
                exists = exists + 1
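
The download step in parsePage() shells out to wget via os.system with plain string concatenation, so an unusual character in the video URL could break the command and a failed wget goes unnoticed. Below is a minimal, hypothetical sketch of a wget-free alternative for that one step, staying with the script's own urllib2; the download() helper name and the 64 KiB chunk size are choices of this sketch, not part of the original script.
```
import urllib2

def download(videourl, destfn, agent):
    # Sketch only: stream the video straight to destfn instead of calling wget.
    # The 64 KiB chunk size is an arbitrary choice of this sketch.
    req = urllib2.Request(videourl, None, {'User-Agent': agent})
    resp = urllib2.urlopen(req)
    with open(destfn, 'wb') as out:
        while True:
            chunk = resp.read(64 * 1024)
            if not chunk:
                break
            out.write(chunk)
    resp.close()
```
Inside parsePage() this would replace the cmd/os.system pair with download(videourl, destfn, agent).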