first commit
This commit is contained in:
commit
fa9e2f3ba6
|
@ -0,0 +1,23 @@
|
||||||
|
Copyright (c) 2014, Pedro de Oliveira
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are met:
|
||||||
|
|
||||||
|
* Redistributions of source code must retain the above copyright notice, this
|
||||||
|
list of conditions and the following disclaimer.
|
||||||
|
|
||||||
|
* Redistributions in binary form must reproduce the above copyright notice,
|
||||||
|
this list of conditions and the following disclaimer in the documentation
|
||||||
|
and/or other materials provided with the distribution.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||||
|
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||||
|
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||||
|
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||||
|
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||||
|
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||||
|
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
@ -0,0 +1,26 @@
|
||||||
|
sicradical
|
||||||
|
==========
|
||||||
|
|
||||||
|
SIC Radical program videos downloader
|
||||||
|
|
||||||
|
Requirements:
|
||||||
|
- Python
|
||||||
|
- Beautiful Soup - http://www.crummy.com/software/BeautifulSoup/
|
||||||
|
- wget
|
||||||
|
|
||||||
|
Features:
|
||||||
|
- Downloads the full video archive
|
||||||
|
- Can be run in cron to download the latest ones
|
||||||
|
- After 5 "already downloaded" files it exits
|
||||||
|
- Uses a different directory per progName
|
||||||
|
|
||||||
|
Instructions:
|
||||||
|
- Extract the "bs4" directory from the Beautiful Soup package to the directory of the script.
|
||||||
|
- Run the script with an argument that is the progName to download
|
||||||
|
|
||||||
|
Example:
|
||||||
|
- The URL for the "VERY TYPICAL" program is http://sicradical.sapo.pt/programas/very-typical so our progName is very-typical.
|
||||||
|
- To download it just do:
|
||||||
|
```
|
||||||
|
./sicradical.py very-typical
|
||||||
|
```
|
|
@ -0,0 +1,77 @@
|
||||||
|
#!/usr/bin/env python
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
# vim: tabstop=8 expandtab shiftwidth=4 softtabstop=4
|
||||||
|
|
||||||
|
import urllib2
|
||||||
|
import re
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
|
||||||
|
# Directory containing this script; per-program download directories
# are created beneath it.
scriptpath = os.path.dirname(os.path.realpath(__file__))

# Desktop-Chrome User-Agent sent with every request so the site serves
# the regular pages.  The original two-part literal was missing the
# space after "AppleWebKit/537.36", yielding a malformed UA string
# ("...537.36(KHTML, like Gecko)...").
agent = 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 ' \
        '(KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36'
|
||||||
|
|
||||||
|
def parsePage(url, progName):
|
||||||
|
match = re.search(r"/videos/([\w+|-]+)", url)
|
||||||
|
title = match.group(1)
|
||||||
|
|
||||||
|
programpath = scriptpath + "/" + progName
|
||||||
|
if os.path.isdir(programpath) is False:
|
||||||
|
os.makedirs(programpath)
|
||||||
|
destfn = programpath + "/" + title + '.mp4'
|
||||||
|
|
||||||
|
if os.path.isfile(destfn):
|
||||||
|
print "- Ja downloadada... a ignorar"
|
||||||
|
return False
|
||||||
|
|
||||||
|
headers = { 'User-Agent' : agent }
|
||||||
|
req = urllib2.Request(url, None, headers)
|
||||||
|
html = urllib2.urlopen(req).read()
|
||||||
|
|
||||||
|
soup = BeautifulSoup(html, "html.parser")
|
||||||
|
|
||||||
|
videourl = soup.find('source')['src']
|
||||||
|
if videourl:
|
||||||
|
print "- A sacar: " + title
|
||||||
|
cmd = 'wget "' + videourl + '" -O "' + destfn + '"'
|
||||||
|
os.system(cmd + "> /dev/null 2>&1")
|
||||||
|
print "- Done"
|
||||||
|
return True
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Usage: ./sicradical.py <progName>
    # The original only checked argc, so an empty-string argument
    # skipped the progName assignment and caused a NameError below.
    if len(sys.argv) != 2 or not sys.argv[1]:
        sys.exit("Correr com "+sys.argv[0]+" [progName]")
    progName = sys.argv[1]

    exists = 0   # how many "already downloaded" hits so far
    offset = ""  # pagination cursor: datetime of the last item seen

    while True:
        # The API pages by timestamp offset; empty offset = first page.
        url = ("http://sicradical.sapo.pt/api/molecule/category/programas/"
               + progName + "/videos?offset=" + offset)

        headers = {'User-Agent': agent}
        req = urllib2.Request(url, None, headers)
        html = urllib2.urlopen(req).read()

        soup = BeautifulSoup(html, "html.parser")

        # No <article> items at all means we ran past the last page.
        if soup.find('article') is None:
            sys.exit("ultima pagina")

        # apanha todos os items da pagina (grab every item on the page)
        items = soup.findAll('article')

        for item in items:
            # After 5 already-downloaded files, assume the archive is
            # complete (cron-friendly incremental mode).
            if exists >= 5:
                sys.exit("A sair apos 5 falhas, ja devo ter tudo...")

            # video page URL
            link = item.find('a')['href']
            # item timestamp doubles as the next-page offset
            dt = item.find('p', {'class': 'timeStamp'})['datetime']
            offset = dt

            if parsePage(link, progName) is False:
                exists = exists + 1
|
Loading…
Reference in New Issue