#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# cvh_video.py
#
# Copyright 2010 Javier Rovegno Campos <tatadeluxe<at>gmail.com>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
# MA 02110-1301, USA.
#
"""
Script para descargar videos desde http://www.chilevision.cl/
Requiere:
- aria2c - wget
Extras:
Ofrece Descargar el resto de los videos
Uso:
# Comillas requeridas, problema parser
cvh_video.py "http://www.chilevision.cl/home/index.php?option=com_content&task=view&id=YYYYY&Itemid=XXX"
"""
import commands
import getopt
import os
import re
import signal
import subprocess
import sys
import urllib
def main():
    """Command-line entry point.

    Recognises ``-h`` / ``--help``, which print the module docstring and
    exit with status 0.  An unknown option prints the getopt error plus a
    hint and exits with status 2.  Every positional argument is passed, in
    order, to ``process()`` (defined further down in this module).
    """
    try:
        options, urls = getopt.getopt(sys.argv[1:], "h", ["help"])
    except getopt.error as err:
        print(err)
        print("for help use --help")
        sys.exit(2)

    # The only options getopt accepts here are the help switches.
    wants_help = any(flag in ("-h", "--help") for flag, _value in options)
    if wants_help:
        print(__doc__)
        sys.exit(0)

    for url in urls:
        process(url)
def process(arg):
    '''Download the video embedded in the page at URL *arg*.

    The video URL is obtained with ``extrae_url_file()`` and fetched with
    ``aria2c``.  When ``aria2c`` succeeds the URL is logged and the user is
    offered the download of the sibling videos.  When ``aria2c`` cannot be
    started, ``wget`` is used instead.  The URL is written to the log only
    after a downloader reports success (exit status 0).

    Downloaders are started without a shell, so characters in the scraped
    URL are never interpreted as shell syntax.

    Raises whatever ``extrae_url_file()`` raises, e.g.:

    >>> process('http://www.chilevision.cl/home/index.php?option=com_content&task=view&id=YYYYY&Itemid=XXX')
    Traceback (most recent call last):
    ...
    ValueError: Descarga video fallida, url no contiene video
    '''
    url_file = extrae_url_file(arg)
    status = _run_downloader('aria2c', url_file)
    if status == 0:
        print('Descarga video realizada con éxito!')
        log_file(url_file)
        if pregunta_download_more():
            download_more(url_file)
        else:
            print('La opción que elige es no descargar videos similares')
    elif status is None:
        # aria2c could not be started: fall back to wget.
        print('Recomendamos que instale aria2c para acelerar la descarga')
        if _run_downloader('wget', url_file) == 0:
            print('Descarga video realizada con éxito!')
            log_file(url_file)
        else:
            print('Descarga video fallida!')
    else:
        print('Descarga video fallida!')


def _run_downloader(program, url):
    """Run ``program url`` without a shell.

    Returns the program's exit status, or None when the program could not
    be started at all (typically because it is not installed).
    """
    try:
        return subprocess.call([program, url])
    except OSError:
        return None
def pregunta_download_more():
    """Ask whether the remaining similar videos should be downloaded.

    The prompt waits up to 5 seconds for an answer.  Returns True only for
    the answer ``s``; the comparison ignores letter case and surrounding
    whitespace, so ``S`` or an ``s`` without a trailing newline also count
    as yes.  Anything else, including a timeout, returns False.
    """
    answer = raw_input_timer(
        'Descargar el resto de los videos similares s/(n): ', 5)
    return answer.strip().lower() == 's'
def download_more(url_file):
    """Download the sibling videos of *url_file* with a single aria2c run.

    Assumes *url_file* ends in a two-digit sequence number followed by a
    four-character extension (e.g. ``...03.flv``): the last four characters
    are kept as the extension and the two before them are replaced by
    ``01`` .. ``49`` to build candidate URLs.

    Each candidate is checked with ``verifica_url_file()``; those that pass
    are logged with ``log_file()`` and queued.  Probing stops at the fourth
    failed candidate (up to three failures are tolerated).  *url_file*
    itself is skipped because the caller has already downloaded it.

    aria2c is started without a shell, so the URLs are passed verbatim.
    """
    ext = url_file[-4:]
    pref = url_file[:-6]
    max_errors = 3
    errors = 0
    video_urls = []
    for n in range(1, 50):
        candidate = '%s%02d%s' % (pref, n, ext)
        if candidate == url_file:
            # Already downloaded and logged by the caller.
            continue
        if verifica_url_file(candidate):
            log_file(candidate)
            video_urls.append(candidate)
        elif errors < max_errors:
            # Tolerate up to max_errors missing candidates.
            errors += 1
        else:
            break
    if video_urls:
        try:
            # -Z makes aria2c treat each URL as a separate download.
            subprocess.call(['aria2c', '-Z'] + video_urls)
        except OSError:
            # aria2c could not be started.
            print('Descarga video fallida!')
    else:
        print("No hay más videos similares para descargar")
def verifica_url_file(url_file_aux):
    """Return True when *url_file_aux* is served as a binary download.

    The URL is opened and its ``Content-Type`` header is compared with
    ``application/octet-stream``.  A URL that cannot be opened (IOError)
    is reported as False instead of aborting the caller.  The connection
    is always closed.
    """
    try:
        sock = urllib.urlopen(url_file_aux)
    except IOError:
        return False
    try:
        return sock.info().typeheader == 'application/octet-stream'
    finally:
        sock.close()
def extrae_url_file(url):
    """Return the video file URL embedded in the page at *url*.

    The page is searched for ``playerCHV('<video url>'``; the quoted text
    is returned with every ``%2F`` replaced by ``/``.

    When the page has no ``playerCHV(`` call, it is scanned for links of
    the form ``<id>&Itemid=2389``.  If any are found, the corresponding
    page URLs are printed (quoted, space separated, each id once, in order
    of first appearance) and the program exits with status 0.

    Raises:
        ValueError: the page contains neither a player call nor any such
            link, or the player call's quoted URL is not terminated.
    """
    sock = urllib.urlopen(url)
    try:
        html_source = sock.read()
    finally:
        sock.close()

    marker = 'playerCHV('
    marker_pos = html_source.find(marker)
    if marker_pos == -1:
        # No player on this page: look for links to pages that have one.
        video_ids = []
        for video_id in re.findall(r"(\d{1,8})&Itemid=2389", html_source):
            if video_id not in video_ids:
                video_ids.append(video_id)
        if not video_ids:
            raise ValueError("Descarga video fallida, url no contiene video")
        print(''.join(
            '"http://www.chilevision.cl/home/index.php?option=com_content&task=view&id=%s&Itemid=2389" '
            % video_id for video_id in video_ids))
        sys.exit(0)

    # Skip the marker and the opening quote that follows it.
    video_url_init = marker_pos + len(marker) + 1
    video_url_end = html_source.find('\'', video_url_init)
    if video_url_end == -1:
        raise ValueError("Descarga video fallida, url no contiene video")
    return html_source[video_url_init:video_url_end].replace('%2F', '/')
def log_file(url_file):
    """Append *url_file*, followed by a newline, to ``log_chv_video.txt``.

    The log lives in the current working directory and is created on first
    use.  The file is closed even if the write fails.
    """
    with open('log_chv_video.txt', 'a') as log:
        log.write("%s\n" % url_file)
def alarm_handler(*args):
    """SIGALRM handler used by ``raw_input_timer``: abort the pending read.

    Accepts (and ignores) the arguments the signal machinery passes in and
    raises ``Exception("timeout")``.
    """
    raise Exception("timeout")


def raw_input_timer(prompt, timeout):
    """Prompt on stdout and read one line from stdin, giving up on timeout.

    Args:
        prompt: text written (and flushed) to stdout before reading.
        timeout: whole seconds to wait, passed to ``signal.alarm``.

    Returns:
        The line read, including its trailing newline, or ``""`` when the
        timeout expires or the read fails.

    KeyboardInterrupt and SystemExit are not swallowed.  The alarm is
    always cancelled and the previously installed SIGALRM handler is always
    restored before returning or propagating an exception.
    """
    previous_handler = signal.signal(signal.SIGALRM, alarm_handler)
    signal.alarm(timeout)
    try:
        sys.stdout.write(prompt)
        sys.stdout.flush()
        try:
            text = sys.stdin.readline()
        except Exception:
            # Covers the "timeout" raised by alarm_handler and read errors.
            text = ""
    finally:
        signal.alarm(0)
        if previous_handler is None:
            # Handler was not installed from Python; fall back to default.
            previous_handler = signal.SIG_DFL
        signal.signal(signal.SIGALRM, previous_handler)
    return text
if __name__ == '__main__':
    # The doctests are deliberately not run here: the example in process()
    # performs a live HTTP request, which would add a network round-trip
    # (and possibly a failure report) to every normal invocation.
    # Run them explicitly with:  python -m doctest -v cvh_video.py
    main()