]> Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/rtve.py
Imported Upstream version 2014.06.07
[youtubedl] / youtube_dl / extractor / rtve.py
1 # encoding: utf-8
2 from __future__ import unicode_literals
3
4 import re
5 import base64
6
7 from .common import InfoExtractor
8 from ..utils import (
9 struct_unpack,
10 )
11
12
class RTVEALaCartaIE(InfoExtractor):
    """Information extractor for videos under rtve.es/alacarta/videos/."""

    IE_NAME = 'rtve.es:alacarta'
    IE_DESC = 'RTVE a la carta'
    _VALID_URL = r'http://www\.rtve\.es/alacarta/videos/[^/]+/[^/]+/(?P<id>\d+)'

    _TEST = {
        'url': 'http://www.rtve.es/alacarta/videos/balonmano/o-swiss-cup-masculina-final-espana-suecia/2491869/',
        'md5': '18fcd45965bdd076efdb12cd7f6d7b9e',
        'info_dict': {
            'id': '2491869',
            'ext': 'mp4',
            'title': 'Balonmano - Swiss Cup masculina. Final: España-Suecia',
        },
    }

    def _decrypt_url(self, png):
        """Decode the video URL hidden in a base64-encoded PNG.

        The payload is read from the data of the first ``tEXt`` chunk.  After
        dropping NUL bytes it has the form ``<alphabet data>#<url data>``:
        the alphabet data yields a list of characters, and the url data
        yields two-digit decimal indexes into that list.

        Raises ValueError if no usable ``tEXt`` chunk or no ``#`` separator
        is present.
        """
        encrypted_data = base64.b64decode(png)
        text_index = encrypted_data.find(b'tEXt')
        # The 4 bytes preceding the chunk type hold the chunk length, so the
        # marker must be found and must not sit in the first 4 bytes.
        # Without this check a negative start would silently slice from the
        # end of the buffer.
        if text_index < 4:
            raise ValueError('No tEXt chunk found in the PNG data')
        text_chunk = encrypted_data[text_index - 4:]
        length = struct_unpack('!I', text_chunk[:4])[0]
        # Use bytearray to get integers when iterating in both python 2.x and 3.x
        payload = bytearray(text_chunk[8:8 + length])
        chars = [chr(byte) for byte in payload if byte != 0]
        hash_index = chars.index('#')
        alphabet_data = chars[:hash_index]
        url_data = chars[hash_index + 1:]

        # Build the alphabet: keep one character, then skip a number of
        # characters that cycles through 1, 2, 3, 0, 1, ...
        alphabet = []
        cycle = 0
        to_skip = 0
        for char in alphabet_data:
            if to_skip == 0:
                alphabet.append(char)
                to_skip = cycle = (cycle + 1) % 4
            else:
                to_skip -= 1

        # Decode the URL: every output character is a two-digit index into
        # the alphabet.  Between the tens digit and the units digit a number
        # of filler characters is skipped, cycling through 3, 0, 1, 2, ...
        url_chars = []
        have_tens = False
        to_skip = 3
        decoded_count = 1
        index = 0
        for letter in url_data:
            if not have_tens:
                index = int(letter) * 10
                have_tens = True
            elif to_skip == 0:
                index += int(letter)
                url_chars.append(alphabet[index])
                to_skip = (decoded_count + 3) % 4
                have_tens = False
                decoded_count += 1
            else:
                to_skip -= 1

        return ''.join(url_chars)

    def _real_extract(self, url):
        """Return the info dict (id, title, url, thumbnail) for *url*."""
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        info = self._download_json(
            'http://www.rtve.es/api/videos/%s/config/alacarta_videos.json' % video_id,
            video_id)['page']['items'][0]
        # The media URL is not in the JSON; it is decoded from the text
        # content fetched from the "thumbnail" PNG endpoint.
        png_url = 'http://www.rtve.es/ztnr/movil/thumbnail/default/videos/%s.png' % video_id
        png = self._download_webpage(png_url, video_id, 'Downloading url information')
        video_url = self._decrypt_url(png)

        return {
            'id': video_id,
            'title': info['title'],
            'url': video_url,
            'thumbnail': info['image'],
        }