]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/rtve.py
   2 from __future__ 
import unicode_literals
 
   8 from .common 
import InfoExtractor
 
  22 def _decrypt_url(png
): 
  23     encrypted_data 
= base64
.b64decode(png
.encode('utf-8')) 
  24     text_index 
= encrypted_data
.find(b
'tEXt') 
  25     text_chunk 
= encrypted_data
[text_index 
- 4:] 
  26     length 
= compat_struct_unpack('!I', text_chunk
[:4])[0] 
  27     # Use bytearray to get integers when iterating in both python 2.x and 3.x 
  28     data 
= bytearray(text_chunk
[8:8 + length
]) 
  29     data 
= [chr(b
) for b 
in data 
if b 
!= 0] 
  30     hash_index 
= data
.index('#') 
  31     alphabet_data 
= data
[:hash_index
] 
  32     url_data 
= data
[hash_index 
+ 1:] 
  37     for l 
in alphabet_data
: 
  47     for letter 
in url_data
: 
  64 class RTVEALaCartaIE(InfoExtractor
): 
  65     IE_NAME 
= 'rtve.es:alacarta' 
  66     IE_DESC 
= 'RTVE a la carta' 
  67     _VALID_URL 
= r
'https?://(?:www\.)?rtve\.es/(m/)?(alacarta/videos|filmoteca)/[^/]+/[^/]+/(?P<id>\d+)' 
  70         'url': 'http://www.rtve.es/alacarta/videos/balonmano/o-swiss-cup-masculina-final-espana-suecia/2491869/', 
  71         'md5': '1d49b7e1ca7a7502c56a4bf1b60f1b43', 
  75             'title': 'Balonmano - Swiss Cup masculina. Final: España-Suecia', 
  79         'note': 'Live stream', 
  80         'url': 'http://www.rtve.es/alacarta/videos/television/24h-live/1694255/', 
  86         'skip': 'The f4m manifest can\'t be used yet', 
  88         'url': 'http://www.rtve.es/m/alacarta/videos/cuentame-como-paso/cuentame-como-paso-t16-ultimo-minuto-nuestra-vida-capitulo-276/2969138/?media=tve', 
  89         'only_matching': True, 
  91         'url': 'http://www.rtve.es/filmoteca/no-do/not-1-introduccion-primer-noticiario-espanol/1465256/', 
  92         'only_matching': True, 
  95     def _real_initialize(self
): 
  96         user_agent_b64 
= base64
.b64encode(std_headers
['User-Agent'].encode('utf-8')).decode('utf-8') 
  97         manager_info 
= self
._download
_json
( 
  98             'http://www.rtve.es/odin/loki/' + user_agent_b64
, 
  99             None, 'Fetching manager info') 
 100         self
._manager 
= manager_info
['manager'] 
 102     def _real_extract(self
, url
): 
 103         mobj 
= re
.match(self
._VALID
_URL
, url
) 
 104         video_id 
= mobj
.group('id') 
 105         info 
= self
._download
_json
( 
 106             'http://www.rtve.es/api/videos/%s/config/alacarta_videos.json' % video_id
, 
 107             video_id
)['page']['items'][0] 
 108         if info
['state'] == 'DESPU': 
 109             raise ExtractorError('The video is no longer available', expected
=True) 
 110         png_url 
= 'http://www.rtve.es/ztnr/movil/thumbnail/%s/videos/%s.png' % (self
._manager
, video_id
) 
 111         png_request 
= sanitized_Request(png_url
) 
 112         png_request
.add_header('Referer', url
) 
 113         png 
= self
._download
_webpage
(png_request
, video_id
, 'Downloading url information') 
 114         video_url 
= _decrypt_url(png
) 
 115         if not video_url
.endswith('.f4m'): 
 116             if '?' not in video_url
: 
 117                 video_url 
= video_url
.replace('resources/', 'auth/resources/') 
 118             video_url 
= video_url
.replace('.net.rtve', '.multimedia.cdn.rtve') 
 121         if info
.get('sbtFile') is not None: 
 122             subtitles 
= self
.extract_subtitles(video_id
, info
['sbtFile']) 
 126             'title': info
['title'], 
 128             'thumbnail': info
.get('image'), 
 130             'subtitles': subtitles
, 
 131             'duration': float_or_none(info
.get('duration'), scale
=1000), 
 134     def _get_subtitles(self
, video_id
, sub_file
): 
 135         subs 
= self
._download
_json
( 
 136             sub_file 
+ '.json', video_id
, 
 137             'Downloading subtitles info')['page']['items'] 
 139             (s
['lang'], [{'ext': 'vtt', 'url': s
['src']}]) 
 143 class RTVEInfantilIE(InfoExtractor
): 
 144     IE_NAME 
= 'rtve.es:infantil' 
 145     IE_DESC 
= 'RTVE infantil' 
 146     _VALID_URL 
= r
'https?://(?:www\.)?rtve\.es/infantil/serie/(?P<show>[^/]*)/video/(?P<short_title>[^/]*)/(?P<id>[0-9]+)/' 
 149         'url': 'http://www.rtve.es/infantil/serie/cleo/video/maneras-vivir/3040283/', 
 150         'md5': '915319587b33720b8e0357caaa6617e6', 
 154             'title': 'Maneras de vivir', 
 155             'thumbnail': 'http://www.rtve.es/resources/jpg/6/5/1426182947956.JPG', 
 160     def _real_extract(self
, url
): 
 161         video_id 
= self
._match
_id
(url
) 
 162         info 
= self
._download
_json
( 
 163             'http://www.rtve.es/api/videos/%s/config/alacarta_videos.json' % video_id
, 
 164             video_id
)['page']['items'][0] 
 166         webpage 
= self
._download
_webpage
(url
, video_id
) 
 167         vidplayer_id 
= self
._search
_regex
( 
 168             r
' id="vidplayer([0-9]+)"', webpage
, 'internal video ID') 
 170         png_url 
= 'http://www.rtve.es/ztnr/movil/thumbnail/default/videos/%s.png' % vidplayer_id
 
 171         png 
= self
._download
_webpage
(png_url
, video_id
, 'Downloading url information') 
 172         video_url 
= _decrypt_url(png
) 
 177             'title': info
['title'], 
 179             'thumbnail': info
.get('image'), 
 180             'duration': float_or_none(info
.get('duration'), scale
=1000), 
 184 class RTVELiveIE(InfoExtractor
): 
 185     IE_NAME 
= 'rtve.es:live' 
 186     IE_DESC 
= 'RTVE.es live streams' 
 187     _VALID_URL 
= r
'https?://(?:www\.)?rtve\.es/directo/(?P<id>[a-zA-Z0-9-]+)' 
 190         'url': 'http://www.rtve.es/directo/la-1/', 
 194             'title': 're:^La 1 [0-9]{4}-[0-9]{2}-[0-9]{2}Z[0-9]{6}$', 
 197             'skip_download': 'live stream', 
 201     def _real_extract(self
, url
): 
 202         mobj 
= re
.match(self
._VALID
_URL
, url
) 
 203         start_time 
= time
.gmtime() 
 204         video_id 
= mobj
.group('id') 
 206         webpage 
= self
._download
_webpage
(url
, video_id
) 
 207         title 
= remove_end(self
._og
_search
_title
(webpage
), ' en directo en RTVE.es') 
 208         title 
= remove_start(title
, 'Estoy viendo ') 
 209         title 
+= ' ' + time
.strftime('%Y-%m-%dZ%H%M%S', start_time
) 
 211         vidplayer_id 
= self
._search
_regex
( 
 212             r
'playerId=player([0-9]+)', webpage
, 'internal video ID') 
 213         png_url 
= 'http://www.rtve.es/ztnr/movil/thumbnail/amonet/videos/%s.png' % vidplayer_id
 
 214         png 
= self
._download
_webpage
(png_url
, video_id
, 'Downloading url information') 
 215         m3u8_url 
= _decrypt_url(png
) 
 216         formats 
= self
._extract
_m
3u8_formats
(m3u8_url
, video_id
, ext
='mp4') 
 217         self
._sort
_formats
(formats
) 
 227 class RTVETelevisionIE(InfoExtractor
): 
 228     IE_NAME 
= 'rtve.es:television' 
 229     _VALID_URL 
= r
'https?://(?:www\.)?rtve\.es/television/[^/]+/[^/]+/(?P<id>\d+).shtml' 
 232         'url': 'http://www.rtve.es/television/20160628/revolucion-del-movil/1364141.shtml', 
 236             'title': 'Documentos TV - La revolución del móvil', 
 237             'duration': 3496.948, 
 240             'skip_download': True, 
 244     def _real_extract(self
, url
): 
 245         page_id 
= self
._match
_id
(url
) 
 246         webpage 
= self
._download
_webpage
(url
, page_id
) 
 248         alacarta_url 
= self
._search
_regex
( 
 249             r
'data-location="alacarta_videos"[^<]+url":"(http://www\.rtve\.es/alacarta.+?)&', 
 250             webpage
, 'alacarta url', default
=None) 
 251         if alacarta_url 
is None: 
 252             raise ExtractorError( 
 253                 'The webpage doesn\'t contain any video', expected
=True) 
 255         return self
.url_result(alacarta_url
, ie
=RTVEALaCartaIE
.ie_key())