]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/rtve.py
   2 from __future__ 
import unicode_literals
 
   8 from .common 
import InfoExtractor
 
  24 def _decrypt_url(png
): 
  25     encrypted_data 
= compat_b64decode(png
) 
  26     text_index 
= encrypted_data
.find(b
'tEXt') 
  27     text_chunk 
= encrypted_data
[text_index 
- 4:] 
  28     length 
= compat_struct_unpack('!I', text_chunk
[:4])[0] 
  29     # Use bytearray to get integers when iterating in both python 2.x and 3.x 
  30     data 
= bytearray(text_chunk
[8:8 + length
]) 
  31     data 
= [chr(b
) for b 
in data 
if b 
!= 0] 
  32     hash_index 
= data
.index('#') 
  33     alphabet_data 
= data
[:hash_index
] 
  34     url_data 
= data
[hash_index 
+ 1:] 
  35     if url_data
[0] == 'H' and url_data
[3] == '%': 
  36         # remove useless HQ%% at the start 
  37         url_data 
= url_data
[4:] 
  42     for l 
in alphabet_data
: 
  52     for letter 
in url_data
: 
  69 class RTVEALaCartaIE(InfoExtractor
): 
  70     IE_NAME 
= 'rtve.es:alacarta' 
  71     IE_DESC 
= 'RTVE a la carta' 
  72     _VALID_URL 
= r
'https?://(?:www\.)?rtve\.es/(m/)?(alacarta/videos|filmoteca)/[^/]+/[^/]+/(?P<id>\d+)' 
  75         'url': 'http://www.rtve.es/alacarta/videos/balonmano/o-swiss-cup-masculina-final-espana-suecia/2491869/', 
  76         'md5': '1d49b7e1ca7a7502c56a4bf1b60f1b43', 
  80             'title': 'Balonmano - Swiss Cup masculina. Final: España-Suecia', 
  84         'note': 'Live stream', 
  85         'url': 'http://www.rtve.es/alacarta/videos/television/24h-live/1694255/', 
  91         'skip': 'The f4m manifest can\'t be used yet', 
  93         'url': 'http://www.rtve.es/alacarta/videos/servir-y-proteger/servir-proteger-capitulo-104/4236788/', 
  94         'md5': 'e55e162379ad587e9640eda4f7353c0f', 
  98             'title': 'Servir y proteger - Capítulo 104 ', 
 102             'skip_download': True,  # requires ffmpeg 
 105         'url': 'http://www.rtve.es/m/alacarta/videos/cuentame-como-paso/cuentame-como-paso-t16-ultimo-minuto-nuestra-vida-capitulo-276/2969138/?media=tve', 
 106         'only_matching': True, 
 108         'url': 'http://www.rtve.es/filmoteca/no-do/not-1-introduccion-primer-noticiario-espanol/1465256/', 
 109         'only_matching': True, 
 112     def _real_initialize(self
): 
 113         user_agent_b64 
= base64
.b64encode(std_headers
['User-Agent'].encode('utf-8')).decode('utf-8') 
 114         manager_info 
= self
._download
_json
( 
 115             'http://www.rtve.es/odin/loki/' + user_agent_b64
, 
 116             None, 'Fetching manager info') 
 117         self
._manager 
= manager_info
['manager'] 
 119     def _real_extract(self
, url
): 
 120         mobj 
= re
.match(self
._VALID
_URL
, url
) 
 121         video_id 
= mobj
.group('id') 
 122         info 
= self
._download
_json
( 
 123             'http://www.rtve.es/api/videos/%s/config/alacarta_videos.json' % video_id
, 
 124             video_id
)['page']['items'][0] 
 125         if info
['state'] == 'DESPU': 
 126             raise ExtractorError('The video is no longer available', expected
=True) 
 127         title 
= info
['title'] 
 128         png_url 
= 'http://www.rtve.es/ztnr/movil/thumbnail/%s/videos/%s.png' % (self
._manager
, video_id
) 
 129         png_request 
= sanitized_Request(png_url
) 
 130         png_request
.add_header('Referer', url
) 
 131         png 
= self
._download
_webpage
(png_request
, video_id
, 'Downloading url information') 
 132         video_url 
= _decrypt_url(png
) 
 133         ext 
= determine_ext(video_url
) 
 136         if not video_url
.endswith('.f4m') and ext 
!= 'm3u8': 
 137             if '?' not in video_url
: 
 138                 video_url 
= video_url
.replace('resources/', 'auth/resources/') 
 139             video_url 
= video_url
.replace('.net.rtve', '.multimedia.cdn.rtve') 
 142             formats
.extend(self
._extract
_m
3u8_formats
( 
 143                 video_url
, video_id
, ext
='mp4', entry_protocol
='m3u8_native', 
 144                 m3u8_id
='hls', fatal
=False)) 
 146             formats
.extend(self
._extract
_f
4m
_formats
( 
 147                 video_url
, video_id
, f4m_id
='hds', fatal
=False)) 
 152         self
._sort
_formats
(formats
) 
 155         if info
.get('sbtFile') is not None: 
 156             subtitles 
= self
.extract_subtitles(video_id
, info
['sbtFile']) 
 162             'thumbnail': info
.get('image'), 
 164             'subtitles': subtitles
, 
 165             'duration': float_or_none(info
.get('duration'), scale
=1000), 
 168     def _get_subtitles(self
, video_id
, sub_file
): 
 169         subs 
= self
._download
_json
( 
 170             sub_file 
+ '.json', video_id
, 
 171             'Downloading subtitles info')['page']['items'] 
 173             (s
['lang'], [{'ext': 'vtt', 'url': s
['src']}]) 
 177 class RTVEInfantilIE(InfoExtractor
): 
 178     IE_NAME 
= 'rtve.es:infantil' 
 179     IE_DESC 
= 'RTVE infantil' 
 180     _VALID_URL 
= r
'https?://(?:www\.)?rtve\.es/infantil/serie/(?P<show>[^/]*)/video/(?P<short_title>[^/]*)/(?P<id>[0-9]+)/' 
 183         'url': 'http://www.rtve.es/infantil/serie/cleo/video/maneras-vivir/3040283/', 
 184         'md5': '915319587b33720b8e0357caaa6617e6', 
 188             'title': 'Maneras de vivir', 
 189             'thumbnail': 'http://www.rtve.es/resources/jpg/6/5/1426182947956.JPG', 
 194     def _real_extract(self
, url
): 
 195         video_id 
= self
._match
_id
(url
) 
 196         info 
= self
._download
_json
( 
 197             'http://www.rtve.es/api/videos/%s/config/alacarta_videos.json' % video_id
, 
 198             video_id
)['page']['items'][0] 
 200         webpage 
= self
._download
_webpage
(url
, video_id
) 
 201         vidplayer_id 
= self
._search
_regex
( 
 202             r
' id="vidplayer([0-9]+)"', webpage
, 'internal video ID') 
 204         png_url 
= 'http://www.rtve.es/ztnr/movil/thumbnail/default/videos/%s.png' % vidplayer_id
 
 205         png 
= self
._download
_webpage
(png_url
, video_id
, 'Downloading url information') 
 206         video_url 
= _decrypt_url(png
) 
 211             'title': info
['title'], 
 213             'thumbnail': info
.get('image'), 
 214             'duration': float_or_none(info
.get('duration'), scale
=1000), 
 218 class RTVELiveIE(InfoExtractor
): 
 219     IE_NAME 
= 'rtve.es:live' 
 220     IE_DESC 
= 'RTVE.es live streams' 
 221     _VALID_URL 
= r
'https?://(?:www\.)?rtve\.es/directo/(?P<id>[a-zA-Z0-9-]+)' 
 224         'url': 'http://www.rtve.es/directo/la-1/', 
 228             'title': 're:^La 1 [0-9]{4}-[0-9]{2}-[0-9]{2}Z[0-9]{6}$', 
 231             'skip_download': 'live stream', 
 235     def _real_extract(self
, url
): 
 236         mobj 
= re
.match(self
._VALID
_URL
, url
) 
 237         start_time 
= time
.gmtime() 
 238         video_id 
= mobj
.group('id') 
 240         webpage 
= self
._download
_webpage
(url
, video_id
) 
 241         title 
= remove_end(self
._og
_search
_title
(webpage
), ' en directo en RTVE.es') 
 242         title 
= remove_start(title
, 'Estoy viendo ') 
 243         title 
+= ' ' + time
.strftime('%Y-%m-%dZ%H%M%S', start_time
) 
 245         vidplayer_id 
= self
._search
_regex
( 
 246             (r
'playerId=player([0-9]+)', 
 247              r
'class=["\'].*?
\blive
_mod
\b.*?
["\'][^>]+data-assetid=["\'](\d
+)', 
 248              r'data
-id=["\'](\d+)'), 
 249             webpage, 'internal video ID') 
 250         png_url = 'http://www.rtve.es/ztnr/movil/thumbnail/amonet/videos/%s.png' % vidplayer_id 
 251         png = self._download_webpage(png_url, video_id, 'Downloading url information') 
 252         m3u8_url = _decrypt_url(png) 
 253         formats = self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4') 
 254         self._sort_formats(formats) 
 264 class RTVETelevisionIE(InfoExtractor): 
 265     IE_NAME = 'rtve.es:television' 
 266     _VALID_URL = r'https?://(?:www\.)?rtve\.es/television/[^/]+/[^/]+/(?P<id>\d+).shtml' 
 269         'url': 'http://www.rtve.es/television/20160628/revolucion-del-movil/1364141.shtml', 
 273             'title': 'Documentos TV - La revolución del móvil', 
 274             'duration': 3496.948, 
 277             'skip_download': True, 
 281     def _real_extract(self, url): 
 282         page_id = self._match_id(url) 
 283         webpage = self._download_webpage(url, page_id) 
 285         alacarta_url = self._search_regex( 
 286             r'data-location="alacarta_videos
"[^<]+url":"(http://www\.rtve\.es/alacarta.+?)&', 
 287             webpage, 'alacarta url', default=None) 
 288         if alacarta_url is None: 
 289             raise ExtractorError( 
 290                 'The webpage doesn\'t contain any video', expected=True) 
 292         return self.url_result(alacarta_url, ie=RTVEALaCartaIE.ie_key())