]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/rtve.py
   2 from __future__ 
import unicode_literals
 
   8 from .common 
import InfoExtractor
 
  22 def _decrypt_url(png
): 
  23     encrypted_data 
= base64
.b64decode(png
.encode('utf-8')) 
  24     text_index 
= encrypted_data
.find(b
'tEXt') 
  25     text_chunk 
= encrypted_data
[text_index 
- 4:] 
  26     length 
= compat_struct_unpack('!I', text_chunk
[:4])[0] 
  27     # Use bytearray to get integers when iterating in both python 2.x and 3.x 
  28     data 
= bytearray(text_chunk
[8:8 + length
]) 
  29     data 
= [chr(b
) for b 
in data 
if b 
!= 0] 
  30     hash_index 
= data
.index('#') 
  31     alphabet_data 
= data
[:hash_index
] 
  32     url_data 
= data
[hash_index 
+ 1:] 
  37     for l 
in alphabet_data
: 
  47     for letter 
in url_data
: 
  64 class RTVEALaCartaIE(InfoExtractor
): 
  65     IE_NAME 
= 'rtve.es:alacarta' 
  66     IE_DESC 
= 'RTVE a la carta' 
  67     _VALID_URL 
= r
'https?://(?:www\.)?rtve\.es/(m/)?(alacarta/videos|filmoteca)/[^/]+/[^/]+/(?P<id>\d+)' 
  70         'url': 'http://www.rtve.es/alacarta/videos/balonmano/o-swiss-cup-masculina-final-espana-suecia/2491869/', 
  71         'md5': '1d49b7e1ca7a7502c56a4bf1b60f1b43', 
  75             'title': 'Balonmano - Swiss Cup masculina. Final: España-Suecia', 
  79         'note': 'Live stream', 
  80         'url': 'http://www.rtve.es/alacarta/videos/television/24h-live/1694255/', 
  86         'skip': 'The f4m manifest can\'t be used yet', 
  88         'url': 'http://www.rtve.es/m/alacarta/videos/cuentame-como-paso/cuentame-como-paso-t16-ultimo-minuto-nuestra-vida-capitulo-276/2969138/?media=tve', 
  89         'only_matching': True, 
  91         'url': 'http://www.rtve.es/filmoteca/no-do/not-1-introduccion-primer-noticiario-espanol/1465256/', 
  92         'only_matching': True, 
  95     def _real_initialize(self
): 
  96         user_agent_b64 
= base64
.b64encode(std_headers
['User-Agent'].encode('utf-8')).decode('utf-8') 
  97         manager_info 
= self
._download
_json
( 
  98             'http://www.rtve.es/odin/loki/' + user_agent_b64
, 
  99             None, 'Fetching manager info') 
 100         self
._manager 
= manager_info
['manager'] 
 102     def _real_extract(self
, url
): 
 103         mobj 
= re
.match(self
._VALID
_URL
, url
) 
 104         video_id 
= mobj
.group('id') 
 105         info 
= self
._download
_json
( 
 106             'http://www.rtve.es/api/videos/%s/config/alacarta_videos.json' % video_id
, 
 107             video_id
)['page']['items'][0] 
 108         if info
['state'] == 'DESPU': 
 109             raise ExtractorError('The video is no longer available', expected
=True) 
 110         png_url 
= 'http://www.rtve.es/ztnr/movil/thumbnail/%s/videos/%s.png' % (self
._manager
, video_id
) 
 111         png_request 
= sanitized_Request(png_url
) 
 112         png_request
.add_header('Referer', url
) 
 113         png 
= self
._download
_webpage
(png_request
, video_id
, 'Downloading url information') 
 114         video_url 
= _decrypt_url(png
) 
 115         if not video_url
.endswith('.f4m'): 
 116             if '?' not in video_url
: 
 117                 video_url 
= video_url
.replace('resources/', 'auth/resources/') 
 118             video_url 
= video_url
.replace('.net.rtve', '.multimedia.cdn.rtve') 
 121         if info
.get('sbtFile') is not None: 
 122             subtitles 
= self
.extract_subtitles(video_id
, info
['sbtFile']) 
 126             'title': info
['title'], 
 128             'thumbnail': info
.get('image'), 
 130             'subtitles': subtitles
, 
 131             'duration': float_or_none(info
.get('duration'), scale
=1000), 
 134     def _get_subtitles(self
, video_id
, sub_file
): 
 135         subs 
= self
._download
_json
( 
 136             sub_file 
+ '.json', video_id
, 
 137             'Downloading subtitles info')['page']['items'] 
 139             (s
['lang'], [{'ext': 'vtt', 'url': s
['src']}]) 
 143 class RTVEInfantilIE(InfoExtractor
): 
 144     IE_NAME 
= 'rtve.es:infantil' 
 145     IE_DESC 
= 'RTVE infantil' 
 146     _VALID_URL 
= r
'https?://(?:www\.)?rtve\.es/infantil/serie/(?P<show>[^/]*)/video/(?P<short_title>[^/]*)/(?P<id>[0-9]+)/' 
 149         'url': 'http://www.rtve.es/infantil/serie/cleo/video/maneras-vivir/3040283/', 
 150         'md5': '915319587b33720b8e0357caaa6617e6', 
 154             'title': 'Maneras de vivir', 
 155             'thumbnail': 'http://www.rtve.es/resources/jpg/6/5/1426182947956.JPG', 
 160     def _real_extract(self
, url
): 
 161         video_id 
= self
._match
_id
(url
) 
 162         info 
= self
._download
_json
( 
 163             'http://www.rtve.es/api/videos/%s/config/alacarta_videos.json' % video_id
, 
 164             video_id
)['page']['items'][0] 
 166         webpage 
= self
._download
_webpage
(url
, video_id
) 
 167         vidplayer_id 
= self
._search
_regex
( 
 168             r
' id="vidplayer([0-9]+)"', webpage
, 'internal video ID') 
 170         png_url 
= 'http://www.rtve.es/ztnr/movil/thumbnail/default/videos/%s.png' % vidplayer_id
 
 171         png 
= self
._download
_webpage
(png_url
, video_id
, 'Downloading url information') 
 172         video_url 
= _decrypt_url(png
) 
 177             'title': info
['title'], 
 179             'thumbnail': info
.get('image'), 
 180             'duration': float_or_none(info
.get('duration'), scale
=1000), 
 184 class RTVELiveIE(InfoExtractor
): 
 185     IE_NAME 
= 'rtve.es:live' 
 186     IE_DESC 
= 'RTVE.es live streams' 
 187     _VALID_URL 
= r
'https?://(?:www\.)?rtve\.es/directo/(?P<id>[a-zA-Z0-9-]+)' 
 190         'url': 'http://www.rtve.es/directo/la-1/', 
 194             'title': 're:^La 1 [0-9]{4}-[0-9]{2}-[0-9]{2}Z[0-9]{6}$', 
 197             'skip_download': 'live stream', 
 201     def _real_extract(self
, url
): 
 202         mobj 
= re
.match(self
._VALID
_URL
, url
) 
 203         start_time 
= time
.gmtime() 
 204         video_id 
= mobj
.group('id') 
 206         webpage 
= self
._download
_webpage
(url
, video_id
) 
 207         title 
= remove_end(self
._og
_search
_title
(webpage
), ' en directo en RTVE.es') 
 208         title 
= remove_start(title
, 'Estoy viendo ') 
 209         title 
+= ' ' + time
.strftime('%Y-%m-%dZ%H%M%S', start_time
) 
 211         vidplayer_id 
= self
._search
_regex
( 
 212             (r
'playerId=player([0-9]+)', 
 213              r
'class=["\'].*?
\blive
_mod
\b.*?
["\'][^>]+data-assetid=["\'](\d
+)', 
 214              r'data
-id=["\'](\d+)'), 
 215             webpage, 'internal video ID') 
 216         png_url = 'http://www.rtve.es/ztnr/movil/thumbnail/amonet/videos/%s.png' % vidplayer_id 
 217         png = self._download_webpage(png_url, video_id, 'Downloading url information') 
 218         m3u8_url = _decrypt_url(png) 
 219         formats = self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4') 
 220         self._sort_formats(formats) 
 230 class RTVETelevisionIE(InfoExtractor): 
 231     IE_NAME = 'rtve.es:television' 
 232     _VALID_URL = r'https?://(?:www\.)?rtve\.es/television/[^/]+/[^/]+/(?P<id>\d+).shtml' 
 235         'url': 'http://www.rtve.es/television/20160628/revolucion-del-movil/1364141.shtml', 
 239             'title': 'Documentos TV - La revolución del móvil', 
 240             'duration': 3496.948, 
 243             'skip_download': True, 
 247     def _real_extract(self, url): 
 248         page_id = self._match_id(url) 
 249         webpage = self._download_webpage(url, page_id) 
 251         alacarta_url = self._search_regex( 
 252             r'data-location="alacarta_videos
"[^<]+url":"(http://www\.rtve\.es/alacarta.+?)&', 
 253             webpage, 'alacarta url', default=None) 
 254         if alacarta_url is None: 
 255             raise ExtractorError( 
 256                 'The webpage doesn\'t contain any video', expected=True) 
 258         return self.url_result(alacarta_url, ie=RTVEALaCartaIE.ie_key())