from __future__ import unicode_literals

import re

from .adobepass import AdobePassIE
from ..compat import compat_str
from ..utils import (
    determine_ext,
    ExtractorError,
    float_or_none,
    int_or_none,
    parse_duration,
    strip_or_none,
    update_url_query,
    url_or_none,
    xpath_attr,
    xpath_text,
)


class TurnerBaseIE(AdobePassIE):
    _AKAMAI_SPE_TOKEN_CACHE = {}

    def _extract_timestamp(self, video_data):
        return int_or_none(xpath_attr(video_data, 'dateCreated', 'uts'))

    def _add_akamai_spe_token(self, tokenizer_src, video_url, content_id, ap_data, custom_tokenizer_query=None):
        secure_path = self._search_regex(r'https?://[^/]+(.+/)', video_url, 'secure path') + '*'
        # SPE tokens are cached per secure path so sibling files reuse them
        token = self._AKAMAI_SPE_TOKEN_CACHE.get(secure_path)
        if not token:
            query = {
                'path': secure_path,
            }
            if custom_tokenizer_query:
                query.update(custom_tokenizer_query)
            else:
                query['videoId'] = content_id
            if ap_data.get('auth_required'):
                query['accessToken'] = self._extract_mvpd_auth(ap_data['url'], content_id, ap_data['site_name'], ap_data['site_name'])
            auth = self._download_xml(
                tokenizer_src, content_id, query=query)
            error_msg = xpath_text(auth, 'error/msg')
            if error_msg:
                raise ExtractorError(error_msg, expected=True)
            token = xpath_text(auth, 'token')
            if not token:
                return video_url
            self._AKAMAI_SPE_TOKEN_CACHE[secure_path] = token
        return video_url + '?hdnea=' + token
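
    # Note (illustrative, not from the original source): the SPE tokenizer
    # queried above answers with a small XML document containing either a
    # <token> element or an <error><msg>...</msg></error> element; the root
    # element name is not documented here and is a placeholder. On success the
    # token string is simply appended to the media URL as the `hdnea` query
    # parameter, e.g.
    #     https://example.akamaized.net/secure/path/file.m3u8?hdnea=TOKEN-STRING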
 
    def _extract_cvp_info(self, data_src, video_id, path_data={}, ap_data={}):
        video_data = self._download_xml(data_src, video_id)
        video_id = video_data.attrib['id']
        title = xpath_text(video_data, 'headline', fatal=True)
        content_id = xpath_text(video_data, 'contentId') or video_id
        # rtmp_src = xpath_text(video_data, 'akamai/src')
        # if rtmp_src:
        #     splited_rtmp_src = rtmp_src.split(',')
        #     if len(splited_rtmp_src) == 2:
        #         rtmp_src = splited_rtmp_src[1]
        # aifp = xpath_text(video_data, 'akamai/aifp', default='')

        urls = []
        formats = []
        rex = re.compile(
            r'(?P<width>[0-9]+)x(?P<height>[0-9]+)(?:_(?P<bitrate>[0-9]+))?')
        # Possible formats locations: files/file, files/groupFiles/files
        for video_file in video_data.findall('.//file'):
            video_url = video_file.text.strip()
            if not video_url:
                continue
            ext = determine_ext(video_url)
            if video_url.startswith('/mp4:protected/'):
                continue
                # TODO Correct extraction for these files
                # protected_path_data = path_data.get('protected')
                # if not protected_path_data or not rtmp_src:
                #     continue
                # protected_path = self._search_regex(
                #     r'/mp4:(.+)\.[a-z0-9]', video_url, 'secure path')
                # auth = self._download_webpage(
                #     protected_path_data['tokenizer_src'], query={
                #         'path': protected_path,
                #         'videoId': content_id,
                #         'aifp': aifp,
                #     })
                # token = xpath_text(auth, 'token')
                # if not token:
                #     continue
                # video_url = rtmp_src + video_url + '?' + token
            elif video_url.startswith('/secure/'):
                secure_path_data = path_data.get('secure')
                if not secure_path_data:
                    continue
                video_url = self._add_akamai_spe_token(
                    secure_path_data['tokenizer_src'],
                    secure_path_data['media_src'] + video_url,
                    content_id, ap_data)
            elif not re.match('https?://', video_url):
                base_path_data = path_data.get(ext, path_data.get('default', {}))
                media_src = base_path_data.get('media_src')
                if not media_src:
                    continue
                video_url = media_src + video_url
            if video_url in urls:
                continue
            urls.append(video_url)
            format_id = video_file.get('bitrate')
            if ext == 'smil':
                formats.extend(self._extract_smil_formats(
                    video_url, video_id, fatal=False))
            elif ext == 'm3u8':
                m3u8_formats = self._extract_m3u8_formats(
                    video_url, video_id, 'mp4',
                    m3u8_id=format_id or 'hls', fatal=False)
                if '/secure/' in video_url and '?hdnea=' in video_url:
                    # mark tokenized secure streams as non-seekable
                    for f in m3u8_formats:
                        f['_seekable'] = False
                formats.extend(m3u8_formats)
            elif ext == 'f4m':
                formats.extend(self._extract_f4m_formats(
                    update_url_query(video_url, {'hdcore': '3.7.0'}),
                    video_id, f4m_id=format_id or 'hds', fatal=False))
            else:
                f = {
                    'format_id': format_id,
                    'url': video_url,
                    'ext': ext,
                }
                mobj = rex.search(format_id + video_url)
                if mobj:
                    f.update({
                        'width': int(mobj.group('width')),
                        'height': int(mobj.group('height')),
                        'tbr': int_or_none(mobj.group('bitrate')),
                    })
                elif isinstance(format_id, compat_str):
                    if format_id.isdigit():
                        f['tbr'] = int(format_id)
                    else:
                        mobj = re.match(r'ios_(audio|[0-9]+)$', format_id)
                        if mobj:
                            if mobj.group(1) == 'audio':
                                f.update({
                                    'vcodec': 'none',
                                    'ext': 'm4a',
                                })
                            else:
                                f['tbr'] = int(mobj.group(1))
                formats.append(f)
        self._sort_formats(formats)

        subtitles = {}
        for source in video_data.findall('closedCaptions/source'):
            for track in source.findall('track'):
                track_url = url_or_none(track.get('url'))
                if not track_url or track_url.endswith('/big'):
                    continue
                lang = track.get('lang') or track.get('label') or 'en'
                subtitles.setdefault(lang, []).append({
                    'url': track_url,
                    'ext': {
                        'scc': 'scc',
                        'webvtt': 'vtt',
                        'smptett': 'tt',
                    }.get(source.get('format'))
                })

        thumbnails = [{
            'id': image.get('cut'),
            'url': image.text,
            'width': int_or_none(image.get('width')),
            'height': int_or_none(image.get('height')),
        } for image in video_data.findall('images/image')]

        is_live = xpath_text(video_data, 'isLive') == 'true'

        return {
            'id': video_id,
            'title': self._live_title(title) if is_live else title,
            'formats': formats,
            'subtitles': subtitles,
            'thumbnails': thumbnails,
            'thumbnail': xpath_text(video_data, 'poster'),
            'description': strip_or_none(xpath_text(video_data, 'description')),
            'duration': parse_duration(xpath_text(video_data, 'length') or xpath_text(video_data, 'trt')),
            'timestamp': self._extract_timestamp(video_data),
            'upload_date': xpath_attr(video_data, 'metas', 'version'),
            'series': xpath_text(video_data, 'showTitle'),
            'season_number': int_or_none(xpath_text(video_data, 'seasonNumber')),
            'episode_number': int_or_none(xpath_text(video_data, 'episodeNumber')),
            'is_live': is_live,
        }

    def _extract_ngtv_info(self, media_id, tokenizer_query, ap_data=None):
        streams_data = self._download_json(
            'http://medium.ngtv.io/media/%s/tv' % media_id,
            media_id)['media']['tv']
        duration = None
        chapters = []
        formats = []
        for supported_type in ('unprotected', 'bulkaes'):
            stream_data = streams_data.get(supported_type, {})
            m3u8_url = stream_data.get('secureUrl') or stream_data.get('url')
            if not m3u8_url:
                continue
            if stream_data.get('playlistProtection') == 'spe':
                # SPE-protected playlists need an Akamai token appended first
                m3u8_url = self._add_akamai_spe_token(
                    'http://token.ngtv.io/token/token_spe',
                    m3u8_url, media_id, ap_data or {}, tokenizer_query)
            formats.extend(self._extract_m3u8_formats(
                m3u8_url, media_id, 'mp4', m3u8_id='hls', fatal=False))

            duration = float_or_none(stream_data.get('totalRuntime'))

            if not chapters:
                for chapter in stream_data.get('contentSegments', []):
                    start_time = float_or_none(chapter.get('start'))
                    chapter_duration = float_or_none(chapter.get('duration'))
                    if start_time is None or chapter_duration is None:
                        continue
                    chapters.append({
                        'start_time': start_time,
                        'end_time': start_time + chapter_duration,
                    })
        self._sort_formats(formats)

        return {
            'formats': formats,
            'chapters': chapters,
            'duration': duration,
        }