]>
Raphaƫl G. Git Repositories - youtubedl/blob - youtube_dl/extractor/tvnow.py
   2 from __future__ 
import unicode_literals
 
   6 from .common 
import InfoExtractor
 
   7 from ..compat 
import compat_str
 
  19 class TVNowBaseIE(InfoExtractor
): 
  21         'id', 'title', 'free', 'geoblocked', 'articleLong', 'articleShort', 
  22         'broadcastStartDate', 'isDrm', 'duration', 'season', 'episode', 
  23         'manifest.dashclear', 'manifest.hlsclear', 'manifest.smoothclear', 
  24         'format.title', 'format.defaultImage169Format', 'format.defaultImage169Logo') 
  26     def _call_api(self
, path
, video_id
, query
): 
  27         return self
._download
_json
( 
  28             'https://api.tvnow.de/v3/' + path
, video_id
, query
=query
) 
  30     def _extract_video(self
, info
, display_id
): 
  31         video_id 
= compat_str(info
['id']) 
  35         for manifest_url 
in (info
.get('manifest') or {}).values(): 
  38             manifest_url 
= update_url_query(manifest_url
, {'filter': ''}) 
  39             path 
= self
._search
_regex
(r
'https?://[^/]+/(.+?)\.ism/', manifest_url
, 'path') 
  44             def url_repl(proto
, suffix
): 
  46                     r
'(?:hls|dash|hss)([.-])', proto 
+ r
'\1', re
.sub( 
  47                         r
'\.ism/(?:[^.]*\.(?:m3u8|mpd)|[Mm]anifest)', 
  48                         '.ism/' + suffix
, manifest_url
)) 
  50             formats 
= self
._extract
_mpd
_formats
( 
  51                 url_repl('dash', '.mpd'), video_id
, 
  52                 mpd_id
='dash', fatal
=False) 
  53             formats
.extend(self
._extract
_ism
_formats
( 
  54                 url_repl('hss', 'Manifest'), 
  55                 video_id
, ism_id
='mss', fatal
=False)) 
  56             formats
.extend(self
._extract
_m
3u8_formats
( 
  57                 url_repl('hls', '.m3u8'), video_id
, 'mp4', 
  58                 'm3u8_native', m3u8_id
='hls', fatal
=False)) 
  64                     'Video %s is DRM protected' % video_id
, expected
=True) 
  65             if info
.get('geoblocked'): 
  66                 raise self
.raise_geo_restricted() 
  67             if not info
.get('free', True): 
  69                     'Video %s is not available for free' % video_id
, expected
=True) 
  70         self
._sort
_formats
(formats
) 
  72         description 
= info
.get('articleLong') or info
.get('articleShort') 
  73         timestamp 
= parse_iso8601(info
.get('broadcastStartDate'), ' ') 
  74         duration 
= parse_duration(info
.get('duration')) 
  76         f 
= info
.get('format', {}) 
  79             'url': 'https://aistvnow-a.akamaihd.net/tvnow/movie/%s' % video_id
, 
  81         thumbnail 
= f
.get('defaultImage169Format') or f
.get('defaultImage169Logo') 
  89             'display_id': display_id
, 
  91             'description': description
, 
  92             'thumbnails': thumbnails
, 
  93             'timestamp': timestamp
, 
  95             'series': f
.get('title'), 
  96             'season_number': int_or_none(info
.get('season')), 
  97             'episode_number': int_or_none(info
.get('episode')), 
 103 class TVNowIE(TVNowBaseIE
): 
 104     _VALID_URL 
= r
'''(?x) 
 106                         (?:www\.)?tvnow\.(?:de|at|ch)/(?P<station>[^/]+)/ 
 108                         (?!(?:list|jahr)(?:/|$))(?P<id>[^/?\#&]+) 
 112     def suitable(cls
, url
): 
 113         return (False if TVNowNewIE
.suitable(url
) or TVNowSeasonIE
.suitable(url
) or TVNowAnnualIE
.suitable(url
) or TVNowShowIE
.suitable(url
) 
 114                 else super(TVNowIE
, cls
).suitable(url
)) 
 117         'url': 'https://www.tvnow.de/rtl2/grip-das-motormagazin/der-neue-porsche-911-gt-3/player', 
 120             'display_id': 'grip-das-motormagazin/der-neue-porsche-911-gt-3', 
 122             'title': 'Der neue Porsche 911 GT 3', 
 123             'description': 'md5:6143220c661f9b0aae73b245e5d898bb', 
 124             'timestamp': 1495994400, 
 125             'upload_date': '20170528', 
 127             'series': 'GRIP - Das Motormagazin', 
 129             'episode_number': 405, 
 130             'episode': 'Der neue Porsche 911 GT 3', 
 134         'url': 'https://www.tvnow.de/rtl2/armes-deutschland/episode-0008/player', 
 135         'only_matching': True, 
 138         'url': 'https://www.tvnow.de/nitro/alarm-fuer-cobra-11-die-autobahnpolizei/auf-eigene-faust-pilot/player', 
 139         'only_matching': True, 
 142         'url': 'https://www.tvnow.de/superrtl/die-lustigsten-schlamassel-der-welt/u-a-ketchup-effekt/player', 
 143         'only_matching': True, 
 146         'url': 'https://www.tvnow.de/ntv/startup-news/goetter-in-weiss/player', 
 147         'only_matching': True, 
 150         'url': 'https://www.tvnow.de/vox/auto-mobil/neues-vom-automobilmarkt-2017-11-19-17-00-00/player', 
 151         'only_matching': True, 
 154         'url': 'https://www.tvnow.de/rtlplus/op-ruft-dr-bruckner/die-vernaehte-frau/player', 
 155         'only_matching': True, 
 157         'url': 'https://www.tvnow.de/rtl2/grip-das-motormagazin/der-neue-porsche-911-gt-3', 
 158         'only_matching': True, 
 161     def _real_extract(self
, url
): 
 162         mobj 
= re
.match(self
._VALID
_URL
, url
) 
 163         display_id 
= '%s/%s' % mobj
.group(2, 3) 
 165         info 
= self
._call
_api
( 
 166             'movies/' + display_id
, display_id
, query
={ 
 167                 'fields': ','.join(self
._VIDEO
_FIELDS
), 
 170         return self
._extract
_video
(info
, display_id
) 
 173 class TVNowNewIE(InfoExtractor
): 
 174     _VALID_URL 
= r
'''(?x) 
 175                     (?P<base_url>https?:// 
 176                         (?:www\.)?tvnow\.(?:de|at|ch)/ 
 180                         episode-\d+-(?P<episode>[^/?$&]+)-(?P<id>\d+) 
 184         'url': 'https://www.tvnow.de/shows/grip-das-motormagazin-1669/2017-05/episode-405-der-neue-porsche-911-gt-3-331082', 
 185         'only_matching': True, 
 188     def _real_extract(self
, url
): 
 189         mobj 
= re
.match(self
._VALID
_URL
, url
) 
 190         base_url 
= re
.sub(r
'(?:shows|serien)', '_', mobj
.group('base_url')) 
 191         show
, episode 
= mobj
.group('show', 'episode') 
 192         return self
.url_result( 
 193             # Rewrite new URLs to the old format and use extraction via old API 
 194             # at api.tvnow.de as a loophole for bypassing premium content checks 
 195             '%s/%s/%s' % (base_url
, show
, episode
), 
 196             ie
=TVNowIE
.ie_key(), video_id
=mobj
.group('id')) 
 199 class TVNowNewBaseIE(InfoExtractor
): 
 200     def _call_api(self
, path
, video_id
, query
={}): 
 201         result 
= self
._download
_json
( 
 202             'https://apigw.tvnow.de/module/' + path
, video_id
, query
=query
) 
 203         error 
= result
.get('error') 
 205             raise ExtractorError( 
 206                 '%s said: %s' % (self
.IE_NAME
, error
), expected
=True) 
 211 TODO: new apigw.tvnow.de based version of TVNowIE. Replace old TVNowIE with it 
 212 when api.tvnow.de is shut down. This version can't bypass premium checks though. 
 213 class TVNowIE(TVNowNewBaseIE): 
 214     _VALID_URL = r'''(?x) 
 216                         (?:www\.)?tvnow\.(?:de|at|ch)/ 
 217                         (?:shows|serien)/[^/]+/ 
 219                         (?P<display_id>[^/?$&]+)-(?P<id>\d+) 
 223         # episode with annual navigation 
 224         'url': 'https://www.tvnow.de/shows/grip-das-motormagazin-1669/2017-05/episode-405-der-neue-porsche-911-gt-3-331082', 
 227             'display_id': 'grip-das-motormagazin/der-neue-porsche-911-gt-3', 
 229             'title': 'Der neue Porsche 911 GT 3', 
 230             'description': 'md5:6143220c661f9b0aae73b245e5d898bb', 
 231             'thumbnail': r're:^https?://.*\.jpg$', 
 232             'timestamp': 1495994400, 
 233             'upload_date': '20170528', 
 235             'series': 'GRIP - Das Motormagazin', 
 237             'episode_number': 405, 
 238             'episode': 'Der neue Porsche 911 GT 3', 
 241         # rtl2, episode with season navigation 
 242         'url': 'https://www.tvnow.de/shows/armes-deutschland-11471/staffel-3/episode-14-bernd-steht-seit-der-trennung-von-seiner-frau-allein-da-526124', 
 243         'only_matching': True, 
 246         'url': 'https://www.tvnow.de/serien/alarm-fuer-cobra-11-die-autobahnpolizei-1815/staffel-13/episode-5-auf-eigene-faust-pilot-366822', 
 247         'only_matching': True, 
 250         'url': 'https://www.tvnow.de/shows/die-lustigsten-schlamassel-der-welt-1221/staffel-2/episode-14-u-a-ketchup-effekt-364120', 
 251         'only_matching': True, 
 254         'url': 'https://www.tvnow.de/shows/startup-news-10674/staffel-2/episode-39-goetter-in-weiss-387630', 
 255         'only_matching': True, 
 258         'url': 'https://www.tvnow.de/shows/auto-mobil-174/2017-11/episode-46-neues-vom-automobilmarkt-2017-11-19-17-00-00-380072', 
 259         'only_matching': True, 
 261         'url': 'https://www.tvnow.de/shows/grip-das-motormagazin-1669/2017-05/episode-405-der-neue-porsche-911-gt-3-331082', 
 262         'only_matching': True, 
 265     def _extract_video(self, info, url, display_id): 
 266         config = info['config'] 
 267         source = config['source'] 
 269         video_id = compat_str(info.get('id') or source['videoId']) 
 270         title = source['title'].strip() 
 273         for manifest_url in (info.get('manifest') or {}).values(): 
 276             manifest_url = update_url_query(manifest_url, {'filter': ''}) 
 277             path = self._search_regex(r'https?://[^/]+/(.+?)\.ism/', manifest_url, 'path') 
 282             def url_repl(proto, suffix): 
 284                     r'(?:hls|dash|hss)([.-])', proto + r'\1', re.sub( 
 285                         r'\.ism/(?:[^.]*\.(?:m3u8|mpd)|[Mm]anifest)', 
 286                         '.ism/' + suffix, manifest_url)) 
 288             formats = self._extract_mpd_formats( 
 289                 url_repl('dash', '.mpd'), video_id, 
 290                 mpd_id='dash', fatal=False) 
 291             formats.extend(self._extract_ism_formats( 
 292                 url_repl('hss', 'Manifest'), 
 293                 video_id, ism_id='mss', fatal=False)) 
 294             formats.extend(self._extract_m3u8_formats( 
 295                 url_repl('hls', '.m3u8'), video_id, 'mp4', 
 296                 'm3u8_native', m3u8_id='hls', fatal=False)) 
 300             if try_get(info, lambda x: x['rights']['isDrm']): 
 301                 raise ExtractorError( 
 302                     'Video %s is DRM protected' % video_id, expected=True) 
 303             if try_get(config, lambda x: x['boards']['geoBlocking']['block']): 
 304                 raise self.raise_geo_restricted() 
 305             if not info.get('free', True): 
 306                 raise ExtractorError( 
 307                     'Video %s is not available for free' % video_id, expected=True) 
 308         self._sort_formats(formats) 
 310         description = source.get('description') 
 311         thumbnail = url_or_none(source.get('poster')) 
 312         timestamp = unified_timestamp(source.get('previewStart')) 
 313         duration = parse_duration(source.get('length')) 
 315         series = source.get('format') 
 316         season_number = int_or_none(self._search_regex( 
 317             r'staffel-(\d+)', url, 'season number', default=None)) 
 318         episode_number = int_or_none(self._search_regex( 
 319             r'episode-(\d+)', url, 'episode number', default=None)) 
 323             'display_id': display_id, 
 325             'description': description, 
 326             'thumbnail': thumbnail, 
 327             'timestamp': timestamp, 
 328             'duration': duration, 
 330             'season_number': season_number, 
 331             'episode_number': episode_number, 
 336     def _real_extract(self, url): 
 337         display_id, video_id = re.match(self._VALID_URL, url).groups() 
 338         info = self._call_api('player/' + video_id, video_id) 
 339         return self._extract_video(info, video_id, display_id) 
 343 class TVNowListBaseIE(TVNowNewBaseIE
): 
 344     _SHOW_VALID_URL 
= r
'''(?x) 
 347                             (?:www\.)?tvnow\.(?:de|at|ch)/(?:shows|serien)/ 
 348                             [^/?#&]+-(?P<show_id>\d+) 
 353     def suitable(cls
, url
): 
 354         return (False if TVNowNewIE
.suitable(url
) 
 355                 else super(TVNowListBaseIE
, cls
).suitable(url
)) 
 357     def _extract_items(self
, url
, show_id
, list_id
, query
): 
 358         items 
= self
._call
_api
( 
 359             'teaserrow/format/episode/' + show_id
, list_id
, 
 360             query
=query
)['items'] 
 364             if not isinstance(item
, dict): 
 366             item_url 
= urljoin(url
, item
.get('url')) 
 369             video_id 
= str_or_none(item
.get('id') or item
.get('videoId')) 
 370             item_title 
= item
.get('subheadline') or item
.get('text') 
 371             entries
.append(self
.url_result( 
 372                 item_url
, ie
=TVNowNewIE
.ie_key(), video_id
=video_id
, 
 373                 video_title
=item_title
)) 
 375         return self
.playlist_result(entries
, '%s/%s' % (show_id
, list_id
)) 
 378 class TVNowSeasonIE(TVNowListBaseIE
): 
 379     _VALID_URL 
= r
'%s/staffel-(?P<id>\d+)' % TVNowListBaseIE
._SHOW
_VALID
_URL
 
 381         'url': 'https://www.tvnow.de/serien/alarm-fuer-cobra-11-die-autobahnpolizei-1815/staffel-13', 
 385         'playlist_mincount': 22, 
 388     def _real_extract(self
, url
): 
 389         _
, show_id
, season_id 
= re
.match(self
._VALID
_URL
, url
).groups() 
 390         return self
._extract
_items
( 
 391             url
, show_id
, season_id
, {'season': season_id
}) 
 394 class TVNowAnnualIE(TVNowListBaseIE
): 
 395     _VALID_URL 
= r
'%s/(?P<year>\d{4})-(?P<month>\d{2})' % TVNowListBaseIE
._SHOW
_VALID
_URL
 
 397         'url': 'https://www.tvnow.de/shows/grip-das-motormagazin-1669/2017-05', 
 399             'id': '1669/2017-05', 
 401         'playlist_mincount': 2, 
 404     def _real_extract(self
, url
): 
 405         _
, show_id
, year
, month 
= re
.match(self
._VALID
_URL
, url
).groups() 
 406         return self
._extract
_items
( 
 407             url
, show_id
, '%s-%s' % (year
, month
), { 
 413 class TVNowShowIE(TVNowListBaseIE
): 
 414     _VALID_URL 
= TVNowListBaseIE
._SHOW
_VALID
_URL
 
 416         # annual navigationType 
 417         'url': 'https://www.tvnow.de/shows/grip-das-motormagazin-1669', 
 421         'playlist_mincount': 73, 
 423         # season navigationType 
 424         'url': 'https://www.tvnow.de/shows/armes-deutschland-11471', 
 428         'playlist_mincount': 3, 
 432     def suitable(cls
, url
): 
 433         return (False if TVNowNewIE
.suitable(url
) or TVNowSeasonIE
.suitable(url
) or TVNowAnnualIE
.suitable(url
) 
 434                 else super(TVNowShowIE
, cls
).suitable(url
)) 
 436     def _real_extract(self
, url
): 
 437         base_url
, show_id 
= re
.match(self
._VALID
_URL
, url
).groups() 
 439         result 
= self
._call
_api
( 
 440             'teaserrow/format/navigation/' + show_id
, show_id
) 
 442         items 
= result
['items'] 
 445         navigation 
= result
.get('navigationType') 
 446         if navigation 
== 'annual': 
 448                 if not isinstance(item
, dict): 
 450                 year 
= int_or_none(item
.get('year')) 
 453                 months 
= item
.get('months') 
 454                 if not isinstance(months
, list): 
 456                 for month_dict 
in months
: 
 457                     if not isinstance(month_dict
, dict) or not month_dict
: 
 459                     month_number 
= int_or_none(list(month_dict
.keys())[0]) 
 460                     if month_number 
is None: 
 462                     entries
.append(self
.url_result( 
 463                         '%s/%04d-%02d' % (base_url
, year
, month_number
), 
 464                         ie
=TVNowAnnualIE
.ie_key())) 
 465         elif navigation 
== 'season': 
 467                 if not isinstance(item
, dict): 
 469                 season_number 
= int_or_none(item
.get('season')) 
 470                 if season_number 
is None: 
 472                 entries
.append(self
.url_result( 
 473                     '%s/staffel-%d' % (base_url
, season_number
), 
 474                     ie
=TVNowSeasonIE
.ie_key())) 
 476             raise ExtractorError('Unknown navigationType') 
 478         return self
.playlist_result(entries
, show_id
)