]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/tvnow.py
   2 from __future__ 
import unicode_literals
 
   6 from .common 
import InfoExtractor
 
   7 from ..compat 
import compat_str
 
  19 class TVNowBaseIE(InfoExtractor
): 
  21         'id', 'title', 'free', 'geoblocked', 'articleLong', 'articleShort', 
  22         'broadcastStartDate', 'isDrm', 'duration', 'season', 'episode', 
  23         'manifest.dashclear', 'manifest.hlsclear', 'manifest.smoothclear', 
  24         'format.title', 'format.defaultImage169Format', 'format.defaultImage169Logo') 
  26     def _call_api(self
, path
, video_id
, query
): 
  27         return self
._download
_json
( 
  28             'https://api.tvnow.de/v3/' + path
, video_id
, query
=query
) 
  30     def _extract_video(self
, info
, display_id
): 
  31         video_id 
= compat_str(info
['id']) 
  35         for manifest_url 
in (info
.get('manifest') or {}).values(): 
  38             manifest_url 
= update_url_query(manifest_url
, {'filter': ''}) 
  39             path 
= self
._search
_regex
(r
'https?://[^/]+/(.+?)\.ism/', manifest_url
, 'path') 
  44             def url_repl(proto
, suffix
): 
  46                     r
'(?:hls|dash|hss)([.-])', proto 
+ r
'\1', re
.sub( 
  47                         r
'\.ism/(?:[^.]*\.(?:m3u8|mpd)|[Mm]anifest)', 
  48                         '.ism/' + suffix
, manifest_url
)) 
  50             def make_urls(proto
, suffix
): 
  51                 urls 
= [url_repl(proto
, suffix
)] 
  52                 hd_url 
= urls
[0].replace('/manifest/', '/ngvod/') 
  57             for man_url 
in make_urls('dash', '.mpd'): 
  58                 formats 
= self
._extract
_mpd
_formats
( 
  59                     man_url
, video_id
, mpd_id
='dash', fatal
=False) 
  60             for man_url 
in make_urls('hss', 'Manifest'): 
  61                 formats
.extend(self
._extract
_ism
_formats
( 
  62                     man_url
, video_id
, ism_id
='mss', fatal
=False)) 
  63             for man_url 
in make_urls('hls', '.m3u8'): 
  64                 formats
.extend(self
._extract
_m
3u8_formats
( 
  65                     man_url
, video_id
, 'mp4', 'm3u8_native', m3u8_id
='hls', 
  72                     'Video %s is DRM protected' % video_id
, expected
=True) 
  73             if info
.get('geoblocked'): 
  74                 raise self
.raise_geo_restricted() 
  75             if not info
.get('free', True): 
  77                     'Video %s is not available for free' % video_id
, expected
=True) 
  78         self
._sort
_formats
(formats
) 
  80         description 
= info
.get('articleLong') or info
.get('articleShort') 
  81         timestamp 
= parse_iso8601(info
.get('broadcastStartDate'), ' ') 
  82         duration 
= parse_duration(info
.get('duration')) 
  84         f 
= info
.get('format', {}) 
  87             'url': 'https://aistvnow-a.akamaihd.net/tvnow/movie/%s' % video_id
, 
  89         thumbnail 
= f
.get('defaultImage169Format') or f
.get('defaultImage169Logo') 
  97             'display_id': display_id
, 
  99             'description': description
, 
 100             'thumbnails': thumbnails
, 
 101             'timestamp': timestamp
, 
 102             'duration': duration
, 
 103             'series': f
.get('title'), 
 104             'season_number': int_or_none(info
.get('season')), 
 105             'episode_number': int_or_none(info
.get('episode')), 
 111 class TVNowIE(TVNowBaseIE
): 
 112     _VALID_URL 
= r
'''(?x) 
 114                         (?:www\.)?tvnow\.(?:de|at|ch)/(?P<station>[^/]+)/ 
 116                         (?!(?:list|jahr)(?:/|$))(?P<id>[^/?\#&]+) 
 120     def suitable(cls
, url
): 
 121         return (False if TVNowNewIE
.suitable(url
) or TVNowSeasonIE
.suitable(url
) or TVNowAnnualIE
.suitable(url
) or TVNowShowIE
.suitable(url
) 
 122                 else super(TVNowIE
, cls
).suitable(url
)) 
 125         'url': 'https://www.tvnow.de/rtl2/grip-das-motormagazin/der-neue-porsche-911-gt-3/player', 
 128             'display_id': 'grip-das-motormagazin/der-neue-porsche-911-gt-3', 
 130             'title': 'Der neue Porsche 911 GT 3', 
 131             'description': 'md5:6143220c661f9b0aae73b245e5d898bb', 
 132             'timestamp': 1495994400, 
 133             'upload_date': '20170528', 
 135             'series': 'GRIP - Das Motormagazin', 
 137             'episode_number': 405, 
 138             'episode': 'Der neue Porsche 911 GT 3', 
 142         'url': 'https://www.tvnow.de/rtl2/armes-deutschland/episode-0008/player', 
 143         'only_matching': True, 
 146         'url': 'https://www.tvnow.de/nitro/alarm-fuer-cobra-11-die-autobahnpolizei/auf-eigene-faust-pilot/player', 
 147         'only_matching': True, 
 150         'url': 'https://www.tvnow.de/superrtl/die-lustigsten-schlamassel-der-welt/u-a-ketchup-effekt/player', 
 151         'only_matching': True, 
 154         'url': 'https://www.tvnow.de/ntv/startup-news/goetter-in-weiss/player', 
 155         'only_matching': True, 
 158         'url': 'https://www.tvnow.de/vox/auto-mobil/neues-vom-automobilmarkt-2017-11-19-17-00-00/player', 
 159         'only_matching': True, 
 162         'url': 'https://www.tvnow.de/rtlplus/op-ruft-dr-bruckner/die-vernaehte-frau/player', 
 163         'only_matching': True, 
 165         'url': 'https://www.tvnow.de/rtl2/grip-das-motormagazin/der-neue-porsche-911-gt-3', 
 166         'only_matching': True, 
 169     def _real_extract(self
, url
): 
 170         mobj 
= re
.match(self
._VALID
_URL
, url
) 
 171         display_id 
= '%s/%s' % mobj
.group(2, 3) 
 173         info 
= self
._call
_api
( 
 174             'movies/' + display_id
, display_id
, query
={ 
 175                 'fields': ','.join(self
._VIDEO
_FIELDS
), 
 178         return self
._extract
_video
(info
, display_id
) 
 181 class TVNowNewIE(InfoExtractor
): 
 182     _VALID_URL 
= r
'''(?x) 
 183                     (?P<base_url>https?:// 
 184                         (?:www\.)?tvnow\.(?:de|at|ch)/ 
 188                         episode-\d+-(?P<episode>[^/?$&]+)-(?P<id>\d+) 
 192         'url': 'https://www.tvnow.de/shows/grip-das-motormagazin-1669/2017-05/episode-405-der-neue-porsche-911-gt-3-331082', 
 193         'only_matching': True, 
 196     def _real_extract(self
, url
): 
 197         mobj 
= re
.match(self
._VALID
_URL
, url
) 
 198         base_url 
= re
.sub(r
'(?:shows|serien)', '_', mobj
.group('base_url')) 
 199         show
, episode 
= mobj
.group('show', 'episode') 
 200         return self
.url_result( 
 201             # Rewrite new URLs to the old format and use extraction via old API 
 202             # at api.tvnow.de as a loophole for bypassing premium content checks 
 203             '%s/%s/%s' % (base_url
, show
, episode
), 
 204             ie
=TVNowIE
.ie_key(), video_id
=mobj
.group('id')) 
 207 class TVNowNewBaseIE(InfoExtractor
): 
 208     def _call_api(self
, path
, video_id
, query
={}): 
 209         result 
= self
._download
_json
( 
 210             'https://apigw.tvnow.de/module/' + path
, video_id
, query
=query
) 
 211         error 
= result
.get('error') 
 213             raise ExtractorError( 
 214                 '%s said: %s' % (self
.IE_NAME
, error
), expected
=True) 
 219 TODO: new apigw.tvnow.de based version of TVNowIE. Replace old TVNowIE with it 
 220 when api.tvnow.de is shut down. This version can't bypass premium checks though. 
 221 class TVNowIE(TVNowNewBaseIE): 
 222     _VALID_URL = r'''(?x) 
 224                         (?:www\.)?tvnow\.(?:de|at|ch)/ 
 225                         (?:shows|serien)/[^/]+/ 
 227                         (?P<display_id>[^/?$&]+)-(?P<id>\d+) 
 231         # episode with annual navigation 
 232         'url': 'https://www.tvnow.de/shows/grip-das-motormagazin-1669/2017-05/episode-405-der-neue-porsche-911-gt-3-331082', 
 235             'display_id': 'grip-das-motormagazin/der-neue-porsche-911-gt-3', 
 237             'title': 'Der neue Porsche 911 GT 3', 
 238             'description': 'md5:6143220c661f9b0aae73b245e5d898bb', 
 239             'thumbnail': r're:^https?://.*\.jpg$', 
 240             'timestamp': 1495994400, 
 241             'upload_date': '20170528', 
 243             'series': 'GRIP - Das Motormagazin', 
 245             'episode_number': 405, 
 246             'episode': 'Der neue Porsche 911 GT 3', 
 249         # rtl2, episode with season navigation 
 250         'url': 'https://www.tvnow.de/shows/armes-deutschland-11471/staffel-3/episode-14-bernd-steht-seit-der-trennung-von-seiner-frau-allein-da-526124', 
 251         'only_matching': True, 
 254         'url': 'https://www.tvnow.de/serien/alarm-fuer-cobra-11-die-autobahnpolizei-1815/staffel-13/episode-5-auf-eigene-faust-pilot-366822', 
 255         'only_matching': True, 
 258         'url': 'https://www.tvnow.de/shows/die-lustigsten-schlamassel-der-welt-1221/staffel-2/episode-14-u-a-ketchup-effekt-364120', 
 259         'only_matching': True, 
 262         'url': 'https://www.tvnow.de/shows/startup-news-10674/staffel-2/episode-39-goetter-in-weiss-387630', 
 263         'only_matching': True, 
 266         'url': 'https://www.tvnow.de/shows/auto-mobil-174/2017-11/episode-46-neues-vom-automobilmarkt-2017-11-19-17-00-00-380072', 
 267         'only_matching': True, 
 269         'url': 'https://www.tvnow.de/shows/grip-das-motormagazin-1669/2017-05/episode-405-der-neue-porsche-911-gt-3-331082', 
 270         'only_matching': True, 
 273     def _extract_video(self, info, url, display_id): 
 274         config = info['config'] 
 275         source = config['source'] 
 277         video_id = compat_str(info.get('id') or source['videoId']) 
 278         title = source['title'].strip() 
 281         for manifest_url in (info.get('manifest') or {}).values(): 
 284             manifest_url = update_url_query(manifest_url, {'filter': ''}) 
 285             path = self._search_regex(r'https?://[^/]+/(.+?)\.ism/', manifest_url, 'path') 
 290             def url_repl(proto, suffix): 
 292                     r'(?:hls|dash|hss)([.-])', proto + r'\1', re.sub( 
 293                         r'\.ism/(?:[^.]*\.(?:m3u8|mpd)|[Mm]anifest)', 
 294                         '.ism/' + suffix, manifest_url)) 
 296             formats = self._extract_mpd_formats( 
 297                 url_repl('dash', '.mpd'), video_id, 
 298                 mpd_id='dash', fatal=False) 
 299             formats.extend(self._extract_ism_formats( 
 300                 url_repl('hss', 'Manifest'), 
 301                 video_id, ism_id='mss', fatal=False)) 
 302             formats.extend(self._extract_m3u8_formats( 
 303                 url_repl('hls', '.m3u8'), video_id, 'mp4', 
 304                 'm3u8_native', m3u8_id='hls', fatal=False)) 
 308             if try_get(info, lambda x: x['rights']['isDrm']): 
 309                 raise ExtractorError( 
 310                     'Video %s is DRM protected' % video_id, expected=True) 
 311             if try_get(config, lambda x: x['boards']['geoBlocking']['block']): 
 312                 raise self.raise_geo_restricted() 
 313             if not info.get('free', True): 
 314                 raise ExtractorError( 
 315                     'Video %s is not available for free' % video_id, expected=True) 
 316         self._sort_formats(formats) 
 318         description = source.get('description') 
 319         thumbnail = url_or_none(source.get('poster')) 
 320         timestamp = unified_timestamp(source.get('previewStart')) 
 321         duration = parse_duration(source.get('length')) 
 323         series = source.get('format') 
 324         season_number = int_or_none(self._search_regex( 
 325             r'staffel-(\d+)', url, 'season number', default=None)) 
 326         episode_number = int_or_none(self._search_regex( 
 327             r'episode-(\d+)', url, 'episode number', default=None)) 
 331             'display_id': display_id, 
 333             'description': description, 
 334             'thumbnail': thumbnail, 
 335             'timestamp': timestamp, 
 336             'duration': duration, 
 338             'season_number': season_number, 
 339             'episode_number': episode_number, 
 344     def _real_extract(self, url): 
 345         display_id, video_id = re.match(self._VALID_URL, url).groups() 
 346         info = self._call_api('player/' + video_id, video_id) 
 347         return self._extract_video(info, video_id, display_id) 
 351 class TVNowListBaseIE(TVNowNewBaseIE
): 
 352     _SHOW_VALID_URL 
= r
'''(?x) 
 355                             (?:www\.)?tvnow\.(?:de|at|ch)/(?:shows|serien)/ 
 356                             [^/?#&]+-(?P<show_id>\d+) 
 361     def suitable(cls
, url
): 
 362         return (False if TVNowNewIE
.suitable(url
) 
 363                 else super(TVNowListBaseIE
, cls
).suitable(url
)) 
 365     def _extract_items(self
, url
, show_id
, list_id
, query
): 
 366         items 
= self
._call
_api
( 
 367             'teaserrow/format/episode/' + show_id
, list_id
, 
 368             query
=query
)['items'] 
 372             if not isinstance(item
, dict): 
 374             item_url 
= urljoin(url
, item
.get('url')) 
 377             video_id 
= str_or_none(item
.get('id') or item
.get('videoId')) 
 378             item_title 
= item
.get('subheadline') or item
.get('text') 
 379             entries
.append(self
.url_result( 
 380                 item_url
, ie
=TVNowNewIE
.ie_key(), video_id
=video_id
, 
 381                 video_title
=item_title
)) 
 383         return self
.playlist_result(entries
, '%s/%s' % (show_id
, list_id
)) 
 386 class TVNowSeasonIE(TVNowListBaseIE
): 
 387     _VALID_URL 
= r
'%s/staffel-(?P<id>\d+)' % TVNowListBaseIE
._SHOW
_VALID
_URL
 
 389         'url': 'https://www.tvnow.de/serien/alarm-fuer-cobra-11-die-autobahnpolizei-1815/staffel-13', 
 393         'playlist_mincount': 22, 
 396     def _real_extract(self
, url
): 
 397         _
, show_id
, season_id 
= re
.match(self
._VALID
_URL
, url
).groups() 
 398         return self
._extract
_items
( 
 399             url
, show_id
, season_id
, {'season': season_id
}) 
 402 class TVNowAnnualIE(TVNowListBaseIE
): 
 403     _VALID_URL 
= r
'%s/(?P<year>\d{4})-(?P<month>\d{2})' % TVNowListBaseIE
._SHOW
_VALID
_URL
 
 405         'url': 'https://www.tvnow.de/shows/grip-das-motormagazin-1669/2017-05', 
 407             'id': '1669/2017-05', 
 409         'playlist_mincount': 2, 
 412     def _real_extract(self
, url
): 
 413         _
, show_id
, year
, month 
= re
.match(self
._VALID
_URL
, url
).groups() 
 414         return self
._extract
_items
( 
 415             url
, show_id
, '%s-%s' % (year
, month
), { 
 421 class TVNowShowIE(TVNowListBaseIE
): 
 422     _VALID_URL 
= TVNowListBaseIE
._SHOW
_VALID
_URL
 
 424         # annual navigationType 
 425         'url': 'https://www.tvnow.de/shows/grip-das-motormagazin-1669', 
 429         'playlist_mincount': 73, 
 431         # season navigationType 
 432         'url': 'https://www.tvnow.de/shows/armes-deutschland-11471', 
 436         'playlist_mincount': 3, 
 440     def suitable(cls
, url
): 
 441         return (False if TVNowNewIE
.suitable(url
) or TVNowSeasonIE
.suitable(url
) or TVNowAnnualIE
.suitable(url
) 
 442                 else super(TVNowShowIE
, cls
).suitable(url
)) 
 444     def _real_extract(self
, url
): 
 445         base_url
, show_id 
= re
.match(self
._VALID
_URL
, url
).groups() 
 447         result 
= self
._call
_api
( 
 448             'teaserrow/format/navigation/' + show_id
, show_id
) 
 450         items 
= result
['items'] 
 453         navigation 
= result
.get('navigationType') 
 454         if navigation 
== 'annual': 
 456                 if not isinstance(item
, dict): 
 458                 year 
= int_or_none(item
.get('year')) 
 461                 months 
= item
.get('months') 
 462                 if not isinstance(months
, list): 
 464                 for month_dict 
in months
: 
 465                     if not isinstance(month_dict
, dict) or not month_dict
: 
 467                     month_number 
= int_or_none(list(month_dict
.keys())[0]) 
 468                     if month_number 
is None: 
 470                     entries
.append(self
.url_result( 
 471                         '%s/%04d-%02d' % (base_url
, year
, month_number
), 
 472                         ie
=TVNowAnnualIE
.ie_key())) 
 473         elif navigation 
== 'season': 
 475                 if not isinstance(item
, dict): 
 477                 season_number 
= int_or_none(item
.get('season')) 
 478                 if season_number 
is None: 
 480                 entries
.append(self
.url_result( 
 481                     '%s/staffel-%d' % (base_url
, season_number
), 
 482                     ie
=TVNowSeasonIE
.ie_key())) 
 484             raise ExtractorError('Unknown navigationType') 
 486         return self
.playlist_result(entries
, show_id
)