2 from __future__ 
import unicode_literals
 
   6 from .common 
import InfoExtractor
 
   7 from ..compat 
import compat_str
 
  22 class ZDFBaseIE(InfoExtractor
): 
  23     def _call_api(self
, url
, player
, referrer
, video_id
, item
): 
  24         return self
._download
_json
( 
  25             url
, video_id
, 'Downloading JSON %s' % item
, 
  28                 'Api-Auth': 'Bearer %s' % player
['apiToken'], 
  31     def _extract_player(self
, webpage
, video_id
, fatal
=True): 
  32         return self
._parse
_json
( 
  34                 r
'(?s)data-zdfplayer-jsb=(["\'])(?P
<json
>{.+?
})\
1', webpage, 
  35                 'player JSON
', default='{}' if not fatal else NO_DEFAULT, 
  40 class ZDFIE(ZDFBaseIE): 
  41     _VALID_URL = r'https?
://www\
.zdf\
.de
/(?
:[^
/]+/)*(?P
<id>[^
/?
]+)\
.html
' 
  42     _QUALITIES = ('auto
', 'low
', 'med
', 'high
', 'veryhigh
') 
  45         'url
': 'https
://www
.zdf
.de
/service
-und
-hilfe
/die
-neue
-zdf
-mediathek
/zdfmediathek
-trailer
-100.html
', 
  47             'id': 'zdfmediathek
-trailer
-100', 
  49             'title
': 'Die neue ZDFmediathek
', 
  50             'description
': 'md5
:3003d36487fb9a5ea2d1ff60beb55e8d
', 
  52             'timestamp
': 1477627200, 
  53             'upload_date
': '20161028', 
  56         'url
': 'https
://www
.zdf
.de
/filme
/taunuskrimi
/die
-lebenden
-und
-die
-toten
-1---ein
-taunuskrimi
-100.html
', 
  57         'only_matching
': True, 
  59         'url
': 'https
://www
.zdf
.de
/dokumentation
/planet
-e
/planet
-e
-uebersichtsseite
-weitere
-dokumentationen
-von
-planet
-e
-100.html
', 
  60         'only_matching
': True, 
  64     def _extract_subtitles(src): 
  66         for caption in try_get(src, lambda x: x['captions
'], list) or []: 
  67             subtitle_url = caption.get('uri
') 
  68             if subtitle_url and isinstance(subtitle_url, compat_str): 
  69                 lang = caption.get('language
', 'deu
') 
  70                 subtitles.setdefault(lang, []).append({ 
  75     def _extract_format(self, video_id, formats, format_urls, meta): 
  76         format_url = meta.get('url
') 
  77         if not format_url or not isinstance(format_url, compat_str): 
  79         if format_url in format_urls: 
  81         format_urls.add(format_url) 
  82         mime_type = meta.get('mimeType
') 
  83         ext = determine_ext(format_url) 
  84         if mime_type == 'application
/x
-mpegURL
' or ext == 'm3u8
': 
  85             formats.extend(self._extract_m3u8_formats( 
  86                 format_url, video_id, 'mp4
', m3u8_id='hls
', 
  87                 entry_protocol='m3u8_native
', fatal=False)) 
  88         elif mime_type == 'application
/f4m
+xml
' or ext == 'f4m
': 
  89             formats.extend(self._extract_f4m_formats( 
  90                 update_url_query(format_url, {'hdcore
': '3.7.0'}), video_id, f4m_id='hds
', fatal=False)) 
  92             f = parse_codecs(meta.get('mimeCodec
')) 
  94             for p in (meta.get('type'), meta.get('quality
')): 
  95                 if p and isinstance(p, compat_str): 
  99                 'format_id
': '-'.join(format_id), 
 100                 'format_note
': meta.get('quality
'), 
 101                 'language
': meta.get('language
'), 
 102                 'quality
': qualities(self._QUALITIES)(meta.get('quality
')), 
 107     def _extract_entry(self, url, player, content, video_id): 
 108         title = content.get('title
') or content['teaserHeadline
'] 
 110         t = content['mainVideoContent
']['http
://zdf
.de
/rels
/target
'] 
 112         ptmd_path = t.get('http
://zdf
.de
/rels
/streams
/ptmd
') 
 116                 'http
://zdf
.de
/rels
/streams
/ptmd
-template
'].replace( 
 117                 '{playerId}
', 'portal
') 
 119         ptmd = self._call_api( 
 120             urljoin(url, ptmd_path), player, url, video_id, 'metadata
') 
 124         for p in ptmd['priorityList
']: 
 125             formitaeten = p.get('formitaeten
') 
 126             if not isinstance(formitaeten, list): 
 128             for f in formitaeten: 
 129                 f_qualities = f.get('qualities
') 
 130                 if not isinstance(f_qualities, list): 
 132                 for quality in f_qualities: 
 133                     tracks = try_get(quality, lambda x: x['audio
']['tracks
'], list) 
 137                         self._extract_format( 
 138                             video_id, formats, track_uris, { 
 139                                 'url
': track.get('uri
'), 
 140                                 'type': f.get('type'), 
 141                                 'mimeType
': f.get('mimeType
'), 
 142                                 'quality
': quality.get('quality
'), 
 143                                 'language
': track.get('language
'), 
 145         self._sort_formats(formats) 
 149             content, lambda x: x['teaserImageRef
']['layouts
'], dict) 
 151             for layout_key, layout_url in layouts.items(): 
 152                 if not isinstance(layout_url, compat_str): 
 156                     'format_id
': layout_key, 
 158                 mobj = re.search(r'(?P
<width
>\d
+)x(?P
<height
>\d
+)', layout_key) 
 161                         'width
': int(mobj.group('width
')), 
 162                         'height
': int(mobj.group('height
')), 
 164                 thumbnails.append(thumbnail) 
 169             'description
': content.get('leadParagraph
') or content.get('teasertext
'), 
 170             'duration
': int_or_none(t.get('duration
')), 
 171             'timestamp
': unified_timestamp(content.get('editorialDate
')), 
 172             'thumbnails
': thumbnails, 
 173             'subtitles
': self._extract_subtitles(ptmd), 
 177     def _extract_regular(self, url, player, video_id): 
 178         content = self._call_api( 
 179             player['content
'], player, url, video_id, 'content
') 
 180         return self._extract_entry(player['content
'], player, content, video_id) 
 182     def _extract_mobile(self, video_id): 
 183         document = self._download_json( 
 184             'https
://zdf
-cdn
.live
.cellular
.de
/mediathekV2
/document
/%s' % video_id, 
 185             video_id)['document
'] 
 187         title = document['titel
'] 
 191         for f in document['formitaeten
']: 
 192             self._extract_format(video_id, formats, format_urls, f) 
 193         self._sort_formats(formats) 
 196         teaser_bild = document.get('teaserBild
') 
 197         if isinstance(teaser_bild, dict): 
 198             for thumbnail_key, thumbnail in teaser_bild.items(): 
 199                 thumbnail_url = try_get( 
 200                     thumbnail, lambda x: x['url
'], compat_str) 
 203                         'url
': thumbnail_url, 
 205                         'width
': int_or_none(thumbnail.get('width
')), 
 206                         'height
': int_or_none(thumbnail.get('height
')), 
 212             'description
': document.get('beschreibung
'), 
 213             'duration
': int_or_none(document.get('length
')), 
 214             'timestamp
': unified_timestamp(try_get( 
 215                 document, lambda x: x['meta
']['editorialDate
'], compat_str)), 
 216             'thumbnails
': thumbnails, 
 217             'subtitles
': self._extract_subtitles(document), 
 221     def _real_extract(self, url): 
 222         video_id = self._match_id(url) 
 224         webpage = self._download_webpage(url, video_id, fatal=False) 
 226             player = self._extract_player(webpage, url, fatal=False) 
 228                 return self._extract_regular(url, player, video_id) 
 230         return self._extract_mobile(video_id) 
 233 class ZDFChannelIE(ZDFBaseIE): 
 234     _VALID_URL = r'https?
://www\
.zdf\
.de
/(?
:[^
/]+/)*(?P
<id>[^
/?
#&]+)' 
 236         'url': 'https://www.zdf.de/sport/das-aktuelle-sportstudio', 
 238             'id': 'das-aktuelle-sportstudio', 
 239             'title': 'das aktuelle sportstudio | ZDF', 
 241         'playlist_count': 21, 
 243         'url': 'https://www.zdf.de/dokumentation/planet-e', 
 246             'title': 'planet e.', 
 250         'url': 'https://www.zdf.de/filme/taunuskrimi/', 
 251         'only_matching': True, 
 255     def suitable(cls
, url
): 
 256         return False if ZDFIE
.suitable(url
) else super(ZDFChannelIE
, cls
).suitable(url
) 
 258     def _real_extract(self
, url
): 
 259         channel_id 
= self
._match
_id
(url
) 
 261         webpage 
= self
._download
_webpage
(url
, channel_id
) 
 264             self
.url_result(item_url
, ie
=ZDFIE
.ie_key()) 
 265             for item_url 
in orderedSet(re
.findall( 
 266                 r
'data-plusbar-url=["\'](http
.+?\
.html
)', webpage))] 
 268         return self.playlist_result( 
 269             entries, channel_id, self._og_search_title(webpage, fatal=False)) 
 272         player = self._extract_player(webpage, channel_id) 
 274         channel_id = self._search_regex( 
 275             r'docId\s
*:\s
*(["\'])(?P<id>(?!\1).+?)\1', webpage, 
 276             'channel id', group='id') 
 278         channel = self._call_api( 
 279             'https://api.zdf.de/content/documents/%s.json' % channel_id, 
 280             player, url, channel_id) 
 283         for module in channel['module']: 
 284             for teaser in try_get(module, lambda x: x['teaser'], list) or []: 
 286                     teaser, lambda x: x['http://zdf.de/rels/target'], dict) 
 289                 items.extend(try_get( 
 291                     lambda x: x['resultsWithVideo']['http://zdf.de/rels/search/results'], 
 293             items.extend(try_get( 
 295                 lambda x: x['filterRef']['resultsWithVideo']['http://zdf.de/rels/search/results'], 
 301             t = try_get(item, lambda x: x['http://zdf.de/rels/target'], dict) 
 304             sharing_url = t.get('http://zdf.de/rels/sharing-url') 
 305             if not sharing_url or not isinstance(sharing_url, compat_str): 
 307             if sharing_url in entry_urls: 
 309             entry_urls.add(sharing_url) 
 310             entries.append(self.url_result( 
 311                 sharing_url, ie=ZDFIE.ie_key(), video_id=t.get('id'))) 
 313         return self.playlist_result(entries, channel_id, channel.get('title'))