+ 'dcterms.abstract', webpage, 'description', default=None)
+ if description is None:
+ description = self._html_search_meta(
+ 'description', webpage, 'meta description')
+
+ # Thumbnail is sometimes not present.
+ # It is in the mobile version, but that seems to use a different URL
+ # structure altogether.
+ thumbnail = self._og_search_thumbnail(webpage, default=None)
+
+ media_streams = re.findall(r'''(?x)
+ mediaCollection\.addMediaStream\([0-9]+,\s*[0-9]+,\s*"[^"]*",\s*
+ "([^"]+)"''', webpage)
+
+ if media_streams:
+ QUALITIES = qualities(['lo', 'hi', 'hq'])
+ formats = []
+ for furl in set(media_streams):
+ if furl.endswith('.f4m'):
+ fid = 'f4m'
+ else:
+ fid_m = re.match(r'.*\.([^.]+)\.[^.]+$', furl)
+ fid = fid_m.group(1) if fid_m else None
+ formats.append({
+ 'quality': QUALITIES(fid),
+ 'format_id': fid,
+ 'url': furl,
+ })
+ else: # request JSON file
+ media_info = self._download_json(
+ 'http://www.ardmediathek.de/play/media/%s' % video_id, video_id)
+ # The second element of the _mediaArray contains the standard http urls
+ streams = media_info['_mediaArray'][1]['_mediaStreamArray']
+ if not streams:
+ if '"fsk"' in webpage:
+ raise ExtractorError('This video is only available after 20:00')
+
+ formats = []
+ for s in streams:
+ if type(s['_stream']) == list:
+ for index, url in enumerate(s['_stream'][::-1]):
+ quality = s['_quality'] + index
+ formats.append({
+ 'quality': quality,
+ 'url': url,
+ 'format_id': '%s-%s' % (determine_ext(url), quality)
+ })
+ continue
+
+ format = {
+ 'quality': s['_quality'],
+ 'url': s['_stream'],
+ }
+
+ format['format_id'] = '%s-%s' % (
+ determine_ext(format['url']), format['quality'])
+
+ formats.append(format)
+
+ self._sort_formats(formats)