]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/mdr.py
7aa0080d735fe811d6babf110156f4ab895edbdd
   3 from .common 
import InfoExtractor
 
   9 class MDRIE(InfoExtractor
): 
  10     _VALID_URL 
= r
'^(?P<domain>(?:https?://)?(?:www\.)?mdr\.de)/mediathek/(?:.*)/(?P<type>video|audio)(?P<video_id>[^/_]+)_.*' 
  12     # No tests, MDR regularly deletes its videos 
  14     def _real_extract(self
, url
): 
  15         m 
= re
.match(self
._VALID
_URL
, url
) 
  16         video_id 
= m
.group('video_id') 
  17         domain 
= m
.group('domain') 
  19         # determine title and media streams from webpage 
  20         html 
= self
._download
_webpage
(url
, video_id
) 
  22         title 
= self
._html
_search
_regex
(r
'<h2>(.*?)</h2>', html
, u
'title') 
  23         xmlurl 
= self
._search
_regex
( 
  24             r
'(/mediathek/(?:.+)/(?:video|audio)[0-9]+-avCustom.xml)', html
, u
'XML URL') 
  26         doc 
= self
._download
_xml
(domain 
+ xmlurl
, video_id
) 
  28         for a 
in doc
.findall('./assets/asset'): 
  29             url_el 
= a
.find('.//progressiveDownloadUrl') 
  32             abr 
= int(a
.find('bitrateAudio').text
) // 1000 
  33             media_type 
= a
.find('mediaType').text
 
  36                 'filesize': int(a
.find('fileSize').text
), 
  40             vbr_el 
= a
.find('bitrateVideo') 
  44                     'format_id': u
'%s-%d' % (media_type
, abr
), 
  47                 vbr 
= int(vbr_el
.text
) // 1000 
  50                     'width': int(a
.find('frameWidth').text
), 
  51                     'height': int(a
.find('frameHeight').text
), 
  52                     'format_id': u
'%s-%d' % (media_type
, vbr
), 
  54             formats
.append(format
) 
  56             raise ExtractorError(u
'Could not find any valid formats') 
  58         self
._sort
_formats
(formats
)