]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/mdr.py
   1 from __future__ 
import unicode_literals
 
   5 from .common 
import InfoExtractor
 
   8 class MDRIE(InfoExtractor
): 
   9     _VALID_URL 
= r
'^(?P<domain>https?://(?:www\.)?mdr\.de)/(?:.*)/(?P<type>video|audio)(?P<video_id>[^/_]+)(?:_|\.html)' 
  11     # No tests, MDR regularly deletes its videos 
  13         'url': 'http://www.mdr.de/fakt/video189002.html', 
  14         'only_matching': True, 
  17     def _real_extract(self
, url
): 
  18         m 
= re
.match(self
._VALID
_URL
, url
) 
  19         video_id 
= m
.group('video_id') 
  20         domain 
= m
.group('domain') 
  22         # determine title and media streams from webpage 
  23         html 
= self
._download
_webpage
(url
, video_id
) 
  25         title 
= self
._html
_search
_regex
(r
'<h[12]>(.*?)</h[12]>', html
, 'title') 
  26         xmlurl 
= self
._search
_regex
( 
  27             r
'dataURL:\'(/(?
:.+)/(?
:video|audio
)[0-9]+-avCustom
.xml
)', html, 'XML URL
') 
  29         doc = self._download_xml(domain + xmlurl, video_id) 
  31         for a in doc.findall('./assets
/asset
'): 
  32             url_el = a.find('.//progressiveDownloadUrl
') 
  35             abr = int(a.find('bitrateAudio
').text) // 1000 
  36             media_type = a.find('mediaType
').text 
  39                 'filesize
': int(a.find('fileSize
').text), 
  43             vbr_el = a.find('bitrateVideo
') 
  47                     'format_id
': '%s-%d' % (media_type, abr), 
  50                 vbr = int(vbr_el.text) // 1000 
  53                     'width
': int(a.find('frameWidth
').text), 
  54                     'height
': int(a.find('frameHeight
').text), 
  55                     'format_id
': '%s-%d' % (media_type, vbr), 
  57             formats.append(format) 
  58         self._sort_formats(formats)