]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/orf.py
cfca2a06352287038ff367e0f83fa67bd4cee782
   4 import xml
.etree
.ElementTree
 
   7 from .common 
import InfoExtractor
 
  14 class ORFIE(InfoExtractor
): 
  15     _VALID_URL 
= r
'https?://tvthek.orf.at/(programs/.+?/episodes|topics/.+?)/(?P<id>\d+)' 
  17     def _real_extract(self
, url
): 
  18         mobj 
= re
.match(self
._VALID
_URL
, url
) 
  19         playlist_id 
= mobj
.group('id') 
  20         webpage 
= self
._download
_webpage
(url
, playlist_id
) 
  22         flash_xml 
= self
._search
_regex
('ORF.flashXML = \'(.+?)\'', webpage
, u
'flash xml') 
  23         flash_xml 
= compat_urlparse
.parse_qs('xml='+flash_xml
)['xml'][0] 
  24         flash_config 
= xml
.etree
.ElementTree
.fromstring(flash_xml
.encode('utf-8')) 
  25         playlist_json 
= self
._search
_regex
(r
'playlist\': \'(\
[.*?\
])\'', webpage, u'playlist
').replace(r'\"','"') 
  26         playlist = json.loads(playlist_json) 
  29         ns = '{http://tempuri.org/XMLSchema.xsd}' 
  30         xpath = '%(ns)sPlaylist/%(ns)sItems/%(ns)sItem' % {'ns': ns} 
  31         webpage_description = self._og_search_description(webpage) 
  32         for (i, (item, info)) in enumerate(zip(flash_config.findall(xpath), playlist), 1): 
  33             # Get best quality url 
  35             for q in ['Q6A', 'Q4A', 'Q1A']: 
  36                 video_url = find_xpath_attr(item, '%sVideoUrl' % ns, 'quality', q) 
  37                 if video_url is not None: 
  38                     rtmp_url = video_url.text 
  41                 raise ExtractorError(u'Couldn\'t get video url: %s' % info['id']) 
  42             description = self._html_search_regex( 
  43                 r'id="playlist_entry_
%s".*?<p>(.*?)</p>' % i, webpage, 
  44                 u'description', default=webpage_description, flags=re.DOTALL) 
  48                 'title': info['title'], 
  51                 'description': description,