]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/orf.py
4 import xml
. etree
. ElementTree
7 from . common
import InfoExtractor
14 class ORFIE ( InfoExtractor
):
15 _VALID_URL
= r
'https?://tvthek.orf.at/(programs/.+?/episodes|topics/.+?)/(?P<id>\d+)'
18 u
'url' : u
'http://tvthek.orf.at/programs/1171769-Wetter-ZIB/episodes/6557323-Wetter' ,
19 u
'file' : u
'6566957.flv' ,
22 u
'description' : u
'Christa Kummer, Marcus Wadsak und Kollegen präsentieren abwechselnd ihre täglichen Wetterprognosen für Österreich. \r \r Mehr Wetter unter wetter.ORF.at' ,
26 u
'skip_download' : True ,
30 def _real_extract ( self
, url
):
31 mobj
= re
. match ( self
._ VALID
_U RL
, url
)
32 playlist_id
= mobj
. group ( 'id' )
33 webpage
= self
._ download
_ webpage
( url
, playlist_id
)
35 flash_xml
= self
._ search
_ regex
( 'ORF.flashXML = \' (.+?) \' ' , webpage
, u
'flash xml' )
36 flash_xml
= compat_urlparse
. parse_qs ( 'xml=' + flash_xml
)[ 'xml' ][ 0 ]
37 flash_config
= xml
. etree
. ElementTree
. fromstring ( flash_xml
. encode ( 'utf-8' ))
38 playlist_json
= self
._ search
_ regex
( r
'playlist\' : \' ( \
[.* ?\
]) \' ', webpage, u' playlist
').replace(r' \" ',' "')
39 playlist = json.loads(playlist_json)
42 ns = '{http://tempuri.org/XMLSchema.xsd}'
43 xpath = ' %(ns)s Playlist/ %(ns)s Items/ %(ns)s Item' % {'ns': ns}
44 webpage_description = self._og_search_description(webpage)
45 for (i, (item, info)) in enumerate(zip(flash_config.findall(xpath), playlist), 1):
46 # Get best quality url
48 for q in ['Q6A', 'Q4A', 'Q1A']:
49 video_url = find_xpath_attr(item, ' %s VideoUrl' % ns, 'quality', q)
50 if video_url is not None:
51 rtmp_url = video_url.text
54 raise ExtractorError(u'Couldn \' t get video url: %s ' % info['id'])
55 description = self._html_search_regex(
56 r'id=" playlist_entry_
%s ".*?<p>(.*?)</p>' % i, webpage,
57 u'description', default=webpage_description, flags=re.DOTALL)
61 'title': info['title'],
64 'description': description,