]>
Raphaƫl G. Git Repositories - youtubedl/blob - youtube_dl/extractor/metacritic.py
2 import xml
.etree
.ElementTree
5 from .common
import InfoExtractor
8 class MetacriticIE(InfoExtractor
):
9 _VALID_URL
= r
'https?://www\.metacritic\.com/.+?/trailers/(?P<id>\d+)'
12 u
'url': u
'http://www.metacritic.com/game/playstation-4/infamous-second-son/trailers/3698222',
13 u
'file': u
'3698222.mp4',
15 u
'title': u
'inFamous: Second Son - inSide Sucker Punch: Smoke & Mirrors',
16 u
'description': u
'Take a peak behind-the-scenes to see how Sucker Punch brings smoke into the universe of inFAMOUS Second Son on the PS4.',
21 def _real_extract(self
, url
):
22 mobj
= re
.match(self
._VALID
_URL
, url
)
23 video_id
= mobj
.group('id')
24 webpage
= self
._download
_webpage
(url
, video_id
)
25 # The xml is not well formatted, there are raw '&'
26 info_xml
= self
._download
_webpage
('http://www.metacritic.com/video_data?video=' + video_id
,
27 video_id
, u
'Downloading info xml').replace('&', '&')
28 info
= xml
.etree
.ElementTree
.fromstring(info_xml
.encode('utf-8'))
30 clip
= next(c
for c
in info
.findall('playList/clip') if c
.find('id').text
== video_id
)
32 for videoFile
in clip
.findall('httpURI/videoFile'):
33 rate_str
= videoFile
.find('rate').text
34 video_url
= videoFile
.find('filePath').text
38 'format_id': rate_str
,
39 'rate': int(rate_str
),
41 formats
.sort(key
=operator
.itemgetter('rate'))
43 description
= self
._html
_search
_regex
(r
'<b>Description:</b>(.*?)</p>',
44 webpage
, u
'description', flags
=re
.DOTALL
)
48 'title': clip
.find('title').text
,
50 'description': description
,
51 'duration': int(clip
.find('duration').text
),
53 # TODO: Remove when #980 has been merged
54 info
.update(formats
[-1])