Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/gamespot.py
Merge tag 'upstream/2013.07.02'
[youtubedl] / youtube_dl / extractor / gamespot.py
1 import re
2 import xml.etree.ElementTree
3
4 from .common import InfoExtractor
5 from ..utils import (
6 unified_strdate,
7 )
8
class GameSpotIE(InfoExtractor):
    """Information extractor for video pages on gamespot.com.

    A page URL has the form
    ``gamespot.com/<game>/videos/<slug>-<numeric id>/``. The numeric id is
    used to request the player XML document, from which the media URL and
    metadata are read.
    """

    # Group 3 is the numeric video id that terminates the slug.  It must be
    # ``\d+``: a character class such as ``[^/d]`` would mean "anything but a
    # slash or the letter d" and would accept non-numeric slugs as ids.
    _VALID_URL = r'(?:http://)?(?:www\.)?gamespot\.com/([^/]+)/videos/([^/]+)-(\d+)/'
    _TEST = {
        u"url": u"http://www.gamespot.com/arma-iii/videos/arma-iii-community-guide-sitrep-i-6410818/",
        u"file": u"6410818.mp4",
        u"md5": u"5569d64ca98db01f0177c934fe8c1e9b",
        u"info_dict": {
            u"title": u"Arma III - Community Guide: SITREP I",
            u"upload_date": u"20130627",
        }
    }

    def _real_extract(self, url):
        """Return the info dict (wrapped in a one-element list) for *url*.

        The video id is taken from the URL, the player XML for that id is
        downloaded and parsed, and the fields of its ``playList/clip``
        element are mapped onto the result.

        Raises AttributeError if *url* does not match ``_VALID_URL`` or if
        the XML lacks one of the elements read below, and ValueError if the
        ``views`` element is not an integer.
        """
        mobj = re.match(self._VALID_URL, url)
        # The pattern guarantees group 3 is digits only, so it can be used
        # as the id directly.
        video_id = mobj.group(3)

        info_url = "http://www.gamespot.com/pages/video_player/xml.php?id=" + video_id
        info_xml = self._download_webpage(info_url, video_id)
        doc = xml.etree.ElementTree.fromstring(info_xml)
        clip_el = doc.find('./playList/clip')

        video_url = clip_el.find('./URI').text
        title = clip_el.find('./title').text
        # File extension: everything after the last dot of the media URL.
        ext = video_url.rpartition('.')[2]
        thumbnail_url = clip_el.find('./screenGrabURI').text
        view_count = int(clip_el.find('./views').text)
        upload_date = unified_strdate(clip_el.find('./postDate').text)

        return [{
            'id': video_id,
            'url': video_url,
            'ext': ext,
            'title': title,
            'thumbnail': thumbnail_url,
            'upload_date': upload_date,
            'view_count': view_count,
        }]