]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/gamespot.py
cd3bbe65f5dd9891f5cf4a68fb1adcd8a45c4196
   2 import xml
.etree
.ElementTree
 
   4 from .common 
import InfoExtractor
 
  10 class GameSpotIE(InfoExtractor
): 
  11     _VALID_URL 
= r
'(?:http://)?(?:www\.)?gamespot\.com/.*-(?P<page_id>\d+)/?' 
  13         u
"url": u
"http://www.gamespot.com/arma-iii/videos/arma-iii-community-guide-sitrep-i-6410818/", 
  14         u
"file": u
"6410818.mp4", 
  15         u
"md5": u
"b2a30deaa8654fcccd43713a6b6a4825", 
  17             u
"title": u
"Arma 3 - Community Guide: SITREP I", 
  18             u
"upload_date": u
"20130627",  
  23     def _real_extract(self
, url
): 
  24         mobj 
= re
.match(self
._VALID
_URL
, url
) 
  25         page_id 
= mobj
.group('page_id') 
  26         webpage 
= self
._download
_webpage
(url
, page_id
) 
  27         video_id 
= self
._html
_search
_regex
([r
'"og:video" content=".*?\?id=(\d+)"', 
  28                                             r
'http://www\.gamespot\.com/videoembed/(\d+)'], 
  30         data 
= compat_urllib_parse
.urlencode({'id': video_id
, 'newplayer': '1'}) 
  31         info_url 
= 'http://www.gamespot.com/pages/video_player/xml.php?' + data
 
  32         info_xml 
= self
._download
_webpage
(info_url
, video_id
) 
  33         doc 
= xml
.etree
.ElementTree
.fromstring(info_xml
) 
  34         clip_el 
= doc
.find('./playList/clip') 
  36         http_urls 
= [{'url': node
.find('filePath').text
, 
  37                       'rate': int(node
.find('rate').text
)} 
  38             for node 
in clip_el
.find('./httpURI')] 
  39         best_quality 
= sorted(http_urls
, key
=lambda f
: f
['rate'])[-1] 
  40         video_url 
= best_quality
['url'] 
  41         title 
= clip_el
.find('./title').text
 
  42         ext 
= video_url
.rpartition('.')[2] 
  43         thumbnail_url 
= clip_el
.find('./screenGrabURI').text
 
  44         view_count 
= int(clip_el
.find('./views').text
) 
  45         upload_date 
= unified_strdate(clip_el
.find('./postDate').text
) 
  52             'thumbnail'   : thumbnail_url
, 
  53             'upload_date' : upload_date
, 
  54             'view_count'  : view_count
,