]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/cnn.py
   2 import xml
.etree
.ElementTree
 
   4 from .common 
import InfoExtractor
 
   5 from ..utils 
import determine_ext
 
   8 class CNNIE(InfoExtractor
): 
   9     _VALID_URL 
= r
'''(?x)https?://(edition\.)?cnn\.com/video/(data/.+?|\?)/ 
  10         (?P<path>.+?/(?P<title>[^/]+?)(?:\.cnn|(?=&)))''' 
  13         u
'url': u
'http://edition.cnn.com/video/?/video/sports/2013/06/09/nadal-1-on-1.cnn', 
  14         u
'file': u
'sports_2013_06_09_nadal-1-on-1.cnn.mp4', 
  15         u
'md5': u
'3e6121ea48df7e2259fe73a0628605c4', 
  17             u
'title': u
'Nadal wins 8th French Open title', 
  18             u
'description': u
'World Sport\'s Amanda Davies chats with 2013 French Open champion Rafael Nadal.', 
  22         u
"url": u
"http://edition.cnn.com/video/?/video/us/2013/08/21/sot-student-gives-epic-speech.georgia-institute-of-technology&utm_source=feedburner&utm_medium=feed&utm_campaign=Feed%3A+rss%2Fcnn_topstories+%28RSS%3A+Top+Stories%29", 
  23         u
"file": u
"us_2013_08_21_sot-student-gives-epic-speech.georgia-institute-of-technology.mp4", 
  24         u
"md5": u
"b5cc60c60a3477d185af8f19a2a26f4e", 
  26             u
"title": "Student's epic speech stuns new freshmen", 
  27             u
"description": "A Georgia Tech student welcomes the incoming freshmen with an epic speech backed by music from \"2001: A Space Odyssey.\"" 
  31     def _real_extract(self
, url
): 
  32         mobj 
= re
.match(self
._VALID
_URL
, url
) 
  33         path 
= mobj
.group('path') 
  34         page_title 
= mobj
.group('title') 
  35         info_url 
= u
'http://cnn.com/video/data/3.0/%s/index.xml' % path
 
  36         info_xml 
= self
._download
_webpage
(info_url
, page_title
) 
  37         info 
= xml
.etree
.ElementTree
.fromstring(info_xml
.encode('utf-8')) 
  40         for f 
in info
.findall('files/file'): 
  41             mf 
= re
.match(r
'(\d+)x(\d+)(?:_(.*)k)?',f
.attrib
['bitrate']) 
  43                 formats
.append((int(mf
.group(1)), int(mf
.group(2)), int(mf
.group(3) or 0), f
.text
)) 
  44         formats 
= sorted(formats
) 
  45         (_
,_
,_
, video_path
) = formats
[-1] 
  46         video_url 
= 'http://ht.cdn.turner.com/cnn/big%s' % video_path
 
  48         thumbnails 
= sorted([((int(t
.attrib
['height']),int(t
.attrib
['width'])), t
.text
) for t 
in info
.findall('images/image')]) 
  49         thumbs_dict 
= [{'resolution': res
, 'url': t_url
} for (res
, t_url
) in thumbnails
] 
  51         return {'id': info
.attrib
['id'], 
  52                 'title': info
.find('headline').text
, 
  54                 'ext': determine_ext(video_url
), 
  55                 'thumbnail': thumbnails
[-1][1], 
  56                 'thumbnails': thumbs_dict
, 
  57                 'description': info
.find('description').text
,