]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/teamcoco.py
9dcffead04d5466c14c6f2ff60995ecfb5435e6d
   1 from __future__ 
import unicode_literals
 
   5 from .common 
import InfoExtractor
 
  11 class TeamcocoIE(InfoExtractor
): 
  12     _VALID_URL 
= r
'http://teamcoco\.com/video/(?P<url_title>.*)' 
  14         'url': 'http://teamcoco.com/video/louis-ck-interview-george-w-bush', 
  16         'md5': 'cde9ba0fa3506f5f017ce11ead928f9a', 
  18             "description": "Louis C.K. got starstruck by George W. Bush, so what? Part one.", 
  19             "title": "Louis C.K. Interview Pt. 1 11/3/11" 
  23     def _real_extract(self
, url
): 
  24         mobj 
= re
.match(self
._VALID
_URL
, url
) 
  26             raise ExtractorError('Invalid URL: %s' % url
) 
  27         url_title 
= mobj
.group('url_title') 
  28         webpage 
= self
._download
_webpage
(url
, url_title
) 
  30         video_id 
= self
._html
_search
_regex
( 
  31             r
'<article class="video" data-id="(\d+?)"', 
  34         self
.report_extraction(video_id
) 
  36         data_url 
= 'http://teamcoco.com/cvp/2.0/%s.xml' % video_id
 
  37         data 
= self
._download
_xml
(data_url
, video_id
, 'Downloading data webpage') 
  39         qualities 
= ['500k', '480p', '1000k', '720p', '1080p'] 
  41         for filed 
in data
.findall('files/file'): 
  42             if filed
.attrib
.get('playmode') == 'all': 
  43                 # it just duplicates one of the entries 
  46             m_format 
= re
.search(r
'(\d+(k|p))\.mp4', file_url
) 
  47             if m_format 
is not None: 
  48                 format_id 
= m_format
.group(1) 
  50                 format_id 
= filed
.attrib
['bitrate'] 
  52                 int(filed
.attrib
['bitrate']) 
  53                 if filed
.attrib
['bitrate'].isdigit() 
  57                 quality 
= qualities
.index(format_id
) 
  64                 'format_id': format_id
, 
  68         self
._sort
_formats
(formats
) 
  73             'title': self
._og
_search
_title
(webpage
), 
  74             'thumbnail': self
._og
_search
_thumbnail
(webpage
), 
  75             'description': self
._og
_search
_description
(webpage
),