]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/teamcoco.py
   1 from __future__ 
import unicode_literals
 
   5 from .common 
import InfoExtractor
 
   8 class TeamcocoIE(InfoExtractor
): 
   9     _VALID_URL 
= r
'http://teamcoco\.com/video/(?P<video_id>[0-9]+)?/?(?P<display_id>.*)' 
  12         'url': 'http://teamcoco.com/video/80187/conan-becomes-a-mary-kay-beauty-consultant', 
  14         'md5': '3f7746aa0dc86de18df7539903d399ea', 
  16             'title': 'Conan Becomes A Mary Kay Beauty Consultant', 
  17             'description': 'Mary Kay is perhaps the most trusted name in female beauty, so of course Conan is a natural choice to sell their products.' 
  21         'url': 'http://teamcoco.com/video/louis-ck-interview-george-w-bush', 
  23         'md5': 'cde9ba0fa3506f5f017ce11ead928f9a', 
  25             "description": "Louis C.K. got starstruck by George W. Bush, so what? Part one.", 
  26             "title": "Louis C.K. Interview Pt. 1 11/3/11" 
  31     def _real_extract(self
, url
): 
  32         mobj 
= re
.match(self
._VALID
_URL
, url
) 
  34         display_id 
= mobj
.group('display_id') 
  35         webpage 
= self
._download
_webpage
(url
, display_id
) 
  37         video_id 
= mobj
.group("video_id") 
  39             video_id 
= self
._html
_search
_regex
( 
  40                 r
'data-node-id="(\d+?)"', 
  43         data_url 
= 'http://teamcoco.com/cvp/2.0/%s.xml' % video_id
 
  44         data 
= self
._download
_xml
( 
  45             data_url
, display_id
, 'Downloading data webpage') 
  47         qualities 
= ['500k', '480p', '1000k', '720p', '1080p'] 
  49         for filed 
in data
.findall('files/file'): 
  50             if filed
.attrib
.get('playmode') == 'all': 
  51                 # it just duplicates one of the entries 
  54             m_format 
= re
.search(r
'(\d+(k|p))\.mp4', file_url
) 
  55             if m_format 
is not None: 
  56                 format_id 
= m_format
.group(1) 
  58                 format_id 
= filed
.attrib
['bitrate'] 
  60                 int(filed
.attrib
['bitrate']) 
  61                 if filed
.attrib
['bitrate'].isdigit() 
  65                 quality 
= qualities
.index(format_id
) 
  72                 'format_id': format_id
, 
  76         self
._sort
_formats
(formats
) 
  80             'display_id': display_id
, 
  82             'title': self
._og
_search
_title
(webpage
), 
  83             'thumbnail': self
._og
_search
_thumbnail
(webpage
), 
  84             'description': self
._og
_search
_description
(webpage
),