]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/teamcoco.py
b2a4b1fc05430558ad9b33a9aa3ce834107dc6e3
   1 # -*- coding: utf-8 -*- 
   2 from __future__ 
import unicode_literals
 
   9 from .common 
import InfoExtractor
 
  15 from ..compat 
import compat_ord
 
  18 class TeamcocoIE(InfoExtractor
): 
  19     _VALID_URL 
= r
'http://teamcoco\.com/video/(?P<video_id>[0-9]+)?/?(?P<display_id>.*)' 
  22             'url': 'http://teamcoco.com/video/80187/conan-becomes-a-mary-kay-beauty-consultant', 
  23             'md5': '3f7746aa0dc86de18df7539903d399ea', 
  27                 'title': 'Conan Becomes A Mary Kay Beauty Consultant', 
  28                 'description': 'Mary Kay is perhaps the most trusted name in female beauty, so of course Conan is a natural choice to sell their products.', 
  33             'url': 'http://teamcoco.com/video/louis-ck-interview-george-w-bush', 
  34             'md5': 'cde9ba0fa3506f5f017ce11ead928f9a', 
  38                 'description': 'Louis C.K. got starstruck by George W. Bush, so what? Part one.', 
  39                 'title': 'Louis C.K. Interview Pt. 1 11/3/11', 
  44             'url': 'http://teamcoco.com/video/timothy-olyphant-drinking-whiskey', 
  48                 'title': 'Timothy Olyphant Raises A Toast To “Justified”', 
  49                 'description': 'md5:15501f23f020e793aeca761205e42c24', 
  52                 'skip_download': True,  # m3u8 downloads 
  57         r
'"eVar42"\s*:\s*(\d+)', 
  58         r
'Ginger\.TeamCoco\.openInApp\("video",\s*"([^"]+)"', 
  59         r
'"id_not"\s*:\s*(\d+)' 
  62     def _real_extract(self
, url
): 
  63         mobj 
= re
.match(self
._VALID
_URL
, url
) 
  65         display_id 
= mobj
.group('display_id') 
  66         webpage
, urlh 
= self
._download
_webpage
_handle
(url
, display_id
) 
  67         if 'src=expired' in urlh
.geturl(): 
  68             raise ExtractorError('This video is expired.', expected
=True) 
  70         video_id 
= mobj
.group('video_id') 
  72             video_id 
= self
._html
_search
_regex
( 
  73                 self
._VIDEO
_ID
_REGEXES
, webpage
, 'video id') 
  77         preload_codes 
= self
._html
_search
_regex
( 
  78             r
'(function.+)setTimeout\(function\(\)\{playlist', 
  79             webpage
, 'preload codes') 
  80         base64_fragments 
= re
.findall(r
'"([a-zA-z0-9+/=]+)"', preload_codes
) 
  81         base64_fragments
.remove('init') 
  83         def _check_sequence(cur_fragments
): 
  86             for i 
in range(len(cur_fragments
)): 
  87                 cur_sequence 
= (''.join(cur_fragments
[i
:] + cur_fragments
[:i
])).encode('ascii') 
  89                     raw_data 
= base64
.b64decode(cur_sequence
) 
  90                     if compat_ord(raw_data
[0]) == compat_ord('{'): 
  91                         return json
.loads(raw_data
.decode('utf-8')) 
  92                 except (TypeError, binascii
.Error
, UnicodeDecodeError, ValueError): 
  96             for i 
in range(len(base64_fragments
) + 1): 
  97                 for j 
in range(i
, len(base64_fragments
) + 1): 
  98                     data 
= _check_sequence(base64_fragments
[:i
] + base64_fragments
[j
:]) 
 102         self
.to_screen('Try to compute possible data sequence. This may take some time.') 
 106             raise ExtractorError( 
 107                 'Preload information could not be extracted', expected
=True) 
 110         get_quality 
= qualities(['500k', '480p', '1000k', '720p', '1080p']) 
 111         for filed 
in data
['files']: 
 112             if determine_ext(filed
['url']) == 'm3u8': 
 113                 formats
.extend(self
._extract
_m
3u8_formats
( 
 114                     filed
['url'], video_id
, ext
='mp4')) 
 116                 m_format 
= re
.search(r
'(\d+(k|p))\.mp4', filed
['url']) 
 117                 if m_format 
is not None: 
 118                     format_id 
= m_format
.group(1) 
 120                     format_id 
= filed
['bitrate'] 
 122                     int(filed
['bitrate']) 
 123                     if filed
['bitrate'].isdigit() 
 130                     'format_id': format_id
, 
 131                     'quality': get_quality(format_id
), 
 134         self
._sort
_formats
(formats
) 
 138             'display_id': display_id
, 
 140             'title': data
['title'], 
 141             'thumbnail': data
.get('thumb', {}).get('href'), 
 142             'description': data
.get('teaser'), 
 143             'duration': data
.get('duration'), 
 144             'age_limit': self
._family
_friendly
_search
(webpage
),