]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/teamcoco.py
   1 # -*- coding: utf-8 -*- 
   2 from __future__ 
import unicode_literals
 
   9 from .common 
import InfoExtractor
 
  14 from ..compat 
import compat_ord
 
  17 class TeamcocoIE(InfoExtractor
): 
  18     _VALID_URL 
= r
'http://teamcoco\.com/video/(?P<video_id>[0-9]+)?/?(?P<display_id>.*)' 
  21             'url': 'http://teamcoco.com/video/80187/conan-becomes-a-mary-kay-beauty-consultant', 
  22             'md5': '3f7746aa0dc86de18df7539903d399ea', 
  26                 'title': 'Conan Becomes A Mary Kay Beauty Consultant', 
  27                 'description': 'Mary Kay is perhaps the most trusted name in female beauty, so of course Conan is a natural choice to sell their products.', 
  32             'url': 'http://teamcoco.com/video/louis-ck-interview-george-w-bush', 
  33             'md5': 'cde9ba0fa3506f5f017ce11ead928f9a', 
  37                 'description': 'Louis C.K. got starstruck by George W. Bush, so what? Part one.', 
  38                 'title': 'Louis C.K. Interview Pt. 1 11/3/11', 
  43             'url': 'http://teamcoco.com/video/timothy-olyphant-drinking-whiskey', 
  47                 'title': 'Timothy Olyphant Raises A Toast To “Justified”', 
  48                 'description': 'md5:15501f23f020e793aeca761205e42c24', 
  51                 'skip_download': True,  # m3u8 downloads 
  56         r
'"eVar42"\s*:\s*(\d+)', 
  57         r
'Ginger\.TeamCoco\.openInApp\("video",\s*"([^"]+)"', 
  58         r
'"id_not"\s*:\s*(\d+)' 
  61     def _real_extract(self
, url
): 
  62         mobj 
= re
.match(self
._VALID
_URL
, url
) 
  64         display_id 
= mobj
.group('display_id') 
  65         webpage
, urlh 
= self
._download
_webpage
_handle
(url
, display_id
) 
  66         if 'src=expired' in urlh
.geturl(): 
  67             raise ExtractorError('This video is expired.', expected
=True) 
  69         video_id 
= mobj
.group('video_id') 
  71             video_id 
= self
._html
_search
_regex
( 
  72                 self
._VIDEO
_ID
_REGEXES
, webpage
, 'video id') 
  76         preload_codes 
= self
._html
_search
_regex
( 
  77             r
'(function.+)setTimeout\(function\(\)\{playlist', 
  78             webpage
, 'preload codes') 
  79         base64_fragments 
= re
.findall(r
'"([a-zA-z0-9+/=]+)"', preload_codes
) 
  80         base64_fragments
.remove('init') 
  82         def _check_sequence(cur_fragments
): 
  85             for i 
in range(len(cur_fragments
)): 
  86                 cur_sequence 
= (''.join(cur_fragments
[i
:] + cur_fragments
[:i
])).encode('ascii') 
  88                     raw_data 
= base64
.b64decode(cur_sequence
) 
  89                     if compat_ord(raw_data
[0]) == compat_ord('{'): 
  90                         return json
.loads(raw_data
.decode('utf-8')) 
  91                 except (TypeError, binascii
.Error
, UnicodeDecodeError, ValueError): 
  95             for i 
in range(len(base64_fragments
) + 1): 
  96                 for j 
in range(i
, len(base64_fragments
) + 1): 
  97                     data 
= _check_sequence(base64_fragments
[:i
] + base64_fragments
[j
:]) 
 101         self
.to_screen('Try to compute possible data sequence. This may take some time.') 
 105             raise ExtractorError( 
 106                 'Preload information could not be extracted', expected
=True) 
 109         get_quality 
= qualities(['500k', '480p', '1000k', '720p', '1080p']) 
 110         for filed 
in data
['files']: 
 111             if filed
['type'] == 'hls': 
 112                 formats
.extend(self
._extract
_m
3u8_formats
( 
 113                     filed
['url'], video_id
, ext
='mp4')) 
 115                 m_format 
= re
.search(r
'(\d+(k|p))\.mp4', filed
['url']) 
 116                 if m_format 
is not None: 
 117                     format_id 
= m_format
.group(1) 
 119                     format_id 
= filed
['bitrate'] 
 121                     int(filed
['bitrate']) 
 122                     if filed
['bitrate'].isdigit() 
 129                     'format_id': format_id
, 
 130                     'quality': get_quality(format_id
), 
 133         self
._sort
_formats
(formats
) 
 137             'display_id': display_id
, 
 139             'title': data
['title'], 
 140             'thumbnail': data
.get('thumb', {}).get('href'), 
 141             'description': data
.get('teaser'), 
 142             'duration': data
.get('duration'), 
 143             'age_limit': self
._family
_friendly
_search
(webpage
),