]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/teamcoco.py
   2 from __future__ 
import unicode_literals
 
   8 from .common 
import InfoExtractor
 
  20 class TeamcocoIE(InfoExtractor
): 
  21     _VALID_URL 
= r
'https?://teamcoco\.com/video/(?P<video_id>[0-9]+)?/?(?P<display_id>.*)' 
  24             'url': 'http://teamcoco.com/video/80187/conan-becomes-a-mary-kay-beauty-consultant', 
  25             'md5': '3f7746aa0dc86de18df7539903d399ea', 
  29                 'title': 'Conan Becomes A Mary Kay Beauty Consultant', 
  30                 'description': 'Mary Kay is perhaps the most trusted name in female beauty, so of course Conan is a natural choice to sell their products.', 
  35             'url': 'http://teamcoco.com/video/louis-ck-interview-george-w-bush', 
  36             'md5': 'cde9ba0fa3506f5f017ce11ead928f9a', 
  40                 'description': 'Louis C.K. got starstruck by George W. Bush, so what? Part one.', 
  41                 'title': 'Louis C.K. Interview Pt. 1 11/3/11', 
  46             'url': 'http://teamcoco.com/video/timothy-olyphant-drinking-whiskey', 
  50                 'title': 'Timothy Olyphant Raises A Toast To “Justified”', 
  51                 'description': 'md5:15501f23f020e793aeca761205e42c24', 
  54                 'skip_download': True,  # m3u8 downloads 
  57             'url': 'http://teamcoco.com/video/full-episode-mon-6-1-joel-mchale-jake-tapper-and-musical-guest-courtney-barnett?playlist=x;eyJ0eXBlIjoidGFnIiwiaWQiOjl9', 
  61                 'title': 'Full Episode - Mon. 6/1 - Joel McHale, Jake Tapper, And Musical Guest Courtney Barnett', 
  62                 'description': 'Guests: Joel McHale, Jake Tapper, And Musical Guest Courtney Barnett', 
  65                 'skip_download': True,  # m3u8 downloads 
  70         r
'"eVar42"\s*:\s*(\d+)', 
  71         r
'Ginger\.TeamCoco\.openInApp\("video",\s*"([^"]+)"', 
  72         r
'"id_not"\s*:\s*(\d+)' 
  75     def _real_extract(self
, url
): 
  76         mobj 
= re
.match(self
._VALID
_URL
, url
) 
  78         display_id 
= mobj
.group('display_id') 
  79         webpage
, urlh 
= self
._download
_webpage
_handle
(url
, display_id
) 
  80         if 'src=expired' in urlh
.geturl(): 
  81             raise ExtractorError('This video is expired.', expected
=True) 
  83         video_id 
= mobj
.group('video_id') 
  85             video_id 
= self
._html
_search
_regex
( 
  86                 self
._VIDEO
_ID
_REGEXES
, webpage
, 'video id') 
  90         preload_codes 
= self
._html
_search
_regex
( 
  91             r
'(function.+)setTimeout\(function\(\)\{playlist', 
  92             webpage
, 'preload codes') 
  93         base64_fragments 
= re
.findall(r
'"([a-zA-Z0-9+/=]+)"', preload_codes
) 
  94         base64_fragments
.remove('init') 
  96         def _check_sequence(cur_fragments
): 
  99             for i 
in range(len(cur_fragments
)): 
 100                 cur_sequence 
= (''.join(cur_fragments
[i
:] + cur_fragments
[:i
])).encode('ascii') 
 102                     raw_data 
= compat_b64decode(cur_sequence
) 
 103                     if compat_ord(raw_data
[0]) == compat_ord('{'): 
 104                         return json
.loads(raw_data
.decode('utf-8')) 
 105                 except (TypeError, binascii
.Error
, UnicodeDecodeError, ValueError): 
 109             for i 
in range(len(base64_fragments
) + 1): 
 110                 for j 
in range(i
, len(base64_fragments
) + 1): 
 111                     data 
= _check_sequence(base64_fragments
[:i
] + base64_fragments
[j
:]) 
 115         self
.to_screen('Try to compute possible data sequence. This may take some time.') 
 119             raise ExtractorError( 
 120                 'Preload information could not be extracted', expected
=True) 
 123         get_quality 
= qualities(['500k', '480p', '1000k', '720p', '1080p']) 
 124         for filed 
in data
['files']: 
 125             if determine_ext(filed
['url']) == 'm3u8': 
 126                 # compat_urllib_parse.urljoin does not work here 
 127                 if filed
['url'].startswith('/'): 
 128                     m3u8_url 
= 'http://ht.cdn.turner.com/tbs/big/teamcoco' + filed
['url'] 
 130                     m3u8_url 
= filed
['url'] 
 131                 m3u8_formats 
= self
._extract
_m
3u8_formats
( 
 132                     m3u8_url
, video_id
, ext
='mp4') 
 133                 for m3u8_format 
in m3u8_formats
: 
 134                     if m3u8_format 
not in formats
: 
 135                         formats
.append(m3u8_format
) 
 136             elif determine_ext(filed
['url']) == 'f4m': 
 137                 # TODO Correct f4m extraction 
 140                 if filed
['url'].startswith('/mp4:protected/'): 
 141                     # TODO Correct extraction for these files 
 143                 m_format 
= re
.search(r
'(\d+(k|p))\.mp4', filed
['url']) 
 144                 if m_format 
is not None: 
 145                     format_id 
= m_format
.group(1) 
 147                     format_id 
= filed
['bitrate'] 
 149                     int(filed
['bitrate']) 
 150                     if filed
['bitrate'].isdigit() 
 157                     'format_id': format_id
, 
 158                     'quality': get_quality(format_id
), 
 161         self
._sort
_formats
(formats
) 
 165             'display_id': display_id
, 
 167             'title': data
['title'], 
 168             'thumbnail': data
.get('thumb', {}).get('href'), 
 169             'description': data
.get('teaser'), 
 170             'duration': data
.get('duration'), 
 171             'age_limit': self
._family
_friendly
_search
(webpage
),