]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/teamcoco.py
   2 from __future__ 
import unicode_literals
 
   9 from .common 
import InfoExtractor
 
  15 from ..compat 
import compat_ord
 
  18 class TeamcocoIE(InfoExtractor
): 
  19     _VALID_URL 
= r
'https?://teamcoco\.com/video/(?P<video_id>[0-9]+)?/?(?P<display_id>.*)' 
  22             'url': 'http://teamcoco.com/video/80187/conan-becomes-a-mary-kay-beauty-consultant', 
  23             'md5': '3f7746aa0dc86de18df7539903d399ea', 
  27                 'title': 'Conan Becomes A Mary Kay Beauty Consultant', 
  28                 'description': 'Mary Kay is perhaps the most trusted name in female beauty, so of course Conan is a natural choice to sell their products.', 
  33             'url': 'http://teamcoco.com/video/louis-ck-interview-george-w-bush', 
  34             'md5': 'cde9ba0fa3506f5f017ce11ead928f9a', 
  38                 'description': 'Louis C.K. got starstruck by George W. Bush, so what? Part one.', 
  39                 'title': 'Louis C.K. Interview Pt. 1 11/3/11', 
  44             'url': 'http://teamcoco.com/video/timothy-olyphant-drinking-whiskey', 
  48                 'title': 'Timothy Olyphant Raises A Toast To “Justified”', 
  49                 'description': 'md5:15501f23f020e793aeca761205e42c24', 
  52                 'skip_download': True,  # m3u8 downloads 
  55             'url': 'http://teamcoco.com/video/full-episode-mon-6-1-joel-mchale-jake-tapper-and-musical-guest-courtney-barnett?playlist=x;eyJ0eXBlIjoidGFnIiwiaWQiOjl9', 
  59                 'title': 'Full Episode - Mon. 6/1 - Joel McHale, Jake Tapper, And Musical Guest Courtney Barnett', 
  60                 'description': 'Guests: Joel McHale, Jake Tapper, And Musical Guest Courtney Barnett', 
  63                 'skip_download': True,  # m3u8 downloads 
  68         r
'"eVar42"\s*:\s*(\d+)', 
  69         r
'Ginger\.TeamCoco\.openInApp\("video",\s*"([^"]+)"', 
  70         r
'"id_not"\s*:\s*(\d+)' 
  73     def _real_extract(self
, url
): 
  74         mobj 
= re
.match(self
._VALID
_URL
, url
) 
  76         display_id 
= mobj
.group('display_id') 
  77         webpage
, urlh 
= self
._download
_webpage
_handle
(url
, display_id
) 
  78         if 'src=expired' in urlh
.geturl(): 
  79             raise ExtractorError('This video is expired.', expected
=True) 
  81         video_id 
= mobj
.group('video_id') 
  83             video_id 
= self
._html
_search
_regex
( 
  84                 self
._VIDEO
_ID
_REGEXES
, webpage
, 'video id') 
  88         preload_codes 
= self
._html
_search
_regex
( 
  89             r
'(function.+)setTimeout\(function\(\)\{playlist', 
  90             webpage
, 'preload codes') 
  91         base64_fragments 
= re
.findall(r
'"([a-zA-Z0-9+/=]+)"', preload_codes
) 
  92         base64_fragments
.remove('init') 
  94         def _check_sequence(cur_fragments
): 
  97             for i 
in range(len(cur_fragments
)): 
  98                 cur_sequence 
= (''.join(cur_fragments
[i
:] + cur_fragments
[:i
])).encode('ascii') 
 100                     raw_data 
= base64
.b64decode(cur_sequence
) 
 101                     if compat_ord(raw_data
[0]) == compat_ord('{'): 
 102                         return json
.loads(raw_data
.decode('utf-8')) 
 103                 except (TypeError, binascii
.Error
, UnicodeDecodeError, ValueError): 
 107             for i 
in range(len(base64_fragments
) + 1): 
 108                 for j 
in range(i
, len(base64_fragments
) + 1): 
 109                     data 
= _check_sequence(base64_fragments
[:i
] + base64_fragments
[j
:]) 
 113         self
.to_screen('Try to compute possible data sequence. This may take some time.') 
 117             raise ExtractorError( 
 118                 'Preload information could not be extracted', expected
=True) 
 121         get_quality 
= qualities(['500k', '480p', '1000k', '720p', '1080p']) 
 122         for filed 
in data
['files']: 
 123             if determine_ext(filed
['url']) == 'm3u8': 
 124                 # compat_urllib_parse.urljoin does not work here 
 125                 if filed
['url'].startswith('/'): 
 126                     m3u8_url 
= 'http://ht.cdn.turner.com/tbs/big/teamcoco' + filed
['url'] 
 128                     m3u8_url 
= filed
['url'] 
 129                 m3u8_formats 
= self
._extract
_m
3u8_formats
( 
 130                     m3u8_url
, video_id
, ext
='mp4') 
 131                 for m3u8_format 
in m3u8_formats
: 
 132                     if m3u8_format 
not in formats
: 
 133                         formats
.append(m3u8_format
) 
 134             elif determine_ext(filed
['url']) == 'f4m': 
 135                 # TODO Correct f4m extraction 
 138                 if filed
['url'].startswith('/mp4:protected/'): 
 139                     # TODO Correct extraction for these files 
 141                 m_format 
= re
.search(r
'(\d+(k|p))\.mp4', filed
['url']) 
 142                 if m_format 
is not None: 
 143                     format_id 
= m_format
.group(1) 
 145                     format_id 
= filed
['bitrate'] 
 147                     int(filed
['bitrate']) 
 148                     if filed
['bitrate'].isdigit() 
 155                     'format_id': format_id
, 
 156                     'quality': get_quality(format_id
), 
 159         self
._sort
_formats
(formats
) 
 163             'display_id': display_id
, 
 165             'title': data
['title'], 
 166             'thumbnail': data
.get('thumb', {}).get('href'), 
 167             'description': data
.get('teaser'), 
 168             'duration': data
.get('duration'), 
 169             'age_limit': self
._family
_friendly
_search
(webpage
),