]>
Raphaƫl G. Git Repositories - youtubedl/blob - youtube_dl/extractor/teamcoco.py
1 from __future__
import unicode_literals
6 from .common
import InfoExtractor
7 from ..utils
import qualities
10 class TeamcocoIE(InfoExtractor
):
11 _VALID_URL
= r
'http://teamcoco\.com/video/(?P<video_id>[0-9]+)?/?(?P<display_id>.*)'
14 'url': 'http://teamcoco.com/video/80187/conan-becomes-a-mary-kay-beauty-consultant',
15 'md5': '3f7746aa0dc86de18df7539903d399ea',
19 'title': 'Conan Becomes A Mary Kay Beauty Consultant',
20 'description': 'Mary Kay is perhaps the most trusted name in female beauty, so of course Conan is a natural choice to sell their products.',
24 'url': 'http://teamcoco.com/video/louis-ck-interview-george-w-bush',
25 'md5': 'cde9ba0fa3506f5f017ce11ead928f9a',
29 'description': 'Louis C.K. got starstruck by George W. Bush, so what? Part one.',
30 'title': 'Louis C.K. Interview Pt. 1 11/3/11',
36 r
'"eVar42"\s*:\s*(\d+)',
37 r
'Ginger\.TeamCoco\.openInApp\("video",\s*"([^"]+)"',
38 r
'"id_not"\s*:\s*(\d+)'
41 def _real_extract(self
, url
):
42 mobj
= re
.match(self
._VALID
_URL
, url
)
44 display_id
= mobj
.group('display_id')
45 webpage
= self
._download
_webpage
(url
, display_id
)
47 video_id
= mobj
.group('video_id')
49 video_id
= self
._html
_search
_regex
(
50 self
._VIDEO
_ID
_REGEXES
, webpage
, 'video id')
52 embed_url
= 'http://teamcoco.com/embed/v/%s' % video_id
53 embed
= self
._download
_webpage
(
54 embed_url
, video_id
, 'Downloading embed page')
56 encoded_data
= self
._search
_regex
(
57 r
'"preload"\s*:\s*"([^"]+)"', embed
, 'encoded data')
58 data
= self
._parse
_json
(
59 base64
.b64decode(encoded_data
.encode('ascii')).decode('utf-8'), video_id
)
62 get_quality
= qualities(['500k', '480p', '1000k', '720p', '1080p'])
63 for filed
in data
['files']:
64 m_format
= re
.search(r
'(\d+(k|p))\.mp4', filed
['url'])
65 if m_format
is not None:
66 format_id
= m_format
.group(1)
68 format_id
= filed
['bitrate']
71 if filed
['bitrate'].isdigit()
78 'format_id': format_id
,
79 'quality': get_quality(format_id
),
82 self
._sort
_formats
(formats
)
86 'display_id': display_id
,
88 'title': data
['title'],
89 'thumbnail': data
.get('thumb', {}).get('href'),
90 'description': data
.get('teaser'),
91 'age_limit': self
._family
_friendly
_search
(webpage
),