Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/mgtv.py
debian/control: Remove trailing whitespace at EOF.
[youtubedl] / youtube_dl / extractor / mgtv.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 from .common import InfoExtractor
5 from ..compat import compat_str
6 from ..utils import int_or_none
7
8
class MGTVIE(InfoExtractor):
    """Extractor for video pages on mgtv.com (described as '芒果TV')."""

    # Matches /v/... and /b/... page URLs; the numeric file name is the video id.
    _VALID_URL = r'https?://(?:www\.)?mgtv\.com/(v|b)/(?:[^/]+/)*(?P<id>\d+)\.html'
    IE_DESC = '芒果TV'

    _TESTS = [
        {
            'url': 'http://www.mgtv.com/v/1/290525/f/3116640.html',
            'md5': 'b1ffc0fc163152acf6beaa81832c9ee7',
            'info_dict': {
                'id': '3116640',
                'ext': 'mp4',
                'title': '我是歌手第四季双年巅峰会:韩红李玟“双王”领军对抗',
                'description': '我是歌手第四季双年巅峰会',
                'duration': 7461,
                'thumbnail': r're:^https?://.*\.jpg$',
            },
        },
        {
            'url': 'http://www.mgtv.com/b/301817/3826653.html',
            'only_matching': True,
        },
    ]

    def _build_format(self, video_id, base_url, index, stream):
        """Resolve one entry of the API's 'stream' list into a format dict.

        Returns None when the entry has no 'url', or when the JSON document
        fetched for it has no 'info' value.
        """
        path = stream.get('url')
        if not path:
            return None

        # The stream entry only points at another JSON document; the
        # media URL itself is that document's 'info' value.
        details = self._download_json(
            base_url + path, video_id,
            note='Download video info for format #%d' % index)
        media_url = details.get('info')
        if not media_url:
            return None

        # The bitrate, when present, is read from a '_<digits>_mp4/' URL part.
        raw_tbr = self._search_regex(
            r'_(\d+)_mp4/', media_url, 'tbr', default=None)
        bitrate = int_or_none(raw_tbr)

        return {
            # Fall back to the positional index when no bitrate was found.
            'format_id': compat_str(bitrate or index),
            'url': media_url,
            'ext': 'mp4',
            'tbr': bitrate,
            'protocol': 'm3u8_native',
        }

    def _real_extract(self, url):
        """Build the info dict for the video page at *url*.

        Queries the player API with the id taken from the URL, reads the
        title and metadata from its 'data' -> 'info' object, and resolves
        every usable entry of 'data' -> 'stream' into a format.
        """
        video_id = self._match_id(url)

        response = self._download_json(
            'http://pcweb.api.mgtv.com/player/video', video_id,
            query={'video_id': video_id},
            headers=self.geo_verification_headers())
        player = response['data']

        metadata = player['info']
        title = metadata['title'].strip()
        # Only the first advertised stream domain is used as the URL prefix.
        base_url = player['stream_domain'][0]

        formats = []
        for index, stream in enumerate(player['stream']):
            fmt = self._build_format(video_id, base_url, index, stream)
            if fmt is not None:
                formats.append(fmt)
        self._sort_formats(formats)

        result = {
            'id': video_id,
            'title': title,
            'formats': formats,
        }
        result['description'] = metadata.get('desc')
        result['duration'] = int_or_none(metadata.get('duration'))
        result['thumbnail'] = metadata.get('thumb')
        return result