Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/adobetv.py
debian/copyright: Fix lintian's dep5-copyright-license-name-not-unique.
[youtubedl] / youtube_dl / extractor / adobetv.py
1 from __future__ import unicode_literals
2
3 from .common import InfoExtractor
4 from ..utils import (
5 parse_duration,
6 unified_strdate,
7 str_to_int,
8 )
9
10
class AdobeTVIE(InfoExtractor):
    """Extractor for video pages under ``tv.adobe.com/watch/<show>/<video>``.

    The last path component matched by ``_VALID_URL`` is used as the video
    id.  Media URLs come from a JSON player configuration embedded in the
    page; the remaining metadata is scraped from the page markup.
    """

    _VALID_URL = r'https?://tv\.adobe\.com/watch/[^/]+/(?P<id>[^/]+)'

    _TEST = {
        'url': 'http://tv.adobe.com/watch/the-complete-picture-with-julieanne-kost/quick-tip-how-to-draw-a-circle-around-an-object-in-photoshop/',
        'md5': '9bc5727bcdd55251f35ad311ca74fa1e',
        'info_dict': {
            'id': 'quick-tip-how-to-draw-a-circle-around-an-object-in-photoshop',
            'ext': 'mp4',
            'title': 'Quick Tip - How to Draw a Circle Around an Object in Photoshop',
            'description': 'md5:99ec318dc909d7ba2a1f2b038f7d2311',
            # Raw string: ``\.`` is a regex escape, not a string escape.
            'thumbnail': r're:https?://.*\.jpg$',
            'upload_date': '20110914',
            'duration': 60,
            'view_count': int,
        },
    }

    def _real_extract(self, url):
        """Download the page at *url* and build the info dict for its video.

        Returns a dict with the keys ``id``, ``title``, ``description``,
        ``thumbnail``, ``upload_date``, ``duration``, ``view_count`` and
        ``formats``.  The player configuration, the title and the source
        list are required; the view count and the runtime fallback are
        looked up with ``fatal=False`` so that their absence does not abort
        the extraction.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # The player configuration is the ``{...}`` object that follows
        # ``html5player:`` in the page, up to the end of that line.
        player = self._parse_json(
            self._search_regex(
                r'html5player:\s*({.+?})\s*\n', webpage, 'player'),
            video_id)

        # Prefer the title from the player configuration; fall back to the
        # ``data-title`` attribute in the markup.
        title = player.get('title') or self._search_regex(
            r'data-title="([^"]+)"', webpage, 'title')
        description = self._og_search_description(webpage)
        thumbnail = self._og_search_thumbnail(webpage)

        upload_date = unified_strdate(
            self._html_search_meta('datepublished', webpage, 'upload date'))

        # Use the ``duration`` meta tag when present, otherwise the visible
        # "Runtime: HH:MM:SS" text.
        duration = parse_duration(
            self._html_search_meta('duration', webpage, 'duration') or
            self._search_regex(
                r'Runtime:\s*(\d{2}:\d{2}:\d{2})',
                webpage, 'duration', fatal=False))

        # The view count is optional metadata: a page without the views
        # <div> must not make the whole extraction fail.
        view_count_text = self._search_regex(
            r'<div class="views">\s*Views?:\s*([\d,.]+)\s*</div>',
            webpage, 'view count', fatal=False)
        view_count = str_to_int(view_count_text) if view_count_text else None

        formats = [{
            'url': source['src'],
            # Without an explicit quality label, derive the format id from
            # the text between the last '-' of the URL and the next '.'.
            'format_id': (
                source.get('quality') or
                source['src'].split('-')[-1].split('.')[0] or
                None),
            'tbr': source.get('bitrate'),
        } for source in player['sources']]
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'upload_date': upload_date,
            'duration': duration,
            'view_count': view_count,
            'formats': formats,
        }