Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/ruv.py
debian/control: Verify compliance with Policy 4.1.4 (no changes needed).
[youtubedl] / youtube_dl / extractor / ruv.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 from .common import InfoExtractor
5 from ..utils import (
6 determine_ext,
7 unified_timestamp,
8 )
9
10
class RuvIE(InfoExtractor):
    """Extractor for media pages hosted on ruv.is.

    Accepts ``/sarpurinn/<section>/<slug>[/<digits>]`` and ``/node/<id>``
    page URLs (see ``_VALID_URL``); the ``id`` group captured from the URL is
    used as the display id.
    """

    _VALID_URL = r'https?://(?:www\.)?ruv\.is/(?:sarpurinn/[^/]+|node)/(?P<id>[^/]+(?:/\d+)?)'
    _TESTS = [{
        # m3u8
        'url': 'http://ruv.is/sarpurinn/ruv-aukaras/fh-valur/20170516',
        'md5': '66347652f4e13e71936817102acc1724',
        'info_dict': {
            'id': '1144499',
            'display_id': 'fh-valur/20170516',
            'ext': 'mp4',
            'title': 'FH - Valur',
            'description': 'Bein útsending frá 3. leik FH og Vals í úrslitum Olísdeildar karla í handbolta.',
            'timestamp': 1494963600,
            'upload_date': '20170516',
        },
    }, {
        # mp3
        'url': 'http://ruv.is/sarpurinn/ras-2/morgunutvarpid/20170619',
        'md5': '395ea250c8a13e5fdb39d4670ef85378',
        'info_dict': {
            'id': '1153630',
            'display_id': 'morgunutvarpid/20170619',
            'ext': 'mp3',
            'title': 'Morgunútvarpið',
            'description': 'md5:a4cf1202c0a1645ca096b06525915418',
            'timestamp': 1497855000,
            'upload_date': '20170619',
        },
    }, {
        'url': 'http://ruv.is/sarpurinn/ruv/frettir/20170614',
        'only_matching': True,
    }, {
        'url': 'http://www.ruv.is/node/1151854',
        'only_matching': True,
    }, {
        'url': 'http://ruv.is/sarpurinn/klippa/secret-soltice-hefst-a-morgun',
        'only_matching': True,
    }, {
        'url': 'http://ruv.is/sarpurinn/ras-1/morgunvaktin/20170619',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Scrape a ruv.is page and build the info dict for its media.

        The page is downloaded once; the media URL and the fallback poster
        are read from ``video.src`` / ``video.poster`` assignments found in
        the page source, the remaining metadata from the ``_og_search_*``
        helpers and the ``article:published_time`` meta tag.

        :param url: page URL matching ``_VALID_URL``.
        :returns: dict with ``id``, ``display_id``, ``title``,
            ``description``, ``thumbnail``, ``timestamp`` and ``formats``.
        """
        display_id = self._match_id(url)

        webpage = self._download_webpage(url, display_id)

        title = self._og_search_title(webpage)

        # Matches ``video.<field> = "<value>"`` with either quote style.
        # Group 1 is the opening quote (reused as the closing delimiter);
        # the value itself is the named group ``url``.
        FIELD_RE = r'video\.%s\s*=\s*(["\'])(?P<url>(?:(?!\1).)+)\1'

        media_url = self._html_search_regex(
            FIELD_RE % 'src', webpage, 'video URL', group='url')

        # Prefer the numeric node id advertised in a <link href=".../node/N">
        # tag; fall back to the id taken from the URL when it is absent.
        video_id = self._search_regex(
            r'<link\b[^>]+\bhref=["\']https?://www\.ruv\.is/node/(\d+)',
            webpage, 'video id', default=display_id)

        ext = determine_ext(media_url)

        if ext == 'm3u8':
            formats = self._extract_m3u8_formats(
                media_url, video_id, 'mp4', entry_protocol='m3u8_native',
                m3u8_id='hls')
        elif ext == 'mp3':
            # Audio-only media: flag the absence of a video stream.
            formats = [{
                'format_id': 'mp3',
                'url': media_url,
                'vcodec': 'none',
            }]
        else:
            formats = [{
                'url': media_url,
            }]

        description = self._og_search_description(webpage, default=None)
        # Select the named ``url`` group explicitly, exactly as the media URL
        # lookup above does: group 1 of FIELD_RE is only the quote character,
        # not the poster URL.
        thumbnail = self._og_search_thumbnail(
            webpage, default=None) or self._search_regex(
            FIELD_RE % 'poster', webpage, 'thumbnail', fatal=False,
            group='url')
        timestamp = unified_timestamp(self._html_search_meta(
            'article:published_time', webpage, 'timestamp', fatal=False))

        return {
            'id': video_id,
            'display_id': display_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'timestamp': timestamp,
            'formats': formats,
        }