]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/tagesschau.py
1 # -*- coding: utf-8 -*-
2 from __future__
import unicode_literals
6 from .common
import InfoExtractor
7 from ..utils
import parse_filesize
10 class TagesschauIE(InfoExtractor
):
11 _VALID_URL
= r
'https?://(?:www\.)?tagesschau\.de/multimedia/(?:[^/]+/)*?[^/#?]+?(?P<id>-?[0-9]+)(?:~_[^/#?]+?)?\.html'
14 'url': 'http://www.tagesschau.de/multimedia/video/video-102143.html',
15 'md5': '917a228bc7df7850783bc47979673a09',
19 'title': 'Regierungsumbildung in Athen: Neue Minister in Griechenland vereidigt',
20 'description': 'md5:171feccd9d9b3dd54d05d501568f6359',
21 'thumbnail': 're:^https?:.*\.jpg$',
24 'url': 'http://www.tagesschau.de/multimedia/sendung/ts-5727.html',
25 'md5': '3c54c1f6243d279b706bde660ceec633',
29 'description': 'md5:695c01bfd98b7e313c501386327aea59',
30 'title': 'Sendung: tagesschau \t04.12.2014 20:00 Uhr',
31 'thumbnail': 're:^https?:.*\.jpg$',
34 'url': 'http://www.tagesschau.de/multimedia/politikimradio/audio-18407.html',
35 'md5': 'aef45de271c4bf0a5db834aa40bf774c',
39 'title': 'Flüchtlingsdebatte: Hitzig, aber wenig hilfreich',
40 'description': 'Flüchtlingsdebatte: Hitzig, aber wenig hilfreich',
41 'thumbnail': 're:^https?:.*\.jpg$',
44 'url': 'http://www.tagesschau.de/multimedia/sendung/tsg-3771.html',
45 'only_matching': True,
47 'url': 'http://www.tagesschau.de/multimedia/sendung/tt-3827.html',
48 'only_matching': True,
50 'url': 'http://www.tagesschau.de/multimedia/sendung/nm-3475.html',
51 'only_matching': True,
53 'url': 'http://www.tagesschau.de/multimedia/sendung/weltspiegel-3167.html',
54 'only_matching': True,
56 'url': 'http://www.tagesschau.de/multimedia/tsvorzwanzig-959.html',
57 'only_matching': True,
59 'url': 'http://www.tagesschau.de/multimedia/sendung/bab/bab-3299~_bab-sendung-209.html',
60 'only_matching': True,
62 'url': 'http://www.tagesschau.de/multimedia/video/video-102303~_bab-sendung-211.html',
63 'only_matching': True,
67 's': {'width': 256, 'height': 144, 'quality': 1},
68 'm': {'width': 512, 'height': 288, 'quality': 2},
69 'l': {'width': 960, 'height': 544, 'quality': 3},
72 def _real_extract(self
, url
):
73 video_id
= self
._match
_id
(url
)
74 display_id
= video_id
.lstrip('-')
75 webpage
= self
._download
_webpage
(url
, display_id
)
77 player_url
= self
._html
_search
_meta
(
78 'twitter:player', webpage
, 'player URL', default
=None)
80 playerpage
= self
._download
_webpage
(
81 player_url
, display_id
, 'Downloading player page')
84 for media
in re
.finditer(
86 (?P<q_url>["\'])(?P
<url
>http
://media
.+?
)(?P
=q_url
)
87 ,\s
*type:(?P
<q_type
>["\'])(?P<type>video|audio)/(?P<ext>.+?)(?P=q_type)
88 (?:,\s*quality:(?P<q_quality>["\'])(?P
<quality
>.+?
)(?P
=q_quality
))?
90 url = media.group('url')
91 type_ = media.group('type')
92 ext = media.group('ext')
93 res = media.group('quality')
95 'format_id': '%s_%s' % (res, ext) if res else ext,
98 'vcodec': 'none' if type_ == 'audio' else None,
100 f.update(self._FORMATS.get(res, {}))
102 thumbnail = self._og_search_thumbnail(playerpage)
103 title = self._og_search_title(webpage).strip()
104 description = self._og_search_description(webpage).strip()
106 download_text = self._search_regex(
107 r'(?s)<p>Wir bieten dieses Video in folgenden Formaten zum Download an:</p>\s*<div class="controls">(.*?)</div>\s*<p>',
108 webpage, 'download links')
110 r'<div class="button" title="(?P<title>[^"]*)"><a href="(?P<url>[^"]+)">(?P<name>.+?)</a></div>',
114 format_id = self._search_regex(
115 r'.*/[^/.]+\.([^/]+)\.[^/.]+', l.group('url'), 'format ID')
117 'format_id': format_id,
118 'url': l.group('url'),
119 'format_name': l.group('name'),
123 Video
:\s
*(?P
<vcodec
>[a
-zA
-Z0
-9/._-]+)\s
*&\
#10;
124 (?P
<width
>[0-9]+)x(?P
<height
>[0-9]+)px
&\
#10;
125 (?P
<vbr
>[0-9]+)kbps
&\
#10;
126 Audio
:\s
*(?P
<abr
>[0-9]+)kbps
,\s
*(?P
<audio_desc
>[A
-Za
-z\
.0-9]+)&\
#10;
127 Gr
ö
;ß
;e
:\s
*(?P
<filesize_approx
>[0-9.,]+\s
+[a
-zA
-Z
]*B
)''',
131 'format_note': m.group('audio_desc'),
132 'vcodec': m.group('vcodec'),
133 'width': int(m.group('width')),
134 'height': int(m.group('height')),
135 'abr': int(m.group('abr')),
136 'vbr': int(m.group('vbr')),
137 'filesize_approx': parse_filesize(m.group('filesize_approx')),
139 formats.append(format)
140 thumbnail = self._og_search_thumbnail(webpage)
141 description = self._html_search_regex(
142 r'(?s)<p class="teasertext">(.*?)</p>',
143 webpage, 'description', default=None)
144 title = self._html_search_regex(
145 r'<span class="headline".*?>(.*?)</span>', webpage, 'title')
147 self._sort_formats(formats)
152 'thumbnail': thumbnail,
154 'description': description,