Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/tagesschau.py
   1 # -*- coding: utf-8 -*- 
   2 from __future__ 
import unicode_literals
 
   6 from .common 
import InfoExtractor
 
   7 from ..utils 
import parse_filesize
 
  10 class TagesschauIE(InfoExtractor
): 
  11     _VALID_URL 
= r
'https?://(?:www\.)?tagesschau\.de/multimedia/(?:[^/]+/)*?[^/#?]+?(?P<id>-?[0-9]+)(?:~_[^/#?]+?)?\.html' 
  14         'url': 'http://www.tagesschau.de/multimedia/video/video-102143.html', 
  15         'md5': '917a228bc7df7850783bc47979673a09', 
  19             'title': 'Regierungsumbildung in Athen: Neue Minister in Griechenland vereidigt', 
  20             'description': 'md5:171feccd9d9b3dd54d05d501568f6359', 
  21             'thumbnail': 're:^https?:.*\.jpg$', 
  24         'url': 'http://www.tagesschau.de/multimedia/sendung/ts-5727.html', 
  25         'md5': '3c54c1f6243d279b706bde660ceec633', 
  29             'description': 'md5:695c01bfd98b7e313c501386327aea59', 
  30             'title': 'Sendung: tagesschau \t04.12.2014 20:00 Uhr', 
  31             'thumbnail': 're:^https?:.*\.jpg$', 
  34         'url': 'http://www.tagesschau.de/multimedia/politikimradio/audio-18407.html', 
  35         'md5': 'aef45de271c4bf0a5db834aa40bf774c', 
  39             'title': 'Flüchtlingsdebatte: Hitzig, aber wenig hilfreich', 
  40             'description': 'Flüchtlingsdebatte: Hitzig, aber wenig hilfreich', 
  41             'thumbnail': 're:^https?:.*\.jpg$', 
  44         'url': 'http://www.tagesschau.de/multimedia/sendung/tsg-3771.html', 
  45         'only_matching': True, 
  47         'url': 'http://www.tagesschau.de/multimedia/sendung/tt-3827.html', 
  48         'only_matching': True, 
  50         'url': 'http://www.tagesschau.de/multimedia/sendung/nm-3475.html', 
  51         'only_matching': True, 
  53         'url': 'http://www.tagesschau.de/multimedia/sendung/weltspiegel-3167.html', 
  54         'only_matching': True, 
  56         'url': 'http://www.tagesschau.de/multimedia/tsvorzwanzig-959.html', 
  57         'only_matching': True, 
  59         'url': 'http://www.tagesschau.de/multimedia/sendung/bab/bab-3299~_bab-sendung-209.html', 
  60         'only_matching': True, 
  62         'url': 'http://www.tagesschau.de/multimedia/video/video-102303~_bab-sendung-211.html', 
  63         'only_matching': True, 
  67         's': {'width': 256, 'height': 144, 'quality': 1}, 
  68         'm': {'width': 512, 'height': 288, 'quality': 2}, 
  69         'l': {'width': 960, 'height': 544, 'quality': 3}, 
  72     def _real_extract(self
, url
): 
  73         video_id 
= self
._match
_id
(url
) 
  74         display_id 
= video_id
.lstrip('-') 
  75         webpage 
= self
._download
_webpage
(url
, display_id
) 
  77         player_url 
= self
._html
_search
_meta
( 
  78             'twitter:player', webpage
, 'player URL', default
=None) 
  80             playerpage 
= self
._download
_webpage
( 
  81                 player_url
, display_id
, 'Downloading player page') 
  84             for media 
in re
.finditer( 
  86                         (?P<q_url>["\'])(?P
<url
>http
://media
.+?
)(?P
=q_url
) 
  87                         ,\s
*type:(?P
<q_type
>["\'])(?P<type>video|audio)/(?P<ext>.+?)(?P=q_type) 
  88                         (?:,\s*quality:(?P<q_quality>["\'])(?P
<quality
>.+?
)(?P
=q_quality
))?
 
  90                 url = media.group('url') 
  91                 type_ = media.group('type') 
  92                 ext = media.group('ext') 
  93                 res = media.group('quality') 
  95                     'format_id': '%s_%s' % (res, ext) if res else ext, 
  98                     'vcodec': 'none' if type_ == 'audio' else None, 
 100                 f.update(self._FORMATS.get(res, {})) 
 102             thumbnail = self._og_search_thumbnail(playerpage) 
 103             title = self._og_search_title(webpage).strip() 
 104             description = self._og_search_description(webpage).strip() 
 106             download_text = self._search_regex( 
 107                 r'(?s)<p>Wir bieten dieses Video in folgenden Formaten zum Download an:</p>\s*<div class="controls">(.*?)</div>\s*<p>', 
 108                 webpage, 'download links') 
 110                 r'<div class="button" title="(?P<title>[^"]*)"><a href="(?P<url>[^"]+)">(?P<name>.+?)</a></div>', 
 114                 format_id = self._search_regex( 
 115                     r'.*/[^/.]+\.([^/]+)\.[^/.]+', l.group('url'), 'format ID') 
 117                     'format_id': format_id, 
 118                     'url': l.group('url'), 
 119                     'format_name': l.group('name'), 
 123                         Video
:\s
*(?P
<vcodec
>[a
-zA
-Z0
-9/._-]+)\s
*&\
#10; 
 124                         (?P
<width
>[0-9]+)x(?P
<height
>[0-9]+)px
&\
#10; 
 125                         (?P
<vbr
>[0-9]+)kbps
&\
#10; 
 126                         Audio
:\s
*(?P
<abr
>[0-9]+)kbps
,\s
*(?P
<audio_desc
>[A
-Za
-z\
.0-9]+)&\
#10; 
 127                         Gr
ö
;ß
;e
:\s
*(?P
<filesize_approx
>[0-9.,]+\s
+[a
-zA
-Z
]*B
)''', 
 131                         'format_note': m.group('audio_desc'), 
 132                         'vcodec': m.group('vcodec'), 
 133                         'width': int(m.group('width')), 
 134                         'height': int(m.group('height')), 
 135                         'abr': int(m.group('abr')), 
 136                         'vbr': int(m.group('vbr')), 
 137                         'filesize_approx': parse_filesize(m.group('filesize_approx')), 
 139                 formats.append(format) 
 140             thumbnail = self._og_search_thumbnail(webpage) 
 141             description = self._html_search_regex( 
 142                 r'(?s)<p class="teasertext">(.*?)</p>', 
 143                 webpage, 'description', default=None) 
 144             title = self._html_search_regex( 
 145                 r'<span class="headline".*?>(.*?)</span>', webpage, 'title') 
 147         self._sort_formats(formats) 
 152             'thumbnail': thumbnail, 
 154             'description': description,