]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/heise.py
8f49f52efd5398abbc7b922b9e2a268b3f609e2a
   2 from __future__ 
import unicode_literals
 
   4 from .common 
import InfoExtractor
 
   5 from .kaltura 
import KalturaIE
 
   6 from .youtube 
import YoutubeIE
 
  16 class HeiseIE(InfoExtractor
): 
  17     _VALID_URL 
= r
'https?://(?:www\.)?heise\.de/(?:[^/]+/)+[^/]+-(?P<id>[0-9]+)\.html' 
  19         'url': 'http://www.heise.de/video/artikel/Podcast-c-t-uplink-3-3-Owncloud-Tastaturen-Peilsender-Smartphone-2404147.html', 
  20         'md5': 'ffed432483e922e88545ad9f2f15d30e', 
  24             'title': "Podcast: c't uplink 3.3 – Owncloud / Tastaturen / Peilsender Smartphone", 
  25             'format_id': 'mp4_720p', 
  26             'timestamp': 1411812600, 
  27             'upload_date': '20140927', 
  28             'description': 'md5:c934cbfb326c669c2bcabcbe3d3fcd20', 
  29             'thumbnail': r
're:^https?://.*/gallery/$', 
  33         'url': 'http://www.heise.de/newsticker/meldung/Netflix-In-20-Jahren-vom-Videoverleih-zum-TV-Revolutionaer-3814130.html', 
  34         'md5': 'e403d2b43fea8e405e88e3f8623909f1', 
  38             'title': 'NEU IM SEPTEMBER | Netflix', 
  39             'description': 'md5:2131f3c7525e540d5fd841de938bd452', 
  40             'upload_date': '20170830', 
  41             'uploader': 'Netflix Deutschland, Österreich und Schweiz', 
  42             'uploader_id': 'netflixdach', 
  45             'skip_download': True, 
  48         'url': 'https://www.heise.de/video/artikel/nachgehakt-Wie-sichert-das-c-t-Tool-Restric-tor-Windows-10-ab-3700244.html', 
  49         'md5': '4b58058b46625bdbd841fc2804df95fc', 
  52             'timestamp': 1512470717, 
  53             'upload_date': '20171205', 
  55             'title': 'ct10 nachgehakt hos restrictor', 
  58             'skip_download': True, 
  61         'url': 'http://www.heise.de/ct/artikel/c-t-uplink-3-3-Owncloud-Tastaturen-Peilsender-Smartphone-2403911.html', 
  62         'only_matching': True, 
  64         'url': 'http://www.heise.de/newsticker/meldung/c-t-uplink-Owncloud-Tastaturen-Peilsender-Smartphone-2404251.html?wt_mc=rss.ho.beitrag.atom', 
  65         'only_matching': True, 
  67         'url': 'http://www.heise.de/ct/ausgabe/2016-12-Spiele-3214137.html', 
  68         'only_matching': True, 
  71     def _real_extract(self
, url
): 
  72         video_id 
= self
._match
_id
(url
) 
  73         webpage 
= self
._download
_webpage
(url
, video_id
) 
  75         title 
= self
._html
_search
_meta
('fulltitle', webpage
, default
=None) 
  76         if not title 
or title 
== "c't": 
  77             title 
= self
._search
_regex
( 
  78                 r
'<div[^>]+class="videoplayerjw"[^>]+data-title="([^"]+)"', 
  81         yt_urls 
= YoutubeIE
._extract
_urls
(webpage
) 
  83             return self
.playlist_from_matches(yt_urls
, video_id
, title
, ie
=YoutubeIE
.ie_key()) 
  85         kaltura_url 
= KalturaIE
._extract
_url
(webpage
) 
  87             return self
.url_result(smuggle_url(kaltura_url
, {'source_url': url
}), KalturaIE
.ie_key()) 
  89         container_id 
= self
._search
_regex
( 
  90             r
'<div class="videoplayerjw"[^>]+data-container="([0-9]+)"', 
  91             webpage
, 'container ID') 
  93         sequenz_id 
= self
._search
_regex
( 
  94             r
'<div class="videoplayerjw"[^>]+data-sequenz="([0-9]+)"', 
  95             webpage
, 'sequenz ID') 
  97         doc 
= self
._download
_xml
( 
  98             'http://www.heise.de/videout/feed', video_id
, query
={ 
  99                 'container': container_id
, 
 100                 'sequenz': sequenz_id
, 
 104         for source_node 
in doc
.findall('.//{http://rss.jwpcdn.com/}source'): 
 105             label 
= source_node
.attrib
['label'] 
 106             height 
= int_or_none(self
._search
_regex
( 
 107                 r
'^(.*?_)?([0-9]+)p$', label
, 'height', default
=None)) 
 108             video_url 
= source_node
.attrib
['file'] 
 109             ext 
= determine_ext(video_url
, '') 
 112                 'format_note': label
, 
 113                 'format_id': '%s_%s' % (ext
, label
), 
 116         self
._sort
_formats
(formats
) 
 118         description 
= self
._og
_search
_description
( 
 119             webpage
, default
=None) or self
._html
_search
_meta
( 
 120             'description', webpage
) 
 125             'description': description
, 
 126             'thumbnail': (xpath_text(doc
, './/{http://rss.jwpcdn.com/}image') or 
 127                           self
._og
_search
_thumbnail
(webpage
)), 
 128             'timestamp': parse_iso8601( 
 129                 self
._html
_search
_meta
('date', webpage
)),