]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/heise.py
82e11a7d88735f2105d0dff70c4304ecbf274ff0
   2 from __future__ 
import unicode_literals
 
   4 from .common 
import InfoExtractor
 
   5 from .youtube 
import YoutubeIE
 
  14 class HeiseIE(InfoExtractor
): 
  15     _VALID_URL 
= r
'https?://(?:www\.)?heise\.de/(?:[^/]+/)+[^/]+-(?P<id>[0-9]+)\.html' 
  17         'url': 'http://www.heise.de/video/artikel/Podcast-c-t-uplink-3-3-Owncloud-Tastaturen-Peilsender-Smartphone-2404147.html', 
  18         'md5': 'ffed432483e922e88545ad9f2f15d30e', 
  22             'title': "Podcast: c't uplink 3.3 – Owncloud / Tastaturen / Peilsender Smartphone", 
  23             'format_id': 'mp4_720p', 
  24             'timestamp': 1411812600, 
  25             'upload_date': '20140927', 
  26             'description': 'md5:c934cbfb326c669c2bcabcbe3d3fcd20', 
  27             'thumbnail': r
're:^https?://.*/gallery/$', 
  31         'url': 'http://www.heise.de/newsticker/meldung/Netflix-In-20-Jahren-vom-Videoverleih-zum-TV-Revolutionaer-3814130.html', 
  32         'md5': 'e403d2b43fea8e405e88e3f8623909f1', 
  36             'title': 'NEU IM SEPTEMBER | Netflix', 
  37             'description': 'md5:2131f3c7525e540d5fd841de938bd452', 
  38             'upload_date': '20170830', 
  39             'uploader': 'Netflix Deutschland, Österreich und Schweiz', 
  40             'uploader_id': 'netflixdach', 
  43             'skip_download': True, 
  46         'url': 'http://www.heise.de/ct/artikel/c-t-uplink-3-3-Owncloud-Tastaturen-Peilsender-Smartphone-2403911.html', 
  47         'only_matching': True, 
  49         'url': 'http://www.heise.de/newsticker/meldung/c-t-uplink-Owncloud-Tastaturen-Peilsender-Smartphone-2404251.html?wt_mc=rss.ho.beitrag.atom', 
  50         'only_matching': True, 
  52         'url': 'http://www.heise.de/ct/ausgabe/2016-12-Spiele-3214137.html', 
  53         'only_matching': True, 
  56     def _real_extract(self
, url
): 
  57         video_id 
= self
._match
_id
(url
) 
  58         webpage 
= self
._download
_webpage
(url
, video_id
) 
  60         title 
= self
._html
_search
_meta
('fulltitle', webpage
, default
=None) 
  61         if not title 
or title 
== "c't": 
  62             title 
= self
._search
_regex
( 
  63                 r
'<div[^>]+class="videoplayerjw"[^>]+data-title="([^"]+)"', 
  66         yt_urls 
= YoutubeIE
._extract
_urls
(webpage
) 
  68             return self
.playlist_from_matches(yt_urls
, video_id
, title
, ie
=YoutubeIE
.ie_key()) 
  70         container_id 
= self
._search
_regex
( 
  71             r
'<div class="videoplayerjw"[^>]+data-container="([0-9]+)"', 
  72             webpage
, 'container ID') 
  73         sequenz_id 
= self
._search
_regex
( 
  74             r
'<div class="videoplayerjw"[^>]+data-sequenz="([0-9]+)"', 
  75             webpage
, 'sequenz ID') 
  77         doc 
= self
._download
_xml
( 
  78             'http://www.heise.de/videout/feed', video_id
, query
={ 
  79                 'container': container_id
, 
  80                 'sequenz': sequenz_id
, 
  84         for source_node 
in doc
.findall('.//{http://rss.jwpcdn.com/}source'): 
  85             label 
= source_node
.attrib
['label'] 
  86             height 
= int_or_none(self
._search
_regex
( 
  87                 r
'^(.*?_)?([0-9]+)p$', label
, 'height', default
=None)) 
  88             video_url 
= source_node
.attrib
['file'] 
  89             ext 
= determine_ext(video_url
, '') 
  93                 'format_id': '%s_%s' % (ext
, label
), 
  96         self
._sort
_formats
(formats
) 
  98         description 
= self
._og
_search
_description
( 
  99             webpage
, default
=None) or self
._html
_search
_meta
( 
 100             'description', webpage
) 
 105             'description': description
, 
 106             'thumbnail': (xpath_text(doc
, './/{http://rss.jwpcdn.com/}image') or 
 107                           self
._og
_search
_thumbnail
(webpage
)), 
 108             'timestamp': parse_iso8601( 
 109                 self
._html
_search
_meta
('date', webpage
)),