]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/faz.py
   2 from __future__ 
import unicode_literals
 
   4 from .common 
import InfoExtractor
 
  12 class FazIE(InfoExtractor
): 
  14     _VALID_URL 
= r
'https?://(?:www\.)?faz\.net/(?:[^/]+/)*.*?-(?P<id>\d+)\.html' 
  17         'url': 'http://www.faz.net/multimedia/videos/stockholm-chemie-nobelpreis-fuer-drei-amerikanische-forscher-12610585.html', 
  21             'title': 'Stockholm: Chemie-Nobelpreis für drei amerikanische Forscher', 
  22             'description': 'md5:1453fbf9a0d041d985a47306192ea253', 
  25         'url': 'http://www.faz.net/aktuell/politik/berlin-gabriel-besteht-zerreissprobe-ueber-datenspeicherung-13659345.html', 
  26         'only_matching': True, 
  28         'url': 'http://www.faz.net/berlin-gabriel-besteht-zerreissprobe-ueber-datenspeicherung-13659345.html', 
  29         'only_matching': True, 
  31         'url': 'http://www.faz.net/-13659345.html', 
  32         'only_matching': True, 
  34         'url': 'http://www.faz.net/aktuell/politik/-13659345.html', 
  35         'only_matching': True, 
  37         'url': 'http://www.faz.net/foobarblafasel-13659345.html', 
  38         'only_matching': True, 
  41     def _real_extract(self
, url
): 
  42         video_id 
= self
._match
_id
(url
) 
  44         webpage 
= self
._download
_webpage
(url
, video_id
) 
  45         description 
= self
._og
_search
_description
(webpage
) 
  46         config_xml_url 
= self
._search
_regex
( 
  47             r
'videoXMLURL\s*=\s*"([^"]+)', webpage
, 'config xml url') 
  48         config 
= self
._download
_xml
( 
  49             config_xml_url
, video_id
, 'Downloading config xml') 
  51         encodings 
= xpath_element(config
, 'ENCODINGS', 'encodings', True) 
  53         for pref
, code 
in enumerate(['LOW', 'HIGH', 'HQ']): 
  54             encoding 
= xpath_element(encodings
, code
) 
  55             if encoding 
is not None: 
  56                 encoding_url 
= xpath_text(encoding
, 'FILENAME') 
  60                         'format_id': code
.lower(), 
  62                         'tbr': int_or_none(xpath_text(encoding
, 'AVERAGEBITRATE')), 
  64         self
._sort
_formats
(formats
) 
  68             'title': self
._og
_search
_title
(webpage
), 
  70             'description': description
.strip() if description 
else None, 
  71             'thumbnail': xpath_text(config
, 'STILL/STILL_BIG'), 
  72             'duration': int_or_none(xpath_text(config
, 'DURATION')),