# Source: youtube_dl/extractor/faz.py (youtubedl repository, Raphaël G. Git Repositories)
   2 from __future__ 
import unicode_literals
 
   6 from .common 
import InfoExtractor
 
   7 from ..compat 
import compat_etree_fromstring
 
  15 class FazIE(InfoExtractor
): 
  17     _VALID_URL 
= r
'https?://(?:www\.)?faz\.net/(?:[^/]+/)*.*?-(?P<id>\d+)\.html' 
  20         'url': 'http://www.faz.net/multimedia/videos/stockholm-chemie-nobelpreis-fuer-drei-amerikanische-forscher-12610585.html', 
  24             'title': 'Stockholm: Chemie-Nobelpreis für drei amerikanische Forscher', 
  25             'description': 'md5:1453fbf9a0d041d985a47306192ea253', 
  28         'url': 'http://www.faz.net/aktuell/politik/berlin-gabriel-besteht-zerreissprobe-ueber-datenspeicherung-13659345.html', 
  29         'only_matching': True, 
  31         'url': 'http://www.faz.net/berlin-gabriel-besteht-zerreissprobe-ueber-datenspeicherung-13659345.html', 
  32         'only_matching': True, 
  34         'url': 'http://www.faz.net/-13659345.html', 
  35         'only_matching': True, 
  37         'url': 'http://www.faz.net/aktuell/politik/-13659345.html', 
  38         'only_matching': True, 
  40         'url': 'http://www.faz.net/foobarblafasel-13659345.html', 
  41         'only_matching': True, 
  44     def _real_extract(self
, url
): 
  45         video_id 
= self
._match
_id
(url
) 
  47         webpage 
= self
._download
_webpage
(url
, video_id
) 
  48         description 
= self
._og
_search
_description
(webpage
) 
  49         media 
= self
._html
_search
_regex
( 
  50             r
"data-videojs-media='([^']+)", 
  53             perform_url 
= self
._search
_regex
( 
  54                 r
"<iframe[^>]+?src='((?:http:)?//player\.performgroup\.com/eplayer/eplayer\.html#/?[0-9a-f]{26}\.[0-9a-z]{26})", 
  55                 webpage
, 'perform url') 
  56             return self
.url_result(perform_url
) 
  57         config 
= compat_etree_fromstring(media
) 
  59         encodings 
= xpath_element(config
, 'ENCODINGS', 'encodings', True) 
  61         for pref
, code 
in enumerate(['LOW', 'HIGH', 'HQ']): 
  62             encoding 
= xpath_element(encodings
, code
) 
  63             if encoding 
is not None: 
  64                 encoding_url 
= xpath_text(encoding
, 'FILENAME') 
  66                     tbr 
= xpath_text(encoding
, 'AVERAGEBITRATE', 1000) 
  68                         tbr 
= int_or_none(tbr
.replace(',', '.')) 
  71                         'format_id': code
.lower(), 
  74                         'vcodec': xpath_text(encoding
, 'CODEC'), 
  76                     mobj 
= re
.search(r
'(\d+)x(\d+)_(\d+)\.mp4', encoding_url
) 
  79                             'width': int(mobj
.group(1)), 
  80                             'height': int(mobj
.group(2)), 
  81                             'tbr': tbr 
or int(mobj
.group(3)), 
  84         self
._sort
_formats
(formats
) 
  88             'title': self
._og
_search
_title
(webpage
), 
  90             'description': description
.strip() if description 
else None, 
  91             'thumbnail': xpath_text(config
, 'STILL/STILL_BIG'), 
  92             'duration': int_or_none(xpath_text(config
, 'DURATION')),