]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/aftenposten.py
   2 from __future__ 
import unicode_literals
 
   6 from .common 
import InfoExtractor
 
  16 class AftenpostenIE(InfoExtractor
): 
  17     _VALID_URL 
= r
'https?://(?:www\.)?aftenposten\.no/webtv/([^/]+/)*(?P<id>[^/]+)-\d+\.html' 
  20         'url': 'http://www.aftenposten.no/webtv/serier-og-programmer/sweatshopenglish/TRAILER-SWEATSHOP---I-cant-take-any-more-7800835.html?paging=&section=webtv_serierogprogrammer_sweatshop_sweatshopenglish', 
  21         'md5': 'fd828cd29774a729bf4d4425fe192972', 
  25             'title': 'TRAILER: "Sweatshop" - I can´t take any more', 
  26             'description': 'md5:21891f2b0dd7ec2f78d84a50e54f8238', 
  27             'timestamp': 1416927969, 
  28             'upload_date': '20141125', 
  32     def _real_extract(self
, url
): 
  33         display_id 
= self
._match
_id
(url
) 
  35         webpage 
= self
._download
_webpage
(url
, display_id
) 
  37         video_id 
= self
._html
_search
_regex
( 
  38             r
'data-xs-id="(\d+)"', webpage
, 'video id') 
  40         data 
= self
._download
_xml
( 
  41             'http://frontend.xstream.dk/ap/feed/video/?platform=web&id=%s' % video_id
, video_id
) 
  44             'atom': 'http://www.w3.org/2005/Atom', 
  45             'xt': 'http://xstream.dk/', 
  46             'media': 'http://search.yahoo.com/mrss/', 
  49         entry 
= data
.find(xpath_with_ns('./atom:entry', NS_MAP
)) 
  52             entry
, xpath_with_ns('./atom:title', NS_MAP
), 'title') 
  53         description 
= xpath_text( 
  54             entry
, xpath_with_ns('./atom:summary', NS_MAP
), 'description') 
  55         timestamp 
= parse_iso8601(xpath_text( 
  56             entry
, xpath_with_ns('./atom:published', NS_MAP
), 'upload date')) 
  59         media_group 
= entry
.find(xpath_with_ns('./media:group', NS_MAP
)) 
  60         for media_content 
in media_group
.findall(xpath_with_ns('./media:content', NS_MAP
)): 
  61             media_url 
= media_content
.get('url') 
  64             tbr 
= int_or_none(media_content
.get('bitrate')) 
  65             mobj 
= re
.search(r
'^(?P<url>rtmp://[^/]+/(?P<app>[^/]+))/(?P<playpath>.+)$', media_url
) 
  68                     'url': mobj
.group('url'), 
  69                     'play_path': 'mp4:%s' % mobj
.group('playpath'), 
  70                     'app': mobj
.group('app'), 
  73                     'format_id': 'rtmp-%d' % tbr
, 
  80         self
._sort
_formats
(formats
) 
  82         link 
= find_xpath_attr( 
  83             entry
, xpath_with_ns('./atom:link', NS_MAP
), 'rel', 'original') 
  86                 'url': link
.get('href'), 
  87                 'format_id': link
.get('rel'), 
  91             'url': splash
.get('url'), 
  92             'width': int_or_none(splash
.get('width')), 
  93             'height': int_or_none(splash
.get('height')), 
  94         } for splash 
in media_group
.findall(xpath_with_ns('./xt:splash', NS_MAP
))] 
  99             'description': description
, 
 100             'timestamp': timestamp
, 
 102             'thumbnails': thumbnails
,