]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/expressen.py
   2 from __future__ 
import unicode_literals
 
   6 from .common 
import InfoExtractor
 
  15 class ExpressenIE(InfoExtractor
): 
  18                         (?:www\.)?expressen\.se/ 
  19                         (?:(?:tvspelare/video|videoplayer/embed)/)? 
  24         'url': 'https://www.expressen.se/tv/ledare/ledarsnack/ledarsnack-om-arbetslosheten-bland-kvinnor-i-speciellt-utsatta-omraden/', 
  25         'md5': '2fbbe3ca14392a6b1b36941858d33a45', 
  29             'title': 'Ledarsnack: Om arbetslösheten bland kvinnor i speciellt utsatta områden', 
  30             'description': 'md5:f38c81ff69f3de4d269bbda012fcbbba', 
  31             'thumbnail': r
're:^https?://.*\.jpg$', 
  33             'timestamp': 1526639109, 
  34             'upload_date': '20180518', 
  37         'url': 'https://www.expressen.se/tv/kultur/kulturdebatt-med-expressens-karin-olsson/', 
  38         'only_matching': True, 
  40         'url': 'https://www.expressen.se/tvspelare/video/tv/ditv/ekonomistudion/experterna-har-ar-fragorna-som-avgor-valet/?embed=true&external=true&autoplay=true&startVolume=0&partnerId=di', 
  41         'only_matching': True, 
  43         'url': 'https://www.expressen.se/videoplayer/embed/tv/ditv/ekonomistudion/experterna-har-ar-fragorna-som-avgor-valet/?embed=true&external=true&autoplay=true&startVolume=0&partnerId=di', 
  44         'only_matching': True, 
  48     def _extract_urls(webpage
): 
  50             mobj
.group('url') for mobj 
in re
.finditer( 
  51                 r
'<iframe[^>]+\bsrc=(["\'])(?P
<url
>(?
:https?
:)?
//(?
:www\
.)?expressen\
.se
/(?
:tvspelare
/video|videoplayer
/embed
)/tv
/.+?
)\
1', 
  54     def _real_extract(self, url): 
  55         display_id = self._match_id(url) 
  57         webpage = self._download_webpage(url, display_id) 
  59         def extract_data(name): 
  60             return self._parse_json( 
  62                     r'data
-%s=(["\'])(?P<value>(?:(?!\1).)+)\1' % name, 
  63                     webpage, 'info', group='value'), 
  64                 display_id, transform_source=unescapeHTML) 
  66         info = extract_data('video-tracking-info') 
  67         video_id = info['videoId'] 
  69         data = extract_data('article-data') 
  70         stream = data['stream'] 
  72         if determine_ext(stream) == 'm3u8': 
  73             formats = self._extract_m3u8_formats( 
  74                 stream, display_id, 'mp4', entry_protocol='m3u8_native', 
  80         self._sort_formats(formats) 
  82         title = info.get('titleRaw') or data['title'] 
  83         description = info.get('descriptionRaw') 
  84         thumbnail = info.get('socialMediaImage') or data.get('image') 
  85         duration = int_or_none(info.get('videoTotalSecondsDuration') 
  86                                or data.get('totalSecondsDuration')) 
  87         timestamp = unified_timestamp(info.get('publishDate')) 
  91             'display_id': display_id, 
  93             'description': description, 
  94             'thumbnail': thumbnail, 
  96             'timestamp': timestamp,