]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/svt.py
1c04dfb7bf757477d134cc7caa223ab47d0800ba
   2 from __future__ 
import unicode_literals
 
   6 from .common 
import InfoExtractor
 
  15 class SVTBaseIE(InfoExtractor
): 
  16     def _extract_video(self
, video_info
, video_id
): 
  18         for vr 
in video_info
['videoReferences']: 
  19             player_type 
= vr
.get('playerType') 
  21             ext 
= determine_ext(vurl
) 
  23                 formats
.extend(self
._extract
_m
3u8_formats
( 
  25                     ext
='mp4', entry_protocol
='m3u8_native', 
  26                     m3u8_id
=player_type
, fatal
=False)) 
  28                 formats
.extend(self
._extract
_f
4m
_formats
( 
  29                     vurl 
+ '?hdcore=3.3.0', video_id
, 
  30                     f4m_id
=player_type
, fatal
=False)) 
  32                 if player_type 
== 'dashhbbtv': 
  33                     formats
.extend(self
._extract
_mpd
_formats
( 
  34                         vurl
, video_id
, mpd_id
=player_type
, fatal
=False)) 
  37                     'format_id': player_type
, 
  40         if not formats 
and video_info
.get('rights', {}).get('geoBlockedSweden'): 
  41             self
.raise_geo_restricted('This video is only available in Sweden') 
  42         self
._sort
_formats
(formats
) 
  45         subtitle_references 
= dict_get(video_info
, ('subtitles', 'subtitleReferences')) 
  46         if isinstance(subtitle_references
, list): 
  47             for sr 
in subtitle_references
: 
  48                 subtitle_url 
= sr
.get('url') 
  49                 subtitle_lang 
= sr
.get('language', 'sv') 
  51                     if determine_ext(subtitle_url
) == 'm3u8': 
  52                         # TODO(yan12125): handle WebVTT in m3u8 manifests 
  55                     subtitles
.setdefault(subtitle_lang
, []).append({'url': subtitle_url
}) 
  57         title 
= video_info
.get('title') 
  59         series 
= video_info
.get('programTitle') 
  60         season_number 
= int_or_none(video_info
.get('season')) 
  61         episode 
= video_info
.get('episodeTitle') 
  62         episode_number 
= int_or_none(video_info
.get('episodeNumber')) 
  64         duration 
= int_or_none(dict_get(video_info
, ('materialLength', 'contentDuration'))) 
  67             video_info
, ('inappropriateForChildren', 'blockedForChildren'), 
  68             skip_false_values
=False) 
  70             age_limit 
= 18 if adult 
else 0 
  76             'subtitles': subtitles
, 
  78             'age_limit': age_limit
, 
  80             'season_number': season_number
, 
  82             'episode_number': episode_number
, 
  86 class SVTIE(SVTBaseIE
): 
  87     _VALID_URL 
= r
'https?://(?:www\.)?svt\.se/wd\?(?:.*?&)?widgetId=(?P<widget_id>\d+)&.*?\barticleId=(?P<id>\d+)' 
  89         'url': 'http://www.svt.se/wd?widgetId=23991&sectionId=541&articleId=2900353&type=embed&contextSectionId=123&autostart=false', 
  90         'md5': '33e9a5d8f646523ce0868ecfb0eed77d', 
  94             'title': 'Stjärnorna skojar till det - under SVT-intervjun', 
 101     def _extract_url(webpage
): 
 103             r
'(?:<iframe src|href)="(?P<url>%s[^"]*)"' % SVTIE
._VALID
_URL
, webpage
) 
 105             return mobj
.group('url') 
 107     def _real_extract(self
, url
): 
 108         mobj 
= re
.match(self
._VALID
_URL
, url
) 
 109         widget_id 
= mobj
.group('widget_id') 
 110         article_id 
= mobj
.group('id') 
 112         info 
= self
._download
_json
( 
 113             'http://www.svt.se/wd?widgetId=%s&articleId=%s&format=json&type=embed&output=json' % (widget_id
, article_id
), 
 116         info_dict 
= self
._extract
_video
(info
['video'], article_id
) 
 117         info_dict
['title'] = info
['context']['title'] 
 121 class SVTPlayIE(SVTBaseIE
): 
 122     IE_DESC 
= 'SVT Play and Öppet arkiv' 
 123     _VALID_URL 
= r
'https?://(?:www\.)?(?:svtplay|oppetarkiv)\.se/(?:video|klipp)/(?P<id>[0-9]+)' 
 125         'url': 'http://www.svtplay.se/video/5996901/flygplan-till-haile-selassie/flygplan-till-haile-selassie-2', 
 126         'md5': '2b6704fe4a28801e1a098bbf3c5ac611', 
 130             'title': 'Flygplan till Haile Selassie', 
 132             'thumbnail': 're:^https?://.*[\.-]jpg$', 
 141         # geo restricted to Sweden 
 142         'url': 'http://www.oppetarkiv.se/video/5219710/trollflojten', 
 143         'only_matching': True, 
 145         'url': 'http://www.svtplay.se/klipp/9023742/stopptid-om-bjorn-borg', 
 146         'only_matching': True, 
 149     def _real_extract(self
, url
): 
 150         video_id 
= self
._match
_id
(url
) 
 152         webpage 
= self
._download
_webpage
(url
, video_id
) 
 154         data 
= self
._parse
_json
( 
 156                 r
'root\["__svtplay"\]\s*=\s*([^;]+);', 
 157                 webpage
, 'embedded data', default
='{}'), 
 158             video_id
, fatal
=False) 
 160         thumbnail 
= self
._og
_search
_thumbnail
(webpage
) 
 163             video_info 
= try_get( 
 164                 data
, lambda x
: x
['context']['dispatcher']['stores']['VideoTitlePageStore']['data']['video'], 
 167                 info_dict 
= self
._extract
_video
(video_info
, video_id
) 
 169                     'title': data
['context']['dispatcher']['stores']['MetaStore']['title'], 
 170                     'thumbnail': thumbnail
, 
 174         video_id 
= self
._search
_regex
( 
 175             r
'<video[^>]+data-video-id=["\']([\da
-zA
-Z
-]+)', 
 176             webpage, 'video 
id', default=None) 
 179             data = self._download_json( 
 180                 'http
://www
.svt
.se
/videoplayer
-api
/video
/%s' % video_id, video_id) 
 181             info_dict = self._extract_video(data, video_id) 
 182             if not info_dict.get('title
'): 
 183                 info_dict['title
'] = re.sub( 
 185                     info_dict.get('episode
') or self._og_search_title(webpage))