]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/svt.py
2 from __future__
import unicode_literals
6 from .common
import InfoExtractor
15 class SVTBaseIE(InfoExtractor
):
16 def _extract_video(self
, video_info
, video_id
):
18 for vr
in video_info
['videoReferences']:
19 player_type
= vr
.get('playerType')
21 ext
= determine_ext(vurl
)
23 formats
.extend(self
._extract
_m
3u8_formats
(
25 ext
='mp4', entry_protocol
='m3u8_native',
26 m3u8_id
=player_type
, fatal
=False))
28 formats
.extend(self
._extract
_f
4m
_formats
(
29 vurl
+ '?hdcore=3.3.0', video_id
,
30 f4m_id
=player_type
, fatal
=False))
32 if player_type
== 'dashhbbtv':
33 formats
.extend(self
._extract
_mpd
_formats
(
34 vurl
, video_id
, mpd_id
=player_type
, fatal
=False))
37 'format_id': player_type
,
40 if not formats
and video_info
.get('rights', {}).get('geoBlockedSweden'):
41 self
.raise_geo_restricted('This video is only available in Sweden')
42 self
._sort
_formats
(formats
)
45 subtitle_references
= dict_get(video_info
, ('subtitles', 'subtitleReferences'))
46 if isinstance(subtitle_references
, list):
47 for sr
in subtitle_references
:
48 subtitle_url
= sr
.get('url')
49 subtitle_lang
= sr
.get('language', 'sv')
51 if determine_ext(subtitle_url
) == 'm3u8':
52 # TODO(yan12125): handle WebVTT in m3u8 manifests
55 subtitles
.setdefault(subtitle_lang
, []).append({'url': subtitle_url
})
57 title
= video_info
.get('title')
59 series
= video_info
.get('programTitle')
60 season_number
= int_or_none(video_info
.get('season'))
61 episode
= video_info
.get('episodeTitle')
62 episode_number
= int_or_none(video_info
.get('episodeNumber'))
64 duration
= int_or_none(dict_get(video_info
, ('materialLength', 'contentDuration')))
67 video_info
, ('inappropriateForChildren', 'blockedForChildren'),
68 skip_false_values
=False)
70 age_limit
= 18 if adult
else 0
76 'subtitles': subtitles
,
78 'age_limit': age_limit
,
80 'season_number': season_number
,
82 'episode_number': episode_number
,
86 class SVTIE(SVTBaseIE
):
87 _VALID_URL
= r
'https?://(?:www\.)?svt\.se/wd\?(?:.*?&)?widgetId=(?P<widget_id>\d+)&.*?\barticleId=(?P<id>\d+)'
89 'url': 'http://www.svt.se/wd?widgetId=23991§ionId=541&articleId=2900353&type=embed&contextSectionId=123&autostart=false',
90 'md5': '33e9a5d8f646523ce0868ecfb0eed77d',
94 'title': 'Stjärnorna skojar till det - under SVT-intervjun',
101 def _extract_url(webpage
):
103 r
'(?:<iframe src|href)="(?P<url>%s[^"]*)"' % SVTIE
._VALID
_URL
, webpage
)
105 return mobj
.group('url')
107 def _real_extract(self
, url
):
108 mobj
= re
.match(self
._VALID
_URL
, url
)
109 widget_id
= mobj
.group('widget_id')
110 article_id
= mobj
.group('id')
112 info
= self
._download
_json
(
113 'http://www.svt.se/wd?widgetId=%s&articleId=%s&format=json&type=embed&output=json' % (widget_id
, article_id
),
116 info_dict
= self
._extract
_video
(info
['video'], article_id
)
117 info_dict
['title'] = info
['context']['title']
121 class SVTPlayIE(SVTBaseIE
):
122 IE_DESC
= 'SVT Play and Öppet arkiv'
123 _VALID_URL
= r
'https?://(?:www\.)?(?:svtplay|oppetarkiv)\.se/video/(?P<id>[0-9]+)'
125 'url': 'http://www.svtplay.se/video/5996901/flygplan-till-haile-selassie/flygplan-till-haile-selassie-2',
126 'md5': '2b6704fe4a28801e1a098bbf3c5ac611',
130 'title': 'Flygplan till Haile Selassie',
132 'thumbnail': 're:^https?://.*[\.-]jpg$',
141 # geo restricted to Sweden
142 'url': 'http://www.oppetarkiv.se/video/5219710/trollflojten',
143 'only_matching': True,
146 def _real_extract(self
, url
):
147 video_id
= self
._match
_id
(url
)
149 webpage
= self
._download
_webpage
(url
, video_id
)
151 data
= self
._parse
_json
(
153 r
'root\["__svtplay"\]\s*=\s*([^;]+);',
154 webpage
, 'embedded data', default
='{}'),
155 video_id
, fatal
=False)
157 thumbnail
= self
._og
_search
_thumbnail
(webpage
)
160 video_info
= try_get(
161 data
, lambda x
: x
['context']['dispatcher']['stores']['VideoTitlePageStore']['data']['video'],
164 info_dict
= self
._extract
_video
(video_info
, video_id
)
166 'title': data
['context']['dispatcher']['stores']['MetaStore']['title'],
167 'thumbnail': thumbnail
,
171 video_id
= self
._search
_regex
(
172 r
'<video[^>]+data-video-id=["\']([\da
-zA
-Z
-]+)',
173 webpage, 'video
id', default=None)
176 data = self._download_json(
177 'http
://www
.svt
.se
/videoplayer
-api
/video
/%s' % video_id, video_id)
178 info_dict = self._extract_video(data, video_id)
179 if not info_dict.get('title
'):
180 info_dict['title
'] = re.sub(
182 info_dict.get('episode
') or self._og_search_title(webpage))