]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/ssa.py
1 from __future__
import unicode_literals
3 from .common
import InfoExtractor
10 class SSAIE(InfoExtractor
):
11 _VALID_URL
= r
'http://ssa\.nls\.uk/film/(?P<id>\d+)'
13 'url': 'http://ssa.nls.uk/film/3561',
17 'title': 'SHETLAND WOOL',
18 'description': 'md5:c5afca6871ad59b4271e7704fe50ab04',
20 'thumbnail': 're:^https?://.*\.jpg$',
24 'skip_download': True,
28 def _real_extract(self
, url
):
# Identifier taken from the URL (presumably the (?P<id>...) group of
# _VALID_URL -- confirm against InfoExtractor._match_id).
video_id = self._match_id(url)

# Film page markup; every value below is scraped from this text.
webpage = self._download_webpage(url, video_id)

# Player variables appear in the page as 'name', 'value' pairs.  The gap
# after the comma is optional whitespace (\s*), exactly as in the 'file'
# lookup below.  It used to be \S*, which rejected a space after the comma
# and tolerated arbitrary non-space text in its place.
streamer = self._search_regex(
    r"'streamer'\s*,\s*'(rtmp[^']+)'", webpage, 'streamer')

# Keep only what precedes the last '.' of the 'file' value.
# NOTE(review): a value containing no '.' yields an empty play path,
# because rpartition() then returns ('', '', value) -- confirm that the
# site always includes an extension.
play_path = self._search_regex(
    r"'file'\s*,\s*'([^']+)'", webpage, 'file').rpartition('.')[0]
def search_field(field_name, fatal=False):
    """Return the content of the metadata field labelled *field_name*.

    Searches ``webpage`` for a ``field_title`` span reading
    ``<field_name>:`` followed by a ``field_content`` span, and returns
    the text captured from the latter.

    field_name is interpolated into the pattern unescaped, so it must
    not contain regex metacharacters.  fatal is forwarded unchanged to
    ``self._search_regex``.
    """
    # Use the field's own name as the label handed to _search_regex
    # (previously always 'title'), so that a failed 'Description' or
    # 'Running time' lookup is not reported as a title failure.  The label
    # for 'Title' is still 'title'.
    return self._search_regex(
        r'<span\s+class="field_title">%s:</span>\s*<span\s+class="field_content">([^<]+)</span>' % field_name,
        webpage, field_name.lower(), fatal=fatal)
# Title is the only mandatory field; after HTML-unescaping, any leading or
# trailing parentheses and square brackets are stripped from it.
raw_title = search_field('Title', fatal=True)
title = unescapeHTML(raw_title).strip('()[]')

# The remaining fields are looked up non-fatally.
raw_description = search_field('Description')
description = unescapeHTML(raw_description)

running_time = search_field('Running time')
duration = parse_duration(running_time)

# The poster image is another 'name', 'value' player variable.
image_pattern = r"'image'\s*,\s*'([^']+)'"
thumbnail = self._search_regex(
    image_pattern, webpage, 'thumbnails', fatal=False)
52 'play_path': play_path
,
55 'description': description
,
57 'thumbnail': thumbnail
,