]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/movingimage.py
1 from __future__
import unicode_literals
3 from .common
import InfoExtractor
10 class MovingImageIE(InfoExtractor
):
11 _VALID_URL
= r
'https?://movingimage\.nls\.uk/film/(?P<id>\d+)'
13 'url': 'http://movingimage.nls.uk/film/3561',
14 'md5': '4caa05c2b38453e6f862197571a7be2f',
18 'title': 'SHETLAND WOOL',
19 'description': 'md5:c5afca6871ad59b4271e7704fe50ab04',
21 'thumbnail': r
're:^https?://.*\.jpg$',
25 def _real_extract(self
, url
):
26 video_id
= self
._match
_id
(url
)
28 webpage
= self
._download
_webpage
(url
, video_id
)
30 formats
= self
._extract
_m
3u8_formats
(
31 self
._html
_search
_regex
(r
'file\s*:\s*"([^"]+)"', webpage
, 'm3u8 manifest URL'),
32 video_id
, ext
='mp4', entry_protocol
='m3u8_native')
def search_field(field_name, fatal=False):
    """Return the content of one labelled metadata field on the page.

    Searches ``webpage`` (taken from the enclosing scope) for a
    ``<span class="field_title">NAME:</span>`` followed by a
    ``<span class="field_content">`` and captures the content span's text
    up to the next ``<``.

    :param field_name: label as it appears on the page, without the
        trailing colon. It is interpolated into the pattern unescaped,
        so it must not contain regex metacharacters.
    :param fatal: forwarded unchanged to ``self._search_regex``.
    :return: whatever ``self._search_regex`` returns for the first
        capture group.
    """
    # Pass the field's own (lower-cased) name as the label argument
    # instead of the fixed 'title', so each lookup is identified by the
    # field it searched for. 'Title' still yields 'title' as before.
    return self._search_regex(
        r'<span\s+class="field_title">%s:</span>\s*<span\s+class="field_content">([^<]+)</span>' % field_name,
        webpage, field_name.lower(), fatal=fatal)
39 title
= unescapeHTML(search_field('Title', fatal
=True)).strip('()[]')
40 description
= unescapeHTML(search_field('Description'))
41 duration
= parse_duration(search_field('Running time'))
42 thumbnail
= self
._search
_regex
(
43 r
"image\s*:\s*'([^']+)'", webpage
, 'thumbnail', fatal
=False)
49 'description': description
,
51 'thumbnail': thumbnail
,