Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/ssa.py
debian/patches: Removed, they came from upstream.
[youtubedl] / youtube_dl / extractor / ssa.py
1 from __future__ import unicode_literals
2
3 from .common import InfoExtractor
4 from ..utils import (
5 unescapeHTML,
6 parse_duration,
7 )
8
9
class SSAIE(InfoExtractor):
    """Extractor for film pages at ``http://ssa.nls.uk/film/<id>``.

    The film page is scraped with regular expressions for the RTMP
    streamer URL, the media file name, the poster image and the
    ``field_title``/``field_content`` metadata spans.
    """

    _VALID_URL = r'http://ssa\.nls\.uk/film/(?P<id>\d+)'
    _TEST = {
        'url': 'http://ssa.nls.uk/film/3561',
        'info_dict': {
            'id': '3561',
            'ext': 'flv',
            'title': 'SHETLAND WOOL',
            'description': 'md5:c5afca6871ad59b4271e7704fe50ab04',
            'duration': 900,
            # Raw string: '\.' is not a valid escape in a normal literal.
            'thumbnail': r're:^https?://.*\.jpg$',
        },
        'params': {
            # rtmp download
            'skip_download': True,
        },
    }

    def _real_extract(self, url):
        """Build the info dict for the film page at *url*.

        The streamer URL, the file name and the title are mandatory
        (their lookups are fatal); description, running time and
        thumbnail are looked up non-fatally.
        """
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)

        # Whitespace (not arbitrary non-space text) may separate the
        # comma from the quoted value, as in the 'file'/'image' patterns.
        streamer = self._search_regex(
            r"'streamer'\s*,\s*'(rtmp[^']+)'", webpage, 'streamer')
        file_name = self._search_regex(
            r"'file'\s*,\s*'([^']+)'", webpage, 'file')
        # The play path is the file name without its extension; keep the
        # full name when there is no '.' instead of ending up with ''.
        play_path = file_name.rpartition('.')[0] or file_name

        def search_field(field_name, fatal=False):
            """Return the content of the metadata span titled *field_name*."""
            return self._search_regex(
                r'<span\s+class="field_title">%s:</span>\s*<span\s+class="field_content">([^<]+)</span>' % field_name,
                # Label messages with the field actually being searched.
                webpage, field_name.lower(), fatal=fatal)

        # Titles may be wrapped in parentheses/brackets on the page.
        title = unescapeHTML(search_field('Title', fatal=True)).strip('()[]')
        description = unescapeHTML(search_field('Description'))
        duration = parse_duration(search_field('Running time'))
        thumbnail = self._search_regex(
            r"'image'\s*,\s*'([^']+)'", webpage, 'thumbnail', fatal=False)

        return {
            'id': video_id,
            'url': streamer,
            'play_path': play_path,
            'ext': 'flv',
            'title': title,
            'description': description,
            'duration': duration,
            'thumbnail': thumbnail,
        }