]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/ssa.py
   1 from __future__ 
import unicode_literals
 
   3 from .common 
import InfoExtractor
 
  10 class SSAIE(InfoExtractor
): 
  11     _VALID_URL 
= r
'http://ssa\.nls\.uk/film/(?P<id>\d+)' 
  13         'url': 'http://ssa.nls.uk/film/3561', 
  17             'title': 'SHETLAND WOOL', 
  18             'description': 'md5:c5afca6871ad59b4271e7704fe50ab04', 
  20             'thumbnail': 're:^https?://.*\.jpg$', 
  24             'skip_download': True, 
  28     def _real_extract(self
, url
): 
  29         video_id 
= self
._match
_id
(url
) 
  31         webpage 
= self
._download
_webpage
(url
, video_id
) 
  33         streamer 
= self
._search
_regex
( 
  34             r
"'streamer'\s*,\S*'(rtmp[^']+)'", webpage
, 'streamer') 
  35         play_path 
= self
._search
_regex
( 
  36             r
"'file'\s*,\s*'([^']+)'", webpage
, 'file').rpartition('.')[0] 
  38         def search_field(field_name
, fatal
=False): 
  39             return self
._search
_regex
( 
  40                 r
'<span\s+class="field_title">%s:</span>\s*<span\s+class="field_content">([^<]+)</span>' % field_name
, 
  41                 webpage
, 'title', fatal
=fatal
) 
  43         title 
= unescapeHTML(search_field('Title', fatal
=True)).strip('()[]') 
  44         description 
= unescapeHTML(search_field('Description')) 
  45         duration 
= parse_duration(search_field('Running time')) 
  46         thumbnail 
= self
._search
_regex
( 
  47             r
"'image'\s*,\s*'([^']+)'", webpage
, 'thumbnails', fatal
=False) 
  52             'play_path': play_path
, 
  55             'description': description
, 
  57             'thumbnail': thumbnail
,