]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/rte.py
   2 from __future__ 
import unicode_literals
 
   6 from .common 
import InfoExtractor
 
  14 class RteIE(InfoExtractor
): 
  16     IE_DESC 
= 'Raidió Teilifís Éireann TV' 
  17     _VALID_URL 
= r
'https?://(?:www\.)?rte\.ie/player/[^/]{2,3}/show/[^/]+/(?P<id>[0-9]+)' 
  19         'url': 'http://www.rte.ie/player/ie/show/iwitness-862/10478715/', 
  23             'title': 'Watch iWitness  online', 
  24             'thumbnail': 're:^https?://.*\.jpg$', 
  25             'description': 'iWitness : The spirit of Ireland, one voice and one minute at a time.', 
  29             'skip_download': 'f4m fails with --test atm' 
  33     def _real_extract(self
, url
): 
  34         video_id 
= self
._match
_id
(url
) 
  35         webpage 
= self
._download
_webpage
(url
, video_id
) 
  37         title 
= self
._og
_search
_title
(webpage
) 
  38         description 
= self
._html
_search
_meta
('description', webpage
, 'description') 
  39         duration 
= float_or_none(self
._html
_search
_meta
( 
  40             'duration', webpage
, 'duration', fatal
=False), 1000) 
  42         thumbnail_id 
= self
._search
_regex
( 
  43             r
'<meta name="thumbnail" content="uri:irus:(.*?)" />', webpage
, 'thumbnail') 
  44         thumbnail 
= 'http://img.rasset.ie/' + thumbnail_id 
+ '.jpg' 
  46         feeds_url 
= self
._html
_search
_meta
('feeds-prefix', webpage
, 'feeds url') + video_id
 
  47         json_string 
= self
._download
_json
(feeds_url
, video_id
) 
  49         # f4m_url = server + relative_url 
  50         f4m_url 
= json_string
['shows'][0]['media:group'][0]['rte:server'] + json_string
['shows'][0]['media:group'][0]['url'] 
  51         f4m_formats 
= self
._extract
_f
4m
_formats
(f4m_url
, video_id
) 
  56             'formats': f4m_formats
, 
  57             'description': description
, 
  58             'thumbnail': thumbnail
, 
  63 class RteRadioIE(InfoExtractor
): 
  65     IE_DESC 
= 'Raidió Teilifís Éireann radio' 
  66     # Radioplayer URLs have two distinct specifier formats, 
  67     # the old format #!rii=<channel_id>:<id>:<playable_item_id>:<date>: 
  68     # the new format #!rii=b<channel_id>_<id>_<playable_item_id>_<date>_ 
  69     # where the IDs are int/empty, the date is DD-MM-YYYY, and the specifier may be truncated. 
  70     # An <id> uniquely defines an individual recording, and is the only part we require. 
  71     _VALID_URL 
= r
'https?://(?:www\.)?rte\.ie/radio/utils/radioplayer/rteradioweb\.html#!rii=(?:b?[0-9]*)(?:%3A|:|%5F|_)(?P<id>[0-9]+)' 
  74         # Old-style player URL; HLS and RTMPE formats 
  75         'url': 'http://www.rte.ie/radio/utils/radioplayer/rteradioweb.html#!rii=16:10507902:2414:27-12-2015:', 
  80             'thumbnail': 're:^https?://.*\.jpg$', 
  81             'description': 'md5:9ce124a7fb41559ec68f06387cabddf0', 
  82             'timestamp': 1451203200, 
  83             'upload_date': '20151227', 
  87             'skip_download': 'f4m fails with --test atm' 
  90         # New-style player URL; RTMPE formats only 
  91         'url': 'http://rte.ie/radio/utils/radioplayer/rteradioweb.html#!rii=b16_3250678_8861_06-04-2012_', 
  95             'title': 'The Lyric Concert with Paul Herriott', 
  96             'thumbnail': 're:^https?://.*\.jpg$', 
  98             'timestamp': 1333742400, 
  99             'upload_date': '20120406', 
 100             'duration': 7199.016, 
 103             'skip_download': 'f4m fails with --test atm' 
 107     def _real_extract(self
, url
): 
 108         item_id 
= self
._match
_id
(url
) 
 110         json_string 
= self
._download
_json
( 
 111             'http://www.rte.ie/rteavgen/getplaylist/?type=web&format=json&id=' + item_id
, 
 114         # NB the string values in the JSON are stored using XML escaping(!) 
 115         show 
= json_string
['shows'][0] 
 116         title 
= unescapeHTML(show
['title']) 
 117         description 
= unescapeHTML(show
.get('description')) 
 118         thumbnail 
= show
.get('thumbnail') 
 119         duration 
= float_or_none(show
.get('duration'), 1000) 
 120         timestamp 
= parse_iso8601(show
.get('published')) 
 122         mg 
= show
['media:group'][0] 
 127             m 
= re
.match(r
'(?P<url>rtmpe?://[^/]+)/(?P<app>.+)/(?P<playpath>mp4:.*)', mg
['url']) 
 131                     'url': m
['url'] + '/' + m
['app'], 
 133                     'play_path': m
['playpath'], 
 139         if mg
.get('hls_server') and mg
.get('hls_url'): 
 140             formats
.extend(self
._extract
_m
3u8_formats
( 
 141                 mg
['hls_server'] + mg
['hls_url'], item_id
, 'mp4', 
 142                 entry_protocol
='m3u8_native', m3u8_id
='hls', fatal
=False)) 
 144         if mg
.get('hds_server') and mg
.get('hds_url'): 
 145             formats
.extend(self
._extract
_f
4m
_formats
( 
 146                 mg
['hds_server'] + mg
['hds_url'], item_id
, 
 147                 f4m_id
='hds', fatal
=False)) 
 149         self
._sort
_formats
(formats
) 
 154             'description': description
, 
 155             'thumbnail': thumbnail
, 
 156             'timestamp': timestamp
, 
 157             'duration': duration
,