]>
Raphaƫl G. Git Repositories - youtubedl/blob - youtube_dl/extractor/rai.py
   1 from __future__ 
import unicode_literals
 
   5 from .subtitles 
import SubtitlesInfoExtractor
 
  13 class RaiIE(SubtitlesInfoExtractor
): 
  14     _VALID_URL 
= r
'(?P<url>http://(?:.+?\.)?(?:rai\.it|rai\.tv|rainews\.it)/dl/.+?-(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})(?:-.+?)?\.html)' 
  17             'url': 'http://www.rai.tv/dl/RaiTV/programmi/media/ContentItem-cb27157f-9dd0-4aee-b788-b1f67643a391.html', 
  18             'md5': 'c064c0b2d09c278fb293116ef5d0a32d', 
  20                 'id': 'cb27157f-9dd0-4aee-b788-b1f67643a391', 
  22                 'title': 'Report del 07/04/2014', 
  23                 'description': 'md5:f27c544694cacb46a078db84ec35d2d9', 
  24                 'upload_date': '20140407', 
  29             'url': 'http://www.raisport.rai.it/dl/raiSport/media/rassegna-stampa-04a9f4bd-b563-40cf-82a6-aad3529cb4a9.html', 
  30             'md5': '8bb9c151924ce241b74dd52ef29ceafa', 
  32                 'id': '04a9f4bd-b563-40cf-82a6-aad3529cb4a9', 
  34                 'title': 'TG PRIMO TEMPO', 
  36                 'upload_date': '20140612', 
  42             'url': 'http://www.rainews.it/dl/rainews/media/state-of-the-net-Antonella-La-Carpia-regole-virali-7aafdea9-0e5d-49d5-88a6-7e65da67ae13.html', 
  43             'md5': '35cf7c229f22eeef43e48b5cf923bef0', 
  45                 'id': '7aafdea9-0e5d-49d5-88a6-7e65da67ae13', 
  47                 'title': 'State of the Net, Antonella La Carpia: regole virali', 
  48                 'description': 'md5:b0ba04a324126903e3da7763272ae63c', 
  49                 'upload_date': '20140613', 
  54             'url': 'http://www.rai.tv/dl/RaiTV/programmi/media/ContentItem-b4a49761-e0cc-4b14-8736-2729f6f73132-tg2.html', 
  55             'md5': '35694f062977fe6619943f08ed935730', 
  57                 'id': 'b4a49761-e0cc-4b14-8736-2729f6f73132', 
  59                 'title': 'Alluvione in Sardegna e dissesto idrogeologico', 
  60                 'description': 'Edizione delle ore 20:30 ', 
  65     def _real_extract(self
, url
): 
  66         mobj 
= re
.match(self
._VALID
_URL
, url
) 
  67         video_id 
= mobj
.group('id') 
  69         media 
= self
._download
_json
('%s?json' % mobj
.group('url'), video_id
, 'Downloading video JSON') 
  71         title 
= media
.get('name') 
  72         description 
= media
.get('desc') 
  73         thumbnail 
= media
.get('image_300') or media
.get('image_medium') or media
.get('image') 
  74         duration 
= parse_duration(media
.get('length')) 
  75         uploader 
= media
.get('author') 
  76         upload_date 
= unified_strdate(media
.get('date')) 
  80         for format_id 
in ['wmv', 'm3u8', 'mediaUri', 'h264']: 
  81             media_url 
= media
.get(format_id
) 
  86                 'format_id': format_id
, 
  90         if self
._downloader
.params
.get('listsubtitles', False): 
  91             page 
= self
._download
_webpage
(url
, video_id
) 
  92             self
._list
_available
_subtitles
(video_id
, page
) 
  96         if self
._have
_to
_download
_any
_subtitles
: 
  97             page 
= self
._download
_webpage
(url
, video_id
) 
  98             subtitles 
= self
.extract_subtitles(video_id
, page
) 
 103             'description': description
, 
 104             'thumbnail': thumbnail
, 
 105             'uploader': uploader
, 
 106             'upload_date': upload_date
, 
 107             'duration': duration
, 
 109             'subtitles': subtitles
, 
 112     def _get_available_subtitles(self
, video_id
, webpage
): 
 114         m 
= re
.search(r
'<meta name="closedcaption" content="(?P<captions>[^"]+)"', webpage
) 
 116             captions 
= m
.group('captions') 
 119             if captions
.endswith(STL_EXT
): 
 120                 captions 
= captions
[:-len(STL_EXT
)] + SRT_EXT
 
 121             subtitles
['it'] = 'http://www.rai.tv%s' % compat_urllib_parse
.quote(captions
)