]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/rai.py
   1 from __future__ 
import unicode_literals
 
   5 from .common 
import InfoExtractor
 
  15 class RaiIE(InfoExtractor
): 
  16     _VALID_URL 
= r
'(?P<url>(?P<host>http://(?:.+?\.)?(?:rai\.it|rai\.tv|rainews\.it))/dl/.+?-(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})(?:-.+?)?\.html)' 
  19             'url': 'http://www.rai.tv/dl/RaiTV/programmi/media/ContentItem-cb27157f-9dd0-4aee-b788-b1f67643a391.html', 
  20             'md5': 'c064c0b2d09c278fb293116ef5d0a32d', 
  22                 'id': 'cb27157f-9dd0-4aee-b788-b1f67643a391', 
  24                 'title': 'Report del 07/04/2014', 
  25                 'description': 'md5:f27c544694cacb46a078db84ec35d2d9', 
  26                 'upload_date': '20140407', 
  31             'url': 'http://www.raisport.rai.it/dl/raiSport/media/rassegna-stampa-04a9f4bd-b563-40cf-82a6-aad3529cb4a9.html', 
  32             'md5': '8bb9c151924ce241b74dd52ef29ceafa', 
  34                 'id': '04a9f4bd-b563-40cf-82a6-aad3529cb4a9', 
  36                 'title': 'TG PRIMO TEMPO', 
  38                 'upload_date': '20140612', 
  44             'url': 'http://www.rainews.it/dl/rainews/media/state-of-the-net-Antonella-La-Carpia-regole-virali-7aafdea9-0e5d-49d5-88a6-7e65da67ae13.html', 
  45             'md5': '35cf7c229f22eeef43e48b5cf923bef0', 
  47                 'id': '7aafdea9-0e5d-49d5-88a6-7e65da67ae13', 
  49                 'title': 'State of the Net, Antonella La Carpia: regole virali', 
  50                 'description': 'md5:b0ba04a324126903e3da7763272ae63c', 
  51                 'upload_date': '20140613', 
  56             'url': 'http://www.rai.tv/dl/RaiTV/programmi/media/ContentItem-b4a49761-e0cc-4b14-8736-2729f6f73132-tg2.html', 
  57             'md5': '35694f062977fe6619943f08ed935730', 
  59                 'id': 'b4a49761-e0cc-4b14-8736-2729f6f73132', 
  61                 'title': 'Alluvione in Sardegna e dissesto idrogeologico', 
  62                 'description': 'Edizione delle ore 20:30 ', 
  66             'url': 'http://www.ilcandidato.rai.it/dl/ray/media/Il-Candidato---Primo-episodio-Le-Primarie-28e5525a-b495-45e8-a7c3-bc48ba45d2b6.html', 
  67             'md5': '02b64456f7cc09f96ff14e7dd489017e', 
  69                 'id': '28e5525a-b495-45e8-a7c3-bc48ba45d2b6', 
  71                 'title': 'Il Candidato - Primo episodio: "Le Primarie"', 
  72                 'description': 'Primo appuntamento con "Il candidato" con Filippo Timi, alias Piero Zucca presidente!', 
  78     def _extract_relinker_url(self
, webpage
): 
  79         return self
._proto
_relative
_url
(self
._search
_regex
( 
  80             [r
'name="videourl" content="([^"]+)"', r
'var\s+videoURL(?:_MP4)?\s*=\s*"([^"]+)"'], 
  81             webpage
, 'relinker url', default
=None)) 
  83     def _real_extract(self
, url
): 
  84         mobj 
= re
.match(self
._VALID
_URL
, url
) 
  85         video_id 
= mobj
.group('id') 
  86         host 
= mobj
.group('host') 
  88         webpage 
= self
._download
_webpage
(url
, video_id
) 
  90         relinker_url 
= self
._extract
_relinker
_url
(webpage
) 
  93             iframe_path 
= self
._search
_regex
( 
  94                 r
'<iframe[^>]+src="/?(dl/[^"]+\?iframe\b[^"]*)"', 
  96             webpage 
= self
._download
_webpage
( 
  97                 '%s/%s' % (host
, iframe_path
), video_id
) 
  98             relinker_url 
= self
._extract
_relinker
_url
(webpage
) 
 100         relinker 
= self
._download
_json
( 
 101             '%s&output=47' % relinker_url
, video_id
) 
 103         media_url 
= relinker
['video'][0] 
 104         ct 
= relinker
.get('ct') 
 106             formats 
= self
._extract
_f
4m
_formats
( 
 107                 media_url 
+ '&hdcore=3.7.0&plugin=aasp-3.7.0.39.44', video_id
) 
 114         json_link 
= self
._html
_search
_meta
( 
 115             'jsonlink', webpage
, 'JSON link', default
=None) 
 117             media 
= self
._download
_json
( 
 118                 host 
+ json_link
, video_id
, 'Downloading video JSON') 
 119             title 
= media
.get('name') 
 120             description 
= media
.get('desc') 
 121             thumbnail 
= media
.get('image_300') or media
.get('image_medium') or media
.get('image') 
 122             duration 
= parse_duration(media
.get('length')) 
 123             uploader 
= media
.get('author') 
 124             upload_date 
= unified_strdate(media
.get('date')) 
 126             title 
= (self
._search
_regex
( 
 127                 r
'var\s+videoTitolo\s*=\s*"(.+?)";', 
 128                 webpage
, 'title', default
=None) or self
._og
_search
_title
(webpage
)).replace('\\"', '"') 
 129             description 
= self
._og
_search
_description
(webpage
) 
 130             thumbnail 
= self
._og
_search
_thumbnail
(webpage
) 
 132             uploader 
= self
._html
_search
_meta
('Editore', webpage
, 'uploader') 
 133             upload_date 
= unified_strdate(self
._html
_search
_meta
( 
 134                 'item-date', webpage
, 'upload date', default
=None)) 
 136         subtitles 
= self
.extract_subtitles(video_id
, webpage
) 
 141             'description': description
, 
 142             'thumbnail': thumbnail
, 
 143             'uploader': uploader
, 
 144             'upload_date': upload_date
, 
 145             'duration': duration
, 
 147             'subtitles': subtitles
, 
 150     def _get_subtitles(self
, video_id
, webpage
): 
 152         m 
= re
.search(r
'<meta name="closedcaption" content="(?P<captions>[^"]+)"', webpage
) 
 154             captions 
= m
.group('captions') 
 157             if captions
.endswith(STL_EXT
): 
 158                 captions 
= captions
[:-len(STL_EXT
)] + SRT_EXT
 
 161                 'url': 'http://www.rai.tv%s' % compat_urllib_parse
.quote(captions
),