]>
Raphaƫl G. Git Repositories - youtubedl/blob - youtube_dl/extractor/rai.py
   1 from __future__ 
import unicode_literals
 
   5 from .common 
import InfoExtractor
 
  16 class RaiIE(InfoExtractor
): 
  17     _VALID_URL 
= r
'(?P<url>(?P<host>http://(?:.+?\.)?(?:rai\.it|rai\.tv|rainews\.it))/dl/.+?-(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})(?:-.+?)?\.html)' 
  20             'url': 'http://www.rai.tv/dl/RaiTV/programmi/media/ContentItem-cb27157f-9dd0-4aee-b788-b1f67643a391.html', 
  21             'md5': 'c064c0b2d09c278fb293116ef5d0a32d', 
  23                 'id': 'cb27157f-9dd0-4aee-b788-b1f67643a391', 
  25                 'title': 'Report del 07/04/2014', 
  26                 'description': 'md5:f27c544694cacb46a078db84ec35d2d9', 
  27                 'upload_date': '20140407', 
  32             'url': 'http://www.raisport.rai.it/dl/raiSport/media/rassegna-stampa-04a9f4bd-b563-40cf-82a6-aad3529cb4a9.html', 
  33             'md5': '8bb9c151924ce241b74dd52ef29ceafa', 
  35                 'id': '04a9f4bd-b563-40cf-82a6-aad3529cb4a9', 
  37                 'title': 'TG PRIMO TEMPO', 
  39                 'upload_date': '20140612', 
  45             'url': 'http://www.rainews.it/dl/rainews/media/state-of-the-net-Antonella-La-Carpia-regole-virali-7aafdea9-0e5d-49d5-88a6-7e65da67ae13.html', 
  46             'md5': '35cf7c229f22eeef43e48b5cf923bef0', 
  48                 'id': '7aafdea9-0e5d-49d5-88a6-7e65da67ae13', 
  50                 'title': 'State of the Net, Antonella La Carpia: regole virali', 
  51                 'description': 'md5:b0ba04a324126903e3da7763272ae63c', 
  52                 'upload_date': '20140613', 
  57             'url': 'http://www.rai.tv/dl/RaiTV/programmi/media/ContentItem-b4a49761-e0cc-4b14-8736-2729f6f73132-tg2.html', 
  58             'md5': '35694f062977fe6619943f08ed935730', 
  60                 'id': 'b4a49761-e0cc-4b14-8736-2729f6f73132', 
  62                 'title': 'Alluvione in Sardegna e dissesto idrogeologico', 
  63                 'description': 'Edizione delle ore 20:30 ', 
  67             'url': 'http://www.ilcandidato.rai.it/dl/ray/media/Il-Candidato---Primo-episodio-Le-Primarie-28e5525a-b495-45e8-a7c3-bc48ba45d2b6.html', 
  68             'md5': '02b64456f7cc09f96ff14e7dd489017e', 
  70                 'id': '28e5525a-b495-45e8-a7c3-bc48ba45d2b6', 
  72                 'title': 'Il Candidato - Primo episodio: "Le Primarie"', 
  73                 'description': 'Primo appuntamento con "Il candidato" con Filippo Timi, alias Piero Zucca presidente!', 
  78             'url': 'http://www.report.rai.it/dl/Report/puntata/ContentItem-0c7a664b-d0f4-4b2c-8835-3f82e46f433e.html', 
  79             'md5': '037104d2c14132887e5e4cf114569214', 
  81                 'id': '0c7a664b-d0f4-4b2c-8835-3f82e46f433e', 
  84                 'description': 'md5:4b1afae1364115ce5d78ed83cd2e5b3a', 
  86                 'upload_date': '20141221', 
  91     def _extract_relinker_url(self
, webpage
): 
  92         return self
._proto
_relative
_url
(self
._search
_regex
( 
  93             [r
'name="videourl" content="([^"]+)"', r
'var\s+videoURL(?:_MP4)?\s*=\s*"([^"]+)"'], 
  94             webpage
, 'relinker url', default
=None)) 
  96     def _real_extract(self
, url
): 
  97         mobj 
= re
.match(self
._VALID
_URL
, url
) 
  98         video_id 
= mobj
.group('id') 
  99         host 
= mobj
.group('host') 
 101         webpage 
= self
._download
_webpage
(url
, video_id
) 
 103         relinker_url 
= self
._extract
_relinker
_url
(webpage
) 
 106             iframe_url 
= self
._search
_regex
( 
 107                 [r
'<iframe[^>]+src="([^"]*/dl/[^"]+\?iframe\b[^"]*)"', 
 108                  r
'drawMediaRaiTV\(["\'](.+?
)["\']'], 
 110             if not iframe_url.startswith('http'): 
 111                 iframe_url = compat_urlparse.urljoin(url, iframe_url) 
 112             webpage = self._download_webpage( 
 113                 iframe_url, video_id) 
 114             relinker_url = self._extract_relinker_url(webpage) 
 116         relinker = self._download_json( 
 117             '%s&output=47' % relinker_url, video_id) 
 119         media_url = relinker['video'][0] 
 120         ct = relinker.get('ct') 
 122             formats = self._extract_f4m_formats( 
 123                 media_url + '&hdcore=3.7.0&plugin=aasp-3.7.0.39.44', video_id) 
 130         json_link = self._html_search_meta( 
 131             'jsonlink', webpage, 'JSON link', default=None) 
 133             media = self._download_json( 
 134                 host + json_link, video_id, 'Downloading video JSON') 
 135             title = media.get('name') 
 136             description = media.get('desc') 
 137             thumbnail = media.get('image_300') or media.get('image_medium') or media.get('image') 
 138             duration = parse_duration(media.get('length')) 
 139             uploader = media.get('author') 
 140             upload_date = unified_strdate(media.get('date')) 
 142             title = (self._search_regex( 
 143                 r'var\s+videoTitolo\s*=\s*"(.+?
)";', 
 144                 webpage, 'title', default=None) or self._og_search_title(webpage)).replace('\\"', '"') 
 145             description = self._og_search_description(webpage) 
 146             thumbnail = self._og_search_thumbnail(webpage) 
 148             uploader = self._html_search_meta('Editore', webpage, 'uploader') 
 149             upload_date = unified_strdate(self._html_search_meta( 
 150                 'item-date', webpage, 'upload date', default=None)) 
 152         subtitles = self.extract_subtitles(video_id, webpage) 
 157             'description': description, 
 158             'thumbnail': thumbnail, 
 159             'uploader': uploader, 
 160             'upload_date': upload_date, 
 161             'duration': duration, 
 163             'subtitles': subtitles, 
 166     def _get_subtitles(self, video_id, webpage): 
 168         m = re.search(r'<meta name="closedcaption
" content="(?P
<captions
>[^
"]+)"', webpage) 
 170             captions = m.group('captions
') 
 173             if captions.endswith(STL_EXT): 
 174                 captions = captions[:-len(STL_EXT)] + SRT_EXT 
 177                 'url
': 'http
://www
.rai
.tv
%s' % compat_urllib_parse.quote(captions),