]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/canalplus.py
1f02519a0149ad85aad3886bf0a01e5c986064c0
2 import xml
.etree
.ElementTree
4 from .common
import InfoExtractor
5 from ..utils
import unified_strdate
7 class CanalplusIE(InfoExtractor
):
8 _VALID_URL
= r
'https?://(www\.canalplus\.fr/.*?\?vid=|player\.canalplus\.fr/#/)(?P<id>\d+)'
9 _VIDEO_INFO_TEMPLATE
= 'http://service.canal-plus.com/video/rest/getVideosLiees/cplus/%s'
10 IE_NAME
= u
'canalplus.fr'
13 u
'url': u
'http://www.canalplus.fr/c-divertissement/pid3351-c-le-petit-journal.html?vid=889861',
14 u
'file': u
'889861.flv',
15 u
'md5': u
'590a888158b5f0d6832f84001fbf3e99',
17 u
'title': u
'Le Petit Journal 20/06/13 - La guerre des drone',
18 u
'upload_date': u
'20130620',
20 u
'skip': u
'Requires rtmpdump'
23 def _real_extract(self
, url
):
24 mobj
= re
.match(self
._VALID
_URL
, url
)
25 video_id
= mobj
.group('id')
26 info_url
= self
._VIDEO
_INFO
_TEMPLATE
% video_id
27 info_page
= self
._download
_webpage
(info_url
,video_id
,
28 u
'Downloading video info')
30 self
.report_extraction(video_id
)
31 doc
= xml
.etree
.ElementTree
.fromstring(info_page
.encode('utf-8'))
32 video_info
= [video
for video
in doc
if video
.find('ID').text
== video_id
][0]
33 infos
= video_info
.find('INFOS')
34 media
= video_info
.find('MEDIA')
35 formats
= [media
.find('VIDEOS/%s' % format
)
36 for format
in ['BAS_DEBIT', 'HAUT_DEBIT', 'HD']]
37 video_url
= [format
.text
for format
in formats
if format
is not None][-1]
39 return {'id': video_id
,
40 'title': u
'%s - %s' % (infos
.find('TITRAGE/TITRE').text
,
41 infos
.find('TITRAGE/SOUS_TITRE').text
),
44 'upload_date': unified_strdate(infos
.find('PUBLICATION/DATE').text
),
45 'thumbnail': media
.find('IMAGES/GRAND').text
,