]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/canalplus.py
1db9b24cf204cc26d68b1a1bdaff93577c3ae903
3 import xml
.etree
.ElementTree
5 from .common
import InfoExtractor
6 from ..utils
import unified_strdate
8 class CanalplusIE(InfoExtractor
):
9 _VALID_URL
= r
'https?://(www\.canalplus\.fr/.*?/(?P<path>.*)|player\.canalplus\.fr/#/(?P<id>\d+))'
10 _VIDEO_INFO_TEMPLATE
= 'http://service.canal-plus.com/video/rest/getVideosLiees/cplus/%s'
11 IE_NAME
= u
'canalplus.fr'
14 u
'url': u
'http://www.canalplus.fr/c-infos-documentaires/pid1830-c-zapping.html?vid=922470',
15 u
'file': u
'922470.flv',
17 u
'title': u
'Zapping - 26/08/13',
18 u
'description': u
'Le meilleur de toutes les chaînes, tous les jours.\nEmission du 26 août 2013',
19 u
'upload_date': u
'20130826',
22 u
'skip_download': True,
26 def _real_extract(self
, url
):
27 mobj
= re
.match(self
._VALID
_URL
, url
)
28 video_id
= mobj
.group('id')
30 webpage
= self
._download
_webpage
(url
, mobj
.group('path'))
31 video_id
= self
._search
_regex
(r
'videoId = "(\d+)";', webpage
, u
'video id')
32 info_url
= self
._VIDEO
_INFO
_TEMPLATE
% video_id
33 info_page
= self
._download
_webpage
(info_url
,video_id
,
34 u
'Downloading video info')
36 self
.report_extraction(video_id
)
37 doc
= xml
.etree
.ElementTree
.fromstring(info_page
.encode('utf-8'))
38 video_info
= [video
for video
in doc
if video
.find('ID').text
== video_id
][0]
39 infos
= video_info
.find('INFOS')
40 media
= video_info
.find('MEDIA')
41 formats
= [media
.find('VIDEOS/%s' % format
)
42 for format
in ['BAS_DEBIT', 'HAUT_DEBIT', 'HD']]
43 video_url
= [format
.text
for format
in formats
if format
is not None][-1]
45 return {'id': video_id
,
46 'title': u
'%s - %s' % (infos
.find('TITRAGE/TITRE').text
,
47 infos
.find('TITRAGE/SOUS_TITRE').text
),
50 'upload_date': unified_strdate(infos
.find('PUBLICATION/DATE').text
),
51 'thumbnail': media
.find('IMAGES/GRAND').text
,
52 'description': infos
.find('DESCRIPTION').text
,
53 'view_count': int(infos
.find('NB_VUES').text
),