]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/franceculture.py
e2ca962838932f682f0ac833bd64169ccaba8fc5
2 from __future__
import unicode_literals
4 from .common
import InfoExtractor
15 class FranceCultureIE(InfoExtractor
):
16 _VALID_URL
= r
'https?://(?:www\.)?franceculture\.fr/player/reecouter\?play=(?P<id>[0-9]+)'
18 'url': 'http://www.franceculture.fr/player/reecouter?play=4795174',
22 'title': 'Rendez-vous au pays des geeks',
23 'alt_title': 'Carnet nomade | 13-14',
25 'upload_date': '20140301',
26 'thumbnail': r
're:^http://static\.franceculture\.fr/.*/images/player/Carnet-nomade\.jpg$',
27 'description': 'startswith:Avec :Jean-Baptiste Péretié pour son documentaire sur Arte "La revanche',
28 'timestamp': 1393700400,
32 def _extract_from_player(self
, url
, video_id
):
33 webpage
= self
._download
_webpage
(url
, video_id
)
35 video_path
= self
._search
_regex
(
36 r
'<a id="player".*?href="([^"]+)"', webpage
, 'video path')
37 video_url
= compat_urlparse
.urljoin(url
, video_path
)
38 timestamp
= int_or_none(self
._search
_regex
(
39 r
'<a id="player".*?data-date="([0-9]+)"',
40 webpage
, 'upload date', fatal
=False))
41 thumbnail
= self
._search
_regex
(
42 r
'<a id="player".*?>\s+<img src="([^"]+)"',
43 webpage
, 'thumbnail', fatal
=False)
45 display_id
= self
._search
_regex
(
46 r
'<span class="path-diffusion">emission-(.*?)</span>', webpage
, 'display_id')
48 title
= self
._html
_search
_regex
(
49 r
'<span class="title-diffusion">(.*?)</span>', webpage
, 'title')
50 alt_title
= self
._html
_search
_regex
(
51 r
'<span class="title">(.*?)</span>',
52 webpage
, 'alt_title', fatal
=False)
53 description
= self
._html
_search
_regex
(
54 r
'<span class="description">(.*?)</span>',
55 webpage
, 'description', fatal
=False)
57 uploader
= self
._html
_search
_regex
(
58 r
'(?s)<div id="emission".*?<span class="author">(.*?)</span>',
59 webpage
, 'uploader', default
=None)
60 vcodec
= 'none' if determine_ext(video_url
.lower()) == 'mp3' else None
67 'timestamp': timestamp
,
69 'alt_title': alt_title
,
70 'thumbnail': thumbnail
,
71 'description': description
,
72 'display_id': display_id
,
def _real_extract(self, url):
    """Extract the media info for a France Culture player URL.

    The original text of this method was not parseable (stray line-number
    tokens and statements split across lines); this is the same logic in
    valid form.

    :param url: player page URL handled by this extractor.
    :return: whatever ``_extract_from_player`` returns for the page.
    """
    # NOTE(review): `_match_id` is defined on the base class, outside this
    # view -- presumably it yields the `id` group of `_VALID_URL`; confirm.
    video_id = self._match_id(url)
    return self._extract_from_player(url, video_id)
80 class FranceCultureEmissionIE(FranceCultureIE
):
81 _VALID_URL
= r
'https?://(?:www\.)?franceculture\.fr/emission-(?P<id>[^?#]+)'
83 'url': 'http://www.franceculture.fr/emission-les-carnets-de-la-creation-jean-gabriel-periot-cineaste-2015-10-13',
85 'title': 'Jean-Gabriel Périot, cinéaste',
86 'alt_title': 'Les Carnets de la création',
88 'display_id': 'les-carnets-de-la-creation-jean-gabriel-periot-cineaste-2015-10-13',
90 'timestamp': 1444762500,
91 'upload_date': '20151013',
92 'description': 'startswith:Aujourd\'hui dans "Les carnets de la création", le cinéaste',
def _real_extract(self, url):
    """Extract the media info for a France Culture "emission" page.

    The emission page only links to the player page, so this downloads the
    emission page, finds that link, and hands the resolved player URL to
    ``_extract_from_player``.

    The original text of this method was not parseable (stray line-number
    tokens and statements split across lines); this is the same logic in
    valid form, with the missing-player case detected through ``None``
    instead of a magic string.

    :param url: emission page URL handled by this extractor.
    :return: whatever ``_extract_from_player`` returns for the player page.
    :raises ExtractorError: with ``expected=True`` when the page has no
        player link; the nested regex search may also raise when the link
        carries no ``play=<digits>`` parameter.
    """
    video_id = self._match_id(url)
    webpage = self._download_webpage(url, video_id)

    # default=None keeps "no match" distinguishable from any real href
    # value; the previous in-band sentinel string could collide with one.
    video_path = self._html_search_regex(
        r'<a class="rf-player-open".*?href="([^"]+)"', webpage,
        'video path', default=None)
    if video_path is None:
        raise ExtractorError(
            'no player : no sound in this page.', expected=True)

    # The player link carries the numeric id that the player page is
    # extracted under.
    new_id = self._search_regex(
        r'play=(?P<id>[0-9]+)', video_path, 'new_id', group='id')
    # The href is joined against the page URL, so relative links resolve.
    video_url = compat_urlparse.urljoin(url, video_path)
    return self._extract_from_player(video_url, new_id)