]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/franceculture.py
   2 from __future__ 
import unicode_literals
 
   4 from .common 
import InfoExtractor
 
  15 class FranceCultureIE(InfoExtractor
): 
  16     _VALID_URL 
= r
'https?://(?:www\.)?franceculture\.fr/player/reecouter\?play=(?P<id>[0-9]+)' 
  18         'url': 'http://www.franceculture.fr/player/reecouter?play=4795174', 
  22             'title': 'Rendez-vous au pays des geeks', 
  23             'alt_title': 'Carnet nomade | 13-14', 
  25             'upload_date': '20140301', 
  26             'thumbnail': r
're:^http://static\.franceculture\.fr/.*/images/player/Carnet-nomade\.jpg$', 
  27             'description': 'startswith:Avec :Jean-Baptiste Péretié pour son documentaire sur Arte "La revanche', 
  28             'timestamp': 1393700400, 
  32     def _extract_from_player(self
, url
, video_id
): 
  33         webpage 
= self
._download
_webpage
(url
, video_id
) 
  35         video_path 
= self
._search
_regex
( 
  36             r
'<a id="player".*?href="([^"]+)"', webpage
, 'video path') 
  37         video_url 
= compat_urlparse
.urljoin(url
, video_path
) 
  38         timestamp 
= int_or_none(self
._search
_regex
( 
  39             r
'<a id="player".*?data-date="([0-9]+)"', 
  40             webpage
, 'upload date', fatal
=False)) 
  41         thumbnail 
= self
._search
_regex
( 
  42             r
'<a id="player".*?>\s+<img src="([^"]+)"', 
  43             webpage
, 'thumbnail', fatal
=False) 
  45         display_id 
= self
._search
_regex
( 
  46             r
'<span class="path-diffusion">emission-(.*?)</span>', webpage
, 'display_id') 
  48         title 
= self
._html
_search
_regex
( 
  49             r
'<span class="title-diffusion">(.*?)</span>', webpage
, 'title') 
  50         alt_title 
= self
._html
_search
_regex
( 
  51             r
'<span class="title">(.*?)</span>', 
  52             webpage
, 'alt_title', fatal
=False) 
  53         description 
= self
._html
_search
_regex
( 
  54             r
'<span class="description">(.*?)</span>', 
  55             webpage
, 'description', fatal
=False) 
  57         uploader 
= self
._html
_search
_regex
( 
  58             r
'(?s)<div id="emission".*?<span class="author">(.*?)</span>', 
  59             webpage
, 'uploader', default
=None) 
  60         vcodec 
= 'none' if determine_ext(video_url
.lower()) == 'mp3' else None 
  67             'timestamp': timestamp
, 
  69             'alt_title': alt_title
, 
  70             'thumbnail': thumbnail
, 
  71             'description': description
, 
  72             'display_id': display_id
, 
  75     def _real_extract(self
, url
): 
  76         video_id 
= self
._match
_id
(url
) 
  77         return self
._extract
_from
_player
(url
, video_id
) 
  80 class FranceCultureEmissionIE(FranceCultureIE
): 
  81     _VALID_URL 
= r
'https?://(?:www\.)?franceculture\.fr/emission-(?P<id>[^?#]+)' 
  83         'url': 'http://www.franceculture.fr/emission-les-carnets-de-la-creation-jean-gabriel-periot-cineaste-2015-10-13', 
  85             'title': 'Jean-Gabriel Périot, cinéaste', 
  86             'alt_title': 'Les Carnets de la création', 
  88             'display_id': 'les-carnets-de-la-creation-jean-gabriel-periot-cineaste-2015-10-13', 
  90             'timestamp': 1444762500, 
  91             'upload_date': '20151013', 
  92             'description': 'startswith:Aujourd\'hui dans "Les carnets de la création", le cinéaste', 
  96     def _real_extract(self
, url
): 
  97         video_id 
= self
._match
_id
(url
) 
  98         webpage 
= self
._download
_webpage
(url
, video_id
) 
  99         video_path 
= self
._html
_search
_regex
( 
 100             r
'<a class="rf-player-open".*?href="([^"]+)"', webpage
, 'video path', 'no_path_player') 
 101         if video_path 
== 'no_path_player': 
 102             raise ExtractorError('no player : no sound in this page.', expected
=True) 
 103         new_id 
= self
._search
_regex
('play=(?P<id>[0-9]+)', video_path
, 'new_id', group
='id') 
 104         video_url 
= compat_urlparse
.urljoin(url
, video_path
) 
 105         return self
._extract
_from
_player
(video_url
, new_id
)