]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/franceculture.py
0c29721629a25369621072e4f451e7decdc8df0b
2 from __future__
import unicode_literals
7 from . common
import InfoExtractor
14 class FranceCultureIE ( InfoExtractor
):
15 _VALID_URL
= r
'(?P<baseurl>http://(?:www\.)?franceculture\.fr/)player/reecouter\?play=(?P<id>[0-9]+)'
17 'url' : 'http://www.franceculture.fr/player/reecouter?play=4795174' ,
21 'title' : 'Rendez-vous au pays des geeks' ,
23 'uploader' : 'Colette Fellous' ,
24 'upload_date' : '20140301' ,
26 'thumbnail' : r
're:^http://www\.franceculture\.fr/.*/images/player/Carnet-nomade\.jpg$' ,
27 'description' : 'Avec :Jean-Baptiste Péretié pour son documentaire sur Arte "La revanche des « geeks », une enquête menée aux Etats-Unis dans la S ...' ,
31 def _real_extract ( self
, url
):
32 mobj
= re
. match ( self
._ VALID
_U RL
, url
)
33 video_id
= mobj
. group ( 'id' )
34 baseurl
= mobj
. group ( 'baseurl' )
36 webpage
= self
._ download
_ webpage
( url
, video_id
)
37 params_code
= self
._ search
_ regex
(
38 r
"<param name='movie' value='/sites/all/modules/rf/rf_player/swf/loader.swf\?([^']+)' />" ,
39 webpage
, 'parameter code' )
40 params
= compat_parse_qs ( params_code
)
41 video_url
= compat_urlparse
. urljoin ( baseurl
, params
[ 'urlAOD' ][ 0 ])
43 title
= self
._ html
_ search
_ regex
(
44 r
'<h1 class="title[^"]+">(.+?)</h1>' , webpage
, 'title' )
45 uploader
= self
._ html
_ search
_ regex
(
46 r
'(?s)<div id="emission".*?<span class="author">(.*?)</span>' ,
47 webpage
, 'uploader' , fatal
= False )
48 thumbnail_part
= self
._ html
_ search
_ regex
(
49 r
'(?s)<div id="emission".*?<img src="([^"]+)"' , webpage
,
50 'thumbnail' , fatal
= False )
51 if thumbnail_part
is None :
54 thumbnail
= compat_urlparse
. urljoin ( baseurl
, thumbnail_part
)
55 description
= self
._ html
_ search
_ regex
(
56 r
'(?s)<p class="desc">(.*?)</p>' , webpage
, 'description' )
58 info
= json
. loads ( params
[ 'infoData' ][ 0 ])[ 0 ]
59 duration
= info
. get ( 'media_length' )
60 upload_date_candidate
= info
. get ( 'media_section5' )
63 if ( upload_date_candidate
is not None and
64 re
. match ( r
'[0-9] {8} $' , upload_date_candidate
))
70 'vcodec' : 'none' if video_url
. lower (). endswith ( '.mp3' ) else None ,
73 'upload_date' : upload_date
,
75 'thumbnail' : thumbnail
,
76 'description' : description
,