]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/jeuxvideo.py
5 import xml
. etree
. ElementTree
7 from . common
import InfoExtractor
10 class JeuxVideoIE ( InfoExtractor
):
11 _VALID_URL
= r
'http://.*?\.jeuxvideo\.com/.*/(.*?)-\d+\.htm'
14 u
'url' : u
'http://www.jeuxvideo.com/reportages-videos-jeux/0004/00046170/tearaway-playstation-vita-gc-2013-tearaway-nous-presente-ses-papiers-d-identite-00115182.htm' ,
16 u
'md5' : u
'046e491afb32a8aaac1f44dd4ddd54ee' ,
18 u
'title' : u
'GC 2013 : Tearaway nous présente ses papiers d \' identité' ,
19 u
'description' : u
'Lorsque les développeurs de LittleBigPlanet proposent un nouveau titre, on ne peut que s \' attendre à un résultat original et fort attrayant. \n ' ,
23 def _real_extract ( self
, url
):
24 mobj
= re
. match ( self
._ VALID
_U RL
, url
)
25 title
= re
. match ( self
._ VALID
_U RL
, url
). group ( 1 )
26 webpage
= self
._ download
_ webpage
( url
, title
)
27 xml_link
= self
._ html
_ search
_ regex
(
28 r
'<param name="flashvars" value="config=(.*?)" />' ,
29 webpage
, u
'config URL' )
31 video_id
= self
._ search
_ regex
(
32 r
'http://www\.jeuxvideo\.com/config/\w+/\d+/(.*?)/\d+_player\.xml' ,
33 xml_link
, u
'video ID' )
35 xml_config
= self
._ download
_ webpage
(
36 xml_link
, title
, u
'Downloading XML config' )
37 config
= xml
. etree
. ElementTree
. fromstring ( xml_config
. encode ( 'utf-8' ))
38 info_json
= self
._ search
_ regex
(
39 r
'(?sm)<format\.json>(.*?)</format\.json>' ,
40 xml_config
, u
'JSON information' )
41 info
= json
. loads ( info_json
)[ 'versions' ][ 0 ]
43 video_url
= 'http://video720.jeuxvideo.com/' + info
[ 'file' ]
47 'title' : config
. find ( 'titre_video' ). text
,
50 'description' : self
._ og
_ search
_ description
( webpage
),
51 'thumbnail' : config
. find ( 'image' ). text
,