# Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/wat.py
import json
import re

from .common import InfoExtractor
# NOTE(review): WatIE also calls unified_strdate; its import is not visible
# in this view -- confirm which module provides it and import it here.
class WatIE(InfoExtractor):
    """Information extractor for videos hosted on www.wat.tv.

    A page URL carries a short id; the page itself exposes the "real" id,
    which is then used to query the contentv3 interface for metadata.
    A video whose first chapter belongs to a different real id is treated
    as a multipart video and returned as a playlist of its chapters.
    """

    _VALID_URL = r'http://www\.wat\.tv/.*-(?P<shortID>.*?)_.*?\.html'

    # NOTE(review): the lines that named and nested this test case were lost
    # in extraction; `_TEST` / `info_dict` are assumed names -- confirm
    # against the extractor test harness.
    _TEST = {
        u'url': u'http://www.wat.tv/video/world-war-philadelphia-vost-6bv55_2fjr7_.html',
        u'file': u'10631273.mp4',
        u'md5': u'd8b2231e1e333acd12aad94b80937e19',
        u'info_dict': {
            u'title': u'World War Z - Philadelphia VOST',
            # NOTE(review): the blanks around "\r\n" may be extraction
            # padding; the text is kept exactly as found.
            u'description': u'La menace est partout. Que se passe-t-il à Philadelphia ? \r\n WORLD WAR Z, avec Brad Pitt, au cinéma le 3 juillet. \r\n http://www.worldwarz.fr',
        },
        u'skip': u'Sometimes wat serves the whole file with the --test option',
    }

    def download_video_info(self, real_id):
        """Download and decode the metadata for the video `real_id`.

        Returns the decoded record that callers index with 'chapters',
        'title', 'views' and 'url'.
        """
        # 'contentv4' is used in the website, but it also returns the related
        # videos, we don't need them
        info = self._download_webpage(
            'http://www.wat.tv/interface/contentv3/' + real_id,
            real_id, 'Downloading video info')
        info = json.loads(info)
        # The original block fell off the end here, so callers received None.
        # NOTE(review): the record is assumed to sit under the 'media' key of
        # the response -- confirm against a live contentv3 payload.
        return info['media']

    def _real_extract(self, url):
        """Extract `url` into an info dict, or a playlist for multipart videos."""

        def real_id_for_chapter(chapter):
            # 'tc_start' looks like '<real id>-<rest>'; keep the id part.
            return chapter['tc_start'].split('-')[0]

        mobj = re.match(self._VALID_URL, url)
        short_id = mobj.group('shortID')
        webpage = self._download_webpage(url, short_id)
        real_id = self._search_regex(r'xtpage = ".*-(.*?)";', webpage, 'real id')

        video_info = self.download_video_info(real_id)
        chapters = video_info['chapters']
        first_chapter = chapters[0]

        if real_id_for_chapter(first_chapter) != real_id:
            self.to_screen('Multipart video detected')
            # Yes, when each chapter is later processed by WatIE, it will
            # download the info again.
            chapter_urls = [
                self.download_video_info(real_id_for_chapter(chapter))['url']
                for chapter in chapters
            ]
            entries = [self.url_result(chapter_url) for chapter_url in chapter_urls]
            return self.playlist_result(entries, real_id, video_info['title'])

        # Otherwise extract just this one part; the video URL is built from
        # the real id.
        info = {
            'id': real_id,
            # No blanks inside the template: they would end up in the URL.
            'url': 'http://wat.tv/get/android5/%s.mp4' % real_id,
            'title': first_chapter['title'],
            'thumbnail': first_chapter['preview'],
            'description': first_chapter['description'],
            'view_count': video_info['views'],
        }
        if 'date_diffusion' in first_chapter:
            info['upload_date'] = unified_strdate(first_chapter['date_diffusion'])
        return info