# coding: utf-8
# Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/wat.py
import json
import re

from .common import InfoExtractor
# NOTE(review): WatIE calls unified_strdate, but its import is not visible in
# this copy of the file; youtube_dl.utils is where it is expected - confirm.
from ..utils import unified_strdate
class WatIE(InfoExtractor):
    """Information extractor for video pages on www.wat.tv.

    The page URL yields a short id; the page itself yields the "real id",
    which is used to query the ``contentv3`` interface for metadata. When
    the first chapter of the media does not belong to the requested real
    id, the media is handled as a multipart video and a playlist with one
    entry per chapter is returned.

    The downloading/searching helpers used below (``_download_webpage``,
    ``_search_regex``, ``to_screen``, ``url_result``, ``playlist_result``)
    are expected to come from ``InfoExtractor``.
    """

    # The dots are escaped so that they only match literal dots.
    _VALID_URL = r'http://www\.wat\.tv/.*-(?P<shortID>.*?)_.*?\.html'

    # NOTE(review): the lines naming this dict and opening the nested
    # 'info_dict' were missing from this copy of the file; the layout below
    # follows the usual youtube-dl test convention - confirm.
    # NOTE(review): the whitespace around '\r\n' in the description may be
    # an extraction artifact; the text is kept exactly as it was found.
    _TEST = {
        u'url': u'http://www.wat.tv/video/world-war-philadelphia-vost-6bv55_2fjr7_.html',
        u'file': u'10631273.mp4',
        u'md5': u'd8b2231e1e333acd12aad94b80937e19',
        u'info_dict': {
            u'title': u'World War Z - Philadelphia VOST',
            u'description': u'La menace est partout. Que se passe-t-il à Philadelphia ? \r\n WORLD WAR Z, avec Brad Pitt, au cinéma le 3 juillet. \r\n http://www.worldwarz.fr',
        },
        u'skip': u'Sometimes wat serves the whole file with the --test option',
    }

    def download_video_info(self, real_id):
        """Download and parse the metadata of the media called *real_id*.

        :param real_id: identifier appended to the ``contentv3`` URL.
        :returns: the decoded description of the media.
        """
        # 'contentv4' is used in the website, but it also returns the related
        # videos, we don't need them
        raw_info = self._download_webpage(
            'http://www.wat.tv/interface/contentv3/' + real_id,
            real_id, 'Downloading video info')
        info = json.loads(raw_info)
        # NOTE(review): the statement returning the result was missing from
        # this copy of the file. Callers index the result with 'chapters',
        # 'title', 'views' and 'url'; upstream youtube-dl returns the 'media'
        # member of the response - confirm against the service.
        return info['media']

    def _real_extract(self, url):
        """Extract the video (or the playlist of its parts) found at *url*.

        :param url: a page URL matching ``_VALID_URL``.
        :returns: a playlist result when the media is multipart, otherwise
            an info dict describing the single video.
        """
        def real_id_for_chapter(chapter):
            # The part of 'tc_start' before the first '-' is what gets
            # compared with (and used as) a real id.
            return chapter['tc_start'].split('-')[0]

        mobj = re.match(self._VALID_URL, url)
        short_id = mobj.group('shortID')
        webpage = self._download_webpage(url, short_id)
        real_id = self._search_regex(
            r'xtpage = ".*-(.*?)";', webpage, 'real id')

        video_info = self.download_video_info(real_id)
        chapters = video_info['chapters']
        first_chapter = chapters[0]

        if real_id_for_chapter(first_chapter) != real_id:
            self.to_screen('Multipart video detected')
            entries = []
            for chapter in chapters:
                chapter_id = real_id_for_chapter(chapter)
                # Yes, when this chapter is later processed by WatIE, it
                # will download the info again
                chapter_info = self.download_video_info(chapter_id)
                entries.append(self.url_result(chapter_info['url']))
            return self.playlist_result(
                entries, real_id, video_info['title'])

        # Otherwise we can continue and extract just one part; the real id
        # is what the video url is built from
        info = {
            'id': real_id,
            'url': 'http://wat.tv/get/android5/%s.mp4' % real_id,
            # Matches the '.mp4' suffix of the url built above.
            'ext': 'mp4',
            'title': first_chapter['title'],
            'thumbnail': first_chapter['preview'],
            'description': first_chapter['description'],
            'view_count': video_info['views'],
        }
        if 'date_diffusion' in first_chapter:
            info['upload_date'] = unified_strdate(
                first_chapter['date_diffusion'])

        return info