]>
 
 
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/wat.py 
 
 
 
 
 
 
 
 
   6  from  . common 
import  InfoExtractor
 
  14  class  WatIE ( InfoExtractor
):  
  15      _VALID_URL
= r
'http://www.wat.tv/.*-(?P<shortID>.*?)_.*?.html'  
  18          u
'url' :  u
'http://www.wat.tv/video/world-war-philadelphia-vost-6bv55_2fjr7_.html' ,  
  19          u
'file' :  u
'10631273.mp4' ,  
  20          u
'md5' :  u
'0a4fe7870f31eaeabb5e25fd8da8414a' ,  
  22              u
'title' :  u
'World War Z - Philadelphia VOST' ,  
  23              u
'description' :  u
'La menace est partout. Que se passe-t-il à Philadelphia ? \r\n WORLD WAR Z, avec Brad Pitt, au cinéma le 3 juillet. \r\n http://www.worldwarz.fr' ,  
  27      def  download_video_info ( self
,  real_id
):  
  28          # 'contentv4' is used in the website, but it also returns the related  
  29          # videos, we don't need them  
  30          info 
=  self
._ download
_ webpage
( 'http://www.wat.tv/interface/contentv3/'  +  real_id
,  real_id
,  'Downloading video info' )  
  31          info 
=  json
. loads ( info
)  
  35      def  _real_extract ( self
,  url
):  
  36          def  real_id_for_chapter ( chapter
):  
  37              return  chapter
[ 'tc_start' ]. split ( '-' )[ 0 ]  
  38          mobj 
=  re
. match ( self
._ VALID
_U RL
,  url
)  
  39          short_id 
=  mobj
. group ( 'shortID' )  
  40          webpage 
=  self
._ download
_ webpage
( url
,  short_id
)  
  41          real_id 
=  self
._ search
_ regex
( r
'xtpage = ".*-(.*?)";' ,  webpage
,  'real id' )  
  43          video_info 
=  self
. download_video_info ( real_id
)  
  44          chapters 
=  video_info
[ 'chapters' ]  
  45          first_chapter 
=  chapters
[ 0 ]  
  47          if  real_id_for_chapter ( first_chapter
) !=  real_id
:  
  48              self
. to_screen ( 'Multipart video detected' )  
  50              for  chapter 
in  chapters
:  
  51                  chapter_id 
=  real_id_for_chapter ( chapter
)  
  52                  # Yes, when we this chapter is processed by WatIE,  
  53                  # it will download the info again  
  54                  chapter_info 
=  self
. download_video_info ( chapter_id
)  
  55                  chapter_urls
. append ( chapter_info
[ 'url' ])  
  56              entries 
= [ self
. url_result ( chapter_url
)  for  chapter_url 
in  chapter_urls
]  
  57              return  self
. playlist_result ( entries
,  real_id
,  video_info
[ 'title' ])  
  59          # Otherwise we can continue and extract just one part, we have to use  
  60          # the short id for getting the video url  
  61          player_data 
=  compat_urllib_parse
. urlencode ({ 'shortVideoId' :  short_id
,  
  63          player_info 
=  self
._ download
_ webpage
( 'http://www.wat.tv/player?'  +  player_data
,  
  64                                               real_id
,  u
'Downloading player info' )  
  65          player 
=  json
. loads ( player_info
)[ 'player' ]  
  66          html5_player 
=  self
._ html
_ search
_ regex
( r
'iframe src="(.*?)"' ,  player
,  
  68          player_webpage 
=  self
._ download
_ webpage
( html5_player
,  real_id
,  
  69                                                  u
'Downloading player webpage' )  
  71          video_url 
=  self
._ search
_ regex
( r
'urlhtml5 : "(.*?)"' ,  player_webpage
,  
  73          info 
= { 'id' :  real_id
,  
  76                  'title' :  first_chapter
[ 'title' ],  
  77                  'thumbnail' :  first_chapter
[ 'preview' ],  
  78                  'description' :  first_chapter
[ 'description' ],  
  79                  'view_count' :  video_info
[ 'views' ],  
  81          if  'date_diffusion'  in  first_chapter
:  
  82              info
[ 'upload_date' ] =  unified_strdate ( first_chapter
[ 'date_diffusion' ])