]>
 
 
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/wat.py 
 
 
 
 
 
 
 
 
   6  from  . common 
import  InfoExtractor
 
  13  class  WatIE ( InfoExtractor
):  
  14      _VALID_URL
= r
'http://www\.wat\.tv/.*-(?P<shortID>.*?)_.*?\.html'  
  17          u
'url' :  u
'http://www.wat.tv/video/world-war-philadelphia-vost-6bv55_2fjr7_.html' ,  
  18          u
'file' :  u
'10631273.mp4' ,  
  19          u
'md5' :  u
'd8b2231e1e333acd12aad94b80937e19' ,  
  21              u
'title' :  u
'World War Z - Philadelphia VOST' ,  
  22              u
'description' :  u
'La menace est partout. Que se passe-t-il à Philadelphia ? \r\n WORLD WAR Z, avec Brad Pitt, au cinéma le 3 juillet. \r\n http://www.worldwarz.fr' ,  
  24          u
'skip' :  u
'Sometimes wat serves the whole file with the --test option' ,  
  def download_video_info(self, real_id):
      """Fetch and decode the contentv3 metadata document for *real_id*.

      The document is requested through ``self._download_webpage`` and the
      response text is parsed with ``json.loads``.

      NOTE(review): no ``return`` statement is visible in this copy of the
      method, although the caller subscripts its result. The embedded
      source line numbers jump right after the ``json.loads`` call, so the
      statement was probably lost when the page was extracted -- confirm
      against the upstream file before relying on this method.
      """
      # The website itself uses 'contentv4', but that endpoint also returns
      # the related videos, which are not needed here.
      info_url = 'http://www.wat.tv/interface/contentv3/' + real_id
      raw_info = self._download_webpage(info_url, real_id, 'Downloading video info')
      info = json.loads(raw_info)
  35      def  _real_extract ( self
,  url
):  
  def real_id_for_chapter(chapter):
      """Return the part of the chapter's 'tc_start' value before the first '-'."""
      start_code = chapter['tc_start']
      # Everything before the first dash; the whole value if there is none.
      prefix, _, _ = start_code.partition('-')
      return prefix
  38          mobj 
=  re
. match ( self
._ VALID
_U RL
,  url
)  
  39          short_id 
=  mobj
. group ( 'shortID' )  
  40          webpage 
=  self
._ download
_ webpage
( url
,  short_id
)  
  41          real_id 
=  self
._ search
_ regex
( r
'xtpage = ".*-(.*?)";' ,  webpage
,  'real id' )  
  43          video_info 
=  self
. download_video_info ( real_id
)  
  44          chapters 
=  video_info
[ 'chapters' ]  
  45          first_chapter 
=  chapters
[ 0 ]  
  47          if  real_id_for_chapter ( first_chapter
) !=  real_id
:  
  48              self
. to_screen ( 'Multipart video detected' )  
  50              for  chapter 
in  chapters
:  
  51                  chapter_id 
=  real_id_for_chapter ( chapter
)  
  52                  # Yes, when this chapter is processed by WatIE,  
  53                  # it will download the info again  
  54                  chapter_info 
=  self
. download_video_info ( chapter_id
)  
  55                  chapter_urls
. append ( chapter_info
[ 'url' ])  
  56              entries 
= [ self
. url_result ( chapter_url
)  for  chapter_url 
in  chapter_urls
]  
  57              return  self
. playlist_result ( entries
,  real_id
,  video_info
[ 'title' ])  
  59          # Otherwise we can continue and extract just one part, we have to use  
  60          # the short id for getting the video url  
  61          info 
= { 'id' :  real_id
,  
  62                  'url' :  'http://wat.tv/get/android5/ %s .mp4'  %  real_id
,  
  64                  'title' :  first_chapter
[ 'title' ],  
  65                  'thumbnail' :  first_chapter
[ 'preview' ],  
  66                  'description' :  first_chapter
[ 'description' ],  
  67                  'view_count' :  video_info
[ 'views' ],  
  69          if  'date_diffusion'  in  first_chapter
:  
  70              info
[ 'upload_date' ] =  unified_strdate ( first_chapter
[ 'date_diffusion' ])