]>
 
 
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/howstuffworks.py 
 
 
 
 
 
 
 
 
   1  from  __future__ 
import  unicode_literals
 
   3  from  . common 
import  InfoExtractor
 
  13  class  HowStuffWorksIE ( InfoExtractor
):  
  14      _VALID_URL 
=  r
'https?://[\da-z-]+\.howstuffworks\.com/(?:[^/]+/)*(?:\d+-)?(?P<id>.+?)-video\.htm'  
  17              'url' :  'http://adventure.howstuffworks.com/5266-cool-jobs-iditarod-musher-video.htm' ,  
  21                  'title' :  'Cool Jobs - Iditarod Musher' ,  
  22                  'description' :  'Cold sleds, freezing temps and warm dog breath... an Iditarod musher \' s dream. Kasey-Dee Gardner jumps on a sled to find out what the big deal is.' ,  
  23                  'display_id' :  'cool-jobs-iditarod-musher' ,  
  24                  'thumbnail' :  're:^https?://.*\.jpg$' ,  
  27              'skip' :  'Video broken' ,  
  30              'url' :  'http://adventure.howstuffworks.com/7199-survival-zone-food-and-water-in-the-savanna-video.htm' ,  
  34                  'title' :  'Survival Zone: Food and Water In the Savanna' ,  
  35                  'description' :  'Learn how to find both food and water while trekking in the African savannah. In this video from the Discovery Channel.' ,  
  36                  'display_id' :  'survival-zone-food-and-water-in-the-savanna' ,  
  37                  'thumbnail' :  're:^https?://.*\.jpg$' ,  
  41              'url' :  'http://entertainment.howstuffworks.com/arts/2706-sword-swallowing-1-by-dan-meyer-video.htm' ,  
  45                  'title' :  'Sword Swallowing #1 by Dan Meyer' ,  
  46                  'description' :  'Video footage (1 of 3) used by permission of the owner Dan Meyer through Sword Swallowers Association International <www.swordswallow.org>' ,  
  47                  'display_id' :  'sword-swallowing-1-by-dan-meyer' ,  
  48                  'thumbnail' :  're:^https?://.*\.jpg$' ,  
  52              'url' :  'http://shows.howstuffworks.com/stuff-to-blow-your-mind/optical-illusions-video.htm' ,  
  53              'only_matching' :  True ,  
  57      def  _real_extract ( self
,  url
):  
  58          display_id 
=  self
._ match
_ id
( url
)  
  59          webpage 
=  self
._ download
_ webpage
( url
,  display_id
)  
  60          clip_js 
=  self
._ search
_ regex
(  
  61              r
'(?s)var clip = ({.*?});' ,  webpage
,  'clip info' )  
  62          clip_info 
=  self
._ parse
_ json
(  
  63              clip_js
,  display_id
,  transform_source
= js_to_json
)  
  65          video_id 
=  clip_info
[ 'content_id' ]  
  67          m3u8_url 
=  clip_info
. get ( 'm3u8' )  
  68          if  m3u8_url 
and  determine_ext ( m3u8_url
) ==  'm3u8' :  
  69              formats
. extend ( self
._ extract
_ m
3u8_ formats
( m3u8_url
,  video_id
,  'mp4' ,  format_id
= 'hls' ,  fatal
= True ))  
  70          flv_url 
=  clip_info
. get ( 'flv_url' )  
  76          for  video 
in  clip_info
. get ( 'mp4' , []):  
  79                  'format_id' :  'mp4- %s '  %  video
[ 'bitrate' ],  
  80                  'vbr' :  int_or_none ( video
[ 'bitrate' ]. rstrip ( 'k' )),  
  84              smil 
=  self
._ download
_ xml
(  
  85                  'http://services.media.howstuffworks.com/videos/ %s /smil-service.smil'  %  video_id
,  
  86                  video_id
,  'Downloading video SMIL' )  
  88              http_base 
=  find_xpath_attr (  
  90                  './ {0} head/ {0} meta' . format ( '{http://www.w3.org/2001/SMIL20/Language}' ),  
  92                  'httpBase' ). get ( 'content' )  
  94              URL_SUFFIX 
=  '?v=2.11.3&fp=LNX 11,2,202,356&r=A&g=A'  
  96              for  video 
in  smil
. findall (  
  97                      './ {0} body/ {0} switch/ {0} video' . format ( '{http://www.w3.org/2001/SMIL20/Language}' )):  
  98                  vbr 
=  int_or_none ( video
. attrib
[ 'system-bitrate' ],  scale
= 1000 )  
 100                      'url' :  ' %s / %s%s '  % ( http_base
,  video
. attrib
[ 'src' ],  URL_SUFFIX
),  
 101                      'format_id' :  ' %d k'  %  vbr
,  
 105          self
._ sort
_ formats
( formats
)  
 108              'id' :  ' %s '  %  video_id
,  
 109              'display_id' :  display_id
,  
 110              'title' :  unescapeHTML ( clip_info
[ 'clip_title' ]),  
 111              'description' :  unescapeHTML ( clip_info
. get ( 'caption' )),  
 112              'thumbnail' :  clip_info
. get ( 'video_still_url' ),  
 113              'duration' :  int_or_none ( clip_info
. get ( 'duration' )),