]>
Raphaƫl G. Git Repositories - youtubedl/blob - youtube_dl/extractor/howstuffworks.py 
65ba2a48b069bd67d2b3382f2d87bc1160145612
   1  from  __future__ 
import  unicode_literals
   3  from  . common 
import  InfoExtractor
  13  class  HowStuffWorksIE ( InfoExtractor
):   14      _VALID_URL 
=  r
'https?://[\da-z-]+\.howstuffworks\.com/(?:[^/]+/)*(?:\d+-)?(?P<id>.+?)-video\.htm'   17              'url' :  'http://adventure.howstuffworks.com/5266-cool-jobs-iditarod-musher-video.htm' ,   21                  'title' :  'Cool Jobs - Iditarod Musher' ,   22                  'description' :  'Cold sleds, freezing temps and warm dog breath... an Iditarod musher \' s dream. Kasey-Dee Gardner jumps on a sled to find out what the big deal is.' ,   23                  'display_id' :  'cool-jobs-iditarod-musher' ,   24                  'thumbnail' :  're:^https?://.*\.jpg$' ,   27              'skip' :  'Video broken' ,   30              'url' :  'http://adventure.howstuffworks.com/7199-survival-zone-food-and-water-in-the-savanna-video.htm' ,   34                  'title' :  'Survival Zone: Food and Water In the Savanna' ,   35                  'description' :  'Learn how to find both food and water while trekking in the African savannah. In this video from the Discovery Channel.' ,   36                  'display_id' :  'survival-zone-food-and-water-in-the-savanna' ,   37                  'thumbnail' :  're:^https?://.*\.jpg$' ,   41              'url' :  'http://entertainment.howstuffworks.com/arts/2706-sword-swallowing-1-by-dan-meyer-video.htm' ,   45                  'title' :  'Sword Swallowing #1 by Dan Meyer' ,   46                  'description' :  'Video footage (1 of 3) used by permission of the owner Dan Meyer through Sword Swallowers Association International <www.swordswallow.org>' ,   47                  'display_id' :  'sword-swallowing-1-by-dan-meyer' ,   48                  'thumbnail' :  're:^https?://.*\.jpg$' ,   52              'url' :  'http://shows.howstuffworks.com/stuff-to-blow-your-mind/optical-illusions-video.htm' ,   53              'only_matching' :  True ,   57      def  _real_extract ( self
,  url
):   58          display_id 
=  self
._ match
_ id
( url
)   59          webpage 
=  self
._ download
_ webpage
( url
,  display_id
)   60          clip_js 
=  self
._ search
_ regex
(   61              r
'(?s)var clip = ({.*?});' ,  webpage
,  'clip info' )   62          clip_info 
=  self
._ parse
_ json
(   63              clip_js
,  display_id
,  transform_source
= js_to_json
)   65          video_id 
=  clip_info
[ 'content_id' ]   67          m3u8_url 
=  clip_info
. get ( 'm3u8' )   68          if  m3u8_url 
and  determine_ext ( m3u8_url
) ==  'm3u8' :   69              formats
. extend ( self
._ extract
_ m
3u8_ formats
( m3u8_url
,  video_id
,  'mp4' ,  format_id
= 'hls' ,  fatal
= True ))   70          flv_url 
=  clip_info
. get ( 'flv_url' )   76          for  video 
in  clip_info
. get ( 'mp4' , []):   79                  'format_id' :  'mp4- %s '  %  video
[ 'bitrate' ],   80                  'vbr' :  int_or_none ( video
[ 'bitrate' ]. rstrip ( 'k' )),   84              smil 
=  self
._ download
_ xml
(   85                  'http://services.media.howstuffworks.com/videos/ %s /smil-service.smil'  %  video_id
,   86                  video_id
,  'Downloading video SMIL' )   88              http_base 
=  find_xpath_attr (   90                  './ {0} head/ {0} meta' . format ( '{http://www.w3.org/2001/SMIL20/Language}' ),   92                  'httpBase' ). get ( 'content' )   94              URL_SUFFIX 
=  '?v=2.11.3&fp=LNX 11,2,202,356&r=A&g=A'   96              for  video 
in  smil
. findall (   97                      './ {0} body/ {0} switch/ {0} video' . format ( '{http://www.w3.org/2001/SMIL20/Language}' )):   98                  vbr 
=  int_or_none ( video
. attrib
[ 'system-bitrate' ],  scale
= 1000 )  100                      'url' :  ' %s / %s%s '  % ( http_base
,  video
. attrib
[ 'src' ],  URL_SUFFIX
),  101                      'format_id' :  ' %d k'  %  vbr
,  105          self
._ sort
_ formats
( formats
)  108              'id' :  ' %s '  %  video_id
,  109              'display_id' :  display_id
,  110              'title' :  unescapeHTML ( clip_info
[ 'clip_title' ]),  111              'description' :  unescapeHTML ( clip_info
. get ( 'caption' )),  112              'thumbnail' :  clip_info
. get ( 'video_still_url' ),  113              'duration' :  int_or_none ( clip_info
. get ( 'duration' )),