]>
Raphaƫl G. Git Repositories - youtubedl/blob - youtube_dl/extractor/howstuffworks.py 
   1  from  __future__ 
import  unicode_literals
   3  from  . common 
import  InfoExtractor
  12  class  HowStuffWorksIE ( InfoExtractor
):   13      _VALID_URL 
=  r
'https?://[\da-z-]+\.howstuffworks\.com/(?:[^/]+/)*(?:\d+-)?(?P<id>.+?)-video\.htm'   16              'url' :  'http://adventure.howstuffworks.com/5266-cool-jobs-iditarod-musher-video.htm' ,   20                  'title' :  'Cool Jobs - Iditarod Musher' ,   21                  'description' :  'Cold sleds, freezing temps and warm dog breath... an Iditarod musher \' s dream. Kasey-Dee Gardner jumps on a sled to find out what the big deal is.' ,   22                  'display_id' :  'cool-jobs-iditarod-musher' ,   23                  'thumbnail' :  're:^https?://.*\.jpg$' ,   28              'url' :  'http://adventure.howstuffworks.com/7199-survival-zone-food-and-water-in-the-savanna-video.htm' ,   32                  'title' :  'Survival Zone: Food and Water In the Savanna' ,   33                  'description' :  'Learn how to find both food and water while trekking in the African savannah. In this video from the Discovery Channel.' ,   34                  'display_id' :  'survival-zone-food-and-water-in-the-savanna' ,   35                  'thumbnail' :  're:^https?://.*\.jpg$' ,   39              'url' :  'http://entertainment.howstuffworks.com/arts/2706-sword-swallowing-1-by-dan-meyer-video.htm' ,   43                  'title' :  'Sword Swallowing #1 by Dan Meyer' ,   44                  'description' :  'Video footage (1 of 3) used by permission of the owner Dan Meyer through Sword Swallowers Association International <www.swordswallow.org>' ,   45                  'display_id' :  'sword-swallowing-1-by-dan-meyer' ,   46                  'thumbnail' :  're:^https?://.*\.jpg$' ,   50              'url' :  'http://shows.howstuffworks.com/stuff-to-blow-your-mind/optical-illusions-video.htm' ,   51              'only_matching' :  True ,   55      def  _real_extract ( self
,  url
):   56          display_id 
=  self
._ match
_ id
( url
)   57          webpage 
=  self
._ download
_ webpage
( url
,  display_id
)   58          clip_js 
=  self
._ search
_ regex
(   59              r
'(?s)var clip = ({.*?});' ,  webpage
,  'clip info' )   60          clip_info 
=  self
._ parse
_ json
(   61              clip_js
,  display_id
,  transform_source
= js_to_json
)   63          video_id 
=  clip_info
[ 'content_id' ]   65          m3u8_url 
=  clip_info
. get ( 'm3u8' )   67              formats 
+=  self
._ extract
_ m
3u8_ formats
( m3u8_url
,  video_id
,  'mp4' )   68          for  video 
in  clip_info
. get ( 'mp4' , []):   71                  'format_id' :  video
[ 'bitrate' ],   72                  'vbr' :  int ( video
[ 'bitrate' ]. rstrip ( 'k' )),   76              smil 
=  self
._ download
_ xml
(   77                  'http://services.media.howstuffworks.com/videos/ %s /smil-service.smil'  %  video_id
,   78                  video_id
,  'Downloading video SMIL' )   80              http_base 
=  find_xpath_attr (   82                  './ {0} head/ {0} meta' . format ( '{http://www.w3.org/2001/SMIL20/Language}' ),   84                  'httpBase' ). get ( 'content' )   86              URL_SUFFIX 
=  '?v=2.11.3&fp=LNX 11,2,202,356&r=A&g=A'   88              for  video 
in  smil
. findall (   89                      './ {0} body/ {0} switch/ {0} video' . format ( '{http://www.w3.org/2001/SMIL20/Language}' )):   90                  vbr 
=  int_or_none ( video
. attrib
[ 'system-bitrate' ],  scale
= 1000 )   92                      'url' :  ' %s / %s%s '  % ( http_base
,  video
. attrib
[ 'src' ],  URL_SUFFIX
),   93                      'format_id' :  ' %d k'  %  vbr
,   97          self
._ sort
_ formats
( formats
)  100              'id' :  ' %s '  %  video_id
,  101              'display_id' :  display_id
,  102              'title' :  unescapeHTML ( clip_info
[ 'clip_title' ]),  103              'description' :  unescapeHTML ( clip_info
. get ( 'caption' )),  104              'thumbnail' :  clip_info
. get ( 'video_still_url' ),  105              'duration' :  clip_info
. get ( 'duration' ),