]>
 
 
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/airmozilla.py
 
 
 
 
 
 
 
 
   2  from  __future__ 
import  unicode_literals
 
   6  from  . common 
import  InfoExtractor
 
  14  class  AirMozillaIE ( InfoExtractor
):  
  15      _VALID_URL 
=  r
'https?://air\.mozilla\.org/(?P<id>[0-9a-z-]+)/?'  
  17          'url' :  'https://air.mozilla.org/privacy-lab-a-meetup-for-privacy-minded-people-in-san-francisco/' ,  
  18          'md5' :  '2e3e7486ba5d180e829d453875b9b8bf' ,  
  22              'title' :  'Privacy Lab - a meetup for privacy minded people in San Francisco' ,  
  23              'thumbnail' :  r
're:https?://vid\.ly/(?P<id>[0-9a-z-]+)/poster' ,  
  24              'description' :  'Brings together privacy professionals and others interested in privacy at for-profits, non-profits, and NGOs in an effort to contribute to the state of the ecosystem...' ,  
  25              'timestamp' :  1422487800 ,  
  26              'upload_date' :  '20150128' ,  
  27              'location' :  'SFO Commons' ,  
  30              'categories' : [ 'Main' ,  'Privacy' ],  
  34      def  _real_extract ( self
,  url
):  
  35          display_id 
=  self
._ match
_ id
( url
)  
  36          webpage 
=  self
._ download
_ webpage
( url
,  display_id
)  
  37          video_id 
=  self
._ html
_ search
_ regex
( r
'//vid.ly/(.*?)/embed' ,  webpage
,  'id' )  
  39          embed_script 
=  self
._ download
_ webpage
( 'https://vid.ly/ {0} /embed' . format ( video_id
),  video_id
)  
  40          jwconfig 
=  self
._ search
_ regex
( r
'\svar jwconfig = (\{.*?\});\s' ,  embed_script
,  'metadata' )  
  41          metadata 
=  self
._ parse
_ json
( jwconfig
,  video_id
)  
  44              'url' :  source
[ 'file' ],  
  45              'ext' :  source
[ 'type' ],  
  46              'format_id' :  self
._ search
_ regex
( r
'&format=(.*)$' ,  source
[ 'file' ],  'video format' ),  
  47              'format' :  source
[ 'label' ],  
  48              'height' :  int ( source
[ 'label' ]. rstrip ( 'p' )),  
  49          }  for  source 
in  metadata
[ 'playlist' ][ 0 ][ 'sources' ]]  
  50          self
._ sort
_ formats
( formats
)  
  52          view_count 
=  int_or_none ( self
._ html
_ search
_ regex
(  
  53              r
'Views since archived: ([0-9]+)' ,  
  54              webpage
,  'view count' ,  fatal
= False ))  
  55          timestamp 
=  parse_iso8601 ( self
._ html
_ search
_ regex
(  
  56              r
'<time datetime="(.*?)"' ,  webpage
,  'timestamp' ,  fatal
= False ))  
  57          duration 
=  parse_duration ( self
._ search
_ regex
(  
  58              r
'Duration:\s*(\d+\s*hours?\s*\d+\s*minutes?)' ,  
  59              webpage
,  'duration' ,  fatal
= False ))  
  63              'title' :  self
._ og
_ search
_ title
( webpage
),  
  65              'url' :  self
._ og
_ search
_u rl
( webpage
),  
  66              'display_id' :  display_id
,  
  67              'thumbnail' :  metadata
[ 'playlist' ][ 0 ]. get ( 'image' ),  
  68              'description' :  self
._ og
_ search
_ description
( webpage
),  
  69              'timestamp' :  timestamp
,  
  70              'location' :  self
._ html
_ search
_ regex
( r
'Location: (.*)' ,  webpage
,  'location' ,  default
= None ),  
  72              'view_count' :  view_count
,  
  73              'categories' :  re
. findall ( r
'<a href=".*?" class="channel">(.*?)</a>' ,  webpage
),