]>
 
 
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/airmozilla.py 
 
 
 
 
 
 
 
 
   2  from  __future__ 
import  unicode_literals
 
   6  from  . common 
import  InfoExtractor
 
  14  class  AirMozillaIE ( InfoExtractor
):  
  15      _VALID_URL 
=  r
'https?://air\.mozilla\.org/(?P<id>[0-9a-z-]+)/?'  
  17          'url' :  'https://air.mozilla.org/privacy-lab-a-meetup-for-privacy-minded-people-in-san-francisco/' ,  
  18          'md5' :  '8d02f53ee39cf006009180e21df1f3ba' ,  
  22              'title' :  'Privacy Lab - a meetup for privacy minded people in San Francisco' ,  
  23              'thumbnail' :  r
're:https?://.*/poster\.jpg' ,  
  24              'description' :  'Brings together privacy professionals and others interested in privacy at for-profits, non-profits, and NGOs in an effort to contribute to the state of the ecosystem...' ,  
  25              'timestamp' :  1422487800 ,  
  26              'upload_date' :  '20150128' ,  
  27              'location' :  'SFO Commons' ,  
  30              'categories' : [ 'Main' ,  'Privacy' ],  
  34      def  _real_extract ( self
,  url
):  
  35          display_id 
=  self
._ match
_ id
( url
)  
  36          webpage 
=  self
._ download
_ webpage
( url
,  display_id
)  
  37          video_id 
=  self
._ html
_ search
_ regex
( r
'//vid\.ly/(.*?)/embed' ,  webpage
,  'id' )  
  39          embed_script 
=  self
._ download
_ webpage
( 'https://vid.ly/ {0} /embed' . format ( video_id
),  video_id
)  
  40          jwconfig 
=  self
._ parse
_ json
( self
._ search
_ regex
(  
  41              r
'initCallback\((.*)\);' ,  embed_script
,  'metadata' ),  video_id
)[ 'config' ]  
  43          info_dict 
=  self
._ parse
_ jwplayer
_ data
( jwconfig
,  video_id
)  
  44          view_count 
=  int_or_none ( self
._ html
_ search
_ regex
(  
  45              r
'Views since archived: ([0-9]+)' ,  
  46              webpage
,  'view count' ,  fatal
= False ))  
  47          timestamp 
=  parse_iso8601 ( self
._ html
_ search
_ regex
(  
  48              r
'<time datetime="(.*?)"' ,  webpage
,  'timestamp' ,  fatal
= False ))  
  49          duration 
=  parse_duration ( self
._ search
_ regex
(  
  50              r
'Duration:\s*(\d+\s*hours?\s*\d+\s*minutes?)' ,  
  51              webpage
,  'duration' ,  fatal
= False ))  
  55              'title' :  self
._ og
_ search
_ title
( webpage
),  
  56              'url' :  self
._ og
_ search
_u rl
( webpage
),  
  57              'display_id' :  display_id
,  
  58              'description' :  self
._ og
_ search
_ description
( webpage
),  
  59              'timestamp' :  timestamp
,  
  60              'location' :  self
._ html
_ search
_ regex
( r
'Location: (.*)' ,  webpage
,  'location' ,  default
= None ),  
  62              'view_count' :  view_count
,  
  63              'categories' :  re
. findall ( r
'<a href=".*?" class="channel">(.*?)</a>' ,  webpage
),