]>
Raphaƫl G. Git Repositories - youtubedl/blob - youtube_dl/extractor/airmozilla.py 
0e069187994d0b9d25463d2d2f3cdb6c74ce5406
   2  from  __future__ 
import  unicode_literals
   6  from  . common 
import  InfoExtractor
  14  class  AirMozillaIE ( InfoExtractor
):   15      _VALID_URL 
=  r
'https?://air\.mozilla\.org/(?P<id>[0-9a-z-]+)/?'   17          'url' :  'https://air.mozilla.org/privacy-lab-a-meetup-for-privacy-minded-people-in-san-francisco/' ,   18          'md5' :  '2e3e7486ba5d180e829d453875b9b8bf' ,   22              'title' :  'Privacy Lab - a meetup for privacy minded people in San Francisco' ,   23              'thumbnail' :  r
're:https?://vid\.ly/(?P<id>[0-9a-z-]+)/poster' ,   24              'description' :  'Brings together privacy professionals and others interested in privacy at for-profits, non-profits, and NGOs in an effort to contribute to the state of the ecosystem...' ,   25              'timestamp' :  1422487800 ,   26              'upload_date' :  '20150128' ,   27              'location' :  'SFO Commons' ,   30              'categories' : [ 'Main' ,  'Privacy' ],   34      def  _real_extract ( self
,  url
):   35          display_id 
=  self
._ match
_ id
( url
)   36          webpage 
=  self
._ download
_ webpage
( url
,  display_id
)   37          video_id 
=  self
._ html
_ search
_ regex
( r
'//vid.ly/(.*?)/embed' ,  webpage
,  'id' )   39          embed_script 
=  self
._ download
_ webpage
( 'https://vid.ly/ {0} /embed' . format ( video_id
),  video_id
)   40          jwconfig 
=  self
._ search
_ regex
( r
'\svar jwconfig = (\{.*?\});\s' ,  embed_script
,  'metadata' )   41          metadata 
=  self
._ parse
_ json
( jwconfig
,  video_id
)   44              'url' :  source
[ 'file' ],   45              'ext' :  source
[ 'type' ],   46              'format_id' :  self
._ search
_ regex
( r
'&format=(.*)$' ,  source
[ 'file' ],  'video format' ),   47              'format' :  source
[ 'label' ],   48              'height' :  int ( source
[ 'label' ]. rstrip ( 'p' )),   49          }  for  source 
in  metadata
[ 'playlist' ][ 0 ][ 'sources' ]]   50          self
._ sort
_ formats
( formats
)   52          view_count 
=  int_or_none ( self
._ html
_ search
_ regex
(   53              r
'Views since archived: ([0-9]+)' ,   54              webpage
,  'view count' ,  fatal
= False ))   55          timestamp 
=  parse_iso8601 ( self
._ html
_ search
_ regex
(   56              r
'<time datetime="(.*?)"' ,  webpage
,  'timestamp' ,  fatal
= False ))   57          duration 
=  parse_duration ( self
._ search
_ regex
(   58              r
'Duration:\s*(\d+\s*hours?\s*\d+\s*minutes?)' ,   59              webpage
,  'duration' ,  fatal
= False ))   63              'title' :  self
._ og
_ search
_ title
( webpage
),   65              'url' :  self
._ og
_ search
_u rl
( webpage
),   66              'display_id' :  display_id
,   67              'thumbnail' :  metadata
[ 'playlist' ][ 0 ]. get ( 'image' ),   68              'description' :  self
._ og
_ search
_ description
( webpage
),   69              'timestamp' :  timestamp
,   70              'location' :  self
._ html
_ search
_ regex
( r
'Location: (.*)' ,  webpage
,  'location' ,  default
= None ),   72              'view_count' :  view_count
,   73              'categories' :  re
. findall ( r
'<a href=".*?" class="channel">(.*?)</a>' ,  webpage
),