]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/airmozilla.py
from __future__ import unicode_literals

import re

from .common import InfoExtractor
from ..utils import (
    int_or_none,
    parse_duration,
    parse_iso8601,
)
class AirMozillaIE(InfoExtractor):
    """Extractor for air.mozilla.org event pages that embed a vid.ly player.

    The page itself supplies the descriptive metadata (title, description,
    timestamp, location, duration, view count, categories); the vid.ly embed
    script supplies the media sources and the thumbnail.
    """

    _VALID_URL = r'https?://air\.mozilla\.org/(?P<id>[0-9a-z-]+)/?'
    # NOTE(review): the attribute name and the 'info_dict' nesting follow the
    # usual extractor test layout and were restored around the surviving
    # entries. Expectations for 'id', 'ext', 'duration' and 'view_count'
    # appear to be missing from this copy -- confirm against upstream before
    # relying on this test case.
    _TEST = {
        'url': 'https://air.mozilla.org/privacy-lab-a-meetup-for-privacy-minded-people-in-san-francisco/',
        'md5': '2e3e7486ba5d180e829d453875b9b8bf',
        'info_dict': {
            'title': 'Privacy Lab - a meetup for privacy minded people in San Francisco',
            # Raw string so that ``\.`` reaches the regex engine verbatim
            # instead of being an invalid string escape.
            'thumbnail': r're:https?://vid\.ly/(?P<id>[0-9a-z-]+)/poster',
            'description': 'Brings together privacy professionals and others interested in privacy at for-profits, non-profits, and NGOs in an effort to contribute to the state of the ecosystem...',
            'timestamp': 1422487800,
            'upload_date': '20150128',
            'location': 'SFO Commons',
            'categories': ['Main', 'Privacy'],
        },
    }

    def _real_extract(self, url):
        """Build the info dict for a single Air Mozilla page.

        Steps:
          1. Download the page and pull the vid.ly media id out of the
             embed reference.
          2. Download the vid.ly embed script and parse the ``jwconfig``
             object literal it assigns.
          3. Turn every entry of the first playlist item's ``sources`` into
             a format dict.
          4. Scrape view count, timestamp and duration from the page.

        :param url: page URL (expected to match ``_VALID_URL``).
        :returns: dict describing the video.
        :raises KeyError, IndexError: if the parsed ``jwconfig`` lacks
            ``playlist[0]['sources']`` or a source lacks
            ``file``/``type``/``label``.
        :raises ValueError: if a source label is not an integer followed by
            optional ``p`` characters (e.g. ``720p``).
        """
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)
        # The dot is escaped so only the literal host ``vid.ly`` matches.
        video_id = self._html_search_regex(
            r'//vid\.ly/(.*?)/embed', webpage, 'id')

        # No whitespace around the placeholder: it would end up in the URL.
        embed_script = self._download_webpage(
            'https://vid.ly/{0}/embed'.format(video_id), video_id)
        jwconfig = self._search_regex(
            r'\svar jwconfig = (\{.*?\});\s', embed_script, 'metadata')
        metadata = self._parse_json(jwconfig, video_id)

        formats = [{
            'url': source['file'],
            'ext': source['type'],
            'format_id': self._search_regex(
                r'&format=(.*)$', source['file'], 'video format'),
            'format': source['label'],
            # Labels look like '720p'; drop the suffix to get the height.
            'height': int(source['label'].rstrip('p')),
        } for source in metadata['playlist'][0]['sources']]
        self._sort_formats(formats)

        # The three lookups below pass fatal=False -- presumably the helper
        # then yields None instead of raising when the pattern is absent;
        # confirm against the InfoExtractor base class.
        view_count = int_or_none(self._html_search_regex(
            r'Views since archived: ([0-9]+)',
            webpage, 'view count', fatal=False))
        timestamp = parse_iso8601(self._html_search_regex(
            r'<time datetime="(.*?)"', webpage, 'timestamp', fatal=False))
        duration = parse_duration(self._search_regex(
            r'Duration:\s*(\d+\s*hours?\s*\d+\s*minutes?)',
            webpage, 'duration', fatal=False))

        # NOTE(review): the 'id', 'formats' and 'duration' entries were
        # restored from the locals computed above -- confirm against upstream.
        return {
            'id': video_id,
            'title': self._og_search_title(webpage),
            'formats': formats,
            'url': self._og_search_url(webpage),
            'display_id': display_id,
            'thumbnail': metadata['playlist'][0].get('image'),
            'description': self._og_search_description(webpage),
            'timestamp': timestamp,
            'location': self._html_search_regex(
                r'Location: (.*)', webpage, 'location', default=None),
            'duration': duration,
            'view_count': view_count,
            'categories': re.findall(
                r'<a href=".*?" class="channel">(.*?)</a>', webpage),
        }