]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/airmozilla.py
2 from __future__
import unicode_literals
6 from . common
import InfoExtractor
14 class AirMozillaIE ( InfoExtractor
):
15 _VALID_URL
= r
'https?://air\.mozilla\.org/(?P<id>[0-9a-z-]+)/?'
17 'url' : 'https://air.mozilla.org/privacy-lab-a-meetup-for-privacy-minded-people-in-san-francisco/' ,
18 'md5' : '8d02f53ee39cf006009180e21df1f3ba' ,
22 'title' : 'Privacy Lab - a meetup for privacy minded people in San Francisco' ,
23 'thumbnail' : r
're:https?://.*/poster\.jpg' ,
24 'description' : 'Brings together privacy professionals and others interested in privacy at for-profits, non-profits, and NGOs in an effort to contribute to the state of the ecosystem...' ,
25 'timestamp' : 1422487800 ,
26 'upload_date' : '20150128' ,
27 'location' : 'SFO Commons' ,
30 'categories' : [ 'Main' , 'Privacy' ],
34 def _real_extract ( self
, url
):
35 display_id
= self
._ match
_ id
( url
)
36 webpage
= self
._ download
_ webpage
( url
, display_id
)
37 video_id
= self
._ html
_ search
_ regex
( r
'//vid\.ly/(.*?)/embed' , webpage
, 'id' )
39 embed_script
= self
._ download
_ webpage
( 'https://vid.ly/ {0} /embed' . format ( video_id
), video_id
)
40 jwconfig
= self
._ parse
_ json
( self
._ search
_ regex
(
41 r
'initCallback\((.*)\);' , embed_script
, 'metadata' ), video_id
)[ 'config' ]
43 info_dict
= self
._ parse
_ jwplayer
_ data
( jwconfig
, video_id
)
44 view_count
= int_or_none ( self
._ html
_ search
_ regex
(
45 r
'Views since archived: ([0-9]+)' ,
46 webpage
, 'view count' , fatal
= False ))
47 timestamp
= parse_iso8601 ( self
._ html
_ search
_ regex
(
48 r
'<time datetime="(.*?)"' , webpage
, 'timestamp' , fatal
= False ))
49 duration
= parse_duration ( self
._ search
_ regex
(
50 r
'Duration:\s*(\d+\s*hours?\s*\d+\s*minutes?)' ,
51 webpage
, 'duration' , fatal
= False ))
55 'title' : self
._ og
_ search
_ title
( webpage
),
56 'url' : self
._ og
_ search
_u rl
( webpage
),
57 'display_id' : display_id
,
58 'description' : self
._ og
_ search
_ description
( webpage
),
59 'timestamp' : timestamp
,
60 'location' : self
._ html
_ search
_ regex
( r
'Location: (.*)' , webpage
, 'location' , default
= None ),
62 'view_count' : view_count
,
63 'categories' : re
. findall ( r
'<a href=".*?" class="channel">(.*?)</a>' , webpage
),