]>
Raphaƫl G. Git Repositories - youtubedl/blob - youtube_dl/extractor/voicerepublic.py
   1 from __future__ 
import unicode_literals
 
   5 from .common 
import InfoExtractor
 
  17 class VoiceRepublicIE(InfoExtractor
): 
  18     _VALID_URL 
= r
'https?://voicerepublic\.com/(?:talks|embed)/(?P<id>[0-9a-z-]+)' 
  20         'url': 'http://voicerepublic.com/talks/watching-the-watchers-building-a-sousveillance-state', 
  21         'md5': '0554a24d1657915aa8e8f84e15dc9353', 
  24             'display_id': 'watching-the-watchers-building-a-sousveillance-state', 
  26             'title': 'Watching the Watchers: Building a Sousveillance State', 
  27             'description': 'md5:715ba964958afa2398df615809cfecb1', 
  28             'thumbnail': 're:^https?://.*\.(?:png|jpg)$', 
  33         'url': 'http://voicerepublic.com/embed/watching-the-watchers-building-a-sousveillance-state', 
  34         'only_matching': True, 
  37     def _real_extract(self
, url
): 
  38         display_id 
= self
._match
_id
(url
) 
  40         req 
= compat_urllib_request
.Request( 
  41             compat_urlparse
.urljoin(url
, '/talks/%s' % display_id
)) 
  42         # Older versions of Firefox get redirected to an "upgrade browser" page 
  43         req
.add_header('User-Agent', 'youtube-dl') 
  44         webpage 
= self
._download
_webpage
(req
, display_id
) 
  46         if '>Queued for processing, please stand by...<' in webpage
: 
  48                 'Audio is still queued for processing', expected
=True) 
  50         config 
= self
._search
_regex
( 
  51             r
'(?s)return ({.+?});\s*\n', webpage
, 
  53         data 
= self
._parse
_json
(config
, display_id
, fatal
=False) if config 
else None 
  56             description 
= data
.get('teaser') 
  57             talk_id 
= data
.get('talk_id') or display_id
 
  59             duration 
= int_or_none(talk
.get('duration')) 
  61                 'url': compat_urlparse
.urljoin(url
, talk_url
), 
  62                 'format_id': format_id
, 
  63                 'ext': determine_ext(talk_url
) or format_id
, 
  65             } for format_id
, talk_url 
in talk
['links'].items()] 
  67             title 
= self
._og
_search
_title
(webpage
) 
  68             description 
= self
._html
_search
_regex
( 
  69                 r
"(?s)<div class='talk-teaser'[^>]*>(.+?)</div>", 
  70                 webpage
, 'description', fatal
=False) 
  71             talk_id 
= self
._search
_regex
( 
  72                 [r
"id='jc-(\d+)'", r
"data-shareable-id='(\d+)'"], 
  73                 webpage
, 'talk id', default
=None) or display_id
 
  75             player 
= self
._search
_regex
( 
  76                 r
"class='vr-player jp-jplayer'([^>]+)>", webpage
, 'player') 
  78                 'url': compat_urlparse
.urljoin(url
, talk_url
), 
  79                 'format_id': format_id
, 
  80                 'ext': determine_ext(talk_url
) or format_id
, 
  82             } for format_id
, talk_url 
in re
.findall(r
"data-([^=]+)='([^']+)'", player
)] 
  83         self
._sort
_formats
(formats
) 
  85         thumbnail 
= self
._og
_search
_thumbnail
(webpage
) 
  86         view_count 
= int_or_none(self
._search
_regex
( 
  87             r
"class='play-count[^']*'>\s*(\d+) plays", 
  88             webpage
, 'play count', fatal
=False)) 
  92             'display_id': display_id
, 
  94             'description': description
, 
  95             'thumbnail': thumbnail
, 
  97             'view_count': view_count
,