]>
Raphaƫl G. Git Repositories - youtubedl/blob - youtube_dl/extractor/voicerepublic.py
   1 from __future__ 
import unicode_literals
 
   5 from .common 
import InfoExtractor
 
   6 from ..compat 
import compat_urlparse
 
  15 class VoiceRepublicIE(InfoExtractor
): 
  16     _VALID_URL 
= r
'https?://voicerepublic\.com/(?:talks|embed)/(?P<id>[0-9a-z-]+)' 
  18         'url': 'http://voicerepublic.com/talks/watching-the-watchers-building-a-sousveillance-state', 
  19         'md5': '0554a24d1657915aa8e8f84e15dc9353', 
  22             'display_id': 'watching-the-watchers-building-a-sousveillance-state', 
  24             'title': 'Watching the Watchers: Building a Sousveillance State', 
  25             'description': 'md5:715ba964958afa2398df615809cfecb1', 
  26             'thumbnail': 're:^https?://.*\.(?:png|jpg)$', 
  31         'url': 'http://voicerepublic.com/embed/watching-the-watchers-building-a-sousveillance-state', 
  32         'only_matching': True, 
  35     def _real_extract(self
, url
): 
  36         display_id 
= self
._match
_id
(url
) 
  38         req 
= sanitized_Request( 
  39             compat_urlparse
.urljoin(url
, '/talks/%s' % display_id
)) 
  40         # Older versions of Firefox get redirected to an "upgrade browser" page 
  41         req
.add_header('User-Agent', 'youtube-dl') 
  42         webpage 
= self
._download
_webpage
(req
, display_id
) 
  44         if '>Queued for processing, please stand by...<' in webpage
: 
  46                 'Audio is still queued for processing', expected
=True) 
  48         config 
= self
._search
_regex
( 
  49             r
'(?s)return ({.+?});\s*\n', webpage
, 
  51         data 
= self
._parse
_json
(config
, display_id
, fatal
=False) if config 
else None 
  54             description 
= data
.get('teaser') 
  55             talk_id 
= data
.get('talk_id') or display_id
 
  57             duration 
= int_or_none(talk
.get('duration')) 
  59                 'url': compat_urlparse
.urljoin(url
, talk_url
), 
  60                 'format_id': format_id
, 
  61                 'ext': determine_ext(talk_url
) or format_id
, 
  63             } for format_id
, talk_url 
in talk
['links'].items()] 
  65             title 
= self
._og
_search
_title
(webpage
) 
  66             description 
= self
._html
_search
_regex
( 
  67                 r
"(?s)<div class='talk-teaser'[^>]*>(.+?)</div>", 
  68                 webpage
, 'description', fatal
=False) 
  69             talk_id 
= self
._search
_regex
( 
  70                 [r
"id='jc-(\d+)'", r
"data-shareable-id='(\d+)'"], 
  71                 webpage
, 'talk id', default
=None) or display_id
 
  73             player 
= self
._search
_regex
( 
  74                 r
"class='vr-player jp-jplayer'([^>]+)>", webpage
, 'player') 
  76                 'url': compat_urlparse
.urljoin(url
, talk_url
), 
  77                 'format_id': format_id
, 
  78                 'ext': determine_ext(talk_url
) or format_id
, 
  80             } for format_id
, talk_url 
in re
.findall(r
"data-([^=]+)='([^']+)'", player
)] 
  81         self
._sort
_formats
(formats
) 
  83         thumbnail 
= self
._og
_search
_thumbnail
(webpage
) 
  84         view_count 
= int_or_none(self
._search
_regex
( 
  85             r
"class='play-count[^']*'>\s*(\d+) plays", 
  86             webpage
, 'play count', fatal
=False)) 
  90             'display_id': display_id
, 
  92             'description': description
, 
  93             'thumbnail': thumbnail
, 
  95             'view_count': view_count
,