]>
Raphaƫl G. Git Repositories - youtubedl/blob - youtube_dl/extractor/pyvideo.py
   1 from __future__ 
import unicode_literals
 
   5 from .common 
import InfoExtractor
 
   6 from ..compat 
import compat_str
 
   7 from ..utils 
import int_or_none
 
  10 class PyvideoIE(InfoExtractor
): 
  11     _VALID_URL 
= r
'https?://(?:www\.)?pyvideo\.org/(?P<category>[^/]+)/(?P<id>[^/?#&.]+)' 
  14         'url': 'http://pyvideo.org/pycon-us-2013/become-a-logging-expert-in-30-minutes.html', 
  16             'id': 'become-a-logging-expert-in-30-minutes', 
  20         'url': 'http://pyvideo.org/pygotham-2012/gloriajw-spotifywitherikbernhardsson182m4v.html', 
  21         'md5': '5fe1c7e0a8aa5570330784c847ff6d12', 
  25             'title': 'Gloriajw-SpotifyWithErikBernhardsson182.m4v', 
  29     def _real_extract(self
, url
): 
  30         mobj 
= re
.match(self
._VALID
_URL
, url
) 
  31         category 
= mobj
.group('category') 
  32         video_id 
= mobj
.group('id') 
  36         data 
= self
._download
_json
( 
  37             'https://raw.githubusercontent.com/pyvideo/data/master/%s/videos/%s.json' 
  38             % (category
, video_id
), video_id
, fatal
=False) 
  41             for video 
in data
['videos']: 
  42                 video_url 
= video
.get('url') 
  44                     if video
.get('type') == 'youtube': 
  45                         entries
.append(self
.url_result(video_url
, 'Youtube')) 
  48                             'id': compat_str(data
.get('id') or video_id
), 
  50                             'title': data
['title'], 
  51                             'description': data
.get('description') or data
.get('summary'), 
  52                             'thumbnail': data
.get('thumbnail_url'), 
  53                             'duration': int_or_none(data
.get('duration')), 
  56             webpage 
= self
._download
_webpage
(url
, video_id
) 
  57             title 
= self
._og
_search
_title
(webpage
) 
  58             media_urls 
= self
._search
_regex
( 
  59                 r
'(?s)Media URL:(.+?)</li>', webpage
, 'media urls') 
  61                     r
'<a[^>]+href=(["\'])(?P
<url
>http
.+?
)\
1', media_urls): 
  62                 media_url = m.group('url
') 
  63                 if re.match(r'https?
://www\
.youtube\
.com
/watch
\?v
=.*', media_url): 
  64                     entries.append(self.url_result(media_url, 'Youtube
')) 
  72         return self.playlist_result(entries, video_id)