]>
Raphaƫl G. Git Repositories - youtubedl/blob - youtube_dl/extractor/gdcvault.py
   1 from __future__ 
import unicode_literals
 
   5 from .common 
import InfoExtractor
 
  11 class GDCVaultIE(InfoExtractor
): 
  12     _VALID_URL 
= r
'https?://(?:www\.)?gdcvault\.com/play/(?P<id>\d+)/(?P<name>(\w|-)+)' 
  15             'url': 'http://www.gdcvault.com/play/1019721/Doki-Doki-Universe-Sweet-Simple', 
  16             'md5': '7ce8388f544c88b7ac11c7ab1b593704', 
  20                 'title': 'Doki-Doki Universe: Sweet, Simple and Genuine (GDC Next 10)' 
  24             'url': 'http://www.gdcvault.com/play/1015683/Embracing-the-Dark-Art-of', 
  28                 'title': 'Embracing the Dark Art of Mathematical Modeling in AI' 
  31                 'skip_download': True,  # Requires rtmpdump 
  36     def _parse_mp4(self
, xml_description
): 
  38         mp4_video 
= xml_description
.find('./metadata/mp4video') 
  42         mobj 
= re
.match(r
'(?P<root>https?://.*?/).*', mp4_video
.text
) 
  43         video_root 
= mobj
.group('root') 
  44         formats 
= xml_description
.findall('./metadata/MBRVideos/MBRVideo') 
  45         for format 
in formats
: 
  46             mobj 
= re
.match(r
'mp4\:(?P<path>.*)', format
.find('streamName').text
) 
  47             url 
= video_root 
+ mobj
.group('path') 
  48             vbr 
= format
.find('bitrate').text
 
  49             video_formats
.append({ 
  55     def _parse_flv(self
, xml_description
): 
  57         akami_url 
= xml_description
.find('./metadata/akamaiHost').text
 
  58         slide_video_path 
= xml_description
.find('./metadata/slideVideo').text
 
  59         video_formats
.append({ 
  60             'url': 'rtmp://' + akami_url 
+ '/' + slide_video_path
, 
  61             'format_note': 'slide deck video', 
  64             'format_id': 'slides', 
  66         speaker_video_path 
= xml_description
.find('./metadata/speakerVideo').text
 
  67         video_formats
.append({ 
  68             'url': 'rtmp://' + akami_url 
+ '/' + speaker_video_path
, 
  69             'format_note': 'speaker video', 
  72             'format_id': 'speaker', 
  76     def _login(self
, webpage_url
, video_id
): 
  77         (username
, password
) = self
._get
_login
_info
() 
  78         if username 
is None or password 
is None: 
  79             self
.report_warning('It looks like ' + webpage_url 
+ ' requires a login. Try specifying a username and password and try again.') 
  82         mobj 
= re
.match(r
'(?P<root_url>https?://.*?/).*', webpage_url
) 
  83         login_url 
= mobj
.group('root_url') + 'api/login.php' 
  84         logout_url 
= mobj
.group('root_url') + 'logout' 
  91         request 
= compat_urllib_request
.Request(login_url
, compat_urllib_parse
.urlencode(login_form
)) 
  92         request
.add_header('Content-Type', 'application/x-www-form-urlencoded') 
  93         self
._download
_webpage
(request
, video_id
, 'Logging in') 
  94         start_page 
= self
._download
_webpage
(webpage_url
, video_id
, 'Getting authenticated video page') 
  95         self
._download
_webpage
(logout_url
, video_id
, 'Logging out') 
  99     def _real_extract(self
, url
): 
 100         mobj 
= re
.match(self
._VALID
_URL
, url
) 
 102         video_id 
= mobj
.group('id') 
 103         webpage_url 
= 'http://www.gdcvault.com/play/' + video_id
 
 104         start_page 
= self
._download
_webpage
(webpage_url
, video_id
) 
 106         xml_root 
= self
._html
_search
_regex
(r
'<iframe src="(?P<xml_root>.*?)player.html.*?".*?</iframe>', start_page
, 'xml root', None, False) 
 109             # Probably need to authenticate 
 110             start_page 
= self
._login
(webpage_url
, video_id
) 
 111             if start_page 
is None: 
 112                 self
.report_warning('Could not login.') 
 114                 # Grab the url from the authenticated page 
 115                 xml_root 
= self
._html
_search
_regex
(r
'<iframe src="(?P<xml_root>.*?)player.html.*?".*?</iframe>', start_page
, 'xml root') 
 117         xml_name 
= self
._html
_search
_regex
(r
'<iframe src=".*?\?xml=(?P<xml_file>.+?\.xml).*?".*?</iframe>', start_page
, 'xml filename', None, False) 
 119             # Fallback to the older format 
 120             xml_name 
= self
._html
_search
_regex
(r
'<iframe src=".*?\?xmlURL=xml/(?P<xml_file>.+?\.xml).*?".*?</iframe>', start_page
, 'xml filename') 
 122         xml_decription_url 
= xml_root 
+ 'xml/' + xml_name
 
 123         xml_description 
= self
._download
_xml
(xml_decription_url
, video_id
) 
 125         video_title 
= xml_description
.find('./metadata/title').text
 
 126         video_formats 
= self
._parse
_mp
4(xml_description
) 
 127         if video_formats 
is None: 
 128             video_formats 
= self
._parse
_flv
(xml_description
) 
 132             'title': video_title
, 
 133             'formats': video_formats
,