]>
Raphaƫl G. Git Repositories - youtubedl/blob - youtube_dl/extractor/gdcvault.py
   1 from __future__ 
import unicode_literals
 
   5 from .common 
import InfoExtractor
 
   6 from ..compat 
import compat_urllib_parse
 
  14 class GDCVaultIE(InfoExtractor
): 
  15     _VALID_URL 
= r
'https?://(?:www\.)?gdcvault\.com/play/(?P<id>\d+)/(?P<name>(\w|-)+)?' 
  16     _NETRC_MACHINE 
= 'gdcvault' 
  19             'url': 'http://www.gdcvault.com/play/1019721/Doki-Doki-Universe-Sweet-Simple', 
  20             'md5': '7ce8388f544c88b7ac11c7ab1b593704', 
  23                 'display_id': 'Doki-Doki-Universe-Sweet-Simple', 
  25                 'title': 'Doki-Doki Universe: Sweet, Simple and Genuine (GDC Next 10)' 
  29             'url': 'http://www.gdcvault.com/play/1015683/Embracing-the-Dark-Art-of', 
  32                 'display_id': 'Embracing-the-Dark-Art-of', 
  34                 'title': 'Embracing the Dark Art of Mathematical Modeling in AI' 
  37                 'skip_download': True,  # Requires rtmpdump 
  41             'url': 'http://www.gdcvault.com/play/1015301/Thexder-Meets-Windows-95-or', 
  42             'md5': 'a5eb77996ef82118afbbe8e48731b98e', 
  45                 'display_id': 'Thexder-Meets-Windows-95-or', 
  47                 'title': 'Thexder Meets Windows 95, or Writing Great Games in the Windows 95 Environment', 
  49             'skip': 'Requires login', 
  52             'url': 'http://gdcvault.com/play/1020791/', 
  53             'only_matching': True, 
  57     def _parse_mp4(self
, xml_description
): 
  59         mp4_video 
= xml_description
.find('./metadata/mp4video') 
  63         mobj 
= re
.match(r
'(?P<root>https?://.*?/).*', mp4_video
.text
) 
  64         video_root 
= mobj
.group('root') 
  65         formats 
= xml_description
.findall('./metadata/MBRVideos/MBRVideo') 
  66         for format 
in formats
: 
  67             mobj 
= re
.match(r
'mp4\:(?P<path>.*)', format
.find('streamName').text
) 
  68             url 
= video_root 
+ mobj
.group('path') 
  69             vbr 
= format
.find('bitrate').text
 
  70             video_formats
.append({ 
  76     def _parse_flv(self
, xml_description
): 
  78         akamai_url 
= xml_description
.find('./metadata/akamaiHost').text
 
  79         audios 
= xml_description
.find('./metadata/audios') 
  80         if audios 
is not None: 
  83                     'url': 'rtmp://%s/ondemand?ovpfv=1.1' % akamai_url
, 
  84                     'play_path': remove_end(audio
.get('url'), '.flv'), 
  87                     'format_id': audio
.get('code'), 
  89         slide_video_path 
= xml_description
.find('./metadata/slideVideo').text
 
  91             'url': 'rtmp://%s/ondemand?ovpfv=1.1' % akamai_url
, 
  92             'play_path': remove_end(slide_video_path
, '.flv'), 
  94             'format_note': 'slide deck video', 
  97             'format_id': 'slides', 
  99         speaker_video_path 
= xml_description
.find('./metadata/speakerVideo').text
 
 101             'url': 'rtmp://%s/ondemand?ovpfv=1.1' % akamai_url
, 
 102             'play_path': remove_end(speaker_video_path
, '.flv'), 
 104             'format_note': 'speaker video', 
 107             'format_id': 'speaker', 
 111     def _login(self
, webpage_url
, display_id
): 
 112         (username
, password
) = self
._get
_login
_info
() 
 113         if username 
is None or password 
is None: 
 114             self
.report_warning('It looks like ' + webpage_url 
+ ' requires a login. Try specifying a username and password and try again.') 
 117         mobj 
= re
.match(r
'(?P<root_url>https?://.*?/).*', webpage_url
) 
 118         login_url 
= mobj
.group('root_url') + 'api/login.php' 
 119         logout_url 
= mobj
.group('root_url') + 'logout' 
 123             'password': password
, 
 126         request 
= sanitized_Request(login_url
, compat_urllib_parse
.urlencode(login_form
)) 
 127         request
.add_header('Content-Type', 'application/x-www-form-urlencoded') 
 128         self
._download
_webpage
(request
, display_id
, 'Logging in') 
 129         start_page 
= self
._download
_webpage
(webpage_url
, display_id
, 'Getting authenticated video page') 
 130         self
._download
_webpage
(logout_url
, display_id
, 'Logging out') 
 134     def _real_extract(self
, url
): 
 135         mobj 
= re
.match(self
._VALID
_URL
, url
) 
 137         video_id 
= mobj
.group('id') 
 138         display_id 
= mobj
.group('name') or video_id
 
 140         webpage_url 
= 'http://www.gdcvault.com/play/' + video_id
 
 141         start_page 
= self
._download
_webpage
(webpage_url
, display_id
) 
 143         direct_url 
= self
._search
_regex
( 
 144             r
's1\.addVariable\("file",\s*encodeURIComponent\("(/[^"]+)"\)\);', 
 145             start_page
, 'url', default
=None) 
 147             title 
= self
._html
_search
_regex
( 
 148                 r
'<td><strong>Session Name</strong></td>\s*<td>(.*?)</td>', 
 150             video_url 
= 'http://www.gdcvault.com' + direct_url
 
 151             # resolve the url so that we can detect the correct extension 
 152             head 
= self
._request
_webpage
(HEADRequest(video_url
), video_id
) 
 153             video_url 
= head
.geturl() 
 157                 'display_id': display_id
, 
 162         xml_root 
= self
._html
_search
_regex
( 
 163             r
'<iframe src="(?P<xml_root>.*?)player.html.*?".*?</iframe>', 
 164             start_page
, 'xml root', default
=None) 
 166             # Probably need to authenticate 
 167             login_res 
= self
._login
(webpage_url
, display_id
) 
 168             if login_res 
is None: 
 169                 self
.report_warning('Could not login.') 
 171                 start_page 
= login_res
 
 172                 # Grab the url from the authenticated page 
 173                 xml_root 
= self
._html
_search
_regex
( 
 174                     r
'<iframe src="(.*?)player.html.*?".*?</iframe>', 
 175                     start_page
, 'xml root') 
 177         xml_name 
= self
._html
_search
_regex
( 
 178             r
'<iframe src=".*?\?xml=(.+?\.xml).*?".*?</iframe>', 
 179             start_page
, 'xml filename', default
=None) 
 181             # Fallback to the older format 
 182             xml_name 
= self
._html
_search
_regex
(r
'<iframe src=".*?\?xmlURL=xml/(?P<xml_file>.+?\.xml).*?".*?</iframe>', start_page
, 'xml filename') 
 184         xml_description_url 
= xml_root 
+ 'xml/' + xml_name
 
 185         xml_description 
= self
._download
_xml
(xml_description_url
, display_id
) 
 187         video_title 
= xml_description
.find('./metadata/title').text
 
 188         video_formats 
= self
._parse
_mp
4(xml_description
) 
 189         if video_formats 
is None: 
 190             video_formats 
= self
._parse
_flv
(xml_description
) 
 194             'display_id': display_id
, 
 195             'title': video_title
, 
 196             'formats': video_formats
,