]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/gdcvault.py
fed968f5179ebf6159212da5ab75b024b3bc0a03
   1 from __future__ 
import unicode_literals
 
   5 from .common 
import InfoExtractor
 
  12 class GDCVaultIE(InfoExtractor
): 
  13     _VALID_URL 
= r
'https?://(?:www\.)?gdcvault\.com/play/(?P<id>\d+)/(?P<name>(\w|-)+)' 
  16             'url': 'http://www.gdcvault.com/play/1019721/Doki-Doki-Universe-Sweet-Simple', 
  17             'md5': '7ce8388f544c88b7ac11c7ab1b593704', 
  21                 'title': 'Doki-Doki Universe: Sweet, Simple and Genuine (GDC Next 10)' 
  25             'url': 'http://www.gdcvault.com/play/1015683/Embracing-the-Dark-Art-of', 
  29                 'title': 'Embracing the Dark Art of Mathematical Modeling in AI' 
  32                 'skip_download': True,  # Requires rtmpdump 
  36             'url': 'http://www.gdcvault.com/play/1015301/Thexder-Meets-Windows-95-or', 
  37             'md5': 'a5eb77996ef82118afbbe8e48731b98e', 
  41                 'title': 'Thexder Meets Windows 95, or Writing Great Games in the Windows 95 Environment', 
  43             'skip': 'Requires login', 
  47     def _parse_mp4(self
, xml_description
): 
  49         mp4_video 
= xml_description
.find('./metadata/mp4video') 
  53         mobj 
= re
.match(r
'(?P<root>https?://.*?/).*', mp4_video
.text
) 
  54         video_root 
= mobj
.group('root') 
  55         formats 
= xml_description
.findall('./metadata/MBRVideos/MBRVideo') 
  56         for format 
in formats
: 
  57             mobj 
= re
.match(r
'mp4\:(?P<path>.*)', format
.find('streamName').text
) 
  58             url 
= video_root 
+ mobj
.group('path') 
  59             vbr 
= format
.find('bitrate').text
 
  60             video_formats
.append({ 
  66     def _parse_flv(self
, xml_description
): 
  68         akami_url 
= xml_description
.find('./metadata/akamaiHost').text
 
  69         slide_video_path 
= xml_description
.find('./metadata/slideVideo').text
 
  70         video_formats
.append({ 
  71             'url': 'rtmp://' + akami_url 
+ '/' + slide_video_path
, 
  72             'format_note': 'slide deck video', 
  75             'format_id': 'slides', 
  77         speaker_video_path 
= xml_description
.find('./metadata/speakerVideo').text
 
  78         video_formats
.append({ 
  79             'url': 'rtmp://' + akami_url 
+ '/' + speaker_video_path
, 
  80             'format_note': 'speaker video', 
  83             'format_id': 'speaker', 
  87     def _login(self
, webpage_url
, video_id
): 
  88         (username
, password
) = self
._get
_login
_info
() 
  89         if username 
is None or password 
is None: 
  90             self
.report_warning('It looks like ' + webpage_url 
+ ' requires a login. Try specifying a username and password and try again.') 
  93         mobj 
= re
.match(r
'(?P<root_url>https?://.*?/).*', webpage_url
) 
  94         login_url 
= mobj
.group('root_url') + 'api/login.php' 
  95         logout_url 
= mobj
.group('root_url') + 'logout' 
 102         request 
= compat_urllib_request
.Request(login_url
, compat_urllib_parse
.urlencode(login_form
)) 
 103         request
.add_header('Content-Type', 'application/x-www-form-urlencoded') 
 104         self
._download
_webpage
(request
, video_id
, 'Logging in') 
 105         start_page 
= self
._download
_webpage
(webpage_url
, video_id
, 'Getting authenticated video page') 
 106         self
._download
_webpage
(logout_url
, video_id
, 'Logging out') 
 110     def _real_extract(self
, url
): 
 111         mobj 
= re
.match(self
._VALID
_URL
, url
) 
 113         video_id 
= mobj
.group('id') 
 114         webpage_url 
= 'http://www.gdcvault.com/play/' + video_id
 
 115         start_page 
= self
._download
_webpage
(webpage_url
, video_id
) 
 117         direct_url 
= self
._search
_regex
( 
 118             r
's1\.addVariable\("file",\s*encodeURIComponent\("(/[^"]+)"\)\);', 
 119             start_page
, 'url', default
=None) 
 121             video_url 
= 'http://www.gdcvault.com/' + direct_url
 
 122             title 
= self
._html
_search
_regex
( 
 123                 r
'<td><strong>Session Name</strong></td>\s*<td>(.*?)</td>', 
 133         xml_root 
= self
._html
_search
_regex
( 
 134             r
'<iframe src="(?P<xml_root>.*?)player.html.*?".*?</iframe>', 
 135             start_page
, 'xml root', default
=None) 
 137             # Probably need to authenticate 
 138             login_res 
= self
._login
(webpage_url
, video_id
) 
 139             if login_res 
is None: 
 140                 self
.report_warning('Could not login.') 
 142                 start_page 
= login_res
 
 143                 # Grab the url from the authenticated page 
 144                 xml_root 
= self
._html
_search
_regex
( 
 145                     r
'<iframe src="(.*?)player.html.*?".*?</iframe>', 
 146                     start_page
, 'xml root') 
 148         xml_name 
= self
._html
_search
_regex
( 
 149             r
'<iframe src=".*?\?xml=(.+?\.xml).*?".*?</iframe>', 
 150             start_page
, 'xml filename', default
=None) 
 152             # Fallback to the older format 
 153             xml_name 
= self
._html
_search
_regex
(r
'<iframe src=".*?\?xmlURL=xml/(?P<xml_file>.+?\.xml).*?".*?</iframe>', start_page
, 'xml filename') 
 155         xml_decription_url 
= xml_root 
+ 'xml/' + xml_name
 
 156         xml_description 
= self
._download
_xml
(xml_decription_url
, video_id
) 
 158         video_title 
= xml_description
.find('./metadata/title').text
 
 159         video_formats 
= self
._parse
_mp
4(xml_description
) 
 160         if video_formats 
is None: 
 161             video_formats 
= self
._parse
_flv
(xml_description
) 
 165             'title': video_title
, 
 166             'formats': video_formats
,