]> Raphaƫl G. Git Repositories - youtubedl/blobdiff - youtube_dl/extractor/adobepass.py
Fix extraction from youtube.
[youtubedl] / youtube_dl / extractor / adobepass.py
index 7da96c65c694fbd57fa47b8de30f77851e5b5f13..1cf2dcbf35567bc6a47664e1bbf05234eecaf2fb 100644 (file)
@@ -6,12 +6,16 @@ import time
 import xml.etree.ElementTree as etree
 
 from .common import InfoExtractor
-from ..compat import compat_urlparse
+from ..compat import (
+    compat_kwargs,
+    compat_urlparse,
+)
 from ..utils import (
     unescapeHTML,
     urlencode_postdata,
     unified_timestamp,
     ExtractorError,
+    NO_DEFAULT,
 )
 
 
@@ -21,6 +25,11 @@ MSO_INFO = {
         'username_field': 'username',
         'password_field': 'password',
     },
+    'ATTOTT': {
+        'name': 'DIRECTV NOW',
+        'username_field': 'email',
+        'password_field': 'loginpassword',
+    },
     'Rogers': {
         'name': 'Rogers',
         'username_field': 'UserName',
@@ -36,6 +45,11 @@ MSO_INFO = {
         'username_field': 'Ecom_User_ID',
         'password_field': 'Ecom_Password',
     },
+    'Brighthouse': {
+        'name': 'Bright House Networks | Spectrum',
+        'username_field': 'j_username',
+        'password_field': 'j_password',
+    },
     'Charter_Direct': {
         'name': 'Charter Spectrum',
         'username_field': 'IDToken1',
@@ -1308,11 +1322,14 @@ class AdobePassIE(InfoExtractor):
     _USER_AGENT = 'Mozilla/5.0 (X11; Linux i686; rv:47.0) Gecko/20100101 Firefox/47.0'
     _MVPD_CACHE = 'ap-mvpd'
 
+    _DOWNLOADING_LOGIN_PAGE = 'Downloading Provider Login Page'
+
     def _download_webpage_handle(self, *args, **kwargs):
-        headers = kwargs.get('headers', {})
-        headers.update(self.geo_verification_headers())
+        headers = self.geo_verification_headers()
+        headers.update(kwargs.get('headers', {}))
         kwargs['headers'] = headers
-        return super(AdobePassIE, self)._download_webpage_handle(*args, **kwargs)
+        return super(AdobePassIE, self)._download_webpage_handle(
+            *args, **compat_kwargs(kwargs))
 
     @staticmethod
     def _get_mvpd_resource(provider_id, title, guid, rating):
@@ -1356,6 +1373,21 @@ class AdobePassIE(InfoExtractor):
                 'Use --ap-mso to specify Adobe Pass Multiple-system operator Identifier '
                 'and --ap-username and --ap-password or --netrc to provide account credentials.', expected=True)
 
+        def extract_redirect_url(html, url=None, fatal=False):
+            # TODO: eliminate code duplication with generic extractor and move
+            # redirection code into _download_webpage_handle
+            REDIRECT_REGEX = r'[0-9]{,2};\s*(?:URL|url)=\'?([^\'"]+)'
+            redirect_url = self._search_regex(
+                r'(?i)<meta\s+(?=(?:[a-z-]+="[^"]+"\s+)*http-equiv="refresh")'
+                r'(?:[a-z-]+="[^"]+"\s+)*?content="%s' % REDIRECT_REGEX,
+                html, 'meta refresh redirect',
+                default=NO_DEFAULT if fatal else None, fatal=fatal)
+            if not redirect_url:
+                return None
+            if url:
+                redirect_url = compat_urlparse.urljoin(url, unescapeHTML(redirect_url))
+            return redirect_url
+
         mvpd_headers = {
             'ap_42': 'anonymous',
             'ap_11': 'Linux i686',
@@ -1405,16 +1437,15 @@ class AdobePassIE(InfoExtractor):
                         if '<form name="signin"' in provider_redirect_page:
                             provider_login_page_res = provider_redirect_page_res
                         elif 'http-equiv="refresh"' in provider_redirect_page:
-                            oauth_redirect_url = self._html_search_regex(
-                                r'content="0;\s*url=([^\'"]+)',
-                                provider_redirect_page, 'meta refresh redirect')
+                            oauth_redirect_url = extract_redirect_url(
+                                provider_redirect_page, fatal=True)
                             provider_login_page_res = self._download_webpage_handle(
                                 oauth_redirect_url, video_id,
-                                'Downloading Provider Login Page')
+                                self._DOWNLOADING_LOGIN_PAGE)
                         else:
                             provider_login_page_res = post_form(
                                 provider_redirect_page_res,
-                                'Downloading Provider Login Page')
+                                self._DOWNLOADING_LOGIN_PAGE)
 
                         mvpd_confirm_page_res = post_form(
                             provider_login_page_res, 'Logging in', {
@@ -1461,8 +1492,17 @@ class AdobePassIE(InfoExtractor):
                             'Content-Type': 'application/x-www-form-urlencoded'
                         })
                 else:
+                    # Some providers (e.g. DIRECTV NOW) have another meta refresh
+                    # based redirect that should be followed.
+                    provider_redirect_page, urlh = provider_redirect_page_res
+                    provider_refresh_redirect_url = extract_redirect_url(
+                        provider_redirect_page, url=urlh.geturl())
+                    if provider_refresh_redirect_url:
+                        provider_redirect_page_res = self._download_webpage_handle(
+                            provider_refresh_redirect_url, video_id,
+                            'Downloading Provider Redirect Page (meta refresh)')
                     provider_login_page_res = post_form(
-                        provider_redirect_page_res, 'Downloading Provider Login Page')
+                        provider_redirect_page_res, self._DOWNLOADING_LOGIN_PAGE)
                     mvpd_confirm_page_res = post_form(provider_login_page_res, 'Logging in', {
                         mso_info.get('username_field', 'username'): username,
                         mso_info.get('password_field', 'password'): password,