Raphaël G. Git Repositories - youtubedl/blobdiff - youtube_dl/extractor/tnaflix.py
d/p/disable-autoupdate-mechanism.patch: Extend to clean up errant import and README...
[youtubedl] / youtube_dl / extractor / tnaflix.py
index 78174178e6ef69362462f96f997b7a37a640a275..b3573c6e077c3dff730f13a02710f81a2c2cf0e8 100644 (file)
@@ -10,6 +10,7 @@ from ..utils import (
     int_or_none,
     parse_duration,
     str_to_int,
     int_or_none,
     parse_duration,
     str_to_int,
+    unescapeHTML,
     xpath_text,
 )
 
     xpath_text,
 )
 
@@ -17,9 +18,12 @@ from ..utils import (
 class TNAFlixNetworkBaseIE(InfoExtractor):
     # May be overridden in descendants if necessary
     _CONFIG_REGEX = [
 class TNAFlixNetworkBaseIE(InfoExtractor):
     # May be overridden in descendants if necessary
     _CONFIG_REGEX = [
-        r'flashvars\.config\s*=\s*escape\("([^"]+)"',
-        r'<input[^>]+name="config\d?" value="([^"]+)"',
+        r'flashvars\.config\s*=\s*escape\("(?P<url>[^"]+)"',
+        r'<input[^>]+name="config\d?" value="(?P<url>[^"]+)"',
+        r'config\s*=\s*(["\'])(?P<url>(?:https?:)?//(?:(?!\1).)+)\1',
     ]
     ]
+    _HOST = 'tna'
+    _VKEY_SUFFIX = ''
     _TITLE_REGEX = r'<input[^>]+name="title" value="([^"]+)"'
     _DESCRIPTION_REGEX = r'<input[^>]+name="description" value="([^"]+)"'
     _UPLOADER_REGEX = r'<input[^>]+name="username" value="([^"]+)"'
     _TITLE_REGEX = r'<input[^>]+name="title" value="([^"]+)"'
     _DESCRIPTION_REGEX = r'<input[^>]+name="description" value="([^"]+)"'
     _UPLOADER_REGEX = r'<input[^>]+name="username" value="([^"]+)"'
@@ -71,25 +75,34 @@ class TNAFlixNetworkBaseIE(InfoExtractor):
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
         video_id = mobj.group('id')
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
         video_id = mobj.group('id')
-        display_id = mobj.group('display_id') if 'display_id' in mobj.groupdict() else video_id
+        for display_id_key in ('display_id', 'display_id_2'):
+            if display_id_key in mobj.groupdict():
+                display_id = mobj.group(display_id_key)
+                if display_id:
+                    break
+        else:
+            display_id = video_id
 
         webpage = self._download_webpage(url, display_id)
 
         cfg_url = self._proto_relative_url(self._html_search_regex(
 
         webpage = self._download_webpage(url, display_id)
 
         cfg_url = self._proto_relative_url(self._html_search_regex(
-            self._CONFIG_REGEX, webpage, 'flashvars.config', default=None), 'http:')
+            self._CONFIG_REGEX, webpage, 'flashvars.config', default=None,
+            group='url'), 'http:')
 
         if not cfg_url:
             inputs = self._hidden_inputs(webpage)
 
         if not cfg_url:
             inputs = self._hidden_inputs(webpage)
-            cfg_url = 'https://cdn-fck.tnaflix.com/tnaflix/%s.fid?key=%s' % (inputs['vkey'], inputs['nkey'])
+            cfg_url = ('https://cdn-fck.%sflix.com/%sflix/%s%s.fid?key=%s&VID=%s&premium=1&vip=1&alpha'
+                       % (self._HOST, self._HOST, inputs['vkey'], self._VKEY_SUFFIX, inputs['nkey'], video_id))
 
         cfg_xml = self._download_xml(
             cfg_url, display_id, 'Downloading metadata',
 
         cfg_xml = self._download_xml(
             cfg_url, display_id, 'Downloading metadata',
-            transform_source=fix_xml_ampersands)
+            transform_source=fix_xml_ampersands, headers={'Referer': url})
 
         formats = []
 
         def extract_video_url(vl):
 
         formats = []
 
         def extract_video_url(vl):
-            return re.sub('speed=\d+', 'speed=', vl.text)
+            # Any URL modification now results in HTTP Error 403: Forbidden
+            return unescapeHTML(vl.text)
 
         video_link = cfg_xml.find('./videoLink')
         if video_link is not None:
 
         video_link = cfg_xml.find('./videoLink')
         if video_link is not None:
@@ -118,8 +131,12 @@ class TNAFlixNetworkBaseIE(InfoExtractor):
             xpath_text(cfg_xml, './startThumb', 'thumbnail'), 'http:')
         thumbnails = self._extract_thumbnails(cfg_xml)
 
             xpath_text(cfg_xml, './startThumb', 'thumbnail'), 'http:')
         thumbnails = self._extract_thumbnails(cfg_xml)
 
-        title = self._html_search_regex(
-            self._TITLE_REGEX, webpage, 'title') if self._TITLE_REGEX else self._og_search_title(webpage)
+        title = None
+        if self._TITLE_REGEX:
+            title = self._html_search_regex(
+                self._TITLE_REGEX, webpage, 'title', default=None)
+        if not title:
+            title = self._og_search_title(webpage)
 
         age_limit = self._rta_search(webpage) or 18
 
 
         age_limit = self._rta_search(webpage) or 18
 
@@ -168,7 +185,7 @@ class TNAFlixNetworkEmbedIE(TNAFlixNetworkBaseIE):
             'display_id': '6538',
             'ext': 'mp4',
             'title': 'Educational xxx video',
             'display_id': '6538',
             'ext': 'mp4',
             'title': 'Educational xxx video',
-            'thumbnail': 're:https?://.*\.jpg$',
+            'thumbnail': r're:https?://.*\.jpg$',
             'age_limit': 18,
         },
         'params': {
             'age_limit': 18,
         },
         'params': {
@@ -186,13 +203,16 @@ class TNAFlixNetworkEmbedIE(TNAFlixNetworkBaseIE):
             webpage)]
 
 
             webpage)]
 
 
-class TNAFlixIE(TNAFlixNetworkBaseIE):
+class TNAEMPFlixBaseIE(TNAFlixNetworkBaseIE):
+    _DESCRIPTION_REGEX = r'(?s)>Description:</[^>]+>(.+?)<'
+    _UPLOADER_REGEX = r'<span>by\s*<a[^>]+\bhref=["\']/profile/[^>]+>([^<]+)<'
+    _CATEGORIES_REGEX = r'(?s)<span[^>]*>Categories:</span>(.+?)</div>'
+
+
+class TNAFlixIE(TNAEMPFlixBaseIE):
     _VALID_URL = r'https?://(?:www\.)?tnaflix\.com/[^/]+/(?P<display_id>[^/]+)/video(?P<id>\d+)'
 
     _VALID_URL = r'https?://(?:www\.)?tnaflix\.com/[^/]+/(?P<display_id>[^/]+)/video(?P<id>\d+)'
 
-    _TITLE_REGEX = r'<title>(.+?) - TNAFlix Porn Videos</title>'
-    _DESCRIPTION_REGEX = r'<meta[^>]+name="description"[^>]+content="([^"]+)"'
-    _UPLOADER_REGEX = r'<i>\s*Verified Member\s*</i>\s*<h1>(.+?)</h1>'
-    _CATEGORIES_REGEX = r'(?s)<span[^>]*>Categories:</span>(.+?)</div>'
+    _TITLE_REGEX = r'<title>(.+?) - (?:TNAFlix Porn Videos|TNAFlix\.com)</title>'
 
     _TESTS = [{
         # anonymous uploader, no categories
 
     _TESTS = [{
         # anonymous uploader, no categories
@@ -203,7 +223,7 @@ class TNAFlixIE(TNAFlixNetworkBaseIE):
             'display_id': 'Carmella-Decesare-striptease',
             'ext': 'mp4',
             'title': 'Carmella Decesare - striptease',
             'display_id': 'Carmella-Decesare-striptease',
             'ext': 'mp4',
             'title': 'Carmella Decesare - striptease',
-            'thumbnail': 're:https?://.*\.jpg$',
+            'thumbnail': r're:https?://.*\.jpg$',
             'duration': 91,
             'age_limit': 18,
             'categories': ['Porn Stars'],
             'duration': 91,
             'age_limit': 18,
             'categories': ['Porn Stars'],
@@ -211,18 +231,18 @@ class TNAFlixIE(TNAFlixNetworkBaseIE):
     }, {
         # non-anonymous uploader, categories
         'url': 'https://www.tnaflix.com/teen-porn/Educational-xxx-video/video6538',
     }, {
         # non-anonymous uploader, categories
         'url': 'https://www.tnaflix.com/teen-porn/Educational-xxx-video/video6538',
-        'md5': 'fcba2636572895aba116171a899a5658',
+        'md5': '0f5d4d490dbfd117b8607054248a07c0',
         'info_dict': {
             'id': '6538',
             'display_id': 'Educational-xxx-video',
         'info_dict': {
             'id': '6538',
             'display_id': 'Educational-xxx-video',
-            'ext': 'flv',
+            'ext': 'mp4',
             'title': 'Educational xxx video',
             'description': 'md5:b4fab8f88a8621c8fabd361a173fe5b8',
             'title': 'Educational xxx video',
             'description': 'md5:b4fab8f88a8621c8fabd361a173fe5b8',
-            'thumbnail': 're:https?://.*\.jpg$',
+            'thumbnail': r're:https?://.*\.jpg$',
             'duration': 164,
             'age_limit': 18,
             'uploader': 'bobwhite39',
             'duration': 164,
             'age_limit': 18,
             'uploader': 'bobwhite39',
-            'categories': ['Amateur Porn', 'Squirting Videos', 'Teen Girls 18+'],
+            'categories': list,
         }
     }, {
         'url': 'https://www.tnaflix.com/amateur-porn/bunzHD-Ms.Donk/video358632',
         }
     }, {
         'url': 'https://www.tnaflix.com/amateur-porn/bunzHD-Ms.Donk/video358632',
@@ -230,21 +250,22 @@ class TNAFlixIE(TNAFlixNetworkBaseIE):
     }]
 
 
     }]
 
 
-class EMPFlixIE(TNAFlixNetworkBaseIE):
-    _VALID_URL = r'https?://(?:www\.)?empflix\.com/videos/(?P<display_id>.+?)-(?P<id>[0-9]+)\.html'
+class EMPFlixIE(TNAEMPFlixBaseIE):
+    _VALID_URL = r'https?://(?:www\.)?empflix\.com/(?:videos/(?P<display_id>.+?)-|[^/]+/(?P<display_id_2>[^/]+)/video)(?P<id>[0-9]+)'
 
 
-    _UPLOADER_REGEX = r'<span[^>]+class="infoTitle"[^>]*>Uploaded By:</span>(.+?)</li>'
+    _HOST = 'emp'
+    _VKEY_SUFFIX = '-1'
 
     _TESTS = [{
         'url': 'http://www.empflix.com/videos/Amateur-Finger-Fuck-33051.html',
 
     _TESTS = [{
         'url': 'http://www.empflix.com/videos/Amateur-Finger-Fuck-33051.html',
-        'md5': 'b1bc15b6412d33902d6e5952035fcabc',
+        'md5': 'bc30d48b91a7179448a0bda465114676',
         'info_dict': {
             'id': '33051',
             'display_id': 'Amateur-Finger-Fuck',
             'ext': 'mp4',
             'title': 'Amateur Finger Fuck',
             'description': 'Amateur solo finger fucking.',
         'info_dict': {
             'id': '33051',
             'display_id': 'Amateur-Finger-Fuck',
             'ext': 'mp4',
             'title': 'Amateur Finger Fuck',
             'description': 'Amateur solo finger fucking.',
-            'thumbnail': 're:https?://.*\.jpg$',
+            'thumbnail': r're:https?://.*\.jpg$',
             'duration': 83,
             'age_limit': 18,
             'uploader': 'cwbike',
             'duration': 83,
             'age_limit': 18,
             'uploader': 'cwbike',
@@ -253,6 +274,9 @@ class EMPFlixIE(TNAFlixNetworkBaseIE):
     }, {
         'url': 'http://www.empflix.com/videos/[AROMA][ARMD-718]-Aoi-Yoshino-Sawa-25826.html',
         'only_matching': True,
     }, {
         'url': 'http://www.empflix.com/videos/[AROMA][ARMD-718]-Aoi-Yoshino-Sawa-25826.html',
         'only_matching': True,
+    }, {
+        'url': 'https://www.empflix.com/amateur-porn/Amateur-Finger-Fuck/video33051',
+        'only_matching': True,
     }]
 
 
     }]
 
 
@@ -274,7 +298,7 @@ class MovieFapIE(TNAFlixNetworkBaseIE):
             'ext': 'mp4',
             'title': 'Experienced MILF Amazing Handjob',
             'description': 'Experienced MILF giving an Amazing Handjob',
             'ext': 'mp4',
             'title': 'Experienced MILF Amazing Handjob',
             'description': 'Experienced MILF giving an Amazing Handjob',
-            'thumbnail': 're:https?://.*\.jpg$',
+            'thumbnail': r're:https?://.*\.jpg$',
             'age_limit': 18,
             'uploader': 'darvinfred06',
             'view_count': int,
             'age_limit': 18,
             'uploader': 'darvinfred06',
             'view_count': int,
@@ -292,7 +316,7 @@ class MovieFapIE(TNAFlixNetworkBaseIE):
             'ext': 'flv',
             'title': 'Jeune Couple Russe',
             'description': 'Amateur',
             'ext': 'flv',
             'title': 'Jeune Couple Russe',
             'description': 'Amateur',
-            'thumbnail': 're:https?://.*\.jpg$',
+            'thumbnail': r're:https?://.*\.jpg$',
             'age_limit': 18,
             'uploader': 'whiskeyjar',
             'view_count': int,
             'age_limit': 18,
             'uploader': 'whiskeyjar',
             'view_count': int,