]> Raphaƫl G. Git Repositories - youtubedl/blobdiff - youtube_dl/extractor/openload.py
debian/patches: Remove patch to disambiguate between ffmpeg and avconv.
[youtubedl] / youtube_dl / extractor / openload.py
index eaaaf8a081782ae597f2ed6a376c09fca6fbf5e5..cf51e4770db6d4e67f97ceeedb3eab966584ef3d 100644 (file)
@@ -243,7 +243,18 @@ class PhantomJSwrapper(object):
 
 
 class OpenloadIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?(?:openload\.(?:co|io|link)|oload\.(?:tv|stream))/(?:f|embed)/(?P<id>[a-zA-Z0-9-_]+)'
+    _VALID_URL = r'''(?x)
+                    https?://
+                        (?P<host>
+                            (?:www\.)?
+                            (?:
+                                openload\.(?:co|io|link)|
+                                oload\.(?:tv|stream|site|xyz|win|download|cloud|cc|icu|fun)
+                            )
+                        )/
+                        (?:f|embed)/
+                        (?P<id>[a-zA-Z0-9-_]+)
+                    '''
 
     _TESTS = [{
         'url': 'https://openload.co/f/kUEfGclsU9o',
@@ -298,6 +309,31 @@ class OpenloadIE(InfoExtractor):
     }, {
         'url': 'https://oload.stream/f/KnG-kKZdcfY',
         'only_matching': True,
+    }, {
+        'url': 'https://oload.xyz/f/WwRBpzW8Wtk',
+        'only_matching': True,
+    }, {
+        'url': 'https://oload.win/f/kUEfGclsU9o',
+        'only_matching': True,
+    }, {
+        'url': 'https://oload.download/f/kUEfGclsU9o',
+        'only_matching': True,
+    }, {
+        'url': 'https://oload.cloud/f/4ZDnBXRWiB8',
+        'only_matching': True,
+    }, {
+        # Its title has not got its extension but url has it
+        'url': 'https://oload.download/f/N4Otkw39VCw/Tomb.Raider.2018.HDRip.XviD.AC3-EVO.avi.mp4',
+        'only_matching': True,
+    }, {
+        'url': 'https://oload.cc/embed/5NEAbI2BDSk',
+        'only_matching': True,
+    }, {
+        'url': 'https://oload.icu/f/-_i4y_F_Hs8',
+        'only_matching': True,
+    }, {
+        'url': 'https://oload.fun/f/gb6G1H4sHXY',
+        'only_matching': True,
     }]
 
     _USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36'
@@ -309,8 +345,11 @@ class OpenloadIE(InfoExtractor):
             webpage)
 
     def _real_extract(self, url):
-        video_id = self._match_id(url)
-        url_pattern = 'https://openload.co/%%s/%s/' % video_id
+        mobj = re.match(self._VALID_URL, url)
+        host = mobj.group('host')
+        video_id = mobj.group('id')
+
+        url_pattern = 'https://%s/%%s/%s/' % (host, video_id)
         headers = {
             'User-Agent': self._USER_AGENT,
         }
@@ -334,12 +373,16 @@ class OpenloadIE(InfoExtractor):
 
         decoded_id = (get_element_by_id('streamurl', webpage) or
                       get_element_by_id('streamuri', webpage) or
-                      get_element_by_id('streamurj', webpage))
-
-        if not decoded_id:
-            raise ExtractorError('Can\'t find stream URL', video_id=video_id)
+                      get_element_by_id('streamurj', webpage) or
+                      self._search_regex(
+                          (r'>\s*([\w-]+~\d{10,}~\d+\.\d+\.0\.0~[\w-]+)\s*<',
+                           r'>\s*([\w~-]+~\d+\.\d+\.\d+\.\d+~[\w~-]+)',
+                           r'>\s*([\w-]+~\d{10,}~(?:[a-f\d]+:){2}:~[\w-]+)\s*<',
+                           r'>\s*([\w~-]+~[a-f0-9:]+~[\w~-]+)\s*<',
+                           r'>\s*([\w~-]+~[a-f0-9:]+~[\w~-]+)'), webpage,
+                          'stream URL'))
 
-        video_url = 'https://openload.co/stream/%s?mime=true' % decoded_id
+        video_url = 'https://%s/stream/%s?mime=true' % (host, decoded_id)
 
         title = self._og_search_title(webpage, default=None) or self._search_regex(
             r'<span[^>]+class=["\']title["\'][^>]*>([^<]+)', webpage,
@@ -350,14 +393,12 @@ class OpenloadIE(InfoExtractor):
         entry = entries[0] if entries else {}
         subtitles = entry.get('subtitles')
 
-        info_dict = {
+        return {
             'id': video_id,
             'title': title,
             'thumbnail': entry.get('thumbnail') or self._og_search_thumbnail(webpage, default=None),
             'url': video_url,
-            # Seems all videos have extensions in their titles
-            'ext': determine_ext(title, 'mp4'),
+            'ext': determine_ext(title, None) or determine_ext(url, 'mp4'),
             'subtitles': subtitles,
             'http_headers': headers,
         }
-        return info_dict