Raphaël G. Git Repositories - youtubedl/blobdiff - youtube_dl/extractor/iprima.py
Update upstream source from tag 'upstream/2018.11.07'
[youtubedl] / youtube_dl / extractor / iprima.py
index 788bbe0d5c44177b5a943da9f9c3c3adf46a77b1..1d58d6e850724f226d66f5822777010f8a8b8d38 100644 (file)
@@ -8,12 +8,12 @@ from .common import InfoExtractor
 from ..utils import (
     determine_ext,
     js_to_json,
 from ..utils import (
     determine_ext,
     js_to_json,
-    sanitized_Request,
 )
 
 
 class IPrimaIE(InfoExtractor):
 )
 
 
 class IPrimaIE(InfoExtractor):
-    _VALID_URL = r'https?://play\.iprima\.cz/(?:.+/)?(?P<id>[^?#]+)'
+    _VALID_URL = r'https?://(?:play|prima)\.iprima\.cz/(?:.+/)?(?P<id>[^?#]+)'
+    _GEO_BYPASS = False
 
     _TESTS = [{
         'url': 'http://play.iprima.cz/gondici-s-r-o-33',
 
     _TESTS = [{
         'url': 'http://play.iprima.cz/gondici-s-r-o-33',
@@ -29,20 +29,39 @@ class IPrimaIE(InfoExtractor):
     }, {
         'url': 'http://play.iprima.cz/particka/particka-92',
         'only_matching': True,
     }, {
         'url': 'http://play.iprima.cz/particka/particka-92',
         'only_matching': True,
+    }, {
+        # geo restricted
+        'url': 'http://play.iprima.cz/closer-nove-pripady/closer-nove-pripady-iv-1',
+        'only_matching': True,
+    }, {
+        # iframe api.play-backend.iprima.cz
+        'url': 'https://prima.iprima.cz/my-little-pony/mapa-znameni-2-2',
+        'only_matching': True,
+    }, {
+        # iframe prima.iprima.cz
+        'url': 'https://prima.iprima.cz/porady/jak-se-stavi-sen/rodina-rathousova-praha',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
 
     }]
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
 
+        self._set_cookie('play.iprima.cz', 'ott_adult_confirmed', '1')
+
         webpage = self._download_webpage(url, video_id)
 
         webpage = self._download_webpage(url, video_id)
 
-        video_id = self._search_regex(r'data-product="([^"]+)">', webpage, 'real id')
+        video_id = self._search_regex(
+            (r'<iframe[^>]+\bsrc=["\'](?:https?:)?//(?:api\.play-backend\.iprima\.cz/prehravac/embedded|prima\.iprima\.cz/[^/]+/[^/]+)\?.*?\bid=(p\d+)',
+             r'data-product="([^"]+)">'),
+            webpage, 'real id')
 
 
-        req = sanitized_Request(
-            'http://play.iprima.cz/prehravac/init?_infuse=1'
-            '&_ts=%s&productId=%s' % (round(time.time()), video_id))
-        req.add_header('Referer', url)
-        playerpage = self._download_webpage(req, video_id, note='Downloading player')
+        playerpage = self._download_webpage(
+            'http://play.iprima.cz/prehravac/init',
+            video_id, note='Downloading player', query={
+                '_infuse': 1,
+                '_ts': round(time.time()),
+                'productId': video_id,
+            }, headers={'Referer': url})
 
         formats = []
 
 
         formats = []
 
@@ -65,7 +84,7 @@ class IPrimaIE(InfoExtractor):
 
         options = self._parse_json(
             self._search_regex(
 
         options = self._parse_json(
             self._search_regex(
-                r'(?s)var\s+playerOptions\s*=\s*({.+?});',
+                r'(?s)(?:TDIPlayerOptions|playerOptions)\s*=\s*({.+?});\s*\]\]',
                 playerpage, 'player options', default='{}'),
             video_id, transform_source=js_to_json, fatal=False)
         if options:
                 playerpage, 'player options', default='{}'),
             video_id, transform_source=js_to_json, fatal=False)
         if options:
@@ -81,6 +100,9 @@ class IPrimaIE(InfoExtractor):
             for _, src in re.findall(r'src["\']\s*:\s*(["\'])(.+?)\1', playerpage):
                 extract_formats(src)
 
             for _, src in re.findall(r'src["\']\s*:\s*(["\'])(.+?)\1', playerpage):
                 extract_formats(src)
 
+        if not formats and '>GEO_IP_NOT_ALLOWED<' in playerpage:
+            self.raise_geo_restricted(countries=['CZ'])
+
         self._sort_formats(formats)
 
         return {
         self._sort_formats(formats)
 
         return {