X-Git-Url: https://git.rapsys.eu/youtubedl/blobdiff_plain/4e090bc3ceacc4e3cd464d12ea97700e3acad37d..fe6cc1b2aebf52f3fcdcc22ca8e846b67a406cc0:/youtube_dl/extractor/kaltura.py diff --git a/youtube_dl/extractor/kaltura.py b/youtube_dl/extractor/kaltura.py index 54374ea..639d738 100644 --- a/youtube_dl/extractor/kaltura.py +++ b/youtube_dl/extractor/kaltura.py @@ -91,6 +91,7 @@ class KalturaIE(InfoExtractor): }], }, }, + 'skip': 'Gone. Maybe https://www.safaribooksonline.com/library/tutorials/introduction-to-python-anon/3469/', 'params': { 'skip_download': True, }, @@ -107,29 +108,45 @@ class KalturaIE(InfoExtractor): @staticmethod def _extract_url(webpage): + # Embed codes: https://knowledge.kaltura.com/embedding-kaltura-media-players-your-site mobj = ( re.search( r"""(?xs) kWidget\.(?:thumb)?[Ee]mbed\( \{.*? - (?P<q1>['\"])wid(?P=q1)\s*:\s* - (?P<q2>['\"])_?(?P<partner_id>(?:(?!(?P=q2)).)+)(?P=q2),.*? - (?P<q3>['\"])entry_?[Ii]d(?P=q3)\s*:\s* - (?P<q4>['\"])(?P<id>(?:(?!(?P=q4)).)+)(?P=q4)(?:,|\s*\}) - """, webpage) or - re.search( + (?P<q1>['"])wid(?P=q1)\s*:\s* + (?P<q2>['"])_?(?P<partner_id>(?:(?!(?P=q2)).)+)(?P=q2),.*? + (?P<q3>['"])entry_?[Ii]d(?P=q3)\s*:\s* + (?P<q4>['"])(?P<id>(?:(?!(?P=q4)).)+)(?P=q4)(?:,|\s*\}) + """, webpage) + or re.search( r'''(?xs) - (?P<q1>["\']) + (?P<q1>["']) (?:https?:)?//cdnapi(?:sec)?\.kaltura\.com(?::\d+)?/(?:(?!(?P=q1)).)*\b(?:p|partner_id)/(?P<partner_id>\d+)(?:(?!(?P=q1)).)* (?P=q1).*? 
(?: - entry_?[Ii]d| - (?P<q2>["\'])entry_?[Ii]d(?P=q2) - )\s*:\s* - (?P<q3>["\'])(?P<id>(?:(?!(?P=q3)).)+)(?P=q3) - ''', webpage)) + (?: + entry_?[Ii]d| + (?P<q2>["'])entry_?[Ii]d(?P=q2) + )\s*:\s*| + \[\s*(?P<q2_1>["'])entry_?[Ii]d(?P=q2_1)\s*\]\s*=\s* + ) + (?P<q3>["'])(?P<id>(?:(?!(?P=q3)).)+)(?P=q3) + ''', webpage) + or re.search( + r'''(?xs) + <(?:iframe[^>]+src|meta[^>]+\bcontent)=(?P<q1>["']) + (?:https?:)?//(?:(?:www|cdnapi(?:sec)?)\.)?kaltura\.com/(?:(?!(?P=q1)).)*\b(?:p|partner_id)/(?P<partner_id>\d+) + (?:(?!(?P=q1)).)* + [?&;]entry_id=(?P<id>(?:(?!(?P=q1))[^&])+) + (?:(?!(?P=q1)).)* + (?P=q1) + ''', webpage) + ) if mobj: embed_info = mobj.groupdict() + for k, v in embed_info.items(): + embed_info[k] = v.strip() url = 'kaltura:%(partner_id)s:%(id)s' % embed_info escaped_pid = re.escape(embed_info['partner_id']) service_url = re.search( @@ -177,6 +194,8 @@ class KalturaIE(InfoExtractor): 'entryId': video_id, 'service': 'baseentry', 'ks': '{1:result:ks}', + 'responseProfile:fields': 'createdAt,dataUrl,duration,name,plays,thumbnailUrl,userId', + 'responseProfile:type': 1, }, { 'action': 'getbyentryid', @@ -276,6 +295,9 @@ class KalturaIE(InfoExtractor): # skip for now. if f.get('fileExt') == 'chun': continue + # DRM-protected video, cannot be decrypted + if f.get('fileExt') == 'wvm': + continue if not f.get('fileExt'): # QT indicates QuickTime; some videos have broken fileExt if f.get('containerFormat') == 'qt': @@ -313,7 +335,7 @@ class KalturaIE(InfoExtractor): if captions: for caption in captions.get('objects', []): # Continue if caption is not ready - if f.get('status') != 2: + if caption.get('status') != 2: continue if not caption.get('id'): continue