X-Git-Url: https://git.rapsys.eu/youtubedl/blobdiff_plain/4e090bc3ceacc4e3cd464d12ea97700e3acad37d..68a5583f34cad0cbaf50fa60ad5c6f8c934d0f1b:/youtube_dl/extractor/kaltura.py diff --git a/youtube_dl/extractor/kaltura.py b/youtube_dl/extractor/kaltura.py index 54374ea..562e25f 100644 --- a/youtube_dl/extractor/kaltura.py +++ b/youtube_dl/extractor/kaltura.py @@ -91,6 +91,7 @@ class KalturaIE(InfoExtractor): }], }, }, + 'skip': 'Gone. Maybe https://www.safaribooksonline.com/library/tutorials/introduction-to-python-anon/3469/', 'params': { 'skip_download': True, }, @@ -107,27 +108,40 @@ class KalturaIE(InfoExtractor): @staticmethod def _extract_url(webpage): + # Embed codes: https://knowledge.kaltura.com/embedding-kaltura-media-players-your-site mobj = ( re.search( r"""(?xs) kWidget\.(?:thumb)?[Ee]mbed\( \{.*? - (?P<q1>['\"])wid(?P=q1)\s*:\s* - (?P<q2>['\"])_?(?P<partner_id>(?:(?!(?P=q2)).)+)(?P=q2),.*? - (?P<q3>['\"])entry_?[Ii]d(?P=q3)\s*:\s* - (?P<q4>['\"])(?P<id>(?:(?!(?P=q4)).)+)(?P=q4)(?:,|\s*\}) + (?P<q1>['"])wid(?P=q1)\s*:\s* + (?P<q2>['"])_?(?P<partner_id>(?:(?!(?P=q2)).)+)(?P=q2),.*? + (?P<q3>['"])entry_?[Ii]d(?P=q3)\s*:\s* + (?P<q4>['"])(?P<id>(?:(?!(?P=q4)).)+)(?P=q4)(?:,|\s*\}) """, webpage) or re.search( r'''(?xs) - (?P<q1>["\']) + (?P<q1>["']) (?:https?:)?//cdnapi(?:sec)?\.kaltura\.com(?::\d+)?/(?:(?!(?P=q1)).)*\b(?:p|partner_id)/(?P<partner_id>\d+)(?:(?!(?P=q1)).)* (?P=q1).*? (?: - entry_?[Ii]d| - (?P<q2>["\'])entry_?[Ii]d(?P=q2) - )\s*:\s* - (?P<q3>["\'])(?P<id>(?:(?!(?P=q3)).)+)(?P=q3) - ''', webpage)) + (?: + entry_?[Ii]d| + (?P<q2>["'])entry_?[Ii]d(?P=q2) + )\s*:\s*| + \[\s*(?P<q2_1>["'])entry_?[Ii]d(?P=q2_1)\s*\]\s*=\s* + ) + (?P<q3>["'])(?P<id>(?:(?!(?P=q3)).)+)(?P=q3) + ''', webpage) or + re.search( + r'''(?xs) + <iframe[^>]+src=(?P<q1>["']) + (?:https?:)?//(?:www\.)?kaltura\.com/(?:(?!(?P=q1)).)*\b(?:p|partner_id)/(?P<partner_id>\d+) + (?:(?!(?P=q1)).)* + [?&]entry_id=(?P<id>(?:(?!(?P=q1))[^&])+) + (?P=q1) + ''', webpage) + ) if mobj: embed_info = mobj.groupdict() url = 'kaltura:%(partner_id)s:%(id)s' % embed_info @@ -276,6 +290,9 @@ class KalturaIE(InfoExtractor): # skip for now. 
if f.get('fileExt') == 'chun': continue + # DRM-protected video, cannot be decrypted + if f.get('fileExt') == 'wvm': + continue if not f.get('fileExt'): # QT indicates QuickTime; some videos have broken fileExt if f.get('containerFormat') == 'qt': @@ -313,7 +330,7 @@ class KalturaIE(InfoExtractor): if captions: for caption in captions.get('objects', []): # Continue if caption is not ready - if f.get('status') != 2: + if caption.get('status') != 2: continue if not caption.get('id'): continue