Raphaël G. Git Repositories - youtubedl/commitdiff
New upstream version 2017.11.06
author: Rogério Brito <rbrito@ime.usp.br>
Wed, 8 Nov 2017 06:34:01 +0000 (04:34 -0200)
committer: Rogério Brito <rbrito@ime.usp.br>
Wed, 8 Nov 2017 06:34:01 +0000 (04:34 -0200)
46 files changed:
ChangeLog
README.md
README.txt
docs/supportedsites.md
test/test_InfoExtractor.py
test/test_utils.py
youtube-dl
youtube_dl/downloader/f4m.py
youtube_dl/downloader/fragment.py
youtube_dl/downloader/hls.py
youtube_dl/extractor/arte.py
youtube_dl/extractor/azmedien.py
youtube_dl/extractor/canvas.py
youtube_dl/extractor/common.py
youtube_dl/extractor/dctp.py
youtube_dl/extractor/dramafever.py
youtube_dl/extractor/drtv.py
youtube_dl/extractor/egghead.py
youtube_dl/extractor/eporner.py
youtube_dl/extractor/extractors.py
youtube_dl/extractor/fxnetworks.py
youtube_dl/extractor/gamespot.py
youtube_dl/extractor/gigya.py [new file with mode: 0644]
youtube_dl/extractor/hotstar.py
youtube_dl/extractor/medialaan.py
youtube_dl/extractor/megaphone.py
youtube_dl/extractor/myvideo.py [deleted file]
youtube_dl/extractor/nbc.py
youtube_dl/extractor/ndtv.py
youtube_dl/extractor/nick.py
youtube_dl/extractor/niconico.py
youtube_dl/extractor/parliamentliveuk.py
youtube_dl/extractor/pbs.py
youtube_dl/extractor/servus.py [new file with mode: 0644]
youtube_dl/extractor/skysports.py
youtube_dl/extractor/soundcloud.py
youtube_dl/extractor/soundgasm.py
youtube_dl/extractor/spankbang.py
youtube_dl/extractor/twentytwotracks.py [deleted file]
youtube_dl/extractor/twitch.py
youtube_dl/extractor/unity.py [new file with mode: 0644]
youtube_dl/extractor/vimeo.py
youtube_dl/extractor/younow.py [new file with mode: 0644]
youtube_dl/extractor/youtube.py
youtube_dl/utils.py
youtube_dl/version.py

index d728e4d0362356f39af55c2da18a035e6b3a5d38..8af3682745463d71f6bbf97057f840a3b01e00f0 100644 (file)
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,64 @@
+version 2017.11.06
+
+Core
++ [extractor/common] Add protocol for f4m formats
+* [f4m] Prefer baseURL for relative URLs (#14660)
+* [extractor/common] Respect URL query in _extract_wowza_formats (#14645)
+
+Extractors
++ [hotstar:playlist] Add support for playlists (#12465)
+* [hotstar] Bypass geo restriction (#14672)
+- [22tracks] Remove extractor (#11024, #14628)
++ [skysports] Add support for ooyala videos protected with embed_token (#14641)
+* [gamespot] Extract formats referenced with new data fields (#14652)
+* [spankbang] Detect unavailable videos (#14644)
+
+
+version 2017.10.29
+
+Core
+* [extractor/common] Prefix format id for audio only HLS formats
++ [utils] Add support for zero years and months in parse_duration
+
+Extractors
+* [egghead] Fix extraction (#14388)
++ [fxnetworks] Extract series metadata (#14603)
++ [younow] Add support for younow.com (#9255, #9432, #12436)
+* [dctptv] Fix extraction (#14599)
+* [youtube] Restrict embed regex (#14600)
+* [vimeo] Restrict iframe embed regex (#14600)
+* [soundgasm] Improve extraction (#14588)
+- [myvideo] Remove extractor (#8557)
++ [nbc] Add support for classic-tv videos (#14575)
++ [vrtnu] Add support for cookies authentication and simplify (#11873)
++ [canvas] Add support for vrt.be/vrtnu (#11873)
+* [twitch:clips] Fix title extraction (#14566)
++ [ndtv] Add support for sub-sites (#14534)
+* [dramafever] Fix login error message extraction
++ [nick] Add support for more nickelodeon sites (no, dk, se, ch, fr, es, pt,
+  ro, hu) (#14553)
+
+
+version 2017.10.20
+
+Core
+* [downloader/fragment] Report warning instead of error on inconsistent
+  download state
+* [downloader/hls] Fix total fragments count when ad fragments exist
+
+Extractors
+* [parliamentliveuk] Fix extraction (#14524)
+* [soundcloud] Update client id (#14546)
++ [servus] Add support for servus.com (#14362)
++ [unity] Add support for unity3d.com (#14528)
+* [youtube] Replace youtube redirect URLs in description (#14517)
+* [pbs] Restrict direct video URL regular expression (#14519)
+* [drtv] Respect preference for direct HTTP formats (#14509)
++ [eporner] Add support for embed URLs (#14507)
+* [arte] Capture and output error message
+* [niconico] Improve uploader metadata extraction robustness (#14135)
+
+
 version 2017.10.15.1
 
 Core
@@ -834,7 +895,7 @@ version 2017.04.14
 
 Core
 + [downloader/hls] Add basic support for EXT-X-BYTERANGE tag (#10955)
-+ [adobepass] Improve Comcast and Verison login code (#10803)
++ [adobepass] Improve Comcast and Verizon login code (#10803)
 + [adobepass] Add support for Verizon (#10803)
 
 Extractors
index 2879aad24c23b7c88ffe2259219beab73aca226c..ea321d5362060ec68642508e35d31ec449d3ab6f 100644 (file)
--- a/README.md
+++ b/README.md
@@ -1,3 +1,5 @@
+[![Build Status](https://travis-ci.org/rg3/youtube-dl.svg?branch=master)](https://travis-ci.org/rg3/youtube-dl)
+
 youtube-dl - download videos from youtube.com or other video platforms
 
 - [INSTALLATION](#installation)
index a42d837696a7f1a534c212802554eec5192a1f3f..4b7adfd68c28eb9df629d0904334a29670c663d0 100644 (file)
@@ -1,3 +1,5 @@
+[Build Status]
+
 youtube-dl - download videos from youtube.com or other video platforms
 
 -   INSTALLATION
index 7071450d4dbba00e39dd416f092d842cfa49f8c2..6009df571204fdcd58bc1a47c9c66f10e8f45330 100644 (file)
@@ -3,8 +3,6 @@
  - **1up.com**
  - **20min**
  - **220.ro**
- - **22tracks:genre**
- - **22tracks:track**
  - **24video**
  - **3qsdn**: 3Q SDN
  - **3sat**
  - **HornBunny**
  - **HotNewHipHop**
  - **HotStar**
+ - **hotstar:playlist**
  - **Howcast**
  - **HowStuffWorks**
  - **HRTi**
  - **MySpace:album**
  - **MySpass**
  - **Myvi**
- - **myvideo** (Currently broken)
  - **MyVidster**
  - **n-tv.de**
  - **natgeo**
  - **SenateISVP**
  - **SendtoNews**
  - **ServingSys**
+ - **Servus**
  - **Sexu**
  - **Shahid**
  - **Shared**: shared.sx
  - **UDNEmbed**: 聯合影音
  - **UKTVPlay**
  - **Unistra**
+ - **Unity**
  - **uol.com.br**
  - **uplynk**
  - **uplynk:preplay**
  - **vpro**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl
  - **Vrak**
  - **VRT**: deredactie.be, sporza.be, cobra.be and cobra.canvas.be
+ - **VrtNU**: VrtNU.be
  - **vrv**
  - **vrv:series**
  - **VShare**
  - **YouJizz**
  - **youku**: 优酷
  - **youku:show**
+ - **YouNowChannel**
+ - **YouNowLive**
+ - **YouNowMoment**
  - **YouPorn**
  - **YourUpload**
  - **youtube**: YouTube.com
index f18a823fcf834e4bbae95e9d72f9f3821c307c2b..686c63efac7ff4b94ca6012ab7affdade8b7c008 100644 (file)
@@ -574,6 +574,32 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
                 self.ie._sort_formats(formats)
                 expect_value(self, formats, expected_formats, None)
 
+    def test_parse_f4m_formats(self):
+        _TEST_CASES = [
+            (
+                # https://github.com/rg3/youtube-dl/issues/14660
+                'custom_base_url',
+                'http://api.new.livestream.com/accounts/6115179/events/6764928/videos/144884262.f4m',
+                [{
+                    'manifest_url': 'http://api.new.livestream.com/accounts/6115179/events/6764928/videos/144884262.f4m',
+                    'ext': 'flv',
+                    'format_id': '2148',
+                    'protocol': 'f4m',
+                    'tbr': 2148,
+                    'width': 1280,
+                    'height': 720,
+                }]
+            ),
+        ]
+
+        for f4m_file, f4m_url, expected_formats in _TEST_CASES:
+            with io.open('./test/testdata/f4m/%s.f4m' % f4m_file,
+                         mode='r', encoding='utf-8') as f:
+                formats = self.ie._parse_f4m_formats(
+                    compat_etree_fromstring(f.read().encode('utf-8')),
+                    f4m_url, None)
+                self.ie._sort_formats(formats)
+                expect_value(self, formats, expected_formats, None)
 
 if __name__ == '__main__':
     unittest.main()
index efa73d0f45e17b76647a8e7895a5bd3c62f404b3..cc13f795c338d816b442d9d338262eb9323f4f64 100644 (file)
@@ -540,6 +540,7 @@ class TestUtil(unittest.TestCase):
         self.assertEqual(parse_duration('87 Min.'), 5220)
         self.assertEqual(parse_duration('PT1H0.040S'), 3600.04)
         self.assertEqual(parse_duration('PT00H03M30SZ'), 210)
+        self.assertEqual(parse_duration('P0Y0M0DT0H4M20.880S'), 260.88)
 
     def test_fix_xml_ampersands(self):
         self.assertEqual(
index 15c016a00eae3eae1c6d7aec617a4930fa8dfd42..3b69288626280e3c15249625c3a0b8972223a9e6 100755 (executable)
Binary files a/youtube-dl and b/youtube-dl differ
index c8fde9a89093393132262f7b7d5ec60d83de4b8d..fdb80f42ae3fd61d76e9fe0d14274da1c127d289 100644 (file)
@@ -243,8 +243,17 @@ def remove_encrypted_media(media):
                        media))
 
 
-def _add_ns(prop):
-    return '{http://ns.adobe.com/f4m/1.0}%s' % prop
+def _add_ns(prop, ver=1):
+    return '{http://ns.adobe.com/f4m/%d.0}%s' % (ver, prop)
+
+
+def get_base_url(manifest):
+    base_url = xpath_text(
+        manifest, [_add_ns('baseURL'), _add_ns('baseURL', 2)],
+        'base URL', default=None)
+    if base_url:
+        base_url = base_url.strip()
+    return base_url
 
 
 class F4mFD(FragmentFD):
@@ -330,13 +339,13 @@ class F4mFD(FragmentFD):
             rate, media = list(filter(
                 lambda f: int(f[0]) == requested_bitrate, formats))[0]
 
-        base_url = compat_urlparse.urljoin(man_url, media.attrib['url'])
+        # Prefer baseURL for relative URLs as per 11.2 of F4M 3.0 spec.
+        man_base_url = get_base_url(doc) or man_url
+
+        base_url = compat_urlparse.urljoin(man_base_url, media.attrib['url'])
         bootstrap_node = doc.find(_add_ns('bootstrapInfo'))
-        # From Adobe F4M 3.0 spec:
-        # The <baseURL> element SHALL be the base URL for all relative
-        # (HTTP-based) URLs in the manifest. If <baseURL> is not present, said
-        # URLs should be relative to the location of the containing document.
-        boot_info, bootstrap_url = self._parse_bootstrap_node(bootstrap_node, man_url)
+        boot_info, bootstrap_url = self._parse_bootstrap_node(
+            bootstrap_node, man_base_url)
         live = boot_info['live']
         metadata_node = media.find(_add_ns('metadata'))
         if metadata_node is not None:
index 7e891b92a3b6a05257484c5d8ba81590013bb74b..93002e45a93278e9f527a08391b8b5b6bd7df218 100644 (file)
@@ -158,7 +158,7 @@ class FragmentFD(FileDownloader):
             if os.path.isfile(encodeFilename(self.ytdl_filename(ctx['filename']))):
                 self._read_ytdl_file(ctx)
                 if ctx['fragment_index'] > 0 and resume_len == 0:
-                    self.report_error(
+                    self.report_warning(
                         'Inconsistent state of incomplete fragment download. '
                         'Restarting from the beginning...')
                     ctx['fragment_index'] = resume_len = 0
index 7955ca510dd200fedb5d8507d937b716d8d56772..1a6e226c89c809acec5a011b943043e7314d2cfd 100644 (file)
@@ -88,6 +88,7 @@ class HlsFD(FragmentFD):
             if line.startswith('#'):
                 if anvato_ad(line):
                     ad_frags += 1
+                    ad_frag_next = True
                 continue
             if ad_frag_next:
                 ad_frag_next = False
index 5cde90c5b23d1f92a7709c766102bb298e63cadb..ffc321821cd3a4a0ba9a62ad97b6f443d8ecacb3 100644 (file)
@@ -6,6 +6,7 @@ import re
 from .common import InfoExtractor
 from ..compat import (
     compat_parse_qs,
+    compat_str,
     compat_urllib_parse_urlparse,
 )
 from ..utils import (
@@ -15,6 +16,7 @@ from ..utils import (
     int_or_none,
     NO_DEFAULT,
     qualities,
+    try_get,
     unified_strdate,
 )
 
@@ -80,12 +82,15 @@ class ArteTVBaseIE(InfoExtractor):
         info = self._download_json(json_url, video_id)
         player_info = info['videoJsonPlayer']
 
-        vsr = player_info['VSR']
-
+        vsr = try_get(player_info, lambda x: x['VSR'], dict)
         if not vsr:
-            raise ExtractorError(
-                'Video %s is not available' % player_info.get('VID') or video_id,
-                expected=True)
+            error = None
+            if try_get(player_info, lambda x: x['custom_msg']['type']) == 'error':
+                error = try_get(
+                    player_info, lambda x: x['custom_msg']['msg'], compat_str)
+            if not error:
+                error = 'Video %s is not available' % player_info.get('VID') or video_id
+            raise ExtractorError(error, expected=True)
 
         upload_date_str = player_info.get('shootingDate')
         if not upload_date_str:
index f4e07d9012d40a7a9b7a385331d60ba033f2a6cd..68f26e2cad635bd8eb23a14ced3e1b90b2b320fc 100644 (file)
@@ -47,7 +47,7 @@ class AZMedienIE(AZMedienBaseIE):
         'url': 'http://www.telezueri.ch/62-show-zuerinews/13772-episode-sonntag-18-dezember-2016/32419-segment-massenabweisungen-beim-hiltl-club-wegen-pelzboom',
         'info_dict': {
             'id': '1_2444peh4',
-            'ext': 'mov',
+            'ext': 'mp4',
             'title': 'Massenabweisungen beim Hiltl Club wegen Pelzboom',
             'description': 'md5:9ea9dd1b159ad65b36ddcf7f0d7c76a8',
             'uploader_id': 'TeleZ?ri',
index 6899f8431788fad62e4615a33d95f3ffa65f4ea1..3faa76076318813d8b16f005f9a87ce5df986034 100644 (file)
@@ -1,16 +1,22 @@
 from __future__ import unicode_literals
 
 import re
+import json
 
 from .common import InfoExtractor
+from .gigya import GigyaBaseIE
+from ..compat import compat_HTTPError
 from ..utils import (
-    float_or_none,
+    ExtractorError,
     strip_or_none,
+    float_or_none,
+    int_or_none,
+    parse_iso8601,
 )
 
 
 class CanvasIE(InfoExtractor):
-    _VALID_URL = r'https?://mediazone\.vrt\.be/api/v1/(?P<site_id>canvas|een|ketnet)/assets/(?P<id>m[dz]-ast-[^/?#&]+)'
+    _VALID_URL = r'https?://mediazone\.vrt\.be/api/v1/(?P<site_id>canvas|een|ketnet|vrtvideo)/assets/(?P<id>[^/?#&]+)'
     _TESTS = [{
         'url': 'https://mediazone.vrt.be/api/v1/ketnet/assets/md-ast-4ac54990-ce66-4d00-a8ca-9eac86f4c475',
         'md5': '90139b746a0a9bd7bb631283f6e2a64e',
@@ -166,3 +172,139 @@ class CanvasEenIE(InfoExtractor):
             'title': title,
             'description': self._og_search_description(webpage),
         }
+
+
+class VrtNUIE(GigyaBaseIE):
+    IE_DESC = 'VrtNU.be'
+    _VALID_URL = r'https?://(?:www\.)?vrt\.be/(?P<site_id>vrtnu)/(?:[^/]+/)*(?P<id>[^/?#&]+)'
+    _TESTS = [{
+        'url': 'https://www.vrt.be/vrtnu/a-z/postbus-x/1/postbus-x-s1a1/',
+        'info_dict': {
+            'id': 'pbs-pub-2e2d8c27-df26-45c9-9dc6-90c78153044d$vid-90c932b1-e21d-4fb8-99b1-db7b49cf74de',
+            'ext': 'flv',
+            'title': 'De zwarte weduwe',
+            'description': 'md5:d90c21dced7db869a85db89a623998d4',
+            'duration': 1457.04,
+            'thumbnail': r're:^https?://.*\.jpg$',
+            'season': '1',
+            'season_number': 1,
+            'episode_number': 1,
+        },
+        'skip': 'This video is only available for registered users'
+    }]
+    _NETRC_MACHINE = 'vrtnu'
+    _APIKEY = '3_0Z2HujMtiWq_pkAjgnS2Md2E11a1AwZjYiBETtwNE-EoEHDINgtnvcAOpNgmrVGy'
+    _CONTEXT_ID = 'R3595707040'
+
+    def _real_initialize(self):
+        self._login()
+
+    def _login(self):
+        username, password = self._get_login_info()
+        if username is None:
+            return
+
+        auth_data = {
+            'APIKey': self._APIKEY,
+            'targetEnv': 'jssdk',
+            'loginID': username,
+            'password': password,
+            'authMode': 'cookie',
+        }
+
+        auth_info = self._gigya_login(auth_data)
+
+        # Sometimes authentication fails for no good reason, retry
+        login_attempt = 1
+        while login_attempt <= 3:
+            try:
+                # When requesting a token, no actual token is returned, but the
+                # necessary cookies are set.
+                self._request_webpage(
+                    'https://token.vrt.be',
+                    None, note='Requesting a token', errnote='Could not get a token',
+                    headers={
+                        'Content-Type': 'application/json',
+                        'Referer': 'https://www.vrt.be/vrtnu/',
+                    },
+                    data=json.dumps({
+                        'uid': auth_info['UID'],
+                        'uidsig': auth_info['UIDSignature'],
+                        'ts': auth_info['signatureTimestamp'],
+                        'email': auth_info['profile']['email'],
+                    }).encode('utf-8'))
+            except ExtractorError as e:
+                if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
+                    login_attempt += 1
+                    self.report_warning('Authentication failed')
+                    self._sleep(1, None, msg_template='Waiting for %(timeout)s seconds before trying again')
+                else:
+                    raise e
+            else:
+                break
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, display_id)
+
+        title = self._html_search_regex(
+            r'(?ms)<h1 class="content__heading">(.+?)</h1>',
+            webpage, 'title').strip()
+
+        description = self._html_search_regex(
+            r'(?ms)<div class="content__description">(.+?)</div>',
+            webpage, 'description', default=None)
+
+        season = self._html_search_regex(
+            [r'''(?xms)<div\ class="tabs__tab\ tabs__tab--active">\s*
+                    <span>seizoen\ (.+?)</span>\s*
+                </div>''',
+             r'<option value="seizoen (\d{1,3})" data-href="[^"]+?" selected>'],
+            webpage, 'season', default=None)
+
+        season_number = int_or_none(season)
+
+        episode_number = int_or_none(self._html_search_regex(
+            r'''(?xms)<div\ class="content__episode">\s*
+                    <abbr\ title="aflevering">afl</abbr>\s*<span>(\d+)</span>
+                </div>''',
+            webpage, 'episode_number', default=None))
+
+        release_date = parse_iso8601(self._html_search_regex(
+            r'(?ms)<div class="content__broadcastdate">\s*<time\ datetime="(.+?)"',
+            webpage, 'release_date', default=None))
+
+        # If there's a ? or a # in the URL, remove them and everything after
+        clean_url = url.split('?')[0].split('#')[0].strip('/')
+        securevideo_url = clean_url + '.mssecurevideo.json'
+
+        try:
+            video = self._download_json(securevideo_url, display_id)
+        except ExtractorError as e:
+            if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
+                self.raise_login_required()
+            raise
+
+        # We are dealing with a '../<show>.relevant' URL
+        redirect_url = video.get('url')
+        if redirect_url:
+            return self.url_result(self._proto_relative_url(redirect_url, 'https:'))
+
+        # There is only one entry, but with an unknown key, so just get
+        # the first one
+        video_id = list(video.values())[0].get('videoid')
+
+        return {
+            '_type': 'url_transparent',
+            'url': 'https://mediazone.vrt.be/api/v1/vrtvideo/assets/%s' % video_id,
+            'ie_key': CanvasIE.ie_key(),
+            'id': video_id,
+            'display_id': display_id,
+            'title': title,
+            'description': description,
+            'season': season,
+            'season_number': season_number,
+            'episode_number': episode_number,
+            'release_date': release_date,
+        }
index a692406931d4b63711609c7b8355635235849275..e2d9f52b018c25abc5a58a93785473ff88d90b74 100644 (file)
@@ -29,7 +29,10 @@ from ..compat import (
     compat_urlparse,
     compat_xml_parse_error,
 )
-from ..downloader.f4m import remove_encrypted_media
+from ..downloader.f4m import (
+    get_base_url,
+    remove_encrypted_media,
+)
 from ..utils import (
     NO_DEFAULT,
     age_restricted,
@@ -1239,11 +1242,8 @@ class InfoExtractor(object):
         media_nodes = remove_encrypted_media(media_nodes)
         if not media_nodes:
             return formats
-        base_url = xpath_text(
-            manifest, ['{http://ns.adobe.com/f4m/1.0}baseURL', '{http://ns.adobe.com/f4m/2.0}baseURL'],
-            'base URL', default=None)
-        if base_url:
-            base_url = base_url.strip()
+
+        manifest_base_url = get_base_url(manifest)
 
         bootstrap_info = xpath_element(
             manifest, ['{http://ns.adobe.com/f4m/1.0}bootstrapInfo', '{http://ns.adobe.com/f4m/2.0}bootstrapInfo'],
@@ -1275,7 +1275,7 @@ class InfoExtractor(object):
                     continue
                 manifest_url = (
                     media_url if media_url.startswith('http://') or media_url.startswith('https://')
-                    else ((base_url or '/'.join(manifest_url.split('/')[:-1])) + '/' + media_url))
+                    else ((manifest_base_url or '/'.join(manifest_url.split('/')[:-1])) + '/' + media_url))
                 # If media_url is itself a f4m manifest do the recursive extraction
                 # since bitrates in parent manifest (this one) and media_url manifest
                 # may differ leading to inability to resolve the format by requested
@@ -1310,6 +1310,7 @@ class InfoExtractor(object):
                 'url': manifest_url,
                 'manifest_url': manifest_url,
                 'ext': 'flv' if bootstrap_info is not None else None,
+                'protocol': 'f4m',
                 'tbr': tbr,
                 'width': width,
                 'height': height,
@@ -1401,7 +1402,7 @@ class InfoExtractor(object):
             media_url = media.get('URI')
             if media_url:
                 format_id = []
-                for v in (group_id, name):
+                for v in (m3u8_id, group_id, name):
                     if v:
                         format_id.append(v)
                 f = {
@@ -2233,27 +2234,35 @@ class InfoExtractor(object):
         return formats
 
     def _extract_wowza_formats(self, url, video_id, m3u8_entry_protocol='m3u8_native', skip_protocols=[]):
+        query = compat_urlparse.urlparse(url).query
         url = re.sub(r'/(?:manifest|playlist|jwplayer)\.(?:m3u8|f4m|mpd|smil)', '', url)
         url_base = self._search_regex(
             r'(?:(?:https?|rtmp|rtsp):)?(//[^?]+)', url, 'format url')
         http_base_url = '%s:%s' % ('http', url_base)
         formats = []
+
+        def manifest_url(manifest):
+            m_url = '%s/%s' % (http_base_url, manifest)
+            if query:
+                m_url += '?%s' % query
+            return m_url
+
         if 'm3u8' not in skip_protocols:
             formats.extend(self._extract_m3u8_formats(
-                http_base_url + '/playlist.m3u8', video_id, 'mp4',
+                manifest_url('playlist.m3u8'), video_id, 'mp4',
                 m3u8_entry_protocol, m3u8_id='hls', fatal=False))
         if 'f4m' not in skip_protocols:
             formats.extend(self._extract_f4m_formats(
-                http_base_url + '/manifest.f4m',
+                manifest_url('manifest.f4m'),
                 video_id, f4m_id='hds', fatal=False))
         if 'dash' not in skip_protocols:
             formats.extend(self._extract_mpd_formats(
-                http_base_url + '/manifest.mpd',
+                manifest_url('manifest.mpd'),
                 video_id, mpd_id='dash', fatal=False))
         if re.search(r'(?:/smil:|\.smil)', url_base):
             if 'smil' not in skip_protocols:
                 rtmp_formats = self._extract_smil_formats(
-                    http_base_url + '/jwplayer.smil',
+                    manifest_url('jwplayer.smil'),
                     video_id, fatal=False)
                 for rtmp_format in rtmp_formats:
                     rtsp_format = rtmp_format.copy()
index 00fbbff2fa35d2212d521a43e0e1b41b281d477b..3a6d0560e478cb08445f07d84578d28b3e4bc514 100644 (file)
@@ -2,53 +2,85 @@
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
-from ..utils import unified_strdate
+from ..compat import compat_str
+from ..utils import (
+    float_or_none,
+    unified_strdate,
+)
 
 
 class DctpTvIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?dctp\.tv/(#/)?filme/(?P<id>.+?)/$'
+    _VALID_URL = r'https?://(?:www\.)?dctp\.tv/(?:#/)?filme/(?P<id>[^/?#&]+)'
     _TEST = {
         'url': 'http://www.dctp.tv/filme/videoinstallation-fuer-eine-kaufhausfassade/',
-        'md5': '174dd4a8a6225cf5655952f969cfbe24',
         'info_dict': {
             'id': '95eaa4f33dad413aa17b4ee613cccc6c',
             'display_id': 'videoinstallation-fuer-eine-kaufhausfassade',
-            'ext': 'mp4',
+            'ext': 'flv',
             'title': 'Videoinstallation für eine Kaufhausfassade',
             'description': 'Kurzfilm',
             'upload_date': '20110407',
             'thumbnail': r're:^https?://.*\.jpg$',
+            'duration': 71.24,
+        },
+        'params': {
+            # rtmp download
+            'skip_download': True,
         },
     }
 
     def _real_extract(self, url):
-        video_id = self._match_id(url)
-        webpage = self._download_webpage(url, video_id)
+        display_id = self._match_id(url)
 
-        object_id = self._html_search_meta('DC.identifier', webpage)
+        webpage = self._download_webpage(url, display_id)
 
-        servers_json = self._download_json(
-            'http://www.dctp.tv/elastic_streaming_client/get_streaming_server/',
-            video_id, note='Downloading server list')
-        server = servers_json[0]['server']
-        m3u8_path = self._search_regex(
-            r'\'([^\'"]+/playlist\.m3u8)"', webpage, 'm3u8 path')
-        formats = self._extract_m3u8_formats(
-            'http://%s%s' % (server, m3u8_path), video_id, ext='mp4',
-            entry_protocol='m3u8_native')
+        video_id = self._html_search_meta(
+            'DC.identifier', webpage, 'video id',
+            default=None) or self._search_regex(
+            r'id=["\']uuid[^>]+>([^<]+)<', webpage, 'video id')
 
         title = self._og_search_title(webpage)
+
+        servers = self._download_json(
+            'http://www.dctp.tv/streaming_servers/', display_id,
+            note='Downloading server list', fatal=False)
+
+        if servers:
+            endpoint = next(
+                server['endpoint']
+                for server in servers
+                if isinstance(server.get('endpoint'), compat_str) and
+                'cloudfront' in server['endpoint'])
+        else:
+            endpoint = 'rtmpe://s2pqqn4u96e4j8.cloudfront.net/cfx/st/'
+
+        app = self._search_regex(
+            r'^rtmpe?://[^/]+/(?P<app>.*)$', endpoint, 'app')
+
+        formats = [{
+            'url': endpoint,
+            'app': app,
+            'play_path': 'mp4:%s_dctp_0500_4x3.m4v' % video_id,
+            'page_url': url,
+            'player_url': 'http://svm-prod-dctptv-static.s3.amazonaws.com/dctptv-relaunch2012-109.swf',
+            'ext': 'flv',
+        }]
+
         description = self._html_search_meta('DC.description', webpage)
         upload_date = unified_strdate(
             self._html_search_meta('DC.date.created', webpage))
         thumbnail = self._og_search_thumbnail(webpage)
+        duration = float_or_none(self._search_regex(
+            r'id=["\']duration_in_ms[^+]>(\d+)', webpage, 'duration',
+            default=None), scale=1000)
 
         return {
-            'id': object_id,
+            'id': video_id,
             'title': title,
             'formats': formats,
-            'display_id': video_id,
+            'display_id': display_id,
             'description': description,
             'upload_date': upload_date,
             'thumbnail': thumbnail,
+            'duration': duration,
         }
index 9a498d72ad6f378ef5dd877354871bcccbfb4faf..95883a037f537de47a0ee9f3f18d913eb9ec1647 100644 (file)
@@ -59,7 +59,7 @@ class DramaFeverBaseIE(AMPIE):
         if all(logout_pattern not in response
                for logout_pattern in ['href="/accounts/logout/"', '>Log out<']):
             error = self._html_search_regex(
-                r'(?s)class="hidden-xs prompt"[^>]*>(.+?)<',
+                r'(?s)<h\d[^>]+\bclass="hidden-xs prompt"[^>]*>(.+?)</h\d',
                 response, 'error message', default=None)
             if error:
                 raise ExtractorError('Unable to login: %s' % error, expected=True)
index 69effba58371426ec6813066424e86aa976b9ce8..f757745ba7cf2fa500f03102d7ca2b0c932c27bc 100644 (file)
@@ -138,6 +138,7 @@ class DRTVIE(InfoExtractor):
                             'tbr': int_or_none(bitrate),
                             'ext': link.get('FileFormat'),
                             'vcodec': 'none' if kind == 'AudioResource' else None,
+                            'preference': preference,
                         })
                 subtitles_list = asset.get('SubtitlesList')
                 if isinstance(subtitles_list, list):
index e4a3046af573fec2961ae84a2137a9395bf68f0b..edabaafe689a3d4ffae1b626dc1a55aa068d05aa 100644 (file)
@@ -2,7 +2,9 @@
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
+from ..compat import compat_str
 from ..utils import (
+    determine_ext,
     int_or_none,
     try_get,
     unified_timestamp,
@@ -17,7 +19,7 @@ class EggheadCourseIE(InfoExtractor):
         'url': 'https://egghead.io/courses/professor-frisby-introduces-composable-functional-javascript',
         'playlist_count': 29,
         'info_dict': {
-            'id': 'professor-frisby-introduces-composable-functional-javascript',
+            'id': '72',
             'title': 'Professor Frisby Introduces Composable Functional JavaScript',
             'description': 're:(?s)^This course teaches the ubiquitous.*You\'ll start composing functionality before you know it.$',
         },
@@ -26,14 +28,28 @@ class EggheadCourseIE(InfoExtractor):
     def _real_extract(self, url):
         playlist_id = self._match_id(url)
 
+        lessons = self._download_json(
+            'https://egghead.io/api/v1/series/%s/lessons' % playlist_id,
+            playlist_id, 'Downloading course lessons JSON')
+
+        entries = []
+        for lesson in lessons:
+            lesson_url = lesson.get('http_url')
+            if not lesson_url or not isinstance(lesson_url, compat_str):
+                continue
+            lesson_id = lesson.get('id')
+            if lesson_id:
+                lesson_id = compat_str(lesson_id)
+            entries.append(self.url_result(
+                lesson_url, ie=EggheadLessonIE.ie_key(), video_id=lesson_id))
+
         course = self._download_json(
-            'https://egghead.io/api/v1/series/%s' % playlist_id, playlist_id)
+            'https://egghead.io/api/v1/series/%s' % playlist_id,
+            playlist_id, 'Downloading course JSON', fatal=False) or {}
 
-        entries = [
-            self.url_result(
-                'wistia:%s' % lesson['wistia_id'], ie='Wistia',
-                video_id=lesson['wistia_id'], video_title=lesson.get('title'))
-            for lesson in course['lessons'] if lesson.get('wistia_id')]
+        playlist_id = course.get('id')
+        if playlist_id:
+            playlist_id = compat_str(playlist_id)
 
         return self.playlist_result(
             entries, playlist_id, course.get('title'),
@@ -43,11 +59,12 @@ class EggheadCourseIE(InfoExtractor):
 class EggheadLessonIE(InfoExtractor):
     IE_DESC = 'egghead.io lesson'
     IE_NAME = 'egghead:lesson'
-    _VALID_URL = r'https://egghead\.io/lessons/(?P<id>[^/?#&]+)'
-    _TEST = {
+    _VALID_URL = r'https://egghead\.io/(?:api/v1/)?lessons/(?P<id>[^/?#&]+)'
+    _TESTS = [{
         'url': 'https://egghead.io/lessons/javascript-linear-data-flow-with-container-style-types-box',
         'info_dict': {
-            'id': 'fv5yotjxcg',
+            'id': '1196',
+            'display_id': 'javascript-linear-data-flow-with-container-style-types-box',
             'ext': 'mp4',
             'title': 'Create linear data flow with container style types (Box)',
             'description': 'md5:9aa2cdb6f9878ed4c39ec09e85a8150e',
@@ -60,25 +77,51 @@ class EggheadLessonIE(InfoExtractor):
         },
         'params': {
             'skip_download': True,
+            'format': 'bestvideo',
         },
-    }
+    }, {
+        'url': 'https://egghead.io/api/v1/lessons/react-add-redux-to-a-react-application',
+        'only_matching': True,
+    }]
 
     def _real_extract(self, url):
-        lesson_id = self._match_id(url)
+        display_id = self._match_id(url)
 
         lesson = self._download_json(
-            'https://egghead.io/api/v1/lessons/%s' % lesson_id, lesson_id)
+            'https://egghead.io/api/v1/lessons/%s' % display_id, display_id)
+
+        lesson_id = compat_str(lesson['id'])
+        title = lesson['title']
+
+        formats = []
+        for _, format_url in lesson['media_urls'].items():
+            if not format_url or not isinstance(format_url, compat_str):
+                continue
+            ext = determine_ext(format_url)
+            if ext == 'm3u8':
+                formats.extend(self._extract_m3u8_formats(
+                    format_url, lesson_id, 'mp4', entry_protocol='m3u8',
+                    m3u8_id='hls', fatal=False))
+            elif ext == 'mpd':
+                formats.extend(self._extract_mpd_formats(
+                    format_url, lesson_id, mpd_id='dash', fatal=False))
+            else:
+                formats.append({
+                    'url': format_url,
+                })
+        self._sort_formats(formats)
 
         return {
-            '_type': 'url_transparent',
-            'ie_key': 'Wistia',
-            'url': 'wistia:%s' % lesson['wistia_id'],
-            'id': lesson['wistia_id'],
-            'title': lesson.get('title'),
+            'id': lesson_id,
+            'display_id': display_id,
+            'title': title,
             'description': lesson.get('summary'),
             'thumbnail': lesson.get('thumb_nail'),
             'timestamp': unified_timestamp(lesson.get('published_at')),
             'duration': int_or_none(lesson.get('duration')),
             'view_count': int_or_none(lesson.get('plays_count')),
             'tags': try_get(lesson, lambda x: x['tag_list'], list),
+            'series': try_get(
+                lesson, lambda x: x['series']['title'], compat_str),
+            'formats': formats,
         }
index f3734e9f8984ab5a1a723bbb0be171c3fd9cf7b5..81f2e2ee1c4ac599d8844965fb9271e745293268 100644 (file)
@@ -15,7 +15,7 @@ from ..utils import (
 
 
 class EpornerIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?eporner\.com/hd-porn/(?P<id>\w+)(?:/(?P<display_id>[\w-]+))?'
+    _VALID_URL = r'https?://(?:www\.)?eporner\.com/(?:hd-porn|embed)/(?P<id>\w+)(?:/(?P<display_id>[\w-]+))?'
     _TESTS = [{
         'url': 'http://www.eporner.com/hd-porn/95008/Infamous-Tiffany-Teen-Strip-Tease-Video/',
         'md5': '39d486f046212d8e1b911c52ab4691f8',
@@ -35,6 +35,9 @@ class EpornerIE(InfoExtractor):
     }, {
         'url': 'http://www.eporner.com/hd-porn/3YRUtzMcWn0',
         'only_matching': True,
+    }, {
+        'url': 'http://www.eporner.com/hd-porn/3YRUtzMcWn0',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
index ecb33bc9e7dbbb673a87fc93036e75d06c6f6229..d084707ee81e9e137e260e45220e68e0efcc95d0 100644 (file)
@@ -150,6 +150,7 @@ from .canalc2 import Canalc2IE
 from .canvas import (
     CanvasIE,
     CanvasEenIE,
+    VrtNUIE,
 )
 from .carambatv import (
     CarambaTVIE,
@@ -431,7 +432,10 @@ from .hitbox import HitboxIE, HitboxLiveIE
 from .hitrecord import HitRecordIE
 from .hornbunny import HornBunnyIE
 from .hotnewhiphop import HotNewHipHopIE
-from .hotstar import HotStarIE
+from .hotstar import (
+    HotStarIE,
+    HotStarPlaylistIE,
+)
 from .howcast import HowcastIE
 from .howstuffworks import HowStuffWorksIE
 from .hrti import (
@@ -623,7 +627,6 @@ from .mwave import MwaveIE, MwaveMeetGreetIE
 from .myspace import MySpaceIE, MySpaceAlbumIE
 from .myspass import MySpassIE
 from .myvi import MyviIE
-from .myvideo import MyVideoIE
 from .myvidster import MyVidsterIE
 from .nationalgeographic import (
     NationalGeographicVideoIE,
@@ -925,6 +928,7 @@ from .seeker import SeekerIE
 from .senateisvp import SenateISVPIE
 from .sendtonews import SendtoNewsIE
 from .servingsys import ServingSysIE
+from .servus import ServusIE
 from .sexu import SexuIE
 from .shahid import ShahidIE
 from .shared import (
@@ -1109,10 +1113,6 @@ from .tvplayer import TVPlayerIE
 from .tweakers import TweakersIE
 from .twentyfourvideo import TwentyFourVideoIE
 from .twentymin import TwentyMinutenIE
-from .twentytwotracks import (
-    TwentyTwoTracksIE,
-    TwentyTwoTracksGenreIE
-)
 from .twitch import (
     TwitchVideoIE,
     TwitchChapterIE,
@@ -1138,6 +1138,7 @@ from .udn import UDNEmbedIE
 from .uktvplay import UKTVPlayIE
 from .digiteka import DigitekaIE
 from .unistra import UnistraIE
+from .unity import UnityIE
 from .uol import UOLIE
 from .uplynk import (
     UplynkIE,
@@ -1333,6 +1334,11 @@ from .youku import (
     YoukuIE,
     YoukuShowIE,
 )
+from .younow import (
+    YouNowLiveIE,
+    YouNowChannelIE,
+    YouNowMomentIE,
+)
 from .youporn import YouPornIE
 from .yourupload import YourUploadIE
 from .youtube import (
index 629897317be5fbe316639856cec73662c1f8149c..37549fb01ccfc05fb4a642f948ee70227588f6e0 100644 (file)
@@ -3,27 +3,31 @@ from __future__ import unicode_literals
 
 from .adobepass import AdobePassIE
 from ..utils import (
-    update_url_query,
     extract_attributes,
+    int_or_none,
     parse_age_limit,
     smuggle_url,
+    update_url_query,
 )
 
 
 class FXNetworksIE(AdobePassIE):
     _VALID_URL = r'https?://(?:www\.)?(?:fxnetworks|simpsonsworld)\.com/video/(?P<id>\d+)'
     _TESTS = [{
-        'url': 'http://www.fxnetworks.com/video/719841347694',
-        'md5': '1447d4722e42ebca19e5232ab93abb22',
+        'url': 'http://www.fxnetworks.com/video/1032565827847',
+        'md5': '8d99b97b4aa7a202f55b6ed47ea7e703',
         'info_dict': {
-            'id': '719841347694',
+            'id': 'dRzwHC_MMqIv',
             'ext': 'mp4',
-            'title': 'Vanpage',
-            'description': 'F*ck settling down. You\'re the Worst returns for an all new season August 31st on FXX.',
+            'title': 'First Look: Better Things - Season 2',
+            'description': 'Because real life is like a fart. Watch this FIRST LOOK to see what inspired the new season of Better Things.',
             'age_limit': 14,
             'uploader': 'NEWA-FNG-FX',
-            'upload_date': '20160706',
-            'timestamp': 1467844741,
+            'upload_date': '20170825',
+            'timestamp': 1503686274,
+            'episode_number': 0,
+            'season_number': 2,
+            'series': 'Better Things',
         },
         'add_ie': ['ThePlatform'],
     }, {
@@ -64,6 +68,9 @@ class FXNetworksIE(AdobePassIE):
             'id': video_id,
             'title': title,
             'url': smuggle_url(update_url_query(release_url, query), {'force_smil_url': True}),
+            'series': video_data.get('data-show-title'),
+            'episode_number': int_or_none(video_data.get('data-episode')),
+            'season_number': int_or_none(video_data.get('data-season')),
             'thumbnail': video_data.get('data-large-thumb'),
             'age_limit': parse_age_limit(rating),
             'ie_key': 'ThePlatform',
index 02804d297a4709198371e2e2acfd8a83857095be..6d177cbaf388afb1bb1a71873558ee55ca70ee2c 100644 (file)
@@ -14,7 +14,7 @@ from ..utils import (
 
 
 class GameSpotIE(OnceIE):
-    _VALID_URL = r'https?://(?:www\.)?gamespot\.com/.*-(?P<id>\d+)/?'
+    _VALID_URL = r'https?://(?:www\.)?gamespot\.com/videos/(?:[^/]+/\d+-|embed/)(?P<id>\d+)'
     _TESTS = [{
         'url': 'http://www.gamespot.com/videos/arma-3-community-guide-sitrep-i/2300-6410818/',
         'md5': 'b2a30deaa8654fcccd43713a6b6a4825',
@@ -35,6 +35,9 @@ class GameSpotIE(OnceIE):
         'params': {
             'skip_download': True,  # m3u8 downloads
         },
+    }, {
+        'url': 'https://www.gamespot.com/videos/embed/6439218/',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
@@ -52,7 +55,7 @@ class GameSpotIE(OnceIE):
             manifest_url = f4m_url
             formats.extend(self._extract_f4m_formats(
                 f4m_url + '?hdcore=3.7.0', page_id, f4m_id='hds', fatal=False))
-        m3u8_url = streams.get('m3u8_stream')
+        m3u8_url = dict_get(streams, ('m3u8_stream', 'adaptive_stream'))
         if m3u8_url:
             manifest_url = m3u8_url
             m3u8_formats = self._extract_m3u8_formats(
@@ -60,7 +63,7 @@ class GameSpotIE(OnceIE):
                 m3u8_id='hls', fatal=False)
             formats.extend(m3u8_formats)
         progressive_url = dict_get(
-            streams, ('progressive_hd', 'progressive_high', 'progressive_low'))
+            streams, ('progressive_hd', 'progressive_high', 'progressive_low', 'other_lr'))
         if progressive_url and manifest_url:
             qualities_basename = self._search_regex(
                 r'/([^/]+)\.csmil/',
diff --git a/youtube_dl/extractor/gigya.py b/youtube_dl/extractor/gigya.py
new file mode 100644 (file)
index 0000000..4121784
--- /dev/null
@@ -0,0 +1,22 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+from ..utils import (
+    ExtractorError,
+    urlencode_postdata,
+)
+
+
+class GigyaBaseIE(InfoExtractor):
+    def _gigya_login(self, auth_data):
+        auth_info = self._download_json(
+            'https://accounts.eu1.gigya.com/accounts.login', None,
+            note='Logging in', errnote='Unable to log in',
+            data=urlencode_postdata(auth_data))
+
+        error_message = auth_info.get('errorDetails') or auth_info.get('errorMessage')
+        if error_message:
+            raise ExtractorError(
+                'Unable to login: %s' % error_message, expected=True)
+        return auth_info
index 3a7a66a343992879992d677af8f9c318dbd433ff..d28af36ec1704a8d4c508c2036962c8584ea38aa 100644 (file)
@@ -1,22 +1,47 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
+import re
+
 from .common import InfoExtractor
+from ..compat import compat_str
 from ..utils import (
-    ExtractorError,
     determine_ext,
+    ExtractorError,
     int_or_none,
 )
 
 
-class HotStarIE(InfoExtractor):
+class HotStarBaseIE(InfoExtractor):
+    _GEO_COUNTRIES = ['IN']
+
+    def _download_json(self, *args, **kwargs):
+        response = super(HotStarBaseIE, self)._download_json(*args, **kwargs)
+        if response['resultCode'] != 'OK':
+            if kwargs.get('fatal'):
+                raise ExtractorError(
+                    response['errorDescription'], expected=True)
+            return None
+        return response['resultObj']
+
+    def _download_content_info(self, content_id):
+        return self._download_json(
+            'https://account.hotstar.com/AVS/besc', content_id, query={
+                'action': 'GetAggregatedContentDetails',
+                'appVersion': '5.0.40',
+                'channel': 'PCTV',
+                'contentId': content_id,
+            })['contentInfo'][0]
+
+
+class HotStarIE(HotStarBaseIE):
     _VALID_URL = r'https?://(?:www\.)?hotstar\.com/(?:.+?[/-])?(?P<id>\d{10})'
     _TESTS = [{
         'url': 'http://www.hotstar.com/on-air-with-aib--english-1000076273',
         'info_dict': {
             'id': '1000076273',
             'ext': 'mp4',
-            'title': 'On Air With AIB - English',
+            'title': 'On Air With AIB',
             'description': 'md5:c957d8868e9bc793ccb813691cc4c434',
             'timestamp': 1447227000,
             'upload_date': '20151111',
@@ -34,23 +59,11 @@ class HotStarIE(InfoExtractor):
         'only_matching': True,
     }]
 
-    def _download_json(self, url_or_request, video_id, note='Downloading JSON metadata', fatal=True, query=None):
-        json_data = super(HotStarIE, self)._download_json(
-            url_or_request, video_id, note, fatal=fatal, query=query)
-        if json_data['resultCode'] != 'OK':
-            if fatal:
-                raise ExtractorError(json_data['errorDescription'])
-            return None
-        return json_data['resultObj']
-
     def _real_extract(self, url):
         video_id = self._match_id(url)
-        video_data = self._download_json(
-            'http://account.hotstar.com/AVS/besc', video_id, query={
-                'action': 'GetAggregatedContentDetails',
-                'channel': 'PCTV',
-                'contentId': video_id,
-            })['contentInfo'][0]
+
+        video_data = self._download_content_info(video_id)
+
         title = video_data['episodeTitle']
 
         if video_data.get('encrypted') == 'Y':
@@ -99,3 +112,51 @@ class HotStarIE(InfoExtractor):
             'episode_number': int_or_none(video_data.get('episodeNumber')),
             'series': video_data.get('contentTitle'),
         }
+
+
+class HotStarPlaylistIE(HotStarBaseIE):
+    IE_NAME = 'hotstar:playlist'
+    _VALID_URL = r'(?P<url>https?://(?:www\.)?hotstar\.com/tv/[^/]+/(?P<content_id>\d+))/(?P<type>[^/]+)/(?P<id>\d+)'
+    _TESTS = [{
+        'url': 'http://www.hotstar.com/tv/pratidaan/14982/episodes/14812/9993',
+        'info_dict': {
+            'id': '14812',
+        },
+        'playlist_mincount': 75,
+    }, {
+        'url': 'http://www.hotstar.com/tv/pratidaan/14982/popular-clips/9998/9998',
+        'only_matching': True,
+    }]
+    _ITEM_TYPES = {
+        'episodes': 'EPISODE',
+        'popular-clips': 'CLIPS',
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        base_url = mobj.group('url')
+        content_id = mobj.group('content_id')
+        playlist_type = mobj.group('type')
+
+        content_info = self._download_content_info(content_id)
+        playlist_id = compat_str(content_info['categoryId'])
+
+        collection = self._download_json(
+            'https://search.hotstar.com/AVS/besc', playlist_id, query={
+                'action': 'SearchContents',
+                'appVersion': '5.0.40',
+                'channel': 'PCTV',
+                'moreFilters': 'series:%s;' % playlist_id,
+                'query': '*',
+                'searchOrder': 'last_broadcast_date desc,year desc,title asc',
+                'type': self._ITEM_TYPES.get(playlist_type, 'EPISODE'),
+            })
+
+        entries = [
+            self.url_result(
+                '%s/_/%s' % (base_url, video['contentId']),
+                ie=HotStarIE.ie_key(), video_id=video['contentId'])
+            for video in collection['response']['docs']
+            if video.get('contentId')]
+
+        return self.playlist_result(entries, playlist_id)
index 4c32fbc2c27b9a78e0e6fdae73f7dafd865d90fc..f8c30052f32486f511622656bdadbcf91f80717c 100644 (file)
@@ -2,19 +2,18 @@ from __future__ import unicode_literals
 
 import re
 
-from .common import InfoExtractor
+from .gigya import GigyaBaseIE
+
 from ..compat import compat_str
 from ..utils import (
-    ExtractorError,
     int_or_none,
     parse_duration,
     try_get,
     unified_timestamp,
-    urlencode_postdata,
 )
 
 
-class MedialaanIE(InfoExtractor):
+class MedialaanIE(GigyaBaseIE):
     _VALID_URL = r'''(?x)
                     https?://
                         (?:www\.|nieuws\.)?
@@ -119,15 +118,7 @@ class MedialaanIE(InfoExtractor):
             'password': password,
         }
 
-        auth_info = self._download_json(
-            'https://accounts.eu1.gigya.com/accounts.login', None,
-            note='Logging in', errnote='Unable to log in',
-            data=urlencode_postdata(auth_data))
-
-        error_message = auth_info.get('errorDetails') or auth_info.get('errorMessage')
-        if error_message:
-            raise ExtractorError(
-                'Unable to login: %s' % error_message, expected=True)
+        auth_info = self._gigya_login(auth_data)
 
         self._uid = auth_info['UID']
         self._uid_signature = auth_info['UIDSignature']
index 60e3caf0dc57614f59cbe434d4b1d83e12e1dd96..5bafa6cf443f1df8f5c7c2d87921533ef399b822 100644 (file)
@@ -18,7 +18,7 @@ class MegaphoneIE(InfoExtractor):
             'id': 'GLT9749789991',
             'ext': 'mp3',
             'title': '#97 What Kind Of Idiot Gets Phished?',
-            'thumbnail': 're:^https://.*\.png.*$',
+            'thumbnail': r're:^https://.*\.png.*$',
             'duration': 1776.26375,
             'author': 'Reply All',
         },
diff --git a/youtube_dl/extractor/myvideo.py b/youtube_dl/extractor/myvideo.py
deleted file mode 100644 (file)
index 367e811..0000000
+++ /dev/null
@@ -1,177 +0,0 @@
-from __future__ import unicode_literals
-
-import binascii
-import base64
-import hashlib
-import re
-import json
-
-from .common import InfoExtractor
-from ..compat import (
-    compat_ord,
-    compat_urllib_parse_unquote,
-    compat_urllib_parse_urlencode,
-)
-from ..utils import (
-    ExtractorError,
-    sanitized_Request,
-)
-
-
-class MyVideoIE(InfoExtractor):
-    _WORKING = False
-    _VALID_URL = r'https?://(?:www\.)?myvideo\.de/(?:[^/]+/)?watch/(?P<id>[0-9]+)/[^?/]+.*'
-    IE_NAME = 'myvideo'
-    _TEST = {
-        'url': 'http://www.myvideo.de/watch/8229274/bowling_fail_or_win',
-        'md5': '2d2753e8130479ba2cb7e0a37002053e',
-        'info_dict': {
-            'id': '8229274',
-            'ext': 'flv',
-            'title': 'bowling-fail-or-win',
-        }
-    }
-
-    # Original Code from: https://github.com/dersphere/plugin.video.myvideo_de.git
-    # Released into the Public Domain by Tristan Fischer on 2013-05-19
-    # https://github.com/rg3/youtube-dl/pull/842
-    def __rc4crypt(self, data, key):
-        x = 0
-        box = list(range(256))
-        for i in list(range(256)):
-            x = (x + box[i] + compat_ord(key[i % len(key)])) % 256
-            box[i], box[x] = box[x], box[i]
-        x = 0
-        y = 0
-        out = ''
-        for char in data:
-            x = (x + 1) % 256
-            y = (y + box[x]) % 256
-            box[x], box[y] = box[y], box[x]
-            out += chr(compat_ord(char) ^ box[(box[x] + box[y]) % 256])
-        return out
-
-    def __md5(self, s):
-        return hashlib.md5(s).hexdigest().encode()
-
-    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
-
-        GK = (
-            b'WXpnME1EZGhNRGhpTTJNM01XVmhOREU0WldNNVpHTTJOakpt'
-            b'TW1FMU5tVTBNR05pWkRaa05XRXhNVFJoWVRVd1ptSXhaVEV3'
-            b'TnpsbA0KTVRkbU1tSTRNdz09'
-        )
-
-        # Get video webpage
-        webpage_url = 'http://www.myvideo.de/watch/%s' % video_id
-        webpage = self._download_webpage(webpage_url, video_id)
-
-        mobj = re.search('source src=\'(.+?)[.]([^.]+)\'', webpage)
-        if mobj is not None:
-            self.report_extraction(video_id)
-            video_url = mobj.group(1) + '.flv'
-
-            video_title = self._html_search_regex('<title>([^<]+)</title>',
-                                                  webpage, 'title')
-
-            return {
-                'id': video_id,
-                'url': video_url,
-                'title': video_title,
-            }
-
-        mobj = re.search(r'data-video-service="/service/data/video/%s/config' % video_id, webpage)
-        if mobj is not None:
-            request = sanitized_Request('http://www.myvideo.de/service/data/video/%s/config' % video_id, '')
-            response = self._download_webpage(request, video_id,
-                                              'Downloading video info')
-            info = json.loads(base64.b64decode(response).decode('utf-8'))
-            return {
-                'id': video_id,
-                'title': info['title'],
-                'url': info['streaming_url'].replace('rtmpe', 'rtmpt'),
-                'play_path': info['filename'],
-                'ext': 'flv',
-                'thumbnail': info['thumbnail'][0]['url'],
-            }
-
-        # try encxml
-        mobj = re.search('var flashvars={(.+?)}', webpage)
-        if mobj is None:
-            raise ExtractorError('Unable to extract video')
-
-        params = {}
-        encxml = ''
-        sec = mobj.group(1)
-        for (a, b) in re.findall('(.+?):\'(.+?)\',?', sec):
-            if not a == '_encxml':
-                params[a] = b
-            else:
-                encxml = compat_urllib_parse_unquote(b)
-        if not params.get('domain'):
-            params['domain'] = 'www.myvideo.de'
-        xmldata_url = '%s?%s' % (encxml, compat_urllib_parse_urlencode(params))
-        if 'flash_playertype=MTV' in xmldata_url:
-            self._downloader.report_warning('avoiding MTV player')
-            xmldata_url = (
-                'http://www.myvideo.de/dynamic/get_player_video_xml.php'
-                '?flash_playertype=D&ID=%s&_countlimit=4&autorun=yes'
-            ) % video_id
-
-        # get enc data
-        enc_data = self._download_webpage(xmldata_url, video_id).split('=')[1]
-        enc_data_b = binascii.unhexlify(enc_data)
-        sk = self.__md5(
-            base64.b64decode(base64.b64decode(GK)) +
-            self.__md5(
-                str(video_id).encode('utf-8')
-            )
-        )
-        dec_data = self.__rc4crypt(enc_data_b, sk)
-
-        # extracting infos
-        self.report_extraction(video_id)
-
-        video_url = None
-        mobj = re.search('connectionurl=\'(.*?)\'', dec_data)
-        if mobj:
-            video_url = compat_urllib_parse_unquote(mobj.group(1))
-            if 'myvideo2flash' in video_url:
-                self.report_warning(
-                    'Rewriting URL to use unencrypted rtmp:// ...',
-                    video_id)
-                video_url = video_url.replace('rtmpe://', 'rtmp://')
-
-        if not video_url:
-            # extract non rtmp videos
-            mobj = re.search('path=\'(http.*?)\' source=\'(.*?)\'', dec_data)
-            if mobj is None:
-                raise ExtractorError('unable to extract url')
-            video_url = compat_urllib_parse_unquote(mobj.group(1)) + compat_urllib_parse_unquote(mobj.group(2))
-
-        video_file = self._search_regex('source=\'(.*?)\'', dec_data, 'video file')
-        video_file = compat_urllib_parse_unquote(video_file)
-
-        if not video_file.endswith('f4m'):
-            ppath, prefix = video_file.split('.')
-            video_playpath = '%s:%s' % (prefix, ppath)
-        else:
-            video_playpath = ''
-
-        video_swfobj = self._search_regex(r'swfobject\.embedSWF\(\'(.+?)\'', webpage, 'swfobj')
-        video_swfobj = compat_urllib_parse_unquote(video_swfobj)
-
-        video_title = self._html_search_regex("<h1(?: class='globalHd')?>(.*?)</h1>",
-                                              webpage, 'title')
-
-        return {
-            'id': video_id,
-            'url': video_url,
-            'tc_url': video_url,
-            'title': video_title,
-            'ext': 'flv',
-            'play_path': video_playpath,
-            'player_url': video_swfobj,
-        }
index 35151f5274d2452be7cb508ec76e36db8fab9d9e..554dec36e62dc246ea314ac07f9cff6b3c1323fe 100644 (file)
@@ -15,7 +15,7 @@ from ..utils import (
 
 
 class NBCIE(AdobePassIE):
-    _VALID_URL = r'https?(?P<permalink>://(?:www\.)?nbc\.com/[^/]+/video/[^/]+/(?P<id>n?\d+))'
+    _VALID_URL = r'https?(?P<permalink>://(?:www\.)?nbc\.com/(?:classic-tv/)?[^/]+/video/[^/]+/(?P<id>n?\d+))'
 
     _TESTS = [
         {
@@ -67,7 +67,11 @@ class NBCIE(AdobePassIE):
                 'skip_download': True,
             },
             'skip': 'Only works from US',
-        }
+        },
+        {
+            'url': 'https://www.nbc.com/classic-tv/charles-in-charge/video/charles-in-charge-pilot/n3310',
+            'only_matching': True,
+        },
     ]
 
     def _real_extract(self, url):
index 255f608783edad0aa3838de028dd8ea07d9ae1b0..ddec89f2c3091c822a61d2a6790b9fc877c15829 100644 (file)
+# coding: utf-8
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
+from ..compat import (
+    compat_urllib_parse_unquote_plus
+)
 from ..utils import (
-    int_or_none,
+    parse_duration,
     remove_end,
     unified_strdate,
+    urljoin
 )
 
 
 class NDTVIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?ndtv\.com/video/(?:[^/]+/)+[^/?^&]+-(?P<id>\d+)'
-
-    _TEST = {
-        'url': 'http://www.ndtv.com/video/news/news/ndtv-exclusive-don-t-need-character-certificate-from-rahul-gandhi-says-arvind-kejriwal-300710',
-        'md5': '39f992dbe5fb531c395d8bbedb1e5e88',
-        'info_dict': {
-            'id': '300710',
-            'ext': 'mp4',
-            'title': "NDTV exclusive: Don't need character certificate from Rahul Gandhi, says Arvind Kejriwal",
-            'description': 'md5:ab2d4b4a6056c5cb4caa6d729deabf02',
-            'upload_date': '20131208',
-            'duration': 1327,
-            'thumbnail': r're:https?://.*\.jpg',
-        },
-    }
+    _VALID_URL = r'https?://(?:[^/]+\.)?ndtv\.com/(?:[^/]+/)*videos?/?(?:[^/]+/)*[^/?^&]+-(?P<id>\d+)'
+
+    _TESTS = [
+        {
+            'url': 'https://khabar.ndtv.com/video/show/prime-time/prime-time-ill-system-and-poor-education-468818',
+            'md5': '78efcf3880ef3fd9b83d405ca94a38eb',
+            'info_dict': {
+                'id': '468818',
+                'ext': 'mp4',
+                'title': "प्राइम टाइम: सिस्टम बीमार, स्कूल बदहाल",
+                'description': 'md5:f410512f1b49672e5695dea16ef2731d',
+                'upload_date': '20170928',
+                'duration': 2218,
+                'thumbnail': r're:https?://.*\.jpg',
+            }
+        },
+        {
+            # __filename is url
+            'url': 'http://movies.ndtv.com/videos/cracker-free-diwali-wishes-from-karan-johar-kriti-sanon-other-stars-470304',
+            'md5': 'f1d709352305b44443515ac56b45aa46',
+            'info_dict': {
+                'id': '470304',
+                'ext': 'mp4',
+                'title': "Cracker-Free Diwali Wishes From Karan Johar, Kriti Sanon & Other Stars",
+                'description': 'md5:f115bba1adf2f6433fa7c1ade5feb465',
+                'upload_date': '20171019',
+                'duration': 137,
+                'thumbnail': r're:https?://.*\.jpg',
+            }
+        },
+        {
+            'url': 'https://www.ndtv.com/video/news/news/delhi-s-air-quality-status-report-after-diwali-is-very-poor-470372',
+            'only_matching': True
+        },
+        {
+            'url': 'https://auto.ndtv.com/videos/the-cnb-daily-october-13-2017-469935',
+            'only_matching': True
+        },
+        {
+            'url': 'https://sports.ndtv.com/cricket/videos/2nd-t20i-rock-thrown-at-australia-cricket-team-bus-after-win-over-india-469764',
+            'only_matching': True
+        },
+        {
+            'url': 'http://gadgets.ndtv.com/videos/uncharted-the-lost-legacy-review-465568',
+            'only_matching': True
+        },
+        {
+            'url': 'http://profit.ndtv.com/videos/news/video-indian-economy-on-very-solid-track-international-monetary-fund-chief-470040',
+            'only_matching': True
+        },
+        {
+            'url': 'http://food.ndtv.com/video-basil-seeds-coconut-porridge-419083',
+            'only_matching': True
+        },
+        {
+            'url': 'https://doctor.ndtv.com/videos/top-health-stories-of-the-week-467396',
+            'only_matching': True
+        },
+        {
+            'url': 'https://swirlster.ndtv.com/video/how-to-make-friends-at-work-469324',
+            'only_matching': True
+        }
+    ]
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
 
-        title = remove_end(self._og_search_title(webpage), ' - NDTV')
+        # '__title' does not contain extra words such as sub-site name, "Video" etc.
+        title = compat_urllib_parse_unquote_plus(
+            self._search_regex(r"__title\s*=\s*'([^']+)'", webpage, 'title', default=None) or
+            self._og_search_title(webpage))
 
         filename = self._search_regex(
-            r"__filename='([^']+)'", webpage, 'video filename')
-        video_url = 'http://bitcast-b.bitgravity.com/ndtvod/23372/ndtv/%s' % filename
+            r"(?:__)?filename\s*[:=]\s*'([^']+)'", webpage, 'video filename')
+        # in "movies" sub-site pages, filename is URL
+        video_url = urljoin('https://ndtvod.bc-ssl.cdn.bitgravity.com/23372/ndtv/', filename.lstrip('/'))
 
-        duration = int_or_none(self._search_regex(
-            r"__duration='([^']+)'", webpage, 'duration', fatal=False))
+        # "doctor" sub-site has MM:SS format
+        duration = parse_duration(self._search_regex(
+            r"(?:__)?duration\s*[:=]\s*'([^']+)'", webpage, 'duration', fatal=False))
 
+        # "sports", "doctor", "swirlster" sub-sites don't have 'publish-date'
         upload_date = unified_strdate(self._html_search_meta(
-            'publish-date', webpage, 'upload date', fatal=False))
+            'publish-date', webpage, 'upload date', default=None) or self._html_search_meta(
+            'uploadDate', webpage, 'upload date', default=None) or self._search_regex(
+            r'datePublished"\s*:\s*"([^"]+)"', webpage, 'upload date', fatal=False))
 
         description = remove_end(self._og_search_description(webpage), ' (Read more)')
 
index 510b1c41fd42dd3f77214fd804e9962a78e075af..310eea2cf054248260868515469962232b8562c2 100644 (file)
@@ -75,7 +75,7 @@ class NickIE(MTVServicesInfoExtractor):
 
 class NickDeIE(MTVServicesInfoExtractor):
     IE_NAME = 'nick.de'
-    _VALID_URL = r'https?://(?:www\.)?(?P<host>nick\.(?:de|com\.pl)|nickelodeon\.(?:nl|at))/[^/]+/(?:[^/]+/)*(?P<id>[^/?#&]+)'
+    _VALID_URL = r'https?://(?:www\.)?(?P<host>nick\.(?:de|com\.pl|ch)|nickelodeon\.(?:nl|be|at|dk|no|se))/[^/]+/(?:[^/]+/)*(?P<id>[^/?#&]+)'
     _TESTS = [{
         'url': 'http://www.nick.de/playlist/3773-top-videos/videos/episode/17306-zu-wasser-und-zu-land-rauchende-erdnusse',
         'only_matching': True,
@@ -91,6 +91,21 @@ class NickDeIE(MTVServicesInfoExtractor):
     }, {
         'url': 'http://www.nick.com.pl/seriale/474-spongebob-kanciastoporty/wideo/17412-teatr-to-jest-to-rodeo-oszolom',
         'only_matching': True,
+    }, {
+        'url': 'http://www.nickelodeon.no/program/2626-bulderhuset/videoer/90947-femteklasse-veronica-vs-vanzilla',
+        'only_matching': True,
+    }, {
+        'url': 'http://www.nickelodeon.dk/serier/2626-hojs-hus/videoer/761-tissepause',
+        'only_matching': True,
+    }, {
+        'url': 'http://www.nickelodeon.se/serier/2626-lugn-i-stormen/videos/998-',
+        'only_matching': True,
+    }, {
+        'url': 'http://www.nick.ch/shows/2304-adventure-time-abenteuerzeit-mit-finn-und-jake',
+        'only_matching': True,
+    }, {
+        'url': 'http://www.nickelodeon.be/afspeellijst/4530-top-videos/videos/episode/73917-inval-broodschapper-lariekoek-arie',
+        'only_matching': True,
     }]
 
     def _extract_mrss_url(self, webpage, host):
@@ -132,13 +147,28 @@ class NickNightIE(NickDeIE):
 
 class NickRuIE(MTVServicesInfoExtractor):
     IE_NAME = 'nickelodeonru'
-    _VALID_URL = r'https?://(?:www\.)nickelodeon\.ru/(?:playlist|shows|videos)/(?:[^/]+/)*(?P<id>[^/?#&]+)'
+    _VALID_URL = r'https?://(?:www\.)nickelodeon\.(?:ru|fr|es|pt|ro|hu)/[^/]+/(?:[^/]+/)*(?P<id>[^/?#&]+)'
     _TESTS = [{
         'url': 'http://www.nickelodeon.ru/shows/henrydanger/videos/episodes/3-sezon-15-seriya-licenziya-na-polyot/pmomfb#playlist/7airc6',
         'only_matching': True,
     }, {
         'url': 'http://www.nickelodeon.ru/videos/smotri-na-nickelodeon-v-iyule/g9hvh7',
         'only_matching': True,
+    }, {
+        'url': 'http://www.nickelodeon.fr/programmes/bob-l-eponge/videos/le-marathon-de-booh-kini-bottom-mardi-31-octobre/nfn7z0',
+        'only_matching': True,
+    }, {
+        'url': 'http://www.nickelodeon.es/videos/nickelodeon-consejos-tortitas/f7w7xy',
+        'only_matching': True,
+    }, {
+        'url': 'http://www.nickelodeon.pt/series/spongebob-squarepants/videos/a-bolha-de-tinta-gigante/xutq1b',
+        'only_matching': True,
+    }, {
+        'url': 'http://www.nickelodeon.ro/emisiuni/shimmer-si-shine/video/nahal-din-bomboane/uw5u2k',
+        'only_matching': True,
+    }, {
+        'url': 'http://www.nickelodeon.hu/musorok/spongyabob-kockanadrag/videok/episodes/buborekfujas-az-elszakadt-nadrag/q57iob#playlist/k6te4y',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
index 026329d3ea4210e2d17af374b67c4a87af252ee1..df7f528be2d4c8da7cfe826609608754f2f8e088 100644 (file)
--- a/youtube_dl/extractor/niconico.py
+++ b/youtube_dl/extractor/niconico.py
@@ -40,7 +40,7 @@ class NiconicoIE(InfoExtractor):
             'uploader': 'takuya0301',
             'uploader_id': '2698420',
             'upload_date': '20131123',
-            'timestamp': 1385182762,
+            'timestamp': int,  # timestamp is unstable
             'description': '(c) copyright 2008, Blender Foundation / www.bigbuckbunny.org',
             'duration': 33,
             'view_count': int,
@@ -115,8 +115,8 @@ class NiconicoIE(InfoExtractor):
         'skip': 'Requires an account',
     }, {
         # "New" HTML5 video
+        # md5 is unstable
         'url': 'http://www.nicovideo.jp/watch/sm31464864',
-        'md5': '351647b4917660986dc0fa8864085135',
         'info_dict': {
             'id': 'sm31464864',
             'ext': 'mp4',
@@ -124,7 +124,7 @@ class NiconicoIE(InfoExtractor):
             'description': 'md5:e52974af9a96e739196b2c1ca72b5feb',
             'timestamp': 1498514060,
             'upload_date': '20170626',
-            'uploader': 'ゲス',
+            'uploader': 'ゲス',
             'uploader_id': '40826363',
             'thumbnail': r're:https?://.*',
             'duration': 198,
@@ -132,6 +132,25 @@ class NiconicoIE(InfoExtractor):
             'comment_count': int,
         },
         'skip': 'Requires an account',
+    }, {
+        # Video without owner
+        'url': 'http://www.nicovideo.jp/watch/sm18238488',
+        'md5': 'd265680a1f92bdcbbd2a507fc9e78a9e',
+        'info_dict': {
+            'id': 'sm18238488',
+            'ext': 'mp4',
+            'title': '【実写版】ミュータントタートルズ',
+            'description': 'md5:15df8988e47a86f9e978af2064bf6d8e',
+            'timestamp': 1341160408,
+            'upload_date': '20120701',
+            'uploader': None,
+            'uploader_id': None,
+            'thumbnail': r're:https?://.*',
+            'duration': 5271,
+            'view_count': int,
+            'comment_count': int,
+        },
+        'skip': 'Requires an account',
     }, {
         'url': 'http://sp.nicovideo.jp/watch/sm28964488?ss_pos=1&cp_in=wt_tg',
         'only_matching': True,
@@ -395,7 +414,9 @@ class NiconicoIE(InfoExtractor):
 
         webpage_url = get_video_info('watch_url') or url
 
-        owner = api_data.get('owner', {})
+        # Note: cannot use api_data.get('owner', {}) because owner may be set to "null"
+        # in the JSON, which will cause None to be returned instead of {}.
+        owner = try_get(api_data, lambda x: x.get('owner'), dict) or {}
         uploader_id = get_video_info(['ch_id', 'user_id']) or owner.get('id')
         uploader = get_video_info(['ch_name', 'user_nickname']) or owner.get('nickname')
 
index ebdab8db9faa0c8911c53c5764a18456926b6a55..bdd5ff565443d7353e64427b99e0edf60e75bd0c 100644 (file)
--- a/youtube_dl/extractor/parliamentliveuk.py
+++ b/youtube_dl/extractor/parliamentliveuk.py
@@ -11,7 +11,7 @@ class ParliamentLiveUKIE(InfoExtractor):
     _TESTS = [{
         'url': 'http://parliamentlive.tv/Event/Index/c1e9d44d-fd6c-4263-b50f-97ed26cc998b',
         'info_dict': {
-            'id': 'c1e9d44d-fd6c-4263-b50f-97ed26cc998b',
+            'id': '1_af9nv9ym',
             'ext': 'mp4',
             'title': 'Home Affairs Committee',
             'uploader_id': 'FFMPEG-01',
@@ -28,14 +28,14 @@ class ParliamentLiveUKIE(InfoExtractor):
         webpage = self._download_webpage(
             'http://vodplayer.parliamentlive.tv/?mid=' + video_id, video_id)
         widget_config = self._parse_json(self._search_regex(
-            r'kWidgetConfig\s*=\s*({.+});',
+            r'(?s)kWidgetConfig\s*=\s*({.+});',
             webpage, 'kaltura widget config'), video_id)
-        kaltura_url = 'kaltura:%s:%s' % (widget_config['wid'][1:], widget_config['entry_id'])
+        kaltura_url = 'kaltura:%s:%s' % (
+            widget_config['wid'][1:], widget_config['entry_id'])
         event_title = self._download_json(
             'http://parliamentlive.tv/Event/GetShareVideo/' + video_id, video_id)['event']['title']
         return {
             '_type': 'url_transparent',
-            'id': video_id,
             'title': event_title,
             'description': '',
             'url': kaltura_url,
index 8889e4a1aaa3e41f49a63b53c010cf69d0842b1b..b51dcbe10dd14136220516c76e6cada7f70a9b0c 100644 (file)
--- a/youtube_dl/extractor/pbs.py
+++ b/youtube_dl/extractor/pbs.py
@@ -187,7 +187,7 @@ class PBSIE(InfoExtractor):
     _VALID_URL = r'''(?x)https?://
         (?:
            # Direct video URL
-           (?:%s)/(?:viralplayer|video)/(?P<id>[0-9]+)/? |
+           (?:%s)/(?:(?:vir|port)alplayer|video)/(?P<id>[0-9]+)(?:[?/]|$) |
            # Article with embedded player (or direct video)
            (?:www\.)?pbs\.org/(?:[^/]+/){1,5}(?P<presumptive_id>[^/]+?)(?:\.html)?/?(?:$|[?\#]) |
            # Player
@@ -367,6 +367,10 @@ class PBSIE(InfoExtractor):
         {
             'url': 'http://watch.knpb.org/video/2365616055/',
             'only_matching': True,
+        },
+        {
+            'url': 'https://player.pbs.org/portalplayer/3004638221/?uid=',
+            'only_matching': True,
         }
     ]
     _ERRORS = {
diff --git a/youtube_dl/extractor/servus.py b/youtube_dl/extractor/servus.py
new file mode 100644 (file)
index 0000000..264e1dd
--- /dev/null
+++ b/youtube_dl/extractor/servus.py
@@ -0,0 +1,43 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+
+class ServusIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?servus\.com/(?:at|de)/p/[^/]+/(?P<id>AA-\w+|\d+-\d+)'
+    _TESTS = [{
+        'url': 'https://www.servus.com/de/p/Die-Gr%C3%BCnen-aus-Sicht-des-Volkes/AA-1T6VBU5PW1W12/',
+        'md5': '046dee641cda1c4cabe13baef3be2c1c',
+        'info_dict': {
+            'id': 'AA-1T6VBU5PW1W12',
+            'ext': 'mp4',
+            'title': 'Die Grünen aus Volkssicht',
+            'description': 'md5:052b5da1cb2cd7d562ef1f19be5a5cba',
+            'thumbnail': r're:^https?://.*\.jpg$',
+        }
+    }, {
+        'url': 'https://www.servus.com/at/p/Wie-das-Leben-beginnt/1309984137314-381415152/',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+
+        title = self._og_search_title(webpage)
+        description = self._og_search_description(webpage)
+        thumbnail = self._og_search_thumbnail(webpage)
+
+        formats = self._extract_m3u8_formats(
+            'https://stv.rbmbtnx.net/api/v1/manifests/%s.m3u8' % video_id,
+            video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls')
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'formats': formats,
+        }
index 4ca9f6b3c811f59ef11eb82d173554341f3ab66d..efcbb36a9eb5e6e2cf5884ccea3aa9c505fa8cb3 100644 (file)
--- a/youtube_dl/extractor/skysports.py
+++ b/youtube_dl/extractor/skysports.py
@@ -2,7 +2,12 @@
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
-from ..utils import strip_or_none
+from ..utils import (
+    extract_attributes,
+    smuggle_url,
+    strip_or_none,
+    urljoin,
+)
 
 
 class SkySportsIE(InfoExtractor):
@@ -22,12 +27,22 @@ class SkySportsIE(InfoExtractor):
     def _real_extract(self, url):
         video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
+        video_data = extract_attributes(self._search_regex(
+            r'(<div.+?class="sdc-article-video__media-ooyala"[^>]+>)', webpage, 'video data'))
+
+        video_url = 'ooyala:%s' % video_data['data-video-id']
+        if video_data.get('data-token-required') == 'true':
+            token_fetch_options = self._parse_json(video_data.get('data-token-fetch-options', '{}'), video_id, fatal=False) or {}
+            token_fetch_url = token_fetch_options.get('url')
+            if token_fetch_url:
+                embed_token = self._download_webpage(urljoin(url, token_fetch_url), video_id, fatal=False)
+                if embed_token:
+                    video_url = smuggle_url(video_url, {'embed_token': embed_token.strip('"')})
 
         return {
             '_type': 'url_transparent',
             'id': video_id,
-            'url': 'ooyala:%s' % self._search_regex(
-                r'data-video-id="([^"]+)"', webpage, 'ooyala id'),
+            'url': video_url,
             'title': self._og_search_title(webpage),
             'description': strip_or_none(self._og_search_description(webpage)),
             'ie_key': 'Ooyala',
index 1c6799d579523806e9912912a0f50aa187c8773b..8894f4b0c32ee58894be125dbbd62d02ca6ae8fe 100644 (file)
--- a/youtube_dl/extractor/soundcloud.py
+++ b/youtube_dl/extractor/soundcloud.py
@@ -138,7 +138,7 @@ class SoundcloudIE(InfoExtractor):
         },
     ]
 
-    _CLIENT_ID = 'JlZIsxg2hY5WnBgtn3jfS0UYCl0K8DOg'
+    _CLIENT_ID = 'c6CU49JDMapyrQo06UxU9xouB9ZVzqCn'
     _IPHONE_CLIENT_ID = '376f225bf427445fc4bfb6b99b72e0bf'
 
     @staticmethod
index e004e2c5ab12705c8d9ff5e12b25f53579539c72..3d78a9d76cce8aa28902111fe3f9018dafbb856a 100644 (file)
--- a/youtube_dl/extractor/soundgasm.py
+++ b/youtube_dl/extractor/soundgasm.py
@@ -8,36 +8,49 @@ from .common import InfoExtractor
 
 class SoundgasmIE(InfoExtractor):
     IE_NAME = 'soundgasm'
-    _VALID_URL = r'https?://(?:www\.)?soundgasm\.net/u/(?P<user>[0-9a-zA-Z_\-]+)/(?P<title>[0-9a-zA-Z_\-]+)'
+    _VALID_URL = r'https?://(?:www\.)?soundgasm\.net/u/(?P<user>[0-9a-zA-Z_-]+)/(?P<display_id>[0-9a-zA-Z_-]+)'
     _TEST = {
         'url': 'http://soundgasm.net/u/ytdl/Piano-sample',
         'md5': '010082a2c802c5275bb00030743e75ad',
         'info_dict': {
             'id': '88abd86ea000cafe98f96321b23cc1206cbcbcc9',
             'ext': 'm4a',
-            'title': 'ytdl_Piano-sample',
-            'description': 'Royalty Free Sample Music'
+            'title': 'Piano sample',
+            'description': 'Royalty Free Sample Music',
+            'uploader': 'ytdl',
         }
     }
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
-        display_id = mobj.group('title')
-        audio_title = mobj.group('user') + '_' + mobj.group('title')
+        display_id = mobj.group('display_id')
+
         webpage = self._download_webpage(url, display_id)
+
         audio_url = self._html_search_regex(
-            r'(?s)m4a\:\s"([^"]+)"', webpage, 'audio URL')
-        audio_id = re.split(r'\/|\.', audio_url)[-2]
+            r'(?s)m4a\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
+            'audio URL', group='url')
+
+        title = self._search_regex(
+            r'<div[^>]+\bclass=["\']jp-title[^>]+>([^<]+)',
+            webpage, 'title', default=display_id)
+
         description = self._html_search_regex(
-            r'(?s)<li>Description:\s(.*?)<\/li>', webpage, 'description',
-            fatal=False)
+            (r'(?s)<div[^>]+\bclass=["\']jp-description[^>]+>(.+?)</div>',
+             r'(?s)<li>Description:\s(.*?)<\/li>'),
+            webpage, 'description', fatal=False)
+
+        audio_id = self._search_regex(
+            r'/([^/]+)\.m4a', audio_url, 'audio id', default=display_id)
 
         return {
             'id': audio_id,
             'display_id': display_id,
             'url': audio_url,
-            'title': audio_title,
-            'description': description
+            'vcodec': 'none',
+            'title': title,
+            'description': description,
+            'uploader': mobj.group('user'),
         }
 
 
index 3394c7e6ba4713ad0c63e2579d611a0319c45add..2863e53b5a47be353ae18df446c3ffd8a95d0913 100644 (file)
--- a/youtube_dl/extractor/spankbang.py
+++ b/youtube_dl/extractor/spankbang.py
@@ -3,6 +3,7 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
+from ..utils import ExtractorError
 
 
 class SpankBangIE(InfoExtractor):
@@ -33,6 +34,10 @@ class SpankBangIE(InfoExtractor):
         video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
 
+        if re.search(r'<[^>]+\bid=["\']video_removed', webpage):
+            raise ExtractorError(
+                'Video %s is not available' % video_id, expected=True)
+
         stream_key = self._html_search_regex(
             r'''var\s+stream_key\s*=\s*['"](.+?)['"]''',
             webpage, 'stream key')
diff --git a/youtube_dl/extractor/twentytwotracks.py b/youtube_dl/extractor/twentytwotracks.py
deleted file mode 100644 (file)
index d6c0ab1..0000000
--- a/youtube_dl/extractor/twentytwotracks.py
+++ /dev/null
@@ -1,86 +0,0 @@
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-from ..utils import int_or_none
-
-# 22Tracks regularly replace the audio tracks that can be streamed on their
-# site. The tracks usually expire after 1 months, so we can't add tests.
-
-
-class TwentyTwoTracksIE(InfoExtractor):
-    _VALID_URL = r'https?://22tracks\.com/(?P<city>[a-z]+)/(?P<genre>[\da-z]+)/(?P<id>\d+)'
-    IE_NAME = '22tracks:track'
-
-    _API_BASE = 'http://22tracks.com/api'
-
-    def _extract_info(self, city, genre_name, track_id=None):
-        item_id = track_id if track_id else genre_name
-
-        cities = self._download_json(
-            '%s/cities' % self._API_BASE, item_id,
-            'Downloading cities info',
-            'Unable to download cities info')
-        city_id = [x['id'] for x in cities if x['slug'] == city][0]
-
-        genres = self._download_json(
-            '%s/genres/%s' % (self._API_BASE, city_id), item_id,
-            'Downloading %s genres info' % city,
-            'Unable to download %s genres info' % city)
-        genre = [x for x in genres if x['slug'] == genre_name][0]
-        genre_id = genre['id']
-
-        tracks = self._download_json(
-            '%s/tracks/%s' % (self._API_BASE, genre_id), item_id,
-            'Downloading %s genre tracks info' % genre_name,
-            'Unable to download track info')
-
-        return [x for x in tracks if x['id'] == item_id][0] if track_id else [genre['title'], tracks]
-
-    def _get_track_url(self, filename, track_id):
-        token = self._download_json(
-            'http://22tracks.com/token.php?desktop=true&u=/128/%s' % filename,
-            track_id, 'Downloading token', 'Unable to download token')
-        return 'http://audio.22tracks.com%s?st=%s&e=%d' % (token['filename'], token['st'], token['e'])
-
-    def _extract_track_info(self, track_info, track_id):
-        download_url = self._get_track_url(track_info['filename'], track_id)
-        title = '%s - %s' % (track_info['artist'].strip(), track_info['title'].strip())
-        return {
-            'id': track_id,
-            'url': download_url,
-            'ext': 'mp3',
-            'title': title,
-            'duration': int_or_none(track_info.get('duration')),
-            'timestamp': int_or_none(track_info.get('published_at') or track_info.get('created'))
-        }
-
-    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-
-        city = mobj.group('city')
-        genre = mobj.group('genre')
-        track_id = mobj.group('id')
-
-        track_info = self._extract_info(city, genre, track_id)
-        return self._extract_track_info(track_info, track_id)
-
-
-class TwentyTwoTracksGenreIE(TwentyTwoTracksIE):
-    _VALID_URL = r'https?://22tracks\.com/(?P<city>[a-z]+)/(?P<genre>[\da-z]+)/?$'
-    IE_NAME = '22tracks:genre'
-
-    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-
-        city = mobj.group('city')
-        genre = mobj.group('genre')
-
-        genre_title, tracks = self._extract_info(city, genre)
-
-        entries = [
-            self._extract_track_info(track_info, track_info['id'])
-            for track_info in tracks]
-
-        return self.playlist_result(entries, genre, genre_title)
index c926c99a999bb88388c77d859e54c8973417b9db..fefcd28078f6c69058366fbb7b8f9e616508fbb6 100644 (file)
--- a/youtube_dl/extractor/twitch.py
+++ b/youtube_dl/extractor/twitch.py
@@ -609,7 +609,7 @@ class TwitchClipsIE(InfoExtractor):
                 r'(?s)clipInfo\s*=\s*({.+?});', webpage, 'clip info'),
             video_id, transform_source=js_to_json)
 
-        title = clip.get('channel_title') or self._og_search_title(webpage)
+        title = clip.get('title') or clip.get('channel_title') or self._og_search_title(webpage)
 
         formats = [{
             'url': option['source'],
diff --git a/youtube_dl/extractor/unity.py b/youtube_dl/extractor/unity.py
new file mode 100644 (file)
index 0000000..73daacf
--- /dev/null
+++ b/youtube_dl/extractor/unity.py
@@ -0,0 +1,32 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from .youtube import YoutubeIE
+
+
+class UnityIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?unity3d\.com/learn/tutorials/(?:[^/]+/)*(?P<id>[^/?#&]+)'
+    _TESTS = [{
+        'url': 'https://unity3d.com/learn/tutorials/topics/animation/animate-anything-mecanim',
+        'info_dict': {
+            'id': 'jWuNtik0C8E',
+            'ext': 'mp4',
+            'title': 'Live Training 22nd September 2014 -  Animate Anything',
+            'description': 'md5:e54913114bd45a554c56cdde7669636e',
+            'duration': 2893,
+            'uploader': 'Unity',
+            'uploader_id': 'Unity3D',
+            'upload_date': '20140926',
+        }
+    }, {
+        'url': 'https://unity3d.com/learn/tutorials/projects/2d-ufo-tutorial/following-player-camera?playlist=25844',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+        youtube_id = self._search_regex(
+            r'data-video-id="([_0-9a-zA-Z-]+)"',
+            webpage, 'youtube ID')
+        return self.url_result(youtube_id, ie=YoutubeIE.ie_key(), video_id=video_id)
index c3f71b45e3bbee17bbf7e1d4787ac0aa888f251b..cedb548767e84a512b8ca5e0253d81f62a8ee502 100644 (file)
--- a/youtube_dl/extractor/vimeo.py
+++ b/youtube_dl/extractor/vimeo.py
@@ -412,7 +412,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
         urls = []
         # Look for embedded (iframe) Vimeo player
         for mobj in re.finditer(
-                r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//player\.vimeo\.com/video/.+?)\1',
+                r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//player\.vimeo\.com/video/\d+.*?)\1',
                 webpage):
             urls.append(VimeoIE._smuggle_referrer(unescapeHTML(mobj.group('url')), url))
         PLAIN_EMBED_RE = (
diff --git a/youtube_dl/extractor/younow.py b/youtube_dl/extractor/younow.py
new file mode 100644 (file)
index 0000000..04dbc87
--- /dev/null
+++ b/youtube_dl/extractor/younow.py
@@ -0,0 +1,202 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import itertools
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+    ExtractorError,
+    int_or_none,
+    try_get,
+)
+
+CDN_API_BASE = 'https://cdn.younow.com/php/api'
+MOMENT_URL_FORMAT = '%s/moment/fetch/id=%%s' % CDN_API_BASE
+
+
+class YouNowLiveIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?younow\.com/(?P<id>[^/?#&]+)'
+    _TEST = {
+        'url': 'https://www.younow.com/AmandaPadeezy',
+        'info_dict': {
+            'id': 'AmandaPadeezy',
+            'ext': 'mp4',
+            'is_live': True,
+            'title': 'March 26, 2017',
+            'thumbnail': r're:^https?://.*\.jpg$',
+            'tags': ['girls'],
+            'categories': ['girls'],
+            'uploader': 'AmandaPadeezy',
+            'uploader_id': '6716501',
+            'uploader_url': 'https://www.younow.com/AmandaPadeezy',
+            'creator': 'AmandaPadeezy',
+        },
+        'skip': True,
+    }
+
+    @classmethod
+    def suitable(cls, url):
+        return (False
+                if YouNowChannelIE.suitable(url) or YouNowMomentIE.suitable(url)
+                else super(YouNowLiveIE, cls).suitable(url))
+
+    def _real_extract(self, url):
+        username = self._match_id(url)
+
+        data = self._download_json(
+            'https://api.younow.com/php/api/broadcast/info/curId=0/user=%s'
+            % username, username)
+
+        if data.get('errorCode') != 0:
+            raise ExtractorError(data['errorMsg'], expected=True)
+
+        uploader = try_get(
+            data, lambda x: x['user']['profileUrlString'],
+            compat_str) or username
+
+        return {
+            'id': uploader,
+            'is_live': True,
+            'title': self._live_title(uploader),
+            'thumbnail': data.get('awsUrl'),
+            'tags': data.get('tags'),
+            'categories': data.get('tags'),
+            'uploader': uploader,
+            'uploader_id': data.get('userId'),
+            'uploader_url': 'https://www.younow.com/%s' % username,
+            'creator': uploader,
+            'view_count': int_or_none(data.get('viewers')),
+            'like_count': int_or_none(data.get('likes')),
+            'formats': [{
+                'url': '%s/broadcast/videoPath/hls=1/broadcastId=%s/channelId=%s'
+                       % (CDN_API_BASE, data['broadcastId'], data['userId']),
+                'ext': 'mp4',
+                'protocol': 'm3u8',
+            }],
+        }
+
+
+def _extract_moment(item, fatal=True):
+    moment_id = item.get('momentId')
+    if not moment_id:
+        if not fatal:
+            return
+        raise ExtractorError('Unable to extract moment id')
+
+    moment_id = compat_str(moment_id)
+
+    title = item.get('text')
+    if not title:
+        title = 'YouNow %s' % (
+            item.get('momentType') or item.get('titleType') or 'moment')
+
+    uploader = try_get(item, lambda x: x['owner']['name'], compat_str)
+    uploader_id = try_get(item, lambda x: x['owner']['userId'])
+    uploader_url = 'https://www.younow.com/%s' % uploader if uploader else None
+
+    entry = {
+        'extractor_key': 'YouNowMoment',
+        'id': moment_id,
+        'title': title,
+        'view_count': int_or_none(item.get('views')),
+        'like_count': int_or_none(item.get('likes')),
+        'timestamp': int_or_none(item.get('created')),
+        'creator': uploader,
+        'uploader': uploader,
+        'uploader_id': uploader_id,
+        'uploader_url': uploader_url,
+        'formats': [{
+            'url': 'https://hls.younow.com/momentsplaylists/live/%s/%s.m3u8'
+                   % (moment_id, moment_id),
+            'ext': 'mp4',
+            'protocol': 'm3u8_native',
+        }],
+    }
+
+    return entry
+
+
+class YouNowChannelIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?younow\.com/(?P<id>[^/]+)/channel'
+    _TEST = {
+        'url': 'https://www.younow.com/its_Kateee_/channel',
+        'info_dict': {
+            'id': '14629760',
+            'title': 'its_Kateee_ moments'
+        },
+        'playlist_mincount': 8,
+    }
+
+    def _entries(self, username, channel_id):
+        created_before = 0
+        for page_num in itertools.count(1):
+            if created_before is None:
+                break
+            info = self._download_json(
+                '%s/moment/profile/channelId=%s/createdBefore=%d/records=20'
+                % (CDN_API_BASE, channel_id, created_before), username,
+                note='Downloading moments page %d' % page_num)
+            items = info.get('items')
+            if not items or not isinstance(items, list):
+                break
+            for item in items:
+                if not isinstance(item, dict):
+                    continue
+                item_type = item.get('type')
+                if item_type == 'moment':
+                    entry = _extract_moment(item, fatal=False)
+                    if entry:
+                        yield entry
+                elif item_type == 'collection':
+                    moments = item.get('momentsIds')
+                    if isinstance(moments, list):
+                        for moment_id in moments:
+                            m = self._download_json(
+                                MOMENT_URL_FORMAT % moment_id, username,
+                                note='Downloading %s moment JSON' % moment_id,
+                                fatal=False)
+                            if m and isinstance(m, dict) and m.get('item'):
+                                entry = _extract_moment(m['item'])
+                                if entry:
+                                    yield entry
+                created_before = int_or_none(item.get('created'))
+
+    def _real_extract(self, url):
+        username = self._match_id(url)
+        channel_id = compat_str(self._download_json(
+            'https://api.younow.com/php/api/broadcast/info/curId=0/user=%s'
+            % username, username, note='Downloading user information')['userId'])
+        return self.playlist_result(
+            self._entries(username, channel_id), channel_id,
+            '%s moments' % username)
+
+
+class YouNowMomentIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?younow\.com/[^/]+/(?P<id>[^/?#&]+)'
+    _TEST = {
+        'url': 'https://www.younow.com/GABO.../20712117/36319236/3b316doc/m',
+        'md5': 'a30c70eadb9fb39a1aa3c8c0d22a0807',
+        'info_dict': {
+            'id': '20712117',
+            'ext': 'mp4',
+            'title': 'YouNow capture',
+            'view_count': int,
+            'like_count': int,
+            'timestamp': 1490432040,
+            'upload_date': '20170325',
+            'uploader': 'GABO...',
+            'uploader_id': 35917228,
+        },
+    }
+
+    @classmethod
+    def suitable(cls, url):
+        return (False
+                if YouNowChannelIE.suitable(url)
+                else super(YouNowMomentIE, cls).suitable(url))
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        item = self._download_json(MOMENT_URL_FORMAT % video_id, video_id)
+        return _extract_moment(item['item'])
index 4e8db240d3f9d141cfa457c1d941d1b5399f0c67..9943dddc13b478ce23e71c43d19f5cf18694f90b 100644 (file)
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -1391,7 +1391,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             )
             (["\'])
                 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
-                (?:embed|v|p)/.+?)
+                (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
             \1''', webpage)]
 
         # lazyYT YouTube embed
@@ -1622,6 +1622,17 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
         # description
         description_original = video_description = get_element_by_id("eow-description", video_webpage)
         if video_description:
+
+            def replace_url(m):
+                redir_url = compat_urlparse.urljoin(url, m.group(1))
+                parsed_redir_url = compat_urllib_parse_urlparse(redir_url)
+                if re.search(r'^(?:www\.)?(?:youtube(?:-nocookie)?\.com|youtu\.be)$', parsed_redir_url.netloc) and parsed_redir_url.path == '/redirect':
+                    qs = compat_parse_qs(parsed_redir_url.query)
+                    q = qs.get('q')
+                    if q and q[0]:
+                        return q[0]
+                return redir_url
+
             description_original = video_description = re.sub(r'''(?x)
                 <a\s+
                     (?:[a-zA-Z-]+="[^"]*"\s+)*?
@@ -1630,7 +1641,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                     class="[^"]*"[^>]*>
                 [^<]+\.{3}\s*
                 </a>
-            ''', lambda m: compat_urlparse.urljoin(url, m.group(1)), video_description)
+            ''', replace_url, video_description)
             video_description = clean_html(video_description)
         else:
             fd_mobj = re.search(r'<meta name="description" content="([^"]+)"', video_webpage)
index 59fb3343582e6dfa2aab2c6a8311300bd7458141..34866a54b6efc122f4d0edb22712c503fa448ec0 100644 (file)
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -1835,10 +1835,20 @@ def parse_duration(s):
         days, hours, mins, secs, ms = m.groups()
     else:
         m = re.match(
-            r'''(?ix)(?:P?T)?
+            r'''(?ix)(?:P?
+                (?:
+                    [0-9]+\s*y(?:ears?)?\s*
+                )?
+                (?:
+                    [0-9]+\s*m(?:onths?)?\s*
+                )?
+                (?:
+                    [0-9]+\s*w(?:eeks?)?\s*
+                )?
                 (?:
                     (?P<days>[0-9]+)\s*d(?:ays?)?\s*
                 )?
+                T)?
                 (?:
                     (?P<hours>[0-9]+)\s*h(?:ours?)?\s*
                 )?
index d01ba30950a375541b7ba32fd54dd4923f6bd98e..8b67d23fee344f6192b34c6ab218577538e5575b 100644 (file)
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,3 +1,3 @@
 from __future__ import unicode_literals
 
-__version__ = '2017.10.15.1'
+__version__ = '2017.11.06'