Raphaël G. Git Repositories - youtubedl/commitdiff
New upstream version 2017.11.06
authorRogério Brito <rbrito@ime.usp.br>
Wed, 8 Nov 2017 06:34:01 +0000 (04:34 -0200)
committerRogério Brito <rbrito@ime.usp.br>
Wed, 8 Nov 2017 06:34:01 +0000 (04:34 -0200)
46 files changed:
ChangeLog
README.md
README.txt
docs/supportedsites.md
test/test_InfoExtractor.py
test/test_utils.py
youtube-dl
youtube_dl/downloader/f4m.py
youtube_dl/downloader/fragment.py
youtube_dl/downloader/hls.py
youtube_dl/extractor/arte.py
youtube_dl/extractor/azmedien.py
youtube_dl/extractor/canvas.py
youtube_dl/extractor/common.py
youtube_dl/extractor/dctp.py
youtube_dl/extractor/dramafever.py
youtube_dl/extractor/drtv.py
youtube_dl/extractor/egghead.py
youtube_dl/extractor/eporner.py
youtube_dl/extractor/extractors.py
youtube_dl/extractor/fxnetworks.py
youtube_dl/extractor/gamespot.py
youtube_dl/extractor/gigya.py [new file with mode: 0644]
youtube_dl/extractor/hotstar.py
youtube_dl/extractor/medialaan.py
youtube_dl/extractor/megaphone.py
youtube_dl/extractor/myvideo.py [deleted file]
youtube_dl/extractor/nbc.py
youtube_dl/extractor/ndtv.py
youtube_dl/extractor/nick.py
youtube_dl/extractor/niconico.py
youtube_dl/extractor/parliamentliveuk.py
youtube_dl/extractor/pbs.py
youtube_dl/extractor/servus.py [new file with mode: 0644]
youtube_dl/extractor/skysports.py
youtube_dl/extractor/soundcloud.py
youtube_dl/extractor/soundgasm.py
youtube_dl/extractor/spankbang.py
youtube_dl/extractor/twentytwotracks.py [deleted file]
youtube_dl/extractor/twitch.py
youtube_dl/extractor/unity.py [new file with mode: 0644]
youtube_dl/extractor/vimeo.py
youtube_dl/extractor/younow.py [new file with mode: 0644]
youtube_dl/extractor/youtube.py
youtube_dl/utils.py
youtube_dl/version.py

index d728e4d0362356f39af55c2da18a035e6b3a5d38..8af3682745463d71f6bbf97057f840a3b01e00f0 100644 (file)
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,64 @@
+version 2017.11.06
+
+Core
++ [extractor/common] Add protocol for f4m formats
+* [f4m] Prefer baseURL for relative URLs (#14660)
+* [extractor/common] Respect URL query in _extract_wowza_formats (#14645)
+
+Extractors
++ [hotstar:playlist] Add support for playlists (#12465)
+* [hotstar] Bypass geo restriction (#14672)
+- [22tracks] Remove extractor (#11024, #14628)
++ [skysport] Add support for ooyala videos protected with embed_token (#14641)
+* [gamespot] Extract formats referenced with new data fields (#14652)
+* [spankbang] Detect unavailable videos (#14644)
+
+
+version 2017.10.29
+
+Core
+* [extractor/common] Prefix format id for audio only HLS formats
++ [utils] Add support for zero years and months in parse_duration
+
+Extractors
+* [egghead] Fix extraction (#14388)
++ [fxnetworks] Extract series metadata (#14603)
++ [younow] Add support for younow.com (#9255, #9432, #12436)
+* [dctptv] Fix extraction (#14599)
+* [youtube] Restrict embed regex (#14600)
+* [vimeo] Restrict iframe embed regex (#14600)
+* [soundgasm] Improve extraction (#14588)
+- [myvideo] Remove extractor (#8557)
++ [nbc] Add support for classic-tv videos (#14575)
++ [vrtnu] Add support for cookies authentication and simplify (#11873)
++ [canvas] Add support for vrt.be/vrtnu (#11873)
+* [twitch:clips] Fix title extraction (#14566)
++ [ndtv] Add support for sub-sites (#14534)
+* [dramafever] Fix login error message extraction
++ [nick] Add support for more nickelodeon sites (no, dk, se, ch, fr, es, pt,
+  ro, hu) (#14553)
+
+
+version 2017.10.20
+
+Core
+* [downloader/fragment] Report warning instead of error on inconsistent
+  download state
+* [downloader/hls] Fix total fragments count when ad fragments exist
+
+Extractors
+* [parliamentliveuk] Fix extraction (#14524)
+* [soundcloud] Update client id (#14546)
++ [servus] Add support for servus.com (#14362)
++ [unity] Add support for unity3d.com (#14528)
+* [youtube] Replace youtube redirect URLs in description (#14517)
+* [pbs] Restrict direct video URL regular expression (#14519)
+* [drtv] Respect preference for direct HTTP formats (#14509)
++ [eporner] Add support for embed URLs (#14507)
+* [arte] Capture and output error message
+* [niconico] Improve uploader metadata extraction robustness (#14135)
+
+
 version 2017.10.15.1
 
 Core
@@ -834,7 +895,7 @@ version 2017.04.14
 
 Core
 + [downloader/hls] Add basic support for EXT-X-BYTERANGE tag (#10955)
-+ [adobepass] Improve Comcast and Verison login code (#10803)
++ [adobepass] Improve Comcast and Verizon login code (#10803)
 + [adobepass] Add support for Verizon (#10803)
 
 Extractors
index 2879aad24c23b7c88ffe2259219beab73aca226c..ea321d5362060ec68642508e35d31ec449d3ab6f 100644 (file)
--- a/README.md
+++ b/README.md
@@ -1,3 +1,5 @@
+[![Build Status](https://travis-ci.org/rg3/youtube-dl.svg?branch=master)](https://travis-ci.org/rg3/youtube-dl)
+
 youtube-dl - download videos from youtube.com or other video platforms
 
 - [INSTALLATION](#installation)
index a42d837696a7f1a534c212802554eec5192a1f3f..4b7adfd68c28eb9df629d0904334a29670c663d0 100644 (file)
@@ -1,3 +1,5 @@
+[Build Status]
+
 youtube-dl - download videos from youtube.com or other video platforms
 
 -   INSTALLATION
index 7071450d4dbba00e39dd416f092d842cfa49f8c2..6009df571204fdcd58bc1a47c9c66f10e8f45330 100644 (file)
@@ -3,8 +3,6 @@
  - **1up.com**
  - **20min**
  - **220.ro**
- - **22tracks:genre**
- - **22tracks:track**
  - **24video**
  - **3qsdn**: 3Q SDN
  - **3sat**
  - **HornBunny**
  - **HotNewHipHop**
  - **HotStar**
+ - **hotstar:playlist**
  - **Howcast**
  - **HowStuffWorks**
  - **HRTi**
  - **MySpace:album**
  - **MySpass**
  - **Myvi**
- - **myvideo** (Currently broken)
  - **MyVidster**
  - **n-tv.de**
  - **natgeo**
  - **SenateISVP**
  - **SendtoNews**
  - **ServingSys**
+ - **Servus**
  - **Sexu**
  - **Shahid**
  - **Shared**: shared.sx
  - **UDNEmbed**: 聯合影音
  - **UKTVPlay**
  - **Unistra**
+ - **Unity**
  - **uol.com.br**
  - **uplynk**
  - **uplynk:preplay**
  - **vpro**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl
  - **Vrak**
  - **VRT**: deredactie.be, sporza.be, cobra.be and cobra.canvas.be
+ - **VrtNU**: VrtNU.be
  - **vrv**
  - **vrv:series**
  - **VShare**
  - **YouJizz**
  - **youku**: 优酷
  - **youku:show**
+ - **YouNowChannel**
+ - **YouNowLive**
+ - **YouNowMoment**
  - **YouPorn**
  - **YourUpload**
  - **youtube**: YouTube.com
index f18a823fcf834e4bbae95e9d72f9f3821c307c2b..686c63efac7ff4b94ca6012ab7affdade8b7c008 100644 (file)
@@ -574,6 +574,32 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
                 self.ie._sort_formats(formats)
                 expect_value(self, formats, expected_formats, None)
 
+    def test_parse_f4m_formats(self):
+        _TEST_CASES = [
+            (
+                # https://github.com/rg3/youtube-dl/issues/14660
+                'custom_base_url',
+                'http://api.new.livestream.com/accounts/6115179/events/6764928/videos/144884262.f4m',
+                [{
+                    'manifest_url': 'http://api.new.livestream.com/accounts/6115179/events/6764928/videos/144884262.f4m',
+                    'ext': 'flv',
+                    'format_id': '2148',
+                    'protocol': 'f4m',
+                    'tbr': 2148,
+                    'width': 1280,
+                    'height': 720,
+                }]
+            ),
+        ]
+
+        for f4m_file, f4m_url, expected_formats in _TEST_CASES:
+            with io.open('./test/testdata/f4m/%s.f4m' % f4m_file,
+                         mode='r', encoding='utf-8') as f:
+                formats = self.ie._parse_f4m_formats(
+                    compat_etree_fromstring(f.read().encode('utf-8')),
+                    f4m_url, None)
+                self.ie._sort_formats(formats)
+                expect_value(self, formats, expected_formats, None)
 
 if __name__ == '__main__':
     unittest.main()
index efa73d0f45e17b76647a8e7895a5bd3c62f404b3..cc13f795c338d816b442d9d338262eb9323f4f64 100644 (file)
@@ -540,6 +540,7 @@ class TestUtil(unittest.TestCase):
         self.assertEqual(parse_duration('87 Min.'), 5220)
         self.assertEqual(parse_duration('PT1H0.040S'), 3600.04)
         self.assertEqual(parse_duration('PT00H03M30SZ'), 210)
+        self.assertEqual(parse_duration('P0Y0M0DT0H4M20.880S'), 260.88)
 
     def test_fix_xml_ampersands(self):
         self.assertEqual(
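
The new assertion covers ISO 8601 durations with zero year and month components. A quick sanity check from a youtube-dl checkout (a minimal sketch; values taken from the tests above):

# Sanity check of the extended parse_duration() behaviour.
from youtube_dl.utils import parse_duration

assert parse_duration('P0Y0M0DT0H4M20.880S') == 260.88  # zero years/months now parse
assert parse_duration('PT00H03M30SZ') == 210            # existing behaviour unchanged
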
index 15c016a00eae3eae1c6d7aec617a4930fa8dfd42..3b69288626280e3c15249625c3a0b8972223a9e6 100755 (executable)
Binary files a/youtube-dl and b/youtube-dl differ
index c8fde9a89093393132262f7b7d5ec60d83de4b8d..fdb80f42ae3fd61d76e9fe0d14274da1c127d289 100644 (file)
@@ -243,8 +243,17 @@ def remove_encrypted_media(media):
                        media))
 
 
-def _add_ns(prop):
-    return '{http://ns.adobe.com/f4m/1.0}%s' % prop
+def _add_ns(prop, ver=1):
+    return '{http://ns.adobe.com/f4m/%d.0}%s' % (ver, prop)
+
+
+def get_base_url(manifest):
+    base_url = xpath_text(
+        manifest, [_add_ns('baseURL'), _add_ns('baseURL', 2)],
+        'base URL', default=None)
+    if base_url:
+        base_url = base_url.strip()
+    return base_url
 
 
 class F4mFD(FragmentFD):
@@ -330,13 +339,13 @@ class F4mFD(FragmentFD):
             rate, media = list(filter(
                 lambda f: int(f[0]) == requested_bitrate, formats))[0]
 
-        base_url = compat_urlparse.urljoin(man_url, media.attrib['url'])
+        # Prefer baseURL for relative URLs as per 11.2 of F4M 3.0 spec.
+        man_base_url = get_base_url(doc) or man_url
+
+        base_url = compat_urlparse.urljoin(man_base_url, media.attrib['url'])
         bootstrap_node = doc.find(_add_ns('bootstrapInfo'))
-        # From Adobe F4M 3.0 spec:
-        # The <baseURL> element SHALL be the base URL for all relative
-        # (HTTP-based) URLs in the manifest. If <baseURL> is not present, said
-        # URLs should be relative to the location of the containing document.
-        boot_info, bootstrap_url = self._parse_bootstrap_node(bootstrap_node, man_url)
+        boot_info, bootstrap_url = self._parse_bootstrap_node(
+            bootstrap_node, man_base_url)
         live = boot_info['live']
         metadata_node = media.find(_add_ns('metadata'))
         if metadata_node is not None:
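
The new get_base_url() helper reads baseURL from either the 1.0 or 2.0 F4M namespace, and the downloader now resolves relative media and bootstrap URLs against it, falling back to the manifest URL when no baseURL is present. A minimal sketch of that resolution (hypothetical manifest content and URLs):

# Minimal sketch of the baseURL preference (hypothetical manifest and URLs).
from youtube_dl.compat import compat_etree_fromstring, compat_urlparse
from youtube_dl.downloader.f4m import get_base_url

doc = compat_etree_fromstring(
    b'<manifest xmlns="http://ns.adobe.com/f4m/1.0">'
    b'<baseURL> http://cdn.example.com/vod/ </baseURL>'
    b'<media url="stream_2148.f4m" bitrate="2148"/>'
    b'</manifest>')
man_url = 'http://api.example.com/events/6764928/videos/144884262.f4m'
man_base_url = get_base_url(doc) or man_url  # baseURL wins when present (and is stripped)
print(compat_urlparse.urljoin(man_base_url, 'stream_2148.f4m'))
# http://cdn.example.com/vod/stream_2148.f4m
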
index 7e891b92a3b6a05257484c5d8ba81590013bb74b..93002e45a93278e9f527a08391b8b5b6bd7df218 100644 (file)
@@ -158,7 +158,7 @@ class FragmentFD(FileDownloader):
             if os.path.isfile(encodeFilename(self.ytdl_filename(ctx['filename']))):
                 self._read_ytdl_file(ctx)
                 if ctx['fragment_index'] > 0 and resume_len == 0:
-                    self.report_error(
+                    self.report_warning(
                         'Inconsistent state of incomplete fragment download. '
                         'Restarting from the beginning...')
                     ctx['fragment_index'] = resume_len = 0
index 7955ca510dd200fedb5d8507d937b716d8d56772..1a6e226c89c809acec5a011b943043e7314d2cfd 100644 (file)
@@ -88,6 +88,7 @@ class HlsFD(FragmentFD):
             if line.startswith('#'):
                 if anvato_ad(line):
                     ad_frags += 1
+                    ad_frag_next = True
                 continue
             if ad_frag_next:
                 ad_frag_next = False
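
The added ad_frag_next flag makes the counter skip the URI line that follows an ad marker, so ad fragments no longer inflate the media fragment total. A standalone sketch of the corrected counting loop (is_ad_marker() is a hypothetical stand-in for the downloader's own anvato_ad() check):

# Standalone sketch of the corrected fragment counting.
def count_fragments(playlist, is_ad_marker):
    media_frags = ad_frags = 0
    ad_frag_next = False
    for line in playlist.splitlines():
        line = line.strip()
        if not line:
            continue
        if line.startswith('#'):
            if is_ad_marker(line):
                ad_frags += 1
                ad_frag_next = True  # the next URI line belongs to the ad
            continue
        if ad_frag_next:
            ad_frag_next = False
            continue
        media_frags += 1
    return media_frags, ad_frags
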
index 5cde90c5b23d1f92a7709c766102bb298e63cadb..ffc321821cd3a4a0ba9a62ad97b6f443d8ecacb3 100644 (file)
@@ -6,6 +6,7 @@ import re
 from .common import InfoExtractor
 from ..compat import (
     compat_parse_qs,
+    compat_str,
     compat_urllib_parse_urlparse,
 )
 from ..utils import (
@@ -15,6 +16,7 @@ from ..utils import (
     int_or_none,
     NO_DEFAULT,
     qualities,
+    try_get,
     unified_strdate,
 )
 
@@ -80,12 +82,15 @@ class ArteTVBaseIE(InfoExtractor):
         info = self._download_json(json_url, video_id)
         player_info = info['videoJsonPlayer']
 
-        vsr = player_info['VSR']
-
+        vsr = try_get(player_info, lambda x: x['VSR'], dict)
         if not vsr:
-            raise ExtractorError(
-                'Video %s is not available' % player_info.get('VID') or video_id,
-                expected=True)
+            error = None
+            if try_get(player_info, lambda x: x['custom_msg']['type']) == 'error':
+                error = try_get(
+                    player_info, lambda x: x['custom_msg']['msg'], compat_str)
+            if not error:
+                error = 'Video %s is not available' % player_info.get('VID') or video_id
+            raise ExtractorError(error, expected=True)
 
         upload_date_str = player_info.get('shootingDate')
         if not upload_date_str:
index f4e07d9012d40a7a9b7a385331d60ba033f2a6cd..68f26e2cad635bd8eb23a14ced3e1b90b2b320fc 100644 (file)
@@ -47,7 +47,7 @@ class AZMedienIE(AZMedienBaseIE):
         'url': 'http://www.telezueri.ch/62-show-zuerinews/13772-episode-sonntag-18-dezember-2016/32419-segment-massenabweisungen-beim-hiltl-club-wegen-pelzboom',
         'info_dict': {
             'id': '1_2444peh4',
-            'ext': 'mov',
+            'ext': 'mp4',
             'title': 'Massenabweisungen beim Hiltl Club wegen Pelzboom',
             'description': 'md5:9ea9dd1b159ad65b36ddcf7f0d7c76a8',
             'uploader_id': 'TeleZ?ri',
index 6899f8431788fad62e4615a33d95f3ffa65f4ea1..3faa76076318813d8b16f005f9a87ce5df986034 100644 (file)
@@ -1,16 +1,22 @@
 from __future__ import unicode_literals
 
 import re
+import json
 
 from .common import InfoExtractor
+from .gigya import GigyaBaseIE
+from ..compat import compat_HTTPError
 from ..utils import (
-    float_or_none,
+    ExtractorError,
     strip_or_none,
+    float_or_none,
+    int_or_none,
+    parse_iso8601,
 )
 
 
 class CanvasIE(InfoExtractor):
-    _VALID_URL = r'https?://mediazone\.vrt\.be/api/v1/(?P<site_id>canvas|een|ketnet)/assets/(?P<id>m[dz]-ast-[^/?#&]+)'
+    _VALID_URL = r'https?://mediazone\.vrt\.be/api/v1/(?P<site_id>canvas|een|ketnet|vrtvideo)/assets/(?P<id>[^/?#&]+)'
     _TESTS = [{
         'url': 'https://mediazone.vrt.be/api/v1/ketnet/assets/md-ast-4ac54990-ce66-4d00-a8ca-9eac86f4c475',
         'md5': '90139b746a0a9bd7bb631283f6e2a64e',
@@ -166,3 +172,139 @@ class CanvasEenIE(InfoExtractor):
             'title': title,
             'description': self._og_search_description(webpage),
         }
+
+
+class VrtNUIE(GigyaBaseIE):
+    IE_DESC = 'VrtNU.be'
+    _VALID_URL = r'https?://(?:www\.)?vrt\.be/(?P<site_id>vrtnu)/(?:[^/]+/)*(?P<id>[^/?#&]+)'
+    _TESTS = [{
+        'url': 'https://www.vrt.be/vrtnu/a-z/postbus-x/1/postbus-x-s1a1/',
+        'info_dict': {
+            'id': 'pbs-pub-2e2d8c27-df26-45c9-9dc6-90c78153044d$vid-90c932b1-e21d-4fb8-99b1-db7b49cf74de',
+            'ext': 'flv',
+            'title': 'De zwarte weduwe',
+            'description': 'md5:d90c21dced7db869a85db89a623998d4',
+            'duration': 1457.04,
+            'thumbnail': r're:^https?://.*\.jpg$',
+            'season': '1',
+            'season_number': 1,
+            'episode_number': 1,
+        },
+        'skip': 'This video is only available for registered users'
+    }]
+    _NETRC_MACHINE = 'vrtnu'
+    _APIKEY = '3_0Z2HujMtiWq_pkAjgnS2Md2E11a1AwZjYiBETtwNE-EoEHDINgtnvcAOpNgmrVGy'
+    _CONTEXT_ID = 'R3595707040'
+
+    def _real_initialize(self):
+        self._login()
+
+    def _login(self):
+        username, password = self._get_login_info()
+        if username is None:
+            return
+
+        auth_data = {
+            'APIKey': self._APIKEY,
+            'targetEnv': 'jssdk',
+            'loginID': username,
+            'password': password,
+            'authMode': 'cookie',
+        }
+
+        auth_info = self._gigya_login(auth_data)
+
+        # Sometimes authentication fails for no good reason, retry
+        login_attempt = 1
+        while login_attempt <= 3:
+            try:
+                # When requesting a token, no actual token is returned, but the
+                # necessary cookies are set.
+                self._request_webpage(
+                    'https://token.vrt.be',
+                    None, note='Requesting a token', errnote='Could not get a token',
+                    headers={
+                        'Content-Type': 'application/json',
+                        'Referer': 'https://www.vrt.be/vrtnu/',
+                    },
+                    data=json.dumps({
+                        'uid': auth_info['UID'],
+                        'uidsig': auth_info['UIDSignature'],
+                        'ts': auth_info['signatureTimestamp'],
+                        'email': auth_info['profile']['email'],
+                    }).encode('utf-8'))
+            except ExtractorError as e:
+                if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
+                    login_attempt += 1
+                    self.report_warning('Authentication failed')
+                    self._sleep(1, None, msg_template='Waiting for %(timeout)s seconds before trying again')
+                else:
+                    raise e
+            else:
+                break
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, display_id)
+
+        title = self._html_search_regex(
+            r'(?ms)<h1 class="content__heading">(.+?)</h1>',
+            webpage, 'title').strip()
+
+        description = self._html_search_regex(
+            r'(?ms)<div class="content__description">(.+?)</div>',
+            webpage, 'description', default=None)
+
+        season = self._html_search_regex(
+            [r'''(?xms)<div\ class="tabs__tab\ tabs__tab--active">\s*
+                    <span>seizoen\ (.+?)</span>\s*
+                </div>''',
+             r'<option value="seizoen (\d{1,3})" data-href="[^"]+?" selected>'],
+            webpage, 'season', default=None)
+
+        season_number = int_or_none(season)
+
+        episode_number = int_or_none(self._html_search_regex(
+            r'''(?xms)<div\ class="content__episode">\s*
+                    <abbr\ title="aflevering">afl</abbr>\s*<span>(\d+)</span>
+                </div>''',
+            webpage, 'episode_number', default=None))
+
+        release_date = parse_iso8601(self._html_search_regex(
+            r'(?ms)<div class="content__broadcastdate">\s*<time\ datetime="(.+?)"',
+            webpage, 'release_date', default=None))
+
+        # If there's a ? or a # in the URL, remove them and everything after
+        clean_url = url.split('?')[0].split('#')[0].strip('/')
+        securevideo_url = clean_url + '.mssecurevideo.json'
+
+        try:
+            video = self._download_json(securevideo_url, display_id)
+        except ExtractorError as e:
+            if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
+                self.raise_login_required()
+            raise
+
+        # We are dealing with a '../<show>.relevant' URL
+        redirect_url = video.get('url')
+        if redirect_url:
+            return self.url_result(self._proto_relative_url(redirect_url, 'https:'))
+
+        # There is only one entry, but with an unknown key, so just get
+        # the first one
+        video_id = list(video.values())[0].get('videoid')
+
+        return {
+            '_type': 'url_transparent',
+            'url': 'https://mediazone.vrt.be/api/v1/vrtvideo/assets/%s' % video_id,
+            'ie_key': CanvasIE.ie_key(),
+            'id': video_id,
+            'display_id': display_id,
+            'title': title,
+            'description': description,
+            'season': season,
+            'season_number': season_number,
+            'episode_number': episode_number,
+            'release_date': release_date,
+        }
index a692406931d4b63711609c7b8355635235849275..e2d9f52b018c25abc5a58a93785473ff88d90b74 100644 (file)
@@ -29,7 +29,10 @@ from ..compat import (
     compat_urlparse,
     compat_xml_parse_error,
 )
-from ..downloader.f4m import remove_encrypted_media
+from ..downloader.f4m import (
+    get_base_url,
+    remove_encrypted_media,
+)
 from ..utils import (
     NO_DEFAULT,
     age_restricted,
@@ -1239,11 +1242,8 @@ class InfoExtractor(object):
         media_nodes = remove_encrypted_media(media_nodes)
         if not media_nodes:
             return formats
-        base_url = xpath_text(
-            manifest, ['{http://ns.adobe.com/f4m/1.0}baseURL', '{http://ns.adobe.com/f4m/2.0}baseURL'],
-            'base URL', default=None)
-        if base_url:
-            base_url = base_url.strip()
+
+        manifest_base_url = get_base_url(manifest)
 
         bootstrap_info = xpath_element(
             manifest, ['{http://ns.adobe.com/f4m/1.0}bootstrapInfo', '{http://ns.adobe.com/f4m/2.0}bootstrapInfo'],
@@ -1275,7 +1275,7 @@ class InfoExtractor(object):
                     continue
                 manifest_url = (
                     media_url if media_url.startswith('http://') or media_url.startswith('https://')
-                    else ((base_url or '/'.join(manifest_url.split('/')[:-1])) + '/' + media_url))
+                    else ((manifest_base_url or '/'.join(manifest_url.split('/')[:-1])) + '/' + media_url))
                 # If media_url is itself a f4m manifest do the recursive extraction
                 # since bitrates in parent manifest (this one) and media_url manifest
                 # may differ leading to inability to resolve the format by requested
@@ -1310,6 +1310,7 @@ class InfoExtractor(object):
                 'url': manifest_url,
                 'manifest_url': manifest_url,
                 'ext': 'flv' if bootstrap_info is not None else None,
+                'protocol': 'f4m',
                 'tbr': tbr,
                 'width': width,
                 'height': height,
@@ -1401,7 +1402,7 @@ class InfoExtractor(object):
             media_url = media.get('URI')
             if media_url:
                 format_id = []
-                for v in (group_id, name):
+                for v in (m3u8_id, group_id, name):
                     if v:
                         format_id.append(v)
                 f = {
@@ -2233,27 +2234,35 @@ class InfoExtractor(object):
         return formats
 
     def _extract_wowza_formats(self, url, video_id, m3u8_entry_protocol='m3u8_native', skip_protocols=[]):
+        query = compat_urlparse.urlparse(url).query
         url = re.sub(r'/(?:manifest|playlist|jwplayer)\.(?:m3u8|f4m|mpd|smil)', '', url)
         url_base = self._search_regex(
             r'(?:(?:https?|rtmp|rtsp):)?(//[^?]+)', url, 'format url')
         http_base_url = '%s:%s' % ('http', url_base)
         formats = []
+
+        def manifest_url(manifest):
+            m_url = '%s/%s' % (http_base_url, manifest)
+            if query:
+                m_url += '?%s' % query
+            return m_url
+
         if 'm3u8' not in skip_protocols:
             formats.extend(self._extract_m3u8_formats(
-                http_base_url + '/playlist.m3u8', video_id, 'mp4',
+                manifest_url('playlist.m3u8'), video_id, 'mp4',
                 m3u8_entry_protocol, m3u8_id='hls', fatal=False))
         if 'f4m' not in skip_protocols:
             formats.extend(self._extract_f4m_formats(
-                http_base_url + '/manifest.f4m',
+                manifest_url('manifest.f4m'),
                 video_id, f4m_id='hds', fatal=False))
         if 'dash' not in skip_protocols:
             formats.extend(self._extract_mpd_formats(
-                http_base_url + '/manifest.mpd',
+                manifest_url('manifest.mpd'),
                 video_id, mpd_id='dash', fatal=False))
         if re.search(r'(?:/smil:|\.smil)', url_base):
             if 'smil' not in skip_protocols:
                 rtmp_formats = self._extract_smil_formats(
-                    http_base_url + '/jwplayer.smil',
+                    manifest_url('jwplayer.smil'),
                     video_id, fatal=False)
                 for rtmp_format in rtmp_formats:
                     rtsp_format = rtmp_format.copy()
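
_extract_wowza_formats() now captures the query string of the incoming URL and re-appends it to every derived manifest URL through the nested manifest_url() helper, so tokenized Wowza URLs keep working. An isolated sketch of that behaviour (hypothetical URL and token):

# Isolated sketch of the query-preserving manifest_url() helper.
import re
from youtube_dl.compat import compat_urlparse

url = 'https://wowza.example.com/vod/mp4:clip.mp4/playlist.m3u8?token=abc123'
query = compat_urlparse.urlparse(url).query
url = re.sub(r'/(?:manifest|playlist|jwplayer)\.(?:m3u8|f4m|mpd|smil)', '', url)
url_base = re.search(r'(?:(?:https?|rtmp|rtsp):)?(//[^?]+)', url).group(1)
http_base_url = 'http:%s' % url_base

def manifest_url(manifest):
    m_url = '%s/%s' % (http_base_url, manifest)
    if query:
        m_url += '?%s' % query  # the original query is kept on every variant
    return m_url

print(manifest_url('playlist.m3u8'))
# http://wowza.example.com/vod/mp4:clip.mp4/playlist.m3u8?token=abc123
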
index 00fbbff2fa35d2212d521a43e0e1b41b281d477b..3a6d0560e478cb08445f07d84578d28b3e4bc514 100644 (file)
@@ -2,53 +2,85 @@
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
-from ..utils import unified_strdate
+from ..compat import compat_str
+from ..utils import (
+    float_or_none,
+    unified_strdate,
+)
 
 
 class DctpTvIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?dctp\.tv/(#/)?filme/(?P<id>.+?)/$'
+    _VALID_URL = r'https?://(?:www\.)?dctp\.tv/(?:#/)?filme/(?P<id>[^/?#&]+)'
     _TEST = {
         'url': 'http://www.dctp.tv/filme/videoinstallation-fuer-eine-kaufhausfassade/',
-        'md5': '174dd4a8a6225cf5655952f969cfbe24',
         'info_dict': {
             'id': '95eaa4f33dad413aa17b4ee613cccc6c',
             'display_id': 'videoinstallation-fuer-eine-kaufhausfassade',
-            'ext': 'mp4',
+            'ext': 'flv',
             'title': 'Videoinstallation für eine Kaufhausfassade',
             'description': 'Kurzfilm',
             'upload_date': '20110407',
             'thumbnail': r're:^https?://.*\.jpg$',
+            'duration': 71.24,
+        },
+        'params': {
+            # rtmp download
+            'skip_download': True,
         },
     }
 
     def _real_extract(self, url):
-        video_id = self._match_id(url)
-        webpage = self._download_webpage(url, video_id)
+        display_id = self._match_id(url)
 
 
-        object_id = self._html_search_meta('DC.identifier', webpage)
+        webpage = self._download_webpage(url, display_id)
 
 
-        servers_json = self._download_json(
-            'http://www.dctp.tv/elastic_streaming_client/get_streaming_server/',
-            video_id, note='Downloading server list')
-        server = servers_json[0]['server']
-        m3u8_path = self._search_regex(
-            r'\'([^\'"]+/playlist\.m3u8)"', webpage, 'm3u8 path')
-        formats = self._extract_m3u8_formats(
-            'http://%s%s' % (server, m3u8_path), video_id, ext='mp4',
-            entry_protocol='m3u8_native')
+        video_id = self._html_search_meta(
+            'DC.identifier', webpage, 'video id',
+            default=None) or self._search_regex(
+            r'id=["\']uuid[^>]+>([^<]+)<', webpage, 'video id')
 
         title = self._og_search_title(webpage)
+
+        servers = self._download_json(
+            'http://www.dctp.tv/streaming_servers/', display_id,
+            note='Downloading server list', fatal=False)
+
+        if servers:
+            endpoint = next(
+                server['endpoint']
+                for server in servers
+                if isinstance(server.get('endpoint'), compat_str) and
+                'cloudfront' in server['endpoint'])
+        else:
+            endpoint = 'rtmpe://s2pqqn4u96e4j8.cloudfront.net/cfx/st/'
+
+        app = self._search_regex(
+            r'^rtmpe?://[^/]+/(?P<app>.*)$', endpoint, 'app')
+
+        formats = [{
+            'url': endpoint,
+            'app': app,
+            'play_path': 'mp4:%s_dctp_0500_4x3.m4v' % video_id,
+            'page_url': url,
+            'player_url': 'http://svm-prod-dctptv-static.s3.amazonaws.com/dctptv-relaunch2012-109.swf',
+            'ext': 'flv',
+        }]
+
         description = self._html_search_meta('DC.description', webpage)
         upload_date = unified_strdate(
             self._html_search_meta('DC.date.created', webpage))
         thumbnail = self._og_search_thumbnail(webpage)
+        duration = float_or_none(self._search_regex(
+            r'id=["\']duration_in_ms[^+]>(\d+)', webpage, 'duration',
+            default=None), scale=1000)
 
         return {
-            'id': object_id,
+            'id': video_id,
             'title': title,
             'formats': formats,
-            'display_id': video_id,
+            'display_id': display_id,
             'description': description,
             'upload_date': upload_date,
             'thumbnail': thumbnail,
+            'duration': duration,
         }
index 9a498d72ad6f378ef5dd877354871bcccbfb4faf..95883a037f537de47a0ee9f3f18d913eb9ec1647 100644 (file)
@@ -59,7 +59,7 @@ class DramaFeverBaseIE(AMPIE):
         if all(logout_pattern not in response
                for logout_pattern in ['href="/accounts/logout/"', '>Log out<']):
             error = self._html_search_regex(
-                r'(?s)class="hidden-xs prompt"[^>]*>(.+?)<',
+                r'(?s)<h\d[^>]+\bclass="hidden-xs prompt"[^>]*>(.+?)</h\d',
                 response, 'error message', default=None)
             if error:
                 raise ExtractorError('Unable to login: %s' % error, expected=True)
index 69effba58371426ec6813066424e86aa976b9ce8..f757745ba7cf2fa500f03102d7ca2b0c932c27bc 100644 (file)
@@ -138,6 +138,7 @@ class DRTVIE(InfoExtractor):
                             'tbr': int_or_none(bitrate),
                             'ext': link.get('FileFormat'),
                             'vcodec': 'none' if kind == 'AudioResource' else None,
+                            'preference': preference,
                         })
                 subtitles_list = asset.get('SubtitlesList')
                 if isinstance(subtitles_list, list):
index e4a3046af573fec2961ae84a2137a9395bf68f0b..edabaafe689a3d4ffae1b626dc1a55aa068d05aa 100644 (file)
@@ -2,7 +2,9 @@
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
+from ..compat import compat_str
 from ..utils import (
+    determine_ext,
     int_or_none,
     try_get,
     unified_timestamp,
@@ -17,7 +19,7 @@ class EggheadCourseIE(InfoExtractor):
         'url': 'https://egghead.io/courses/professor-frisby-introduces-composable-functional-javascript',
         'playlist_count': 29,
         'info_dict': {
-            'id': 'professor-frisby-introduces-composable-functional-javascript',
+            'id': '72',
             'title': 'Professor Frisby Introduces Composable Functional JavaScript',
             'description': 're:(?s)^This course teaches the ubiquitous.*You\'ll start composing functionality before you know it.$',
         },
@@ -26,14 +28,28 @@ class EggheadCourseIE(InfoExtractor):
     def _real_extract(self, url):
         playlist_id = self._match_id(url)
 
+        lessons = self._download_json(
+            'https://egghead.io/api/v1/series/%s/lessons' % playlist_id,
+            playlist_id, 'Downloading course lessons JSON')
+
+        entries = []
+        for lesson in lessons:
+            lesson_url = lesson.get('http_url')
+            if not lesson_url or not isinstance(lesson_url, compat_str):
+                continue
+            lesson_id = lesson.get('id')
+            if lesson_id:
+                lesson_id = compat_str(lesson_id)
+            entries.append(self.url_result(
+                lesson_url, ie=EggheadLessonIE.ie_key(), video_id=lesson_id))
+
         course = self._download_json(
-            'https://egghead.io/api/v1/series/%s' % playlist_id, playlist_id)
+            'https://egghead.io/api/v1/series/%s' % playlist_id,
+            playlist_id, 'Downloading course JSON', fatal=False) or {}
 
 
-        entries = [
-            self.url_result(
-                'wistia:%s' % lesson['wistia_id'], ie='Wistia',
-                video_id=lesson['wistia_id'], video_title=lesson.get('title'))
-            for lesson in course['lessons'] if lesson.get('wistia_id')]
+        playlist_id = course.get('id')
+        if playlist_id:
+            playlist_id = compat_str(playlist_id)
 
         return self.playlist_result(
             entries, playlist_id, course.get('title'),
@@ -43,11 +59,12 @@ class EggheadCourseIE(InfoExtractor):
 class EggheadLessonIE(InfoExtractor):
     IE_DESC = 'egghead.io lesson'
     IE_NAME = 'egghead:lesson'
-    _VALID_URL = r'https://egghead\.io/lessons/(?P<id>[^/?#&]+)'
-    _TEST = {
+    _VALID_URL = r'https://egghead\.io/(?:api/v1/)?lessons/(?P<id>[^/?#&]+)'
+    _TESTS = [{
         'url': 'https://egghead.io/lessons/javascript-linear-data-flow-with-container-style-types-box',
         'info_dict': {
-            'id': 'fv5yotjxcg',
+            'id': '1196',
+            'display_id': 'javascript-linear-data-flow-with-container-style-types-box',
             'ext': 'mp4',
             'title': 'Create linear data flow with container style types (Box)',
             'description': 'md5:9aa2cdb6f9878ed4c39ec09e85a8150e',
@@ -60,25 +77,51 @@ class EggheadLessonIE(InfoExtractor):
         },
         'params': {
             'skip_download': True,
+            'format': 'bestvideo',
         },
-    }
+    }, {
+        'url': 'https://egghead.io/api/v1/lessons/react-add-redux-to-a-react-application',
+        'only_matching': True,
+    }]
 
     def _real_extract(self, url):
-        lesson_id = self._match_id(url)
+        display_id = self._match_id(url)
 
         lesson = self._download_json(
-            'https://egghead.io/api/v1/lessons/%s' % lesson_id, lesson_id)
+            'https://egghead.io/api/v1/lessons/%s' % display_id, display_id)
+
+        lesson_id = compat_str(lesson['id'])
+        title = lesson['title']
+
+        formats = []
+        for _, format_url in lesson['media_urls'].items():
+            if not format_url or not isinstance(format_url, compat_str):
+                continue
+            ext = determine_ext(format_url)
+            if ext == 'm3u8':
+                formats.extend(self._extract_m3u8_formats(
+                    format_url, lesson_id, 'mp4', entry_protocol='m3u8',
+                    m3u8_id='hls', fatal=False))
+            elif ext == 'mpd':
+                formats.extend(self._extract_mpd_formats(
+                    format_url, lesson_id, mpd_id='dash', fatal=False))
+            else:
+                formats.append({
+                    'url': format_url,
+                })
+        self._sort_formats(formats)
 
         return {
 
         return {
-            '_type': 'url_transparent',
-            'ie_key': 'Wistia',
-            'url': 'wistia:%s' % lesson['wistia_id'],
-            'id': lesson['wistia_id'],
-            'title': lesson.get('title'),
+            'id': lesson_id,
+            'display_id': display_id,
+            'title': title,
             'description': lesson.get('summary'),
             'thumbnail': lesson.get('thumb_nail'),
             'timestamp': unified_timestamp(lesson.get('published_at')),
             'duration': int_or_none(lesson.get('duration')),
             'view_count': int_or_none(lesson.get('plays_count')),
             'tags': try_get(lesson, lambda x: x['tag_list'], list),
             'description': lesson.get('summary'),
             'thumbnail': lesson.get('thumb_nail'),
             'timestamp': unified_timestamp(lesson.get('published_at')),
             'duration': int_or_none(lesson.get('duration')),
             'view_count': int_or_none(lesson.get('plays_count')),
             'tags': try_get(lesson, lambda x: x['tag_list'], list),
+            'series': try_get(
+                lesson, lambda x: x['series']['title'], compat_str),
+            'formats': formats,
         }
         }
index f3734e9f8984ab5a1a723bbb0be171c3fd9cf7b5..81f2e2ee1c4ac599d8844965fb9271e745293268 100644 (file)
@@ -15,7 +15,7 @@ from ..utils import (
 
 
 class EpornerIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?eporner\.com/hd-porn/(?P<id>\w+)(?:/(?P<display_id>[\w-]+))?'
+    _VALID_URL = r'https?://(?:www\.)?eporner\.com/(?:hd-porn|embed)/(?P<id>\w+)(?:/(?P<display_id>[\w-]+))?'
     _TESTS = [{
         'url': 'http://www.eporner.com/hd-porn/95008/Infamous-Tiffany-Teen-Strip-Tease-Video/',
         'md5': '39d486f046212d8e1b911c52ab4691f8',
@@ -35,6 +35,9 @@ class EpornerIE(InfoExtractor):
     }, {
         'url': 'http://www.eporner.com/hd-porn/3YRUtzMcWn0',
         'only_matching': True,
+    }, {
+        'url': 'http://www.eporner.com/embed/3YRUtzMcWn0',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
index ecb33bc9e7dbbb673a87fc93036e75d06c6f6229..d084707ee81e9e137e260e45220e68e0efcc95d0 100644 (file)
@@ -150,6 +150,7 @@ from .canalc2 import Canalc2IE
 from .canvas import (
     CanvasIE,
     CanvasEenIE,
+    VrtNUIE,
 )
 from .carambatv import (
     CarambaTVIE,
@@ -431,7 +432,10 @@ from .hitbox import HitboxIE, HitboxLiveIE
 from .hitrecord import HitRecordIE
 from .hornbunny import HornBunnyIE
 from .hotnewhiphop import HotNewHipHopIE
-from .hotstar import HotStarIE
+from .hotstar import (
+    HotStarIE,
+    HotStarPlaylistIE,
+)
 from .howcast import HowcastIE
 from .howstuffworks import HowStuffWorksIE
 from .hrti import (
@@ -623,7 +627,6 @@ from .mwave import MwaveIE, MwaveMeetGreetIE
 from .myspace import MySpaceIE, MySpaceAlbumIE
 from .myspass import MySpassIE
 from .myvi import MyviIE
-from .myvideo import MyVideoIE
 from .myvidster import MyVidsterIE
 from .nationalgeographic import (
     NationalGeographicVideoIE,
@@ -925,6 +928,7 @@ from .seeker import SeekerIE
 from .senateisvp import SenateISVPIE
 from .sendtonews import SendtoNewsIE
 from .servingsys import ServingSysIE
+from .servus import ServusIE
 from .sexu import SexuIE
 from .shahid import ShahidIE
 from .shared import (
@@ -1109,10 +1113,6 @@ from .tvplayer import TVPlayerIE
 from .tweakers import TweakersIE
 from .twentyfourvideo import TwentyFourVideoIE
 from .twentymin import TwentyMinutenIE
-from .twentytwotracks import (
-    TwentyTwoTracksIE,
-    TwentyTwoTracksGenreIE
-)
 from .twitch import (
     TwitchVideoIE,
     TwitchChapterIE,
@@ -1138,6 +1138,7 @@ from .udn import UDNEmbedIE
 from .uktvplay import UKTVPlayIE
 from .digiteka import DigitekaIE
 from .unistra import UnistraIE
+from .unity import UnityIE
 from .uol import UOLIE
 from .uplynk import (
     UplynkIE,
@@ -1333,6 +1334,11 @@ from .youku import (
     YoukuIE,
     YoukuShowIE,
 )
+from .younow import (
+    YouNowLiveIE,
+    YouNowChannelIE,
+    YouNowMomentIE,
+)
 from .youporn import YouPornIE
 from .yourupload import YourUploadIE
 from .youtube import (
index 629897317be5fbe316639856cec73662c1f8149c..37549fb01ccfc05fb4a642f948ee70227588f6e0 100644 (file)
@@ -3,27 +3,31 @@ from __future__ import unicode_literals
 
 from .adobepass import AdobePassIE
 from ..utils import (
-    update_url_query,
     extract_attributes,
+    int_or_none,
     parse_age_limit,
     smuggle_url,
+    update_url_query,
 )
 
 
 class FXNetworksIE(AdobePassIE):
     _VALID_URL = r'https?://(?:www\.)?(?:fxnetworks|simpsonsworld)\.com/video/(?P<id>\d+)'
     _TESTS = [{
-        'url': 'http://www.fxnetworks.com/video/719841347694',
-        'md5': '1447d4722e42ebca19e5232ab93abb22',
+        'url': 'http://www.fxnetworks.com/video/1032565827847',
+        'md5': '8d99b97b4aa7a202f55b6ed47ea7e703',
         'info_dict': {
-            'id': '719841347694',
+            'id': 'dRzwHC_MMqIv',
             'ext': 'mp4',
-            'title': 'Vanpage',
-            'description': 'F*ck settling down. You\'re the Worst returns for an all new season August 31st on FXX.',
+            'title': 'First Look: Better Things - Season 2',
+            'description': 'Because real life is like a fart. Watch this FIRST LOOK to see what inspired the new season of Better Things.',
             'age_limit': 14,
             'uploader': 'NEWA-FNG-FX',
-            'upload_date': '20160706',
-            'timestamp': 1467844741,
+            'upload_date': '20170825',
+            'timestamp': 1503686274,
+            'episode_number': 0,
+            'season_number': 2,
+            'series': 'Better Things',
         },
         'add_ie': ['ThePlatform'],
     }, {
@@ -64,6 +68,9 @@ class FXNetworksIE(AdobePassIE):
             'id': video_id,
             'title': title,
             'url': smuggle_url(update_url_query(release_url, query), {'force_smil_url': True}),
+            'series': video_data.get('data-show-title'),
+            'episode_number': int_or_none(video_data.get('data-episode')),
+            'season_number': int_or_none(video_data.get('data-season')),
             'thumbnail': video_data.get('data-large-thumb'),
             'age_limit': parse_age_limit(rating),
             'ie_key': 'ThePlatform',
index 02804d297a4709198371e2e2acfd8a83857095be..6d177cbaf388afb1bb1a71873558ee55ca70ee2c 100644 (file)
@@ -14,7 +14,7 @@ from ..utils import (
 
 
 class GameSpotIE(OnceIE):
-    _VALID_URL = r'https?://(?:www\.)?gamespot\.com/.*-(?P<id>\d+)/?'
+    _VALID_URL = r'https?://(?:www\.)?gamespot\.com/videos/(?:[^/]+/\d+-|embed/)(?P<id>\d+)'
     _TESTS = [{
         'url': 'http://www.gamespot.com/videos/arma-3-community-guide-sitrep-i/2300-6410818/',
         'md5': 'b2a30deaa8654fcccd43713a6b6a4825',
@@ -35,6 +35,9 @@ class GameSpotIE(OnceIE):
         'params': {
             'skip_download': True,  # m3u8 downloads
         },
+    }, {
+        'url': 'https://www.gamespot.com/videos/embed/6439218/',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
@@ -52,7 +55,7 @@ class GameSpotIE(OnceIE):
             manifest_url = f4m_url
             formats.extend(self._extract_f4m_formats(
                 f4m_url + '?hdcore=3.7.0', page_id, f4m_id='hds', fatal=False))
-        m3u8_url = streams.get('m3u8_stream')
+        m3u8_url = dict_get(streams, ('m3u8_stream', 'adaptive_stream'))
         if m3u8_url:
             manifest_url = m3u8_url
             m3u8_formats = self._extract_m3u8_formats(
@@ -60,7 +63,7 @@ class GameSpotIE(OnceIE):
                 m3u8_id='hls', fatal=False)
             formats.extend(m3u8_formats)
         progressive_url = dict_get(
-            streams, ('progressive_hd', 'progressive_high', 'progressive_low'))
+            streams, ('progressive_hd', 'progressive_high', 'progressive_low', 'other_lr'))
         if progressive_url and manifest_url:
             qualities_basename = self._search_regex(
                 r'/([^/]+)\.csmil/',
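
The stream selection now goes through dict_get(), which returns the value of the first listed key that is present and non-empty, so newer API fields such as adaptive_stream and other_lr are picked up automatically. A quick sketch (hypothetical streams dict):

# dict_get() returns the first listed key with a usable value.
from youtube_dl.utils import dict_get

streams = {
    'adaptive_stream': 'https://example.com/master.m3u8',
    'other_lr': 'https://example.com/progressive_low.mp4',
}
m3u8_url = dict_get(streams, ('m3u8_stream', 'adaptive_stream'))
progressive_url = dict_get(
    streams, ('progressive_hd', 'progressive_high', 'progressive_low', 'other_lr'))
print(m3u8_url, progressive_url)
# https://example.com/master.m3u8 https://example.com/progressive_low.mp4
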
diff --git a/youtube_dl/extractor/gigya.py b/youtube_dl/extractor/gigya.py
new file mode 100644 (file)
index 0000000..4121784
--- /dev/null
@@ -0,0 +1,22 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+from ..utils import (
+    ExtractorError,
+    urlencode_postdata,
+)
+
+
+class GigyaBaseIE(InfoExtractor):
+    def _gigya_login(self, auth_data):
+        auth_info = self._download_json(
+            'https://accounts.eu1.gigya.com/accounts.login', None,
+            note='Logging in', errnote='Unable to log in',
+            data=urlencode_postdata(auth_data))
+
+        error_message = auth_info.get('errorDetails') or auth_info.get('errorMessage')
+        if error_message:
+            raise ExtractorError(
+                'Unable to login: %s' % error_message, expected=True)
+        return auth_info
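
GigyaBaseIE centralises the Gigya accounts.login request and its error handling, so site extractors such as VrtNU (added above in canvas.py) only supply their own API key and credentials. A sketch of how a subclass might drive it (hypothetical extractor and method name, placeholder key; the payload fields mirror those used by VrtNU):

# Hypothetical subclass using the shared Gigya login helper.
from youtube_dl.extractor.gigya import GigyaBaseIE

class SomeGigyaSiteIE(GigyaBaseIE):
    _APIKEY = 'PLACEHOLDER_GIGYA_API_KEY'  # placeholder, not a real key

    def _perform_login(self, username, password):
        auth_info = self._gigya_login({
            'APIKey': self._APIKEY,
            'targetEnv': 'jssdk',
            'loginID': username,
            'password': password,
            'authMode': 'cookie',
        })
        # auth_info carries UID, UIDSignature and signatureTimestamp, which the
        # site extractor then exchanges for session cookies.
        return auth_info
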
index 3a7a66a343992879992d677af8f9c318dbd433ff..d28af36ec1704a8d4c508c2036962c8584ea38aa 100644 (file)
@@ -1,22 +1,47 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
+import re
+
 from .common import InfoExtractor
+from ..compat import compat_str
 from ..utils import (
-    ExtractorError,
     determine_ext,
+    ExtractorError,
     int_or_none,
 )
 
 
-class HotStarIE(InfoExtractor):
+class HotStarBaseIE(InfoExtractor):
+    _GEO_COUNTRIES = ['IN']
+
+    def _download_json(self, *args, **kwargs):
+        response = super(HotStarBaseIE, self)._download_json(*args, **kwargs)
+        if response['resultCode'] != 'OK':
+            if kwargs.get('fatal'):
+                raise ExtractorError(
+                    response['errorDescription'], expected=True)
+            return None
+        return response['resultObj']
+
+    def _download_content_info(self, content_id):
+        return self._download_json(
+            'https://account.hotstar.com/AVS/besc', content_id, query={
+                'action': 'GetAggregatedContentDetails',
+                'appVersion': '5.0.40',
+                'channel': 'PCTV',
+                'contentId': content_id,
+            })['contentInfo'][0]
+
+
+class HotStarIE(HotStarBaseIE):
     _VALID_URL = r'https?://(?:www\.)?hotstar\.com/(?:.+?[/-])?(?P<id>\d{10})'
     _TESTS = [{
         'url': 'http://www.hotstar.com/on-air-with-aib--english-1000076273',
         'info_dict': {
             'id': '1000076273',
             'ext': 'mp4',
-            'title': 'On Air With AIB - English',
+            'title': 'On Air With AIB',
             'description': 'md5:c957d8868e9bc793ccb813691cc4c434',
             'timestamp': 1447227000,
             'upload_date': '20151111',
@@ -34,23 +59,11 @@ class HotStarIE(InfoExtractor):
         'only_matching': True,
     }]
 
-    def _download_json(self, url_or_request, video_id, note='Downloading JSON metadata', fatal=True, query=None):
-        json_data = super(HotStarIE, self)._download_json(
-            url_or_request, video_id, note, fatal=fatal, query=query)
-        if json_data['resultCode'] != 'OK':
-            if fatal:
-                raise ExtractorError(json_data['errorDescription'])
-            return None
-        return json_data['resultObj']
-
     def _real_extract(self, url):
         video_id = self._match_id(url)
-        video_data = self._download_json(
-            'http://account.hotstar.com/AVS/besc', video_id, query={
-                'action': 'GetAggregatedContentDetails',
-                'channel': 'PCTV',
-                'contentId': video_id,
-            })['contentInfo'][0]
+
+        video_data = self._download_content_info(video_id)
+
         title = video_data['episodeTitle']
 
         if video_data.get('encrypted') == 'Y':
@@ -99,3 +112,51 @@ class HotStarIE(InfoExtractor):
             'episode_number': int_or_none(video_data.get('episodeNumber')),
             'series': video_data.get('contentTitle'),
         }
+
+
+class HotStarPlaylistIE(HotStarBaseIE):
+    IE_NAME = 'hotstar:playlist'
+    _VALID_URL = r'(?P<url>https?://(?:www\.)?hotstar\.com/tv/[^/]+/(?P<content_id>\d+))/(?P<type>[^/]+)/(?P<id>\d+)'
+    _TESTS = [{
+        'url': 'http://www.hotstar.com/tv/pratidaan/14982/episodes/14812/9993',
+        'info_dict': {
+            'id': '14812',
+        },
+        'playlist_mincount': 75,
+    }, {
+        'url': 'http://www.hotstar.com/tv/pratidaan/14982/popular-clips/9998/9998',
+        'only_matching': True,
+    }]
+    _ITEM_TYPES = {
+        'episodes': 'EPISODE',
+        'popular-clips': 'CLIPS',
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        base_url = mobj.group('url')
+        content_id = mobj.group('content_id')
+        playlist_type = mobj.group('type')
+
+        content_info = self._download_content_info(content_id)
+        playlist_id = compat_str(content_info['categoryId'])
+
+        collection = self._download_json(
+            'https://search.hotstar.com/AVS/besc', playlist_id, query={
+                'action': 'SearchContents',
+                'appVersion': '5.0.40',
+                'channel': 'PCTV',
+                'moreFilters': 'series:%s;' % playlist_id,
+                'query': '*',
+                'searchOrder': 'last_broadcast_date desc,year desc,title asc',
+                'type': self._ITEM_TYPES.get(playlist_type, 'EPISODE'),
+            })
+
+        entries = [
+            self.url_result(
+                '%s/_/%s' % (base_url, video['contentId']),
+                ie=HotStarIE.ie_key(), video_id=video['contentId'])
+            for video in collection['response']['docs']
+            if video.get('contentId')]
+
+        return self.playlist_result(entries, playlist_id)
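
For reference, the overridden _download_json above unwraps the AVS response envelope, so HotStarIE and the new playlist extractor only ever see resultObj. A minimal standalone sketch of the same unwrapping, assuming the envelope shape used above:

def unwrap_avs_response(response, fatal=True):
    # Mirrors HotStarBaseIE._download_json: anything but resultCode == 'OK'
    # is treated as an error; callers get only the resultObj payload.
    if response.get('resultCode') != 'OK':
        if fatal:
            raise ValueError(response.get('errorDescription'))
        return None
    return response.get('resultObj')

print(unwrap_avs_response({'resultCode': 'OK', 'resultObj': {'contentInfo': []}}))
# -> {'contentInfo': []}
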
index 4c32fbc2c27b9a78e0e6fdae73f7dafd865d90fc..f8c30052f32486f511622656bdadbcf91f80717c 100644 (file)
--- a/youtube_dl/extractor/medialaan.py
+++ b/youtube_dl/extractor/medialaan.py
@@ -2,19 +2,18 @@ from __future__ import unicode_literals
 
 import re
 
-from .common import InfoExtractor
+from .gigya import GigyaBaseIE
+
 from ..compat import compat_str
 from ..utils import (
-    ExtractorError,
     int_or_none,
     parse_duration,
     try_get,
     unified_timestamp,
-    urlencode_postdata,
 )
 
 
-class MedialaanIE(InfoExtractor):
+class MedialaanIE(GigyaBaseIE):
     _VALID_URL = r'''(?x)
                     https?://
                         (?:www\.|nieuws\.)?
@@ -119,15 +118,7 @@ class MedialaanIE(InfoExtractor):
             'password': password,
         }
 
-        auth_info = self._download_json(
-            'https://accounts.eu1.gigya.com/accounts.login', None,
-            note='Logging in', errnote='Unable to log in',
-            data=urlencode_postdata(auth_data))
-
-        error_message = auth_info.get('errorDetails') or auth_info.get('errorMessage')
-        if error_message:
-            raise ExtractorError(
-                'Unable to login: %s' % error_message, expected=True)
+        auth_info = self._gigya_login(auth_data)
 
         self._uid = auth_info['UID']
         self._uid_signature = auth_info['UIDSignature']
index 60e3caf0dc57614f59cbe434d4b1d83e12e1dd96..5bafa6cf443f1df8f5c7c2d87921533ef399b822 100644 (file)
--- a/youtube_dl/extractor/megaphone.py
+++ b/youtube_dl/extractor/megaphone.py
@@ -18,7 +18,7 @@ class MegaphoneIE(InfoExtractor):
             'id': 'GLT9749789991',
             'ext': 'mp3',
             'title': '#97 What Kind Of Idiot Gets Phished?',
-            'thumbnail': 're:^https://.*\.png.*$',
+            'thumbnail': r're:^https://.*\.png.*$',
             'duration': 1776.26375,
             'author': 'Reply All',
         },
diff --git a/youtube_dl/extractor/myvideo.py b/youtube_dl/extractor/myvideo.py
deleted file mode 100644 (file)
index 367e811..0000000
--- a/youtube_dl/extractor/myvideo.py
+++ /dev/null
@@ -1,177 +0,0 @@
-from __future__ import unicode_literals
-
-import binascii
-import base64
-import hashlib
-import re
-import json
-
-from .common import InfoExtractor
-from ..compat import (
-    compat_ord,
-    compat_urllib_parse_unquote,
-    compat_urllib_parse_urlencode,
-)
-from ..utils import (
-    ExtractorError,
-    sanitized_Request,
-)
-
-
-class MyVideoIE(InfoExtractor):
-    _WORKING = False
-    _VALID_URL = r'https?://(?:www\.)?myvideo\.de/(?:[^/]+/)?watch/(?P<id>[0-9]+)/[^?/]+.*'
-    IE_NAME = 'myvideo'
-    _TEST = {
-        'url': 'http://www.myvideo.de/watch/8229274/bowling_fail_or_win',
-        'md5': '2d2753e8130479ba2cb7e0a37002053e',
-        'info_dict': {
-            'id': '8229274',
-            'ext': 'flv',
-            'title': 'bowling-fail-or-win',
-        }
-    }
-
-    # Original Code from: https://github.com/dersphere/plugin.video.myvideo_de.git
-    # Released into the Public Domain by Tristan Fischer on 2013-05-19
-    # https://github.com/rg3/youtube-dl/pull/842
-    def __rc4crypt(self, data, key):
-        x = 0
-        box = list(range(256))
-        for i in list(range(256)):
-            x = (x + box[i] + compat_ord(key[i % len(key)])) % 256
-            box[i], box[x] = box[x], box[i]
-        x = 0
-        y = 0
-        out = ''
-        for char in data:
-            x = (x + 1) % 256
-            y = (y + box[x]) % 256
-            box[x], box[y] = box[y], box[x]
-            out += chr(compat_ord(char) ^ box[(box[x] + box[y]) % 256])
-        return out
-
-    def __md5(self, s):
-        return hashlib.md5(s).hexdigest().encode()
-
-    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
-
-        GK = (
-            b'WXpnME1EZGhNRGhpTTJNM01XVmhOREU0WldNNVpHTTJOakpt'
-            b'TW1FMU5tVTBNR05pWkRaa05XRXhNVFJoWVRVd1ptSXhaVEV3'
-            b'TnpsbA0KTVRkbU1tSTRNdz09'
-        )
-
-        # Get video webpage
-        webpage_url = 'http://www.myvideo.de/watch/%s' % video_id
-        webpage = self._download_webpage(webpage_url, video_id)
-
-        mobj = re.search('source src=\'(.+?)[.]([^.]+)\'', webpage)
-        if mobj is not None:
-            self.report_extraction(video_id)
-            video_url = mobj.group(1) + '.flv'
-
-            video_title = self._html_search_regex('<title>([^<]+)</title>',
-                                                  webpage, 'title')
-
-            return {
-                'id': video_id,
-                'url': video_url,
-                'title': video_title,
-            }
-
-        mobj = re.search(r'data-video-service="/service/data/video/%s/config' % video_id, webpage)
-        if mobj is not None:
-            request = sanitized_Request('http://www.myvideo.de/service/data/video/%s/config' % video_id, '')
-            response = self._download_webpage(request, video_id,
-                                              'Downloading video info')
-            info = json.loads(base64.b64decode(response).decode('utf-8'))
-            return {
-                'id': video_id,
-                'title': info['title'],
-                'url': info['streaming_url'].replace('rtmpe', 'rtmpt'),
-                'play_path': info['filename'],
-                'ext': 'flv',
-                'thumbnail': info['thumbnail'][0]['url'],
-            }
-
-        # try encxml
-        mobj = re.search('var flashvars={(.+?)}', webpage)
-        if mobj is None:
-            raise ExtractorError('Unable to extract video')
-
-        params = {}
-        encxml = ''
-        sec = mobj.group(1)
-        for (a, b) in re.findall('(.+?):\'(.+?)\',?', sec):
-            if not a == '_encxml':
-                params[a] = b
-            else:
-                encxml = compat_urllib_parse_unquote(b)
-        if not params.get('domain'):
-            params['domain'] = 'www.myvideo.de'
-        xmldata_url = '%s?%s' % (encxml, compat_urllib_parse_urlencode(params))
-        if 'flash_playertype=MTV' in xmldata_url:
-            self._downloader.report_warning('avoiding MTV player')
-            xmldata_url = (
-                'http://www.myvideo.de/dynamic/get_player_video_xml.php'
-                '?flash_playertype=D&ID=%s&_countlimit=4&autorun=yes'
-            ) % video_id
-
-        # get enc data
-        enc_data = self._download_webpage(xmldata_url, video_id).split('=')[1]
-        enc_data_b = binascii.unhexlify(enc_data)
-        sk = self.__md5(
-            base64.b64decode(base64.b64decode(GK)) +
-            self.__md5(
-                str(video_id).encode('utf-8')
-            )
-        )
-        dec_data = self.__rc4crypt(enc_data_b, sk)
-
-        # extracting infos
-        self.report_extraction(video_id)
-
-        video_url = None
-        mobj = re.search('connectionurl=\'(.*?)\'', dec_data)
-        if mobj:
-            video_url = compat_urllib_parse_unquote(mobj.group(1))
-            if 'myvideo2flash' in video_url:
-                self.report_warning(
-                    'Rewriting URL to use unencrypted rtmp:// ...',
-                    video_id)
-                video_url = video_url.replace('rtmpe://', 'rtmp://')
-
-        if not video_url:
-            # extract non rtmp videos
-            mobj = re.search('path=\'(http.*?)\' source=\'(.*?)\'', dec_data)
-            if mobj is None:
-                raise ExtractorError('unable to extract url')
-            video_url = compat_urllib_parse_unquote(mobj.group(1)) + compat_urllib_parse_unquote(mobj.group(2))
-
-        video_file = self._search_regex('source=\'(.*?)\'', dec_data, 'video file')
-        video_file = compat_urllib_parse_unquote(video_file)
-
-        if not video_file.endswith('f4m'):
-            ppath, prefix = video_file.split('.')
-            video_playpath = '%s:%s' % (prefix, ppath)
-        else:
-            video_playpath = ''
-
-        video_swfobj = self._search_regex(r'swfobject\.embedSWF\(\'(.+?)\'', webpage, 'swfobj')
-        video_swfobj = compat_urllib_parse_unquote(video_swfobj)
-
-        video_title = self._html_search_regex("<h1(?: class='globalHd')?>(.*?)</h1>",
-                                              webpage, 'title')
-
-        return {
-            'id': video_id,
-            'url': video_url,
-            'tc_url': video_url,
-            'title': video_title,
-            'ext': 'flv',
-            'play_path': video_playpath,
-            'player_url': video_swfobj,
-        }
index 35151f5274d2452be7cb508ec76e36db8fab9d9e..554dec36e62dc246ea314ac07f9cff6b3c1323fe 100644 (file)
--- a/youtube_dl/extractor/nbc.py
+++ b/youtube_dl/extractor/nbc.py
@@ -15,7 +15,7 @@ from ..utils import (
 
 
 class NBCIE(AdobePassIE):
-    _VALID_URL = r'https?(?P<permalink>://(?:www\.)?nbc\.com/[^/]+/video/[^/]+/(?P<id>n?\d+))'
+    _VALID_URL = r'https?(?P<permalink>://(?:www\.)?nbc\.com/(?:classic-tv/)?[^/]+/video/[^/]+/(?P<id>n?\d+))'
 
     _TESTS = [
         {
@@ -67,7 +67,11 @@ class NBCIE(AdobePassIE):
                 'skip_download': True,
             },
             'skip': 'Only works from US',
-        }
+        },
+        {
+            'url': 'https://www.nbc.com/classic-tv/charles-in-charge/video/charles-in-charge-pilot/n3310',
+            'only_matching': True,
+        },
     ]
 
     def _real_extract(self, url):
index 255f608783edad0aa3838de028dd8ea07d9ae1b0..ddec89f2c3091c822a61d2a6790b9fc877c15829 100644 (file)
--- a/youtube_dl/extractor/ndtv.py
+++ b/youtube_dl/extractor/ndtv.py
+# coding: utf-8
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
+from ..compat import (
+    compat_urllib_parse_unquote_plus
+)
 from ..utils import (
-    int_or_none,
+    parse_duration,
     remove_end,
     unified_strdate,
+    urljoin
 )
 
 
 class NDTVIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?ndtv\.com/video/(?:[^/]+/)+[^/?^&]+-(?P<id>\d+)'
-
-    _TEST = {
-        'url': 'http://www.ndtv.com/video/news/news/ndtv-exclusive-don-t-need-character-certificate-from-rahul-gandhi-says-arvind-kejriwal-300710',
-        'md5': '39f992dbe5fb531c395d8bbedb1e5e88',
-        'info_dict': {
-            'id': '300710',
-            'ext': 'mp4',
-            'title': "NDTV exclusive: Don't need character certificate from Rahul Gandhi, says Arvind Kejriwal",
-            'description': 'md5:ab2d4b4a6056c5cb4caa6d729deabf02',
-            'upload_date': '20131208',
-            'duration': 1327,
-            'thumbnail': r're:https?://.*\.jpg',
-        },
-    }
+    _VALID_URL = r'https?://(?:[^/]+\.)?ndtv\.com/(?:[^/]+/)*videos?/?(?:[^/]+/)*[^/?^&]+-(?P<id>\d+)'
+
+    _TESTS = [
+        {
+            'url': 'https://khabar.ndtv.com/video/show/prime-time/prime-time-ill-system-and-poor-education-468818',
+            'md5': '78efcf3880ef3fd9b83d405ca94a38eb',
+            'info_dict': {
+                'id': '468818',
+                'ext': 'mp4',
+                'title': "प्राइम टाइम: सिस्टम बीमार, स्कूल बदहाल",
+                'description': 'md5:f410512f1b49672e5695dea16ef2731d',
+                'upload_date': '20170928',
+                'duration': 2218,
+                'thumbnail': r're:https?://.*\.jpg',
+            }
+        },
+        {
+            # __filename is url
+            'url': 'http://movies.ndtv.com/videos/cracker-free-diwali-wishes-from-karan-johar-kriti-sanon-other-stars-470304',
+            'md5': 'f1d709352305b44443515ac56b45aa46',
+            'info_dict': {
+                'id': '470304',
+                'ext': 'mp4',
+                'title': "Cracker-Free Diwali Wishes From Karan Johar, Kriti Sanon & Other Stars",
+                'description': 'md5:f115bba1adf2f6433fa7c1ade5feb465',
+                'upload_date': '20171019',
+                'duration': 137,
+                'thumbnail': r're:https?://.*\.jpg',
+            }
+        },
+        {
+            'url': 'https://www.ndtv.com/video/news/news/delhi-s-air-quality-status-report-after-diwali-is-very-poor-470372',
+            'only_matching': True
+        },
+        {
+            'url': 'https://auto.ndtv.com/videos/the-cnb-daily-october-13-2017-469935',
+            'only_matching': True
+        },
+        {
+            'url': 'https://sports.ndtv.com/cricket/videos/2nd-t20i-rock-thrown-at-australia-cricket-team-bus-after-win-over-india-469764',
+            'only_matching': True
+        },
+        {
+            'url': 'http://gadgets.ndtv.com/videos/uncharted-the-lost-legacy-review-465568',
+            'only_matching': True
+        },
+        {
+            'url': 'http://profit.ndtv.com/videos/news/video-indian-economy-on-very-solid-track-international-monetary-fund-chief-470040',
+            'only_matching': True
+        },
+        {
+            'url': 'http://food.ndtv.com/video-basil-seeds-coconut-porridge-419083',
+            'only_matching': True
+        },
+        {
+            'url': 'https://doctor.ndtv.com/videos/top-health-stories-of-the-week-467396',
+            'only_matching': True
+        },
+        {
+            'url': 'https://swirlster.ndtv.com/video/how-to-make-friends-at-work-469324',
+            'only_matching': True
+        }
+    ]
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
 
-        title = remove_end(self._og_search_title(webpage), ' - NDTV')
+        # '__title' does not contain extra words such as sub-site name, "Video" etc.
+        title = compat_urllib_parse_unquote_plus(
+            self._search_regex(r"__title\s*=\s*'([^']+)'", webpage, 'title', default=None) or
+            self._og_search_title(webpage))
 
         filename = self._search_regex(
-            r"__filename='([^']+)'", webpage, 'video filename')
-        video_url = 'http://bitcast-b.bitgravity.com/ndtvod/23372/ndtv/%s' % filename
+            r"(?:__)?filename\s*[:=]\s*'([^']+)'", webpage, 'video filename')
+        # in "movies" sub-site pages, filename is URL
+        video_url = urljoin('https://ndtvod.bc-ssl.cdn.bitgravity.com/23372/ndtv/', filename.lstrip('/'))
 
-        duration = int_or_none(self._search_regex(
-            r"__duration='([^']+)'", webpage, 'duration', fatal=False))
+        # "doctor" sub-site has MM:SS format
+        duration = parse_duration(self._search_regex(
+            r"(?:__)?duration\s*[:=]\s*'([^']+)'", webpage, 'duration', fatal=False))
 
+        # "sports", "doctor", "swirlster" sub-sites don't have 'publish-date'
         upload_date = unified_strdate(self._html_search_meta(
-            'publish-date', webpage, 'upload date', fatal=False))
+            'publish-date', webpage, 'upload date', default=None) or self._html_search_meta(
+            'uploadDate', webpage, 'upload date', default=None) or self._search_regex(
+            r'datePublished"\s*:\s*"([^"]+)"', webpage, 'upload date', fatal=False))
 
         description = remove_end(self._og_search_description(webpage), ' (Read more)')
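
Two utils behaviours the hunks above rely on, shown doctest-style with illustrative inputs: urljoin leaves an already-absolute __filename untouched (the "movies" sub-site case) while joining relative ones onto the CDN base, and parse_duration accepts the MM:SS strings some sub-sites use.

>>> from youtube_dl.utils import urljoin, parse_duration
>>> base = 'https://ndtvod.bc-ssl.cdn.bitgravity.com/23372/ndtv/'
>>> urljoin(base, 'media/video.mp4') == base + 'media/video.mp4'
True
>>> urljoin(base, 'https://other-cdn.example.com/clip.mp4') == 'https://other-cdn.example.com/clip.mp4'
True
>>> parse_duration('2:17')
137.0
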
 
index 510b1c41fd42dd3f77214fd804e9962a78e075af..310eea2cf054248260868515469962232b8562c2 100644 (file)
--- a/youtube_dl/extractor/nick.py
+++ b/youtube_dl/extractor/nick.py
@@ -75,7 +75,7 @@ class NickIE(MTVServicesInfoExtractor):
 
 class NickDeIE(MTVServicesInfoExtractor):
     IE_NAME = 'nick.de'
-    _VALID_URL = r'https?://(?:www\.)?(?P<host>nick\.(?:de|com\.pl)|nickelodeon\.(?:nl|at))/[^/]+/(?:[^/]+/)*(?P<id>[^/?#&]+)'
+    _VALID_URL = r'https?://(?:www\.)?(?P<host>nick\.(?:de|com\.pl|ch)|nickelodeon\.(?:nl|be|at|dk|no|se))/[^/]+/(?:[^/]+/)*(?P<id>[^/?#&]+)'
     _TESTS = [{
         'url': 'http://www.nick.de/playlist/3773-top-videos/videos/episode/17306-zu-wasser-und-zu-land-rauchende-erdnusse',
         'only_matching': True,
@@ -91,6 +91,21 @@ class NickDeIE(MTVServicesInfoExtractor):
     }, {
         'url': 'http://www.nick.com.pl/seriale/474-spongebob-kanciastoporty/wideo/17412-teatr-to-jest-to-rodeo-oszolom',
         'only_matching': True,
+    }, {
+        'url': 'http://www.nickelodeon.no/program/2626-bulderhuset/videoer/90947-femteklasse-veronica-vs-vanzilla',
+        'only_matching': True,
+    }, {
+        'url': 'http://www.nickelodeon.dk/serier/2626-hojs-hus/videoer/761-tissepause',
+        'only_matching': True,
+    }, {
+        'url': 'http://www.nickelodeon.se/serier/2626-lugn-i-stormen/videos/998-',
+        'only_matching': True,
+    }, {
+        'url': 'http://www.nick.ch/shows/2304-adventure-time-abenteuerzeit-mit-finn-und-jake',
+        'only_matching': True,
+    }, {
+        'url': 'http://www.nickelodeon.be/afspeellijst/4530-top-videos/videos/episode/73917-inval-broodschapper-lariekoek-arie',
+        'only_matching': True,
     }]
 
     def _extract_mrss_url(self, webpage, host):
@@ -132,13 +147,28 @@ class NickNightIE(NickDeIE):
 
 class NickRuIE(MTVServicesInfoExtractor):
     IE_NAME = 'nickelodeonru'
-    _VALID_URL = r'https?://(?:www\.)nickelodeon\.ru/(?:playlist|shows|videos)/(?:[^/]+/)*(?P<id>[^/?#&]+)'
+    _VALID_URL = r'https?://(?:www\.)nickelodeon\.(?:ru|fr|es|pt|ro|hu)/[^/]+/(?:[^/]+/)*(?P<id>[^/?#&]+)'
     _TESTS = [{
         'url': 'http://www.nickelodeon.ru/shows/henrydanger/videos/episodes/3-sezon-15-seriya-licenziya-na-polyot/pmomfb#playlist/7airc6',
         'only_matching': True,
     }, {
         'url': 'http://www.nickelodeon.ru/videos/smotri-na-nickelodeon-v-iyule/g9hvh7',
         'only_matching': True,
+    }, {
+        'url': 'http://www.nickelodeon.fr/programmes/bob-l-eponge/videos/le-marathon-de-booh-kini-bottom-mardi-31-octobre/nfn7z0',
+        'only_matching': True,
+    }, {
+        'url': 'http://www.nickelodeon.es/videos/nickelodeon-consejos-tortitas/f7w7xy',
+        'only_matching': True,
+    }, {
+        'url': 'http://www.nickelodeon.pt/series/spongebob-squarepants/videos/a-bolha-de-tinta-gigante/xutq1b',
+        'only_matching': True,
+    }, {
+        'url': 'http://www.nickelodeon.ro/emisiuni/shimmer-si-shine/video/nahal-din-bomboane/uw5u2k',
+        'only_matching': True,
+    }, {
+        'url': 'http://www.nickelodeon.hu/musorok/spongyabob-kockanadrag/videok/episodes/buborekfujas-az-elszakadt-nadrag/q57iob#playlist/k6te4y',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
index 026329d3ea4210e2d17af374b67c4a87af252ee1..df7f528be2d4c8da7cfe826609608754f2f8e088 100644 (file)
--- a/youtube_dl/extractor/niconico.py
+++ b/youtube_dl/extractor/niconico.py
@@ -40,7 +40,7 @@ class NiconicoIE(InfoExtractor):
             'uploader': 'takuya0301',
             'uploader_id': '2698420',
             'upload_date': '20131123',
-            'timestamp': 1385182762,
+            'timestamp': int,  # timestamp is unstable
             'description': '(c) copyright 2008, Blender Foundation / www.bigbuckbunny.org',
             'duration': 33,
             'view_count': int,
@@ -115,8 +115,8 @@ class NiconicoIE(InfoExtractor):
         'skip': 'Requires an account',
     }, {
         # "New" HTML5 video
+        # md5 is unstable
         'url': 'http://www.nicovideo.jp/watch/sm31464864',
-        'md5': '351647b4917660986dc0fa8864085135',
         'info_dict': {
             'id': 'sm31464864',
             'ext': 'mp4',
@@ -124,7 +124,7 @@ class NiconicoIE(InfoExtractor):
             'description': 'md5:e52974af9a96e739196b2c1ca72b5feb',
             'timestamp': 1498514060,
             'upload_date': '20170626',
-            'uploader': 'ゲス',
+            'uploader': 'ゲス',
             'uploader_id': '40826363',
             'thumbnail': r're:https?://.*',
             'duration': 198,
@@ -132,6 +132,25 @@ class NiconicoIE(InfoExtractor):
             'comment_count': int,
         },
         'skip': 'Requires an account',
+    }, {
+        # Video without owner
+        'url': 'http://www.nicovideo.jp/watch/sm18238488',
+        'md5': 'd265680a1f92bdcbbd2a507fc9e78a9e',
+        'info_dict': {
+            'id': 'sm18238488',
+            'ext': 'mp4',
+            'title': '【実写版】ミュータントタートルズ',
+            'description': 'md5:15df8988e47a86f9e978af2064bf6d8e',
+            'timestamp': 1341160408,
+            'upload_date': '20120701',
+            'uploader': None,
+            'uploader_id': None,
+            'thumbnail': r're:https?://.*',
+            'duration': 5271,
+            'view_count': int,
+            'comment_count': int,
+        },
+        'skip': 'Requires an account',
     }, {
         'url': 'http://sp.nicovideo.jp/watch/sm28964488?ss_pos=1&cp_in=wt_tg',
         'only_matching': True,
@@ -395,7 +414,9 @@ class NiconicoIE(InfoExtractor):
 
         webpage_url = get_video_info('watch_url') or url
 
-        owner = api_data.get('owner', {})
+        # Note: cannot use api_data.get('owner', {}) because owner may be set to "null"
+        # in the JSON, which will cause None to be returned instead of {}.
+        owner = try_get(api_data, lambda x: x.get('owner'), dict) or {}
         uploader_id = get_video_info(['ch_id', 'user_id']) or owner.get('id')
         uploader = get_video_info(['ch_name', 'user_nickname']) or owner.get('nickname')
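
The comment in the last hunk is the crux of that change: dict.get only falls back to its default when the key is missing, not when it is present with a null value. A doctest-style sketch with a toy api_data:

>>> from youtube_dl.utils import try_get
>>> api_data = {'owner': None}          # owner present but null in the JSON
>>> api_data.get('owner', {}) is None   # the old code's default is not used
True
>>> try_get(api_data, lambda x: x.get('owner'), dict) or {}
{}
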
 
index ebdab8db9faa0c8911c53c5764a18456926b6a55..bdd5ff565443d7353e64427b99e0edf60e75bd0c 100644 (file)
--- a/youtube_dl/extractor/parliamentliveuk.py
+++ b/youtube_dl/extractor/parliamentliveuk.py
@@ -11,7 +11,7 @@ class ParliamentLiveUKIE(InfoExtractor):
     _TESTS = [{
         'url': 'http://parliamentlive.tv/Event/Index/c1e9d44d-fd6c-4263-b50f-97ed26cc998b',
         'info_dict': {
-            'id': 'c1e9d44d-fd6c-4263-b50f-97ed26cc998b',
+            'id': '1_af9nv9ym',
             'ext': 'mp4',
             'title': 'Home Affairs Committee',
             'uploader_id': 'FFMPEG-01',
@@ -28,14 +28,14 @@ class ParliamentLiveUKIE(InfoExtractor):
         webpage = self._download_webpage(
             'http://vodplayer.parliamentlive.tv/?mid=' + video_id, video_id)
         widget_config = self._parse_json(self._search_regex(
-            r'kWidgetConfig\s*=\s*({.+});',
+            r'(?s)kWidgetConfig\s*=\s*({.+});',
             webpage, 'kaltura widget config'), video_id)
-        kaltura_url = 'kaltura:%s:%s' % (widget_config['wid'][1:], widget_config['entry_id'])
+        kaltura_url = 'kaltura:%s:%s' % (
+            widget_config['wid'][1:], widget_config['entry_id'])
         event_title = self._download_json(
             'http://parliamentlive.tv/Event/GetShareVideo/' + video_id, video_id)['event']['title']
         return {
             '_type': 'url_transparent',
-            'id': video_id,
             'title': event_title,
             'description': '',
             'url': kaltura_url,
index 8889e4a1aaa3e41f49a63b53c010cf69d0842b1b..b51dcbe10dd14136220516c76e6cada7f70a9b0c 100644 (file)
--- a/youtube_dl/extractor/pbs.py
+++ b/youtube_dl/extractor/pbs.py
@@ -187,7 +187,7 @@ class PBSIE(InfoExtractor):
     _VALID_URL = r'''(?x)https?://
         (?:
            # Direct video URL
-           (?:%s)/(?:viralplayer|video)/(?P<id>[0-9]+)/? |
+           (?:%s)/(?:(?:vir|port)alplayer|video)/(?P<id>[0-9]+)(?:[?/]|$) |
            # Article with embedded player (or direct video)
            (?:www\.)?pbs\.org/(?:[^/]+/){1,5}(?P<presumptive_id>[^/]+?)(?:\.html)?/?(?:$|[?\#]) |
            # Player
@@ -367,6 +367,10 @@ class PBSIE(InfoExtractor):
         {
             'url': 'http://watch.knpb.org/video/2365616055/',
             'only_matching': True,
+        },
+        {
+            'url': 'https://player.pbs.org/portalplayer/3004638221/?uid=',
+            'only_matching': True,
         }
     ]
     _ERRORS = {
diff --git a/youtube_dl/extractor/servus.py b/youtube_dl/extractor/servus.py
new file mode 100644 (file)
index 0000000..264e1dd
--- /dev/null
+++ b/youtube_dl/extractor/servus.py
@@ -0,0 +1,43 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+
+class ServusIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?servus\.com/(?:at|de)/p/[^/]+/(?P<id>AA-\w+|\d+-\d+)'
+    _TESTS = [{
+        'url': 'https://www.servus.com/de/p/Die-Gr%C3%BCnen-aus-Sicht-des-Volkes/AA-1T6VBU5PW1W12/',
+        'md5': '046dee641cda1c4cabe13baef3be2c1c',
+        'info_dict': {
+            'id': 'AA-1T6VBU5PW1W12',
+            'ext': 'mp4',
+            'title': 'Die Grünen aus Volkssicht',
+            'description': 'md5:052b5da1cb2cd7d562ef1f19be5a5cba',
+            'thumbnail': r're:^https?://.*\.jpg$',
+        }
+    }, {
+        'url': 'https://www.servus.com/at/p/Wie-das-Leben-beginnt/1309984137314-381415152/',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+
+        title = self._og_search_title(webpage)
+        description = self._og_search_description(webpage)
+        thumbnail = self._og_search_thumbnail(webpage)
+
+        formats = self._extract_m3u8_formats(
+            'https://stv.rbmbtnx.net/api/v1/manifests/%s.m3u8' % video_id,
+            video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls')
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'formats': formats,
+        }
index 4ca9f6b3c811f59ef11eb82d173554341f3ab66d..efcbb36a9eb5e6e2cf5884ccea3aa9c505fa8cb3 100644 (file)
--- a/youtube_dl/extractor/skysports.py
+++ b/youtube_dl/extractor/skysports.py
@@ -2,7 +2,12 @@
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
-from ..utils import strip_or_none
+from ..utils import (
+    extract_attributes,
+    smuggle_url,
+    strip_or_none,
+    urljoin,
+)
 
 
 class SkySportsIE(InfoExtractor):
@@ -22,12 +27,22 @@ class SkySportsIE(InfoExtractor):
     def _real_extract(self, url):
         video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
+        video_data = extract_attributes(self._search_regex(
+            r'(<div.+?class="sdc-article-video__media-ooyala"[^>]+>)', webpage, 'video data'))
+
+        video_url = 'ooyala:%s' % video_data['data-video-id']
+        if video_data.get('data-token-required') == 'true':
+            token_fetch_options = self._parse_json(video_data.get('data-token-fetch-options', '{}'), video_id, fatal=False) or {}
+            token_fetch_url = token_fetch_options.get('url')
+            if token_fetch_url:
+                embed_token = self._download_webpage(urljoin(url, token_fetch_url), video_id, fatal=False)
+                if embed_token:
+                    video_url = smuggle_url(video_url, {'embed_token': embed_token.strip('"')})
 
         return {
             '_type': 'url_transparent',
             'id': video_id,
-            'url': 'ooyala:%s' % self._search_regex(
-                r'data-video-id="([^"]+)"', webpage, 'ooyala id'),
+            'url': video_url,
             'title': self._og_search_title(webpage),
             'description': strip_or_none(self._og_search_description(webpage)),
             'ie_key': 'Ooyala',
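
For reference, smuggle_url as used above simply piggybacks extra data (here the Ooyala embed_token) onto the URL fragment so the downstream Ooyala extractor can recover it with unsmuggle_url. A doctest-style round trip with placeholder values:

>>> from youtube_dl.utils import smuggle_url, unsmuggle_url
>>> smuggled = smuggle_url('ooyala:SOME_VIDEO_ID', {'embed_token': 'PLACEHOLDER'})
>>> unsmuggle_url(smuggled) == ('ooyala:SOME_VIDEO_ID', {'embed_token': 'PLACEHOLDER'})
True
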
index 1c6799d579523806e9912912a0f50aa187c8773b..8894f4b0c32ee58894be125dbbd62d02ca6ae8fe 100644 (file)
--- a/youtube_dl/extractor/soundcloud.py
+++ b/youtube_dl/extractor/soundcloud.py
@@ -138,7 +138,7 @@ class SoundcloudIE(InfoExtractor):
         },
     ]
 
-    _CLIENT_ID = 'JlZIsxg2hY5WnBgtn3jfS0UYCl0K8DOg'
+    _CLIENT_ID = 'c6CU49JDMapyrQo06UxU9xouB9ZVzqCn'
     _IPHONE_CLIENT_ID = '376f225bf427445fc4bfb6b99b72e0bf'
 
     @staticmethod
index e004e2c5ab12705c8d9ff5e12b25f53579539c72..3d78a9d76cce8aa28902111fe3f9018dafbb856a 100644 (file)
--- a/youtube_dl/extractor/soundgasm.py
+++ b/youtube_dl/extractor/soundgasm.py
@@ -8,36 +8,49 @@ from .common import InfoExtractor
 
 class SoundgasmIE(InfoExtractor):
     IE_NAME = 'soundgasm'
-    _VALID_URL = r'https?://(?:www\.)?soundgasm\.net/u/(?P<user>[0-9a-zA-Z_\-]+)/(?P<title>[0-9a-zA-Z_\-]+)'
+    _VALID_URL = r'https?://(?:www\.)?soundgasm\.net/u/(?P<user>[0-9a-zA-Z_-]+)/(?P<display_id>[0-9a-zA-Z_-]+)'
     _TEST = {
         'url': 'http://soundgasm.net/u/ytdl/Piano-sample',
         'md5': '010082a2c802c5275bb00030743e75ad',
         'info_dict': {
             'id': '88abd86ea000cafe98f96321b23cc1206cbcbcc9',
             'ext': 'm4a',
-            'title': 'ytdl_Piano-sample',
-            'description': 'Royalty Free Sample Music'
+            'title': 'Piano sample',
+            'description': 'Royalty Free Sample Music',
+            'uploader': 'ytdl',
         }
     }
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
-        display_id = mobj.group('title')
-        audio_title = mobj.group('user') + '_' + mobj.group('title')
+        display_id = mobj.group('display_id')
+
         webpage = self._download_webpage(url, display_id)
+
         audio_url = self._html_search_regex(
-            r'(?s)m4a\:\s"([^"]+)"', webpage, 'audio URL')
-        audio_id = re.split(r'\/|\.', audio_url)[-2]
+            r'(?s)m4a\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
+            'audio URL', group='url')
+
+        title = self._search_regex(
+            r'<div[^>]+\bclass=["\']jp-title[^>]+>([^<]+)',
+            webpage, 'title', default=display_id)
+
         description = self._html_search_regex(
-            r'(?s)<li>Description:\s(.*?)<\/li>', webpage, 'description',
-            fatal=False)
+            (r'(?s)<div[^>]+\bclass=["\']jp-description[^>]+>(.+?)</div>',
+             r'(?s)<li>Description:\s(.*?)<\/li>'),
+            webpage, 'description', fatal=False)
+
+        audio_id = self._search_regex(
+            r'/([^/]+)\.m4a', audio_url, 'audio id', default=display_id)
 
         return {
             'id': audio_id,
             'display_id': display_id,
             'url': audio_url,
-            'title': audio_title,
-            'description': description
+            'vcodec': 'none',
+            'title': title,
+            'description': description,
+            'uploader': mobj.group('user'),
         }
 
 
index 3394c7e6ba4713ad0c63e2579d611a0319c45add..2863e53b5a47be353ae18df446c3ffd8a95d0913 100644 (file)
--- a/youtube_dl/extractor/spankbang.py
+++ b/youtube_dl/extractor/spankbang.py
@@ -3,6 +3,7 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
+from ..utils import ExtractorError
 
 
 class SpankBangIE(InfoExtractor):
@@ -33,6 +34,10 @@ class SpankBangIE(InfoExtractor):
         video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
 
+        if re.search(r'<[^>]+\bid=["\']video_removed', webpage):
+            raise ExtractorError(
+                'Video %s is not available' % video_id, expected=True)
+
         stream_key = self._html_search_regex(
             r'''var\s+stream_key\s*=\s*['"](.+?)['"]''',
             webpage, 'stream key')
diff --git a/youtube_dl/extractor/twentytwotracks.py b/youtube_dl/extractor/twentytwotracks.py
deleted file mode 100644 (file)
index d6c0ab1..0000000
--- a/youtube_dl/extractor/twentytwotracks.py
+++ /dev/null
@@ -1,86 +0,0 @@
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-from ..utils import int_or_none
-
-# 22Tracks regularly replace the audio tracks that can be streamed on their
-# site. The tracks usually expire after 1 months, so we can't add tests.
-
-
-class TwentyTwoTracksIE(InfoExtractor):
-    _VALID_URL = r'https?://22tracks\.com/(?P<city>[a-z]+)/(?P<genre>[\da-z]+)/(?P<id>\d+)'
-    IE_NAME = '22tracks:track'
-
-    _API_BASE = 'http://22tracks.com/api'
-
-    def _extract_info(self, city, genre_name, track_id=None):
-        item_id = track_id if track_id else genre_name
-
-        cities = self._download_json(
-            '%s/cities' % self._API_BASE, item_id,
-            'Downloading cities info',
-            'Unable to download cities info')
-        city_id = [x['id'] for x in cities if x['slug'] == city][0]
-
-        genres = self._download_json(
-            '%s/genres/%s' % (self._API_BASE, city_id), item_id,
-            'Downloading %s genres info' % city,
-            'Unable to download %s genres info' % city)
-        genre = [x for x in genres if x['slug'] == genre_name][0]
-        genre_id = genre['id']
-
-        tracks = self._download_json(
-            '%s/tracks/%s' % (self._API_BASE, genre_id), item_id,
-            'Downloading %s genre tracks info' % genre_name,
-            'Unable to download track info')
-
-        return [x for x in tracks if x['id'] == item_id][0] if track_id else [genre['title'], tracks]
-
-    def _get_track_url(self, filename, track_id):
-        token = self._download_json(
-            'http://22tracks.com/token.php?desktop=true&u=/128/%s' % filename,
-            track_id, 'Downloading token', 'Unable to download token')
-        return 'http://audio.22tracks.com%s?st=%s&e=%d' % (token['filename'], token['st'], token['e'])
-
-    def _extract_track_info(self, track_info, track_id):
-        download_url = self._get_track_url(track_info['filename'], track_id)
-        title = '%s - %s' % (track_info['artist'].strip(), track_info['title'].strip())
-        return {
-            'id': track_id,
-            'url': download_url,
-            'ext': 'mp3',
-            'title': title,
-            'duration': int_or_none(track_info.get('duration')),
-            'timestamp': int_or_none(track_info.get('published_at') or track_info.get('created'))
-        }
-
-    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-
-        city = mobj.group('city')
-        genre = mobj.group('genre')
-        track_id = mobj.group('id')
-
-        track_info = self._extract_info(city, genre, track_id)
-        return self._extract_track_info(track_info, track_id)
-
-
-class TwentyTwoTracksGenreIE(TwentyTwoTracksIE):
-    _VALID_URL = r'https?://22tracks\.com/(?P<city>[a-z]+)/(?P<genre>[\da-z]+)/?$'
-    IE_NAME = '22tracks:genre'
-
-    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-
-        city = mobj.group('city')
-        genre = mobj.group('genre')
-
-        genre_title, tracks = self._extract_info(city, genre)
-
-        entries = [
-            self._extract_track_info(track_info, track_info['id'])
-            for track_info in tracks]
-
-        return self.playlist_result(entries, genre, genre_title)
index c926c99a999bb88388c77d859e54c8973417b9db..fefcd28078f6c69058366fbb7b8f9e616508fbb6 100644 (file)
--- a/youtube_dl/extractor/twitch.py
+++ b/youtube_dl/extractor/twitch.py
@@ -609,7 +609,7 @@ class TwitchClipsIE(InfoExtractor):
                 r'(?s)clipInfo\s*=\s*({.+?});', webpage, 'clip info'),
             video_id, transform_source=js_to_json)
 
-        title = clip.get('channel_title') or self._og_search_title(webpage)
+        title = clip.get('title') or clip.get('channel_title') or self._og_search_title(webpage)
 
         formats = [{
             'url': option['source'],
diff --git a/youtube_dl/extractor/unity.py b/youtube_dl/extractor/unity.py
new file mode 100644 (file)
index 0000000..73daacf
--- /dev/null
+++ b/youtube_dl/extractor/unity.py
@@ -0,0 +1,32 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from .youtube import YoutubeIE
+
+
+class UnityIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?unity3d\.com/learn/tutorials/(?:[^/]+/)*(?P<id>[^/?#&]+)'
+    _TESTS = [{
+        'url': 'https://unity3d.com/learn/tutorials/topics/animation/animate-anything-mecanim',
+        'info_dict': {
+            'id': 'jWuNtik0C8E',
+            'ext': 'mp4',
+            'title': 'Live Training 22nd September 2014 -  Animate Anything',
+            'description': 'md5:e54913114bd45a554c56cdde7669636e',
+            'duration': 2893,
+            'uploader': 'Unity',
+            'uploader_id': 'Unity3D',
+            'upload_date': '20140926',
+        }
+    }, {
+        'url': 'https://unity3d.com/learn/tutorials/projects/2d-ufo-tutorial/following-player-camera?playlist=25844',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+        youtube_id = self._search_regex(
+            r'data-video-id="([_0-9a-zA-Z-]+)"',
+            webpage, 'youtube ID')
+        return self.url_result(youtube_id, ie=YoutubeIE.ie_key(), video_id=video_id)
index c3f71b45e3bbee17bbf7e1d4787ac0aa888f251b..cedb548767e84a512b8ca5e0253d81f62a8ee502 100644 (file)
--- a/youtube_dl/extractor/vimeo.py
+++ b/youtube_dl/extractor/vimeo.py
@@ -412,7 +412,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
         urls = []
         # Look for embedded (iframe) Vimeo player
         for mobj in re.finditer(
-                r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//player\.vimeo\.com/video/.+?)\1',
+                r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//player\.vimeo\.com/video/\d+.*?)\1',
                 webpage):
             urls.append(VimeoIE._smuggle_referrer(unescapeHTML(mobj.group('url')), url))
         PLAIN_EMBED_RE = (
diff --git a/youtube_dl/extractor/younow.py b/youtube_dl/extractor/younow.py
new file mode 100644 (file)
index 0000000..04dbc87
--- /dev/null
+++ b/youtube_dl/extractor/younow.py
@@ -0,0 +1,202 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import itertools
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+    ExtractorError,
+    int_or_none,
+    try_get,
+)
+
+CDN_API_BASE = 'https://cdn.younow.com/php/api'
+MOMENT_URL_FORMAT = '%s/moment/fetch/id=%%s' % CDN_API_BASE
+
+
+class YouNowLiveIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?younow\.com/(?P<id>[^/?#&]+)'
+    _TEST = {
+        'url': 'https://www.younow.com/AmandaPadeezy',
+        'info_dict': {
+            'id': 'AmandaPadeezy',
+            'ext': 'mp4',
+            'is_live': True,
+            'title': 'March 26, 2017',
+            'thumbnail': r're:^https?://.*\.jpg$',
+            'tags': ['girls'],
+            'categories': ['girls'],
+            'uploader': 'AmandaPadeezy',
+            'uploader_id': '6716501',
+            'uploader_url': 'https://www.younow.com/AmandaPadeezy',
+            'creator': 'AmandaPadeezy',
+        },
+        'skip': True,
+    }
+
+    @classmethod
+    def suitable(cls, url):
+        return (False
+                if YouNowChannelIE.suitable(url) or YouNowMomentIE.suitable(url)
+                else super(YouNowLiveIE, cls).suitable(url))
+
+    def _real_extract(self, url):
+        username = self._match_id(url)
+
+        data = self._download_json(
+            'https://api.younow.com/php/api/broadcast/info/curId=0/user=%s'
+            % username, username)
+
+        if data.get('errorCode') != 0:
+            raise ExtractorError(data['errorMsg'], expected=True)
+
+        uploader = try_get(
+            data, lambda x: x['user']['profileUrlString'],
+            compat_str) or username
+
+        return {
+            'id': uploader,
+            'is_live': True,
+            'title': self._live_title(uploader),
+            'thumbnail': data.get('awsUrl'),
+            'tags': data.get('tags'),
+            'categories': data.get('tags'),
+            'uploader': uploader,
+            'uploader_id': data.get('userId'),
+            'uploader_url': 'https://www.younow.com/%s' % username,
+            'creator': uploader,
+            'view_count': int_or_none(data.get('viewers')),
+            'like_count': int_or_none(data.get('likes')),
+            'formats': [{
+                'url': '%s/broadcast/videoPath/hls=1/broadcastId=%s/channelId=%s'
+                       % (CDN_API_BASE, data['broadcastId'], data['userId']),
+                'ext': 'mp4',
+                'protocol': 'm3u8',
+            }],
+        }
+
+
+def _extract_moment(item, fatal=True):
+    moment_id = item.get('momentId')
+    if not moment_id:
+        if not fatal:
+            return
+        raise ExtractorError('Unable to extract moment id')
+
+    moment_id = compat_str(moment_id)
+
+    title = item.get('text')
+    if not title:
+        title = 'YouNow %s' % (
+            item.get('momentType') or item.get('titleType') or 'moment')
+
+    uploader = try_get(item, lambda x: x['owner']['name'], compat_str)
+    uploader_id = try_get(item, lambda x: x['owner']['userId'])
+    uploader_url = 'https://www.younow.com/%s' % uploader if uploader else None
+
+    entry = {
+        'extractor_key': 'YouNowMoment',
+        'id': moment_id,
+        'title': title,
+        'view_count': int_or_none(item.get('views')),
+        'like_count': int_or_none(item.get('likes')),
+        'timestamp': int_or_none(item.get('created')),
+        'creator': uploader,
+        'uploader': uploader,
+        'uploader_id': uploader_id,
+        'uploader_url': uploader_url,
+        'formats': [{
+            'url': 'https://hls.younow.com/momentsplaylists/live/%s/%s.m3u8'
+                   % (moment_id, moment_id),
+            'ext': 'mp4',
+            'protocol': 'm3u8_native',
+        }],
+    }
+
+    return entry
+
+
+class YouNowChannelIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?younow\.com/(?P<id>[^/]+)/channel'
+    _TEST = {
+        'url': 'https://www.younow.com/its_Kateee_/channel',
+        'info_dict': {
+            'id': '14629760',
+            'title': 'its_Kateee_ moments'
+        },
+        'playlist_mincount': 8,
+    }
+
+    def _entries(self, username, channel_id):
+        created_before = 0
+        for page_num in itertools.count(1):
+            if created_before is None:
+                break
+            info = self._download_json(
+                '%s/moment/profile/channelId=%s/createdBefore=%d/records=20'
+                % (CDN_API_BASE, channel_id, created_before), username,
+                note='Downloading moments page %d' % page_num)
+            items = info.get('items')
+            if not items or not isinstance(items, list):
+                break
+            for item in items:
+                if not isinstance(item, dict):
+                    continue
+                item_type = item.get('type')
+                if item_type == 'moment':
+                    entry = _extract_moment(item, fatal=False)
+                    if entry:
+                        yield entry
+                elif item_type == 'collection':
+                    moments = item.get('momentsIds')
+                    if isinstance(moments, list):
+                        for moment_id in moments:
+                            m = self._download_json(
+                                MOMENT_URL_FORMAT % moment_id, username,
+                                note='Downloading %s moment JSON' % moment_id,
+                                fatal=False)
+                            if m and isinstance(m, dict) and m.get('item'):
+                                entry = _extract_moment(m['item'])
+                                if entry:
+                                    yield entry
+                created_before = int_or_none(item.get('created'))
+
+    def _real_extract(self, url):
+        username = self._match_id(url)
+        channel_id = compat_str(self._download_json(
+            'https://api.younow.com/php/api/broadcast/info/curId=0/user=%s'
+            % username, username, note='Downloading user information')['userId'])
+        return self.playlist_result(
+            self._entries(username, channel_id), channel_id,
+            '%s moments' % username)
+
+
+class YouNowMomentIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?younow\.com/[^/]+/(?P<id>[^/?#&]+)'
+    _TEST = {
+        'url': 'https://www.younow.com/GABO.../20712117/36319236/3b316doc/m',
+        'md5': 'a30c70eadb9fb39a1aa3c8c0d22a0807',
+        'info_dict': {
+            'id': '20712117',
+            'ext': 'mp4',
+            'title': 'YouNow capture',
+            'view_count': int,
+            'like_count': int,
+            'timestamp': 1490432040,
+            'upload_date': '20170325',
+            'uploader': 'GABO...',
+            'uploader_id': 35917228,
+        },
+    }
+
+    @classmethod
+    def suitable(cls, url):
+        return (False
+                if YouNowChannelIE.suitable(url)
+                else super(YouNowMomentIE, cls).suitable(url))
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        item = self._download_json(MOMENT_URL_FORMAT % video_id, video_id)
+        return _extract_moment(item['item'])
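
Because the three _VALID_URLs above all overlap (younow.com/<user>, .../channel and .../<moment>), the suitable() overrides are what keep the generic live extractor from swallowing channel and moment URLs. A minimal standalone sketch of the same precedence trick, with toy classes rather than the real extractors:

import re


class ToyBaseIE(object):
    _VALID_URL = None

    @classmethod
    def suitable(cls, url):
        return re.match(cls._VALID_URL, url) is not None


class ToyChannelIE(ToyBaseIE):
    _VALID_URL = r'https?://(?:www\.)?younow\.com/[^/]+/channel'


class ToyLiveIE(ToyBaseIE):
    _VALID_URL = r'https?://(?:www\.)?younow\.com/[^/?#&]+'

    @classmethod
    def suitable(cls, url):
        # Defer to the more specific extractor first, as YouNowLiveIE does.
        return False if ToyChannelIE.suitable(url) else super(ToyLiveIE, cls).suitable(url)


assert ToyChannelIE.suitable('https://www.younow.com/someuser/channel')
assert not ToyLiveIE.suitable('https://www.younow.com/someuser/channel')
assert ToyLiveIE.suitable('https://www.younow.com/someuser')
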
index 4e8db240d3f9d141cfa457c1d941d1b5399f0c67..9943dddc13b478ce23e71c43d19f5cf18694f90b 100644 (file)
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -1391,7 +1391,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             )
             (["\'])
                 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
-                (?:embed|v|p)/.+?)
+                (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
             \1''', webpage)]
 
         # lazyYT YouTube embed
@@ -1622,6 +1622,17 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
         # description
         description_original = video_description = get_element_by_id("eow-description", video_webpage)
         if video_description:
+
+            def replace_url(m):
+                redir_url = compat_urlparse.urljoin(url, m.group(1))
+                parsed_redir_url = compat_urllib_parse_urlparse(redir_url)
+                if re.search(r'^(?:www\.)?(?:youtube(?:-nocookie)?\.com|youtu\.be)$', parsed_redir_url.netloc) and parsed_redir_url.path == '/redirect':
+                    qs = compat_parse_qs(parsed_redir_url.query)
+                    q = qs.get('q')
+                    if q and q[0]:
+                        return q[0]
+                return redir_url
+
             description_original = video_description = re.sub(r'''(?x)
                 <a\s+
                     (?:[a-zA-Z-]+="[^"]*"\s+)*?
@@ -1630,7 +1641,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                     class="[^"]*"[^>]*>
                 [^<]+\.{3}\s*
                 </a>
-            ''', lambda m: compat_urlparse.urljoin(url, m.group(1)), video_description)
+            ''', replace_url, video_description)
             video_description = clean_html(video_description)
         else:
             fd_mobj = re.search(r'<meta name="description" content="([^"]+)"', video_webpage)
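
The replace_url helper added above unwraps YouTube's /redirect interstitial links in video descriptions back to their real targets. A standalone sketch of the same idea, simplified in that it skips the domain check the real code performs:

from youtube_dl.compat import compat_parse_qs, compat_urllib_parse_urlparse


def unwrap_redirect(redir_url):
    # If this is a .../redirect link, return the target carried in q=.
    parsed = compat_urllib_parse_urlparse(redir_url)
    if parsed.path == '/redirect':
        q = compat_parse_qs(parsed.query).get('q')
        if q and q[0]:
            return q[0]
    return redir_url


print(unwrap_redirect(
    'https://www.youtube.com/redirect?q=https%3A%2F%2Fexample.com%2Fpage'))
# -> https://example.com/page
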
index 59fb3343582e6dfa2aab2c6a8311300bd7458141..34866a54b6efc122f4d0edb22712c503fa448ec0 100644 (file)
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -1835,10 +1835,20 @@ def parse_duration(s):
         days, hours, mins, secs, ms = m.groups()
     else:
         m = re.match(
-            r'''(?ix)(?:P?T)?
+            r'''(?ix)(?:P?
+                (?:
+                    [0-9]+\s*y(?:ears?)?\s*
+                )?
+                (?:
+                    [0-9]+\s*m(?:onths?)?\s*
+                )?
+                (?:
+                    [0-9]+\s*w(?:eeks?)?\s*
+                )?
                 (?:
                     (?P<days>[0-9]+)\s*d(?:ays?)?\s*
                 )?
+                T)?
                 (?:
                     (?P<hours>[0-9]+)\s*h(?:ours?)?\s*
                 )?
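
With the widened pattern above, ISO-8601-style strings that carry (zero) year, month or week components no longer make parse_duration give up; the new alternatives are matched but deliberately not added to the total. A doctest-style sketch of the expected behaviour:

>>> from youtube_dl.utils import parse_duration
>>> parse_duration('PT1H30M')
5400.0
>>> parse_duration('P0Y0M0DT0H4M20.880S')
260.88
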
index d01ba30950a375541b7ba32fd54dd4923f6bd98e..8b67d23fee344f6192b34c6ab218577538e5575b 100644 (file)
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,3 +1,3 @@
 from __future__ import unicode_literals
 
-__version__ = '2017.10.15.1'
+__version__ = '2017.11.06'