Update upstream source from tag 'upstream/2019.09.28'

[youtubedl] / test / test_YoutubeDL.py
diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py
index 055e4255583d500805facc4fc59e296170e876e4..ce96661716c42ae0bf9c6a8ccb9ddf48c715e0a2 100644
--- a/test/test_YoutubeDL.py
+++ b/test/test_YoutubeDL.py
@@ -1,4 +1,5 @@
  #!/usr/bin/env python
+# coding: utf-8
  
  from __future__ import unicode_literals
  
@@ -12,8 +13,13 @@ import copy
  
  from test.helper import FakeYDL, assertRegexpMatches
  from youtube_dl import YoutubeDL
+from youtube_dl.compat import compat_str, compat_urllib_error
  from youtube_dl.extractor import YoutubeIE
+from youtube_dl.extractor.common import InfoExtractor
  from youtube_dl.postprocessor.common import PostProcessor
+from youtube_dl.utils import ExtractorError, match_filter_func
+
+TEST_URL = 'http://localhost/sample.mp4'
  
  
  class YDL(FakeYDL):
@@ -35,6 +41,7 @@ def _make_result(formats, **kwargs):
          'id': 'testid',
          'title': 'testttitle',
          'extractor': 'testex',
+        'extractor_key': 'TestEx',
      }
      res.update(**kwargs)
      return res
@@ -46,8 +53,8 @@ class TestFormatSelection(unittest.TestCase):
          ydl = YDL()
          ydl.params['prefer_free_formats'] = True
          formats = [
-            {'ext': 'webm', 'height': 460, 'url': 'x'},
-            {'ext': 'mp4', 'height': 460, 'url': 'y'},
+            {'ext': 'webm', 'height': 460, 'url': TEST_URL},
+            {'ext': 'mp4', 'height': 460, 'url': TEST_URL},
          ]
          info_dict = _make_result(formats)
          yie = YoutubeIE(ydl)
@@ -60,8 +67,8 @@ class TestFormatSelection(unittest.TestCase):
          ydl = YDL()
          ydl.params['prefer_free_formats'] = True
          formats = [
-            {'ext': 'webm', 'height': 720, 'url': 'a'},
-            {'ext': 'mp4', 'height': 1080, 'url': 'b'},
+            {'ext': 'webm', 'height': 720, 'url': TEST_URL},
+            {'ext': 'mp4', 'height': 1080, 'url': TEST_URL},
          ]
          info_dict['formats'] = formats
          yie = YoutubeIE(ydl)
@@ -74,9 +81,9 @@ class TestFormatSelection(unittest.TestCase):
          ydl = YDL()
          ydl.params['prefer_free_formats'] = False
          formats = [
-            {'ext': 'webm', 'height': 720, 'url': '_'},
-            {'ext': 'mp4', 'height': 720, 'url': '_'},
-            {'ext': 'flv', 'height': 720, 'url': '_'},
+            {'ext': 'webm', 'height': 720, 'url': TEST_URL},
+            {'ext': 'mp4', 'height': 720, 'url': TEST_URL},
+            {'ext': 'flv', 'height': 720, 'url': TEST_URL},
          ]
          info_dict['formats'] = formats
          yie = YoutubeIE(ydl)
@@ -88,8 +95,8 @@ class TestFormatSelection(unittest.TestCase):
          ydl = YDL()
          ydl.params['prefer_free_formats'] = False
          formats = [
-            {'ext': 'flv', 'height': 720, 'url': '_'},
-            {'ext': 'webm', 'height': 720, 'url': '_'},
+            {'ext': 'flv', 'height': 720, 'url': TEST_URL},
+            {'ext': 'webm', 'height': 720, 'url': TEST_URL},
          ]
          info_dict['formats'] = formats
          yie = YoutubeIE(ydl)
@@ -98,45 +105,13 @@ class TestFormatSelection(unittest.TestCase):
          downloaded = ydl.downloaded_info_dicts[0]
          self.assertEqual(downloaded['ext'], 'flv')
  
-    def test_format_limit(self):
-        formats = [
-            {'format_id': 'meh', 'url': 'http://example.com/meh', 'preference': 1},
-            {'format_id': 'good', 'url': 'http://example.com/good', 'preference': 2},
-            {'format_id': 'great', 'url': 'http://example.com/great', 'preference': 3},
-            {'format_id': 'excellent', 'url': 'http://example.com/exc', 'preference': 4},
-        ]
-        info_dict = _make_result(formats)
-
-        ydl = YDL()
-        ydl.process_ie_result(info_dict)
-        downloaded = ydl.downloaded_info_dicts[0]
-        self.assertEqual(downloaded['format_id'], 'excellent')
-
-        ydl = YDL({'format_limit': 'good'})
-        assert ydl.params['format_limit'] == 'good'
-        ydl.process_ie_result(info_dict.copy())
-        downloaded = ydl.downloaded_info_dicts[0]
-        self.assertEqual(downloaded['format_id'], 'good')
-
-        ydl = YDL({'format_limit': 'great', 'format': 'all'})
-        ydl.process_ie_result(info_dict.copy())
-        self.assertEqual(ydl.downloaded_info_dicts[0]['format_id'], 'meh')
-        self.assertEqual(ydl.downloaded_info_dicts[1]['format_id'], 'good')
-        self.assertEqual(ydl.downloaded_info_dicts[2]['format_id'], 'great')
-        self.assertTrue('3' in ydl.msgs[0])
-
-        ydl = YDL()
-        ydl.params['format_limit'] = 'excellent'
-        ydl.process_ie_result(info_dict.copy())
-        downloaded = ydl.downloaded_info_dicts[0]
-        self.assertEqual(downloaded['format_id'], 'excellent')
-
      def test_format_selection(self):
          formats = [
-            {'format_id': '35', 'ext': 'mp4', 'preference': 1, 'url': '_'},
-            {'format_id': '45', 'ext': 'webm', 'preference': 2, 'url': '_'},
-            {'format_id': '47', 'ext': 'webm', 'preference': 3, 'url': '_'},
-            {'format_id': '2', 'ext': 'flv', 'preference': 4, 'url': '_'},
+            {'format_id': '35', 'ext': 'mp4', 'preference': 1, 'url': TEST_URL},
+            {'format_id': 'example-with-dashes', 'ext': 'webm', 'preference': 1, 'url': TEST_URL},
+            {'format_id': '45', 'ext': 'webm', 'preference': 2, 'url': TEST_URL},
+            {'format_id': '47', 'ext': 'webm', 'preference': 3, 'url': TEST_URL},
+            {'format_id': '2', 'ext': 'flv', 'preference': 4, 'url': TEST_URL},
          ]
          info_dict = _make_result(formats)
  
@@ -165,12 +140,17 @@ class TestFormatSelection(unittest.TestCase):
          downloaded = ydl.downloaded_info_dicts[0]
          self.assertEqual(downloaded['format_id'], '35')
  
+        ydl = YDL({'format': 'example-with-dashes'})
+        ydl.process_ie_result(info_dict.copy())
+        downloaded = ydl.downloaded_info_dicts[0]
+        self.assertEqual(downloaded['format_id'], 'example-with-dashes')
+
      def test_format_selection_audio(self):
          formats = [
-            {'format_id': 'audio-low', 'ext': 'webm', 'preference': 1, 'vcodec': 'none', 'url': '_'},
-            {'format_id': 'audio-mid', 'ext': 'webm', 'preference': 2, 'vcodec': 'none', 'url': '_'},
-            {'format_id': 'audio-high', 'ext': 'flv', 'preference': 3, 'vcodec': 'none', 'url': '_'},
-            {'format_id': 'vid', 'ext': 'mp4', 'preference': 4, 'url': '_'},
+            {'format_id': 'audio-low', 'ext': 'webm', 'preference': 1, 'vcodec': 'none', 'url': TEST_URL},
+            {'format_id': 'audio-mid', 'ext': 'webm', 'preference': 2, 'vcodec': 'none', 'url': TEST_URL},
+            {'format_id': 'audio-high', 'ext': 'flv', 'preference': 3, 'vcodec': 'none', 'url': TEST_URL},
+            {'format_id': 'vid', 'ext': 'mp4', 'preference': 4, 'url': TEST_URL},
          ]
          info_dict = _make_result(formats)
  
@@ -185,8 +165,8 @@ class TestFormatSelection(unittest.TestCase):
          self.assertEqual(downloaded['format_id'], 'audio-low')
  
          formats = [
-            {'format_id': 'vid-low', 'ext': 'mp4', 'preference': 1, 'url': '_'},
-            {'format_id': 'vid-high', 'ext': 'mp4', 'preference': 2, 'url': '_'},
+            {'format_id': 'vid-low', 'ext': 'mp4', 'preference': 1, 'url': TEST_URL},
+            {'format_id': 'vid-high', 'ext': 'mp4', 'preference': 2, 'url': TEST_URL},
          ]
          info_dict = _make_result(formats)
  
@@ -228,9 +208,9 @@ class TestFormatSelection(unittest.TestCase):
  
      def test_format_selection_video(self):
          formats = [
-            {'format_id': 'dash-video-low', 'ext': 'mp4', 'preference': 1, 'acodec': 'none', 'url': '_'},
-            {'format_id': 'dash-video-high', 'ext': 'mp4', 'preference': 2, 'acodec': 'none', 'url': '_'},
-            {'format_id': 'vid', 'ext': 'mp4', 'preference': 3, 'url': '_'},
+            {'format_id': 'dash-video-low', 'ext': 'mp4', 'preference': 1, 'acodec': 'none', 'url': TEST_URL},
+            {'format_id': 'dash-video-high', 'ext': 'mp4', 'preference': 2, 'acodec': 'none', 'url': TEST_URL},
+            {'format_id': 'vid', 'ext': 'mp4', 'preference': 3, 'url': TEST_URL},
          ]
          info_dict = _make_result(formats)
  
@@ -244,9 +224,94 @@ class TestFormatSelection(unittest.TestCase):
          downloaded = ydl.downloaded_info_dicts[0]
          self.assertEqual(downloaded['format_id'], 'dash-video-low')
  
+        ydl = YDL({'format': 'bestvideo[format_id^=dash][format_id$=low]'})
+        ydl.process_ie_result(info_dict.copy())
+        downloaded = ydl.downloaded_info_dicts[0]
+        self.assertEqual(downloaded['format_id'], 'dash-video-low')
+
+        formats = [
+            {'format_id': 'vid-vcodec-dot', 'ext': 'mp4', 'preference': 1, 'vcodec': 'avc1.123456', 'acodec': 'none', 'url': TEST_URL},
+        ]
+        info_dict = _make_result(formats)
+
+        ydl = YDL({'format': 'bestvideo[vcodec=avc1.123456]'})
+        ydl.process_ie_result(info_dict.copy())
+        downloaded = ydl.downloaded_info_dicts[0]
+        self.assertEqual(downloaded['format_id'], 'vid-vcodec-dot')
+
+    def test_format_selection_string_ops(self):
+        formats = [
+            {'format_id': 'abc-cba', 'ext': 'mp4', 'url': TEST_URL},
+            {'format_id': 'zxc-cxz', 'ext': 'webm', 'url': TEST_URL},
+        ]
+        info_dict = _make_result(formats)
+
+        # equals (=)
+        ydl = YDL({'format': '[format_id=abc-cba]'})
+        ydl.process_ie_result(info_dict.copy())
+        downloaded = ydl.downloaded_info_dicts[0]
+        self.assertEqual(downloaded['format_id'], 'abc-cba')
+
+        # does not equal (!=)
+        ydl = YDL({'format': '[format_id!=abc-cba]'})
+        ydl.process_ie_result(info_dict.copy())
+        downloaded = ydl.downloaded_info_dicts[0]
+        self.assertEqual(downloaded['format_id'], 'zxc-cxz')
+
+        ydl = YDL({'format': '[format_id!=abc-cba][format_id!=zxc-cxz]'})
+        self.assertRaises(ExtractorError, ydl.process_ie_result, info_dict.copy())
+
+        # starts with (^=)
+        ydl = YDL({'format': '[format_id^=abc]'})
+        ydl.process_ie_result(info_dict.copy())
+        downloaded = ydl.downloaded_info_dicts[0]
+        self.assertEqual(downloaded['format_id'], 'abc-cba')
+
+        # does not start with (!^=)
+        ydl = YDL({'format': '[format_id!^=abc]'})
+        ydl.process_ie_result(info_dict.copy())
+        downloaded = ydl.downloaded_info_dicts[0]
+        self.assertEqual(downloaded['format_id'], 'zxc-cxz')
+
+        ydl = YDL({'format': '[format_id!^=abc][format_id!^=zxc]'})
+        self.assertRaises(ExtractorError, ydl.process_ie_result, info_dict.copy())
+
+        # ends with ($=)
+        ydl = YDL({'format': '[format_id$=cba]'})
+        ydl.process_ie_result(info_dict.copy())
+        downloaded = ydl.downloaded_info_dicts[0]
+        self.assertEqual(downloaded['format_id'], 'abc-cba')
+
+        # does not end with (!$=)
+        ydl = YDL({'format': '[format_id!$=cba]'})
+        ydl.process_ie_result(info_dict.copy())
+        downloaded = ydl.downloaded_info_dicts[0]
+        self.assertEqual(downloaded['format_id'], 'zxc-cxz')
+
+        ydl = YDL({'format': '[format_id!$=cba][format_id!$=cxz]'})
+        self.assertRaises(ExtractorError, ydl.process_ie_result, info_dict.copy())
+
+        # contains (*=)
+        ydl = YDL({'format': '[format_id*=bc-cb]'})
+        ydl.process_ie_result(info_dict.copy())
+        downloaded = ydl.downloaded_info_dicts[0]
+        self.assertEqual(downloaded['format_id'], 'abc-cba')
+
+        # does not contain (!*=)
+        ydl = YDL({'format': '[format_id!*=bc-cb]'})
+        ydl.process_ie_result(info_dict.copy())
+        downloaded = ydl.downloaded_info_dicts[0]
+        self.assertEqual(downloaded['format_id'], 'zxc-cxz')
+
+        ydl = YDL({'format': '[format_id!*=abc][format_id!*=zxc]'})
+        self.assertRaises(ExtractorError, ydl.process_ie_result, info_dict.copy())
+
+        ydl = YDL({'format': '[format_id!*=-]'})
+        self.assertRaises(ExtractorError, ydl.process_ie_result, info_dict.copy())
+
      def test_youtube_format_selection(self):
          order = [
-            '38', '37', '46', '22', '45', '35', '44', '18', '34', '43', '6', '5', '36', '17', '13',
+            '38', '37', '46', '22', '45', '35', '44', '18', '34', '43', '6', '5', '17', '36', '13',
              # Apple HTTP Live Streaming
              '96', '95', '94', '93', '92', '132', '151',
              # 3D
@@ -258,29 +323,147 @@ class TestFormatSelection(unittest.TestCase):
              '141', '172', '140', '171', '139',
          ]
  
-        for f1id, f2id in zip(order, order[1:]):
-            f1 = YoutubeIE._formats[f1id].copy()
-            f1['format_id'] = f1id
-            f1['url'] = 'url:' + f1id
-            f2 = YoutubeIE._formats[f2id].copy()
-            f2['format_id'] = f2id
-            f2['url'] = 'url:' + f2id
+        def format_info(f_id):
+            info = YoutubeIE._formats[f_id].copy()
+
+            # XXX: In real cases InfoExtractor._parse_mpd_formats() fills up 'acodec'
+            # and 'vcodec', while in tests such information is incomplete since
+            # commit a6c2c24479e5f4827ceb06f64d855329c0a6f593
+            # test_YoutubeDL.test_youtube_format_selection is broken without
+            # this fix
+            if 'acodec' in info and 'vcodec' not in info:
+                info['vcodec'] = 'none'
+            elif 'vcodec' in info and 'acodec' not in info:
+                info['acodec'] = 'none'
+
+            info['format_id'] = f_id
+            info['url'] = 'url:' + f_id
+            return info
+        formats_order = [format_info(f_id) for f_id in order]
+
+        info_dict = _make_result(list(formats_order), extractor='youtube')
+        ydl = YDL({'format': 'bestvideo+bestaudio'})
+        yie = YoutubeIE(ydl)
+        yie._sort_formats(info_dict['formats'])
+        ydl.process_ie_result(info_dict)
+        downloaded = ydl.downloaded_info_dicts[0]
+        self.assertEqual(downloaded['format_id'], '137+141')
+        self.assertEqual(downloaded['ext'], 'mp4')
+
+        info_dict = _make_result(list(formats_order), extractor='youtube')
+        ydl = YDL({'format': 'bestvideo[height>=999999]+bestaudio/best'})
+        yie = YoutubeIE(ydl)
+        yie._sort_formats(info_dict['formats'])
+        ydl.process_ie_result(info_dict)
+        downloaded = ydl.downloaded_info_dicts[0]
+        self.assertEqual(downloaded['format_id'], '38')
+
+        info_dict = _make_result(list(formats_order), extractor='youtube')
+        ydl = YDL({'format': 'bestvideo/best,bestaudio'})
+        yie = YoutubeIE(ydl)
+        yie._sort_formats(info_dict['formats'])
+        ydl.process_ie_result(info_dict)
+        downloaded_ids = [info['format_id'] for info in ydl.downloaded_info_dicts]
+        self.assertEqual(downloaded_ids, ['137', '141'])
+
+        info_dict = _make_result(list(formats_order), extractor='youtube')
+        ydl = YDL({'format': '(bestvideo[ext=mp4],bestvideo[ext=webm])+bestaudio'})
+        yie = YoutubeIE(ydl)
+        yie._sort_formats(info_dict['formats'])
+        ydl.process_ie_result(info_dict)
+        downloaded_ids = [info['format_id'] for info in ydl.downloaded_info_dicts]
+        self.assertEqual(downloaded_ids, ['137+141', '248+141'])
+
+        info_dict = _make_result(list(formats_order), extractor='youtube')
+        ydl = YDL({'format': '(bestvideo[ext=mp4],bestvideo[ext=webm])[height<=720]+bestaudio'})
+        yie = YoutubeIE(ydl)
+        yie._sort_formats(info_dict['formats'])
+        ydl.process_ie_result(info_dict)
+        downloaded_ids = [info['format_id'] for info in ydl.downloaded_info_dicts]
+        self.assertEqual(downloaded_ids, ['136+141', '247+141'])
+
+        info_dict = _make_result(list(formats_order), extractor='youtube')
+        ydl = YDL({'format': '(bestvideo[ext=none]/bestvideo[ext=webm])+bestaudio'})
+        yie = YoutubeIE(ydl)
+        yie._sort_formats(info_dict['formats'])
+        ydl.process_ie_result(info_dict)
+        downloaded_ids = [info['format_id'] for info in ydl.downloaded_info_dicts]
+        self.assertEqual(downloaded_ids, ['248+141'])
  
+        for f1, f2 in zip(formats_order, formats_order[1:]):
              info_dict = _make_result([f1, f2], extractor='youtube')
-            ydl = YDL()
+            ydl = YDL({'format': 'best/bestvideo'})
              yie = YoutubeIE(ydl)
              yie._sort_formats(info_dict['formats'])
              ydl.process_ie_result(info_dict)
              downloaded = ydl.downloaded_info_dicts[0]
-            self.assertEqual(downloaded['format_id'], f1id)
+            self.assertEqual(downloaded['format_id'], f1['format_id'])
  
              info_dict = _make_result([f2, f1], extractor='youtube')
-            ydl = YDL()
+            ydl = YDL({'format': 'best/bestvideo'})
              yie = YoutubeIE(ydl)
              yie._sort_formats(info_dict['formats'])
              ydl.process_ie_result(info_dict)
              downloaded = ydl.downloaded_info_dicts[0]
-            self.assertEqual(downloaded['format_id'], f1id)
+            self.assertEqual(downloaded['format_id'], f1['format_id'])
+
+    def test_audio_only_extractor_format_selection(self):
+        # For extractors with incomplete formats (all formats are audio-only or
+        # video-only) best and worst should fallback to corresponding best/worst
+        # video-only or audio-only formats (as per
+        # https://github.com/ytdl-org/youtube-dl/pull/5556)
+        formats = [
+            {'format_id': 'low', 'ext': 'mp3', 'preference': 1, 'vcodec': 'none', 'url': TEST_URL},
+            {'format_id': 'high', 'ext': 'mp3', 'preference': 2, 'vcodec': 'none', 'url': TEST_URL},
+        ]
+        info_dict = _make_result(formats)
+
+        ydl = YDL({'format': 'best'})
+        ydl.process_ie_result(info_dict.copy())
+        downloaded = ydl.downloaded_info_dicts[0]
+        self.assertEqual(downloaded['format_id'], 'high')
+
+        ydl = YDL({'format': 'worst'})
+        ydl.process_ie_result(info_dict.copy())
+        downloaded = ydl.downloaded_info_dicts[0]
+        self.assertEqual(downloaded['format_id'], 'low')
+
+    def test_format_not_available(self):
+        formats = [
+            {'format_id': 'regular', 'ext': 'mp4', 'height': 360, 'url': TEST_URL},
+            {'format_id': 'video', 'ext': 'mp4', 'height': 720, 'acodec': 'none', 'url': TEST_URL},
+        ]
+        info_dict = _make_result(formats)
+
+        # This must fail since complete video-audio format does not match filter
+        # and extractor does not provide incomplete only formats (i.e. only
+        # video-only or audio-only).
+        ydl = YDL({'format': 'best[height>360]'})
+        self.assertRaises(ExtractorError, ydl.process_ie_result, info_dict.copy())
+
+    def test_format_selection_issue_10083(self):
+        # See https://github.com/ytdl-org/youtube-dl/issues/10083
+        formats = [
+            {'format_id': 'regular', 'height': 360, 'url': TEST_URL},
+            {'format_id': 'video', 'height': 720, 'acodec': 'none', 'url': TEST_URL},
+            {'format_id': 'audio', 'vcodec': 'none', 'url': TEST_URL},
+        ]
+        info_dict = _make_result(formats)
+
+        ydl = YDL({'format': 'best[height>360]/bestvideo[height>360]+bestaudio'})
+        ydl.process_ie_result(info_dict.copy())
+        self.assertEqual(ydl.downloaded_info_dicts[0]['format_id'], 'video+audio')
+
+    def test_invalid_format_specs(self):
+        def assert_syntax_error(format_spec):
+            ydl = YDL({'format': format_spec})
+            info_dict = _make_result([{'format_id': 'foo', 'url': TEST_URL}])
+            self.assertRaises(SyntaxError, ydl.process_ie_result, info_dict)
+
+        assert_syntax_error('bestvideo,,best')
+        assert_syntax_error('+bestaudio')
+        assert_syntax_error('bestvideo+')
+        assert_syntax_error('/')
  
      def test_format_filtering(self):
          formats = [
@@ -337,6 +520,37 @@ class TestFormatSelection(unittest.TestCase):
          downloaded = ydl.downloaded_info_dicts[0]
          self.assertEqual(downloaded['format_id'], 'G')
  
+        ydl = YDL({'format': 'all[width>=400][width<=600]'})
+        ydl.process_ie_result(info_dict)
+        downloaded_ids = [info['format_id'] for info in ydl.downloaded_info_dicts]
+        self.assertEqual(downloaded_ids, ['B', 'C', 'D'])
+
+        ydl = YDL({'format': 'best[height<40]'})
+        try:
+            ydl.process_ie_result(info_dict)
+        except ExtractorError:
+            pass
+        self.assertEqual(ydl.downloaded_info_dicts, [])
+
+    def test_default_format_spec(self):
+        ydl = YDL({'simulate': True})
+        self.assertEqual(ydl._default_format_spec({}), 'bestvideo+bestaudio/best')
+
+        ydl = YDL({})
+        self.assertEqual(ydl._default_format_spec({'is_live': True}), 'best/bestvideo+bestaudio')
+
+        ydl = YDL({'simulate': True})
+        self.assertEqual(ydl._default_format_spec({'is_live': True}), 'bestvideo+bestaudio/best')
+
+        ydl = YDL({'outtmpl': '-'})
+        self.assertEqual(ydl._default_format_spec({}), 'best/bestvideo+bestaudio')
+
+        ydl = YDL({})
+        self.assertEqual(ydl._default_format_spec({}, download=False), 'bestvideo+bestaudio/best')
+        self.assertEqual(ydl._default_format_spec({'is_live': True}), 'best/bestvideo+bestaudio')
+
+
+class TestYoutubeDL(unittest.TestCase):
      def test_subtitles(self):
          def s_formats(lang, autocaption=False):
              return [{
@@ -413,6 +627,9 @@ class TestFormatSelection(unittest.TestCase):
              'id': '1234',
              'ext': 'mp4',
              'width': None,
+            'height': 1080,
+            'title1': '$PATH',
+            'title2': '%PATH%',
          }
  
          def fname(templ):
@@ -422,13 +639,33 @@ class TestFormatSelection(unittest.TestCase):
          self.assertEqual(fname('%(id)s-%(width)s.%(ext)s'), '1234-NA.mp4')
          # Replace missing fields with 'NA'
          self.assertEqual(fname('%(uploader_date)s-%(id)s.%(ext)s'), 'NA-1234.mp4')
+        self.assertEqual(fname('%(height)d.%(ext)s'), '1080.mp4')
+        self.assertEqual(fname('%(height)6d.%(ext)s'), '  1080.mp4')
+        self.assertEqual(fname('%(height)-6d.%(ext)s'), '1080  .mp4')
+        self.assertEqual(fname('%(height)06d.%(ext)s'), '001080.mp4')
+        self.assertEqual(fname('%(height) 06d.%(ext)s'), ' 01080.mp4')
+        self.assertEqual(fname('%(height)   06d.%(ext)s'), ' 01080.mp4')
+        self.assertEqual(fname('%(height)0 6d.%(ext)s'), ' 01080.mp4')
+        self.assertEqual(fname('%(height)0   6d.%(ext)s'), ' 01080.mp4')
+        self.assertEqual(fname('%(height)   0   6d.%(ext)s'), ' 01080.mp4')
+        self.assertEqual(fname('%%'), '%')
+        self.assertEqual(fname('%%%%'), '%%')
+        self.assertEqual(fname('%%(height)06d.%(ext)s'), '%(height)06d.mp4')
+        self.assertEqual(fname('%(width)06d.%(ext)s'), 'NA.mp4')
+        self.assertEqual(fname('%(width)06d.%%(ext)s'), 'NA.%(ext)s')
+        self.assertEqual(fname('%%(width)06d.%(ext)s'), '%(width)06d.mp4')
+        self.assertEqual(fname('Hello %(title1)s'), 'Hello $PATH')
+        self.assertEqual(fname('Hello %(title2)s'), 'Hello %PATH%')
  
      def test_format_note(self):
          ydl = YoutubeDL()
          self.assertEqual(ydl._format_note({}), '')
          assertRegexpMatches(self, ydl._format_note({
              'vbr': 10,
-        }), '^\s*10k$')
+        }), r'^\s*10k$')
+        assertRegexpMatches(self, ydl._format_note({
+            'fps': 30,
+        }), r'^30fps$')
  
      def test_postprocessors(self):
          filename = 'post-processor-testfile.mp4'
@@ -438,27 +675,230 @@ class TestFormatSelection(unittest.TestCase):
              def run(self, info):
                  with open(audiofile, 'wt') as f:
                      f.write('EXAMPLE')
              def run(self, info):
                  with open(audiofile, 'wt') as f:
                      f.write('EXAMPLE')
-                info['filepath']
-                return False, info
+                return [info['filepath']], info
  
  
-        def run_pp(params):
+        def run_pp(params, PP):
              with open(filename, 'wt') as f:
                  f.write('EXAMPLE')
              ydl = YoutubeDL(params)
              with open(filename, 'wt') as f:
                  f.write('EXAMPLE')
              ydl = YoutubeDL(params)
-            ydl.add_post_processor(SimplePP())
+            ydl.add_post_processor(PP())
              ydl.post_process(filename, {'filepath': filename})
  
              ydl.post_process(filename, {'filepath': filename})
  
-        run_pp({'keepvideo': True})
+        run_pp({'keepvideo': True}, SimplePP)
          self.assertTrue(os.path.exists(filename), '%s doesn\'t exist' % filename)
          self.assertTrue(os.path.exists(audiofile), '%s doesn\'t exist' % audiofile)
          os.unlink(filename)
          os.unlink(audiofile)
  
          self.assertTrue(os.path.exists(filename), '%s doesn\'t exist' % filename)
          self.assertTrue(os.path.exists(audiofile), '%s doesn\'t exist' % audiofile)
          os.unlink(filename)
          os.unlink(audiofile)
  
-        run_pp({'keepvideo': False})
+        run_pp({'keepvideo': False}, SimplePP)
          self.assertFalse(os.path.exists(filename), '%s exists' % filename)
          self.assertTrue(os.path.exists(audiofile), '%s doesn\'t exist' % audiofile)
          os.unlink(audiofile)
  
          self.assertFalse(os.path.exists(filename), '%s exists' % filename)
          self.assertTrue(os.path.exists(audiofile), '%s doesn\'t exist' % audiofile)
          os.unlink(audiofile)
  
+        class ModifierPP(PostProcessor):
+            def run(self, info):
+                with open(info['filepath'], 'wt') as f:
+                    f.write('MODIFIED')
+                return [], info
+
+        run_pp({'keepvideo': False}, ModifierPP)
+        self.assertTrue(os.path.exists(filename), '%s doesn\'t exist' % filename)
+        os.unlink(filename)
+
+    def test_match_filter(self):
+        class FilterYDL(YDL):
+            def __init__(self, *args, **kwargs):
+                super(FilterYDL, self).__init__(*args, **kwargs)
+                self.params['simulate'] = True
+
+            def process_info(self, info_dict):
+                super(YDL, self).process_info(info_dict)
+
+            def _match_entry(self, info_dict, incomplete):
+                res = super(FilterYDL, self)._match_entry(info_dict, incomplete)
+                if res is None:
+                    self.downloaded_info_dicts.append(info_dict)
+                return res
+
+        first = {
+            'id': '1',
+            'url': TEST_URL,
+            'title': 'one',
+            'extractor': 'TEST',
+            'duration': 30,
+            'filesize': 10 * 1024,
+            'playlist_id': '42',
+            'uploader': "變態妍字幕版 太妍 тест",
+            'creator': "тест ' 123 ' тест--",
+        }
+        second = {
+            'id': '2',
+            'url': TEST_URL,
+            'title': 'two',
+            'extractor': 'TEST',
+            'duration': 10,
+            'description': 'foo',
+            'filesize': 5 * 1024,
+            'playlist_id': '43',
+            'uploader': "тест 123",
+        }
+        videos = [first, second]
+
+        def get_videos(filter_=None):
+            ydl = FilterYDL({'match_filter': filter_})
+            for v in videos:
+                ydl.process_ie_result(v, download=True)
+            return [v['id'] for v in ydl.downloaded_info_dicts]
+
+        res = get_videos()
+        self.assertEqual(res, ['1', '2'])
+
+        def f(v):
+            if v['id'] == '1':
+                return None
+            else:
+                return 'Video id is not 1'
+        res = get_videos(f)
+        self.assertEqual(res, ['1'])
+
+        f = match_filter_func('duration < 30')
+        res = get_videos(f)
+        self.assertEqual(res, ['2'])
+
+        f = match_filter_func('description = foo')
+        res = get_videos(f)
+        self.assertEqual(res, ['2'])
+
+        f = match_filter_func('description =? foo')
+        res = get_videos(f)
+        self.assertEqual(res, ['1', '2'])
+
+        f = match_filter_func('filesize > 5KiB')
+        res = get_videos(f)
+        self.assertEqual(res, ['1'])
+
+        f = match_filter_func('playlist_id = 42')
+        res = get_videos(f)
+        self.assertEqual(res, ['1'])
+
+        f = match_filter_func('uploader = "變態妍字幕版 太妍 тест"')
+        res = get_videos(f)
+        self.assertEqual(res, ['1'])
+
+        f = match_filter_func('uploader != "變態妍字幕版 太妍 тест"')
+        res = get_videos(f)
+        self.assertEqual(res, ['2'])
+
+        f = match_filter_func('creator = "тест \' 123 \' тест--"')
+        res = get_videos(f)
+        self.assertEqual(res, ['1'])
+
+        f = match_filter_func("creator = 'тест \\' 123 \\' тест--'")
+        res = get_videos(f)
+        self.assertEqual(res, ['1'])
+
+        f = match_filter_func(r"creator = 'тест \' 123 \' тест--' & duration > 30")
+        res = get_videos(f)
+        self.assertEqual(res, [])
+
+    def test_playlist_items_selection(self):
+        entries = [{
+            'id': compat_str(i),
+            'title': compat_str(i),
+            'url': TEST_URL,
+        } for i in range(1, 5)]
+        playlist = {
+            '_type': 'playlist',
+            'id': 'test',
+            'entries': entries,
+            'extractor': 'test:playlist',
+            'extractor_key': 'test:playlist',
+            'webpage_url': 'http://example.com',
+        }
+
+        def get_ids(params):
+            ydl = YDL(params)
+            # make a copy because the dictionary can be modified
+            ydl.process_ie_result(playlist.copy())
+            return [int(v['id']) for v in ydl.downloaded_info_dicts]
+
+        result = get_ids({})
+        self.assertEqual(result, [1, 2, 3, 4])
+
+        result = get_ids({'playlistend': 10})
+        self.assertEqual(result, [1, 2, 3, 4])
+
+        result = get_ids({'playlistend': 2})
+        self.assertEqual(result, [1, 2])
+
+        result = get_ids({'playliststart': 10})
+        self.assertEqual(result, [])
+
+        result = get_ids({'playliststart': 2})
+        self.assertEqual(result, [2, 3, 4])
+
+        result = get_ids({'playlist_items': '2-4'})
+        self.assertEqual(result, [2, 3, 4])
+
+        result = get_ids({'playlist_items': '2,4'})
+        self.assertEqual(result, [2, 4])
+
+        result = get_ids({'playlist_items': '10'})
+        self.assertEqual(result, [])
+
+        result = get_ids({'playlist_items': '3-10'})
+        self.assertEqual(result, [3, 4])
+
+        result = get_ids({'playlist_items': '2-4,3-4,3'})
+        self.assertEqual(result, [2, 3, 4])
+
+    def test_urlopen_no_file_protocol(self):
+        # see https://github.com/ytdl-org/youtube-dl/issues/8227
+        ydl = YDL()
+        self.assertRaises(compat_urllib_error.URLError, ydl.urlopen, 'file:///etc/passwd')
+
+    def test_do_not_override_ie_key_in_url_transparent(self):
+        ydl = YDL()
+
+        class Foo1IE(InfoExtractor):
+            _VALID_URL = r'foo1:'
+
+            def _real_extract(self, url):
+                return {
+                    '_type': 'url_transparent',
+                    'url': 'foo2:',
+                    'ie_key': 'Foo2',
+                    'title': 'foo1 title',
+                    'id': 'foo1_id',
+                }
+
+        class Foo2IE(InfoExtractor):
+            _VALID_URL = r'foo2:'
+
+            def _real_extract(self, url):
+                return {
+                    '_type': 'url',
+                    'url': 'foo3:',
+                    'ie_key': 'Foo3',
+                }
+
+        class Foo3IE(InfoExtractor):
+            _VALID_URL = r'foo3:'
+
+            def _real_extract(self, url):
+                return _make_result([{'url': TEST_URL}], title='foo3 title')
+
+        ydl.add_info_extractor(Foo1IE(ydl))
+        ydl.add_info_extractor(Foo2IE(ydl))
+        ydl.add_info_extractor(Foo3IE(ydl))
+        ydl.extract_info('foo1:')
+        downloaded = ydl.downloaded_info_dicts[0]
+        self.assertEqual(downloaded['url'], TEST_URL)
+        self.assertEqual(downloaded['title'], 'foo1 title')
+        self.assertEqual(downloaded['id'], 'testid')
+        self.assertEqual(downloaded['extractor'], 'testex')
+        self.assertEqual(downloaded['extractor_key'], 'TestEx')
+
  
  if __name__ == '__main__':
      unittest.main()
  
  if __name__ == '__main__':
      unittest.main()