Raphaël G. Git Repositories - youtubedl/commitdiff
New upstream version 2020.06.16.1
author: Rogério Brito <rbrito@ime.usp.br>
Wed, 17 Jun 2020 18:11:21 +0000 (15:11 -0300)
committer: Rogério Brito <rbrito@ime.usp.br>
Wed, 17 Jun 2020 18:11:21 +0000 (15:11 -0300)
ChangeLog
youtube-dl
youtube_dl/extractor/youtube.py
youtube_dl/version.py

index 234fcc50ee61287a921f45b34e340dbd05416566..07d6ccd69d61ee47f7130a24b65f0affbf15ef4d 100644 (file)
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,10 @@
+version 2020.06.16.1
+
+Extractors
+* [youtube] Force old layout (#25682, #25683, #25680, #25686)
+* [youtube] Fix categories and improve tags extraction
+
+
 version 2020.06.16
 
 Extractors
index 359581fe5d24f9f78a87c3a150233680280021d2..a806646f03ba8a1dde4fb51acf947e7f9b692277 100755 (executable)
Binary files a/youtube-dl and b/youtube-dl differ
index ce2212a7caa13111e1d2835fa65be371a4937847..1bc79e01478445ede9a2b3a0f1d4ebea38ce7534 100644 (file)
@@ -77,7 +77,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
 
     def _set_language(self):
         self._set_cookie(
-            '.youtube.com', 'PREF', 'f1=50000000&hl=en',
+            '.youtube.com', 'PREF', 'f1=50000000&f6=8&hl=en',
             # YouTube sets the expire time to about two months
             expire_time=time.time() + 2 * 30 * 24 * 3600)
 
@@ -2356,17 +2356,21 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
         m_cat_container = self._search_regex(
             r'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>',
             video_webpage, 'categories', default=None)
+        category = None
         if m_cat_container:
             category = self._html_search_regex(
                 r'(?s)<a[^<]+>(.*?)</a>', m_cat_container, 'category',
                 default=None)
-            video_categories = None if category is None else [category]
-        else:
-            video_categories = None
+        if not category:
+            category = try_get(
+                microformat, lambda x: x['category'], compat_str)
+        video_categories = None if category is None else [category]
 
         video_tags = [
             unescapeHTML(m.group('content'))
             for m in re.finditer(self._meta_regex('og:video:tag'), video_webpage)]
+        if not video_tags:
+            video_tags = try_get(video_details, lambda x: x['keywords'], list)
 
         def _extract_count(count_name):
             return str_to_int(self._search_regex(
index 56ed71c2a61f8fd7f6368354215a2daa28b03262..6b88eb38caeff63388f753e5afecf7320269c4a5 100644 (file)
@@ -1,3 +1,3 @@
 from __future__ import unicode_literals
 
-__version__ = '2020.06.16'
+__version__ = '2020.06.16.1'