Imported Upstream version 2013.12.23

[youtubedl] / youtube_dl / extractor / youtube.py
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py

index c992cba978441081a50e5f134a4a694c83a6685c..a68576547e85f344d7ccaa78092fc0146b2e935e 100644 (file)
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -7,20 +7,16 @@ import itertools
  import json
  import os.path
  import re
-import socket
  import string
  import struct
  import traceback
-import xml.etree.ElementTree
  import zlib
  
  from .common import InfoExtractor, SearchInfoExtractor
  from .subtitles import SubtitlesInfoExtractor
  from ..utils import (
      compat_chr,
-    compat_http_client,
      compat_parse_qs,
-    compat_urllib_error,
      compat_urllib_parse,
      compat_urllib_request,
      compat_urlparse,
@@ -29,6 +25,7 @@ from ..utils import (
      clean_html,
      get_cachedir,
      get_element_by_id,
+    get_element_by_attribute,
      ExtractorError,
      unescapeHTML,
      unified_strdate,
@@ -45,19 +42,11 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
      # If True it will raise an error if no login info is provided
      _LOGIN_REQUIRED = False
  
-    def report_lang(self):
-        """Report attempt to set language."""
-        self.to_screen(u'Setting language')
-
      def _set_language(self):
-        request = compat_urllib_request.Request(self._LANG_URL)
-        try:
-            self.report_lang()
-            compat_urllib_request.urlopen(request).read()
-        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
-            self._downloader.report_warning(u'unable to set language: %s' % compat_str(err))
-            return False
-        return True
+        return bool(self._download_webpage(
+            self._LANG_URL, None,
+            note=u'Setting language', errnote='unable to set language',
+            fatal=False))
  
      def _login(self):
          (username, password) = self._get_login_info()
@@ -67,12 +56,12 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
                  raise ExtractorError(u'No login info available, needed for using %s.' % self.IE_NAME, expected=True)
              return False
  
-        request = compat_urllib_request.Request(self._LOGIN_URL)
-        try:
-            login_page = compat_urllib_request.urlopen(request).read().decode('utf-8')
-        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
-            self._downloader.report_warning(u'unable to fetch login page: %s' % compat_str(err))
-            return False
+        login_page = self._download_webpage(
+            self._LOGIN_URL, None,
+            note=u'Downloading login page',
+            errnote=u'unable to fetch login page', fatal=False)
+        if login_page is False:
+            return
  
          galx = self._search_regex(r'(?s)<input.+?name="GALX".+?value="(.+?)"',
                                    login_page, u'Login GALX parameter')
@@ -102,29 +91,28 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
          # chokes on unicode
          login_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k,v in login_form_strs.items())
          login_data = compat_urllib_parse.urlencode(login_form).encode('ascii')
-        request = compat_urllib_request.Request(self._LOGIN_URL, login_data)
-        try:
-            self.report_login()
-            login_results = compat_urllib_request.urlopen(request).read().decode('utf-8')
-            if re.search(r'(?i)<form[^>]* id="gaia_loginform"', login_results) is not None:
-                self._downloader.report_warning(u'unable to log in: bad username or password')
-                return False
-        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
-            self._downloader.report_warning(u'unable to log in: %s' % compat_str(err))
+
+        req = compat_urllib_request.Request(self._LOGIN_URL, login_data)
+        login_results = self._download_webpage(
+            req, None,
+            note=u'Logging in', errnote=u'unable to log in', fatal=False)
+        if login_results is False:
+            return False
+        if re.search(r'(?i)<form[^>]* id="gaia_loginform"', login_results) is not None:
+            self._downloader.report_warning(u'unable to log in: bad username or password')
              return False
          return True
  
      def _confirm_age(self):
          age_form = {
-                'next_url':     '/',
-                'action_confirm':   'Confirm',
-                }
-        request = compat_urllib_request.Request(self._AGE_URL, compat_urllib_parse.urlencode(age_form))
-        try:
-            self.report_age_confirmation()
-            compat_urllib_request.urlopen(request).read().decode('utf-8')
-        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
-            raise ExtractorError(u'Unable to confirm age: %s' % compat_str(err))
+            'next_url': '/',
+            'action_confirm': 'Confirm',
+        }
+        req = compat_urllib_request.Request(self._AGE_URL, compat_urllib_parse.urlencode(age_form))
+
+        self._download_webpage(
+            req, None,
+            note=u'Confirming age', errnote=u'Unable to confirm age')
          return True
  
      def _real_initialize(self):
@@ -139,10 +127,10 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
  
  class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
      IE_DESC = u'YouTube.com'
-    _VALID_URL = r"""^
+    _VALID_URL = r"""(?x)^
                       (
-                         (?:https?://)?                                       # http(s):// (optional)
-                         (?:(?:(?:(?:\w+\.)?youtube(?:-nocookie)?\.com/|
+                         (?:https?://|//)?                                    # http(s):// or protocol-independent URL (optional)
+                         (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/|
                              tube\.majestyc\.net/|
                              youtube\.googleapis\.com/)                        # the various hostnames, with wildcard subdomains
                           (?:.*?\#/)?                                          # handle anchor (#/) redirect urls
@@ -248,21 +236,21 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
          '248': 'webm',
      }
      _video_dimensions = {
-        '5': '240x400',
+        '5': '400x240',
          '6': '???',
          '13': '???',
-        '17': '144x176',
-        '18': '360x640',
-        '22': '720x1280',
-        '34': '360x640',
-        '35': '480x854',
-        '36': '240x320',
-        '37': '1080x1920',
-        '38': '3072x4096',
-        '43': '360x640',
-        '44': '480x854',
-        '45': '720x1280',
-        '46': '1080x1920',
+        '17': '176x144',
+        '18': '640x360',
+        '22': '1280x720',
+        '34': '640x360',
+        '35': '854x480',
+        '36': '320x240',
+        '37': '1920x1080',
+        '38': '4096x3072',
+        '43': '640x360',
+        '44': '854x480',
+        '45': '1280x720',
+        '46': '1920x1080',
          '82': '360p',
          '83': '480p',
          '84': '720p',
@@ -336,7 +324,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
                  u"uploader": u"Philipp Hagemeister",
                  u"uploader_id": u"phihag",
                  u"upload_date": u"20121002",
-                u"description": u"test chars:  \"'/\\ä↭𝕐\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de ."
+                u"description": u"test chars:  \"'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de ."
              }
          },
          {
@@ -363,6 +351,18 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
                  u"uploader_id": u"justintimberlakeVEVO"
              }
          },
+        {
+            u"url":  u"//www.YouTube.com/watch?v=yZIXLfi8CZQ",
+            u"file":  u"yZIXLfi8CZQ.mp4",
+            u"note": u"Embed-only video (#1746)",
+            u"info_dict": {
+                u"upload_date": u"20120608",
+                u"title": u"Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012",
+                u"description": u"md5:09b78bd971f1e3e289601dfba15ca4f7",
+                u"uploader": u"SET India",
+                u"uploader_id": u"setindia"
+            }
+        },
      ]
  
  
@@ -370,16 +370,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
      def suitable(cls, url):
          """Receives a URL and returns True if suitable for this IE."""
          if YoutubePlaylistIE.suitable(url): return False
-        return re.match(cls._VALID_URL, url, re.VERBOSE) is not None
+        return re.match(cls._VALID_URL, url) is not None
  
      def __init__(self, *args, **kwargs):
          super(YoutubeIE, self).__init__(*args, **kwargs)
          self._player_cache = {}
  
-    def report_video_webpage_download(self, video_id):
-        """Report attempt to download video webpage."""
-        self.to_screen(u'%s: Downloading video webpage' % video_id)
-
      def report_video_info_webpage_download(self, video_id):
          """Report attempt to download video info webpage."""
          self.to_screen(u'%s: Downloading video info webpage' % video_id)
@@ -1019,6 +1015,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
          """Turn the encrypted s field into a working signature"""
  
          if player_url is not None:
+            if player_url.startswith(u'//'):
+                player_url = u'https:' + player_url
              try:
                  player_id = (player_url, len(s))
                  if player_id not in self._player_cache:
@@ -1098,7 +1096,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
              params = compat_urllib_parse.urlencode({
                  'lang': lang,
                  'v': video_id,
-                'fmt': self._downloader.params.get('subtitlesformat'),
+                'fmt': self._downloader.params.get('subtitlesformat', 'srt'),
                  'name': l[0].encode('utf-8'),
              })
              url = u'http://www.youtube.com/api/timedtext?' + params
@@ -1111,7 +1109,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
      def _get_available_automatic_caption(self, video_id, webpage):
          """We need the webpage for getting the captions url, pass it as an
             argument to speed up the process."""
-        sub_format = self._downloader.params.get('subtitlesformat')
+        sub_format = self._downloader.params.get('subtitlesformat', 'srt')
          self.to_screen(u'%s: Looking for automatic captions' % video_id)
          mobj = re.search(r';ytplayer.config = ({.*?});', webpage)
          err_msg = u'Couldn\'t find automatic captions for %s' % video_id
@@ -1130,8 +1128,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
                  'asrs': 1,
              })
              list_url = caption_url + '&' + list_params
-            list_page = self._download_webpage(list_url, video_id)
-            caption_list = xml.etree.ElementTree.fromstring(list_page.encode('utf-8'))
+            caption_list = self._download_xml(list_url, video_id)
              original_lang_node = caption_list.find('track')
              if original_lang_node is None or original_lang_node.attrib.get('kind') != 'asr' :
                  self._downloader.report_warning(u'Video doesn\'t have automatic captions')
@@ -1245,15 +1242,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
          video_id = self._extract_id(url)
  
          # Get video webpage
-        self.report_video_webpage_download(video_id)
          url = 'https://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1' % video_id
-        request = compat_urllib_request.Request(url)
-        try:
-            video_webpage_bytes = compat_urllib_request.urlopen(request).read()
-        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
-            raise ExtractorError(u'Unable to download video webpage: %s' % compat_str(err))
-
-        video_webpage = video_webpage_bytes.decode('utf-8', 'ignore')
+        video_webpage = self._download_webpage(url, video_id)
  
          # Attempt to extract SWF player URL
          mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
@@ -1270,7 +1260,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
              # We simulate the access to the video from www.youtube.com/v/{video_id}
              # this can be viewed without login into Youtube
              data = compat_urllib_parse.urlencode({'video_id': video_id,
-                                                  'el': 'embedded',
+                                                  'el': 'player_embedded',
                                                    'gl': 'US',
                                                    'hl': 'en',
                                                    'eurl': 'https://youtube.googleapis.com/v/' + video_id,
@@ -1299,6 +1289,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
              else:
                  raise ExtractorError(u'"token" parameter not in video info for unknown reason')
  
+        if 'view_count' in video_info:
+            view_count = int(video_info['view_count'][0])
+        else:
+            view_count = None
+
          # Check for "rental" videos
          if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
              raise ExtractorError(u'"rental" videos not supported')
@@ -1348,6 +1343,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
          # description
          video_description = get_element_by_id("eow-description", video_webpage)
          if video_description:
+            video_description = re.sub(r'''(?x)
+                <a\s+
+                    (?:[a-zA-Z-]+="[^"]+"\s+)*?
+                    title="([^"]+)"\s+
+                    (?:[a-zA-Z-]+="[^"]+"\s+)*?
+                    class="yt-uix-redirect-link"\s*>
+                [^<]+
+                </a>
+            ''', r'\1', video_description)
              video_description = clean_html(video_description)
          else:
              fd_mobj = re.search(r'<meta name="description" content="([^"]+)"', video_webpage)
@@ -1356,6 +1360,16 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
              else:
                  video_description = u''
  
+        def _extract_count(klass):
+            count = self._search_regex(
+                r'class="%s">([\d,]+)</span>' % re.escape(klass),
+                video_webpage, klass, default=None)
+            if count is not None:
+                return int(count.replace(',', ''))
+            return None
+        like_count = _extract_count(u'likes-count')
+        dislike_count = _extract_count(u'dislikes-count')
+
          # subtitles
          video_subtitles = self.extract_subtitles(video_id, video_webpage)
  
@@ -1365,9 +1379,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
  
          if 'length_seconds' not in video_info:
              self._downloader.report_warning(u'unable to extract video duration')
-            video_duration = ''
+            video_duration = None
          else:
-            video_duration = compat_urllib_parse.unquote_plus(video_info['length_seconds'][0])
+            video_duration = int(compat_urllib_parse.unquote_plus(video_info['length_seconds'][0]))
  
          # annotations
          video_annotations = None
@@ -1487,10 +1501,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
                  'age_limit':    18 if age_gate else 0,
                  'annotations':  video_annotations,
                  'webpage_url': 'https://www.youtube.com/watch?v=%s' % video_id,
+                'view_count': view_count,
+                'like_count': like_count,
+                'dislike_count': dislike_count,
              })
          return results
  
-class YoutubePlaylistIE(InfoExtractor):
+class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
      IE_DESC = u'YouTube.com playlists'
      _VALID_URL = r"""(?:
                          (?:https?://)?
@@ -1501,13 +1518,14 @@ class YoutubePlaylistIE(InfoExtractor):
                             \? (?:.*?&)*? (?:p|a|list)=
                          |  p/
                          )
-                        ((?:PL|EC|UU|FL)?[0-9A-Za-z-_]{10,})
+                        ((?:PL|EC|UU|FL|RD)?[0-9A-Za-z-_]{10,})
                          .*
                       |
-                        ((?:PL|EC|UU|FL)[0-9A-Za-z-_]{10,})
+                        ((?:PL|EC|UU|FL|RD)[0-9A-Za-z-_]{10,})
                       )"""
-    _TEMPLATE_URL = 'https://gdata.youtube.com/feeds/api/playlists/%s?max-results=%i&start-index=%i&v=2&alt=json&safeSearch=none'
-    _MAX_RESULTS = 50
+    _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s&page=%s'
+    _MORE_PAGES_INDICATOR = r'data-link-type="next"'
+    _VIDEO_RE = r'href="/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&amp;[^"]*?index=(?P<index>\d+)'
      IE_NAME = u'youtube:playlist'
  
      @classmethod
@@ -1515,6 +1533,27 @@ class YoutubePlaylistIE(InfoExtractor):
          """Receives a URL and returns True if suitable for this IE."""
          return re.match(cls._VALID_URL, url, re.VERBOSE) is not None
  
+    def _real_initialize(self):
+        self._login()
+
+    def _ids_to_results(self, ids):
+        return [self.url_result(vid_id, 'Youtube', video_id=vid_id)
+                       for vid_id in ids]
+
+    def _extract_mix(self, playlist_id):
+        # The mixes are generated from a a single video
+        # the id of the playlist is just 'RD' + video_id
+        url = 'https://youtube.com/watch?v=%s&list=%s' % (playlist_id[-11:], playlist_id)
+        webpage = self._download_webpage(url, playlist_id, u'Downloading Youtube mix')
+        title_span = (get_element_by_attribute('class', 'title long-title', webpage) or
+            get_element_by_attribute('class', 'title ', webpage))
+        title = clean_html(title_span)
+        video_re = r'data-index="\d+".*?href="/watch\?v=([0-9A-Za-z_-]{11})&amp;[^"]*?list=%s' % re.escape(playlist_id)
+        ids = orderedSet(re.findall(video_re, webpage))
+        url_results = self._ids_to_results(ids)
+
+        return self.playlist_result(url_results, playlist_id, title)
+
      def _real_extract(self, url):
          # Extract playlist id
          mobj = re.match(self._VALID_URL, url, re.VERBOSE)
@@ -1528,45 +1567,68 @@ class YoutubePlaylistIE(InfoExtractor):
              video_id = query_dict['v'][0]
              if self._downloader.params.get('noplaylist'):
                  self.to_screen(u'Downloading just video %s because of --no-playlist' % video_id)
-                return self.url_result('https://www.youtube.com/watch?v=' + video_id, 'Youtube')
+                return self.url_result(video_id, 'Youtube', video_id=video_id)
              else:
                  self.to_screen(u'Downloading playlist PL%s - add --no-playlist to just download video %s' % (playlist_id, video_id))
  
-        # Download playlist videos from API
-        videos = []
+        if playlist_id.startswith('RD'):
+            # Mixes require a custom extraction process
+            return self._extract_mix(playlist_id)
+        if playlist_id.startswith('TL'):
+            raise ExtractorError(u'For downloading YouTube.com top lists, use '
+                u'the "yttoplist" keyword, for example "youtube-dl \'yttoplist:music:Top Tracks\'"', expected=True)
+
+        # Extract the video ids from the playlist pages
+        ids = []
  
          for page_num in itertools.count(1):
-            start_index = self._MAX_RESULTS * (page_num - 1) + 1
-            if start_index >= 1000:
-                self._downloader.report_warning(u'Max number of results reached')
-                break
-            url = self._TEMPLATE_URL % (playlist_id, self._MAX_RESULTS, start_index)
+            url = self._TEMPLATE_URL % (playlist_id, page_num)
              page = self._download_webpage(url, playlist_id, u'Downloading page #%s' % page_num)
+            matches = re.finditer(self._VIDEO_RE, page)
+            # We remove the duplicates and the link with index 0
+            # (it's not the first video of the playlist)
+            new_ids = orderedSet(m.group('id') for m in matches if m.group('index') != '0')
+            ids.extend(new_ids)
  
-            try:
-                response = json.loads(page)
-            except ValueError as err:
-                raise ExtractorError(u'Invalid JSON in API response: ' + compat_str(err))
-
-            if 'feed' not in response:
-                raise ExtractorError(u'Got a malformed response from YouTube API')
-            playlist_title = response['feed']['title']['$t']
-            if 'entry' not in response['feed']:
-                # Number of videos is a multiple of self._MAX_RESULTS
+            if re.search(self._MORE_PAGES_INDICATOR, page) is None:
                  break
  
-            for entry in response['feed']['entry']:
-                index = entry['yt$position']['$t']
-                if 'media$group' in entry and 'yt$videoid' in entry['media$group']:
-                    videos.append((
-                        index,
-                        'https://www.youtube.com/watch?v=' + entry['media$group']['yt$videoid']['$t']
-                    ))
+        playlist_title = self._og_search_title(page)
+
+        url_results = self._ids_to_results(ids)
+        return self.playlist_result(url_results, playlist_id, playlist_title)
  
-        videos = [v[1] for v in sorted(videos)]
  
-        url_results = [self.url_result(vurl, 'Youtube') for vurl in videos]
-        return [self.playlist_result(url_results, playlist_id, playlist_title)]
+class YoutubeTopListIE(YoutubePlaylistIE):
+    IE_NAME = u'youtube:toplist'
+    IE_DESC = (u'YouTube.com top lists, "yttoplist:{channel}:{list title}"'
+        u' (Example: "yttoplist:music:Top Tracks")')
+    _VALID_URL = r'yttoplist:(?P<chann>.*?):(?P<title>.*?)$'
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        channel = mobj.group('chann')
+        title = mobj.group('title')
+        query = compat_urllib_parse.urlencode({'title': title})
+        playlist_re = 'href="([^"]+?%s[^"]+?)"' % re.escape(query)
+        channel_page = self._download_webpage('https://www.youtube.com/%s' % channel, title)
+        link = self._html_search_regex(playlist_re, channel_page, u'list')
+        url = compat_urlparse.urljoin('https://www.youtube.com/', link)
+        
+        video_re = r'data-index="\d+".*?data-video-id="([0-9A-Za-z_-]{11})"'
+        ids = []
+        # sometimes the webpage doesn't contain the videos
+        # retry until we get them
+        for i in itertools.count(0):
+            msg = u'Downloading Youtube mix'
+            if i > 0:
+                msg += ', retry #%d' % i
+            webpage = self._download_webpage(url, title, msg)
+            ids = orderedSet(re.findall(video_re, webpage))
+            if ids:
+                break
+        url_results = self._ids_to_results(ids)
+        return self.playlist_result(url_results, playlist_title=title)
  
  
  class YoutubeChannelIE(InfoExtractor):
@@ -1592,26 +1654,38 @@ class YoutubeChannelIE(InfoExtractor):
          # Download channel page
          channel_id = mobj.group(1)
          video_ids = []
-
-        # Download all channel pages using the json-based channel_ajax query
-        for pagenum in itertools.count(1):
-            url = self._MORE_PAGES_URL % (pagenum, channel_id)
-            page = self._download_webpage(url, channel_id,
-                                          u'Downloading page #%s' % pagenum)
-
-            page = json.loads(page)
-
-            ids_in_page = self.extract_videos_from_page(page['content_html'])
-            video_ids.extend(ids_in_page)
-
-            if self._MORE_PAGES_INDICATOR not in page['load_more_widget_html']:
-                break
+        url = 'https://www.youtube.com/channel/%s/videos' % channel_id
+        channel_page = self._download_webpage(url, channel_id)
+        autogenerated = re.search(r'''(?x)
+                class="[^"]*?(?:
+                    channel-header-autogenerated-label|
+                    yt-channel-title-autogenerated
+                )[^"]*"''', channel_page) is not None
+
+        if autogenerated:
+            # The videos are contained in a single page
+            # the ajax pages can't be used, they are empty
+            video_ids = self.extract_videos_from_page(channel_page)
+        else:
+            # Download all channel pages using the json-based channel_ajax query
+            for pagenum in itertools.count(1):
+                url = self._MORE_PAGES_URL % (pagenum, channel_id)
+                page = self._download_webpage(url, channel_id,
+                                              u'Downloading page #%s' % pagenum)
+    
+                page = json.loads(page)
+    
+                ids_in_page = self.extract_videos_from_page(page['content_html'])
+                video_ids.extend(ids_in_page)
+    
+                if self._MORE_PAGES_INDICATOR not in page['load_more_widget_html']:
+                    break
  
          self._downloader.to_screen(u'[youtube] Channel %s: Found %i videos' % (channel_id, len(video_ids)))
  
-        urls = ['http://www.youtube.com/watch?v=%s' % id for id in video_ids]
-        url_entries = [self.url_result(eurl, 'Youtube') for eurl in urls]
-        return [self.playlist_result(url_entries, channel_id)]
+        url_entries = [self.url_result(video_id, 'Youtube', video_id=video_id)
+                       for video_id in video_ids]
+        return self.playlist_result(url_entries, channel_id)
  
  
  class YoutubeUserIE(InfoExtractor):
@@ -1643,7 +1717,7 @@ class YoutubeUserIE(InfoExtractor):
          # page by page until there are no video ids - it means we got
          # all of them.
  
-        video_ids = []
+        url_results = []
  
          for pagenum in itertools.count(0):
              start_index = pagenum * self._GDATA_PAGE_SIZE + 1
@@ -1661,10 +1735,17 @@ class YoutubeUserIE(InfoExtractor):
                  break
  
              # Extract video identifiers
-            ids_in_page = []
-            for entry in response['feed']['entry']:
-                ids_in_page.append(entry['id']['$t'].split('/')[-1])
-            video_ids.extend(ids_in_page)
+            entries = response['feed']['entry']
+            for entry in entries:
+                title = entry['title']['$t']
+                video_id = entry['id']['$t'].split('/')[-1]
+                url_results.append({
+                    '_type': 'url',
+                    'url': video_id,
+                    'ie_key': 'Youtube',
+                    'id': 'video_id',
+                    'title': title,
+                })
  
              # A little optimization - if current page is not
              # "full", ie. does not contain PAGE_SIZE video ids then
@@ -1672,12 +1753,11 @@ class YoutubeUserIE(InfoExtractor):
              # are no more ids on further pages - no need to query
              # again.
  
-            if len(ids_in_page) < self._GDATA_PAGE_SIZE:
+            if len(entries) < self._GDATA_PAGE_SIZE:
                  break
  
-        urls = ['http://www.youtube.com/watch?v=%s' % video_id for video_id in video_ids]
-        url_results = [self.url_result(rurl, 'Youtube') for rurl in urls]
-        return [self.playlist_result(url_results, playlist_title = username)]
+        return self.playlist_result(url_results, playlist_title=username)
+
  
  class YoutubeSearchIE(SearchInfoExtractor):
      IE_DESC = u'YouTube.com searches'
@@ -1686,10 +1766,6 @@ class YoutubeSearchIE(SearchInfoExtractor):
      IE_NAME = u'youtube:search'
      _SEARCH_KEY = 'ytsearch'
  
-    def report_download_page(self, query, pagenum):
-        """Report attempt to download search page with given number."""
-        self._downloader.to_screen(u'[youtube] query "%s": Downloading page %s' % (query, pagenum))
-
      def _get_n_results(self, query, n):
          """Get a specified number of results for a query"""
  
@@ -1698,16 +1774,15 @@ class YoutubeSearchIE(SearchInfoExtractor):
          limit = n
  
          while (50 * pagenum) < limit:
-            self.report_download_page(query, pagenum+1)
              result_url = self._API_URL % (compat_urllib_parse.quote_plus(query), (50*pagenum)+1)
-            request = compat_urllib_request.Request(result_url)
-            try:
-                data = compat_urllib_request.urlopen(request).read().decode('utf-8')
-            except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
-                raise ExtractorError(u'Unable to download API page: %s' % compat_str(err))
-            api_response = json.loads(data)['data']
-
-            if not 'items' in api_response:
+            data_json = self._download_webpage(
+                result_url, video_id=u'query "%s"' % query,
+                note=u'Downloading page %s' % (pagenum + 1),
+                errnote=u'Unable to download API page')
+            data = json.loads(data_json)
+            api_response = data['data']
+
+            if 'items' not in api_response:
                  raise ExtractorError(u'[youtube] No video results')
  
              new_ids = list(video['id'] for video in api_response['items'])
@@ -1718,10 +1793,12 @@ class YoutubeSearchIE(SearchInfoExtractor):
  
          if len(video_ids) > n:
              video_ids = video_ids[:n]
-        videos = [self.url_result('http://www.youtube.com/watch?v=%s' % id, 'Youtube') for id in video_ids]
+        videos = [self.url_result(video_id, 'Youtube', video_id=video_id)
+                  for video_id in video_ids]
          return self.playlist_result(videos, query)
  
  class YoutubeSearchDateIE(YoutubeSearchIE):
+    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
      _API_URL = 'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc&orderby=published'
      _SEARCH_KEY = 'ytsearchdate'
      IE_DESC = u'YouTube.com searches, newest videos first'
@@ -1748,7 +1825,6 @@ class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
      Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.
      """
      _LOGIN_REQUIRED = True
-    _PAGING_STEP = 30
      # use action_load_personal_feed instead of action_load_system_feed
      _PERSONAL_FEED = False
  
@@ -1768,9 +1844,8 @@ class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
  
      def _real_extract(self, url):
          feed_entries = []
-        # The step argument is available only in 2.7 or higher
-        for i in itertools.count(0):
-            paging = i*self._PAGING_STEP
+        paging = 0
+        for i in itertools.count(1):
              info = self._download_webpage(self._FEED_TEMPLATE % paging,
                                            u'%s feed' % self._FEED_NAME,
                                            u'Downloading page %s' % i)
@@ -1778,9 +1853,12 @@ class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
              feed_html = info['feed_html']
              m_ids = re.finditer(r'"/watch\?v=(.*?)["&]', feed_html)
              ids = orderedSet(m.group(1) for m in m_ids)
-            feed_entries.extend(self.url_result(id, 'Youtube') for id in ids)
+            feed_entries.extend(
+                self.url_result(video_id, 'Youtube', video_id=video_id)
+                for video_id in ids)
              if info['paging'] is None:
                  break
+            paging = info['paging']
          return self.playlist_result(feed_entries, playlist_title=self._PLAYLIST_TITLE)
  
  class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
@@ -1800,9 +1878,15 @@ class YoutubeWatchLaterIE(YoutubeFeedsInfoExtractor):
      _VALID_URL = r'https?://www\.youtube\.com/feed/watch_later|:ytwatchlater'
      _FEED_NAME = 'watch_later'
      _PLAYLIST_TITLE = u'Youtube Watch Later'
-    _PAGING_STEP = 100
      _PERSONAL_FEED = True
  
+class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
+    IE_DESC = u'Youtube watch history, "ythistory" keyword (requires authentication)'
+    _VALID_URL = u'https?://www\.youtube\.com/feed/history|:ythistory'
+    _FEED_NAME = 'history'
+    _PERSONAL_FEED = True
+    _PLAYLIST_TITLE = u'Youtube Watch History'
+
  class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
      IE_NAME = u'youtube:favorites'
      IE_DESC = u'YouTube.com favourite videos, "ytfav" keyword (requires authentication)'