Raphaël G. Git Repositories - youtubedl/blobdiff - youtube_dl/extractor/archiveorg.py
Merge tag 'upstream/2014.06.07'
[youtubedl] / youtube_dl / extractor / archiveorg.py
index 8bb546410f7a7486bdaa964bc724cf2c501e8851..34ce8429b121261784a1645c28e2a33cb76bcacb 100644 (file)
@@ -1,9 +1,10 @@
+from __future__ import unicode_literals
+
 import json
 import re
 
 from .common import InfoExtractor
 from ..utils import (
 import json
 import re
 
 from .common import InfoExtractor
 from ..utils import (
-    determine_ext,
     unified_strdate,
 )
 
     unified_strdate,
 )
 
@@ -13,23 +14,22 @@ class ArchiveOrgIE(InfoExtractor):
     IE_DESC = 'archive.org videos'
     _VALID_URL = r'(?:https?://)?(?:www\.)?archive\.org/details/(?P<id>[^?/]+)(?:[?].*)?$'
     _TEST = {
     IE_DESC = 'archive.org videos'
     _VALID_URL = r'(?:https?://)?(?:www\.)?archive\.org/details/(?P<id>[^?/]+)(?:[?].*)?$'
     _TEST = {
-        u"url": u"http://archive.org/details/XD300-23_68HighlightsAResearchCntAugHumanIntellect",
-        u'file': u'XD300-23_68HighlightsAResearchCntAugHumanIntellect.ogv',
-        u'md5': u'8af1d4cf447933ed3c7f4871162602db',
-        u'info_dict': {
-            u"title": u"1968 Demo - FJCC Conference Presentation Reel #1",
-            u"description": u"Reel 1 of 3: Also known as the \"Mother of All Demos\", Doug Engelbart's presentation at the Fall Joint Computer Conference in San Francisco, December 9, 1968 titled \"A Research Center for Augmenting Human Intellect.\" For this presentation, Doug and his team astonished the audience by not only relating their research, but demonstrating it live. This was the debut of the mouse, interactive computing, hypermedia, computer supported software engineering, video teleconferencing, etc. See also <a href=\"http://dougengelbart.org/firsts/dougs-1968-demo.html\" rel=\"nofollow\">Doug's 1968 Demo page</a> for more background, highlights, links, and the detailed paper published in this conference proceedings. Filmed on 3 reels: Reel 1 | <a href=\"http://www.archive.org/details/XD300-24_68HighlightsAResearchCntAugHumanIntellect\" rel=\"nofollow\">Reel 2</a> | <a href=\"http://www.archive.org/details/XD300-25_68HighlightsAResearchCntAugHumanIntellect\" rel=\"nofollow\">Reel 3</a>",
-            u"upload_date": u"19681210",
-            u"uploader": u"SRI International"
+        "url": "http://archive.org/details/XD300-23_68HighlightsAResearchCntAugHumanIntellect",
+        'file': 'XD300-23_68HighlightsAResearchCntAugHumanIntellect.ogv',
+        'md5': '8af1d4cf447933ed3c7f4871162602db',
+        'info_dict': {
+            "title": "1968 Demo - FJCC Conference Presentation Reel #1",
+            "description": "Reel 1 of 3: Also known as the \"Mother of All Demos\", Doug Engelbart's presentation at the Fall Joint Computer Conference in San Francisco, December 9, 1968 titled \"A Research Center for Augmenting Human Intellect.\" For this presentation, Doug and his team astonished the audience by not only relating their research, but demonstrating it live. This was the debut of the mouse, interactive computing, hypermedia, computer supported software engineering, video teleconferencing, etc. See also <a href=\"http://dougengelbart.org/firsts/dougs-1968-demo.html\" rel=\"nofollow\">Doug's 1968 Demo page</a> for more background, highlights, links, and the detailed paper published in this conference proceedings. Filmed on 3 reels: Reel 1 | <a href=\"http://www.archive.org/details/XD300-24_68HighlightsAResearchCntAugHumanIntellect\" rel=\"nofollow\">Reel 2</a> | <a href=\"http://www.archive.org/details/XD300-25_68HighlightsAResearchCntAugHumanIntellect\" rel=\"nofollow\">Reel 3</a>",
+            "upload_date": "19681210",
+            "uploader": "SRI International"
         }
     }
 
         }
     }
 
-
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
         video_id = mobj.group('id')
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
         video_id = mobj.group('id')
 
-        json_url = url + (u'?' if u'?' in url else '&') + u'output=json'
+        json_url = url + ('?' if '?' in url else '&') + 'output=json'
         json_data = self._download_webpage(json_url, video_id)
         data = json.loads(json_data)
 
         json_data = self._download_webpage(json_url, video_id)
         data = json.loads(json_data)
 
@@ -38,16 +38,16 @@ class ArchiveOrgIE(InfoExtractor):
         uploader = data['metadata']['creator'][0]
         upload_date = unified_strdate(data['metadata']['date'][0])
 
         uploader = data['metadata']['creator'][0]
         upload_date = unified_strdate(data['metadata']['date'][0])
 
-        formats = [{
+        formats = [
+            {
                 'format': fdata['format'],
                 'url': 'http://' + data['server'] + data['dir'] + fn,
                 'file_size': int(fdata['size']),
             }
                 'format': fdata['format'],
                 'url': 'http://' + data['server'] + data['dir'] + fn,
                 'file_size': int(fdata['size']),
             }
-            for fn,fdata in data['files'].items()
+            for fn, fdata in data['files'].items()
             if 'Video' in fdata['format']]
             if 'Video' in fdata['format']]
-        formats.sort(key=lambda fdata: fdata['file_size'])
-        for f in formats:
-            f['ext'] = determine_ext(f['url'])
+
+        self._sort_formats(formats)
 
         return {
             '_type': 'video',
 
         return {
             '_type': 'video',