]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/archiveorg.py
   1 from __future__ 
import unicode_literals
 
   3 from .common 
import InfoExtractor
 
   4 from ..utils 
import unified_strdate
 
   7 class ArchiveOrgIE(InfoExtractor
): 
   8     IE_NAME 
= 'archive.org' 
   9     IE_DESC 
= 'archive.org videos' 
  10     _VALID_URL 
= r
'https?://(?:www\.)?archive\.org/details/(?P<id>[^?/]+)(?:[?].*)?$' 
  12         'url': 'http://archive.org/details/XD300-23_68HighlightsAResearchCntAugHumanIntellect', 
  13         'md5': '8af1d4cf447933ed3c7f4871162602db', 
  15             'id': 'XD300-23_68HighlightsAResearchCntAugHumanIntellect', 
  17             'title': '1968 Demo - FJCC Conference Presentation Reel #1', 
  18             'description': 'md5:1780b464abaca9991d8968c877bb53ed', 
  19             'upload_date': '19681210', 
  20             'uploader': 'SRI International' 
  23         'url': 'https://archive.org/details/Cops1922', 
  24         'md5': '18f2a19e6d89af8425671da1cf3d4e04', 
  28             'title': 'Buster Keaton\'s "Cops" (1922)', 
  29             'description': 'md5:70f72ee70882f713d4578725461ffcc3', 
  33     def _real_extract(self
, url
): 
  34         video_id 
= self
._match
_id
(url
) 
  36         json_url 
= url 
+ ('&' if '?' in url 
else '?') + 'output=json' 
  37         data 
= self
._download
_json
(json_url
, video_id
) 
  39         def get_optional(data_dict
, field
): 
  40             return data_dict
['metadata'].get(field
, [None])[0] 
  42         title 
= get_optional(data
, 'title') 
  43         description 
= get_optional(data
, 'description') 
  44         uploader 
= get_optional(data
, 'creator') 
  45         upload_date 
= unified_strdate(get_optional(data
, 'date')) 
  49                 'format': fdata
['format'], 
  50                 'url': 'http://' + data
['server'] + data
['dir'] + fn
, 
  51                 'file_size': int(fdata
['size']), 
  53             for fn
, fdata 
in data
['files'].items() 
  54             if 'Video' in fdata
['format']] 
  56         self
._sort
_formats
(formats
) 
  63             'description': description
, 
  65             'upload_date': upload_date
, 
  66             'thumbnail': data
.get('misc', {}).get('image'),