]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/archiveorg.py
   1 from __future__ 
import unicode_literals
 
   3 from .common 
import InfoExtractor
 
  10 class ArchiveOrgIE(InfoExtractor
): 
  11     IE_NAME 
= 'archive.org' 
  12     IE_DESC 
= 'archive.org videos' 
  13     _VALID_URL 
= r
'https?://(?:www\.)?archive\.org/(?:details|embed)/(?P<id>[^/?#]+)(?:[?].*)?$' 
  15         'url': 'http://archive.org/details/XD300-23_68HighlightsAResearchCntAugHumanIntellect', 
  16         'md5': '8af1d4cf447933ed3c7f4871162602db', 
  18             'id': 'XD300-23_68HighlightsAResearchCntAugHumanIntellect', 
  20             'title': '1968 Demo - FJCC Conference Presentation Reel #1', 
  21             'description': 'md5:da45c349df039f1cc8075268eb1b5c25', 
  22             'upload_date': '19681210', 
  23             'uploader': 'SRI International' 
  26         'url': 'https://archive.org/details/Cops1922', 
  27         'md5': '0869000b4ce265e8ca62738b336b268a', 
  31             'title': 'Buster Keaton\'s "Cops" (1922)', 
  32             'description': 'md5:89e7c77bf5d965dd5c0372cfb49470f6', 
  35         'url': 'http://archive.org/embed/XD300-23_68HighlightsAResearchCntAugHumanIntellect', 
  36         'only_matching': True, 
  39     def _real_extract(self
, url
): 
  40         video_id 
= self
._match
_id
(url
) 
  41         webpage 
= self
._download
_webpage
( 
  42             'http://archive.org/embed/' + video_id
, video_id
) 
  43         jwplayer_playlist 
= self
._parse
_json
(self
._search
_regex
( 
  44             r
"(?s)Play\('[^']+'\s*,\s*(\[.+\])\s*,\s*{.*?}\)", 
  45             webpage
, 'jwplayer playlist'), video_id
) 
  46         info 
= self
._parse
_jwplayer
_data
( 
  47             {'playlist': jwplayer_playlist
}, video_id
, base_url
=url
) 
  49         def get_optional(metadata
, field
): 
  50             return metadata
.get(field
, [None])[0] 
  52         metadata 
= self
._download
_json
( 
  53             'http://archive.org/details/' + video_id
, video_id
, query
={ 
  57             'title': get_optional(metadata
, 'title') or info
.get('title'), 
  58             'description': clean_html(get_optional(metadata
, 'description')), 
  60         if info
.get('_type') != 'playlist': 
  62                 'uploader': get_optional(metadata
, 'creator'), 
  63                 'upload_date': unified_strdate(get_optional(metadata
, 'date')),