]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/appletrailers.py
8b191c19636087d89fe8505964292bcc6db3ba2b
   2 import xml
.etree
.ElementTree
 
   4 from .common 
import InfoExtractor
 
  10 class AppleTrailersIE(InfoExtractor
): 
  11     _VALID_URL 
= r
'https?://(?:www\.)?trailers.apple.com/trailers/(?P<company>[^/]+)/(?P<movie>[^/]+)' 
  13         u
"url": u
"http://trailers.apple.com/trailers/wb/manofsteel/", 
  16                 u
"file": u
"manofsteel-trailer4.mov", 
  17                 u
"md5": u
"11874af099d480cc09e103b189805d5f", 
  20                     u
"thumbnail": u
"http://trailers.apple.com/trailers/wb/manofsteel/images/thumbnail_11624.jpg", 
  21                     u
"title": u
"Trailer 4", 
  22                     u
"upload_date": u
"20130523", 
  23                     u
"uploader_id": u
"wb", 
  27                 u
"file": u
"manofsteel-trailer3.mov", 
  28                 u
"md5": u
"07a0a262aae5afe68120eed61137ab34", 
  31                     u
"thumbnail": u
"http://trailers.apple.com/trailers/wb/manofsteel/images/thumbnail_10793.jpg", 
  32                     u
"title": u
"Trailer 3", 
  33                     u
"upload_date": u
"20130417", 
  34                     u
"uploader_id": u
"wb", 
  38                 u
"file": u
"manofsteel-trailer.mov", 
  39                 u
"md5": u
"e401fde0813008e3307e54b6f384cff1", 
  42                     u
"thumbnail": u
"http://trailers.apple.com/trailers/wb/manofsteel/images/thumbnail_8703.jpg", 
  44                     u
"upload_date": u
"20121212", 
  45                     u
"uploader_id": u
"wb", 
  49                 u
"file": u
"manofsteel-teaser.mov", 
  50                 u
"md5": u
"76b392f2ae9e7c98b22913c10a639c97", 
  53                     u
"thumbnail": u
"http://trailers.apple.com/trailers/wb/manofsteel/images/thumbnail_6899.jpg", 
  55                     u
"upload_date": u
"20120721", 
  56                     u
"uploader_id": u
"wb", 
  62     def _real_extract(self
, url
): 
  63         mobj 
= re
.match(self
._VALID
_URL
, url
) 
  64         movie 
= mobj
.group('movie') 
  65         uploader_id 
= mobj
.group('company') 
  67         playlist_url 
= url
.partition(u
'?')[0] + u
'/includes/playlists/web.inc' 
  68         playlist_snippet 
= self
._download
_webpage
(playlist_url
, movie
) 
  69         playlist_cleaned 
= re
.sub(r
'(?s)<script>.*?</script>', u
'', playlist_snippet
) 
  70         playlist_html 
= u
'<html>' + playlist_cleaned 
+ u
'</html>' 
  74         doc 
= xml
.etree
.ElementTree
.fromstring(playlist_html
) 
  76         for li 
in doc
.findall('./div/ul/li'): 
  77             title 
= li
.find('.//h3').text
 
  78             video_id 
= movie 
+ '-' + re
.sub(r
'[^a-zA-Z0-9]', '', title
).lower() 
  79             thumbnail 
= li
.find('.//img').attrib
['src'] 
  81             date_el 
= li
.find('.//p') 
  83             m 
= re
.search(r
':\s?(?P<month>[0-9]{2})/(?P<day>[0-9]{2})/(?P<year>[0-9]{2})', date_el
.text
) 
  85                 upload_date 
= u
'20' + m
.group('year') + m
.group('month') + m
.group('day') 
  86             runtime_el 
= date_el
.find('./br') 
  87             m 
= re
.search(r
':\s?(?P<minutes>[0-9]+):(?P<seconds>[0-9]{1,2})', runtime_el
.tail
) 
  90                 duration 
= 60 * int(m
.group('minutes')) + int(m
.group('seconds')) 
  93             for formats_el 
in li
.findall('.//a'): 
  94                 if formats_el
.attrib
['class'] != 'OverlayPanel': 
  96                 target 
= formats_el
.attrib
['target'] 
  98                 format_code 
= formats_el
.text
 
  99                 if 'Automatic' in format_code
: 
 102                 size_q 
= formats_el
.attrib
['href'] 
 103                 size_id 
= size_q
.rpartition('#videos-')[2] 
 104                 if size_id 
not in size_cache
: 
 105                     size_url 
= url 
+ size_q
 
 106                     sizepage_html 
= self
._download
_webpage
( 
 108                         note
=u
'Downloading size info %s' % size_id
, 
 109                         errnote
=u
'Error while downloading size info %s' % size_id
, 
 111                     _doc 
= xml
.etree
.ElementTree
.fromstring(sizepage_html
) 
 112                     size_cache
[size_id
] = _doc
 
 114                 sizepage_doc 
= size_cache
[size_id
] 
 115                 links 
= sizepage_doc
.findall('.//{http://www.w3.org/1999/xhtml}ul/{http://www.w3.org/1999/xhtml}li/{http://www.w3.org/1999/xhtml}a') 
 117                     href 
= vid_a
.get('href') 
 118                     if not href
.endswith(target
): 
 120                     detail_q 
= href
.partition('#')[0] 
 121                     detail_url 
= url 
+ '/' + detail_q
 
 123                     m 
= re
.match(r
'includes/(?P<detail_id>[^/]+)/', detail_q
) 
 124                     detail_id 
= m
.group('detail_id') 
 126                     detail_html 
= self
._download
_webpage
( 
 128                         note
=u
'Downloading detail %s %s' % (detail_id
, size_id
), 
 129                         errnote
=u
'Error while downloading detail %s %s' % (detail_id
, size_id
) 
 131                     detail_doc 
= xml
.etree
.ElementTree
.fromstring(detail_html
) 
 132                     movie_link_el 
= detail_doc
.find('.//{http://www.w3.org/1999/xhtml}a') 
 133                     assert movie_link_el
.get('class') == 'movieLink' 
 134                     movie_link 
= movie_link_el
.get('href').partition('?')[0].replace('_', '_h') 
 135                     ext 
= determine_ext(movie_link
) 
 139                         'format': format_code
, 
 150                 'duration': duration
, 
 151                 'thumbnail': thumbnail
, 
 152                 'upload_date': upload_date
, 
 153                 'uploader_id': uploader_id
, 
 154                 'user_agent': 'QuickTime compatible (youtube-dl)', 
 156             # TODO: Remove when #980 has been merged 
 157             info
['url'] = formats
[-1]['url'] 
 158             info
['ext'] = formats
[-1]['ext'] 
 160             playlist
.append(info
)