]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/appletrailers.py
2 import xml
.etree
.ElementTree
4 from .common
import InfoExtractor
10 class AppleTrailersIE(InfoExtractor
):
# URL pattern for Apple trailer pages. The first path segment after
# "/trailers/" is captured as "company" and the second as "movie"; both
# groups are read back by _real_extract via re.match.
# The dots in the host name are escaped so that they only match a literal
# "." -- unescaped, the pattern would also accept e.g. "trailersXapple.com".
_VALID_URL = r'https?://(?:www\.)?trailers\.apple\.com/trailers/(?P<company>[^/]+)/(?P<movie>[^/]+)'
13 u
"url": u
"http://trailers.apple.com/trailers/wb/manofsteel/",
16 u
"file": u
"manofsteel-trailer4.mov",
17 u
"md5": u
"11874af099d480cc09e103b189805d5f",
20 u
"thumbnail": u
"http://trailers.apple.com/trailers/wb/manofsteel/images/thumbnail_11624.jpg",
21 u
"title": u
"Trailer 4",
22 u
"upload_date": u
"20130523",
23 u
"uploader_id": u
"wb",
27 u
"file": u
"manofsteel-trailer3.mov",
28 u
"md5": u
"07a0a262aae5afe68120eed61137ab34",
31 u
"thumbnail": u
"http://trailers.apple.com/trailers/wb/manofsteel/images/thumbnail_10793.jpg",
32 u
"title": u
"Trailer 3",
33 u
"upload_date": u
"20130417",
34 u
"uploader_id": u
"wb",
38 u
"file": u
"manofsteel-trailer.mov",
39 u
"md5": u
"e401fde0813008e3307e54b6f384cff1",
42 u
"thumbnail": u
"http://trailers.apple.com/trailers/wb/manofsteel/images/thumbnail_8703.jpg",
44 u
"upload_date": u
"20121212",
45 u
"uploader_id": u
"wb",
49 u
"file": u
"manofsteel-teaser.mov",
50 u
"md5": u
"76b392f2ae9e7c98b22913c10a639c97",
53 u
"thumbnail": u
"http://trailers.apple.com/trailers/wb/manofsteel/images/thumbnail_6899.jpg",
55 u
"upload_date": u
"20120721",
56 u
"uploader_id": u
"wb",
62 def _real_extract(self
, url
):
63 mobj
= re
.match(self
._VALID
_URL
, url
)
64 movie
= mobj
.group('movie')
65 uploader_id
= mobj
.group('company')
67 playlist_url
= url
.partition(u
'?')[0] + u
'/includes/playlists/web.inc'
68 playlist_snippet
= self
._download
_webpage
(playlist_url
, movie
)
69 playlist_cleaned
= re
.sub(r
'(?s)<script>.*?</script>', u
'', playlist_snippet
)
70 playlist_html
= u
'<html>' + playlist_cleaned
+ u
'</html>'
74 doc
= xml
.etree
.ElementTree
.fromstring(playlist_html
)
76 for li
in doc
.findall('./div/ul/li'):
77 title
= li
.find('.//h3').text
78 video_id
= movie
+ '-' + re
.sub(r
'[^a-zA-Z0-9]', '', title
).lower()
79 thumbnail
= li
.find('.//img').attrib
['src']
81 date_el
= li
.find('.//p')
83 m
= re
.search(r
':\s?(?P<month>[0-9]{2})/(?P<day>[0-9]{2})/(?P<year>[0-9]{2})', date_el
.text
)
85 upload_date
= u
'20' + m
.group('year') + m
.group('month') + m
.group('day')
86 runtime_el
= date_el
.find('./br')
87 m
= re
.search(r
':\s?(?P<minutes>[0-9]+):(?P<seconds>[0-9]{1,2})', runtime_el
.tail
)
90 duration
= 60 * int(m
.group('minutes')) + int(m
.group('seconds'))
93 for formats_el
in li
.findall('.//a'):
94 if formats_el
.attrib
['class'] != 'OverlayPanel':
96 target
= formats_el
.attrib
['target']
98 format_code
= formats_el
.text
99 if 'Automatic' in format_code
:
102 size_q
= formats_el
.attrib
['href']
103 size_id
= size_q
.rpartition('#videos-')[2]
104 if size_id
not in size_cache
:
105 size_url
= url
+ size_q
106 sizepage_html
= self
._download
_webpage
(
108 note
=u
'Downloading size info %s' % size_id
,
109 errnote
=u
'Error while downloading size info %s' % size_id
,
111 _doc
= xml
.etree
.ElementTree
.fromstring(sizepage_html
)
112 size_cache
[size_id
] = _doc
114 sizepage_doc
= size_cache
[size_id
]
115 links
= sizepage_doc
.findall('.//{http://www.w3.org/1999/xhtml}ul/{http://www.w3.org/1999/xhtml}li/{http://www.w3.org/1999/xhtml}a')
117 href
= vid_a
.get('href')
118 if not href
.endswith(target
):
120 detail_q
= href
.partition('#')[0]
121 detail_url
= url
+ '/' + detail_q
123 m
= re
.match(r
'includes/(?P<detail_id>[^/]+)/', detail_q
)
124 detail_id
= m
.group('detail_id')
126 detail_html
= self
._download
_webpage
(
128 note
=u
'Downloading detail %s %s' % (detail_id
, size_id
),
129 errnote
=u
'Error while downloading detail %s %s' % (detail_id
, size_id
)
131 detail_doc
= xml
.etree
.ElementTree
.fromstring(detail_html
)
132 movie_link_el
= detail_doc
.find('.//{http://www.w3.org/1999/xhtml}a')
133 assert movie_link_el
.get('class') == 'movieLink'
134 movie_link
= movie_link_el
.get('href').partition('?')[0].replace('_', '_h')
135 ext
= determine_ext(movie_link
)
139 'format': format_code
,
150 'duration': duration
,
151 'thumbnail': thumbnail
,
152 'upload_date': upload_date
,
153 'uploader_id': uploader_id
,
154 'user_agent': 'QuickTime compatible (youtube-dl)',
156 # TODO: Remove when #980 has been merged
157 info
['url'] = formats
[-1]['url']
158 info
['ext'] = formats
[-1]['ext']
160 playlist
.append(info
)