Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/internetvideoarchive.py
76cc5ec3ee21450f724564ef0c75f9c08931d2f7
[youtubedl] / youtube_dl / extractor / internetvideoarchive.py
1 from __future__ import unicode_literals
2
3 from .common import InfoExtractor
4 from ..compat import (
5 compat_parse_qs,
6 compat_urlparse,
7 )
8 from ..utils import (
9 determine_ext,
10 int_or_none,
11 xpath_text,
12 )
13
14
class InternetVideoArchiveIE(InfoExtractor):
    """Extractor for video.internetvideoarchive.net player configurations.

    Two URL flavours are matched by ``_VALID_URL``:

    * ``/player/...`` URLs, whose response is parsed as JSON and may list
      several sources (HLS manifests and/or direct files);
    * ``/flash/players/...`` URLs, whose response is parsed as XML and
      yields a single file URL.

    In both cases the video id is the ``publishedid`` query parameter.
    """

    _VALID_URL = r'https?://video\.internetvideoarchive\.net/(?:player|flash/players)/.*?\?.*?publishedid.*?'

    _TEST = {
        'url': 'http://video.internetvideoarchive.net/player/6/configuration.ashx?customerid=69249&publishedid=194487&reporttag=vdbetatitle&playerid=641&autolist=0&domain=www.videodetective.com&maxrate=high&minrate=low&socialplayer=false',
        'info_dict': {
            'id': '194487',
            'ext': 'mp4',
            'title': 'KICK-ASS 2',
            'description': 'md5:c189d5b7280400630a1d3dd17eaa8d8a',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }

    @staticmethod
    def _build_json_url(query):
        """Return the JSON configuration URL for the given query string."""
        return 'http://video.internetvideoarchive.net/player/6/configuration.ashx?' + query

    @staticmethod
    def _build_xml_url(query):
        """Return the XML (flash) configuration URL for the given query string."""
        return 'http://video.internetvideoarchive.net/flash/players/flashconfiguration.aspx?' + query

    def _real_extract(self, url):
        """Download the configuration behind ``url`` and build the info dict.

        Returns a dict with ``id``, ``title``, ``formats``, ``thumbnail``
        and ``description`` keys.
        """
        query = compat_urlparse.urlparse(url).query
        query_dic = compat_parse_qs(query)
        video_id = query_dic['publishedid'][0]

        if '/player/' in url:
            configuration = self._download_json(url, video_id)

            # There are multiple videos in the playlist while only the first
            # one matches the video played in browsers
            video_info = configuration['playlist'][0]
            title = video_info['title']

            formats = []
            for source in video_info['sources']:
                file_url = source['file']
                if determine_ext(file_url) == 'm3u8':
                    m3u8_formats = self._extract_m3u8_formats(
                        file_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)
                    if m3u8_formats:
                        formats.extend(m3u8_formats)
                        # Derive the f4m/mpd manifest URLs from the URL of
                        # the first extracted HLS format rather than from
                        # the original source URL.
                        file_url = m3u8_formats[0]['url']
                    formats.extend(self._extract_f4m_formats(
                        file_url.replace('.m3u8', '.f4m'),
                        video_id, f4m_id='hds', fatal=False))
                    formats.extend(self._extract_mpd_formats(
                        file_url.replace('.m3u8', '.mpd'),
                        video_id, mpd_id='dash', fatal=False))
                else:
                    a_format = {
                        'url': file_url,
                    }

                    label = source.get('label')
                    if label and label[-4:] == ' kbs':
                        tbr = int_or_none(label[:-4])
                        # The bitrate is optional metadata: when the label
                        # prefix is not numeric, int_or_none yields None and
                        # 'http-%d' % None would raise TypeError, so only
                        # record it when it actually parsed.
                        if tbr is not None:
                            a_format.update({
                                'tbr': tbr,
                                'format_id': 'http-%d' % tbr,
                            })
                    formats.append(a_format)

            self._sort_formats(formats)

            description = video_info.get('description')
            thumbnail = video_info.get('image')
        else:
            configuration = self._download_xml(url, video_id)
            formats = [{
                'url': xpath_text(configuration, './file', 'file URL', fatal=True),
            }]
            thumbnail = xpath_text(configuration, './image', 'thumbnail')
            # The XML branch reads no title element, so a placeholder title
            # is synthesised from the video id.
            title = 'InternetVideoArchive video %s' % video_id
            description = None

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'thumbnail': thumbnail,
            'description': description,
        }