]> Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/internetvideoarchive.py
debian/control: Add B-D on pandoc and zip. Closes: #828905.
[youtubedl] / youtube_dl / extractor / internetvideoarchive.py
1 from __future__ import unicode_literals
2
3 from .common import InfoExtractor
4 from ..compat import (
5 compat_parse_qs,
6 compat_urlparse,
7 )
8 from ..utils import (
9 determine_ext,
10 int_or_none,
11 xpath_text,
12 )
13
14
class InternetVideoArchiveIE(InfoExtractor):
    """Extractor for video.internetvideoarchive.net configuration URLs.

    Two URL flavours are handled, distinguished by whether '/player/'
    occurs in the URL:

    * '/player/' URLs are fetched as a JSON player configuration whose
      first playlist entry describes the video (title, description,
      image and a list of sources).
    * any other matching URL (the '/flash/players/' flavour) is fetched
      as an XML configuration that only provides a file URL and an image.

    The video id is always taken from the 'publishedid' query parameter.
    """

    _VALID_URL = r'https?://video\.internetvideoarchive\.net/(?:player|flash/players)/.*?\?.*?publishedid.*?'

    _TEST = {
        'url': 'http://video.internetvideoarchive.net/player/6/configuration.ashx?customerid=69249&publishedid=194487&reporttag=vdbetatitle&playerid=641&autolist=0&domain=www.videodetective.com&maxrate=high&minrate=low&socialplayer=false',
        'info_dict': {
            'id': '194487',
            'ext': 'mp4',
            'title': 'KICK-ASS 2',
            'description': 'md5:c189d5b7280400630a1d3dd17eaa8d8a',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }

    @staticmethod
    def _build_json_url(query):
        """Return the JSON player configuration URL for a raw query string."""
        return 'http://video.internetvideoarchive.net/player/6/configuration.ashx?' + query

    @staticmethod
    def _build_xml_url(query):
        """Return the XML flash configuration URL for a raw query string."""
        return 'http://video.internetvideoarchive.net/flash/players/flashconfiguration.aspx?' + query

    def _real_extract(self, url):
        """Build the info dict for a configuration URL.

        Returns a dict with the keys 'id', 'title', 'formats',
        'thumbnail' and 'description'. A KeyError propagates if the URL
        carries no non-empty 'publishedid' parameter, or if the JSON
        configuration lacks the 'playlist', 'sources', 'file' or 'title'
        entries that are read unconditionally below.
        """
        query = compat_urlparse.urlparse(url).query
        query_dic = compat_parse_qs(query)
        video_id = query_dic['publishedid'][0]

        if '/player/' in url:
            configuration = self._download_json(url, video_id)

            # There are multiple videos in the playlist while only the first
            # one matches the video played in browsers
            video_info = configuration['playlist'][0]

            formats = []
            for source in video_info['sources']:
                file_url = source['file']
                if determine_ext(file_url) == 'm3u8':
                    formats.extend(self._extract_m3u8_formats(
                        file_url, video_id, ext='mp4', m3u8_id='hls'))
                    continue

                a_format = {
                    'url': file_url,
                }

                # Labels of the form '<number> kbs' carry the bitrate.
                label = source.get('label')
                if label and label.endswith(' kbs'):
                    tbr = int_or_none(label[:-4])
                    # int_or_none may yield None (e.g. for a label that is
                    # just ' kbs'); '%d' % None would raise TypeError, so
                    # only attach bitrate metadata when a number was parsed.
                    if tbr is not None:
                        a_format.update({
                            'tbr': tbr,
                            'format_id': 'http-%d' % tbr,
                        })
                formats.append(a_format)

            self._sort_formats(formats)

            title = video_info['title']
            description = video_info.get('description')
            thumbnail = video_info.get('image')
        else:
            configuration = self._download_xml(url, video_id)
            formats = [{
                'url': xpath_text(configuration, './file', 'file URL', fatal=True),
            }]
            thumbnail = xpath_text(configuration, './image', 'thumbnail')
            # The XML configuration is not read for a title here, so a
            # generic one is synthesised from the id.
            title = 'InternetVideoArchive video %s' % video_id
            description = None

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'thumbnail': thumbnail,
            'description': description,
        }