Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/nhl.py
Start new release.
[youtubedl] / youtube_dl / extractor / nhl.py
1 from __future__ import unicode_literals
2
3 import re
4 import json
5
6 from .common import InfoExtractor
7 from ..compat import (
8 compat_urlparse,
9 compat_urllib_parse,
10 )
11 from ..utils import (
12 unified_strdate,
13 )
14
15
class NHLBaseInfoExtractor(InfoExtractor):
    """Common helpers shared by the nhl.com extractors defined in this module."""

    @staticmethod
    def _fix_json(json_string):
        """Return json_string with each backslash-escaped single quote unescaped.

        Every two-character sequence "backslash, single quote" is replaced by
        a bare single quote; everything else is left untouched.
        """
        escaped_quote = '\\\''
        return json_string.replace(escaped_quote, '\'')

    def _extract_video(self, info):
        """Build the result dictionary for a single video entry.

        info is a mapping from which the keys 'id', 'publishPoint',
        'formats', 'name', 'description', 'duration', 'bigImage' and
        'releaseDate' are read.

        Returns a dict with the keys 'id', 'title', 'url', 'description',
        'duration', 'thumbnail' and 'upload_date'.
        """
        video_id = info['id']
        self.report_extraction(video_id)

        # Unless the entry is flagged with formats == '1', the publish point
        # is used directly as the media URL.
        video_url = info['publishPoint']
        if info['formats'] == '1':
            # Ask the encryptvideopath servlet for the final URL of the
            # "_sd" variant of the published file.
            query = compat_urllib_parse.urlencode({
                'type': 'fvod',
                'path': video_url.replace('.mp4', '_sd.mp4'),
            })
            servlet_url = (
                'http://video.nhl.com/videocenter/servlets/encryptvideopath?'
                + query)
            servlet_doc = self._download_xml(
                servlet_url, video_id, 'Downloading final video url')
            video_url = servlet_doc.find('path').text

        urljoin = compat_urlparse.urljoin

        # The fields are read in the same order as the keys of the result.
        title = info['name']
        description = info['description']
        duration = int(info['duration'])
        # The image name is resolved against the '/u/' path on the host that
        # serves the video.
        images_root = urljoin(video_url, '/u/')
        thumbnail = urljoin(images_root, info['bigImage'])
        # Only the part of the release date before the first '.' is parsed.
        upload_date = unified_strdate(info['releaseDate'].partition('.')[0])

        return {
            'id': video_id,
            'title': title,
            'url': video_url,
            'description': description,
            'duration': duration,
            'thumbnail': thumbnail,
            'upload_date': upload_date,
        }
48
49
class NHLIE(NHLBaseInfoExtractor):
    """Extractor for a single video addressed by id on an nhl.com videocenter console URL."""

    IE_NAME = 'nhl.com'
    _VALID_URL = r'https?://video(?P<team>\.[^.]*)?\.nhl\.com/videocenter/console(?:\?(?:.*?[?&])?)id=(?P<id>[0-9a-z-]+)'

    _TESTS = [
        {
            'url': 'http://video.canucks.nhl.com/videocenter/console?catid=6?id=453614',
            'md5': 'db704a4ea09e8d3988c85e36cc892d09',
            'info_dict': {
                'id': '453614',
                'ext': 'mp4',
                'title': 'Quick clip: Weise 4-3 goal vs Flames',
                'description': 'Dale Weise scores his first of the season to put the Canucks up 4-3.',
                'duration': 18,
                'upload_date': '20131006',
            },
        },
        {
            'url': 'http://video.nhl.com/videocenter/console?id=2014020024-628-h',
            'md5': 'd22e82bc592f52d37d24b03531ee9696',
            'info_dict': {
                'id': '2014020024-628-h',
                'ext': 'mp4',
                'title': 'Alex Galchenyuk Goal on Ray Emery (14:40/3rd)',
                'description': 'Home broadcast - Montreal Canadiens at Philadelphia Flyers - October 11, 2014',
                'duration': 0,
                'upload_date': '20141011',
            },
        },
        {
            'url': 'http://video.flames.nhl.com/videocenter/console?id=630616',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Fetch the playlist JSON for the id in url and extract its first entry."""
        video_id = re.match(self._VALID_URL, url).group('id')
        playlist_url = (
            'http://video.nhl.com/videocenter/servlets/playlist?ids=%s&format=json'
            % video_id)
        # The raw response is passed through _fix_json before it is parsed.
        playlist = self._download_json(
            playlist_url, video_id, transform_source=self._fix_json)
        first_entry = playlist[0]
        return self._extract_video(first_entry)
88
89
class NHLVideocenterIE(NHLBaseInfoExtractor):
    """Playlist extractor for a category page of an nhl.com team videocenter."""

    IE_NAME = 'nhl.com:videocenter'
    IE_DESC = 'NHL videocenter category'
    _VALID_URL = r'https?://video\.(?P<team>[^.]*)\.nhl\.com/videocenter/(console\?.*?catid=(?P<catid>[0-9]+)(?![&?]id=).*?)?$'
    _TEST = {
        'url': 'http://video.canucks.nhl.com/videocenter/console?catid=999',
        'info_dict': {
            'id': '999',
            'title': 'Highlights',
        },
        'playlist_count': 12,
    }

    def _real_extract(self, url):
        """Return a playlist result for the category shown at url.

        The category id and the playlist title are both scraped from the
        downloaded page; the videos are then listed through the site's
        browse servlet and each one is converted with _extract_video.
        """
        mobj = re.match(self._VALID_URL, url)
        team = mobj.group('team')
        webpage = self._download_webpage(url, team)

        # The category id is taken from the page itself; the optional
        # 'catid' group of _VALID_URL is not consulted here.
        cat_id = self._search_regex(
            [r'var defaultCatId = "(.+?)";',
             r'{statusIndex:0,index:0,.*?id:(.*?),'],
            webpage, 'category id')
        playlist_title = self._html_search_regex(
            r'tab0"[^>]*?>(.*?)</td>',
            webpage, 'playlist title', flags=re.DOTALL).lower().capitalize()

        data = compat_urllib_parse.urlencode({
            'cid': cat_id,
            # This is the default value
            'count': 12,
            'ptrs': 3,
            'format': 'json',
        })
        path = '/videocenter/servlets/browse?' + data
        # The servlet path is resolved against the host of the requested URL.
        request_url = compat_urlparse.urljoin(url, path)

        response = self._download_webpage(request_url, playlist_title)
        response = self._fix_json(response)
        if not response.strip():
            # A blank body is retried once with the "newvideos" flag added.
            self._downloader.report_warning('Got an empty response, trying '
                                            'adding the "newvideos" parameter')
            response = self._download_webpage(request_url + '&newvideos=true',
                                              playlist_title)
            response = self._fix_json(response)
        videos = json.loads(response)

        return {
            '_type': 'playlist',
            'title': playlist_title,
            'id': cat_id,
            'entries': [self._extract_video(v) for v in videos],
        }