- mobj = re.match(self._VALID_URL, url)
- if mobj is None:
- raise ExtractorError(u'Invalid URL: %s' % url)
- showName = mobj.group('showname')
- videoId = mobj.group('episode')
-
- self.report_extraction(videoId)
- webpage = self._download_webpage(url, videoId)
-
- videoDesc = self._html_search_regex('<meta name="description" content="([^"]*)"',
- webpage, u'description', fatal=False)
-
- imgUrl = self._html_search_regex('<meta property="og:image" content="([^"]*)"',
- webpage, u'thumbnail', fatal=False)
-
- playerUrl = self._html_search_regex('<meta property="og:video" content="([^"]*)"',
- webpage, u'player url')
-
- title = self._html_search_regex('<meta name="title" content="([^"]*)"',
- webpage, u'player url').split(' : ')[-1]
-
- configUrl = self._search_regex('config=(.*)$', playerUrl, u'config url')
- configUrl = compat_urllib_parse.unquote(configUrl)
-
- configJSON = self._download_webpage(configUrl, videoId,
- u'Downloading configuration',
- u'unable to download configuration')
-
- # Technically, it's JavaScript, not JSON
- configJSON = configJSON.replace("'", '"')
-
+ # Derive the video id from the URL, then fetch the page through an
+ # explicit Request so the User-Agent header can be set to
+ # self._USER_AGENT.
+ video_id = self._match_id(url)
+ webpage_req = compat_urllib_request.Request(url)
+ webpage_req.add_header('User-Agent', self._USER_AGENT)
+ webpage = self._download_webpage(webpage_req, video_id)
+
+ # Uploader id and uploader name are both read from the headline <h1>.
+ # Both lookups pass fatal=False (presumably so a page without the
+ # headline does not abort extraction -- confirm against the helper).
+ uploader_id = self._html_search_regex(
+ r"<h1\s+class='headline'>\s*<a\s+href='/videos/view/(.*?)'",
+ webpage, 'uploader ID', fatal=False)
+ uploader = self._html_search_regex(
+ r"<h1\s+class='headline'>(.*?)</a>",
+ webpage, 'uploader', fatal=False)
+ # Description and duration come from the page's meta tags; the raw
+ # duration value is passed through parse_duration.
+ description = self._html_search_meta('description', webpage)
+ duration = parse_duration(self._html_search_meta('duration', webpage))
+
+ # Keep only the text after the first ' : ' in the title meta tag.
+ # NOTE(review): str.partition returns '' as the tail when ' : ' is
+ # absent, so a title without the separator would come out empty --
+ # confirm the site always includes it.
+ raw_title = self._html_search_meta('title', webpage, fatal=True)
+ title = raw_title.partition(' : ')[2]
+
+ # The config URL is the http(s) URL that follows "config=" in the
+ # flashvars (either a <param name="flashvars"> value or a flashvars
+ # attribute), captured up to the next '"' or '&' and then unquoted.
+ config_url = compat_urllib_parse.unquote(self._html_search_regex(
+ r'''(?x)
+ (?:
+ <param\s+name="flashvars".*?\s+value="config=|
+ flashvars="config=
+ )
+ (https?://[^"&]+)
+ ''',
+ webpage, 'config URL'))
+
+ # Accumulators filled by _add_format: entries tagged 'Video' go to
+ # formats, entries tagged 'Video Postroll' go to ad_formats.
+ formats = []
+ ad_formats = []
+
+ def _add_format(name, cfg_url, quality):
+ # Download the player configuration at cfg_url and append one format
+ # entry per relevant playlist item to the enclosing formats /
+ # ad_formats lists. Returns nothing.
+ #
+ # name: used as the 'format_id' of every entry added and in the
+ # download progress / error messages.
+ # cfg_url: URL of the configuration document.
+ # quality: stored as the 'quality' of every entry added.
+ #
+ # Indexing config['playlist'] and p['url'] raises KeyError when the
+ # key is missing.
+ cfg_req = compat_urllib_request.Request(cfg_url)
+ cfg_req.add_header('User-Agent', self._USER_AGENT)
+ # The response is passed through js_to_json before being parsed.
+ config = self._download_json(
+ cfg_req, video_id,
+ 'Downloading ' + name + ' configuration',
+ 'Unable to download ' + name + ' configuration',
+ transform_source=js_to_json)
+
+ playlist = config['playlist']
+ for p in playlist:
+ # Route the entry by its eventCategory; anything that is neither
+ # 'Video' nor 'Video Postroll' is skipped.
+ if p.get('eventCategory') == 'Video':
+ ar = formats
+ elif p.get('eventCategory') == 'Video Postroll':
+ ar = ad_formats
+ else:
+ continue
+
+ # The same User-Agent used for the config request is attached to
+ # the format entry as an HTTP header.
+ ar.append({
+ 'url': p['url'],
+ 'format_id': name,
+ 'quality': quality,
+ 'http_headers': {
+ 'User-Agent': self._USER_AGENT,
+ },
+ })
+
+ _add_format('normal', config_url, quality=0)
+ # Build the high-quality config URL by adding hq=1 to config_url: use
+ # '&' when a query string is already present, otherwise start one with
+ # '?'. Only the separator and parameter are chosen by the conditional;
+ # config_url itself is prepended exactly once.
+ hq_url = (config_url +
+ ('&hq=1' if '?' in config_url else '?hq=1'))