+ # video_title from flashvars contains whitespace in place of non-ASCII
+ # characters (see http://www.pornhub.com/view_video.php?viewkey=1331683002),
+ # so it is no longer relied upon.
+ title = self._html_search_meta(
+ 'twitter:title', webpage, default=None) or self._search_regex(
+ (r'<h1[^>]+class=["\']title["\'][^>]*>(?P<title>[^<]+)',
+ r'<div[^>]+data-video-title=(["\'])(?P<title>.+?)\1',
+ r'shareTitle\s*=\s*(["\'])(?P<title>.+?)\1'),
+ webpage, 'title', group='title')
+
+ video_urls = []
+ video_urls_set = set()
+ subtitles = {}
+
+ flashvars = self._parse_json(
+ self._search_regex(
+ r'var\s+flashvars_\d+\s*=\s*({.+?});', webpage, 'flashvars', default='{}'),
+ video_id)
+ if flashvars:
+ subtitle_url = url_or_none(flashvars.get('closedCaptionsFile'))
+ if subtitle_url:
+ subtitles.setdefault('en', []).append({
+ 'url': subtitle_url,
+ 'ext': 'srt',
+ })
+ thumbnail = flashvars.get('image_url')
+ duration = int_or_none(flashvars.get('video_duration'))
+ media_definitions = flashvars.get('mediaDefinitions')
+ if isinstance(media_definitions, list):
+ for definition in media_definitions:
+ if not isinstance(definition, dict):
+ continue
+ video_url = definition.get('videoUrl')
+ if not video_url or not isinstance(video_url, compat_str):
+ continue
+ if video_url in video_urls_set:
+ continue
+ video_urls_set.add(video_url)
+ video_urls.append(
+ (video_url, int_or_none(definition.get('quality'))))
+ else:
+ thumbnail, duration = [None] * 2
+
+ if not video_urls:
+ tv_webpage = dl_webpage('tv')
+
+ assignments = self._search_regex(
+ r'(var.+?mediastring.+?)</script>', tv_webpage,
+ 'encoded url').split(';')
+
+ js_vars = {}
+
+ def parse_js_value(inp):  # Resolve one JS right-hand side (quoted literals / known names joined by '+') to its value.
+ inp = re.sub(r'/\*(?:(?!\*/).)*?\*/', '', inp)  # drop /* ... */ spans; no DOTALL flag, so a span containing a newline is left in place
+ if '+' in inp:  # treated as concatenation; NOTE(review): this also triggers on a '+' inside a quoted literal
+ inps = inp.split('+')
+ return functools.reduce(  # evaluate every operand recursively, then join the results left to right
+ operator.concat, map(parse_js_value, inps))
+ inp = inp.strip()  # single operand: ignore surrounding whitespace before the lookup below
+ if inp in js_vars:  # a name already recorded in the enclosing scope's js_vars dict resolves to its stored value
+ return js_vars[inp]
+ return remove_quotes(inp)  # otherwise taken as a literal; remove_quotes is defined outside this view - presumably strips the enclosing quotes, TODO confirm
+
+ for assn in assignments:
+ assn = assn.strip()
+ if not assn:
+ continue
+ assn = re.sub(r'var\s+', '', assn)
+ vname, value = assn.split('=', 1)
+ js_vars[vname] = parse_js_value(value)
+
+ video_url = js_vars['mediastring']
+ if video_url not in video_urls_set:
+ video_urls.append((video_url, None))
+ video_urls_set.add(video_url)
+
+ for mobj in re.finditer(
+ r'<a[^>]+\bclass=["\']downloadBtn\b[^>]+\bhref=(["\'])(?P<url>(?:(?!\1).)+)\1',
+ webpage):
+ video_url = mobj.group('url')
+ if video_url not in video_urls_set:
+ video_urls.append((video_url, None))
+ video_urls_set.add(video_url)
+
+ formats = []
+ for video_url, height in video_urls:
+ tbr = None
+ mobj = re.search(r'(?P<height>\d+)[pP]?_(?P<tbr>\d+)[kK]', video_url)
+ if mobj:
+ if not height:
+ height = int(mobj.group('height'))
+ tbr = int(mobj.group('tbr'))
+ formats.append({
+ 'url': video_url,
+ 'format_id': '%dp' % height if height else None,
+ 'height': height,
+ 'tbr': tbr,
+ })
+ self._sort_formats(formats)
+