X-Git-Url: https://git.rapsys.eu/youtubedl/blobdiff_plain/1d3fd83f473663fce3e0a10303473a38d80cc3d0..abc36f0474141930338eef35e2444365fab932b6:/youtube_dl/extractor/pornhub.py diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py index 9b41359..b25f1f1 100644 --- a/youtube_dl/extractor/pornhub.py +++ b/youtube_dl/extractor/pornhub.py @@ -1,7 +1,9 @@ # coding: utf-8 from __future__ import unicode_literals +import functools import itertools +import operator # import os import re @@ -18,6 +20,7 @@ from ..utils import ( js_to_json, orderedSet, # sanitized_Request, + remove_quotes, str_to_int, ) # from ..aes import ( @@ -129,9 +132,32 @@ class PornHubIE(InfoExtractor): tv_webpage = dl_webpage('tv') - video_url = self._search_regex( - r']+\bsrc=(["\'])(?P(?:https?:)?//.+?)\1', tv_webpage, - 'video url', group='url') + assignments = self._search_regex( + r'(var.+?mediastring.+?)', tv_webpage, + 'encoded url').split(';') + + js_vars = {} + + def parse_js_value(inp): + inp = re.sub(r'/\*(?:(?!\*/).)*?\*/', '', inp) + if '+' in inp: + inps = inp.split('+') + return functools.reduce( + operator.concat, map(parse_js_value, inps)) + inp = inp.strip() + if inp in js_vars: + return js_vars[inp] + return remove_quotes(inp) + + for assn in assignments: + assn = assn.strip() + if not assn: + continue + assn = re.sub(r'var\s+', '', assn) + vname, value = assn.split('=', 1) + js_vars[vname] = parse_js_value(value) + + video_url = js_vars['mediastring'] title = self._search_regex( r'

([^>]+)

', tv_webpage, 'title', default=None)