import re
from .common import InfoExtractor
+from ..compat import (
+ compat_parse_qs,
+ compat_urllib_parse_urlparse,
+)
from ..utils import (
find_xpath_attr,
unified_strdate,
- get_element_by_id,
get_element_by_attribute,
int_or_none,
qualities,
def _extract_from_webpage(self, webpage, video_id, lang):
json_url = self._html_search_regex(
[r'arte_vp_url=["\'](.*?)["\']', r'data-url=["\']([^"]+)["\']'],
- webpage, 'json vp url')
+ webpage, 'json vp url', default=None)
+ if not json_url:
+ iframe_url = self._html_search_regex(
+ r'<iframe[^>]+src=(["\'])(?P<url>.+\bjson_url=.+?)\1',
+ webpage, 'iframe url', group='url')
+ json_url = compat_parse_qs(
+ compat_urllib_parse_urlparse(iframe_url).query)['json_url'][0]
return self._extract_from_json_url(json_url, video_id, lang)
def _extract_from_json_url(self, json_url, video_id, lang):
formats.append(format)
+ self._check_formats(formats, video_id)
self._sort_formats(formats)
info_dict['formats'] = formats
def _real_extract(self, url):
anchor_id, lang = self._extract_url_info(url)
webpage = self._download_webpage(url, anchor_id)
- row = get_element_by_id(anchor_id, webpage)
+ row = self._search_regex(
+ r'(?s)id="%s"[^>]*>.+?(<div[^>]*arte_vp_url[^>]*>)' % anchor_id,
+ webpage, 'row')
return self._extract_from_webpage(row, anchor_id, lang)