object_str = object_str.replace('<--', '<!--')
object_str = fix_xml_ampersands(object_str)
- object_doc = xml.etree.ElementTree.fromstring(object_str)
+ object_doc = xml.etree.ElementTree.fromstring(object_str.encode('utf-8'))
fv_el = find_xpath_attr(object_doc, './param', 'name', 'flashVars')
if fv_el is not None:
url_m = re.search(r'<meta\s+property="og:video"\s+content="(http://c.brightcove.com/[^"]+)"', webpage)
if url_m:
- return [unescapeHTML(url_m.group(1))]
+ url = unescapeHTML(url_m.group(1))
+ # Some sites don't add it, we can't download with this url, for example:
+ # http://www.ktvu.com/videos/news/raw-video-caltrain-releases-video-of-man-almost/vCTZdY/
+ if 'playerKey' in url:
+ return [url]
matches = re.findall(
r'''(?sx)<object