- m = re.search(re.escape(BEFORE) + '(.*?)' + re.escape(AFTER), webpage)
- if not m:
- raise ExtractorError(u'Cannot parse data')
- data = dict(json.loads(m.group(1)))
- params_raw = compat_urllib_parse.unquote(data['params'])
- params = json.loads(params_raw)
- video_data = params['video_data'][0]
- video_url = video_data.get('hd_src')
- if not video_url:
- video_url = video_data['sd_src']
- if not video_url:
- raise ExtractorError(u'Cannot find video URL')
- video_duration = int(video_data['video_duration'])
- thumbnail = video_data['thumbnail_src']
-
- video_title = self._html_search_regex('<h2 class="uiHeaderTitle">([^<]+)</h2>',
- webpage, u'title')
-
- info = {
+ PATTERN = re.escape(BEFORE) + '(?:\n|\\\\n)(.*?)' + re.escape(AFTER)
+
+ for m in re.findall(PATTERN, webpage):
+ swf_params = m.replace('\\\\', '\\').replace('\\"', '"')
+ data = dict(json.loads(swf_params))
+ params_raw = compat_urllib_parse_unquote(data['params'])
+ video_data_candidate = json.loads(params_raw)['video_data']
+ for _, f in video_data_candidate.items():
+ if not f:
+ continue
+ if isinstance(f, dict):
+ f = [f]
+ if not isinstance(f, list):
+ continue
+ if f[0].get('video_id') == video_id:
+ video_data = video_data_candidate
+ break
+ if video_data:
+ break
+
+ def video_data_list2dict(video_data):  # regroup an iterable of items into a dict of lists keyed by each item's 'stream_type' value
+ ret = {}  # maps a 'stream_type' value to the list of items that carry it
+ for item in video_data:
+ format_id = item['stream_type']  # plain subscript lookup: an item lacking 'stream_type' raises here instead of being skipped
+ ret.setdefault(format_id, []).append(item)  # start an empty list the first time a key is seen; items keep their iteration order within each list
+ return ret  # an empty video_data yields an empty dict
+
+ if not video_data:
+ server_js_data = self._parse_json(self._search_regex(
+ r'handleServerJS\(({.+})\);', webpage, 'server js data', default='{}'), video_id)
+ for item in server_js_data.get('instances', []):
+ if item[1][0] == 'VideoConfig':
+ video_data = video_data_list2dict(item[2][0]['videoData'])
+ break
+
+ if not video_data:
+ if not fatal_if_no_video:
+ return webpage, False
+ m_msg = re.search(r'class="[^"]*uiInterstitialContent[^"]*"><div>(.*?)</div>', webpage)
+ if m_msg is not None:
+ raise ExtractorError(
+ 'The video is not available, Facebook said: "%s"' % m_msg.group(1),
+ expected=True)
+ else:
+ raise ExtractorError('Cannot parse data')
+
+ formats = []
+ for format_id, f in video_data.items():
+ if f and isinstance(f, dict):
+ f = [f]
+ if not f or not isinstance(f, list):
+ continue
+ for quality in ('sd', 'hd'):
+ for src_type in ('src', 'src_no_ratelimit'):
+ src = f[0].get('%s_%s' % (quality, src_type))
+ if src:
+ preference = -10 if format_id == 'progressive' else 0
+ if quality == 'hd':
+ preference += 5
+ formats.append({
+ 'format_id': '%s_%s_%s' % (format_id, quality, src_type),
+ 'url': src,
+ 'preference': preference,
+ })
+ dash_manifest = f[0].get('dash_manifest')
+ if dash_manifest:
+ formats.extend(self._parse_mpd_formats(
+ compat_etree_fromstring(compat_urllib_parse_unquote_plus(dash_manifest))))
+ if not formats:
+ raise ExtractorError('Cannot find video formats')
+
+ self._sort_formats(formats)
+
+ video_title = self._html_search_regex(
+ r'<h2\s+[^>]*class="uiHeaderTitle"[^>]*>([^<]*)</h2>', webpage, 'title',
+ default=None)
+ if not video_title:
+ video_title = self._html_search_regex(
+ r'(?s)<span class="fbPhotosPhotoCaption".*?id="fbPhotoPageCaption"><span class="hasCaption">(.*?)</span>',
+ webpage, 'alternative title', default=None)
+ video_title = limit_length(video_title, 80)
+ if not video_title:
+ video_title = 'Facebook video #%s' % video_id
+ uploader = clean_html(get_element_by_id('fbPhotoPageAuthorName', webpage))
+
+ info_dict = {