-
def _real_extract(self, url):
    """Extract metadata and media formats for the video at ``url``.

    The video id is taken from the ``id`` group of ``_VALID_URL``. When that
    group is empty, the page at ``url`` is downloaded and the id is searched
    for in its markup. The XML detail document for that id is then fetched
    and converted into the returned dictionary.

    Only the headline (used as the title) is required in the detail
    document. A missing description, duration or ``date`` attribute yields
    ``None`` for that field, and thumbnail or media entries without a URL
    are skipped, instead of aborting the whole extraction.

    Returns a dict with the keys ``id``, ``title``, ``description``,
    ``duration``, ``timestamp``, ``formats`` and ``thumbnails``.
    """
    mobj = re.match(self._VALID_URL, url)
    video_id = mobj.group('id')

    if not video_id:
        # No id in the URL itself: look it up in the page markup instead.
        video_path = mobj.group('path')
        webpage = self._download_webpage(url, video_path)
        video_id = self._search_regex(
            [r'data-video-?id="(\d+)"', r'content_id=(\d+)'], webpage, 'video id')

    # The detail URL contains the last three characters of the id as
    # separate path components, followed by the full id.
    detail = self._download_xml(
        'http://m.mlb.com/gen/multimedia/detail/%s/%s/%s/%s.xml'
        % (video_id[-3], video_id[-2], video_id[-1], video_id), video_id)

    def optional_text(path):
        # Text of the first element matching ``path``; None when absent.
        node = detail.find(path)
        return None if node is None else node.text

    title = detail.find('./headline').text
    description = optional_text('./big-blurb')

    duration_text = optional_text('./duration')
    duration = parse_duration(duration_text) if duration_text else None

    # The last five characters of the date are dropped before parsing
    # (presumably a trailing UTC offset such as "-0400" -- TODO confirm).
    date_text = detail.attrib.get('date')
    timestamp = parse_iso8601(date_text[:-5]) if date_text else None

    thumbnails = [
        {'url': thumbnail.text}
        for thumbnail in detail.findall('./thumbnailScenarios/thumbnailScenario')
        if thumbnail.text
    ]

    formats = []
    for media_url in detail.findall('./url'):
        if not media_url.text:
            # An entry without a URL cannot be downloaded; skip it.
            continue
        playback_scenario = media_url.attrib['playback_scenario']
        fmt = {
            'url': media_url.text,
            'format_id': playback_scenario,
        }
        # Scenario names may embed "<bitrate>K_<width>X<height>".
        m = re.search(r'(?P<vbr>\d+)K_(?P<width>\d+)X(?P<height>\d+)', playback_scenario)
        if m:
            fmt.update({
                # NOTE(review): the "K" figure is scaled by 1000 here;
                # confirm which unit the consumer of 'vbr' expects.
                'vbr': int(m.group('vbr')) * 1000,
                'width': int(m.group('width')),
                'height': int(m.group('height')),
            })
        formats.append(fmt)

    self._sort_formats(formats)

    return {
        'id': video_id,
        'title': title,
        'description': description,
        'duration': duration,
        'timestamp': timestamp,
        'formats': formats,
        'thumbnails': thumbnails,
    }