from .common import InfoExtractor
from ..utils import (
+ clean_html,
dict_get,
ExtractorError,
float_or_none,
+ get_element_by_class,
int_or_none,
+ js_to_json,
parse_duration,
parse_iso8601,
try_get,
unescapeHTML,
+ urlencode_postdata,
+ urljoin,
)
from ..compat import (
compat_etree_fromstring,
class BBCCoUkIE(InfoExtractor):
IE_NAME = 'bbc.co.uk'
IE_DESC = 'BBC iPlayer'
- _ID_REGEX = r'[pb][\da-z]{7}'
+ _ID_REGEX = r'[pbw][\da-z]{7}'
_VALID_URL = r'''(?x)
https?://
(?:www\.)?bbc\.co\.uk/
(?:
programmes/(?!articles/)|
iplayer(?:/[^/]+)?/(?:episode/|playlist/)|
- music/clips[/#]|
- radio/player/
+ music/(?:clips|audiovideo/popular)[/#]|
+ radio/player/|
+ events/[^/]+/play/[^/]+/
)
(?P<id>%s)(?!/(?:episodes|broadcasts|clips))
''' % _ID_REGEX
+ _LOGIN_URL = 'https://account.bbc.com/signin'
+ _NETRC_MACHINE = 'bbc'
+
_MEDIASELECTOR_URLS = [
# Provides HQ HLS streams with even better quality than pc mediaset but fails
# with geolocation in some cases even when it's not geo restricted at all (e.g.
}, {
'url': 'http://www.bbc.co.uk/radio/player/p03cchwf',
'only_matching': True,
- }
- ]
+ }, {
+ 'url': 'https://www.bbc.co.uk/music/audiovideo/popular#p055bc55',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.bbc.co.uk/programmes/w3csv1y9',
+ 'only_matching': True,
+ }]
_USP_RE = r'/([^/]+?)\.ism(?:\.hlsv2\.ism)?/[^/]+\.m3u8'
def _login(self):
    """Sign in to the BBC account site when credentials are available.

    Returns immediately (doing nothing) if ``_get_login_info()`` yields no
    username. Otherwise the sign-in page is downloaded, the values returned
    by ``_hidden_inputs()`` are combined with the username and password, and
    the result is POSTed to the form's ``action`` URL (``_LOGIN_URL`` is
    used when no ``action`` attribute is found).

    Raises:
        ExtractorError: if the URL reached after the POST still contains
            ``_LOGIN_URL``. When the response has a ``form-message``
            element its text is included and the error is marked as
            expected.
    """
    username, password = self._get_login_info()
    if username is None:
        return

    login_page = self._download_webpage(
        self._LOGIN_URL, None, 'Downloading signin page')

    # Start from the form fields already present on the page, then add
    # the credentials on top.
    login_form = self._hidden_inputs(login_page)

    login_form.update({
        'username': username,
        'password': password,
    })

    # The extracted action is joined with the sign-in URL, so a relative
    # action still produces a usable POST target.
    post_url = urljoin(self._LOGIN_URL, self._search_regex(
        r'<form[^>]+action=(["\'])(?P<url>.+?)\1', login_page,
        'post url', default=self._LOGIN_URL, group='url'))

    response, urlh = self._download_webpage_handle(
        post_url, None, 'Logging in', data=urlencode_postdata(login_form),
        headers={'Referer': self._LOGIN_URL})

    # Ending up on the sign-in URL again is treated as a failed login.
    if self._LOGIN_URL in urlh.geturl():
        error = clean_html(get_element_by_class('form-message', response))
        if error:
            raise ExtractorError(
                'Unable to login: %s' % error, expected=True)
        raise ExtractorError('Unable to log in')
+
def _real_initialize(self):
    """Initialization hook: attempt the account login via ``_login()``."""
    self._login()
+
class MediaSelectionError(Exception):
    """Exception that records the ``id`` value supplied by the raiser.

    NOTE(review): presumably ``id`` is the error identifier reported by the
    media selector -- confirm against the raise sites.
    """

    def __init__(self, id):
        # ``id`` shadows the builtin, but the parameter name is part of the
        # existing interface and is therefore kept.
        self.id = id
fmt.update({
'width': width,
'height': height,
- 'vbr': bitrate,
+ 'tbr': bitrate,
'vcodec': encoding,
})
else:
'acodec': encoding,
'vcodec': 'none',
})
- if protocol == 'http':
+ if protocol in ('http', 'https'):
# Direct link
fmt.update({
'url': href,
'rtmp_live': False,
'ext': 'flv',
})
+ else:
+ continue
formats.append(fmt)
elif kind == 'captions':
subtitles = self.extract_subtitles(media, programme_id)
description = smp_config['summary']
for item in smp_config['items']:
kind = item['kind']
- if kind != 'programme' and kind != 'radioProgramme':
+ if kind not in ('programme', 'radioProgramme'):
continue
programme_id = item.get('vpid')
duration = int_or_none(item.get('duration'))
for item in self._extract_items(playlist):
kind = item.get('kind')
- if kind != 'programme' and kind != 'radioProgramme':
+ if kind not in ('programme', 'radioProgramme'):
continue
title = playlist.find('./{%s}title' % self._EMP_PLAYLIST_NS).text
description_el = playlist.find('./{%s}summary' % self._EMP_PLAYLIST_NS)
webpage = self._download_webpage(url, group_id, 'Downloading video page')
+ error = self._search_regex(
+ r'<div\b[^>]+\bclass=["\']smp__message delta["\'][^>]*>([^<]+)<',
+ webpage, 'error', default=None)
+ if error:
+ raise ExtractorError(error, expected=True)
+
programme_id = None
duration = None
# single video article embedded with data-media-vpid
'url': 'http://www.bbc.co.uk/sport/rowing/35908187',
'only_matching': True,
+ }, {
+ 'url': 'https://www.bbc.co.uk/bbcthree/clip/73d0bbd0-abc3-4cea-b3c0-cdae21905eb1',
+ 'info_dict': {
+ 'id': 'p06556y7',
+ 'ext': 'mp4',
+ 'title': 'Transfers: Cristiano Ronaldo to Man Utd, Arsenal to spend?',
+ 'description': 'md5:4b7dfd063d5a789a1512e99662be3ddd',
+ },
+ 'params': {
+ 'skip_download': True,
+ }
}]
@classmethod
'subtitles': subtitles,
}
+ bbc3_config = self._parse_json(
+ self._search_regex(
+ r'(?s)bbcthreeConfig\s*=\s*({.+?})\s*;\s*<', webpage,
+ 'bbcthree config', default='{}'),
+ playlist_id, transform_source=js_to_json, fatal=False)
+ if bbc3_config:
+ bbc3_playlist = try_get(
+ bbc3_config, lambda x: x['payload']['content']['bbcMedia']['playlist'],
+ dict)
+ if bbc3_playlist:
+ playlist_title = bbc3_playlist.get('title') or playlist_title
+ thumbnail = bbc3_playlist.get('holdingImageURL')
+ entries = []
+ for bbc3_item in bbc3_playlist['items']:
+ programme_id = bbc3_item.get('versionID')
+ if not programme_id:
+ continue
+ formats, subtitles = self._download_media_selector(programme_id)
+ self._sort_formats(formats)
+ entries.append({
+ 'id': programme_id,
+ 'title': playlist_title,
+ 'thumbnail': thumbnail,
+ 'timestamp': timestamp,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ })
+ return self.playlist_result(
+ entries, playlist_id, playlist_title, playlist_description)
+
def extract_all(pattern):
return list(filter(None, map(
lambda s: self._parse_json(s, playlist_id, fatal=False),