+version 2017.11.06
+
+Core
++ [extractor/common] Add protocol for f4m formats
+* [f4m] Prefer baseURL for relative URLs (#14660)
+* [extractor/common] Respect URL query in _extract_wowza_formats (#14645)
+
+Extractors
++ [hotstar:playlist] Add support for playlists (#12465)
+* [hotstar] Bypass geo restriction (#14672)
+- [22tracks] Remove extractor (#11024, #14628)
++ [skysport] Add support for ooyala videos protected with embed_token (#14641)
+* [gamespot] Extract formats referenced with new data fields (#14652)
+* [spankbang] Detect unavailable videos (#14644)
+
+
+version 2017.10.29
+
+Core
+* [extractor/common] Prefix format id for audio only HLS formats
++ [utils] Add support for zero years and months in parse_duration
+
+Extractors
+* [egghead] Fix extraction (#14388)
++ [fxnetworks] Extract series metadata (#14603)
++ [younow] Add support for younow.com (#9255, #9432, #12436)
+* [dctptv] Fix extraction (#14599)
+* [youtube] Restrict embed regex (#14600)
+* [vimeo] Restrict iframe embed regex (#14600)
+* [soundgasm] Improve extraction (#14588)
+- [myvideo] Remove extractor (#8557)
++ [nbc] Add support for classic-tv videos (#14575)
++ [vrtnu] Add support for cookies authentication and simplify (#11873)
++ [canvas] Add support for vrt.be/vrtnu (#11873)
+* [twitch:clips] Fix title extraction (#14566)
++ [ndtv] Add support for sub-sites (#14534)
+* [dramafever] Fix login error message extraction
++ [nick] Add support for more nickelodeon sites (no, dk, se, ch, fr, es, pt,
+ ro, hu) (#14553)
+
+
+version 2017.10.20
+
+Core
+* [downloader/fragment] Report warning instead of error on inconsistent
+ download state
+* [downloader/hls] Fix total fragments count when ad fragments exist
+
+Extractors
+* [parliamentliveuk] Fix extraction (#14524)
+* [soundcloud] Update client id (#14546)
++ [servus] Add support for servus.com (#14362)
++ [unity] Add support for unity3d.com (#14528)
+* [youtube] Replace youtube redirect URLs in description (#14517)
+* [pbs] Restrict direct video URL regular expression (#14519)
+* [drtv] Respect preference for direct HTTP formats (#14509)
++ [eporner] Add support for embed URLs (#14507)
+* [arte] Capture and output error message
+* [niconico] Improve uploader metadata extraction robustness (#14135)
+
+
version 2017.10.15.1
Core
Core
+ [downloader/hls] Add basic support for EXT-X-BYTERANGE tag (#10955)
-+ [adobepass] Improve Comcast and Verison login code (#10803)
++ [adobepass] Improve Comcast and Verizon login code (#10803)
+ [adobepass] Add support for Verizon (#10803)
Extractors
+[![Build Status](https://travis-ci.org/rg3/youtube-dl.svg?branch=master)](https://travis-ci.org/rg3/youtube-dl)
+
youtube-dl - download videos from youtube.com or other video platforms
- [INSTALLATION](#installation)
+[Build Status]
+
youtube-dl - download videos from youtube.com or other video platforms
- INSTALLATION
- **1up.com**
- **20min**
- **220.ro**
- - **22tracks:genre**
- - **22tracks:track**
- **24video**
- **3qsdn**: 3Q SDN
- **3sat**
- **HornBunny**
- **HotNewHipHop**
- **HotStar**
+ - **hotstar:playlist**
- **Howcast**
- **HowStuffWorks**
- **HRTi**
- **MySpace:album**
- **MySpass**
- **Myvi**
- - **myvideo** (Currently broken)
- **MyVidster**
- **n-tv.de**
- **natgeo**
- **SenateISVP**
- **SendtoNews**
- **ServingSys**
+ - **Servus**
- **Sexu**
- **Shahid**
- **Shared**: shared.sx
- **UDNEmbed**: 聯合影音
- **UKTVPlay**
- **Unistra**
+ - **Unity**
- **uol.com.br**
- **uplynk**
- **uplynk:preplay**
- **vpro**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl
- **Vrak**
- **VRT**: deredactie.be, sporza.be, cobra.be and cobra.canvas.be
+ - **VrtNU**: VrtNU.be
- **vrv**
- **vrv:series**
- **VShare**
- **YouJizz**
- **youku**: 优酷
- **youku:show**
+ - **YouNowChannel**
+ - **YouNowLive**
+ - **YouNowMoment**
- **YouPorn**
- **YourUpload**
- **youtube**: YouTube.com
self.ie._sort_formats(formats)
expect_value(self, formats, expected_formats, None)
+ def test_parse_f4m_formats(self):
+ """Check _parse_f4m_formats output against stored f4m fixtures.
+
+ Each case is (fixture name, manifest URL, expected formats); the fixture
+ is read from ./test/testdata/f4m/<name>.f4m.
+ """
+ _TEST_CASES = [
+ (
+ # https://github.com/rg3/youtube-dl/issues/14660
+ 'custom_base_url',
+ 'http://api.new.livestream.com/accounts/6115179/events/6764928/videos/144884262.f4m',
+ [{
+ 'manifest_url': 'http://api.new.livestream.com/accounts/6115179/events/6764928/videos/144884262.f4m',
+ 'ext': 'flv',
+ 'format_id': '2148',
+ 'protocol': 'f4m',
+ 'tbr': 2148,
+ 'width': 1280,
+ 'height': 720,
+ }]
+ ),
+ ]
+
+ for f4m_file, f4m_url, expected_formats in _TEST_CASES:
+ with io.open('./test/testdata/f4m/%s.f4m' % f4m_file,
+ mode='r', encoding='utf-8') as f:
+ # The fixture is decoded as UTF-8 text and re-encoded to bytes
+ # before being handed to the XML parser.
+ formats = self.ie._parse_f4m_formats(
+ compat_etree_fromstring(f.read().encode('utf-8')),
+ f4m_url, None)
+ # Sort before comparing, as the expected list is compared as a whole.
+ self.ie._sort_formats(formats)
+ expect_value(self, formats, expected_formats, None)
if __name__ == '__main__':
unittest.main()
self.assertEqual(parse_duration('87 Min.'), 5220)
self.assertEqual(parse_duration('PT1H0.040S'), 3600.04)
self.assertEqual(parse_duration('PT00H03M30SZ'), 210)
+ self.assertEqual(parse_duration('P0Y0M0DT0H4M20.880S'), 260.88)
def test_fix_xml_ampersands(self):
self.assertEqual(
media))
-def _add_ns(prop):
- return '{http://ns.adobe.com/f4m/1.0}%s' % prop
+def _add_ns(prop, ver=1):
+ """Return prop qualified with the Adobe f4m namespace of major version ver.
+
+ ver defaults to 1, which yields the '{http://ns.adobe.com/f4m/1.0}' prefix.
+ """
+ return '{http://ns.adobe.com/f4m/%d.0}%s' % (ver, prop)
+
+
+def get_base_url(manifest):
+ """Return the manifest's baseURL text with surrounding whitespace stripped.
+
+ The element is looked up under both the f4m 1.0 and 2.0 namespaces.
+ default=None is passed to xpath_text, so a falsy result (presumably None
+ when the element is absent - confirm against xpath_text) is returned as is.
+ """
+ base_url = xpath_text(
+ manifest, [_add_ns('baseURL'), _add_ns('baseURL', 2)],
+ 'base URL', default=None)
+ if base_url:
+ base_url = base_url.strip()
+ return base_url
class F4mFD(FragmentFD):
rate, media = list(filter(
lambda f: int(f[0]) == requested_bitrate, formats))[0]
- base_url = compat_urlparse.urljoin(man_url, media.attrib['url'])
+ # Prefer baseURL for relative URLs as per 11.2 of F4M 3.0 spec.
+ man_base_url = get_base_url(doc) or man_url
+
+ base_url = compat_urlparse.urljoin(man_base_url, media.attrib['url'])
bootstrap_node = doc.find(_add_ns('bootstrapInfo'))
- # From Adobe F4M 3.0 spec:
- # The <baseURL> element SHALL be the base URL for all relative
- # (HTTP-based) URLs in the manifest. If <baseURL> is not present, said
- # URLs should be relative to the location of the containing document.
- boot_info, bootstrap_url = self._parse_bootstrap_node(bootstrap_node, man_url)
+ boot_info, bootstrap_url = self._parse_bootstrap_node(
+ bootstrap_node, man_base_url)
live = boot_info['live']
metadata_node = media.find(_add_ns('metadata'))
if metadata_node is not None:
if os.path.isfile(encodeFilename(self.ytdl_filename(ctx['filename']))):
self._read_ytdl_file(ctx)
if ctx['fragment_index'] > 0 and resume_len == 0:
- self.report_error(
+ self.report_warning(
'Inconsistent state of incomplete fragment download. '
'Restarting from the beginning...')
ctx['fragment_index'] = resume_len = 0
if line.startswith('#'):
if anvato_ad(line):
ad_frags += 1
+ ad_frag_next = True
continue
if ad_frag_next:
ad_frag_next = False
from .common import InfoExtractor
from ..compat import (
compat_parse_qs,
+ compat_str,
compat_urllib_parse_urlparse,
)
from ..utils import (
int_or_none,
NO_DEFAULT,
qualities,
+ try_get,
unified_strdate,
)
info = self._download_json(json_url, video_id)
player_info = info['videoJsonPlayer']
- vsr = player_info['VSR']
-
+ vsr = try_get(player_info, lambda x: x['VSR'], dict)
if not vsr:
- raise ExtractorError(
- 'Video %s is not available' % player_info.get('VID') or video_id,
- expected=True)
+ error = None
+ if try_get(player_info, lambda x: x['custom_msg']['type']) == 'error':
+ error = try_get(
+ player_info, lambda x: x['custom_msg']['msg'], compat_str)
+ if not error:
+ error = 'Video %s is not available' % player_info.get('VID') or video_id
+ raise ExtractorError(error, expected=True)
upload_date_str = player_info.get('shootingDate')
if not upload_date_str:
'url': 'http://www.telezueri.ch/62-show-zuerinews/13772-episode-sonntag-18-dezember-2016/32419-segment-massenabweisungen-beim-hiltl-club-wegen-pelzboom',
'info_dict': {
'id': '1_2444peh4',
- 'ext': 'mov',
+ 'ext': 'mp4',
'title': 'Massenabweisungen beim Hiltl Club wegen Pelzboom',
'description': 'md5:9ea9dd1b159ad65b36ddcf7f0d7c76a8',
'uploader_id': 'TeleZ?ri',
from __future__ import unicode_literals
import re
+import json
from .common import InfoExtractor
+from .gigya import GigyaBaseIE
+from ..compat import compat_HTTPError
from ..utils import (
- float_or_none,
+ ExtractorError,
strip_or_none,
+ float_or_none,
+ int_or_none,
+ parse_iso8601,
)
class CanvasIE(InfoExtractor):
- _VALID_URL = r'https?://mediazone\.vrt\.be/api/v1/(?P<site_id>canvas|een|ketnet)/assets/(?P<id>m[dz]-ast-[^/?#&]+)'
+ _VALID_URL = r'https?://mediazone\.vrt\.be/api/v1/(?P<site_id>canvas|een|ketnet|vrtvideo)/assets/(?P<id>[^/?#&]+)'
_TESTS = [{
'url': 'https://mediazone.vrt.be/api/v1/ketnet/assets/md-ast-4ac54990-ce66-4d00-a8ca-9eac86f4c475',
'md5': '90139b746a0a9bd7bb631283f6e2a64e',
'title': title,
'description': self._og_search_description(webpage),
}
+
+
+class VrtNUIE(GigyaBaseIE):
+ """Extractor for vrt.be/vrtnu pages.
+
+ Logs in through the Gigya helper when credentials are available, scrapes
+ metadata from the web page and delegates the media itself to CanvasIE via
+ a url_transparent result.
+ """
+ IE_DESC = 'VrtNU.be'
+ _VALID_URL = r'https?://(?:www\.)?vrt\.be/(?P<site_id>vrtnu)/(?:[^/]+/)*(?P<id>[^/?#&]+)'
+ _TESTS = [{
+ 'url': 'https://www.vrt.be/vrtnu/a-z/postbus-x/1/postbus-x-s1a1/',
+ 'info_dict': {
+ 'id': 'pbs-pub-2e2d8c27-df26-45c9-9dc6-90c78153044d$vid-90c932b1-e21d-4fb8-99b1-db7b49cf74de',
+ 'ext': 'flv',
+ 'title': 'De zwarte weduwe',
+ 'description': 'md5:d90c21dced7db869a85db89a623998d4',
+ 'duration': 1457.04,
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'season': '1',
+ 'season_number': 1,
+ 'episode_number': 1,
+ },
+ 'skip': 'This video is only available for registered users'
+ }]
+ _NETRC_MACHINE = 'vrtnu'
+ _APIKEY = '3_0Z2HujMtiWq_pkAjgnS2Md2E11a1AwZjYiBETtwNE-EoEHDINgtnvcAOpNgmrVGy'
+ _CONTEXT_ID = 'R3595707040'
+
+ def _real_initialize(self):
+ self._login()
+
+ def _login(self):
+ """Log in with the configured credentials; do nothing when none are set.
+
+ After the Gigya login, a token request is sent to token.vrt.be and is
+ retried on HTTP 401 (at most three attempts in total).
+ """
+ username, password = self._get_login_info()
+ if username is None:
+ return
+
+ auth_data = {
+ 'APIKey': self._APIKEY,
+ 'targetEnv': 'jssdk',
+ 'loginID': username,
+ 'password': password,
+ 'authMode': 'cookie',
+ }
+
+ auth_info = self._gigya_login(auth_data)
+
+ # Sometimes authentication fails for no good reason, retry
+ # NOTE(review): when all three attempts end in a 401 the loop simply
+ # ends without raising - confirm that this best-effort behaviour is
+ # intended.
+ login_attempt = 1
+ while login_attempt <= 3:
+ try:
+ # When requesting a token, no actual token is returned, but the
+ # necessary cookies are set.
+ self._request_webpage(
+ 'https://token.vrt.be',
+ None, note='Requesting a token', errnote='Could not get a token',
+ headers={
+ 'Content-Type': 'application/json',
+ 'Referer': 'https://www.vrt.be/vrtnu/',
+ },
+ data=json.dumps({
+ 'uid': auth_info['UID'],
+ 'uidsig': auth_info['UIDSignature'],
+ 'ts': auth_info['signatureTimestamp'],
+ 'email': auth_info['profile']['email'],
+ }).encode('utf-8'))
+ except ExtractorError as e:
+ # Only an HTTP 401 is retried; any other failure is re-raised.
+ if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
+ login_attempt += 1
+ self.report_warning('Authentication failed')
+ self._sleep(1, None, msg_template='Waiting for %(timeout)s seconds before trying again')
+ else:
+ raise e
+ else:
+ break
+
+ def _real_extract(self, url):
+ """Scrape page metadata and hand the video over to CanvasIE.
+
+ Returns either a url_result for a redirect target found in the
+ '.mssecurevideo.json' document, or a url_transparent info dict pointing
+ at the mediazone asset URL.
+ """
+ display_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, display_id)
+
+ title = self._html_search_regex(
+ r'(?ms)<h1 class="content__heading">(.+?)</h1>',
+ webpage, 'title').strip()
+
+ description = self._html_search_regex(
+ r'(?ms)<div class="content__description">(.+?)</div>',
+ webpage, 'description', default=None)
+
+ # Two alternative page layouts are tried for the season label.
+ season = self._html_search_regex(
+ [r'''(?xms)<div\ class="tabs__tab\ tabs__tab--active">\s*
+ <span>seizoen\ (.+?)</span>\s*
+ </div>''',
+ r'<option value="seizoen (\d{1,3})" data-href="[^"]+?" selected>'],
+ webpage, 'season', default=None)
+
+ season_number = int_or_none(season)
+
+ episode_number = int_or_none(self._html_search_regex(
+ r'''(?xms)<div\ class="content__episode">\s*
+ <abbr\ title="aflevering">afl</abbr>\s*<span>(\d+)</span>
+ </div>''',
+ webpage, 'episode_number', default=None))
+
+ # NOTE(review): the value is produced by parse_iso8601 but is returned
+ # under 'release_date' below - confirm the type expected for that field.
+ release_date = parse_iso8601(self._html_search_regex(
+ r'(?ms)<div class="content__broadcastdate">\s*<time\ datetime="(.+?)"',
+ webpage, 'release_date', default=None))
+
+ # If there's a ? or a # in the URL, remove them and everything after
+ clean_url = url.split('?')[0].split('#')[0].strip('/')
+ securevideo_url = clean_url + '.mssecurevideo.json'
+
+ try:
+ video = self._download_json(securevideo_url, display_id)
+ except ExtractorError as e:
+ # A 401 on the JSON document is reported as "login required".
+ if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
+ self.raise_login_required()
+ raise
+
+ # We are dealing with a '../<show>.relevant' URL
+ redirect_url = video.get('url')
+ if redirect_url:
+ return self.url_result(self._proto_relative_url(redirect_url, 'https:'))
+
+ # There is only one entry, but with an unknown key, so just get
+ # the first one
+ # NOTE(review): this assumes the document is a non-empty dict whose
+ # values are dicts - verify against real responses.
+ video_id = list(video.values())[0].get('videoid')
+
+ return {
+ '_type': 'url_transparent',
+ 'url': 'https://mediazone.vrt.be/api/v1/vrtvideo/assets/%s' % video_id,
+ 'ie_key': CanvasIE.ie_key(),
+ 'id': video_id,
+ 'display_id': display_id,
+ 'title': title,
+ 'description': description,
+ 'season': season,
+ 'season_number': season_number,
+ 'episode_number': episode_number,
+ 'release_date': release_date,
+ }
compat_urlparse,
compat_xml_parse_error,
)
-from ..downloader.f4m import remove_encrypted_media
+from ..downloader.f4m import (
+ get_base_url,
+ remove_encrypted_media,
+)
from ..utils import (
NO_DEFAULT,
age_restricted,
media_nodes = remove_encrypted_media(media_nodes)
if not media_nodes:
return formats
- base_url = xpath_text(
- manifest, ['{http://ns.adobe.com/f4m/1.0}baseURL', '{http://ns.adobe.com/f4m/2.0}baseURL'],
- 'base URL', default=None)
- if base_url:
- base_url = base_url.strip()
+
+ manifest_base_url = get_base_url(manifest)
bootstrap_info = xpath_element(
manifest, ['{http://ns.adobe.com/f4m/1.0}bootstrapInfo', '{http://ns.adobe.com/f4m/2.0}bootstrapInfo'],
continue
manifest_url = (
media_url if media_url.startswith('http://') or media_url.startswith('https://')
- else ((base_url or '/'.join(manifest_url.split('/')[:-1])) + '/' + media_url))
+ else ((manifest_base_url or '/'.join(manifest_url.split('/')[:-1])) + '/' + media_url))
# If media_url is itself a f4m manifest do the recursive extraction
# since bitrates in parent manifest (this one) and media_url manifest
# may differ leading to inability to resolve the format by requested
'url': manifest_url,
'manifest_url': manifest_url,
'ext': 'flv' if bootstrap_info is not None else None,
+ 'protocol': 'f4m',
'tbr': tbr,
'width': width,
'height': height,
media_url = media.get('URI')
if media_url:
format_id = []
- for v in (group_id, name):
+ for v in (m3u8_id, group_id, name):
if v:
format_id.append(v)
f = {
return formats
def _extract_wowza_formats(self, url, video_id, m3u8_entry_protocol='m3u8_native', skip_protocols=[]):
+ query = compat_urlparse.urlparse(url).query
url = re.sub(r'/(?:manifest|playlist|jwplayer)\.(?:m3u8|f4m|mpd|smil)', '', url)
url_base = self._search_regex(
r'(?:(?:https?|rtmp|rtsp):)?(//[^?]+)', url, 'format url')
http_base_url = '%s:%s' % ('http', url_base)
formats = []
+
+ def manifest_url(manifest):
+ # Build the URL of the given manifest file under http_base_url and
+ # re-attach the query string of the original URL when it has one.
+ m_url = '%s/%s' % (http_base_url, manifest)
+ if query:
+ m_url += '?%s' % query
+ return m_url
+
if 'm3u8' not in skip_protocols:
formats.extend(self._extract_m3u8_formats(
- http_base_url + '/playlist.m3u8', video_id, 'mp4',
+ manifest_url('playlist.m3u8'), video_id, 'mp4',
m3u8_entry_protocol, m3u8_id='hls', fatal=False))
if 'f4m' not in skip_protocols:
formats.extend(self._extract_f4m_formats(
- http_base_url + '/manifest.f4m',
+ manifest_url('manifest.f4m'),
video_id, f4m_id='hds', fatal=False))
if 'dash' not in skip_protocols:
formats.extend(self._extract_mpd_formats(
- http_base_url + '/manifest.mpd',
+ manifest_url('manifest.mpd'),
video_id, mpd_id='dash', fatal=False))
if re.search(r'(?:/smil:|\.smil)', url_base):
if 'smil' not in skip_protocols:
rtmp_formats = self._extract_smil_formats(
- http_base_url + '/jwplayer.smil',
+ manifest_url('jwplayer.smil'),
video_id, fatal=False)
for rtmp_format in rtmp_formats:
rtsp_format = rtmp_format.copy()
from __future__ import unicode_literals
from .common import InfoExtractor
-from ..utils import unified_strdate
+from ..compat import compat_str
+from ..utils import (
+ float_or_none,
+ unified_strdate,
+)
class DctpTvIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?dctp\.tv/(#/)?filme/(?P<id>.+?)/$'
+ _VALID_URL = r'https?://(?:www\.)?dctp\.tv/(?:#/)?filme/(?P<id>[^/?#&]+)'
_TEST = {
'url': 'http://www.dctp.tv/filme/videoinstallation-fuer-eine-kaufhausfassade/',
- 'md5': '174dd4a8a6225cf5655952f969cfbe24',
'info_dict': {
'id': '95eaa4f33dad413aa17b4ee613cccc6c',
'display_id': 'videoinstallation-fuer-eine-kaufhausfassade',
- 'ext': 'mp4',
+ 'ext': 'flv',
'title': 'Videoinstallation für eine Kaufhausfassade',
'description': 'Kurzfilm',
'upload_date': '20110407',
'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 71.24,
+ },
+ 'params': {
+ # rtmp download
+ 'skip_download': True,
},
}
def _real_extract(self, url):
- video_id = self._match_id(url)
- webpage = self._download_webpage(url, video_id)
+ display_id = self._match_id(url)
- object_id = self._html_search_meta('DC.identifier', webpage)
+ webpage = self._download_webpage(url, display_id)
- servers_json = self._download_json(
- 'http://www.dctp.tv/elastic_streaming_client/get_streaming_server/',
- video_id, note='Downloading server list')
- server = servers_json[0]['server']
- m3u8_path = self._search_regex(
- r'\'([^\'"]+/playlist\.m3u8)"', webpage, 'm3u8 path')
- formats = self._extract_m3u8_formats(
- 'http://%s%s' % (server, m3u8_path), video_id, ext='mp4',
- entry_protocol='m3u8_native')
+ video_id = self._html_search_meta(
+ 'DC.identifier', webpage, 'video id',
+ default=None) or self._search_regex(
+ r'id=["\']uuid[^>]+>([^<]+)<', webpage, 'video id')
title = self._og_search_title(webpage)
+
+ servers = self._download_json(
+ 'http://www.dctp.tv/streaming_servers/', display_id,
+ note='Downloading server list', fatal=False)
+
+ if servers:
+ endpoint = next(
+ server['endpoint']
+ for server in servers
+ if isinstance(server.get('endpoint'), compat_str) and
+ 'cloudfront' in server['endpoint'])
+ else:
+ endpoint = 'rtmpe://s2pqqn4u96e4j8.cloudfront.net/cfx/st/'
+
+ app = self._search_regex(
+ r'^rtmpe?://[^/]+/(?P<app>.*)$', endpoint, 'app')
+
+ formats = [{
+ 'url': endpoint,
+ 'app': app,
+ 'play_path': 'mp4:%s_dctp_0500_4x3.m4v' % video_id,
+ 'page_url': url,
+ 'player_url': 'http://svm-prod-dctptv-static.s3.amazonaws.com/dctptv-relaunch2012-109.swf',
+ 'ext': 'flv',
+ }]
+
description = self._html_search_meta('DC.description', webpage)
upload_date = unified_strdate(
self._html_search_meta('DC.date.created', webpage))
thumbnail = self._og_search_thumbnail(webpage)
+ duration = float_or_none(self._search_regex(
+ r'id=["\']duration_in_ms[^+]>(\d+)', webpage, 'duration',
+ default=None), scale=1000)
return {
- 'id': object_id,
+ 'id': video_id,
'title': title,
'formats': formats,
- 'display_id': video_id,
+ 'display_id': display_id,
'description': description,
'upload_date': upload_date,
'thumbnail': thumbnail,
+ 'duration': duration,
}
if all(logout_pattern not in response
for logout_pattern in ['href="/accounts/logout/"', '>Log out<']):
error = self._html_search_regex(
- r'(?s)class="hidden-xs prompt"[^>]*>(.+?)<',
+ r'(?s)<h\d[^>]+\bclass="hidden-xs prompt"[^>]*>(.+?)</h\d',
response, 'error message', default=None)
if error:
raise ExtractorError('Unable to login: %s' % error, expected=True)
'tbr': int_or_none(bitrate),
'ext': link.get('FileFormat'),
'vcodec': 'none' if kind == 'AudioResource' else None,
+ 'preference': preference,
})
subtitles_list = asset.get('SubtitlesList')
if isinstance(subtitles_list, list):
from __future__ import unicode_literals
from .common import InfoExtractor
+from ..compat import compat_str
from ..utils import (
+ determine_ext,
int_or_none,
try_get,
unified_timestamp,
'url': 'https://egghead.io/courses/professor-frisby-introduces-composable-functional-javascript',
'playlist_count': 29,
'info_dict': {
- 'id': 'professor-frisby-introduces-composable-functional-javascript',
+ 'id': '72',
'title': 'Professor Frisby Introduces Composable Functional JavaScript',
'description': 're:(?s)^This course teaches the ubiquitous.*You\'ll start composing functionality before you know it.$',
},
def _real_extract(self, url):
playlist_id = self._match_id(url)
+ lessons = self._download_json(
+ 'https://egghead.io/api/v1/series/%s/lessons' % playlist_id,
+ playlist_id, 'Downloading course lessons JSON')
+
+ entries = []
+ for lesson in lessons:
+ lesson_url = lesson.get('http_url')
+ if not lesson_url or not isinstance(lesson_url, compat_str):
+ continue
+ lesson_id = lesson.get('id')
+ if lesson_id:
+ lesson_id = compat_str(lesson_id)
+ entries.append(self.url_result(
+ lesson_url, ie=EggheadLessonIE.ie_key(), video_id=lesson_id))
+
course = self._download_json(
- 'https://egghead.io/api/v1/series/%s' % playlist_id, playlist_id)
+ 'https://egghead.io/api/v1/series/%s' % playlist_id,
+ playlist_id, 'Downloading course JSON', fatal=False) or {}
- entries = [
- self.url_result(
- 'wistia:%s' % lesson['wistia_id'], ie='Wistia',
- video_id=lesson['wistia_id'], video_title=lesson.get('title'))
- for lesson in course['lessons'] if lesson.get('wistia_id')]
+ playlist_id = course.get('id')
+ if playlist_id:
+ playlist_id = compat_str(playlist_id)
return self.playlist_result(
entries, playlist_id, course.get('title'),
class EggheadLessonIE(InfoExtractor):
IE_DESC = 'egghead.io lesson'
IE_NAME = 'egghead:lesson'
- _VALID_URL = r'https://egghead\.io/lessons/(?P<id>[^/?#&]+)'
- _TEST = {
+ _VALID_URL = r'https://egghead\.io/(?:api/v1/)?lessons/(?P<id>[^/?#&]+)'
+ _TESTS = [{
'url': 'https://egghead.io/lessons/javascript-linear-data-flow-with-container-style-types-box',
'info_dict': {
- 'id': 'fv5yotjxcg',
+ 'id': '1196',
+ 'display_id': 'javascript-linear-data-flow-with-container-style-types-box',
'ext': 'mp4',
'title': 'Create linear data flow with container style types (Box)',
'description': 'md5:9aa2cdb6f9878ed4c39ec09e85a8150e',
},
'params': {
'skip_download': True,
+ 'format': 'bestvideo',
},
- }
+ }, {
+ 'url': 'https://egghead.io/api/v1/lessons/react-add-redux-to-a-react-application',
+ 'only_matching': True,
+ }]
def _real_extract(self, url):
- lesson_id = self._match_id(url)
+ display_id = self._match_id(url)
lesson = self._download_json(
- 'https://egghead.io/api/v1/lessons/%s' % lesson_id, lesson_id)
+ 'https://egghead.io/api/v1/lessons/%s' % display_id, display_id)
+
+ lesson_id = compat_str(lesson['id'])
+ title = lesson['title']
+
+ formats = []
+ for _, format_url in lesson['media_urls'].items():
+ if not format_url or not isinstance(format_url, compat_str):
+ continue
+ ext = determine_ext(format_url)
+ if ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ format_url, lesson_id, 'mp4', entry_protocol='m3u8',
+ m3u8_id='hls', fatal=False))
+ elif ext == 'mpd':
+ formats.extend(self._extract_mpd_formats(
+ format_url, lesson_id, mpd_id='dash', fatal=False))
+ else:
+ formats.append({
+ 'url': format_url,
+ })
+ self._sort_formats(formats)
return {
- '_type': 'url_transparent',
- 'ie_key': 'Wistia',
- 'url': 'wistia:%s' % lesson['wistia_id'],
- 'id': lesson['wistia_id'],
- 'title': lesson.get('title'),
+ 'id': lesson_id,
+ 'display_id': display_id,
+ 'title': title,
'description': lesson.get('summary'),
'thumbnail': lesson.get('thumb_nail'),
'timestamp': unified_timestamp(lesson.get('published_at')),
'duration': int_or_none(lesson.get('duration')),
'view_count': int_or_none(lesson.get('plays_count')),
'tags': try_get(lesson, lambda x: x['tag_list'], list),
+ 'series': try_get(
+ lesson, lambda x: x['series']['title'], compat_str),
+ 'formats': formats,
}
class EpornerIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?eporner\.com/hd-porn/(?P<id>\w+)(?:/(?P<display_id>[\w-]+))?'
+ _VALID_URL = r'https?://(?:www\.)?eporner\.com/(?:hd-porn|embed)/(?P<id>\w+)(?:/(?P<display_id>[\w-]+))?'
_TESTS = [{
'url': 'http://www.eporner.com/hd-porn/95008/Infamous-Tiffany-Teen-Strip-Tease-Video/',
'md5': '39d486f046212d8e1b911c52ab4691f8',
}, {
'url': 'http://www.eporner.com/hd-porn/3YRUtzMcWn0',
'only_matching': True,
+ }, {
+ 'url': 'http://www.eporner.com/hd-porn/3YRUtzMcWn0',
+ 'only_matching': True,
}]
def _real_extract(self, url):
from .canvas import (
CanvasIE,
CanvasEenIE,
+ VrtNUIE,
)
from .carambatv import (
CarambaTVIE,
from .hitrecord import HitRecordIE
from .hornbunny import HornBunnyIE
from .hotnewhiphop import HotNewHipHopIE
-from .hotstar import HotStarIE
+from .hotstar import (
+ HotStarIE,
+ HotStarPlaylistIE,
+)
from .howcast import HowcastIE
from .howstuffworks import HowStuffWorksIE
from .hrti import (
from .myspace import MySpaceIE, MySpaceAlbumIE
from .myspass import MySpassIE
from .myvi import MyviIE
-from .myvideo import MyVideoIE
from .myvidster import MyVidsterIE
from .nationalgeographic import (
NationalGeographicVideoIE,
from .senateisvp import SenateISVPIE
from .sendtonews import SendtoNewsIE
from .servingsys import ServingSysIE
+from .servus import ServusIE
from .sexu import SexuIE
from .shahid import ShahidIE
from .shared import (
from .tweakers import TweakersIE
from .twentyfourvideo import TwentyFourVideoIE
from .twentymin import TwentyMinutenIE
-from .twentytwotracks import (
- TwentyTwoTracksIE,
- TwentyTwoTracksGenreIE
-)
from .twitch import (
TwitchVideoIE,
TwitchChapterIE,
from .uktvplay import UKTVPlayIE
from .digiteka import DigitekaIE
from .unistra import UnistraIE
+from .unity import UnityIE
from .uol import UOLIE
from .uplynk import (
UplynkIE,
YoukuIE,
YoukuShowIE,
)
+from .younow import (
+ YouNowLiveIE,
+ YouNowChannelIE,
+ YouNowMomentIE,
+)
from .youporn import YouPornIE
from .yourupload import YourUploadIE
from .youtube import (
from .adobepass import AdobePassIE
from ..utils import (
- update_url_query,
extract_attributes,
+ int_or_none,
parse_age_limit,
smuggle_url,
+ update_url_query,
)
class FXNetworksIE(AdobePassIE):
_VALID_URL = r'https?://(?:www\.)?(?:fxnetworks|simpsonsworld)\.com/video/(?P<id>\d+)'
_TESTS = [{
- 'url': 'http://www.fxnetworks.com/video/719841347694',
- 'md5': '1447d4722e42ebca19e5232ab93abb22',
+ 'url': 'http://www.fxnetworks.com/video/1032565827847',
+ 'md5': '8d99b97b4aa7a202f55b6ed47ea7e703',
'info_dict': {
- 'id': '719841347694',
+ 'id': 'dRzwHC_MMqIv',
'ext': 'mp4',
- 'title': 'Vanpage',
- 'description': 'F*ck settling down. You\'re the Worst returns for an all new season August 31st on FXX.',
+ 'title': 'First Look: Better Things - Season 2',
+ 'description': 'Because real life is like a fart. Watch this FIRST LOOK to see what inspired the new season of Better Things.',
'age_limit': 14,
'uploader': 'NEWA-FNG-FX',
- 'upload_date': '20160706',
- 'timestamp': 1467844741,
+ 'upload_date': '20170825',
+ 'timestamp': 1503686274,
+ 'episode_number': 0,
+ 'season_number': 2,
+ 'series': 'Better Things',
},
'add_ie': ['ThePlatform'],
}, {
'id': video_id,
'title': title,
'url': smuggle_url(update_url_query(release_url, query), {'force_smil_url': True}),
+ 'series': video_data.get('data-show-title'),
+ 'episode_number': int_or_none(video_data.get('data-episode')),
+ 'season_number': int_or_none(video_data.get('data-season')),
'thumbnail': video_data.get('data-large-thumb'),
'age_limit': parse_age_limit(rating),
'ie_key': 'ThePlatform',
class GameSpotIE(OnceIE):
- _VALID_URL = r'https?://(?:www\.)?gamespot\.com/.*-(?P<id>\d+)/?'
+ _VALID_URL = r'https?://(?:www\.)?gamespot\.com/videos/(?:[^/]+/\d+-|embed/)(?P<id>\d+)'
_TESTS = [{
'url': 'http://www.gamespot.com/videos/arma-3-community-guide-sitrep-i/2300-6410818/',
'md5': 'b2a30deaa8654fcccd43713a6b6a4825',
'params': {
'skip_download': True, # m3u8 downloads
},
+ }, {
+ 'url': 'https://www.gamespot.com/videos/embed/6439218/',
+ 'only_matching': True,
}]
def _real_extract(self, url):
manifest_url = f4m_url
formats.extend(self._extract_f4m_formats(
f4m_url + '?hdcore=3.7.0', page_id, f4m_id='hds', fatal=False))
- m3u8_url = streams.get('m3u8_stream')
+ m3u8_url = dict_get(streams, ('m3u8_stream', 'adaptive_stream'))
if m3u8_url:
manifest_url = m3u8_url
m3u8_formats = self._extract_m3u8_formats(
m3u8_id='hls', fatal=False)
formats.extend(m3u8_formats)
progressive_url = dict_get(
- streams, ('progressive_hd', 'progressive_high', 'progressive_low'))
+ streams, ('progressive_hd', 'progressive_high', 'progressive_low', 'other_lr'))
if progressive_url and manifest_url:
qualities_basename = self._search_regex(
r'/([^/]+)\.csmil/',
--- /dev/null
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+from ..utils import (
+ ExtractorError,
+ urlencode_postdata,
+)
+
+
+class GigyaBaseIE(InfoExtractor):
+ """Base extractor providing a shared Gigya accounts login helper."""
+ def _gigya_login(self, auth_data):
+ """Send auth_data, form-encoded, to the Gigya accounts.login endpoint.
+
+ Returns the decoded JSON response. Raises ExtractorError (expected=True)
+ when the response carries 'errorDetails' or 'errorMessage'.
+ """
+ auth_info = self._download_json(
+ 'https://accounts.eu1.gigya.com/accounts.login', None,
+ note='Logging in', errnote='Unable to log in',
+ data=urlencode_postdata(auth_data))
+
+ # 'errorDetails' takes precedence over 'errorMessage' when both are set.
+ error_message = auth_info.get('errorDetails') or auth_info.get('errorMessage')
+ if error_message:
+ raise ExtractorError(
+ 'Unable to login: %s' % error_message, expected=True)
+ return auth_info
# coding: utf-8
from __future__ import unicode_literals
+import re
+
from .common import InfoExtractor
+from ..compat import compat_str
from ..utils import (
- ExtractorError,
determine_ext,
+ ExtractorError,
int_or_none,
)
-class HotStarIE(InfoExtractor):
+class HotStarBaseIE(InfoExtractor):
+ _GEO_COUNTRIES = ['IN']
+
+ def _download_json(self, *args, **kwargs):
+ response = super(HotStarBaseIE, self)._download_json(*args, **kwargs)
+ if response['resultCode'] != 'OK':
+ if kwargs.get('fatal'):
+ raise ExtractorError(
+ response['errorDescription'], expected=True)
+ return None
+ return response['resultObj']
+
+ def _download_content_info(self, content_id):
+ return self._download_json(
+ 'https://account.hotstar.com/AVS/besc', content_id, query={
+ 'action': 'GetAggregatedContentDetails',
+ 'appVersion': '5.0.40',
+ 'channel': 'PCTV',
+ 'contentId': content_id,
+ })['contentInfo'][0]
+
+
+class HotStarIE(HotStarBaseIE):
_VALID_URL = r'https?://(?:www\.)?hotstar\.com/(?:.+?[/-])?(?P<id>\d{10})'
_TESTS = [{
'url': 'http://www.hotstar.com/on-air-with-aib--english-1000076273',
'info_dict': {
'id': '1000076273',
'ext': 'mp4',
- 'title': 'On Air With AIB - English',
+ 'title': 'On Air With AIB',
'description': 'md5:c957d8868e9bc793ccb813691cc4c434',
'timestamp': 1447227000,
'upload_date': '20151111',
'only_matching': True,
}]
- def _download_json(self, url_or_request, video_id, note='Downloading JSON metadata', fatal=True, query=None):
- json_data = super(HotStarIE, self)._download_json(
- url_or_request, video_id, note, fatal=fatal, query=query)
- if json_data['resultCode'] != 'OK':
- if fatal:
- raise ExtractorError(json_data['errorDescription'])
- return None
- return json_data['resultObj']
-
def _real_extract(self, url):
video_id = self._match_id(url)
- video_data = self._download_json(
- 'http://account.hotstar.com/AVS/besc', video_id, query={
- 'action': 'GetAggregatedContentDetails',
- 'channel': 'PCTV',
- 'contentId': video_id,
- })['contentInfo'][0]
+
+ video_data = self._download_content_info(video_id)
+
title = video_data['episodeTitle']
if video_data.get('encrypted') == 'Y':
'episode_number': int_or_none(video_data.get('episodeNumber')),
'series': video_data.get('contentTitle'),
}
+
+
+class HotStarPlaylistIE(HotStarBaseIE):
+ """Playlist extractor for hotstar.com/tv/<show>/<content id>/<type>/<id> URLs."""
+ IE_NAME = 'hotstar:playlist'
+ _VALID_URL = r'(?P<url>https?://(?:www\.)?hotstar\.com/tv/[^/]+/(?P<content_id>\d+))/(?P<type>[^/]+)/(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'http://www.hotstar.com/tv/pratidaan/14982/episodes/14812/9993',
+ 'info_dict': {
+ 'id': '14812',
+ },
+ 'playlist_mincount': 75,
+ }, {
+ 'url': 'http://www.hotstar.com/tv/pratidaan/14982/popular-clips/9998/9998',
+ 'only_matching': True,
+ }]
+ # Maps the <type> URL path segment to the 'type' value sent in the search query.
+ _ITEM_TYPES = {
+ 'episodes': 'EPISODE',
+ 'popular-clips': 'CLIPS',
+ }
+
+ def _real_extract(self, url):
+ """Return a playlist of HotStarIE url results for the matched URL."""
+ mobj = re.match(self._VALID_URL, url)
+ base_url = mobj.group('url')
+ content_id = mobj.group('content_id')
+ playlist_type = mobj.group('type')
+
+ # The playlist id is the 'categoryId' of the content info, not the
+ # trailing number of the URL.
+ content_info = self._download_content_info(content_id)
+ playlist_id = compat_str(content_info['categoryId'])
+
+ collection = self._download_json(
+ 'https://search.hotstar.com/AVS/besc', playlist_id, query={
+ 'action': 'SearchContents',
+ 'appVersion': '5.0.40',
+ 'channel': 'PCTV',
+ 'moreFilters': 'series:%s;' % playlist_id,
+ 'query': '*',
+ 'searchOrder': 'last_broadcast_date desc,year desc,title asc',
+ # Unknown URL types are queried as 'EPISODE'.
+ 'type': self._ITEM_TYPES.get(playlist_type, 'EPISODE'),
+ })
+
+ # Entry URLs are built as <base_url>/_/<contentId>; results without a
+ # contentId are left out.
+ entries = [
+ self.url_result(
+ '%s/_/%s' % (base_url, video['contentId']),
+ ie=HotStarIE.ie_key(), video_id=video['contentId'])
+ for video in collection['response']['docs']
+ if video.get('contentId')]
+
+ return self.playlist_result(entries, playlist_id)
import re
-from .common import InfoExtractor
+from .gigya import GigyaBaseIE
+
from ..compat import compat_str
from ..utils import (
- ExtractorError,
int_or_none,
parse_duration,
try_get,
unified_timestamp,
- urlencode_postdata,
)
-class MedialaanIE(InfoExtractor):
+class MedialaanIE(GigyaBaseIE):
_VALID_URL = r'''(?x)
https?://
(?:www\.|nieuws\.)?
'password': password,
}
- auth_info = self._download_json(
- 'https://accounts.eu1.gigya.com/accounts.login', None,
- note='Logging in', errnote='Unable to log in',
- data=urlencode_postdata(auth_data))
-
- error_message = auth_info.get('errorDetails') or auth_info.get('errorMessage')
- if error_message:
- raise ExtractorError(
- 'Unable to login: %s' % error_message, expected=True)
+ auth_info = self._gigya_login(auth_data)
self._uid = auth_info['UID']
self._uid_signature = auth_info['UIDSignature']
'id': 'GLT9749789991',
'ext': 'mp3',
'title': '#97 What Kind Of Idiot Gets Phished?',
- 'thumbnail': 're:^https://.*\.png.*$',
+ 'thumbnail': r're:^https://.*\.png.*$',
'duration': 1776.26375,
'author': 'Reply All',
},
+++ /dev/null
-from __future__ import unicode_literals
-
-import binascii
-import base64
-import hashlib
-import re
-import json
-
-from .common import InfoExtractor
-from ..compat import (
- compat_ord,
- compat_urllib_parse_unquote,
- compat_urllib_parse_urlencode,
-)
-from ..utils import (
- ExtractorError,
- sanitized_Request,
-)
-
-
-class MyVideoIE(InfoExtractor):
- _WORKING = False
- _VALID_URL = r'https?://(?:www\.)?myvideo\.de/(?:[^/]+/)?watch/(?P<id>[0-9]+)/[^?/]+.*'
- IE_NAME = 'myvideo'
- _TEST = {
- 'url': 'http://www.myvideo.de/watch/8229274/bowling_fail_or_win',
- 'md5': '2d2753e8130479ba2cb7e0a37002053e',
- 'info_dict': {
- 'id': '8229274',
- 'ext': 'flv',
- 'title': 'bowling-fail-or-win',
- }
- }
-
- # Original Code from: https://github.com/dersphere/plugin.video.myvideo_de.git
- # Released into the Public Domain by Tristan Fischer on 2013-05-19
- # https://github.com/rg3/youtube-dl/pull/842
- def __rc4crypt(self, data, key):
- x = 0
- box = list(range(256))
- for i in list(range(256)):
- x = (x + box[i] + compat_ord(key[i % len(key)])) % 256
- box[i], box[x] = box[x], box[i]
- x = 0
- y = 0
- out = ''
- for char in data:
- x = (x + 1) % 256
- y = (y + box[x]) % 256
- box[x], box[y] = box[y], box[x]
- out += chr(compat_ord(char) ^ box[(box[x] + box[y]) % 256])
- return out
-
- def __md5(self, s):
- return hashlib.md5(s).hexdigest().encode()
-
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('id')
-
- GK = (
- b'WXpnME1EZGhNRGhpTTJNM01XVmhOREU0WldNNVpHTTJOakpt'
- b'TW1FMU5tVTBNR05pWkRaa05XRXhNVFJoWVRVd1ptSXhaVEV3'
- b'TnpsbA0KTVRkbU1tSTRNdz09'
- )
-
- # Get video webpage
- webpage_url = 'http://www.myvideo.de/watch/%s' % video_id
- webpage = self._download_webpage(webpage_url, video_id)
-
- mobj = re.search('source src=\'(.+?)[.]([^.]+)\'', webpage)
- if mobj is not None:
- self.report_extraction(video_id)
- video_url = mobj.group(1) + '.flv'
-
- video_title = self._html_search_regex('<title>([^<]+)</title>',
- webpage, 'title')
-
- return {
- 'id': video_id,
- 'url': video_url,
- 'title': video_title,
- }
-
- mobj = re.search(r'data-video-service="/service/data/video/%s/config' % video_id, webpage)
- if mobj is not None:
- request = sanitized_Request('http://www.myvideo.de/service/data/video/%s/config' % video_id, '')
- response = self._download_webpage(request, video_id,
- 'Downloading video info')
- info = json.loads(base64.b64decode(response).decode('utf-8'))
- return {
- 'id': video_id,
- 'title': info['title'],
- 'url': info['streaming_url'].replace('rtmpe', 'rtmpt'),
- 'play_path': info['filename'],
- 'ext': 'flv',
- 'thumbnail': info['thumbnail'][0]['url'],
- }
-
- # try encxml
- mobj = re.search('var flashvars={(.+?)}', webpage)
- if mobj is None:
- raise ExtractorError('Unable to extract video')
-
- params = {}
- encxml = ''
- sec = mobj.group(1)
- for (a, b) in re.findall('(.+?):\'(.+?)\',?', sec):
- if not a == '_encxml':
- params[a] = b
- else:
- encxml = compat_urllib_parse_unquote(b)
- if not params.get('domain'):
- params['domain'] = 'www.myvideo.de'
- xmldata_url = '%s?%s' % (encxml, compat_urllib_parse_urlencode(params))
- if 'flash_playertype=MTV' in xmldata_url:
- self._downloader.report_warning('avoiding MTV player')
- xmldata_url = (
- 'http://www.myvideo.de/dynamic/get_player_video_xml.php'
- '?flash_playertype=D&ID=%s&_countlimit=4&autorun=yes'
- ) % video_id
-
- # get enc data
- enc_data = self._download_webpage(xmldata_url, video_id).split('=')[1]
- enc_data_b = binascii.unhexlify(enc_data)
- sk = self.__md5(
- base64.b64decode(base64.b64decode(GK)) +
- self.__md5(
- str(video_id).encode('utf-8')
- )
- )
- dec_data = self.__rc4crypt(enc_data_b, sk)
-
- # extracting infos
- self.report_extraction(video_id)
-
- video_url = None
- mobj = re.search('connectionurl=\'(.*?)\'', dec_data)
- if mobj:
- video_url = compat_urllib_parse_unquote(mobj.group(1))
- if 'myvideo2flash' in video_url:
- self.report_warning(
- 'Rewriting URL to use unencrypted rtmp:// ...',
- video_id)
- video_url = video_url.replace('rtmpe://', 'rtmp://')
-
- if not video_url:
- # extract non rtmp videos
- mobj = re.search('path=\'(http.*?)\' source=\'(.*?)\'', dec_data)
- if mobj is None:
- raise ExtractorError('unable to extract url')
- video_url = compat_urllib_parse_unquote(mobj.group(1)) + compat_urllib_parse_unquote(mobj.group(2))
-
- video_file = self._search_regex('source=\'(.*?)\'', dec_data, 'video file')
- video_file = compat_urllib_parse_unquote(video_file)
-
- if not video_file.endswith('f4m'):
- ppath, prefix = video_file.split('.')
- video_playpath = '%s:%s' % (prefix, ppath)
- else:
- video_playpath = ''
-
- video_swfobj = self._search_regex(r'swfobject\.embedSWF\(\'(.+?)\'', webpage, 'swfobj')
- video_swfobj = compat_urllib_parse_unquote(video_swfobj)
-
- video_title = self._html_search_regex("<h1(?: class='globalHd')?>(.*?)</h1>",
- webpage, 'title')
-
- return {
- 'id': video_id,
- 'url': video_url,
- 'tc_url': video_url,
- 'title': video_title,
- 'ext': 'flv',
- 'play_path': video_playpath,
- 'player_url': video_swfobj,
- }
class NBCIE(AdobePassIE):
- _VALID_URL = r'https?(?P<permalink>://(?:www\.)?nbc\.com/[^/]+/video/[^/]+/(?P<id>n?\d+))'
+ _VALID_URL = r'https?(?P<permalink>://(?:www\.)?nbc\.com/(?:classic-tv/)?[^/]+/video/[^/]+/(?P<id>n?\d+))'
_TESTS = [
{
'skip_download': True,
},
'skip': 'Only works from US',
- }
+ },
+ {
+ 'url': 'https://www.nbc.com/classic-tv/charles-in-charge/video/charles-in-charge-pilot/n3310',
+ 'only_matching': True,
+ },
]
def _real_extract(self, url):
+# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
+from ..compat import (
+ compat_urllib_parse_unquote_plus
+)
from ..utils import (
- int_or_none,
+ parse_duration,
remove_end,
unified_strdate,
+ urljoin
)
class NDTVIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?ndtv\.com/video/(?:[^/]+/)+[^/?^&]+-(?P<id>\d+)'
-
- _TEST = {
- 'url': 'http://www.ndtv.com/video/news/news/ndtv-exclusive-don-t-need-character-certificate-from-rahul-gandhi-says-arvind-kejriwal-300710',
- 'md5': '39f992dbe5fb531c395d8bbedb1e5e88',
- 'info_dict': {
- 'id': '300710',
- 'ext': 'mp4',
- 'title': "NDTV exclusive: Don't need character certificate from Rahul Gandhi, says Arvind Kejriwal",
- 'description': 'md5:ab2d4b4a6056c5cb4caa6d729deabf02',
- 'upload_date': '20131208',
- 'duration': 1327,
- 'thumbnail': r're:https?://.*\.jpg',
- },
- }
+ _VALID_URL = r'https?://(?:[^/]+\.)?ndtv\.com/(?:[^/]+/)*videos?/?(?:[^/]+/)*[^/?^&]+-(?P<id>\d+)'
+
+ _TESTS = [
+ {
+ 'url': 'https://khabar.ndtv.com/video/show/prime-time/prime-time-ill-system-and-poor-education-468818',
+ 'md5': '78efcf3880ef3fd9b83d405ca94a38eb',
+ 'info_dict': {
+ 'id': '468818',
+ 'ext': 'mp4',
+ 'title': "प्राइम टाइम: सिस्टम बीमार, स्कूल बदहाल",
+ 'description': 'md5:f410512f1b49672e5695dea16ef2731d',
+ 'upload_date': '20170928',
+ 'duration': 2218,
+ 'thumbnail': r're:https?://.*\.jpg',
+ }
+ },
+ {
+ # __filename is url
+ 'url': 'http://movies.ndtv.com/videos/cracker-free-diwali-wishes-from-karan-johar-kriti-sanon-other-stars-470304',
+ 'md5': 'f1d709352305b44443515ac56b45aa46',
+ 'info_dict': {
+ 'id': '470304',
+ 'ext': 'mp4',
+ 'title': "Cracker-Free Diwali Wishes From Karan Johar, Kriti Sanon & Other Stars",
+ 'description': 'md5:f115bba1adf2f6433fa7c1ade5feb465',
+ 'upload_date': '20171019',
+ 'duration': 137,
+ 'thumbnail': r're:https?://.*\.jpg',
+ }
+ },
+ {
+ 'url': 'https://www.ndtv.com/video/news/news/delhi-s-air-quality-status-report-after-diwali-is-very-poor-470372',
+ 'only_matching': True
+ },
+ {
+ 'url': 'https://auto.ndtv.com/videos/the-cnb-daily-october-13-2017-469935',
+ 'only_matching': True
+ },
+ {
+ 'url': 'https://sports.ndtv.com/cricket/videos/2nd-t20i-rock-thrown-at-australia-cricket-team-bus-after-win-over-india-469764',
+ 'only_matching': True
+ },
+ {
+ 'url': 'http://gadgets.ndtv.com/videos/uncharted-the-lost-legacy-review-465568',
+ 'only_matching': True
+ },
+ {
+ 'url': 'http://profit.ndtv.com/videos/news/video-indian-economy-on-very-solid-track-international-monetary-fund-chief-470040',
+ 'only_matching': True
+ },
+ {
+ 'url': 'http://food.ndtv.com/video-basil-seeds-coconut-porridge-419083',
+ 'only_matching': True
+ },
+ {
+ 'url': 'https://doctor.ndtv.com/videos/top-health-stories-of-the-week-467396',
+ 'only_matching': True
+ },
+ {
+ 'url': 'https://swirlster.ndtv.com/video/how-to-make-friends-at-work-469324',
+ 'only_matching': True
+ }
+ ]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
- title = remove_end(self._og_search_title(webpage), ' - NDTV')
+ # '__title' does not contain extra words such as sub-site name, "Video" etc.
+ title = compat_urllib_parse_unquote_plus(
+ self._search_regex(r"__title\s*=\s*'([^']+)'", webpage, 'title', default=None) or
+ self._og_search_title(webpage))
filename = self._search_regex(
- r"__filename='([^']+)'", webpage, 'video filename')
- video_url = 'http://bitcast-b.bitgravity.com/ndtvod/23372/ndtv/%s' % filename
+ r"(?:__)?filename\s*[:=]\s*'([^']+)'", webpage, 'video filename')
+ # in "movies" sub-site pages, filename is URL
+ video_url = urljoin('https://ndtvod.bc-ssl.cdn.bitgravity.com/23372/ndtv/', filename.lstrip('/'))
- duration = int_or_none(self._search_regex(
- r"__duration='([^']+)'", webpage, 'duration', fatal=False))
+ # "doctor" sub-site has MM:SS format
+ duration = parse_duration(self._search_regex(
+ r"(?:__)?duration\s*[:=]\s*'([^']+)'", webpage, 'duration', fatal=False))
+ # "sports", "doctor", "swirlster" sub-sites don't have 'publish-date'
upload_date = unified_strdate(self._html_search_meta(
- 'publish-date', webpage, 'upload date', fatal=False))
+ 'publish-date', webpage, 'upload date', default=None) or self._html_search_meta(
+ 'uploadDate', webpage, 'upload date', default=None) or self._search_regex(
+ r'datePublished"\s*:\s*"([^"]+)"', webpage, 'upload date', fatal=False))
description = remove_end(self._og_search_description(webpage), ' (Read more)')
class NickDeIE(MTVServicesInfoExtractor):
IE_NAME = 'nick.de'
- _VALID_URL = r'https?://(?:www\.)?(?P<host>nick\.(?:de|com\.pl)|nickelodeon\.(?:nl|at))/[^/]+/(?:[^/]+/)*(?P<id>[^/?#&]+)'
+ _VALID_URL = r'https?://(?:www\.)?(?P<host>nick\.(?:de|com\.pl|ch)|nickelodeon\.(?:nl|be|at|dk|no|se))/[^/]+/(?:[^/]+/)*(?P<id>[^/?#&]+)'
_TESTS = [{
'url': 'http://www.nick.de/playlist/3773-top-videos/videos/episode/17306-zu-wasser-und-zu-land-rauchende-erdnusse',
'only_matching': True,
}, {
'url': 'http://www.nick.com.pl/seriale/474-spongebob-kanciastoporty/wideo/17412-teatr-to-jest-to-rodeo-oszolom',
'only_matching': True,
+ }, {
+ 'url': 'http://www.nickelodeon.no/program/2626-bulderhuset/videoer/90947-femteklasse-veronica-vs-vanzilla',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.nickelodeon.dk/serier/2626-hojs-hus/videoer/761-tissepause',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.nickelodeon.se/serier/2626-lugn-i-stormen/videos/998-',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.nick.ch/shows/2304-adventure-time-abenteuerzeit-mit-finn-und-jake',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.nickelodeon.be/afspeellijst/4530-top-videos/videos/episode/73917-inval-broodschapper-lariekoek-arie',
+ 'only_matching': True,
}]
def _extract_mrss_url(self, webpage, host):
class NickRuIE(MTVServicesInfoExtractor):
IE_NAME = 'nickelodeonru'
- _VALID_URL = r'https?://(?:www\.)nickelodeon\.ru/(?:playlist|shows|videos)/(?:[^/]+/)*(?P<id>[^/?#&]+)'
+ _VALID_URL = r'https?://(?:www\.)nickelodeon\.(?:ru|fr|es|pt|ro|hu)/[^/]+/(?:[^/]+/)*(?P<id>[^/?#&]+)'
_TESTS = [{
'url': 'http://www.nickelodeon.ru/shows/henrydanger/videos/episodes/3-sezon-15-seriya-licenziya-na-polyot/pmomfb#playlist/7airc6',
'only_matching': True,
}, {
'url': 'http://www.nickelodeon.ru/videos/smotri-na-nickelodeon-v-iyule/g9hvh7',
'only_matching': True,
+ }, {
+ 'url': 'http://www.nickelodeon.fr/programmes/bob-l-eponge/videos/le-marathon-de-booh-kini-bottom-mardi-31-octobre/nfn7z0',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.nickelodeon.es/videos/nickelodeon-consejos-tortitas/f7w7xy',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.nickelodeon.pt/series/spongebob-squarepants/videos/a-bolha-de-tinta-gigante/xutq1b',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.nickelodeon.ro/emisiuni/shimmer-si-shine/video/nahal-din-bomboane/uw5u2k',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.nickelodeon.hu/musorok/spongyabob-kockanadrag/videok/episodes/buborekfujas-az-elszakadt-nadrag/q57iob#playlist/k6te4y',
+ 'only_matching': True,
}]
def _real_extract(self, url):
'uploader': 'takuya0301',
'uploader_id': '2698420',
'upload_date': '20131123',
- 'timestamp': 1385182762,
+ 'timestamp': int, # timestamp is unstable
'description': '(c) copyright 2008, Blender Foundation / www.bigbuckbunny.org',
'duration': 33,
'view_count': int,
'skip': 'Requires an account',
}, {
# "New" HTML5 video
+ # md5 is unstable
'url': 'http://www.nicovideo.jp/watch/sm31464864',
- 'md5': '351647b4917660986dc0fa8864085135',
'info_dict': {
'id': 'sm31464864',
'ext': 'mp4',
'description': 'md5:e52974af9a96e739196b2c1ca72b5feb',
'timestamp': 1498514060,
'upload_date': '20170626',
- 'uploader': 'ゲス',
+ 'uploader': 'ゲスト',
'uploader_id': '40826363',
'thumbnail': r're:https?://.*',
'duration': 198,
'comment_count': int,
},
'skip': 'Requires an account',
+ }, {
+ # Video without owner
+ 'url': 'http://www.nicovideo.jp/watch/sm18238488',
+ 'md5': 'd265680a1f92bdcbbd2a507fc9e78a9e',
+ 'info_dict': {
+ 'id': 'sm18238488',
+ 'ext': 'mp4',
+ 'title': '【実写版】ミュータントタートルズ',
+ 'description': 'md5:15df8988e47a86f9e978af2064bf6d8e',
+ 'timestamp': 1341160408,
+ 'upload_date': '20120701',
+ 'uploader': None,
+ 'uploader_id': None,
+ 'thumbnail': r're:https?://.*',
+ 'duration': 5271,
+ 'view_count': int,
+ 'comment_count': int,
+ },
+ 'skip': 'Requires an account',
}, {
'url': 'http://sp.nicovideo.jp/watch/sm28964488?ss_pos=1&cp_in=wt_tg',
'only_matching': True,
webpage_url = get_video_info('watch_url') or url
- owner = api_data.get('owner', {})
+ # Note: cannot use api_data.get('owner', {}) because owner may be set to "null"
+ # in the JSON, which will cause None to be returned instead of {}.
+ owner = try_get(api_data, lambda x: x.get('owner'), dict) or {}
uploader_id = get_video_info(['ch_id', 'user_id']) or owner.get('id')
uploader = get_video_info(['ch_name', 'user_nickname']) or owner.get('nickname')
_TESTS = [{
'url': 'http://parliamentlive.tv/Event/Index/c1e9d44d-fd6c-4263-b50f-97ed26cc998b',
'info_dict': {
- 'id': 'c1e9d44d-fd6c-4263-b50f-97ed26cc998b',
+ 'id': '1_af9nv9ym',
'ext': 'mp4',
'title': 'Home Affairs Committee',
'uploader_id': 'FFMPEG-01',
webpage = self._download_webpage(
'http://vodplayer.parliamentlive.tv/?mid=' + video_id, video_id)
widget_config = self._parse_json(self._search_regex(
- r'kWidgetConfig\s*=\s*({.+});',
+ r'(?s)kWidgetConfig\s*=\s*({.+});',
webpage, 'kaltura widget config'), video_id)
- kaltura_url = 'kaltura:%s:%s' % (widget_config['wid'][1:], widget_config['entry_id'])
+ kaltura_url = 'kaltura:%s:%s' % (
+ widget_config['wid'][1:], widget_config['entry_id'])
event_title = self._download_json(
'http://parliamentlive.tv/Event/GetShareVideo/' + video_id, video_id)['event']['title']
return {
'_type': 'url_transparent',
- 'id': video_id,
'title': event_title,
'description': '',
'url': kaltura_url,
_VALID_URL = r'''(?x)https?://
(?:
# Direct video URL
- (?:%s)/(?:viralplayer|video)/(?P<id>[0-9]+)/? |
+ (?:%s)/(?:(?:vir|port)alplayer|video)/(?P<id>[0-9]+)(?:[?/]|$) |
# Article with embedded player (or direct video)
(?:www\.)?pbs\.org/(?:[^/]+/){1,5}(?P<presumptive_id>[^/]+?)(?:\.html)?/?(?:$|[?\#]) |
# Player
{
'url': 'http://watch.knpb.org/video/2365616055/',
'only_matching': True,
+ },
+ {
+ 'url': 'https://player.pbs.org/portalplayer/3004638221/?uid=',
+ 'only_matching': True,
}
]
_ERRORS = {
--- /dev/null
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+
+# Extractor for servus.com video pages (at/de editions).
+class ServusIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?servus\.com/(?:at|de)/p/[^/]+/(?P<id>AA-\w+|\d+-\d+)'
+    _TESTS = [{
+        'url': 'https://www.servus.com/de/p/Die-Gr%C3%BCnen-aus-Sicht-des-Volkes/AA-1T6VBU5PW1W12/',
+        'md5': '046dee641cda1c4cabe13baef3be2c1c',
+        'info_dict': {
+            'id': 'AA-1T6VBU5PW1W12',
+            'ext': 'mp4',
+            'title': 'Die Grünen aus Volkssicht',
+            'description': 'md5:052b5da1cb2cd7d562ef1f19be5a5cba',
+            'thumbnail': r're:^https?://.*\.jpg$',
+        }
+    }, {
+        'url': 'https://www.servus.com/at/p/Wie-das-Leben-beginnt/1309984137314-381415152/',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+
+        # Metadata comes from the page's Open Graph tags.
+        info = {
+            'id': video_id,
+            'title': self._og_search_title(webpage),
+            'description': self._og_search_description(webpage),
+            'thumbnail': self._og_search_thumbnail(webpage),
+        }
+
+        # The HLS manifest URL is derived from the video id alone; the
+        # webpage is not consulted for it.
+        hls_formats = self._extract_m3u8_formats(
+            'https://stv.rbmbtnx.net/api/v1/manifests/%s.m3u8' % video_id,
+            video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls')
+        self._sort_formats(hls_formats)
+        info['formats'] = hls_formats
+
+        return info
from __future__ import unicode_literals
from .common import InfoExtractor
-from ..utils import strip_or_none
+from ..utils import (
+ extract_attributes,
+ smuggle_url,
+ strip_or_none,
+ urljoin,
+)
class SkySportsIE(InfoExtractor):
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
+ video_data = extract_attributes(self._search_regex(
+ r'(<div.+?class="sdc-article-video__media-ooyala"[^>]+>)', webpage, 'video data'))
+
+ video_url = 'ooyala:%s' % video_data['data-video-id']
+ if video_data.get('data-token-required') == 'true':
+ token_fetch_options = self._parse_json(video_data.get('data-token-fetch-options', '{}'), video_id, fatal=False) or {}
+ token_fetch_url = token_fetch_options.get('url')
+ if token_fetch_url:
+ embed_token = self._download_webpage(urljoin(url, token_fetch_url), video_id, fatal=False)
+ if embed_token:
+ video_url = smuggle_url(video_url, {'embed_token': embed_token.strip('"')})
return {
'_type': 'url_transparent',
'id': video_id,
- 'url': 'ooyala:%s' % self._search_regex(
- r'data-video-id="([^"]+)"', webpage, 'ooyala id'),
+ 'url': video_url,
'title': self._og_search_title(webpage),
'description': strip_or_none(self._og_search_description(webpage)),
'ie_key': 'Ooyala',
},
]
- _CLIENT_ID = 'JlZIsxg2hY5WnBgtn3jfS0UYCl0K8DOg'
+ _CLIENT_ID = 'c6CU49JDMapyrQo06UxU9xouB9ZVzqCn'
_IPHONE_CLIENT_ID = '376f225bf427445fc4bfb6b99b72e0bf'
@staticmethod
class SoundgasmIE(InfoExtractor):
IE_NAME = 'soundgasm'
- _VALID_URL = r'https?://(?:www\.)?soundgasm\.net/u/(?P<user>[0-9a-zA-Z_\-]+)/(?P<title>[0-9a-zA-Z_\-]+)'
+ _VALID_URL = r'https?://(?:www\.)?soundgasm\.net/u/(?P<user>[0-9a-zA-Z_-]+)/(?P<display_id>[0-9a-zA-Z_-]+)'
_TEST = {
'url': 'http://soundgasm.net/u/ytdl/Piano-sample',
'md5': '010082a2c802c5275bb00030743e75ad',
'info_dict': {
'id': '88abd86ea000cafe98f96321b23cc1206cbcbcc9',
'ext': 'm4a',
- 'title': 'ytdl_Piano-sample',
- 'description': 'Royalty Free Sample Music'
+ 'title': 'Piano sample',
+ 'description': 'Royalty Free Sample Music',
+ 'uploader': 'ytdl',
}
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
- display_id = mobj.group('title')
- audio_title = mobj.group('user') + '_' + mobj.group('title')
+ display_id = mobj.group('display_id')
+
webpage = self._download_webpage(url, display_id)
+
audio_url = self._html_search_regex(
- r'(?s)m4a\:\s"([^"]+)"', webpage, 'audio URL')
- audio_id = re.split(r'\/|\.', audio_url)[-2]
+ r'(?s)m4a\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
+ 'audio URL', group='url')
+
+ title = self._search_regex(
+ r'<div[^>]+\bclass=["\']jp-title[^>]+>([^<]+)',
+ webpage, 'title', default=display_id)
+
description = self._html_search_regex(
- r'(?s)<li>Description:\s(.*?)<\/li>', webpage, 'description',
- fatal=False)
+ (r'(?s)<div[^>]+\bclass=["\']jp-description[^>]+>(.+?)</div>',
+ r'(?s)<li>Description:\s(.*?)<\/li>'),
+ webpage, 'description', fatal=False)
+
+ audio_id = self._search_regex(
+ r'/([^/]+)\.m4a', audio_url, 'audio id', default=display_id)
return {
'id': audio_id,
'display_id': display_id,
'url': audio_url,
- 'title': audio_title,
- 'description': description
+ 'vcodec': 'none',
+ 'title': title,
+ 'description': description,
+ 'uploader': mobj.group('user'),
}
import re
from .common import InfoExtractor
+from ..utils import ExtractorError
class SpankBangIE(InfoExtractor):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
+ if re.search(r'<[^>]+\bid=["\']video_removed', webpage):
+ raise ExtractorError(
+ 'Video %s is not available' % video_id, expected=True)
+
stream_key = self._html_search_regex(
r'''var\s+stream_key\s*=\s*['"](.+?)['"]''',
webpage, 'stream key')
+++ /dev/null
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-from ..utils import int_or_none
-
-# 22Tracks regularly replace the audio tracks that can be streamed on their
-# site. The tracks usually expire after 1 months, so we can't add tests.
-
-
-class TwentyTwoTracksIE(InfoExtractor):
- _VALID_URL = r'https?://22tracks\.com/(?P<city>[a-z]+)/(?P<genre>[\da-z]+)/(?P<id>\d+)'
- IE_NAME = '22tracks:track'
-
- _API_BASE = 'http://22tracks.com/api'
-
- def _extract_info(self, city, genre_name, track_id=None):
- item_id = track_id if track_id else genre_name
-
- cities = self._download_json(
- '%s/cities' % self._API_BASE, item_id,
- 'Downloading cities info',
- 'Unable to download cities info')
- city_id = [x['id'] for x in cities if x['slug'] == city][0]
-
- genres = self._download_json(
- '%s/genres/%s' % (self._API_BASE, city_id), item_id,
- 'Downloading %s genres info' % city,
- 'Unable to download %s genres info' % city)
- genre = [x for x in genres if x['slug'] == genre_name][0]
- genre_id = genre['id']
-
- tracks = self._download_json(
- '%s/tracks/%s' % (self._API_BASE, genre_id), item_id,
- 'Downloading %s genre tracks info' % genre_name,
- 'Unable to download track info')
-
- return [x for x in tracks if x['id'] == item_id][0] if track_id else [genre['title'], tracks]
-
- def _get_track_url(self, filename, track_id):
- token = self._download_json(
- 'http://22tracks.com/token.php?desktop=true&u=/128/%s' % filename,
- track_id, 'Downloading token', 'Unable to download token')
- return 'http://audio.22tracks.com%s?st=%s&e=%d' % (token['filename'], token['st'], token['e'])
-
- def _extract_track_info(self, track_info, track_id):
- download_url = self._get_track_url(track_info['filename'], track_id)
- title = '%s - %s' % (track_info['artist'].strip(), track_info['title'].strip())
- return {
- 'id': track_id,
- 'url': download_url,
- 'ext': 'mp3',
- 'title': title,
- 'duration': int_or_none(track_info.get('duration')),
- 'timestamp': int_or_none(track_info.get('published_at') or track_info.get('created'))
- }
-
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
-
- city = mobj.group('city')
- genre = mobj.group('genre')
- track_id = mobj.group('id')
-
- track_info = self._extract_info(city, genre, track_id)
- return self._extract_track_info(track_info, track_id)
-
-
-class TwentyTwoTracksGenreIE(TwentyTwoTracksIE):
- _VALID_URL = r'https?://22tracks\.com/(?P<city>[a-z]+)/(?P<genre>[\da-z]+)/?$'
- IE_NAME = '22tracks:genre'
-
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
-
- city = mobj.group('city')
- genre = mobj.group('genre')
-
- genre_title, tracks = self._extract_info(city, genre)
-
- entries = [
- self._extract_track_info(track_info, track_info['id'])
- for track_info in tracks]
-
- return self.playlist_result(entries, genre, genre_title)
r'(?s)clipInfo\s*=\s*({.+?});', webpage, 'clip info'),
video_id, transform_source=js_to_json)
- title = clip.get('channel_title') or self._og_search_title(webpage)
+ title = clip.get('title') or clip.get('channel_title') or self._og_search_title(webpage)
formats = [{
'url': option['source'],
--- /dev/null
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from .youtube import YoutubeIE
+
+
+# Extractor for unity3d.com tutorial pages; the video itself is delegated to
+# the YouTube extractor.
+class UnityIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?unity3d\.com/learn/tutorials/(?:[^/]+/)*(?P<id>[^/?#&]+)'
+    _TESTS = [{
+        'url': 'https://unity3d.com/learn/tutorials/topics/animation/animate-anything-mecanim',
+        'info_dict': {
+            'id': 'jWuNtik0C8E',
+            'ext': 'mp4',
+            'title': 'Live Training 22nd September 2014 - Animate Anything',
+            'description': 'md5:e54913114bd45a554c56cdde7669636e',
+            'duration': 2893,
+            'uploader': 'Unity',
+            'uploader_id': 'Unity3D',
+            'upload_date': '20140926',
+        }
+    }, {
+        'url': 'https://unity3d.com/learn/tutorials/projects/2d-ufo-tutorial/following-player-camera?playlist=25844',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        # The id matched from the URL is the tutorial slug; the YouTube id is
+        # read from the page's data-video-id attribute.
+        display_id = self._match_id(url)
+        page = self._download_webpage(url, display_id)
+        return self.url_result(
+            self._search_regex(
+                r'data-video-id="([_0-9a-zA-Z-]+)"',
+                page, 'youtube ID'),
+            ie=YoutubeIE.ie_key(), video_id=display_id)
urls = []
# Look for embedded (iframe) Vimeo player
for mobj in re.finditer(
- r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//player\.vimeo\.com/video/.+?)\1',
+ r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//player\.vimeo\.com/video/\d+.*?)\1',
webpage):
urls.append(VimeoIE._smuggle_referrer(unescapeHTML(mobj.group('url')), url))
PLAIN_EMBED_RE = (
--- /dev/null
+# coding: utf-8
+from __future__ import unicode_literals
+
+import itertools
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+ try_get,
+)
+
+CDN_API_BASE = 'https://cdn.younow.com/php/api'
+MOMENT_URL_FORMAT = '%s/moment/fetch/id=%%s' % CDN_API_BASE
+
+
+# Extractor for a user's live broadcast on younow.com.
+class YouNowLiveIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?younow\.com/(?P<id>[^/?#&]+)'
+    _TEST = {
+        'url': 'https://www.younow.com/AmandaPadeezy',
+        'info_dict': {
+            'id': 'AmandaPadeezy',
+            'ext': 'mp4',
+            'is_live': True,
+            'title': 'March 26, 2017',
+            'thumbnail': r're:^https?://.*\.jpg$',
+            'tags': ['girls'],
+            'categories': ['girls'],
+            'uploader': 'AmandaPadeezy',
+            'uploader_id': '6716501',
+            'uploader_url': 'https://www.younow.com/AmandaPadeezy',
+            'creator': 'AmandaPadeezy',
+        },
+        'skip': True,
+    }
+
+    @classmethod
+    def suitable(cls, url):
+        # _VALID_URL here is a prefix of the channel and moment URL patterns,
+        # so URLs claimed by those extractors are rejected first.
+        if YouNowChannelIE.suitable(url) or YouNowMomentIE.suitable(url):
+            return False
+        return super(YouNowLiveIE, cls).suitable(url)
+
+    def _real_extract(self, url):
+        username = self._match_id(url)
+
+        info_url = (
+            'https://api.younow.com/php/api/broadcast/info/curId=0/user=%s'
+            % username)
+        data = self._download_json(info_url, username)
+
+        # Anything other than errorCode == 0 (including a missing code) is
+        # reported as an expected error carrying the API's own message.
+        if data.get('errorCode') != 0:
+            raise ExtractorError(data['errorMsg'], expected=True)
+
+        # Prefer the profile name reported by the API, fall back to the URL.
+        uploader = try_get(
+            data, lambda x: x['user']['profileUrlString'], compat_str)
+        if not uploader:
+            uploader = username
+
+        tags = data.get('tags')
+
+        return {
+            'id': uploader,
+            'is_live': True,
+            'title': self._live_title(uploader),
+            'thumbnail': data.get('awsUrl'),
+            'tags': tags,
+            'categories': tags,
+            'uploader': uploader,
+            'uploader_id': data.get('userId'),
+            'uploader_url': 'https://www.younow.com/%s' % username,
+            'creator': uploader,
+            'view_count': int_or_none(data.get('viewers')),
+            'like_count': int_or_none(data.get('likes')),
+            'formats': [{
+                'url': '%s/broadcast/videoPath/hls=1/broadcastId=%s/channelId=%s'
+                       % (CDN_API_BASE, data['broadcastId'], data['userId']),
+                'ext': 'mp4',
+                'protocol': 'm3u8',
+            }],
+        }
+
+
+def _extract_moment(item, fatal=True):
+    # Build the info dict of a single moment from its JSON description.
+    #
+    # item:  dict describing the moment; `momentId` is the only required key.
+    # fatal: when the moment id is missing, raise ExtractorError if True,
+    #        otherwise return None.
+    moment_id = item.get('momentId')
+    if not moment_id:
+        if fatal:
+            raise ExtractorError('Unable to extract moment id')
+        return
+
+    moment_id = compat_str(moment_id)
+
+    # Moments without text get a generic title built from their type.
+    title = item.get('text') or 'YouNow %s' % (
+        item.get('momentType') or item.get('titleType') or 'moment')
+
+    uploader = try_get(item, lambda x: x['owner']['name'], compat_str)
+    if uploader:
+        uploader_url = 'https://www.younow.com/%s' % uploader
+    else:
+        uploader_url = None
+
+    return {
+        'extractor_key': 'YouNowMoment',
+        'id': moment_id,
+        'title': title,
+        'view_count': int_or_none(item.get('views')),
+        'like_count': int_or_none(item.get('likes')),
+        'timestamp': int_or_none(item.get('created')),
+        'creator': uploader,
+        'uploader': uploader,
+        'uploader_id': try_get(item, lambda x: x['owner']['userId']),
+        'uploader_url': uploader_url,
+        'formats': [{
+            # The playlist URL is derived from the moment id alone.
+            'url': 'https://hls.younow.com/momentsplaylists/live/%s/%s.m3u8'
+                   % (moment_id, moment_id),
+            'ext': 'mp4',
+            'protocol': 'm3u8_native',
+        }],
+    }
+
+
+# Playlist extractor for all moments of a younow.com channel.
+class YouNowChannelIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?younow\.com/(?P<id>[^/]+)/channel'
+    _TEST = {
+        'url': 'https://www.younow.com/its_Kateee_/channel',
+        'info_dict': {
+            'id': '14629760',
+            'title': 'its_Kateee_ moments'
+        },
+        'playlist_mincount': 8,
+    }
+
+    def _entries(self, username, channel_id):
+        # Lazily yield one info dict per moment of the channel.
+        #
+        # Pages of 20 records are requested with a `createdBefore` cursor that
+        # starts at 0 and is advanced to the `created` value of the last
+        # usable (dict) item of each page. Pagination stops on an empty page
+        # or when no cursor can be derived.
+        created_before = 0
+        for page_num in itertools.count(1):
+            if created_before is None:
+                break
+            info = self._download_json(
+                '%s/moment/profile/channelId=%s/createdBefore=%d/records=20'
+                % (CDN_API_BASE, channel_id, created_before), username,
+                note='Downloading moments page %d' % page_num)
+            items = info.get('items')
+            if not items or not isinstance(items, list):
+                break
+            # Reset per page: the cursor is taken from the dict items only, so
+            # a trailing non-dict element can no longer raise AttributeError,
+            # and a page without any usable item ends the pagination.
+            created_before = None
+            for item in items:
+                if not isinstance(item, dict):
+                    continue
+                created_before = int_or_none(item.get('created'))
+                item_type = item.get('type')
+                if item_type == 'moment':
+                    entry = _extract_moment(item, fatal=False)
+                    if entry:
+                        yield entry
+                elif item_type == 'collection':
+                    moments = item.get('momentsIds')
+                    if not isinstance(moments, list):
+                        continue
+                    for moment_id in moments:
+                        m = self._download_json(
+                            MOMENT_URL_FORMAT % moment_id, username,
+                            note='Downloading %s moment JSON' % moment_id,
+                            fatal=False)
+                        moment = m.get('item') if isinstance(m, dict) else None
+                        if not isinstance(moment, dict):
+                            continue
+                        # The download above is already non-fatal; parse the
+                        # moment non-fatally as well so that one malformed
+                        # moment does not abort the whole playlist.
+                        entry = _extract_moment(moment, fatal=False)
+                        if entry:
+                            yield entry
+
+    def _real_extract(self, url):
+        username = self._match_id(url)
+        # The numeric user id doubles as the channel id and the playlist id.
+        channel_id = compat_str(self._download_json(
+            'https://api.younow.com/php/api/broadcast/info/curId=0/user=%s'
+            % username, username, note='Downloading user information')['userId'])
+        return self.playlist_result(
+            self._entries(username, channel_id), channel_id,
+            '%s moments' % username)
+
+
+# Extractor for a single moment (recorded clip) on younow.com.
+class YouNowMomentIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?younow\.com/[^/]+/(?P<id>[^/?#&]+)'
+    _TEST = {
+        'url': 'https://www.younow.com/GABO.../20712117/36319236/3b316doc/m',
+        'md5': 'a30c70eadb9fb39a1aa3c8c0d22a0807',
+        'info_dict': {
+            'id': '20712117',
+            'ext': 'mp4',
+            'title': 'YouNow capture',
+            'view_count': int,
+            'like_count': int,
+            'timestamp': 1490432040,
+            'upload_date': '20170325',
+            'uploader': 'GABO...',
+            'uploader_id': 35917228,
+        },
+    }
+
+    @classmethod
+    def suitable(cls, url):
+        # ".../<user>/channel" also matches _VALID_URL; leave those URLs to
+        # the channel extractor.
+        if YouNowChannelIE.suitable(url):
+            return False
+        return super(YouNowMomentIE, cls).suitable(url)
+
+    def _real_extract(self, url):
+        moment_id = self._match_id(url)
+        response = self._download_json(
+            MOMENT_URL_FORMAT % moment_id, moment_id)
+        # A missing moment id is fatal here (default of _extract_moment).
+        return _extract_moment(response['item'])
)
(["\'])
(?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
- (?:embed|v|p)/.+?)
+ (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
\1''', webpage)]
# lazyYT YouTube embed
# description
description_original = video_description = get_element_by_id("eow-description", video_webpage)
if video_description:
+
+ def replace_url(m):
+ redir_url = compat_urlparse.urljoin(url, m.group(1))
+ parsed_redir_url = compat_urllib_parse_urlparse(redir_url)
+ if re.search(r'^(?:www\.)?(?:youtube(?:-nocookie)?\.com|youtu\.be)$', parsed_redir_url.netloc) and parsed_redir_url.path == '/redirect':
+ qs = compat_parse_qs(parsed_redir_url.query)
+ q = qs.get('q')
+ if q and q[0]:
+ return q[0]
+ return redir_url
+
description_original = video_description = re.sub(r'''(?x)
<a\s+
(?:[a-zA-Z-]+="[^"]*"\s+)*?
class="[^"]*"[^>]*>
[^<]+\.{3}\s*
</a>
- ''', lambda m: compat_urlparse.urljoin(url, m.group(1)), video_description)
+ ''', replace_url, video_description)
video_description = clean_html(video_description)
else:
fd_mobj = re.search(r'<meta name="description" content="([^"]+)"', video_webpage)
days, hours, mins, secs, ms = m.groups()
else:
m = re.match(
- r'''(?ix)(?:P?T)?
+ r'''(?ix)(?:P?
+ (?:
+ [0-9]+\s*y(?:ears?)?\s*
+ )?
+ (?:
+ [0-9]+\s*m(?:onths?)?\s*
+ )?
+ (?:
+ [0-9]+\s*w(?:eeks?)?\s*
+ )?
(?:
(?P<days>[0-9]+)\s*d(?:ays?)?\s*
)?
+ T)?
(?:
(?P<hours>[0-9]+)\s*h(?:ours?)?\s*
)?
from __future__ import unicode_literals
-__version__ = '2017.10.15.1'
+__version__ = '2017.11.06'