+version 2019.06.08
+
+Core
+* [downloader/common] Improve rate limit (#21301)
+* [utils] Improve strip_or_none
+* [extractor/common] Strip src attribute for HTML5 entries code (#18485,
+ #21169)
+
+Extractors
+* [ted] Fix playlist extraction (#20844, #21032)
+* [vlive:playlist] Fix video extraction when no playlist is found (#20590)
++ [vlive] Add CH+ support (#16887, #21209)
++ [openload] Add support for oload.website (#21329)
++ [tvnow] Extract HD formats (#21201)
++ [redbulltv] Add support for rrn:content URLs (#21297)
+* [youtube] Fix average rating extraction (#21304)
++ [bitchute] Extract HTML5 formats (#21306)
+* [cbsnews] Fix extraction (#9659, #15397)
+* [vvvvid] Relax URL regular expression (#21299)
++ [prosiebensat1] Add support for new API (#21272)
++ [vrv] Extract adaptive_hls formats (#21243)
+* [viki] Switch to HTTPS (#21001)
+* [LiveLeak] Check if the original videos exist (#21206, #21208)
+* [rtp] Fix extraction (#15099)
+* [youtube] Improve DRM protected videos detection (#1774)
++ [srgssrplay] Add support for popupvideoplayer URLs (#21155)
++ [24video] Add support for porno.24video.net (#21194)
++ [24video] Add support for 24video.site (#21193)
+- [pornflip] Remove extractor
+- [criterion] Remove extractor (#21195)
+* [pornhub] Use HTTPS (#21061)
+* [bitchute] Fix uploader extraction (#21076)
+* [streamcloud] Reduce waiting time to 6 seconds (#21092)
+- [novamov] Remove extractors (#21077)
++ [openload] Add support for oload.press (#21135)
+* [vivo] Fix extraction (#18906, #19217)
+
+
+version 2019.05.20
+
+Core
++ [extractor/common] Move workaround for applying first Set-Cookie header
+ into a separate _apply_first_set_cookie_header method
+
+Extractors
+* [safari] Fix authentication (#21090)
+* [vk] Use _apply_first_set_cookie_header
+* [vrt] Fix extraction (#20527)
++ [canvas] Add support for vrtnieuws and sporza site ids and extract
+ AES HLS formats
++ [vrv] Extract captions (#19238)
+* [tele5] Improve video id extraction
+* [tele5] Relax URL regular expression (#21020, #21063)
+* [svtplay] Update API URL (#21075)
++ [yahoo:gyao] Add X-User-Agent header to dam proxy requests (#21071)
+
+
+version 2019.05.11
+
+Core
+* [utils] Transliterate "þ" as "th" (#20897)
+
+Extractors
++ [cloudflarestream] Add support for videodelivery.net (#21049)
++ [byutv] Add support for DVR videos (#20574, #20676)
++ [gfycat] Add support for URLs with tags (#20696, #20731)
++ [openload] Add support for verystream.com (#20701, #20967)
+* [youtube] Use sp field value for signature field name (#18841, #18927,
+ #21028)
++ [yahoo:gyao] Extend URL regular expression (#21008)
+* [youtube] Fix channel id extraction (#20982, #21003)
++ [sky] Add support for news.sky.com (#13055)
++ [youtube:entrylistbase] Retry on 5xx HTTP errors (#20965)
++ [francetvinfo] Extend video id extraction (#20619, #20740)
+* [4tube] Update token hosts (#20918)
+* [hotstar] Move to API v2 (#20931)
+* [fox] Fix API error handling under python 2 (#20925)
++ [redbulltv] Extend URL regular expression (#20922)
+
+
+version 2019.04.30
+
+Extractors
+* [openload] Use real Chrome versions (#20902)
+- [youtube] Remove info el for get_video_info request
+* [youtube] Improve extraction robustness
+- [dramafever] Remove extractor (#20868)
+* [adn] Fix subtitle extraction (#12724)
++ [ccc] Extract creator (#20355)
++ [ccc:playlist] Add support for media.ccc.de playlists (#14601, #20355)
++ [sverigesradio] Add support for sverigesradio.se (#18635)
++ [cinemax] Add support for cinemax.com
+* [sixplay] Try extracting non-DRM protected manifests (#20849)
++ [youtube] Extract YouTube Music auto-generated metadata (#20599, #20742)
+- [wrzuta] Remove extractor (#20684, #20801)
+* [twitch] Prefer source format (#20850)
++ [twitcasting] Add support for private videos (#20843)
+* [reddit] Validate thumbnail URL (#20030)
+* [yandexmusic] Fix track URL extraction (#20820)
+
+
+version 2019.04.24
+
+Extractors
+* [youtube] Fix extraction (#20758, #20759, #20761, #20762, #20764, #20766,
+ #20767, #20769, #20771, #20768, #20770)
+* [toutv] Fix extraction and extract series info (#20757)
++ [vrv] Add support for movie listings (#19229)
++ [youtube] Print error when no data is available (#20737)
++ [soundcloud] Add support for new rendition and improve extraction (#20699)
++ [ooyala] Add support for geo verification proxy
++ [nrl] Add support for nrl.com (#15991)
++ [vimeo] Extract live archive source format (#19144)
++ [vimeo] Add support for live streams and improve info extraction (#19144)
++ [ntvcojp] Add support for cu.ntv.co.jp
++ [nhk] Extract RTMPT format
++ [nhk] Add support for audio URLs
++ [udemy] Add another course id extraction pattern (#20491)
++ [openload] Add support for oload.services (#20691)
++ [openload] Add support for openloed.co (#20691, #20693)
+* [bravotv] Fix extraction (#19213)
+
+
+version 2019.04.17
+
+Extractors
+* [openload] Randomize User-Agent (closes #20688)
++ [openload] Add support for oladblock domains (#20471)
+* [adn] Fix subtitle extraction (#12724)
++ [aol] Add support for localized websites
++ [yahoo] Add support for GYAO episode URLs
++ [yahoo] Add support for streaming.yahoo.co.jp (#5811, #7098)
++ [yahoo] Add support for gyao.yahoo.co.jp
+* [aenetworks] Fix history topic extraction and extract more formats
++ [cbs] Extract smpte and vtt subtitles
++ [streamango] Add support for streamcherry.com (#20592)
++ [yourporn] Add support for sxyprn.com (#20646)
+* [mgtv] Fix extraction (#20650)
+* [linkedin:learning] Use urljoin for form action URL (#20431)
++ [gdc] Add support for kaltura embeds (#20575)
+* [dispeak] Improve mp4 bitrate extraction
+* [kaltura] Sanitize embed URLs
+* [jwplatform] Do not match manifest URLs (#20596)
+* [aol] Restrict URL regular expression and improve format extraction
++ [tiktok] Add support for new URL schema (#20573)
++ [stv:player] Add support for player.stv.tv (#20586)
+
+
+version 2019.04.07
+
+Core
++ [downloader/external] Pass rtmp_conn to ffmpeg
+
+Extractors
++ [ruutu] Add support for audio podcasts (#20473, #20545)
++ [xvideos] Extract all thumbnails (#20432)
++ [platzi] Add support for platzi.com (#20562)
+* [dvtv] Fix extraction (#18514, #19174)
++ [vrv] Add basic support for individual movie links (#19229)
++ [bfi:player] Add support for player.bfi.org.uk (#19235)
+* [hbo] Fix extraction and extract subtitles (#14629, #13709)
+* [youtube] Extract srv[1-3] subtitle formats (#20566)
+* [adultswim] Fix extraction (#18025)
+* [teamcoco] Fix extraction and add support for subdomains (#17099, #20339)
+* [adn] Fix subtitle compatibility with ffmpeg
+* [adn] Fix extraction and add support for positioning styles (#20549)
+* [vk] Use unique video id (#17848)
+* [newstube] Fix extraction
+* [rtl2] Actualize extraction
++ [adobeconnect] Add support for adobeconnect.com (#20283)
++ [gaia] Add support for authentication (#14605)
++ [mediasite] Add support for dashed ids and named catalogs (#20531)
+
+
+version 2019.04.01
+
+Core
+* [utils] Improve int_or_none and float_or_none (#20403)
+* Check for valid --min-sleep-interval when --max-sleep-interval is specified
+ (#20435)
+
+Extractors
++ [weibo] Extend URL regular expression (#20496)
++ [xhamster] Add support for xhamster.one (#20508)
++ [mediasite] Add support for catalogs (#20507)
++ [teamtreehouse] Add support for teamtreehouse.com (#9836)
++ [ina] Add support for audio URLs
+* [ina] Improve extraction
+* [cwtv] Fix episode number extraction (#20461)
+* [npo] Improve DRM detection
++ [pornhub] Add support for DASH formats (#20403)
+* [svtplay] Update API endpoint (#20430)
+
+
+version 2019.03.18
+
+Core
+* [extractor/common] Improve HTML5 entries extraction
++ [utils] Introduce parse_bitrate
+* [update] Hide update URLs behind redirect
+* [extractor/common] Fix url meta field for unfragmented DASH formats (#20346)
+
+Extractors
++ [yandexvideo] Add extractor
+* [openload] Improve embed detection
++ [corus] Add support for bigbrothercanada.ca (#20357)
++ [orf:radio] Extract series (#20012)
++ [cbc:watch] Add support for gem.cbc.ca (#20251, #20359)
+- [anysex] Remove extractor (#19279)
++ [ciscolive] Add support for new URL schema (#20320, #20351)
++ [youtube] Add support for invidiou.sh (#20309)
+- [anitube] Remove extractor (#20334)
+- [ruleporn] Remove extractor (#15344, #20324)
+* [npr] Fix extraction (#10793, #13440)
+* [biqle] Fix extraction (#11471, #15313)
+* [viddler] Modernize
+* [moevideo] Fix extraction
+- [primesharetv] Remove extractor
+* [hypem] Modernize and extract more metadata (#15320)
+* [veoh] Fix extraction
+* [escapist] Modernize
+- [videomega] Remove extractor (#10108)
++ [beeg] Add support for beeg.porn (#20306)
+* [vimeo:review] Improve config url extraction and extract original format
+ (#20305)
+* [fox] Detect geo restriction and authentication errors (#20208)
+
+
+version 2019.03.09
+
+Core
+* [extractor/common] Use compat_etree_Element
++ [compat] Introduce compat_etree_Element
+* [extractor/common] Fallback url to base URL for DASH formats
+* [extractor/common] Do not fail on invalid data while parsing F4M manifest
+ in non fatal mode
+* [extractor/common] Return MPD manifest as format's url meta field (#20242)
+* [utils] Strip #HttpOnly_ prefix from cookies files (#20219)
+
+Extractors
+* [francetv:site] Relax video id regular expression (#20268)
+* [toutv] Detect invalid login error
+* [toutv] Fix authentication (#20261)
++ [urplay] Extract timestamp (#20235)
++ [openload] Add support for oload.space (#20246)
+* [facebook] Improve uploader extraction (#20250)
+* [bbc] Use compat_etree_Element
+* [crunchyroll] Use compat_etree_Element
+* [npo] Improve ISM extraction
+* [rai] Improve extraction (#20253)
+* [paramountnetwork] Fix mgid extraction (#20241)
+* [libsyn] Improve extraction (#20229)
++ [youtube] Add more invidious instances to URL regular expression (#20228)
+* [spankbang] Fix extraction (#20023)
+* [espn] Extend URL regular expression (#20013)
+* [sixplay] Handle videos with empty assets (#20016)
++ [vimeo] Add support for Vimeo Pro portfolio protected videos (#20070)
+
+
+version 2019.03.01
+
+Core
++ [downloader/external] Add support for rate limit and retries for wget
+* [downloader/external] Fix infinite retries for curl (#19303)
+
+Extractors
+* [npo] Fix extraction (#20084)
+* [francetv:site] Extend video id regex (#20029, #20071)
++ [periscope] Extract width and height (#20015)
+* [servus] Fix extraction (#19297)
+* [bbccouk] Make subtitles non fatal (#19651)
+* [metacafe] Fix family filter bypass (#19287)
+
+
+version 2019.02.18
+
+Extractors
+* [tvp:website] Fix and improve extraction
++ [tvp] Detect unavailable videos
+* [tvp] Fix description extraction and make thumbnail optional
++ [linuxacademy] Add support for linuxacademy.com (#12207)
+* [bilibili] Update keys (#19233)
+* [udemy] Extend URL regular expressions (#14330, #15883)
+* [udemy] Update User-Agent and detect captcha (#14713, #15839, #18126)
+* [noovo] Fix extraction (#19230)
+* [rai] Relax URL regular expression (#19232)
++ [vshare] Pass Referer to download request (#19205, #19221)
++ [openload] Add support for oload.live (#19222)
+* [imgur] Use video id as title fallback (#18590)
++ [twitch] Add new source format detection approach (#19193)
+* [tvplayhome] Fix video id extraction (#19190)
+* [tvplayhome] Fix episode metadata extraction (#19190)
+* [rutube:embed] Fix extraction (#19163)
++ [rutube:embed] Add support for private videos (#19163)
++ [soundcloud] Extract more metadata
++ [trunews] Add support for trunews.com (#19153)
++ [linkedin:learning] Extract chapter_number and chapter_id (#19162)
+
+
+version 2019.02.08
+
+Core
+* [utils] Improve JSON-LD regular expression (#18058)
+* [YoutubeDL] Fallback to ie_key of matching extractor while making
+ download archive id when no explicit ie_key is provided (#19022)
+
+Extractors
++ [malltv] Add support for mall.tv (#18058, #17856)
++ [spankbang:playlist] Add support for playlists (#19145)
+* [spankbang] Extend URL regular expression
+* [trutv] Fix extraction (#17336)
+* [toutv] Fix authentication (#16398, #18700)
+* [pornhub] Fix tags and categories extraction (#13720, #19135)
+* [pornhd] Fix formats extraction
++ [pornhd] Extract like count (#19123, #19125)
+* [radiocanada] Switch to the new media requests (#19115)
++ [teachable] Add support for courses.workitdaily.com (#18871)
+- [vporn] Remove extractor (#16276)
++ [soundcloud:pagedplaylist] Add ie and title to entries (#19022, #19086)
++ [drtuber] Extract duration (#19078)
+* [soundcloud] Fix paged playlists extraction, add support for albums and update client id
+* [soundcloud] Update client id
+* [drtv] Improve preference (#19079)
++ [openload] Add support for openload.pw and oload.pw (#18930)
++ [openload] Add support for oload.info (#19073)
+* [crackle] Authorize media detail request (#16931)
+
+
+version 2019.01.30.1
+
+Core
+* [postprocessor/ffmpeg] Fix avconv processing broken in #19025 (#19067)
+
+
+version 2019.01.30
+
+Core
+* [postprocessor/ffmpeg] Do not copy Apple TV chapter tracks while embedding
+ subtitles (#19024, #19042)
+* [postprocessor/ffmpeg] Disable "Last message repeated" messages (#19025)
+
+Extractors
+* [yourporn] Fix extraction and extract duration (#18815, #18852, #19061)
+* [drtv] Improve extraction (#19039)
+ + Add support for EncryptedUri videos
+ + Extract more metadata
+ * Fix subtitles extraction
++ [fox] Add support for locked videos using cookies (#19060)
+* [fox] Fix extraction for free videos (#19060)
++ [zattoo] Add support for tv.salt.ch (#19059)
+
+
+version 2019.01.27
+
+Core
++ [extractor/common] Extract season in _json_ld
+* [postprocessor/ffmpeg] Fallback to ffmpeg/avconv for audio codec detection
+ (#681)
+
+Extractors
+* [vice] Fix extraction for locked videos (#16248)
++ [wakanim] Detect DRM protected videos
++ [wakanim] Add support for wakanim.tv (#14374)
+* [usatoday] Fix extraction for videos with custom brightcove partner id
+ (#18990)
+* [drtv] Fix extraction (#18989)
+* [nhk] Extend URL regular expression (#18968)
+* [go] Fix Adobe Pass requests for Disney Now (#18901)
++ [openload] Add support for oload.club (#18969)
+
+
+version 2019.01.24
+
+Core
+* [YoutubeDL] Fix negation for string operators in format selection (#18961)
+
+
+version 2019.01.23
+
+Core
+* [utils] Fix urljoin for paths with non-http(s) schemes
+* [extractor/common] Improve jwplayer relative URL handling (#18892)
++ [YoutubeDL] Add negation support for string comparisons in format selection
+ expressions (#18600, #18805)
+* [extractor/common] Improve HLS video-only format detection (#18923)
+
+Extractors
+* [crunchyroll] Extend URL regular expression (#18955)
+* [pornhub] Bypass scrape detection (#4822, #5930, #7074, #10175, #12722,
+ #17197, #18338, #18842, #18899)
++ [vrv] Add support for authentication (#14307)
+* [videomore:season] Fix extraction
+* [videomore] Improve extraction (#18908)
++ [tnaflix] Pass Referer in metadata request (#18925)
+* [radiocanada] Relax DRM check (#18608, #18609)
+* [vimeo] Fix video password verification for videos protected by
+ Referer HTTP header
++ [hketv] Add support for hkedcity.net (#18696)
++ [streamango] Add support for fruithosts.net (#18710)
++ [instagram] Add support for tags (#18757)
++ [odnoklassniki] Detect paid videos (#18876)
+* [ted] Correct acodec for HTTP formats (#18923)
+* [cartoonnetwork] Fix extraction (#15664, #17224)
+* [vimeo] Fix extraction for password protected player URLs (#18889)
+
+
version 2019.01.17
Extractors
all: youtube-dl README.md CONTRIBUTING.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish supportedsites
clean:
- rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish youtube_dl/extractor/lazy_extractors.py *.dump *.part* *.ytdl *.info.json *.mp4 *.m4a *.flv *.mp3 *.avi *.mkv *.webm *.3gp *.wav *.ape *.swf *.jpg *.png CONTRIBUTING.md.tmp ISSUE_TEMPLATE.md.tmp youtube-dl youtube-dl.exe
+ rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish youtube_dl/extractor/lazy_extractors.py *.dump *.part* *.ytdl *.info.json *.mp4 *.m4a *.flv *.mp3 *.avi *.mkv *.webm *.3gp *.wav *.ape *.swf *.jpg *.png CONTRIBUTING.md.tmp youtube-dl youtube-dl.exe
find . -name "*.pyc" -delete
find . -name "*.class" -delete
CONTRIBUTING.md: README.md
$(PYTHON) devscripts/make_contributing.py README.md CONTRIBUTING.md
-.github/ISSUE_TEMPLATE.md: devscripts/make_issue_template.py .github/ISSUE_TEMPLATE_tmpl.md youtube_dl/version.py
- $(PYTHON) devscripts/make_issue_template.py .github/ISSUE_TEMPLATE_tmpl.md .github/ISSUE_TEMPLATE.md
+issuetemplates: devscripts/make_issue_template.py .github/ISSUE_TEMPLATE_tmpl/1_broken_site.md .github/ISSUE_TEMPLATE_tmpl/2_site_support_request.md .github/ISSUE_TEMPLATE_tmpl/3_site_feature_request.md .github/ISSUE_TEMPLATE_tmpl/4_bug_report.md .github/ISSUE_TEMPLATE_tmpl/5_feature_request.md youtube_dl/version.py
+ $(PYTHON) devscripts/make_issue_template.py .github/ISSUE_TEMPLATE_tmpl/1_broken_site.md .github/ISSUE_TEMPLATE/1_broken_site.md
+ $(PYTHON) devscripts/make_issue_template.py .github/ISSUE_TEMPLATE_tmpl/2_site_support_request.md .github/ISSUE_TEMPLATE/2_site_support_request.md
+ $(PYTHON) devscripts/make_issue_template.py .github/ISSUE_TEMPLATE_tmpl/3_site_feature_request.md .github/ISSUE_TEMPLATE/3_site_feature_request.md
+ $(PYTHON) devscripts/make_issue_template.py .github/ISSUE_TEMPLATE_tmpl/4_bug_report.md .github/ISSUE_TEMPLATE/4_bug_report.md
+ $(PYTHON) devscripts/make_issue_template.py .github/ISSUE_TEMPLATE_tmpl/5_feature_request.md .github/ISSUE_TEMPLATE/5_feature_request.md
supportedsites:
$(PYTHON) devscripts/make_supportedsites.py docs/supportedsites.md
-[![Build Status](https://travis-ci.org/rg3/youtube-dl.svg?branch=master)](https://travis-ci.org/rg3/youtube-dl)
+[![Build Status](https://travis-ci.org/ytdl-org/youtube-dl.svg?branch=master)](https://travis-ci.org/ytdl-org/youtube-dl)
youtube-dl - download videos from youtube.com or other video platforms
sudo port install youtube-dl
-Alternatively, refer to the [developer instructions](#developer-instructions) for how to check out and work with the git repository. For further options, including PGP signatures, see the [youtube-dl Download Page](https://rg3.github.io/youtube-dl/download.html).
+Alternatively, refer to the [developer instructions](#developer-instructions) for how to check out and work with the git repository. For further options, including PGP signatures, see the [youtube-dl Download Page](https://ytdl-org.github.io/youtube-dl/download.html).
# DESCRIPTION
**youtube-dl** is a command-line program to download videos from YouTube.com and a few more sites. It requires the Python interpreter, version 2.6, 2.7, or 3.2+, and it is not platform specific. It should work on your Unix box, on Windows or on macOS. It is released to the public domain, which means you can modify it, redistribute it or use it however you like.
You can also use a file extension (currently `3gp`, `aac`, `flv`, `m4a`, `mp3`, `mp4`, `ogg`, `wav`, `webm` are supported) to download the best quality format of a particular file extension served as a single file, e.g. `-f webm` will download the best quality format with the `webm` extension served as a single file.
You can also use special names to select particular edge case formats:
+
- `best`: Select the best quality format represented by a single file with video and audio.
- `worst`: Select the worst quality format represented by a single file with video and audio.
- `bestvideo`: Select the best quality video-only format (e.g. DASH video). May not be available.
You can also filter the video formats by putting a condition in brackets, as in `-f "best[height=720]"` (or `-f "[filesize>10M]"`).
The following numeric meta fields can be used with comparisons `<`, `<=`, `>`, `>=`, `=` (equals), `!=` (not equals):
+
- `filesize`: The number of bytes, if known in advance
- `width`: Width of the video, if known
- `height`: Height of the video, if known
- `asr`: Audio sampling rate in Hertz
- `fps`: Frame rate
-Also filtering work for comparisons `=` (equals), `!=` (not equals), `^=` (begins with), `$=` (ends with), `*=` (contains) and following string meta fields:
+Filtering also works for comparisons `=` (equals), `^=` (starts with), `$=` (ends with), `*=` (contains) and the following string meta fields:
+
- `ext`: File extension
- `acodec`: Name of the audio codec in use
- `vcodec`: Name of the video codec in use
- `protocol`: The protocol that will be used for the actual download, lower-case (`http`, `https`, `rtsp`, `rtmp`, `rtmpe`, `mms`, `f4m`, `ism`, `http_dash_segments`, `m3u8`, or `m3u8_native`)
- `format_id`: A short description of the format
+Any string comparison may be prefixed with negation `!` in order to produce an opposite comparison, e.g. `!*=` (does not contain).
+
Note that none of the aforementioned meta fields are guaranteed to be present since this solely depends on the metadata obtained by particular extractor, i.e. the metadata offered by the video hoster.
Formats for which the value is not known are excluded unless you put a question mark (`?`) after the operator. You can combine format filters, so `-f "[height <=? 720][tbr>500]"` selects up to 720p videos (or videos where the height is not known) with a bitrate of at least 500 KBit/s.
Format selectors can also be grouped using parentheses, for example if you want to download the best mp4 and webm formats with a height lower than 480 you can use `-f '(mp4,webm)[height<480]'`.
-Since the end of April 2015 and version 2015.04.26, youtube-dl uses `-f bestvideo+bestaudio/best` as the default format selection (see [#5447](https://github.com/rg3/youtube-dl/issues/5447), [#5456](https://github.com/rg3/youtube-dl/issues/5456)). If ffmpeg or avconv are installed this results in downloading `bestvideo` and `bestaudio` separately and muxing them together into a single file giving the best overall quality available. Otherwise it falls back to `best` and results in downloading the best available quality served as a single file. `best` is also needed for videos that don't come from YouTube because they don't provide the audio and video in two different files. If you want to only download some DASH formats (for example if you are not interested in getting videos with a resolution higher than 1080p), you can add `-f bestvideo[height<=?1080]+bestaudio/best` to your configuration file. Note that if you use youtube-dl to stream to `stdout` (and most likely to pipe it to your media player then), i.e. you explicitly specify output template as `-o -`, youtube-dl still uses `-f best` format selection in order to start content delivery immediately to your player and not to wait until `bestvideo` and `bestaudio` are downloaded and muxed.
+Since the end of April 2015 and version 2015.04.26, youtube-dl uses `-f bestvideo+bestaudio/best` as the default format selection (see [#5447](https://github.com/ytdl-org/youtube-dl/issues/5447), [#5456](https://github.com/ytdl-org/youtube-dl/issues/5456)). If ffmpeg or avconv are installed this results in downloading `bestvideo` and `bestaudio` separately and muxing them together into a single file giving the best overall quality available. Otherwise it falls back to `best` and results in downloading the best available quality served as a single file. `best` is also needed for videos that don't come from YouTube because they don't provide the audio and video in two different files. If you want to only download some DASH formats (for example if you are not interested in getting videos with a resolution higher than 1080p), you can add `-f bestvideo[height<=?1080]+bestaudio/best` to your configuration file. Note that if you use youtube-dl to stream to `stdout` (and most likely to pipe it to your media player then), i.e. you explicitly specify output template as `-o -`, youtube-dl still uses `-f best` format selection in order to start content delivery immediately to your player and not to wait until `bestvideo` and `bestaudio` are downloaded and muxed.
If you want to preserve the old format selection behavior (prior to youtube-dl 2015.04.26), i.e. you want to download the best available quality media served as a single file, you should explicitly specify your choice with `-f best`. You may want to add it to the [configuration file](#configuration) in order not to type it every time you run youtube-dl.
# Download best mp4 format available or any other best if no mp4 available
$ youtube-dl -f 'bestvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]/best'
-# Download best format available but not better that 480p
+# Download best format available but no better than 480p
$ youtube-dl -f 'bestvideo[height<=480]+bestaudio/best[height<=480]'
# Download best video only format but no bigger than 50 MB
### How do I update youtube-dl?
-If you've followed [our manual installation instructions](https://rg3.github.io/youtube-dl/download.html), you can simply run `youtube-dl -U` (or, on Linux, `sudo youtube-dl -U`).
+If you've followed [our manual installation instructions](https://ytdl-org.github.io/youtube-dl/download.html), you can simply run `youtube-dl -U` (or, on Linux, `sudo youtube-dl -U`).
If you have used pip, a simple `sudo pip install -U youtube-dl` is sufficient to update.
sudo apt-get remove -y youtube-dl
-Afterwards, simply follow [our manual installation instructions](https://rg3.github.io/youtube-dl/download.html):
+Afterwards, simply follow [our manual installation instructions](https://ytdl-org.github.io/youtube-dl/download.html):
```
sudo wget https://yt-dl.org/latest/youtube-dl -O /usr/local/bin/youtube-dl
### I get HTTP error 402 when trying to download a video. What's this?
-Apparently YouTube requires you to pass a CAPTCHA test if you download too much. We're [considering to provide a way to let you solve the CAPTCHA](https://github.com/rg3/youtube-dl/issues/154), but at the moment, your best course of action is pointing a web browser to the youtube URL, solving the CAPTCHA, and restart youtube-dl.
+Apparently YouTube requires you to pass a CAPTCHA test if you download too much. We're [considering providing a way to let you solve the CAPTCHA](https://github.com/ytdl-org/youtube-dl/issues/154), but at the moment, your best course of action is pointing a web browser to the YouTube URL, solving the CAPTCHA, and restarting youtube-dl.
### Do I need any other programs?
### What is this binary file? Where has the code gone?
-Since June 2012 ([#342](https://github.com/rg3/youtube-dl/issues/342)) youtube-dl is packed as an executable zipfile, simply unzip it (might need renaming to `youtube-dl.zip` first on some systems) or clone the git repository, as laid out above. If you modify the code, you can run it by executing the `__main__.py` file. To recompile the executable, run `make youtube-dl`.
+Since June 2012 ([#342](https://github.com/ytdl-org/youtube-dl/issues/342)) youtube-dl is packed as an executable zipfile, simply unzip it (might need renaming to `youtube-dl.zip` first on some systems) or clone the git repository, as laid out above. If you modify the code, you can run it by executing the `__main__.py` file. To recompile the executable, run `make youtube-dl`.
### The exe throws an error due to missing `MSVCR100.dll`
When youtube-dl knows that one particular downloader works better for a given website, that downloader will be picked. Otherwise, youtube-dl will pick the best downloader for general compatibility, which at the moment happens to be ffmpeg. This choice may change in future versions of youtube-dl, with improvements of the built-in downloader and/or ffmpeg.
-In particular, the generic extractor (used when your website is not in the [list of supported sites by youtube-dl](https://rg3.github.io/youtube-dl/supportedsites.html) cannot mandate one specific downloader.
+In particular, the generic extractor (used when your website is not in the [list of supported sites by youtube-dl](https://ytdl-org.github.io/youtube-dl/supportedsites.html)) cannot mandate one specific downloader.
If you put either `--hls-prefer-native` or `--hls-prefer-ffmpeg` into your configuration, a different subset of videos will fail to download correctly. Instead, it is much better to [file an issue](https://yt-dl.org/bug) or a pull request which details why the native or the ffmpeg HLS downloader is a better choice for your use case.
# DEVELOPER INSTRUCTIONS
-Most users do not need to build youtube-dl and can [download the builds](https://rg3.github.io/youtube-dl/download.html) or get them from their distribution.
+Most users do not need to build youtube-dl and can [download the builds](https://ytdl-org.github.io/youtube-dl/download.html) or get them from their distribution.
To run youtube-dl as a developer, you don't need to build anything either. Simply execute
After you have ensured this site is distributing its content legally, you can follow this quick list (assuming your service is called `yourextractor`):
-1. [Fork this repository](https://github.com/rg3/youtube-dl/fork)
+1. [Fork this repository](https://github.com/ytdl-org/youtube-dl/fork)
2. Check out the source code with:
git clone git@github.com:YOUR_GITHUB_USERNAME/youtube-dl.git
# TODO more properties (see youtube_dl/extractor/common.py)
}
```
-5. Add an import in [`youtube_dl/extractor/extractors.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/extractors.py).
+5. Add an import in [`youtube_dl/extractor/extractors.py`](https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/extractor/extractors.py).
6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc. Note that tests with `only_matching` key in test's dict are not counted in.
-7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/rg3/youtube-dl/blob/7f41a598b3fba1bcab2817de64a08941200aa3c8/youtube_dl/extractor/common.py#L94-L303). Add tests and code for as many as you want.
+7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/ytdl-org/youtube-dl/blob/7f41a598b3fba1bcab2817de64a08941200aa3c8/youtube_dl/extractor/common.py#L94-L303). Add tests and code for as many as you want.
8. Make sure your code follows [youtube-dl coding conventions](#youtube-dl-coding-conventions) and check the code with [flake8](http://flake8.pycqa.org/en/latest/index.html#quickstart):
$ flake8 youtube_dl/extractor/yourextractor.py
### Mandatory and optional metafields
-For extraction to work youtube-dl relies on metadata your extractor extracts and provides to youtube-dl expressed by an [information dictionary](https://github.com/rg3/youtube-dl/blob/7f41a598b3fba1bcab2817de64a08941200aa3c8/youtube_dl/extractor/common.py#L94-L303) or simply *info dict*. Only the following meta fields in the *info dict* are considered mandatory for a successful extraction process by youtube-dl:
+For extraction to work youtube-dl relies on metadata your extractor extracts and provides to youtube-dl expressed by an [information dictionary](https://github.com/ytdl-org/youtube-dl/blob/7f41a598b3fba1bcab2817de64a08941200aa3c8/youtube_dl/extractor/common.py#L94-L303) or simply *info dict*. Only the following meta fields in the *info dict* are considered mandatory for a successful extraction process by youtube-dl:
- `id` (media identifier)
- `title` (media title)
In fact only the last option is technically mandatory (i.e. if you can't figure out the download location of the media the extraction does not make any sense). But by convention youtube-dl also treats `id` and `title` as mandatory. Thus the aforementioned metafields are the critical data that the extraction does not make any sense without and if any of them fail to be extracted then the extractor is considered completely broken.
-[Any field](https://github.com/rg3/youtube-dl/blob/7f41a598b3fba1bcab2817de64a08941200aa3c8/youtube_dl/extractor/common.py#L188-L303) apart from the aforementioned ones are considered **optional**. That means that extraction should be **tolerant** to situations when sources for these fields can potentially be unavailable (even if they are always available at the moment) and **future-proof** in order not to break the extraction of general purpose mandatory fields.
+[Any field](https://github.com/ytdl-org/youtube-dl/blob/7f41a598b3fba1bcab2817de64a08941200aa3c8/youtube_dl/extractor/common.py#L188-L303) apart from the aforementioned ones are considered **optional**. That means that extraction should be **tolerant** to situations when sources for these fields can potentially be unavailable (even if they are always available at the moment) and **future-proof** in order not to break the extraction of general purpose mandatory fields.
#### Example
'PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4'
```
-### Use safe conversion functions
+### Use convenience conversion and parsing functions
-Wrap all extracted numeric data into safe functions from [`youtube_dl/utils.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/utils.py): `int_or_none`, `float_or_none`. Use them for string to number conversions as well.
+Wrap all extracted numeric data into safe functions from [`youtube_dl/utils.py`](https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/utils.py): `int_or_none`, `float_or_none`. Use them for string to number conversions as well.
Use `url_or_none` for safe URL processing.
Use `try_get` for safe metadata extraction from parsed JSON.
-Explore [`youtube_dl/utils.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/utils.py) for more useful convenience functions.
+Use `unified_strdate` for uniform `upload_date` or any `YYYYMMDD` meta field extraction, `unified_timestamp` for uniform `timestamp` extraction, `parse_filesize` for `filesize` extraction, `parse_count` for count meta fields extraction, `parse_resolution`, `parse_duration` for `duration` extraction, `parse_age_limit` for `age_limit` extraction.
+
+Explore [`youtube_dl/utils.py`](https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/utils.py) for more useful convenience functions.
#### More examples
# EMBEDDING YOUTUBE-DL
-youtube-dl makes the best effort to be a good command-line program, and thus should be callable from any programming language. If you encounter any problems parsing its output, feel free to [create a report](https://github.com/rg3/youtube-dl/issues/new).
+youtube-dl makes the best effort to be a good command-line program, and thus should be callable from any programming language. If you encounter any problems parsing its output, feel free to [create a report](https://github.com/ytdl-org/youtube-dl/issues/new).
From a Python program, you can embed youtube-dl in a more powerful fashion, like this:
ydl.download(['https://www.youtube.com/watch?v=BaW_jenozKc'])
```
-Most likely, you'll want to use various options. For a list of options available, have a look at [`youtube_dl/YoutubeDL.py`](https://github.com/rg3/youtube-dl/blob/3e4cedf9e8cd3157df2457df7274d0c842421945/youtube_dl/YoutubeDL.py#L137-L312). For a start, if you want to intercept youtube-dl's output, set a `logger` object.
+Most likely, you'll want to use various options. For a list of options available, have a look at [`youtube_dl/YoutubeDL.py`](https://github.com/ytdl-org/youtube-dl/blob/3e4cedf9e8cd3157df2457df7274d0c842421945/youtube_dl/YoutubeDL.py#L137-L312). For a start, if you want to intercept youtube-dl's output, set a `logger` object.
Here's a more complete example of a program that outputs only errors (and a short message after the download is finished), and downloads/converts the video to an mp3 file:
# BUGS
-Bugs and suggestions should be reported at: <https://github.com/rg3/youtube-dl/issues>. Unless you were prompted to or there is another pertinent reason (e.g. GitHub fails to accept the bug report), please do not send bug reports via personal email. For discussions, join us in the IRC channel [#youtube-dl](irc://chat.freenode.net/#youtube-dl) on freenode ([webchat](https://webchat.freenode.net/?randomnick=1&channels=youtube-dl)).
+Bugs and suggestions should be reported at: <https://github.com/ytdl-org/youtube-dl/issues>. Unless you were prompted to or there is another pertinent reason (e.g. GitHub fails to accept the bug report), please do not send bug reports via personal email. For discussions, join us in the IRC channel [#youtube-dl](irc://chat.freenode.net/#youtube-dl) on freenode ([webchat](https://webchat.freenode.net/?randomnick=1&channels=youtube-dl)).
**Please include the full output of youtube-dl when run with `-v`**, i.e. **add** `-v` flag to **your command line**, copy the **whole** output and post it in the issue body wrapped in \`\`\` for better formatting. It should look similar to this:
```
### Is the issue already documented?
-Make sure that someone has not already opened the issue you're trying to open. Search at the top of the window or browse the [GitHub Issues](https://github.com/rg3/youtube-dl/search?type=Issues) of this repository. If there is an issue, feel free to write something along the lines of "This affects me as well, with version 2015.01.01. Here is some more information on the issue: ...". While some issues may be old, a new post into them often spurs rapid activity.
+Make sure that someone has not already opened the issue you're trying to open. Search at the top of the window or browse the [GitHub Issues](https://github.com/ytdl-org/youtube-dl/search?type=Issues) of this repository. If there is an issue, feel free to write something along the lines of "This affects me as well, with version 2015.01.01. Here is some more information on the issue: ...". While some issues may be old, a new post into them often spurs rapid activity.
### Why are existing options not enough?
-Before requesting a new feature, please have a quick peek at [the list of supported options](https://github.com/rg3/youtube-dl/blob/master/README.md#options). Many feature requests are for features that actually exist already! Please, absolutely do show off your work in the issue report and detail how the existing similar options do *not* solve your problem.
+Before requesting a new feature, please have a quick peek at [the list of supported options](https://github.com/ytdl-org/youtube-dl/blob/master/README.md#options). Many feature requests are for features that actually exist already! Please, absolutely do show off your work in the issue report and detail how the existing similar options do *not* solve your problem.
### Is there enough context in your bug report?
will download the best quality format with the webm extension served as
a single file.
-You can also use special names to select particular edge case formats: -
-best: Select the best quality format represented by a single file with
-video and audio. - worst: Select the worst quality format represented by
-a single file with video and audio. - bestvideo: Select the best quality
-video-only format (e.g. DASH video). May not be available. - worstvideo:
-Select the worst quality video-only format. May not be available. -
-bestaudio: Select the best quality audio only-format. May not be
-available. - worstaudio: Select the worst quality audio only-format. May
-not be available.
+You can also use special names to select particular edge case formats:
+
+- best: Select the best quality format represented by a single file
+ with video and audio.
+- worst: Select the worst quality format represented by a single file
+ with video and audio.
+- bestvideo: Select the best quality video-only format (e.g. DASH
+ video). May not be available.
+- worstvideo: Select the worst quality video-only format. May not be
+ available.
+- bestaudio: Select the best quality audio only-format. May not be
+ available.
+- worstaudio: Select the worst quality audio only-format. May not be
+ available.
For example, to download the worst quality video-only format you can use
-f worstvideo.
brackets, as in -f "best[height=720]" (or -f "[filesize>10M]").
The following numeric meta fields can be used with comparisons <, <=, >,
->=, = (equals), != (not equals): - filesize: The number of bytes, if
-known in advance - width: Width of the video, if known - height: Height
-of the video, if known - tbr: Average bitrate of audio and video in
-KBit/s - abr: Average audio bitrate in KBit/s - vbr: Average video
-bitrate in KBit/s - asr: Audio sampling rate in Hertz - fps: Frame rate
-
-Also filtering work for comparisons = (equals), != (not equals), ^=
-(begins with), $= (ends with), *= (contains) and following string meta
-fields: - ext: File extension - acodec: Name of the audio codec in use -
-vcodec: Name of the video codec in use - container: Name of the
-container format - protocol: The protocol that will be used for the
-actual download, lower-case (http, https, rtsp, rtmp, rtmpe, mms, f4m,
-ism, http_dash_segments, m3u8, or m3u8_native) - format_id: A short
-description of the format
+>=, = (equals), != (not equals):
+
+- filesize: The number of bytes, if known in advance
+- width: Width of the video, if known
+- height: Height of the video, if known
+- tbr: Average bitrate of audio and video in KBit/s
+- abr: Average audio bitrate in KBit/s
+- vbr: Average video bitrate in KBit/s
+- asr: Audio sampling rate in Hertz
+- fps: Frame rate
+
+Filtering also works for comparisons = (equals), ^= (starts with), $=
+(ends with), *= (contains) and the following string meta fields:
+
+- ext: File extension
+- acodec: Name of the audio codec in use
+- vcodec: Name of the video codec in use
+- container: Name of the container format
+- protocol: The protocol that will be used for the actual download,
+ lower-case (http, https, rtsp, rtmp, rtmpe, mms, f4m, ism,
+ http_dash_segments, m3u8, or m3u8_native)
+- format_id: A short description of the format
+
+Any string comparison may be prefixed with negation ! in order to
+produce an opposite comparison, e.g. !*= (does not contain).
Note that none of the aforementioned meta fields are guaranteed to be
present since this solely depends on the metadata obtained by particular
# Download best mp4 format available or any other best if no mp4 available
$ youtube-dl -f 'bestvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]/best'
- # Download best format available but not better that 480p
+ # Download best format available but no better than 480p
$ youtube-dl -f 'bestvideo[height<=480]+bestaudio/best[height<=480]'
# Download best video only format but no bigger than 50 MB
'https://www.youtube.com/watch?v=FqZTN594JQw&list='
'PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4'
-Use safe conversion functions
+Use convenience conversion and parsing functions
Wrap all extracted numeric data into safe functions from
youtube_dl/utils.py: int_or_none, float_or_none. Use them for string to
Use try_get for safe metadata extraction from parsed JSON.
+Use unified_strdate for uniform upload_date or any YYYYMMDD meta field
+extraction, unified_timestamp for uniform timestamp extraction,
+parse_filesize for filesize extraction, parse_count for count meta
+fields extraction, parse_resolution, parse_duration for duration
+extraction, parse_age_limit for age_limit extraction.
+
Explore youtube_dl/utils.py for more useful convenience functions.
More examples
Bugs and suggestions should be reported at:
-https://github.com/rg3/youtube-dl/issues. Unless you were prompted to or
-there is another pertinent reason (e.g. GitHub fails to accept the bug
-report), please do not send bug reports via personal email. For
+https://github.com/ytdl-org/youtube-dl/issues. Unless you were prompted
+to or there is another pertinent reason (e.g. GitHub fails to accept the
+bug report), please do not send bug reports via personal email. For
discussions, join us in the IRC channel #youtube-dl on freenode
(webchat).
class YoutubeDLBuilder(object):
- authorizedUsers = ['fraca7', 'phihag', 'rg3', 'FiloSottile']
+ authorizedUsers = ['fraca7', 'phihag', 'rg3', 'FiloSottile', 'ytdl-org']
def __init__(self, **kwargs):
if self.repoName != 'youtube-dl':
RESULT = ('.' + domain + '\n' in LIST or '\n' + domain + '\n' in LIST)
- if RESULT and ('info_dict' not in test or 'age_limit' not in test['info_dict'] or
- test['info_dict']['age_limit'] != 18):
+ if RESULT and ('info_dict' not in test or 'age_limit' not in test['info_dict']
+ or test['info_dict']['age_limit'] != 18):
print('\nPotential missing age_limit check: {0}'.format(test['name']))
- elif not RESULT and ('info_dict' in test and 'age_limit' in test['info_dict'] and
- test['info_dict']['age_limit'] == 18):
+ elif not RESULT and ('info_dict' in test and 'age_limit' in test['info_dict']
+ and test['info_dict']['age_limit'] == 18):
print('\nPotential false negative: {0}'.format(test['name']))
else:
class GitHubReleaser(object):
- _API_URL = 'https://api.github.com/repos/rg3/youtube-dl/releases'
- _UPLOADS_URL = 'https://uploads.github.com/repos/rg3/youtube-dl/releases/%s/assets?name=%s'
+ _API_URL = 'https://api.github.com/repos/ytdl-org/youtube-dl/releases'
+ _UPLOADS_URL = 'https://uploads.github.com/repos/ytdl-org/youtube-dl/releases/%s/assets?name=%s'
_NETRC_MACHINE = 'github.com'
def __init__(self, debuglevel=0):
atom_template = textwrap.dedent("""\
<?xml version="1.0" encoding="utf-8"?>
<feed xmlns="http://www.w3.org/2005/Atom">
- <link rel="self" href="http://rg3.github.io/youtube-dl/update/releases.atom" />
+ <link rel="self" href="http://ytdl-org.github.io/youtube-dl/update/releases.atom" />
<title>youtube-dl releases</title>
<id>https://yt-dl.org/feed/youtube-dl-updates-feed</id>
<updated>@TIMESTAMP@</updated>
<entry>
<id>https://yt-dl.org/feed/youtube-dl-updates-feed/youtube-dl-@VERSION@</id>
<title>New version @VERSION@</title>
- <link href="http://rg3.github.io/youtube-dl" />
+ <link href="http://ytdl-org.github.io/youtube-dl" />
<content type="xhtml">
<div xmlns="http://www.w3.org/1999/xhtml">
Downloads available at <a href="https://yt-dl.org/downloads/@VERSION@/">https://yt-dl.org/downloads/@VERSION@/</a>
sed -i "s/<unreleased>/$version/" ChangeLog
/bin/echo -e "\n### Committing documentation, templates and youtube_dl/version.py..."
-make README.md CONTRIBUTING.md .github/ISSUE_TEMPLATE.md supportedsites
-git add README.md CONTRIBUTING.md .github/ISSUE_TEMPLATE.md docs/supportedsites.md youtube_dl/version.py ChangeLog
+make README.md CONTRIBUTING.md issuetemplates supportedsites
+git add README.md CONTRIBUTING.md .github/ISSUE_TEMPLATE/1_broken_site.md .github/ISSUE_TEMPLATE/2_site_support_request.md .github/ISSUE_TEMPLATE/3_site_feature_request.md .github/ISSUE_TEMPLATE/4_bug_report.md .github/ISSUE_TEMPLATE/5_feature_request.md .github/ISSUE_TEMPLATE/6_question.md docs/supportedsites.md youtube_dl/version.py ChangeLog
git commit $gpg_sign_commits -m "release $version"
/bin/echo -e "\n### Now tagging, signing and pushing..."
REV=$(git rev-parse HEAD)
make youtube-dl youtube-dl.tar.gz
read -p "VM running? (y/n) " -n 1
-wget "http://$buildserver/build/rg3/youtube-dl/youtube-dl.exe?rev=$REV" -O youtube-dl.exe
+wget "http://$buildserver/build/ytdl-org/youtube-dl/youtube-dl.exe?rev=$REV" -O youtube-dl.exe
mkdir -p "build/$version"
mv youtube-dl youtube-dl.exe "build/$version"
mv youtube-dl.tar.gz "build/$version/youtube-dl-$version.tar.gz"
for page in itertools.count(1):
releases = json.loads(compat_urllib_request.urlopen(
- 'https://api.github.com/repos/rg3/youtube-dl/releases?page=%s' % page
+ 'https://api.github.com/repos/ytdl-org/youtube-dl/releases?page=%s' % page
).read().decode('utf-8'))
if not releases:
- **acast:channel**
- **AddAnime**
- **ADN**: Anime Digital Network
+ - **AdobeConnect**
- **AdobeTV**
- **AdobeTVChannel**
- **AdobeTVShow**
- **AmericasTestKitchen**
- **anderetijden**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl
- **AnimeOnDemand**
- - **anitube.se**
- **Anvato**
- - **AnySex**
+ - **aol.com**
- **APA**
- **Aparat**
- **AppleConnect**
- **AudioBoom**
- **audiomack**
- **audiomack:album**
- - **auroravid**: AuroraVid
- **AWAAN**
- **awaan:live**
- **awaan:season**
- **Bellator**
- **BellMedia**
- **Bet**
+ - **bfi:player**
- **Bigflix**
- **Bild**: Bild.de
- **BiliBili**
- **CBSInteractive**
- **CBSLocal**
- **cbsnews**: CBS News
+ - **cbsnews:embed**
- **cbsnews:livevideo**: CBS News Live Videos
- **CBSSports**
- **CCMA**
- **chirbit**
- **chirbit:profile**
- **Cinchcast**
+ - **Cinemax**
- **CiscoLiveSearch**
- **CiscoLiveSession**
- **CJSW**
- **Clipsyndicate**
- **CloserToTruth**
- **CloudflareStream**
- - **cloudtime**: CloudTime
- **Cloudy**
- **Clubic**
- **Clyp**
- **Coub**
- **Cracked**
- **Crackle**
- - **Criterion**
- **CrooksAndLiars**
- **crunchyroll**
- **crunchyroll:playlist**
- **CSpan**: C-SPAN
- **CtsNews**: 華視新聞
- **CTVNews**
+ - **cu.ntv.co.jp**: Nippon Television Network
- **Culturebox**
- **CultureUnplugged**
- **curiositystream**
- **DouyuTV**: 斗鱼
- **DPlay**
- **DPlayIt**
- - **dramafever**
- - **dramafever:series**
- **DRBonanza**
- **Dropbox**
- **DrTuber**
- **Groupon**
- **Hark**
- **hbo**
- - **hbo:episode**
- **HearThisAt**
- **Heise**
- **HellPorno**
- **hitbox**
- **hitbox:live**
- **HitRecord**
+ - **hketv**: 香港教育局教育電視 (HKETV) Educational Television, Hong Kong Educational Bureau
- **HornBunny**
- **HotNewHipHop**
- **hotstar**
- **IndavideoEmbed**
- **InfoQ**
- **Instagram**
+ - **instagram:tag**: Instagram hashtag search
- **instagram:user**: Instagram user profile
- **Internazionale**
- **InternetVideoArchive**
- **LineTV**
- **linkedin:learning**
- **linkedin:learning:course**
+ - **LinuxAcademy**
- **LiTV**
- **LiveLeak**
- **LiveLeakEmbed**
- **mailru:music**: Музыка@Mail.Ru
- **mailru:music:search**: Музыка@Mail.Ru
- **MakerTV**
+ - **MallTV**
- **mangomolo:live**
- **mangomolo:video**
- **ManyVids**
- **MatchTV**
- **MDR**: MDR.DE and KiKA
- **media.ccc.de**
+ - **media.ccc.de:lists**
- **Medialaan**
- **Mediaset**
- **Mediasite**
+ - **MediasiteCatalog**
+ - **MediasiteNamedCatalog**
- **Medici**
- **megaphone.fm**: megaphone.fm embedded players
- **Meipai**: 美拍
- **MyVisionTV**
- **n-tv.de**
- **natgeo:video**
+ - **NationalGeographicTV**
- **Naver**
- **NBA**
- **NBC**
- **nowness**
- **nowness:playlist**
- **nowness:series**
- - **nowvideo**: NowVideo
- **Noz**
- **npo**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl
- **npo.nl:live**
- **NRKTVEpisodes**
- **NRKTVSeason**
- **NRKTVSeries**
+ - **NRLTV**
- **ntv.ru**
- **Nuvid**
- **NYTimes**
- **OdaTV**
- **Odnoklassniki**
- **OktoberfestTV**
- - **on.aol.com**
- **OnDemandKorea**
- **onet.pl**
- **onet.tv**
- **Piksel**
- **Pinkbike**
- **Pladform**
+ - **Platzi**
+ - **PlatziCourse**
- **play.fm**
- **PlayPlusTV**
- **PlaysTV**
- **PopcornTV**
- **PornCom**
- **PornerBros**
- - **PornFlip**
- **PornHd**
- **PornHub**: PornHub and Thumbzilla
- **PornHubPlaylist**
- **PornoXO**
- **PornTube**
- **PressTV**
- - **PrimeShareTV**
- **PromptFile**
- **prosiebensat1**: ProSiebenSat.1 Digital
- **puhutv**
- **radio.de**
- **radiobremen**
- **radiocanada**
- - **RadioCanadaAudioVideo**
+ - **radiocanada:audiovideo**
- **radiofrance**
- **RadioJavan**
- **Rai**
- **RBMARadio**
- **RDS**: RDS.ca
- **RedBullTV**
+ - **RedBullTVRrnContent**
- **Reddit**
- **RedditR**
- **RedTube**
- **RTVS**
- **Rudo**
- **RUHD**
- - **RulePorn**
- **rutube**: Rutube videos
- **rutube:channel**: Rutube channels
- **rutube:embed**: Rutube embedded videos
- **safari:api**
- **safari:course**: safaribooksonline.com online courses
- **SAKTV**
+ - **SaltTV**
- **Sapo**: SAPO Vídeos
- **savefrom.net**
- **SBS**: sbs.com.au
- **ShowRoomLive**
- **Sina**
- **SkylineWebcams**
+ - **SkyNews**
- **skynewsarabia:article**
- **skynewsarabia:video**
- **SkySports**
- **southpark.nl**
- **southparkstudios.dk**
- **SpankBang**
+ - **SpankBangPlaylist**
- **Spankwire**
- **Spiegel**
- **Spiegel:Article**: Articles on spiegel.de
- **StreamCZ**
- **StreetVoice**
- **StretchInternet**
+ - **stv:player**
- **SunPorno**
+ - **sverigesradio:episode**
+ - **sverigesradio:publication**
- **SVT**
- **SVTPage**
- **SVTPlay**: SVT Play and Öppet arkiv
- **teachertube:user:collection**: teachertube.com user and collection videos
- **TeachingChannel**
- **Teamcoco**
+ - **TeamTreeHouse**
- **TechTalks**
- **techtv.mit.edu**
- **ted**
- **ToypicsUser**: Toypics user profile
- **TrailerAddict** (Currently broken)
- **Trilulilu**
+ - **TruNews**
- **TruTV**
- **Tube8**
- **TubiTv**
- **Vbox7**
- **VeeHD**
- **Veoh**
+ - **verystream**
- **Vessel**
- **Vesti**: Вести.Ru
- **Vevo**
- **video.mit.edu**
- **VideoDetective**
- **videofy.me**
- - **VideoMega**
- **videomore**
- **videomore:season**
- **videomore:video**
- **VideoPremium**
- **VideoPress**
- - **videoweed**: VideoWeed
- **Vidio**
- **VidLii**
- **vidme**
- **Voot**
- **VoxMedia**
- **VoxMediaVolume**
- - **Vporn**
- **vpro**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl
- **Vrak**
- - **VRT**: deredactie.be, sporza.be, cobra.be and cobra.canvas.be
+ - **VRT**: VRT NWS, Flanders News, Flandern Info and Sporza
- **VrtNU**: VrtNU.be
- **vrv**
- **vrv:series**
- **VVVVID**
- **VyboryMos**
- **Vzaar**
+ - **Wakanim**
- **Walla**
- **WalyTV**
- **washingtonpost**
- **Weibo**
- **WeiboMobile**
- **WeiqiTV**: WQTV
- - **wholecloud**: WholeCloud
- **Wimp**
- **Wistia**
- **wnl**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl
- **WorldStarHipHop**
- - **wrzuta.pl**
- - **wrzuta.pl:playlist**
- **WSJ**: Wall Street Journal
- **WSJArticle**
- **WWE**
- **XVideos**
- **XXXYMovies**
- **Yahoo**: Yahoo screen and movies
+ - **yahoo:gyao**
+ - **yahoo:gyao:player**
- **YandexDisk**
- **yandexmusic:album**: Яндекс.Музыка - Альбом
- **yandexmusic:playlist**: Яндекс.Музыка - Плейлист
- **yandexmusic:track**: Яндекс.Музыка - Трек
+ - **YandexVideo**
- **YapFiles**
- **YesJapan**
- **yinyuetai:video**: 音悦Tai
[flake8]
exclude = youtube_dl/extractor/__init__.py,devscripts/buildserver.py,devscripts/lazy_load_template.py,devscripts/make_issue_template.py,setup.py,build,.git,venv
-ignore = E402,E501,E731,E741
+ignore = E402,E501,E731,E741,W503
version=__version__,
description=DESCRIPTION,
long_description=LONG_DESCRIPTION,
- url='https://github.com/rg3/youtube-dl',
+ url='https://github.com/ytdl-org/youtube-dl',
author='Ricardo Garcia',
author_email='ytdl@yt-dl.org',
maintainer='Sergey M.',
<meta content='Foo' property=og:foobar>
<meta name="og:test1" content='foo > < bar'/>
<meta name="og:test2" content="foo >//< bar"/>
+ <meta property=og-test3 content='Ill-formatted opengraph'/>
'''
self.assertEqual(ie._og_search_title(html), 'Foo')
self.assertEqual(ie._og_search_description(html), 'Some video\'s description ')
self.assertEqual(ie._og_search_property('foobar', html), 'Foo')
self.assertEqual(ie._og_search_property('test1', html), 'foo > < bar')
self.assertEqual(ie._og_search_property('test2', html), 'foo >//< bar')
+ self.assertEqual(ie._og_search_property('test3', html), 'Ill-formatted opengraph')
self.assertEqual(ie._og_search_property(('test0', 'test1'), html), 'foo > < bar')
self.assertRaises(RegexNotFoundError, ie._og_search_property, 'test0', html, None, fatal=True)
self.assertRaises(RegexNotFoundError, ie._og_search_property, ('test0', 'test00'), html, None, fatal=True)
self.assertRaises(ExtractorError, self.ie._download_json, uri, None)
self.assertEqual(self.ie._download_json(uri, None, fatal=False), None)
+ def test_parse_html5_media_entries(self):
+ # from https://www.r18.com/
+ # with kbps in label
+ expect_dict(
+ self,
+ self.ie._parse_html5_media_entries(
+ 'https://www.r18.com/',
+ r'''
+ <video id="samplevideo_amateur" class="js-samplevideo video-js vjs-default-skin vjs-big-play-centered" controls preload="auto" width="400" height="225" poster="//pics.r18.com/digital/amateur/mgmr105/mgmr105jp.jpg">
+ <source id="video_source" src="https://awscc3001.r18.com/litevideo/freepv/m/mgm/mgmr105/mgmr105_sm_w.mp4" type="video/mp4" res="240" label="300kbps">
+ <source id="video_source" src="https://awscc3001.r18.com/litevideo/freepv/m/mgm/mgmr105/mgmr105_dm_w.mp4" type="video/mp4" res="480" label="1000kbps">
+ <source id="video_source" src="https://awscc3001.r18.com/litevideo/freepv/m/mgm/mgmr105/mgmr105_dmb_w.mp4" type="video/mp4" res="740" label="1500kbps">
+ <p>Your browser does not support the video tag.</p>
+ </video>
+ ''', None)[0],
+ {
+ 'formats': [{
+ 'url': 'https://awscc3001.r18.com/litevideo/freepv/m/mgm/mgmr105/mgmr105_sm_w.mp4',
+ 'ext': 'mp4',
+ 'format_id': '300kbps',
+ 'height': 240,
+ 'tbr': 300,
+ }, {
+ 'url': 'https://awscc3001.r18.com/litevideo/freepv/m/mgm/mgmr105/mgmr105_dm_w.mp4',
+ 'ext': 'mp4',
+ 'format_id': '1000kbps',
+ 'height': 480,
+ 'tbr': 1000,
+ }, {
+ 'url': 'https://awscc3001.r18.com/litevideo/freepv/m/mgm/mgmr105/mgmr105_dmb_w.mp4',
+ 'ext': 'mp4',
+ 'format_id': '1500kbps',
+ 'height': 740,
+ 'tbr': 1500,
+ }],
+ 'thumbnail': '//pics.r18.com/digital/amateur/mgmr105/mgmr105jp.jpg'
+ })
+
+ # from https://www.csfd.cz/
+ # with width and height
+ expect_dict(
+ self,
+ self.ie._parse_html5_media_entries(
+ 'https://www.csfd.cz/',
+ r'''
+ <video width="770" height="328" preload="none" controls poster="https://img.csfd.cz/files/images/film/video/preview/163/344/163344118_748d20.png?h360" >
+ <source src="https://video.csfd.cz/files/videos/157/750/157750813/163327358_eac647.mp4" type="video/mp4" width="640" height="360">
+ <source src="https://video.csfd.cz/files/videos/157/750/157750813/163327360_3d2646.mp4" type="video/mp4" width="1280" height="720">
+ <source src="https://video.csfd.cz/files/videos/157/750/157750813/163327356_91f258.mp4" type="video/mp4" width="1920" height="1080">
+ <source src="https://video.csfd.cz/files/videos/157/750/157750813/163327359_962b4a.webm" type="video/webm" width="640" height="360">
+ <source src="https://video.csfd.cz/files/videos/157/750/157750813/163327361_6feee0.webm" type="video/webm" width="1280" height="720">
+ <source src="https://video.csfd.cz/files/videos/157/750/157750813/163327357_8ab472.webm" type="video/webm" width="1920" height="1080">
+ <track src="https://video.csfd.cz/files/subtitles/163/344/163344115_4c388b.srt" type="text/x-srt" kind="subtitles" srclang="cs" label="cs">
+ </video>
+ ''', None)[0],
+ {
+ 'formats': [{
+ 'url': 'https://video.csfd.cz/files/videos/157/750/157750813/163327358_eac647.mp4',
+ 'ext': 'mp4',
+ 'width': 640,
+ 'height': 360,
+ }, {
+ 'url': 'https://video.csfd.cz/files/videos/157/750/157750813/163327360_3d2646.mp4',
+ 'ext': 'mp4',
+ 'width': 1280,
+ 'height': 720,
+ }, {
+ 'url': 'https://video.csfd.cz/files/videos/157/750/157750813/163327356_91f258.mp4',
+ 'ext': 'mp4',
+ 'width': 1920,
+ 'height': 1080,
+ }, {
+ 'url': 'https://video.csfd.cz/files/videos/157/750/157750813/163327359_962b4a.webm',
+ 'ext': 'webm',
+ 'width': 640,
+ 'height': 360,
+ }, {
+ 'url': 'https://video.csfd.cz/files/videos/157/750/157750813/163327361_6feee0.webm',
+ 'ext': 'webm',
+ 'width': 1280,
+ 'height': 720,
+ }, {
+ 'url': 'https://video.csfd.cz/files/videos/157/750/157750813/163327357_8ab472.webm',
+ 'ext': 'webm',
+ 'width': 1920,
+ 'height': 1080,
+ }],
+ 'subtitles': {
+ 'cs': [{'url': 'https://video.csfd.cz/files/subtitles/163/344/163344115_4c388b.srt'}]
+ },
+ 'thumbnail': 'https://img.csfd.cz/files/images/film/video/preview/163/344/163344118_748d20.png?h360'
+ })
+
+ # from https://tamasha.com/v/Kkdjw
+ # with height in label
+ expect_dict(
+ self,
+ self.ie._parse_html5_media_entries(
+ 'https://tamasha.com/v/Kkdjw',
+ r'''
+ <video crossorigin="anonymous">
+ <source src="https://s-v2.tamasha.com/statics/videos_file/19/8f/Kkdjw_198feff8577d0057536e905cce1fb61438dd64e0_n_240.mp4" type="video/mp4" label="AUTO" res="0"/>
+ <source src="https://s-v2.tamasha.com/statics/videos_file/19/8f/Kkdjw_198feff8577d0057536e905cce1fb61438dd64e0_n_240.mp4" type="video/mp4"
+ label="240p" res="240"/>
+ <source src="https://s-v2.tamasha.com/statics/videos_file/20/00/Kkdjw_200041c66f657fc967db464d156eafbc1ed9fe6f_n_144.mp4" type="video/mp4"
+ label="144p" res="144"/>
+ </video>
+ ''', None)[0],
+ {
+ 'formats': [{
+ 'url': 'https://s-v2.tamasha.com/statics/videos_file/19/8f/Kkdjw_198feff8577d0057536e905cce1fb61438dd64e0_n_240.mp4',
+ }, {
+ 'url': 'https://s-v2.tamasha.com/statics/videos_file/19/8f/Kkdjw_198feff8577d0057536e905cce1fb61438dd64e0_n_240.mp4',
+ 'ext': 'mp4',
+ 'format_id': '240p',
+ 'height': 240,
+ }, {
+ 'url': 'https://s-v2.tamasha.com/statics/videos_file/20/00/Kkdjw_200041c66f657fc967db464d156eafbc1ed9fe6f_n_144.mp4',
+ 'ext': 'mp4',
+ 'format_id': '144p',
+ 'height': 144,
+ }]
+ })
+
+ # from https://www.directvnow.com
+ # with data-src
+ expect_dict(
+ self,
+ self.ie._parse_html5_media_entries(
+ 'https://www.directvnow.com',
+ r'''
+ <video id="vid1" class="header--video-masked active" muted playsinline>
+ <source data-src="https://cdn.directv.com/content/dam/dtv/prod/website_directvnow-international/videos/DTVN_hdr_HBO_v3.mp4" type="video/mp4" />
+ </video>
+ ''', None)[0],
+ {
+ 'formats': [{
+ 'ext': 'mp4',
+ 'url': 'https://cdn.directv.com/content/dam/dtv/prod/website_directvnow-international/videos/DTVN_hdr_HBO_v3.mp4',
+ }]
+ })
+
+ # from https://www.directvnow.com
+ # with data-src
+ expect_dict(
+ self,
+ self.ie._parse_html5_media_entries(
+ 'https://www.directvnow.com',
+ r'''
+ <video id="vid1" class="header--video-masked active" muted playsinline>
+ <source data-src="https://cdn.directv.com/content/dam/dtv/prod/website_directvnow-international/videos/DTVN_hdr_HBO_v3.mp4" type="video/mp4" />
+ </video>
+ ''', None)[0],
+ {
+ 'formats': [{
+ 'url': 'https://cdn.directv.com/content/dam/dtv/prod/website_directvnow-international/videos/DTVN_hdr_HBO_v3.mp4',
+ 'ext': 'mp4',
+ }]
+ })
+
+ # from https://www.klarna.com/uk/
+ # with data-video-src
+ expect_dict(
+ self,
+ self.ie._parse_html5_media_entries(
+ 'https://www.directvnow.com',
+ r'''
+ <video loop autoplay muted class="responsive-video block-kl__video video-on-medium">
+ <source src="" data-video-desktop data-video-src="https://www.klarna.com/uk/wp-content/uploads/sites/11/2019/01/KL062_Smooth3_0_DogWalking_5s_920x080_.mp4" type="video/mp4" />
+ </video>
+ ''', None)[0],
+ {
+ 'formats': [{
+ 'url': 'https://www.klarna.com/uk/wp-content/uploads/sites/11/2019/01/KL062_Smooth3_0_DogWalking_5s_920x080_.mp4',
+ 'ext': 'mp4',
+ }],
+ })
+
def test_extract_jwplayer_data_realworld(self):
# from http://www.suffolk.edu/sjc/
expect_dict(
def test_parse_m3u8_formats(self):
_TEST_CASES = [
(
- # https://github.com/rg3/youtube-dl/issues/11507
+ # https://github.com/ytdl-org/youtube-dl/issues/11507
# http://pluzz.francetv.fr/videos/le_ministere.html
'pluzz_francetv_11507',
'http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/master.m3u8?caption=2017%2F16%2F156589847-1492488987.m3u8%3Afra%3AFrancais&audiotrack=0%3Afra%3AFrancais',
}]
),
(
- # https://github.com/rg3/youtube-dl/issues/11995
+ # https://github.com/ytdl-org/youtube-dl/issues/11995
# http://teamcoco.com/video/clueless-gamer-super-bowl-for-honor
'teamcoco_11995',
'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/main.m3u8',
}]
),
(
- # https://github.com/rg3/youtube-dl/issues/12211
+ # https://github.com/ytdl-org/youtube-dl/issues/12211
# http://video.toggle.sg/en/series/whoopie-s-world/ep3/478601
'toggle_mobile_12211',
'http://cdnapi.kaltura.com/p/2082311/sp/208231100/playManifest/protocol/http/entryId/0_89q6e8ku/format/applehttp/tags/mobile_sd/f/a.m3u8',
'width': 1280,
'height': 720,
}]
- )
+ ),
+ (
+ # https://github.com/ytdl-org/youtube-dl/issues/18923
+ # https://www.ted.com/talks/boris_hesser_a_grassroots_healthcare_revolution_in_africa
+ 'ted_18923',
+ 'http://hls.ted.com/talks/31241.m3u8',
+ [{
+ 'url': 'http://hls.ted.com/videos/BorisHesser_2018S/audio/600k.m3u8?nobumpers=true&uniqueId=76011e2b',
+ 'format_id': '600k-Audio',
+ 'vcodec': 'none',
+ }, {
+ 'url': 'http://hls.ted.com/videos/BorisHesser_2018S/audio/600k.m3u8?nobumpers=true&uniqueId=76011e2b',
+ 'format_id': '68',
+ 'vcodec': 'none',
+ }, {
+ 'url': 'http://hls.ted.com/videos/BorisHesser_2018S/video/64k.m3u8?nobumpers=true&uniqueId=76011e2b',
+ 'format_id': '163',
+ 'acodec': 'none',
+ 'width': 320,
+ 'height': 180,
+ }, {
+ 'url': 'http://hls.ted.com/videos/BorisHesser_2018S/video/180k.m3u8?nobumpers=true&uniqueId=76011e2b',
+ 'format_id': '481',
+ 'acodec': 'none',
+ 'width': 512,
+ 'height': 288,
+ }, {
+ 'url': 'http://hls.ted.com/videos/BorisHesser_2018S/video/320k.m3u8?nobumpers=true&uniqueId=76011e2b',
+ 'format_id': '769',
+ 'acodec': 'none',
+ 'width': 512,
+ 'height': 288,
+ }, {
+ 'url': 'http://hls.ted.com/videos/BorisHesser_2018S/video/450k.m3u8?nobumpers=true&uniqueId=76011e2b',
+ 'format_id': '984',
+ 'acodec': 'none',
+ 'width': 512,
+ 'height': 288,
+ }, {
+ 'url': 'http://hls.ted.com/videos/BorisHesser_2018S/video/600k.m3u8?nobumpers=true&uniqueId=76011e2b',
+ 'format_id': '1255',
+ 'acodec': 'none',
+ 'width': 640,
+ 'height': 360,
+ }, {
+ 'url': 'http://hls.ted.com/videos/BorisHesser_2018S/video/950k.m3u8?nobumpers=true&uniqueId=76011e2b',
+ 'format_id': '1693',
+ 'acodec': 'none',
+ 'width': 853,
+ 'height': 480,
+ }, {
+ 'url': 'http://hls.ted.com/videos/BorisHesser_2018S/video/1500k.m3u8?nobumpers=true&uniqueId=76011e2b',
+ 'format_id': '2462',
+ 'acodec': 'none',
+ 'width': 1280,
+ 'height': 720,
+ }]
+ ),
]
for m3u8_file, m3u8_url, expected_formats in _TEST_CASES:
def test_parse_mpd_formats(self):
_TEST_CASES = [
(
- # https://github.com/rg3/youtube-dl/issues/13919
+ # https://github.com/ytdl-org/youtube-dl/issues/13919
# Also tests duplicate representation ids, see
- # https://github.com/rg3/youtube-dl/issues/15111
+ # https://github.com/ytdl-org/youtube-dl/issues/15111
'float_duration',
- 'http://unknown/manifest.mpd',
+ 'http://unknown/manifest.mpd', # mpd_url
+ None, # mpd_base_url
[{
'manifest_url': 'http://unknown/manifest.mpd',
'ext': 'm4a',
'height': 1080,
}]
), (
- # https://github.com/rg3/youtube-dl/pull/14844
+ # https://github.com/ytdl-org/youtube-dl/pull/14844
'urls_only',
- 'http://unknown/manifest.mpd',
+ 'http://unknown/manifest.mpd', # mpd_url
+ None, # mpd_base_url
[{
'manifest_url': 'http://unknown/manifest.mpd',
'ext': 'mp4',
'width': 1920,
'height': 1080,
}]
+ ), (
+ # https://github.com/ytdl-org/youtube-dl/issues/20346
+ # Media considered unfragmented even though it contains
+ # Initialization tag
+ 'unfragmented',
+ 'https://v.redd.it/hw1x7rcg7zl21/DASHPlaylist.mpd', # mpd_url
+ 'https://v.redd.it/hw1x7rcg7zl21', # mpd_base_url
+ [{
+ 'url': 'https://v.redd.it/hw1x7rcg7zl21/audio',
+ 'manifest_url': 'https://v.redd.it/hw1x7rcg7zl21/DASHPlaylist.mpd',
+ 'ext': 'm4a',
+ 'format_id': 'AUDIO-1',
+ 'format_note': 'DASH audio',
+ 'container': 'm4a_dash',
+ 'acodec': 'mp4a.40.2',
+ 'vcodec': 'none',
+ 'tbr': 129.87,
+ 'asr': 48000,
+
+ }, {
+ 'url': 'https://v.redd.it/hw1x7rcg7zl21/DASH_240',
+ 'manifest_url': 'https://v.redd.it/hw1x7rcg7zl21/DASHPlaylist.mpd',
+ 'ext': 'mp4',
+ 'format_id': 'VIDEO-2',
+ 'format_note': 'DASH video',
+ 'container': 'mp4_dash',
+ 'acodec': 'none',
+ 'vcodec': 'avc1.4d401e',
+ 'tbr': 608.0,
+ 'width': 240,
+ 'height': 240,
+ 'fps': 30,
+ }, {
+ 'url': 'https://v.redd.it/hw1x7rcg7zl21/DASH_360',
+ 'manifest_url': 'https://v.redd.it/hw1x7rcg7zl21/DASHPlaylist.mpd',
+ 'ext': 'mp4',
+ 'format_id': 'VIDEO-1',
+ 'format_note': 'DASH video',
+ 'container': 'mp4_dash',
+ 'acodec': 'none',
+ 'vcodec': 'avc1.4d401e',
+ 'tbr': 804.261,
+ 'width': 360,
+ 'height': 360,
+ 'fps': 30,
+ }]
)
]
- for mpd_file, mpd_url, expected_formats in _TEST_CASES:
+ for mpd_file, mpd_url, mpd_base_url, expected_formats in _TEST_CASES:
with io.open('./test/testdata/mpd/%s.mpd' % mpd_file,
mode='r', encoding='utf-8') as f:
formats = self.ie._parse_mpd_formats(
compat_etree_fromstring(f.read().encode('utf-8')),
- mpd_url=mpd_url)
+ mpd_base_url=mpd_base_url, mpd_url=mpd_url)
self.ie._sort_formats(formats)
expect_value(self, formats, expected_formats, None)
def test_parse_f4m_formats(self):
_TEST_CASES = [
(
- # https://github.com/rg3/youtube-dl/issues/14660
+ # https://github.com/ytdl-org/youtube-dl/issues/14660
'custom_base_url',
'http://api.new.livestream.com/accounts/6115179/events/6764928/videos/144884262.f4m',
[{
downloaded = ydl.downloaded_info_dicts[0]
self.assertEqual(downloaded['format_id'], 'vid-vcodec-dot')
+ def test_format_selection_string_ops(self):
+ formats = [
+ {'format_id': 'abc-cba', 'ext': 'mp4', 'url': TEST_URL},
+ {'format_id': 'zxc-cxz', 'ext': 'webm', 'url': TEST_URL},
+ ]
+ info_dict = _make_result(formats)
+
+ # equals (=)
+ ydl = YDL({'format': '[format_id=abc-cba]'})
+ ydl.process_ie_result(info_dict.copy())
+ downloaded = ydl.downloaded_info_dicts[0]
+ self.assertEqual(downloaded['format_id'], 'abc-cba')
+
+ # does not equal (!=)
+ ydl = YDL({'format': '[format_id!=abc-cba]'})
+ ydl.process_ie_result(info_dict.copy())
+ downloaded = ydl.downloaded_info_dicts[0]
+ self.assertEqual(downloaded['format_id'], 'zxc-cxz')
+
+ ydl = YDL({'format': '[format_id!=abc-cba][format_id!=zxc-cxz]'})
+ self.assertRaises(ExtractorError, ydl.process_ie_result, info_dict.copy())
+
+ # starts with (^=)
+ ydl = YDL({'format': '[format_id^=abc]'})
+ ydl.process_ie_result(info_dict.copy())
+ downloaded = ydl.downloaded_info_dicts[0]
+ self.assertEqual(downloaded['format_id'], 'abc-cba')
+
+ # does not start with (!^=)
+ ydl = YDL({'format': '[format_id!^=abc]'})
+ ydl.process_ie_result(info_dict.copy())
+ downloaded = ydl.downloaded_info_dicts[0]
+ self.assertEqual(downloaded['format_id'], 'zxc-cxz')
+
+ ydl = YDL({'format': '[format_id!^=abc][format_id!^=zxc]'})
+ self.assertRaises(ExtractorError, ydl.process_ie_result, info_dict.copy())
+
+ # ends with ($=)
+ ydl = YDL({'format': '[format_id$=cba]'})
+ ydl.process_ie_result(info_dict.copy())
+ downloaded = ydl.downloaded_info_dicts[0]
+ self.assertEqual(downloaded['format_id'], 'abc-cba')
+
+ # does not end with (!$=)
+ ydl = YDL({'format': '[format_id!$=cba]'})
+ ydl.process_ie_result(info_dict.copy())
+ downloaded = ydl.downloaded_info_dicts[0]
+ self.assertEqual(downloaded['format_id'], 'zxc-cxz')
+
+ ydl = YDL({'format': '[format_id!$=cba][format_id!$=cxz]'})
+ self.assertRaises(ExtractorError, ydl.process_ie_result, info_dict.copy())
+
+ # contains (*=)
+ ydl = YDL({'format': '[format_id*=bc-cb]'})
+ ydl.process_ie_result(info_dict.copy())
+ downloaded = ydl.downloaded_info_dicts[0]
+ self.assertEqual(downloaded['format_id'], 'abc-cba')
+
+ # does not contain (!*=)
+ ydl = YDL({'format': '[format_id!*=bc-cb]'})
+ ydl.process_ie_result(info_dict.copy())
+ downloaded = ydl.downloaded_info_dicts[0]
+ self.assertEqual(downloaded['format_id'], 'zxc-cxz')
+
+ ydl = YDL({'format': '[format_id!*=abc][format_id!*=zxc]'})
+ self.assertRaises(ExtractorError, ydl.process_ie_result, info_dict.copy())
+
+ ydl = YDL({'format': '[format_id!*=-]'})
+ self.assertRaises(ExtractorError, ydl.process_ie_result, info_dict.copy())
+
def test_youtube_format_selection(self):
order = [
'38', '37', '46', '22', '45', '35', '44', '18', '34', '43', '6', '5', '17', '36', '13',
# For extractors with incomplete formats (all formats are audio-only or
# video-only) best and worst should fallback to corresponding best/worst
# video-only or audio-only formats (as per
- # https://github.com/rg3/youtube-dl/pull/5556)
+ # https://github.com/ytdl-org/youtube-dl/pull/5556)
formats = [
{'format_id': 'low', 'ext': 'mp3', 'preference': 1, 'vcodec': 'none', 'url': TEST_URL},
{'format_id': 'high', 'ext': 'mp3', 'preference': 2, 'vcodec': 'none', 'url': TEST_URL},
self.assertRaises(ExtractorError, ydl.process_ie_result, info_dict.copy())
def test_format_selection_issue_10083(self):
- # See https://github.com/rg3/youtube-dl/issues/10083
+ # See https://github.com/ytdl-org/youtube-dl/issues/10083
formats = [
{'format_id': 'regular', 'height': 360, 'url': TEST_URL},
{'format_id': 'video', 'height': 720, 'acodec': 'none', 'url': TEST_URL},
self.assertEqual(result, [2, 3, 4])
def test_urlopen_no_file_protocol(self):
- # see https://github.com/rg3/youtube-dl/issues/8227
+ # see https://github.com/ytdl-org/youtube-dl/issues/8227
ydl = YDL()
self.assertRaises(compat_urllib_error.URLError, ydl.urlopen, 'file:///etc/passwd')
tf.close()
os.remove(tf.name)
+ def test_strip_httponly_prefix(self):
+ cookiejar = YoutubeDLCookieJar('./test/testdata/cookies/httponly_cookies.txt')
+ cookiejar.load(ignore_discard=True, ignore_expires=True)
+
+ def assert_cookie_has_value(key):
+ self.assertEqual(cookiejar._cookies['www.foobar.foobar']['/'][key].value, key + '_VALUE')
+
+ assert_cookie_has_value('HTTPONLY_COOKIE')
+ assert_cookie_has_value('JS_ACCESSIBLE_COOKIE')
+
if __name__ == '__main__':
unittest.main()
def test_decrypt_text(self):
password = intlist_to_bytes(self.key).decode('utf-8')
encrypted = base64.b64encode(
- intlist_to_bytes(self.iv[:8]) +
- b'\x17\x15\x93\xab\x8d\x80V\xcdV\xe0\t\xcdo\xc2\xa5\xd8ksM\r\xe27N\xae'
+ intlist_to_bytes(self.iv[:8])
+ + b'\x17\x15\x93\xab\x8d\x80V\xcdV\xe0\t\xcdo\xc2\xa5\xd8ksM\r\xe27N\xae'
).decode('utf-8')
decrypted = (aes_decrypt_text(encrypted, password, 16))
self.assertEqual(decrypted, self.secret_msg)
password = intlist_to_bytes(self.key).decode('utf-8')
encrypted = base64.b64encode(
- intlist_to_bytes(self.iv[:8]) +
- b'\x0b\xe6\xa4\xd9z\x0e\xb8\xb9\xd0\xd4i_\x85\x1d\x99\x98_\xe5\x80\xe7.\xbf\xa5\x83'
+ intlist_to_bytes(self.iv[:8])
+ + b'\x0b\xe6\xa4\xd9z\x0e\xb8\xb9\xd0\xd4i_\x85\x1d\x99\x98_\xe5\x80\xe7.\xbf\xa5\x83'
).decode('utf-8')
decrypted = (aes_decrypt_text(encrypted, password, 32))
self.assertEqual(decrypted, self.secret_msg)
self.assertMatch('https://vimeo.com/user7108434/videos', ['vimeo:user'])
self.assertMatch('https://vimeo.com/user21297594/review/75524534/3c257a1b5d', ['vimeo:review'])
- # https://github.com/rg3/youtube-dl/issues/1930
+ # https://github.com/ytdl-org/youtube-dl/issues/1930
def test_soundcloud_not_matching_sets(self):
self.assertMatch('http://soundcloud.com/floex/sets/gone-ep', ['soundcloud:set'])
self.assertMatch('http://tatianamaslanydaily.tumblr.com/post/54196191430', ['Tumblr'])
def test_pbs(self):
- # https://github.com/rg3/youtube-dl/issues/2350
+ # https://github.com/ytdl-org/youtube-dl/issues/2350
self.assertMatch('http://video.pbs.org/viralplayer/2365173446/', ['pbs'])
self.assertMatch('http://video.pbs.org/widget/partnerplayer/980042464/', ['pbs'])
def test_yahoo_https(self):
- # https://github.com/rg3/youtube-dl/issues/2701
+ # https://github.com/ytdl-org/youtube-dl/issues/2701
self.assertMatch(
'https://screen.yahoo.com/smartwatches-latest-wearable-gadgets-163745379-cbs.html',
['Yahoo'])
from youtube_dl.compat import (
compat_getenv,
compat_setenv,
+ compat_etree_Element,
compat_etree_fromstring,
compat_expanduser,
compat_shlex_split,
self.assertEqual(compat_shlex_split('-option "one\ntwo" \n -flag'), ['-option', 'one\ntwo', '-flag'])
self.assertEqual(compat_shlex_split('-val 中文'), ['-val', '中文'])
+ def test_compat_etree_Element(self):
+ try:
+ compat_etree_Element.items
+ except AttributeError:
+ self.fail('compat_etree_Element is not a type')
+
def test_compat_etree_fromstring(self):
xml = '''
<root foo="bar" spam="中文">
def test_func(self):
as_file = os.path.join(TEST_DIR, testfile)
swf_file = os.path.join(TEST_DIR, test_id + '.swf')
- if ((not os.path.exists(swf_file)) or
- os.path.getmtime(swf_file) < os.path.getmtime(as_file)):
+ if ((not os.path.exists(swf_file))
+ or os.path.getmtime(swf_file) < os.path.getmtime(as_file)):
# Recompile
try:
subprocess.check_call([
ExtractorError,
find_xpath_attr,
fix_xml_ampersands,
+ float_or_none,
get_element_by_class,
get_element_by_attribute,
get_elements_by_class,
get_elements_by_attribute,
InAdvancePagedList,
+ int_or_none,
intlist_to_bytes,
is_html,
js_to_json,
parse_count,
parse_iso8601,
parse_resolution,
+ parse_bitrate,
pkcs1pad,
read_batch_urls,
sanitize_filename,
smuggle_url,
str_to_int,
strip_jsonp,
+ strip_or_none,
timeconvert,
unescapeHTML,
unified_strdate,
self.assertEqual(sanitize_filename(
'ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ', restricted=True),
- 'AAAAAAAECEEEEIIIIDNOOOOOOOOEUUUUUYPssaaaaaaaeceeeeiiiionooooooooeuuuuuypy')
+ 'AAAAAAAECEEEEIIIIDNOOOOOOOOEUUUUUYTHssaaaaaaaeceeeeiiiionooooooooeuuuuuythy')
def test_sanitize_ids(self):
self.assertEqual(sanitize_filename('_n_cd26wFpw', is_id=True), '_n_cd26wFpw')
shell_quote(args),
"""ffmpeg -i 'ñ€ß'"'"'.mp4'""" if compat_os_name != 'nt' else '''ffmpeg -i "ñ€ß'.mp4"''')
+ def test_float_or_none(self):
+ self.assertEqual(float_or_none('42.42'), 42.42)
+ self.assertEqual(float_or_none('42'), 42.0)
+ self.assertEqual(float_or_none(''), None)
+ self.assertEqual(float_or_none(None), None)
+ self.assertEqual(float_or_none([]), None)
+ self.assertEqual(float_or_none(set()), None)
+
+ def test_int_or_none(self):
+ self.assertEqual(int_or_none('42'), 42)
+ self.assertEqual(int_or_none(''), None)
+ self.assertEqual(int_or_none(None), None)
+ self.assertEqual(int_or_none([]), None)
+ self.assertEqual(int_or_none(set()), None)
+
def test_str_to_int(self):
self.assertEqual(str_to_int('123,456'), 123456)
self.assertEqual(str_to_int('123.456'), 123456)
self.assertEqual(urljoin('http://foo.de/', ''), None)
self.assertEqual(urljoin('http://foo.de/', ['foobar']), None)
self.assertEqual(urljoin('http://foo.de/a/b/c.txt', '.././../d.txt'), 'http://foo.de/d.txt')
+ self.assertEqual(urljoin('http://foo.de/a/b/c.txt', 'rtmp://foo.de'), 'rtmp://foo.de')
+ self.assertEqual(urljoin(None, 'rtmp://foo.de'), 'rtmp://foo.de')
def test_url_or_none(self):
self.assertEqual(url_or_none(None), None)
d = json.loads(stripped)
self.assertEqual(d, {'status': 'success'})
+ def test_strip_or_none(self):
+ self.assertEqual(strip_or_none(' abc'), 'abc')
+ self.assertEqual(strip_or_none('abc '), 'abc')
+ self.assertEqual(strip_or_none(' abc '), 'abc')
+ self.assertEqual(strip_or_none('\tabc\t'), 'abc')
+ self.assertEqual(strip_or_none('\n\tabc\n\t'), 'abc')
+ self.assertEqual(strip_or_none('abc'), 'abc')
+ self.assertEqual(strip_or_none(''), '')
+ self.assertEqual(strip_or_none(None), None)
+ self.assertEqual(strip_or_none(42), None)
+ self.assertEqual(strip_or_none([]), None)
+
def test_uppercase_escape(self):
self.assertEqual(uppercase_escape('aä'), 'aä')
self.assertEqual(uppercase_escape('\\U0001d550'), '𝕐')
self.assertEqual(parse_resolution('4k'), {'height': 2160})
self.assertEqual(parse_resolution('8K'), {'height': 4320})
+ def test_parse_bitrate(self):
+ self.assertEqual(parse_bitrate(None), None)
+ self.assertEqual(parse_bitrate(''), None)
+ self.assertEqual(parse_bitrate('300kbps'), 300)
+ self.assertEqual(parse_bitrate('1500kbps'), 1500)
+ self.assertEqual(parse_bitrate('300 kbps'), 300)
+
def test_version_tuple(self):
self.assertEqual(version_tuple('1'), (1,))
self.assertEqual(version_tuple('10.23.344'), (10, 23, 344))
--- /dev/null
+# Netscape HTTP Cookie File
+# http://curl.haxx.se/rfc/cookie_spec.html
+# This is a generated file! Do not edit.
+
+#HttpOnly_www.foobar.foobar FALSE / TRUE 2147483647 HTTPONLY_COOKIE HTTPONLY_COOKIE_VALUE
+www.foobar.foobar FALSE / TRUE 2147483647 JS_ACCESSIBLE_COOKIE JS_ACCESSIBLE_COOKIE_VALUE
--- /dev/null
+#EXTM3U
+#EXT-X-VERSION:4
+#EXT-X-STREAM-INF:AUDIO="600k",BANDWIDTH=1255659,PROGRAM-ID=1,CODECS="avc1.42c01e,mp4a.40.2",RESOLUTION=640x360
+/videos/BorisHesser_2018S/video/600k.m3u8?nobumpers=true&uniqueId=76011e2b
+#EXT-X-STREAM-INF:AUDIO="600k",BANDWIDTH=163154,PROGRAM-ID=1,CODECS="avc1.42c00c,mp4a.40.2",RESOLUTION=320x180
+/videos/BorisHesser_2018S/video/64k.m3u8?nobumpers=true&uniqueId=76011e2b
+#EXT-X-STREAM-INF:AUDIO="600k",BANDWIDTH=481701,PROGRAM-ID=1,CODECS="avc1.42c015,mp4a.40.2",RESOLUTION=512x288
+/videos/BorisHesser_2018S/video/180k.m3u8?nobumpers=true&uniqueId=76011e2b
+#EXT-X-STREAM-INF:AUDIO="600k",BANDWIDTH=769968,PROGRAM-ID=1,CODECS="avc1.42c015,mp4a.40.2",RESOLUTION=512x288
+/videos/BorisHesser_2018S/video/320k.m3u8?nobumpers=true&uniqueId=76011e2b
+#EXT-X-STREAM-INF:AUDIO="600k",BANDWIDTH=984037,PROGRAM-ID=1,CODECS="avc1.42c015,mp4a.40.2",RESOLUTION=512x288
+/videos/BorisHesser_2018S/video/450k.m3u8?nobumpers=true&uniqueId=76011e2b
+#EXT-X-STREAM-INF:AUDIO="600k",BANDWIDTH=1693925,PROGRAM-ID=1,CODECS="avc1.4d401f,mp4a.40.2",RESOLUTION=853x480
+/videos/BorisHesser_2018S/video/950k.m3u8?nobumpers=true&uniqueId=76011e2b
+#EXT-X-STREAM-INF:AUDIO="600k",BANDWIDTH=2462469,PROGRAM-ID=1,CODECS="avc1.640028,mp4a.40.2",RESOLUTION=1280x720
+/videos/BorisHesser_2018S/video/1500k.m3u8?nobumpers=true&uniqueId=76011e2b
+#EXT-X-STREAM-INF:AUDIO="600k",BANDWIDTH=68101,PROGRAM-ID=1,CODECS="mp4a.40.2",DEFAULT=YES
+/videos/BorisHesser_2018S/audio/600k.m3u8?nobumpers=true&uniqueId=76011e2b
+
+#EXT-X-I-FRAME-STREAM-INF:BANDWIDTH=74298,PROGRAM-ID=1,CODECS="avc1.42c00c",RESOLUTION=320x180,URI="/videos/BorisHesser_2018S/video/64k_iframe.m3u8?nobumpers=true&uniqueId=76011e2b"
+#EXT-X-I-FRAME-STREAM-INF:BANDWIDTH=216200,PROGRAM-ID=1,CODECS="avc1.42c015",RESOLUTION=512x288,URI="/videos/BorisHesser_2018S/video/180k_iframe.m3u8?nobumpers=true&uniqueId=76011e2b"
+#EXT-X-I-FRAME-STREAM-INF:BANDWIDTH=304717,PROGRAM-ID=1,CODECS="avc1.42c015",RESOLUTION=512x288,URI="/videos/BorisHesser_2018S/video/320k_iframe.m3u8?nobumpers=true&uniqueId=76011e2b"
+#EXT-X-I-FRAME-STREAM-INF:BANDWIDTH=350933,PROGRAM-ID=1,CODECS="avc1.42c015",RESOLUTION=512x288,URI="/videos/BorisHesser_2018S/video/450k_iframe.m3u8?nobumpers=true&uniqueId=76011e2b"
+#EXT-X-I-FRAME-STREAM-INF:BANDWIDTH=495850,PROGRAM-ID=1,CODECS="avc1.42c01e",RESOLUTION=640x360,URI="/videos/BorisHesser_2018S/video/600k_iframe.m3u8?nobumpers=true&uniqueId=76011e2b"
+#EXT-X-I-FRAME-STREAM-INF:BANDWIDTH=810750,PROGRAM-ID=1,CODECS="avc1.4d401f",RESOLUTION=853x480,URI="/videos/BorisHesser_2018S/video/950k_iframe.m3u8?nobumpers=true&uniqueId=76011e2b"
+#EXT-X-I-FRAME-STREAM-INF:BANDWIDTH=1273700,PROGRAM-ID=1,CODECS="avc1.640028",RESOLUTION=1280x720,URI="/videos/BorisHesser_2018S/video/1500k_iframe.m3u8?nobumpers=true&uniqueId=76011e2b"
+
+#EXT-X-MEDIA:TYPE=AUDIO,GROUP-ID="600k",LANGUAGE="en",NAME="Audio",AUTOSELECT=YES,DEFAULT=YES,URI="/videos/BorisHesser_2018S/audio/600k.m3u8?nobumpers=true&uniqueId=76011e2b",BANDWIDTH=614400
--- /dev/null
+<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
+<MPD mediaPresentationDuration="PT54.915S" minBufferTime="PT1.500S" profiles="urn:mpeg:dash:profile:isoff-on-demand:2011" type="static" xmlns="urn:mpeg:dash:schema:mpd:2011">
+ <Period duration="PT54.915S">
+ <AdaptationSet segmentAlignment="true" subsegmentAlignment="true" subsegmentStartsWithSAP="1">
+ <Representation bandwidth="804261" codecs="avc1.4d401e" frameRate="30" height="360" id="VIDEO-1" mimeType="video/mp4" startWithSAP="1" width="360">
+ <BaseURL>DASH_360</BaseURL>
+ <SegmentBase indexRange="915-1114" indexRangeExact="true">
+ <Initialization range="0-914"/>
+ </SegmentBase>
+ </Representation>
+ <Representation bandwidth="608000" codecs="avc1.4d401e" frameRate="30" height="240" id="VIDEO-2" mimeType="video/mp4" startWithSAP="1" width="240">
+ <BaseURL>DASH_240</BaseURL>
+ <SegmentBase indexRange="913-1112" indexRangeExact="true">
+ <Initialization range="0-912"/>
+ </SegmentBase>
+ </Representation>
+ </AdaptationSet>
+ <AdaptationSet>
+ <Representation audioSamplingRate="48000" bandwidth="129870" codecs="mp4a.40.2" id="AUDIO-1" mimeType="audio/mp4" startWithSAP="1">
+ <AudioChannelConfiguration schemeIdUri="urn:mpeg:dash:23003:3:audio_channel_configuration:2011" value="2"/>
+ <BaseURL>audio</BaseURL>
+ <SegmentBase indexRange="832-1007" indexRangeExact="true">
+ <Initialization range="0-831"/>
+ </SegmentBase>
+ </Representation>
+ </AdaptationSet>
+ </Period>
+</MPD>
\f[C]webm\f[] extension served as a single file.
.PP
You can also use special names to select particular edge case formats:
-\- \f[C]best\f[]: Select the best quality format represented by a single
+.IP \[bu] 2
+\f[C]best\f[]: Select the best quality format represented by a single
+file with video and audio.
+.IP \[bu] 2
+\f[C]worst\f[]: Select the worst quality format represented by a single
file with video and audio.
-\- \f[C]worst\f[]: Select the worst quality format represented by a
-single file with video and audio.
-\- \f[C]bestvideo\f[]: Select the best quality video\-only format (e.g.
+.IP \[bu] 2
+\f[C]bestvideo\f[]: Select the best quality video\-only format (e.g.
DASH video).
May not be available.
-\- \f[C]worstvideo\f[]: Select the worst quality video\-only format.
+.IP \[bu] 2
+\f[C]worstvideo\f[]: Select the worst quality video\-only format.
May not be available.
-\- \f[C]bestaudio\f[]: Select the best quality audio only\-format.
+.IP \[bu] 2
+\f[C]bestaudio\f[]: Select the best quality audio\-only format.
May not be available.
-\- \f[C]worstaudio\f[]: Select the worst quality audio only\-format.
+.IP \[bu] 2
+\f[C]worstaudio\f[]: Select the worst quality audio\-only format.
May not be available.
.PP
For example, to download the worst quality video\-only format you can
.PP
The following numeric meta fields can be used with comparisons
\f[C]<\f[], \f[C]<=\f[], \f[C]>\f[], \f[C]>=\f[], \f[C]=\f[] (equals),
-\f[C]!=\f[] (not equals): \- \f[C]filesize\f[]: The number of bytes, if
-known in advance \- \f[C]width\f[]: Width of the video, if known \-
-\f[C]height\f[]: Height of the video, if known \- \f[C]tbr\f[]: Average
-bitrate of audio and video in KBit/s \- \f[C]abr\f[]: Average audio
-bitrate in KBit/s \- \f[C]vbr\f[]: Average video bitrate in KBit/s \-
-\f[C]asr\f[]: Audio sampling rate in Hertz \- \f[C]fps\f[]: Frame rate
-.PP
-Also filtering work for comparisons \f[C]=\f[] (equals), \f[C]!=\f[]
-(not equals), \f[C]^=\f[] (begins with), \f[C]$=\f[] (ends with),
-\f[C]*=\f[] (contains) and following string meta fields: \-
-\f[C]ext\f[]: File extension \- \f[C]acodec\f[]: Name of the audio codec
-in use \- \f[C]vcodec\f[]: Name of the video codec in use \-
-\f[C]container\f[]: Name of the container format \- \f[C]protocol\f[]:
-The protocol that will be used for the actual download, lower\-case
-(\f[C]http\f[], \f[C]https\f[], \f[C]rtsp\f[], \f[C]rtmp\f[],
-\f[C]rtmpe\f[], \f[C]mms\f[], \f[C]f4m\f[], \f[C]ism\f[],
-\f[C]http_dash_segments\f[], \f[C]m3u8\f[], or \f[C]m3u8_native\f[]) \-
+\f[C]!=\f[] (not equals):
+.IP \[bu] 2
+\f[C]filesize\f[]: The number of bytes, if known in advance
+.IP \[bu] 2
+\f[C]width\f[]: Width of the video, if known
+.IP \[bu] 2
+\f[C]height\f[]: Height of the video, if known
+.IP \[bu] 2
+\f[C]tbr\f[]: Average bitrate of audio and video in KBit/s
+.IP \[bu] 2
+\f[C]abr\f[]: Average audio bitrate in KBit/s
+.IP \[bu] 2
+\f[C]vbr\f[]: Average video bitrate in KBit/s
+.IP \[bu] 2
+\f[C]asr\f[]: Audio sampling rate in Hertz
+.IP \[bu] 2
+\f[C]fps\f[]: Frame rate
+.PP
+Filtering also works for the comparisons \f[C]=\f[] (equals), \f[C]^=\f[]
+(starts with), \f[C]$=\f[] (ends with), \f[C]*=\f[] (contains) and the
+following string meta fields:
+.IP \[bu] 2
+\f[C]ext\f[]: File extension
+.IP \[bu] 2
+\f[C]acodec\f[]: Name of the audio codec in use
+.IP \[bu] 2
+\f[C]vcodec\f[]: Name of the video codec in use
+.IP \[bu] 2
+\f[C]container\f[]: Name of the container format
+.IP \[bu] 2
+\f[C]protocol\f[]: The protocol that will be used for the actual
+download, lower\-case (\f[C]http\f[], \f[C]https\f[], \f[C]rtsp\f[],
+\f[C]rtmp\f[], \f[C]rtmpe\f[], \f[C]mms\f[], \f[C]f4m\f[], \f[C]ism\f[],
+\f[C]http_dash_segments\f[], \f[C]m3u8\f[], or \f[C]m3u8_native\f[])
+.IP \[bu] 2
\f[C]format_id\f[]: A short description of the format
.PP
+Any string comparison may be prefixed with negation \f[C]!\f[] in order
+to produce an opposite comparison, e.g.
+\f[C]!*=\f[] (does not contain).
+.PP
Note that none of the aforementioned meta fields are guaranteed to be
present since this solely depends on the metadata obtained by particular
extractor, i.e.
.PP
Since the end of April 2015 and version 2015.04.26, youtube\-dl uses
\f[C]\-f\ bestvideo+bestaudio/best\f[] as the default format selection
-(see #5447 (https://github.com/rg3/youtube-dl/issues/5447),
-#5456 (https://github.com/rg3/youtube-dl/issues/5456)).
+(see #5447 (https://github.com/ytdl-org/youtube-dl/issues/5447),
+#5456 (https://github.com/ytdl-org/youtube-dl/issues/5456)).
If ffmpeg or avconv are installed this results in downloading
\f[C]bestvideo\f[] and \f[C]bestaudio\f[] separately and muxing them
together into a single file giving the best overall quality available.
#\ Download\ best\ mp4\ format\ available\ or\ any\ other\ best\ if\ no\ mp4\ available
$\ youtube\-dl\ \-f\ \[aq]bestvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]/best\[aq]
-#\ Download\ best\ format\ available\ but\ not\ better\ that\ 480p
+#\ Download\ best\ format\ available\ but\ no\ better\ than\ 480p
$\ youtube\-dl\ \-f\ \[aq]bestvideo[height<=480]+bestaudio/best[height<=480]\[aq]
#\ Download\ best\ video\ only\ format\ but\ no\ bigger\ than\ 50\ MB
.SS How do I update youtube\-dl?
.PP
If you\[aq]ve followed our manual installation
-instructions (https://rg3.github.io/youtube-dl/download.html), you can
-simply run \f[C]youtube\-dl\ \-U\f[] (or, on Linux,
+instructions (https://ytdl-org.github.io/youtube-dl/download.html), you
+can simply run \f[C]youtube\-dl\ \-U\f[] (or, on Linux,
\f[C]sudo\ youtube\-dl\ \-U\f[]).
.PP
If you have used pip, a simple
.fi
.PP
Afterwards, simply follow our manual installation
-instructions (https://rg3.github.io/youtube-dl/download.html):
+instructions (https://ytdl-org.github.io/youtube-dl/download.html):
.IP
.nf
\f[C]
Apparently YouTube requires you to pass a CAPTCHA test if you download
too much.
We\[aq]re considering to provide a way to let you solve the
-CAPTCHA (https://github.com/rg3/youtube-dl/issues/154), but at the
+CAPTCHA (https://github.com/ytdl-org/youtube-dl/issues/154), but at the
moment, your best course of action is pointing a web browser to the
youtube URL, solving the CAPTCHA, and restart youtube\-dl.
.SS Do I need any other programs?
Please update to Python 2.6 or 2.7.
.SS What is this binary file? Where has the code gone?
.PP
-Since June 2012 (#342 (https://github.com/rg3/youtube-dl/issues/342))
-youtube\-dl is packed as an executable zipfile, simply unzip it (might
-need renaming to \f[C]youtube\-dl.zip\f[] first on some systems) or
-clone the git repository, as laid out above.
+Since June 2012
+(#342 (https://github.com/ytdl-org/youtube-dl/issues/342)) youtube\-dl
+is packed as an executable zipfile; simply unzip it (might need renaming
+to \f[C]youtube\-dl.zip\f[] first on some systems) or clone the git
+repository, as laid out above.
If you modify the code, you can run it by executing the
\f[C]__main__.py\f[] file.
To recompile the executable, run \f[C]make\ youtube\-dl\f[].
.PP
In particular, the generic extractor (used when your website is not in
the list of supported sites by
-youtube\-dl (https://rg3.github.io/youtube-dl/supportedsites.html)
+youtube\-dl (https://ytdl-org.github.io/youtube-dl/supportedsites.html)
cannot mandate one specific downloader.
.PP
If you put either \f[C]\-\-hls\-prefer\-native\f[] or
.SH DEVELOPER INSTRUCTIONS
.PP
Most users do not need to build youtube\-dl and can download the
-builds (https://rg3.github.io/youtube-dl/download.html) or get them from
-their distribution.
+builds (https://ytdl-org.github.io/youtube-dl/download.html) or get them
+from their distribution.
.PP
To run youtube\-dl as a developer, you don\[aq]t need to build anything
either.
you can follow this quick list (assuming your service is called
\f[C]yourextractor\f[]):
.IP " 1." 4
-Fork this repository (https://github.com/rg3/youtube-dl/fork)
+Fork this repository (https://github.com/ytdl-org/youtube-dl/fork)
.IP " 2." 4
Check out the source code with:
.RS 4
.RE
.IP " 5." 4
Add an import in
-\f[C]youtube_dl/extractor/extractors.py\f[] (https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/extractors.py).
+\f[C]youtube_dl/extractor/extractors.py\f[] (https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/extractor/extractors.py).
.IP " 6." 4
Run
\f[C]python\ test/test_download.py\ TestDownload.test_YourExtractor\f[].
not counted in.
.IP " 7." 4
Have a look at
-\f[C]youtube_dl/extractor/common.py\f[] (https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py)
+\f[C]youtube_dl/extractor/common.py\f[] (https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/extractor/common.py)
for possible helper methods and a detailed description of what your
extractor should and may
-return (https://github.com/rg3/youtube-dl/blob/7f41a598b3fba1bcab2817de64a08941200aa3c8/youtube_dl/extractor/common.py#L94-L303).
+return (https://github.com/ytdl-org/youtube-dl/blob/7f41a598b3fba1bcab2817de64a08941200aa3c8/youtube_dl/extractor/common.py#L94-L303).
Add tests and code for as many as you want.
.IP " 8." 4
Make sure your code follows youtube\-dl coding conventions and check the
.PP
For extraction to work youtube\-dl relies on metadata your extractor
extracts and provides to youtube\-dl expressed by an information
-dictionary (https://github.com/rg3/youtube-dl/blob/7f41a598b3fba1bcab2817de64a08941200aa3c8/youtube_dl/extractor/common.py#L94-L303)
+dictionary (https://github.com/ytdl-org/youtube-dl/blob/7f41a598b3fba1bcab2817de64a08941200aa3c8/youtube_dl/extractor/common.py#L94-L303)
or simply \f[I]info dict\f[].
Only the following meta fields in the \f[I]info dict\f[] are considered
mandatory for a successful extraction process by youtube\-dl:
extracted then the extractor is considered completely broken.
.PP
Any
-field (https://github.com/rg3/youtube-dl/blob/7f41a598b3fba1bcab2817de64a08941200aa3c8/youtube_dl/extractor/common.py#L188-L303)
+field (https://github.com/ytdl-org/youtube-dl/blob/7f41a598b3fba1bcab2817de64a08941200aa3c8/youtube_dl/extractor/common.py#L188-L303)
apart from the aforementioned ones are considered \f[B]optional\f[].
That means that extraction should be \f[B]tolerant\f[] to situations
when sources for these fields can potentially be unavailable (even if
\[aq]PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4\[aq]
\f[]
.fi
-.SS Use safe conversion functions
+.SS Use convenience conversion and parsing functions
.PP
Wrap all extracted numeric data into safe functions from
-\f[C]youtube_dl/utils.py\f[] (https://github.com/rg3/youtube-dl/blob/master/youtube_dl/utils.py):
+\f[C]youtube_dl/utils.py\f[] (https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/utils.py):
\f[C]int_or_none\f[], \f[C]float_or_none\f[].
Use them for string to number conversions as well.
.PP
.PP
Use \f[C]try_get\f[] for safe metadata extraction from parsed JSON.
.PP
+Use \f[C]unified_strdate\f[] for uniform \f[C]upload_date\f[] or any
+\f[C]YYYYMMDD\f[] meta field extraction, \f[C]unified_timestamp\f[] for
+uniform \f[C]timestamp\f[] extraction, \f[C]parse_filesize\f[] for
+\f[C]filesize\f[] extraction, \f[C]parse_count\f[] for count meta fields
+extraction, \f[C]parse_resolution\f[], \f[C]parse_duration\f[] for
+\f[C]duration\f[] extraction, \f[C]parse_age_limit\f[] for
+\f[C]age_limit\f[] extraction.
+.PP
Explore
-\f[C]youtube_dl/utils.py\f[] (https://github.com/rg3/youtube-dl/blob/master/youtube_dl/utils.py)
+\f[C]youtube_dl/utils.py\f[] (https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/utils.py)
for more useful convenience functions.
.SS More examples
.SS Safely extract optional description from parsed JSON
youtube\-dl makes the best effort to be a good command\-line program,
and thus should be callable from any programming language.
If you encounter any problems parsing its output, feel free to create a
-report (https://github.com/rg3/youtube-dl/issues/new).
+report (https://github.com/ytdl-org/youtube-dl/issues/new).
.PP
From a Python program, you can embed youtube\-dl in a more powerful
fashion, like this:
.PP
Most likely, you\[aq]ll want to use various options.
For a list of options available, have a look at
-\f[C]youtube_dl/YoutubeDL.py\f[] (https://github.com/rg3/youtube-dl/blob/3e4cedf9e8cd3157df2457df7274d0c842421945/youtube_dl/YoutubeDL.py#L137-L312).
+\f[C]youtube_dl/YoutubeDL.py\f[] (https://github.com/ytdl-org/youtube-dl/blob/3e4cedf9e8cd3157df2457df7274d0c842421945/youtube_dl/YoutubeDL.py#L137-L312).
For a start, if you want to intercept youtube\-dl\[aq]s output, set a
\f[C]logger\f[] object.
.PP
.SH BUGS
.PP
Bugs and suggestions should be reported at:
-<https://github.com/rg3/youtube-dl/issues>.
+<https://github.com/ytdl-org/youtube-dl/issues>.
Unless you were prompted to or there is another pertinent reason (e.g.
GitHub fails to accept the bug report), please do not send bug reports
via personal email.
Make sure that someone has not already opened the issue you\[aq]re
trying to open.
Search at the top of the window or browse the GitHub
-Issues (https://github.com/rg3/youtube-dl/search?type=Issues) of this
-repository.
+Issues (https://github.com/ytdl-org/youtube-dl/search?type=Issues) of
+this repository.
If there is an issue, feel free to write something along the lines of
"This affects me as well, with version 2015.01.01.
Here is some more information on the issue: ...".
.PP
Before requesting a new feature, please have a quick peek at the list of
supported
-options (https://github.com/rg3/youtube-dl/blob/master/README.md#options).
+options (https://github.com/ytdl-org/youtube-dl/blob/master/README.md#options).
Many feature requests are for features that actually exist already!
Please, absolutely do show off your work in the issue report and detail
how the existing similar options do \f[I]not\f[] solve your problem.
sanitize_url,
sanitized_Request,
std_headers,
+ str_or_none,
subtitles_filename,
UnavailableVideoError,
url_basename,
The following options are used by the post processors:
prefer_ffmpeg: If False, use avconv instead of ffmpeg if both are available,
otherwise prefer ffmpeg.
+ ffmpeg_location: Location of the ffmpeg/avconv binary; either the path
+ to the binary or its containing directory.
postprocessor_args: A list of additional command-line arguments for the
postprocessor.
else:
raise
- if (sys.platform != 'win32' and
- sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968'] and
- not params.get('restrictfilenames', False)):
+ if (sys.platform != 'win32'
+ and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
+ and not params.get('restrictfilenames', False)):
# Unicode filesystem API will throw errors (#1474, #13027)
self.report_warning(
'Assuming --restrict-filenames since file system encoding '
if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
if idxs:
correct_argv = (
- ['youtube-dl'] +
- [a for i, a in enumerate(argv) if i not in idxs] +
- ['--'] + [argv[i] for i in idxs]
+ ['youtube-dl']
+ + [a for i, a in enumerate(argv) if i not in idxs]
+ + ['--'] + [argv[i] for i in idxs]
)
self.report_warning(
'Long argument string detected. '
if result_type in ('url', 'url_transparent'):
ie_result['url'] = sanitize_url(ie_result['url'])
extract_flat = self.params.get('extract_flat', False)
- if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or
- extract_flat is True):
+ if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
+ or extract_flat is True):
if self.params.get('forcejson', False):
self.to_stdout(json.dumps(ie_result))
return ie_result
# url_transparent. In such cases outer metadata (from ie_result)
# should be propagated to inner one (info). For this to happen
# _type of info should be overridden with url_transparent. This
- # fixes issue from https://github.com/rg3/youtube-dl/pull/11163.
+ # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
if new_result.get('_type') == 'url':
new_result['_type'] = 'url_transparent'
if not m:
STR_OPERATORS = {
'=': operator.eq,
- '!=': operator.ne,
'^=': lambda attr, value: attr.startswith(value),
'$=': lambda attr, value: attr.endswith(value),
'*=': lambda attr, value: value in attr,
}
str_operator_rex = re.compile(r'''(?x)
\s*(?P<key>ext|acodec|vcodec|container|protocol|format_id)
- \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?
+ \s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?
\s*(?P<value>[a-zA-Z0-9._-]+)
\s*$
''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
m = str_operator_rex.search(filter_spec)
if m:
comparison_value = m.group('value')
- op = STR_OPERATORS[m.group('op')]
+ str_op = STR_OPERATORS[m.group('op')]
+ if m.group('negation'):
+ op = lambda attr, value: not str_op(attr, value)
+ else:
+ op = str_op
if not m:
raise ValueError('Invalid filter specification %r' % filter_spec)
# by extractor are incomplete or not (i.e. whether extractor provides only
# video-only or audio-only formats) for proper formats selection for
# extractors with such incomplete formats (see
- # https://github.com/rg3/youtube-dl/pull/5556).
+ # https://github.com/ytdl-org/youtube-dl/pull/5556).
# Since formats may be filtered during format selection and may not match
# the original formats the results may be incorrect. Thus original formats
# or pre-calculated metrics should be passed to format selection routines
# We will pass a context object containing all necessary additional data
# instead of just formats.
# This fixes incorrect format selection issue (see
- # https://github.com/rg3/youtube-dl/issues/10083).
+ # https://github.com/ytdl-org/youtube-dl/issues/10083).
incomplete_formats = (
# All formats are video-only or
- all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats) or
+ all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
# all formats are audio-only
- all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))
+ or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))
ctx = {
'formats': formats,
if sub_info.get('data') is not None:
try:
# Use newline='' to prevent conversion of newline characters
- # See https://github.com/rg3/youtube-dl/issues/10268
+ # See https://github.com/ytdl-org/youtube-dl/issues/10268
with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile:
subfile.write(sub_info['data'])
except (OSError, IOError):
else:
assert fixup_policy in ('ignore', 'never')
- if (info_dict.get('requested_formats') is None and
- info_dict.get('container') == 'm4a_dash'):
+ if (info_dict.get('requested_formats') is None
+ and info_dict.get('container') == 'm4a_dash'):
if fixup_policy == 'warn':
self.report_warning(
'%s: writing DASH m4a. '
else:
assert fixup_policy in ('ignore', 'never')
- if (info_dict.get('protocol') == 'm3u8_native' or
- info_dict.get('protocol') == 'm3u8' and
- self.params.get('hls_prefer_native')):
+ if (info_dict.get('protocol') == 'm3u8_native'
+ or info_dict.get('protocol') == 'm3u8'
+ and self.params.get('hls_prefer_native')):
if fixup_policy == 'warn':
self.report_warning('%s: malformed AAC bitstream detected.' % (
info_dict['id']))
def download(self, url_list):
"""Download a given list of URLs."""
outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
- if (len(url_list) > 1 and
- outtmpl != '-' and
- '%' not in outtmpl and
- self.params.get('max_downloads') != 1):
+ if (len(url_list) > 1
+ and outtmpl != '-'
+ and '%' not in outtmpl
+ and self.params.get('max_downloads') != 1):
raise SameFileError(outtmpl)
for url in url_list:
self.report_warning('Unable to remove downloaded original file')
def _make_archive_id(self, info_dict):
+ video_id = info_dict.get('id')
+ if not video_id:
+ return
# Future-proof against any change in case
# and backwards compatibility with prior versions
- extractor = info_dict.get('extractor_key')
+ extractor = info_dict.get('extractor_key') or info_dict.get('ie_key') # key in a playlist
if extractor is None:
- if 'id' in info_dict:
- extractor = info_dict.get('ie_key') # key in a playlist
- if extractor is None:
- return None # Incomplete video information
- return extractor.lower() + ' ' + info_dict['id']
+ url = str_or_none(info_dict.get('url'))
+ if not url:
+ return
+ # Try to find matching extractor for the URL and take its ie_key
+ for ie in self._ies:
+ if ie.suitable(url):
+ extractor = ie.ie_key()
+ break
+ else:
+ return
+ return extractor.lower() + ' ' + video_id
def in_download_archive(self, info_dict):
fn = self.params.get('download_archive')
return False
vid_id = self._make_archive_id(info_dict)
- if vid_id is None:
+ if not vid_id:
return False # Incomplete video information
try:
if res:
res += ', '
res += '%s container' % fdict['container']
- if (fdict.get('vcodec') is not None and
- fdict.get('vcodec') != 'none'):
+ if (fdict.get('vcodec') is not None
+ and fdict.get('vcodec') != 'none'):
if res:
res += ', '
res += fdict['vcodec']
return
if type('') is not compat_str:
- # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
+ # Python 2.6 on SLES11 SP1 (https://github.com/ytdl-org/youtube-dl/issues/3326)
self.report_warning(
'Your Python is broken! Update to a newer and supported version')
proxies = {'http': opts_proxy, 'https': opts_proxy}
else:
proxies = compat_urllib_request.getproxies()
- # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
+ # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
if 'http' in proxies and 'https' not in proxies:
proxies['https'] = proxies['http']
proxy_handler = PerRequestProxyHandler(proxies)
# When passing our own FileHandler instance, build_opener won't add the
# default FileHandler and allows us to disable the file protocol, which
# can be used for malicious purposes (see
- # https://github.com/rg3/youtube-dl/issues/8227)
+ # https://github.com/ytdl-org/youtube-dl/issues/8227)
file_handler = compat_urllib_request.FileHandler()
def file_open(*args, **kwargs):
# Delete the default user-agent header, which would otherwise apply in
# cases where our custom HTTP handler doesn't come into play
- # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
+ # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
opener.addheaders = []
self._opener = opener
def _real_main(argv=None):
# Compatibility fixes for Windows
if sys.platform == 'win32':
- # https://github.com/rg3/youtube-dl/issues/820
+ # https://github.com/ytdl-org/youtube-dl/issues/820
codecs.register(lambda name: codecs.lookup('utf-8') if name == 'cp65001' else None)
workaround_optparse_bug9161()
if opts.max_sleep_interval is not None:
if opts.max_sleep_interval < 0:
parser.error('max sleep interval must be positive or 0')
+ if opts.sleep_interval is None:
+ parser.error('min sleep interval must be specified, use --min-sleep-interval')
if opts.max_sleep_interval < opts.sleep_interval:
parser.error('max sleep interval must be greater than or equal to min sleep interval')
else:
if opts.allsubtitles and not opts.writeautomaticsub:
opts.writesubtitles = True
- outtmpl = ((opts.outtmpl is not None and opts.outtmpl) or
- (opts.format == '-1' and opts.usetitle and '%(title)s-%(id)s-%(format)s.%(ext)s') or
- (opts.format == '-1' and '%(id)s-%(format)s.%(ext)s') or
- (opts.usetitle and opts.autonumber and '%(autonumber)s-%(title)s-%(id)s.%(ext)s') or
- (opts.usetitle and '%(title)s-%(id)s.%(ext)s') or
- (opts.useid and '%(id)s.%(ext)s') or
- (opts.autonumber and '%(autonumber)s-%(id)s.%(ext)s') or
- DEFAULT_OUTTMPL)
+ outtmpl = ((opts.outtmpl is not None and opts.outtmpl)
+ or (opts.format == '-1' and opts.usetitle and '%(title)s-%(id)s-%(format)s.%(ext)s')
+ or (opts.format == '-1' and '%(id)s-%(format)s.%(ext)s')
+ or (opts.usetitle and opts.autonumber and '%(autonumber)s-%(title)s-%(id)s.%(ext)s')
+ or (opts.usetitle and '%(title)s-%(id)s.%(ext)s')
+ or (opts.useid and '%(id)s.%(ext)s')
+ or (opts.autonumber and '%(autonumber)s-%(id)s.%(ext)s')
+ or DEFAULT_OUTTMPL)
if not os.path.splitext(outtmpl)[1] and opts.extractaudio:
parser.error('Cannot download a video and extract audio into the same'
' file! Use "{0}.%(ext)s" instead of "{0}" as the output'
# HACK: The following are the correct unquote_to_bytes, unquote and unquote_plus
# implementations from cpython 3.4.3's stdlib. Python 2's version
- # is apparently broken (see https://github.com/rg3/youtube-dl/pull/6244)
+ # is apparently broken (see https://github.com/ytdl-org/youtube-dl/pull/6244)
def compat_urllib_parse_unquote_to_bytes(string):
"""unquote_to_bytes('abc%20def') -> b'abc def'."""
pass
+try:
+ # xml.etree.ElementTree.Element is a method in Python <=2.6 and
+ # the following will crash with:
+ # TypeError: isinstance() arg 2 must be a class, type, or tuple of classes and types
+ isinstance(None, xml.etree.ElementTree.Element)
+ from xml.etree.ElementTree import Element as compat_etree_Element
+except TypeError: # Python <=2.6
+ from xml.etree.ElementTree import _ElementInterface as compat_etree_Element
+
if sys.version_info[0] >= 3:
def compat_etree_fromstring(text):
return etree.XML(text, parser=etree.XMLParser(target=_TreeBuilder()))
try:
args = shlex.split('中文')
- assert (isinstance(args, list) and
- isinstance(args[0], compat_str) and
- args[0] == '中文')
+ assert (isinstance(args, list)
+ and isinstance(args[0], compat_str)
+ and args[0] == '中文')
compat_shlex_split = shlex.split
except (AssertionError, UnicodeEncodeError):
# Working around shlex issue with unicode strings on some python 2
compat_socket_create_connection = socket.create_connection
-# Fix https://github.com/rg3/youtube-dl/issues/4223
+# Fix https://github.com/ytdl-org/youtube-dl/issues/4223
# See http://bugs.python.org/issue9161 for what is broken
def workaround_optparse_bug9161():
op = optparse.OptionParser()
# PyPy2 prior to version 5.4.0 expects byte strings as Windows function
# names, see the original PyPy issue [1] and the youtube-dl one [2].
# 1. https://bitbucket.org/pypy/pypy/issues/2360/windows-ctypescdll-typeerror-function-name
- # 2. https://github.com/rg3/youtube-dl/pull/4392
+ # 2. https://github.com/ytdl-org/youtube-dl/pull/4392
def compat_ctypes_WINFUNCTYPE(*args, **kwargs):
real = ctypes.WINFUNCTYPE(*args, **kwargs)
'compat_cookiejar',
'compat_cookies',
'compat_ctypes_WINFUNCTYPE',
+ 'compat_etree_Element',
'compat_etree_fromstring',
'compat_etree_register_namespace',
'compat_expanduser',
return
speed = float(byte_counter) / elapsed
if speed > rate_limit:
- time.sleep(max((byte_counter // rate_limit) - elapsed, 0))
+ sleep_time = float(byte_counter) / rate_limit - elapsed
+ if sleep_time > 0:
+ time.sleep(sleep_time)
def temp_name(self, filename):
"""Returns a temporary filename for the given filename."""
"""
nooverwrites_and_exists = (
- self.params.get('nooverwrites', False) and
- os.path.exists(encodeFilename(filename))
+ self.params.get('nooverwrites', False)
+ and os.path.exists(encodeFilename(filename))
)
if not hasattr(filename, 'write'):
continuedl_and_exists = (
- self.params.get('continuedl', True) and
- os.path.isfile(encodeFilename(filename)) and
- not self.params.get('nopart', False)
+ self.params.get('continuedl', True)
+ and os.path.isfile(encodeFilename(filename))
+ and not self.params.get('nopart', False)
)
# Check file already present
cmd += self._valueless_option('--silent', 'noprogress')
cmd += self._valueless_option('--verbose', 'verbose')
cmd += self._option('--limit-rate', 'ratelimit')
- cmd += self._option('--retry', 'retries')
+ retry = self._option('--retry', 'retries')
+ if len(retry) == 2:
+ if retry[1] in ('inf', 'infinite'):
+ retry[1] = '2147483647'
+ cmd += retry
cmd += self._option('--max-filesize', 'max_filesize')
cmd += self._option('--interface', 'source_address')
cmd += self._option('--proxy', 'proxy')
cmd = [self.exe, '-O', tmpfilename, '-nv', '--no-cookies']
for key, val in info_dict['http_headers'].items():
cmd += ['--header', '%s: %s' % (key, val)]
+ cmd += self._option('--limit-rate', 'ratelimit')
+ retry = self._option('--tries', 'retries')
+ if len(retry) == 2:
+ if retry[1] in ('inf', 'infinite'):
+ retry[1] = '0'
+ cmd += retry
cmd += self._option('--bind-address', 'source_address')
cmd += self._option('--proxy', 'proxy')
cmd += self._valueless_option('--no-check-certificate', 'nocheckcertificate')
# setting -seekable prevents ffmpeg from guessing if the server
# supports seeking(by adding the header `Range: bytes=0-`), which
# can cause problems in some cases
- # https://github.com/rg3/youtube-dl/issues/11800#issuecomment-275037127
+ # https://github.com/ytdl-org/youtube-dl/issues/11800#issuecomment-275037127
# http://trac.ffmpeg.org/ticket/6125#comment:10
args += ['-seekable', '1' if seekable else '0']
tc_url = info_dict.get('tc_url')
flash_version = info_dict.get('flash_version')
live = info_dict.get('rtmp_live', False)
+ conn = info_dict.get('rtmp_conn')
if player_url is not None:
args += ['-rtmp_swfverify', player_url]
if page_url is not None:
args += ['-rtmp_flashver', flash_version]
if live:
args += ['-rtmp_live', 'live']
+ if isinstance(conn, list):
+ for entry in conn:
+ args += ['-rtmp_conn', entry]
+ elif isinstance(conn, compat_str):
+ args += ['-rtmp_conn', conn]
args += ['-i', url, '-c', 'copy']
# mp4 file couldn't be played, but if we ask ffmpeg to quit it
# produces a file that is playable (this is mostly useful for live
# streams). Note that Windows is not affected and produces playable
- # files (see https://github.com/rg3/youtube-dl/issues/8300).
+ # files (see https://github.com/ytdl-org/youtube-dl/issues/8300).
if sys.platform != 'win32':
proc.communicate(b'q')
raise
def remove_encrypted_media(media):
- return list(filter(lambda e: 'drmAdditionalHeaderId' not in e.attrib and
- 'drmAdditionalHeaderSetId' not in e.attrib,
+ return list(filter(lambda e: 'drmAdditionalHeaderId' not in e.attrib
+ and 'drmAdditionalHeaderSetId' not in e.attrib,
media))
media = doc.findall(_add_ns('media'))
if not media:
self.report_error('No media found')
- for e in (doc.findall(_add_ns('drmAdditionalHeader')) +
- doc.findall(_add_ns('drmAdditionalHeaderSet'))):
+ for e in (doc.findall(_add_ns('drmAdditionalHeader'))
+ + doc.findall(_add_ns('drmAdditionalHeaderSet'))):
# If id attribute is missing it's valid for all media nodes
# without drmAdditionalHeaderId or drmAdditionalHeaderSetId attribute
if 'id' not in e.attrib:
urlh = self.ydl.urlopen(self._prepare_url(info_dict, man_url))
man_url = urlh.geturl()
# Some manifests may be malformed, e.g. prosiebensat1 generated manifests
- # (see https://github.com/rg3/youtube-dl/issues/6215#issuecomment-121704244
- # and https://github.com/rg3/youtube-dl/issues/7823)
+ # (see https://github.com/ytdl-org/youtube-dl/issues/6215#issuecomment-121704244
+ # and https://github.com/ytdl-org/youtube-dl/issues/7823)
manifest = fix_xml_ampersands(urlh.read().decode('utf-8', 'ignore')).strip()
doc = compat_etree_fromstring(manifest)
# In tests, segments may be truncated, and thus
# FlvReader may not be able to parse the whole
# chunk. If so, write the segment as is
- # See https://github.com/rg3/youtube-dl/issues/9214
+ # See https://github.com/ytdl-org/youtube-dl/issues/9214
dest_stream.write(down_data)
break
raise
frag_total_bytes = s.get('total_bytes') or 0
if not ctx['live']:
estimated_size = (
- (ctx['complete_frags_downloaded_bytes'] + frag_total_bytes) /
- (state['fragment_index'] + 1) * total_frags)
+ (ctx['complete_frags_downloaded_bytes'] + frag_total_bytes)
+ / (state['fragment_index'] + 1) * total_frags)
state['total_bytes_estimate'] = estimated_size
if s['status'] == 'finished':
return fd.real_download(filename, info_dict)
def is_ad_fragment_start(s):
- return (s.startswith('#ANVATO-SEGMENT-INFO') and 'type=ad' in s or
- s.startswith('#UPLYNK-SEGMENT') and s.endswith(',ad'))
+ return (s.startswith('#ANVATO-SEGMENT-INFO') and 'type=ad' in s
+ or s.startswith('#UPLYNK-SEGMENT') and s.endswith(',ad'))
def is_ad_fragment_end(s):
- return (s.startswith('#ANVATO-SEGMENT-INFO') and 'type=master' in s or
- s.startswith('#UPLYNK-SEGMENT') and s.endswith(',segment'))
+ return (s.startswith('#ANVATO-SEGMENT-INFO') and 'type=master' in s
+ or s.startswith('#UPLYNK-SEGMENT') and s.endswith(',segment'))
media_frags = 0
ad_frags = 0
except compat_urllib_error.HTTPError as err:
# Unavailable (possibly temporary) fragments may be served.
# First we try to retry then either skip or abort.
- # See https://github.com/rg3/youtube-dl/issues/10165,
- # https://github.com/rg3/youtube-dl/issues/10448).
+ # See https://github.com/ytdl-org/youtube-dl/issues/10165,
+ # https://github.com/ytdl-org/youtube-dl/issues/10448).
count += 1
if count <= fragment_retries:
self.report_retry_fragment(err, frag_index, count, fragment_retries)
is_test = self.params.get('test', False)
chunk_size = self._TEST_FILE_SIZE if is_test else (
- info_dict.get('downloader_options', {}).get('http_chunk_size') or
- self.params.get('http_chunk_size') or 0)
+ info_dict.get('downloader_options', {}).get('http_chunk_size')
+ or self.params.get('http_chunk_size') or 0)
ctx.open_mode = 'wb'
ctx.resume_len = 0
# to match the value of requested Range HTTP header. This is due to a webservers
# that don't support resuming and serve a whole file with no Content-Range
# set in response despite of requested Range (see
- # https://github.com/rg3/youtube-dl/issues/6057#issuecomment-126129799)
+ # https://github.com/ytdl-org/youtube-dl/issues/6057#issuecomment-126129799)
if has_range:
content_range = ctx.data.headers.get('Content-Range')
if content_range:
content_len = int_or_none(content_range_m.group(3))
accept_content_len = (
# Non-chunked download
- not ctx.chunk_size or
+ not ctx.chunk_size
# Chunked download and requested piece or
# its part is promised to be served
- content_range_end == range_end or
- content_len < range_end)
+ or content_range_end == range_end
+ or content_len < range_end)
if accept_content_len:
ctx.data_len = content_len
return
raise
else:
# Examine the reported length
- if (content_length is not None and
- (ctx.resume_len - 100 < int(content_length) < ctx.resume_len + 100)):
+ if (content_length is not None
+ and (ctx.resume_len - 100 < int(content_length) < ctx.resume_len + 100)):
# The file had already been fully downloaded.
# Explanation to the above condition: in issue #175 it was revealed that
# YouTube sometimes adds or removes a few bytes from the end of the file,
parsed_url = compat_urllib_parse_urlparse(url)
av_val = av_res + len(parsed_url.netloc)
confirm_url = (
- parsed_url.scheme + '://' + parsed_url.netloc +
- action + '?' +
- compat_urllib_parse_urlencode({
+ parsed_url.scheme + '://' + parsed_url.netloc
+ + action + '?'
+ + compat_urllib_parse_urlencode({
'jschl_vc': vc, 'jschl_answer': compat_str(av_val)}))
self._download_webpage(
confirm_url, video_id,
intlist_to_bytes,
long_to_bytes,
pkcs1pad,
- srt_subtitles_timecode,
strip_or_none,
urljoin,
)
}
_BASE_URL = 'http://animedigitalnetwork.fr'
_RSA_KEY = (0xc35ae1e4356b65a73b551493da94b8cb443491c0aa092a357a5aee57ffc14dda85326f42d716e539a34542a0d3f363adf16c5ec222d713d5997194030ee2e4f0d1fb328c01a81cf6868c090d50de8e169c6b13d1675b9eeed1cbc51e1fffca9b38af07f37abd790924cd3bee59d0257cfda4fe5f3f0534877e21ce5821447d1b, 65537)
+ _POS_ALIGN_MAP = {
+ 'start': 1,
+ 'end': 3,
+ }
+ _LINE_ALIGN_MAP = {
+ 'middle': 8,
+ 'end': 4,
+ }
+
+ @staticmethod
+ def _ass_subtitles_timecode(seconds):
+ return '%01d:%02d:%02d.%02d' % (seconds / 3600, (seconds % 3600) / 60, seconds % 60, (seconds % 1) * 100)
def _get_subtitles(self, sub_path, video_id):
if not sub_path:
enc_subtitles = self._download_webpage(
urljoin(self._BASE_URL, sub_path),
- video_id, fatal=False)
+ video_id, 'Downloading subtitles location', fatal=False) or '{}'
+ subtitle_location = (self._parse_json(enc_subtitles, video_id, fatal=False) or {}).get('location')
+ if subtitle_location:
+ enc_subtitles = self._download_webpage(
+ urljoin(self._BASE_URL, subtitle_location),
+ video_id, 'Downloading subtitles data', fatal=False,
+ headers={'Origin': 'https://animedigitalnetwork.fr'})
if not enc_subtitles:
return None
# http://animedigitalnetwork.fr/components/com_vodvideo/videojs/adn-vjs.min.js
dec_subtitles = intlist_to_bytes(aes_cbc_decrypt(
bytes_to_intlist(compat_b64decode(enc_subtitles[24:])),
- bytes_to_intlist(binascii.unhexlify(self._K + '9032ad7083106400')),
+ bytes_to_intlist(binascii.unhexlify(self._K + '4b8ef13ec1872730')),
bytes_to_intlist(compat_b64decode(enc_subtitles[:24]))
))
subtitles_json = self._parse_json(
subtitles = {}
for sub_lang, sub in subtitles_json.items():
- srt = ''
- for num, current in enumerate(sub):
- start, end, text = (
+ ssa = '''[Script Info]
+ScriptType:V4.00
+[V4 Styles]
+Format: Name,Fontname,Fontsize,PrimaryColour,SecondaryColour,TertiaryColour,BackColour,Bold,Italic,BorderStyle,Outline,Shadow,Alignment,MarginL,MarginR,MarginV,AlphaLevel,Encoding
+Style: Default,Arial,18,16777215,16777215,16777215,0,-1,0,1,1,0,2,20,20,20,0,0
+[Events]
+Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
+ for current in sub:
+ start, end, text, line_align, position_align = (
float_or_none(current.get('startTime')),
float_or_none(current.get('endTime')),
- current.get('text'))
+ current.get('text'), current.get('lineAlign'),
+ current.get('positionAlign'))
if start is None or end is None or text is None:
continue
- srt += os.linesep.join(
- (
- '%d' % num,
- '%s --> %s' % (
- srt_subtitles_timecode(start),
- srt_subtitles_timecode(end)),
- text,
- os.linesep,
- ))
+ alignment = self._POS_ALIGN_MAP.get(position_align, 2) + self._LINE_ALIGN_MAP.get(line_align, 0)
+ ssa += os.linesep + 'Dialogue: Marked=0,%s,%s,Default,,0,0,0,,%s%s' % (
+ self._ass_subtitles_timecode(start),
+ self._ass_subtitles_timecode(end),
+ '{\\a%d}' % alignment if alignment != 2 else '',
+ text.replace('\n', '\\N').replace('<i>', '{\\i1}').replace('</i>', '{\\i0}'))
if sub_lang == 'vostf':
sub_lang = 'fr'
'ext': 'json',
'data': json.dumps(sub),
}, {
- 'ext': 'srt',
- 'data': srt,
+ 'ext': 'ssa',
+ 'data': ssa,
}])
return subtitles
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
player_config = self._parse_json(self._search_regex(
- r'playerConfig\s*=\s*({.+});', webpage, 'player config'), video_id)
+ r'playerConfig\s*=\s*({.+});', webpage,
+ 'player config', default='{}'), video_id, fatal=False)
+ if not player_config:
+ config_url = urljoin(self._BASE_URL, self._search_regex(
+ r'(?:id="player"|class="[^"]*adn-player-container[^"]*")[^>]+data-url="([^"]+)"',
+ webpage, 'config url'))
+ player_config = self._download_json(
+ config_url, video_id,
+ 'Downloading player config JSON metadata')['player']
video_info = {}
video_info_str = self._search_regex(
encrypted_message = long_to_bytes(pow(bytes_to_long(padded_message), e, n))
authorization = base64.b64encode(encrypted_message).decode()
links_data = self._download_json(
- urljoin(self._BASE_URL, links_url), video_id, headers={
+ urljoin(self._BASE_URL, links_url), video_id,
+ 'Downloading links JSON metadata', headers={
'Authorization': 'Bearer ' + authorization,
})
links = links_data.get('links') or {}
metas = metas or links_data.get('meta') or {}
- sub_path = (sub_path or links_data.get('subtitles')) + '&token=' + token
+ sub_path = sub_path or links_data.get('subtitles') or \
+ 'index.php?option=com_vodapi&task=subtitles.getJSON&format=json&id=' + video_id
+ sub_path += '&token=' + token
error = links_data.get('error')
title = metas.get('title') or video_info['title']
for format_id, qualities in links.items():
if not isinstance(qualities, dict):
continue
- for load_balancer_url in qualities.values():
+ for quality, load_balancer_url in qualities.items():
load_balancer_data = self._download_json(
- load_balancer_url, video_id, fatal=False) or {}
+ load_balancer_url, video_id,
+ 'Downloading %s %s JSON metadata' % (format_id, quality),
+ fatal=False) or {}
m3u8_url = load_balancer_data.get('location')
if not m3u8_url:
continue
--- /dev/null
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_parse_qs,
+ compat_urlparse,
+)
+
+
+class AdobeConnectIE(InfoExtractor):
+ _VALID_URL = r'https?://\w+\.adobeconnect\.com/(?P<id>[\w-]+)'
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+ title = self._html_search_regex(r'<title>(.+?)</title>', webpage, 'title')
+ qs = compat_parse_qs(self._search_regex(r"swfUrl\s*=\s*'([^']+)'", webpage, 'swf url').split('?')[1])
+ is_live = qs.get('isLive', ['false'])[0] == 'true'
+ formats = []
+ for con_string in qs['conStrings'][0].split(','):
+ formats.append({
+ 'format_id': con_string.split('://')[0],
+ 'app': compat_urlparse.quote('?' + con_string.split('?')[1] + 'flvplayerapp/' + qs['appInstance'][0]),
+ 'ext': 'flv',
+ 'play_path': 'mp4:' + qs['streamName'][0],
+ 'rtmp_conn': 'S:' + qs['ticket'][0],
+ 'rtmp_live': is_live,
+ 'url': con_string,
+ })
+
+ return {
+ 'id': video_id,
+ 'title': self._live_title(title) if is_live else title,
+ 'formats': formats,
+ 'is_live': is_live,
+ }
# coding: utf-8
from __future__ import unicode_literals
+import json
import re
from .turner import TurnerBaseIE
from ..utils import (
+ determine_ext,
+ float_or_none,
int_or_none,
+ mimetype2ext,
+ parse_age_limit,
+ parse_iso8601,
strip_or_none,
- url_or_none,
+ try_get,
)
'ext': 'mp4',
'title': 'Rick and Morty - Pilot',
'description': 'Rick moves in with his daughter\'s family and establishes himself as a bad influence on his grandson, Morty.',
- 'timestamp': 1493267400,
- 'upload_date': '20170427',
+ 'timestamp': 1543294800,
+ 'upload_date': '20181127',
},
'params': {
# m3u8 download
# m3u8 download
'skip_download': True,
},
+ 'skip': '404 Not Found',
}, {
'url': 'http://www.adultswim.com/videos/decker/inside-decker-a-new-hero/',
'info_dict': {
}, {
'url': 'http://www.adultswim.com/videos/attack-on-titan',
'info_dict': {
- 'id': 'b7A69dzfRzuaXIECdxW8XQ',
+ 'id': 'attack-on-titan',
'title': 'Attack on Titan',
- 'description': 'md5:6c8e003ea0777b47013e894767f5e114',
+ 'description': 'md5:41caa9416906d90711e31dc00cb7db7e',
},
'playlist_mincount': 12,
}, {
# m3u8 download
'skip_download': True,
},
+ 'skip': '404 Not Found',
}]
def _real_extract(self, url):
show_path, episode_path = re.match(self._VALID_URL, url).groups()
display_id = episode_path or show_path
- webpage = self._download_webpage(url, display_id)
- initial_data = self._parse_json(self._search_regex(
- r'AS_INITIAL_DATA(?:__)?\s*=\s*({.+?});',
- webpage, 'initial data'), display_id)
-
- is_stream = show_path == 'streams'
- if is_stream:
- if not episode_path:
- episode_path = 'live-stream'
+ query = '''query {
+ getShowBySlug(slug:"%s") {
+ %%s
+ }
+}''' % show_path
+ if episode_path:
+ query = query % '''title
+ getVideoBySlug(slug:"%s") {
+ _id
+ auth
+ description
+ duration
+ episodeNumber
+ launchDate
+ mediaID
+ seasonNumber
+ poster
+ title
+ tvRating
+ }''' % episode_path
+ ['getVideoBySlug']
+ else:
+ query = query % '''metaDescription
+ title
+ videos(first:1000,sort:["episode_number"]) {
+ edges {
+ node {
+ _id
+ slug
+ }
+ }
+ }'''
+ show_data = self._download_json(
+ 'https://www.adultswim.com/api/search', display_id,
+ data=json.dumps({'query': query}).encode(),
+ headers={'Content-Type': 'application/json'})['data']['getShowBySlug']
+ if episode_path:
+ video_data = show_data['getVideoBySlug']
+ video_id = video_data['_id']
+ episode_title = title = video_data['title']
+ series = show_data.get('title')
+ if series:
+ title = '%s - %s' % (series, title)
+ info = {
+ 'id': video_id,
+ 'title': title,
+ 'description': strip_or_none(video_data.get('description')),
+ 'duration': float_or_none(video_data.get('duration')),
+ 'formats': [],
+ 'subtitles': {},
+ 'age_limit': parse_age_limit(video_data.get('tvRating')),
+ 'thumbnail': video_data.get('poster'),
+ 'timestamp': parse_iso8601(video_data.get('launchDate')),
+ 'series': series,
+ 'season_number': int_or_none(video_data.get('seasonNumber')),
+ 'episode': episode_title,
+ 'episode_number': int_or_none(video_data.get('episodeNumber')),
+ }
- video_data = next(stream for stream_path, stream in initial_data['streams'].items() if stream_path == episode_path)
- video_id = video_data.get('stream')
+ auth = video_data.get('auth')
+ media_id = video_data.get('mediaID')
+ if media_id:
+ info.update(self._extract_ngtv_info(media_id, {
+ # CDN_TOKEN_APP_ID from:
+ # https://d2gg02c3xr550i.cloudfront.net/assets/asvp.e9c8bef24322d060ef87.bundle.js
+ 'appId': 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJhcHBJZCI6ImFzLXR2ZS1kZXNrdG9wLXB0enQ2bSIsInByb2R1Y3QiOiJ0dmUiLCJuZXR3b3JrIjoiYXMiLCJwbGF0Zm9ybSI6ImRlc2t0b3AiLCJpYXQiOjE1MzI3MDIyNzl9.BzSCk-WYOZ2GMCIaeVb8zWnzhlgnXuJTCu0jGp_VaZE',
+ }, {
+ 'url': url,
+ 'site_name': 'AdultSwim',
+ 'auth_required': auth,
+ }))
- if not video_id:
- entries = []
- for episode in video_data.get('archiveEpisodes', []):
- episode_url = url_or_none(episode.get('url'))
- if not episode_url:
+ if not auth:
+ extract_data = self._download_json(
+ 'https://www.adultswim.com/api/shows/v1/videos/' + video_id,
+ video_id, query={'fields': 'stream'}, fatal=False) or {}
+ assets = try_get(extract_data, lambda x: x['data']['video']['stream']['assets'], list) or []
+ for asset in assets:
+ asset_url = asset.get('url')
+ if not asset_url:
continue
- entries.append(self.url_result(
- episode_url, 'AdultSwim', episode.get('id')))
- return self.playlist_result(
- entries, video_data.get('id'), video_data.get('title'),
- strip_or_none(video_data.get('description')))
- else:
- show_data = initial_data['show']
-
- if not episode_path:
- entries = []
- for video in show_data.get('videos', []):
- slug = video.get('slug')
- if not slug:
+ ext = determine_ext(asset_url, mimetype2ext(asset.get('mime_type')))
+ if ext == 'm3u8':
+ info['formats'].extend(self._extract_m3u8_formats(
+ asset_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
+ elif ext == 'f4m':
continue
- entries.append(self.url_result(
- 'http://adultswim.com/videos/%s/%s' % (show_path, slug),
- 'AdultSwim', video.get('id')))
- return self.playlist_result(
- entries, show_data.get('id'), show_data.get('title'),
- strip_or_none(show_data.get('metadata', {}).get('description')))
-
- video_data = show_data['sluggedVideo']
- video_id = video_data['id']
+ # info['formats'].extend(self._extract_f4m_formats(
+ # asset_url, video_id, f4m_id='hds', fatal=False))
+ elif ext in ('scc', 'ttml', 'vtt'):
+ info['subtitles'].setdefault('en', []).append({
+ 'url': asset_url,
+ })
+ self._sort_formats(info['formats'])
- info = self._extract_cvp_info(
- 'http://www.adultswim.com/videos/api/v0/assets?platform=desktop&id=' + video_id,
- video_id, {
- 'secure': {
- 'media_src': 'http://androidhls-secure.cdn.turner.com/adultswim/big',
- 'tokenizer_src': 'http://www.adultswim.com/astv/mvpd/processors/services/token_ipadAdobe.do',
- },
- }, {
- 'url': url,
- 'site_name': 'AdultSwim',
- 'auth_required': video_data.get('auth'),
- })
-
- info.update({
- 'id': video_id,
- 'display_id': display_id,
- 'description': info.get('description') or strip_or_none(video_data.get('description')),
- })
- if not is_stream:
- info.update({
- 'duration': info.get('duration') or int_or_none(video_data.get('duration')),
- 'timestamp': info.get('timestamp') or int_or_none(video_data.get('launch_date')),
- 'season_number': info.get('season_number') or int_or_none(video_data.get('season_number')),
- 'episode': info['title'],
- 'episode_number': info.get('episode_number') or int_or_none(video_data.get('episode_number')),
- })
-
- info['series'] = video_data.get('collection_title') or info.get('series')
- if info['series'] and info['series'] != info['title']:
- info['title'] = '%s - %s' % (info['series'], info['title'])
-
- return info
+ return info
+ else:
+ entries = []
+ for edge in show_data.get('videos', {}).get('edges', []):
+ video = edge.get('node') or {}
+ slug = video.get('slug')
+ if not slug:
+ continue
+ entries.append(self.url_result(
+ 'http://adultswim.com/videos/%s/%s' % (show_path, slug),
+ 'AdultSwim', video.get('_id')))
+ return self.playlist_result(
+ entries, show_path, show_data.get('title'),
+ strip_or_none(show_data.get('metaDescription')))
+# coding: utf-8
from __future__ import unicode_literals
import re
from .theplatform import ThePlatformIE
from ..utils import (
+ extract_attributes,
+ ExtractorError,
+ int_or_none,
smuggle_url,
update_url_query,
- unescapeHTML,
- extract_attributes,
- get_element_by_attribute,
)
from ..compat import (
compat_urlparse,
_THEPLATFORM_KEY = 'crazyjava'
_THEPLATFORM_SECRET = 's3cr3t'
+ def _extract_aen_smil(self, smil_url, video_id, auth=None):
+ query = {'mbr': 'true'}
+ if auth:
+ query['auth'] = auth
+ TP_SMIL_QUERY = [{
+ 'assetTypes': 'high_video_ak',
+ 'switch': 'hls_high_ak'
+ }, {
+ 'assetTypes': 'high_video_s3'
+ }, {
+ 'assetTypes': 'high_video_s3',
+ 'switch': 'hls_ingest_fastly'
+ }]
+ formats = []
+ subtitles = {}
+ last_e = None
+ for q in TP_SMIL_QUERY:
+ q.update(query)
+ m_url = update_url_query(smil_url, q)
+ m_url = self._sign_url(m_url, self._THEPLATFORM_KEY, self._THEPLATFORM_SECRET)
+ try:
+ tp_formats, tp_subtitles = self._extract_theplatform_smil(
+ m_url, video_id, 'Downloading %s SMIL data' % (q.get('switch') or q['assetTypes']))
+ except ExtractorError as e:
+ last_e = e
+ continue
+ formats.extend(tp_formats)
+ subtitles = self._merge_subtitles(subtitles, tp_subtitles)
+ if last_e and not formats:
+ raise last_e
+ self._sort_formats(formats)
+ return {
+ 'id': video_id,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ }
+
class AENetworksIE(AENetworksBaseIE):
IE_NAME = 'aenetworks'
(?:
shows/(?P<show_path>[^/]+(?:/[^/]+){0,2})|
movies/(?P<movie_display_id>[^/]+)(?:/full-movie)?|
- specials/(?P<special_display_id>[^/]+)/full-special|
+ specials/(?P<special_display_id>[^/]+)/(?:full-special|preview-)|
collections/[^/]+/(?P<collection_display_id>[^/]+)
)
'''
_TESTS = [{
'url': 'http://www.history.com/shows/mountain-men/season-1/episode-1',
- 'md5': 'a97a65f7e823ae10e9244bc5433d5fe6',
'info_dict': {
'id': '22253814',
'ext': 'mp4',
- 'title': 'Winter Is Coming',
+ 'title': 'Winter is Coming',
'description': 'md5:641f424b7a19d8e24f26dea22cf59d74',
'timestamp': 1338306241,
'upload_date': '20120529',
'uploader': 'AENE-NEW',
},
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
'add_ie': ['ThePlatform'],
}, {
'url': 'http://www.history.com/shows/ancient-aliens/season-1',
}, {
'url': 'https://www.historyvault.com/collections/america-the-story-of-us/westward',
'only_matching': True
+ }, {
+ 'url': 'https://www.aetv.com/specials/hunting-jonbenets-killer-the-untold-story/preview-hunting-jonbenets-killer-the-untold-story',
+ 'only_matching': True
}]
_DOMAIN_TO_REQUESTOR_ID = {
'history.com': 'HISTORY',
return self.playlist_result(
entries, self._html_search_meta('aetn:SeasonId', webpage))
- query = {
- 'mbr': 'true',
- 'assetTypes': 'high_video_ak',
- 'switch': 'hls_high_ak',
- }
video_id = self._html_search_meta('aetn:VideoID', webpage)
media_url = self._search_regex(
[r"media_url\s*=\s*'(?P<url>[^']+)'",
theplatform_metadata = self._download_theplatform_metadata(self._search_regex(
r'https?://link\.theplatform\.com/s/([^?]+)', media_url, 'theplatform_path'), video_id)
info = self._parse_theplatform_metadata(theplatform_metadata)
+ auth = None
if theplatform_metadata.get('AETN$isBehindWall'):
requestor_id = self._DOMAIN_TO_REQUESTOR_ID[domain]
resource = self._get_mvpd_resource(
requestor_id, theplatform_metadata['title'],
theplatform_metadata.get('AETN$PPL_pplProgramId') or theplatform_metadata.get('AETN$PPL_pplProgramId_OLD'),
theplatform_metadata['ratings'][0]['rating'])
- query['auth'] = self._extract_mvpd_auth(
+ auth = self._extract_mvpd_auth(
url, video_id, requestor_id, resource)
info.update(self._search_json_ld(webpage, video_id, fatal=False))
- media_url = update_url_query(media_url, query)
- media_url = self._sign_url(media_url, self._THEPLATFORM_KEY, self._THEPLATFORM_SECRET)
- formats, subtitles = self._extract_theplatform_smil(media_url, video_id)
- self._sort_formats(formats)
- info.update({
- 'id': video_id,
- 'formats': formats,
- 'subtitles': subtitles,
- })
+ info.update(self._extract_aen_smil(media_url, video_id, auth))
return info
class HistoryTopicIE(AENetworksBaseIE):
IE_NAME = 'history:topic'
IE_DESC = 'History.com Topic'
- _VALID_URL = r'https?://(?:www\.)?history\.com/topics/(?:[^/]+/)?(?P<topic_id>[^/]+)(?:/[^/]+(?:/(?P<video_display_id>[^/?#]+))?)?'
+ _VALID_URL = r'https?://(?:www\.)?history\.com/topics/[^/]+/(?P<id>[\w+-]+?)-video'
_TESTS = [{
- 'url': 'http://www.history.com/topics/valentines-day/history-of-valentines-day/videos/bet-you-didnt-know-valentines-day?m=528e394da93ae&s=undefined&f=1&free=false',
+ 'url': 'https://www.history.com/topics/valentines-day/history-of-valentines-day-video',
'info_dict': {
'id': '40700995724',
'ext': 'mp4',
- 'title': "Bet You Didn't Know: Valentine's Day",
+ 'title': "History of Valentine’s Day",
'description': 'md5:7b57ea4829b391995b405fa60bd7b5f7',
'timestamp': 1375819729,
'upload_date': '20130806',
- 'uploader': 'AENE-NEW',
},
'params': {
# m3u8 download
'skip_download': True,
},
'add_ie': ['ThePlatform'],
- }, {
- 'url': 'http://www.history.com/topics/world-war-i/world-war-i-history/videos',
- 'info_dict':
- {
- 'id': 'world-war-i-history',
- 'title': 'World War I History',
- },
- 'playlist_mincount': 23,
- }, {
- 'url': 'http://www.history.com/topics/world-war-i-history/videos',
- 'only_matching': True,
- }, {
- 'url': 'http://www.history.com/topics/world-war-i/world-war-i-history',
- 'only_matching': True,
- }, {
- 'url': 'http://www.history.com/topics/world-war-i/world-war-i-history/speeches',
- 'only_matching': True,
}]
def theplatform_url_result(self, theplatform_url, video_id, query):
}
def _real_extract(self, url):
- topic_id, video_display_id = re.match(self._VALID_URL, url).groups()
- if video_display_id:
- webpage = self._download_webpage(url, video_display_id)
- release_url, video_id = re.search(r"_videoPlayer.play\('([^']+)'\s*,\s*'[^']+'\s*,\s*'(\d+)'\)", webpage).groups()
- release_url = unescapeHTML(release_url)
-
- return self.theplatform_url_result(
- release_url, video_id, {
- 'mbr': 'true',
- 'switch': 'hls',
- 'assetTypes': 'high_video_ak',
- })
- else:
- webpage = self._download_webpage(url, topic_id)
- entries = []
- for episode_item in re.findall(r'<a.+?data-release-url="[^"]+"[^>]*>', webpage):
- video_attributes = extract_attributes(episode_item)
- entries.append(self.theplatform_url_result(
- video_attributes['data-release-url'], video_attributes['data-id'], {
- 'mbr': 'true',
- 'switch': 'hls',
- 'assetTypes': 'high_video_ak',
- }))
- return self.playlist_result(entries, topic_id, get_element_by_attribute('class', 'show-title', webpage))
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+ video_id = self._search_regex(
+ r'<phoenix-iframe[^>]+src="[^"]+\btpid=(\d+)', webpage, 'tpid')
+ result = self._download_json(
+ 'https://feeds.video.aetnd.com/api/v2/history/videos',
+ video_id, query={'filter[id]': video_id})['results'][0]
+ title = result['title']
+ info = self._extract_aen_smil(result['publicUrl'], video_id)
+ info.update({
+ 'title': title,
+ 'description': result.get('description'),
+ 'duration': int_or_none(result.get('duration')),
+ 'timestamp': int_or_none(result.get('added'), 1000),
+ })
+ return info
+++ /dev/null
-from __future__ import unicode_literals
-
-from .nuevo import NuevoBaseIE
-
-
-class AnitubeIE(NuevoBaseIE):
- IE_NAME = 'anitube.se'
- _VALID_URL = r'https?://(?:www\.)?anitube\.se/video/(?P<id>\d+)'
-
- _TEST = {
- 'url': 'http://www.anitube.se/video/36621',
- 'md5': '59d0eeae28ea0bc8c05e7af429998d43',
- 'info_dict': {
- 'id': '36621',
- 'ext': 'mp4',
- 'title': 'Recorder to Randoseru 01',
- 'duration': 180.19,
- },
- 'skip': 'Blocked in the US',
- }
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
-
- webpage = self._download_webpage(url, video_id)
- key = self._search_regex(
- r'src=["\']https?://[^/]+/embed/([A-Za-z0-9_-]+)', webpage, 'key')
-
- return self._extract_nuevo(
- 'http://www.anitube.se/nuevo/econfig.php?key=%s' % key, video_id)
+++ /dev/null
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-from ..utils import (
- parse_duration,
- int_or_none,
-)
-
-
-class AnySexIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?anysex\.com/(?P<id>\d+)'
- _TEST = {
- 'url': 'http://anysex.com/156592/',
- 'md5': '023e9fbb7f7987f5529a394c34ad3d3d',
- 'info_dict': {
- 'id': '156592',
- 'ext': 'mp4',
- 'title': 'Busty and sexy blondie in her bikini strips for you',
- 'description': 'md5:de9e418178e2931c10b62966474e1383',
- 'categories': ['Erotic'],
- 'duration': 270,
- 'age_limit': 18,
- }
- }
-
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('id')
-
- webpage = self._download_webpage(url, video_id)
-
- video_url = self._html_search_regex(r"video_url\s*:\s*'([^']+)'", webpage, 'video URL')
-
- title = self._html_search_regex(r'<title>(.*?)</title>', webpage, 'title')
- description = self._html_search_regex(
- r'<div class="description"[^>]*>([^<]+)</div>', webpage, 'description', fatal=False)
- thumbnail = self._html_search_regex(
- r'preview_url\s*:\s*\'(.*?)\'', webpage, 'thumbnail', fatal=False)
-
- categories = re.findall(
- r'<a href="http://anysex\.com/categories/[^"]+" title="[^"]*">([^<]+)</a>', webpage)
-
- duration = parse_duration(self._search_regex(
- r'<b>Duration:</b> (?:<q itemprop="duration">)?(\d+:\d+)', webpage, 'duration', fatal=False))
- view_count = int_or_none(self._html_search_regex(
- r'<b>Views:</b> (\d+)', webpage, 'view count', fatal=False))
-
- return {
- 'id': video_id,
- 'url': video_url,
- 'ext': 'mp4',
- 'title': title,
- 'description': description,
- 'thumbnail': thumbnail,
- 'categories': categories,
- 'duration': duration,
- 'view_count': view_count,
- 'age_limit': 18,
- }
import re
from .common import InfoExtractor
+from ..compat import (
+ compat_parse_qs,
+ compat_urllib_parse_urlparse,
+)
from ..utils import (
ExtractorError,
int_or_none,
class AolIE(InfoExtractor):
- IE_NAME = 'on.aol.com'
- _VALID_URL = r'(?:aol-video:|https?://(?:(?:www|on)\.)?aol\.com/(?:[^/]+/)*(?:[^/?#&]+-)?)(?P<id>[^/?#&]+)'
+ IE_NAME = 'aol.com'
+ _VALID_URL = r'(?:aol-video:|https?://(?:www\.)?aol\.(?:com|ca|co\.uk|de|jp)/video/(?:[^/]+/)*)(?P<id>[0-9a-f]+)'
_TESTS = [{
# video with 5min ID
- 'url': 'http://on.aol.com/video/u-s--official-warns-of-largest-ever-irs-phone-scam-518167793?icid=OnHomepageC2Wide_MustSee_Img',
+ 'url': 'https://www.aol.com/video/view/u-s--official-warns-of-largest-ever-irs-phone-scam/518167793/',
'md5': '18ef68f48740e86ae94b98da815eec42',
'info_dict': {
'id': '518167793',
}
}, {
# video with vidible ID
- 'url': 'http://www.aol.com/video/view/netflix-is-raising-rates/5707d6b8e4b090497b04f706/',
+ 'url': 'https://www.aol.com/video/view/netflix-is-raising-rates/5707d6b8e4b090497b04f706/',
'info_dict': {
'id': '5707d6b8e4b090497b04f706',
'ext': 'mp4',
'skip_download': True,
}
}, {
- 'url': 'http://on.aol.com/partners/abc-551438d309eab105804dbfe8/sneak-peek-was-haley-really-framed-570eaebee4b0448640a5c944',
+ 'url': 'https://www.aol.com/video/view/park-bench-season-2-trailer/559a1b9be4b0c3bfad3357a7/',
'only_matching': True,
}, {
- 'url': 'http://on.aol.com/shows/park-bench-shw518173474-559a1b9be4b0c3bfad3357a7?context=SH:SHW518173474:PL4327:1460619712763',
+ 'url': 'https://www.aol.com/video/view/donald-trump-spokeswoman-tones-down-megyn-kelly-attacks/519442220/',
'only_matching': True,
}, {
- 'url': 'http://on.aol.com/video/519442220',
+ 'url': 'aol-video:5707d6b8e4b090497b04f706',
'only_matching': True,
}, {
- 'url': 'aol-video:5707d6b8e4b090497b04f706',
+ 'url': 'https://www.aol.com/video/playlist/PL8245/5ca79d19d21f1a04035db606/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.aol.ca/video/view/u-s-woman-s-family-arrested-for-murder-first-pinned-on-panhandler-police/5c7ccf45bc03931fa04b2fe1/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.aol.co.uk/video/view/-one-dead-and-22-hurt-in-bus-crash-/5cb3a6f3d21f1a072b457347/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.aol.de/video/view/eva-braun-privataufnahmen-von-hitlers-geliebter-werden-digitalisiert/5cb2d49de98ab54c113d3d5d/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.aol.jp/video/playlist/5a28e936a1334d000137da0c/5a28f3151e642219fde19831/',
'only_matching': True,
}]
video_data = response['data']
formats = []
- m3u8_url = video_data.get('videoMasterPlaylist')
+ m3u8_url = url_or_none(video_data.get('videoMasterPlaylist'))
if m3u8_url:
formats.extend(self._extract_m3u8_formats(
m3u8_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
'width': int(mobj.group(1)),
'height': int(mobj.group(2)),
})
+ else:
+ qs = compat_parse_qs(compat_urllib_parse_urlparse(video_url).query)
+ f.update({
+ 'width': int_or_none(qs.get('w', [None])[0]),
+ 'height': int_or_none(qs.get('h', [None])[0]),
+ })
formats.append(f)
self._sort_formats(formats, ('width', 'height', 'tbr', 'format_id'))
f_url, video_id, mpd_id=kind, fatal=False))
elif kind == 'silverlight':
# TODO: process when ism is supported (see
- # https://github.com/rg3/youtube-dl/issues/8118)
+ # https://github.com/ytdl-org/youtube-dl/issues/8118)
continue
else:
tbr = float_or_none(f.get('Bitrate'), 1000)
_TEST = {
'url': 'http://bambuser.com/v/4050584',
- # MD5 seems to be flaky, see https://travis-ci.org/rg3/youtube-dl/jobs/14051016#L388
+ # MD5 seems to be flaky, see https://travis-ci.org/ytdl-org/youtube-dl/jobs/14051016#L388
# 'md5': 'fba8f7693e48fd4e8641b3fd5539a641',
'info_dict': {
'id': '4050584',
},
'params': {
# It doesn't respect the 'Range' header, it would download the whole video
- # caused the travis builds to fail: https://travis-ci.org/rg3/youtube-dl/jobs/14493845#L59
+ # caused the travis builds to fail: https://travis-ci.org/ytdl-org/youtube-dl/jobs/14493845#L59
'skip_download': True,
},
}
# coding: utf-8
from __future__ import unicode_literals
-import re
import itertools
+import re
from .common import InfoExtractor
from ..utils import (
parse_iso8601,
try_get,
unescapeHTML,
+ url_or_none,
urlencode_postdata,
urljoin,
)
from ..compat import (
+ compat_etree_Element,
compat_HTTPError,
compat_urlparse,
)
},
'skip': 'Now it\'s really geo-restricted',
}, {
- # compact player (https://github.com/rg3/youtube-dl/issues/8147)
+ # compact player (https://github.com/ytdl-org/youtube-dl/issues/8147)
'url': 'http://www.bbc.co.uk/programmes/p028bfkf/player',
'info_dict': {
'id': 'p028bfkj',
def _get_subtitles(self, media, programme_id):
subtitles = {}
for connection in self._extract_connections(media):
- captions = self._download_xml(connection.get('href'), programme_id, 'Downloading captions')
+ cc_url = url_or_none(connection.get('href'))
+ if not cc_url:
+ continue
+ captions = self._download_xml(
+ cc_url, programme_id, 'Downloading captions', fatal=False)
+ if not isinstance(captions, compat_etree_Element):
+ continue
lang = captions.get('{http://www.w3.org/XML/1998/namespace}lang', 'en')
subtitles[lang] = [
{
class BeegIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?beeg\.com/(?P<id>\d+)'
- _TEST = {
+ _VALID_URL = r'https?://(?:www\.)?beeg\.(?:com|porn(?:/video)?)/(?P<id>\d+)'
+ _TESTS = [{
'url': 'http://beeg.com/5416503',
'md5': 'a1a1b1a8bc70a89e49ccfd113aed0820',
'info_dict': {
'tags': list,
'age_limit': 18,
}
- }
+ }, {
+ 'url': 'https://beeg.porn/video/5416503',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://beeg.porn/5416503',
+ 'only_matching': True,
+ }]
def _real_extract(self, url):
video_id = self._match_id(url)
--- /dev/null
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import extract_attributes
+
+
+class BFIPlayerIE(InfoExtractor):
+ IE_NAME = 'bfi:player'
+ _VALID_URL = r'https?://player\.bfi\.org\.uk/[^/]+/film/watch-(?P<id>[\w-]+)-online'
+ _TEST = {
+ 'url': 'https://player.bfi.org.uk/free/film/watch-computer-doctor-1974-online',
+ 'md5': 'e8783ebd8e061ec4bc6e9501ed547de8',
+ 'info_dict': {
+ 'id': 'htNnhlZjE60C9VySkQEIBtU-cNV1Xx63',
+ 'ext': 'mp4',
+ 'title': 'Computer Doctor',
+ 'description': 'md5:fb6c240d40c4dbe40428bdd62f78203b',
+ },
+ 'skip': 'BFI Player films cannot be played outside of the UK',
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+ entries = []
+ for player_el in re.findall(r'(?s)<[^>]+class="player"[^>]*>', webpage):
+ player_attr = extract_attributes(player_el)
+ ooyala_id = player_attr.get('data-video-id')
+ if not ooyala_id:
+ continue
+ entries.append(self.url_result(
+ 'ooyala:' + ooyala_id, 'Ooyala',
+ ooyala_id, player_attr.get('data-label')))
+ return self.playlist_result(entries)
}]
}]
- _APP_KEY = '84956560bc028eb7'
- _BILIBILI_KEY = '94aba54af9065f71de72f5508f1cd42e'
+ _APP_KEY = 'iVGUTjsxvpLeuDCf'
+ _BILIBILI_KEY = 'aHRmhWMLkdeMuILqORnYZocwMBpMEOdt'
def _report_error(self, result):
if 'message' in result:
from __future__ import unicode_literals
from .common import InfoExtractor
+from .vk import VKIE
+from ..utils import (
+ HEADRequest,
+ int_or_none,
+)
class BIQLEIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?biqle\.(?:com|org|ru)/watch/(?P<id>-?\d+_\d+)'
_TESTS = [{
- 'url': 'http://www.biqle.ru/watch/847655_160197695',
- 'md5': 'ad5f746a874ccded7b8f211aeea96637',
+ # Youtube embed
+ 'url': 'https://biqle.ru/watch/-115995369_456239081',
+ 'md5': '97af5a06ee4c29bbf9c001bdb1cf5c06',
'info_dict': {
- 'id': '160197695',
+ 'id': '8v4f-avW-VI',
'ext': 'mp4',
- 'title': 'Foo Fighters - The Pretender (Live at Wembley Stadium)',
- 'uploader': 'Andrey Rogozin',
- 'upload_date': '20110605',
- }
+ 'title': "PASSE-PARTOUT - L'ete c'est fait pour jouer",
+ 'description': 'Passe-Partout',
+ 'uploader_id': 'mrsimpsonstef3',
+ 'uploader': 'Phanolito',
+ 'upload_date': '20120822',
+ },
}, {
- 'url': 'https://biqle.org/watch/-44781847_168547604',
+ 'url': 'http://biqle.org/watch/-44781847_168547604',
'md5': '7f24e72af1db0edf7c1aaba513174f97',
'info_dict': {
- 'id': '168547604',
+ 'id': '-44781847_168547604',
'ext': 'mp4',
'title': 'Ребенок в шоке от автоматической мойки',
+ 'timestamp': 1396633454,
'uploader': 'Dmitry Kotov',
+ 'upload_date': '20140404',
+ 'uploader_id': '47850140',
},
- 'skip': ' This video was marked as adult. Embedding adult videos on external sites is prohibited.',
}]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
embed_url = self._proto_relative_url(self._search_regex(
- r'<iframe.+?src="((?:http:)?//daxab\.com/[^"]+)".*?></iframe>', webpage, 'embed url'))
+ r'<iframe.+?src="((?:https?:)?//daxab\.com/[^"]+)".*?></iframe>',
+ webpage, 'embed url'))
+ if VKIE.suitable(embed_url):
+ return self.url_result(embed_url, VKIE.ie_key(), video_id)
+
+ self._request_webpage(
+ HEADRequest(embed_url), video_id, headers={'Referer': url})
+ video_id, sig, _, access_token = self._get_cookies(embed_url)['video_ext'].value.split('%3A')
+ item = self._download_json(
+ 'https://api.vk.com/method/video.get', video_id,
+ headers={'User-Agent': 'okhttp/3.4.1'}, query={
+ 'access_token': access_token,
+ 'sig': sig,
+ 'v': 5.44,
+ 'videos': video_id,
+ })['response']['items'][0]
+ title = item['title']
+
+ formats = []
+ for f_id, f_url in item.get('files', {}).items():
+ if f_id == 'external':
+ return self.url_result(f_url)
+ ext, height = f_id.split('_')
+ formats.append({
+ 'format_id': height + 'p',
+ 'url': f_url,
+ 'height': int_or_none(height),
+ 'ext': ext,
+ })
+ self._sort_formats(formats)
+
+ thumbnails = []
+ for k, v in item.items():
+ if k.startswith('photo_') and v:
+ width = k.replace('photo_', '')
+ thumbnails.append({
+ 'id': width,
+ 'url': v,
+ 'width': int_or_none(width),
+ })
return {
- '_type': 'url_transparent',
- 'url': embed_url,
+ 'id': video_id,
+ 'title': title,
+ 'formats': formats,
+ 'comment_count': int_or_none(item.get('comments')),
+ 'description': item.get('description'),
+ 'duration': int_or_none(item.get('duration')),
+ 'thumbnails': thumbnails,
+ 'timestamp': int_or_none(item.get('date')),
+ 'uploader': item.get('owner_id'),
+ 'view_count': int_or_none(item.get('views')),
}
formats = [
{'url': format_url}
for format_url in orderedSet(format_urls)]
+
+ if not formats:
+ formats = self._parse_html5_media_entries(
+ url, webpage, video_id)[0]['formats']
+
self._check_formats(formats, video_id)
self._sort_formats(formats)
webpage, default=None) or self._html_search_meta(
'twitter:image:src', webpage, 'thumbnail')
uploader = self._html_search_regex(
- r'(?s)<p\b[^>]+\bclass=["\']video-author[^>]+>(.+?)</p>', webpage,
- 'uploader', fatal=False)
+ (r'(?s)<div class=["\']channel-banner.*?<p\b[^>]+\bclass=["\']name[^>]+>(.+?)</p>',
+ r'(?s)<p\b[^>]+\bclass=["\']video-author[^>]+>(.+?)</p>'),
+ webpage, 'uploader', fatal=False)
return {
'id': video_id,
video_id = self._match_id(url)
display_id = video_id[:8]
- api_url = ('https://apib4.blinkx.com/api.php?action=play_video&' +
- 'video=%s' % video_id)
+ api_url = ('https://apib4.blinkx.com/api.php?action=play_video&'
+ + 'video=%s' % video_id)
data_json = self._download_webpage(api_url, display_id)
data = json.loads(data_json)['api']['results'][0]
duration = None
# coding: utf-8
from __future__ import unicode_literals
+import re
+
from .adobepass import AdobePassIE
from ..utils import (
smuggle_url,
class BravoTVIE(AdobePassIE):
_VALID_URL = r'https?://(?:www\.)?bravotv\.com/(?:[^/]+/)+(?P<id>[^/?#]+)'
_TESTS = [{
- 'url': 'http://www.bravotv.com/last-chance-kitchen/season-5/videos/lck-ep-12-fishy-finale',
- 'md5': '9086d0b7ef0ea2aabc4781d75f4e5863',
+ 'url': 'https://www.bravotv.com/top-chef/season-16/episode-15/videos/the-top-chef-season-16-winner-is',
+ 'md5': 'e34684cfea2a96cd2ee1ef3a60909de9',
'info_dict': {
- 'id': 'zHyk1_HU_mPy',
+ 'id': 'epL0pmK1kQlT',
'ext': 'mp4',
- 'title': 'LCK Ep 12: Fishy Finale',
- 'description': 'S13/E12: Two eliminated chefs have just 12 minutes to cook up a delicious fish dish.',
+ 'title': 'The Top Chef Season 16 Winner Is...',
+ 'description': 'Find out who takes the title of Top Chef!',
'uploader': 'NBCU-BRAV',
- 'upload_date': '20160302',
- 'timestamp': 1456945320,
+ 'upload_date': '20190314',
+ 'timestamp': 1552591860,
}
}, {
'url': 'http://www.bravotv.com/below-deck/season-3/ep-14-reunion-part-1',
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
settings = self._parse_json(self._search_regex(
- r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);', webpage, 'drupal settings'),
+ r'<script[^>]+data-drupal-selector="drupal-settings-json"[^>]*>({.+?})</script>', webpage, 'drupal settings'),
display_id)
info = {}
query = {
'mbr': 'true',
}
account_pid, release_pid = [None] * 2
- tve = settings.get('sharedTVE')
+ tve = settings.get('ls_tve')
if tve:
query['manifest'] = 'm3u'
- account_pid = 'HNK2IC'
- release_pid = tve['release_pid']
+ mobj = re.search(r'<[^>]+id="pdk-player"[^>]+data-url=["\']?(?:https?:)?//player\.theplatform\.com/p/([^/]+)/(?:[^/]+/)*select/([^?#&"\']+)', webpage)
+ if mobj:
+ account_pid, tp_path = mobj.groups()
+ release_pid = tp_path.strip('/').split('/')[-1]
+ else:
+ account_pid = 'HNK2IC'
+ tp_path = release_pid = tve['release_pid']
if tve.get('entitlement') == 'auth':
- adobe_pass = settings.get('adobePass', {})
+ adobe_pass = settings.get('tve_adobe_auth', {})
resource = self._get_mvpd_resource(
adobe_pass.get('adobePassResourceId', 'bravo'),
tve['title'], release_pid, tve.get('rating'))
query['auth'] = self._extract_mvpd_auth(
url, release_pid, adobe_pass.get('adobePassRequestorId', 'bravo'), resource)
else:
- shared_playlist = settings['shared_playlist']
+ shared_playlist = settings['ls_playlist']
account_pid = shared_playlist['account_pid']
metadata = shared_playlist['video_metadata'][shared_playlist['default_clip']]
- release_pid = metadata['release_pid']
+ tp_path = release_pid = metadata.get('release_pid')
+ if not release_pid:
+ release_pid = metadata['guid']
+ tp_path = 'media/guid/2140479951/' + release_pid
info.update({
'title': metadata['title'],
'description': metadata.get('description'),
'_type': 'url_transparent',
'id': release_pid,
'url': smuggle_url(update_url_query(
- 'http://link.theplatform.com/s/%s/%s' % (account_pid, release_pid),
+ 'http://link.theplatform.com/s/%s/%s' % (account_pid, tp_path),
query), {'force_smil_url': True}),
'ie_key': 'ThePlatform',
})
'playlist_mincount': 7,
},
{
- # playlist with 'playlistTab' (https://github.com/rg3/youtube-dl/issues/9965)
+ # playlist with 'playlistTab' (https://github.com/ytdl-org/youtube-dl/issues/9965)
'url': 'http://c.brightcove.com/services/json/experience/runtime/?command=get_programming_for_experience&playerKey=AQ%7E%7E,AAABXlLMdok%7E,NJ4EoMlZ4rZdx9eU1rkMVd8EaYPBBUlg',
'info_dict': {
'id': '1522758701001',
<object class="BrightcoveExperience">{params}</object>
"""
- # Fix up some stupid HTML, see https://github.com/rg3/youtube-dl/issues/1553
+ # Fix up some stupid HTML, see https://github.com/ytdl-org/youtube-dl/issues/1553
object_str = re.sub(r'(<param(?:\s+[a-zA-Z0-9_]+="[^"]*")*)>',
lambda m: m.group(1) + '/>', object_str)
- # Fix up some stupid XML, see https://github.com/rg3/youtube-dl/issues/1608
+ # Fix up some stupid XML, see https://github.com/ytdl-org/youtube-dl/issues/1608
object_str = object_str.replace('<--', '<!--')
# remove namespace to simplify extraction
object_str = re.sub(r'(<object[^>]*)(xmlns=".*?")', r'\1', object_str)
import re
from .common import InfoExtractor
+from ..utils import parse_duration
class BYUtvIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?byutv\.org/(?:watch|player)/(?!event/)(?P<id>[0-9a-f-]+)(?:/(?P<display_id>[^/?#&]+))?'
_TESTS = [{
+ # ooyalaVOD
'url': 'http://www.byutv.org/watch/6587b9a3-89d2-42a6-a7f7-fd2f81840a7d/studio-c-season-5-episode-5',
'info_dict': {
'id': 'ZvanRocTpW-G5_yZFeltTAMv6jxOU9KH',
'skip_download': True,
},
'add_ie': ['Ooyala'],
+ }, {
+ # dvr
+ 'url': 'https://www.byutv.org/player/8f1dab9b-b243-47c8-b525-3e2d021a3451/byu-softball-pacific-vs-byu-41219---game-2',
+ 'info_dict': {
+ 'id': '8f1dab9b-b243-47c8-b525-3e2d021a3451',
+ 'display_id': 'byu-softball-pacific-vs-byu-41219---game-2',
+ 'ext': 'mp4',
+ 'title': 'Pacific vs. BYU (4/12/19)',
+ 'description': 'md5:1ac7b57cb9a78015910a4834790ce1f3',
+ 'duration': 11645,
+ },
+ 'params': {
+ 'skip_download': True
+ },
}, {
'url': 'http://www.byutv.org/watch/6587b9a3-89d2-42a6-a7f7-fd2f81840a7d',
'only_matching': True,
video_id = mobj.group('id')
display_id = mobj.group('display_id') or video_id
- ep = self._download_json(
- 'https://api.byutv.org/api3/catalog/getvideosforcontent', video_id,
- query={
+ info = self._download_json(
+ 'https://api.byutv.org/api3/catalog/getvideosforcontent',
+ display_id, query={
'contentid': video_id,
'channel': 'byutv',
'x-byutv-context': 'web$US',
}, headers={
'x-byutv-context': 'web$US',
'x-byutv-platformkey': 'xsaaw9c7y5',
- })['ooyalaVOD']
+ })
+
+ ep = info.get('ooyalaVOD')
+ if ep:
+ return {
+ '_type': 'url_transparent',
+ 'ie_key': 'Ooyala',
+ 'url': 'ooyala:%s' % ep['providerId'],
+ 'id': video_id,
+ 'display_id': display_id,
+ 'title': ep.get('title'),
+ 'description': ep.get('description'),
+ 'thumbnail': ep.get('imageThumbnail'),
+ }
+ ep = info['dvr']
+ title = ep['title']
+ formats = self._extract_m3u8_formats(
+ ep['videoUrl'], video_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls')
+ self._sort_formats(formats)
return {
- '_type': 'url_transparent',
- 'ie_key': 'Ooyala',
- 'url': 'ooyala:%s' % ep['providerId'],
'id': video_id,
'display_id': display_id,
- 'title': ep.get('title'),
+ 'title': title,
'description': ep.get('description'),
'thumbnail': ep.get('imageThumbnail'),
+ 'duration': parse_duration(ep.get('length')),
+ 'formats': formats,
}
class CanvasIE(InfoExtractor):
- _VALID_URL = r'https?://mediazone\.vrt\.be/api/v1/(?P<site_id>canvas|een|ketnet|vrtvideo)/assets/(?P<id>[^/?#&]+)'
+ _VALID_URL = r'https?://mediazone\.vrt\.be/api/v1/(?P<site_id>canvas|een|ketnet|vrt(?:video|nieuws)|sporza)/assets/(?P<id>[^/?#&]+)'
_TESTS = [{
'url': 'https://mediazone.vrt.be/api/v1/ketnet/assets/md-ast-4ac54990-ce66-4d00-a8ca-9eac86f4c475',
'md5': '90139b746a0a9bd7bb631283f6e2a64e',
'url': 'https://mediazone.vrt.be/api/v1/canvas/assets/mz-ast-5e5f90b6-2d72-4c40-82c2-e134f884e93e',
'only_matching': True,
}]
+ _HLS_ENTRY_PROTOCOLS_MAP = {
+ 'HLS': 'm3u8_native',
+ 'HLS_AES': 'm3u8',
+ }
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
format_url, format_type = target.get('url'), target.get('type')
if not format_url or not format_type:
continue
- if format_type == 'HLS':
+ if format_type in self._HLS_ENTRY_PROTOCOLS_MAP:
formats.extend(self._extract_m3u8_formats(
- format_url, video_id, 'mp4', entry_protocol='m3u8_native',
+ format_url, video_id, 'mp4', self._HLS_ENTRY_PROTOCOLS_MAP[format_type],
m3u8_id=format_type, fatal=False))
elif format_type == 'HDS':
formats.extend(self._extract_f4m_formats(
# coding: utf-8
from __future__ import unicode_literals
-import re
-
from .turner import TurnerBaseIE
+from ..utils import int_or_none
class CartoonNetworkIE(TurnerBaseIE):
_VALID_URL = r'https?://(?:www\.)?cartoonnetwork\.com/video/(?:[^/]+/)+(?P<id>[^/?#]+)-(?:clip|episode)\.html'
_TEST = {
- 'url': 'http://www.cartoonnetwork.com/video/teen-titans-go/starfire-the-cat-lady-clip.html',
+ 'url': 'https://www.cartoonnetwork.com/video/ben-10/how-to-draw-upgrade-episode.html',
'info_dict': {
- 'id': '8a250ab04ed07e6c014ef3f1e2f9016c',
+ 'id': '6e3375097f63874ebccec7ef677c1c3845fa850e',
'ext': 'mp4',
- 'title': 'Starfire the Cat Lady',
- 'description': 'Robin decides to become a cat so that Starfire will finally love him.',
+ 'title': 'How to Draw Upgrade',
+ 'description': 'md5:2061d83776db7e8be4879684eefe8c0f',
},
'params': {
# m3u8 download
def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
- id_type, video_id = re.search(r"_cnglobal\.cvp(Video|Title)Id\s*=\s*'([^']+)';", webpage).groups()
- query = ('id' if id_type == 'Video' else 'titleId') + '=' + video_id
- return self._extract_cvp_info(
- 'http://www.cartoonnetwork.com/video-seo-svc/episodeservices/getCvpPlaylist?networkName=CN2&' + query, video_id, {
- 'secure': {
- 'media_src': 'http://androidhls-secure.cdn.turner.com/toon/big',
- 'tokenizer_src': 'https://token.vgtf.net/token/token_mobile',
- },
- }, {
+
+ def find_field(global_re, name, content_re=None, value_re='[^"]+', fatal=False):
+ metadata_re = ''
+ if content_re:
+ metadata_re = r'|video_metadata\.content_' + content_re
+ return self._search_regex(
+ r'(?:_cnglobal\.currentVideo\.%s%s)\s*=\s*"(%s)";' % (global_re, metadata_re, value_re),
+ webpage, name, fatal=fatal)
+
+ media_id = find_field('mediaId', 'media id', 'id', '[0-9a-f]{40}', True)
+ title = find_field('episodeTitle', 'title', '(?:episodeName|name)', fatal=True)
+
+ info = self._extract_ngtv_info(
+ media_id, {'networkId': 'cartoonnetwork'}, {
'url': url,
'site_name': 'CartoonNetwork',
- 'auth_required': self._search_regex(
- r'_cnglobal\.cvpFullOrPreviewAuth\s*=\s*(true|false);',
- webpage, 'auth required', default='false') == 'true',
+ 'auth_required': find_field('authType', 'auth type') != 'unauth',
})
+
+ series = find_field(
+ 'propertyName', 'series', 'showName') or self._html_search_meta('partOfSeries', webpage)
+ info.update({
+ 'id': media_id,
+ 'display_id': display_id,
+ 'title': title,
+ 'description': self._html_search_meta('description', webpage),
+ 'series': series,
+ 'episode': title,
+ })
+
+ for field in ('season', 'episode'):
+ field_name = field + 'Number'
+ info[field + '_number'] = int_or_none(find_field(
+ field_name, field + ' number', value_re=r'\d+') or self._html_search_meta(field_name, webpage))
+
+ return info
class CBCWatchIE(CBCWatchBaseIE):
IE_NAME = 'cbc.ca:watch'
- _VALID_URL = r'https?://watch\.cbc\.ca/(?:[^/]+/)+(?P<id>[0-9a-f-]+)'
+ _VALID_URL = r'https?://(?:gem|watch)\.cbc\.ca/(?:[^/]+/)+(?P<id>[0-9a-f-]+)'
_TESTS = [{
# geo-restricted to Canada, bypassable
'url': 'http://watch.cbc.ca/doc-zone/season-6/customer-disservice/38e815a-009e3ab12e4',
'description': 'Arthur, the sweetest 8-year-old aardvark, and his pals solve all kinds of problems with humour, kindness and teamwork.',
},
'playlist_mincount': 30,
+ }, {
+ 'url': 'https://gem.cbc.ca/media/this-hour-has-22-minutes/season-26/episode-20/38e815a-0108c6c6a42',
+ 'only_matching': True,
}]
def _real_extract(self, url):
class CBSBaseIE(ThePlatformFeedIE):
def _parse_smil_subtitles(self, smil, namespace=None, subtitles_lang='en'):
- closed_caption_e = find_xpath_attr(smil, self._xpath_ns('.//param', namespace), 'name', 'ClosedCaptionURL')
- return {
- 'en': [{
- 'ext': 'ttml',
- 'url': closed_caption_e.attrib['value'],
- }]
- } if closed_caption_e is not None and closed_caption_e.attrib.get('value') else []
+ subtitles = {}
+ for k, ext in [('sMPTE-TTCCURL', 'tt'), ('ClosedCaptionURL', 'ttml'), ('webVTTCaptionURL', 'vtt')]:
+ cc_e = find_xpath_attr(smil, self._xpath_ns('.//param', namespace), 'name', k)
+ if cc_e is not None:
+ cc_url = cc_e.get('value')
+ if cc_url:
+ subtitles.setdefault(subtitles_lang, []).append({
+ 'ext': ext,
+ 'url': cc_url,
+ })
+ return subtitles
class CBSIE(CBSBaseIE):
# coding: utf-8
from __future__ import unicode_literals
+import re
+import zlib
+
from .common import InfoExtractor
from .cbs import CBSIE
+from ..compat import (
+ compat_b64decode,
+ compat_urllib_parse_unquote,
+)
from ..utils import (
parse_duration,
)
+class CBSNewsEmbedIE(CBSIE):
+ IE_NAME = 'cbsnews:embed'
+ _VALID_URL = r'https?://(?:www\.)?cbsnews\.com/embed/video[^#]*#(?P<id>.+)'
+ _TESTS = [{
+ 'url': 'https://www.cbsnews.com/embed/video/?v=1.c9b5b61492913d6660db0b2f03579ef25e86307a#1Vb7b9s2EP5XBAHbT6Gt98PAMKTJ0se6LVjWYWtdGBR1stlIpEBSTtwi%2F%2FvuJNkNhmHdGxgM2NL57vjd6zt%2B8PngdN%2Fyg79qeGvhzN%2FLGrS%2F%2BuBLB531V28%2B%2BO7Qg7%2Fy97r2z3xZ42NW8yLhDbA0S0KWlHnIijwKWJBHZZnHBa8Cgbpdf%2F89NM9Hi9fXifhpr8sr%2FlP848tn%2BTdXycX25zh4cdX%2FvHl6PmmPqnWQv9w8Ed%2B9GjYRim07bFEqdG%2BZVHuwTm65A7bVRrYtR5lAyMox7pigF6W4k%2By91mjspGsJ%2BwVae4%2BsvdnaO1p73HkXs%2FVisUDTGm7R8IcdnOROeq%2B19qT1amhA1VJtPenoTUgrtfKc9m7Rq8dP7nnjwOB7wg7ADdNt7VX64DWAWlKhPtmDEq22g4GF99x6Dk9E8OSsankHXqPNKDxC%2FdK7MLKTircTDgsI3mmj4OBdSq64dy7fd1x577RU1rt4cvMtOaulFYOd%2FLewRWvDO9lIgXFpZSnkZmjbv5SxKTPoQXClFbpsf%2Fhbbpzs0IB3vb8KkyzJQ%2BywOAgCrMpgRrz%2BKk4fvb7kFbR4XJCu0gAdtNO7woCwZTu%2BBUs9bam%2Fds71drVerpeisgrubLjAB4nnOSkWQnfr5W6o1ku5Xpr1MgrCbL0M0vUyDtfLLK15WiYp47xKWSLyjFVpwVmVJSLIoCjSOFkv3W7oKsVliwZJcB9nwXpZ5GEQQwY8jNKqKCBrgjTLeFxgdCIpazojDgnRtn43J6kG7nZ6cAbxh0EeFFk4%2B1u867cY5u4344n%2FxXjCqAjucdTHgLKojNKmSfO8KRsOFY%2FzKEYCKEJBzv90QA9nfm9gL%2BHulaFqUkz9ULUYxl62B3U%2FRVNLA8IhggaPycOoBuwOCESciDQVSSUgiOMsROB%2FhKfwCKOzEk%2B4k6rWd4uuT%2FwTDz7K7t3d3WLO8ISD95jSPQbayBacthbz86XVgxHwhex5zawzgDOmtp%2F3GPcXn0VXHdSS029%2Fj99UC%2FwJUvyKQ%2FzKyixIEVlYJOn4RxxuaH43Ty9fbJ5OObykHH435XAzJTHeOF4hhEUXD8URe%2FQ%2FBT%2BMpf8d5GN02Ox%2FfiGsl7TA7POu1xZ5%2BbTzcAVKMe48mqcC21hkacVEVScM26liVVBnrKkC4CLKyzAvHu0lhEaTKMFwI3a4SN9MsrfYzdBLq2vkwRD1gVviLT8kY9h2CHH6Y%2Bix6609weFtey4ESp60WtyeWMy%2BsmBuhsoKIyuoT%2Bq2R%2FrW5qi3g%2FvzS2j40DoixDP8%2BKP0yUdpXJ4l6Vla%2Bg9vce%2BC4yM5YlUcbA%2F0jLKdpmTwvsdN5z88nAIe08%2F0HgxeG1iv%2B6Hlhjh7uiW0SDzYNI92L401uha3JKYk268UVRzdOzNQvAaJqoXzAc80dAV440NZ1WVVAAMRYQ2KrGJFmDUsq8saWSnjvIj8t78y%2FRa3JRnbHVfyFpfwoDiGpPgjzekyUiKNlU3OMlwuLMmzgvEojllYVE2Z1HhImvsnk%2BuhusTEoB21PAtSFodeFK3iYhXEH9WOG2%2FkOE833sfeG%2Ff5cfHtEFNXgYes0%2FXj7aGivUgJ9XpusCtoNcNYVVnJVrrDo0OmJAutHCpuZul4W9lLcfy7BnuLPT02%2ByXsCTk%2B9zhzswIN04YueNSK%2BPtM0jS88QdLqSLJDTLsuGZJNolm2yO0PXh3UPnz9Ix5bfIAqxPjvETQsDCEiPG4QbqNyhBZISxybLnZYCrW5H3Axp690%2F0BJdXtDZ5ITu
M4xj3f4oUHGzc5JeJmZKpp%2FjwKh4wMV%2FV1yx3emLoR0MwbG4K%2F%2BZgVep3PnzXGDHZ6a3i%2Fk%2BJrONDN13%2Bnq6tBTYk4o7cLGhBtqCC4KwacGHpEVuoH5JNro%2FE6JfE6d5RydbiR76k%2BW5wioDHBIjw1euhHjUGRB0y5A97KoaPx6MlL%2BwgboUVtUFRI%2FLemgTpdtF59ii7pab08kuPcfWzs0l%2FRI5takWnFpka0zOgWRtYcuf9aIxZMxlwr6IiGpsb6j2DQUXPl%2FimXI599Ev7fWjoPD78A',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ item = self._parse_json(zlib.decompress(compat_b64decode(
+ compat_urllib_parse_unquote(self._match_id(url))),
+ -zlib.MAX_WBITS), None)['video']['items'][0]
+ return self._extract_video_info(item['mpxRefId'], 'cbsnews')
+
+
class CBSNewsIE(CBSIE):
IE_NAME = 'cbsnews'
IE_DESC = 'CBS News'
- _VALID_URL = r'https?://(?:www\.)?cbsnews\.com/(?:news|videos)/(?P<id>[\da-z_-]+)'
+ _VALID_URL = r'https?://(?:www\.)?cbsnews\.com/(?:news|video)/(?P<id>[\da-z_-]+)'
_TESTS = [
{
# 60 minutes
'url': 'http://www.cbsnews.com/news/artificial-intelligence-positioned-to-be-a-game-changer/',
'info_dict': {
- 'id': '_B6Ga3VJrI4iQNKsir_cdFo9Re_YJHE_',
- 'ext': 'mp4',
- 'title': 'Artificial Intelligence',
- 'description': 'md5:8818145f9974431e0fb58a1b8d69613c',
+ 'id': 'Y_nf_aEg6WwO9OLAq0MpKaPgfnBUxfW4',
+ 'ext': 'flv',
+ 'title': 'Artificial Intelligence, real-life applications',
+ 'description': 'md5:a7aaf27f1b4777244de8b0b442289304',
'thumbnail': r're:^https?://.*\.jpg$',
- 'duration': 1606,
+ 'duration': 317,
'uploader': 'CBSI-NEW',
- 'timestamp': 1498431900,
- 'upload_date': '20170625',
+ 'timestamp': 1476046464,
+ 'upload_date': '20161009',
},
'params': {
- # m3u8 download
+ # rtmp download
'skip_download': True,
},
},
{
- 'url': 'http://www.cbsnews.com/videos/fort-hood-shooting-army-downplays-mental-illness-as-cause-of-attack/',
+ 'url': 'https://www.cbsnews.com/video/fort-hood-shooting-army-downplays-mental-illness-as-cause-of-attack/',
'info_dict': {
'id': 'SNJBOYzXiWBOvaLsdzwH8fmtP1SCd91Y',
'ext': 'mp4',
# 48 hours
'url': 'http://www.cbsnews.com/news/maria-ridulph-murder-will-the-nations-oldest-cold-case-to-go-to-trial-ever-get-solved/',
'info_dict': {
- 'id': 'QpM5BJjBVEAUFi7ydR9LusS69DPLqPJ1',
- 'ext': 'mp4',
'title': 'Cold as Ice',
- 'description': 'Can a childhood memory of a friend\'s murder solve a 1957 cold case? "48 Hours" correspondent Erin Moriarty has the latest.',
- 'upload_date': '20170604',
- 'timestamp': 1496538000,
- 'uploader': 'CBSI-NEW',
- },
- 'params': {
- 'skip_download': True,
+ 'description': 'Can a childhood memory solve the 1957 murder of 7-year-old Maria Ridulph?',
},
+ 'playlist_mincount': 7,
},
]
def _real_extract(self, url):
- video_id = self._match_id(url)
-
- webpage = self._download_webpage(url, video_id)
+ display_id = self._match_id(url)
- video_info = self._parse_json(self._html_search_regex(
- r'(?:<ul class="media-list items" id="media-related-items"[^>]*><li data-video-info|<div id="cbsNewsVideoPlayer" data-video-player-options)=\'({.+?})\'',
- webpage, 'video JSON info', default='{}'), video_id, fatal=False)
+ webpage = self._download_webpage(url, display_id)
- if video_info:
- item = video_info['item'] if 'item' in video_info else video_info
- else:
- state = self._parse_json(self._search_regex(
- r'data-cbsvideoui-options=(["\'])(?P<json>{.+?})\1', webpage,
- 'playlist JSON info', group='json'), video_id)['state']
- item = state['playlist'][state['pid']]
+ entries = []
+ for embed_url in re.findall(r'<iframe[^>]+data-src="(https?://(?:www\.)?cbsnews\.com/embed/video/[^#]*#[^"]+)"', webpage):
+ entries.append(self.url_result(embed_url, CBSNewsEmbedIE.ie_key()))
+ if entries:
+ return self.playlist_result(
+ entries, playlist_title=self._html_search_meta(['og:title', 'twitter:title'], webpage),
+ playlist_description=self._html_search_meta(['og:description', 'twitter:description', 'description'], webpage))
+ item = self._parse_json(self._html_search_regex(
+ r'CBSNEWS\.defaultPayload\s*=\s*({.+})',
+ webpage, 'video JSON info'), display_id)['items'][0]
return self._extract_video_info(item['mpxRefId'], 'cbsnews')
+# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
int_or_none,
parse_iso8601,
+ try_get,
+ url_or_none,
)
'id': '1839',
'ext': 'mp4',
'title': 'Introduction to Processor Design',
+ 'creator': 'byterazor',
'description': 'md5:df55f6d073d4ceae55aae6f2fd98a0ac',
'thumbnail': r're:^https?://.*\.jpg$',
'upload_date': '20131228',
'timestamp': 1388188800,
'duration': 3710,
+ 'tags': list,
}
}, {
'url': 'https://media.ccc.de/v/32c3-7368-shopshifting#download',
'id': event_id,
'display_id': display_id,
'title': event_data['title'],
+ 'creator': try_get(event_data, lambda x: ', '.join(x['persons'])),
'description': event_data.get('description'),
'thumbnail': event_data.get('thumb_url'),
'timestamp': parse_iso8601(event_data.get('date')),
'tags': event_data.get('tags'),
'formats': formats,
}
+
+
+class CCCPlaylistIE(InfoExtractor):
+ IE_NAME = 'media.ccc.de:lists'
+ _VALID_URL = r'https?://(?:www\.)?media\.ccc\.de/c/(?P<id>[^/?#&]+)'
+ _TESTS = [{
+ 'url': 'https://media.ccc.de/c/30c3',
+ 'info_dict': {
+ 'title': '30C3',
+ 'id': '30c3',
+ },
+ 'playlist_count': 135,
+ }]
+
+ def _real_extract(self, url):
+ playlist_id = self._match_id(url).lower()
+
+ conf = self._download_json(
+ 'https://media.ccc.de/public/conferences/' + playlist_id,
+ playlist_id)
+
+ entries = []
+ for e in conf['events']:
+ event_url = url_or_none(e.get('frontend_link'))
+ if event_url:
+ entries.append(self.url_result(event_url, ie=CCCIE.ie_key()))
+
+ return self.playlist_result(entries, playlist_id, conf.get('title'))
stream_formats = self._extract_mpd_formats(
stream_url, playlist_id,
mpd_id='dash-%s' % format_id, fatal=False)
- # See https://github.com/rg3/youtube-dl/issues/12119#issuecomment-280037031
+ # See https://github.com/ytdl-org/youtube-dl/issues/12119#issuecomment-280037031
if format_id == 'audioDescription':
for f in stream_formats:
f['source_preference'] = -10
--- /dev/null
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .hbo import HBOBaseIE
+
+
+class CinemaxIE(HBOBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?cinemax\.com/(?P<path>[^/]+/video/[0-9a-z-]+-(?P<id>\d+))'
+ _TESTS = [{
+ 'url': 'https://www.cinemax.com/warrior/video/s1-ep-1-recap-20126903',
+ 'md5': '82e0734bba8aa7ef526c9dd00cf35a05',
+ 'info_dict': {
+ 'id': '20126903',
+ 'ext': 'mp4',
+ 'title': 'S1 Ep 1: Recap',
+ },
+ 'expected_warnings': ['Unknown MIME type application/mp4 in DASH manifest'],
+ }, {
+ 'url': 'https://www.cinemax.com/warrior/video/s1-ep-1-recap-20126903.embed',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ path, video_id = re.match(self._VALID_URL, url).groups()
+ info = self._extract_info('https://www.cinemax.com/%s.xml' % path, video_id)
+ info['id'] = video_id
+ return info
class CiscoLiveSessionIE(CiscoLiveBaseIE):
- _VALID_URL = r'https?://ciscolive\.cisco\.com/on-demand-library/\??[^#]*#/session/(?P<id>[^/?&]+)'
- _TEST = {
+ _VALID_URL = r'https?://(?:www\.)?ciscolive(?:\.cisco)?\.com/[^#]*#/session/(?P<id>[^/?&]+)'
+ _TESTS = [{
'url': 'https://ciscolive.cisco.com/on-demand-library/?#/session/1423353499155001FoSs',
'md5': 'c98acf395ed9c9f766941c70f5352e22',
'info_dict': {
'uploader_id': '5647924234001',
'location': '16B Mezz.',
},
- }
+ }, {
+ 'url': 'https://www.ciscolive.com/global/on-demand-library.html?search.event=ciscoliveemea2019#/session/15361595531500013WOU',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.ciscolive.com/global/on-demand-library.html?#/session/1490051371645001kNaS',
+ 'only_matching': True,
+ }]
def _real_extract(self, url):
rf_id = self._match_id(url)
class CiscoLiveSearchIE(CiscoLiveBaseIE):
- _VALID_URL = r'https?://ciscolive\.cisco\.com/on-demand-library/'
+ _VALID_URL = r'https?://(?:www\.)?ciscolive(?:\.cisco)?\.com/(?:global/)?on-demand-library(?:\.html|/)'
_TESTS = [{
'url': 'https://ciscolive.cisco.com/on-demand-library/?search.event=ciscoliveus2018&search.technicallevel=scpsSkillLevel_aintroductory&search.focus=scpsSessionFocus_designAndDeployment#/',
'info_dict': {
}, {
'url': 'https://ciscolive.cisco.com/on-demand-library/?search.technology=scpsTechnology_applicationDevelopment&search.technology=scpsTechnology_ipv6&search.focus=scpsSessionFocus_troubleshootingTroubleshooting#/',
'only_matching': True,
+ }, {
+ 'url': 'https://www.ciscolive.com/global/on-demand-library.html?search.technicallevel=scpsSkillLevel_aintroductory&search.event=ciscoliveemea2019&search.technology=scpsTechnology_dataCenter&search.focus=scpsSessionFocus_bestPractices#/',
+ 'only_matching': True,
}]
@classmethod
_VALID_URL = r'''(?x)
https?://
(?:
- (?:watch\.)?cloudflarestream\.com/|
- embed\.cloudflarestream\.com/embed/[^/]+\.js\?.*?\bvideo=
+ (?:watch\.)?(?:cloudflarestream\.com|videodelivery\.net)/|
+ embed\.(?:cloudflarestream\.com|videodelivery\.net)/embed/[^/]+\.js\?.*?\bvideo=
)
(?P<id>[\da-f]+)
'''
}, {
'url': 'https://cloudflarestream.com/31c9291ab41fac05471db4e73aa11717/manifest/video.mpd',
'only_matching': True,
+ }, {
+ 'url': 'https://embed.videodelivery.net/embed/r4xu.fla9.latest.js?video=81d80727f3022488598f68d323c1ad5e',
+ 'only_matching': True,
}]
@staticmethod
return [
mobj.group('url')
for mobj in re.finditer(
- r'<script[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//embed\.cloudflarestream\.com/embed/[^/]+\.js\?.*?\bvideo=[\da-f]+?.*?)\1',
+ r'<script[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//embed\.(?:cloudflarestream\.com|videodelivery\.net)/embed/[^/]+\.js\?.*?\bvideo=[\da-f]+?.*?)\1',
webpage)]
def _real_extract(self, url):
from ..compat import (
compat_cookiejar,
compat_cookies,
+ compat_etree_Element,
compat_etree_fromstring,
compat_getpass,
compat_integer_types,
compiled_regex_type,
determine_ext,
determine_protocol,
+ dict_get,
error_to_compat_str,
ExtractorError,
extract_attributes,
JSON_LD_RE,
mimetype2ext,
orderedSet,
+ parse_bitrate,
parse_codecs,
parse_duration,
parse_iso8601,
parse_m3u8_attributes,
+ parse_resolution,
RegexNotFoundError,
sanitized_Request,
sanitize_filename,
+ str_or_none,
+ strip_or_none,
unescapeHTML,
unified_strdate,
unified_timestamp,
from worst to best quality.
Potential fields:
- * url Mandatory. The URL of the video file
+ * url The mandatory URL representing the media:
+ for plain file media - HTTP URL of this file,
+ for RTMP - RTMP URL,
+ for HLS - URL of the M3U8 media playlist,
+ for HDS - URL of the F4M manifest,
+ for DASH
+ - HTTP URL to plain file media (in case of
+ unfragmented media)
+ - URL of the MPD manifest or base URL
+ representing the media if MPD manifest
+ is parsed from a string (in case of
+ fragmented media)
+ for MSS - URL of the ISM manifest.
* manifest_url
The URL of the manifest file in case of
- fragmented media (DASH, hls, hds)
+ fragmented media:
+ for HLS - URL of the M3U8 master playlist,
+ for HDS - URL of the F4M manifest,
+ for DASH - URL of the MPD manifest,
+ for MSS - URL of the ISM manifest.
* ext Will be calculated from URL if missing
* format A human-readable description of the format
("mp4 container with h264/opus").
raise ExtractorError('An extractor error has occurred.', cause=e)
def __maybe_fake_ip_and_retry(self, countries):
- if (not self._downloader.params.get('geo_bypass_country', None) and
- self._GEO_BYPASS and
- self._downloader.params.get('geo_bypass', True) and
- not self._x_forwarded_for_ip and
- countries):
+ if (not self._downloader.params.get('geo_bypass_country', None)
+ and self._GEO_BYPASS
+ and self._downloader.params.get('geo_bypass', True)
+ and not self._x_forwarded_for_ip
+ and countries):
country_code = random.choice(countries)
self._x_forwarded_for_ip = GeoUtils.random_ipv4(country_code)
if self._x_forwarded_for_ip:
def __check_blocked(self, content):
first_block = content[:512]
- if ('<title>Access to this site is blocked</title>' in content and
- 'Websense' in first_block):
+ if ('<title>Access to this site is blocked</title>' in content
+ and 'Websense' in first_block):
msg = 'Access to this webpage has been blocked by Websense filtering software in your network.'
blocked_iframe = self._html_search_regex(
r'<iframe src="([^"]+)"', content,
if block_msg:
msg += ' (Message: "%s")' % block_msg.replace('\n', ' ')
raise ExtractorError(msg, expected=True)
- if ('<title>TTK :: Доступ к ресурсу ограничен</title>' in content and
- 'blocklist.rkn.gov.ru' in content):
+ if ('<title>TTK :: Доступ к ресурсу ограничен</title>' in content
+ and 'blocklist.rkn.gov.ru' in content):
raise ExtractorError(
'Access to this webpage has been blocked by decision of the Russian government. '
'Visit http://blocklist.rkn.gov.ru/ for a block reason.',
fatal=True, encoding=None, data=None, headers={}, query={},
expected_status=None):
"""
- Return a tuple (xml as an xml.etree.ElementTree.Element, URL handle).
+ Return a tuple (xml as a compat_etree_Element, URL handle).
See _download_webpage docstring for arguments specification.
"""
transform_source=None, fatal=True, encoding=None,
data=None, headers={}, query={}, expected_status=None):
"""
- Return the xml as an xml.etree.ElementTree.Element.
+ Return the xml as a compat_etree_Element.
See _download_webpage docstring for arguments specification.
"""
@staticmethod
def _og_regexes(prop):
content_re = r'content=(?:"([^"]+?)"|\'([^\']+?)\'|\s*([^\s"\'=<>`]+?))'
- property_re = (r'(?:name|property)=(?:\'og:%(prop)s\'|"og:%(prop)s"|\s*og:%(prop)s\b)'
+ property_re = (r'(?:name|property)=(?:\'og[:-]%(prop)s\'|"og[:-]%(prop)s"|\s*og[:-]%(prop)s\b)'
% {'prop': re.escape(prop)})
template = r'<meta[^>]+?%s[^>]+?%s'
return [
info['title'] = episode_name
part_of_season = e.get('partOfSeason')
if isinstance(part_of_season, dict) and part_of_season.get('@type') in ('TVSeason', 'Season', 'CreativeWorkSeason'):
- info['season_number'] = int_or_none(part_of_season.get('seasonNumber'))
+ info.update({
+ 'season': unescapeHTML(part_of_season.get('name')),
+ 'season_number': int_or_none(part_of_season.get('seasonNumber')),
+ })
part_of_series = e.get('partOfSeries') or e.get('partOfTVSeries')
if isinstance(part_of_series, dict) and part_of_series.get('@type') in ('TVSeries', 'Series', 'CreativeWorkSeries'):
info['series'] = unescapeHTML(part_of_series.get('name'))
manifest_url, video_id, 'Downloading f4m manifest',
'Unable to download f4m manifest',
# Some manifests may be malformed, e.g. prosiebensat1 generated manifests
- # (see https://github.com/rg3/youtube-dl/issues/6215#issuecomment-121704244)
+ # (see https://github.com/ytdl-org/youtube-dl/issues/6215#issuecomment-121704244)
transform_source=transform_source,
fatal=fatal)
def _parse_f4m_formats(self, manifest, manifest_url, video_id, preference=None, f4m_id=None,
transform_source=lambda s: fix_xml_ampersands(s).strip(),
fatal=True, m3u8_id=None):
+ if not isinstance(manifest, compat_etree_Element) and not fatal:
+ return []
+
# currently youtube-dl cannot decode the playerVerificationChallenge as Akamai uses Adobe Alchemy
akamai_pv = manifest.find('{http://ns.adobe.com/f4m/1.0}pv-2.0')
if akamai_pv is not None and ';' in akamai_pv.text:
manifest_version = '2.0'
media_nodes = manifest.findall('{http://ns.adobe.com/f4m/2.0}media')
# Remove unsupported DRM protected media from final formats
- # rendition (see https://github.com/rg3/youtube-dl/issues/8573).
+ # rendition (see https://github.com/ytdl-org/youtube-dl/issues/8573).
media_nodes = remove_encrypted_media(media_nodes)
if not media_nodes:
return formats
# References:
# 1. https://tools.ietf.org/html/draft-pantos-http-live-streaming-21
- # 2. https://github.com/rg3/youtube-dl/issues/12211
+ # 2. https://github.com/ytdl-org/youtube-dl/issues/12211
+ # 3. https://github.com/ytdl-org/youtube-dl/issues/18923
# We should try extracting formats only from master playlists [1, 4.3.4],
# i.e. playlists that describe available qualities. On the other hand
rendition = stream_group[0]
return rendition.get('NAME') or stream_group_id
+ # Parse EXT-X-MEDIA tags before EXT-X-STREAM-INF in order to have the
+ # chance to detect video-only formats when EXT-X-STREAM-INF tags
+ # precede EXT-X-MEDIA tags in an HLS manifest such as [3].
+ for line in m3u8_doc.splitlines():
+ if line.startswith('#EXT-X-MEDIA:'):
+ extract_media(line)
+
for line in m3u8_doc.splitlines():
if line.startswith('#EXT-X-STREAM-INF:'):
last_stream_inf = parse_m3u8_attributes(line)
- elif line.startswith('#EXT-X-MEDIA:'):
- extract_media(line)
elif line.startswith('#') or not line.strip():
continue
else:
tbr = float_or_none(
- last_stream_inf.get('AVERAGE-BANDWIDTH') or
- last_stream_inf.get('BANDWIDTH'), scale=1000)
+ last_stream_inf.get('AVERAGE-BANDWIDTH')
+ or last_stream_inf.get('BANDWIDTH'), scale=1000)
format_id = []
if m3u8_id:
format_id.append(m3u8_id)
if res is False:
return []
mpd_doc, urlh = res
+ if mpd_doc is None:
+ return []
mpd_base_url = base_url(urlh.geturl())
return self._parse_mpd_formats(
bandwidth = int_or_none(representation_attrib.get('bandwidth'))
f = {
'format_id': '%s-%s' % (mpd_id, representation_id) if mpd_id else representation_id,
- 'url': base_url,
'manifest_url': mpd_url,
'ext': mimetype2ext(mime_type),
'width': int_or_none(representation_attrib.get('width')),
# First of, % characters outside $...$ templates
# must be escaped by doubling for proper processing
# by % operator string formatting used further (see
- # https://github.com/rg3/youtube-dl/issues/16867).
+ # https://github.com/ytdl-org/youtube-dl/issues/16867).
t = ''
in_template = False
for c in tmpl:
# @initialization is a regular template like @media one
# so it should be handled just the same way (see
- # https://github.com/rg3/youtube-dl/issues/11605)
+ # https://github.com/ytdl-org/youtube-dl/issues/11605)
if 'initialization' in representation_ms_info:
initialization_template = prepare_template(
'initialization',
elif 'segment_urls' in representation_ms_info:
# Segment URLs with no SegmentTimeline
# Example: https://www.seznam.cz/zpravy/clanek/cesko-zasahne-vitr-o-sile-vichrice-muze-byt-i-zivotu-nebezpecny-39091
- # https://github.com/rg3/youtube-dl/pull/14844
+ # https://github.com/ytdl-org/youtube-dl/pull/14844
fragments = []
segment_duration = float_or_none(
representation_ms_info['segment_duration'],
fragment['duration'] = segment_duration
fragments.append(fragment)
representation_ms_info['fragments'] = fragments
- # NB: MPD manifest may contain direct URLs to unfragmented media.
- # No fragments key is present in this case.
+ # If there is a fragments key available then we correctly recognized fragmented media.
+ # Otherwise we will assume unfragmented media with direct access. Technically, such an
+ # assumption is not necessarily correct since we may simply have no support for
+ # some forms of fragmented media renditions yet, but for now we'll use this fallback.
if 'fragments' in representation_ms_info:
f.update({
+ # NB: mpd_url may be empty when MPD manifest is parsed from a string
+ 'url': mpd_url or base_url,
'fragment_base_url': base_url,
'fragments': [],
'protocol': 'http_dash_segments',
f['url'] = initialization_url
f['fragments'].append({location_key(initialization_url): initialization_url})
f['fragments'].extend(representation_ms_info['fragments'])
+ else:
+ # Assuming direct URL to unfragmented media.
+ f['url'] = base_url
+
# According to [1, 5.3.5.2, Table 7, page 35] @id of Representation
# is not necessarily unique within a Period thus formats with
# the same `format_id` are quite possible. There are numerous examples
- # of such manifests (see https://github.com/rg3/youtube-dl/issues/15111,
- # https://github.com/rg3/youtube-dl/issues/13919)
+ # of such manifests (see https://github.com/ytdl-org/youtube-dl/issues/15111,
+ # https://github.com/ytdl-org/youtube-dl/issues/13919)
full_info = formats_dict.get(representation_id, {}).copy()
full_info.update(f)
formats.append(full_info)
media_tags.extend(re.findall(
# We only allow video|audio followed by a whitespace or '>'.
# Allowing more characters may end up in significant slow down (see
- # https://github.com/rg3/youtube-dl/issues/11979, example URL:
+ # https://github.com/ytdl-org/youtube-dl/issues/11979, example URL:
# http://www.porntrex.com/maps/videositemap.xml).
r'(?s)(<(?P<tag>(?:amp-)?(?:video|audio))(?:\s+[^>]*)?>)(.*?)</(?P=tag)>', webpage))
for media_tag, media_type, media_content in media_tags:
'subtitles': {},
}
media_attributes = extract_attributes(media_tag)
- src = media_attributes.get('src')
+ src = strip_or_none(media_attributes.get('src'))
if src:
_, formats = _media_formats(src, media_type)
media_info['formats'].extend(formats)
media_info['thumbnail'] = absolute_url(media_attributes.get('poster'))
if media_content:
for source_tag in re.findall(r'<source[^>]+>', media_content):
- source_attributes = extract_attributes(source_tag)
- src = source_attributes.get('src')
+ s_attr = extract_attributes(source_tag)
+ # data-video-src and data-src are non standard but seen
+ # several times in the wild
+ src = strip_or_none(dict_get(s_attr, ('src', 'data-video-src', 'data-src')))
if not src:
continue
- f = parse_content_type(source_attributes.get('type'))
+ f = parse_content_type(s_attr.get('type'))
is_plain_url, formats = _media_formats(src, media_type, f)
if is_plain_url:
- # res attribute is not standard but seen several times
- # in the wild
+ # width, height, res, label and title attributes are
+ # all not standard but seen several times in the wild
+ labels = [
+ s_attr.get(lbl)
+ for lbl in ('label', 'title')
+ if str_or_none(s_attr.get(lbl))
+ ]
+ width = int_or_none(s_attr.get('width'))
+ height = (int_or_none(s_attr.get('height'))
+ or int_or_none(s_attr.get('res')))
+ if not width or not height:
+ for lbl in labels:
+ resolution = parse_resolution(lbl)
+ if not resolution:
+ continue
+ width = width or resolution.get('width')
+ height = height or resolution.get('height')
+ for lbl in labels:
+ tbr = parse_bitrate(lbl)
+ if tbr:
+ break
+ else:
+ tbr = None
f.update({
- 'height': int_or_none(source_attributes.get('res')),
- 'format_id': source_attributes.get('label'),
+ 'width': width,
+ 'height': height,
+ 'tbr': tbr,
+ 'format_id': s_attr.get('label') or s_attr.get('title'),
})
f.update(formats[0])
media_info['formats'].append(f)
track_attributes = extract_attributes(track_tag)
kind = track_attributes.get('kind')
if not kind or kind in ('subtitles', 'captions'):
- src = track_attributes.get('src')
+ src = strip_or_none(track_attributes.get('src'))
if not src:
continue
lang = track_attributes.get('srclang') or track_attributes.get('lang') or track_attributes.get('label')
'id': this_video_id,
'title': unescapeHTML(video_data['title'] if require_title else video_data.get('title')),
'description': video_data.get('description'),
- 'thumbnail': self._proto_relative_url(video_data.get('image')),
+ 'thumbnail': urljoin(base_url, self._proto_relative_url(video_data.get('image'))),
'timestamp': int_or_none(video_data.get('pubdate')),
'duration': float_or_none(jwplayer_data.get('duration') or video_data.get('duration')),
'subtitles': subtitles,
for source in jwplayer_sources_data:
if not isinstance(source, dict):
continue
- source_url = self._proto_relative_url(source.get('file'))
- if not source_url:
- continue
- if base_url:
- source_url = compat_urlparse.urljoin(base_url, source_url)
- if source_url in urls:
+ source_url = urljoin(
+ base_url, self._proto_relative_url(source.get('file')))
+ if not source_url or source_url in urls:
continue
urls.append(source_url)
source_type = source.get('type') or ''
self._downloader.cookiejar.add_cookie_header(req)
return compat_cookies.SimpleCookie(req.get_header('Cookie'))
+ def _apply_first_set_cookie_header(self, url_handle, cookie):
+ """
+ Apply first Set-Cookie header instead of the last. Experimental.
+
+ Some sites (e.g. [1-3]) may serve two cookies under the same name
+ in the Set-Cookie header and expect the first (old) one to be set rather
+ than the second (new) one. However, per RFC 6265 the newer cookie
+ should be set in the cookie store, which is what actually happens.
+ We work around this issue by manually resetting the cookie to
+ the first one.
+ 1. https://new.vk.com/
+ 2. https://github.com/ytdl-org/youtube-dl/issues/9841#issuecomment-227871201
+ 3. https://learning.oreilly.com/
+ """
+ for header, cookies in url_handle.headers.items():
+ if header.lower() != 'set-cookie':
+ continue
+ if sys.version_info[0] >= 3:
+ cookies = cookies.encode('iso-8859-1')
+ cookies = cookies.decode('utf-8')
+ cookie_value = re.search(
+ r'%s=(.+?);.*?\b[Dd]omain=(.+?)(?:[,;]|$)' % cookie, cookies)
+ if cookie_value:
+ value, domain = cookie_value.groups()
+ self._set_cookie(domain, cookie, value)
+ break
+
def get_testcases(self, include_onlymatching=False):
t = getattr(self, '_TEST', None)
if t:
return not any_restricted
def extract_subtitles(self, *args, **kwargs):
- if (self._downloader.params.get('writesubtitles', False) or
- self._downloader.params.get('listsubtitles')):
+ if (self._downloader.params.get('writesubtitles', False)
+ or self._downloader.params.get('listsubtitles')):
return self._get_subtitles(*args, **kwargs)
return {}
return ret
def extract_automatic_captions(self, *args, **kwargs):
- if (self._downloader.params.get('writeautomaticsub', False) or
- self._downloader.params.get('listsubtitles')):
+ if (self._downloader.params.get('writeautomaticsub', False)
+ or self._downloader.params.get('listsubtitles')):
return self._get_automatic_captions(*args, **kwargs)
return {}
raise NotImplementedError('This method must be implemented by subclasses')
def mark_watched(self, *args, **kwargs):
- if (self._downloader.params.get('mark_watched', False) and
- (self._get_login_info()[0] is not None or
- self._downloader.params.get('cookiefile') is not None)):
+ if (self._downloader.params.get('mark_watched', False)
+ and (self._get_login_info()[0] is not None
+ or self._downloader.params.get('cookiefile') is not None)):
self._mark_watched(*args, **kwargs)
def _mark_watched(self, *args, **kwargs):
class UnicodeBOMIE(InfoExtractor):
- IE_DESC = False
- _VALID_URL = r'(?P<bom>\ufeff)(?P<id>.*)$'
-
- # Disable test for python 3.2 since BOM is broken in re in this version
- # (see https://github.com/rg3/youtube-dl/issues/9751)
- _TESTS = [] if (3, 0) < sys.version_info <= (3, 3) else [{
- 'url': '\ufeffhttp://www.youtube.com/watch?v=BaW_jenozKc',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- real_url = self._match_id(url)
- self.report_warning(
- 'Your URL starts with a Byte Order Mark (BOM). '
- 'Removing the BOM and looking for "%s" ...' % real_url)
- return self.url_result(real_url)
+ IE_DESC = False
+ _VALID_URL = r'(?P<bom>\ufeff)(?P<id>.*)$'
+
+ # Disable test for python 3.2 since BOM is broken in re in this version
+ # (see https://github.com/ytdl-org/youtube-dl/issues/9751)
+ _TESTS = [] if (3, 0) < sys.version_info <= (3, 3) else [{
+ 'url': '\ufeffhttp://www.youtube.com/watch?v=BaW_jenozKc',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ real_url = self._match_id(url)
+ self.report_warning(
+ 'Your URL starts with a Byte Order Mark (BOM). '
+ 'Removing the BOM and looking for "%s" ...' % real_url)
+ return self.url_result(real_url)
(?:www\.)?
(?P<domain>
(?:globaltv|etcanada)\.com|
- (?:hgtv|foodnetwork|slice|history|showcase)\.ca
+ (?:hgtv|foodnetwork|slice|history|showcase|bigbrothercanada)\.ca
)
- /(?:video/|(?:[^/]+/)+(?:videos/[a-z0-9-]+-|video\.html\?.*?\bv=))
+ /(?:video/(?:[^/]+/)?|(?:[^/]+/)+(?:videos/[a-z0-9-]+-|video\.html\?.*?\bv=))
(?P<id>\d+)
'''
_TESTS = [{
}, {
'url': 'http://www.showcase.ca/eyewitness/video/eyewitness++106/video.html?v=955070531919&p=1&s=da#video',
'only_matching': True,
+ }, {
+ 'url': 'http://www.bigbrothercanada.ca/video/1457812035894/',
+ 'only_matching': True
+ }, {
+ 'url': 'https://www.bigbrothercanada.ca/video/big-brother-canada-704/1457812035894/',
+ 'only_matching': True
}]
_TP_FEEDS = {
'feed_id': '9H6qyshBZU3E',
'account_id': 2414426607,
},
+ 'bigbrothercanada': {
+ 'feed_id': 'ChQqrem0lNUp',
+ 'account_id': 2269680845,
+ },
}
def _real_extract(self, url):
# coding: utf-8
from __future__ import unicode_literals, division
+import hashlib
+import hmac
import re
+import time
from .common import InfoExtractor
from ..compat import compat_HTTPError
for country in countries:
try:
+ # Authorization generation algorithm is reverse engineered from:
+ # https://www.sonycrackle.com/static/js/main.ea93451f.chunk.js
+ media_detail_url = 'https://web-api-us.crackle.com/Service.svc/details/media/%s/%s?disableProtocols=true' % (video_id, country)
+ timestamp = time.strftime('%Y%m%d%H%M', time.gmtime())
+ h = hmac.new(b'IGSLUQCBDFHEOIFM', '|'.join([media_detail_url, timestamp]).encode(), hashlib.sha1).hexdigest().upper()
media = self._download_json(
- 'https://web-api-us.crackle.com/Service.svc/details/media/%s/%s'
- % (video_id, country), video_id,
- 'Downloading media JSON as %s' % country,
- 'Unable to download media JSON', query={
- 'disableProtocols': 'true',
- 'format': 'json'
+ media_detail_url, video_id, 'Downloading media JSON as %s' % country,
+ 'Unable to download media JSON', headers={
+ 'Accept': 'application/json',
+ 'Authorization': '|'.join([h, timestamp, '117', '1']),
})
except ExtractorError as e:
# 401 means geo restriction, trying next country
+++ /dev/null
-# coding: utf-8
-from __future__ import unicode_literals
-
-from .common import InfoExtractor
-
-
-class CriterionIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?criterion\.com/films/(?P<id>[0-9]+)-.+'
- _TEST = {
- 'url': 'http://www.criterion.com/films/184-le-samourai',
- 'md5': 'bc51beba55685509883a9a7830919ec3',
- 'info_dict': {
- 'id': '184',
- 'ext': 'mp4',
- 'title': 'Le Samouraï',
- 'description': 'md5:a2b4b116326558149bef81f76dcbb93f',
- 'thumbnail': r're:^https?://.*\.jpg$',
- }
- }
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
- webpage = self._download_webpage(url, video_id)
-
- final_url = self._search_regex(
- r'so\.addVariable\("videoURL", "(.+?)"\)\;', webpage, 'video url')
- title = self._og_search_title(webpage)
- description = self._html_search_meta('description', webpage)
- thumbnail = self._search_regex(
- r'so\.addVariable\("thumbnailURL", "(.+?)"\)\;',
- webpage, 'thumbnail url')
-
- return {
- 'id': video_id,
- 'url': final_url,
- 'title': title,
- 'description': description,
- 'thumbnail': thumbnail,
- }
import re
import json
-import xml.etree.ElementTree as etree
import zlib
from hashlib import sha1
from .vrv import VRVIE
from ..compat import (
compat_b64decode,
+ compat_etree_Element,
compat_etree_fromstring,
compat_urllib_parse_urlencode,
compat_urllib_request,
if username is None:
return
- self._download_webpage(
- 'https://www.crunchyroll.com/?a=formhandler',
- None, 'Logging in', 'Wrong login info',
- data=urlencode_postdata({
- 'formname': 'RpcApiUser_Login',
- 'next_url': 'https://www.crunchyroll.com/acct/membership',
- 'name': username,
- 'password': password,
- }))
-
- '''
login_page = self._download_webpage(
self._LOGIN_URL, None, 'Downloading login page')
def is_logged(webpage):
- return '<title>Redirecting' in webpage
+ return 'href="/logout"' in webpage
# Already logged in
if is_logged(login_page):
raise ExtractorError('Unable to login: %s' % error, expected=True)
raise ExtractorError('Unable to log in')
- '''
def _real_initialize(self):
self._login()
request = (url_or_request if isinstance(url_or_request, compat_urllib_request.Request)
else sanitized_Request(url_or_request))
# Accept-Language must be set explicitly to accept any language to avoid issues
- # similar to https://github.com/rg3/youtube-dl/issues/6797.
+ # similar to https://github.com/ytdl-org/youtube-dl/issues/6797.
# Along with IP address Crunchyroll uses Accept-Language to guess whether georestriction
# should be imposed or not (from what I can see it just takes the first language
# ignoring the priority and requires it to correspond the IP). By the way this causes
# > This content may be inappropriate for some people.
# > Are you sure you want to continue?
# since it's not disabled by default in crunchyroll account's settings.
- # See https://github.com/rg3/youtube-dl/issues/7202.
+ # See https://github.com/ytdl-org/youtube-dl/issues/7202.
qs['skip_wall'] = ['1']
return compat_urlparse.urlunparse(
parsed_url._replace(query=compat_urllib_parse_urlencode(qs, True)))
class CrunchyrollIE(CrunchyrollBaseIE, VRVIE):
IE_NAME = 'crunchyroll'
- _VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.(?:com|fr)/(?:media(?:-|/\?id=)|[^/]*/[^/?&]*?)(?P<video_id>[0-9]+))(?:[/?&]|$)'
+ _VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.(?:com|fr)/(?:media(?:-|/\?id=)|(?:[^/]*/){1,2}[^/?&]*?)(?P<video_id>[0-9]+))(?:[/?&]|$)'
_TESTS = [{
'url': 'http://www.crunchyroll.com/wanna-be-the-strongest-in-the-world/episode-1-an-idol-wrestler-is-born-645513',
'info_dict': {
}, {
'url': 'http://www.crunchyroll.com/media-723735',
'only_matching': True,
+ }, {
+ 'url': 'https://www.crunchyroll.com/en-gb/mob-psycho-100/episode-2-urban-legends-encountering-rumors-780921',
+ 'only_matching': True,
}]
_FORMAT_IDS = {
'Downloading subtitles for ' + sub_name, data={
'subtitle_script_id': sub_id,
})
- if not isinstance(sub_doc, etree.Element):
+ if not isinstance(sub_doc, compat_etree_Element):
continue
sid = sub_doc.get('id')
iv = xpath_text(sub_doc, 'iv', 'subtitle iv')
'video_quality': stream_quality,
'current_page': url,
})
- if isinstance(streamdata, etree.Element):
+ if isinstance(streamdata, compat_etree_Element):
stream_info = streamdata.find('./{default}preload/stream_info')
if stream_info is not None:
stream_infos.append(stream_info)
'video_format': stream_format,
'video_encode_quality': stream_quality,
})
- if isinstance(stream_info, etree.Element):
+ if isinstance(stream_info, compat_etree_Element):
stream_infos.append(stream_info)
for stream_info in stream_infos:
video_encode_id = xpath_text(stream_info, './video_encode_id')
season = episode = episode_number = duration = thumbnail = None
- if isinstance(metadata, etree.Element):
+ if isinstance(metadata, compat_etree_Element):
season = xpath_text(metadata, 'series_title')
episode = xpath_text(metadata, 'episode_title')
episode_number = int_or_none(xpath_text(metadata, 'episode_number'))
season = str_or_none(video_data.get('season'))
episode = str_or_none(video_data.get('episode'))
if episode and season:
- episode = episode.lstrip(season)
+ episode = episode[len(season):]
return {
'_type': 'url_transparent',
sources_url = (try_get(
video_data,
(lambda x: x['plugins']['sources']['url'],
- lambda x: x['sources']['url']), compat_str) or
- 'http://www.dailymail.co.uk/api/player/%s/video-sources.json' % video_id)
+ lambda x: x['sources']['url']), compat_str)
+ or 'http://www.dailymail.co.uk/api/player/%s/video-sources.json' % video_id)
video_sources = self._download_json(sources_url, video_id)
body = video_sources.get('body')
webpage, 'comment count', default=None))
player_v5 = self._search_regex(
- [r'buildPlayer\(({.+?})\);\n', # See https://github.com/rg3/youtube-dl/issues/7826
+ [r'buildPlayer\(({.+?})\);\n', # See https://github.com/ytdl-org/youtube-dl/issues/7826
r'playerV5\s*=\s*dmp\.create\([^,]+?,\s*({.+?})\);',
r'buildPlayer\(({.+?})\);',
r'var\s+config\s*=\s*({.+?});',
- # New layout regex (see https://github.com/rg3/youtube-dl/issues/13580)
+ # New layout regex (see https://github.com/ytdl-org/youtube-dl/issues/13580)
r'__PLAYER_CONFIG__\s*=\s*({.+?});'],
webpage, 'player v5', default=None)
if player_v5:
endpoint = next(
server['endpoint']
for server in servers
- if url_or_none(server.get('endpoint')) and
- 'cloudfront' in server['endpoint'])
+ if url_or_none(server.get('endpoint'))
+ and 'cloudfront' in server['endpoint'])
else:
endpoint = 'rtmpe://s2pqqn4u96e4j8.cloudfront.net/cfx/st/'
stream_name = xpath_text(a_format, 'streamName', fatal=True)
video_path = re.match(r'mp4\:(?P<path>.*)', stream_name).group('path')
url = video_root + video_path
- vbr = xpath_text(a_format, 'bitrate')
+ bitrate = xpath_text(a_format, 'bitrate')
+ tbr = int_or_none(bitrate)
+ vbr = int_or_none(self._search_regex(
+ r'-(\d+)\.mp4', video_path, 'vbr', default=None))
+ abr = tbr - vbr if tbr and vbr else None
video_formats.append({
+ 'format_id': bitrate,
'url': url,
- 'vbr': int_or_none(vbr),
+ 'tbr': tbr,
+ 'vbr': vbr,
+ 'abr': abr,
})
return video_formats
+++ /dev/null
-# coding: utf-8
-from __future__ import unicode_literals
-
-import itertools
-import json
-
-from .common import InfoExtractor
-from ..compat import (
- compat_HTTPError,
- compat_urlparse,
-)
-from ..utils import (
- clean_html,
- ExtractorError,
- int_or_none,
- parse_age_limit,
- parse_duration,
- unified_timestamp,
- url_or_none,
-)
-
-
-class DramaFeverBaseIE(InfoExtractor):
- _NETRC_MACHINE = 'dramafever'
-
- _CONSUMER_SECRET = 'DA59dtVXYLxajktV'
-
- _consumer_secret = None
-
- def _get_consumer_secret(self):
- mainjs = self._download_webpage(
- 'http://www.dramafever.com/static/51afe95/df2014/scripts/main.js',
- None, 'Downloading main.js', fatal=False)
- if not mainjs:
- return self._CONSUMER_SECRET
- return self._search_regex(
- r"var\s+cs\s*=\s*'([^']+)'", mainjs,
- 'consumer secret', default=self._CONSUMER_SECRET)
-
- def _real_initialize(self):
- self._consumer_secret = self._get_consumer_secret()
- self._login()
-
- def _login(self):
- username, password = self._get_login_info()
- if username is None:
- return
-
- login_form = {
- 'username': username,
- 'password': password,
- }
-
- try:
- response = self._download_json(
- 'https://www.dramafever.com/api/users/login', None, 'Logging in',
- data=json.dumps(login_form).encode('utf-8'), headers={
- 'x-consumer-key': self._consumer_secret,
- })
- except ExtractorError as e:
- if isinstance(e.cause, compat_HTTPError) and e.cause.code in (403, 404):
- response = self._parse_json(
- e.cause.read().decode('utf-8'), None)
- else:
- raise
-
- # Successful login
- if response.get('result') or response.get('guid') or response.get('user_guid'):
- return
-
- errors = response.get('errors')
- if errors and isinstance(errors, list):
- error = errors[0]
- message = error.get('message') or error['reason']
- raise ExtractorError('Unable to login: %s' % message, expected=True)
- raise ExtractorError('Unable to log in')
-
-
-class DramaFeverIE(DramaFeverBaseIE):
- IE_NAME = 'dramafever'
- _VALID_URL = r'https?://(?:www\.)?dramafever\.com/(?:[^/]+/)?drama/(?P<id>[0-9]+/[0-9]+)(?:/|$)'
- _TESTS = [{
- 'url': 'https://www.dramafever.com/drama/4274/1/Heirs/',
- 'info_dict': {
- 'id': '4274.1',
- 'ext': 'wvm',
- 'title': 'Heirs - Episode 1',
- 'description': 'md5:362a24ba18209f6276e032a651c50bc2',
- 'thumbnail': r're:^https?://.*\.jpg',
- 'duration': 3783,
- 'timestamp': 1381354993,
- 'upload_date': '20131009',
- 'series': 'Heirs',
- 'season_number': 1,
- 'episode': 'Episode 1',
- 'episode_number': 1,
- },
- 'params': {
- # m3u8 download
- 'skip_download': True,
- },
- }, {
- 'url': 'http://www.dramafever.com/drama/4826/4/Mnet_Asian_Music_Awards_2015/?ap=1',
- 'info_dict': {
- 'id': '4826.4',
- 'ext': 'flv',
- 'title': 'Mnet Asian Music Awards 2015',
- 'description': 'md5:3ff2ee8fedaef86e076791c909cf2e91',
- 'episode': 'Mnet Asian Music Awards 2015 - Part 3',
- 'episode_number': 4,
- 'thumbnail': r're:^https?://.*\.jpg',
- 'timestamp': 1450213200,
- 'upload_date': '20151215',
- 'duration': 5359,
- },
- 'params': {
- # m3u8 download
- 'skip_download': True,
- },
- }, {
- 'url': 'https://www.dramafever.com/zh-cn/drama/4972/15/Doctor_Romantic/',
- 'only_matching': True,
- }]
-
- def _call_api(self, path, video_id, note, fatal=False):
- return self._download_json(
- 'https://www.dramafever.com/api/5/' + path,
- video_id, note=note, headers={
- 'x-consumer-key': self._consumer_secret,
- }, fatal=fatal)
-
- def _get_subtitles(self, video_id):
- subtitles = {}
- subs = self._call_api(
- 'video/%s/subtitles/webvtt/' % video_id, video_id,
- 'Downloading subtitles JSON', fatal=False)
- if not subs or not isinstance(subs, list):
- return subtitles
- for sub in subs:
- if not isinstance(sub, dict):
- continue
- sub_url = url_or_none(sub.get('url'))
- if not sub_url:
- continue
- subtitles.setdefault(
- sub.get('code') or sub.get('language') or 'en', []).append({
- 'url': sub_url
- })
- return subtitles
-
- def _real_extract(self, url):
- video_id = self._match_id(url).replace('/', '.')
-
- series_id, episode_number = video_id.split('.')
-
- video = self._call_api(
- 'series/%s/episodes/%s/' % (series_id, episode_number), video_id,
- 'Downloading video JSON')
-
- formats = []
- download_assets = video.get('download_assets')
- if download_assets and isinstance(download_assets, dict):
- for format_id, format_dict in download_assets.items():
- if not isinstance(format_dict, dict):
- continue
- format_url = url_or_none(format_dict.get('url'))
- if not format_url:
- continue
- formats.append({
- 'url': format_url,
- 'format_id': format_id,
- 'filesize': int_or_none(video.get('filesize')),
- })
-
- stream = self._call_api(
- 'video/%s/stream/' % video_id, video_id, 'Downloading stream JSON',
- fatal=False)
- if stream:
- stream_url = stream.get('stream_url')
- if stream_url:
- formats.extend(self._extract_m3u8_formats(
- stream_url, video_id, 'mp4', entry_protocol='m3u8_native',
- m3u8_id='hls', fatal=False))
- self._sort_formats(formats)
-
- title = video.get('title') or 'Episode %s' % episode_number
- description = video.get('description')
- thumbnail = video.get('thumbnail')
- timestamp = unified_timestamp(video.get('release_date'))
- duration = parse_duration(video.get('duration'))
- age_limit = parse_age_limit(video.get('tv_rating'))
- series = video.get('series_title')
- season_number = int_or_none(video.get('season'))
-
- if series:
- title = '%s - %s' % (series, title)
-
- subtitles = self.extract_subtitles(video_id)
-
- return {
- 'id': video_id,
- 'title': title,
- 'description': description,
- 'thumbnail': thumbnail,
- 'duration': duration,
- 'timestamp': timestamp,
- 'age_limit': age_limit,
- 'series': series,
- 'season_number': season_number,
- 'episode_number': int_or_none(episode_number),
- 'formats': formats,
- 'subtitles': subtitles,
- }
-
-
-class DramaFeverSeriesIE(DramaFeverBaseIE):
- IE_NAME = 'dramafever:series'
- _VALID_URL = r'https?://(?:www\.)?dramafever\.com/(?:[^/]+/)?drama/(?P<id>[0-9]+)(?:/(?:(?!\d+(?:/|$)).+)?)?$'
- _TESTS = [{
- 'url': 'http://www.dramafever.com/drama/4512/Cooking_with_Shin/',
- 'info_dict': {
- 'id': '4512',
- 'title': 'Cooking with Shin',
- 'description': 'md5:84a3f26e3cdc3fb7f500211b3593b5c1',
- },
- 'playlist_count': 4,
- }, {
- 'url': 'http://www.dramafever.com/drama/124/IRIS/',
- 'info_dict': {
- 'id': '124',
- 'title': 'IRIS',
- 'description': 'md5:b3a30e587cf20c59bd1c01ec0ee1b862',
- },
- 'playlist_count': 20,
- }]
-
- _PAGE_SIZE = 60 # max is 60 (see http://api.drama9.com/#get--api-4-episode-series-)
-
- def _real_extract(self, url):
- series_id = self._match_id(url)
-
- series = self._download_json(
- 'http://www.dramafever.com/api/4/series/query/?cs=%s&series_id=%s'
- % (self._consumer_secret, series_id),
- series_id, 'Downloading series JSON')['series'][series_id]
-
- title = clean_html(series['name'])
- description = clean_html(series.get('description') or series.get('description_short'))
-
- entries = []
- for page_num in itertools.count(1):
- episodes = self._download_json(
- 'http://www.dramafever.com/api/4/episode/series/?cs=%s&series_id=%s&page_size=%d&page_number=%d'
- % (self._consumer_secret, series_id, self._PAGE_SIZE, page_num),
- series_id, 'Downloading episodes JSON page #%d' % page_num)
- for episode in episodes.get('value', []):
- episode_url = episode.get('episode_url')
- if not episode_url:
- continue
- entries.append(self.url_result(
- compat_urlparse.urljoin(url, episode_url),
- 'DramaFever', episode.get('guid')))
- if page_num == episodes['num_pages']:
- break
-
- return self.playlist_result(entries, series_id, title, description)
video_url, video_id, fatal=False))
elif ext == 'm3u8':
# the certificates are misconfigured (see
- # https://github.com/rg3/youtube-dl/issues/8665)
+ # https://github.com/ytdl-org/youtube-dl/issues/8665)
if video_url.startswith('https://'):
continue
formats.extend(self._extract_m3u8_formats(
from .common import InfoExtractor
from ..utils import (
+ int_or_none,
NO_DEFAULT,
+ parse_duration,
str_to_int,
)
})
self._sort_formats(formats)
+ duration = int_or_none(video_data.get('duration')) or parse_duration(
+ video_data.get('duration_format'))
+
title = self._html_search_regex(
(r'<h1[^>]+class=["\']title[^>]+>([^<]+)',
r'<title>([^<]+)\s*@\s+DrTuber',
'comment_count': comment_count,
'categories': categories,
'age_limit': self._rta_search(webpage),
+ 'duration': duration,
}
# coding: utf-8
from __future__ import unicode_literals
+import binascii
+import hashlib
+import re
+
+
from .common import InfoExtractor
+from ..aes import aes_cbc_decrypt
+from ..compat import compat_urllib_parse_unquote
from ..utils import (
+ bytes_to_intlist,
ExtractorError,
int_or_none,
+ intlist_to_bytes,
float_or_none,
mimetype2ext,
- parse_iso8601,
- remove_end,
+ str_or_none,
+ unified_timestamp,
update_url_query,
+ url_or_none,
)
IE_NAME = 'drtv'
_TESTS = [{
'url': 'https://www.dr.dk/tv/se/boern/ultra/klassen-ultra/klassen-darlig-taber-10',
- 'md5': '7ae17b4e18eb5d29212f424a7511c184',
+ 'md5': '25e659cccc9a2ed956110a299fdf5983',
'info_dict': {
'id': 'klassen-darlig-taber-10',
'ext': 'mp4',
'title': 'Klassen - Dårlig taber (10)',
'description': 'md5:815fe1b7fa656ed80580f31e8b3c79aa',
- 'timestamp': 1471991907,
- 'upload_date': '20160823',
+ 'timestamp': 1539085800,
+ 'upload_date': '20181009',
'duration': 606.84,
+ 'series': 'Klassen',
+ 'season': 'Klassen I',
+ 'season_number': 1,
+ 'season_id': 'urn:dr:mu:bundle:57d7e8216187a4031cfd6f6b',
+ 'episode': 'Episode 10',
+ 'episode_number': 10,
+ 'release_year': 2016,
},
+ 'expected_warnings': ['Unable to download f4m manifest'],
}, {
# embed
'url': 'https://www.dr.dk/nyheder/indland/live-christianias-rydning-af-pusher-street-er-i-gang',
'info_dict': {
- 'id': 'christiania-pusher-street-ryddes-drdkrjpo',
+ 'id': 'urn:dr:mu:programcard:57c926176187a50a9c6e83c6',
'ext': 'mp4',
- 'title': 'LIVE Christianias rydning af Pusher Street er i gang',
+ 'title': 'christiania pusher street ryddes drdkrjpo',
'description': 'md5:2a71898b15057e9b97334f61d04e6eb5',
'timestamp': 1472800279,
'upload_date': '20160902',
'params': {
'skip_download': True,
},
+ 'expected_warnings': ['Unable to download f4m manifest'],
}, {
# with SignLanguage formats
'url': 'https://www.dr.dk/tv/se/historien-om-danmark/-/historien-om-danmark-stenalder',
'info_dict': {
'id': 'historien-om-danmark-stenalder',
'ext': 'mp4',
- 'title': 'Historien om Danmark: Stenalder (1)',
+ 'title': 'Historien om Danmark: Stenalder',
'description': 'md5:8c66dcbc1669bbc6f873879880f37f2a',
- 'timestamp': 1490401996,
- 'upload_date': '20170325',
- 'duration': 3502.04,
+ 'timestamp': 1546628400,
+ 'upload_date': '20190104',
+ 'duration': 3502.56,
'formats': 'mincount:20',
},
'params': {
video_id = self._search_regex(
(r'data-(?:material-identifier|episode-slug)="([^"]+)"',
- r'data-resource="[^>"]+mu/programcard/expanded/([^"]+)"'),
- webpage, 'video id')
+ r'data-resource="[^>"]+mu/programcard/expanded/([^"]+)"'),
+ webpage, 'video id', default=None)
- programcard = self._download_json(
- 'http://www.dr.dk/mu/programcard/expanded/%s' % video_id,
- video_id, 'Downloading video JSON')
- data = programcard['Data'][0]
+ if not video_id:
+ video_id = compat_urllib_parse_unquote(self._search_regex(
+ r'(urn(?:%3A|:)dr(?:%3A|:)mu(?:%3A|:)programcard(?:%3A|:)[\da-f]+)',
+ webpage, 'urn'))
- title = remove_end(self._og_search_title(
- webpage, default=None), ' | TV | DR') or data['Title']
+ data = self._download_json(
+ 'https://www.dr.dk/mu-online/api/1.4/programcard/%s' % video_id,
+ video_id, 'Downloading video JSON', query={'expanded': 'true'})
+
+ title = str_or_none(data.get('Title')) or re.sub(
+ r'\s*\|\s*(?:TV\s*\|\s*DR|DRTV)$', '',
+ self._og_search_title(webpage))
description = self._og_search_description(
webpage, default=None) or data.get('Description')
- timestamp = parse_iso8601(data.get('CreatedTime'))
+ timestamp = unified_timestamp(
+ data.get('PrimaryBroadcastStartTime') or data.get('SortDateTime'))
thumbnail = None
duration = None
formats = []
subtitles = {}
- for asset in data['Assets']:
+ assets = []
+ primary_asset = data.get('PrimaryAsset')
+ if isinstance(primary_asset, dict):
+ assets.append(primary_asset)
+ secondary_assets = data.get('SecondaryAssets')
+ if isinstance(secondary_assets, list):
+ for secondary_asset in secondary_assets:
+ if isinstance(secondary_asset, dict):
+ assets.append(secondary_asset)
+
+ def hex_to_bytes(hex):
+ # Convert a hexadecimal text string into the bytes it encodes.
+ # NOTE(review): the parameter name shadows the `hex` builtin.
+ return binascii.a2b_hex(hex.encode('ascii'))
+
+ def decrypt_uri(e):
+ # Decrypt an EncryptedUri string and return the URI without its query.
+ # Layout of `e` as read below: characters 2-9 hold the hex-encoded
+ # length n, the next n characters are the hex-encoded ciphertext and
+ # the remainder (a) is the hex-encoded IV. The first two characters
+ # are not used here.
+ n = int(e[2:10], 16)
+ a = e[10 + n:]
+ data = bytes_to_intlist(hex_to_bytes(e[10:10 + n]))
+ # The key is the SHA-256 digest of the IV hex string joined to a fixed
+ # secret with ':'.
+ key = bytes_to_intlist(hashlib.sha256(
+ ('%s:sRBzYNXBzkKgnjj8pGtkACch' % a).encode('utf-8')).digest())
+ iv = bytes_to_intlist(hex_to_bytes(a))
+ decrypted = aes_cbc_decrypt(data, key, iv)
+ # The last decrypted byte gives the number of trailing padding bytes to
+ # drop; everything from the first '?' onwards is then discarded.
+ return intlist_to_bytes(
+ decrypted[:-decrypted[-1]]).decode('utf-8').split('?')[0]
+
+ for asset in assets:
kind = asset.get('Kind')
if kind == 'Image':
- thumbnail = asset.get('Uri')
+ thumbnail = url_or_none(asset.get('Uri'))
elif kind in ('VideoResource', 'AudioResource'):
duration = float_or_none(asset.get('DurationInMilliseconds'), 1000)
restricted_to_denmark = asset.get('RestrictedToDenmark')
asset_target = asset.get('Target')
for link in asset.get('Links', []):
uri = link.get('Uri')
+ if not uri:
+ encrypted_uri = link.get('EncryptedUri')
+ if not encrypted_uri:
+ continue
+ try:
+ uri = decrypt_uri(encrypted_uri)
+ except Exception:
+ self.report_warning(
+ 'Unable to decrypt EncryptedUri', video_id)
+ continue
+ uri = url_or_none(uri)
if not uri:
continue
target = link.get('Target')
format_id = target or ''
- preference = None
- if asset_target in ('SpokenSubtitles', 'SignLanguage'):
+ if asset_target in ('SpokenSubtitles', 'SignLanguage', 'VisuallyInterpreted'):
preference = -1
format_id += '-%s' % asset_target
+ elif asset_target == 'Default':
+ preference = 1
+ else:
+ preference = None
if target == 'HDS':
f4m_formats = self._extract_f4m_formats(
uri + '?hdcore=3.3.0&plugin=aasp-3.3.0.99.43',
'vcodec': 'none' if kind == 'AudioResource' else None,
'preference': preference,
})
- subtitles_list = asset.get('SubtitlesList')
- if isinstance(subtitles_list, list):
- LANGS = {
- 'Danish': 'da',
- }
- for subs in subtitles_list:
- if not subs.get('Uri'):
- continue
- lang = subs.get('Language') or 'da'
- subtitles.setdefault(LANGS.get(lang, lang), []).append({
- 'url': subs['Uri'],
- 'ext': mimetype2ext(subs.get('MimeType')) or 'vtt'
- })
+ subtitles_list = asset.get('SubtitlesList') or asset.get('Subtitleslist')
+ if isinstance(subtitles_list, list):
+ LANGS = {
+ 'Danish': 'da',
+ }
+ for subs in subtitles_list:
+ if not isinstance(subs, dict):
+ continue
+ sub_uri = url_or_none(subs.get('Uri'))
+ if not sub_uri:
+ continue
+ lang = subs.get('Language') or 'da'
+ subtitles.setdefault(LANGS.get(lang, lang), []).append({
+ 'url': sub_uri,
+ 'ext': mimetype2ext(subs.get('MimeType')) or 'vtt'
+ })
if not formats and restricted_to_denmark:
self.raise_geo_restricted(
'duration': duration,
'formats': formats,
'subtitles': subtitles,
+ 'series': str_or_none(data.get('SeriesTitle')),
+ 'season': str_or_none(data.get('SeasonTitle')),
+ 'season_number': int_or_none(data.get('SeasonNumber')),
+ 'season_id': str_or_none(data.get('SeasonUrn')),
+ 'episode': str_or_none(data.get('EpisodeTitle')),
+ 'episode_number': int_or_none(data.get('EpisodeNumber')),
+ 'release_year': int_or_none(data.get('ProductionYear')),
}
int_or_none,
js_to_json,
mimetype2ext,
+ try_get,
unescapeHTML,
+ parse_iso8601,
)
class DVTVIE(InfoExtractor):
IE_NAME = 'dvtv'
IE_DESC = 'http://video.aktualne.cz/'
-
_VALID_URL = r'https?://video\.aktualne\.cz/(?:[^/]+/)+r~(?P<id>[0-9a-f]{32})'
-
_TESTS = [{
'url': 'http://video.aktualne.cz/dvtv/vondra-o-ceskem-stoleti-pri-pohledu-na-havla-mi-bylo-trapne/r~e5efe9ca855511e4833a0025900fea04/',
'md5': '67cb83e4a955d36e1b5d31993134a0c2',
'ext': 'mp4',
'title': 'Vondra o Českém století: Při pohledu na Havla mi bylo trapně',
'duration': 1484,
+ 'upload_date': '20141217',
+ 'timestamp': 1418792400,
}
}, {
'url': 'http://video.aktualne.cz/dvtv/dvtv-16-12-2014-utok-talibanu-boj-o-kliniku-uprchlici/r~973eb3bc854e11e498be002590604f2e/',
'info_dict': {
- 'title': r're:^DVTV 16\. 12\. 2014: útok Talibanu, boj o kliniku, uprchlíci',
+ 'title': r'DVTV 16. 12. 2014: útok Talibanu, boj o kliniku, uprchlíci',
'id': '973eb3bc854e11e498be002590604f2e',
},
'playlist': [{
'ext': 'mp4',
'title': 'Zeman si jen léčí mindráky, Sobotku nenávidí a Babiš se mu teď hodí, tvrdí Kmenta',
'duration': 1103,
+ 'upload_date': '20170511',
+ 'timestamp': 1494514200,
},
'params': {
'skip_download': True,
}, {
'url': 'http://video.aktualne.cz/v-cechach-poprve-zazni-zelenkova-zrestaurovana-mse/r~45b4b00483ec11e4883b002590604f2e/',
'only_matching': True,
+ }, {
+ # Test live stream video (liveStarter) parsing
+ 'url': 'https://video.aktualne.cz/dvtv/zive-mistryne-sveta-eva-samkova-po-navratu-ze-sampionatu/r~182654c2288811e990fd0cc47ab5f122/',
+ 'md5': '2e552e483f2414851ca50467054f9d5d',
+ 'info_dict': {
+ 'id': '8d116360288011e98c840cc47ab5f122',
+ 'ext': 'mp4',
+ 'title': 'Živě: Mistryně světa Eva Samková po návratu ze šampionátu',
+ 'upload_date': '20190204',
+ 'timestamp': 1549289591,
+ },
+ 'params': {
+ # Video content is no longer available
+ 'skip_download': True,
+ },
}]
- def _parse_video_metadata(self, js, video_id, live_js=None):
+ def _parse_video_metadata(self, js, video_id, timestamp):
data = self._parse_json(js, video_id, transform_source=js_to_json)
- if live_js:
- data.update(self._parse_json(
- live_js, video_id, transform_source=js_to_json))
-
title = unescapeHTML(data['title'])
+ live_starter = try_get(data, lambda x: x['plugins']['liveStarter'], dict)
+ if live_starter:
+ data.update(live_starter)
+
formats = []
- for video in data['sources']:
- video_url = video.get('file')
- if not video_url:
- continue
- video_type = video.get('type')
- ext = determine_ext(video_url, mimetype2ext(video_type))
- if video_type == 'application/vnd.apple.mpegurl' or ext == 'm3u8':
- formats.extend(self._extract_m3u8_formats(
- video_url, video_id, 'mp4', entry_protocol='m3u8_native',
- m3u8_id='hls', fatal=False))
- elif video_type == 'application/dash+xml' or ext == 'mpd':
- formats.extend(self._extract_mpd_formats(
- video_url, video_id, mpd_id='dash', fatal=False))
- else:
- label = video.get('label')
- height = self._search_regex(
- r'^(\d+)[pP]', label or '', 'height', default=None)
- format_id = ['http']
- for f in (ext, label):
- if f:
- format_id.append(f)
- formats.append({
- 'url': video_url,
- 'format_id': '-'.join(format_id),
- 'height': int_or_none(height),
- })
+ for tracks in data.get('tracks', {}).values():
+ for video in tracks:
+ video_url = video.get('src')
+ if not video_url:
+ continue
+ video_type = video.get('type')
+ ext = determine_ext(video_url, mimetype2ext(video_type))
+ if video_type == 'application/vnd.apple.mpegurl' or ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ video_url, video_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls', fatal=False))
+ elif video_type == 'application/dash+xml' or ext == 'mpd':
+ formats.extend(self._extract_mpd_formats(
+ video_url, video_id, mpd_id='dash', fatal=False))
+ else:
+ label = video.get('label')
+ height = self._search_regex(
+ r'^(\d+)[pP]', label or '', 'height', default=None)
+ format_id = ['http']
+ for f in (ext, label):
+ if f:
+ format_id.append(f)
+ formats.append({
+ 'url': video_url,
+ 'format_id': '-'.join(format_id),
+ 'height': int_or_none(height),
+ })
self._sort_formats(formats)
return {
'description': data.get('description'),
'thumbnail': data.get('image'),
'duration': int_or_none(data.get('duration')),
- 'timestamp': int_or_none(data.get('pubtime')),
+ 'timestamp': int_or_none(timestamp),
'formats': formats
}
def _real_extract(self, url):
video_id = self._match_id(url)
-
webpage = self._download_webpage(url, video_id)
+ timestamp = parse_iso8601(self._html_search_meta(
+ 'article:published_time', webpage, 'published time', default=None))
- # live content
- live_item = self._search_regex(
- r'(?s)embedData[0-9a-f]{32}\.asset\.liveStarter\s*=\s*(\{.+?\});',
- webpage, 'video', default=None)
+ items = re.findall(r'(?s)playlist\.push\(({.+?})\);', webpage)
+ if items:
+ return self.playlist_result(
+ [self._parse_video_metadata(i, video_id, timestamp) for i in items],
+ video_id, self._html_search_meta('twitter:title', webpage))
- # single video
item = self._search_regex(
- r'(?s)embedData[0-9a-f]{32}\[["\']asset["\']\]\s*=\s*(\{.+?\});',
+ r'(?s)BBXPlayer\.setup\((.+?)\);',
webpage, 'video', default=None)
-
if item:
- return self._parse_video_metadata(item, video_id, live_item)
-
- # playlist
- items = re.findall(
- r"(?s)BBX\.context\.assets\['[0-9a-f]{32}'\]\.push\(({.+?})\);",
- webpage)
- if not items:
- items = re.findall(r'(?s)var\s+asset\s*=\s*({.+?});\n', webpage)
-
- if items:
- return {
- '_type': 'playlist',
- 'id': video_id,
- 'title': self._og_search_title(webpage),
- 'entries': [self._parse_video_metadata(i, video_id) for i in items]
- }
+ # remove function calls (e.g. htmldeentitize)
+ # TODO: this should be fixed in a general way in js_to_json
+ item = re.sub(r'\w+?\((.+)\)', r'\1', item)
+ return self._parse_video_metadata(item, video_id, timestamp)
raise ExtractorError('Could not find neither video nor playlist')
from __future__ import unicode_literals
-import json
-
from .common import InfoExtractor
from ..utils import (
determine_ext,
clean_html,
int_or_none,
float_or_none,
- sanitized_Request,
)
class EscapistIE(InfoExtractor):
- _VALID_URL = r'https?://?(?:www\.)?escapistmagazine\.com/videos/view/[^/?#]+/(?P<id>[0-9]+)-[^/?#]*(?:$|[?#])'
+ _VALID_URL = r'https?://?(?:(?:www|v1)\.)?escapistmagazine\.com/videos/view/[^/]+/(?P<id>[0-9]+)'
_TESTS = [{
'url': 'http://www.escapistmagazine.com/videos/view/the-escapist-presents/6618-Breaking-Down-Baldurs-Gate',
'md5': 'ab3a706c681efca53f0a35f1415cf0d1',
'duration': 304,
'uploader': 'The Escapist',
}
+ }, {
+ 'url': 'http://escapistmagazine.com/videos/view/the-escapist-presents/6618',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://v1.escapistmagazine.com/videos/view/the-escapist-presents/6618-Breaking-Down-Baldurs-Gate',
+ 'only_matching': True,
}]
def _real_extract(self, url):
video_id = ims_video['videoID']
key = ims_video['hash']
- config_req = sanitized_Request(
- 'http://www.escapistmagazine.com/videos/'
- 'vidconfig.php?videoID=%s&hash=%s' % (video_id, key))
- config_req.add_header('Referer', url)
- config = self._download_webpage(config_req, video_id, 'Downloading video config')
+ config = self._download_webpage(
+ 'http://www.escapistmagazine.com/videos/vidconfig.php',
+ video_id, 'Downloading video config', headers={
+ 'Referer': url,
+ }, query={
+ 'videoID': video_id,
+ 'hash': key,
+ })
- data = json.loads(_decrypt_config(key, config))
+ data = self._parse_json(_decrypt_config(key, config), video_id)
video_data = data['videoData']
title = clean_html(video_data['title'])
- duration = float_or_none(video_data.get('duration'), 1000)
- uploader = video_data.get('publisher')
formats = [{
'url': video['src'],
'id': video_id,
'formats': formats,
'title': title,
- 'thumbnail': self._og_search_thumbnail(webpage),
+ 'thumbnail': self._og_search_thumbnail(webpage) or data.get('poster'),
'description': self._og_search_description(webpage),
- 'duration': duration,
- 'uploader': uploader,
+ 'duration': float_or_none(video_data.get('duration'), 1000),
+ 'uploader': video_data.get('publisher'),
+ 'series': video_data.get('show'),
}
(?:
.*?\?.*?\bid=|
/_/id/
- )
+ )|
+ [^/]+/video/
)
)|
(?:www\.)espnfc\.(?:com|us)/(?:video/)?[^/]+/\d+/video/
}, {
'url': 'http://www.espnfc.com/english-premier-league/23/video/3324163/premier-league-in-90-seconds-golden-tweets',
'only_matching': True,
+ }, {
+ 'url': 'http://www.espn.com/espnw/video/26066627/arkansas-gibson-completes-hr-cycle-four-innings',
+ 'only_matching': True,
}]
def _real_extract(self, url):
title = info.get('titleRaw') or data['title']
description = info.get('descriptionRaw')
thumbnail = info.get('socialMediaImage') or data.get('image')
- duration = int_or_none(info.get('videoTotalSecondsDuration') or
- data.get('totalSecondsDuration'))
+ duration = int_or_none(info.get('videoTotalSecondsDuration')
+ or data.get('totalSecondsDuration'))
timestamp = unified_timestamp(info.get('publishDate'))
return {
)
from .addanime import AddAnimeIE
from .adn import ADNIE
+from .adobeconnect import AdobeConnectIE
from .adobetv import (
AdobeTVIE,
AdobeTVShowIE,
from .amcnetworks import AMCNetworksIE
from .americastestkitchen import AmericasTestKitchenIE
from .animeondemand import AnimeOnDemandIE
-from .anitube import AnitubeIE
from .anvato import AnvatoIE
-from .anysex import AnySexIE
from .aol import AolIE
from .allocine import AllocineIE
from .aliexpress import AliExpressLiveIE
from .bellmedia import BellMediaIE
from .beatport import BeatportIE
from .bet import BetIE
+from .bfi import BFIPlayerIE
from .bigflix import BigflixIE
from .bild import BildIE
from .bilibili import (
from .cbslocal import CBSLocalIE
from .cbsinteractive import CBSInteractiveIE
from .cbsnews import (
+ CBSNewsEmbedIE,
CBSNewsIE,
CBSNewsLiveVideoIE,
)
from .cbssports import CBSSportsIE
-from .ccc import CCCIE
+from .ccc import (
+ CCCIE,
+ CCCPlaylistIE,
+)
from .ccma import CCMAIE
from .cctv import CCTVIE
from .cda import CDAIE
ChirbitProfileIE,
)
from .cinchcast import CinchcastIE
+from .cinemax import CinemaxIE
from .ciscolive import (
CiscoLiveSessionIE,
CiscoLiveSearchIE,
from .corus import CorusIE
from .cracked import CrackedIE
from .crackle import CrackleIE
-from .criterion import CriterionIE
from .crooksandliars import CrooksAndLiarsIE
from .crunchyroll import (
CrunchyrollIE,
DPlayIE,
DPlayItIE,
)
-from .dramafever import (
- DramaFeverIE,
- DramaFeverSeriesIE,
-)
from .dreisat import DreiSatIE
from .drbonanza import DRBonanzaIE
from .drtuber import DrTuberIE
from .gputechconf import GPUTechConfIE
from .groupon import GrouponIE
from .hark import HarkIE
-from .hbo import (
- HBOIE,
- HBOEpisodeIE,
-)
+from .hbo import HBOIE
from .hearthisat import HearThisAtIE
from .heise import HeiseIE
from .hellporno import HellPornoIE
from .helsinki import HelsinkiIE
from .hentaistigma import HentaiStigmaIE
from .hgtv import HGTVComShowIE
+from .hketv import HKETVIE
from .hidive import HiDiveIE
from .historicfilms import HistoricFilmsIE
from .hitbox import HitboxIE, HitboxLiveIE
from .inc import IncIE
from .indavideo import IndavideoEmbedIE
from .infoq import InfoQIE
-from .instagram import InstagramIE, InstagramUserIE
+from .instagram import (
+ InstagramIE,
+ InstagramUserIE,
+ InstagramTagIE,
+)
from .internazionale import InternazionaleIE
from .internetvideoarchive import InternetVideoArchiveIE
from .iprima import IPrimaIE
LinkedInLearningIE,
LinkedInLearningCourseIE,
)
+from .linuxacademy import LinuxAcademyIE
from .litv import LiTVIE
from .liveleak import (
LiveLeakIE,
MailRuMusicSearchIE,
)
from .makertv import MakerTVIE
+from .malltv import MallTVIE
from .mangomolo import (
MangomoloVideoIE,
MangomoloLiveIE,
from .matchtv import MatchTVIE
from .mdr import MDRIE
from .mediaset import MediasetIE
-from .mediasite import MediasiteIE
+from .mediasite import (
+ MediasiteIE,
+ MediasiteCatalogIE,
+ MediasiteNamedCatalogIE,
+)
from .medici import MediciIE
from .megaphone import MegaphoneIE
from .meipai import MeipaiIE
MyviEmbedIE,
)
from .myvidster import MyVidsterIE
-from .nationalgeographic import NationalGeographicVideoIE
+from .nationalgeographic import (
+ NationalGeographicVideoIE,
+ NationalGeographicTVIE,
+)
from .naver import NaverIE
from .nba import NBAIE
from .nbc import (
NovaEmbedIE,
NovaIE,
)
-from .novamov import (
- AuroraVidIE,
- CloudTimeIE,
- NowVideoIE,
- VideoWeedIE,
- WholeCloudIE,
-)
from .nowness import (
NownessIE,
NownessPlaylistIE,
NRKTVSeasonIE,
NRKTVSeriesIE,
)
+from .nrl import NRLTVIE
+from .ntvcojp import NTVCoJpCUIE
from .ntvde import NTVDeIE
from .ntvru import NTVRuIE
from .nytimes import (
OoyalaIE,
OoyalaExternalIE,
)
-from .openload import OpenloadIE
+from .openload import (
+ OpenloadIE,
+ VerystreamIE,
+)
from .ora import OraTVIE
from .orf import (
ORFTVthekIE,
from .piksel import PikselIE
from .pinkbike import PinkbikeIE
from .pladform import PladformIE
+from .platzi import (
+ PlatziIE,
+ PlatziCourseIE,
+)
from .playfm import PlayFMIE
from .playplustv import PlayPlusTVIE
from .plays import PlaysTVIE
from .popcorntv import PopcornTVIE
from .porn91 import Porn91IE
from .porncom import PornComIE
-from .pornflip import PornFlipIE
from .pornhd import PornHdIE
from .pornhub import (
PornHubIE,
PuhuTVSerieIE,
)
from .presstv import PressTVIE
-from .primesharetv import PrimeShareTVIE
from .promptfile import PromptFileIE
from .prosiebensat1 import ProSiebenSat1IE
from .puls4 import Puls4IE
)
from .rbmaradio import RBMARadioIE
from .rds import RDSIE
-from .redbulltv import RedBullTVIE
+from .redbulltv import (
+ RedBullTVIE,
+ RedBullTVRrnContentIE,
+)
from .reddit import (
RedditIE,
RedditRIE,
from .rtvs import RTVSIE
from .rudo import RudoIE
from .ruhd import RUHDIE
-from .ruleporn import RulePornIE
from .rutube import (
RutubeIE,
RutubeChannelIE,
SkyNewsArabiaIE,
SkyNewsArabiaArticleIE,
)
-from .skysports import SkySportsIE
+from .sky import (
+ SkyNewsIE,
+ SkySportsIE,
+)
from .slideshare import SlideshareIE
from .slideslive import SlidesLiveIE
from .slutload import SlutloadIE
SouthParkEsIE,
SouthParkNlIE
)
-from .spankbang import SpankBangIE
+from .spankbang import (
+ SpankBangIE,
+ SpankBangPlaylistIE,
+)
from .spankwire import SpankwireIE
from .spiegel import SpiegelIE, SpiegelArticleIE
from .spiegeltv import SpiegeltvIE
from .streamcz import StreamCZIE
from .streetvoice import StreetVoiceIE
from .stretchinternet import StretchInternetIE
+from .stv import STVPlayerIE
from .sunporno import SunPornoIE
+from .sverigesradio import (
+ SverigesRadioEpisodeIE,
+ SverigesRadioPublicationIE,
+)
from .svt import (
SVTIE,
SVTPageIE,
)
from .teachingchannel import TeachingChannelIE
from .teamcoco import TeamcocoIE
+from .teamtreehouse import TeamTreeHouseIE
from .techtalks import TechTalksIE
from .ted import TEDIE
from .tele5 import Tele5IE
from .toypics import ToypicsUserIE, ToypicsIE
from .traileraddict import TrailerAddictIE
from .trilulilu import TriluliluIE
+from .trunews import TruNewsIE
from .trutv import TruTVIE
from .tube8 import Tube8IE
from .tubitv import TubiTvIE
from .tvp import (
TVPEmbedIE,
TVPIE,
- TVPSeriesIE,
+ TVPWebsiteIE,
)
from .tvplay import (
TVPlayIE,
from .videa import VideaIE
from .videodetective import VideoDetectiveIE
from .videofyme import VideofyMeIE
-from .videomega import VideoMegaIE
from .videomore import (
VideomoreIE,
VideomoreVideoIE,
VoxMediaVolumeIE,
VoxMediaIE,
)
-from .vporn import VpornIE
from .vrt import VRTIE
from .vrak import VrakIE
from .vrv import (
from .vvvvid import VVVVIDIE
from .vyborymos import VyboryMosIE
from .vzaar import VzaarIE
+from .wakanim import WakanimIE
from .walla import WallaIE
from .washingtonpost import (
WashingtonPostIE,
WebOfStoriesPlaylistIE,
)
from .weibo import (
- WeiboIE,
+ WeiboIE,
WeiboMobileIE
)
from .weiqitv import WeiqiTVIE
from .wimp import WimpIE
from .wistia import WistiaIE
from .worldstarhiphop import WorldStarHipHopIE
-from .wrzuta import (
- WrzutaIE,
- WrzutaPlaylistIE,
-)
from .wsj import (
WSJIE,
WSJArticleIE,
from .yahoo import (
YahooIE,
YahooSearchIE,
+ YahooGyaOPlayerIE,
+ YahooGyaOIE,
)
+from .yandexdisk import YandexDiskIE
from .yandexmusic import (
YandexMusicTrackIE,
YandexMusicAlbumIE,
YandexMusicPlaylistIE,
)
-from .yandexdisk import YandexDiskIE
+from .yandexvideo import YandexVideoIE
from .yapfiles import YapFilesIE
from .yesjapan import YesJapanIE
from .yinyuetai import YinYueTaiIE
QuantumTVIE,
QuicklineIE,
QuicklineLiveIE,
+ SaltTVIE,
SAKTVIE,
VTXTVIE,
WalyTVIE,
uploader = clean_html(get_element_by_id(
'fbPhotoPageAuthorName', webpage)) or self._search_regex(
r'ownerName\s*:\s*"([^"]+)"', webpage, 'uploader',
- fatal=False) or self._og_search_title(webpage, fatal=False)
+ default=None) or self._og_search_title(webpage, fatal=False)
timestamp = int_or_none(self._search_regex(
r'<abbr[^>]+data-utime=["\'](\d+)', webpage,
'timestamp', default=None))
class FourTubeBaseIE(InfoExtractor):
- _TKN_HOST = 'tkn.kodicdn.com'
-
def _extract_formats(self, url, video_id, media_id, sources):
token_url = 'https://%s/%s/desktop/%s' % (
self._TKN_HOST, media_id, '+'.join(sources))
IE_NAME = '4tube'
_VALID_URL = r'https?://(?:(?P<kind>www|m)\.)?4tube\.com/(?:videos|embed)/(?P<id>\d+)(?:/(?P<display_id>[^/?#&]+))?'
_URL_TEMPLATE = 'https://www.4tube.com/videos/%s/video'
+ _TKN_HOST = 'token.4tube.com'
_TESTS = [{
'url': 'http://www.4tube.com/videos/209733/hot-babe-holly-michaels-gets-her-ass-stuffed-by-black',
'md5': '6516c8ac63b03de06bc8eac14362db4f',
class FuxIE(FourTubeBaseIE):
_VALID_URL = r'https?://(?:(?P<kind>www|m)\.)?fux\.com/(?:video|embed)/(?P<id>\d+)(?:/(?P<display_id>[^/?#&]+))?'
_URL_TEMPLATE = 'https://www.fux.com/video/%s/video'
+ _TKN_HOST = 'token.fux.com'
_TESTS = [{
'url': 'https://www.fux.com/video/195359/awesome-fucking-kitchen-ends-cum-swallow',
'info_dict': {
class PornerBrosIE(FourTubeBaseIE):
_VALID_URL = r'https?://(?:(?P<kind>www|m)\.)?pornerbros\.com/(?:videos/(?P<display_id>[^/]+)_|embed/)(?P<id>\d+)'
_URL_TEMPLATE = 'https://www.pornerbros.com/videos/video_%s'
+ _TKN_HOST = 'token.pornerbros.com'
_TESTS = [{
'url': 'https://www.pornerbros.com/videos/skinny-brunette-takes-big-cock-down-her-anal-hole_181369',
'md5': '6516c8ac63b03de06bc8eac14362db4f',
# coding: utf-8
from __future__ import unicode_literals
-# import json
-# import uuid
+import json
+import uuid
from .adobepass import AdobePassIE
+from ..compat import (
+ compat_HTTPError,
+ compat_str,
+ compat_urllib_parse_unquote,
+)
from ..utils import (
+ ExtractorError,
int_or_none,
parse_age_limit,
parse_duration,
try_get,
unified_timestamp,
- update_url_query,
)
class FOXIE(AdobePassIE):
- _VALID_URL = r'https?://(?:www\.)?(?:fox\.com|nationalgeographic\.com/tv)/watch/(?P<id>[\da-fA-F]+)'
+ _VALID_URL = r'https?://(?:www\.)?fox\.com/watch/(?P<id>[\da-fA-F]+)'
_TESTS = [{
# clip
'url': 'https://www.fox.com/watch/4b765a60490325103ea69888fb2bd4e8/',
'upload_date': '20170901',
'creator': 'FOX',
'series': 'Gotham',
+ 'age_limit': 14,
},
'params': {
'skip_download': True,
# episode, geo-restricted, TV provider required
'url': 'https://www.fox.com/watch/30056b295fb57f7452aeeb4920bc3024/',
'only_matching': True,
- }, {
- 'url': 'https://www.nationalgeographic.com/tv/watch/f690e05ebbe23ab79747becd0cc223d1/',
- 'only_matching': True,
}]
- # _access_token = None
+ _GEO_BYPASS = False
+ _HOME_PAGE_URL = 'https://www.fox.com/'
+ _API_KEY = 'abdcbed02c124d393b39e818a4312055'
+ _access_token = None
- # def _call_api(self, path, video_id, data=None):
- # headers = {
- # 'X-Api-Key': '238bb0a0c2aba67922c48709ce0c06fd',
- # }
- # if self._access_token:
- # headers['Authorization'] = 'Bearer ' + self._access_token
- # return self._download_json(
- # 'https://api2.fox.com/v2.0/' + path, video_id, data=data, headers=headers)
+ def _call_api(self, path, video_id, data=None):
+ headers = {
+ 'X-Api-Key': self._API_KEY,
+ }
+ if self._access_token:
+ headers['Authorization'] = 'Bearer ' + self._access_token
+ try:
+ return self._download_json(
+ 'https://api2.fox.com/v2.0/' + path,
+ video_id, data=data, headers=headers)
+ except ExtractorError as e:
+ if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
+ entitlement_issues = self._parse_json(
+ e.cause.read().decode(), video_id)['entitlementIssues']
+ for e in entitlement_issues:
+ if e.get('errorCode') == 1005:
+ raise ExtractorError(
+ 'This video is only available via cable service provider '
+ 'subscription. You may want to use --cookies.', expected=True)
+ messages = ', '.join([e['message'] for e in entitlement_issues])
+ raise ExtractorError(messages, expected=True)
+ raise
- # def _real_initialize(self):
- # self._access_token = self._call_api(
- # 'login', None, json.dumps({
- # 'deviceId': compat_str(uuid.uuid4()),
- # }).encode())['accessToken']
+ def _real_initialize(self):
+ if not self._access_token:
+ mvpd_auth = self._get_cookies(self._HOME_PAGE_URL).get('mvpd-auth')
+ if mvpd_auth:
+ self._access_token = (self._parse_json(compat_urllib_parse_unquote(
+ mvpd_auth.value), None, fatal=False) or {}).get('accessToken')
+ if not self._access_token:
+ self._access_token = self._call_api(
+ 'login', None, json.dumps({
+ 'deviceId': compat_str(uuid.uuid4()),
+ }).encode())['accessToken']
def _real_extract(self, url):
video_id = self._match_id(url)
- video = self._download_json(
- 'https://api.fox.com/fbc-content/v1_5/video/%s' % video_id,
- video_id, headers={
- 'apikey': 'abdcbed02c124d393b39e818a4312055',
- 'Content-Type': 'application/json',
- 'Referer': url,
- })
- # video = self._call_api('vodplayer/' + video_id, video_id)
+ video = self._call_api('vodplayer/' + video_id, video_id)
title = video['name']
- release_url = video['videoRelease']['url']
- # release_url = video['url']
-
- data = try_get(
- video, lambda x: x['trackingData']['properties'], dict) or {}
-
- rating = video.get('contentRating')
- if data.get('authRequired'):
- resource = self._get_mvpd_resource(
- 'fbc-fox', title, video.get('guid'), rating)
- release_url = update_url_query(
- release_url, {
- 'auth': self._extract_mvpd_auth(
- url, video_id, 'fbc-fox', resource)
- })
- m3u8_url = self._download_json(release_url, video_id)['playURL']
+ release_url = video['url']
+ try:
+ m3u8_url = self._download_json(release_url, video_id)['playURL']
+ except ExtractorError as e:
+ if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
+ error = self._parse_json(e.cause.read().decode(), video_id)
+ if error.get('exception') == 'GeoLocationBlocked':
+ self.raise_geo_restricted(countries=['US'])
+ raise ExtractorError(error['description'], expected=True)
+ raise
formats = self._extract_m3u8_formats(
m3u8_url, video_id, 'mp4',
entry_protocol='m3u8_native', m3u8_id='hls')
self._sort_formats(formats)
+ data = try_get(
+ video, lambda x: x['trackingData']['properties'], dict) or {}
+
duration = int_or_none(video.get('durationInSeconds')) or int_or_none(
video.get('duration')) or parse_duration(video.get('duration'))
timestamp = unified_timestamp(video.get('datePublished'))
'description': video.get('description'),
'duration': duration,
'timestamp': timestamp,
- 'age_limit': parse_age_limit(rating),
+ 'age_limit': parse_age_limit(video.get('contentRating')),
'creator': creator,
'series': series,
'season_number': int_or_none(video.get('seasonNumber')),
ext = determine_ext(video_url)
if ext == 'f4m':
if georestricted:
- # See https://github.com/rg3/youtube-dl/issues/3963
+ # See https://github.com/ytdl-org/youtube-dl/issues/3963
# m3u8 urls work fine
continue
formats.extend(self._extract_f4m_formats(
_TESTS = [{
'url': 'https://www.france.tv/france-2/13h15-le-dimanche/140921-les-mysteres-de-jesus.html',
'info_dict': {
- 'id': '162311093',
+ 'id': 'ec217ecc-0733-48cf-ac06-af1347b849d1',
'ext': 'mp4',
'title': '13h15, le dimanche... - Les mystères de Jésus',
'description': 'md5:75efe8d4c0a8205e5904498ffe1e1a42',
catalogue = None
video_id = self._search_regex(
- r'data-main-video=(["\'])(?P<id>(?:(?!\1).)+)\1',
+ r'(?:data-main-video\s*=|videoId["\']?\s*[:=])\s*(["\'])(?P<id>(?:(?!\1).)+)\1',
webpage, 'video id', default=None, group='id')
if not video_id:
self.url_result(dailymotion_url, DailymotionIE.ie_key())
for dailymotion_url in dailymotion_urls])
- video_id, catalogue = self._search_regex(
- (r'id-video=([^@]+@[^"]+)',
+ video_id = self._search_regex(
+ (r'player\.load[^;]+src:\s*["\']([^"\']+)',
+ r'id-video=([^@]+@[^"]+)',
r'<a[^>]+href="(?:https?:)?//videos\.francetv\.fr/video/([^@]+@[^"]+)"'),
- webpage, 'video id').split('@')
+ webpage, 'video id')
- return self._make_url_result(video_id, catalogue)
+ return self._make_url_result(video_id)
class FranceTVInfoSportIE(FranceTVBaseInfoExtractor):
chapter_number = None
index = lesson.get('index')
element_index = lesson.get('elementIndex')
- if (isinstance(index, int) and isinstance(element_index, int) and
- index < element_index):
+ if (isinstance(index, int) and isinstance(element_index, int)
+ and index < element_index):
chapter_number = element_index - index
chapter = (chapters[chapter_number - 1]
if chapter_number - 1 < len(chapters) else None)
import re
from .common import InfoExtractor
-from ..compat import compat_str
+from ..compat import (
+ compat_str,
+ compat_urllib_parse_unquote,
+)
from ..utils import (
+ ExtractorError,
int_or_none,
str_or_none,
strip_or_none,
try_get,
+ urlencode_postdata,
)
'skip_download': True,
},
}]
+ _NETRC_MACHINE = 'gaia'
+ _jwt = None
+
+ def _real_initialize(self):
+ auth = self._get_cookies('https://www.gaia.com/').get('auth')
+ if auth:
+ auth = self._parse_json(
+ compat_urllib_parse_unquote(auth.value),
+ None, fatal=False)
+ if not auth:
+ username, password = self._get_login_info()
+ if username is None:
+ return
+ auth = self._download_json(
+ 'https://auth.gaia.com/v1/login',
+ None, data=urlencode_postdata({
+ 'username': username,
+ 'password': password
+ }))
+ if auth.get('success') is False:
+ raise ExtractorError(', '.join(auth['messages']), expected=True)
+ if auth:
+ self._jwt = auth.get('jwt')
def _real_extract(self, url):
display_id, vtype = re.search(self._VALID_URL, url).groups()
media_id = compat_str(vdata['nid'])
title = node['title']
+ headers = None
+ if self._jwt:
+ headers = {'Authorization': 'Bearer ' + self._jwt}
media = self._download_json(
- 'https://brooklyn.gaia.com/media/' + media_id, media_id)
+ 'https://brooklyn.gaia.com/media/' + media_id,
+ media_id, headers=headers)
formats = self._extract_m3u8_formats(
media['mediaUrls']['bcHLS'], media_id, 'mp4')
self._sort_formats(formats)
import re
from .common import InfoExtractor
+from .kaltura import KalturaIE
from ..utils import (
HEADRequest,
sanitized_Request,
+ smuggle_url,
urlencode_postdata,
)
class GDCVaultIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?gdcvault\.com/play/(?P<id>\d+)/(?P<name>(\w|-)+)?'
+ _VALID_URL = r'https?://(?:www\.)?gdcvault\.com/play/(?P<id>\d+)(?:/(?P<name>[\w-]+))?'
_NETRC_MACHINE = 'gdcvault'
_TESTS = [
{
'url': 'http://www.gdcvault.com/play/1019721/Doki-Doki-Universe-Sweet-Simple',
'md5': '7ce8388f544c88b7ac11c7ab1b593704',
'info_dict': {
- 'id': '1019721',
+ 'id': '201311826596_AWNY',
'display_id': 'Doki-Doki-Universe-Sweet-Simple',
'ext': 'mp4',
'title': 'Doki-Doki Universe: Sweet, Simple and Genuine (GDC Next 10)'
{
'url': 'http://www.gdcvault.com/play/1015683/Embracing-the-Dark-Art-of',
'info_dict': {
- 'id': '1015683',
+ 'id': '201203272_1330951438328RSXR',
'display_id': 'Embracing-the-Dark-Art-of',
'ext': 'flv',
'title': 'Embracing the Dark Art of Mathematical Modeling in AI'
'url': 'http://gdcvault.com/play/1023460/Tenacious-Design-and-The-Interface',
'md5': 'a8efb6c31ed06ca8739294960b2dbabd',
'info_dict': {
- 'id': '1023460',
+ 'id': '840376_BQRC',
'ext': 'mp4',
'display_id': 'Tenacious-Design-and-The-Interface',
'title': 'Tenacious Design and The Interface of \'Destiny\'',
# Multiple audios
'url': 'http://www.gdcvault.com/play/1014631/Classic-Game-Postmortem-PAC',
'info_dict': {
- 'id': '1014631',
- 'ext': 'flv',
+ 'id': '12396_1299111843500GMPX',
+ 'ext': 'mp4',
'title': 'How to Create a Good Game - From My Experience of Designing Pac-Man',
},
- 'params': {
- 'skip_download': True, # Requires rtmpdump
- 'format': 'jp', # The japanese audio
- }
+ # 'params': {
+ # 'skip_download': True, # Requires rtmpdump
+ # 'format': 'jp', # The japanese audio
+ # }
},
{
# gdc-player.html
'url': 'http://www.gdcvault.com/play/1435/An-American-engine-in-Tokyo',
'info_dict': {
- 'id': '1435',
+ 'id': '9350_1238021887562UHXB',
'display_id': 'An-American-engine-in-Tokyo',
- 'ext': 'flv',
+ 'ext': 'mp4',
'title': 'An American Engine in Tokyo:/nThe collaboration of Epic Games and Square Enix/nFor THE LAST REMINANT',
},
+ },
+ {
+ # Kaltura Embed
+ 'url': 'https://www.gdcvault.com/play/1026180/Mastering-the-Apex-of-Scaling',
+ 'info_dict': {
+ 'id': '0_h1fg8j3p',
+ 'ext': 'mp4',
+ 'title': 'Mastering the Apex of Scaling Game Servers (Presented by Multiplay)',
+ 'timestamp': 1554401811,
+ 'upload_date': '20190404',
+ 'uploader_id': 'joe@blazestreaming.com',
+ },
'params': {
- 'skip_download': True, # Requires rtmpdump
+ 'format': 'mp4-408',
},
},
]
return start_page
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
-
- video_id = mobj.group('id')
- display_id = mobj.group('name') or video_id
+ video_id, name = re.match(self._VALID_URL, url).groups()
+ display_id = name or video_id
webpage_url = 'http://www.gdcvault.com/play/' + video_id
start_page = self._download_webpage(webpage_url, display_id)
start_page, 'url', default=None)
if direct_url:
title = self._html_search_regex(
- r'<td><strong>Session Name</strong></td>\s*<td>(.*?)</td>',
+ r'<td><strong>Session Name:?</strong></td>\s*<td>(.*?)</td>',
start_page, 'title')
video_url = 'http://www.gdcvault.com' + direct_url
# resolve the url so that we can detect the correct extension
- head = self._request_webpage(HEADRequest(video_url), video_id)
- video_url = head.geturl()
+ video_url = self._request_webpage(
+ HEADRequest(video_url), video_id).geturl()
return {
'id': video_id,
'title': title,
}
- PLAYER_REGEX = r'<iframe src="(?P<xml_root>.+?)/(?:gdc-)?player.*?\.html.*?".*?</iframe>'
-
- xml_root = self._html_search_regex(
- PLAYER_REGEX, start_page, 'xml root', default=None)
- if xml_root is None:
- # Probably need to authenticate
- login_res = self._login(webpage_url, display_id)
- if login_res is None:
- self.report_warning('Could not login.')
- else:
- start_page = login_res
- # Grab the url from the authenticated page
- xml_root = self._html_search_regex(
- PLAYER_REGEX, start_page, 'xml root')
-
- xml_name = self._html_search_regex(
- r'<iframe src=".*?\?xml=(.+?\.xml).*?".*?</iframe>',
- start_page, 'xml filename', default=None)
- if xml_name is None:
- # Fallback to the older format
+ embed_url = KalturaIE._extract_url(start_page)
+ if embed_url:
+ embed_url = smuggle_url(embed_url, {'source_url': url})
+ ie_key = 'Kaltura'
+ else:
+ PLAYER_REGEX = r'<iframe src="(?P<xml_root>.+?)/(?:gdc-)?player.*?\.html.*?".*?</iframe>'
+
+ xml_root = self._html_search_regex(
+ PLAYER_REGEX, start_page, 'xml root', default=None)
+ if xml_root is None:
+ # Probably need to authenticate
+ login_res = self._login(webpage_url, display_id)
+ if login_res is None:
+ self.report_warning('Could not login.')
+ else:
+ start_page = login_res
+ # Grab the url from the authenticated page
+ xml_root = self._html_search_regex(
+ PLAYER_REGEX, start_page, 'xml root')
+
xml_name = self._html_search_regex(
- r'<iframe src=".*?\?xmlURL=xml/(?P<xml_file>.+?\.xml).*?".*?</iframe>',
+ r'<iframe src=".*?\?xml(?:=|URL=xml/)(.+?\.xml).*?".*?</iframe>',
start_page, 'xml filename')
+ embed_url = '%s/xml/%s' % (xml_root, xml_name)
+ ie_key = 'DigitallySpeaking'
return {
'_type': 'url_transparent',
'id': video_id,
'display_id': display_id,
- 'url': '%s/xml/%s' % (xml_root, xml_name),
- 'ie_key': 'DigitallySpeaking',
+ 'url': embed_url,
+ 'ie_key': ie_key,
}
from .videa import VideaIE
from .twentymin import TwentyMinutenIE
from .ustream import UstreamIE
-from .openload import OpenloadIE
+from .openload import (
+ OpenloadIE,
+ VerystreamIE,
+)
from .videopress import VideoPressIE
from .rutube import RutubeIE
from .limelight import LimelightBaseIE
},
},
{
- # https://github.com/rg3/youtube-dl/issues/2253
+ # https://github.com/ytdl-org/youtube-dl/issues/2253
'url': 'http://bcove.me/i6nfkrc3',
'md5': '0ba9446db037002366bab3b3eb30c88c',
'info_dict': {
},
},
{
- # https://github.com/rg3/youtube-dl/issues/3541
+ # https://github.com/ytdl-org/youtube-dl/issues/3541
'add_ie': ['BrightcoveLegacy'],
'url': 'http://www.kijk.nl/sbs6/leermijvrouwenkennen/videos/jqMiXKAYan2S/aflevering-1',
'info_dict': {
}
},
# Multiple brightcove videos
- # https://github.com/rg3/youtube-dl/issues/2283
+ # https://github.com/ytdl-org/youtube-dl/issues/2283
{
'url': 'http://www.newyorker.com/online/blogs/newsdesk/2014/01/always-never-nuclear-command-and-control.html',
'info_dict': {
return camtasia_res
# Sometimes embedded video player is hidden behind percent encoding
- # (e.g. https://github.com/rg3/youtube-dl/issues/2448)
+ # (e.g. https://github.com/ytdl-org/youtube-dl/issues/2448)
# Unescaping the whole page allows handling those cases in a generic way
webpage = compat_urllib_parse_unquote(webpage)
return self.url_result(mobj.group('url'))
# Look for Ooyala videos
- mobj = (re.search(r'player\.ooyala\.com/[^"?]+[?#][^"]*?(?:embedCode|ec)=(?P<ec>[^"&]+)', webpage) or
- re.search(r'OO\.Player\.create\([\'"].*?[\'"],\s*[\'"](?P<ec>.{32})[\'"]', webpage) or
- re.search(r'OO\.Player\.create\.apply\(\s*OO\.Player\s*,\s*op\(\s*\[\s*[\'"][^\'"]*[\'"]\s*,\s*[\'"](?P<ec>.{32})[\'"]', webpage) or
- re.search(r'SBN\.VideoLinkset\.ooyala\([\'"](?P<ec>.{32})[\'"]\)', webpage) or
- re.search(r'data-ooyala-video-id\s*=\s*[\'"](?P<ec>.{32})[\'"]', webpage))
+ mobj = (re.search(r'player\.ooyala\.com/[^"?]+[?#][^"]*?(?:embedCode|ec)=(?P<ec>[^"&]+)', webpage)
+ or re.search(r'OO\.Player\.create\([\'"].*?[\'"],\s*[\'"](?P<ec>.{32})[\'"]', webpage)
+ or re.search(r'OO\.Player\.create\.apply\(\s*OO\.Player\s*,\s*op\(\s*\[\s*[\'"][^\'"]*[\'"]\s*,\s*[\'"](?P<ec>.{32})[\'"]', webpage)
+ or re.search(r'SBN\.VideoLinkset\.ooyala\([\'"](?P<ec>.{32})[\'"]\)', webpage)
+ or re.search(r'data-ooyala-video-id\s*=\s*[\'"](?P<ec>.{32})[\'"]', webpage))
if mobj is not None:
embed_token = self._search_regex(
r'embedToken[\'"]?\s*:\s*[\'"]([^\'"]+)',
if mobj is not None:
return self.url_result(mobj.group(1), 'Mpora')
- # Look for embedded NovaMov-based player
- mobj = re.search(
- r'''(?x)<(?:pagespeed_)?iframe[^>]+?src=(["\'])
- (?P<url>http://(?:(?:embed|www)\.)?
- (?:novamov\.com|
- nowvideo\.(?:ch|sx|eu|at|ag|co)|
- videoweed\.(?:es|com)|
- movshare\.(?:net|sx|ag)|
- divxstage\.(?:eu|net|ch|co|at|ag))
- /embed\.php.+?)\1''', webpage)
- if mobj is not None:
- return self.url_result(mobj.group('url'))
-
# Look for embedded Facebook player
facebook_urls = FacebookIE._extract_urls(webpage)
if facebook_urls:
return self.playlist_from_matches(
openload_urls, video_id, video_title, ie=OpenloadIE.ie_key())
+ # Look for Verystream embeds
+ verystream_urls = VerystreamIE._extract_urls(webpage)
+ if verystream_urls:
+ return self.playlist_from_matches(
+ verystream_urls, video_id, video_title, ie=VerystreamIE.ie_key())
+
# Look for VideoPress embeds
videopress_urls = VideoPressIE._extract_urls(webpage)
if videopress_urls:
jwplayer_data, video_id, require_title=False, base_url=url)
return merge_dicts(info, info_dict)
except ExtractorError:
- # See https://github.com/rg3/youtube-dl/pull/16735
+ # See https://github.com/ytdl-org/youtube-dl/pull/16735
pass
# Video.js embed
else:
formats.append({
'url': src,
- 'ext': (mimetype2ext(src_type) or
- ext if ext in KNOWN_EXTENSIONS else 'mp4'),
+ 'ext': (mimetype2ext(src_type)
+ or ext if ext in KNOWN_EXTENSIONS else 'mp4'),
})
if formats:
self._sort_formats(formats)
class GfycatIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?gfycat\.com/(?:ifr/|gifs/detail/)?(?P<id>[^/?#]+)'
+ _VALID_URL = r'https?://(?:www\.)?gfycat\.com/(?:ifr/|gifs/detail/)?(?P<id>[^-/?#]+)'
_TESTS = [{
'url': 'http://gfycat.com/DeadlyDecisiveGermanpinscher',
'info_dict': {
}, {
'url': 'https://gfycat.com/gifs/detail/UnconsciousLankyIvorygull',
'only_matching': True
+ }, {
+ 'url': 'https://gfycat.com/acceptablehappygoluckyharborporpoise-baseball',
+ 'only_matching': True
}]
def _real_extract(self, url):
},
'watchdisneychannel': {
'brand': '004',
- 'requestor_id': 'Disney',
+ 'resource_id': 'Disney',
},
'watchdisneyjunior': {
'brand': '008',
- 'requestor_id': 'DisneyJunior',
+ 'resource_id': 'DisneyJunior',
},
'watchdisneyxd': {
'brand': '009',
- 'requestor_id': 'DisneyXD',
+ 'resource_id': 'DisneyXD',
}
}
_VALID_URL = r'https?://(?:(?P<sub_domain>%s)\.)?go\.com/(?:(?:[^/]+/)*(?P<id>vdka\w+)|(?:[^/]+/)*(?P<display_id>[^/?#]+))'\
'device': '001',
}
if video_data.get('accesslevel') == '1':
- requestor_id = site_info['requestor_id']
- resource = self._get_mvpd_resource(
+ requestor_id = site_info.get('requestor_id', 'DisneyChannels')
+ resource = site_info.get('resource_id') or self._get_mvpd_resource(
requestor_id, title, video_id, None)
auth = self._extract_mvpd_auth(
url, video_id, requestor_id, resource)
}
}, {
# video can't be watched anonymously due to view count limit reached,
- # but can be downloaded (see https://github.com/rg3/youtube-dl/issues/14046)
+ # but can be downloaded (see https://github.com/ytdl-org/youtube-dl/issues/14046)
'url': 'https://drive.google.com/file/d/0B-vUyvmDLdWDcEt4WjBqcmI2XzQ/view',
'md5': 'bfbd670d03a470bb1e6d4a257adec12e',
'info_dict': {
import re
from .common import InfoExtractor
-from ..compat import compat_str
from ..utils import (
xpath_text,
xpath_element,
int_or_none,
parse_duration,
+ urljoin,
)
},
}
- def _extract_from_id(self, video_id):
- video_data = self._download_xml(
- 'http://render.lv3.hbo.com/data/content/global/videos/data/%s.xml' % video_id, video_id)
- title = xpath_text(video_data, 'title', 'title', True)
+ def _extract_info(self, url, display_id):
+ video_data = self._download_xml(url, display_id)
+ video_id = xpath_text(video_data, 'id', fatal=True)
+ episode_title = title = xpath_text(video_data, 'title', fatal=True)
+ series = xpath_text(video_data, 'program')
+ if series:
+ title = '%s - %s' % (series, title)
formats = []
for source in xpath_element(video_data, 'videos', 'sources', True):
'width': width,
})
+ subtitles = None
+ caption_url = xpath_text(video_data, 'captionUrl')
+ if caption_url:
+ subtitles = {
+ 'en': [{
+ 'url': caption_url,
+ 'ext': 'ttml'
+ }],
+ }
+
return {
'id': video_id,
'title': title,
'duration': parse_duration(xpath_text(video_data, 'duration/tv14')),
+ 'series': series,
+ 'episode': episode_title,
'formats': formats,
'thumbnails': thumbnails,
+ 'subtitles': subtitles,
}
class HBOIE(HBOBaseIE):
IE_NAME = 'hbo'
- _VALID_URL = r'https?://(?:www\.)?hbo\.com/video/video\.html\?.*vid=(?P<id>[0-9]+)'
+ # Matches /video/... and /embed/... pages; the last path segment is used as
+ # the display id.
+ _VALID_URL = r'https?://(?:www\.)?hbo\.com/(?:video|embed)(?:/[^/]+)*/(?P<id>[^/?#]+)'
_TEST = {
- 'url': 'http://www.hbo.com/video/video.html?autoplay=true&g=u&vid=1437839',
- 'md5': '2c6a6bc1222c7e91cb3334dad1746e5a',
+ 'url': 'https://www.hbo.com/video/game-of-thrones/seasons/season-8/videos/trailer',
+ 'md5': '8126210656f433c452a21367f9ad85b3',
'info_dict': {
- 'id': '1437839',
+ 'id': '22113301',
'ext': 'mp4',
- 'title': 'Ep. 64 Clip: Encryption',
- 'thumbnail': r're:https?://.*\.jpg$',
- 'duration': 1072,
- }
- }
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
- return self._extract_from_id(video_id)
-
-
-class HBOEpisodeIE(HBOBaseIE):
- IE_NAME = 'hbo:episode'
- _VALID_URL = r'https?://(?:www\.)?hbo\.com/(?P<path>(?!video)(?:(?:[^/]+/)+video|watch-free-episodes)/(?P<id>[0-9a-z-]+))(?:\.html)?'
-
- _TESTS = [{
- 'url': 'http://www.hbo.com/girls/episodes/5/52-i-love-you-baby/video/ep-52-inside-the-episode.html?autoplay=true',
- 'md5': '61ead79b9c0dfa8d3d4b07ef4ac556fb',
- 'info_dict': {
- 'id': '1439518',
- 'display_id': 'ep-52-inside-the-episode',
- 'ext': 'mp4',
- 'title': 'Ep. 52: Inside the Episode',
- 'thumbnail': r're:https?://.*\.jpg$',
- 'duration': 240,
+ 'title': 'Game of Thrones - Trailer',
},
- }, {
- 'url': 'http://www.hbo.com/game-of-thrones/about/video/season-5-invitation-to-the-set.html?autoplay=true',
- 'only_matching': True,
- }, {
- 'url': 'http://www.hbo.com/watch-free-episodes/last-week-tonight-with-john-oliver',
- 'only_matching': True,
- }]
+ 'expected_warnings': ['Unknown MIME type application/mp4 in DASH manifest'],
+ }
def _real_extract(self, url):
- path, display_id = re.match(self._VALID_URL, url).groups()
-
- content = self._download_json(
- 'http://www.hbo.com/api/content/' + path, display_id)['content']
-
- video_id = compat_str((content.get('parsed', {}).get(
- 'common:FullBleedVideo', {}) or content['selectedEpisode'])['videoId'])
-
- info_dict = self._extract_from_id(video_id)
- info_dict['display_id'] = display_id
-
- return info_dict
+ # The page carries a JSON blob in a data-state attribute; its
+ # video.locationUrl is resolved against the page URL and handed to
+ # _extract_info, which downloads the document behind it.
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+ location_path = self._parse_json(self._html_search_regex(
+ r'data-state="({.+?})"', webpage, 'state'), display_id)['video']['locationUrl']
+ return self._extract_info(urljoin(url, location_path), display_id)
'id': video_id,
'title': title,
'description': description,
- 'thumbnail': (xpath_text(doc, './/{http://rss.jwpcdn.com/}image') or
- self._og_search_thumbnail(webpage)),
+ 'thumbnail': (xpath_text(doc, './/{http://rss.jwpcdn.com/}image')
+ or self._og_search_thumbnail(webpage)),
'timestamp': parse_iso8601(
self._html_search_meta('date', webpage)),
'formats': formats,
title = video_meta.get('media_status')
alt_title = video_meta.get('media_title')
description = clean_html(
- video_meta.get('media_description') or
- video_meta.get('media_description_md'))
+ video_meta.get('media_description')
+ or video_meta.get('media_description_md'))
duration = float_or_none(video_meta.get('media_duration'))
uploader = video_meta.get('media_user_name')
views = int_or_none(video_meta.get('media_views'))
tags = [
t['text']
for t in tags_list
- if isinstance(t, dict) and t.get('text') and
- isinstance(t['text'], compat_str)]
+ if isinstance(t, dict) and t.get('text')
+ and isinstance(t['text'], compat_str)]
return {
'id': video_id,
--- /dev/null
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ clean_html,
+ ExtractorError,
+ int_or_none,
+ merge_dicts,
+ parse_count,
+ str_or_none,
+ try_get,
+ unified_strdate,
+ urlencode_postdata,
+ urljoin,
+)
+
+
+class HKETVIE(InfoExtractor):
+    # Extractor for Educational Television (ETV) resource pages on
+    # hkedcity.net. Stream details come from a POST to the media handler under
+    # _APPS_BASE_URL; the handler can refuse access with a geo restriction
+    # message, which is reported against _GEO_COUNTRIES.
+    IE_NAME = 'hketv'
+    IE_DESC = '香港教育局教育電視 (HKETV) Educational Television, Hong Kong Educational Bureau'
+    _GEO_BYPASS = False
+    _GEO_COUNTRIES = ['HK']
+    _VALID_URL = r'https?://(?:www\.)?hkedcity\.net/etv/resource/(?P<id>[0-9]+)'
+    _TESTS = [{
+        'url': 'https://www.hkedcity.net/etv/resource/2932360618',
+        'md5': 'f193712f5f7abb208ddef3c5ea6ed0b7',
+        'info_dict': {
+            'id': '2932360618',
+            'ext': 'mp4',
+            'title': '喜閱一生(共享閱讀樂) (中、英文字幕可供選擇)',
+            'description': 'md5:d5286d05219ef50e0613311cbe96e560',
+            'upload_date': '20181024',
+            'duration': 900,
+            'subtitles': 'count:2',
+        },
+        'skip': 'Geo restricted to HK',
+    }, {
+        'url': 'https://www.hkedcity.net/etv/resource/972641418',
+        'md5': '1ed494c1c6cf7866a8290edad9b07dc9',
+        'info_dict': {
+            'id': '972641418',
+            'ext': 'mp4',
+            'title': '衣冠楚楚 (天使系列之一)',
+            'description': 'md5:10bb3d659421e74f58e5db5691627b0f',
+            'upload_date': '20070109',
+            'duration': 907,
+            'subtitles': {},
+        },
+        'params': {
+            'geo_verification_proxy': '<HK proxy here>',
+        },
+        'skip': 'Geo restricted to HK',
+    }]
+
+    # Maps the label of a subtitle track to the language code used as the
+    # subtitles key; unknown labels are used verbatim.
+    _CC_LANGS = {
+        '中文(繁體中文)': 'zh-Hant',
+        '中文(简体中文)': 'zh-Hans',
+        'English': 'en',
+        'Bahasa Indonesia': 'id',
+        '\u0939\u093f\u0928\u094d\u0926\u0940': 'hi',
+        '\u0928\u0947\u092a\u093e\u0932\u0940': 'ne',
+        'Tagalog': 'tl',
+        '\u0e44\u0e17\u0e22': 'th',
+        '\u0627\u0631\u062f\u0648': 'ur',
+    }
+    # Maps the label of a source to its video height.
+    _FORMAT_HEIGHTS = {
+        'SD': 360,
+        'HD': 720,
+    }
+    _APPS_BASE_URL = 'https://apps.hkedcity.net'
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+
+        # Try the title sources in turn; every lookup but the last one
+        # returns None on a miss so the next one gets a chance.
+        title = (
+            self._html_search_meta(
+                ('ed_title', 'search.ed_title'), webpage, default=None)
+            or self._search_regex(
+                # the requested group must match the name used in the pattern
+                r'data-favorite_title_(?:eng|chi)=(["\'])(?P<title>(?:(?!\1).)+)\1',
+                webpage, 'title', default=None, group='title')
+            or self._html_search_regex(
+                r'<h1>([^<]+)</h1>', webpage, 'title', default=None)
+            or self._og_search_title(webpage)
+        )
+
+        file_id = self._search_regex(
+            r'post_var\[["\']file_id["\']\s*\]\s*=\s*(.+?);',
+            webpage, 'file ID')
+        curr_url = self._search_regex(
+            r'post_var\[["\']curr_url["\']\s*\]\s*=\s*"(.+?)";',
+            webpage, 'curr URL')
+        data = {
+            'action': 'get_info',
+            'curr_url': curr_url,
+            'file_id': file_id,
+            'video_url': file_id,
+        }
+
+        response = self._download_json(
+            self._APPS_BASE_URL + '/media/play/handler.php', video_id,
+            data=urlencode_postdata(data),
+            headers=merge_dicts({
+                'Content-Type': 'application/x-www-form-urlencoded'},
+                self.geo_verification_headers()))
+
+        # Check for failure before touching the payload so that a refused
+        # request is reported as such rather than as a missing key.
+        if not response.get('success') or not response.get('access'):
+            # NOTE(review): access_err_msg may be absent, leaving error empty
+            error = clean_html(response.get('access_err_msg'))
+            if error and 'Video streaming is not available in your country' in error:
+                self.raise_geo_restricted(
+                    msg=error, countries=self._GEO_COUNTRIES)
+            else:
+                raise ExtractorError(
+                    error or 'Unable to access video', expected=True)
+
+        result = response['result']
+
+        formats = []
+
+        width = int_or_none(result.get('width'))
+        height = int_or_none(result.get('height'))
+
+        playlist0 = result['playlist'][0]
+        for fmt in playlist0['sources']:
+            file_url = urljoin(self._APPS_BASE_URL, fmt.get('file'))
+            if not file_url:
+                continue
+            # If we ever wanted to provide the final resolved URL that
+            # does not require cookies, albeit with a shorter lifespan:
+            #     urlh = self._downloader.urlopen(file_url)
+            #     resolved_url = urlh.geturl()
+            label = fmt.get('label')
+            h = self._FORMAT_HEIGHTS.get(label)
+            # derive the width from the known height and the aspect ratio
+            # reported for the whole result
+            w = h * width // height if h and width and height else None
+            formats.append({
+                'format_id': label,
+                'ext': fmt.get('type'),
+                'url': file_url,
+                'width': w,
+                'height': h,
+            })
+        self._sort_formats(formats)
+
+        subtitles = {}
+        tracks = try_get(playlist0, lambda x: x['tracks'], list) or []
+        for track in tracks:
+            if not isinstance(track, dict):
+                continue
+            track_kind = str_or_none(track.get('kind'))
+            if not track_kind or not isinstance(track_kind, compat_str):
+                continue
+            if track_kind.lower() not in ('captions', 'subtitles'):
+                continue
+            track_url = urljoin(self._APPS_BASE_URL, track.get('file'))
+            if not track_url:
+                continue
+            track_label = track.get('label')
+            subtitles.setdefault(self._CC_LANGS.get(
+                track_label, track_label), []).append({
+                    'url': self._proto_relative_url(track_url),
+                    'ext': 'srt',
+                })
+
+        # Likes (best effort: a failed request leaves like_count as None)
+        emotion = self._download_json(
+            'https://emocounter.hkedcity.net/handler.php', video_id,
+            data=urlencode_postdata({
+                'action': 'get_emotion',
+                'data[bucket_id]': 'etv',
+                'data[identifier]': video_id,
+            }),
+            headers={'Content-Type': 'application/x-www-form-urlencoded'},
+            fatal=False) or {}
+        like_count = int_or_none(try_get(
+            emotion, lambda x: x['data']['emotion_data'][0]['count']))
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': self._html_search_meta(
+                'description', webpage, fatal=False),
+            'upload_date': unified_strdate(self._html_search_meta(
+                'ed_date', webpage, fatal=False), day_first=False),
+            'duration': int_or_none(result.get('length')),
+            'formats': formats,
+            'subtitles': subtitles,
+            'thumbnail': urljoin(self._APPS_BASE_URL, result.get('image')),
+            'view_count': parse_count(result.get('view_count')),
+            'like_count': like_count,
+        }
import hashlib
import hmac
import time
+import uuid
from .common import InfoExtractor
-from ..compat import compat_HTTPError
+from ..compat import (
+ compat_HTTPError,
+ compat_str,
+)
from ..utils import (
determine_ext,
ExtractorError,
int_or_none,
+ str_or_none,
try_get,
+ url_or_none,
)
class HotStarBaseIE(InfoExtractor):
_AKAMAI_ENCRYPTION_KEY = b'\x05\xfc\x1a\x01\xca\xc9\x4b\xc4\x12\xfc\x53\x12\x07\x75\xf9\xee'
- def _call_api(self, path, video_id, query_name='contentId'):
+ def _call_api_impl(self, path, video_id, query):
+ # Requests api.hotstar.com/<path> with the given query and returns
+ # body.results. The 'hotstarauth' header carries a start/expiry token
+ # signed with HMAC-SHA256 using _AKAMAI_ENCRYPTION_KEY; the expiry is
+ # set 6000 seconds after the current time. Raises ExtractorError with the
+ # server's message when statusCode is not 'OK'.
st = int(time.time())
exp = st + 6000
auth = 'st=%d~exp=%d~acl=/*' % (st, exp)
auth += '~hmac=' + hmac.new(self._AKAMAI_ENCRYPTION_KEY, auth.encode(), hashlib.sha256).hexdigest()
response = self._download_json(
- 'https://api.hotstar.com/' + path,
- video_id, headers={
+ 'https://api.hotstar.com/' + path, video_id, headers={
'hotstarauth': auth,
'x-country-code': 'IN',
'x-platform-code': 'JIO',
- }, query={
- query_name: video_id,
- 'tas': 10000,
- })
+ }, query=query)
if response['statusCode'] != 'OK':
raise ExtractorError(
response['body']['message'], expected=True)
return response['body']['results']
+ def _call_api(self, path, video_id, query_name='contentId'):
+ # Calls <path> passing the video id as the query parameter named by
+ # query_name.
+ return self._call_api_impl(path, video_id, {
+ query_name: video_id,
+ 'tas': 10000,
+ })
+
+ def _call_api_v2(self, path, video_id):
+ # Calls <path>/in/contents/<video_id> with a fixed client description and
+ # a freshly generated random deviceId.
+ return self._call_api_impl(
+ '%s/in/contents/%s' % (path, video_id), video_id, {
+ 'desiredConfig': 'encryption:plain;ladder:phone,tv;package:hls,dash',
+ 'client': 'mweb',
+ 'clientVersion': '6.18.0',
+ 'deviceId': compat_str(uuid.uuid4()),
+ 'osName': 'Windows',
+ 'osVersion': '10',
+ })
+
class HotStarIE(HotStarBaseIE):
IE_NAME = 'hotstar'
}, {
'url': 'http://www.hotstar.com/1000000515',
'only_matching': True,
+ }, {
+ # only available via api v2
+ 'url': 'https://www.hotstar.com/tv/ek-bhram-sarvagun-sampanna/s-2116/janhvi-targets-suman/1000234847',
+ 'only_matching': True,
}]
_GEO_BYPASS = False
raise ExtractorError('This video is DRM protected.', expected=True)
formats = []
- format_data = self._call_api('h/v1/play', video_id)['item']
- format_url = format_data['playbackUrl']
- ext = determine_ext(format_url)
- if ext == 'm3u8':
+ geo_restricted = False
+ playback_sets = self._call_api_v2('h/v2/play', video_id)['playBackSets']
+ for playback_set in playback_sets:
+ if not isinstance(playback_set, dict):
+ continue
+ format_url = url_or_none(playback_set.get('playbackUrl'))
+ if not format_url:
+ continue
+ tags = str_or_none(playback_set.get('tagsCombination')) or ''
+ if tags and 'encryption:plain' not in tags:
+ continue
+ ext = determine_ext(format_url)
try:
- formats.extend(self._extract_m3u8_formats(
- format_url, video_id, 'mp4', m3u8_id='hls'))
+ if 'package:hls' in tags or ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ format_url, video_id, 'mp4', m3u8_id='hls'))
+ elif 'package:dash' in tags or ext == 'mpd':
+ formats.extend(self._extract_mpd_formats(
+ format_url, video_id, mpd_id='dash'))
+ elif ext == 'f4m':
+ # produce broken files
+ pass
+ else:
+ formats.append({
+ 'url': format_url,
+ 'width': int_or_none(playback_set.get('width')),
+ 'height': int_or_none(playback_set.get('height')),
+ })
except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
- self.raise_geo_restricted(countries=['IN'])
- raise
- elif ext == 'f4m':
- # produce broken files
- pass
- else:
- formats.append({
- 'url': format_url,
- 'width': int_or_none(format_data.get('width')),
- 'height': int_or_none(format_data.get('height')),
- })
+ geo_restricted = True
+ continue
+ if not formats and geo_restricted:
+ self.raise_geo_restricted(countries=['IN'])
self._sort_formats(formats)
return {
language=self._APP_LANGUAGE,
application_id=self._APP_PUBLICATION_ID)
- self._login_url = (modules['user']['resources']['login']['uri'] +
- '/format/json').format(session_id=self._session_id)
+ self._login_url = (modules['user']['resources']['login']['uri']
+ + '/format/json').format(session_id=self._session_id)
self._logout_url = modules['user']['resources']['logout']['uri']
from __future__ import unicode_literals
-import json
-import time
-
from .common import InfoExtractor
-from ..compat import compat_urllib_parse_urlencode
-from ..utils import (
- ExtractorError,
- sanitized_Request,
-)
+from ..utils import int_or_none
class HypemIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?hypem\.com/track/(?P<id>[^/]+)/'
+ _VALID_URL = r'https?://(?:www\.)?hypem\.com/track/(?P<id>[0-9a-z]{5})'
_TEST = {
'url': 'http://hypem.com/track/1v6ga/BODYWORK+-+TAME',
'md5': 'b9cc91b5af8995e9f0c1cee04c575828',
'ext': 'mp3',
'title': 'Tame',
'uploader': 'BODYWORK',
+ 'timestamp': 1371810457,
+ 'upload_date': '20130621',
}
}
def _real_extract(self, url):
track_id = self._match_id(url)
- data = {'ax': 1, 'ts': time.time()}
- request = sanitized_Request(url + '?' + compat_urllib_parse_urlencode(data))
- response, urlh = self._download_webpage_handle(
- request, track_id, 'Downloading webpage with the url')
-
- html_tracks = self._html_search_regex(
- r'(?ms)<script type="application/json" id="displayList-data">(.+?)</script>',
- response, 'tracks')
- try:
- track_list = json.loads(html_tracks)
- track = track_list['tracks'][0]
- except ValueError:
- raise ExtractorError('Hypemachine contained invalid JSON.')
-
- key = track['key']
+ response = self._download_webpage(url, track_id)
+
+ # Track metadata is embedded in the page as JSON inside the
+ # displayList-data script tag; only the first listed track is used.
+ track = self._parse_json(self._html_search_regex(
+ r'(?s)<script\s+type="application/json"\s+id="displayList-data">(.+?)</script>',
+ response, 'tracks'), track_id)['tracks'][0]
+
track_id = track['id']
title = track['song']
- request = sanitized_Request(
- 'http://hypem.com/serve/source/%s/%s' % (track_id, key),
- '', {'Content-Type': 'application/json'})
- song_data = self._download_json(request, track_id, 'Downloading metadata')
- final_url = song_data['url']
- artist = track.get('artist')
+ # The media URL comes from a second request addressed by the track's
+ # id and key.
+ final_url = self._download_json(
+ 'http://hypem.com/serve/source/%s/%s' % (track_id, track['key']),
+ track_id, 'Downloading metadata', headers={
+ 'Content-Type': 'application/json'
+ })['url']
return {
'id': track_id,
'url': final_url,
'ext': 'mp3',
'title': title,
- 'uploader': artist,
+ 'uploader': track.get('artist'),
+ 'duration': int_or_none(track.get('time')),
+ 'timestamp': int_or_none(track.get('ts')),
+ 'track': title,
}
}, {
'url': 'https://i.imgur.com/crGpqCV.mp4',
'only_matching': True,
+ }, {
+ # no title
+ 'url': 'https://i.imgur.com/jxBXAMC.gifv',
+ 'only_matching': True,
}]
def _real_extract(self, url):
return {
'id': video_id,
'formats': formats,
- 'title': self._og_search_title(webpage),
+ 'title': self._og_search_title(webpage, default=video_id),
}
# coding: utf-8
from __future__ import unicode_literals
-import re
-
from .common import InfoExtractor
+from ..utils import (
+ determine_ext,
+ int_or_none,
+ strip_or_none,
+ xpath_attr,
+ xpath_text,
+)
class InaIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?ina\.fr/video/(?P<id>I?[A-Z0-9]+)'
- _TEST = {
+ # Accepts both /video/ and /audio/ pages; ids may contain underscores.
+ _VALID_URL = r'https?://(?:www\.)?ina\.fr/(?:video|audio)/(?P<id>[A-Z0-9_]+)'
+ _TESTS = [{
'url': 'http://www.ina.fr/video/I12055569/francois-hollande-je-crois-que-c-est-clair-video.html',
'md5': 'a667021bf2b41f8dc6049479d9bb38a3',
'info_dict': {
'id': 'I12055569',
'ext': 'mp4',
'title': 'François Hollande "Je crois que c\'est clair"',
+ 'description': 'md5:3f09eb072a06cb286b8f7e4f77109663',
}
- }
+ }, {
+ 'url': 'https://www.ina.fr/video/S806544_001/don-d-organes-des-avancees-mais-d-importants-besoins-video.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.ina.fr/audio/P16173408',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.ina.fr/video/P16173408-video.html',
+ 'only_matching': True,
+ }]
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
-
- video_id = mobj.group('id')
- mrss_url = 'http://player.ina.fr/notices/%s.mrss' % video_id
- info_doc = self._download_xml(mrss_url, video_id)
+ video_id = self._match_id(url)
+ info_doc = self._download_xml(
+ 'http://player.ina.fr/notices/%s.mrss' % video_id, video_id)
+ item = info_doc.find('channel/item')
+ title = xpath_text(item, 'title', fatal=True)
+ # helper that qualifies a tag name with the
+ # http://search.yahoo.com/mrss/ namespace (via _xpath_ns)
+ media_ns_xpath = lambda x: self._xpath_ns(x, 'http://search.yahoo.com/mrss/')
+ content = item.find(media_ns_xpath('content'))
- self.report_extraction(video_id)
+ # reads the url attribute of the namespaced child <x> of <content>
+ get_furl = lambda x: xpath_attr(content, media_ns_xpath(x), 'url')
+ formats = []
+ # probe the bq/mq/hq children, each paired with fixed dimensions
+ for q, w, h in (('bq', 400, 300), ('mq', 512, 384), ('hq', 768, 576)):
+ q_url = get_furl(q)
+ if not q_url:
+ continue
+ formats.append({
+ 'format_id': q,
+ 'url': q_url,
+ 'width': w,
+ 'height': h,
+ })
+ if not formats:
+ # no per-quality URL: fall back to the player URL, then to the
+ # url attribute of <content> itself; mp3 links are flagged as
+ # having no video codec
+ furl = get_furl('player') or content.attrib['url']
+ ext = determine_ext(furl)
+ formats = [{
+ 'url': furl,
+ 'vcodec': 'none' if ext == 'mp3' else None,
+ 'ext': ext,
+ }]
- video_url = info_doc.find('.//{http://search.yahoo.com/mrss/}player').attrib['url']
+ thumbnails = []
+ for thumbnail in content.findall(media_ns_xpath('thumbnail')):
+ thumbnail_url = thumbnail.get('url')
+ if not thumbnail_url:
+ continue
+ thumbnails.append({
+ 'url': thumbnail_url,
+ 'height': int_or_none(thumbnail.get('height')),
+ 'width': int_or_none(thumbnail.get('width')),
+ })
return {
'id': video_id,
- 'url': video_url,
- 'title': info_doc.find('.//title').text,
+ 'formats': formats,
+ 'title': title,
+ 'description': strip_or_none(xpath_text(item, 'description')),
+ 'thumbnails': thumbnails,
}
formats = self._extract_bokecc_formats(webpage, video_id)
else:
formats = (
- self._extract_rtmp_video(webpage) +
- self._extract_http_video(webpage) +
- self._extract_http_audio(webpage, video_id))
+ self._extract_rtmp_video(webpage)
+ + self._extract_http_video(webpage)
+ + self._extract_http_audio(webpage, video_id))
self._sort_formats(formats)
}
-class InstagramUserIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?instagram\.com/(?P<id>[^/]{2,})/?(?:$|[?#])'
- IE_DESC = 'Instagram user profile'
- IE_NAME = 'instagram:user'
- _TEST = {
- 'url': 'https://instagram.com/porsche',
- 'info_dict': {
- 'id': 'porsche',
- 'title': 'porsche',
- },
- 'playlist_count': 5,
- 'params': {
- 'extract_flat': True,
- 'skip_download': True,
- 'playlistend': 5,
- }
- }
+class InstagramPlaylistIE(InfoExtractor):
+ # A superclass for handling any kind of query based on GraphQL which
+ # results in a playlist.
+
+ _gis_tmpl = None # used to cache GIS request type
- _gis_tmpl = None
+ def _parse_graphql(self, webpage, item_id):
+ # Reads a webpage and returns its GraphQL data.
+ return self._parse_json(
+ self._search_regex(
+ r'sharedData\s*=\s*({.+?})\s*;\s*[<\n]', webpage, 'data'),
+ item_id)
- def _entries(self, data):
+ def _extract_graphql(self, data, url):
+ # Parses GraphQL queries containing videos and generates a playlist.
def get_count(suffix):
return int_or_none(try_get(
node, lambda x: x['edge_media_' + suffix]['count']))
- uploader_id = data['entry_data']['ProfilePage'][0]['graphql']['user']['id']
+ uploader_id = self._match_id(url)
csrf_token = data['config']['csrf_token']
rhx_gis = data.get('rhx_gis') or '3c7ca9dcefcf966d11dacf1f151335e8'
- self._set_cookie('instagram.com', 'ig_pr', '1')
-
cursor = ''
for page_num in itertools.count(1):
- variables = json.dumps({
- 'id': uploader_id,
+ variables = {
'first': 12,
'after': cursor,
- })
+ }
+ variables.update(self._query_vars_for(data))
+ variables = json.dumps(variables)
if self._gis_tmpl:
gis_tmpls = [self._gis_tmpl]
'%s:%s:%s' % (rhx_gis, csrf_token, std_headers['User-Agent']),
]
+ # try all of the ways to generate a GIS query, and not only use the
+ # first one that works, but cache it for future requests
for gis_tmpl in gis_tmpls:
try:
- media = self._download_json(
+ json_data = self._download_json(
'https://www.instagram.com/graphql/query/', uploader_id,
'Downloading JSON page %d' % page_num, headers={
'X-Requested-With': 'XMLHttpRequest',
'X-Instagram-GIS': hashlib.md5(
('%s:%s' % (gis_tmpl, variables)).encode('utf-8')).hexdigest(),
}, query={
- 'query_hash': '42323d64886122307be10013ad2dcc44',
+ 'query_hash': self._QUERY_HASH,
'variables': variables,
- })['data']['user']['edge_owner_to_timeline_media']
+ })
+ media = self._parse_timeline_from(json_data)
self._gis_tmpl = gis_tmpl
break
except ExtractorError as e:
+ # if it's an error caused by a bad query, and there are
+ # more GIS templates to try, ignore it and keep trying
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
if gis_tmpl != gis_tmpls[-1]:
continue
break
def _real_extract(self, url):
- username = self._match_id(url)
-
- webpage = self._download_webpage(url, username)
+ user_or_tag = self._match_id(url)
+ webpage = self._download_webpage(url, user_or_tag)
+ data = self._parse_graphql(webpage, user_or_tag)
- data = self._parse_json(
- self._search_regex(
- r'sharedData\s*=\s*({.+?})\s*;\s*[<\n]', webpage, 'data'),
- username)
+ self._set_cookie('instagram.com', 'ig_pr', '1')
return self.playlist_result(
- self._entries(data), username, username)
+ self._extract_graphql(data, url), user_or_tag, user_or_tag)
+
+
+class InstagramUserIE(InstagramPlaylistIE):
+    # Playlist of a user's timeline media, built on the GraphQL paging logic
+    # of InstagramPlaylistIE.
+    _VALID_URL = r'https?://(?:www\.)?instagram\.com/(?P<id>[^/]{2,})/?(?:$|[?#])'
+    IE_DESC = 'Instagram user profile'
+    IE_NAME = 'instagram:user'
+    _TEST = {
+        'url': 'https://instagram.com/porsche',
+        'info_dict': {
+            'id': 'porsche',
+            'title': 'porsche',
+        },
+        'playlist_count': 5,
+        'params': {
+            'extract_flat': True,
+            'skip_download': True,
+            'playlistend': 5,
+        }
+    }
+
+    # Value sent as the query_hash parameter of the GraphQL request. It must
+    # be a plain string: a trailing comma here would turn it into a 1-tuple.
+    _QUERY_HASH = '42323d64886122307be10013ad2dcc44'
+
+    @staticmethod
+    def _parse_timeline_from(data):
+        # extracts the media timeline data from a GraphQL result
+        return data['data']['user']['edge_owner_to_timeline_media']
+
+    @staticmethod
+    def _query_vars_for(data):
+        # returns a dictionary of variables to add to the timeline query based
+        # on the GraphQL of the original page
+        return {
+            'id': data['entry_data']['ProfilePage'][0]['graphql']['user']['id']
+        }
+
+
+class InstagramTagIE(InstagramPlaylistIE):
+    # Playlist of the media listed for a hashtag, built on the GraphQL paging
+    # logic of InstagramPlaylistIE.
+    _VALID_URL = r'https?://(?:www\.)?instagram\.com/explore/tags/(?P<id>[^/]+)'
+    IE_DESC = 'Instagram hashtag search'
+    IE_NAME = 'instagram:tag'
+    _TEST = {
+        'url': 'https://instagram.com/explore/tags/lolcats',
+        'info_dict': {
+            'id': 'lolcats',
+            'title': 'lolcats',
+        },
+        'playlist_count': 50,
+        'params': {
+            'extract_flat': True,
+            'skip_download': True,
+            'playlistend': 50,
+        }
+    }
+
+    # Value sent as the query_hash parameter of the GraphQL request. It must
+    # be a plain string: a trailing comma here would turn it into a 1-tuple.
+    _QUERY_HASH = 'f92f56d47dc7a55b606908374b43a314'
+
+    @staticmethod
+    def _parse_timeline_from(data):
+        # extracts the media timeline data from a GraphQL result
+        return data['data']['hashtag']['edge_hashtag_to_media']
+
+    @staticmethod
+    def _query_vars_for(data):
+        # returns a dictionary of variables to add to the timeline query based
+        # on the GraphQL of the original page
+        return {
+            'tag_name':
+                data['entry_data']['TagPage'][0]['graphql']['hashtag']['name']
+        }
self._sleep(5, video_id)
self._sort_formats(formats)
- title = (get_element_by_id('widget-videotitle', webpage) or
- clean_html(get_element_by_attribute('class', 'mod-play-tit', webpage)) or
- self._html_search_regex(r'<span[^>]+data-videochanged-title="word"[^>]*>([^<]+)</span>', webpage, 'title'))
+ title = (get_element_by_id('widget-videotitle', webpage)
+ or clean_html(get_element_by_attribute('class', 'mod-play-tit', webpage))
+ or self._html_search_regex(r'<span[^>]+data-videochanged-title="word"[^>]*>([^<]+)</span>', webpage, 'title'))
return {
'id': video_id,
return etree.SubElement(element, _add_ns(name))
production_id = (
- params.get('data-video-autoplay-id') or
- '%s#001' % (
- params.get('data-video-episode-id') or
- video_id.replace('a', '/')))
+ params.get('data-video-autoplay-id')
+ or '%s#001' % (
+ params.get('data-video-episode-id')
+ or video_id.replace('a', '/')))
req_env = etree.Element(_add_ns('soapenv:Envelope'))
_add_sub_element(req_env, 'soapenv:Header')
class JWPlatformIE(InfoExtractor):
- _VALID_URL = r'(?:https?://(?:content\.jwplatform|cdn\.jwplayer)\.com/(?:(?:feed|player|thumb|preview|video|manifest)s|jw6|v2/media)/|jwplatform:)(?P<id>[a-zA-Z0-9]{8})'
+ _VALID_URL = r'(?:https?://(?:content\.jwplatform|cdn\.jwplayer)\.com/(?:(?:feed|player|thumb|preview|video)s|jw6|v2/media)/|jwplatform:)(?P<id>[a-zA-Z0-9]{8})'
_TESTS = [{
'url': 'http://content.jwplatform.com/players/nPripu9l-ALJ3XQCI.js',
'md5': 'fa8899fa601eb7c83a64e9d568bdf325',
(?P<q2>['"])_?(?P<partner_id>(?:(?!(?P=q2)).)+)(?P=q2),.*?
(?P<q3>['"])entry_?[Ii]d(?P=q3)\s*:\s*
(?P<q4>['"])(?P<id>(?:(?!(?P=q4)).)+)(?P=q4)(?:,|\s*\})
- """, webpage) or
- re.search(
+ """, webpage)
+ or re.search(
r'''(?xs)
(?P<q1>["'])
(?:https?:)?//cdnapi(?:sec)?\.kaltura\.com(?::\d+)?/(?:(?!(?P=q1)).)*\b(?:p|partner_id)/(?P<partner_id>\d+)(?:(?!(?P=q1)).)*
\[\s*(?P<q2_1>["'])entry_?[Ii]d(?P=q2_1)\s*\]\s*=\s*
)
(?P<q3>["'])(?P<id>(?:(?!(?P=q3)).)+)(?P=q3)
- ''', webpage) or
- re.search(
+ ''', webpage)
+ or re.search(
r'''(?xs)
<(?:iframe[^>]+src|meta[^>]+\bcontent)=(?P<q1>["'])
(?:https?:)?//(?:(?:www|cdnapi(?:sec)?)\.)?kaltura\.com/(?:(?!(?P=q1)).)*\b(?:p|partner_id)/(?P<partner_id>\d+)
)
if mobj:
embed_info = mobj.groupdict()
+ for k, v in embed_info.items():
+ embed_info[k] = v.strip()
url = 'kaltura:%(partner_id)s:%(id)s' % embed_info
escaped_pid = re.escape(embed_info['partner_id'])
service_url = re.search(
webpage = self._download_webpage(url, video_id)
- title = (self._html_search_meta('title', webpage, default=None) or
- self._search_regex(r'<h1 class="title">([^<]+)</h1>', webpage, 'video title'))
+ title = (self._html_search_meta('title', webpage, default=None)
+ or self._search_regex(r'<h1 class="title">([^<]+)</h1>', webpage, 'video title'))
video_id = self._search_regex(
r'/config/video/(.+?)\.xml', webpage, 'video id')
'title': 'Ali',
},
'playlist_mincount': 95,
- 'skip': 'Regularly stalls travis build', # See https://travis-ci.org/rg3/youtube-dl/jobs/78878540
+ 'skip': 'Regularly stalls travis build', # See https://travis-ci.org/ytdl-org/youtube-dl/jobs/78878540
}]
PAGE_SIZE = 15
# coding: utf-8
from __future__ import unicode_literals
-import json
import re
from .common import InfoExtractor
from ..utils import (
+ clean_html,
+ get_element_by_class,
parse_duration,
+ strip_or_none,
unified_strdate,
)
'id': '6385796',
'ext': 'mp3',
'title': "Champion Minded - Developing a Growth Mindset",
- 'description': 'In this episode, Allistair talks about the importance of developing a growth mindset, not only in sports, but in life too.',
+ # description fetched using another request:
+ # http://html5-player.libsyn.com/embed/getitemdetails?item_id=6385796
+ # 'description': 'In this episode, Allistair talks about the importance of developing a growth mindset, not only in sports, but in life too.',
'upload_date': '20180320',
'thumbnail': 're:^https?://.*',
},
}]
def _real_extract(self, url):
- m = re.match(self._VALID_URL, url)
- video_id = m.group('id')
- url = m.group('mainurl')
+ url, video_id = re.match(self._VALID_URL, url).groups()
webpage = self._download_webpage(url, video_id)
- podcast_title = self._search_regex(
- r'<h3>([^<]+)</h3>', webpage, 'podcast title', default=None)
- if podcast_title:
- podcast_title = podcast_title.strip()
- episode_title = self._search_regex(
- r'(?:<div class="episode-title">|<h4>)([^<]+)</', webpage, 'episode title')
- if episode_title:
- episode_title = episode_title.strip()
+ # the episode metadata is embedded in the page as the playlistItem
+ # JavaScript object
+ data = self._parse_json(self._search_regex(
+ r'var\s+playlistItem\s*=\s*({.+?});',
+ webpage, 'JSON data block'), video_id)
+
+ episode_title = data.get('item_title') or get_element_by_class('episode-title', webpage)
+ if not episode_title:
+ # last resort: take the title from the data-title attribute or the
+ # <title> tag; the result has to be kept, otherwise the strip()
+ # below is called on an empty value
+ episode_title = self._search_regex(
+ [r'data-title="([^"]+)"', r'<title>(.+?)</title>'],
+ webpage, 'episode title')
+ episode_title = episode_title.strip()
+
+ podcast_title = strip_or_none(clean_html(self._search_regex(
+ r'<h3>([^<]+)</h3>', webpage, 'podcast title',
+ default=None) or get_element_by_class('podcast-title', webpage)))
title = '%s - %s' % (podcast_title, episode_title) if podcast_title else episode_title
+ # collect every media URL present in the data block; absent keys are
+ # skipped
+ formats = []
+ for k, format_id in (('media_url_libsyn', 'libsyn'), ('media_url', 'main'), ('download_link', 'download')):
+ f_url = data.get(k)
+ if not f_url:
+ continue
+ formats.append({
+ 'url': f_url,
+ 'format_id': format_id,
+ })
+
description = self._html_search_regex(
r'<p\s+id="info_text_body">(.+?)</p>', webpage,
'description', default=None)
# Strip non-breaking and normal spaces
description = description.replace('\u00A0', ' ').strip()
release_date = unified_strdate(self._search_regex(
- r'<div class="release_date">Released: ([^<]+)<', webpage, 'release date', fatal=False))
-
- data_json = self._search_regex(r'var\s+playlistItem\s*=\s*(\{.*?\});\n', webpage, 'JSON data block')
- data = json.loads(data_json)
-
- formats = [{
- 'url': data['media_url'],
- 'format_id': 'main',
- }, {
- 'url': data['media_url_libsyn'],
- 'format_id': 'libsyn',
- }]
- thumbnail = data.get('thumbnail_url')
- duration = parse_duration(data.get('duration'))
+ r'<div class="release_date">Released: ([^<]+)<',
+ webpage, 'release date', default=None) or data.get('release_date'))
return {
'id': video_id,
'title': title,
'description': description,
- 'thumbnail': thumbnail,
+ 'thumbnail': data.get('thumbnail_url'),
'upload_date': release_date,
- 'duration': duration,
+ 'duration': parse_duration(data.get('duration')),
'formats': formats,
}
float_or_none,
int_or_none,
urlencode_postdata,
+ urljoin,
)
class LinkedInLearningBaseIE(InfoExtractor):
_NETRC_MACHINE = 'linkedin'
+ _LOGIN_URL = 'https://www.linkedin.com/uas/login?trk=learning'
def _call_api(self, course_slug, fields, video_slug=None, resolution=None):
query = {
'Csrf-Token': self._get_cookies(api_url)['JSESSIONID'].value,
}, query=query)['elements'][0]
- def _get_video_id(self, urn, course_slug, video_slug):
+ def _get_urn_id(self, video_data):
+ urn = video_data.get('urn')
if urn:
mobj = re.search(r'urn:li:lyndaCourse:\d+,(\d+)', urn)
if mobj:
return mobj.group(1)
- return '%s/%s' % (course_slug, video_slug)
+
+ def _get_video_id(self, video_data, course_slug, video_slug):
+ return self._get_urn_id(video_data) or '%s/%s' % (course_slug, video_slug)
def _real_initialize(self):
email, password = self._get_login_info()
return
login_page = self._download_webpage(
- 'https://www.linkedin.com/uas/login?trk=learning',
- None, 'Downloading login page')
- action_url = self._search_regex(
+ self._LOGIN_URL, None, 'Downloading login page')
+ action_url = urljoin(self._LOGIN_URL, self._search_regex(
r'<form[^>]+action=(["\'])(?P<url>.+?)\1', login_page, 'post url',
- default='https://www.linkedin.com/uas/login-submit', group='url')
+ default='https://www.linkedin.com/uas/login-submit', group='url'))
data = self._hidden_inputs(login_page)
data.update({
'session_key': email,
self._sort_formats(formats, ('width', 'height', 'source_preference', 'tbr', 'abr'))
return {
- 'id': self._get_video_id(video_data.get('urn'), course_slug, video_slug),
+ 'id': self._get_video_id(video_data, course_slug, video_slug),
'title': title,
'formats': formats,
'thumbnail': video_data.get('defaultThumbnail'),
course_data = self._call_api(course_slug, 'chapters,description,title')
entries = []
- for chapter in course_data.get('chapters', []):
+ for chapter_number, chapter in enumerate(course_data.get('chapters', []), 1):
chapter_title = chapter.get('title')
+ chapter_id = self._get_urn_id(chapter)
for video in chapter.get('videos', []):
video_slug = video.get('slug')
if not video_slug:
continue
entries.append({
'_type': 'url_transparent',
- 'id': self._get_video_id(video.get('urn'), course_slug, video_slug),
+ 'id': self._get_video_id(video, course_slug, video_slug),
'title': video.get('title'),
'url': 'https://www.linkedin.com/learning/%s/%s' % (course_slug, video_slug),
'chapter': chapter_title,
+ 'chapter_number': chapter_number,
+ 'chapter_id': chapter_id,
'ie_key': LinkedInLearningIE.ie_key(),
})
--- /dev/null
+from __future__ import unicode_literals
+
+import json
+import random
+import re
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_b64decode,
+ compat_HTTPError,
+ compat_str,
+)
+from ..utils import (
+ ExtractorError,
+ orderedSet,
+ unescapeHTML,
+ urlencode_postdata,
+ urljoin,
+)
+
+
+class LinuxAcademyIE(InfoExtractor):
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:www\.)?linuxacademy\.com/cp/
+ (?:
+ courses/lesson/course/(?P<chapter_id>\d+)/lesson/(?P<lesson_id>\d+)|
+ modules/view/id/(?P<course_id>\d+)
+ )
+ '''
+ _TESTS = [{
+ 'url': 'https://linuxacademy.com/cp/courses/lesson/course/1498/lesson/2/module/154',
+ 'info_dict': {
+ 'id': '1498-2',
+ 'ext': 'mp4',
+ 'title': "Introduction to the Practitioner's Brief",
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'skip': 'Requires Linux Academy account credentials',
+ }, {
+ 'url': 'https://linuxacademy.com/cp/courses/lesson/course/1498/lesson/2',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://linuxacademy.com/cp/modules/view/id/154',
+ 'info_dict': {
+ 'id': '154',
+ 'title': 'AWS Certified Cloud Practitioner',
+ 'description': 'md5:039db7e60e4aac9cf43630e0a75fa834',
+ },
+ 'playlist_count': 41,
+ 'skip': 'Requires Linux Academy account credentials',
+ }]
+
+ _AUTHORIZE_URL = 'https://login.linuxacademy.com/authorize'
+ _ORIGIN_URL = 'https://linuxacademy.com'
+ _CLIENT_ID = 'KaWxNn1C2Gc7n83W9OFeXltd8Utb5vvx'
+ _NETRC_MACHINE = 'linuxacademy'
+
+ def _real_initialize(self):
+ self._login()
+
+ def _login(self):
+ username, password = self._get_login_info()
+ if username is None:
+ return
+
+ def random_string():
+ return ''.join([
+ random.choice('0123456789ABCDEFGHIJKLMNOPQRSTUVXYZabcdefghijklmnopqrstuvwxyz-._~')
+ for _ in range(32)])
+
+ webpage, urlh = self._download_webpage_handle(
+ self._AUTHORIZE_URL, None, 'Downloading authorize page', query={
+ 'client_id': self._CLIENT_ID,
+ 'response_type': 'token id_token',
+ 'redirect_uri': self._ORIGIN_URL,
+ 'scope': 'openid email user_impersonation profile',
+ 'audience': self._ORIGIN_URL,
+ 'state': random_string(),
+ 'nonce': random_string(),
+ })
+
+ login_data = self._parse_json(
+ self._search_regex(
+ r'atob\(\s*(["\'])(?P<value>(?:(?!\1).)+)\1', webpage,
+ 'login info', group='value'), None,
+ transform_source=lambda x: compat_b64decode(x).decode('utf-8')
+ )['extraParams']
+
+ login_data.update({
+ 'client_id': self._CLIENT_ID,
+ 'redirect_uri': self._ORIGIN_URL,
+ 'tenant': 'lacausers',
+ 'connection': 'Username-Password-Authentication',
+ 'username': username,
+ 'password': password,
+ 'sso': 'true',
+ })
+
+ login_state_url = compat_str(urlh.geturl())
+
+ try:
+ login_page = self._download_webpage(
+ 'https://login.linuxacademy.com/usernamepassword/login', None,
+ 'Downloading login page', data=json.dumps(login_data).encode(),
+ headers={
+ 'Content-Type': 'application/json',
+ 'Origin': 'https://login.linuxacademy.com',
+ 'Referer': login_state_url,
+ })
+ except ExtractorError as e:
+ if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
+ error = self._parse_json(e.cause.read(), None)
+ message = error.get('description') or error['code']
+ raise ExtractorError(
+ '%s said: %s' % (self.IE_NAME, message), expected=True)
+ raise
+
+ callback_page, urlh = self._download_webpage_handle(
+ 'https://login.linuxacademy.com/login/callback', None,
+ 'Downloading callback page',
+ data=urlencode_postdata(self._hidden_inputs(login_page)),
+ headers={
+ 'Content-Type': 'application/x-www-form-urlencoded',
+ 'Origin': 'https://login.linuxacademy.com',
+ 'Referer': login_state_url,
+ })
+
+ access_token = self._search_regex(
+ r'access_token=([^=&]+)', compat_str(urlh.geturl()),
+ 'access token')
+
+ self._download_webpage(
+ 'https://linuxacademy.com/cp/login/tokenValidateLogin/token/%s'
+ % access_token, None, 'Downloading token validation page')
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ chapter_id, lecture_id, course_id = mobj.group('chapter_id', 'lesson_id', 'course_id')
+ item_id = course_id if course_id else '%s-%s' % (chapter_id, lecture_id)
+
+ webpage = self._download_webpage(url, item_id)
+
+ # course path
+ if course_id:
+ entries = [
+ self.url_result(
+ urljoin(url, lesson_url), ie=LinuxAcademyIE.ie_key())
+ for lesson_url in orderedSet(re.findall(
+ r'<a[^>]+\bhref=["\'](/cp/courses/lesson/course/\d+/lesson/\d+/module/\d+)',
+ webpage))]
+ title = unescapeHTML(self._html_search_regex(
+ (r'class=["\']course-title["\'][^>]*>(?P<value>[^<]+)',
+ r'var\s+title\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1'),
+ webpage, 'title', default=None, group='value'))
+ description = unescapeHTML(self._html_search_regex(
+ r'var\s+description\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1',
+ webpage, 'description', default=None, group='value'))
+ return self.playlist_result(entries, course_id, title, description)
+
+ # single video path
+ info = self._extract_jwplayer_data(
+ webpage, item_id, require_title=False, m3u8_id='hls',)
+ title = self._search_regex(
+ (r'>Lecture\s*:\s*(?P<value>[^<]+)',
+ r'lessonName\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1'), webpage,
+ 'title', group='value')
+ info.update({
+ 'id': item_id,
+ 'title': title,
+ })
+ return info
},
'skip': 'Video is dead',
}, {
- # Covers https://github.com/rg3/youtube-dl/pull/5983
+ # Covers https://github.com/ytdl-org/youtube-dl/pull/5983
# Multiple resolutions
'url': 'http://www.liveleak.com/view?i=801_1409392012',
'md5': 'c3a449dbaca5c0d1825caecd52a57d7b',
'thumbnail': r're:^https?://.*\.jpg$'
}
}, {
- # Covers https://github.com/rg3/youtube-dl/pull/10664#issuecomment-247439521
+ # Covers https://github.com/ytdl-org/youtube-dl/pull/10664#issuecomment-247439521
'url': 'http://m.liveleak.com/view?i=763_1473349649',
'add_ie': ['Youtube'],
'info_dict': {
}, {
'url': 'https://www.liveleak.com/view?t=HvHi_1523016227',
'only_matching': True,
+ }, {
+ # No original video
+ 'url': 'https://www.liveleak.com/view?t=C26ZZ_1558612804',
+ 'only_matching': True,
}]
@staticmethod
# Removing '.*.mp4' gives the raw video, which is essentially
# the same video without the LiveLeak logo at the top (see
- # https://github.com/rg3/youtube-dl/pull/4768)
+ # https://github.com/ytdl-org/youtube-dl/pull/4768)
orig_url = re.sub(r'\.mp4\.[^.]+', '', a_format['url'])
if a_format['url'] != orig_url:
format_id = a_format.get('format_id')
- formats.append({
- 'format_id': 'original' + ('-' + format_id if format_id else ''),
- 'url': orig_url,
- 'preference': 1,
- })
+ format_id = 'original' + ('-' + format_id if format_id else '')
+ if self._is_valid_url(orig_url, video_id, format_id):
+ formats.append({
+ 'format_id': format_id,
+ 'url': orig_url,
+ 'preference': 1,
+ })
self._sort_formats(formats)
info_dict['formats'] = formats
--- /dev/null
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import merge_dicts
+
+
+class MallTVIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?mall\.tv/(?:[^/]+/)*(?P<id>[^/?#&]+)'
+ _TESTS = [{
+ 'url': 'https://www.mall.tv/18-miliard-pro-neziskovky-opravdu-jsou-sportovci-nebo-clovek-v-tisni-pijavice',
+ 'md5': '1c4a37f080e1f3023103a7b43458e518',
+ 'info_dict': {
+ 'id': 't0zzt0',
+ 'display_id': '18-miliard-pro-neziskovky-opravdu-jsou-sportovci-nebo-clovek-v-tisni-pijavice',
+ 'ext': 'mp4',
+ 'title': '18 miliard pro neziskovky. Opravdu jsou sportovci nebo Člověk v tísni pijavice?',
+ 'description': 'md5:25fc0ec42a72ba602b602c683fa29deb',
+ 'duration': 216,
+ 'timestamp': 1538870400,
+ 'upload_date': '20181007',
+ 'view_count': int,
+ }
+ }, {
+ 'url': 'https://www.mall.tv/kdo-to-plati/18-miliard-pro-neziskovky-opravdu-jsou-sportovci-nebo-clovek-v-tisni-pijavice',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+
+ webpage = self._download_webpage(
+ url, display_id, headers=self.geo_verification_headers())
+
+ SOURCE_RE = r'(<source[^>]+\bsrc=(?:(["\'])(?:(?!\2).)+|[^\s]+)/(?P<id>[\da-z]+)/index)\b'
+ video_id = self._search_regex(
+ SOURCE_RE, webpage, 'video id', group='id')
+
+ media = self._parse_html5_media_entries(
+ url, re.sub(SOURCE_RE, r'\1.m3u8', webpage), video_id,
+ m3u8_id='hls', m3u8_entry_protocol='m3u8_native')[0]
+
+ info = self._search_json_ld(webpage, video_id, default={})
+
+ return merge_dicts(media, info, {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'title': self._og_search_title(webpage, default=None) or display_id,
+ 'description': self._og_search_description(webpage, default=None),
+ 'thumbnail': self._og_search_thumbnail(webpage, default=None),
+ })
ExtractorError,
float_or_none,
mimetype2ext,
+ str_or_none,
+ try_get,
unescapeHTML,
unsmuggle_url,
url_or_none,
)
+_ID_RE = r'(?:[0-9a-f]{32,34}|[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12,14})'
+
+
class MediasiteIE(InfoExtractor):
- _VALID_URL = r'(?xi)https?://[^/]+/Mediasite/(?:Play|Showcase/(?:default|livebroadcast)/Presentation)/(?P<id>[0-9a-f]{32,34})(?P<query>\?[^#]+|)'
+ _VALID_URL = r'(?xi)https?://[^/]+/Mediasite/(?:Play|Showcase/(?:default|livebroadcast)/Presentation)/(?P<id>%s)(?P<query>\?[^#]+|)' % _ID_RE
_TESTS = [
{
'url': 'https://hitsmediaweb.h-its.org/mediasite/Play/2db6c271681e4f199af3c60d1f82869b1d',
'url': 'https://mediasite.ntnu.no/Mediasite/Showcase/default/Presentation/7d8b913259334b688986e970fae6fcb31d',
'only_matching': True,
},
+ {
+ # dashed id
+ 'url': 'https://hitsmediaweb.h-its.org/mediasite/Play/2db6c271-681e-4f19-9af3-c60d1f82869b1d',
+ 'only_matching': True,
+ }
]
# look in Mediasite.Core.js (Mediasite.ContentStreamType[*])
return [
unescapeHTML(mobj.group('url'))
for mobj in re.finditer(
- r'(?xi)<iframe\b[^>]+\bsrc=(["\'])(?P<url>(?:(?:https?:)?//[^/]+)?/Mediasite/Play/[0-9a-f]{32,34}(?:\?.*?)?)\1',
+ r'(?xi)<iframe\b[^>]+\bsrc=(["\'])(?P<url>(?:(?:https?:)?//[^/]+)?/Mediasite/Play/%s(?:\?.*?)?)\1' % _ID_RE,
webpage)]
def _real_extract(self, url):
'formats': formats,
'thumbnails': thumbnails,
}
+
+
+class MediasiteCatalogIE(InfoExtractor):
+ _VALID_URL = r'''(?xi)
+ (?P<url>https?://[^/]+/Mediasite)
+ /Catalog/Full/
+ (?P<catalog_id>{0})
+ (?:
+ /(?P<current_folder_id>{0})
+ /(?P<root_dynamic_folder_id>{0})
+ )?
+ '''.format(_ID_RE)
+ _TESTS = [{
+ 'url': 'http://events7.mediasite.com/Mediasite/Catalog/Full/631f9e48530d454381549f955d08c75e21',
+ 'info_dict': {
+ 'id': '631f9e48530d454381549f955d08c75e21',
+ 'title': 'WCET Summit: Adaptive Learning in Higher Ed: Improving Outcomes Dynamically',
+ },
+ 'playlist_count': 6,
+ 'expected_warnings': ['is not a supported codec'],
+ }, {
+ # with CurrentFolderId and RootDynamicFolderId
+ 'url': 'https://medaudio.medicine.iu.edu/Mediasite/Catalog/Full/9518c4a6c5cf4993b21cbd53e828a92521/97a9db45f7ab47428c77cd2ed74bb98f14/9518c4a6c5cf4993b21cbd53e828a92521',
+ 'info_dict': {
+ 'id': '9518c4a6c5cf4993b21cbd53e828a92521',
+ 'title': 'IUSM Family and Friends Sessions',
+ },
+ 'playlist_count': 2,
+ }, {
+ 'url': 'http://uipsyc.mediasite.com/mediasite/Catalog/Full/d5d79287c75243c58c50fef50174ec1b21',
+ 'only_matching': True,
+ }, {
+ # no AntiForgeryToken
+ 'url': 'https://live.libraries.psu.edu/Mediasite/Catalog/Full/8376d4b24dd1457ea3bfe4cf9163feda21',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://medaudio.medicine.iu.edu/Mediasite/Catalog/Full/9518c4a6c5cf4993b21cbd53e828a92521/97a9db45f7ab47428c77cd2ed74bb98f14/9518c4a6c5cf4993b21cbd53e828a92521',
+ 'only_matching': True,
+ }, {
+ # dashed id
+ 'url': 'http://events7.mediasite.com/Mediasite/Catalog/Full/631f9e48-530d-4543-8154-9f955d08c75e',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ mediasite_url = mobj.group('url')
+ catalog_id = mobj.group('catalog_id')
+ current_folder_id = mobj.group('current_folder_id') or catalog_id
+ root_dynamic_folder_id = mobj.group('root_dynamic_folder_id')
+
+ webpage = self._download_webpage(url, catalog_id)
+
+ # AntiForgeryToken is optional (e.g. [1])
+ # 1. https://live.libraries.psu.edu/Mediasite/Catalog/Full/8376d4b24dd1457ea3bfe4cf9163feda21
+ anti_forgery_token = self._search_regex(
+ r'AntiForgeryToken\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1',
+ webpage, 'anti forgery token', default=None, group='value')
+ if anti_forgery_token:
+ anti_forgery_header = self._search_regex(
+ r'AntiForgeryHeaderName\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1',
+ webpage, 'anti forgery header name',
+ default='X-SOFO-AntiForgeryHeader', group='value')
+
+ data = {
+ 'IsViewPage': True,
+ 'IsNewFolder': True,
+ 'AuthTicket': None,
+ 'CatalogId': catalog_id,
+ 'CurrentFolderId': current_folder_id,
+ 'RootDynamicFolderId': root_dynamic_folder_id,
+ 'ItemsPerPage': 1000,
+ 'PageIndex': 0,
+ 'PermissionMask': 'Execute',
+ 'CatalogSearchType': 'SearchInFolder',
+ 'SortBy': 'Date',
+ 'SortDirection': 'Descending',
+ 'StartDate': None,
+ 'EndDate': None,
+ 'StatusFilterList': None,
+ 'PreviewKey': None,
+ 'Tags': [],
+ }
+
+ headers = {
+ 'Content-Type': 'application/json; charset=UTF-8',
+ 'Referer': url,
+ 'X-Requested-With': 'XMLHttpRequest',
+ }
+ if anti_forgery_token:
+ headers[anti_forgery_header] = anti_forgery_token
+
+ catalog = self._download_json(
+ '%s/Catalog/Data/GetPresentationsForFolder' % mediasite_url,
+ catalog_id, data=json.dumps(data).encode(), headers=headers)
+
+ entries = []
+ for video in catalog['PresentationDetailsList']:
+ if not isinstance(video, dict):
+ continue
+ video_id = str_or_none(video.get('Id'))
+ if not video_id:
+ continue
+ entries.append(self.url_result(
+ '%s/Play/%s' % (mediasite_url, video_id),
+ ie=MediasiteIE.ie_key(), video_id=video_id))
+
+ title = try_get(
+ catalog, lambda x: x['CurrentFolder']['Name'], compat_str)
+
+ return self.playlist_result(entries, catalog_id, title,)
+
+
+class MediasiteNamedCatalogIE(InfoExtractor):
+ _VALID_URL = r'(?xi)(?P<url>https?://[^/]+/Mediasite)/Catalog/catalogs/(?P<catalog_name>[^/?#&]+)'
+ _TESTS = [{
+ 'url': 'https://msite.misis.ru/Mediasite/Catalog/catalogs/2016-industrial-management-skriabin-o-o',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ mediasite_url = mobj.group('url')
+ catalog_name = mobj.group('catalog_name')
+
+ webpage = self._download_webpage(url, catalog_name)
+
+ catalog_id = self._search_regex(
+ r'CatalogId\s*:\s*["\'](%s)' % _ID_RE, webpage, 'catalog id')
+
+ return self.url_result(
+ '%s/Catalog/Full/%s' % (mediasite_url, catalog_id),
+ ie=MediasiteCatalogIE.ie_key(), video_id=catalog_id)
from __future__ import unicode_literals
+import json
import re
from .common import InfoExtractor
from ..compat import (
compat_parse_qs,
+ compat_urllib_parse,
compat_urllib_parse_unquote,
- compat_urllib_parse_urlencode,
)
from ..utils import (
determine_ext,
headers = {
# Disable family filter
- 'Cookie': 'user=%s; ' % compat_urllib_parse_urlencode({'ffilter': False})
+ 'Cookie': 'user=%s; ' % compat_urllib_parse.quote(json.dumps({'ffilter': False}))
}
# AnyClip videos require the flashversion cookie so that we get the link
# coding: utf-8
from __future__ import unicode_literals
+import base64
+import time
+import uuid
+
from .common import InfoExtractor
-from ..compat import compat_str
-from ..utils import int_or_none
+from ..compat import (
+ compat_HTTPError,
+ compat_str,
+)
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+)
class MGTVIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?mgtv\.com/(v|b)/(?:[^/]+/)*(?P<id>\d+)\.html'
IE_DESC = '芒果TV'
+ _GEO_COUNTRIES = ['CN']
_TESTS = [{
'url': 'http://www.mgtv.com/v/1/290525/f/3116640.html',
- 'md5': 'b1ffc0fc163152acf6beaa81832c9ee7',
'info_dict': {
'id': '3116640',
'ext': 'mp4',
- 'title': '我是歌手第四季双年巅峰会:韩红李玟“双王”领军对抗',
+ 'title': '我是歌手 第四季',
'description': '我是歌手第四季双年巅峰会',
'duration': 7461,
'thumbnail': r're:^https?://.*\.jpg$',
def _real_extract(self, url):
video_id = self._match_id(url)
- api_data = self._download_json(
- 'http://pcweb.api.mgtv.com/player/video', video_id,
- query={'video_id': video_id},
- headers=self.geo_verification_headers())['data']
+ try:
+ api_data = self._download_json(
+ 'https://pcweb.api.mgtv.com/player/video', video_id, query={
+ 'tk2': base64.urlsafe_b64encode(b'did=%s|pno=1030|ver=0.3.0301|clit=%d' % (compat_str(uuid.uuid4()).encode(), time.time()))[::-1],
+ 'video_id': video_id,
+ }, headers=self.geo_verification_headers())['data']
+ except ExtractorError as e:
+ if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
+ error = self._parse_json(e.cause.read().decode(), None)
+ if error.get('code') == 40005:
+ self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
+ raise ExtractorError(error['msg'], expected=True)
+ raise
info = api_data['info']
title = info['title'].strip()
- stream_domain = api_data['stream_domain'][0]
+ stream_data = self._download_json(
+ 'https://pcweb.api.mgtv.com/player/getSource', video_id, query={
+ 'pm2': api_data['atc']['pm2'],
+ 'video_id': video_id,
+ }, headers=self.geo_verification_headers())['data']
+ stream_domain = stream_data['stream_domain'][0]
formats = []
- for idx, stream in enumerate(api_data['stream']):
+ for idx, stream in enumerate(stream_data['stream']):
stream_path = stream.get('url')
if not stream_path:
continue
format_url = format_data.get('info')
if not format_url:
continue
- tbr = int_or_none(self._search_regex(
+ tbr = int_or_none(stream.get('filebitrate') or self._search_regex(
r'_(\d+)_mp4/', format_url, 'tbr', default=None))
formats.append({
'format_id': compat_str(tbr or idx),
# coding: utf-8
from __future__ import unicode_literals
-import json
import re
from .common import InfoExtractor
from ..utils import (
- ExtractorError,
+ clean_html,
int_or_none,
- sanitized_Request,
- urlencode_postdata,
)
IE_DESC = 'LetitBit video services: moevideo.net, playreplay.net and videochart.net'
_VALID_URL = r'''(?x)
https?://(?P<host>(?:www\.)?
- (?:(?:moevideo|playreplay|videochart)\.net))/
- (?:video|framevideo)/(?P<id>[0-9]+\.[0-9A-Za-z]+)'''
+ (?:(?:moevideo|playreplay|videochart)\.net|thesame\.tv))/
+ (?:video|framevideo|embed)/(?P<id>[0-9a-z]+\.[0-9A-Za-z]+)'''
_API_URL = 'http://api.letitbit.net/'
_API_KEY = 'tVL0gjqo5'
_TESTS = [
]
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('id')
+ host, video_id = re.match(self._VALID_URL, url).groups()
webpage = self._download_webpage(
- 'http://%s/video/%s' % (mobj.group('host'), video_id),
+ 'http://%s/video/%s' % (host, video_id),
video_id, 'Downloading webpage')
title = self._og_search_title(webpage)
- thumbnail = self._og_search_thumbnail(webpage)
- description = self._og_search_description(webpage)
- r = [
- self._API_KEY,
- [
- 'preview/flv_link',
- {
- 'uid': video_id,
- },
- ],
- ]
- r_json = json.dumps(r)
- post = urlencode_postdata({'r': r_json})
- req = sanitized_Request(self._API_URL, post)
- req.add_header('Content-type', 'application/x-www-form-urlencoded')
-
- response = self._download_json(req, video_id)
- if response['status'] != 'OK':
- raise ExtractorError(
- '%s returned error: %s' % (self.IE_NAME, response['data']),
- expected=True
- )
- item = response['data'][0]
- video_url = item['link']
- duration = int_or_none(item['length'])
- width = int_or_none(item['width'])
- height = int_or_none(item['height'])
- filesize = int_or_none(item['convert_size'])
-
- formats = [{
- 'format_id': 'sd',
- 'http_headers': {'Range': 'bytes=0-'}, # Required to download
- 'url': video_url,
- 'width': width,
- 'height': height,
- 'filesize': filesize,
- }]
+ embed_webpage = self._download_webpage(
+ 'http://%s/embed/%s' % (host, video_id),
+ video_id, 'Downloading embed webpage')
+ video = self._parse_json(self._search_regex(
+ r'mvplayer\("#player"\s*,\s*({.+})',
+ embed_webpage, 'mvplayer'), video_id)['video']
return {
'id': video_id,
'title': title,
- 'thumbnail': thumbnail,
- 'description': description,
- 'duration': duration,
- 'formats': formats,
+ 'thumbnail': video.get('poster') or self._og_search_thumbnail(webpage),
+ 'description': clean_html(self._og_search_description(webpage)),
+ 'duration': int_or_none(self._og_search_property('video:duration', webpage)),
+ 'url': video['ourUrl'],
}
video_url = (self._html_search_regex(
(r'setup\(\{\s*["\']file["\']\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1',
r'fileurl\s*=\s*(["\'])(?P<url>(?:(?!\1).)+)\1'),
- webpage, 'video URL', default=None, group='url') or
- 'http://cdn4.videos.motherlessmedia.com/videos/%s.mp4?fs=opencloud' % video_id)
+ webpage, 'video URL', default=None, group='url')
+ or 'http://cdn4.videos.motherlessmedia.com/videos/%s.mp4?fs=opencloud' % video_id)
age_limit = self._rta_search(webpage)
view_count = str_to_int(self._html_search_regex(
r'<strong>Views</strong>\s+([^<]+)<',
continue
if 'm3u8' in format_url:
# m3u8_native should not be used here until
- # https://github.com/rg3/youtube-dl/issues/9913 is fixed
+ # https://github.com/ytdl-org/youtube-dl/issues/9913 is fixed
m3u8_formats = self._extract_m3u8_formats(
format_url, display_id, 'mp4',
m3u8_id='hls', fatal=False)
from __future__ import unicode_literals
from .common import InfoExtractor
+from .fox import FOXIE
from ..utils import (
smuggle_url,
url_basename,
{'force_smil_url': True}),
'id': guid,
}
+
+
+class NationalGeographicTVIE(FOXIE):
+ _VALID_URL = r'https?://(?:www\.)?nationalgeographic\.com/tv/watch/(?P<id>[\da-fA-F]+)'
+ _TESTS = [{
+ 'url': 'https://www.nationalgeographic.com/tv/watch/6a875e6e734b479beda26438c9f21138/',
+ 'info_dict': {
+ 'id': '6a875e6e734b479beda26438c9f21138',
+ 'ext': 'mp4',
+ 'title': 'Why Nat Geo? Valley of the Boom',
+ 'description': 'The lives of prominent figures in the tech world, including their friendships, rivalries, victories and failures.',
+ 'timestamp': 1542662458,
+ 'upload_date': '20181119',
+ 'age_limit': 14,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }]
+ _HOME_PAGE_URL = 'https://www.nationalgeographic.com/tv/'
+ _API_KEY = '238bb0a0c2aba67922c48709ce0c06fd'
# '__title' does not contain extra words such as sub-site name, "Video" etc.
title = compat_urllib_parse_unquote_plus(
- self._search_regex(r"__title\s*=\s*'([^']+)'", webpage, 'title', default=None) or
- self._og_search_title(webpage))
+ self._search_regex(r"__title\s*=\s*'([^']+)'", webpage, 'title', default=None)
+ or self._og_search_title(webpage))
filename = self._search_regex(
r"(?:__)?filename\s*[:=]\s*'([^']+)'", webpage, 'video filename')
# coding: utf-8
from __future__ import unicode_literals
-import re
+import base64
+import hashlib
from .common import InfoExtractor
+from ..aes import aes_cbc_decrypt
from ..utils import (
- ExtractorError,
+ bytes_to_intlist,
int_or_none,
+ intlist_to_bytes,
+ parse_codecs,
+ parse_duration,
)
_VALID_URL = r'https?://(?:www\.)?newstube\.ru/media/(?P<id>.+)'
_TEST = {
'url': 'http://www.newstube.ru/media/telekanal-cnn-peremestil-gorod-slavyansk-v-krym',
- 'md5': '801eef0c2a9f4089fa04e4fe3533abdc',
+ 'md5': '9d10320ad473444352f72f746ccb8b8c',
'info_dict': {
'id': '728e0ef2-e187-4012-bac0-5a081fdcb1f6',
'ext': 'mp4',
}
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('id')
+ video_id = self._match_id(url)
- page = self._download_webpage(url, video_id, 'Downloading page')
+ page = self._download_webpage(url, video_id)
+ title = self._html_search_meta(['og:title', 'twitter:title'], page, fatal=True)
video_guid = self._html_search_regex(
- r'<meta property="og:video:url" content="https?://(?:www\.)?newstube\.ru/freshplayer\.swf\?guid=(?P<guid>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})',
+ r'<meta\s+property="og:video(?::(?:(?:secure_)?url|iframe))?"\s+content="https?://(?:www\.)?newstube\.ru/embed/(?P<guid>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})',
page, 'video GUID')
- player = self._download_xml(
- 'http://p.newstube.ru/v2/player.asmx/GetAutoPlayInfo6?state=&url=%s&sessionId=&id=%s&placement=profile&location=n2' % (url, video_guid),
- video_guid, 'Downloading player XML')
-
- def ns(s):
- return s.replace('/', '/%(ns)s') % {'ns': '{http://app1.newstube.ru/N2SiteWS/player.asmx}'}
-
- error_message = player.find(ns('./ErrorMessage'))
- if error_message is not None:
- raise ExtractorError('%s returned error: %s' % (self.IE_NAME, error_message.text), expected=True)
-
- session_id = player.find(ns('./SessionId')).text
- media_info = player.find(ns('./Medias/MediaInfo'))
- title = media_info.find(ns('./Name')).text
- description = self._og_search_description(page)
- thumbnail = media_info.find(ns('./KeyFrame')).text
- duration = int(media_info.find(ns('./Duration')).text) / 1000.0
+ enc_data = base64.b64decode(self._download_webpage(
+ 'https://www.newstube.ru/embed/api/player/getsources2',
+ video_guid, query={
+ 'guid': video_guid,
+ 'ff': 3,
+ }))
+ key = hashlib.pbkdf2_hmac(
+ 'sha1', video_guid.replace('-', '').encode(), enc_data[:16], 1)[:16]
+ dec_data = aes_cbc_decrypt(
+ bytes_to_intlist(enc_data[32:]), bytes_to_intlist(key),
+ bytes_to_intlist(enc_data[16:32]))
+ sources = self._parse_json(intlist_to_bytes(dec_data[:-dec_data[-1]]), video_guid)
formats = []
-
- for stream_info in media_info.findall(ns('./Streams/StreamInfo')):
- media_location = stream_info.find(ns('./MediaLocation'))
- if media_location is None:
+ for source in sources:
+ source_url = source.get('Src')
+ if not source_url:
continue
-
- server = media_location.find(ns('./Server')).text
- app = media_location.find(ns('./App')).text
- media_id = stream_info.find(ns('./Id')).text
- name = stream_info.find(ns('./Name')).text
- width = int(stream_info.find(ns('./Width')).text)
- height = int(stream_info.find(ns('./Height')).text)
-
- formats.append({
- 'url': 'rtmp://%s/%s' % (server, app),
- 'app': app,
- 'play_path': '01/%s' % video_guid.upper(),
- 'rtmp_conn': ['S:%s' % session_id, 'S:%s' % media_id, 'S:n2'],
- 'page_url': url,
- 'ext': 'flv',
- 'format_id': 'rtmp' + ('-%s' % name if name else ''),
- 'width': width,
+ height = int_or_none(source.get('Height'))
+ f = {
+ 'format_id': 'http' + ('-%dp' % height if height else ''),
+ 'url': source_url,
+ 'width': int_or_none(source.get('Width')),
'height': height,
- })
-
- sources_data = self._download_json(
- 'http://www.newstube.ru/player2/getsources?guid=%s' % video_guid,
- video_guid, fatal=False)
- if sources_data:
- for source in sources_data.get('Sources', []):
- source_url = source.get('Src')
- if not source_url:
- continue
- height = int_or_none(source.get('Height'))
- f = {
- 'format_id': 'http' + ('-%dp' % height if height else ''),
- 'url': source_url,
- 'width': int_or_none(source.get('Width')),
- 'height': height,
- }
- source_type = source.get('Type')
- if source_type:
- mobj = re.search(r'codecs="([^,]+),\s*([^"]+)"', source_type)
- if mobj:
- vcodec, acodec = mobj.groups()
- f.update({
- 'vcodec': vcodec,
- 'acodec': acodec,
- })
- formats.append(f)
+ }
+ source_type = source.get('Type')
+ if source_type:
+ f.update(parse_codecs(self._search_regex(
+ r'codecs="([^"]+)"', source_type, 'codecs', fatal=False)))
+ formats.append(f)
self._check_formats(formats, video_guid)
self._sort_formats(formats)
return {
'id': video_guid,
'title': title,
- 'description': description,
- 'thumbnail': thumbnail,
- 'duration': duration,
+ 'description': self._html_search_meta(['description', 'og:description'], page),
+ 'thumbnail': self._html_search_meta(['og:image:secure_url', 'og:image', 'twitter:image'], page),
+ 'duration': parse_duration(self._html_search_meta('duration', page)),
'formats': formats,
}
_URL_PATTERN = r'\{url: \'(.+)\'\}'
def _fetch_title(self, page):
- return (self._html_search_regex(r'<h1 id="h1">([^<>]+)</h1>', page, 'news title', default=None) or
- self._html_search_meta('description', page, 'news title'))
+ return (self._html_search_regex(r'<h1 id="h1">([^<>]+)</h1>', page, 'news title', default=None)
+ or self._html_search_meta('description', page, 'news title'))
def _fetch_thumbnail(self, page):
return self._html_search_regex(r"setInitialImage\(\'([^']+)'\)", page, 'video thumbnail', fatal=False)
from __future__ import unicode_literals
+import re
+
from .common import InfoExtractor
-from ..utils import ExtractorError
class NhkVodIE(InfoExtractor):
- _VALID_URL = r'https?://www3\.nhk\.or\.jp/nhkworld/en/vod/(?P<id>[^/]+/[^/?#&]+)'
- _TEST = {
- # Videos available only for a limited period of time. Visit
- # http://www3.nhk.or.jp/nhkworld/en/vod/ for working samples.
- 'url': 'http://www3.nhk.or.jp/nhkworld/en/vod/tokyofashion/20160815',
- 'info_dict': {
- 'id': 'A1bnNiNTE6nY3jLllS-BIISfcC_PpvF5',
- 'ext': 'flv',
- 'title': 'TOKYO FASHION EXPRESS - The Kimono as Global Fashion',
- 'description': 'md5:db338ee6ce8204f415b754782f819824',
- 'series': 'TOKYO FASHION EXPRESS',
- 'episode': 'The Kimono as Global Fashion',
- },
- 'skip': 'Videos available only for a limited period of time',
- }
- _API_URL = 'http://api.nhk.or.jp/nhkworld/vodesdlist/v1/all/all/all.json?apikey=EJfK8jdS57GqlupFgAfAAwr573q01y6k'
+ _VALID_URL = r'https?://www3\.nhk\.or\.jp/nhkworld/(?P<lang>[a-z]{2})/ondemand/(?P<type>video|audio)/(?P<id>\d{7}|[a-z]+-\d{8}-\d+)'
+ # Content available only for a limited period of time. Visit
+ # https://www3.nhk.or.jp/nhkworld/en/ondemand/ for working samples.
+ _TESTS = [{
+ 'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/2015173/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/audio/plugin-20190404-1/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www3.nhk.or.jp/nhkworld/fr/ondemand/audio/plugin-20190404-1/',
+ 'only_matching': True,
+ }]
+ _API_URL_TEMPLATE = 'https://api.nhk.or.jp/nhkworld/%sodesdlist/v7/episode/%s/%s/all%s.json'
def _real_extract(self, url):
- video_id = self._match_id(url)
+ lang, m_type, episode_id = re.match(self._VALID_URL, url).groups()
+ if episode_id.isdigit():
+ episode_id = episode_id[:4] + '-' + episode_id[4:]
- data = self._download_json(self._API_URL, video_id)
+ is_video = m_type == 'video'
+ episode = self._download_json(
+ self._API_URL_TEMPLATE % ('v' if is_video else 'r', episode_id, lang, '/all' if is_video else ''),
+ episode_id, query={'apikey': 'EJfK8jdS57GqlupFgAfAAwr573q01y6k'})['data']['episodes'][0]
+ title = episode.get('sub_title_clean') or episode['sub_title']
- try:
- episode = next(
- e for e in data['data']['episodes']
- if e.get('url') and video_id in e['url'])
- except StopIteration:
- raise ExtractorError('Unable to find episode')
+ def get_clean_field(key):
+ return episode.get(key + '_clean') or episode.get(key)
- embed_code = episode['vod_id']
+ series = get_clean_field('title')
- title = episode.get('sub_title_clean') or episode['sub_title']
- description = episode.get('description_clean') or episode.get('description')
- series = episode.get('title_clean') or episode.get('title')
+ thumbnails = []
+ for s, w, h in [('', 640, 360), ('_l', 1280, 720)]:
+ img_path = episode.get('image' + s)
+ if not img_path:
+ continue
+ thumbnails.append({
+ 'id': '%dp' % h,
+ 'height': h,
+ 'width': w,
+ 'url': 'https://www3.nhk.or.jp' + img_path,
+ })
- return {
- '_type': 'url_transparent',
- 'ie_key': 'Ooyala',
- 'url': 'ooyala:%s' % embed_code,
+ info = {
+ 'id': episode_id + '-' + lang,
'title': '%s - %s' % (series, title) if series and title else title,
- 'description': description,
+ 'description': get_clean_field('description'),
+ 'thumbnails': thumbnails,
'series': series,
'episode': title,
}
+ if is_video:
+ info.update({
+ '_type': 'url_transparent',
+ 'ie_key': 'Ooyala',
+ 'url': 'ooyala:' + episode['vod_id'],
+ })
+ else:
+ audio = episode['audio']
+ audio_path = audio['audio']
+ info['formats'] = self._extract_m3u8_formats(
+ 'https://nhks-vh.akamaihd.net/i%s/master.m3u8' % audio_path,
+ episode_id, 'm4a', m3u8_id='hls', fatal=False)
+ for proto in ('rtmpt', 'rtmp'):
+ info['formats'].append({
+ 'ext': 'flv',
+ 'format_id': proto,
+ 'url': '%s://flv.nhk.or.jp/ondemand/mp4:flv%s' % (proto, audio_path),
+ 'vcodec': 'none',
+ })
+ for f in info['formats']:
+ f['language'] = lang
+ return info
'timestamp': 1454544904,
},
}, {
- # Some m3u8 URLs are invalid (https://github.com/rg3/youtube-dl/issues/10713)
+ # Some m3u8 URLs are invalid (https://github.com/ytdl-org/youtube-dl/issues/10713)
'url': 'https://www.nhl.com/predators/video/poile-laviolette-on-subban-trade/t-277437416/c-44315003',
'md5': '50b2bb47f405121484dda3ccbea25459',
'info_dict': {
video_detail = watch_api_data.get('videoDetail', {})
thumbnail = (
- get_video_info(['thumbnail_url', 'thumbnailURL']) or
- self._html_search_meta('image', webpage, 'thumbnail', default=None) or
- video_detail.get('thumbnail'))
+ get_video_info(['thumbnail_url', 'thumbnailURL'])
+ or self._html_search_meta('image', webpage, 'thumbnail', default=None)
+ or video_detail.get('thumbnail'))
description = get_video_info('description')
- timestamp = (parse_iso8601(get_video_info('first_retrieve')) or
- unified_timestamp(get_video_info('postedDateTime')))
+ timestamp = (parse_iso8601(get_video_info('first_retrieve'))
+ or unified_timestamp(get_video_info('postedDateTime')))
if not timestamp:
match = self._html_search_meta('datePublished', webpage, 'date published', default=None)
if match:
view_count = int_or_none(match.replace(',', ''))
view_count = view_count or video_detail.get('viewCount')
- comment_count = (int_or_none(get_video_info('comment_num')) or
- video_detail.get('commentCount') or
- try_get(api_data, lambda x: x['thread']['commentCount']))
+ comment_count = (int_or_none(get_video_info('comment_num'))
+ or video_detail.get('commentCount')
+ or try_get(api_data, lambda x: x['thread']['commentCount']))
if not comment_count:
match = self._html_search_regex(
r'>Comments: <strong[^>]*>([^<]+)</strong>',
comment_count = int_or_none(match.replace(',', ''))
duration = (parse_duration(
- get_video_info('length') or
- self._html_search_meta(
- 'video:duration', webpage, 'video duration', default=None)) or
- video_detail.get('length') or
- get_video_info('duration'))
+ get_video_info('length')
+ or self._html_search_meta(
+ 'video:duration', webpage, 'video duration', default=None))
+ or video_detail.get('length')
+ or get_video_info('duration'))
webpage_url = get_video_info('watch_url') or url
# Timestamp adjustment offset between server time and local time
# must be calculated in order to use timestamps closest to server's
- # in all API requests (see https://github.com/rg3/youtube-dl/issues/7864)
+ # in all API requests (see https://github.com/ytdl-org/youtube-dl/issues/7864)
webpage = self._download_webpage(url, video_id)
player_url = self._search_regex(
webpage = self._download_webpage(url, video_id)
- bc_url = BrightcoveNewIE._extract_url(self, webpage)
+ brightcove_id = self._search_regex(
+ r'data-video-id=["\'](\d+)', webpage, 'brightcove id')
data = self._parse_json(
self._search_regex(
return {
'_type': 'url_transparent',
'ie_key': BrightcoveNewIE.ie_key(),
- 'url': smuggle_url(bc_url, {'geo_countries': ['CA']}),
+ 'url': smuggle_url(
+ self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id,
+ {'geo_countries': ['CA']}),
+ 'id': brightcove_id,
'title': title,
'description': description,
'series': series,
+++ /dev/null
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-from ..compat import compat_urlparse
-from ..utils import (
- ExtractorError,
- NO_DEFAULT,
- sanitized_Request,
- urlencode_postdata,
-)
-
-
-class NovaMovIE(InfoExtractor):
- IE_NAME = 'novamov'
- IE_DESC = 'NovaMov'
-
- _VALID_URL_TEMPLATE = r'''(?x)
- http://
- (?:
- (?:www\.)?%(host)s/(?:file|video|mobile/\#/videos)/|
- (?:(?:embed|www)\.)%(host)s/embed(?:\.php|/)?\?(?:.*?&)?\bv=
- )
- (?P<id>[a-z\d]{13})
- '''
- _VALID_URL = _VALID_URL_TEMPLATE % {'host': r'novamov\.com'}
-
- _HOST = 'www.novamov.com'
-
- _FILE_DELETED_REGEX = r'This file no longer exists on our servers!</h2>'
- _FILEKEY_REGEX = r'flashvars\.filekey=(?P<filekey>"?[^"]+"?);'
- _TITLE_REGEX = r'(?s)<div class="v_tab blockborder rounded5" id="v_tab1">\s*<h3>([^<]+)</h3>'
- _DESCRIPTION_REGEX = r'(?s)<div class="v_tab blockborder rounded5" id="v_tab1">\s*<h3>[^<]+</h3><p>([^<]+)</p>'
- _URL_TEMPLATE = 'http://%s/video/%s'
-
- _TEST = None
-
- def _check_existence(self, webpage, video_id):
- if re.search(self._FILE_DELETED_REGEX, webpage) is not None:
- raise ExtractorError('Video %s does not exist' % video_id, expected=True)
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
-
- url = self._URL_TEMPLATE % (self._HOST, video_id)
-
- webpage = self._download_webpage(
- url, video_id, 'Downloading video page')
-
- self._check_existence(webpage, video_id)
-
- def extract_filekey(default=NO_DEFAULT):
- filekey = self._search_regex(
- self._FILEKEY_REGEX, webpage, 'filekey', default=default)
- if filekey is not default and (filekey[0] != '"' or filekey[-1] != '"'):
- return self._search_regex(
- r'var\s+%s\s*=\s*"([^"]+)"' % re.escape(filekey), webpage, 'filekey', default=default)
- else:
- return filekey
-
- filekey = extract_filekey(default=None)
-
- if not filekey:
- fields = self._hidden_inputs(webpage)
- post_url = self._search_regex(
- r'<form[^>]+action=(["\'])(?P<url>.+?)\1', webpage,
- 'post url', default=url, group='url')
- if not post_url.startswith('http'):
- post_url = compat_urlparse.urljoin(url, post_url)
- request = sanitized_Request(
- post_url, urlencode_postdata(fields))
- request.add_header('Content-Type', 'application/x-www-form-urlencoded')
- request.add_header('Referer', post_url)
- webpage = self._download_webpage(
- request, video_id, 'Downloading continue to the video page')
- self._check_existence(webpage, video_id)
-
- filekey = extract_filekey()
-
- title = self._html_search_regex(self._TITLE_REGEX, webpage, 'title')
- description = self._html_search_regex(self._DESCRIPTION_REGEX, webpage, 'description', default='', fatal=False)
-
- api_response = self._download_webpage(
- 'http://%s/api/player.api.php?key=%s&file=%s' % (self._HOST, filekey, video_id), video_id,
- 'Downloading video api response')
-
- response = compat_urlparse.parse_qs(api_response)
-
- if 'error_msg' in response:
- raise ExtractorError('%s returned error: %s' % (self.IE_NAME, response['error_msg'][0]), expected=True)
-
- video_url = response['url'][0]
-
- return {
- 'id': video_id,
- 'url': video_url,
- 'title': title,
- 'description': description
- }
-
-
-class WholeCloudIE(NovaMovIE):
- IE_NAME = 'wholecloud'
- IE_DESC = 'WholeCloud'
-
- _VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': r'(?:wholecloud\.net|movshare\.(?:net|sx|ag))'}
-
- _HOST = 'www.wholecloud.net'
-
- _FILE_DELETED_REGEX = r'>This file no longer exists on our servers.<'
- _TITLE_REGEX = r'<strong>Title:</strong> ([^<]+)</p>'
- _DESCRIPTION_REGEX = r'<strong>Description:</strong> ([^<]+)</p>'
-
- _TEST = {
- 'url': 'http://www.wholecloud.net/video/559e28be54d96',
- 'md5': 'abd31a2132947262c50429e1d16c1bfd',
- 'info_dict': {
- 'id': '559e28be54d96',
- 'ext': 'flv',
- 'title': 'dissapeared image',
- 'description': 'optical illusion dissapeared image magic illusion',
- }
- }
-
-
-class NowVideoIE(NovaMovIE):
- IE_NAME = 'nowvideo'
- IE_DESC = 'NowVideo'
-
- _VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': r'nowvideo\.(?:to|ch|ec|sx|eu|at|ag|co|li)'}
-
- _HOST = 'www.nowvideo.to'
-
- _FILE_DELETED_REGEX = r'>This file no longer exists on our servers.<'
- _TITLE_REGEX = r'<h4>([^<]+)</h4>'
- _DESCRIPTION_REGEX = r'</h4>\s*<p>([^<]+)</p>'
-
- _TEST = {
- 'url': 'http://www.nowvideo.sx/video/f1d6fce9a968b',
- 'md5': '12c82cad4f2084881d8bc60ee29df092',
- 'info_dict': {
- 'id': 'f1d6fce9a968b',
- 'ext': 'flv',
- 'title': 'youtubedl test video BaWjenozKc',
- 'description': 'Description',
- },
- }
-
-
-class VideoWeedIE(NovaMovIE):
- IE_NAME = 'videoweed'
- IE_DESC = 'VideoWeed'
-
- _VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': r'videoweed\.(?:es|com)'}
-
- _HOST = 'www.videoweed.es'
-
- _FILE_DELETED_REGEX = r'>This file no longer exists on our servers.<'
- _TITLE_REGEX = r'<h1 class="text_shadow">([^<]+)</h1>'
- _URL_TEMPLATE = 'http://%s/file/%s'
-
- _TEST = {
- 'url': 'http://www.videoweed.es/file/b42178afbea14',
- 'md5': 'abd31a2132947262c50429e1d16c1bfd',
- 'info_dict': {
- 'id': 'b42178afbea14',
- 'ext': 'flv',
- 'title': 'optical illusion dissapeared image magic illusion',
- 'description': ''
- },
- }
-
-
-class CloudTimeIE(NovaMovIE):
- IE_NAME = 'cloudtime'
- IE_DESC = 'CloudTime'
-
- _VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': r'cloudtime\.to'}
-
- _HOST = 'www.cloudtime.to'
-
- _FILE_DELETED_REGEX = r'>This file no longer exists on our servers.<'
- _TITLE_REGEX = r'<div[^>]+class=["\']video_det["\'][^>]*>\s*<strong>([^<]+)</strong>'
-
- _TEST = None
-
-
-class AuroraVidIE(NovaMovIE):
- IE_NAME = 'auroravid'
- IE_DESC = 'AuroraVid'
-
- _VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': r'auroravid\.to'}
-
- _HOST = 'www.auroravid.to'
-
- _FILE_DELETED_REGEX = r'This file no longer exists on our servers!<'
-
- _TESTS = [{
- 'url': 'http://www.auroravid.to/video/4rurhn9x446jj',
- 'md5': '7205f346a52bbeba427603ba10d4b935',
- 'info_dict': {
- 'id': '4rurhn9x446jj',
- 'ext': 'flv',
- 'title': 'search engine optimization',
- 'description': 'search engine optimization is used to rank the web page in the google search engine'
- },
- 'skip': '"Invalid token" errors abound (in web interface as well as youtube-dl, there is nothing we can do about it.)'
- }, {
- 'url': 'http://www.auroravid.to/embed/?v=4rurhn9x446jj',
- 'only_matching': True,
- }]
ExtractorError,
fix_xml_ampersands,
int_or_none,
+ merge_dicts,
orderedSet,
parse_duration,
qualities,
+ str_or_none,
strip_jsonp,
unified_strdate,
+ unified_timestamp,
+ url_or_none,
+ urlencode_postdata,
)
def _real_extract(self, url):
video_id = self._match_id(url)
- return self._get_info(video_id)
+ return self._get_info(url, video_id) or self._get_old_info(video_id)
+
+ def _get_info(self, url, video_id):
+ token = self._download_json(
+ 'https://www.npostart.nl/api/token', video_id,
+ 'Downloading token', headers={
+ 'Referer': url,
+ 'X-Requested-With': 'XMLHttpRequest',
+ })['token']
+
+ player = self._download_json(
+ 'https://www.npostart.nl/player/%s' % video_id, video_id,
+ 'Downloading player JSON', data=urlencode_postdata({
+ 'autoplay': 0,
+ 'share': 1,
+ 'pageUrl': url,
+ 'hasAdConsent': 0,
+ '_token': token,
+ }))
+
+ player_token = player['token']
+
+ drm = False
+ format_urls = set()
+ formats = []
+ for profile in ('hls', 'dash-widevine', 'dash-playready', 'smooth'):
+ streams = self._download_json(
+ 'https://start-player.npo.nl/video/%s/streams' % video_id,
+ video_id, 'Downloading %s profile JSON' % profile, fatal=False,
+ query={
+ 'profile': profile,
+ 'quality': 'npo',
+ 'tokenId': player_token,
+ 'streamType': 'broadcast',
+ })
+ if not streams:
+ continue
+ stream = streams.get('stream')
+ if not isinstance(stream, dict):
+ continue
+ stream_url = url_or_none(stream.get('src'))
+ if not stream_url or stream_url in format_urls:
+ continue
+ format_urls.add(stream_url)
+ if stream.get('protection') is not None or stream.get('keySystemOptions') is not None:
+ drm = True
+ continue
+ stream_type = stream.get('type')
+ stream_ext = determine_ext(stream_url)
+ if stream_type == 'application/dash+xml' or stream_ext == 'mpd':
+ formats.extend(self._extract_mpd_formats(
+ stream_url, video_id, mpd_id='dash', fatal=False))
+ elif stream_type == 'application/vnd.apple.mpegurl' or stream_ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ stream_url, video_id, ext='mp4',
+ entry_protocol='m3u8_native', m3u8_id='hls', fatal=False))
+ elif re.search(r'\.isml?/Manifest', stream_url):
+ formats.extend(self._extract_ism_formats(
+ stream_url, video_id, ism_id='mss', fatal=False))
+ else:
+ formats.append({
+ 'url': stream_url,
+ })
+
+ if not formats:
+ if drm:
+ raise ExtractorError('This video is DRM protected.', expected=True)
+ return
+
+ self._sort_formats(formats)
+
+ info = {
+ 'id': video_id,
+ 'title': video_id,
+ 'formats': formats,
+ }
- def _get_info(self, video_id):
+ embed_url = url_or_none(player.get('embedUrl'))
+ if embed_url:
+ webpage = self._download_webpage(
+ embed_url, video_id, 'Downloading embed page', fatal=False)
+ if webpage:
+ video = self._parse_json(
+ self._search_regex(
+ r'\bvideo\s*=\s*({.+?})\s*;', webpage, 'video',
+ default='{}'), video_id)
+ if video:
+ title = video.get('episodeTitle')
+ subtitles = {}
+ subtitles_list = video.get('subtitles')
+ if isinstance(subtitles_list, list):
+ for cc in subtitles_list:
+ cc_url = url_or_none(cc.get('src'))
+ if not cc_url:
+ continue
+ lang = str_or_none(cc.get('language')) or 'nl'
+ subtitles.setdefault(lang, []).append({
+ 'url': cc_url,
+ })
+ return merge_dicts({
+ 'title': title,
+ 'description': video.get('description'),
+ 'thumbnail': url_or_none(
+ video.get('still_image_url') or video.get('orig_image_url')),
+ 'duration': int_or_none(video.get('duration')),
+ 'timestamp': unified_timestamp(video.get('broadcastDate')),
+ 'creator': video.get('channel'),
+ 'series': video.get('title'),
+ 'episode': title,
+ 'episode_number': int_or_none(video.get('episodeNumber')),
+ 'subtitles': subtitles,
+ }, info)
+
+ return info
+
+ def _get_old_info(self, video_id):
metadata = self._download_json(
'http://e.omroep.nl/metadata/%s' % video_id,
video_id,
# JSON
else:
video_url = stream_info.get('url')
- if not video_url or video_url in urls:
+ if not video_url or 'vodnotavailable.' in video_url or video_url in urls:
continue
urls.add(video_url)
if determine_ext(video_url) == 'm3u8':
from __future__ import unicode_literals
from .common import InfoExtractor
-from ..compat import compat_urllib_parse_urlencode
from ..utils import (
int_or_none,
qualities,
class NprIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?npr\.org/player/v2/mediaPlayer\.html\?.*\bid=(?P<id>\d+)'
+ _VALID_URL = r'https?://(?:www\.)?npr\.org/(?:sections/[^/]+/)?\d{4}/\d{2}/\d{2}/(?P<id>\d+)'
_TESTS = [{
- 'url': 'http://www.npr.org/player/v2/mediaPlayer.html?id=449974205',
+ 'url': 'https://www.npr.org/sections/allsongs/2015/10/21/449974205/new-music-from-beach-house-chairlift-cmj-discoveries-and-more',
'info_dict': {
'id': '449974205',
'title': 'New Music From Beach House, Chairlift, CMJ Discoveries And More'
},
'playlist_count': 7,
}, {
- 'url': 'http://www.npr.org/player/v2/mediaPlayer.html?action=1&t=1&islist=false&id=446928052&m=446929930&live=1',
+ 'url': 'https://www.npr.org/sections/deceptivecadence/2015/10/09/446928052/music-from-the-shadows-ancient-armenian-hymns-and-piano-jazz',
'info_dict': {
'id': '446928052',
'title': "Songs We Love: Tigran Hamasyan, 'Your Mercy is Boundless'"
'duration': 402,
},
}],
+ }, {
+ # multimedia, not media title
+ 'url': 'https://www.npr.org/2017/06/19/533198237/tigers-jaw-tiny-desk-concert',
+ 'info_dict': {
+ 'id': '533198237',
+ 'title': 'Tigers Jaw: Tiny Desk Concert',
+ },
+ 'playlist': [{
+ 'md5': '12fa60cb2d3ed932f53609d4aeceabf1',
+ 'info_dict': {
+ 'id': '533201718',
+ 'ext': 'mp4',
+ 'title': 'Tigers Jaw: Tiny Desk Concert',
+ 'duration': 402,
+ },
+ }],
+ 'expected_warnings': ['Failed to download m3u8 information'],
}]
def _real_extract(self, url):
playlist_id = self._match_id(url)
- config = self._download_json(
- 'http://api.npr.org/query?%s' % compat_urllib_parse_urlencode({
+ story = self._download_json(
+ 'http://api.npr.org/query', playlist_id, query={
'id': playlist_id,
- 'fields': 'titles,audio,show',
+ 'fields': 'audio,multimedia,title',
'format': 'json',
'apiKey': 'MDAzMzQ2MjAyMDEyMzk4MTU1MDg3ZmM3MQ010',
- }), playlist_id)
-
- story = config['list']['story'][0]
+ })['list']['story'][0]
+ playlist_title = story.get('title', {}).get('$text')
- KNOWN_FORMATS = ('threegp', 'mp4', 'mp3')
+ KNOWN_FORMATS = ('threegp', 'm3u8', 'smil', 'mp4', 'mp3')
quality = qualities(KNOWN_FORMATS)
entries = []
- for audio in story.get('audio', []):
- title = audio.get('title', {}).get('$text')
- duration = int_or_none(audio.get('duration', {}).get('$text'))
+ for media in story.get('audio', []) + story.get('multimedia', []):
+ media_id = media['id']
+
formats = []
- for format_id, formats_entry in audio.get('format', {}).items():
+ for format_id, formats_entry in media.get('format', {}).items():
if not formats_entry:
continue
if isinstance(formats_entry, list):
if not format_url:
continue
if format_id in KNOWN_FORMATS:
- formats.append({
- 'url': format_url,
- 'format_id': format_id,
- 'ext': formats_entry.get('type'),
- 'quality': quality(format_id),
- })
+ if format_id == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ format_url, media_id, 'mp4', 'm3u8_native',
+ m3u8_id='hls', fatal=False))
+ elif format_id == 'smil':
+ smil_formats = self._extract_smil_formats(
+ format_url, media_id, transform_source=lambda s: s.replace(
+ 'rtmp://flash.npr.org/ondemand/', 'https://ondemand.npr.org/'))
+ self._check_formats(smil_formats, media_id)
+ formats.extend(smil_formats)
+ else:
+ formats.append({
+ 'url': format_url,
+ 'format_id': format_id,
+ 'quality': quality(format_id),
+ })
self._sort_formats(formats)
+
entries.append({
- 'id': audio['id'],
- 'title': title,
- 'duration': duration,
+ 'id': media_id,
+ 'title': media.get('title', {}).get('$text') or playlist_title,
+ 'thumbnail': media.get('altImageUrl', {}).get('$text'),
+ 'duration': int_or_none(media.get('duration', {}).get('$text')),
'formats': formats,
})
- playlist_title = story.get('title', {}).get('$text')
return self.playlist_result(entries, playlist_id, playlist_title)
entries = []
conviva = data.get('convivaStatistics') or {}
- live = (data.get('mediaElementType') == 'Live' or
- data.get('isLive') is True or conviva.get('isLive'))
+ live = (data.get('mediaElementType') == 'Live'
+ or data.get('isLive') is True or conviva.get('isLive'))
def make_title(t):
return self._live_title(t) if live else t
--- /dev/null
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+
+class NRLTVIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?nrl\.com/tv(/[^/]+)*/(?P<id>[^/?&#]+)'
+ _TEST = {
+ 'url': 'https://www.nrl.com/tv/news/match-highlights-titans-v-knights-862805/',
+ 'info_dict': {
+ 'id': 'YyNnFuaDE6kPJqlDhG4CGQ_w89mKTau4',
+ 'ext': 'mp4',
+ 'title': 'Match Highlights: Titans v Knights',
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ 'format': 'bestvideo',
+ },
+ }
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+ q_data = self._parse_json(self._search_regex(
+ r"(?s)q-data='({.+?})'", webpage, 'player data'), display_id)
+ ooyala_id = q_data['videoId']
+ return self.url_result(
+ 'ooyala:' + ooyala_id, 'Ooyala', ooyala_id, q_data.get('title'))
--- /dev/null
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ js_to_json,
+ smuggle_url,
+)
+
+
+class NTVCoJpCUIE(InfoExtractor):
+ IE_NAME = 'cu.ntv.co.jp'
+ IE_DESC = 'Nippon Television Network'
+ _VALID_URL = r'https?://cu\.ntv\.co\.jp/(?!program)(?P<id>[^/?&#]+)'
+ _TEST = {
+ 'url': 'https://cu.ntv.co.jp/televiva-chill-gohan_181031/',
+ 'info_dict': {
+ 'id': '5978891207001',
+ 'ext': 'mp4',
+ 'title': '桜エビと炒り卵がポイント! 「中華風 エビチリおにぎり」──『美虎』五十嵐美幸',
+ 'upload_date': '20181213',
+ 'description': 'md5:211b52f4fd60f3e0e72b68b0c6ba52a9',
+ 'uploader_id': '3855502814001',
+ 'timestamp': 1544669941,
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }
+ BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/default_default/index.html?videoId=%s'
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+ player_config = self._parse_json(self._search_regex(
+ r'(?s)PLAYER_CONFIG\s*=\s*({.+?})',
+ webpage, 'player config'), display_id, js_to_json)
+ video_id = player_config['videoId']
+ account_id = player_config.get('account') or '3855502814001'
+ return {
+ '_type': 'url_transparent',
+ 'id': video_id,
+ 'display_id': display_id,
+ 'title': self._search_regex(r'<h1[^>]+class="title"[^>]*>([^<]+)', webpage, 'title').strip(),
+ 'description': self._html_search_meta(['description', 'og:description'], webpage),
+ 'url': smuggle_url(self.BRIGHTCOVE_URL_TEMPLATE % (account_id, video_id), {'geo_countries': ['JP']}),
+ 'ie_key': 'BrightcoveNew',
+ }
}, {
'url': 'https://m.ok.ru/dk?st.cmd=movieLayer&st.discId=863789452017&st.retLoc=friend&st.rtu=%2Fdk%3Fst.cmd%3DfriendMovies%26st.mode%3Down%26st.mrkId%3D%257B%2522uploadedMovieMarker%2522%253A%257B%2522marker%2522%253A%25221519410114503%2522%252C%2522hasMore%2522%253Atrue%257D%252C%2522sharedMovieMarker%2522%253A%257B%2522marker%2522%253Anull%252C%2522hasMore%2522%253Afalse%257D%257D%26st.friendId%3D561722190321%26st.frwd%3Don%26_prevCmd%3DfriendMovies%26tkn%3D7257&st.discType=MOVIE&st.mvId=863789452017&_prevCmd=friendMovies&tkn=3648#lst#',
'only_matching': True,
+ }, {
+ # Paid video
+ 'url': 'https://ok.ru/video/954886983203',
+ 'only_matching': True,
}]
def _real_extract(self, url):
'ext': 'flv',
})
+ if not formats:
+ payment_info = metadata.get('paymentInfo')
+ if payment_info:
+ raise ExtractorError('This video is paid, subscribe to download it', expected=True)
+
self._sort_formats(formats)
info['formats'] = formats
progressive_formats = []
for adaptive_format in formats:
# Prevent advertisement from embedding into m3u8 playlist (see
- # https://github.com/rg3/youtube-dl/issues/8893#issuecomment-199912684)
+ # https://github.com/ytdl-org/youtube-dl/issues/8893#issuecomment-199912684)
adaptive_format['url'] = re.sub(
r'\badsegmentlength=\d+', r'adsegmentlength=0', adaptive_format['url'])
rendition_id = self._search_regex(
title = metadata['title']
auth_data = self._download_json(
- self._AUTHORIZATION_URL_TEMPLATE % (pcode, embed_code) +
- compat_urllib_parse_urlencode({
+ self._AUTHORIZATION_URL_TEMPLATE % (pcode, embed_code)
+ + compat_urllib_parse_urlencode({
'domain': domain,
'supportedFormats': supportedformats or 'mp4,rtmp,m3u8,hds,dash,smooth',
'embedToken': embed_token,
- }), video_id)
+ }), video_id, headers=self.geo_verification_headers())
cur_auth_data = auth_data['authorization_data'][embed_code]
import json
import os
+import random
import re
import subprocess
import tempfile
if cookie.discard is not None:
cookie_dict['discard'] = cookie.discard
try:
- if (cookie.has_nonstandard_attr('httpOnly') or
- cookie.has_nonstandard_attr('httponly') or
- cookie.has_nonstandard_attr('HttpOnly')):
+ if (cookie.has_nonstandard_attr('httpOnly')
+ or cookie.has_nonstandard_attr('httponly')
+ or cookie.has_nonstandard_attr('HttpOnly')):
cookie_dict['httponly'] = True
except TypeError:
pass
class OpenloadIE(InfoExtractor):
+ _DOMAINS = r'(?:openload\.(?:co|io|link|pw)|oload\.(?:tv|stream|site|xyz|win|download|cloud|cc|icu|fun|club|info|press|pw|live|space|services|website)|oladblock\.(?:services|xyz|me)|openloed\.co)'
_VALID_URL = r'''(?x)
https?://
(?P<host>
(?:www\.)?
- (?:
- openload\.(?:co|io|link)|
- oload\.(?:tv|stream|site|xyz|win|download|cloud|cc|icu|fun)
- )
+ %s
)/
(?:f|embed)/
(?P<id>[a-zA-Z0-9-_]+)
- '''
-
+ ''' % _DOMAINS
+ _EMBED_WORD = 'embed'
+ _STREAM_WORD = 'f'
+ _REDIR_WORD = 'stream'
+ _URL_IDS = ('streamurl', 'streamuri', 'streamurj')
_TESTS = [{
'url': 'https://openload.co/f/kUEfGclsU9o',
'md5': 'bf1c059b004ebc7a256f89408e65c36e',
}, {
'url': 'https://oload.fun/f/gb6G1H4sHXY',
'only_matching': True,
+ }, {
+ 'url': 'https://oload.club/f/Nr1L-aZ2dbQ',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://oload.info/f/5NEAbI2BDSk',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://openload.pw/f/WyKgK8s94N0',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://oload.pw/f/WyKgK8s94N0',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://oload.live/f/-Z58UZ-GR4M',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://oload.space/f/IY4eZSst3u8/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://oload.services/embed/bs1NWj1dCag/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://oload.press/embed/drTBl1aOTvk/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://oload.website/embed/drTBl1aOTvk/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://oladblock.services/f/b8NWEgkqNLI/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://oladblock.xyz/f/b8NWEgkqNLI/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://oladblock.me/f/b8NWEgkqNLI/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://openloed.co/f/b8NWEgkqNLI/',
+ 'only_matching': True,
}]
- _USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36'
+ _USER_AGENT_TPL = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s Safari/537.36'
+ _CHROME_VERSIONS = (
+ '74.0.3729.129',
+ '76.0.3780.3',
+ '76.0.3780.2',
+ '74.0.3729.128',
+ '76.0.3780.1',
+ '76.0.3780.0',
+ '75.0.3770.15',
+ '74.0.3729.127',
+ '74.0.3729.126',
+ '76.0.3779.1',
+ '76.0.3779.0',
+ '75.0.3770.14',
+ '74.0.3729.125',
+ '76.0.3778.1',
+ '76.0.3778.0',
+ '75.0.3770.13',
+ '74.0.3729.124',
+ '74.0.3729.123',
+ '73.0.3683.121',
+ '76.0.3777.1',
+ '76.0.3777.0',
+ '75.0.3770.12',
+ '74.0.3729.122',
+ '76.0.3776.4',
+ '75.0.3770.11',
+ '74.0.3729.121',
+ '76.0.3776.3',
+ '76.0.3776.2',
+ '73.0.3683.120',
+ '74.0.3729.120',
+ '74.0.3729.119',
+ '74.0.3729.118',
+ '76.0.3776.1',
+ '76.0.3776.0',
+ '76.0.3775.5',
+ '75.0.3770.10',
+ '74.0.3729.117',
+ '76.0.3775.4',
+ '76.0.3775.3',
+ '74.0.3729.116',
+ '75.0.3770.9',
+ '76.0.3775.2',
+ '76.0.3775.1',
+ '76.0.3775.0',
+ '75.0.3770.8',
+ '74.0.3729.115',
+ '74.0.3729.114',
+ '76.0.3774.1',
+ '76.0.3774.0',
+ '75.0.3770.7',
+ '74.0.3729.113',
+ '74.0.3729.112',
+ '74.0.3729.111',
+ '76.0.3773.1',
+ '76.0.3773.0',
+ '75.0.3770.6',
+ '74.0.3729.110',
+ '74.0.3729.109',
+ '76.0.3772.1',
+ '76.0.3772.0',
+ '75.0.3770.5',
+ '74.0.3729.108',
+ '74.0.3729.107',
+ '76.0.3771.1',
+ '76.0.3771.0',
+ '75.0.3770.4',
+ '74.0.3729.106',
+ '74.0.3729.105',
+ '75.0.3770.3',
+ '74.0.3729.104',
+ '74.0.3729.103',
+ '74.0.3729.102',
+ '75.0.3770.2',
+ '74.0.3729.101',
+ '75.0.3770.1',
+ '75.0.3770.0',
+ '74.0.3729.100',
+ '75.0.3769.5',
+ '75.0.3769.4',
+ '74.0.3729.99',
+ '75.0.3769.3',
+ '75.0.3769.2',
+ '75.0.3768.6',
+ '74.0.3729.98',
+ '75.0.3769.1',
+ '75.0.3769.0',
+ '74.0.3729.97',
+ '73.0.3683.119',
+ '73.0.3683.118',
+ '74.0.3729.96',
+ '75.0.3768.5',
+ '75.0.3768.4',
+ '75.0.3768.3',
+ '75.0.3768.2',
+ '74.0.3729.95',
+ '74.0.3729.94',
+ '75.0.3768.1',
+ '75.0.3768.0',
+ '74.0.3729.93',
+ '74.0.3729.92',
+ '73.0.3683.117',
+ '74.0.3729.91',
+ '75.0.3766.3',
+ '74.0.3729.90',
+ '75.0.3767.2',
+ '75.0.3767.1',
+ '75.0.3767.0',
+ '74.0.3729.89',
+ '73.0.3683.116',
+ '75.0.3766.2',
+ '74.0.3729.88',
+ '75.0.3766.1',
+ '75.0.3766.0',
+ '74.0.3729.87',
+ '73.0.3683.115',
+ '74.0.3729.86',
+ '75.0.3765.1',
+ '75.0.3765.0',
+ '74.0.3729.85',
+ '73.0.3683.114',
+ '74.0.3729.84',
+ '75.0.3764.1',
+ '75.0.3764.0',
+ '74.0.3729.83',
+ '73.0.3683.113',
+ '75.0.3763.2',
+ '75.0.3761.4',
+ '74.0.3729.82',
+ '75.0.3763.1',
+ '75.0.3763.0',
+ '74.0.3729.81',
+ '73.0.3683.112',
+ '75.0.3762.1',
+ '75.0.3762.0',
+ '74.0.3729.80',
+ '75.0.3761.3',
+ '74.0.3729.79',
+ '73.0.3683.111',
+ '75.0.3761.2',
+ '74.0.3729.78',
+ '74.0.3729.77',
+ '75.0.3761.1',
+ '75.0.3761.0',
+ '73.0.3683.110',
+ '74.0.3729.76',
+ '74.0.3729.75',
+ '75.0.3760.0',
+ '74.0.3729.74',
+ '75.0.3759.8',
+ '75.0.3759.7',
+ '75.0.3759.6',
+ '74.0.3729.73',
+ '75.0.3759.5',
+ '74.0.3729.72',
+ '73.0.3683.109',
+ '75.0.3759.4',
+ '75.0.3759.3',
+ '74.0.3729.71',
+ '75.0.3759.2',
+ '74.0.3729.70',
+ '73.0.3683.108',
+ '74.0.3729.69',
+ '75.0.3759.1',
+ '75.0.3759.0',
+ '74.0.3729.68',
+ '73.0.3683.107',
+ '74.0.3729.67',
+ '75.0.3758.1',
+ '75.0.3758.0',
+ '74.0.3729.66',
+ '73.0.3683.106',
+ '74.0.3729.65',
+ '75.0.3757.1',
+ '75.0.3757.0',
+ '74.0.3729.64',
+ '73.0.3683.105',
+ '74.0.3729.63',
+ '75.0.3756.1',
+ '75.0.3756.0',
+ '74.0.3729.62',
+ '73.0.3683.104',
+ '75.0.3755.3',
+ '75.0.3755.2',
+ '73.0.3683.103',
+ '75.0.3755.1',
+ '75.0.3755.0',
+ '74.0.3729.61',
+ '73.0.3683.102',
+ '74.0.3729.60',
+ '75.0.3754.2',
+ '74.0.3729.59',
+ '75.0.3753.4',
+ '74.0.3729.58',
+ '75.0.3754.1',
+ '75.0.3754.0',
+ '74.0.3729.57',
+ '73.0.3683.101',
+ '75.0.3753.3',
+ '75.0.3752.2',
+ '75.0.3753.2',
+ '74.0.3729.56',
+ '75.0.3753.1',
+ '75.0.3753.0',
+ '74.0.3729.55',
+ '73.0.3683.100',
+ '74.0.3729.54',
+ '75.0.3752.1',
+ '75.0.3752.0',
+ '74.0.3729.53',
+ '73.0.3683.99',
+ '74.0.3729.52',
+ '75.0.3751.1',
+ '75.0.3751.0',
+ '74.0.3729.51',
+ '73.0.3683.98',
+ '74.0.3729.50',
+ '75.0.3750.0',
+ '74.0.3729.49',
+ '74.0.3729.48',
+ '74.0.3729.47',
+ '75.0.3749.3',
+ '74.0.3729.46',
+ '73.0.3683.97',
+ '75.0.3749.2',
+ '74.0.3729.45',
+ '75.0.3749.1',
+ '75.0.3749.0',
+ '74.0.3729.44',
+ '73.0.3683.96',
+ '74.0.3729.43',
+ '74.0.3729.42',
+ '75.0.3748.1',
+ '75.0.3748.0',
+ '74.0.3729.41',
+ '75.0.3747.1',
+ '73.0.3683.95',
+ '75.0.3746.4',
+ '74.0.3729.40',
+ '74.0.3729.39',
+ '75.0.3747.0',
+ '75.0.3746.3',
+ '75.0.3746.2',
+ '74.0.3729.38',
+ '75.0.3746.1',
+ '75.0.3746.0',
+ '74.0.3729.37',
+ '73.0.3683.94',
+ '75.0.3745.5',
+ '75.0.3745.4',
+ '75.0.3745.3',
+ '75.0.3745.2',
+ '74.0.3729.36',
+ '75.0.3745.1',
+ '75.0.3745.0',
+ '75.0.3744.2',
+ '74.0.3729.35',
+ '73.0.3683.93',
+ '74.0.3729.34',
+ '75.0.3744.1',
+ '75.0.3744.0',
+ '74.0.3729.33',
+ '73.0.3683.92',
+ '74.0.3729.32',
+ '74.0.3729.31',
+ '73.0.3683.91',
+ '75.0.3741.2',
+ '75.0.3740.5',
+ '74.0.3729.30',
+ '75.0.3741.1',
+ '75.0.3741.0',
+ '74.0.3729.29',
+ '75.0.3740.4',
+ '73.0.3683.90',
+ '74.0.3729.28',
+ '75.0.3740.3',
+ '73.0.3683.89',
+ '75.0.3740.2',
+ '74.0.3729.27',
+ '75.0.3740.1',
+ '75.0.3740.0',
+ '74.0.3729.26',
+ '73.0.3683.88',
+ '73.0.3683.87',
+ '74.0.3729.25',
+ '75.0.3739.1',
+ '75.0.3739.0',
+ '73.0.3683.86',
+ '74.0.3729.24',
+ '73.0.3683.85',
+ '75.0.3738.4',
+ '75.0.3738.3',
+ '75.0.3738.2',
+ '75.0.3738.1',
+ '75.0.3738.0',
+ '74.0.3729.23',
+ '73.0.3683.84',
+ '74.0.3729.22',
+ '74.0.3729.21',
+ '75.0.3737.1',
+ '75.0.3737.0',
+ '74.0.3729.20',
+ '73.0.3683.83',
+ '74.0.3729.19',
+ '75.0.3736.1',
+ '75.0.3736.0',
+ '74.0.3729.18',
+ '73.0.3683.82',
+ '74.0.3729.17',
+ '75.0.3735.1',
+ '75.0.3735.0',
+ '74.0.3729.16',
+ '73.0.3683.81',
+ '75.0.3734.1',
+ '75.0.3734.0',
+ '74.0.3729.15',
+ '73.0.3683.80',
+ '74.0.3729.14',
+ '75.0.3733.1',
+ '75.0.3733.0',
+ '75.0.3732.1',
+ '74.0.3729.13',
+ '74.0.3729.12',
+ '73.0.3683.79',
+ '74.0.3729.11',
+ '75.0.3732.0',
+ '74.0.3729.10',
+ '73.0.3683.78',
+ '74.0.3729.9',
+ '74.0.3729.8',
+ '74.0.3729.7',
+ '75.0.3731.3',
+ '75.0.3731.2',
+ '75.0.3731.0',
+ '74.0.3729.6',
+ '73.0.3683.77',
+ '73.0.3683.76',
+ '75.0.3730.5',
+ '75.0.3730.4',
+ '73.0.3683.75',
+ '74.0.3729.5',
+ '73.0.3683.74',
+ '75.0.3730.3',
+ '75.0.3730.2',
+ '74.0.3729.4',
+ '73.0.3683.73',
+ '73.0.3683.72',
+ '75.0.3730.1',
+ '75.0.3730.0',
+ '74.0.3729.3',
+ '73.0.3683.71',
+ '74.0.3729.2',
+ '73.0.3683.70',
+ '74.0.3729.1',
+ '74.0.3729.0',
+ '74.0.3726.4',
+ '73.0.3683.69',
+ '74.0.3726.3',
+ '74.0.3728.0',
+ '74.0.3726.2',
+ '73.0.3683.68',
+ '74.0.3726.1',
+ '74.0.3726.0',
+ '74.0.3725.4',
+ '73.0.3683.67',
+ '73.0.3683.66',
+ '74.0.3725.3',
+ '74.0.3725.2',
+ '74.0.3725.1',
+ '74.0.3724.8',
+ '74.0.3725.0',
+ '73.0.3683.65',
+ '74.0.3724.7',
+ '74.0.3724.6',
+ '74.0.3724.5',
+ '74.0.3724.4',
+ '74.0.3724.3',
+ '74.0.3724.2',
+ '74.0.3724.1',
+ '74.0.3724.0',
+ '73.0.3683.64',
+ '74.0.3723.1',
+ '74.0.3723.0',
+ '73.0.3683.63',
+ '74.0.3722.1',
+ '74.0.3722.0',
+ '73.0.3683.62',
+ '74.0.3718.9',
+ '74.0.3702.3',
+ '74.0.3721.3',
+ '74.0.3721.2',
+ '74.0.3721.1',
+ '74.0.3721.0',
+ '74.0.3720.6',
+ '73.0.3683.61',
+ '72.0.3626.122',
+ '73.0.3683.60',
+ '74.0.3720.5',
+ '72.0.3626.121',
+ '74.0.3718.8',
+ '74.0.3720.4',
+ '74.0.3720.3',
+ '74.0.3718.7',
+ '74.0.3720.2',
+ '74.0.3720.1',
+ '74.0.3720.0',
+ '74.0.3718.6',
+ '74.0.3719.5',
+ '73.0.3683.59',
+ '74.0.3718.5',
+ '74.0.3718.4',
+ '74.0.3719.4',
+ '74.0.3719.3',
+ '74.0.3719.2',
+ '74.0.3719.1',
+ '73.0.3683.58',
+ '74.0.3719.0',
+ '73.0.3683.57',
+ '73.0.3683.56',
+ '74.0.3718.3',
+ '73.0.3683.55',
+ '74.0.3718.2',
+ '74.0.3718.1',
+ '74.0.3718.0',
+ '73.0.3683.54',
+ '74.0.3717.2',
+ '73.0.3683.53',
+ '74.0.3717.1',
+ '74.0.3717.0',
+ '73.0.3683.52',
+ '74.0.3716.1',
+ '74.0.3716.0',
+ '73.0.3683.51',
+ '74.0.3715.1',
+ '74.0.3715.0',
+ '73.0.3683.50',
+ '74.0.3711.2',
+ '74.0.3714.2',
+ '74.0.3713.3',
+ '74.0.3714.1',
+ '74.0.3714.0',
+ '73.0.3683.49',
+ '74.0.3713.1',
+ '74.0.3713.0',
+ '72.0.3626.120',
+ '73.0.3683.48',
+ '74.0.3712.2',
+ '74.0.3712.1',
+ '74.0.3712.0',
+ '73.0.3683.47',
+ '72.0.3626.119',
+ '73.0.3683.46',
+ '74.0.3710.2',
+ '72.0.3626.118',
+ '74.0.3711.1',
+ '74.0.3711.0',
+ '73.0.3683.45',
+ '72.0.3626.117',
+ '74.0.3710.1',
+ '74.0.3710.0',
+ '73.0.3683.44',
+ '72.0.3626.116',
+ '74.0.3709.1',
+ '74.0.3709.0',
+ '74.0.3704.9',
+ '73.0.3683.43',
+ '72.0.3626.115',
+ '74.0.3704.8',
+ '74.0.3704.7',
+ '74.0.3708.0',
+ '74.0.3706.7',
+ '74.0.3704.6',
+ '73.0.3683.42',
+ '72.0.3626.114',
+ '74.0.3706.6',
+ '72.0.3626.113',
+ '74.0.3704.5',
+ '74.0.3706.5',
+ '74.0.3706.4',
+ '74.0.3706.3',
+ '74.0.3706.2',
+ '74.0.3706.1',
+ '74.0.3706.0',
+ '73.0.3683.41',
+ '72.0.3626.112',
+ '74.0.3705.1',
+ '74.0.3705.0',
+ '73.0.3683.40',
+ '72.0.3626.111',
+ '73.0.3683.39',
+ '74.0.3704.4',
+ '73.0.3683.38',
+ '74.0.3704.3',
+ '74.0.3704.2',
+ '74.0.3704.1',
+ '74.0.3704.0',
+ '73.0.3683.37',
+ '72.0.3626.110',
+ '72.0.3626.109',
+ '74.0.3703.3',
+ '74.0.3703.2',
+ '73.0.3683.36',
+ '74.0.3703.1',
+ '74.0.3703.0',
+ '73.0.3683.35',
+ '72.0.3626.108',
+ '74.0.3702.2',
+ '74.0.3699.3',
+ '74.0.3702.1',
+ '74.0.3702.0',
+ '73.0.3683.34',
+ '72.0.3626.107',
+ '73.0.3683.33',
+ '74.0.3701.1',
+ '74.0.3701.0',
+ '73.0.3683.32',
+ '73.0.3683.31',
+ '72.0.3626.105',
+ '74.0.3700.1',
+ '74.0.3700.0',
+ '73.0.3683.29',
+ '72.0.3626.103',
+ '74.0.3699.2',
+ '74.0.3699.1',
+ '74.0.3699.0',
+ '73.0.3683.28',
+ '72.0.3626.102',
+ '73.0.3683.27',
+ '73.0.3683.26',
+ '74.0.3698.0',
+ '74.0.3696.2',
+ '72.0.3626.101',
+ '73.0.3683.25',
+ '74.0.3696.1',
+ '74.0.3696.0',
+ '74.0.3694.8',
+ '72.0.3626.100',
+ '74.0.3694.7',
+ '74.0.3694.6',
+ '74.0.3694.5',
+ '74.0.3694.4',
+ '72.0.3626.99',
+ '72.0.3626.98',
+ '74.0.3694.3',
+ '73.0.3683.24',
+ '72.0.3626.97',
+ '72.0.3626.96',
+ '72.0.3626.95',
+ '73.0.3683.23',
+ '72.0.3626.94',
+ '73.0.3683.22',
+ '73.0.3683.21',
+ '72.0.3626.93',
+ '74.0.3694.2',
+ '72.0.3626.92',
+ '74.0.3694.1',
+ '74.0.3694.0',
+ '74.0.3693.6',
+ '73.0.3683.20',
+ '72.0.3626.91',
+ '74.0.3693.5',
+ '74.0.3693.4',
+ '74.0.3693.3',
+ '74.0.3693.2',
+ '73.0.3683.19',
+ '74.0.3693.1',
+ '74.0.3693.0',
+ '73.0.3683.18',
+ '72.0.3626.90',
+ '74.0.3692.1',
+ '74.0.3692.0',
+ '73.0.3683.17',
+ '72.0.3626.89',
+ '74.0.3687.3',
+ '74.0.3691.1',
+ '74.0.3691.0',
+ '73.0.3683.16',
+ '72.0.3626.88',
+ '72.0.3626.87',
+ '73.0.3683.15',
+ '74.0.3690.1',
+ '74.0.3690.0',
+ '73.0.3683.14',
+ '72.0.3626.86',
+ '73.0.3683.13',
+ '73.0.3683.12',
+ '74.0.3689.1',
+ '74.0.3689.0',
+ '73.0.3683.11',
+ '72.0.3626.85',
+ '73.0.3683.10',
+ '72.0.3626.84',
+ '73.0.3683.9',
+ '74.0.3688.1',
+ '74.0.3688.0',
+ '73.0.3683.8',
+ '72.0.3626.83',
+ '74.0.3687.2',
+ '74.0.3687.1',
+ '74.0.3687.0',
+ '73.0.3683.7',
+ '72.0.3626.82',
+ '74.0.3686.4',
+ '72.0.3626.81',
+ '74.0.3686.3',
+ '74.0.3686.2',
+ '74.0.3686.1',
+ '74.0.3686.0',
+ '73.0.3683.6',
+ '72.0.3626.80',
+ '74.0.3685.1',
+ '74.0.3685.0',
+ '73.0.3683.5',
+ '72.0.3626.79',
+ '74.0.3684.1',
+ '74.0.3684.0',
+ '73.0.3683.4',
+ '72.0.3626.78',
+ '72.0.3626.77',
+ '73.0.3683.3',
+ '73.0.3683.2',
+ '72.0.3626.76',
+ '73.0.3683.1',
+ '73.0.3683.0',
+ '72.0.3626.75',
+ '71.0.3578.141',
+ '73.0.3682.1',
+ '73.0.3682.0',
+ '72.0.3626.74',
+ '71.0.3578.140',
+ '73.0.3681.4',
+ '73.0.3681.3',
+ '73.0.3681.2',
+ '73.0.3681.1',
+ '73.0.3681.0',
+ '72.0.3626.73',
+ '71.0.3578.139',
+ '72.0.3626.72',
+ '72.0.3626.71',
+ '73.0.3680.1',
+ '73.0.3680.0',
+ '72.0.3626.70',
+ '71.0.3578.138',
+ '73.0.3678.2',
+ '73.0.3679.1',
+ '73.0.3679.0',
+ '72.0.3626.69',
+ '71.0.3578.137',
+ '73.0.3678.1',
+ '73.0.3678.0',
+ '71.0.3578.136',
+ '73.0.3677.1',
+ '73.0.3677.0',
+ '72.0.3626.68',
+ '72.0.3626.67',
+ '71.0.3578.135',
+ '73.0.3676.1',
+ '73.0.3676.0',
+ '73.0.3674.2',
+ '72.0.3626.66',
+ '71.0.3578.134',
+ '73.0.3674.1',
+ '73.0.3674.0',
+ '72.0.3626.65',
+ '71.0.3578.133',
+ '73.0.3673.2',
+ '73.0.3673.1',
+ '73.0.3673.0',
+ '72.0.3626.64',
+ '71.0.3578.132',
+ '72.0.3626.63',
+ '72.0.3626.62',
+ '72.0.3626.61',
+ '72.0.3626.60',
+ '73.0.3672.1',
+ '73.0.3672.0',
+ '72.0.3626.59',
+ '71.0.3578.131',
+ '73.0.3671.3',
+ '73.0.3671.2',
+ '73.0.3671.1',
+ '73.0.3671.0',
+ '72.0.3626.58',
+ '71.0.3578.130',
+ '73.0.3670.1',
+ '73.0.3670.0',
+ '72.0.3626.57',
+ '71.0.3578.129',
+ '73.0.3669.1',
+ '73.0.3669.0',
+ '72.0.3626.56',
+ '71.0.3578.128',
+ '73.0.3668.2',
+ '73.0.3668.1',
+ '73.0.3668.0',
+ '72.0.3626.55',
+ '71.0.3578.127',
+ '73.0.3667.2',
+ '73.0.3667.1',
+ '73.0.3667.0',
+ '72.0.3626.54',
+ '71.0.3578.126',
+ '73.0.3666.1',
+ '73.0.3666.0',
+ '72.0.3626.53',
+ '71.0.3578.125',
+ '73.0.3665.4',
+ '73.0.3665.3',
+ '72.0.3626.52',
+ '73.0.3665.2',
+ '73.0.3664.4',
+ '73.0.3665.1',
+ '73.0.3665.0',
+ '72.0.3626.51',
+ '71.0.3578.124',
+ '72.0.3626.50',
+ '73.0.3664.3',
+ '73.0.3664.2',
+ '73.0.3664.1',
+ '73.0.3664.0',
+ '73.0.3663.2',
+ '72.0.3626.49',
+ '71.0.3578.123',
+ '73.0.3663.1',
+ '73.0.3663.0',
+ '72.0.3626.48',
+ '71.0.3578.122',
+ '73.0.3662.1',
+ '73.0.3662.0',
+ '72.0.3626.47',
+ '71.0.3578.121',
+ '73.0.3661.1',
+ '72.0.3626.46',
+ '73.0.3661.0',
+ '72.0.3626.45',
+ '71.0.3578.120',
+ '73.0.3660.2',
+ '73.0.3660.1',
+ '73.0.3660.0',
+ '72.0.3626.44',
+ '71.0.3578.119',
+ '73.0.3659.1',
+ '73.0.3659.0',
+ '72.0.3626.43',
+ '71.0.3578.118',
+ '73.0.3658.1',
+ '73.0.3658.0',
+ '72.0.3626.42',
+ '71.0.3578.117',
+ '73.0.3657.1',
+ '73.0.3657.0',
+ '72.0.3626.41',
+ '71.0.3578.116',
+ '73.0.3656.1',
+ '73.0.3656.0',
+ '72.0.3626.40',
+ '71.0.3578.115',
+ '73.0.3655.1',
+ '73.0.3655.0',
+ '72.0.3626.39',
+ '71.0.3578.114',
+ '73.0.3654.1',
+ '73.0.3654.0',
+ '72.0.3626.38',
+ '71.0.3578.113',
+ '73.0.3653.1',
+ '73.0.3653.0',
+ '72.0.3626.37',
+ '71.0.3578.112',
+ '73.0.3652.1',
+ '73.0.3652.0',
+ '72.0.3626.36',
+ '71.0.3578.111',
+ '73.0.3651.1',
+ '73.0.3651.0',
+ '72.0.3626.35',
+ '71.0.3578.110',
+ '73.0.3650.1',
+ '73.0.3650.0',
+ '72.0.3626.34',
+ '71.0.3578.109',
+ '73.0.3649.1',
+ '73.0.3649.0',
+ '72.0.3626.33',
+ '71.0.3578.108',
+ '73.0.3648.2',
+ '73.0.3648.1',
+ '73.0.3648.0',
+ '72.0.3626.32',
+ '71.0.3578.107',
+ '73.0.3647.2',
+ '73.0.3647.1',
+ '73.0.3647.0',
+ '72.0.3626.31',
+ '71.0.3578.106',
+ '73.0.3635.3',
+ '73.0.3646.2',
+ '73.0.3646.1',
+ '73.0.3646.0',
+ '72.0.3626.30',
+ '71.0.3578.105',
+ '72.0.3626.29',
+ '73.0.3645.2',
+ '73.0.3645.1',
+ '73.0.3645.0',
+ '72.0.3626.28',
+ '71.0.3578.104',
+ '72.0.3626.27',
+ '72.0.3626.26',
+ '72.0.3626.25',
+ '72.0.3626.24',
+ '73.0.3644.0',
+ '73.0.3643.2',
+ '72.0.3626.23',
+ '71.0.3578.103',
+ '73.0.3643.1',
+ '73.0.3643.0',
+ '72.0.3626.22',
+ '71.0.3578.102',
+ '73.0.3642.1',
+ '73.0.3642.0',
+ '72.0.3626.21',
+ '71.0.3578.101',
+ '73.0.3641.1',
+ '73.0.3641.0',
+ '72.0.3626.20',
+ '71.0.3578.100',
+ '72.0.3626.19',
+ '73.0.3640.1',
+ '73.0.3640.0',
+ '72.0.3626.18',
+ '73.0.3639.1',
+ '71.0.3578.99',
+ '73.0.3639.0',
+ '72.0.3626.17',
+ '73.0.3638.2',
+ '72.0.3626.16',
+ '73.0.3638.1',
+ '73.0.3638.0',
+ '72.0.3626.15',
+ '71.0.3578.98',
+ '73.0.3635.2',
+ '71.0.3578.97',
+ '73.0.3637.1',
+ '73.0.3637.0',
+ '72.0.3626.14',
+ '71.0.3578.96',
+ '71.0.3578.95',
+ '72.0.3626.13',
+ '71.0.3578.94',
+ '73.0.3636.2',
+ '71.0.3578.93',
+ '73.0.3636.1',
+ '73.0.3636.0',
+ '72.0.3626.12',
+ '71.0.3578.92',
+ '73.0.3635.1',
+ '73.0.3635.0',
+ '72.0.3626.11',
+ '71.0.3578.91',
+ '73.0.3634.2',
+ '73.0.3634.1',
+ '73.0.3634.0',
+ '72.0.3626.10',
+ '71.0.3578.90',
+ '71.0.3578.89',
+ '73.0.3633.2',
+ '73.0.3633.1',
+ '73.0.3633.0',
+ '72.0.3610.4',
+ '72.0.3626.9',
+ '71.0.3578.88',
+ '73.0.3632.5',
+ '73.0.3632.4',
+ '73.0.3632.3',
+ '73.0.3632.2',
+ '73.0.3632.1',
+ '73.0.3632.0',
+ '72.0.3626.8',
+ '71.0.3578.87',
+ '73.0.3631.2',
+ '73.0.3631.1',
+ '73.0.3631.0',
+ '72.0.3626.7',
+ '71.0.3578.86',
+ '72.0.3626.6',
+ '73.0.3630.1',
+ '73.0.3630.0',
+ '72.0.3626.5',
+ '71.0.3578.85',
+ '72.0.3626.4',
+ '73.0.3628.3',
+ '73.0.3628.2',
+ '73.0.3629.1',
+ '73.0.3629.0',
+ '72.0.3626.3',
+ '71.0.3578.84',
+ '73.0.3628.1',
+ '73.0.3628.0',
+ '71.0.3578.83',
+ '73.0.3627.1',
+ '73.0.3627.0',
+ '72.0.3626.2',
+ '71.0.3578.82',
+ '71.0.3578.81',
+ '71.0.3578.80',
+ '72.0.3626.1',
+ '72.0.3626.0',
+ '71.0.3578.79',
+ '70.0.3538.124',
+ '71.0.3578.78',
+ '72.0.3623.4',
+ '72.0.3625.2',
+ '72.0.3625.1',
+ '72.0.3625.0',
+ '71.0.3578.77',
+ '70.0.3538.123',
+ '72.0.3624.4',
+ '72.0.3624.3',
+ '72.0.3624.2',
+ '71.0.3578.76',
+ '72.0.3624.1',
+ '72.0.3624.0',
+ '72.0.3623.3',
+ '71.0.3578.75',
+ '70.0.3538.122',
+ '71.0.3578.74',
+ '72.0.3623.2',
+ '72.0.3610.3',
+ '72.0.3623.1',
+ '72.0.3623.0',
+ '72.0.3622.3',
+ '72.0.3622.2',
+ '71.0.3578.73',
+ '70.0.3538.121',
+ '72.0.3622.1',
+ '72.0.3622.0',
+ '71.0.3578.72',
+ '70.0.3538.120',
+ '72.0.3621.1',
+ '72.0.3621.0',
+ '71.0.3578.71',
+ '70.0.3538.119',
+ '72.0.3620.1',
+ '72.0.3620.0',
+ '71.0.3578.70',
+ '70.0.3538.118',
+ '71.0.3578.69',
+ '72.0.3619.1',
+ '72.0.3619.0',
+ '71.0.3578.68',
+ '70.0.3538.117',
+ '71.0.3578.67',
+ '72.0.3618.1',
+ '72.0.3618.0',
+ '71.0.3578.66',
+ '70.0.3538.116',
+ '72.0.3617.1',
+ '72.0.3617.0',
+ '71.0.3578.65',
+ '70.0.3538.115',
+ '72.0.3602.3',
+ '71.0.3578.64',
+ '72.0.3616.1',
+ '72.0.3616.0',
+ '71.0.3578.63',
+ '70.0.3538.114',
+ '71.0.3578.62',
+ '72.0.3615.1',
+ '72.0.3615.0',
+ '71.0.3578.61',
+ '70.0.3538.113',
+ '72.0.3614.1',
+ '72.0.3614.0',
+ '71.0.3578.60',
+ '70.0.3538.112',
+ '72.0.3613.1',
+ '72.0.3613.0',
+ '71.0.3578.59',
+ '70.0.3538.111',
+ '72.0.3612.2',
+ '72.0.3612.1',
+ '72.0.3612.0',
+ '70.0.3538.110',
+ '71.0.3578.58',
+ '70.0.3538.109',
+ '72.0.3611.2',
+ '72.0.3611.1',
+ '72.0.3611.0',
+ '71.0.3578.57',
+ '70.0.3538.108',
+ '72.0.3610.2',
+ '71.0.3578.56',
+ '71.0.3578.55',
+ '72.0.3610.1',
+ '72.0.3610.0',
+ '71.0.3578.54',
+ '70.0.3538.107',
+ '71.0.3578.53',
+ '72.0.3609.3',
+ '71.0.3578.52',
+ '72.0.3609.2',
+ '71.0.3578.51',
+ '72.0.3608.5',
+ '72.0.3609.1',
+ '72.0.3609.0',
+ '71.0.3578.50',
+ '70.0.3538.106',
+ '72.0.3608.4',
+ '72.0.3608.3',
+ '72.0.3608.2',
+ '71.0.3578.49',
+ '72.0.3608.1',
+ '72.0.3608.0',
+ '70.0.3538.105',
+ '71.0.3578.48',
+ '72.0.3607.1',
+ '72.0.3607.0',
+ '71.0.3578.47',
+ '70.0.3538.104',
+ '72.0.3606.2',
+ '72.0.3606.1',
+ '72.0.3606.0',
+ '71.0.3578.46',
+ '70.0.3538.103',
+ '70.0.3538.102',
+ '72.0.3605.3',
+ '72.0.3605.2',
+ '72.0.3605.1',
+ '72.0.3605.0',
+ '71.0.3578.45',
+ '70.0.3538.101',
+ '71.0.3578.44',
+ '71.0.3578.43',
+ '70.0.3538.100',
+ '70.0.3538.99',
+ '71.0.3578.42',
+ '72.0.3604.1',
+ '72.0.3604.0',
+ '71.0.3578.41',
+ '70.0.3538.98',
+ '71.0.3578.40',
+ '72.0.3603.2',
+ '72.0.3603.1',
+ '72.0.3603.0',
+ '71.0.3578.39',
+ '70.0.3538.97',
+ '72.0.3602.2',
+ '71.0.3578.38',
+ '71.0.3578.37',
+ '72.0.3602.1',
+ '72.0.3602.0',
+ '71.0.3578.36',
+ '70.0.3538.96',
+ '72.0.3601.1',
+ '72.0.3601.0',
+ '71.0.3578.35',
+ '70.0.3538.95',
+ '72.0.3600.1',
+ '72.0.3600.0',
+ '71.0.3578.34',
+ '70.0.3538.94',
+ '72.0.3599.3',
+ '72.0.3599.2',
+ '72.0.3599.1',
+ '72.0.3599.0',
+ '71.0.3578.33',
+ '70.0.3538.93',
+ '72.0.3598.1',
+ '72.0.3598.0',
+ '71.0.3578.32',
+ '70.0.3538.87',
+ '72.0.3597.1',
+ '72.0.3597.0',
+ '72.0.3596.2',
+ '71.0.3578.31',
+ '70.0.3538.86',
+ '71.0.3578.30',
+ '71.0.3578.29',
+ '72.0.3596.1',
+ '72.0.3596.0',
+ '71.0.3578.28',
+ '70.0.3538.85',
+ '72.0.3595.2',
+ '72.0.3591.3',
+ '72.0.3595.1',
+ '72.0.3595.0',
+ '71.0.3578.27',
+ '70.0.3538.84',
+ '72.0.3594.1',
+ '72.0.3594.0',
+ '71.0.3578.26',
+ '70.0.3538.83',
+ '72.0.3593.2',
+ '72.0.3593.1',
+ '72.0.3593.0',
+ '71.0.3578.25',
+ '70.0.3538.82',
+ '72.0.3589.3',
+ '72.0.3592.2',
+ '72.0.3592.1',
+ '72.0.3592.0',
+ '71.0.3578.24',
+ '72.0.3589.2',
+ '70.0.3538.81',
+ '70.0.3538.80',
+ '72.0.3591.2',
+ '72.0.3591.1',
+ '72.0.3591.0',
+ '71.0.3578.23',
+ '70.0.3538.79',
+ '71.0.3578.22',
+ '72.0.3590.1',
+ '72.0.3590.0',
+ '71.0.3578.21',
+ '70.0.3538.78',
+ '70.0.3538.77',
+ '72.0.3589.1',
+ '72.0.3589.0',
+ '71.0.3578.20',
+ '70.0.3538.76',
+ '71.0.3578.19',
+ '70.0.3538.75',
+ '72.0.3588.1',
+ '72.0.3588.0',
+ '71.0.3578.18',
+ '70.0.3538.74',
+ '72.0.3586.2',
+ '72.0.3587.0',
+ '71.0.3578.17',
+ '70.0.3538.73',
+ '72.0.3586.1',
+ '72.0.3586.0',
+ '71.0.3578.16',
+ '70.0.3538.72',
+ '72.0.3585.1',
+ '72.0.3585.0',
+ '71.0.3578.15',
+ '70.0.3538.71',
+ '71.0.3578.14',
+ '72.0.3584.1',
+ '72.0.3584.0',
+ '71.0.3578.13',
+ '70.0.3538.70',
+ '72.0.3583.2',
+ '71.0.3578.12',
+ '72.0.3583.1',
+ '72.0.3583.0',
+ '71.0.3578.11',
+ '70.0.3538.69',
+ '71.0.3578.10',
+ '72.0.3582.0',
+ '72.0.3581.4',
+ '71.0.3578.9',
+ '70.0.3538.67',
+ '72.0.3581.3',
+ '72.0.3581.2',
+ '72.0.3581.1',
+ '72.0.3581.0',
+ '71.0.3578.8',
+ '70.0.3538.66',
+ '72.0.3580.1',
+ '72.0.3580.0',
+ '71.0.3578.7',
+ '70.0.3538.65',
+ '71.0.3578.6',
+ '72.0.3579.1',
+ '72.0.3579.0',
+ '71.0.3578.5',
+ '70.0.3538.64',
+ '71.0.3578.4',
+ '71.0.3578.3',
+ '71.0.3578.2',
+ '71.0.3578.1',
+ '71.0.3578.0',
+ '70.0.3538.63',
+ '69.0.3497.128',
+ '70.0.3538.62',
+ '70.0.3538.61',
+ '70.0.3538.60',
+ '70.0.3538.59',
+ '71.0.3577.1',
+ '71.0.3577.0',
+ '70.0.3538.58',
+ '69.0.3497.127',
+ '71.0.3576.2',
+ '71.0.3576.1',
+ '71.0.3576.0',
+ '70.0.3538.57',
+ '70.0.3538.56',
+ '71.0.3575.2',
+ '70.0.3538.55',
+ '69.0.3497.126',
+ '70.0.3538.54',
+ '71.0.3575.1',
+ '71.0.3575.0',
+ '71.0.3574.1',
+ '71.0.3574.0',
+ '70.0.3538.53',
+ '69.0.3497.125',
+ '70.0.3538.52',
+ '71.0.3573.1',
+ '71.0.3573.0',
+ '70.0.3538.51',
+ '69.0.3497.124',
+ '71.0.3572.1',
+ '71.0.3572.0',
+ '70.0.3538.50',
+ '69.0.3497.123',
+ '71.0.3571.2',
+ '70.0.3538.49',
+ '69.0.3497.122',
+ '71.0.3571.1',
+ '71.0.3571.0',
+ '70.0.3538.48',
+ '69.0.3497.121',
+ '71.0.3570.1',
+ '71.0.3570.0',
+ '70.0.3538.47',
+ '69.0.3497.120',
+ '71.0.3568.2',
+ '71.0.3569.1',
+ '71.0.3569.0',
+ '70.0.3538.46',
+ '69.0.3497.119',
+ '70.0.3538.45',
+ '71.0.3568.1',
+ '71.0.3568.0',
+ '70.0.3538.44',
+ '69.0.3497.118',
+ '70.0.3538.43',
+ '70.0.3538.42',
+ '71.0.3567.1',
+ '71.0.3567.0',
+ '70.0.3538.41',
+ '69.0.3497.117',
+ '71.0.3566.1',
+ '71.0.3566.0',
+ '70.0.3538.40',
+ '69.0.3497.116',
+ '71.0.3565.1',
+ '71.0.3565.0',
+ '70.0.3538.39',
+ '69.0.3497.115',
+ '71.0.3564.1',
+ '71.0.3564.0',
+ '70.0.3538.38',
+ '69.0.3497.114',
+ '71.0.3563.0',
+ '71.0.3562.2',
+ '70.0.3538.37',
+ '69.0.3497.113',
+ '70.0.3538.36',
+ '70.0.3538.35',
+ '71.0.3562.1',
+ '71.0.3562.0',
+ '70.0.3538.34',
+ '69.0.3497.112',
+ '70.0.3538.33',
+ '71.0.3561.1',
+ '71.0.3561.0',
+ '70.0.3538.32',
+ '69.0.3497.111',
+ '71.0.3559.6',
+ '71.0.3560.1',
+ '71.0.3560.0',
+ '71.0.3559.5',
+ '71.0.3559.4',
+ '70.0.3538.31',
+ '69.0.3497.110',
+ '71.0.3559.3',
+ '70.0.3538.30',
+ '69.0.3497.109',
+ '71.0.3559.2',
+ '71.0.3559.1',
+ '71.0.3559.0',
+ '70.0.3538.29',
+ '69.0.3497.108',
+ '71.0.3558.2',
+ '71.0.3558.1',
+ '71.0.3558.0',
+ '70.0.3538.28',
+ '69.0.3497.107',
+ '71.0.3557.2',
+ '71.0.3557.1',
+ '71.0.3557.0',
+ '70.0.3538.27',
+ '69.0.3497.106',
+ '71.0.3554.4',
+ '70.0.3538.26',
+ '71.0.3556.1',
+ '71.0.3556.0',
+ '70.0.3538.25',
+ '71.0.3554.3',
+ '69.0.3497.105',
+ '71.0.3554.2',
+ '70.0.3538.24',
+ '69.0.3497.104',
+ '71.0.3555.2',
+ '70.0.3538.23',
+ '71.0.3555.1',
+ '71.0.3555.0',
+ '70.0.3538.22',
+ '69.0.3497.103',
+ '71.0.3554.1',
+ '71.0.3554.0',
+ '70.0.3538.21',
+ '69.0.3497.102',
+ '71.0.3553.3',
+ '70.0.3538.20',
+ '69.0.3497.101',
+ '71.0.3553.2',
+ '69.0.3497.100',
+ '71.0.3553.1',
+ '71.0.3553.0',
+ '70.0.3538.19',
+ '69.0.3497.99',
+ '69.0.3497.98',
+ '69.0.3497.97',
+ '71.0.3552.6',
+ '71.0.3552.5',
+ '71.0.3552.4',
+ '71.0.3552.3',
+ '71.0.3552.2',
+ '71.0.3552.1',
+ '71.0.3552.0',
+ '70.0.3538.18',
+ '69.0.3497.96',
+ '71.0.3551.3',
+ '71.0.3551.2',
+ '71.0.3551.1',
+ '71.0.3551.0',
+ '70.0.3538.17',
+ '69.0.3497.95',
+ '71.0.3550.3',
+ '71.0.3550.2',
+ '71.0.3550.1',
+ '71.0.3550.0',
+ '70.0.3538.16',
+ '69.0.3497.94',
+ '71.0.3549.1',
+ '71.0.3549.0',
+ '70.0.3538.15',
+ '69.0.3497.93',
+ '69.0.3497.92',
+ '71.0.3548.1',
+ '71.0.3548.0',
+ '70.0.3538.14',
+ '69.0.3497.91',
+ '71.0.3547.1',
+ '71.0.3547.0',
+ '70.0.3538.13',
+ '69.0.3497.90',
+ '71.0.3546.2',
+ '69.0.3497.89',
+ '71.0.3546.1',
+ '71.0.3546.0',
+ '70.0.3538.12',
+ '69.0.3497.88',
+ '71.0.3545.4',
+ '71.0.3545.3',
+ '71.0.3545.2',
+ '71.0.3545.1',
+ '71.0.3545.0',
+ '70.0.3538.11',
+ '69.0.3497.87',
+ '71.0.3544.5',
+ '71.0.3544.4',
+ '71.0.3544.3',
+ '71.0.3544.2',
+ '71.0.3544.1',
+ '71.0.3544.0',
+ '69.0.3497.86',
+ '70.0.3538.10',
+ '69.0.3497.85',
+ '70.0.3538.9',
+ '69.0.3497.84',
+ '71.0.3543.4',
+ '70.0.3538.8',
+ '71.0.3543.3',
+ '71.0.3543.2',
+ '71.0.3543.1',
+ '71.0.3543.0',
+ '70.0.3538.7',
+ '69.0.3497.83',
+ '71.0.3542.2',
+ '71.0.3542.1',
+ '71.0.3542.0',
+ '70.0.3538.6',
+ '69.0.3497.82',
+ '69.0.3497.81',
+ '71.0.3541.1',
+ '71.0.3541.0',
+ '70.0.3538.5',
+ '69.0.3497.80',
+ '71.0.3540.1',
+ '71.0.3540.0',
+ '70.0.3538.4',
+ '69.0.3497.79',
+ '70.0.3538.3',
+ '71.0.3539.1',
+ '71.0.3539.0',
+ '69.0.3497.78',
+ '68.0.3440.134',
+ '69.0.3497.77',
+ '70.0.3538.2',
+ '70.0.3538.1',
+ '70.0.3538.0',
+ '69.0.3497.76',
+ '68.0.3440.133',
+ '69.0.3497.75',
+ '70.0.3537.2',
+ '70.0.3537.1',
+ '70.0.3537.0',
+ '69.0.3497.74',
+ '68.0.3440.132',
+ '70.0.3536.0',
+ '70.0.3535.5',
+ '70.0.3535.4',
+ '70.0.3535.3',
+ '69.0.3497.73',
+ '68.0.3440.131',
+ '70.0.3532.8',
+ '70.0.3532.7',
+ '69.0.3497.72',
+ '69.0.3497.71',
+ '70.0.3535.2',
+ '70.0.3535.1',
+ '70.0.3535.0',
+ '69.0.3497.70',
+ '68.0.3440.130',
+ '69.0.3497.69',
+ '68.0.3440.129',
+ '70.0.3534.4',
+ '70.0.3534.3',
+ '70.0.3534.2',
+ '70.0.3534.1',
+ '70.0.3534.0',
+ '69.0.3497.68',
+ '68.0.3440.128',
+ '70.0.3533.2',
+ '70.0.3533.1',
+ '70.0.3533.0',
+ '69.0.3497.67',
+ '68.0.3440.127',
+ '70.0.3532.6',
+ '70.0.3532.5',
+ '70.0.3532.4',
+ '69.0.3497.66',
+ '68.0.3440.126',
+ '70.0.3532.3',
+ '70.0.3532.2',
+ '70.0.3532.1',
+ '69.0.3497.60',
+ '69.0.3497.65',
+ '69.0.3497.64',
+ '70.0.3532.0',
+ '70.0.3531.0',
+ '70.0.3530.4',
+ '70.0.3530.3',
+ '70.0.3530.2',
+ '69.0.3497.58',
+ '68.0.3440.125',
+ '69.0.3497.57',
+ '69.0.3497.56',
+ '69.0.3497.55',
+ '69.0.3497.54',
+ '70.0.3530.1',
+ '70.0.3530.0',
+ '69.0.3497.53',
+ '68.0.3440.124',
+ '69.0.3497.52',
+ '70.0.3529.3',
+ '70.0.3529.2',
+ '70.0.3529.1',
+ '70.0.3529.0',
+ '69.0.3497.51',
+ '70.0.3528.4',
+ '68.0.3440.123',
+ '70.0.3528.3',
+ '70.0.3528.2',
+ '70.0.3528.1',
+ '70.0.3528.0',
+ '69.0.3497.50',
+ '68.0.3440.122',
+ '70.0.3527.1',
+ '70.0.3527.0',
+ '69.0.3497.49',
+ '68.0.3440.121',
+ '70.0.3526.1',
+ '70.0.3526.0',
+ '68.0.3440.120',
+ '69.0.3497.48',
+ '69.0.3497.47',
+ '68.0.3440.119',
+ '68.0.3440.118',
+ '70.0.3525.5',
+ '70.0.3525.4',
+ '70.0.3525.3',
+ '68.0.3440.117',
+ '69.0.3497.46',
+ '70.0.3525.2',
+ '70.0.3525.1',
+ '70.0.3525.0',
+ '69.0.3497.45',
+ '68.0.3440.116',
+ '70.0.3524.4',
+ '70.0.3524.3',
+ '69.0.3497.44',
+ '70.0.3524.2',
+ '70.0.3524.1',
+ '70.0.3524.0',
+ '70.0.3523.2',
+ '69.0.3497.43',
+ '68.0.3440.115',
+ '70.0.3505.9',
+ '69.0.3497.42',
+ '70.0.3505.8',
+ '70.0.3523.1',
+ '70.0.3523.0',
+ '69.0.3497.41',
+ '68.0.3440.114',
+ '70.0.3505.7',
+ '69.0.3497.40',
+ '70.0.3522.1',
+ '70.0.3522.0',
+ '70.0.3521.2',
+ '69.0.3497.39',
+ '68.0.3440.113',
+ '70.0.3505.6',
+ '70.0.3521.1',
+ '70.0.3521.0',
+ '69.0.3497.38',
+ '68.0.3440.112',
+ '70.0.3520.1',
+ '70.0.3520.0',
+ '69.0.3497.37',
+ '68.0.3440.111',
+ '70.0.3519.3',
+ '70.0.3519.2',
+ '70.0.3519.1',
+ '70.0.3519.0',
+ '69.0.3497.36',
+ '68.0.3440.110',
+ '70.0.3518.1',
+ '70.0.3518.0',
+ '69.0.3497.35',
+ '69.0.3497.34',
+ '68.0.3440.109',
+ '70.0.3517.1',
+ '70.0.3517.0',
+ '69.0.3497.33',
+ '68.0.3440.108',
+ '69.0.3497.32',
+ '70.0.3516.3',
+ '70.0.3516.2',
+ '70.0.3516.1',
+ '70.0.3516.0',
+ '69.0.3497.31',
+ '68.0.3440.107',
+ '70.0.3515.4',
+ '68.0.3440.106',
+ '70.0.3515.3',
+ '70.0.3515.2',
+ '70.0.3515.1',
+ '70.0.3515.0',
+ '69.0.3497.30',
+ '68.0.3440.105',
+ '68.0.3440.104',
+ '70.0.3514.2',
+ '70.0.3514.1',
+ '70.0.3514.0',
+ '69.0.3497.29',
+ '68.0.3440.103',
+ '70.0.3513.1',
+ '70.0.3513.0',
+ '69.0.3497.28',
+ )
- @staticmethod
- def _extract_urls(webpage):
+ # Find embedded player iframes in ``webpage`` and return the list of their
+ # ``src`` URLs (the single capture group of the pattern below).
+ # This is a classmethod rather than the previous staticmethod so that the
+ # pattern is built from the class's own _DOMAINS and _EMBED_WORD instead of
+ # hard-coded openload hosts and the literal 'embed' path segment.
+ @classmethod
+ def _extract_urls(cls, webpage):
return re.findall(
- r'<iframe[^>]+src=["\']((?:https?://)?(?:openload\.(?:co|io)|oload\.tv)/embed/[a-zA-Z0-9-_]+)',
- webpage)
+ r'<iframe[^>]+src=["\']((?:https?://)?%s/%s/[a-zA-Z0-9-_]+)'
+ % (cls._DOMAINS, cls._EMBED_WORD), webpage)
+
+ # Return the page HTML obtained by passing ``webpage`` through PhantomJS.
+ # This step is a separate method so subclasses can replace it; VerystreamIE
+ # overrides it to return ``webpage`` unchanged.
+ def _extract_decrypted_page(self, page_url, webpage, video_id, headers):
+ # NOTE(review): required_version='2.0' is presumably the minimum PhantomJS
+ # version the wrapper accepts -- confirm against PhantomJSwrapper.
+ phantom = PhantomJSwrapper(self, required_version='2.0')
+ # get() returns a pair; only its first item (the page) is used here.
+ webpage, _ = phantom.get(page_url, html=webpage, video_id=video_id, headers=headers)
+ return webpage
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
url_pattern = 'https://%s/%%s/%s/' % (host, video_id)
headers = {
- 'User-Agent': self._USER_AGENT,
+ 'User-Agent': self._USER_AGENT_TPL % random.choice(self._CHROME_VERSIONS),
}
- for path in ('embed', 'f'):
+ for path in (self._EMBED_WORD, self._STREAM_WORD):
page_url = url_pattern % path
- last = path == 'f'
+ last = path == self._STREAM_WORD
webpage = self._download_webpage(
page_url, video_id, 'Downloading %s webpage' % path,
headers=headers, fatal=last)
raise ExtractorError('File not found', expected=True, video_id=video_id)
break
- phantom = PhantomJSwrapper(self, required_version='2.0')
- webpage, _ = phantom.get(page_url, html=webpage, video_id=video_id, headers=headers)
-
- decoded_id = (get_element_by_id('streamurl', webpage) or
- get_element_by_id('streamuri', webpage) or
- get_element_by_id('streamurj', webpage) or
- self._search_regex(
- (r'>\s*([\w-]+~\d{10,}~\d+\.\d+\.0\.0~[\w-]+)\s*<',
- r'>\s*([\w~-]+~\d+\.\d+\.\d+\.\d+~[\w~-]+)',
- r'>\s*([\w-]+~\d{10,}~(?:[a-f\d]+:){2}:~[\w-]+)\s*<',
- r'>\s*([\w~-]+~[a-f0-9:]+~[\w~-]+)\s*<',
- r'>\s*([\w~-]+~[a-f0-9:]+~[\w~-]+)'), webpage,
- 'stream URL'))
-
- video_url = 'https://%s/stream/%s?mime=true' % (host, decoded_id)
+ webpage = self._extract_decrypted_page(page_url, webpage, video_id, headers)
+ for element_id in self._URL_IDS:
+ decoded_id = get_element_by_id(element_id, webpage)
+ if decoded_id:
+ break
+ if not decoded_id:
+ decoded_id = self._search_regex(
+ (r'>\s*([\w-]+~\d{10,}~\d+\.\d+\.0\.0~[\w-]+)\s*<',
+ r'>\s*([\w~-]+~\d+\.\d+\.\d+\.\d+~[\w~-]+)',
+ r'>\s*([\w-]+~\d{10,}~(?:[a-f\d]+:){2}:~[\w-]+)\s*<',
+ r'>\s*([\w~-]+~[a-f0-9:]+~[\w~-]+)\s*<',
+ r'>\s*([\w~-]+~[a-f0-9:]+~[\w~-]+)'), webpage,
+ 'stream URL')
+ video_url = 'https://%s/%s/%s?mime=true' % (host, self._REDIR_WORD, decoded_id)
title = self._og_search_title(webpage, default=None) or self._search_regex(
r'<span[^>]+class=["\']title["\'][^>]*>([^<]+)', webpage,
'subtitles': subtitles,
'http_headers': headers,
}
+
+
+# Extractor for verystream.com, implemented as a thin OpenloadIE subclass:
+# it only overrides the class-level constants that the inherited extraction
+# code reads, plus the page-decryption hook.
+class VerystreamIE(OpenloadIE):
+ IE_NAME = 'verystream'
+
+ # Host pattern; interpolated into _VALID_URL below and into the iframe
+ # pattern built by the inherited _extract_urls classmethod.
+ _DOMAINS = r'(?:verystream\.com)'
+ _VALID_URL = r'''(?x)
+ https?://
+ (?P<host>
+ (?:www\.)?
+ %s
+ )/
+ (?:stream|e)/
+ (?P<id>[a-zA-Z0-9-_]+)
+ ''' % _DOMAINS
+ # URL path segments of the embed and stream pages; _real_extract tries
+ # them in this order when downloading the webpage.
+ _EMBED_WORD = 'e'
+ _STREAM_WORD = 'stream'
+ # Path segment used when building the final video URL
+ # ('https://<host>/<_REDIR_WORD>/<decoded id>?mime=true').
+ _REDIR_WORD = 'gettoken'
+ # HTML element ids that _real_extract looks up (via get_element_by_id)
+ # to find the decoded stream id.
+ _URL_IDS = ('videolink', )
+ _TESTS = [{
+ 'url': 'https://verystream.com/stream/c1GWQ9ngBBx/',
+ 'md5': 'd3e8c5628ccb9970b65fd65269886795',
+ 'info_dict': {
+ 'id': 'c1GWQ9ngBBx',
+ 'ext': 'mp4',
+ 'title': 'Big Buck Bunny.mp4',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ },
+ }, {
+ 'url': 'https://verystream.com/e/c1GWQ9ngBBx/',
+ 'only_matching': True,
+ }]
+
+ # Overrides the OpenloadIE hook that runs the page through PhantomJS:
+ # here the downloaded page is returned as-is and the other arguments
+ # are ignored.
+ def _extract_decrypted_page(self, page_url, webpage, video_id, headers):
+ return webpage # for Verystream, the webpage is already decrypted
'description': subtitle,
'duration': (info['end'] - info['start']) / 1000,
'timestamp': info['start'] / 1000,
- 'ext': 'mp3'
+ 'ext': 'mp3',
+ 'series': data.get('programTitle')
}
entries = [extract_entry_dict(t, data['title'], data['subtitle']) for t in data['streams']]
'https://www.panda.tv/api_room_v2?roomid=%s' % video_id, video_id)
error_code = config.get('errno', 0)
- if error_code is not 0:
+ if error_code != 0:
raise ExtractorError(
'%s returned error %s: %s'
% (self.IE_NAME, error_code, config['errmsg']),
{
# Video embedded in iframe containing angle brackets as attribute's value (e.g.
# "<iframe style='position: absolute;<br />\ntop: 0; left: 0;' ...", see
- # https://github.com/rg3/youtube-dl/issues/7059)
+ # https://github.com/ytdl-org/youtube-dl/issues/7059)
'url': 'http://www.pbs.org/food/features/a-chefs-life-season-3-episode-5-prickly-business/',
'md5': '59b0ef5009f9ac8a319cc5efebcd865e',
'info_dict': {
},
},
{
- # https://github.com/rg3/youtube-dl/issues/13801
+ # https://github.com/ytdl-org/youtube-dl/issues/13801
'url': 'https://www.pbs.org/video/pbs-newshour-full-episode-july-31-2017-1501539057/',
'info_dict': {
'id': '3003333873',
# we won't try extracting them.
# Since summer 2016 higher quality formats (4500k and 6500k) are also available
# albeit they are not documented in [2].
- # 1. https://github.com/rg3/youtube-dl/commit/cbc032c8b70a038a69259378c92b4ba97b42d491#commitcomment-17313656
+ # 1. https://github.com/ytdl-org/youtube-dl/commit/cbc032c8b70a038a69259378c92b4ba97b42d491#commitcomment-17313656
# 2. https://projects.pbs.org/confluence/display/coveapi/COVE+Video+Specifications
if not bitrate or int(bitrate) < 400:
continue
from .common import InfoExtractor
from ..utils import (
+ int_or_none,
parse_iso8601,
unescapeHTML,
)
'url': broadcast[image],
} for image in ('image_url', 'image_url_small') if broadcast.get(image)]
+ width = int_or_none(broadcast.get('width'))
+ height = int_or_none(broadcast.get('height'))
+
+        def add_width_and_height(f):
+            """Fill in 'width'/'height' on format dict f when missing or falsy.
+
+            The values come from the enclosing scope's width and height
+            (parsed from the broadcast); existing truthy values are kept.
+            """
+            if not f.get('width'):
+                f['width'] = width
+            if not f.get('height'):
+                f['height'] = height
+
video_urls = set()
formats = []
for format_id in ('replay', 'rtmp', 'hls', 'https_hls', 'lhls', 'lhlsweb'):
continue
video_urls.add(video_url)
if format_id != 'rtmp':
- formats.extend(self._extract_m3u8_formats(
+ m3u8_formats = self._extract_m3u8_formats(
video_url, token, 'mp4',
entry_protocol='m3u8_native'
if state in ('ended', 'timed_out') else 'm3u8',
- m3u8_id=format_id, fatal=False))
+ m3u8_id=format_id, fatal=False)
+ if len(m3u8_formats) == 1:
+ add_width_and_height(m3u8_formats[0])
+ formats.extend(m3u8_formats)
continue
- formats.append({
+ rtmp_format = {
'url': video_url,
'ext': 'flv' if format_id == 'rtmp' else 'mp4',
- })
+ }
+ add_width_and_height(rtmp_format)
+ formats.append(rtmp_format)
self._sort_formats(formats)
return {
--- /dev/null
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_b64decode,
+ compat_str,
+)
+from ..utils import (
+ clean_html,
+ ExtractorError,
+ int_or_none,
+ str_or_none,
+ try_get,
+ url_or_none,
+ urlencode_postdata,
+ urljoin,
+)
+
+
+class PlatziIE(InfoExtractor):
+    """Extractor for a single Platzi lecture (es and en site versions).
+
+    Logs in during initialization when credentials are available.
+    """
+    _VALID_URL = r'''(?x)
+                    https?://
+                        (?:
+                            platzi\.com/clases|           # es version
+                            courses\.platzi\.com/classes  # en version
+                        )/[^/]+/(?P<id>\d+)-[^/?\#&]+
+                    '''
+    _LOGIN_URL = 'https://platzi.com/login/'
+    _NETRC_MACHINE = 'platzi'
+
+    _TESTS = [{
+        'url': 'https://platzi.com/clases/1311-next-js/12074-creando-nuestra-primera-pagina/',
+        'md5': '8f56448241005b561c10f11a595b37e3',
+        'info_dict': {
+            'id': '12074',
+            'ext': 'mp4',
+            'title': 'Creando nuestra primera página',
+            'description': 'md5:4c866e45034fc76412fbf6e60ae008bc',
+            'duration': 420,
+        },
+        'skip': 'Requires platzi account credentials',
+    }, {
+        'url': 'https://courses.platzi.com/classes/1367-communication-codestream/13430-background/',
+        'info_dict': {
+            'id': '13430',
+            'ext': 'mp4',
+            'title': 'Background',
+            'description': 'md5:49c83c09404b15e6e71defaf87f6b305',
+            'duration': 360,
+        },
+        'skip': 'Requires platzi account credentials',
+        'params': {
+            'skip_download': True,
+        },
+    }]
+
+    def _real_initialize(self):
+        self._login()
+
+    def _login(self):
+        """Submit the Platzi login form with the configured credentials.
+
+        Returns silently when no username is configured or when the final
+        URL of the login request is no longer the login page. Otherwise
+        raises ExtractorError, using the first error message found in the
+        page's `login` object when there is one.
+        """
+        username, password = self._get_login_info()
+        if username is None:
+            return
+
+        login_page = self._download_webpage(
+            self._LOGIN_URL, None, 'Downloading login page')
+
+        # Start from the form's hidden inputs and add the credentials.
+        form_data = self._hidden_inputs(login_page)
+        form_data['email'] = username
+        form_data['password'] = password
+
+        response = self._request_webpage(
+            self._LOGIN_URL, None, 'Logging in',
+            data=urlencode_postdata(form_data),
+            headers={'Referer': self._LOGIN_URL})
+
+        final_url = compat_str(response.geturl())
+        if 'platzi.com/login' not in final_url:
+            # No longer on the login page: treated as a successful login.
+            return
+
+        error_page = self._webpage_read_content(
+            response, self._LOGIN_URL, None, 'Downloading login error page')
+
+        login_json = self._search_regex(
+            r'login\s*=\s*({.+?})(?:\s*;|\s*</script)', error_page, 'login')
+        login = self._parse_json(login_json, None)
+
+        for kind in ('error', 'password', 'nonFields'):
+            message = str_or_none(login.get('%sError' % kind))
+            if message:
+                raise ExtractorError(
+                    'Unable to login: %s' % message, expected=True)
+        raise ExtractorError('Unable to log in')
+
+    def _real_extract(self, url):
+        """Build the info dict for one lecture from the page's client_data JSON."""
+        lecture_id = self._match_id(url)
+        webpage = self._download_webpage(url, lecture_id)
+
+        client_data = self._search_regex(
+            r'client_data\s*=\s*({.+?})\s*;', webpage, 'client data')
+        data = self._parse_json(client_data, lecture_id)
+
+        material = data['initialState']['material']
+        desc = material['description']
+        title = desc['title']
+
+        formats = []
+        for server_id, server in material['videos'].items():
+            if not isinstance(server, dict):
+                continue
+            # Each server entry may offer an HLS and/or a DASH manifest;
+            # a manifest that fails to download is skipped (fatal=False).
+            hls_url = url_or_none(server.get('hls'))
+            if hls_url:
+                formats.extend(self._extract_m3u8_formats(
+                    hls_url, lecture_id, 'mp4',
+                    entry_protocol='m3u8_native', m3u8_id='hls',
+                    note='Downloading %s m3u8 information' % server_id,
+                    fatal=False))
+            dash_url = url_or_none(server.get('dash'))
+            if dash_url:
+                formats.extend(self._extract_mpd_formats(
+                    dash_url, lecture_id, mpd_id='dash',
+                    note='Downloading %s MPD manifest' % server_id,
+                    fatal=False))
+        self._sort_formats(formats)
+
+        # The description content is base64-encoded HTML.
+        description = None
+        content = str_or_none(desc.get('content'))
+        if content:
+            description = clean_html(compat_b64decode(content).decode('utf-8'))
+
+        # NOTE(review): invscale=60 presumably converts minutes to seconds - confirm.
+        duration = int_or_none(material.get('duration'), invscale=60)
+
+        return {
+            'id': lecture_id,
+            'title': title,
+            'description': description,
+            'duration': duration,
+            'formats': formats,
+        }
+
+
+class PlatziCourseIE(InfoExtractor):
+    """Playlist extractor for a Platzi course page (es and en site versions).
+
+    Every video material listed in the course's chapters becomes a
+    url_transparent entry that is handed to PlatziIE.
+    """
+    _VALID_URL = r'''(?x)
+                    https?://
+                        (?:
+                            platzi\.com/clases|           # es version
+                            courses\.platzi\.com/classes  # en version
+                        )/(?P<id>[^/?\#&]+)
+                    '''
+    _TESTS = [{
+        'url': 'https://platzi.com/clases/next-js/',
+        'info_dict': {
+            'id': '1311',
+            'title': 'Curso de Next.js',
+        },
+        'playlist_count': 22,
+    }, {
+        'url': 'https://courses.platzi.com/classes/communication-codestream/',
+        'info_dict': {
+            'id': '1367',
+            'title': 'Codestream Course',
+        },
+        'playlist_count': 14,
+    }]
+
+    @classmethod
+    def suitable(cls, url):
+        # Lecture URLs also match this class's pattern; leave them to PlatziIE.
+        return False if PlatziIE.suitable(url) else super(PlatziCourseIE, cls).suitable(url)
+
+    def _real_extract(self, url):
+        """Return a playlist of the course's video lectures.
+
+        The course data is read from the `data = {...};` JSON embedded in
+        the page. Chapters and materials that are not dicts, materials that
+        are not videos and materials without a usable URL are skipped.
+        """
+        course_name = self._match_id(url)
+
+        webpage = self._download_webpage(url, course_name)
+
+        props = self._parse_json(
+            self._search_regex(r'data\s*=\s*({.+?})\s*;', webpage, 'data'),
+            course_name)['initialProps']
+
+        entries = []
+        # chapter_num counts every item in 'concepts', including skipped ones.
+        for chapter_num, chapter in enumerate(props['concepts'], 1):
+            if not isinstance(chapter, dict):
+                continue
+            materials = chapter.get('materials')
+            if not materials or not isinstance(materials, list):
+                continue
+            chapter_title = chapter.get('title')
+            chapter_id = str_or_none(chapter.get('id'))
+            for material in materials:
+                if not isinstance(material, dict):
+                    continue
+                if material.get('material_type') != 'video':
+                    continue
+                video_url = urljoin(url, material.get('url'))
+                if not video_url:
+                    continue
+                entries.append({
+                    '_type': 'url_transparent',
+                    'url': video_url,
+                    'title': str_or_none(material.get('name')),
+                    'id': str_or_none(material.get('id')),
+                    'ie_key': PlatziIE.ie_key(),
+                    'chapter': chapter_title,
+                    'chapter_number': chapter_num,
+                    'chapter_id': chapter_id,
+                })
+
+        # str_or_none keeps a missing course id as None; compat_str() would
+        # have turned it into the literal playlist id 'None'.
+        course_id = str_or_none(try_get(props, lambda x: x['course']['id']))
+        course_title = try_get(props, lambda x: x['course']['name'], compat_str)
+
+        return self.playlist_result(entries, course_id, course_title)
)
# Some courses also offer widescreen resolution for high quality (see
- # https://github.com/rg3/youtube-dl/issues/7766)
+ # https://github.com/ytdl-org/youtube-dl/issues/7766)
widescreen = course.get('supportsWideScreenVideoFormats') is True
best_quality = 'high-widescreen' if widescreen else 'high'
if widescreen:
# Pluralsight tracks multiple sequential calls to ViewClip API and start
# to return 429 HTTP errors after some time (see
- # https://github.com/rg3/youtube-dl/pull/6989). Moreover it may even lead
- # to account ban (see https://github.com/rg3/youtube-dl/issues/6842).
+ # https://github.com/ytdl-org/youtube-dl/pull/6989). Moreover it may even lead
+ # to account ban (see https://github.com/ytdl-org/youtube-dl/issues/6842).
# To somewhat reduce the probability of these consequences
# we will sleep random amount of time before each call to ViewClip.
self._sleep(
video_id = mobj.group('id')
channel = mobj.group('channel') or mobj.group('channel_2')
- json_url = (('%s://%s.podomatic.com/entry/embed_params/%s' +
- '?permalink=true&rtmp=0') %
+ json_url = (('%s://%s.podomatic.com/entry/embed_params/%s'
+ + '?permalink=true&rtmp=0') %
(mobj.group('proto'), channel, video_id))
data_json = self._download_webpage(
json_url, video_id, 'Downloading video info')
+++ /dev/null
-# coding: utf-8
-from __future__ import unicode_literals
-
-from .common import InfoExtractor
-from ..compat import (
- compat_parse_qs,
- compat_str,
-)
-from ..utils import (
- int_or_none,
- try_get,
- unified_timestamp,
-)
-
-
-class PornFlipIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?pornflip\.com/(?:v|embed)/(?P<id>[^/?#&]+)'
- _TESTS = [{
- 'url': 'https://www.pornflip.com/v/wz7DfNhMmep',
- 'md5': '98c46639849145ae1fd77af532a9278c',
- 'info_dict': {
- 'id': 'wz7DfNhMmep',
- 'ext': 'mp4',
- 'title': '2 Amateurs swallow make his dream cumshots true',
- 'thumbnail': r're:^https?://.*\.jpg$',
- 'duration': 112,
- 'timestamp': 1481655502,
- 'upload_date': '20161213',
- 'uploader_id': '106786',
- 'uploader': 'figifoto',
- 'view_count': int,
- 'age_limit': 18,
- }
- }, {
- 'url': 'https://www.pornflip.com/embed/wz7DfNhMmep',
- 'only_matching': True,
- }, {
- 'url': 'https://www.pornflip.com/v/EkRD6-vS2-s',
- 'only_matching': True,
- }, {
- 'url': 'https://www.pornflip.com/embed/EkRD6-vS2-s',
- 'only_matching': True,
- }, {
- 'url': 'https://www.pornflip.com/v/NG9q6Pb_iK8',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
-
- webpage = self._download_webpage(
- 'https://www.pornflip.com/v/%s' % video_id, video_id)
-
- flashvars = compat_parse_qs(self._search_regex(
- r'<embed[^>]+flashvars=(["\'])(?P<flashvars>(?:(?!\1).)+)\1',
- webpage, 'flashvars', group='flashvars'))
-
- title = flashvars['video_vars[title]'][0]
-
- def flashvar(kind):
- return try_get(
- flashvars, lambda x: x['video_vars[%s]' % kind][0], compat_str)
-
- formats = []
- for key, value in flashvars.items():
- if not (value and isinstance(value, list)):
- continue
- format_url = value[0]
- if key == 'video_vars[hds_manifest]':
- formats.extend(self._extract_mpd_formats(
- format_url, video_id, mpd_id='dash', fatal=False))
- continue
- height = self._search_regex(
- r'video_vars\[video_urls\]\[(\d+)', key, 'height', default=None)
- if not height:
- continue
- formats.append({
- 'url': format_url,
- 'format_id': 'http-%s' % height,
- 'height': int_or_none(height),
- })
- self._sort_formats(formats)
-
- uploader = self._html_search_regex(
- (r'<span[^>]+class="name"[^>]*>\s*<a[^>]+>\s*<strong>(?P<uploader>[^<]+)',
- r'<meta[^>]+content=(["\'])[^>]*\buploaded by (?P<uploader>.+?)\1'),
- webpage, 'uploader', fatal=False, group='uploader')
-
- return {
- 'id': video_id,
- 'formats': formats,
- 'title': title,
- 'thumbnail': flashvar('big_thumb'),
- 'duration': int_or_none(flashvar('duration')),
- 'timestamp': unified_timestamp(self._html_search_meta(
- 'uploadDate', webpage, 'timestamp')),
- 'uploader_id': flashvar('author_id'),
- 'uploader': uploader,
- 'view_count': int_or_none(flashvar('views')),
- 'age_limit': 18,
- }
from .common import InfoExtractor
from ..utils import (
+ determine_ext,
ExtractorError,
int_or_none,
js_to_json,
+ urljoin,
)
_VALID_URL = r'https?://(?:www\.)?pornhd\.com/(?:[a-z]{2,4}/)?videos/(?P<id>\d+)(?:/(?P<display_id>.+))?'
_TESTS = [{
'url': 'http://www.pornhd.com/videos/9864/selfie-restroom-masturbation-fun-with-chubby-cutie-hd-porn-video',
- 'md5': 'c8b964b1f0a4b5f7f28ae3a5c9f86ad5',
+ 'md5': '87f1540746c1d32ec7a2305c12b96b25',
'info_dict': {
'id': '9864',
'display_id': 'selfie-restroom-masturbation-fun-with-chubby-cutie-hd-porn-video',
'description': 'md5:3748420395e03e31ac96857a8f125b2b',
'thumbnail': r're:^https?://.*\.jpg',
'view_count': int,
+ 'like_count': int,
'age_limit': 18,
}
}, {
'description': 'md5:8ff0523848ac2b8f9b065ba781ccf294',
'thumbnail': r're:^https?://.*\.jpg',
'view_count': int,
+ 'like_count': int,
'age_limit': 18,
},
'skip': 'Not available anymore',
formats = []
for format_id, video_url in sources.items():
+ video_url = urljoin(url, video_url)
if not video_url:
continue
height = int_or_none(self._search_regex(
r'^(\d+)[pP]', format_id, 'height', default=None))
formats.append({
'url': video_url,
+ 'ext': determine_ext(video_url, 'mp4'),
'format_id': format_id,
'height': height,
})
r"poster'?\s*:\s*([\"'])(?P<url>(?:(?!\1).)+)\1", webpage,
'thumbnail', fatal=False, group='url')
+ like_count = int_or_none(self._search_regex(
+ (r'(\d+)\s*</11[^>]+>(?: |\s)*\blikes',
+ r'class=["\']save-count["\'][^>]*>\s*(\d+)'),
+ webpage, 'like count', fatal=False))
+
return {
'id': video_id,
'display_id': display_id,
'description': description,
'thumbnail': thumbnail,
'view_count': view_count,
+ 'like_count': like_count,
'formats': formats,
'age_limit': 18,
}
from ..compat import (
compat_HTTPError,
compat_str,
+ compat_urllib_request,
)
+from .openload import PhantomJSwrapper
from ..utils import (
+ determine_ext,
ExtractorError,
int_or_none,
- js_to_json,
orderedSet,
remove_quotes,
str_to_int,
)
-class PornHubIE(InfoExtractor):
+class PornHubBaseIE(InfoExtractor):
+    """Base extractor that re-downloads a page after running PhantomJS on it
+    when the first response looks like a JavaScript challenge page."""
+
+    def _download_webpage_handle(self, *args, **kwargs):
+        """Download a page, retrying once through PhantomJS if needed.
+
+        Takes the same arguments as the parent implementation; the first
+        positional argument must be the URL or request object. Returns
+        whatever the parent returns: a (webpage, urlh) pair, or its falsy
+        result when a non-fatal download fails.
+        """
+        def dl(*args, **kwargs):
+            return super(PornHubBaseIE, self)._download_webpage_handle(*args, **kwargs)
+
+        ret = dl(*args, **kwargs)
+
+        # With fatal=False the parent may return a falsy value instead of a
+        # (webpage, urlh) pair; pass it through instead of unpacking it.
+        if not ret:
+            return ret
+
+        webpage, urlh = ret
+
+        if any(re.search(p, webpage) for p in (
+                r'<body\b[^>]+\bonload=["\']go\(\)',
+                r'document\.cookie\s*=\s*["\']RNKEY=',
+                r'document\.location\.reload\(true\)')):
+            url_or_request = args[0]
+            url = (url_or_request.get_full_url()
+                   if isinstance(url_or_request, compat_urllib_request.Request)
+                   else url_or_request)
+            # NOTE(review): presumably running the page in PhantomJS sets the
+            # cookie the challenge script computes - confirm in PhantomJSwrapper.
+            phantom = PhantomJSwrapper(self, required_version='2.0')
+            phantom.get(url, html=webpage)
+            # The retry can fail non-fatally too, so return its result as-is.
+            return dl(*args, **kwargs)
+
+        return webpage, urlh
+
+
+class PornHubIE(PornHubBaseIE):
IE_DESC = 'PornHub and Thumbzilla'
_VALID_URL = r'''(?x)
https?://
def dl_webpage(platform):
self._set_cookie(host, 'platform', platform)
return self._download_webpage(
- 'http://www.%s/view_video.php?viewkey=%s' % (host, video_id),
+ 'https://www.%s/view_video.php?viewkey=%s' % (host, video_id),
video_id, 'Downloading %s webpage' % platform)
webpage = dl_webpage('pc')
r'/(\d{6}/\d{2})/', video_url, 'upload data', default=None)
if upload_date:
upload_date = upload_date.replace('/', '')
+ if determine_ext(video_url) == 'mpd':
+ formats.extend(self._extract_mpd_formats(
+ video_url, video_id, mpd_id='dash', fatal=False))
+ continue
tbr = None
mobj = re.search(r'(?P<height>\d+)[pP]?_(?P<tbr>\d+)[kK]', video_url)
if mobj:
comment_count = self._extract_count(
r'All Comments\s*<span>\(([\d,.]+)\)', webpage, 'comment')
- page_params = self._parse_json(self._search_regex(
- r'page_params\.zoneDetails\[([\'"])[^\'"]+\1\]\s*=\s*(?P<data>{[^}]+})',
- webpage, 'page parameters', group='data', default='{}'),
- video_id, transform_source=js_to_json, fatal=False)
- tags = categories = None
- if page_params:
- tags = page_params.get('tags', '').split(',')
- categories = page_params.get('categories', '').split(',')
+        def extract_list(meta_key):
+            """Return the link texts found inside the '<meta_key>Wrapper' div.
+
+            Returns None when no such div is found in the webpage.
+            """
+            wrapper_html = self._search_regex(
+                r'(?s)<div[^>]+\bclass=["\'].*?\b%sWrapper[^>]*>(.+?)</div>'
+                % meta_key, webpage, meta_key, default=None)
+            if not wrapper_html:
+                return None
+            return re.findall(r'<a[^>]+\bhref=[^>]+>([^<]+)', wrapper_html)
return {
'id': video_id,
'comment_count': comment_count,
'formats': formats,
'age_limit': 18,
- 'tags': tags,
- 'categories': categories,
+ 'tags': extract_list('tags'),
+ 'categories': extract_list('categories'),
'subtitles': subtitles,
}
-class PornHubPlaylistBaseIE(InfoExtractor):
+class PornHubPlaylistBaseIE(PornHubBaseIE):
def _extract_entries(self, webpage, host):
# Only process container div with main playlist content skipping
# drop-down menu that uses similar pattern for videos (see
- # https://github.com/rg3/youtube-dl/issues/11594).
+ # https://github.com/ytdl-org/youtube-dl/issues/11594).
container = self._search_regex(
r'(?s)(<div[^>]+class=["\']container.+)', webpage,
'container', default=webpage)
+++ /dev/null
-from __future__ import unicode_literals
-
-from .common import InfoExtractor
-from ..utils import (
- ExtractorError,
- sanitized_Request,
- urlencode_postdata,
-)
-
-
-class PrimeShareTVIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?primeshare\.tv/download/(?P<id>[\da-zA-Z]+)'
-
- _TEST = {
- 'url': 'http://primeshare.tv/download/238790B611',
- 'md5': 'b92d9bf5461137c36228009f31533fbc',
- 'info_dict': {
- 'id': '238790B611',
- 'ext': 'mp4',
- 'title': 'Public Domain - 1960s Commercial - Crest Toothpaste-YKsuFona',
- },
- }
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
-
- webpage = self._download_webpage(url, video_id)
-
- if '>File not exist<' in webpage:
- raise ExtractorError('Video %s does not exist' % video_id, expected=True)
-
- fields = self._hidden_inputs(webpage)
-
- headers = {
- 'Referer': url,
- 'Content-Type': 'application/x-www-form-urlencoded',
- }
-
- wait_time = int(self._search_regex(
- r'var\s+cWaitTime\s*=\s*(\d+)',
- webpage, 'wait time', default=7)) + 1
- self._sleep(wait_time, video_id)
-
- req = sanitized_Request(
- url, urlencode_postdata(fields), headers)
- video_page = self._download_webpage(
- req, video_id, 'Downloading video page')
-
- video_url = self._search_regex(
- r"url\s*:\s*'([^']+\.primeshare\.tv(?::443)?/file/[^']+)'",
- video_page, 'video url')
-
- title = self._html_search_regex(
- r'<h1>Watch\s*(?: )?\s*\((.+?)(?:\s*\[\.\.\.\])?\)\s*(?: )?\s*<strong>',
- video_page, 'title')
-
- return {
- 'id': video_id,
- 'url': video_url,
- 'title': title,
- 'ext': 'mp4',
- }
class ProSiebenSat1BaseIE(InfoExtractor):
+ _GEO_COUNTRIES = ['DE']
+ _ACCESS_ID = None
+ _SUPPORTED_PROTOCOLS = 'dash:clear,hls:clear,progressive:clear'
+ _V4_BASE_URL = 'https://vas-v4.p7s1video.net/4.0/get'
+
def _extract_video_info(self, url, clip_id):
client_location = url
if video.get('is_protected') is True:
raise ExtractorError('This video is DRM protected.', expected=True)
- duration = float_or_none(video.get('duration'))
- source_ids = [compat_str(source['id']) for source in video['sources']]
-
- client_id = self._SALT[:2] + sha1(''.join([clip_id, self._SALT, self._TOKEN, client_location, self._SALT, self._CLIENT_NAME]).encode('utf-8')).hexdigest()
-
- sources = self._download_json(
- 'http://vas.sim-technik.de/vas/live/v2/videos/%s/sources' % clip_id,
- clip_id, 'Downloading sources JSON', query={
- 'access_token': self._TOKEN,
- 'client_id': client_id,
- 'client_location': client_location,
- 'client_name': self._CLIENT_NAME,
- })
- server_id = sources['server_id']
+ formats = []
+ if self._ACCESS_ID:
+ raw_ct = self._ENCRYPTION_KEY + clip_id + self._IV + self._ACCESS_ID
+ server_token = (self._download_json(
+ self._V4_BASE_URL + 'protocols', clip_id,
+ 'Downloading protocols JSON',
+ headers=self.geo_verification_headers(), query={
+ 'access_id': self._ACCESS_ID,
+ 'client_token': sha1((raw_ct).encode()).hexdigest(),
+ 'video_id': clip_id,
+ }, fatal=False) or {}).get('server_token')
+ if server_token:
+ urls = (self._download_json(
+ self._V4_BASE_URL + 'urls', clip_id, 'Downloading urls JSON', query={
+ 'access_id': self._ACCESS_ID,
+ 'client_token': sha1((raw_ct + server_token + self._SUPPORTED_PROTOCOLS).encode()).hexdigest(),
+ 'protocols': self._SUPPORTED_PROTOCOLS,
+ 'server_token': server_token,
+ 'video_id': clip_id,
+ }, fatal=False) or {}).get('urls') or {}
+ for protocol, variant in urls.items():
+ source_url = variant.get('clear', {}).get('url')
+ if not source_url:
+ continue
+ if protocol == 'dash':
+ formats.extend(self._extract_mpd_formats(
+ source_url, clip_id, mpd_id=protocol, fatal=False))
+ elif protocol == 'hls':
+ formats.extend(self._extract_m3u8_formats(
+ source_url, clip_id, 'mp4', 'm3u8_native',
+ m3u8_id=protocol, fatal=False))
+ else:
+ formats.append({
+ 'url': source_url,
+ 'format_id': protocol,
+ })
+ if not formats:
+ source_ids = [compat_str(source['id']) for source in video['sources']]
- def fix_bitrate(bitrate):
- bitrate = int_or_none(bitrate)
- if not bitrate:
- return None
- return (bitrate // 1000) if bitrate % 1000 == 0 else bitrate
+ client_id = self._SALT[:2] + sha1(''.join([clip_id, self._SALT, self._TOKEN, client_location, self._SALT, self._CLIENT_NAME]).encode('utf-8')).hexdigest()
- formats = []
- for source_id in source_ids:
- client_id = self._SALT[:2] + sha1(''.join([self._SALT, clip_id, self._TOKEN, server_id, client_location, source_id, self._SALT, self._CLIENT_NAME]).encode('utf-8')).hexdigest()
- urls = self._download_json(
- 'http://vas.sim-technik.de/vas/live/v2/videos/%s/sources/url' % clip_id,
- clip_id, 'Downloading urls JSON', fatal=False, query={
+ sources = self._download_json(
+ 'http://vas.sim-technik.de/vas/live/v2/videos/%s/sources' % clip_id,
+ clip_id, 'Downloading sources JSON', query={
'access_token': self._TOKEN,
'client_id': client_id,
'client_location': client_location,
'client_name': self._CLIENT_NAME,
- 'server_id': server_id,
- 'source_ids': source_id,
})
- if not urls:
- continue
- if urls.get('status_code') != 0:
- raise ExtractorError('This video is unavailable', expected=True)
- urls_sources = urls['sources']
- if isinstance(urls_sources, dict):
- urls_sources = urls_sources.values()
- for source in urls_sources:
- source_url = source.get('url')
- if not source_url:
+ server_id = sources['server_id']
+
+            def fix_bitrate(bitrate):
+                """Normalize a source bitrate.
+
+                Returns None for a missing, unparsable or zero bitrate.
+                A value that is an exact multiple of 1000 is divided by
+                1000; any other value is returned unchanged.
+                """
+                value = int_or_none(bitrate)
+                if not value:
+                    return None
+                if value % 1000 == 0:
+                    return value // 1000
+                return value
+
+ for source_id in source_ids:
+ client_id = self._SALT[:2] + sha1(''.join([self._SALT, clip_id, self._TOKEN, server_id, client_location, source_id, self._SALT, self._CLIENT_NAME]).encode('utf-8')).hexdigest()
+ urls = self._download_json(
+ 'http://vas.sim-technik.de/vas/live/v2/videos/%s/sources/url' % clip_id,
+ clip_id, 'Downloading urls JSON', fatal=False, query={
+ 'access_token': self._TOKEN,
+ 'client_id': client_id,
+ 'client_location': client_location,
+ 'client_name': self._CLIENT_NAME,
+ 'server_id': server_id,
+ 'source_ids': source_id,
+ })
+ if not urls:
continue
- protocol = source.get('protocol')
- mimetype = source.get('mimetype')
- if mimetype == 'application/f4m+xml' or 'f4mgenerator' in source_url or determine_ext(source_url) == 'f4m':
- formats.extend(self._extract_f4m_formats(
- source_url, clip_id, f4m_id='hds', fatal=False))
- elif mimetype == 'application/x-mpegURL':
- formats.extend(self._extract_m3u8_formats(
- source_url, clip_id, 'mp4', 'm3u8_native',
- m3u8_id='hls', fatal=False))
- elif mimetype == 'application/dash+xml':
- formats.extend(self._extract_mpd_formats(
- source_url, clip_id, mpd_id='dash', fatal=False))
- else:
- tbr = fix_bitrate(source['bitrate'])
- if protocol in ('rtmp', 'rtmpe'):
- mobj = re.search(r'^(?P<url>rtmpe?://[^/]+)/(?P<path>.+)$', source_url)
- if not mobj:
- continue
- path = mobj.group('path')
- mp4colon_index = path.rfind('mp4:')
- app = path[:mp4colon_index]
- play_path = path[mp4colon_index:]
- formats.append({
- 'url': '%s/%s' % (mobj.group('url'), app),
- 'app': app,
- 'play_path': play_path,
- 'player_url': 'http://livepassdl.conviva.com/hf/ver/2.79.0.17083/LivePassModuleMain.swf',
- 'page_url': 'http://www.prosieben.de',
- 'tbr': tbr,
- 'ext': 'flv',
- 'format_id': 'rtmp%s' % ('-%d' % tbr if tbr else ''),
- })
+ if urls.get('status_code') != 0:
+ raise ExtractorError('This video is unavailable', expected=True)
+ urls_sources = urls['sources']
+ if isinstance(urls_sources, dict):
+ urls_sources = urls_sources.values()
+ for source in urls_sources:
+ source_url = source.get('url')
+ if not source_url:
+ continue
+ protocol = source.get('protocol')
+ mimetype = source.get('mimetype')
+ if mimetype == 'application/f4m+xml' or 'f4mgenerator' in source_url or determine_ext(source_url) == 'f4m':
+ formats.extend(self._extract_f4m_formats(
+ source_url, clip_id, f4m_id='hds', fatal=False))
+ elif mimetype == 'application/x-mpegURL':
+ formats.extend(self._extract_m3u8_formats(
+ source_url, clip_id, 'mp4', 'm3u8_native',
+ m3u8_id='hls', fatal=False))
+ elif mimetype == 'application/dash+xml':
+ formats.extend(self._extract_mpd_formats(
+ source_url, clip_id, mpd_id='dash', fatal=False))
else:
- formats.append({
- 'url': source_url,
- 'tbr': tbr,
- 'format_id': 'http%s' % ('-%d' % tbr if tbr else ''),
- })
+ tbr = fix_bitrate(source['bitrate'])
+ if protocol in ('rtmp', 'rtmpe'):
+ mobj = re.search(r'^(?P<url>rtmpe?://[^/]+)/(?P<path>.+)$', source_url)
+ if not mobj:
+ continue
+ path = mobj.group('path')
+ mp4colon_index = path.rfind('mp4:')
+ app = path[:mp4colon_index]
+ play_path = path[mp4colon_index:]
+ formats.append({
+ 'url': '%s/%s' % (mobj.group('url'), app),
+ 'app': app,
+ 'play_path': play_path,
+ 'player_url': 'http://livepassdl.conviva.com/hf/ver/2.79.0.17083/LivePassModuleMain.swf',
+ 'page_url': 'http://www.prosieben.de',
+ 'tbr': tbr,
+ 'ext': 'flv',
+ 'format_id': 'rtmp%s' % ('-%d' % tbr if tbr else ''),
+ })
+ else:
+ formats.append({
+ 'url': source_url,
+ 'tbr': tbr,
+ 'format_id': 'http%s' % ('-%d' % tbr if tbr else ''),
+ })
self._sort_formats(formats)
return {
- 'duration': duration,
+ 'duration': float_or_none(video.get('duration')),
'formats': formats,
}
_TESTS = [
{
- # Tests changes introduced in https://github.com/rg3/youtube-dl/pull/6242
- # in response to fixing https://github.com/rg3/youtube-dl/issues/6215:
+ # Tests changes introduced in https://github.com/ytdl-org/youtube-dl/pull/6242
+ # in response to fixing https://github.com/ytdl-org/youtube-dl/issues/6215:
# - malformed f4m manifest support
# - proper handling of URLs starting with `https?://` in 2.0 manifests
# - recursive child f4m manifests extraction
_TOKEN = 'prosieben'
_SALT = '01!8d8F_)r9]4s[qeuXfP%'
_CLIENT_NAME = 'kolibri-2.0.19-splec4'
+
+ _ACCESS_ID = 'x_prosiebenmaxx-de'
+ _ENCRYPTION_KEY = 'Eeyeey9oquahthainoofashoyoikosag'
+ _IV = 'Aeluchoc6aevechuipiexeeboowedaok'
+
_CLIPID_REGEXES = [
r'"clip_id"\s*:\s+"(\d+)"',
r'clipid: "(\d+)"',
import re
from .common import InfoExtractor
+from ..compat import compat_HTTPError
from ..utils import (
- xpath_text,
- find_xpath_attr,
determine_ext,
+ ExtractorError,
int_or_none,
unified_strdate,
- xpath_element,
- ExtractorError,
- determine_protocol,
- unsmuggle_url,
)
# m3u8 download
'skip_download': True,
},
+ },
+ {
+ # with protectionType but not actually DRM protected
+ 'url': 'radiocanada:toutv:140872',
+ 'info_dict': {
+ 'id': '140872',
+ 'title': 'Épisode 1',
+ 'series': 'District 31',
+ },
+ 'only_matching': True,
}
]
+ _GEO_COUNTRIES = ['CA']
+ _access_token = None
+ _claims = None
- def _real_extract(self, url):
- url, smuggled_data = unsmuggle_url(url, {})
- app_code, video_id = re.match(self._VALID_URL, url).groups()
-
- metadata = self._download_xml(
- 'http://api.radio-canada.ca/metaMedia/v1/index.ashx',
- video_id, note='Downloading metadata XML', query={
+ def _call_api(self, path, video_id=None, app_code=None, query=None):
+ if not query:
+ query = {}
+ query.update({
+ 'client_key': '773aea60-0e80-41bb-9c7f-e6d7c3ad17fb',
+ 'output': 'json',
+ })
+ if video_id:
+ query.update({
'appCode': app_code,
'idMedia': video_id,
})
+ if self._access_token:
+ query['access_token'] = self._access_token
+ try:
+ return self._download_json(
+ 'https://services.radio-canada.ca/media/' + path, video_id, query=query)
+ except ExtractorError as e:
+ if isinstance(e.cause, compat_HTTPError) and e.cause.code in (401, 422):
+ data = self._parse_json(e.cause.read().decode(), None)
+ error = data.get('error_description') or data['errorMessage']['text']
+ raise ExtractorError(error, expected=True)
+ raise
+
+ def _extract_info(self, app_code, video_id):
+ metas = self._call_api('meta/v1/index.ashx', video_id, app_code)['Metas']
def get_meta(name):
- el = find_xpath_attr(metadata, './/Meta', 'name', name)
- return el.text if el is not None else None
+ for meta in metas:
+ if meta.get('name') == name:
+ text = meta.get('text')
+ if text:
+ return text
+ # protectionType does not necessarily mean the video is DRM protected (see
+ # https://github.com/ytdl-org/youtube-dl/pull/18609).
if get_meta('protectionType'):
- raise ExtractorError('This video is DRM protected.', expected=True)
-
- device_types = ['ipad']
- if not smuggled_data:
- device_types.append('flash')
- device_types.append('android')
+ self.report_warning('This video is probably DRM protected.')
- formats = []
- error = None
- # TODO: extract f4m formats
- # f4m formats can be extracted using flashhd device_type but they produce unplayable file
- for device_type in device_types:
- validation_url = 'http://api.radio-canada.ca/validationMedia/v1/Validation.ashx'
- query = {
- 'appCode': app_code,
- 'idMedia': video_id,
- 'connectionType': 'broadband',
- 'multibitrate': 'true',
- 'deviceType': device_type,
- }
- if smuggled_data:
- validation_url = 'https://services.radio-canada.ca/media/validation/v2/'
- query.update(smuggled_data)
- else:
- query.update({
- # paysJ391wsHjbOJwvCs26toz and bypasslock are used to bypass geo-restriction
- 'paysJ391wsHjbOJwvCs26toz': 'CA',
- 'bypasslock': 'NZt5K62gRqfc',
- })
- v_data = self._download_xml(validation_url, video_id, note='Downloading %s XML' % device_type, query=query, fatal=False)
- v_url = xpath_text(v_data, 'url')
- if not v_url:
- continue
- if v_url == 'null':
- error = xpath_text(v_data, 'message')
- continue
- ext = determine_ext(v_url)
- if ext == 'm3u8':
- formats.extend(self._extract_m3u8_formats(
- v_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
- elif ext == 'f4m':
- formats.extend(self._extract_f4m_formats(
- v_url, video_id, f4m_id='hds', fatal=False))
- else:
- ext = determine_ext(v_url)
- bitrates = xpath_element(v_data, 'bitrates')
- for url_e in bitrates.findall('url'):
- tbr = int_or_none(url_e.get('bitrate'))
- if not tbr:
- continue
- f_url = re.sub(r'\d+\.%s' % ext, '%d.%s' % (tbr, ext), v_url)
- protocol = determine_protocol({'url': f_url})
- f = {
- 'format_id': '%s-%d' % (protocol, tbr),
- 'url': f_url,
- 'ext': 'flv' if protocol == 'rtmp' else ext,
- 'protocol': protocol,
- 'width': int_or_none(url_e.get('width')),
- 'height': int_or_none(url_e.get('height')),
- 'tbr': tbr,
- }
- mobj = re.match(r'(?P<url>rtmp://[^/]+/[^/]+)/(?P<playpath>[^?]+)(?P<auth>\?.+)', f_url)
- if mobj:
- f.update({
- 'url': mobj.group('url') + mobj.group('auth'),
- 'play_path': mobj.group('playpath'),
- })
- formats.append(f)
- if protocol == 'rtsp':
- base_url = self._search_regex(
- r'rtsp://([^?]+)', f_url, 'base url', default=None)
- if base_url:
- base_url = 'http://' + base_url
- formats.extend(self._extract_m3u8_formats(
- base_url + '/playlist.m3u8', video_id, 'mp4',
- 'm3u8_native', m3u8_id='hls', fatal=False))
- formats.extend(self._extract_f4m_formats(
- base_url + '/manifest.f4m', video_id,
- f4m_id='hds', fatal=False))
- if not formats and error:
+ query = {
+ 'connectionType': 'hd',
+ 'deviceType': 'ipad',
+ 'multibitrate': 'true',
+ }
+ if self._claims:
+ query['claims'] = self._claims
+ v_data = self._call_api('validation/v2/', video_id, app_code, query)
+ v_url = v_data.get('url')
+ if not v_url:
+ error = v_data['message']
+ if error == "Le contenu sélectionné n'est pas disponible dans votre pays":
+ raise self.raise_geo_restricted(error, self._GEO_COUNTRIES)
+ if error == 'Le contenu sélectionné est disponible seulement en premium':
+ self.raise_login_required(error)
raise ExtractorError(
'%s said: %s' % (self.IE_NAME, error), expected=True)
+ formats = self._extract_m3u8_formats(v_url, video_id, 'mp4')
self._sort_formats(formats)
subtitles = {}
'formats': formats,
}
+    def _real_extract(self, url):
+        """Match the URL against _VALID_URL and pass its groups to _extract_info."""
+        mobj = re.match(self._VALID_URL, url)
+        return self._extract_info(*mobj.groups())
+
class RadioCanadaAudioVideoIE(InfoExtractor):
- 'radiocanada:audiovideo'
- _VALID_URL = r'https?://ici\.radio-canada\.ca/audio-video/media-(?P<id>[0-9]+)'
- _TEST = {
+ IE_NAME = 'radiocanada:audiovideo'
+ _VALID_URL = r'https?://ici\.radio-canada\.ca/([^/]+/)*media-(?P<id>[0-9]+)'
+ _TESTS = [{
'url': 'http://ici.radio-canada.ca/audio-video/media-7527184/barack-obama-au-vietnam',
'info_dict': {
'id': '7527184',
# m3u8 download
'skip_download': True,
},
- }
+ }, {
+ 'url': 'https://ici.radio-canada.ca/info/videos/media-7527184/barack-obama-au-vietnam',
+ 'only_matching': True,
+ }]
def _real_extract(self, url):
return self.url_result('radiocanada:medianet:%s' % self._match_id(url))
if (ext == 'm3u8' and platform != 'mon') or (ext == 'f4m' and platform != 'flash'):
continue
- if ext == 'm3u8':
+ if ext == 'm3u8' or 'format=m3u8' in media_url or platform == 'mon':
formats.extend(self._extract_m3u8_formats(
media_url, video_id, 'mp4', 'm3u8_native',
m3u8_id='hls', fatal=False))
- elif ext == 'f4m':
+ elif ext == 'f4m' or platform == 'flash':
manifest_url = update_url_query(
media_url.replace('manifest#live_hds.f4m', 'manifest.f4m'),
{'hdcore': '3.7.0', 'plugin': 'aasp-3.7.0.39.44'})
class RaiIE(RaiBaseIE):
- _VALID_URL = r'https?://[^/]+\.(?:rai\.(?:it|tv)|rainews\.it)/dl/.+?-(?P<id>%s)(?:-.+?)?\.html' % RaiBaseIE._UUID_RE
+ _VALID_URL = r'https?://[^/]+\.(?:rai\.(?:it|tv)|rainews\.it)/.+?-(?P<id>%s)(?:-.+?)?\.html' % RaiBaseIE._UUID_RE
_TESTS = [{
# var uniquename = "ContentItem-..."
# data-id="ContentItem-..."
# Direct MMS URL
'url': 'http://www.rai.it/dl/RaiTV/programmi/media/ContentItem-b63a4089-ac28-48cf-bca5-9f5b5bc46df5.html',
'only_matching': True,
+ }, {
+ 'url': 'https://www.rainews.it/tgr/marche/notiziari/video/2019/02/ContentItem-6ba945a2-889c-4a80-bdeb-8489c70a8db9.html',
+ 'only_matching': True,
}]
def _extract_from_content_id(self, content_id, url):
class RedBullTVIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?redbull(?:\.tv|\.com/(?:[^/]+/)?tv)/video/(?P<id>AP-\w+)'
+ _VALID_URL = r'https?://(?:www\.)?redbull(?:\.tv|\.com(?:/[^/]+)?(?:/tv)?)(?:/events/[^/]+)?/(?:videos?|live)/(?P<id>AP-\w+)'
_TESTS = [{
# film
'url': 'https://www.redbull.tv/video/AP-1Q6XCDTAN1W11',
}, {
'url': 'https://www.redbull.com/int-en/tv/video/AP-1UWHCAR9S1W11/rob-meets-sam-gaze?playlist=playlists::3f81040a-2f31-4832-8e2e-545b1d39d173',
'only_matching': True,
+ }, {
+ 'url': 'https://www.redbull.com/us-en/videos/AP-1YM9QCYE52111',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.redbull.com/us-en/events/AP-1XV2K61Q51W11/live/AP-1XUJ86FDH1W11',
+ 'only_matching': True,
}]
def _real_extract(self, url):
'formats': formats,
'subtitles': subtitles,
}
+
+
+class RedBullTVRrnContentIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?redbull(?:\.tv|\.com(?:/[^/]+)?(?:/tv)?)/(?:video|live)/rrn:content:[^:]+:(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'
+ _TESTS = [{
+ 'url': 'https://www.redbull.com/int-en/tv/video/rrn:content:live-videos:e3e6feb4-e95f-50b7-962a-c70f8fd13c73/mens-dh-finals-fort-william',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.redbull.com/int-en/tv/video/rrn:content:videos:a36a0f36-ff1b-5db8-a69d-ee11a14bf48b/tn-ts-style?playlist=rrn:content:event-profiles:83f05926-5de8-5389-b5e4-9bb312d715e8:extras',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, display_id)
+
+ video_url = self._og_search_url(webpage)
+
+ return self.url_result(
+ video_url, ie=RedBullTVIE.ie_key(),
+ video_id=RedBullTVIE._match_id(video_url))
ExtractorError,
int_or_none,
float_or_none,
+ url_or_none,
)
'_type': 'url_transparent',
'url': video_url,
'title': data.get('title'),
- 'thumbnail': data.get('thumbnail'),
+ 'thumbnail': url_or_none(data.get('thumbnail')),
'timestamp': float_or_none(data.get('created_utc')),
'uploader': data.get('author'),
'like_count': int_or_none(data.get('ups')),
class RTL2IE(InfoExtractor):
IE_NAME = 'rtl2'
- _VALID_URL = r'http?://(?:www\.)?rtl2\.de/[^?#]*?/(?P<id>[^?#/]*?)(?:$|/(?:$|[?#]))'
+ _VALID_URL = r'https?://(?:www\.)?rtl2\.de/sendung/[^/]+/(?:video/(?P<vico_id>\d+)[^/]+/(?P<vivi_id>\d+)-|folge/)(?P<id>[^/?#]+)'
_TESTS = [{
'url': 'http://www.rtl2.de/sendung/grip-das-motormagazin/folge/folge-203-0',
'info_dict': {
# rtmp download
'skip_download': True,
},
+ 'expected_warnings': ['Unable to download f4m manifest', 'Failed to download m3u8 information'],
}, {
'url': 'http://www.rtl2.de/sendung/koeln-50667/video/5512-anna/21040-anna-erwischt-alex/',
'info_dict': {
- 'id': '21040-anna-erwischt-alex',
+ 'id': 'anna-erwischt-alex',
'ext': 'mp4',
'title': 'Anna erwischt Alex!',
'description': 'Anna nimmt ihrem Vater nicht ab, dass er nicht spielt. Und tatsächlich erwischt sie ihn auf frischer Tat.'
# rtmp download
'skip_download': True,
},
+ 'expected_warnings': ['Unable to download f4m manifest', 'Failed to download m3u8 information'],
}]
def _real_extract(self, url):
- # Some rtl2 urls have no slash at the end, so append it.
- if not url.endswith('/'):
- url += '/'
-
- video_id = self._match_id(url)
- webpage = self._download_webpage(url, video_id)
-
- mobj = re.search(
- r'<div[^>]+data-collection="(?P<vico_id>\d+)"[^>]+data-video="(?P<vivi_id>\d+)"',
- webpage)
- if mobj:
- vico_id = mobj.group('vico_id')
- vivi_id = mobj.group('vivi_id')
- else:
- vico_id = self._html_search_regex(
- r'vico_id\s*:\s*([0-9]+)', webpage, 'vico_id')
- vivi_id = self._html_search_regex(
- r'vivi_id\s*:\s*([0-9]+)', webpage, 'vivi_id')
+ vico_id, vivi_id, display_id = re.match(self._VALID_URL, url).groups()
+ if not vico_id:
+ webpage = self._download_webpage(url, display_id)
+
+ mobj = re.search(
+ r'data-collection="(?P<vico_id>\d+)"[^>]+data-video="(?P<vivi_id>\d+)"',
+ webpage)
+ if mobj:
+ vico_id = mobj.group('vico_id')
+ vivi_id = mobj.group('vivi_id')
+ else:
+ vico_id = self._html_search_regex(
+ r'vico_id\s*:\s*([0-9]+)', webpage, 'vico_id')
+ vivi_id = self._html_search_regex(
+ r'vivi_id\s*:\s*([0-9]+)', webpage, 'vivi_id')
info = self._download_json(
- 'http://www.rtl2.de/sites/default/modules/rtl2/mediathek/php/get_video_jw.php',
- video_id, query={
+ 'https://service.rtl2.de/api-player-vipo/video.php',
+ display_id, query={
'vico_id': vico_id,
'vivi_id': vivi_id,
})
'format_id': 'rtmp',
'url': rtmp_url,
'play_path': stream_url,
- 'player_url': 'http://www.rtl2.de/flashplayer/vipo_player.swf',
+ 'player_url': 'https://www.rtl2.de/sites/default/modules/rtl2/jwplayer/jwplayer-7.6.0/jwplayer.flash.swf',
'page_url': url,
'flash_version': 'LNX 11,2,202,429',
'rtmp_conn': rtmp_conn,
m3u8_url = video_info.get('streamurl_hls')
if m3u8_url:
- formats.extend(self._extract_akamai_formats(m3u8_url, video_id))
+ formats.extend(self._extract_akamai_formats(m3u8_url, display_id))
self._sort_formats(formats)
return {
- 'id': video_id,
+ 'id': display_id,
'title': title,
'thumbnail': video_info.get('image'),
'description': video_info.get('beschreibung'),
'description': 'Er zijn nieuwe beelden vrijgegeven die vlak na de aanslag in Kopenhagen zijn gemaakt. Op de video is goed te zien hoe omstanders zich bekommeren om één van de slachtoffers, terwijl de eerste agenten ter plaatse komen.',
}
}, {
- # empty synopsis and missing episodes (see https://github.com/rg3/youtube-dl/issues/6275)
+ # empty synopsis and missing episodes (see https://github.com/ytdl-org/youtube-dl/issues/6275)
# best format available nettv
'url': 'http://www.rtl.nl/system/videoplayer/derden/rtlnieuws/video_embed.html#uuid=f536aac0-1dc3-4314-920e-3bd1c5b3811a/autoplay=false',
'info_dict': {
# coding: utf-8
from __future__ import unicode_literals
-import re
-
from .common import InfoExtractor
+from ..utils import (
+ determine_ext,
+ js_to_json,
+)
class RTPIE(InfoExtractor):
'description': 'As paixões musicais de António Cartaxo e António Macedo',
'thumbnail': r're:^https?://.*\.jpg',
},
- 'params': {
- # rtmp download
- 'skip_download': True,
- },
}, {
'url': 'http://www.rtp.pt/play/p831/a-quimica-das-coisas',
'only_matching': True,
webpage = self._download_webpage(url, video_id)
title = self._html_search_meta(
'twitter:title', webpage, display_name='title', fatal=True)
- description = self._html_search_meta('description', webpage)
- thumbnail = self._og_search_thumbnail(webpage)
-
- player_config = self._search_regex(
- r'(?s)RTPPLAY\.player\.newPlayer\(\s*(\{.*?\})\s*\)', webpage, 'player config')
- config = self._parse_json(player_config, video_id)
-
- path, ext = config.get('file').rsplit('.', 1)
- formats = [{
- 'format_id': 'rtmp',
- 'ext': ext,
- 'vcodec': config.get('type') == 'audio' and 'none' or None,
- 'preference': -2,
- 'url': 'rtmp://{streamer:s}/{application:s}'.format(**config),
- 'app': config.get('application'),
- 'play_path': '{ext:s}:{path:s}'.format(ext=ext, path=path),
- 'page_url': url,
- 'rtmp_live': config.get('live', False),
- 'player_url': 'http://programas.rtp.pt/play/player.swf?v3',
- 'rtmp_real_time': True,
- }]
-
- # Construct regular HTTP download URLs
- replacements = {
- 'audio': {
- 'format_id': 'mp3',
- 'pattern': r'^nas2\.share/wavrss/',
- 'repl': 'http://rsspod.rtp.pt/podcasts/',
- 'vcodec': 'none',
- },
- 'video': {
- 'format_id': 'mp4_h264',
- 'pattern': r'^nas2\.share/h264/',
- 'repl': 'http://rsspod.rtp.pt/videocasts/',
- 'vcodec': 'h264',
- },
- }
- r = replacements[config['type']]
- if re.match(r['pattern'], config['file']) is not None:
- formats.append({
- 'format_id': r['format_id'],
- 'url': re.sub(r['pattern'], r['repl'], config['file']),
- 'vcodec': r['vcodec'],
- })
- self._sort_formats(formats)
+ config = self._parse_json(self._search_regex(
+ r'(?s)RTPPlayer\(({.+?})\);', webpage,
+ 'player config'), video_id, js_to_json)
+ file_url = config['file']
+ ext = determine_ext(file_url)
+ if ext == 'm3u8':
+ file_key = config.get('fileKey')
+ formats = self._extract_m3u8_formats(
+ file_url, video_id, 'mp4', 'm3u8_native',
+ m3u8_id='hls', fatal=file_key)
+ if file_key:
+ formats.append({
+ 'url': 'https://cdn-ondemand.rtp.pt' + file_key,
+ 'preference': 1,
+ })
+ self._sort_formats(formats)
+ else:
+ formats = [{
+ 'url': file_url,
+ 'ext': ext,
+ }]
+ if config.get('mediaType') == 'audio':
+ for f in formats:
+ f['vcodec'] = 'none'
return {
'id': video_id,
'title': title,
'formats': formats,
- 'description': description,
- 'thumbnail': thumbnail,
+ 'description': self._html_search_meta(['description', 'twitter:description'], webpage),
+ 'thumbnail': config.get('poster') or self._og_search_thumbnail(webpage),
}
+++ /dev/null
-from __future__ import unicode_literals
-
-from .nuevo import NuevoBaseIE
-
-
-class RulePornIE(NuevoBaseIE):
- _VALID_URL = r'https?://(?:www\.)?ruleporn\.com/(?:[^/?#&]+/)*(?P<id>[^/?#&]+)'
- _TEST = {
- 'url': 'http://ruleporn.com/brunette-nympho-chick-takes-her-boyfriend-in-every-angle/',
- 'md5': '86861ebc624a1097c7c10eaf06d7d505',
- 'info_dict': {
- 'id': '48212',
- 'display_id': 'brunette-nympho-chick-takes-her-boyfriend-in-every-angle',
- 'ext': 'mp4',
- 'title': 'Brunette Nympho Chick Takes Her Boyfriend In Every Angle',
- 'description': 'md5:6d28be231b981fff1981deaaa03a04d5',
- 'age_limit': 18,
- 'duration': 635.1,
- }
- }
-
- def _real_extract(self, url):
- display_id = self._match_id(url)
-
- webpage = self._download_webpage(url, display_id)
-
- video_id = self._search_regex(
- r'lovehomeporn\.com/embed/(\d+)', webpage, 'video id')
-
- title = self._search_regex(
- r'<h2[^>]+title=(["\'])(?P<url>.+?)\1',
- webpage, 'title', group='url')
- description = self._html_search_meta('description', webpage)
-
- info = self._extract_nuevo(
- 'http://lovehomeporn.com/media/nuevo/econfig.php?key=%s&rp=true' % video_id,
- video_id)
- info.update({
- 'display_id': display_id,
- 'title': title,
- 'description': description,
- 'age_limit': 18
- })
- return info
class RutubeBaseIE(InfoExtractor):
- def _extract_video(self, video, video_id=None, require_title=True):
+ def _download_api_info(self, video_id, query=None):
+ if not query:
+ query = {}
+ query['format'] = 'json'
+ return self._download_json(
+ 'http://rutube.ru/api/video/%s/' % video_id,
+ video_id, 'Downloading video JSON',
+ 'Unable to download video JSON', query=query)
+
+ @staticmethod
+ def _extract_info(video, video_id=None, require_title=True):
title = video['title'] if require_title else video.get('title')
age_limit = video.get('is_adult')
category = try_get(video, lambda x: x['category']['name'])
return {
- 'id': video.get('id') or video_id,
+ 'id': video.get('id') or video_id if video_id else video['id'],
'title': title,
'description': video.get('description'),
'thumbnail': video.get('thumbnail_url'),
'is_live': bool_or_none(video.get('is_livestream')),
}
+ def _download_and_extract_info(self, video_id, query=None):
+ return self._extract_info(
+ self._download_api_info(video_id, query=query), video_id)
+
+ def _download_api_options(self, video_id, query=None):
+ if not query:
+ query = {}
+ query['format'] = 'json'
+ return self._download_json(
+ 'http://rutube.ru/api/play/options/%s/' % video_id,
+ video_id, 'Downloading options JSON',
+ 'Unable to download options JSON',
+ headers=self.geo_verification_headers(), query=query)
+
+ def _extract_formats(self, options, video_id):
+ formats = []
+ for format_id, format_url in options['video_balancer'].items():
+ ext = determine_ext(format_url)
+ if ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ format_url, video_id, 'mp4', m3u8_id=format_id, fatal=False))
+ elif ext == 'f4m':
+ formats.extend(self._extract_f4m_formats(
+ format_url, video_id, f4m_id=format_id, fatal=False))
+ else:
+ formats.append({
+ 'url': format_url,
+ 'format_id': format_id,
+ })
+ self._sort_formats(formats)
+ return formats
+
+ def _download_and_extract_formats(self, video_id, query=None):
+ return self._extract_formats(
+ self._download_api_options(video_id, query=query), video_id)
+
class RutubeIE(RutubeBaseIE):
IE_NAME = 'rutube'
_TESTS = [{
'url': 'http://rutube.ru/video/3eac3b4561676c17df9132a9a1e62e3e/',
- 'md5': '79938ade01294ef7e27574890d0d3769',
+ 'md5': '1d24f180fac7a02f3900712e5a5764d6',
'info_dict': {
'id': '3eac3b4561676c17df9132a9a1e62e3e',
- 'ext': 'flv',
+ 'ext': 'mp4',
'title': 'Раненный кенгуру забежал в аптеку',
'description': 'http://www.ntdtv.ru ',
- 'duration': 80,
+ 'duration': 81,
'uploader': 'NTDRussian',
'uploader_id': '29790',
'timestamp': 1381943602,
def _real_extract(self, url):
video_id = self._match_id(url)
-
- video = self._download_json(
- 'http://rutube.ru/api/video/%s/?format=json' % video_id,
- video_id, 'Downloading video JSON')
-
- info = self._extract_video(video, video_id)
-
- options = self._download_json(
- 'http://rutube.ru/api/play/options/%s/?format=json' % video_id,
- video_id, 'Downloading options JSON',
- headers=self.geo_verification_headers())
-
- formats = []
- for format_id, format_url in options['video_balancer'].items():
- ext = determine_ext(format_url)
- if ext == 'm3u8':
- formats.extend(self._extract_m3u8_formats(
- format_url, video_id, 'mp4', m3u8_id=format_id, fatal=False))
- elif ext == 'f4m':
- formats.extend(self._extract_f4m_formats(
- format_url, video_id, f4m_id=format_id, fatal=False))
- else:
- formats.append({
- 'url': format_url,
- 'format_id': format_id,
- })
- self._sort_formats(formats)
-
- info['formats'] = formats
+ info = self._download_and_extract_info(video_id)
+ info['formats'] = self._download_and_extract_formats(video_id)
return info
-class RutubeEmbedIE(InfoExtractor):
+class RutubeEmbedIE(RutubeBaseIE):
IE_NAME = 'rutube:embed'
IE_DESC = 'Rutube embedded videos'
_VALID_URL = r'https?://rutube\.ru/(?:video|play)/embed/(?P<id>[0-9]+)'
'url': 'http://rutube.ru/video/embed/6722881?vk_puid37=&vk_puid38=',
'info_dict': {
'id': 'a10e53b86e8f349080f718582ce4c661',
- 'ext': 'flv',
+ 'ext': 'mp4',
'timestamp': 1387830582,
'upload_date': '20131223',
'uploader_id': '297833',
}, {
'url': 'http://rutube.ru/play/embed/8083783',
'only_matching': True,
+ }, {
+ # private video
+ 'url': 'https://rutube.ru/play/embed/10631925?p=IbAigKqWd1do4mjaM5XLIQ',
+ 'only_matching': True,
}]
def _real_extract(self, url):
embed_id = self._match_id(url)
- webpage = self._download_webpage(url, embed_id)
-
- canonical_url = self._html_search_regex(
- r'<link\s+rel="canonical"\s+href="([^"]+?)"', webpage,
- 'Canonical URL')
- return self.url_result(canonical_url, RutubeIE.ie_key())
+ # Query may contain private videos token and should be passed to API
+ # requests (see #19163)
+ query = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
+ options = self._download_api_options(embed_id, query)
+ video_id = options['effective_video']
+ formats = self._extract_formats(options, video_id)
+ info = self._download_and_extract_info(video_id, query)
+ info.update({
+ 'extractor_key': 'Rutube',
+ 'formats': formats,
+ })
+ return info
class RutubePlaylistBaseIE(RutubeBaseIE):
video_url = url_or_none(result.get('video_url'))
if not video_url:
continue
- entry = self._extract_video(result, require_title=False)
+ entry = self._extract_info(result, require_title=False)
entry.update({
'_type': 'url',
'url': video_url,
'url': 'http://www.ruutu.fi/video/3193728',
'only_matching': True,
},
+ {
+ # audio podcast
+ 'url': 'https://www.supla.fi/supla/3382410',
+ 'md5': 'b9d7155fed37b2ebf6021d74c4b8e908',
+ 'info_dict': {
+ 'id': '3382410',
+ 'ext': 'mp3',
+ 'title': 'Mikä ihmeen poltergeist?',
+ 'description': 'md5:bbb6963df17dfd0ecd9eb9a61bf14b52',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'age_limit': 0,
+ },
+ 'expected_warnings': ['HTTP Error 502: Bad Gateway'],
+ }
]
def _real_extract(self, url):
extract_formats(child)
elif child.tag.endswith('File'):
video_url = child.text
- if (not video_url or video_url in processed_urls or
- any(p in video_url for p in ('NOT_USED', 'NOT-USED'))):
+ if (not video_url or video_url in processed_urls
+ or any(p in video_url for p in ('NOT_USED', 'NOT-USED'))):
continue
processed_urls.append(video_url)
ext = determine_ext(video_url)
continue
formats.extend(self._extract_mpd_formats(
video_url, video_id, mpd_id='dash', fatal=False))
+ elif ext == 'mp3' or child.tag == 'AudioMediaFile':
+ formats.append({
+ 'format_id': 'audio',
+ 'url': video_url,
+ 'vcodec': 'none',
+ })
else:
proto = compat_urllib_parse_urlparse(video_url).scheme
if not child.tag.startswith('HTTP') and proto != 'rtmp':
# coding: utf-8
from __future__ import unicode_literals
+import json
import re
from .common import InfoExtractor
+from ..compat import (
+ compat_parse_qs,
+ compat_str,
+ compat_urlparse,
+)
from ..utils import (
ExtractorError,
- sanitized_Request,
- std_headers,
- urlencode_postdata,
update_url_query,
)
if username is None:
return
- headers = std_headers.copy()
- if 'Referer' not in headers:
- headers['Referer'] = self._LOGIN_URL
-
- login_page = self._download_webpage(
- self._LOGIN_URL, None, 'Downloading login form', headers=headers)
+ _, urlh = self._download_webpage_handle(
+ 'https://learning.oreilly.com/accounts/login-check/', None,
+ 'Downloading login page')
- def is_logged(webpage):
- return any(re.search(p, webpage) for p in (
- r'href=["\']/accounts/logout/', r'>Sign Out<'))
+ def is_logged(urlh):
+ return 'learning.oreilly.com/home/' in compat_str(urlh.geturl())
- if is_logged(login_page):
+ if is_logged(urlh):
self.LOGGED_IN = True
return
- csrf = self._html_search_regex(
- r"name='csrfmiddlewaretoken'\s+value='([^']+)'",
- login_page, 'csrf token')
+ redirect_url = compat_str(urlh.geturl())
+ parsed_url = compat_urlparse.urlparse(redirect_url)
+ qs = compat_parse_qs(parsed_url.query)
+ next_uri = compat_urlparse.urljoin(
+ 'https://api.oreilly.com', qs['next'][0])
+
+ auth, urlh = self._download_json_handle(
+ 'https://www.oreilly.com/member/auth/login/', None, 'Logging in',
+ data=json.dumps({
+ 'email': username,
+ 'password': password,
+ 'redirect_uri': next_uri,
+ }).encode(), headers={
+ 'Content-Type': 'application/json',
+ 'Referer': redirect_url,
+ }, expected_status=400)
+
+ credentials = auth.get('credentials')
+ if (not auth.get('logged_in') and not auth.get('redirect_uri')
+ and credentials):
+ raise ExtractorError(
+ 'Unable to login: %s' % credentials, expected=True)
- login_form = {
- 'csrfmiddlewaretoken': csrf,
- 'email': username,
- 'password1': password,
- 'login': 'Sign In',
- 'next': '',
- }
+ # oreilly serves two same groot_sessionid cookies in Set-Cookie header
+ # and expects first one to be actually set
+ self._apply_first_set_cookie_header(urlh, 'groot_sessionid')
- request = sanitized_Request(
- self._LOGIN_URL, urlencode_postdata(login_form), headers=headers)
- login_page = self._download_webpage(
- request, None, 'Logging in')
+ _, urlh = self._download_webpage_handle(
+ auth.get('redirect_uri') or next_uri, None, 'Completing login',)
- if not is_logged(login_page):
- raise ExtractorError(
- 'Login failed; make sure your credentials are correct and try again.',
- expected=True)
+ if is_logged(urlh):
+ self.LOGGED_IN = True
+ return
- self.LOGGED_IN = True
+ raise ExtractorError('Unable to log in')
class SafariIE(SafariBaseIE):
IE_DESC = 'safaribooksonline.com online video'
_VALID_URL = r'''(?x)
https?://
- (?:www\.)?(?:safaribooksonline|learning\.oreilly)\.com/
+ (?:www\.)?(?:safaribooksonline|(?:learning\.)?oreilly)\.com/
(?:
library/view/[^/]+/(?P<course_id>[^/]+)/(?P<part>[^/?\#&]+)\.html|
videos/[^/]+/[^/]+/(?P<reference_id>[^-]+-[^/?\#&]+)
}, {
'url': 'https://learning.oreilly.com/videos/hadoop-fundamentals-livelessons/9780133392838/9780133392838-00_SeriesIntro',
'only_matching': True,
+ }, {
+ 'url': 'https://www.oreilly.com/library/view/hadoop-fundamentals-livelessons/9780133392838/00_SeriesIntro.html',
+ 'only_matching': True,
}]
_PARTNER_ID = '1926081'
class SafariApiIE(SafariBaseIE):
IE_NAME = 'safari:api'
- _VALID_URL = r'https?://(?:www\.)?(?:safaribooksonline|learning\.oreilly)\.com/api/v1/book/(?P<course_id>[^/]+)/chapter(?:-content)?/(?P<part>[^/?#&]+)\.html'
+ _VALID_URL = r'https?://(?:www\.)?(?:safaribooksonline|(?:learning\.)?oreilly)\.com/api/v1/book/(?P<course_id>[^/]+)/chapter(?:-content)?/(?P<part>[^/?#&]+)\.html'
_TESTS = [{
'url': 'https://www.safaribooksonline.com/api/v1/book/9780133392838/chapter/part00.html',
_VALID_URL = r'''(?x)
https?://
(?:
- (?:www\.)?(?:safaribooksonline|learning\.oreilly)\.com/
+ (?:www\.)?(?:safaribooksonline|(?:learning\.)?oreilly)\.com/
(?:
library/view/[^/]+|
api/v1/book|
}, {
'url': 'https://learning.oreilly.com/videos/hadoop-fundamentals-livelessons/9780133392838',
'only_matching': True,
+ }, {
+ 'url': 'https://www.oreilly.com/library/view/hadoop-fundamentals-livelessons/9780133392838/',
+ 'only_matching': True,
}]
@classmethod
raise ExtractorError('%s said: %s' % (self.IE_NAME, error_message), expected=True)
urls = player_params['releaseUrls']
- theplatform_url = (urls.get('progressive') or urls.get('html') or
- urls.get('standard') or player_params['relatedItemsURL'])
+ theplatform_url = (urls.get('progressive') or urls.get('html')
+ or urls.get('standard') or player_params['relatedItemsURL'])
return {
'_type': 'url_transparent',
# coding: utf-8
from __future__ import unicode_literals
+import re
+
from .common import InfoExtractor
class ServusIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?servus\.com/(?:at|de)/p/[^/]+/(?P<id>AA-\w+|\d+-\d+)'
+ _VALID_URL = r'https?://(?:www\.)?servus\.com/(?:(?:at|de)/p/[^/]+|tv/videos)/(?P<id>[aA]{2}-\w+|\d+-\d+)'
_TESTS = [{
'url': 'https://www.servus.com/de/p/Die-Gr%C3%BCnen-aus-Sicht-des-Volkes/AA-1T6VBU5PW1W12/',
- 'md5': '046dee641cda1c4cabe13baef3be2c1c',
+ 'md5': '3e1dd16775aa8d5cbef23628cfffc1f4',
'info_dict': {
'id': 'AA-1T6VBU5PW1W12',
'ext': 'mp4',
- 'title': 'Die Grünen aus Volkssicht',
- 'description': 'md5:052b5da1cb2cd7d562ef1f19be5a5cba',
- 'thumbnail': r're:^https?://.*\.jpg$',
+ 'title': 'Die Grünen aus Sicht des Volkes',
+ 'description': 'md5:1247204d85783afe3682644398ff2ec4',
+ 'thumbnail': r're:^https?://.*\.jpg',
}
}, {
'url': 'https://www.servus.com/at/p/Wie-das-Leben-beginnt/1309984137314-381415152/',
'only_matching': True,
+ }, {
+ 'url': 'https://www.servus.com/tv/videos/aa-1t6vbu5pw1w12/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.servus.com/tv/videos/1380889096408-1235196658/',
+ 'only_matching': True,
}]
def _real_extract(self, url):
- video_id = self._match_id(url)
+ video_id = self._match_id(url).upper()
webpage = self._download_webpage(url, video_id)
- title = self._og_search_title(webpage)
+ title = self._search_regex(
+ (r'videoLabel\s*=\s*(["\'])(?P<title>(?:(?!\1).)+)\1',
+ r'<h\d+[^>]+\bclass=["\']heading--(?:one|two)["\'][^>]*>(?P<title>[^<]+)'),
+ webpage, 'title', default=None,
+ group='title') or self._og_search_title(webpage)
+ title = re.sub(r'\s*-\s*Servus TV\s*$', '', title)
description = self._og_search_description(webpage)
thumbnail = self._og_search_thumbnail(webpage)
from .common import InfoExtractor
from ..compat import compat_b64decode
from ..utils import (
+ determine_ext,
ExtractorError,
int_or_none,
+ KNOWN_EXTENSIONS,
+ parse_filesize,
url_or_none,
urlencode_postdata,
)
video_url = self._extract_video_url(webpage, video_id, url)
- title = compat_b64decode(self._html_search_meta(
- 'full:title', webpage, 'title')).decode('utf-8')
- filesize = int_or_none(self._html_search_meta(
- 'full:size', webpage, 'file size', fatal=False))
+ title = self._extract_title(webpage)
+ filesize = int_or_none(self._extract_filesize(webpage))
return {
'id': video_id,
'title': title,
}
+ def _extract_title(self, webpage):
+ return compat_b64decode(self._html_search_meta(
+ 'full:title', webpage, 'title')).decode('utf-8')
+
+ def _extract_filesize(self, webpage):
+ return self._html_search_meta(
+ 'full:size', webpage, 'file size', fatal=False)
+
class SharedIE(SharedBaseIE):
IE_DESC = 'shared.sx'
'id': 'd7ddda0e78',
'ext': 'mp4',
'title': 'Chicken',
- 'filesize': 528031,
+ 'filesize': 515659,
},
}
- def _extract_video_url(self, webpage, video_id, *args):
+ def _extract_title(self, webpage):
+ title = self._html_search_regex(
+ r'data-name\s*=\s*(["\'])(?P<title>(?:(?!\1).)+)\1', webpage,
+ 'title', default=None, group='title')
+ if title:
+ ext = determine_ext(title)
+ if ext.lower() in KNOWN_EXTENSIONS:
+ title = title.rpartition('.' + ext)[0]
+ return title
+ return self._og_search_title(webpage)
+
+ def _extract_filesize(self, webpage):
+ return parse_filesize(self._search_regex(
+ r'data-type=["\']video["\'][^>]*>Watch.*?<strong>\s*\((.+?)\)',
+ webpage, 'filesize', fatal=False))
+
+ def _extract_video_url(self, webpage, video_id, url):
def decode_url(encoded_url):
return compat_b64decode(encoded_url).decode('utf-8')
quality_key = qualities(['lq', 'sd', 'hq', 'hd'])
formats = []
subtitles = {}
- for asset in clip_data['assets']:
+ assets = clip_data.get('assets') or []
+ for asset in assets:
asset_url = asset.get('full_physical_path')
protocol = asset.get('protocol')
- if not asset_url or protocol == 'primetime' or asset.get('type') == 'usp_hlsfp_h264' or asset_url in urls:
+ if not asset_url or ((protocol == 'primetime' or asset.get('type') == 'usp_hlsfp_h264') and not ('_drmnp.ism/' in asset_url or '_unpnp.ism/' in asset_url)) or asset_url in urls:
continue
urls.append(asset_url)
container = asset.get('video_container')
if not urlh:
continue
asset_url = urlh.geturl()
+ asset_url = asset_url.replace('_drmnp.ism/', '_unpnp.ism/')
for i in range(3, 0, -1):
asset_url = asset_url = asset_url.replace('_sd1/', '_sd%d/' % i)
m3u8_formats = self._extract_m3u8_formats(
)
-class SkySportsIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?skysports\.com/watch/video/(?P<id>[0-9]+)'
- _TEST = {
- 'url': 'http://www.skysports.com/watch/video/10328419/bale-its-our-time-to-shine',
- 'md5': '77d59166cddc8d3cb7b13e35eaf0f5ec',
- 'info_dict': {
- 'id': '10328419',
- 'ext': 'mp4',
- 'title': 'Bale: It\'s our time to shine',
- 'description': 'md5:e88bda94ae15f7720c5cb467e777bb6d',
- },
- 'add_ie': ['Ooyala'],
- }
-
+class SkyBaseIE(InfoExtractor):
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
video_data = extract_attributes(self._search_regex(
- r'(<div.+?class="sdc-article-video__media-ooyala"[^>]+>)', webpage, 'video data'))
+ r'(<div.+?class="[^"]*sdc-article-video__media-ooyala[^"]*"[^>]+>)',
+ webpage, 'video data'))
video_url = 'ooyala:%s' % video_data['data-video-id']
if video_data.get('data-token-required') == 'true':
- token_fetch_options = self._parse_json(video_data.get('data-token-fetch-options', '{}'), video_id, fatal=False) or {}
+ token_fetch_options = self._parse_json(video_data.get(
+ 'data-token-fetch-options', '{}'), video_id, fatal=False) or {}
token_fetch_url = token_fetch_options.get('url')
if token_fetch_url:
- embed_token = self._download_webpage(urljoin(url, token_fetch_url), video_id, fatal=False)
+ embed_token = self._download_webpage(urljoin(
+ url, token_fetch_url), video_id, fatal=False)
if embed_token:
- video_url = smuggle_url(video_url, {'embed_token': embed_token.strip('"')})
+ video_url = smuggle_url(
+ video_url, {'embed_token': embed_token.strip('"')})
return {
'_type': 'url_transparent',
'description': strip_or_none(self._og_search_description(webpage)),
'ie_key': 'Ooyala',
}
+
+
+class SkySportsIE(SkyBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?skysports\.com/watch/video/(?P<id>[0-9]+)'
+ _TEST = {
+ 'url': 'http://www.skysports.com/watch/video/10328419/bale-its-our-time-to-shine',
+ 'md5': '77d59166cddc8d3cb7b13e35eaf0f5ec',
+ 'info_dict': {
+ 'id': 'o3eWJnNDE6l7kfNO8BOoBlRxXRQ4ANNQ',
+ 'ext': 'mp4',
+ 'title': 'Bale: It\'s our time to shine',
+ 'description': 'md5:e88bda94ae15f7720c5cb467e777bb6d',
+ },
+ 'add_ie': ['Ooyala'],
+ }
+
+
+class SkyNewsIE(SkyBaseIE):
+ _VALID_URL = r'https?://news\.sky\.com/video/[0-9a-z-]+-(?P<id>[0-9]+)'
+ _TEST = {
+ 'url': 'https://news.sky.com/video/russian-plane-inspected-after-deadly-fire-11712962',
+ 'md5': 'd6327e581473cea9976a3236ded370cd',
+ 'info_dict': {
+ 'id': '1ua21xaDE6lCtZDmbYfl8kwsKLooJbNM',
+ 'ext': 'mp4',
+ 'title': 'Russian plane inspected after deadly fire',
+ 'description': 'The Russian Investigative Committee has released video of the wreckage of a passenger plane which caught fire near Moscow.',
+ },
+ 'add_ie': ['Ooyala'],
+ }
)
from ..utils import (
ExtractorError,
+ float_or_none,
int_or_none,
- unified_strdate,
+ KNOWN_EXTENSIONS,
+ merge_dicts,
+ mimetype2ext,
+ str_or_none,
+ try_get,
+ unified_timestamp,
update_url_query,
+ url_or_none,
)
(?:(?:(?:www\.|m\.)?soundcloud\.com/
(?!stations/track)
(?P<uploader>[\w\d-]+)/
- (?!(?:tracks|sets(?:/.+?)?|reposts|likes|spotlight)/?(?:$|[?#]))
+ (?!(?:tracks|albums|sets(?:/.+?)?|reposts|likes|spotlight)/?(?:$|[?#]))
(?P<title>[\w\d-]+)/?
(?P<token>[^?]+?)?(?:[?].*)?$)
|(?:api\.soundcloud\.com/tracks/(?P<track_id>\d+)
'info_dict': {
'id': '62986583',
'ext': 'mp3',
- 'upload_date': '20121011',
+ 'title': 'Lostin Powers - She so Heavy (SneakPreview) Adrian Ackers Blueprint 1',
'description': 'No Downloads untill we record the finished version this weekend, i was too pumped n i had to post it , earl is prolly gonna b hella p.o\'d',
'uploader': 'E.T. ExTerrestrial Music',
- 'title': 'Lostin Powers - She so Heavy (SneakPreview) Adrian Ackers Blueprint 1',
- 'duration': 143,
+ 'timestamp': 1349920598,
+ 'upload_date': '20121011',
+ 'duration': 143.216,
'license': 'all-rights-reserved',
+ 'view_count': int,
+ 'like_count': int,
+ 'comment_count': int,
+ 'repost_count': int,
}
},
# not streamable song
'title': 'Goldrushed',
'description': 'From Stockholm Sweden\r\nPovel / Magnus / Filip / David\r\nwww.theroyalconcept.com',
'uploader': 'The Royal Concept',
+ 'timestamp': 1337635207,
'upload_date': '20120521',
- 'duration': 227,
+ 'duration': 30,
'license': 'all-rights-reserved',
+ 'view_count': int,
+ 'like_count': int,
+ 'comment_count': int,
+ 'repost_count': int,
},
'params': {
# rtmp
'id': '123998367',
'ext': 'mp3',
'title': 'Youtube - Dl Test Video \'\' Ä↭',
- 'uploader': 'jaimeMF',
'description': 'test chars: \"\'/\\ä↭',
+ 'uploader': 'jaimeMF',
+ 'timestamp': 1386604920,
'upload_date': '20131209',
- 'duration': 9,
+ 'duration': 9.927,
'license': 'all-rights-reserved',
+ 'view_count': int,
+ 'like_count': int,
+ 'comment_count': int,
+ 'repost_count': int,
},
},
# private link (alt format)
'id': '123998367',
'ext': 'mp3',
'title': 'Youtube - Dl Test Video \'\' Ä↭',
- 'uploader': 'jaimeMF',
'description': 'test chars: \"\'/\\ä↭',
+ 'uploader': 'jaimeMF',
+ 'timestamp': 1386604920,
'upload_date': '20131209',
- 'duration': 9,
+ 'duration': 9.927,
'license': 'all-rights-reserved',
+ 'view_count': int,
+ 'like_count': int,
+ 'comment_count': int,
+ 'repost_count': int,
},
},
# downloadable song
'title': 'Bus Brakes',
'description': 'md5:0053ca6396e8d2fd7b7e1595ef12ab66',
'uploader': 'oddsamples',
+ 'timestamp': 1389232924,
'upload_date': '20140109',
- 'duration': 17,
+ 'duration': 17.346,
'license': 'cc-by-sa',
+ 'view_count': int,
+ 'like_count': int,
+ 'comment_count': int,
+ 'repost_count': int,
},
},
# private link, downloadable format
'title': 'Uplifting Only 238 [No Talking] (incl. Alex Feed Guestmix) (Aug 31, 2017) [wav]',
'description': 'md5:fa20ee0fca76a3d6df8c7e57f3715366',
'uploader': 'Ori Uplift Music',
+ 'timestamp': 1504206263,
'upload_date': '20170831',
- 'duration': 7449,
+ 'duration': 7449.096,
'license': 'all-rights-reserved',
+ 'view_count': int,
+ 'like_count': int,
+ 'comment_count': int,
+ 'repost_count': int,
},
},
# no album art, use avatar pic for thumbnail
'title': 'Sideways (Prod. Mad Real)',
'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
'uploader': 'garyvee',
+ 'timestamp': 1488152409,
'upload_date': '20170226',
- 'duration': 207,
+ 'duration': 207.012,
'thumbnail': r're:https?://.*\.jpg',
'license': 'all-rights-reserved',
+ 'view_count': int,
+ 'like_count': int,
+ 'comment_count': int,
+ 'repost_count': int,
},
'params': {
'skip_download': True,
},
},
+ # not avaialble via api.soundcloud.com/i1/tracks/id/streams
+ {
+ 'url': 'https://soundcloud.com/giovannisarani/mezzo-valzer',
+ 'md5': 'e22aecd2bc88e0e4e432d7dcc0a1abf7',
+ 'info_dict': {
+ 'id': '583011102',
+ 'ext': 'mp3',
+ 'title': 'Mezzo Valzer',
+ 'description': 'md5:4138d582f81866a530317bae316e8b61',
+ 'uploader': 'Giovanni Sarani',
+ 'timestamp': 1551394171,
+ 'upload_date': '20190228',
+ 'duration': 180.157,
+ 'thumbnail': r're:https?://.*\.jpg',
+ 'license': 'all-rights-reserved',
+ 'view_count': int,
+ 'like_count': int,
+ 'comment_count': int,
+ 'repost_count': int,
+ },
+ 'expected_warnings': ['Unable to download JSON metadata'],
+ }
]
- _CLIENT_ID = 'LvWovRaJZlWCHql0bISuum8Bd2KX79mb'
+ _CLIENT_ID = 'FweeGBOOEOYJWLJN3oEyToGLKhmSz0I7'
@staticmethod
def _extract_urls(webpage):
r'<iframe[^>]+src=(["\'])(?P<url>(?:https?://)?(?:w\.)?soundcloud\.com/player.+?)\1',
webpage)]
- def report_resolve(self, video_id):
- """Report information extraction."""
- self.to_screen('%s: Resolving id' % video_id)
-
@classmethod
def _resolv_url(cls, url):
return 'https://api.soundcloud.com/resolve.json?url=' + url + '&client_id=' + cls._CLIENT_ID
def _extract_info_dict(self, info, full_title=None, quiet=False, secret_token=None):
track_id = compat_str(info['id'])
+ title = info['title']
name = full_title or track_id
if quiet:
self.report_extraction(name)
thumbnail = info.get('artwork_url') or info.get('user', {}).get('avatar_url')
if isinstance(thumbnail, compat_str):
thumbnail = thumbnail.replace('-large', '-t500x500')
+ username = try_get(info, lambda x: x['user']['username'], compat_str)
+
+ def extract_count(key):
+ return int_or_none(info.get('%s_count' % key))
+
+ like_count = extract_count('favoritings')
+ if like_count is None:
+ like_count = extract_count('likes')
+
result = {
'id': track_id,
- 'uploader': info.get('user', {}).get('username'),
- 'upload_date': unified_strdate(info.get('created_at')),
- 'title': info['title'],
+ 'uploader': username,
+ 'timestamp': unified_timestamp(info.get('created_at')),
+ 'title': title,
'description': info.get('description'),
'thumbnail': thumbnail,
- 'duration': int_or_none(info.get('duration'), 1000),
+ 'duration': float_or_none(info.get('duration'), 1000),
'webpage_url': info.get('permalink_url'),
'license': info.get('license'),
+ 'view_count': extract_count('playback'),
+ 'like_count': like_count,
+ 'comment_count': extract_count('comment'),
+ 'repost_count': extract_count('reposts'),
+ 'genre': info.get('genre'),
}
+
+ format_urls = set()
formats = []
query = {'client_id': self._CLIENT_ID}
if secret_token is not None:
# We can build a direct link to the song
format_url = update_url_query(
'https://api.soundcloud.com/tracks/%s/download' % track_id, query)
+ format_urls.add(format_url)
formats.append({
'format_id': 'download',
'ext': info.get('original_format', 'mp3'),
'preference': 10,
})
- # We have to retrieve the url
+ # Old API, does not work for some tracks (e.g.
+ # https://soundcloud.com/giovannisarani/mezzo-valzer)
format_dict = self._download_json(
'https://api.soundcloud.com/i1/tracks/%s/streams' % track_id,
- track_id, 'Downloading track url', query=query)
-
- for key, stream_url in format_dict.items():
- ext, abr = 'mp3', None
- mobj = re.search(r'_([^_]+)_(\d+)_url', key)
- if mobj:
- ext, abr = mobj.groups()
- abr = int(abr)
- if key.startswith('http'):
- stream_formats = [{
- 'format_id': key,
- 'ext': ext,
- 'url': stream_url,
- }]
- elif key.startswith('rtmp'):
- # The url doesn't have an rtmp app, we have to extract the playpath
- url, path = stream_url.split('mp3:', 1)
- stream_formats = [{
- 'format_id': key,
- 'url': url,
- 'play_path': 'mp3:' + path,
- 'ext': 'flv',
- }]
- elif key.startswith('hls'):
- stream_formats = self._extract_m3u8_formats(
- stream_url, track_id, ext, entry_protocol='m3u8_native',
- m3u8_id=key, fatal=False)
- else:
+ track_id, 'Downloading track url', query=query, fatal=False)
+
+ if format_dict:
+ for key, stream_url in format_dict.items():
+ if stream_url in format_urls:
+ continue
+ format_urls.add(stream_url)
+ ext, abr = 'mp3', None
+ mobj = re.search(r'_([^_]+)_(\d+)_url', key)
+ if mobj:
+ ext, abr = mobj.groups()
+ abr = int(abr)
+ if key.startswith('http'):
+ stream_formats = [{
+ 'format_id': key,
+ 'ext': ext,
+ 'url': stream_url,
+ }]
+ elif key.startswith('rtmp'):
+ # The url doesn't have an rtmp app, we have to extract the playpath
+ url, path = stream_url.split('mp3:', 1)
+ stream_formats = [{
+ 'format_id': key,
+ 'url': url,
+ 'play_path': 'mp3:' + path,
+ 'ext': 'flv',
+ }]
+ elif key.startswith('hls'):
+ stream_formats = self._extract_m3u8_formats(
+ stream_url, track_id, ext, entry_protocol='m3u8_native',
+ m3u8_id=key, fatal=False)
+ else:
+ continue
+
+ if abr:
+ for f in stream_formats:
+ f['abr'] = abr
+
+ formats.extend(stream_formats)
+
+ # New API
+ transcodings = try_get(
+ info, lambda x: x['media']['transcodings'], list) or []
+ for t in transcodings:
+ if not isinstance(t, dict):
continue
-
- if abr:
- for f in stream_formats:
- f['abr'] = abr
-
- formats.extend(stream_formats)
+ format_url = url_or_none(t.get('url'))
+ if not format_url:
+ continue
+ stream = self._download_json(
+ update_url_query(format_url, query), track_id, fatal=False)
+ if not isinstance(stream, dict):
+ continue
+ stream_url = url_or_none(stream.get('url'))
+ if not stream_url:
+ continue
+ if stream_url in format_urls:
+ continue
+ format_urls.add(stream_url)
+ protocol = try_get(t, lambda x: x['format']['protocol'], compat_str)
+ if protocol != 'hls' and '/hls' in format_url:
+ protocol = 'hls'
+ ext = None
+ preset = str_or_none(t.get('preset'))
+ if preset:
+ ext = preset.split('_')[0]
+ if ext not in KNOWN_EXTENSIONS:
+ mimetype = try_get(
+ t, lambda x: x['format']['mime_type'], compat_str)
+ ext = mimetype2ext(mimetype) or 'mp3'
+ format_id_list = []
+ if protocol:
+ format_id_list.append(protocol)
+ format_id_list.append(ext)
+ format_id = '_'.join(format_id_list)
+ formats.append({
+ 'url': stream_url,
+ 'format_id': format_id,
+ 'ext': ext,
+ 'protocol': 'm3u8_native' if protocol == 'hls' else 'http',
+ })
if not formats:
# We fallback to the stream_url in the original info, this
'url': update_url_query(info['stream_url'], query),
'ext': 'mp3',
})
+ self._check_formats(formats, track_id)
for f in formats:
f['vcodec'] = 'none'
- self._check_formats(formats, track_id)
self._sort_formats(formats)
result['formats'] = formats
raise ExtractorError('Invalid URL: %s' % url)
track_id = mobj.group('track_id')
+ new_info = {}
if track_id is not None:
info_json_url = 'https://api.soundcloud.com/tracks/' + track_id + '.json?client_id=' + self._CLIENT_ID
if token:
resolve_title += '/%s' % token
- self.report_resolve(full_title)
-
- url = 'https://soundcloud.com/%s' % resolve_title
- info_json_url = self._resolv_url(url)
- info = self._download_json(info_json_url, full_title, 'Downloading info JSON')
-
- return self._extract_info_dict(info, full_title, secret_token=token)
+ webpage = self._download_webpage(url, full_title, fatal=False)
+ if webpage:
+ entries = self._parse_json(
+ self._search_regex(
+ r'var\s+c\s*=\s*(\[.+?\])\s*,\s*o\s*=Date\b', webpage,
+ 'data', default='[]'), full_title, fatal=False)
+ if entries:
+ for e in entries:
+ if not isinstance(e, dict):
+ continue
+ if e.get('id') != 67:
+ continue
+ data = try_get(e, lambda x: x['data'][0], dict)
+ if data:
+ new_info = data
+ break
+ info_json_url = self._resolv_url(
+ 'https://soundcloud.com/%s' % resolve_title)
+
+ # Contains some additional info missing from new_info
+ info = self._download_json(
+ info_json_url, full_title, 'Downloading info JSON')
+
+ return self._extract_info_dict(
+ merge_dicts(info, new_info), full_title, secret_token=token)
class SoundcloudPlaylistBaseIE(SoundcloudIE):
full_title += '/' + token
url += '/' + token
- self.report_resolve(full_title)
-
resolv_url = self._resolv_url(url)
info = self._download_json(resolv_url, full_title)
class SoundcloudPagedPlaylistBaseIE(SoundcloudPlaylistBaseIE):
- _API_BASE = 'https://api.soundcloud.com'
_API_V2_BASE = 'https://api-v2.soundcloud.com'
def _extract_playlist(self, base_url, playlist_id, playlist_title):
next_href, playlist_id, 'Downloading track page %s' % (i + 1))
collection = response['collection']
- if not collection:
- break
- def resolve_permalink_url(candidates):
+ if not isinstance(collection, list):
+ collection = []
+
+ # An empty collection may be returned; in that case we proceed
+ # straight to next_href
+
+ def resolve_entry(candidates):
for cand in candidates:
- if isinstance(cand, dict):
- permalink_url = cand.get('permalink_url')
- entry_id = self._extract_id(cand)
- if permalink_url and permalink_url.startswith('http'):
- return permalink_url, entry_id
+ if not isinstance(cand, dict):
+ continue
+ permalink_url = url_or_none(cand.get('permalink_url'))
+ if not permalink_url:
+ continue
+ return self.url_result(
+ permalink_url,
+ ie=SoundcloudIE.ie_key() if SoundcloudIE.suitable(permalink_url) else None,
+ video_id=self._extract_id(cand),
+ video_title=cand.get('title'))
for e in collection:
- permalink_url, entry_id = resolve_permalink_url((e, e.get('track'), e.get('playlist')))
- if permalink_url:
- entries.append(self.url_result(permalink_url, video_id=entry_id))
+ entry = resolve_entry((e, e.get('track'), e.get('playlist')))
+ if entry:
+ entries.append(entry)
next_href = response.get('next_href')
if not next_href:
(?:(?:www|m)\.)?soundcloud\.com/
(?P<user>[^/]+)
(?:/
- (?P<rsrc>tracks|sets|reposts|likes|spotlight)
+ (?P<rsrc>tracks|albums|sets|reposts|likes|spotlight)
)?
/?(?:[?#].*)?$
'''
IE_NAME = 'soundcloud:user'
_TESTS = [{
- 'url': 'https://soundcloud.com/the-akashic-chronicler',
+ 'url': 'https://soundcloud.com/soft-cell-official',
'info_dict': {
- 'id': '114582580',
- 'title': 'The Akashic Chronicler (All)',
+ 'id': '207965082',
+ 'title': 'Soft Cell (All)',
},
- 'playlist_mincount': 74,
+ 'playlist_mincount': 28,
}, {
- 'url': 'https://soundcloud.com/the-akashic-chronicler/tracks',
+ 'url': 'https://soundcloud.com/soft-cell-official/tracks',
'info_dict': {
- 'id': '114582580',
- 'title': 'The Akashic Chronicler (Tracks)',
+ 'id': '207965082',
+ 'title': 'Soft Cell (Tracks)',
},
- 'playlist_mincount': 37,
+ 'playlist_mincount': 27,
}, {
- 'url': 'https://soundcloud.com/the-akashic-chronicler/sets',
+ 'url': 'https://soundcloud.com/soft-cell-official/albums',
'info_dict': {
- 'id': '114582580',
- 'title': 'The Akashic Chronicler (Playlists)',
+ 'id': '207965082',
+ 'title': 'Soft Cell (Albums)',
+ },
+ 'playlist_mincount': 1,
+ }, {
+ 'url': 'https://soundcloud.com/jcv246/sets',
+ 'info_dict': {
+ 'id': '12982173',
+ 'title': 'Jordi / cv (Playlists)',
},
'playlist_mincount': 2,
}, {
- 'url': 'https://soundcloud.com/the-akashic-chronicler/reposts',
+ 'url': 'https://soundcloud.com/jcv246/reposts',
'info_dict': {
- 'id': '114582580',
- 'title': 'The Akashic Chronicler (Reposts)',
+ 'id': '12982173',
+ 'title': 'Jordi / cv (Reposts)',
},
- 'playlist_mincount': 7,
+ 'playlist_mincount': 6,
}, {
- 'url': 'https://soundcloud.com/the-akashic-chronicler/likes',
+ 'url': 'https://soundcloud.com/clalberg/likes',
'info_dict': {
- 'id': '114582580',
- 'title': 'The Akashic Chronicler (Likes)',
+ 'id': '11817582',
+ 'title': 'clalberg (Likes)',
},
- 'playlist_mincount': 321,
+ 'playlist_mincount': 5,
}, {
'url': 'https://soundcloud.com/grynpyret/spotlight',
'info_dict': {
}]
_BASE_URL_MAP = {
- 'all': '%s/profile/soundcloud:users:%%s' % SoundcloudPagedPlaylistBaseIE._API_V2_BASE,
- 'tracks': '%s/users/%%s/tracks' % SoundcloudPagedPlaylistBaseIE._API_BASE,
+ 'all': '%s/stream/users/%%s' % SoundcloudPagedPlaylistBaseIE._API_V2_BASE,
+ 'tracks': '%s/users/%%s/tracks' % SoundcloudPagedPlaylistBaseIE._API_V2_BASE,
+ 'albums': '%s/users/%%s/albums' % SoundcloudPagedPlaylistBaseIE._API_V2_BASE,
'sets': '%s/users/%%s/playlists' % SoundcloudPagedPlaylistBaseIE._API_V2_BASE,
- 'reposts': '%s/profile/soundcloud:users:%%s/reposts' % SoundcloudPagedPlaylistBaseIE._API_V2_BASE,
+ 'reposts': '%s/stream/users/%%s/reposts' % SoundcloudPagedPlaylistBaseIE._API_V2_BASE,
'likes': '%s/users/%%s/likes' % SoundcloudPagedPlaylistBaseIE._API_V2_BASE,
'spotlight': '%s/users/%%s/spotlight' % SoundcloudPagedPlaylistBaseIE._API_V2_BASE,
}
_TITLE_MAP = {
'all': 'All',
'tracks': 'Tracks',
+ 'albums': 'Albums',
'sets': 'Playlists',
'reposts': 'Reposts',
'likes': 'Likes',
from .common import InfoExtractor
from ..utils import (
ExtractorError,
+ orderedSet,
parse_duration,
parse_resolution,
str_to_int,
+ url_or_none,
+ urlencode_postdata,
)
class SpankBangIE(InfoExtractor):
- _VALID_URL = r'https?://(?:(?:www|m|[a-z]{2})\.)?spankbang\.com/(?P<id>[\da-z]+)/video'
+ _VALID_URL = r'https?://(?:[^/]+\.)?spankbang\.com/(?P<id>[\da-z]+)/(?:video|play|embed)\b'
_TESTS = [{
'url': 'http://spankbang.com/3vvn/video/fantasy+solo',
'md5': '1cc433e1d6aa14bc376535b8679302f7',
# 4k
'url': 'https://spankbang.com/1vwqx/video/jade+kush+solo+4k',
'only_matching': True,
+ }, {
+ 'url': 'https://m.spankbang.com/3vvn/play/fantasy+solo/480p/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://m.spankbang.com/3vvn/play',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://spankbang.com/2y3td/embed/',
+ 'only_matching': True,
}]
def _real_extract(self, url):
video_id = self._match_id(url)
- webpage = self._download_webpage(url, video_id, headers={
- 'Cookie': 'country=US'
- })
+ webpage = self._download_webpage(
+ url.replace('/%s/embed' % video_id, '/%s/video' % video_id),
+ video_id, headers={'Cookie': 'country=US'})
if re.search(r'<[^>]+\bid=["\']video_removed', webpage):
raise ExtractorError(
'Video %s is not available' % video_id, expected=True)
formats = []
- for mobj in re.finditer(
- r'stream_url_(?P<id>[^\s=]+)\s*=\s*(["\'])(?P<url>(?:(?!\2).)+)\2',
- webpage):
- format_id, format_url = mobj.group('id', 'url')
+
+ def extract_format(format_id, format_url):
+ f_url = url_or_none(format_url)
+ if not f_url:
+ return
f = parse_resolution(format_id)
f.update({
- 'url': format_url,
+ 'url': f_url,
'format_id': format_id,
})
formats.append(f)
+
+ STREAM_URL_PREFIX = 'stream_url_'
+
+ for mobj in re.finditer(
+ r'%s(?P<id>[^\s=]+)\s*=\s*(["\'])(?P<url>(?:(?!\2).)+)\2'
+ % STREAM_URL_PREFIX, webpage):
+ extract_format(mobj.group('id', 'url'))
+
+ if not formats:
+ stream_key = self._search_regex(
+ r'data-streamkey\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1',
+ webpage, 'stream key', group='value')
+
+ sb_csrf_session = self._get_cookies(
+ 'https://spankbang.com')['sb_csrf_session'].value
+
+ stream = self._download_json(
+ 'https://spankbang.com/api/videos/stream', video_id,
+ 'Downloading stream JSON', data=urlencode_postdata({
+ 'id': stream_key,
+ 'data': 0,
+ 'sb_csrf_session': sb_csrf_session,
+ }), headers={
+ 'Referer': url,
+ 'X-CSRFToken': sb_csrf_session,
+ })
+
+ for format_id, format_url in stream.items():
+ if format_id.startswith(STREAM_URL_PREFIX):
+ extract_format(
+ format_id[len(STREAM_URL_PREFIX):], format_url)
+
self._sort_formats(formats)
title = self._html_search_regex(
'formats': formats,
'age_limit': age_limit,
}
+
+
+class SpankBangPlaylistIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:[^/]+\.)?spankbang\.com/(?P<id>[\da-z]+)/playlist/[^/]+'
+ _TEST = {
+ 'url': 'https://spankbang.com/ug0k/playlist/big+ass+titties',
+ 'info_dict': {
+ 'id': 'ug0k',
+ 'title': 'Big Ass Titties',
+ },
+ 'playlist_mincount': 50,
+ }
+
+ def _real_extract(self, url):
+ playlist_id = self._match_id(url)
+
+ webpage = self._download_webpage(
+ url, playlist_id, headers={'Cookie': 'country=US; mobile=on'})
+
+ entries = [self.url_result(
+ 'https://spankbang.com/%s/video' % video_id,
+ ie=SpankBangIE.ie_key(), video_id=video_id)
+ for video_id in orderedSet(re.findall(
+ r'<a[^>]+\bhref=["\']/?([\da-z]+)/play/', webpage))]
+
+ title = self._html_search_regex(
+ r'<h1>([^<]+)\s+playlist</h1>', webpage, 'playlist title',
+ fatal=False)
+
+ return self.playlist_result(entries, playlist_id, title)
_GEO_COUNTRIES = ['US']
def _extract_mgid(self, webpage):
- cs = self._parse_json(self._search_regex(
+ root_data = self._parse_json(self._search_regex(
r'window\.__DATA__\s*=\s*({.+})',
- webpage, 'data'), None)['children']
- c = next(c for c in cs if c.get('type') == 'VideoPlayer')
+ webpage, 'data'), None)
+
+ def find_sub_data(data, data_type):
+ return next(c for c in data['children'] if c.get('type') == data_type)
+
+ c = find_sub_data(find_sub_data(root_data, 'MainContainer'), 'VideoPlayer')
return c['props']['media']['video']['config']['uri']
class SRGSSRPlayIE(InfoExtractor):
IE_DESC = 'srf.ch, rts.ch, rsi.ch, rtr.ch and swissinfo.ch play sites'
- _VALID_URL = r'https?://(?:(?:www|play)\.)?(?P<bu>srf|rts|rsi|rtr|swissinfo)\.ch/play/(?:tv|radio)/[^/]+/(?P<type>video|audio)/[^?]+\?id=(?P<id>[0-9a-f\-]{36}|\d+)'
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:(?:www|play)\.)?
+ (?P<bu>srf|rts|rsi|rtr|swissinfo)\.ch/play/(?:tv|radio)/
+ (?:
+ [^/]+/(?P<type>video|audio)/[^?]+|
+ popup(?P<type_2>video|audio)player
+ )
+ \?id=(?P<id>[0-9a-f\-]{36}|\d+)
+ '''
_TESTS = [{
'url': 'http://www.srf.ch/play/tv/10vor10/video/snowden-beantragt-asyl-in-russland?id=28e1a57d-5b76-4399-8ab3-9097f071e6c5',
# m3u8 download
'skip_download': True,
}
+ }, {
+ 'url': 'https://www.srf.ch/play/tv/popupvideoplayer?id=c4dba0ca-e75b-43b2-a34f-f708a4932e01',
+ 'only_matching': True,
}]
def _real_extract(self, url):
- bu, media_type, media_id = re.match(self._VALID_URL, url).groups()
+ mobj = re.match(self._VALID_URL, url)
+ bu = mobj.group('bu')
+ media_type = mobj.group('type') or mobj.group('type_2')
+ media_id = mobj.group('id')
# other info can be extracted from url + '&layout=json'
return self.url_result('srgssr:%s:%s:%s' % (bu[:3], media_type, media_id), 'SRGSSR')
class StreamangoIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?streamango\.com/(?:f|embed)/(?P<id>[^/?#&]+)'
+ _VALID_URL = r'https?://(?:www\.)?(?:streamango\.com|fruithosts\.net|streamcherry\.com)/(?:f|embed)/(?P<id>[^/?#&]+)'
_TESTS = [{
'url': 'https://streamango.com/f/clapasobsptpkdfe/20170315_150006_mp4',
'md5': 'e992787515a182f55e38fc97588d802a',
}, {
'url': 'https://streamango.com/embed/clapasobsptpkdfe/20170315_150006_mp4',
'only_matching': True,
+ }, {
+ 'url': 'https://fruithosts.net/f/mreodparcdcmspsm/w1f1_r4lph_2018_brrs_720p_latino_mp4',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://streamcherry.com/f/clapasobsptpkdfe/',
+ 'only_matching': True,
}]
def _real_extract(self, url):
value="([^"]*)"
''', orig_webpage)
- self._sleep(12, video_id)
+ self._sleep(6, video_id)
webpage = self._download_webpage(
url, video_id, data=urlencode_postdata(fields), headers={
--- /dev/null
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_parse_qs,
+ compat_urllib_parse_urlparse
+)
+from ..utils import (
+ extract_attributes,
+ float_or_none,
+ int_or_none,
+ str_or_none,
+)
+
+
+class STVPlayerIE(InfoExtractor):
+ IE_NAME = 'stv:player'
+ _VALID_URL = r'https?://player\.stv\.tv/(?P<type>episode|video)/(?P<id>[a-z0-9]{4})'
+ _TEST = {
+ 'url': 'https://player.stv.tv/video/7srz/victoria/interview-with-the-cast-ahead-of-new-victoria/',
+ 'md5': '2ad867d4afd641fa14187596e0fbc91b',
+ 'info_dict': {
+ 'id': '6016487034001',
+ 'ext': 'mp4',
+ 'upload_date': '20190321',
+ 'title': 'Interview with the cast ahead of new Victoria',
+ 'description': 'Nell Hudson and Lily Travers tell us what to expect in the new season of Victoria.',
+ 'timestamp': 1553179628,
+ 'uploader_id': '1486976045',
+ },
+ 'skip': 'this resource is unavailable outside of the UK',
+ }
+ _PUBLISHER_ID = '1486976045'
+ _PTYPE_MAP = {
+ 'episode': 'episodes',
+ 'video': 'shortform',
+ }
+
+ def _real_extract(self, url):
+ ptype, video_id = re.match(self._VALID_URL, url).groups()
+ webpage = self._download_webpage(url, video_id)
+
+ qs = compat_parse_qs(compat_urllib_parse_urlparse(self._search_regex(
+ r'itemprop="embedURL"[^>]+href="([^"]+)',
+ webpage, 'embed URL', default=None)).query)
+ publisher_id = qs.get('publisherID', [None])[0] or self._PUBLISHER_ID
+
+ player_attr = extract_attributes(self._search_regex(
+ r'(<[^>]+class="bcplayer"[^>]+>)', webpage, 'player', default=None)) or {}
+
+ info = {}
+ duration = ref_id = series = video_id = None
+ api_ref_id = player_attr.get('data-player-api-refid')
+ if api_ref_id:
+ resp = self._download_json(
+ 'https://player.api.stv.tv/v1/%s/%s' % (self._PTYPE_MAP[ptype], api_ref_id),
+ api_ref_id, fatal=False)
+ if resp:
+ result = resp.get('results') or {}
+ video = result.get('video') or {}
+ video_id = str_or_none(video.get('id'))
+ ref_id = video.get('guid')
+ duration = video.get('length')
+ programme = result.get('programme') or {}
+ series = programme.get('name') or programme.get('shortName')
+ subtitles = {}
+ _subtitles = result.get('_subtitles') or {}
+ for ext, sub_url in _subtitles.items():
+ subtitles.setdefault('en', []).append({
+ 'ext': 'vtt' if ext == 'webvtt' else ext,
+ 'url': sub_url,
+ })
+ info.update({
+ 'description': result.get('summary'),
+ 'subtitles': subtitles,
+ 'view_count': int_or_none(result.get('views')),
+ })
+ if not video_id:
+ video_id = qs.get('videoId', [None])[0] or self._search_regex(
+ r'<link\s+itemprop="url"\s+href="(\d+)"',
+ webpage, 'video id', default=None) or 'ref:' + (ref_id or player_attr['data-refid'])
+
+ info.update({
+ '_type': 'url_transparent',
+ 'duration': float_or_none(duration or player_attr.get('data-duration'), 1000),
+ 'id': video_id,
+ 'ie_key': 'BrightcoveNew',
+ 'series': series or player_attr.get('data-programme-name'),
+ 'url': 'http://players.brightcove.net/%s/default_default/index.html?videoId=%s' % (publisher_id, video_id),
+ })
+ return info
--- /dev/null
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ determine_ext,
+ int_or_none,
+ str_or_none,
+)
+
+
+class SverigesRadioBaseIE(InfoExtractor):
+ _BASE_URL = 'https://sverigesradio.se/sida/playerajax/'
+ _QUALITIES = ['low', 'medium', 'high']
+ _EXT_TO_CODEC_MAP = {
+ 'mp3': 'mp3',
+ 'm4a': 'aac',
+ }
+ _CODING_FORMAT_TO_ABR_MAP = {
+ 5: 128,
+ 11: 192,
+ 12: 32,
+ 13: 96,
+ }
+
+ def _real_extract(self, url):
+ audio_id = self._match_id(url)
+ query = {
+ 'id': audio_id,
+ 'type': self._AUDIO_TYPE,
+ }
+
+ item = self._download_json(
+ self._BASE_URL + 'audiometadata', audio_id,
+ 'Downloading audio JSON metadata', query=query)['items'][0]
+ title = item['subtitle']
+
+ query['format'] = 'iis'
+ urls = []
+ formats = []
+ for quality in self._QUALITIES:
+ query['quality'] = quality
+ audio_url_data = self._download_json(
+ self._BASE_URL + 'getaudiourl', audio_id,
+ 'Downloading %s format JSON metadata' % quality,
+ fatal=False, query=query) or {}
+ audio_url = audio_url_data.get('audioUrl')
+ if not audio_url or audio_url in urls:
+ continue
+ urls.append(audio_url)
+ ext = determine_ext(audio_url)
+ coding_format = audio_url_data.get('codingFormat')
+ abr = int_or_none(self._search_regex(
+ r'_a(\d+)\.m4a', audio_url, 'audio bitrate',
+ default=None)) or self._CODING_FORMAT_TO_ABR_MAP.get(coding_format)
+ formats.append({
+ 'abr': abr,
+ 'acodec': self._EXT_TO_CODEC_MAP.get(ext),
+ 'ext': ext,
+ 'format_id': str_or_none(coding_format),
+ 'vcodec': 'none',
+ 'url': audio_url,
+ })
+ self._sort_formats(formats)
+
+ return {
+ 'id': audio_id,
+ 'title': title,
+ 'formats': formats,
+ 'series': item.get('title'),
+ 'duration': int_or_none(item.get('duration')),
+ 'thumbnail': item.get('displayimageurl'),
+ 'description': item.get('description'),
+ }
+
+
+class SverigesRadioPublicationIE(SverigesRadioBaseIE):
+ IE_NAME = 'sverigesradio:publication'
+ _VALID_URL = r'https?://(?:www\.)?sverigesradio\.se/sida/(?:artikel|gruppsida)\.aspx\?.*?\bartikel=(?P<id>[0-9]+)'
+ _TESTS = [{
+ 'url': 'https://sverigesradio.se/sida/artikel.aspx?programid=83&artikel=7038546',
+ 'md5': '6a4917e1923fccb080e5a206a5afa542',
+ 'info_dict': {
+ 'id': '7038546',
+ 'ext': 'm4a',
+ 'duration': 132,
+ 'series': 'Nyheter (Ekot)',
+ 'title': 'Esa Teittinen: Sanningen har inte kommit fram',
+ 'description': 'md5:daf7ce66a8f0a53d5465a5984d3839df',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ },
+ }, {
+ 'url': 'https://sverigesradio.se/sida/gruppsida.aspx?programid=3304&grupp=6247&artikel=7146887',
+ 'only_matching': True,
+ }]
+ _AUDIO_TYPE = 'publication'
+
+
+class SverigesRadioEpisodeIE(SverigesRadioBaseIE):
+ IE_NAME = 'sverigesradio:episode'
+ _VALID_URL = r'https?://(?:www\.)?sverigesradio\.se/(?:sida/)?avsnitt/(?P<id>[0-9]+)'
+ _TEST = {
+ 'url': 'https://sverigesradio.se/avsnitt/1140922?programid=1300',
+ 'md5': '20dc4d8db24228f846be390b0c59a07c',
+ 'info_dict': {
+ 'id': '1140922',
+ 'ext': 'mp3',
+ 'duration': 3307,
+ 'series': 'Konflikt',
+ 'title': 'Metoo och valen',
+ 'description': 'md5:fcb5c1f667f00badcc702b196f10a27e',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ }
+ }
+ _AUDIO_TYPE = 'episode'
'market.saleshacker.com': 'saleshacker',
'learnability.org': 'learnability',
'edurila.com': 'edurila',
+ 'courses.workitdaily.com': 'workitdaily',
}
_VALID_URL_SUB_TUPLE = (_URL_PREFIX, '|'.join(re.escape(site) for site in _SITES.keys()))
class TeamcocoIE(TurnerBaseIE):
- _VALID_URL = r'https?://teamcoco\.com/(?P<id>([^/]+/)*[^/?#]+)'
+ _VALID_URL = r'https?://(?:\w+\.)?teamcoco\.com/(?P<id>([^/]+/)*[^/?#]+)'
_TESTS = [
{
'url': 'http://teamcoco.com/video/mary-kay-remote',
}, {
'url': 'http://teamcoco.com/israel/conan-hits-the-streets-beaches-of-tel-aviv',
'only_matching': True,
+ }, {
+ 'url': 'https://conan25.teamcoco.com/video/ice-cube-kevin-hart-conan-share-lyft',
+ 'only_matching': True,
}
]
def _graphql_call(self, query_template, object_type, object_id):
find_object = 'find' + object_type
return self._download_json(
- 'http://teamcoco.com/graphql/', object_id, data=json.dumps({
+ 'https://teamcoco.com/graphql', object_id, data=json.dumps({
'query': query_template % (find_object, object_id)
- }))['data'][find_object]
+ }).encode(), headers={
+ 'Content-Type': 'application/json',
+ })['data'][find_object]
def _real_extract(self, url):
display_id = self._match_id(url)
'accessTokenType': 'jws',
}))
else:
- video_sources = self._graphql_call('''{
+ d = self._download_json(
+ 'https://teamcoco.com/_truman/d/' + video_id,
+ video_id, fatal=False) or {}
+ video_sources = d.get('meta') or {}
+ if not video_sources:
+ video_sources = self._graphql_call('''{
%s(id: "%s") {
src
}
--- /dev/null
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ clean_html,
+ determine_ext,
+ ExtractorError,
+ float_or_none,
+ get_element_by_class,
+ get_element_by_id,
+ parse_duration,
+ remove_end,
+ urlencode_postdata,
+ urljoin,
+)
+
+
+class TeamTreeHouseIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?teamtreehouse\.com/library/(?P<id>[^/]+)'
+ _TESTS = [{
+ # Course
+ 'url': 'https://teamtreehouse.com/library/introduction-to-user-authentication-in-php',
+ 'info_dict': {
+ 'id': 'introduction-to-user-authentication-in-php',
+ 'title': 'Introduction to User Authentication in PHP',
+ 'description': 'md5:405d7b4287a159b27ddf30ca72b5b053',
+ },
+ 'playlist_mincount': 24,
+ }, {
+ # WorkShop
+ 'url': 'https://teamtreehouse.com/library/deploying-a-react-app',
+ 'info_dict': {
+ 'id': 'deploying-a-react-app',
+ 'title': 'Deploying a React App',
+ 'description': 'md5:10a82e3ddff18c14ac13581c9b8e5921',
+ },
+ 'playlist_mincount': 4,
+ }, {
+ # Video
+ 'url': 'https://teamtreehouse.com/library/application-overview-2',
+ 'info_dict': {
+ 'id': 'application-overview-2',
+ 'ext': 'mp4',
+ 'title': 'Application Overview',
+ 'description': 'md5:4b0a234385c27140a4378de5f1e15127',
+ },
+ 'expected_warnings': ['This is just a preview'],
+ }]
+ _NETRC_MACHINE = 'teamtreehouse'
+
+ def _real_initialize(self):
+ email, password = self._get_login_info()
+ if email is None:
+ return
+
+ signin_page = self._download_webpage(
+ 'https://teamtreehouse.com/signin',
+ None, 'Downloading signin page')
+ data = self._form_hidden_inputs('new_user_session', signin_page)
+ data.update({
+ 'user_session[email]': email,
+ 'user_session[password]': password,
+ })
+ error_message = get_element_by_class('error-message', self._download_webpage(
+ 'https://teamtreehouse.com/person_session',
+ None, 'Logging in', data=urlencode_postdata(data)))
+ if error_message:
+ raise ExtractorError(clean_html(error_message), expected=True)
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+ title = self._html_search_meta(['og:title', 'twitter:title'], webpage)
+ description = self._html_search_meta(
+ ['description', 'og:description', 'twitter:description'], webpage)
+ entries = self._parse_html5_media_entries(url, webpage, display_id)
+ if entries:
+ info = entries[0]
+
+ for subtitles in info.get('subtitles', {}).values():
+ for subtitle in subtitles:
+ subtitle['ext'] = determine_ext(subtitle['url'], 'srt')
+
+ is_preview = 'data-preview="true"' in webpage
+ if is_preview:
+ self.report_warning(
+ 'This is just a preview. You need to be signed in with a Basic account to download the entire video.', display_id)
+ duration = 30
+ else:
+ duration = float_or_none(self._search_regex(
+ r'data-duration="(\d+)"', webpage, 'duration'), 1000)
+ if not duration:
+ duration = parse_duration(get_element_by_id(
+ 'video-duration', webpage))
+
+ info.update({
+ 'id': display_id,
+ 'title': title,
+ 'description': description,
+ 'duration': duration,
+ })
+ return info
+ else:
+ def extract_urls(html, extract_info=None):
+ for path in re.findall(r'<a[^>]+href="([^"]+)"', html):
+ page_url = urljoin(url, path)
+ entry = {
+ '_type': 'url_transparent',
+ 'id': self._match_id(page_url),
+ 'url': page_url,
+ 'id_key': self.ie_key(),
+ }
+ if extract_info:
+ entry.update(extract_info)
+ entries.append(entry)
+
+ workshop_videos = self._search_regex(
+ r'(?s)<ul[^>]+id="workshop-videos"[^>]*>(.+?)</ul>',
+ webpage, 'workshop videos', default=None)
+ if workshop_videos:
+ extract_urls(workshop_videos)
+ else:
+ stages_path = self._search_regex(
+ r'(?s)<div[^>]+id="syllabus-stages"[^>]+data-url="([^"]+)"',
+ webpage, 'stages path')
+ if stages_path:
+ stages_page = self._download_webpage(
+ urljoin(url, stages_path), display_id, 'Downloading stages page')
+ for chapter_number, (chapter, steps_list) in enumerate(re.findall(r'(?s)<h2[^>]*>\s*(.+?)\s*</h2>.+?<ul[^>]*>(.+?)</ul>', stages_page), 1):
+ extract_urls(steps_list, {
+ 'chapter': chapter,
+ 'chapter_number': chapter_number,
+ })
+ title = remove_end(title, ' Course')
+
+ return self.playlist_result(
+ entries, display_id, title, description)
from .common import InfoExtractor
-from ..compat import compat_str
+from ..compat import (
+ compat_str,
+ compat_urlparse
+)
from ..utils import (
+ extract_attributes,
float_or_none,
int_or_none,
try_get,
(?P<proto>https?://)
(?P<type>www|embed(?:-ssl)?)(?P<urlmain>\.ted\.com/
(
- (?P<type_playlist>playlists(?:/\d+)?) # We have a playlist
+ (?P<type_playlist>playlists(?:/(?P<playlist_id>\d+))?) # We have a playlist
|
((?P<type_talk>talks)) # We have a simple talk
|
'info_dict': {
'id': '10',
'title': 'Who are the hackers?',
+ 'description': 'md5:49a0dbe8fb76d81a0e64b4a80af7f15a'
},
'playlist_mincount': 6,
}, {
webpage = self._download_webpage(url, name,
'Downloading playlist webpage')
- info = self._extract_info(webpage)
- playlist_info = try_get(
- info, lambda x: x['__INITIAL_DATA__']['playlist'],
- dict) or info['playlist']
+ playlist_entries = []
+ for entry in re.findall(r'(?s)<[^>]+data-ga-context=["\']playlist["\'][^>]*>', webpage):
+ attrs = extract_attributes(entry)
+ entry_url = compat_urlparse.urljoin(url, attrs['href'])
+ playlist_entries.append(self.url_result(entry_url, self.ie_key()))
+
+ final_url = self._og_search_url(webpage, fatal=False)
+ playlist_id = (
+ re.match(self._VALID_URL, final_url).group('playlist_id')
+ if final_url else None)
- playlist_entries = [
- self.url_result('http://www.ted.com/talks/' + talk['slug'], self.ie_key())
- for talk in try_get(
- info, lambda x: x['__INITIAL_DATA__']['talks'],
- dict) or info['talks']
- ]
return self.playlist_result(
- playlist_entries,
- playlist_id=compat_str(playlist_info['id']),
- playlist_title=playlist_info['title'])
+ playlist_entries, playlist_id=playlist_id,
+ playlist_title=self._og_search_title(webpage, fatal=False),
+ playlist_description=self._og_search_description(webpage))
def _talk_info(self, url, video_name):
webpage = self._download_webpage(url, video_name)
'format_id': m3u8_format['format_id'].replace('hls', 'http'),
'protocol': 'http',
})
+ if f.get('acodec') == 'none':
+ del f['acodec']
formats.append(f)
audio_download = talk_info.get('audioDownload')
class Tele5IE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?tele5\.de/(?:mediathek|tv)/(?P<id>[^?#&]+)'
+ _VALID_URL = r'https?://(?:www\.)?tele5\.de/(?:[^/]+/)*(?P<id>[^/?#&]+)'
_TESTS = [{
'url': 'https://www.tele5.de/mediathek/filme-online/videos?vid=1549416',
'info_dict': {
'skip_download': True,
},
}, {
- 'url': 'https://www.tele5.de/tv/kalkofes-mattscheibe/video-clips/politik-und-gesellschaft?ve_id=1551191',
+ 'url': 'https://www.tele5.de/kalkofes-mattscheibe/video-clips/politik-und-gesellschaft?ve_id=1551191',
'only_matching': True,
}, {
- 'url': 'https://www.tele5.de/tv/dark-matter/videos',
+ 'url': 'https://www.tele5.de/video-clip/?ve_id=1609440',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.tele5.de/filme/schlefaz-dragon-crusaders/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.tele5.de/filme/making-of/avengers-endgame/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.tele5.de/star-trek/raumschiff-voyager/ganze-folge/das-vinculum/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.tele5.de/anders-ist-sevda/',
'only_matching': True,
}]
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
video_id = self._html_search_regex(
- r'id\s*=\s*["\']video-player["\'][^>]+data-id\s*=\s*["\'](\d+)',
- webpage, 'video id')
+ (r'id\s*=\s*["\']video-player["\'][^>]+data-id\s*=\s*["\'](\d+)',
+ r'\s+id\s*=\s*["\']player_(\d{6,})',
+ r'\bdata-id\s*=\s*["\'](\d{6,})'), webpage, 'video id')
return self.url_result(
'https://api.nexx.cloud/v3/759/videos/byid/%s' % video_id,
return [m.group('url')]
# Are whitesapces ignored in URLs?
- # https://github.com/rg3/youtube-dl/issues/12044
+ # https://github.com/ytdl-org/youtube-dl/issues/12044
matches = re.findall(
r'(?s)<(?:iframe|script)[^>]+src=(["\'])((?:https?:)?//player\.theplatform\.com/p/.+?)\1', webpage)
if matches:
if smuggled_data.get('force_smil_url', False):
smil_url = url
- # Explicitly specified SMIL (see https://github.com/rg3/youtube-dl/issues/7385)
+ # Explicitly specified SMIL (see https://github.com/ytdl-org/youtube-dl/issues/7385)
elif '/guid/' in url:
headers = {}
source_url = smuggled_data.get('source_url')
class TikTokIE(TikTokBaseIE):
- _VALID_URL = r'https?://(?:m\.)?tiktok\.com/v/(?P<id>\d+)'
- _TEST = {
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:
+ (?:m\.)?tiktok\.com/v|
+ (?:www\.)?tiktok\.com/share/video
+ )
+ /(?P<id>\d+)
+ '''
+ _TESTS = [{
'url': 'https://m.tiktok.com/v/6606727368545406213.html',
'md5': 'd584b572e92fcd48888051f238022420',
'info_dict': {
'comment_count': int,
'repost_count': int,
}
- }
+ }, {
+ 'url': 'https://www.tiktok.com/share/video/6606727368545406213',
+ 'only_matching': True,
+ }]
def _real_extract(self, url):
video_id = self._match_id(url)
- webpage = self._download_webpage(url, video_id)
+ webpage = self._download_webpage(
+ 'https://m.tiktok.com/v/%s.html' % video_id, video_id)
data = self._parse_json(self._search_regex(
r'\bdata\s*=\s*({.+?})\s*;', webpage, 'data'), video_id)
return self._extract_aweme(data)
class TikTokUserIE(TikTokBaseIE):
- _VALID_URL = r'https?://(?:m\.)?tiktok\.com/h5/share/usr/(?P<id>\d+)'
- _TEST = {
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:
+ (?:m\.)?tiktok\.com/h5/share/usr|
+ (?:www\.)?tiktok\.com/share/user
+ )
+ /(?P<id>\d+)
+ '''
+ _TESTS = [{
'url': 'https://m.tiktok.com/h5/share/usr/188294915489964032.html',
'info_dict': {
'id': '188294915489964032',
},
'playlist_mincount': 24,
- }
+ }, {
+ 'url': 'https://www.tiktok.com/share/user/188294915489964032',
+ 'only_matching': True,
+ }]
def _real_extract(self, url):
user_id = self._match_id(url)
cfg_xml = self._download_xml(
cfg_url, display_id, 'Downloading metadata',
- transform_source=fix_xml_ampersands)
+ transform_source=fix_xml_ampersands, headers={'Referer': url})
formats = []
# coding: utf-8
from __future__ import unicode_literals
-import re
+import json
-from .common import InfoExtractor
+from .radiocanada import RadioCanadaIE
+from ..compat import compat_HTTPError
from ..utils import (
+ ExtractorError,
int_or_none,
- js_to_json,
- urlencode_postdata,
- extract_attributes,
- smuggle_url,
+ merge_dicts,
)
-class TouTvIE(InfoExtractor):
+class TouTvIE(RadioCanadaIE):
_NETRC_MACHINE = 'toutv'
IE_NAME = 'tou.tv'
_VALID_URL = r'https?://ici\.tou\.tv/(?P<id>[a-zA-Z0-9_-]+(?:/S[0-9]+[EC][0-9]+)?)'
- _access_token = None
- _claims = None
_TESTS = [{
'url': 'http://ici.tou.tv/garfield-tout-court/S2015E17',
'url': 'https://ici.tou.tv/l-age-adulte/S01C501',
'only_matching': True,
}]
+ _CLIENT_KEY = '4dd36440-09d5-4468-8923-b6d91174ad36'
def _real_initialize(self):
email, password = self._get_login_info()
if email is None:
return
- state = 'http://ici.tou.tv/'
- webpage = self._download_webpage(state, None, 'Downloading homepage')
- toutvlogin = self._parse_json(self._search_regex(
- r'(?s)toutvlogin\s*=\s*({.+?});', webpage, 'toutvlogin'), None, js_to_json)
- authorize_url = toutvlogin['host'] + '/auth/oauth/v2/authorize'
- login_webpage = self._download_webpage(
- authorize_url, None, 'Downloading login page', query={
- 'client_id': toutvlogin['clientId'],
- 'redirect_uri': 'https://ici.tou.tv/login/loginCallback',
- 'response_type': 'token',
- 'scope': 'media-drmt openid profile email id.write media-validation.read.privileged',
- 'state': state,
- })
-
- def extract_form_url_and_data(wp, default_form_url, form_spec_re=''):
- form, form_elem = re.search(
- r'(?s)((<form[^>]+?%s[^>]*?>).+?</form>)' % form_spec_re, wp).groups()
- form_data = self._hidden_inputs(form)
- form_url = extract_attributes(form_elem).get('action') or default_form_url
- return form_url, form_data
-
- post_url, form_data = extract_form_url_and_data(
- login_webpage,
- 'https://services.radio-canada.ca/auth/oauth/v2/authorize/login',
- r'(?:id|name)="Form-login"')
- form_data.update({
- 'login-email': email,
- 'login-password': password,
- })
- consent_webpage = self._download_webpage(
- post_url, None, 'Logging in', data=urlencode_postdata(form_data))
- post_url, form_data = extract_form_url_and_data(
- consent_webpage,
- 'https://services.radio-canada.ca/auth/oauth/v2/authorize/consent')
- _, urlh = self._download_webpage_handle(
- post_url, None, 'Following Redirection',
- data=urlencode_postdata(form_data))
- self._access_token = self._search_regex(
- r'access_token=([\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})',
- urlh.geturl(), 'access token')
- self._claims = self._download_json(
- 'https://services.radio-canada.ca/media/validation/v2/getClaims',
- None, 'Extracting Claims', query={
- 'token': self._access_token,
- 'access_token': self._access_token,
- })['claims']
+ try:
+ self._access_token = self._download_json(
+ 'https://services.radio-canada.ca/toutv/profiling/accounts/login',
+ None, 'Logging in', data=json.dumps({
+ 'ClientId': self._CLIENT_KEY,
+ 'ClientSecret': '34026772-244b-49b6-8b06-317b30ac9a20',
+ 'Email': email,
+ 'Password': password,
+ 'Scope': 'id.write media-validation.read',
+ }).encode(), headers={
+ 'Authorization': 'client-key ' + self._CLIENT_KEY,
+ 'Content-Type': 'application/json;charset=utf-8',
+ })['access_token']
+ except ExtractorError as e:
+ if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
+ error = self._parse_json(e.cause.read().decode(), None)['Message']
+ raise ExtractorError(error, expected=True)
+ raise
+ self._claims = self._call_api('validation/v2/getClaims')['claims']
def _real_extract(self, url):
path = self._match_id(url)
- metadata = self._download_json('http://ici.tou.tv/presentation/%s' % path, path)
+ metadata = self._download_json(
+ 'https://services.radio-canada.ca/toutv/presentation/%s' % path, path, query={
+ 'client_key': self._CLIENT_KEY,
+ 'device': 'web',
+ 'version': 4,
+ })
# IsDrm does not necessarily mean the video is DRM protected (see
- # https://github.com/rg3/youtube-dl/issues/13994).
+ # https://github.com/ytdl-org/youtube-dl/issues/13994).
if metadata.get('IsDrm'):
self.report_warning('This video is probably DRM protected.', path)
video_id = metadata['IdMedia']
details = metadata['Details']
- title = details['OriginalTitle']
- video_url = 'radiocanada:%s:%s' % (metadata.get('AppCode', 'toutv'), video_id)
- if self._access_token and self._claims:
- video_url = smuggle_url(video_url, {
- 'access_token': self._access_token,
- 'claims': self._claims,
- })
- return {
- '_type': 'url_transparent',
- 'url': video_url,
+ return merge_dicts({
'id': video_id,
- 'title': title,
+ 'title': details.get('OriginalTitle'),
+ 'description': details.get('Description'),
'thumbnail': details.get('ImageUrl'),
'duration': int_or_none(details.get('LengthInSeconds')),
- }
+ 'series': metadata.get('ProgramTitle'),
+ 'season_number': int_or_none(metadata.get('SeasonNumber')),
+ 'season': metadata.get('SeasonTitle'),
+ 'episode_number': int_or_none(metadata.get('EpisodeNumber')),
+ 'episode': metadata.get('EpisodeTitle'),
+ }, self._extract_info(metadata.get('AppCode', 'toutv'), video_id))
--- /dev/null
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+    dict_get,
+    ExtractorError,
+    float_or_none,
+    int_or_none,
+    unified_timestamp,
+    update_url_query,
+    url_or_none,
+)
+
+
+class TruNewsIE(InfoExtractor):
+    # The last path component of a /stream/ URL is captured as the id and
+    # used below as the display id.
+    _VALID_URL = r'https?://(?:www\.)?trunews\.com/stream/(?P<id>[^/?#&]+)'
+    _TEST = {
+        'url': 'https://www.trunews.com/stream/will-democrats-stage-a-circus-during-president-trump-s-state-of-the-union-speech',
+        'md5': 'a19c024c3906ff954fac9b96ce66bb08',
+        'info_dict': {
+            'id': '5c5a21e65d3c196e1c0020cc',
+            'display_id': 'will-democrats-stage-a-circus-during-president-trump-s-state-of-the-union-speech',
+            'ext': 'mp4',
+            'title': "Will Democrats Stage a Circus During President Trump's State of the Union Speech?",
+            'description': 'md5:c583b72147cc92cf21f56a31aff7a670',
+            'duration': 3685,
+            'timestamp': 1549411440,
+            'upload_date': '20190206',
+        },
+        'add_ie': ['Zype'],
+    }
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+
+        # Look the video up in the Zype API by its friendly title (the URL
+        # slug); at most one result is requested.
+        results = self._download_json(
+            'https://api.zype.com/videos', display_id, query={
+                'app_key': 'PUVKp9WgGUb3-JUw6EqafLx8tFVP6VKZTWbUOR-HOm__g4fNDt1bCsm_LgYf_k9H',
+                'per_page': 1,
+                'active': 'true',
+                'friendly_title': display_id,
+            })['response']
+        # An empty result list means the API knows no active video for this
+        # slug; report that cleanly instead of failing with an IndexError.
+        if not results:
+            raise ExtractorError(
+                'Unable to find video %s' % display_id, expected=True)
+        video = results[0]
+
+        zype_id = video['_id']
+
+        # Keep only well-formed thumbnail entries that carry a usable URL.
+        thumbnails = []
+        thumbnails_list = video.get('thumbnails')
+        if isinstance(thumbnails_list, list):
+            for thumbnail in thumbnails_list:
+                if not isinstance(thumbnail, dict):
+                    continue
+                thumbnail_url = url_or_none(thumbnail.get('url'))
+                if not thumbnail_url:
+                    continue
+                thumbnails.append({
+                    'url': thumbnail_url,
+                    'width': int_or_none(thumbnail.get('width')),
+                    'height': int_or_none(thumbnail.get('height')),
+                })
+
+        # url_transparent: the Zype extractor resolves the embed URL, while
+        # the metadata collected here is attached to its result.
+        return {
+            '_type': 'url_transparent',
+            'url': update_url_query(
+                'https://player.zype.com/embed/%s.js' % zype_id,
+                {'api_key': 'X5XnahkjCwJrT_l5zUqypnaLEObotyvtUKJWWlONxDoHVjP8vqxlArLV8llxMbyt'}),
+            'ie_key': 'Zype',
+            'id': zype_id,
+            'display_id': display_id,
+            'title': video.get('title'),
+            'description': dict_get(video, ('description', 'ott_description', 'short_description')),
+            'duration': int_or_none(video.get('duration')),
+            'timestamp': unified_timestamp(video.get('published_at')),
+            'average_rating': float_or_none(video.get('rating')),
+            'view_count': int_or_none(video.get('request_count')),
+            'thumbnails': thumbnails,
+        }
import re
from .turner import TurnerBaseIE
+from ..utils import (
+ int_or_none,
+ parse_iso8601,
+)
class TruTVIE(TurnerBaseIE):
- _VALID_URL = r'https?://(?:www\.)?trutv\.com(?:(?P<path>/shows/[^/]+/videos/[^/?#]+?)\.html|/full-episodes/[^/]+/(?P<id>\d+))'
+ _VALID_URL = r'https?://(?:www\.)?trutv\.com/(?:shows|full-episodes)/(?P<series_slug>[0-9A-Za-z-]+)/(?:videos/(?P<clip_slug>[0-9A-Za-z-]+)|(?P<id>\d+))'
_TEST = {
- 'url': 'http://www.trutv.com/shows/10-things/videos/you-wont-believe-these-sports-bets.html',
- 'md5': '2cdc844f317579fed1a7251b087ff417',
+ 'url': 'https://www.trutv.com/shows/the-carbonaro-effect/videos/sunlight-activated-flower.html',
'info_dict': {
- 'id': '/shows/10-things/videos/you-wont-believe-these-sports-bets',
+ 'id': 'f16c03beec1e84cd7d1a51f11d8fcc29124cc7f1',
'ext': 'mp4',
- 'title': 'You Won\'t Believe These Sports Bets',
- 'description': 'Jamie Lee sits down with a bookie to discuss the bizarre world of illegal sports betting.',
- 'upload_date': '20130305',
- }
+ 'title': 'Sunlight-Activated Flower',
+ 'description': "A customer is stunned when he sees Michael's sunlight-activated flower.",
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
}
def _real_extract(self, url):
- path, video_id = re.match(self._VALID_URL, url).groups()
- auth_required = False
- if path:
- data_src = 'http://www.trutv.com/video/cvp/v2/xml/content.xml?id=%s.xml' % path
+ series_slug, clip_slug, video_id = re.match(self._VALID_URL, url).groups()
+
+ if video_id:
+ path = 'episode'
+ display_id = video_id
else:
- webpage = self._download_webpage(url, video_id)
- video_id = self._search_regex(
- r"TTV\.TVE\.episodeId\s*=\s*'([^']+)';",
- webpage, 'video id', default=video_id)
- auth_required = self._search_regex(
- r'TTV\.TVE\.authRequired\s*=\s*(true|false);',
- webpage, 'auth required', default='false') == 'true'
- data_src = 'http://www.trutv.com/tveverywhere/services/cvpXML.do?titleId=' + video_id
- return self._extract_cvp_info(
- data_src, path, {
- 'secure': {
- 'media_src': 'http://androidhls-secure.cdn.turner.com/trutv/big',
- 'tokenizer_src': 'http://www.trutv.com/tveverywhere/processors/services/token_ipadAdobe.do',
- },
- }, {
+ path = 'series/clip'
+ display_id = clip_slug
+
+ data = self._download_json(
+ 'https://api.trutv.com/v2/web/%s/%s/%s' % (path, series_slug, display_id),
+ display_id)
+ video_data = data['episode'] if video_id else data['info']
+ media_id = video_data['mediaId']
+ title = video_data['title'].strip()
+
+ info = self._extract_ngtv_info(
+ media_id, {}, {
'url': url,
'site_name': 'truTV',
- 'auth_required': auth_required,
+ 'auth_required': video_data.get('isAuthRequired'),
})
+
+ thumbnails = []
+ for image in video_data.get('images', []):
+ image_url = image.get('srcUrl')
+ if not image_url:
+ continue
+ thumbnails.append({
+ 'url': image_url,
+ 'width': int_or_none(image.get('width')),
+ 'height': int_or_none(image.get('height')),
+ })
+
+ info.update({
+ 'id': media_id,
+ 'display_id': display_id,
+ 'title': title,
+ 'description': video_data.get('description'),
+ 'thumbnails': thumbnails,
+ 'timestamp': parse_iso8601(video_data.get('publicationDate')),
+ 'series': video_data.get('showTitle'),
+ 'season_number': int_or_none(video_data.get('seasonNum')),
+ 'episode_number': int_or_none(video_data.get('episodeNum')),
+ })
+ return info
r'\.ism/(?:[^.]*\.(?:m3u8|mpd)|[Mm]anifest)',
'.ism/' + suffix, manifest_url))
- formats = self._extract_mpd_formats(
- url_repl('dash', '.mpd'), video_id,
- mpd_id='dash', fatal=False)
- formats.extend(self._extract_ism_formats(
- url_repl('hss', 'Manifest'),
- video_id, ism_id='mss', fatal=False))
- formats.extend(self._extract_m3u8_formats(
- url_repl('hls', '.m3u8'), video_id, 'mp4',
- 'm3u8_native', m3u8_id='hls', fatal=False))
+ def make_urls(proto, suffix):
+     # Return the manifest URL for this protocol and, when that URL
+     # contains '/manifest/', also its '/ngvod/' variant.
+     urls = [url_repl(proto, suffix)]
+     hd_url = urls[0].replace('/manifest/', '/ngvod/')
+     if hd_url != urls[0]:
+         urls.append(hd_url)
+     return urls
+
+ # Accumulate into a fresh list: assigning the result inside the loop
+ # would keep only the formats of the last URL (or an empty list if that
+ # last request failed) and drop everything extracted before it.
+ formats = []
+ for man_url in make_urls('dash', '.mpd'):
+     formats.extend(self._extract_mpd_formats(
+         man_url, video_id, mpd_id='dash', fatal=False))
+ for man_url in make_urls('hss', 'Manifest'):
+     formats.extend(self._extract_ism_formats(
+         man_url, video_id, ism_id='mss', fatal=False))
+ for man_url in make_urls('hls', '.m3u8'):
+     formats.extend(self._extract_m3u8_formats(
+         man_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls',
+         fatal=False))
if formats:
break
else:
return result
-"""
+r"""
TODO: new apigw.tvnow.de based version of TVNowIE. Replace old TVNowIE with it
when api.tvnow.de is shut down. This version can't bypass premium checks though.
class TVNowIE(TVNowNewBaseIE):
# coding: utf-8
from __future__ import unicode_literals
+import itertools
import re
from .common import InfoExtractor
from ..utils import (
- determine_ext,
clean_html,
- get_element_by_attribute,
+ determine_ext,
ExtractorError,
+ get_element_by_attribute,
+ orderedSet,
)
_TESTS = [{
'url': 'https://vod.tvp.pl/video/czas-honoru,i-seria-odc-13,194536',
- 'md5': '8aa518c15e5cc32dfe8db400dc921fbb',
+ 'md5': 'a21eb0aa862f25414430f15fdfb9e76c',
'info_dict': {
'id': '194536',
'ext': 'mp4',
- 'title': 'Czas honoru, I seria – odc. 13',
- 'description': 'md5:381afa5bca72655fe94b05cfe82bf53d',
+ 'title': 'Czas honoru, odc. 13 – Władek',
+ 'description': 'md5:437f48b93558370b031740546b696e24',
},
}, {
'url': 'http://www.tvp.pl/there-can-be-anything-so-i-shortened-it/17916176',
'title': 'Wiadomości, 28.09.2017, 19:30',
'description': 'Wydanie główne codziennego serwisu informacyjnego.'
},
+ 'skip': 'HTTP Error 404: Not Found',
}, {
'url': 'http://vod.tvp.pl/seriale/obyczajowe/na-sygnale/sezon-2-27-/odc-39/17834272',
'only_matching': True,
return {
'_type': 'url_transparent',
'url': 'tvp:' + video_id,
- 'description': self._og_search_description(webpage, default=None),
- 'thumbnail': self._og_search_thumbnail(webpage),
+ 'description': self._og_search_description(
+ webpage, default=None) or self._html_search_meta(
+ 'description', webpage, default=None),
+ 'thumbnail': self._og_search_thumbnail(webpage, default=None),
'ie_key': 'TVPEmbed',
}
_VALID_URL = r'(?:tvp:|https?://[^/]+\.tvp\.(?:pl|info)/sess/tvplayer\.php\?.*?object_id=)(?P<id>\d+)'
_TESTS = [{
+ 'url': 'tvp:194536',
+ 'md5': 'a21eb0aa862f25414430f15fdfb9e76c',
+ 'info_dict': {
+ 'id': '194536',
+ 'ext': 'mp4',
+ 'title': 'Czas honoru, odc. 13 – Władek',
+ },
+ }, {
+ # not available
'url': 'http://www.tvp.pl/sess/tvplayer.php?object_id=22670268',
'md5': '8c9cd59d16edabf39331f93bf8a766c7',
'info_dict': {
'ext': 'mp4',
'title': 'Panorama, 07.12.2015, 15:40',
},
+ 'skip': 'Transmisja została zakończona lub materiał niedostępny',
}, {
'url': 'tvp:22670268',
'only_matching': True,
webpage = self._download_webpage(
'http://www.tvp.pl/sess/tvplayer.php?object_id=%s' % video_id, video_id)
- error_massage = get_element_by_attribute('class', 'msg error', webpage)
- if error_massage:
+ error = self._html_search_regex(
+ r'(?s)<p[^>]+\bclass=["\']notAvailable__text["\'][^>]*>(.+?)</p>',
+ webpage, 'error', default=None) or clean_html(
+ get_element_by_attribute('class', 'msg error', webpage))
+ if error:
raise ExtractorError('%s said: %s' % (
- self.IE_NAME, clean_html(error_massage)), expected=True)
+ self.IE_NAME, clean_html(error)), expected=True)
title = self._search_regex(
r'name\s*:\s*([\'"])Title\1\s*,\s*value\s*:\s*\1(?P<title>.+?)\1',
}
-class TVPSeriesIE(InfoExtractor):
+class TVPWebsiteIE(InfoExtractor):
IE_NAME = 'tvp:series'
- _VALID_URL = r'https?://vod\.tvp\.pl/(?:[^/]+/){2}(?P<id>[^/]+)/?$'
+ _VALID_URL = r'https?://vod\.tvp\.pl/website/(?P<display_id>[^,]+),(?P<id>\d+)'
_TESTS = [{
- 'url': 'http://vod.tvp.pl/filmy-fabularne/filmy-za-darmo/ogniem-i-mieczem',
+ # series
+ 'url': 'https://vod.tvp.pl/website/lzy-cennet,38678312/video',
'info_dict': {
- 'title': 'Ogniem i mieczem',
- 'id': '4278026',
+ 'id': '38678312',
},
- 'playlist_count': 4,
+ 'playlist_count': 115,
}, {
- 'url': 'http://vod.tvp.pl/audycje/podroze/boso-przez-swiat',
+ # film
+ 'url': 'https://vod.tvp.pl/website/gloria,35139666',
'info_dict': {
- 'title': 'Boso przez świat',
- 'id': '9329207',
+ 'id': '36637049',
+ 'ext': 'mp4',
+ 'title': 'Gloria, Gloria',
+ },
+ 'params': {
+ 'skip_download': True,
},
- 'playlist_count': 86,
+ 'add_ie': ['TVPEmbed'],
+ }, {
+ 'url': 'https://vod.tvp.pl/website/lzy-cennet,38678312',
+ 'only_matching': True,
}]
- def _real_extract(self, url):
- display_id = self._match_id(url)
- webpage = self._download_webpage(url, display_id, tries=5)
-
- title = self._html_search_regex(
- r'(?s) id=[\'"]path[\'"]>(?:.*? / ){2}(.*?)</span>', webpage, 'series')
- playlist_id = self._search_regex(r'nodeId:\s*(\d+)', webpage, 'playlist id')
- playlist = self._download_webpage(
- 'http://vod.tvp.pl/vod/seriesAjax?type=series&nodeId=%s&recommend'
- 'edId=0&sort=&page=0&pageSize=10000' % playlist_id, display_id, tries=5,
- note='Downloading playlist')
-
- videos_paths = re.findall(
- '(?s)class="shortTitle">.*?href="(/[^"]+)', playlist)
- entries = [
- self.url_result('http://vod.tvp.pl%s' % v_path, ie=TVPIE.ie_key())
- for v_path in videos_paths]
+ def _entries(self, display_id, playlist_id):
+     # Walk the paginated /video listing of the website and yield one
+     # TVPEmbed result per video id found; stop at the first page that
+     # lists no matching video links.
+     url = 'https://vod.tvp.pl/website/%s,%s/video' % (display_id, playlist_id)
+     # display_id is taken verbatim from the URL, so escape it before
+     # embedding it in the pattern to keep regex metacharacters literal.
+     video_id_re = r'<a[^>]+\bhref=["\']/video/%s,[^,]+,(\d+)' % re.escape(display_id)
+     for page_num in itertools.count(1):
+         page = self._download_webpage(
+             url, display_id, 'Downloading page %d' % page_num,
+             query={'page': page_num})
- return {
- '_type': 'playlist',
- 'id': playlist_id,
- 'display_id': display_id,
- 'title': title,
- 'entries': entries,
- }
+         # orderedSet drops repeated ids while keeping page order.
+         video_ids = orderedSet(re.findall(video_id_re, page))
+
+         if not video_ids:
+             break
+
+         for video_id in video_ids:
+             yield self.url_result(
+                 'tvp:%s' % video_id, ie=TVPEmbedIE.ie_key(),
+                 video_id=video_id)
+
+ def _real_extract(self, url):
+     # Split the URL into its slug and numeric id, then return a playlist
+     # whose entries are produced lazily by _entries.
+     match = re.match(self._VALID_URL, url)
+     playlist_id = match.group('id')
+     entries = self._entries(match.group('display_id'), playlist_id)
+     return self.playlist_result(entries, playlist_id)
webpage = self._download_webpage(url, video_id)
video_id = self._search_regex(
- r'data-asset-id\s*=\s*["\'](\d{5,7})\b', webpage, 'video id',
- default=None)
+ r'data-asset-id\s*=\s*["\'](\d{5,})\b', webpage, 'video id')
- if video_id:
+ if len(video_id) < 8:
return self.url_result(
'mtg:%s' % video_id, ie=TVPlayIE.ie_key(), video_id=video_id)
r'(\d+)(?:[.\s]+sezona|\s+HOOAEG)', season or '', 'season number',
default=None))
episode = self._search_regex(
- r'(["\'])(?P<value>(?:(?!\1).)+)\1', webpage, 'episode',
- default=None, group='value')
+ (r'\bepisode\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1',
+ r'data-subtitle\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1'), webpage,
+ 'episode', default=None, group='value')
episode_number = int_or_none(self._search_regex(
r'(?:S[eē]rija|Osa)\s+(\d+)', episode or '', 'episode number',
default=None))
class TwentyFourVideoIE(InfoExtractor):
IE_NAME = '24video'
- _VALID_URL = r'https?://(?P<host>(?:www\.)?24video\.(?:net|me|xxx|sexy?|tube|adult))/(?:video/(?:view|xml)/|player/new24_play\.swf\?id=)(?P<id>\d+)'
+ _VALID_URL = r'''(?x)
+ https?://
+ (?P<host>
+ (?:(?:www|porno)\.)?24video\.
+ (?:net|me|xxx|sexy?|tube|adult|site)
+ )/
+ (?:
+ video/(?:(?:view|xml)/)?|
+ player/new24_play\.swf\?id=
+ )
+ (?P<id>\d+)
+ '''
_TESTS = [{
'url': 'http://www.24video.net/video/view/1044982',
}, {
'url': 'http://www.24video.tube/video/view/2363750',
'only_matching': True,
+ }, {
+ 'url': 'https://www.24video.site/video/view/2640421',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://porno.24video.net/video/2640421-vsya-takaya-gibkaya-i-v-masle',
+ 'only_matching': True,
}]
def _real_extract(self, url):
from __future__ import unicode_literals
from .common import InfoExtractor
+from ..utils import urlencode_postdata
import re
class TwitCastingIE(InfoExtractor):
_VALID_URL = r'https?://(?:[^/]+\.)?twitcasting\.tv/(?P<uploader_id>[^/]+)/movie/(?P<id>\d+)'
- _TEST = {
+ _TESTS = [{
'url': 'https://twitcasting.tv/ivetesangalo/movie/2357609',
'md5': '745243cad58c4681dc752490f7540d7f',
'info_dict': {
'id': '2357609',
'ext': 'mp4',
- 'title': 'Recorded Live #2357609',
+ 'title': 'Live #2357609',
'uploader_id': 'ivetesangalo',
'description': "Moi! I'm live on TwitCasting from my iPhone.",
'thumbnail': r're:^https?://.*\.jpg$',
'params': {
'skip_download': True,
},
- }
+ }, {
+ 'url': 'https://twitcasting.tv/mttbernardini/movie/3689740',
+ 'info_dict': {
+ 'id': '3689740',
+ 'ext': 'mp4',
+ 'title': 'Live playing something #3689740',
+ 'uploader_id': 'mttbernardini',
+ 'description': "I'm live on TwitCasting from my iPad. password: abc (Santa Marinella/Lazio, Italia)",
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ },
+ 'params': {
+ 'skip_download': True,
+ 'videopassword': 'abc',
+ },
+ }]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
uploader_id = mobj.group('uploader_id')
- webpage = self._download_webpage(url, video_id)
+ video_password = self._downloader.params.get('videopassword')
+ request_data = None
+ if video_password:
+ request_data = urlencode_postdata({
+ 'password': video_password,
+ })
+ webpage = self._download_webpage(url, video_id, data=request_data)
title = self._html_search_regex(
r'(?s)<[^>]+id=["\']movietitle[^>]+>(.+?)</',
def _prefer_source(self, formats):
try:
source = next(f for f in formats if f['format_id'] == 'Source')
- source['preference'] = 10
+ source['quality'] = 10
except StopIteration:
- pass # No Source stream present
+ for f in formats:
+ if '/chunked/' in f['url']:
+ f.update({
+ 'quality': 10,
+ 'format_note': 'Source',
+ })
self._sort_formats(formats)
IE_NAME = 'udemy'
_VALID_URL = r'''(?x)
https?://
- www\.udemy\.com/
+ (?:[^/]+\.)?udemy\.com/
(?:
[^#]+\#/lecture/|
lecture/view/?\?lectureId=|
# only outputs rendition
'url': 'https://www.udemy.com/how-you-can-help-your-local-community-5-amazing-examples/learn/v4/t/lecture/3225750?start=0',
'only_matching': True,
+ }, {
+ 'url': 'https://wipro.udemy.com/java-tutorial/#/lecture/172757',
+ 'only_matching': True,
}]
def _extract_course_info(self, webpage, video_id):
webpage, 'course', default='{}')),
video_id, fatal=False) or {}
course_id = course.get('id') or self._search_regex(
- r'data-course-id=["\'](\d+)', webpage, 'course id')
+ [
+ r'data-course-id=["\'](\d+)',
+ r'"courseId"\s*:\s*(\d+)'
+ ], webpage, 'course id')
return course_id, course.get('title')
def _enroll_course(self, base_url, webpage, course_id):
def _download_webpage_handle(self, *args, **kwargs):
headers = kwargs.get('headers', {}).copy()
- headers['User-Agent'] = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_5) AppleWebKit/603.2.4 (KHTML, like Gecko) Version/10.1.1 Safari/603.2.4'
+ headers['User-Agent'] = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.109 Safari/537.36'
kwargs['headers'] = headers
- return super(UdemyIE, self)._download_webpage_handle(
+ ret = super(UdemyIE, self)._download_webpage_handle(
*args, **compat_kwargs(kwargs))
+ if not ret:
+ return ret
+ webpage, _ = ret
+ if any(p in webpage for p in (
+ '>Please verify you are a human',
+ 'Access to this page has been denied because we believe you are using automation tools to browse the website',
+ '"_pxCaptcha"')):
+ raise ExtractorError(
+ 'Udemy asks you to solve a CAPTCHA. Login with browser, '
+ 'solve CAPTCHA, then export cookies and pass cookie file to '
+ 'youtube-dl with --cookies.', expected=True)
+ return ret
def _download_json(self, url_or_request, *args, **kwargs):
headers = {
}, res))
# react rendition since 2017.04.15 (see
- # https://github.com/rg3/youtube-dl/issues/12744)
+ # https://github.com/ytdl-org/youtube-dl/issues/12744)
data = self._parse_json(
self._search_regex(
r'videojs-setup-data=(["\'])(?P<data>{.+?})\1', view_html,
class UdemyCourseIE(UdemyIE):
IE_NAME = 'udemy:course'
- _VALID_URL = r'https?://(?:www\.)?udemy\.com/(?P<id>[^/?#&]+)'
- _TESTS = []
+ _VALID_URL = r'https?://(?:[^/]+\.)?udemy\.com/(?P<id>[^/?#&]+)'
+ _TESTS = [{
+ 'url': 'https://www.udemy.com/java-tutorial/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://wipro.udemy.com/java-tutorial/',
+ 'only_matching': True,
+ }]
@classmethod
def suitable(cls, url):
from __future__ import unicode_literals
from .common import InfoExtractor
+from ..utils import unified_timestamp
class URPlayIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?ur(?:play|skola)\.se/(?:program|Produkter)/(?P<id>[0-9]+)'
_TESTS = [{
- 'url': 'http://urplay.se/program/190031-tripp-trapp-trad-sovkudde',
- 'md5': 'ad5f0de86f16ca4c8062cd103959a9eb',
+ 'url': 'https://urplay.se/program/203704-ur-samtiden-livet-universum-och-rymdens-markliga-musik-om-vetenskap-kritiskt-tankande-och-motstand',
+ 'md5': 'ff5b0c89928f8083c74bbd5099c9292d',
+ 'info_dict': {
+ 'id': '203704',
+ 'ext': 'mp4',
+ 'title': 'UR Samtiden - Livet, universum och rymdens märkliga musik : Om vetenskap, kritiskt tänkande och motstånd',
+ 'description': 'md5:5344508a52aa78c1ced6c1b8b9e44e9a',
+ 'timestamp': 1513512768,
+ 'upload_date': '20171217',
+ },
+ }, {
+ 'url': 'https://urskola.se/Produkter/190031-Tripp-Trapp-Trad-Sovkudde',
'info_dict': {
'id': '190031',
'ext': 'mp4',
'title': 'Tripp, Trapp, Träd : Sovkudde',
'description': 'md5:b86bffdae04a7e9379d1d7e5947df1d1',
+ 'timestamp': 1440093600,
+ 'upload_date': '20150820',
},
}, {
'url': 'http://urskola.se/Produkter/155794-Smasagor-meankieli-Grodan-i-vida-varlden',
'title': urplayer_data['title'],
'description': self._og_search_description(webpage),
'thumbnail': urplayer_data.get('image'),
+ 'timestamp': unified_timestamp(self._html_search_meta(('uploadDate', 'schema:uploadDate'), webpage, 'timestamp')),
'series': urplayer_data.get('series_title'),
'subtitles': subtitles,
'formats': formats,
from .common import InfoExtractor
from ..utils import (
+ ExtractorError,
get_element_by_attribute,
parse_duration,
+ try_get,
update_url_query,
- ExtractorError,
)
from ..compat import compat_str
class USATodayIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?usatoday\.com/(?:[^/]+/)*(?P<id>[^?/#]+)'
- _TEST = {
+ _TESTS = [{
+ # Brightcove Partner ID = 29906170001
'url': 'http://www.usatoday.com/media/cinematic/video/81729424/us-france-warn-syrian-regime-ahead-of-new-peace-talks/',
- 'md5': '4d40974481fa3475f8bccfd20c5361f8',
+ 'md5': '033587d2529dc3411a1ab3644c3b8827',
'info_dict': {
- 'id': '81729424',
+ 'id': '4799374959001',
'ext': 'mp4',
'title': 'US, France warn Syrian regime ahead of new peace talks',
'timestamp': 1457891045,
'uploader_id': '29906170001',
'upload_date': '20160313',
}
- }
- BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/29906170001/38a9eecc-bdd8-42a3-ba14-95397e48b3f8_default/index.html?videoId=%s'
+ }, {
+ # ui-video-data[asset_metadata][items][brightcoveaccount] = 28911775001
+ 'url': 'https://www.usatoday.com/story/tech/science/2018/08/21/yellowstone-supervolcano-eruption-stop-worrying-its-blow/973633002/',
+ 'info_dict': {
+ 'id': '5824495846001',
+ 'ext': 'mp4',
+ 'title': 'Yellowstone more likely to crack rather than explode',
+ 'timestamp': 1534790612,
+ 'description': 'md5:3715e7927639a4f16b474e9391687c62',
+ 'uploader_id': '28911775001',
+ 'upload_date': '20180820',
+ }
+ }]
+ BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/default_default/index.html?videoId=%s'
def _real_extract(self, url):
display_id = self._match_id(url)
if not ui_video_data:
raise ExtractorError('no video on the webpage', expected=True)
video_data = self._parse_json(ui_video_data, display_id)
+ item = try_get(video_data, lambda x: x['asset_metadata']['items'], dict) or {}
return {
'_type': 'url_transparent',
- 'url': self.BRIGHTCOVE_URL_TEMPLATE % video_data['brightcove_id'],
+ 'url': self.BRIGHTCOVE_URL_TEMPLATE % (item.get('brightcoveaccount', '29906170001'), item.get('brightcoveid') or video_data['brightcove_id']),
'id': compat_str(video_data['id']),
'title': video_data['title'],
'thumbnail': video_data.get('thumbnail'),
m = re.match(self._VALID_URL, url)
video_id = m.group('id')
- # some sites use this embed format (see: https://github.com/rg3/youtube-dl/issues/2990)
+ # some sites use this embed format (see: https://github.com/ytdl-org/youtube-dl/issues/2990)
if m.group('type') == 'embed/recorded':
video_id = m.group('id')
desktop_url = 'http://www.ustream.tv/recorded/' + video_id
video_id = self._match_id(url)
# VeeHD seems to send garbage on the first request.
- # See https://github.com/rg3/youtube-dl/issues/2102
+ # See https://github.com/ytdl-org/youtube-dl/issues/2102
self._download_webpage(url, video_id, 'Requesting webpage')
webpage = self._download_webpage(url, video_id)
from __future__ import unicode_literals
-import re
-import json
-
from .common import InfoExtractor
from ..utils import (
int_or_none,
- ExtractorError,
- sanitized_Request,
+ parse_duration,
+ qualities,
)
_TESTS = [{
'url': 'http://www.veoh.com/watch/v56314296nk7Zdmz3',
- 'md5': '620e68e6a3cff80086df3348426c9ca3',
+ 'md5': '9e7ecc0fd8bbee7a69fe38953aeebd30',
'info_dict': {
- 'id': '56314296',
+ 'id': 'v56314296nk7Zdmz3',
'ext': 'mp4',
'title': 'Straight Backs Are Stronger',
'uploader': 'LUMOback',
'only_matching': True,
}]
- def _extract_formats(self, source):
- formats = []
- link = source.get('aowPermalink')
- if link:
- formats.append({
- 'url': link,
- 'ext': 'mp4',
- 'format_id': 'aow',
- })
- link = source.get('fullPreviewHashLowPath')
- if link:
- formats.append({
- 'url': link,
- 'format_id': 'low',
- })
- link = source.get('fullPreviewHashHighPath')
- if link:
- formats.append({
- 'url': link,
- 'format_id': 'high',
- })
- return formats
-
def _extract_video(self, source):
return {
'id': source.get('videoId'),
}
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('id')
-
- if video_id.startswith('v'):
- rsp = self._download_xml(
- r'http://www.veoh.com/api/findByPermalink?permalink=%s' % video_id, video_id, 'Downloading video XML')
- stat = rsp.get('stat')
- if stat == 'ok':
- return self._extract_video(rsp.find('./videoList/video'))
- elif stat == 'fail':
- raise ExtractorError(
- '%s said: %s' % (self.IE_NAME, rsp.find('./errorList/error').get('errorMessage')), expected=True)
-
- webpage = self._download_webpage(url, video_id)
- age_limit = 0
- if 'class="adultwarning-container"' in webpage:
- self.report_age_confirmation()
- age_limit = 18
- request = sanitized_Request(url)
- request.add_header('Cookie', 'confirmedAdult=true')
- webpage = self._download_webpage(request, video_id)
+ video_id = self._match_id(url)
+ video = self._download_json(
+ 'https://www.veoh.com/watch/getVideo/' + video_id,
+ video_id)['video']
+ title = video['title']
- m_youtube = re.search(r'http://www\.youtube\.com/v/(.*?)(\&|"|\?)', webpage)
- if m_youtube is not None:
- youtube_id = m_youtube.group(1)
- self.to_screen('%s: detected Youtube video.' % video_id)
- return self.url_result(youtube_id, 'Youtube')
-
- info = json.loads(
- self._search_regex(r'videoDetailsJSON = \'({.*?})\';', webpage, 'info').replace('\\\'', '\''))
-
- video = self._extract_video(info)
- video['age_limit'] = age_limit
+ thumbnail_url = None
+ q = qualities(['HQ', 'Regular'])
+ formats = []
+ for f_id, f_url in video.get('src', {}).items():
+ if not f_url:
+ continue
+ if f_id == 'poster':
+ thumbnail_url = f_url
+ else:
+ formats.append({
+ 'format_id': f_id,
+ 'quality': q(f_id),
+ 'url': f_url,
+ })
+ self._sort_formats(formats)
- return video
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': video.get('description'),
+ 'thumbnail': thumbnail_url,
+ 'uploader': video.get('author', {}).get('nickname'),
+ 'duration': int_or_none(video.get('lengthBySec')) or parse_duration(video.get('length')),
+ 'view_count': int_or_none(video.get('views')),
+ 'formats': formats,
+ 'average_rating': int_or_none(video.get('rating')),
+ 'comment_count': int_or_none(video.get('numOfComments')),
+ }
fatal=False)
# Some videos are only available via webpage (e.g.
- # https://github.com/rg3/youtube-dl/issues/9366)
+ # https://github.com/ytdl-org/youtube-dl/issues/9366)
if not video_versions:
webpage = self._download_webpage(url, video_id)
json_data = self._extract_json(webpage, video_id)
genres = video_info.get('genres')
genre = (
- genres[0] if genres and isinstance(genres, list) and
- isinstance(genres[0], compat_str) else None)
+ genres[0] if genres and isinstance(genres, list)
+ and isinstance(genres[0], compat_str) else None)
is_explicit = video_info.get('isExplicit')
if is_explicit is True:
'url': 'https://www.viceland.com/en_us/video/thursday-march-1-2018/5a8f2d7ff1cdb332dd446ec1',
'only_matching': True,
}]
- _PREPLAY_HOST = 'vms.vice'
@staticmethod
def _extract_urls(webpage):
})
try:
- host = 'www.viceland' if is_locked else self._PREPLAY_HOST
preplay = self._download_json(
- 'https://%s.com/%s/video/preplay/%s' % (host, locale, video_id),
+ 'https://vms.vice.com/%s/video/preplay/%s' % (locale, video_id),
video_id, query=query)
except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError) and e.cause.code in (400, 401):
from __future__ import unicode_literals
+import re
+
from .common import InfoExtractor
-from ..compat import (
- compat_urllib_parse_urlencode,
- compat_urlparse,
-)
from ..utils import (
float_or_none,
int_or_none,
- sanitized_Request,
)
class ViddlerIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?viddler\.com/(?:v|embed|player)/(?P<id>[a-z0-9]+)'
+ _VALID_URL = r'https?://(?:www\.)?viddler\.com/(?:v|embed|player)/(?P<id>[a-z0-9]+)(?:.+?\bsecret=(\d+))?'
_TESTS = [{
'url': 'http://www.viddler.com/v/43903784',
'md5': '9eee21161d2c7f5b39690c3e325fab2f',
}]
def _real_extract(self, url):
- video_id = self._match_id(url)
+ video_id, secret = re.match(self._VALID_URL, url).groups()
query = {
'video_id': video_id,
'key': 'v0vhrt7bg2xq1vyxhkct',
}
-
- qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
- secret = qs.get('secret', [None])[0]
if secret:
query['secret'] = secret
- headers = {'Referer': 'http://static.cdn-ec.viddler.com/js/arpeggio/v2/embed.html'}
- request = sanitized_Request(
- 'http://api.viddler.com/api/v2/viddler.videos.getPlaybackDetails.json?%s'
- % compat_urllib_parse_urlencode(query), None, headers)
- data = self._download_json(request, video_id)['video']
+ data = self._download_json(
+ 'http://api.viddler.com/api/v2/viddler.videos.getPlaybackDetails.json',
+ video_id, headers={'Referer': url}, query=query)['video']
formats = []
for filed in data['files']:
+++ /dev/null
-# coding: utf-8
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-from ..utils import (
- decode_packed_codes,
- sanitized_Request,
-)
-
-
-class VideoMegaIE(InfoExtractor):
- _VALID_URL = r'(?:videomega:|https?://(?:www\.)?videomega\.tv/(?:(?:view|iframe|cdn)\.php)?\?ref=)(?P<id>[A-Za-z0-9]+)'
- _TESTS = [{
- 'url': 'http://videomega.tv/cdn.php?ref=AOSQBJYKIDDIKYJBQSOA',
- 'md5': 'cc1920a58add3f05c6a93285b84fb3aa',
- 'info_dict': {
- 'id': 'AOSQBJYKIDDIKYJBQSOA',
- 'ext': 'mp4',
- 'title': '1254207',
- 'thumbnail': r're:^https?://.*\.jpg$',
- }
- }, {
- 'url': 'http://videomega.tv/cdn.php?ref=AOSQBJYKIDDIKYJBQSOA&width=1070&height=600',
- 'only_matching': True,
- }, {
- 'url': 'http://videomega.tv/view.php?ref=090051111052065112106089103052052103089106112065052111051090',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
-
- iframe_url = 'http://videomega.tv/cdn.php?ref=%s' % video_id
- req = sanitized_Request(iframe_url)
- req.add_header('Referer', url)
- req.add_header('Cookie', 'noadvtday=0')
- webpage = self._download_webpage(req, video_id)
-
- title = self._html_search_regex(
- r'<title>(.+?)</title>', webpage, 'title')
- title = re.sub(
- r'(?:^[Vv]ideo[Mm]ega\.tv\s-\s*|\s*-\svideomega\.tv$)', '', title)
- thumbnail = self._search_regex(
- r'<video[^>]+?poster="([^"]+)"', webpage, 'thumbnail', fatal=False)
-
- real_codes = decode_packed_codes(webpage)
- video_url = self._search_regex(
- r'"src"\s*,\s*"([^"]+)"', real_codes, 'video URL')
-
- return {
- 'id': video_id,
- 'title': title,
- 'url': video_url,
- 'thumbnail': thumbnail,
- 'http_headers': {
- 'Referer': iframe_url,
- },
- }
import re
from .common import InfoExtractor
+from ..compat import compat_str
from ..utils import (
int_or_none,
+ orderedSet,
+ parse_duration,
+ str_or_none,
+ unified_strdate,
+ url_or_none,
xpath_element,
xpath_text,
)
class VideomoreIE(InfoExtractor):
IE_NAME = 'videomore'
- _VALID_URL = r'videomore:(?P<sid>\d+)$|https?://videomore\.ru/(?:(?:embed|[^/]+/[^/]+)/|[^/]+\?.*\btrack_id=)(?P<id>\d+)(?:[/?#&]|\.(?:xml|json)|$)'
+ _VALID_URL = r'''(?x)
+ videomore:(?P<sid>\d+)$|
+ https?://(?:player\.)?videomore\.ru/
+ (?:
+ (?:
+ embed|
+ [^/]+/[^/]+
+ )/|
+ [^/]*\?.*?\btrack_id=
+ )
+ (?P<id>\d+)
+ (?:[/?#&]|\.(?:xml|json)|$)
+ '''
_TESTS = [{
'url': 'http://videomore.ru/kino_v_detalayah/5_sezon/367617',
'md5': '44455a346edc0d509ac5b5a5b531dc35',
}, {
'url': 'videomore:367617',
'only_matching': True,
+ }, {
+ 'url': 'https://player.videomore.ru/?partner_id=97&track_id=736234&autoplay=0&userToken=',
+ 'only_matching': True,
}]
@staticmethod
class VideomoreVideoIE(InfoExtractor):
IE_NAME = 'videomore:video'
- _VALID_URL = r'https?://videomore\.ru/(?:(?:[^/]+/){2})?(?P<id>[^/?#&]+)[/?#&]*$'
+ _VALID_URL = r'https?://videomore\.ru/(?:(?:[^/]+/){2})?(?P<id>[^/?#&]+)(?:/*|[?#&].*?)$'
_TESTS = [{
# single video with og:video:iframe
'url': 'http://videomore.ru/elki_3',
'params': {
'skip_download': True,
},
+ }, {
+ 'url': 'https://videomore.ru/molodezhka/6_sezon/29_seriya?utm_so',
+ 'only_matching': True,
}]
@classmethod
r'track-id=["\'](\d+)',
r'xcnt_product_id\s*=\s*(\d+)'), webpage, 'video id')
video_url = 'videomore:%s' % video_id
+ else:
+ video_id = None
- return self.url_result(video_url, VideomoreIE.ie_key())
+ return self.url_result(
+ video_url, ie=VideomoreIE.ie_key(), video_id=video_id)
class VideomoreSeasonIE(InfoExtractor):
IE_NAME = 'videomore:season'
- _VALID_URL = r'https?://videomore\.ru/(?!embed)(?P<id>[^/]+/[^/?#&]+)[/?#&]*$'
+ _VALID_URL = r'https?://videomore\.ru/(?!embed)(?P<id>[^/]+/[^/?#&]+)(?:/*|[?#&].*?)$'
_TESTS = [{
'url': 'http://videomore.ru/molodezhka/sezon_promo',
'info_dict': {
'title': 'Молодежка Промо',
},
'playlist_mincount': 12,
+ }, {
+ 'url': 'http://videomore.ru/molodezhka/sezon_promo?utm_so',
+ 'only_matching': True,
}]
+ @classmethod
+ def suitable(cls, url):
+ return (False if (VideomoreIE.suitable(url) or VideomoreVideoIE.suitable(url))
+ else super(VideomoreSeasonIE, cls).suitable(url))
+
def _real_extract(self, url):
display_id = self._match_id(url)
title = self._og_search_title(webpage)
- entries = [
- self.url_result(item) for item in re.findall(
- r'<a[^>]+href="((?:https?:)?//videomore\.ru/%s/[^/]+)"[^>]+class="widget-item-desc"'
- % display_id, webpage)]
+ data = self._parse_json(
+ self._html_search_regex(
+ r'\bclass=["\']seasons-tracks["\'][^>]+\bdata-custom-data=(["\'])(?P<value>{.+?})\1',
+ webpage, 'data', default='{}', group='value'),
+ display_id, fatal=False)
+
+ entries = []
+
+ if data:
+ episodes = data.get('episodes')
+ if isinstance(episodes, list):
+ for ep in episodes:
+ if not isinstance(ep, dict):
+ continue
+ ep_id = int_or_none(ep.get('id'))
+ ep_url = url_or_none(ep.get('url'))
+ if ep_id:
+ e = {
+ 'url': 'videomore:%s' % ep_id,
+ 'id': compat_str(ep_id),
+ }
+ elif ep_url:
+ e = {'url': ep_url}
+ else:
+ continue
+ e.update({
+ '_type': 'url',
+ 'ie_key': VideomoreIE.ie_key(),
+ 'title': str_or_none(ep.get('title')),
+ 'thumbnail': url_or_none(ep.get('image')),
+ 'duration': parse_duration(ep.get('duration')),
+ 'episode_number': int_or_none(ep.get('number')),
+ 'upload_date': unified_strdate(ep.get('date')),
+ })
+ entries.append(e)
+
+ if not entries:
+ entries = [
+ self.url_result(
+ 'videomore:%s' % video_id, ie=VideomoreIE.ie_key(),
+ video_id=video_id)
+ for video_id in orderedSet(re.findall(
+ r':(?:id|key)=["\'](\d+)["\']', webpage))]
+
+ if not entries:
+ entries = [
+ self.url_result(item) for item in re.findall(
+ r'<a[^>]+href="((?:https?:)?//videomore\.ru/%s/[^/]+)"[^>]+class="widget-item-desc"'
+ % display_id, webpage)]
return self.playlist_result(entries, display_id, title)
class VikiBaseIE(InfoExtractor):
_VALID_URL_BASE = r'https?://(?:www\.)?viki\.(?:com|net|mx|jp|fr)/'
_API_QUERY_TEMPLATE = '/v4/%sapp=%s&t=%s&site=www.viki.com'
- _API_URL_TEMPLATE = 'http://api.viki.io%s&sig=%s'
+ _API_URL_TEMPLATE = 'https://api.viki.io%s&sig=%s'
_APP = '100005a'
_APP_VERSION = '2.2.5.1428709186'
for video in page['response']:
video_id = video['id']
entries.append(self.url_result(
- 'http://www.viki.com/videos/%s' % video_id, 'Viki'))
+ 'https://www.viki.com/videos/%s' % video_id, 'Viki'))
if not page['pagination']['next']:
break
# coding: utf-8
from __future__ import unicode_literals
+import base64
import json
import re
import itertools
def _parse_config(self, config, video_id):
video_data = config['video']
- # Extract title
video_title = video_data['title']
-
- # Extract uploader, uploader_url and uploader_id
- video_uploader = video_data.get('owner', {}).get('name')
- video_uploader_url = video_data.get('owner', {}).get('url')
- video_uploader_id = video_uploader_url.split('/')[-1] if video_uploader_url else None
-
- # Extract video thumbnail
- video_thumbnail = video_data.get('thumbnail')
- if video_thumbnail is None:
- video_thumbs = video_data.get('thumbs')
- if video_thumbs and isinstance(video_thumbs, dict):
- _, video_thumbnail = sorted((int(width if width.isdigit() else 0), t_url) for (width, t_url) in video_thumbs.items())[-1]
-
- # Extract video duration
- video_duration = int_or_none(video_data.get('duration'))
+ live_event = video_data.get('live_event') or {}
+ is_live = live_event.get('status') == 'started'
formats = []
config_files = video_data.get('files') or config['request'].get('files', {})
'tbr': int_or_none(f.get('bitrate')),
})
+ # TODO: fix handling of 308 status code returned for live archive manifest requests
for files_type in ('hls', 'dash'):
for cdn_name, cdn_data in config_files.get(files_type, {}).get('cdns', {}).items():
manifest_url = cdn_data.get('url')
if files_type == 'hls':
formats.extend(self._extract_m3u8_formats(
manifest_url, video_id, 'mp4',
- 'm3u8_native', m3u8_id=format_id,
+ 'm3u8' if is_live else 'm3u8_native', m3u8_id=format_id,
note='Downloading %s m3u8 information' % cdn_name,
fatal=False))
elif files_type == 'dash':
else:
mpd_manifest_urls = [(format_id, manifest_url)]
for f_id, m_url in mpd_manifest_urls:
+ if 'json=1' in m_url:
+ real_m_url = (self._download_json(m_url, video_id, fatal=False) or {}).get('url')
+ if real_m_url:
+ m_url = real_m_url
mpd_formats = self._extract_mpd_formats(
m_url.replace('/master.json', '/master.mpd'), video_id, f_id,
'Downloading %s MPD information' % cdn_name,
f['preference'] = -40
formats.extend(mpd_formats)
+ live_archive = live_event.get('archive') or {}
+ live_archive_source_url = live_archive.get('source_url')
+ if live_archive_source_url and live_archive.get('status') == 'done':
+ formats.append({
+ 'format_id': 'live-archive-source',
+ 'url': live_archive_source_url,
+ 'preference': 1,
+ })
+
subtitles = {}
text_tracks = config['request'].get('text_tracks')
if text_tracks:
'url': 'https://vimeo.com' + tt['url'],
}]
+ thumbnails = []
+ if not is_live:
+ for key, thumb in video_data.get('thumbs', {}).items():
+ thumbnails.append({
+ 'id': key,
+ 'width': int_or_none(key),
+ 'url': thumb,
+ })
+ thumbnail = video_data.get('thumbnail')
+ if thumbnail:
+ thumbnails.append({
+ 'url': thumbnail,
+ })
+
+ owner = video_data.get('owner') or {}
+ video_uploader_url = owner.get('url')
+
return {
- 'title': video_title,
- 'uploader': video_uploader,
- 'uploader_id': video_uploader_id,
+ 'title': self._live_title(video_title) if is_live else video_title,
+ 'uploader': owner.get('name'),
+ 'uploader_id': video_uploader_url.split('/')[-1] if video_uploader_url else None,
'uploader_url': video_uploader_url,
- 'thumbnail': video_thumbnail,
- 'duration': video_duration,
+ 'thumbnails': thumbnails,
+ 'duration': int_or_none(video_data.get('duration')),
'formats': formats,
'subtitles': subtitles,
+ 'is_live': is_live,
}
+ def _extract_original_format(self, url, video_id):
+ download_data = self._download_json(
+ url, video_id, fatal=False,
+ query={'action': 'load_download_config'},
+ headers={'X-Requested-With': 'XMLHttpRequest'})
+ if download_data:
+ source_file = download_data.get('source_file')
+ if isinstance(source_file, dict):
+ download_url = source_file.get('download_url')
+ if download_url and not source_file.get('is_cold') and not source_file.get('is_defrosting'):
+ source_name = source_file.get('public_name', 'Original')
+ if self._is_valid_url(download_url, video_id, '%s video' % source_name):
+ ext = (try_get(
+ source_file, lambda x: x['extension'],
+ compat_str) or determine_ext(
+ download_url, None) or 'mp4').lower()
+ return {
+ 'url': download_url,
+ 'ext': ext,
+ 'width': int_or_none(source_file.get('width')),
+ 'height': int_or_none(source_file.get('height')),
+ 'filesize': parse_filesize(source_file.get('size')),
+ 'format_id': source_name,
+ 'preference': 1,
+ }
+
class VimeoIE(VimeoBaseInfoExtractor):
"""Information extractor for vimeo.com."""
'skip_download': True,
},
},
+ {
+ 'url': 'http://player.vimeo.com/video/68375962',
+ 'md5': 'aaf896bdb7ddd6476df50007a0ac0ae7',
+ 'info_dict': {
+ 'id': '68375962',
+ 'ext': 'mp4',
+ 'title': 'youtube-dl password protected test video',
+ 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/user18948128',
+ 'uploader_id': 'user18948128',
+ 'uploader': 'Jaime Marquínez Ferrándiz',
+ 'duration': 10,
+ },
+ 'params': {
+ 'videopassword': 'youtube-dl',
+ },
+ },
{
'url': 'http://vimeo.com/moogaloop.swf?clip_id=2539741',
'only_matching': True,
'url': 'https://vimeo.com/160743502/abd0e13fb4',
'only_matching': True,
}
+ # https://gettingthingsdone.com/workflowmap/
+ # vimeo embed with check-password page protected by Referer header
]
@staticmethod
urls = VimeoIE._extract_urls(url, webpage)
return urls[0] if urls else None
- def _verify_player_video_password(self, url, video_id):
+ def _verify_player_video_password(self, url, video_id, headers):
password = self._downloader.params.get('videopassword')
if password is None:
raise ExtractorError('This video is protected by a password, use the --video-password option')
- data = urlencode_postdata({'password': password})
- pass_url = url + '/check-password'
- password_request = sanitized_Request(pass_url, data)
- password_request.add_header('Content-Type', 'application/x-www-form-urlencoded')
- password_request.add_header('Referer', url)
- return self._download_json(
- password_request, video_id,
- 'Verifying the password', 'Wrong password')
+ data = urlencode_postdata({
+ 'password': base64.b64encode(password.encode()),
+ })
+ headers = merge_dicts(headers, {
+ 'Content-Type': 'application/x-www-form-urlencoded',
+ })
+ checked = self._download_json(
+ url + '/check-password', video_id,
+ 'Verifying the password', data=data, headers=headers)
+ if checked is False:
+ raise ExtractorError('Wrong video password', expected=True)
+ return checked
def _real_initialize(self):
self._login()
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
orig_url = url
- if mobj.group('pro') or mobj.group('player'):
+ if mobj.group('pro'):
+ # some videos require portfolio_id to be present in player url
+ # https://github.com/ytdl-org/youtube-dl/issues/20070
+ url = self._extract_url(url, self._download_webpage(url, video_id))
+ elif mobj.group('player'):
url = 'https://player.vimeo.com/video/' + video_id
elif any(p in url for p in ('play_redirect_hls', 'moogaloop.swf')):
url = 'https://vimeo.com/' + video_id
if not config_url:
# Sometimes new react-based page is served instead of old one that require
# different config URL extraction approach (see
- # https://github.com/rg3/youtube-dl/pull/7209)
+ # https://github.com/ytdl-org/youtube-dl/pull/7209)
vimeo_clip_page_config = self._search_regex(
r'vimeo\.clip_page_config\s*=\s*({.+?});', webpage,
'vimeo clip page config')
cause=e)
else:
if config.get('view') == 4:
- config = self._verify_player_video_password(redirect_url, video_id)
+ config = self._verify_player_video_password(redirect_url, video_id, headers)
vod = config.get('video', {}).get('vod', {})
comment_count = None
formats = []
- download_request = sanitized_Request('https://vimeo.com/%s?action=load_download_config' % video_id, headers={
- 'X-Requested-With': 'XMLHttpRequest'})
- download_data = self._download_json(download_request, video_id, fatal=False)
- if download_data:
- source_file = download_data.get('source_file')
- if isinstance(source_file, dict):
- download_url = source_file.get('download_url')
- if download_url and not source_file.get('is_cold') and not source_file.get('is_defrosting'):
- source_name = source_file.get('public_name', 'Original')
- if self._is_valid_url(download_url, video_id, '%s video' % source_name):
- ext = (try_get(
- source_file, lambda x: x['extension'],
- compat_str) or determine_ext(
- download_url, None) or 'mp4').lower()
- formats.append({
- 'url': download_url,
- 'ext': ext,
- 'width': int_or_none(source_file.get('width')),
- 'height': int_or_none(source_file.get('height')),
- 'filesize': parse_filesize(source_file.get('size')),
- 'format_id': source_name,
- 'preference': 1,
- })
+
+ source_format = self._extract_original_format(
+ 'https://vimeo.com/' + video_id, video_id)
+ if source_format:
+ formats.append(source_format)
info_dict_config = self._parse_config(config, video_id)
formats.extend(info_dict_config['formats'])
class VimeoReviewIE(VimeoBaseInfoExtractor):
IE_NAME = 'vimeo:review'
IE_DESC = 'Review pages on vimeo'
- _VALID_URL = r'https://vimeo\.com/[^/]+/review/(?P<id>[^/]+)'
+ _VALID_URL = r'(?P<url>https://vimeo\.com/[^/]+/review/(?P<id>[^/]+)/[0-9a-f]{10})'
_TESTS = [{
'url': 'https://vimeo.com/user21297594/review/75524534/3c257a1b5d',
'md5': 'c507a72f780cacc12b2248bb4006d253',
data = self._parse_json(self._search_regex(
r'window\s*=\s*_extend\(window,\s*({.+?})\);', webpage, 'data',
default=NO_DEFAULT if video_password_verified else '{}'), video_id)
- config_url = data.get('vimeo_esi', {}).get('config', {}).get('configUrl')
+ config = data.get('vimeo_esi', {}).get('config', {})
+ config_url = config.get('configUrl') or try_get(config, lambda x: x['clipData']['configUrl'])
if config_url is None:
self._verify_video_password(webpage_url, video_id, webpage)
config_url = self._get_config_url(
return config_url
def _real_extract(self, url):
- video_id = self._match_id(url)
+ page_url, video_id = re.match(self._VALID_URL, url).groups()
config_url = self._get_config_url(url, video_id)
config = self._download_json(config_url, video_id)
info_dict = self._parse_config(config, video_id)
+ source_format = self._extract_original_format(page_url, video_id)
+ if source_format:
+ info_dict['formats'].append(source_format)
self._vimeo_sort_formats(info_dict['formats'])
info_dict['id'] = video_id
return info_dict
import collections
import re
-import sys
from .common import InfoExtractor
-from ..compat import (
- compat_str,
- compat_urlparse,
-)
+from ..compat import compat_urlparse
from ..utils import (
clean_html,
ExtractorError,
'pass': password.encode('cp1251'),
})
- # https://new.vk.com/ serves two same remixlhk cookies in Set-Cookie header
- # and expects the first one to be set rather than second (see
- # https://github.com/rg3/youtube-dl/issues/9841#issuecomment-227871201).
- # As of RFC6265 the newer one cookie should be set into cookie store
- # what actually happens.
- # We will workaround this VK issue by resetting the remixlhk cookie to
- # the first one manually.
- for header, cookies in url_handle.headers.items():
- if header.lower() != 'set-cookie':
- continue
- if sys.version_info[0] >= 3:
- cookies = cookies.encode('iso-8859-1')
- cookies = cookies.decode('utf-8')
- remixlhk = re.search(r'remixlhk=(.+?);.*?\bdomain=(.+?)(?:[,;]|$)', cookies)
- if remixlhk:
- value, domain = remixlhk.groups()
- self._set_cookie(domain, 'remixlhk', value)
- break
+ # vk serves two same remixlhk cookies in Set-Cookie header and expects
+ # first one to be actually set
+ self._apply_first_set_cookie_header(url_handle, 'remixlhk')
login_page = self._download_webpage(
'https://login.vk.com/?act=login', None,
'url': 'http://vk.com/videos-77521?z=video-77521_162222515%2Fclub77521',
'md5': '7babad3b85ea2e91948005b1b8b0cb84',
'info_dict': {
- 'id': '162222515',
+ 'id': '-77521_162222515',
'ext': 'mp4',
'title': 'ProtivoGunz - Хуёвая песня',
'uploader': 're:(?:Noize MC|Alexander Ilyashenko).*',
'url': 'http://vk.com/video205387401_165548505',
'md5': '6c0aeb2e90396ba97035b9cbde548700',
'info_dict': {
- 'id': '165548505',
+ 'id': '205387401_165548505',
'ext': 'mp4',
'title': 'No name',
'uploader': 'Tom Cruise',
'url': 'http://vk.com/video_ext.php?oid=32194266&id=162925554&hash=7d8c2e0d5e05aeaa&hd=1',
'md5': 'c7ce8f1f87bec05b3de07fdeafe21a0a',
'info_dict': {
- 'id': '162925554',
+ 'id': '32194266_162925554',
'ext': 'mp4',
'uploader': 'Vladimir Gavrin',
'title': 'Lin Dan',
'md5': 'a590bcaf3d543576c9bd162812387666',
'note': 'Only available for registered users',
'info_dict': {
- 'id': '164049491',
+ 'id': '-8871596_164049491',
'ext': 'mp4',
'uploader': 'Триллеры',
'title': '► Бойцовский клуб / Fight Club 1999 [HD 720]',
'url': 'http://vk.com/hd_kino_mania?z=video-43215063_168067957%2F15c66b9b533119788d',
'md5': '4d7a5ef8cf114dfa09577e57b2993202',
'info_dict': {
- 'id': '168067957',
+ 'id': '-43215063_168067957',
'ext': 'mp4',
'uploader': 'Киномания - лучшее из мира кино',
'title': ' ',
'md5': '0c45586baa71b7cb1d0784ee3f4e00a6',
'note': 'ivi.ru embed',
'info_dict': {
- 'id': '60690',
+ 'id': '-43215063_169084319',
'ext': 'mp4',
'title': 'Книга Илая',
'duration': 6771,
'url': 'https://vk.com/video30481095_171201961?list=8764ae2d21f14088d4',
'md5': '091287af5402239a1051c37ec7b92913',
'info_dict': {
- 'id': '171201961',
+ 'id': '30481095_171201961',
'ext': 'mp4',
'title': 'ТюменцевВВ_09.07.2015',
'uploader': 'Anton Ivanov',
'url': 'https://vk.com/video276849682_170681728',
'info_dict': {
'id': 'V3K4mi0SYkc',
- 'ext': 'webm',
+ 'ext': 'mp4',
'title': "DSWD Awards 'Children's Joy Foundation, Inc.' Certificate of Registration and License to Operate",
'description': 'md5:bf9c26cfa4acdfb146362682edd3827a',
- 'duration': 179,
+ 'duration': 178,
'upload_date': '20130116',
'uploader': "Children's Joy Foundation Inc.",
'uploader_id': 'thecjf',
'url': 'http://vk.com/video-110305615_171782105',
'md5': 'e13fcda136f99764872e739d13fac1d1',
'info_dict': {
- 'id': '171782105',
+ 'id': '-110305615_171782105',
'ext': 'mp4',
'title': 'S-Dance, репетиции к The way show',
'uploader': 'THE WAY SHOW | 17 апреля',
{
# finished live stream, postlive_mp4
'url': 'https://vk.com/videos-387766?z=video-387766_456242764%2Fpl_-387766_-2',
- 'md5': '90d22d051fccbbe9becfccc615be6791',
'info_dict': {
- 'id': '456242764',
+ 'id': '-387766_456242764',
'ext': 'mp4',
- 'title': 'ИгроМир 2016 — день 1',
+ 'title': 'ИгроМир 2016 День 1 — Игромания Утром',
'uploader': 'Игромания',
'duration': 5239,
- 'view_count': int,
+ # TODO: use act=show to extract view_count
+ # 'view_count': int,
+ 'upload_date': '20160929',
+ 'uploader_id': '-387766',
+ 'timestamp': 1475137527,
},
},
{
format_url = url_or_none(format_url)
if not format_url or not format_url.startswith(('http', '//', 'rtmp')):
continue
- if (format_id.startswith(('url', 'cache')) or
- format_id in ('extra_data', 'live_mp4', 'postlive_mp4')):
+ if (format_id.startswith(('url', 'cache'))
+ or format_id in ('extra_data', 'live_mp4', 'postlive_mp4')):
height = int_or_none(self._search_regex(
r'^(?:url|cache)(\d+)', format_id, 'height', default=None))
formats.append({
self._sort_formats(formats)
return {
- 'id': compat_str(data.get('vid') or video_id),
+ 'id': video_id,
'formats': formats,
'title': title,
'thumbnail': data.get('jpg'),
class VLiveIE(InfoExtractor):
IE_NAME = 'vlive'
_VALID_URL = r'https?://(?:(?:www|m)\.)?vlive\.tv/video/(?P<id>[0-9]+)'
+ _NETRC_MACHINE = 'vlive'
_TESTS = [{
'url': 'http://www.vlive.tv/video/1326',
'md5': 'cc7314812855ce56de70a06a27314983',
'params': {
'skip_download': True,
},
+ }, {
+ 'url': 'https://www.vlive.tv/video/129100',
+ 'md5': 'ca2569453b79d66e5b919e5d308bff6b',
+ 'info_dict': {
+ 'id': '129100',
+ 'ext': 'mp4',
+ 'title': '[V LIVE] [BTS+] Run BTS! 2019 - EP.71 :: Behind the scene',
+ 'creator': 'BTS+',
+ 'view_count': int,
+ 'subtitles': 'mincount:10',
+ },
+ 'skip': 'This video is only available for CH+ subscribers',
}]
@classmethod
def suitable(cls, url):
return False if VLivePlaylistIE.suitable(url) else super(VLiveIE, cls).suitable(url)
+ def _real_initialize(self):
+ self._login()
+
+ def _login(self):
+ email, password = self._get_login_info()
+ if None in (email, password):
+ return
+
+ def is_logged_in():
+ login_info = self._download_json(
+ 'https://www.vlive.tv/auth/loginInfo', None,
+ note='Downloading login info',
+ headers={'Referer': 'https://www.vlive.tv/home'})
+ return try_get(
+ login_info, lambda x: x['message']['login'], bool) or False
+
+ LOGIN_URL = 'https://www.vlive.tv/auth/email/login'
+ self._request_webpage(
+ LOGIN_URL, None, note='Downloading login cookies')
+
+ self._download_webpage(
+ LOGIN_URL, None, note='Logging in',
+ data=urlencode_postdata({'email': email, 'pwd': password}),
+ headers={
+ 'Referer': LOGIN_URL,
+ 'Content-Type': 'application/x-www-form-urlencoded'
+ })
+
+ if not is_logged_in():
+ raise ExtractorError('Unable to log in', expected=True)
+
def _real_extract(self, url):
video_id = self._match_id(url)
if status in ('LIVE_ON_AIR', 'BIG_EVENT_ON_AIR'):
return self._live(video_id, webpage)
elif status in ('VOD_ON_AIR', 'BIG_EVENT_INTRO'):
- if long_video_id and key:
- return self._replay(video_id, webpage, long_video_id, key)
- else:
- status = 'COMING_SOON'
+ return self._replay(video_id, webpage, long_video_id, key)
if status == 'LIVE_END':
raise ExtractorError('Uploading for replay. Please wait...',
raise ExtractorError('We are sorry, '
'but the live broadcast has been canceled.',
expected=True)
+ elif status == 'ONLY_APP':
+ raise ExtractorError('Unsupported video type', expected=True)
else:
raise ExtractorError('Unknown status %s' % status)
def _get_common_fields(self, webpage):
title = self._og_search_title(webpage)
creator = self._html_search_regex(
- r'<div[^>]+class="info_area"[^>]*>\s*<a\s+[^>]*>([^<]+)',
+ r'<div[^>]+class="info_area"[^>]*>\s*(?:<em[^>]*>.*?</em\s*>\s*)?<a\s+[^>]*>([^<]+)',
webpage, 'creator', fatal=False)
thumbnail = self._og_search_thumbnail(webpage)
return {
}
def _live(self, video_id, webpage):
- init_page = self._download_webpage(
- 'https://www.vlive.tv/video/init/view',
- video_id, note='Downloading live webpage',
- data=urlencode_postdata({'videoSeq': video_id}),
- headers={
- 'Referer': 'https://www.vlive.tv/video/%s' % video_id,
- 'Content-Type': 'application/x-www-form-urlencoded'
- })
+ init_page = self._download_init_page(video_id)
live_params = self._search_regex(
r'"liveStreamInfo"\s*:\s*(".*"),',
return info
def _replay(self, video_id, webpage, long_video_id, key):
+ if '' in (long_video_id, key):
+ init_page = self._download_init_page(video_id)
+ video_info = self._parse_json(self._search_regex(
+ (r'(?s)oVideoStatus\s*=\s*({.+?})\s*</script',
+ r'(?s)oVideoStatus\s*=\s*({.+})'), init_page, 'video info'),
+ video_id)
+ if video_info.get('status') == 'NEED_CHANNEL_PLUS':
+ self.raise_login_required(
+ 'This video is only available for CH+ subscribers')
+ long_video_id, key = video_info['vid'], video_info['inkey']
+
playinfo = self._download_json(
'http://global.apis.naver.com/rmcnmv/rmcnmv/vod_play_videoInfo.json?%s'
% compat_urllib_parse_urlencode({
})
return info
+ def _download_init_page(self, video_id):
+ return self._download_webpage(
+ 'https://www.vlive.tv/video/init/view',
+ video_id, note='Downloading live webpage',
+ data=urlencode_postdata({'videoSeq': video_id}),
+ headers={
+ 'Referer': 'https://www.vlive.tv/video/%s' % video_id,
+ 'Content-Type': 'application/x-www-form-urlencoded'
+ })
+
class VLiveChannelIE(InfoExtractor):
IE_NAME = 'vlive:channel'
# Large values of maxNumOfRows (~300 or above) may cause
# empty responses (see [1]), e.g. this happens for [2] that
# has more than 300 videos.
- # 1. https://github.com/rg3/youtube-dl/issues/13830
+ # 1. https://github.com/ytdl-org/youtube-dl/issues/13830
# 2. http://channels.vlive.tv/EDBF.
'maxNumOfRows': 100,
'_': int(time.time()),
class VLivePlaylistIE(InfoExtractor):
IE_NAME = 'vlive:playlist'
_VALID_URL = r'https?://(?:(?:www|m)\.)?vlive\.tv/video/(?P<video_id>[0-9]+)/playlist/(?P<id>[0-9]+)'
- _TEST = {
+ _VIDEO_URL_TEMPLATE = 'http://www.vlive.tv/video/%s'
+ _TESTS = [{
+ # regular working playlist
+ 'url': 'https://www.vlive.tv/video/117956/playlist/117963',
+ 'info_dict': {
+ 'id': '117963',
+ 'title': '아이돌룸(IDOL ROOM) 41회 - (여자)아이들'
+ },
+ 'playlist_mincount': 10
+ }, {
+ # playlist with no playlistVideoSeqs
'url': 'http://www.vlive.tv/video/22867/playlist/22912',
'info_dict': {
- 'id': '22912',
- 'title': 'Valentine Day Message from TWICE'
+ 'id': '22867',
+ 'ext': 'mp4',
+ 'title': '[V LIVE] Valentine Day Message from MINA',
+ 'creator': 'TWICE',
+ 'view_count': int
},
- 'playlist_mincount': 9
- }
+ 'params': {
+ 'skip_download': True,
+ }
+ }]
+
+    def _build_video_result(self, video_id, message):
+        """Print ``message`` and delegate a single video to ``VLiveIE``.
+
+        Used whenever the playlist URL should resolve to just the video it
+        names; the URL is built from ``_VIDEO_URL_TEMPLATE``.
+        """
+        self.to_screen(message)
+        video_url = self._VIDEO_URL_TEMPLATE % video_id
+        return self.url_result(
+            video_url, ie=VLiveIE.ie_key(), video_id=video_id)
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id, playlist_id = mobj.group('video_id', 'id')
- VIDEO_URL_TEMPLATE = 'http://www.vlive.tv/video/%s'
if self._downloader.params.get('noplaylist'):
- self.to_screen(
- 'Downloading just video %s because of --no-playlist' % video_id)
- return self.url_result(
- VIDEO_URL_TEMPLATE % video_id,
- ie=VLiveIE.ie_key(), video_id=video_id)
+ return self._build_video_result(
+ video_id,
+ 'Downloading just video %s because of --no-playlist'
+ % video_id)
self.to_screen(
'Downloading playlist %s - add --no-playlist to just download video'
'http://www.vlive.tv/video/%s/playlist/%s'
% (video_id, playlist_id), playlist_id)
- item_ids = self._parse_json(
- self._search_regex(
- r'playlistVideoSeqs\s*=\s*(\[[^]]+\])', webpage,
- 'playlist video seqs'),
- playlist_id)
+ raw_item_ids = self._search_regex(
+ r'playlistVideoSeqs\s*=\s*(\[[^]]+\])', webpage,
+ 'playlist video seqs', default=None, fatal=False)
+
+ if not raw_item_ids:
+ return self._build_video_result(
+ video_id,
+ 'Downloading just video %s because no playlist was found'
+ % video_id)
+
+ item_ids = self._parse_json(raw_item_ids, playlist_id)
entries = [
self.url_result(
- VIDEO_URL_TEMPLATE % item_id, ie=VLiveIE.ie_key(),
+ self._VIDEO_URL_TEMPLATE % item_id, ie=VLiveIE.ie_key(),
video_id=compat_str(item_id))
for item_id in item_ids]
+++ /dev/null
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-from ..utils import (
- ExtractorError,
- parse_duration,
- str_to_int,
- urljoin,
-)
-
-
-class VpornIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?vporn\.com/[^/]+/(?P<display_id>[^/]+)/(?P<id>\d+)'
- _TESTS = [
- {
- 'url': 'http://www.vporn.com/masturbation/violet-on-her-th-birthday/497944/',
- 'md5': 'facf37c1b86546fa0208058546842c55',
- 'info_dict': {
- 'id': '497944',
- 'display_id': 'violet-on-her-th-birthday',
- 'ext': 'mp4',
- 'title': 'Violet on her 19th birthday',
- 'description': 'Violet dances in front of the camera which is sure to get you horny.',
- 'thumbnail': r're:^https?://.*\.jpg$',
- 'uploader': 'kileyGrope',
- 'categories': ['Masturbation', 'Teen'],
- 'duration': 393,
- 'age_limit': 18,
- 'view_count': int,
- },
- 'skip': 'video removed',
- },
- {
- 'url': 'http://www.vporn.com/female/hana-shower/523564/',
- 'md5': 'ced35a4656198a1664cf2cda1575a25f',
- 'info_dict': {
- 'id': '523564',
- 'display_id': 'hana-shower',
- 'ext': 'mp4',
- 'title': 'Hana Shower',
- 'description': 'Hana showers at the bathroom.',
- 'thumbnail': r're:^https?://.*\.jpg$',
- 'uploader': 'Hmmmmm',
- 'categories': ['Big Boobs', 'Erotic', 'Teen', 'Female', '720p'],
- 'duration': 588,
- 'age_limit': 18,
- 'view_count': int,
- }
- },
- ]
-
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('id')
- display_id = mobj.group('display_id')
-
- webpage = self._download_webpage(url, display_id)
-
- errmsg = 'This video has been deleted due to Copyright Infringement or by the account owner!'
- if errmsg in webpage:
- raise ExtractorError('%s said: %s' % (self.IE_NAME, errmsg), expected=True)
-
- title = self._html_search_regex(
- r'videoname\s*=\s*\'([^\']+)\'', webpage, 'title').strip()
- description = self._html_search_regex(
- r'class="(?:descr|description_txt)">(.*?)</div>',
- webpage, 'description', fatal=False)
- thumbnail = urljoin('http://www.vporn.com', self._html_search_regex(
- r'flashvars\.imageUrl\s*=\s*"([^"]+)"', webpage, 'description',
- default=None))
-
- uploader = self._html_search_regex(
- r'(?s)Uploaded by:.*?<a href="/user/[^"]+"[^>]*>(.+?)</a>',
- webpage, 'uploader', fatal=False)
-
- categories = re.findall(r'<a href="/cat/[^"]+"[^>]*>([^<]+)</a>', webpage)
-
- duration = parse_duration(self._search_regex(
- r'Runtime:\s*</span>\s*(\d+ min \d+ sec)',
- webpage, 'duration', fatal=False))
-
- view_count = str_to_int(self._search_regex(
- r'class="views">([\d,\.]+) [Vv]iews<',
- webpage, 'view count', fatal=False))
- comment_count = str_to_int(self._html_search_regex(
- r"'Comments \(([\d,\.]+)\)'",
- webpage, 'comment count', default=None))
-
- formats = []
-
- for video in re.findall(r'flashvars\.videoUrl([^=]+?)\s*=\s*"(https?://[^"]+)"', webpage):
- video_url = video[1]
- fmt = {
- 'url': video_url,
- 'format_id': video[0],
- }
- m = re.search(r'_(?P<width>\d+)x(?P<height>\d+)_(?P<vbr>\d+)k\.mp4$', video_url)
- if m:
- fmt.update({
- 'width': int(m.group('width')),
- 'height': int(m.group('height')),
- 'vbr': int(m.group('vbr')),
- })
- formats.append(fmt)
-
- self._sort_formats(formats)
-
- return {
- 'id': video_id,
- 'display_id': display_id,
- 'title': title,
- 'description': description,
- 'thumbnail': thumbnail,
- 'uploader': uploader,
- 'categories': categories,
- 'duration': duration,
- 'view_count': view_count,
- 'comment_count': comment_count,
- 'age_limit': 18,
- 'formats': formats,
- }
from .common import InfoExtractor
from ..utils import (
+ extract_attributes,
float_or_none,
+ get_element_by_class,
+ strip_or_none,
+ unified_timestamp,
)
class VRTIE(InfoExtractor):
- IE_DESC = 'deredactie.be, sporza.be, cobra.be and cobra.canvas.be'
- _VALID_URL = r'https?://(?:deredactie|sporza|cobra(?:\.canvas)?)\.be/cm/(?:[^/]+/)+(?P<id>[^/]+)/*'
- _TESTS = [
- # deredactie.be
- {
- 'url': 'http://deredactie.be/cm/vrtnieuws/videozone/programmas/journaal/EP_141025_JOL',
- 'md5': '4cebde1eb60a53782d4f3992cbd46ec8',
- 'info_dict': {
- 'id': '2129880',
- 'ext': 'flv',
- 'title': 'Het journaal L - 25/10/14',
- 'description': None,
- 'timestamp': 1414271750.949,
- 'upload_date': '20141025',
- 'duration': 929,
- },
- 'skip': 'HTTP Error 404: Not Found',
+ IE_DESC = 'VRT NWS, Flanders News, Flandern Info and Sporza'
+ _VALID_URL = r'https?://(?:www\.)?(?P<site>vrt\.be/vrtnws|sporza\.be)/[a-z]{2}/\d{4}/\d{2}/\d{2}/(?P<id>[^/?&#]+)'
+ _TESTS = [{
+ 'url': 'https://www.vrt.be/vrtnws/nl/2019/05/15/beelden-van-binnenkant-notre-dame-een-maand-na-de-brand/',
+ 'md5': 'e1663accf5cf13f375f3cd0d10476669',
+ 'info_dict': {
+ 'id': 'pbs-pub-7855fc7b-1448-49bc-b073-316cb60caa71$vid-2ca50305-c38a-4762-9890-65cbd098b7bd',
+ 'ext': 'mp4',
+ 'title': 'Beelden van binnenkant Notre-Dame, één maand na de brand',
+ 'description': 'Op maandagavond 15 april ging een deel van het dakgebinte van de Parijse kathedraal in vlammen op.',
+ 'timestamp': 1557924660,
+ 'upload_date': '20190515',
+ 'duration': 31.2,
},
- # sporza.be
- {
- 'url': 'http://sporza.be/cm/sporza/videozone/programmas/extratime/EP_141020_Extra_time',
- 'md5': '11f53088da9bf8e7cfc42456697953ff',
- 'info_dict': {
- 'id': '2124639',
- 'ext': 'flv',
- 'title': 'Bekijk Extra Time van 20 oktober',
- 'description': 'md5:83ac5415a4f1816c6a93f8138aef2426',
- 'timestamp': 1413835980.560,
- 'upload_date': '20141020',
- 'duration': 3238,
- },
- 'skip': 'HTTP Error 404: Not Found',
+ }, {
+ 'url': 'https://sporza.be/nl/2019/05/15/de-belgian-cats-zijn-klaar-voor-het-ek/',
+ 'md5': '910bba927566e9ab992278f647eb4b75',
+ 'info_dict': {
+ 'id': 'pbs-pub-f2c86a46-8138-413a-a4b9-a0015a16ce2c$vid-1f112b31-e58e-4379-908d-aca6d80f8818',
+ 'ext': 'mp4',
+ 'title': 'De Belgian Cats zijn klaar voor het EK mét Ann Wauters',
+ 'timestamp': 1557923760,
+ 'upload_date': '20190515',
+ 'duration': 115.17,
},
- # cobra.be
- {
- 'url': 'http://cobra.be/cm/cobra/videozone/rubriek/film-videozone/141022-mv-ellis-cafecorsari',
- 'md5': '78a2b060a5083c4f055449a72477409d',
- 'info_dict': {
- 'id': '2126050',
- 'ext': 'flv',
- 'title': 'Bret Easton Ellis in Café Corsari',
- 'description': 'md5:f699986e823f32fd6036c1855a724ee9',
- 'timestamp': 1413967500.494,
- 'upload_date': '20141022',
- 'duration': 661,
- },
- 'skip': 'HTTP Error 404: Not Found',
- },
- {
- # YouTube video
- 'url': 'http://deredactie.be/cm/vrtnieuws/videozone/nieuws/cultuurenmedia/1.2622957',
- 'md5': 'b8b93da1df1cea6c8556255a796b7d61',
- 'info_dict': {
- 'id': 'Wji-BZ0oCwg',
- 'ext': 'mp4',
- 'title': 'ROGUE ONE: A STAR WARS STORY Official Teaser Trailer',
- 'description': 'md5:8e468944dce15567a786a67f74262583',
- 'uploader': 'Star Wars',
- 'uploader_id': 'starwars',
- 'upload_date': '20160407',
- },
- 'add_ie': ['Youtube'],
- },
- {
- 'url': 'http://cobra.canvas.be/cm/cobra/videozone/rubriek/film-videozone/1.2377055',
- 'info_dict': {
- 'id': '2377055',
- 'ext': 'mp4',
- 'title': 'Cafe Derby',
- 'description': 'Lenny Van Wesemael debuteert met de langspeelfilm Café Derby. Een waar gebeurd maar ook verzonnen verhaal.',
- 'upload_date': '20150626',
- 'timestamp': 1435305240.769,
- },
- 'params': {
- # m3u8 download
- 'skip_download': True,
- }
- }
- ]
+ }, {
+ 'url': 'https://www.vrt.be/vrtnws/en/2019/05/15/belgium_s-eurovision-entry-falls-at-the-first-hurdle/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.vrt.be/vrtnws/de/2019/05/15/aus-fuer-eliott-im-halbfinale-des-eurosongfestivals/',
+ 'only_matching': True,
+ }]
+ _CLIENT_MAP = {
+ 'vrt.be/vrtnws': 'vrtnieuws',
+ 'sporza.be': 'sporza',
+ }
def _real_extract(self, url):
- video_id = self._match_id(url)
-
- webpage = self._download_webpage(url, video_id)
-
- video_id = self._search_regex(
- r'data-video-id="([^"]+)_[^"]+"', webpage, 'video id', fatal=False)
-
- src = self._search_regex(
- r'data-video-src="([^"]+)"', webpage, 'video src', default=None)
-
- video_type = self._search_regex(
- r'data-video-type="([^"]+)"', webpage, 'video type', default=None)
-
- if video_type == 'YouTubeVideo':
- return self.url_result(src, 'Youtube')
-
- formats = []
-
- mobj = re.search(
- r'data-video-iphone-server="(?P<server>[^"]+)"\s+data-video-iphone-path="(?P<path>[^"]+)"',
- webpage)
- if mobj:
- formats.extend(self._extract_m3u8_formats(
- '%s/%s' % (mobj.group('server'), mobj.group('path')),
- video_id, 'mp4', m3u8_id='hls', fatal=False))
-
- if src:
- formats = self._extract_wowza_formats(src, video_id)
- if 'data-video-geoblocking="true"' not in webpage:
- for f in formats:
- if f['url'].startswith('rtsp://'):
- http_format = f.copy()
- http_format.update({
- 'url': f['url'].replace('rtsp://', 'http://').replace('vod.', 'download.').replace('/_definst_/', '/').replace('mp4:', ''),
- 'format_id': f['format_id'].replace('rtsp', 'http'),
- 'protocol': 'http',
- })
- formats.append(http_format)
-
- if not formats and 'data-video-geoblocking="true"' in webpage:
- self.raise_geo_restricted('This video is only available in Belgium')
-
- self._sort_formats(formats)
-
- title = self._og_search_title(webpage)
- description = self._og_search_description(webpage, default=None)
- thumbnail = self._og_search_thumbnail(webpage)
- timestamp = float_or_none(self._search_regex(
- r'data-video-sitestat-pubdate="(\d+)"', webpage, 'timestamp', fatal=False), 1000)
- duration = float_or_none(self._search_regex(
- r'data-video-duration="(\d+)"', webpage, 'duration', fatal=False), 1000)
+ site, display_id = re.match(self._VALID_URL, url).groups()
+ webpage = self._download_webpage(url, display_id)
+ attrs = extract_attributes(self._search_regex(
+ r'(<[^>]+class="vrtvideo"[^>]*>)', webpage, 'vrt video'))
+
+ asset_id = attrs['data-videoid']
+ publication_id = attrs.get('data-publicationid')
+ if publication_id:
+ asset_id = publication_id + '$' + asset_id
+ client = attrs.get('data-client') or self._CLIENT_MAP[site]
+
+ title = strip_or_none(get_element_by_class(
+ 'vrt-title', webpage) or self._html_search_meta(
+ ['og:title', 'twitter:title', 'name'], webpage))
+ description = self._html_search_meta(
+ ['og:description', 'twitter:description', 'description'], webpage)
+ if description == '…':
+ description = None
+ timestamp = unified_timestamp(self._html_search_meta(
+ 'article:published_time', webpage))
return {
- 'id': video_id,
+ '_type': 'url_transparent',
+ 'id': asset_id,
+ 'display_id': display_id,
'title': title,
'description': description,
- 'thumbnail': thumbnail,
+ 'thumbnail': attrs.get('data-posterimage'),
'timestamp': timestamp,
- 'duration': duration,
- 'formats': formats,
+ 'duration': float_or_none(attrs.get('data-duration'), 1000),
+ 'url': 'https://mediazone.vrt.be/api/v1/%s/assets/%s' % (client, asset_id),
+ 'ie_key': 'Canvas',
}
from .common import InfoExtractor
from ..compat import (
+ compat_HTTPError,
compat_urllib_parse_urlencode,
compat_urllib_parse,
)
from ..utils import (
+ ExtractorError,
float_or_none,
int_or_none,
)
_API_DOMAIN = None
_API_PARAMS = {}
_CMS_SIGNING = {}
+ _TOKEN = None
+ _TOKEN_SECRET = ''
def _call_api(self, path, video_id, note, data=None):
+ # https://tools.ietf.org/html/rfc5849#section-3
base_url = self._API_DOMAIN + '/core/' + path
- encoded_query = compat_urllib_parse_urlencode({
- 'oauth_consumer_key': self._API_PARAMS['oAuthKey'],
- 'oauth_nonce': ''.join([random.choice(string.ascii_letters) for _ in range(32)]),
- 'oauth_signature_method': 'HMAC-SHA1',
- 'oauth_timestamp': int(time.time()),
- 'oauth_version': '1.0',
- })
+ query = [
+ ('oauth_consumer_key', self._API_PARAMS['oAuthKey']),
+ ('oauth_nonce', ''.join([random.choice(string.ascii_letters) for _ in range(32)])),
+ ('oauth_signature_method', 'HMAC-SHA1'),
+ ('oauth_timestamp', int(time.time())),
+ ]
+ if self._TOKEN:
+ query.append(('oauth_token', self._TOKEN))
+ encoded_query = compat_urllib_parse_urlencode(query)
headers = self.geo_verification_headers()
if data:
data = json.dumps(data).encode()
headers['Content-Type'] = 'application/json'
- method = 'POST' if data else 'GET'
- base_string = '&'.join([method, compat_urllib_parse.quote(base_url, ''), compat_urllib_parse.quote(encoded_query, '')])
+ base_string = '&'.join([
+ 'POST' if data else 'GET',
+ compat_urllib_parse.quote(base_url, ''),
+ compat_urllib_parse.quote(encoded_query, '')])
oauth_signature = base64.b64encode(hmac.new(
- (self._API_PARAMS['oAuthSecret'] + '&').encode('ascii'),
+ (self._API_PARAMS['oAuthSecret'] + '&' + self._TOKEN_SECRET).encode('ascii'),
base_string.encode(), hashlib.sha1).digest()).decode()
encoded_query += '&oauth_signature=' + compat_urllib_parse.quote(oauth_signature, '')
- return self._download_json(
- '?'.join([base_url, encoded_query]), video_id,
- note='Downloading %s JSON metadata' % note, headers=headers, data=data)
+ try:
+ return self._download_json(
+ '?'.join([base_url, encoded_query]), video_id,
+ note='Downloading %s JSON metadata' % note, headers=headers, data=data)
+ except ExtractorError as e:
+ if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
+ raise ExtractorError(json.loads(e.cause.read().decode())['message'], expected=True)
+ raise
def _call_cms(self, path, video_id, note):
if not self._CMS_SIGNING:
self._API_DOMAIN + path, video_id, query=self._CMS_SIGNING,
note='Downloading %s JSON metadata' % note, headers=self.geo_verification_headers())
- def _set_api_params(self, webpage, video_id):
- if not self._API_PARAMS:
- self._API_PARAMS = self._parse_json(self._search_regex(
- r'window\.__APP_CONFIG__\s*=\s*({.+?})</script>',
- webpage, 'api config'), video_id)['cxApiParams']
- self._API_DOMAIN = self._API_PARAMS.get('apiDomain', 'https://api.vrv.co')
-
def _get_cms_resource(self, resource_key, video_id):
return self._call_api(
'cms_resource', video_id, 'resource path', data={
'resource_key': resource_key,
})['__links__']['cms_resource']['href']
+    def _real_initialize(self):
+        """Read the API parameters from the vrv.co front page.
+
+        The ``cxApiParams`` member of ``window.__APP_CONFIG__`` is stored in
+        ``_API_PARAMS``; ``_API_DOMAIN`` is taken from its ``apiDomain`` key
+        and defaults to ``https://api.vrv.co`` when that key is absent.
+        """
+        front_page = self._download_webpage(
+            'https://vrv.co/', None, headers=self.geo_verification_headers())
+        # The first pattern stops at ``</script>`` or ``;``; the second is a
+        # greedy fallback used when neither terminator follows the object.
+        app_config_patterns = [
+            r'window\.__APP_CONFIG__\s*=\s*({.+?})(?:</script>|;)',
+            r'window\.__APP_CONFIG__\s*=\s*({.+})'
+        ]
+        app_config = self._parse_json(
+            self._search_regex(app_config_patterns, front_page, 'app config'),
+            None)
+        self._API_PARAMS = app_config['cxApiParams']
+        self._API_DOMAIN = self._API_PARAMS.get('apiDomain', 'https://api.vrv.co')
+
class VRVIE(VRVBaseIE):
IE_NAME = 'vrv'
# m3u8 download
'skip_download': True,
},
+ }, {
+ # movie listing
+ 'url': 'https://vrv.co/watch/G6NQXZ1J6/Lily-CAT',
+ 'info_dict': {
+ 'id': 'G6NQXZ1J6',
+ 'title': 'Lily C.A.T',
+ 'description': 'md5:988b031e7809a6aeb60968be4af7db07',
+ },
+ 'playlist_count': 2,
}]
+ _NETRC_MACHINE = 'vrv'
+
+    def _real_initialize(self):
+        """Run the base initialization, then authenticate when possible.
+
+        If ``_get_login_info`` supplies an e-mail address, the credentials
+        are exchanged through ``authenticate/by:credentials`` and the
+        returned OAuth token and token secret are kept in ``_TOKEN`` and
+        ``_TOKEN_SECRET``. Without an e-mail address nothing further is done.
+        """
+        super(VRVIE, self)._real_initialize()
+
+        login, secret = self._get_login_info()
+        if login is not None:
+            credentials = self._call_api(
+                'authenticate/by:credentials', None, 'Token Credentials',
+                data={
+                    'email': login,
+                    'password': secret,
+                })
+            self._TOKEN = credentials['oauth_token']
+            self._TOKEN_SECRET = credentials['oauth_token_secret']
def _extract_vrv_formats(self, url, video_id, stream_format, audio_lang, hardsub_lang):
- if not url or stream_format not in ('hls', 'dash'):
+ if not url or stream_format not in ('hls', 'dash', 'adaptive_hls'):
return []
- assert audio_lang or hardsub_lang
stream_id_list = []
if audio_lang:
stream_id_list.append('audio-%s' % audio_lang)
if hardsub_lang:
stream_id_list.append('hardsub-%s' % hardsub_lang)
- stream_id = '-'.join(stream_id_list)
- format_id = '%s-%s' % (stream_format, stream_id)
- if stream_format == 'hls':
+ format_id = stream_format
+ if stream_id_list:
+ format_id += '-' + '-'.join(stream_id_list)
+ if 'hls' in stream_format:
adaptive_formats = self._extract_m3u8_formats(
url, video_id, 'mp4', m3u8_id=format_id,
- note='Downloading %s m3u8 information' % stream_id,
+ note='Downloading %s information' % format_id,
fatal=False)
elif stream_format == 'dash':
adaptive_formats = self._extract_mpd_formats(
url, video_id, mpd_id=format_id,
- note='Downloading %s MPD information' % stream_id,
+ note='Downloading %s information' % format_id,
fatal=False)
if audio_lang:
for f in adaptive_formats:
def _real_extract(self, url):
video_id = self._match_id(url)
- webpage = self._download_webpage(
- url, video_id,
- headers=self.geo_verification_headers())
- media_resource = self._parse_json(self._search_regex(
- [
- r'window\.__INITIAL_STATE__\s*=\s*({.+?})(?:</script>|;)',
- r'window\.__INITIAL_STATE__\s*=\s*({.+})'
- ], webpage, 'inital state'), video_id).get('watch', {}).get('mediaResource') or {}
-
- video_data = media_resource.get('json')
- if not video_data:
- self._set_api_params(webpage, video_id)
- episode_path = self._get_cms_resource(
- 'cms:/episodes/' + video_id, video_id)
- video_data = self._call_cms(episode_path, video_id, 'video')
+
+ object_data = self._call_cms(self._get_cms_resource(
+ 'cms:/objects/' + video_id, video_id), video_id, 'object')['items'][0]
+ resource_path = object_data['__links__']['resource']['href']
+ video_data = self._call_cms(resource_path, video_id, 'video')
title = video_data['title']
+ description = video_data.get('description')
- streams_json = media_resource.get('streams', {}).get('json', {})
- if not streams_json:
- self._set_api_params(webpage, video_id)
- streams_path = video_data['__links__']['streams']['href']
- streams_json = self._call_cms(streams_path, video_id, 'streams')
+ if video_data.get('__class__') == 'movie_listing':
+ items = self._call_cms(
+ video_data['__links__']['movie_listing/movies']['href'],
+ video_id, 'movie listing').get('items') or []
+ if len(items) != 1:
+ entries = []
+ for item in items:
+ item_id = item.get('id')
+ if not item_id:
+ continue
+ entries.append(self.url_result(
+ 'https://vrv.co/watch/' + item_id,
+ self.ie_key(), item_id, item.get('title')))
+ return self.playlist_result(entries, video_id, title, description)
+ video_data = items[0]
+
+ streams_path = video_data['__links__'].get('streams', {}).get('href')
+ if not streams_path:
+ self.raise_login_required()
+ streams_json = self._call_cms(streams_path, video_id, 'streams')
audio_locale = streams_json.get('audio_locale')
formats = []
self._sort_formats(formats)
subtitles = {}
- for subtitle in streams_json.get('subtitles', {}).values():
- subtitle_url = subtitle.get('url')
- if not subtitle_url:
- continue
- subtitles.setdefault(subtitle.get('locale', 'en-US'), []).append({
- 'url': subtitle_url,
- 'ext': subtitle.get('format', 'ass'),
- })
+ for k in ('captions', 'subtitles'):
+ for subtitle in streams_json.get(k, {}).values():
+ subtitle_url = subtitle.get('url')
+ if not subtitle_url:
+ continue
+ subtitles.setdefault(subtitle.get('locale', 'en-US'), []).append({
+ 'url': subtitle_url,
+ 'ext': subtitle.get('format', 'ass'),
+ })
thumbnails = []
for thumbnail in video_data.get('images', {}).get('thumbnails', []):
'formats': formats,
'subtitles': subtitles,
'thumbnails': thumbnails,
- 'description': video_data.get('description'),
+ 'description': description,
'duration': float_or_none(video_data.get('duration_ms'), 1000),
'uploader_id': video_data.get('channel_id'),
'series': video_data.get('series_title'),
def _real_extract(self, url):
series_id = self._match_id(url)
- webpage = self._download_webpage(
- url, series_id,
- headers=self.geo_verification_headers())
- self._set_api_params(webpage, series_id)
seasons_path = self._get_cms_resource(
'cms:/seasons?series_id=' + series_id, series_id)
seasons_data = self._call_cms(seasons_path, series_id, 'seasons')
webpage = self._download_webpage(
'https://vshare.io/v/%s/width-650/height-430/1' % video_id,
- video_id)
+ video_id, headers={'Referer': url})
title = self._html_search_regex(
r'<title>([^<]+)</title>', webpage, 'title')
class VVVVIDIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?vvvvid\.it/#!(?:show|anime|film|series)/(?P<show_id>\d+)/[^/]+/(?P<season_id>\d+)/(?P<id>[0-9]+)'
+ _VALID_URL = r'https?://(?:www\.)?vvvvid\.it/(?:#!)?(?:show|anime|film|series)/(?P<show_id>\d+)/[^/]+/(?P<season_id>\d+)/(?P<id>[0-9]+)'
_TESTS = [{
# video_type == 'video/vvvvid'
'url': 'https://www.vvvvid.it/#!show/434/perche-dovrei-guardarlo-di-dario-moccia/437/489048/ping-pong',
--- /dev/null
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ merge_dicts,
+ urljoin,
+)
+
+
+class WakanimIE(InfoExtractor):
+    # Extractor for wakanim.tv episode pages: reads the HLS manifest URL from
+    # the page, refuses manifests flagged as DRM protected and combines the
+    # page's JSON-LD metadata with the id, title and formats found here.
+    _VALID_URL = r'https://(?:www\.)?wakanim\.tv/[^/]+/v2/catalogue/episode/(?P<id>\d+)'
+    _TESTS = [{
+        'url': 'https://www.wakanim.tv/de/v2/catalogue/episode/2997/the-asterisk-war-omu-staffel-1-episode-02-omu',
+        'info_dict': {
+            'id': '2997',
+            'ext': 'mp4',
+            'title': 'Episode 02',
+            'description': 'md5:2927701ea2f7e901de8bfa8d39b2852d',
+            'series': 'The Asterisk War (OmU.)',
+            'season_number': 1,
+            'episode': 'Episode 02',
+            'episode_number': 2,
+        },
+        'params': {
+            'format': 'bestvideo',
+            'skip_download': True,
+        },
+    }, {
+        # DRM Protected
+        'url': 'https://www.wakanim.tv/de/v2/catalogue/episode/7843/sword-art-online-alicization-omu-arc-2-folge-15-omu',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        """Return the info dict for the episode page at ``url``.
+
+        Raises ``ExtractorError`` (marked as expected) when the manifest URL
+        announces ``cenc`` or ``cbcs-aapl`` encryption.
+        """
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+
+        # The page may give the manifest location relative to the page URL.
+        manifest_ref = self._search_regex(
+            r'file\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage, 'm3u8 url',
+            group='url')
+        m3u8_url = urljoin(url, manifest_ref)
+
+        # https://docs.microsoft.com/en-us/azure/media-services/previous/media-services-content-protection-overview#streaming-urls
+        encryption = self._search_regex(
+            r'encryption%3D(c(?:enc|bc(?:s-aapl)?))',
+            m3u8_url, 'encryption', default=None)
+        # A missing marker (None) and plain ``cbc`` both fall through here.
+        if encryption in ('cenc', 'cbcs-aapl'):
+            raise ExtractorError('This video is DRM protected.', expected=True)
+
+        formats = self._extract_m3u8_formats(
+            m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native',
+            m3u8_id='hls')
+
+        info = self._search_json_ld(webpage, video_id, default={})
+
+        title_patterns = (
+            r'<h1[^>]+\bclass=["\']episode_h1[^>]+\btitle=(["\'])(?P<title>(?:(?!\1).)+)\1',
+            r'<span[^>]+\bclass=["\']episode_title["\'][^>]*>(?P<title>[^<]+)',
+        )
+        title = self._search_regex(
+            title_patterns, webpage, 'title', default=None, group='title')
+
+        page_fields = {
+            'id': video_id,
+            'title': title,
+            'formats': formats,
+        }
+        # NOTE(review): presumably values already present in the JSON-LD
+        # dict win over these - confirm against merge_dicts.
+        return merge_dicts(info, page_fields)
class WeiboIE(InfoExtractor):
- _VALID_URL = r'https?://weibo\.com/[0-9]+/(?P<id>[a-zA-Z0-9]+)'
+ _VALID_URL = r'https?://(?:www\.)?weibo\.com/[0-9]+/(?P<id>[a-zA-Z0-9]+)'
_TEST = {
'url': 'https://weibo.com/6275294458/Fp6RGfbff?type=comment',
'info_dict': {
+++ /dev/null
-# coding: utf-8
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-from ..utils import (
- ExtractorError,
- int_or_none,
- qualities,
- remove_start,
-)
-
-
-class WrzutaIE(InfoExtractor):
- IE_NAME = 'wrzuta.pl'
-
- _VALID_URL = r'https?://(?P<uploader>[0-9a-zA-Z]+)\.wrzuta\.pl/(?P<typ>film|audio)/(?P<id>[0-9a-zA-Z]+)'
-
- _TESTS = [{
- 'url': 'http://laboratoriumdextera.wrzuta.pl/film/aq4hIZWrkBu/nike_football_the_last_game',
- 'md5': '9e67e05bed7c03b82488d87233a9efe7',
- 'info_dict': {
- 'id': 'aq4hIZWrkBu',
- 'ext': 'mp4',
- 'title': 'Nike Football: The Last Game',
- 'duration': 307,
- 'uploader_id': 'laboratoriumdextera',
- 'description': 'md5:7fb5ef3c21c5893375fda51d9b15d9cd',
- },
- 'skip': 'Redirected to wrzuta.pl',
- }, {
- 'url': 'http://vexling.wrzuta.pl/audio/01xBFabGXu6/james_horner_-_into_the_na_39_vi_world_bonus',
- 'md5': 'f80564fb5a2ec6ec59705ae2bf2ba56d',
- 'info_dict': {
- 'id': '01xBFabGXu6',
- 'ext': 'mp3',
- 'title': 'James Horner - Into The Na\'vi World [Bonus]',
- 'description': 'md5:30a70718b2cd9df3120fce4445b0263b',
- 'duration': 95,
- 'uploader_id': 'vexling',
- },
- }]
-
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('id')
- typ = mobj.group('typ')
- uploader = mobj.group('uploader')
-
- webpage, urlh = self._download_webpage_handle(url, video_id)
-
- if urlh.geturl() == 'http://www.wrzuta.pl/':
- raise ExtractorError('Video removed', expected=True)
-
- quality = qualities(['SD', 'MQ', 'HQ', 'HD'])
-
- audio_table = {'flv': 'mp3', 'webm': 'ogg', '???': 'mp3'}
-
- embedpage = self._download_json('http://www.wrzuta.pl/npp/embed/%s/%s' % (uploader, video_id), video_id)
-
- formats = []
- for media in embedpage['url']:
- fmt = media['type'].split('@')[0]
- if typ == 'audio':
- ext = audio_table.get(fmt, fmt)
- else:
- ext = fmt
-
- formats.append({
- 'format_id': '%s_%s' % (ext, media['quality'].lower()),
- 'url': media['url'],
- 'ext': ext,
- 'quality': quality(media['quality']),
- })
-
- self._sort_formats(formats)
-
- return {
- 'id': video_id,
- 'title': self._og_search_title(webpage),
- 'thumbnail': self._og_search_thumbnail(webpage),
- 'formats': formats,
- 'duration': int_or_none(embedpage['duration']),
- 'uploader_id': uploader,
- 'description': self._og_search_description(webpage),
- 'age_limit': embedpage.get('minimalAge', 0),
- }
-
-
-class WrzutaPlaylistIE(InfoExtractor):
- """
- this class covers extraction of wrzuta playlist entries
- the extraction process bases on following steps:
- * collect information of playlist size
- * download all entries provided on
- the playlist webpage (the playlist is split
- on two pages: first directly reached from webpage
- second: downloaded on demand by ajax call and rendered
- using the ajax call response)
- * in case size of extracted entries not reached total number of entries
- use the ajax call to collect the remaining entries
- """
-
- IE_NAME = 'wrzuta.pl:playlist'
- _VALID_URL = r'https?://(?P<uploader>[0-9a-zA-Z]+)\.wrzuta\.pl/playlista/(?P<id>[0-9a-zA-Z]+)'
- _TESTS = [{
- 'url': 'http://miromak71.wrzuta.pl/playlista/7XfO4vE84iR/moja_muza',
- 'playlist_mincount': 14,
- 'info_dict': {
- 'id': '7XfO4vE84iR',
- 'title': 'Moja muza',
- },
- }, {
- 'url': 'http://heroesf70.wrzuta.pl/playlista/6Nj3wQHx756/lipiec_-_lato_2015_muzyka_swiata',
- 'playlist_mincount': 144,
- 'info_dict': {
- 'id': '6Nj3wQHx756',
- 'title': 'Lipiec - Lato 2015 Muzyka Świata',
- },
- }, {
- 'url': 'http://miromak71.wrzuta.pl/playlista/7XfO4vE84iR',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- playlist_id = mobj.group('id')
- uploader = mobj.group('uploader')
-
- webpage = self._download_webpage(url, playlist_id)
-
- playlist_size = int_or_none(self._html_search_regex(
- (r'<div[^>]+class=["\']playlist-counter["\'][^>]*>\d+/(\d+)',
- r'<div[^>]+class=["\']all-counter["\'][^>]*>(.+?)</div>'),
- webpage, 'playlist size', default=None))
-
- playlist_title = remove_start(
- self._og_search_title(webpage), 'Playlista: ')
-
- entries = []
- if playlist_size:
- entries = [
- self.url_result(entry_url)
- for _, entry_url in re.findall(
- r'<a[^>]+href=(["\'])(http.+?)\1[^>]+class=["\']playlist-file-page',
- webpage)]
- if playlist_size > len(entries):
- playlist_content = self._download_json(
- 'http://%s.wrzuta.pl/xhr/get_playlist_offset/%s' % (uploader, playlist_id),
- playlist_id,
- 'Downloading playlist JSON',
- 'Unable to download playlist JSON')
- entries.extend([
- self.url_result(entry['filelink'])
- for entry in playlist_content.get('files', []) if entry.get('filelink')])
-
- return self.playlist_result(entries, playlist_id, playlist_title)
class XHamsterIE(InfoExtractor):
_VALID_URL = r'''(?x)
https?://
- (?:.+?\.)?xhamster\.com/
+ (?:.+?\.)?xhamster\.(?:com|one)/
(?:
movies/(?P<id>\d+)/(?P<display_id>[^/]*)\.html|
videos/(?P<display_id_2>[^/]*)-(?P<id_2>\d+)
# new URL schema
'url': 'https://pt.xhamster.com/videos/euro-pedal-pumping-7937821',
'only_matching': True,
+ }, {
+ 'url': 'https://xhamster.one/videos/femaleagent-shy-beauty-takes-the-bait-1509445',
+ 'only_matching': True,
}]
def _real_extract(self, url):
webpage, 'title', default=None,
group='title') or self._og_search_title(webpage)
- thumbnail = self._search_regex(
- (r'setThumbUrl\(\s*(["\'])(?P<thumbnail>(?:(?!\1).)+)\1',
- r'url_bigthumb=(?P<thumbnail>.+?)&'),
- webpage, 'thumbnail', fatal=False, group='thumbnail')
+ thumbnails = []
+ for preference, thumbnail in enumerate(('', '169')):
+ thumbnail_url = self._search_regex(
+ r'setThumbUrl%s\(\s*(["\'])(?P<thumbnail>(?:(?!\1).)+)\1' % thumbnail,
+ webpage, 'thumbnail', default=None, group='thumbnail')
+ if thumbnail_url:
+ thumbnails.append({
+ 'url': thumbnail_url,
+ 'preference': preference,
+ })
+
duration = int_or_none(self._og_search_property(
'duration', webpage, default=None)) or parse_duration(
self._search_regex(
'formats': formats,
'title': title,
'duration': duration,
- 'thumbnail': thumbnail,
+ 'thumbnails': thumbnails,
'age_limit': 18,
}
'id': query,
'entries': entries,
}
+
+
+class YahooGyaOPlayerIE(InfoExtractor):
+ IE_NAME = 'yahoo:gyao:player'
+ _VALID_URL = r'https?://(?:gyao\.yahoo\.co\.jp/(?:player|episode/[^/]+)|streaming\.yahoo\.co\.jp/c/y)/(?P<id>\d+/v\d+/v\d+|[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'
+ _TESTS = [{
+ 'url': 'https://gyao.yahoo.co.jp/player/00998/v00818/v0000000000000008564/',
+ 'info_dict': {
+ 'id': '5993125228001',
+ 'ext': 'mp4',
+ 'title': 'フューリー 【字幕版】',
+ 'description': 'md5:21e691c798a15330eda4db17a8fe45a5',
+ 'uploader_id': '4235717419001',
+ 'upload_date': '20190124',
+ 'timestamp': 1548294365,
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://streaming.yahoo.co.jp/c/y/01034/v00133/v0000000000000000706/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://gyao.yahoo.co.jp/episode/%E3%81%8D%E3%81%AE%E3%81%86%E4%BD%95%E9%A3%9F%E3%81%B9%E3%81%9F%EF%BC%9F%20%E7%AC%AC2%E8%A9%B1%202019%2F4%2F12%E6%94%BE%E9%80%81%E5%88%86/5cb02352-b725-409e-9f8d-88f947a9f682',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url).replace('/', ':')
+ video = self._download_json(
+ 'https://gyao.yahoo.co.jp/dam/v1/videos/' + video_id,
+ video_id, query={
+ 'fields': 'longDescription,title,videoId',
+ }, headers={
+ 'X-User-Agent': 'Unknown Pc GYAO!/2.0.0 Web',
+ })
+ return {
+ '_type': 'url_transparent',
+ 'id': video_id,
+ 'title': video['title'],
+ 'url': smuggle_url(
+ 'http://players.brightcove.net/4235717419001/default_default/index.html?videoId=' + video['videoId'],
+ {'geo_countries': ['JP']}),
+ 'description': video.get('longDescription'),
+ 'ie_key': BrightcoveNewIE.ie_key(),
+ }
+
+
+class YahooGyaOIE(InfoExtractor):
+ IE_NAME = 'yahoo:gyao'
+ _VALID_URL = r'https?://(?:gyao\.yahoo\.co\.jp/(?:p|title/[^/]+)|streaming\.yahoo\.co\.jp/p/y)/(?P<id>\d+/v\d+|[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'
+ _TESTS = [{
+ 'url': 'https://gyao.yahoo.co.jp/p/00449/v03102/',
+ 'info_dict': {
+ 'id': '00449:v03102',
+ },
+ 'playlist_count': 2,
+ }, {
+ 'url': 'https://streaming.yahoo.co.jp/p/y/01034/v00133/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://gyao.yahoo.co.jp/title/%E3%81%97%E3%82%83%E3%81%B9%E3%81%8F%E3%82%8A007/5b025a49-b2e5-4dc7-945c-09c6634afacf',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ program_id = self._match_id(url).replace('/', ':')
+ videos = self._download_json(
+ 'https://gyao.yahoo.co.jp/api/programs/%s/videos' % program_id, program_id)['videos']
+ entries = []
+ for video in videos:
+ video_id = video.get('id')
+ if not video_id:
+ continue
+ entries.append(self.url_result(
+ 'https://gyao.yahoo.co.jp/player/%s/' % video_id.replace(':', '/'),
+ YahooGyaOPlayerIE.ie_key(), video_id))
+ return self.playlist_result(entries, program_id)
'skip': 'Travis CI servers blocked by YandexMusic',
}
- def _get_track_url(self, storage_dir, track_id):
- data = self._download_json(
- 'http://music.yandex.ru/api/v1.5/handlers/api-jsonp.jsx?action=getTrackSrc&p=download-info/%s'
- % storage_dir,
- track_id, 'Downloading track location JSON')
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ album_id, track_id = mobj.group('album_id'), mobj.group('id')
- # Each string is now wrapped in a list, this is probably only temporarily thus
- # supporting both scenarios (see https://github.com/rg3/youtube-dl/issues/10193)
- for k, v in data.items():
- if v and isinstance(v, list):
- data[k] = v[0]
+ track = self._download_json(
+ 'http://music.yandex.ru/handlers/track.jsx?track=%s:%s' % (track_id, album_id),
+ track_id, 'Downloading track JSON')['track']
+ track_title = track['title']
- key = hashlib.md5(('XGRlBW9FXlekgbPrRHuSiA' + data['path'][1:] + data['s']).encode('utf-8')).hexdigest()
- storage = storage_dir.split('.')
+ download_data = self._download_json(
+ 'https://music.yandex.ru/api/v2.1/handlers/track/%s:%s/web-album_track-track-track-main/download/m' % (track_id, album_id),
+ track_id, 'Downloading track location url JSON',
+ headers={'X-Retpath-Y': url})
- return ('http://%s/get-mp3/%s/%s?track-id=%s&from=service-10-track&similarities-experiment=default'
- % (data['host'], key, data['ts'] + data['path'], storage[1]))
+ fd_data = self._download_json(
+ download_data['src'], track_id,
+ 'Downloading track location JSON',
+ query={'format': 'json'})
+ key = hashlib.md5(('XGRlBW9FXlekgbPrRHuSiA' + fd_data['path'][1:] + fd_data['s']).encode('utf-8')).hexdigest()
+ storage = track['storageDir'].split('.')
+ f_url = 'http://%s/get-mp3/%s/%s?track-id=%s ' % (fd_data['host'], key, fd_data['ts'] + fd_data['path'], storage[1])
- def _get_track_info(self, track):
thumbnail = None
cover_uri = track.get('albums', [{}])[0].get('coverUri')
if cover_uri:
if not thumbnail.startswith('http'):
thumbnail = 'http://' + thumbnail
- track_title = track['title']
track_info = {
- 'id': track['id'],
+ 'id': track_id,
'ext': 'mp3',
- 'url': self._get_track_url(track['storageDir'], track['id']),
+ 'url': f_url,
'filesize': int_or_none(track.get('fileSize')),
'duration': float_or_none(track.get('durationMs'), 1000),
'thumbnail': thumbnail,
'track': track_title,
+ 'acodec': download_data.get('codec'),
+ 'abr': int_or_none(download_data.get('bitrate')),
}
def extract_artist(artist_list):
})
else:
track_info['title'] = track_title
- return track_info
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- album_id, track_id = mobj.group('album_id'), mobj.group('id')
-
- track = self._download_json(
- 'http://music.yandex.ru/handlers/track.jsx?track=%s:%s' % (track_id, album_id),
- track_id, 'Downloading track JSON')['track']
-
- return self._get_track_info(track)
+ return track_info
class YandexMusicPlaylistBaseIE(YandexMusicBaseIE):
'skip': 'Travis CI servers blocked by YandexMusic',
}, {
# playlist exceeding the limit of 150 tracks shipped with webpage (see
- # https://github.com/rg3/youtube-dl/issues/6666)
+ # https://github.com/ytdl-org/youtube-dl/issues/6666)
'url': 'https://music.yandex.ru/users/ya.playlist/playlists/1036',
'info_dict': {
'id': '1036',
--- /dev/null
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ url_or_none,
+)
+
+
+class YandexVideoIE(InfoExtractor):
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:
+ yandex\.ru(?:/portal/(?:video|efir))?/?\?.*?stream_id=|
+ frontend\.vh\.yandex\.ru/player/
+ )
+ (?P<id>[\da-f]+)
+ '''
+ _TESTS = [{
+ 'url': 'https://yandex.ru/portal/video?stream_id=4dbb262b4fe5cf15a215de4f34eee34d',
+ 'md5': '33955d7ae052f15853dc41f35f17581c',
+ 'info_dict': {
+ 'id': '4dbb262b4fe5cf15a215de4f34eee34d',
+ 'ext': 'mp4',
+ 'title': 'В Нью-Йорке баржи и теплоход оторвались от причала и расплылись по Гудзону',
+ 'description': '',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'timestamp': 0,
+ 'duration': 30,
+ 'age_limit': 18,
+ },
+ }, {
+ 'url': 'https://yandex.ru/portal/efir?stream_id=4dbb36ec4e0526d58f9f2dc8f0ecf374&from=morda',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://yandex.ru/?stream_id=4dbb262b4fe5cf15a215de4f34eee34d',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://frontend.vh.yandex.ru/player/4dbb262b4fe5cf15a215de4f34eee34d?from=morda',
+ 'only_matching': True,
+ }, {
+ # vod-episode, series episode
+ 'url': 'https://yandex.ru/portal/video?stream_id=45b11db6e4b68797919c93751a938cee',
+ 'only_matching': True,
+ }, {
+ # episode, sports
+ 'url': 'https://yandex.ru/?stream_channel=1538487871&stream_id=4132a07f71fb0396be93d74b3477131d',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ content = self._download_json(
+ 'https://frontend.vh.yandex.ru/v22/player/%s.json' % video_id,
+ video_id, query={
+ 'stream_options': 'hires',
+ 'disable_trackings': 1,
+ })['content']
+
+ m3u8_url = url_or_none(content.get('content_url')) or url_or_none(
+ content['streams'][0]['url'])
+ title = content.get('title') or content.get('computed_title')
+
+ formats = self._extract_m3u8_formats(
+ m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls')
+ self._sort_formats(formats)
+
+ description = content.get('description')
+ thumbnail = content.get('thumbnail')
+ timestamp = (int_or_none(content.get('release_date'))
+ or int_or_none(content.get('release_date_ut'))
+ or int_or_none(content.get('start_time')))
+ duration = int_or_none(content.get('duration'))
+ series = content.get('program_title')
+ age_limit = int_or_none(content.get('restriction_age'))
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'timestamp': timestamp,
+ 'duration': duration,
+ 'series': series,
+ 'age_limit': age_limit,
+ 'formats': formats,
+ }
transform_source=lambda s: js_to_json(strip_jsonp(s))).get('html')
if playlist_data is None:
return [None, None]
- drama_list = (get_element_by_class('p-drama-grid', playlist_data) or
- get_element_by_class('p-drama-half-row', playlist_data))
+ drama_list = (get_element_by_class('p-drama-grid', playlist_data)
+ or get_element_by_class('p-drama-half-row', playlist_data))
if drama_list is None:
raise ExtractorError('No episodes found')
video_urls = re.findall(r'<a[^>]+href="([^"]+)"', drama_list)
from __future__ import unicode_literals
from .common import InfoExtractor
-from ..utils import urljoin
+from ..utils import (
+ parse_duration,
+ urljoin,
+)
class YourPornIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?yourporn\.sexy/post/(?P<id>[^/?#&.]+)'
- _TEST = {
+ _VALID_URL = r'https?://(?:www\.)?(?:yourporn\.sexy|sxyprn\.com)/post/(?P<id>[^/?#&.]+)'
+ _TESTS = [{
'url': 'https://yourporn.sexy/post/57ffcb2e1179b.html',
'md5': '6f8682b6464033d87acaa7a8ff0c092e',
'info_dict': {
'ext': 'mp4',
'title': 'md5:c9f43630bd968267672651ba905a7d35',
'thumbnail': r're:^https?://.*\.jpg$',
- 'age_limit': 18
+ 'duration': 165,
+ 'age_limit': 18,
},
- }
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://sxyprn.com/post/57ffcb2e1179b.html',
+ 'only_matching': True,
+ }]
def _real_extract(self, url):
video_id = self._match_id(url)
self._search_regex(
r'data-vnfo=(["\'])(?P<data>{.+?})\1', webpage, 'data info',
group='data'),
- video_id)[video_id]).replace('/cdn/', '/cdn3/')
+ video_id)[video_id]).replace('/cdn/', '/cdn4/')
title = (self._search_regex(
r'<[^>]+\bclass=["\']PostEditTA[^>]+>([^<]+)', webpage, 'title',
default=None) or self._og_search_description(webpage)).strip()
thumbnail = self._og_search_thumbnail(webpage)
+ duration = parse_duration(self._search_regex(
+ r'duration\s*:\s*<[^>]+>([\d:]+)', webpage, 'duration',
+ default=None))
return {
'id': video_id,
'url': video_url,
'title': title,
'thumbnail': thumbnail,
- 'age_limit': 18
+ 'duration': duration,
+ 'age_limit': 18,
}
from ..swfinterp import SWFInterpreter
from ..compat import (
compat_chr,
+ compat_HTTPError,
compat_kwargs,
compat_parse_qs,
compat_urllib_parse_unquote,
)
from ..utils import (
clean_html,
+ dict_get,
error_to_compat_str,
ExtractorError,
float_or_none,
if not mobj:
break
- more = self._download_json(
- 'https://youtube.com/%s' % mobj.group('more'), playlist_id,
- 'Downloading page #%s' % page_num,
- transform_source=uppercase_escape)
+ count = 0
+ retries = 3
+ while count <= retries:
+ try:
+ # Downloading page may result in intermittent 5xx HTTP error
+ # that is usually worked around with a retry
+ more = self._download_json(
+ 'https://youtube.com/%s' % mobj.group('more'), playlist_id,
+ 'Downloading page #%s%s'
+ % (page_num, ' (retry #%d)' % count if count else ''),
+ transform_source=uppercase_escape)
+ break
+ except ExtractorError as e:
+ if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503):
+ count += 1
+ if count <= retries:
+ continue
+ raise
+
content_html = more['content_html']
if not content_html.strip():
# Some webpages show a "Load more" button but they don't
(?:www\.)?hooktube\.com/|
(?:www\.)?yourepeat\.com/|
tube\.majestyc\.net/|
- (?:www\.)?invidio\.us/|
+ (?:(?:www|dev)\.)?invidio\.us/|
+ (?:www\.)?invidiou\.sh/|
+ (?:www\.)?invidious\.snopyta\.org/|
+ (?:www\.)?invidious\.kabi\.tk/|
+ (?:www\.)?vid\.wxzm\.sx/|
youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains
(?:.*?\#/)? # handle anchor (#/) redirect urls
(?: # the various things that can precede the ID:
'135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
'136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
'137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
- '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/rg3/youtube-dl/issues/4559)
+ '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
'160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
'212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
'264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
# RTMP (unnamed)
'_rtmp': {'protocol': 'rtmp'},
}
- _SUBTITLE_FORMATS = ('ttml', 'vtt')
+ _SUBTITLE_FORMATS = ('srv1', 'srv2', 'srv3', 'ttml', 'vtt')
_GEO_BYPASS = False
'age_limit': 18,
},
},
- # video_info is None (https://github.com/rg3/youtube-dl/issues/4421)
+ # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
# YouTube Red ad is not captured for creator
{
'url': '__2ABJjxzNo',
'DASH manifest missing',
]
},
- # Olympics (https://github.com/rg3/youtube-dl/issues/4431)
+ # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
{
'url': 'lqQg6PlCWgI',
'info_dict': {
},
'skip': 'This live event has ended.',
},
- # Extraction from multiple DASH manifests (https://github.com/rg3/youtube-dl/pull/6097)
+ # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
{
'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
'info_dict': {
'skip': 'This video is not available.',
},
{
- # Multifeed video with comma in title (see https://github.com/rg3/youtube-dl/issues/8536)
+ # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
'info_dict': {
'id': 'gVfLd0zydlo',
'only_matching': True,
},
{
- # Title with JS-like syntax "};" (see https://github.com/rg3/youtube-dl/issues/7468)
+ # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
# Also tests cut-off URL expansion in video description (see
- # https://github.com/rg3/youtube-dl/issues/1892,
- # https://github.com/rg3/youtube-dl/issues/8164)
+ # https://github.com/ytdl-org/youtube-dl/issues/1892,
+ # https://github.com/ytdl-org/youtube-dl/issues/8164)
'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
'info_dict': {
'id': 'lsguqyKfVQg',
'creator': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
'track': 'Dark Walk - Position Music',
'artist': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
+ 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
},
'params': {
'skip_download': True,
},
},
{
- # Tags with '};' (see https://github.com/rg3/youtube-dl/issues/7468)
+ # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
'only_matching': True,
},
'only_matching': True,
},
{
- # YouTube Red paid video (https://github.com/rg3/youtube-dl/issues/10059)
+ # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
'only_matching': True,
},
'skip_download': True,
'youtube_include_dash_manifest': False,
},
- }
+ },
+ {
+ # Youtube Music Auto-generated description
+ 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
+ 'info_dict': {
+ 'id': 'MgNrAu2pzNs',
+ 'ext': 'mp4',
+ 'title': 'Voyeur Girl',
+ 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
+ 'upload_date': '20190312',
+ 'uploader': 'Various Artists - Topic',
+ 'uploader_id': 'UCVWKBi1ELZn0QX2CBLSkiyw',
+ 'artist': 'Stephen',
+ 'track': 'Voyeur Girl',
+ 'album': 'it\'s too much love to know my dear',
+ 'release_date': '20190313',
+ 'release_year': 2019,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ {
+ # Youtube Music Auto-generated description
+ # Retrieve 'artist' field from 'Artist:' in video description
+ # when it is present on youtube music video
+ 'url': 'https://www.youtube.com/watch?v=k0jLE7tTwjY',
+ 'info_dict': {
+ 'id': 'k0jLE7tTwjY',
+ 'ext': 'mp4',
+ 'title': 'Latch Feat. Sam Smith',
+ 'description': 'md5:3cb1e8101a7c85fcba9b4fb41b951335',
+ 'upload_date': '20150110',
+ 'uploader': 'Various Artists - Topic',
+ 'uploader_id': 'UCNkEcmYdjrH4RqtNgh7BZ9w',
+ 'artist': 'Disclosure',
+ 'track': 'Latch Feat. Sam Smith',
+ 'album': 'Latch Featuring Sam Smith',
+ 'release_date': '20121008',
+ 'release_year': 2012,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ {
+ # Youtube Music Auto-generated description
+ # handle multiple artists on youtube music video
+ 'url': 'https://www.youtube.com/watch?v=74qn0eJSjpA',
+ 'info_dict': {
+ 'id': '74qn0eJSjpA',
+ 'ext': 'mp4',
+ 'title': 'Eastside',
+ 'description': 'md5:290516bb73dcbfab0dcc4efe6c3de5f2',
+ 'upload_date': '20180710',
+ 'uploader': 'Benny Blanco - Topic',
+ 'uploader_id': 'UCzqz_ksRu_WkIzmivMdIS7A',
+ 'artist': 'benny blanco, Halsey, Khalid',
+ 'track': 'Eastside',
+ 'album': 'Eastside',
+ 'release_date': '20180713',
+ 'release_year': 2018,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ {
+ # Youtube Music Auto-generated description
+ # handle youtube music video with release_year and no release_date
+ 'url': 'https://www.youtube.com/watch?v=-hcAI0g-f5M',
+ 'info_dict': {
+ 'id': '-hcAI0g-f5M',
+ 'ext': 'mp4',
+ 'title': 'Put It On Me',
+ 'description': 'md5:93c55acc682ae7b0c668f2e34e1c069e',
+ 'upload_date': '20180426',
+ 'uploader': 'Matt Maeson - Topic',
+ 'uploader_id': 'UCnEkIGqtGcQMLk73Kp-Q5LQ',
+ 'artist': 'Matt Maeson',
+ 'track': 'Put It On Me',
+ 'album': 'The Hearse',
+ 'release_date': None,
+ 'release_year': 2018,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
]
def __init__(self, *args, **kwargs):
# regex won't capture the whole JSON. Yet working around by trying more
# concrete regex first keeping in mind proper quoted string handling
# to be implemented in future that will replace this workaround (see
- # https://github.com/rg3/youtube-dl/issues/7468,
- # https://github.com/rg3/youtube-dl/pull/7599)
+ # https://github.com/ytdl-org/youtube-dl/issues/7468,
+ # https://github.com/ytdl-org/youtube-dl/pull/7599)
r';ytplayer\.config\s*=\s*({.+?});ytplayer',
r';ytplayer\.config\s*=\s*({.+?});',
)
def extract_view_count(v_info):
return int_or_none(try_get(v_info, lambda x: x['view_count'][0]))
+ def extract_token(v_info):
+ return dict_get(v_info, ('account_playback_token', 'accountPlaybackToken', 'token'))
+
player_response = {}
# Get video info
add_dash_mpd(video_info)
# Rental video is not rented but preview is available (e.g.
# https://www.youtube.com/watch?v=yYr8q0y5Jfg,
- # https://github.com/rg3/youtube-dl/issues/10532)
+ # https://github.com/ytdl-org/youtube-dl/issues/10532)
if not video_info and args.get('ypc_vid'):
return self.url_result(
args['ypc_vid'], YoutubeIE.ie_key(), video_id=args['ypc_vid'])
# are missing from DASH manifest pointed by webpage's dashmpd, some - from DASH
# manifest pointed by get_video_info's dashmpd).
# The general idea is to take a union of itags of both DASH manifests (for example
- # video with such 'manifest behavior' see https://github.com/rg3/youtube-dl/issues/6093)
+ # video with such 'manifest behavior' see https://github.com/ytdl-org/youtube-dl/issues/6093)
self.report_video_info_webpage_download(video_id)
- for el in ('info', 'embedded', 'detailpage', 'vevo', ''):
+ for el in ('embedded', 'detailpage', 'vevo', ''):
query = {
'video_id': video_id,
'ps': 'default',
view_count = extract_view_count(get_video_info)
if not video_info:
video_info = get_video_info
- if 'token' in get_video_info:
+ get_token = extract_token(get_video_info)
+ if get_token:
# Different get_video_info requests may report different results, e.g.
# some may report video unavailability, but some may serve it without
- # any complaint (see https://github.com/rg3/youtube-dl/issues/7362,
+ # any complaint (see https://github.com/ytdl-org/youtube-dl/issues/7362,
# the original webpage as well as el=info and el=embedded get_video_info
# requests report video unavailability due to geo restriction while
# el=detailpage succeeds and returns valid data). This is probably
# due to YouTube measures against IP ranges of hosting providers.
# Working around by preferring the first succeeded video_info containing
# the token if no such video_info yet was found.
- if 'token' not in video_info:
+ token = extract_token(video_info)
+ if not token:
video_info = get_video_info
break
r'(?s)<h1[^>]+id="unavailable-message"[^>]*>(.+?)</h1>',
video_webpage, 'unavailable message', default=None)
- if 'token' not in video_info:
- if 'reason' in video_info:
- if 'The uploader has not made this video available in your country.' in video_info['reason']:
- regions_allowed = self._html_search_meta(
- 'regionsAllowed', video_webpage, default=None)
- countries = regions_allowed.split(',') if regions_allowed else None
- self.raise_geo_restricted(
- msg=video_info['reason'][0], countries=countries)
- reason = video_info['reason'][0]
- if 'Invalid parameters' in reason:
- unavailable_message = extract_unavailable_message()
- if unavailable_message:
- reason = unavailable_message
- raise ExtractorError(
- 'YouTube said: %s' % reason,
- expected=True, video_id=video_id)
- else:
- raise ExtractorError(
- '"token" parameter not in video info for unknown reason',
- video_id=video_id)
-
- if video_info.get('license_info'):
- raise ExtractorError('This video is DRM protected.', expected=True)
+ if not video_info:
+ unavailable_message = extract_unavailable_message()
+ if not unavailable_message:
+ unavailable_message = 'Unable to extract video data'
+ raise ExtractorError(
+ 'YouTube said: %s' % unavailable_message, expected=True, video_id=video_id)
video_details = try_get(
player_response, lambda x: x['videoDetails'], dict) or {}
for feed in multifeed_metadata_list.split(','):
# Unquote should take place before split on comma (,) since textual
# fields may contain comma as well (see
- # https://github.com/rg3/youtube-dl/issues/8536)
+ # https://github.com/ytdl-org/youtube-dl/issues/8536)
feed_data = compat_parse_qs(compat_urllib_parse_unquote_plus(feed))
entries.append({
'_type': 'url_transparent',
# Check for "rental" videos
if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
- raise ExtractorError('"rental" videos not supported. See https://github.com/rg3/youtube-dl/issues/359 for more information.', expected=True)
+ raise ExtractorError('"rental" videos not supported. See https://github.com/ytdl-org/youtube-dl/issues/359 for more information.', expected=True)
def _extract_filesize(media_url):
return int_or_none(self._search_regex(
elif not is_live and (len(video_info.get('url_encoded_fmt_stream_map', [''])[0]) >= 1 or len(video_info.get('adaptive_fmts', [''])[0]) >= 1):
encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts', [''])[0]
if 'rtmpe%3Dyes' in encoded_url_map:
- raise ExtractorError('rtmpe downloads are not supported, see https://github.com/rg3/youtube-dl/issues/343 for more information.', expected=True)
+ raise ExtractorError('rtmpe downloads are not supported, see https://github.com/ytdl-org/youtube-dl/issues/343 for more information.', expected=True)
formats_spec = {}
fmt_list = video_info.get('fmt_list', [''])[0]
if fmt_list:
formats = []
for url_data_str in encoded_url_map.split(','):
url_data = compat_parse_qs(url_data_str)
- if 'itag' not in url_data or 'url' not in url_data:
+ if 'itag' not in url_data or 'url' not in url_data or url_data.get('drm_families'):
continue
stream_type = int_or_none(try_get(url_data, lambda x: x['stream_type'][0]))
# Unsupported FORMAT_STREAM_TYPE_OTF
signature = self._decrypt_signature(
encrypted_sig, video_id, player_url, age_gate)
- url += '&signature=' + signature
+ sp = try_get(url_data, lambda x: x['sp'][0], compat_str) or 'signature'
+ url += '&%s=%s' % (sp, signature)
if 'ratebypass' not in url:
url += '&ratebypass=yes'
dct.update(formats_spec[format_id])
# Some itags are not included in DASH manifest thus corresponding formats will
- # lack metadata (see https://github.com/rg3/youtube-dl/pull/5993).
+ # lack metadata (see https://github.com/ytdl-org/youtube-dl/pull/5993).
# Trying to extract metadata from url_encoded_fmt_stream_map entry.
mobj = re.search(r'^(?P<width>\d+)[xX](?P<height>\d+)$', url_data.get('size', [''])[0])
width, height = (int(mobj.group('width')), int(mobj.group('height'))) if mobj else (None, None)
url_or_none(try_get(
player_response,
lambda x: x['streamingData']['hlsManifestUrl'],
- compat_str)) or
- url_or_none(try_get(
+ compat_str))
+ or url_or_none(try_get(
video_info, lambda x: x['hlsvp'][0], compat_str)))
if manifest_url:
formats = []
else:
self._downloader.report_warning('unable to extract uploader nickname')
- channel_id = self._html_search_meta(
- 'channelId', video_webpage, 'channel id')
+ channel_id = (
+ str_or_none(video_details.get('channelId'))
+ or self._html_search_meta(
+ 'channelId', video_webpage, 'channel id', default=None)
+ or self._search_regex(
+ r'data-channel-external-id=(["\'])(?P<id>(?:(?!\1).)+)\1',
+ video_webpage, 'channel id', default=None, group='id'))
channel_url = 'http://www.youtube.com/channel/%s' % channel_id if channel_id else None
# thumbnail image
track = extract_meta('Song')
artist = extract_meta('Artist')
+ album = extract_meta('Album')
+
+ # Youtube Music Auto-generated description
+ release_date = release_year = None
+ if video_description:
+ mobj = re.search(r'(?s)Provided to YouTube by [^\n]+\n+(?P<track>[^·]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?', video_description)
+ if mobj:
+ if not track:
+ track = mobj.group('track').strip()
+ if not artist:
+ artist = mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·'))
+ if not album:
+ album = mobj.group('album'.strip())
+ release_year = mobj.group('release_year')
+ release_date = mobj.group('release_date')
+ if release_date:
+ release_date = release_date.replace('-', '')
+ if not release_year:
+ release_year = int(release_date[:4])
+ if release_year:
+ release_year = int(release_year)
m_episode = re.search(
r'<div[^>]+id="watch7-headline"[^>]*>\s*<span[^>]*>.*?>(?P<series>[^<]+)</a></b>\s*S(?P<season>\d+)\s*•\s*E(?P<episode>\d+)</span>',
r'<[^>]+class=["\']watch-view-count[^>]+>\s*([\d,\s]+)', video_webpage,
'view count', default=None))
+ average_rating = (
+ float_or_none(video_details.get('averageRating'))
+ or try_get(video_info, lambda x: float_or_none(x['avg_rating'][0])))
+
# subtitles
video_subtitles = self.extract_subtitles(video_id, video_webpage)
automatic_captions = self.extract_automatic_captions(video_id, video_webpage)
# Remove the formats we found through non-DASH, they
# contain less info and it can be wrong, because we use
# fixed values (for example the resolution). See
- # https://github.com/rg3/youtube-dl/issues/5774 for an
+ # https://github.com/ytdl-org/youtube-dl/issues/5774 for an
# example.
formats = [f for f in formats if f['format_id'] not in dash_formats.keys()]
formats.extend(dash_formats.values())
if f.get('vcodec') != 'none':
f['stretched_ratio'] = ratio
+ if not formats:
+ token = extract_token(video_info)
+ if not token:
+ if 'reason' in video_info:
+ if 'The uploader has not made this video available in your country.' in video_info['reason']:
+ regions_allowed = self._html_search_meta(
+ 'regionsAllowed', video_webpage, default=None)
+ countries = regions_allowed.split(',') if regions_allowed else None
+ self.raise_geo_restricted(
+ msg=video_info['reason'][0], countries=countries)
+ reason = video_info['reason'][0]
+ if 'Invalid parameters' in reason:
+ unavailable_message = extract_unavailable_message()
+ if unavailable_message:
+ reason = unavailable_message
+ raise ExtractorError(
+ 'YouTube said: %s' % reason,
+ expected=True, video_id=video_id)
+ else:
+ raise ExtractorError(
+ '"token" parameter not in video info for unknown reason',
+ video_id=video_id)
+
+ if not formats and (video_info.get('license_info') or try_get(player_response, lambda x: x['streamingData']['licenseInfos'])):
+ raise ExtractorError('This video is DRM protected.', expected=True)
+
self._sort_formats(formats)
self.mark_watched(video_id, video_info, player_response)
'view_count': view_count,
'like_count': like_count,
'dislike_count': dislike_count,
- 'average_rating': float_or_none(video_info.get('avg_rating', [None])[0]),
+ 'average_rating': average_rating,
'formats': formats,
'is_live': is_live,
'start_time': start_time,
'episode_number': episode_number,
'track': track,
'artist': artist,
+ 'album': album,
+ 'release_date': release_date,
+ 'release_year': release_year,
}
search_title = lambda class_name: get_element_by_attribute('class', class_name, webpage)
title_span = (
- search_title('playlist-title') or
- search_title('title long-title') or
- search_title('title'))
+ search_title('playlist-title')
+ or search_title('title long-title')
+ or search_title('title'))
title = clean_html(title_span)
return self.playlist_result(url_results, playlist_id, title)
url = self._TEMPLATE_URL % playlist_id
page = self._download_webpage(url, playlist_id)
- # the yt-alert-message now has tabindex attribute (see https://github.com/rg3/youtube-dl/issues/11604)
+ # the yt-alert-message now has tabindex attribute (see https://github.com/ytdl-org/youtube-dl/issues/11604)
for match in re.findall(r'<div class="yt-alert-message"[^>]*>([^<]+)</div>', page):
match = match.strip()
# Check if the playlist exists or is private
return playlist
# Some playlist URLs don't actually serve a playlist (see
- # https://github.com/rg3/youtube-dl/issues/10537).
+ # https://github.com/ytdl-org/youtube-dl/issues/10537).
# Fallback to plain video extraction if there is a video id
# along with playlist id.
return self.url_result(video_id, 'Youtube', video_id=video_id)
return next(
chan['cid'] for chan in channel_list
if chan.get('cid') and (
- chan.get('display_alias') == channel_name or
- chan.get('cid') == channel_name))
+ chan.get('display_alias') == channel_name
+ or chan.get('cid') == channel_name))
except StopIteration:
raise ExtractorError('Could not extract channel id')
'url': 'https://www.1und1.tv/watch/abc/123-abc',
'only_matching': True,
}]
+
+
+class SaltTVIE(ZattooIE):
+ _NETRC_MACHINE = 'salttv'
+ _HOST = 'tv.salt.ch'
+ _VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST)
+
+ _TESTS = [{
+ 'url': 'https://tv.salt.ch/watch/abc/123-abc',
+ 'only_matching': True,
+ }]
except IOError:
return default # silently skip if file is not present
try:
- # FIXME: https://github.com/rg3/youtube-dl/commit/dfe5fa49aed02cf36ba9f743b11b0903554b5e56
+ # FIXME: https://github.com/ytdl-org/youtube-dl/commit/dfe5fa49aed02cf36ba9f743b11b0903554b5e56
contents = optionf.read()
if sys.version_info < (3,):
contents = contents.decode(preferredencoding())
from .common import AudioConversionError, PostProcessor
-from ..compat import (
- compat_subprocess_get_DEVNULL,
-)
from ..utils import (
encodeArgument,
encodeFilename,
return self._paths[self.probe_basename]
def get_audio_codec(self, path):
- if not self.probe_available:
- raise PostProcessingError('ffprobe or avprobe not found. Please install one.')
+ if not self.probe_available and not self.available:
+ raise PostProcessingError('ffprobe/avprobe and ffmpeg/avconv not found. Please install one.')
try:
- cmd = [
- encodeFilename(self.probe_executable, True),
- encodeArgument('-show_streams'),
- encodeFilename(self._ffmpeg_filename_argument(path), True)]
+ if self.probe_available:
+ cmd = [
+ encodeFilename(self.probe_executable, True),
+ encodeArgument('-show_streams')]
+ else:
+ cmd = [
+ encodeFilename(self.executable, True),
+ encodeArgument('-i')]
+ cmd.append(encodeFilename(self._ffmpeg_filename_argument(path), True))
if self._downloader.params.get('verbose', False):
- self._downloader.to_screen('[debug] %s command line: %s' % (self.basename, shell_quote(cmd)))
- handle = subprocess.Popen(cmd, stderr=compat_subprocess_get_DEVNULL(), stdout=subprocess.PIPE, stdin=subprocess.PIPE)
- output = handle.communicate()[0]
- if handle.wait() != 0:
+ self._downloader.to_screen(
+ '[debug] %s command line: %s' % (self.basename, shell_quote(cmd)))
+ handle = subprocess.Popen(
+ cmd, stderr=subprocess.PIPE,
+ stdout=subprocess.PIPE, stdin=subprocess.PIPE)
+ stdout_data, stderr_data = handle.communicate()
+ expected_ret = 0 if self.probe_available else 1
+ if handle.wait() != expected_ret:
return None
except (IOError, OSError):
return None
- audio_codec = None
- for line in output.decode('ascii', 'ignore').split('\n'):
- if line.startswith('codec_name='):
- audio_codec = line.split('=')[1].strip()
- elif line.strip() == 'codec_type=audio' and audio_codec is not None:
- return audio_codec
+ output = (stdout_data if self.probe_available else stderr_data).decode('ascii', 'ignore')
+ if self.probe_available:
+ audio_codec = None
+ for line in output.split('\n'):
+ if line.startswith('codec_name='):
+ audio_codec = line.split('=')[1].strip()
+ elif line.strip() == 'codec_type=audio' and audio_codec is not None:
+ return audio_codec
+ else:
+ # Stream #FILE_INDEX:STREAM_INDEX[STREAM_ID](LANGUAGE): CODEC_TYPE: CODEC_NAME
+ mobj = re.search(
+ r'Stream\s*#\d+:\d+(?:\[0x[0-9a-f]+\])?(?:\([a-z]{3}\))?:\s*Audio:\s*([0-9a-z]+)',
+ output)
+ if mobj:
+ return mobj.group(1)
return None
def run_ffmpeg_multiple_files(self, input_paths, out_path, opts):
encodeArgument('-i'),
encodeFilename(self._ffmpeg_filename_argument(path), True)
])
- cmd = ([encodeFilename(self.executable, True), encodeArgument('-y')] +
- files_cmd +
- [encodeArgument(o) for o in opts] +
- [encodeFilename(self._ffmpeg_filename_argument(out_path), True)])
+ cmd = [encodeFilename(self.executable, True), encodeArgument('-y')]
+ # avconv does not have repeat option
+ if self.basename == 'ffmpeg':
+ cmd += [encodeArgument('-loglevel'), encodeArgument('repeat+info')]
+ cmd += (files_cmd
+ + [encodeArgument(o) for o in opts]
+ + [encodeFilename(self._ffmpeg_filename_argument(out_path), True)])
if self._downloader.params.get('verbose', False):
self._downloader.to_screen('[debug] ffmpeg command line: %s' % shell_quote(cmd))
information['ext'] = extension
# If we download foo.mp3 and convert it to... foo.mp3, then don't delete foo.mp3, silly.
- if (new_path == path or
- (self._nopostoverwrites and os.path.exists(encodeFilename(new_path)))):
+ if (new_path == path
+ or (self._nopostoverwrites and os.path.exists(encodeFilename(new_path)))):
self._downloader.to_screen('[ffmpeg] Post-process file %s exists, skipping' % new_path)
return [], information
# Don't copy the existing subtitles, we may be running the
# postprocessor a second time
'-map', '-0:s',
+ # Don't copy Apple TV chapters track, bin_data (see #19042, #19024,
+ # https://trac.ffmpeg.org/ticket/6016)
+ '-map', '-0:d',
]
if information['ext'] == 'mp4':
opts += ['-c:s', 'mov_text']
except XAttrMetadataError as e:
if e.reason == 'NO_SPACE':
self._downloader.report_warning(
- 'There\'s no disk space left, disk quota exceeded or filesystem xattr limit exceeded. ' +
- (('Some ' if num_written else '') + 'extended attributes are not written.').capitalize())
+ 'There\'s no disk space left, disk quota exceeded or filesystem xattr limit exceeded. '
+ + (('Some ' if num_written else '') + 'extended attributes are not written.').capitalize())
elif e.reason == 'VALUE_TOO_LONG':
self._downloader.report_warning(
'Unable to write extended attributes due to too long values.')
def update_self(to_screen, verbose, opener):
"""Update the program file with the latest version from the repository"""
- UPDATE_URL = 'https://rg3.github.io/youtube-dl/update/'
+ UPDATE_URL = 'https://yt-dl.org/update/'
VERSION_URL = UPDATE_URL + 'LATEST_VERSION'
JSON_URL = UPDATE_URL + 'versions.json'
UPDATES_RSA_KEY = (0x9d60ee4d8f805312fdb15a62f87b95bd66177b91df176765d13514a0f1754bcd2057295c5b6f1d35daa6742c3ffc9a82d3e118861c207995a8031e151d863c9927e304576bc80692bc8e094896fcf11b66f3e29e04e3a71e9a11558558acea1840aec37fc396fb6b65dc81a1c4144e03bd1c011de62e3f1357b327d08426fe93, 65537)
# needed for sanitizing filenames in restricted mode
ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ',
- itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUYP', ['ss'],
- 'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuypy')))
+ itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUY', ['TH', 'ss'],
+ 'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuy', ['th'], 'y')))
DATE_FORMATS = (
'%d %B %Y',
])
PACKED_CODES_RE = r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)"
-JSON_LD_RE = r'(?is)<script[^>]+type=(["\'])application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>'
+JSON_LD_RE = r'(?is)<script[^>]+type=(["\']?)application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>'
def preferredencoding():
return 'http:%s' % url
# Fix some common typos seen so far
COMMON_TYPOS = (
- # https://github.com/rg3/youtube-dl/issues/15649
+ # https://github.com/ytdl-org/youtube-dl/issues/15649
(r'^httpss://', r'https://'),
# https://bx1.be/lives/direct-tv/
(r'^rmtp([es]?)://', r'rtmp\1://'),
numstr = '0%s' % numstr
else:
base = 10
- # See https://github.com/rg3/youtube-dl/issues/7518
+ # See https://github.com/ytdl-org/youtube-dl/issues/7518
try:
return compat_chr(int(numstr, base))
except ValueError:
self.msg = msg
# Parsing code and msg
- if (self.code in (errno.ENOSPC, errno.EDQUOT) or
- 'No space left' in self.msg or 'Disk quota excedded' in self.msg):
+ if (self.code in (errno.ENOSPC, errno.EDQUOT)
+ or 'No space left' in self.msg or 'Disk quota excedded' in self.msg):
self.reason = 'NO_SPACE'
elif self.code == errno.E2BIG or 'Argument list too long' in self.msg:
self.reason = 'VALUE_TOO_LONG'
def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs):
# Working around python 2 bug (see http://bugs.python.org/issue17849) by limiting
# expected HTTP responses to meet HTTP/1.0 or later (see also
- # https://github.com/rg3/youtube-dl/issues/6727)
+ # https://github.com/ytdl-org/youtube-dl/issues/6727)
if sys.version_info < (3, 0):
kwargs['strict'] = True
hc = http_class(*args, **compat_kwargs(kwargs))
resp.msg = old_resp.msg
del resp.headers['Content-encoding']
# Percent-encode redirect URL of Location HTTP header to satisfy RFC 3986 (see
- # https://github.com/rg3/youtube-dl/issues/6457).
+ # https://github.com/ytdl-org/youtube-dl/issues/6457).
if 300 <= resp.code < 400:
location = resp.headers.get('Location')
if location:
class YoutubeDLCookieJar(compat_cookiejar.MozillaCookieJar):
+ _HTTPONLY_PREFIX = '#HttpOnly_'
+
def save(self, filename=None, ignore_discard=False, ignore_expires=False):
# Store session cookies with `expires` set to 0 instead of an empty
# string
compat_cookiejar.MozillaCookieJar.save(self, filename, ignore_discard, ignore_expires)
def load(self, filename=None, ignore_discard=False, ignore_expires=False):
- compat_cookiejar.MozillaCookieJar.load(self, filename, ignore_discard, ignore_expires)
+ """Load cookies from a file."""
+ if filename is None:
+ if self.filename is not None:
+ filename = self.filename
+ else:
+ raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)
+
+ cf = io.StringIO()
+ with open(filename) as f:
+ for line in f:
+ if line.startswith(self._HTTPONLY_PREFIX):
+ line = line[len(self._HTTPONLY_PREFIX):]
+ cf.write(compat_str(line))
+ cf.seek(0)
+ self._really_load(cf, filename, ignore_discard, ignore_expires)
# Session cookies are denoted by either `expires` field set to
# an empty string or 0. MozillaCookieJar only recognizes the former
# (see [1]). So we need force the latter to be recognized as session
def http_response(self, request, response):
# Python 2 will choke on next HTTP request in row if there are non-ASCII
# characters in Set-Cookie HTTP header of last response (see
- # https://github.com/rg3/youtube-dl/issues/6769).
+ # https://github.com/ytdl-org/youtube-dl/issues/6769).
# In order to at least prevent crashing we will percent encode Set-Cookie
# header before HTTPCookieProcessor starts processing it.
# if sys.version_info < (3, 0) and response.headers:
def not_a_console(handle):
if handle == INVALID_HANDLE_VALUE or handle is None:
return True
- return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR or
- GetConsoleMode(handle, ctypes.byref(ctypes.wintypes.DWORD())) == 0)
+ return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR
+ or GetConsoleMode(handle, ctypes.byref(ctypes.wintypes.DWORD())) == 0)
if not_a_console(h):
return False
if _windows_write_string(s, out):
return
- if ('b' in getattr(out, 'mode', '') or
- sys.version_info[0] < 3): # Python 2 lies about mode of sys.stderr
+ if ('b' in getattr(out, 'mode', '')
+ or sys.version_info[0] < 3): # Python 2 lies about mode of sys.stderr
byt = s.encode(encoding or preferredencoding(), 'ignore')
out.write(byt)
elif hasattr(out, 'buffer'):
return {}
+def parse_bitrate(s):
+ if not isinstance(s, compat_str):
+ return
+ mobj = re.search(r'\b(\d+)\s*kbps', s)
+ if mobj:
+ return int(mobj.group(1))
+
+
def month_by_name(name, lang='en'):
""" Return the number of a month by (locale-independently) English name """
path = path.decode('utf-8')
if not isinstance(path, compat_str) or not path:
return None
- if re.match(r'^(?:https?:)?//', path):
+ if re.match(r'^(?:[a-zA-Z][a-zA-Z0-9+-.]*:)?//', path):
return path
if isinstance(base, bytes):
base = base.decode('utf-8')
return default
try:
return int(v) * invscale // scale
- except ValueError:
+ except (ValueError, TypeError):
return default
return default
try:
return float(v) * invscale / scale
- except ValueError:
+ except (ValueError, TypeError):
return default
return v if isinstance(v, bool) else default
-def strip_or_none(v):
- return None if v is None else v.strip()
+def strip_or_none(v, default=None):
+ return v.strip() if isinstance(v, compat_str) else default
def url_or_none(url):
try:
# STDIN should be redirected too. On UNIX-like systems, ffmpeg triggers
# SIGTTOU if youtube-dl is run in the background.
- # See https://github.com/rg3/youtube-dl/issues/955#issuecomment-209789656
+ # See https://github.com/ytdl-org/youtube-dl/issues/955#issuecomment-209789656
out, _ = subprocess.Popen(
[encodeArgument(exe)] + args,
stdin=subprocess.PIPE,
for k, v in a_dict.items():
if v is None:
continue
- if (k not in merged or
- (isinstance(v, compat_str) and v and
- isinstance(merged[k], compat_str) and
- not merged[k])):
+ if (k not in merged
+ or (isinstance(v, compat_str) and v
+ and isinstance(merged[k], compat_str)
+ and not merged[k])):
merged[k] = v
return merged
if m:
op = COMPARISON_OPERATORS[m.group('op')]
actual_value = dct.get(m.group('key'))
- if (m.group('quotedstrval') is not None or
- m.group('strval') is not None or
+ if (m.group('quotedstrval') is not None
+ or m.group('strval') is not None
# If the original field is a string and matching comparisonvalue is
# a number we should respect the origin of the original field
# and process comparison value as a string (see
- # https://github.com/rg3/youtube-dl/issues/11082).
- actual_value is not None and m.group('intval') is not None and
- isinstance(actual_value, compat_str)):
+ # https://github.com/ytdl-org/youtube-dl/issues/11082).
+ or actual_value is not None and m.group('intval') is not None
+ and isinstance(actual_value, compat_str)):
if m.group('op') not in ('=', '!='):
raise ValueError(
'Operator %s does not support string values!' % m.group('op'))
# Based on png2str() written by @gdkchan and improved by @yokrysty
-# Originally posted at https://github.com/rg3/youtube-dl/issues/9706
+# Originally posted at https://github.com/ytdl-org/youtube-dl/issues/9706
def decode_png(png_data):
# Reference: https://www.w3.org/TR/PNG/
header = png_data[8:]
if hasattr(xattr, 'set'): # pyxattr
# Unicode arguments are not supported in python-pyxattr until
# version 0.5.0
- # See https://github.com/rg3/youtube-dl/issues/5498
+ # See https://github.com/ytdl-org/youtube-dl/issues/5498
pyxattr_required_version = '0.5.0'
if version_tuple(xattr.__version__) < version_tuple(pyxattr_required_version):
# TODO: fallback to CLI tools
executable = 'xattr'
opts = ['-w', key, value]
- cmd = ([encodeFilename(executable, True)] +
- [encodeArgument(o) for o in opts] +
- [encodeFilename(path, True)])
+ cmd = ([encodeFilename(executable, True)]
+ + [encodeArgument(o) for o in opts]
+ + [encodeFilename(path, True)])
try:
p = subprocess.Popen(
from __future__ import unicode_literals
-__version__ = '2019.01.17'
+__version__ = '2019.06.08'