+version 2017.09.24
+
+Core
++ [options] Accept lrc as a subtitle conversion target format (#14292)
+* [utils] Fix handling of raw TTML subtitles (#14191)
+
+Extractors
+* [24video] Fix timestamp extraction and make it non fatal (#14295)
++ [24video] Add support for 24video.adult (#14295)
++ [kakao] Add support for tv.kakao.com (#12298, #14007)
++ [twitter] Add support for URLs without user id (#14270)
++ [americastestkitchen] Add support for americastestkitchen.com (#10764,
+ #13996)
+* [generic] Fix support for multiple HTML5 videos on one page (#14080)
+* [mixcloud] Fix extraction (#14088, #14132)
++ [lynda] Add support for educourse.ga (#14286)
+* [beeg] Fix extraction (#14275)
+* [nbcsports:vplayer] Correct theplatform URL (#13873)
+* [twitter] Fix duration extraction (#14141)
+* [tvplay] Bypass geo restriction
++ [heise] Add support for YouTube embeds (#14109)
++ [popcorntv] Add support for popcorntv.it (#5914, #14211)
+* [viki] Update app data (#14181)
+* [morningstar] Relax URL regular expression (#14222)
+* [openload] Fix extraction (#14225, #14257)
+* [noovo] Fix extraction (#14214)
+* [dailymotion:playlist] Relax URL regular expression (#14219)
++ [twitch] Add support for go.twitch.tv URLs (#14215)
+* [vgtv] Relax URL regular expression (#14223)
+
+
+version 2017.09.15
+
+Core
+* [downloader/fragment] Restart inconsistent incomplete fragment downloads
+ (#13731)
+* [YoutubeDL] Download raw subtitle files (#12909, #14191)
+
+Extractors
+* [condenast] Fix extraction (#14196, #14207)
++ [orf] Add support for f4m stories
+* [tv4] Relax URL regular expression (#14206)
+* [animeondemand] Bypass geo restriction
++ [animeondemand] Add support for flash videos (#9944)
+
+
+version 2017.09.11
+
+Extractors
+* [rutube:playlist] Fix suitable (#14166)
+
+
+version 2017.09.10
+
+Core
++ [utils] Introduce bool_or_none
+* [YoutubeDL] Ensure dir existence for each requested format (#14116)
+
+Extractors
+* [fox] Fix extraction (#14147)
+* [rutube] Use bool_or_none
+* [rutube] Rework and generalize playlist extractors (#13565)
++ [rutube:playlist] Add support for playlists (#13534, #13565)
++ [radiocanada] Add fallback for title extraction (#14145)
+* [vk] Use dedicated YouTube embeds extraction routine
+* [vice] Use dedicated YouTube embeds extraction routine
+* [cracked] Use dedicated YouTube embeds extraction routine
+* [chilloutzone] Use dedicated YouTube embeds extraction routine
+* [abcnews] Use dedicated YouTube embeds extraction routine
+* [youtube] Separate methods for embeds extraction
+* [redtube] Fix formats extraction (#14122)
+* [arte] Relax unavailability check (#14112)
++ [manyvids] Add support for preview videos from manyvids.com (#14053, #14059)
+* [vidme:user] Relax URL regular expression (#14054)
+* [bpb] Fix extraction (#14043, #14086)
+* [soundcloud] Fix download URL with private tracks (#14093)
+* [aliexpress:live] Add support for live.aliexpress.com (#13698, #13707)
+* [viidea] Capture and output lecture error message (#14099)
+* [radiocanada] Skip unsupported platforms (#14100)
+
+
+version 2017.09.02
+
+Extractors
+* [youtube] Force old layout for each webpage (#14068, #14072, #14074, #14076,
+ #14077, #14079, #14082, #14083, #14094, #14095, #14096)
+* [youtube] Fix upload date extraction (#14065)
++ [charlierose] Add support for episodes (#14062)
++ [bbccouk] Add support for w-prefixed ids (#14056)
+* [googledrive] Extend URL regular expression (#9785)
++ [googledrive] Add support for source format (#14046)
+* [pornhd] Fix extraction (#14005)
+
+
+version 2017.08.27.1
+
+Extractors
+* [youtube] Fix extraction with --youtube-skip-dash-manifest enabled (#14037)
+
+
+version 2017.08.27
+
+Core
++ [extractor/common] Extract height and format id for HTML5 videos (#14034)
+* [downloader/http] Rework HTTP downloader (#506, #809, #2849, #4240, #6023,
+ #8625, #9483)
+ * Simplify code and split into separate routines to facilitate maintaining
+ * Make retry mechanism work on errors during actual download, not only
+ during the connection establishment phase
+ * Retry on ECONNRESET and ETIMEDOUT during reading data from network
+ * Retry on content too short
+ * Show error description on retry
+
+Extractors
+* [generic] Lower preference for extraction from LD-JSON
+* [rai] Fix audio formats extraction (#14024)
+* [youtube] Fix controversy videos extraction (#14027, #14029)
+* [mixcloud] Fix extraction (#14015, #14020)
+
+
+version 2017.08.23
+
+Core
++ [extractor/common] Introduce _parse_xml
+* [extractor/common] Make HLS and DASH extraction in _parse_html5_media_entries
+ non fatal (#13970)
+* [utils] Fix unescapeHTML for misformed string like "&a&quot;" (#13935)
+
+Extractors
+* [cbc:watch] Bypass geo restriction (#13993)
+* [toutv] Relax DRM check (#13994)
++ [googledrive] Add support for subtitles (#13619, #13638)
+* [pornhub] Relax uploader regular expression (#13906, #13975)
+* [bandcamp:album] Extract track titles (#13962)
++ [bbccouk] Add support for events URLs (#13893)
++ [liveleak] Support multi-video pages (#6542)
++ [liveleak] Support another liveleak embedding pattern (#13336)
+* [cda] Fix extraction (#13935)
++ [laola1tv] Add support for tv.ittf.com (#13965)
+* [mixcloud] Fix extraction (#13958, #13974, #13980, #14003)
+
+
+version 2017.08.18
+
+Core
+* [YoutubeDL] Sanitize byte string format URLs (#13951)
++ [extractor/common] Add support for float durations in _parse_mpd_formats
+ (#13919)
+
+Extractors
+* [arte] Detect unavailable videos (#13945)
+* [generic] Convert redirect URLs to unicode strings (#13951)
+* [udemy] Fix paid course detection (#13943)
+* [pluralsight] Use RPC API for course extraction (#13937)
++ [clippit] Add support for clippituser.tv
++ [qqmusic] Support new URL schemes (#13805)
+* [periscope] Renew HLS extraction (#13917)
+* [mixcloud] Extract decrypt key
+
+
+version 2017.08.13
+
+Core
+* [YoutubeDL] Make sure format id is not empty
+* [extractor/common] Make _family_friendly_search optional
+* [extractor/common] Respect source's type attribute for HTML5 media (#13892)
+
+Extractors
+* [pornhub:playlistbase] Skip videos from drop-down menu (#12819, #13902)
++ [fourtube] Add support for pornerbros.com (#6022)
++ [fourtube] Add support for porntube.com (#7859, #13901)
++ [fourtube] Add support for fux.com
+* [limelight] Improve embeds detection (#13895)
++ [reddit] Add support for v.redd.it and reddit.com (#13847)
+* [aparat] Extract all formats (#13887)
+* [mixcloud] Fix play info decryption (#13885)
++ [generic] Add support for vzaar embeds (#13876)
+
+
+version 2017.08.09
+
+Core
+* [utils] Skip missing params in cli_bool_option (#13865)
+
+Extractors
+* [xxxymovies] Fix title extraction (#13868)
++ [nick] Add support for nick.com.pl (#13860)
+* [mixcloud] Fix play info decryption (#13867)
+* [20min] Fix embeds extraction (#13852)
+* [dplayit] Fix extraction (#13851)
++ [niconico] Support videos with multiple formats (#13522)
++ [niconico] Support HTML5-only videos (#13806)
+
+
+version 2017.08.06
+
+Core
+* Use relative paths for DASH fragments (#12990)
+
+Extractors
+* [pluralsight] Fix format selection
+- [mpora] Remove extractor (#13826)
++ [voot] Add support for voot.com (#10255, #11644, #11814, #12350, #13218)
+* [vlive:channel] Limit number of videos per page to 100 (#13830)
+* [podomatic] Extend URL regular expression (#13827)
+* [cinchcast] Extend URL regular expression
+* [yandexdisk] Relax URL regular expression (#13824)
+* [vidme] Extract DASH and HLS formats
+- [teamfour] Remove extractor (#13782)
+* [pornhd] Fix extraction (#13783)
+* [udemy] Fix subtitles extraction (#13812)
+* [mlb] Extend URL regular expression (#13740, #13773)
++ [pbs] Add support for new URL schema (#13801)
+* [nrktv] Update API host (#13796)
+
+
+version 2017.07.30.1
+
+Core
+* [downloader/hls] Use redirect URL as manifest base (#13755)
+* [options] Correctly hide login info from debug outputs (#13696)
+
+Extractors
++ [watchbox] Add support for watchbox.de (#13739)
+- [clipfish] Remove extractor
+* [youjizz] Fix extraction (#13744)
++ [generic] Add support for another ooyala embed pattern (#13727)
++ [ard] Add support for live streams (#13771)
+* [soundcloud] Update client id
++ [soundcloud:trackstation] Add support for track stations (#13733)
+* [svtplay] Use geo verification proxy for API request
+* [svtplay] Update API URL (#13767)
++ [yandexdisk] Add support for yadi.sk (#13755)
++ [megaphone] Add support for megaphone.fm
+* [amcnetworks] Make rating optional (#12453)
+* [cloudy] Fix extraction (#13737)
++ [nickru] Add support for nickelodeon.ru
+* [mtv] Improve thumbnail extraction
+* [nick] Automate geo-restriction bypass (#13711)
+* [niconico] Improve error reporting (#13696)
+
+
+version 2017.07.23
+
+Core
+* [YoutubeDL] Improve default format specification (#13704)
+* [YoutubeDL] Do not override id, extractor and extractor_key for
+ url_transparent entities
+* [extractor/common] Fix playlist_from_matches
+
+Extractors
+* [itv] Fix production id extraction (#13671, #13703)
+* [vidio] Make duration non fatal and fix typo
+* [mtv] Skip missing video parts (#13690)
+* [sportbox:embed] Fix extraction
++ [npo] Add support for npo3.nl URLs (#13695)
+* [dramafever] Remove video id from title (#13699)
++ [egghead:lesson] Add support for lessons (#6635)
+* [funnyordie] Extract more metadata (#13677)
+* [youku:show] Fix playlist extraction (#13248)
++ [dispeak] Recognize sevt subdomain (#13276)
+* [adn] Improve error reporting (#13663)
+* [crunchyroll] Relax series and season regular expression (#13659)
++ [spiegel:article] Add support for nexx iframe embeds (#13029)
++ [nexx:embed] Add support for iframe embeds
+* [nexx] Improve JS embed extraction
++ [pearvideo] Add support for pearvideo.com (#13031)
+
+
+version 2017.07.15
+
+Core
+* [YoutubeDL] Don't expand environment variables in meta fields (#13637)
+
+Extractors
+* [spiegeltv] Delegate extraction to nexx extractor (#13159)
++ [nexx] Add support for nexx.cloud (#10807, #13465)
+* [generic] Fix rutube embeds extraction (#13641)
+* [karrierevideos] Fix title extraction (#13641)
+* [youtube] Don't capture YouTube Red ad for creator meta field (#13621)
+* [slideshare] Fix extraction (#13617)
++ [5tv] Add another video URL pattern (#13354, #13606)
+* [drtv] Make HLS and HDS extraction non fatal
+* [ted] Fix subtitles extraction (#13628, #13629)
+* [vine] Make sure the title won't be empty
++ [twitter] Support HLS streams in vmap URLs
++ [periscope] Support pscp.tv URLs in embedded frames
+* [twitter] Extract mp4 URLs via mobile API (#12726)
+* [niconico] Fix authentication error handling (#12486)
+* [giantbomb] Extract m3u8 formats (#13626)
++ [vlive:playlist] Add support for playlists (#13613)
+
+
+version 2017.07.09
+
+Core
++ [extractor/common] Add support for AMP tags in _parse_html5_media_entries
++ [utils] Support attributes with no values in get_elements_by_attribute
+
+Extractors
++ [dailymail] Add support for embeds
++ [joj] Add support for joj.sk (#13268)
+* [abc.net.au:iview] Extract more formats (#13492, #13489)
+* [egghead:course] Fix extraction (#6635, #13370)
++ [cjsw] Add support for cjsw.com (#13525)
++ [eagleplatform] Add support for referrer protected videos (#13557)
++ [eagleplatform] Add support for another embed pattern (#13557)
+* [veoh] Extend URL regular expression (#13601)
+* [npo:live] Fix live stream id extraction (#13568, #13605)
+* [googledrive] Fix height extraction (#13603)
++ [dailymotion] Add support for new layout (#13580)
+- [yam] Remove extractor
+* [xhamster] Extract all formats and fix duration extraction (#13593)
++ [xhamster] Add support for new URL schema (#13593)
+* [espn] Extend URL regular expression (#13244, #13549)
+* [kaltura] Fix typo in subtitles extraction (#13569)
+* [vier] Adapt extraction to redesign (#13575)
+
+
+version 2017.07.02
+
+Core
+* [extractor/common] Improve _json_ld
+
+Extractors
++ [thisoldhouse] Add more fallbacks for video id
+* [thisoldhouse] Fix video id extraction (#13540, #13541)
+* [xfileshare] Extend format regular expression (#13536)
+* [ted] Fix extraction (#13535)
++ [tastytrade] Add support for tastytrade.com (#13521)
+* [dplayit] Relax video id regular expression (#13524)
++ [generic] Extract more generic metadata (#13527)
++ [bbccouk] Capture and output error message (#13501, #13518)
+* [cbsnews] Relax video info regular expression (#13284, #13503)
++ [facebook] Add support for plugin video embeds and multiple embeds (#13493)
+* [soundcloud] Switch to https for API requests (#13502)
+* [pandatv] Switch to https for API and download URLs
++ [pandatv] Add support for https URLs (#13491)
++ [niconico] Support sp subdomain (#13494)
+
+
+version 2017.06.25
+
+Core
++ [adobepass] Add support for DIRECTV NOW (mso ATTOTT) (#13472)
+* [YoutubeDL] Skip malformed formats for better extraction robustness
+
+Extractors
++ [wsj] Add support for barrons.com (#13470)
++ [ign] Add another video id pattern (#13328)
++ [raiplay:live] Add support for live streams (#13414)
++ [redbulltv] Add support for live videos and segments (#13486)
++ [onetpl] Add support for videos embedded via pulsembed (#13482)
+* [ooyala] Make extraction more robust
+* [ooyala] Skip empty format URLs (#13471, #13476)
+* [hgtv.com:show] Fix typo
+
+
+version 2017.06.23
+
+Core
+* [adobepass] Fix extraction on older python 2.6
+
+Extractors
+* [youtube] Adapt to new automatic captions rendition (#13467)
+* [hgtv.com:show] Relax video config regular expression (#13279, #13461)
+* [drtuber] Fix formats extraction (#12058)
+* [youporn] Fix upload date extraction
+* [youporn] Improve formats extraction
+* [youporn] Fix title extraction (#13456)
+* [googledrive] Fix formats sorting (#13443)
+* [watchindianporn] Fix extraction (#13411, #13415)
++ [vimeo] Add fallback mp4 extension for original format
++ [ruv] Add support for ruv.is (#13396)
+* [viu] Fix extraction on older python 2.6
+* [pandora.tv] Fix upload_date extraction (#12846)
++ [asiancrush] Add support for asiancrush.com (#13420)
+
+
+version 2017.06.18
+
+Core
+* [downloader/common] Use utils.shell_quote for debug command line
+* [utils] Use compat_shlex_quote in shell_quote
+* [postprocessor/execafterdownload] Encode command line (#13407)
+* [compat] Fix compat_shlex_quote on Windows (#5889, #10254)
+* [postprocessor/metadatafromtitle] Fix missing optional meta fields processing
+ in --metadata-from-title (#13408)
+* [extractor/common] Fix json dumping with --geo-bypass
++ [extractor/common] Improve jwplayer subtitles extraction
++ [extractor/common] Improve jwplayer formats extraction (#13379)
+
+Extractors
+* [polskieradio] Fix extraction (#13392)
++ [xfileshare] Add support for fastvideo.me (#13385)
+* [bilibili] Fix extraction of videos with double quotes in titles (#13387)
+* [4tube] Fix extraction (#13381, #13382)
++ [disney] Add support for disneychannel.de (#13383)
+* [npo] Improve URL regular expression (#13376)
++ [corus] Add support for showcase.ca
++ [corus] Add support for history.ca (#13359)
+
+
+version 2017.06.12
+
+Core
+* [utils] Handle compat_HTMLParseError in extract_attributes (#13349)
++ [compat] Introduce compat_HTMLParseError
+* [utils] Improve unified_timestamp
+* [extractor/generic] Ensure format id is unicode string
+* [extractor/common] Return unicode string from _match_id
++ [YoutubeDL] Sanitize more fields (#13313)
+
+Extractors
++ [xfileshare] Add support for rapidvideo.tv (#13348)
+* [xfileshare] Modernize and pass Referer
++ [rutv] Add support for testplayer.vgtrk.com (#13347)
++ [newgrounds] Extract more metadata (#13232)
++ [newgrounds:playlist] Add support for playlists (#10611)
+* [newgrounds] Improve formats and uploader extraction (#13346)
+* [msn] Fix formats extraction
+* [turbo] Ensure format id is string
+* [sexu] Ensure height is int
+* [jove] Ensure comment count is int
+* [golem] Ensure format id is string
+* [gfycat] Ensure filesize is int
+* [foxgay] Ensure height is int
+* [flickr] Ensure format id is string
+* [sohu] Fix numeric fields
+* [safari] Improve authentication detection (#13319)
+* [liveleak] Ensure height is int (#13313)
+* [streamango] Make title optional (#13292)
+* [rtlnl] Improve URL regular expression (#13295)
+* [tvplayer] Fix extraction (#13291)
+
+
+version 2017.06.05
+
+Core
+* [YoutubeDL] Don't emit ANSI escape codes on Windows (#13270)
+
+Extractors
++ [bandcamp:weekly] Add support for bandcamp weekly (#12758)
+* [pornhub:playlist] Fix extraction (#13281)
+- [godtv] Remove extractor (#13175)
+* [safari] Fix typo (#13252)
+* [youtube] Improve chapters extraction (#13247)
+* [1tv] Lower preference for HTTP formats (#13246)
+* [francetv] Relax URL regular expression
+* [drbonanza] Fix extraction (#13231)
+* [packtpub] Fix authentication (#13240)
+
+
+version 2017.05.29
+
+Extractors
+* [youtube] Fix DASH MPD extraction for videos with non-encrypted format URLs
+ (#13211)
+* [xhamster] Fix uploader and like/dislike count extraction (#13216)
++ [xhamster] Extract categories (#11728)
++ [abcnews] Add support for embed URLs (#12851)
+* [gaskrank] Fix extraction (#12493)
+* [medialaan] Fix videos with missing videoUrl (#12774)
+* [dvtv] Fix playlist support
++ [dvtv] Add support for DASH and HLS formats (#3063)
++ [beam:vod] Add support for beam.pro/mixer.com VODs (#13032)
+* [cbsinteractive] Relax URL regular expression (#13213)
+* [adn] Fix formats extraction
++ [youku] Extract more metadata (#10433)
+* [cbsnews] Fix extraction (#13205)
+
+
+version 2017.05.26
+
+Core
++ [utils] Recognize more patterns in strip_jsonp
+* [postprocessor/ffmpeg] Fix metadata filename handling on Python 2 (#13182)
+
+Extractors
++ [youtube] Recognize DASH MPDs with cipher signatures (#11381)
++ [bbc] Add support for authentication
+* [tudou] Merge into youku extractor (#12214)
+* [youku:show] Fix extraction
+* [youku] Fix extraction (#13191)
+* [udemy] Fix extraction for outputs' format entries without URL (#13192)
+* [vimeo] Fix formats' sorting (#13189)
+* [cbsnews] Fix extraction for 60 Minutes videos (#12861)
+
+
+version 2017.05.23
+
+Core
++ [downloader/external] Pass -loglevel to ffmpeg downloader (#13183)
++ [adobepass] Add support for Bright House Networks (#13149)
+
+Extractors
++ [streamcz] Add support for subtitles (#13174)
+* [youtube] Fix DASH manifest signature decryption (#8944, #13156)
+* [toggle] Relax URL regular expression (#13172)
+* [toypics] Fix extraction (#13077)
+* [njpwworld] Fix extraction (#13162, #13169)
++ [hitbox] Add support for smashcast.tv (#13154)
+* [mitele] Update app key regular expression (#13158)
+
+
version 2017.05.18.1
Core
pypi-files: youtube-dl.bash-completion README.txt youtube-dl.1 youtube-dl.fish
youtube-dl: youtube_dl/*.py youtube_dl/*/*.py
- zip --quiet youtube-dl youtube_dl/*.py youtube_dl/*/*.py
- zip --quiet --junk-paths youtube-dl youtube_dl/__main__.py
+ mkdir -p zip
+ for d in youtube_dl youtube_dl/downloader youtube_dl/extractor youtube_dl/postprocessor ; do \
+ mkdir -p zip/$$d ;\
+ cp -pPR $$d/*.py zip/$$d/ ;\
+ done
+ touch -t 200001010101 zip/youtube_dl/*.py zip/youtube_dl/*/*.py
+ mv zip/youtube_dl/__main__.py zip/
+ cd zip ; zip -q ../youtube-dl youtube_dl/*.py youtube_dl/*/*.py __main__.py
+ rm -rf zip
echo '#!$(PYTHON)' > youtube-dl
cat youtube-dl.zip >> youtube-dl
rm youtube-dl.zip
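The rewritten recipe stages the sources, pins every file's mtime with `touch -t`, and zips from inside the staging directory so that repeated builds produce byte-identical archives. A minimal standalone sketch of the same idea (the `src` and `bundle` names are illustrative):

```bash
# Stage sources, normalize timestamps, then zip for a reproducible archive
mkdir -p zip/pkg
cp -pPR src/*.py zip/pkg/
touch -t 200001010101 zip/pkg/*.py
(cd zip && zip -q ../bundle pkg/*.py)   # creates bundle.zip next to zip/
rm -rf zip
```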
--exclude '*.pyc' \
--exclude '*.pyo' \
--exclude '*~' \
- --exclude '__pycache' \
+ --exclude '__pycache__' \
--exclude '.git' \
--exclude 'testdata' \
--exclude 'docs/_build' \
sudo wget https://yt-dl.org/downloads/latest/youtube-dl -O /usr/local/bin/youtube-dl
sudo chmod a+rx /usr/local/bin/youtube-dl
-Windows users can [download an .exe file](https://yt-dl.org/latest/youtube-dl.exe) and place it in any location on their [PATH](http://en.wikipedia.org/wiki/PATH_%28variable%29) except for `%SYSTEMROOT%\System32` (e.g. **do not** put in `C:\Windows\System32`).
+Windows users can [download an .exe file](https://yt-dl.org/latest/youtube-dl.exe) and place it in any location on their [PATH](https://en.wikipedia.org/wiki/PATH_%28variable%29) except for `%SYSTEMROOT%\System32` (e.g. **do not** put in `C:\Windows\System32`).
You can also use pip:
This command will update youtube-dl if you have already installed it. See the [pypi page](https://pypi.python.org/pypi/youtube_dl) for more information.
-OS X users can install youtube-dl with [Homebrew](http://brew.sh/):
+OS X users can install youtube-dl with [Homebrew](https://brew.sh/):
brew install youtube-dl
--max-views COUNT Do not download any videos with more than
COUNT views
--match-filter FILTER Generic video filter. Specify any key (see
- help for -o for a list of available keys)
- to match if the key is present, !key to
- check if the key is not present, key >
- NUMBER (like "comment_count > 12", also
- works with >=, <, <=, !=, =) to compare
- against a number, key = 'LITERAL' (like
- "uploader = 'Mike Smith'", also works with
- !=) to match against a string literal and &
- to require multiple matches. Values which
- are not known are excluded unless you put a
- question mark (?) after the operator. For
- example, to only match videos that have
+ the "OUTPUT TEMPLATE" for a list of
+ available keys) to match if the key is
+ present, !key to check if the key is not
+ present, key > NUMBER (like "comment_count
+ > 12", also works with >=, <, <=, !=, =) to
+ compare against a number, key = 'LITERAL'
+ (like "uploader = 'Mike Smith'", also works
+ with !=) to match against a string literal
+ and & to require multiple matches. Values
+ which are not known are excluded unless you
+ put a question mark (?) after the operator.
+ For example, to only match videos that have
been liked more than 100 times and disliked
less than 50 times (or the dislike
functionality is not available at the given
--get-filename Simulate, quiet but print output filename
--get-format Simulate, quiet but print output format
-j, --dump-json Simulate, quiet but print JSON information.
- See --output for a description of available
- keys.
+ See the "OUTPUT TEMPLATE" for a description
+ of available keys.
-J, --dump-single-json Simulate, quiet but print JSON information
for each command-line argument. If the URL
refers to a playlist, dump the whole
syntax. Example: --exec 'adb push {}
/sdcard/Music/ && rm {}'
--convert-subs FORMAT Convert the subtitles to other format
- (currently supported: srt|ass|vtt)
+ (currently supported: srt|ass|vtt|lrc)
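For example, to download subtitles and convert them to the newly supported `lrc` format in one run (a minimal sketch; the URL is a placeholder):

```bash
# Fetch available subtitles and convert them to timed .lrc lyrics
$ youtube-dl --write-sub --convert-subs lrc "https://www.youtube.com/watch?v=BaW_jenozKc"
```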
# CONFIGURATION
### Authentication with `.netrc` file
-You may also want to configure automatic credentials storage for extractors that support authentication (by providing login and password with `--username` and `--password`) in order not to pass credentials as command line arguments on every youtube-dl execution and prevent tracking plain text passwords in the shell command history. You can achieve this using a [`.netrc` file](http://stackoverflow.com/tags/.netrc/info) on a per extractor basis. For that you will need to create a `.netrc` file in your `$HOME` and restrict permissions to read/write by only you:
+You may also want to configure automatic credentials storage for extractors that support authentication (by providing login and password with `--username` and `--password`) in order not to pass credentials as command line arguments on every youtube-dl execution and prevent tracking plain text passwords in the shell command history. You can achieve this using a [`.netrc` file](https://stackoverflow.com/tags/.netrc/info) on a per extractor basis. For that you will need to create a `.netrc` file in your `$HOME` and restrict permissions to read/write by only you:
```
touch $HOME/.netrc
chmod a-rwx,u+rw $HOME/.netrc
```
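After that you can add credentials for an extractor in the following format, where `extractor` is the name of the extractor in lowercase (a sketch of the layout; the account below is a placeholder):
```
machine <extractor> login <login> password <password>
```
For example:
```
machine youtube login myaccount@gmail.com password my_youtube_password
```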
To activate authentication with the `.netrc` file you should pass `--netrc` to youtube-dl or place it in the [configuration file](#configuration).
-On Windows you may also need to setup the `%HOME%` environment variable manually.
+On Windows you may also need to setup the `%HOME%` environment variable manually. For example:
+```
+set HOME=%USERPROFILE%
+```
# OUTPUT TEMPLATE
**tl;dr:** [navigate me to examples](#output-template-examples).
-The basic usage is not to set any template arguments when downloading a single file, like in `youtube-dl -o funny_video.flv "http://some/video"`. However, it may contain special sequences that will be replaced when downloading each video. The special sequences may be formatted according to [python string formatting operations](https://docs.python.org/2/library/stdtypes.html#string-formatting). For example, `%(NAME)s` or `%(NAME)05d`. To clarify, that is a percent symbol followed by a name in parentheses, followed by a formatting operations. Allowed names along with sequence type are:
+The basic usage is not to set any template arguments when downloading a single file, like in `youtube-dl -o funny_video.flv "https://some/video"`. However, it may contain special sequences that will be replaced when downloading each video. The special sequences may be formatted according to [python string formatting operations](https://docs.python.org/2/library/stdtypes.html#string-formatting). For example, `%(NAME)s` or `%(NAME)05d`. To clarify, that is a percent symbol followed by a name in parentheses, followed by formatting operations. Allowed names along with sequence type are:
- `id` (string): Video identifier
- `title` (string): Video title
- `playlist_id` (string): Playlist identifier
- `playlist_title` (string): Playlist title
-
Available for the video that belongs to some logical chapter or section:
+
- `chapter` (string): Name or title of the chapter the video belongs to
- `chapter_number` (numeric): Number of the chapter the video belongs to
- `chapter_id` (string): Id of the chapter the video belongs to
Available for the video that is an episode of some series or programme:
+
- `series` (string): Title of the series or programme the video episode belongs to
- `season` (string): Title of the season the video episode belongs to
- `season_number` (numeric): Number of the season the video episode belongs to
- `episode_id` (string): Id of the video episode
Available for the media that is a track or a part of a music album:
+
- `track` (string): Title of the track
- `track_number` (numeric): Number of the track within an album or a disc
- `track_id` (string): Id of the track
#### Output template examples
-Note on Windows you may need to use double quotes instead of single.
+Note that on Windows you may need to use double quotes instead of single.
```bash
$ youtube-dl --get-filename -o '%(title)s.%(ext)s' BaW_jenozKc
$ youtube-dl -u user -p password -o '~/MyVideos/%(playlist)s/%(chapter_number)s - %(chapter)s/%(title)s.%(ext)s' https://www.udemy.com/java-tutorial/
# Download entire series season keeping each series and each season in separate directory under C:/MyVideos
-$ youtube-dl -o "C:/MyVideos/%(series)s/%(season_number)s - %(season)s/%(episode_number)s - %(episode)s.%(ext)s" http://videomore.ru/kino_v_detalayah/5_sezon/367617
+$ youtube-dl -o "C:/MyVideos/%(series)s/%(season_number)s - %(season)s/%(episode_number)s - %(episode)s.%(ext)s" https://videomore.ru/kino_v_detalayah/5_sezon/367617
# Stream the video being downloaded to stdout
$ youtube-dl -o - BaW_jenozKc
- `acodec`: Name of the audio codec in use
- `vcodec`: Name of the video codec in use
- `container`: Name of the container format
- - `protocol`: The protocol that will be used for the actual download, lower-case (`http`, `https`, `rtsp`, `rtmp`, `rtmpe`, `mms`, `f4m`, `ism`, `m3u8`, or `m3u8_native`)
+ - `protocol`: The protocol that will be used for the actual download, lower-case (`http`, `https`, `rtsp`, `rtmp`, `rtmpe`, `mms`, `f4m`, `ism`, `http_dash_segments`, `m3u8`, or `m3u8_native`)
- `format_id`: A short description of the format
Note that none of the aforementioned meta fields are guaranteed to be present since this solely depends on the metadata obtained by a particular extractor, i.e. the metadata offered by the video hoster.
#### Format selection examples
-Note on Windows you may need to use double quotes instead of single.
+Note that on Windows you may need to use double quotes instead of single.
```bash
# Download best mp4 format available or any other best if no mp4 available
### How do I update youtube-dl?
-If you've followed [our manual installation instructions](http://rg3.github.io/youtube-dl/download.html), you can simply run `youtube-dl -U` (or, on Linux, `sudo youtube-dl -U`).
+If you've followed [our manual installation instructions](https://rg3.github.io/youtube-dl/download.html), you can simply run `youtube-dl -U` (or, on Linux, `sudo youtube-dl -U`).
If you have used pip, a simple `sudo pip install -U youtube-dl` is sufficient to update.
-If you have installed youtube-dl using a package manager like *apt-get* or *yum*, use the standard system update mechanism to update. Note that distribution packages are often outdated. As a rule of thumb, youtube-dl releases at least once a month, and often weekly or even daily. Simply go to http://yt-dl.org/ to find out the current version. Unfortunately, there is nothing we youtube-dl developers can do if your distribution serves a really outdated version. You can (and should) complain to your distribution in their bugtracker or support forum.
+If you have installed youtube-dl using a package manager like *apt-get* or *yum*, use the standard system update mechanism to update. Note that distribution packages are often outdated. As a rule of thumb, youtube-dl releases at least once a month, and often weekly or even daily. Simply go to https://yt-dl.org to find out the current version. Unfortunately, there is nothing we youtube-dl developers can do if your distribution serves a really outdated version. You can (and should) complain to your distribution in their bugtracker or support forum.
As a last resort, you can also uninstall the version installed by your package manager and follow our manual installation instructions. For that, remove the distribution's package, with a line like
sudo apt-get remove -y youtube-dl
-Afterwards, simply follow [our manual installation instructions](http://rg3.github.io/youtube-dl/download.html):
+Afterwards, simply follow [our manual installation instructions](https://rg3.github.io/youtube-dl/download.html):
```
sudo wget https://yt-dl.org/latest/youtube-dl -O /usr/local/bin/youtube-dl
youtube-dl works fine on its own on most sites. However, if you want to convert video/audio, you'll need [avconv](https://libav.org/) or [ffmpeg](https://www.ffmpeg.org/). On some sites - most notably YouTube - videos can be retrieved in a higher quality format without sound. youtube-dl will detect whether avconv/ffmpeg is present and automatically pick the best option.
-Videos or video formats streamed via RTMP protocol can only be downloaded when [rtmpdump](https://rtmpdump.mplayerhq.hu/) is installed. Downloading MMS and RTSP videos requires either [mplayer](http://mplayerhq.hu/) or [mpv](https://mpv.io/) to be installed.
+Videos or video formats streamed via RTMP protocol can only be downloaded when [rtmpdump](https://rtmpdump.mplayerhq.hu/) is installed. Downloading MMS and RTSP videos requires either [mplayer](https://mplayerhq.hu/) or [mpv](https://mpv.io/) to be installed.
### I have downloaded a video but how can I play it?
-Once the video is fully downloaded, use any video player, such as [mpv](https://mpv.io/), [vlc](http://www.videolan.org/) or [mplayer](http://www.mplayerhq.hu/).
+Once the video is fully downloaded, use any video player, such as [mpv](https://mpv.io/), [vlc](https://www.videolan.org/) or [mplayer](https://www.mplayerhq.hu/).
### I extracted a video URL with `-g`, but it does not play on another machine / in my web browser.
### How do I download a video starting with a `-`?
-Either prepend `http://www.youtube.com/watch?v=` or separate the ID from the options with `--`:
+Either prepend `https://www.youtube.com/watch?v=` or separate the ID from the options with `--`:
youtube-dl -- -wNyEUrxzFU
- youtube-dl "http://www.youtube.com/watch?v=-wNyEUrxzFU"
+ youtube-dl "https://www.youtube.com/watch?v=-wNyEUrxzFU"
### How do I pass cookies to youtube-dl?
### How do I stream directly to media player?
-You will first need to tell youtube-dl to stream media to stdout with `-o -`, and also tell your media player to read from stdin (it must be capable of this for streaming) and then pipe former to latter. For example, streaming to [vlc](http://www.videolan.org/) can be achieved with:
+You will first need to tell youtube-dl to stream media to stdout with `-o -`, and also tell your media player to read from stdin (it must be capable of this for streaming) and then pipe former to latter. For example, streaming to [vlc](https://www.videolan.org/) can be achieved with:
- youtube-dl -o - "http://www.youtube.com/watch?v=BaW_jenozKcj" | vlc -
+ youtube-dl -o - "https://www.youtube.com/watch?v=BaW_jenozKcj" | vlc -
### How do I download only new videos from a playlist?
When youtube-dl knows that one particular downloader works better for a given website, that downloader will be picked. Otherwise, youtube-dl will pick the best downloader for general compatibility, which at the moment happens to be ffmpeg. This choice may change in future versions of youtube-dl, with improvements of the built-in downloader and/or ffmpeg.
-In particular, the generic extractor (used when your website is not in the [list of supported sites by youtube-dl](http://rg3.github.io/youtube-dl/supportedsites.html) cannot mandate one specific downloader.
+In particular, the generic extractor (used when your website is not in the [list of supported sites by youtube-dl](https://rg3.github.io/youtube-dl/supportedsites.html)) cannot mandate one specific downloader.
If you put either `--hls-prefer-native` or `--hls-prefer-ffmpeg` into your configuration, a different subset of videos will fail to download correctly. Instead, it is much better to [file an issue](https://yt-dl.org/bug) or a pull request which details why the native or the ffmpeg HLS downloader is a better choice for your use case.
### How can I detect whether a given URL is supported by youtube-dl?
-For one, have a look at the [list of supported sites](docs/supportedsites.md). Note that it can sometimes happen that the site changes its URL scheme (say, from http://example.com/video/1234567 to http://example.com/v/1234567 ) and youtube-dl reports an URL of a service in that list as unsupported. In that case, simply report a bug.
+For one, have a look at the [list of supported sites](docs/supportedsites.md). Note that it can sometimes happen that the site changes its URL scheme (say, from https://example.com/video/1234567 to https://example.com/v/1234567) and youtube-dl reports a URL of a service in that list as unsupported. In that case, simply report a bug.
It is *not* possible to detect whether a URL is supported or not. That's because youtube-dl contains a generic extractor which matches **all** URLs. You may be tempted to disable, exclude, or remove the generic extractor, but the generic extractor not only allows users to extract videos from lots of websites that embed a video from another service, but may also be used to extract videos from a service that hosts them itself. Therefore, we neither recommend nor support disabling, excluding, or removing the generic extractor.
# DEVELOPER INSTRUCTIONS
-Most users do not need to build youtube-dl and can [download the builds](http://rg3.github.io/youtube-dl/download.html) or get them from their distribution.
+Most users do not need to build youtube-dl and can [download the builds](https://rg3.github.io/youtube-dl/download.html) or get them from their distribution.
To run youtube-dl as a developer, you don't need to build anything either. Simply execute
python test/test_download.py
nosetests
+See item 6 of [new extractor tutorial](#adding-support-for-a-new-site) for how to run extractor specific test cases.
+
If you want to create a build of youtube-dl yourself, you'll need
* python
class YourExtractorIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?yourextractor\.com/watch/(?P<id>[0-9]+)'
_TEST = {
- 'url': 'http://yourextractor.com/watch/42',
+ 'url': 'https://yourextractor.com/watch/42',
'md5': 'TODO: md5 sum of the first 10241 bytes of the video file (use --test)',
'info_dict': {
'id': '42',
}
```
5. Add an import in [`youtube_dl/extractor/extractors.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/extractors.py).
-6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc.
+6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc. Note that tests with an `only_matching` key in the test's dict are not counted.
7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L74-L252). Add tests and code for as many as you want.
-8. Make sure your code follows [youtube-dl coding conventions](#youtube-dl-coding-conventions) and check the code with [flake8](https://pypi.python.org/pypi/flake8). Also make sure your code works under all [Python](http://www.python.org/) versions claimed supported by youtube-dl, namely 2.6, 2.7, and 3.2+.
-9. When the tests pass, [add](http://git-scm.com/docs/git-add) the new files and [commit](http://git-scm.com/docs/git-commit) them and [push](http://git-scm.com/docs/git-push) the result, like this:
+8. Make sure your code follows [youtube-dl coding conventions](#youtube-dl-coding-conventions) and check the code with [flake8](https://pypi.python.org/pypi/flake8). Also make sure your code works under all [Python](https://www.python.org/) versions claimed supported by youtube-dl, namely 2.6, 2.7, and 3.2+.
+9. When the tests pass, [add](https://git-scm.com/docs/git-add) the new files and [commit](https://git-scm.com/docs/git-commit) them and [push](https://git-scm.com/docs/git-push) the result, like this:
$ git add youtube_dl/extractor/extractors.py
$ git add youtube_dl/extractor/yourextractor.py
ydl_opts = {}
with youtube_dl.YoutubeDL(ydl_opts) as ydl:
- ydl.download(['http://www.youtube.com/watch?v=BaW_jenozKc'])
+ ydl.download(['https://www.youtube.com/watch?v=BaW_jenozKc'])
```
Most likely, you'll want to use various options. For a list of options available, have a look at [`youtube_dl/YoutubeDL.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/YoutubeDL.py#L129-L279). For a start, if you want to intercept youtube-dl's output, set a `logger` object.
'progress_hooks': [my_hook],
}
with youtube_dl.YoutubeDL(ydl_opts) as ydl:
- ydl.download(['http://www.youtube.com/watch?v=BaW_jenozKc'])
+ ydl.download(['https://www.youtube.com/watch?v=BaW_jenozKc'])
```
# BUGS
-Bugs and suggestions should be reported at: <https://github.com/rg3/youtube-dl/issues>. Unless you were prompted to or there is another pertinent reason (e.g. GitHub fails to accept the bug report), please do not send bug reports via personal email. For discussions, join us in the IRC channel [#youtube-dl](irc://chat.freenode.net/#youtube-dl) on freenode ([webchat](http://webchat.freenode.net/?randomnick=1&channels=youtube-dl)).
+Bugs and suggestions should be reported at: <https://github.com/rg3/youtube-dl/issues>. Unless you were prompted to or there is another pertinent reason (e.g. GitHub fails to accept the bug report), please do not send bug reports via personal email. For discussions, join us in the IRC channel [#youtube-dl](irc://chat.freenode.net/#youtube-dl) on freenode ([webchat](https://webchat.freenode.net/?randomnick=1&channels=youtube-dl)).
**Please include the full output of youtube-dl when run with `-v`**, i.e. **add** `-v` flag to **your command line**, copy the **whole** output and post it in the issue body wrapped in \`\`\` for better formatting. It should look similar to this:
```
$ youtube-dl -v <your command line>
[debug] System config: []
[debug] User config: []
-[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
+[debug] Command-line args: [u'-v', u'https://www.youtube.com/watch?v=BaW_jenozKcj']
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
[debug] youtube-dl version 2015.12.06
[debug] Git HEAD: 135392e
If your server has multiple IPs or you suspect censorship, adding `--call-home` may be a good idea to get more diagnostics. If the error is `ERROR: Unable to extract ...` and you cannot reproduce it from multiple countries, add `--dump-pages` (warning: this will yield a rather large output, redirect it to the file `log.txt` by adding `>log.txt 2>&1` to your command-line) or upload the `.dump` files you get when you add `--write-pages` [somewhere](https://gist.github.com/).
-**Site support requests must contain an example URL**. An example URL is a URL you might want to download, like `http://www.youtube.com/watch?v=BaW_jenozKc`. There should be an obvious video present. Except under very special circumstances, the main page of a video service (e.g. `http://www.youtube.com/`) is *not* an example URL.
+**Site support requests must contain an example URL**. An example URL is a URL you might want to download, like `https://www.youtube.com/watch?v=BaW_jenozKc`. There should be an obvious video present. Except under very special circumstances, the main page of a video service (e.g. `https://www.youtube.com/`) is *not* an example URL.
### Are you using the latest version?
--max-views COUNT Do not download any videos with more than
COUNT views
--match-filter FILTER Generic video filter. Specify any key (see
- help for -o for a list of available keys)
- to match if the key is present, !key to
- check if the key is not present, key >
- NUMBER (like "comment_count > 12", also
- works with >=, <, <=, !=, =) to compare
- against a number, key = 'LITERAL' (like
- "uploader = 'Mike Smith'", also works with
- !=) to match against a string literal and &
- to require multiple matches. Values which
- are not known are excluded unless you put a
- question mark (?) after the operator. For
- example, to only match videos that have
+ the "OUTPUT TEMPLATE" for a list of
+ available keys) to match if the key is
+ present, !key to check if the key is not
+ present, key > NUMBER (like "comment_count
+ > 12", also works with >=, <, <=, !=, =) to
+ compare against a number, key = 'LITERAL'
+ (like "uploader = 'Mike Smith'", also works
+ with !=) to match against a string literal
+ and & to require multiple matches. Values
+ which are not known are excluded unless you
+ put a question mark (?) after the operator.
+ For example, to only match videos that have
been liked more than 100 times and disliked
less than 50 times (or the dislike
functionality is not available at the given
--get-filename Simulate, quiet but print output filename
--get-format Simulate, quiet but print output format
-j, --dump-json Simulate, quiet but print JSON information.
- See --output for a description of available
- keys.
+ See the "OUTPUT TEMPLATE" for a description
+ of available keys.
-J, --dump-single-json Simulate, quiet but print JSON information
for each command-line argument. If the URL
refers to a playlist, dump the whole
syntax. Example: --exec 'adb push {}
/sdcard/Music/ && rm {}'
--convert-subs FORMAT Convert the subtitles to other format
- (currently supported: srt|ass|vtt)
+ (currently supported: srt|ass|vtt|lrc)
to youtube-dl or place it in the configuration file.
On Windows you may also need to setup the %HOME% environment variable
-manually.
+manually. For example:
+
+ set HOME=%USERPROFILE%
TL;DR: navigate me to examples.
The basic usage is not to set any template arguments when downloading a
-single file, like in youtube-dl -o funny_video.flv "http://some/video".
+single file, like in youtube-dl -o funny_video.flv "https://some/video".
However, it may contain special sequences that will be replaced when
downloading each video. The special sequences may be formatted according
to python string formatting operations. For example, %(NAME)s or
- playlist_title (string): Playlist title
Available for the video that belongs to some logical chapter or section:
-- chapter (string): Name or title of the chapter the video belongs to -
-chapter_number (numeric): Number of the chapter the video belongs to -
-chapter_id (string): Id of the chapter the video belongs to
+
+- chapter (string): Name or title of the chapter the video belongs to
+- chapter_number (numeric): Number of the chapter the video belongs to
+- chapter_id (string): Id of the chapter the video belongs to
Available for the video that is an episode of some series or programme:
-- series (string): Title of the series or programme the video episode
-belongs to - season (string): Title of the season the video episode
-belongs to - season_number (numeric): Number of the season the video
-episode belongs to - season_id (string): Id of the season the video
-episode belongs to - episode (string): Title of the video episode -
-episode_number (numeric): Number of the video episode within a season -
-episode_id (string): Id of the video episode
-
-Available for the media that is a track or a part of a music album: -
-track (string): Title of the track - track_number (numeric): Number of
-the track within an album or a disc - track_id (string): Id of the track
-- artist (string): Artist(s) of the track - genre (string): Genre(s) of
-the track - album (string): Title of the album the track belongs to -
-album_type (string): Type of the album - album_artist (string): List of
-all artists appeared on the album - disc_number (numeric): Number of the
-disc or other physical medium the track belongs to - release_year
-(numeric): Year (YYYY) when the album was released
+
+- series (string): Title of the series or programme the video episode
+ belongs to
+- season (string): Title of the season the video episode belongs to
+- season_number (numeric): Number of the season the video episode
+ belongs to
+- season_id (string): Id of the season the video episode belongs to
+- episode (string): Title of the video episode
+- episode_number (numeric): Number of the video episode within a
+ season
+- episode_id (string): Id of the video episode
+
+Available for the media that is a track or a part of a music album:
+
+- track (string): Title of the track
+- track_number (numeric): Number of the track within an album or a
+ disc
+- track_id (string): Id of the track
+- artist (string): Artist(s) of the track
+- genre (string): Genre(s) of the track
+- album (string): Title of the album the track belongs to
+- album_type (string): Type of the album
+- album_artist (string): List of all artists appeared on the album
+- disc_number (numeric): Number of the disc or other physical medium
+ the track belongs to
+- release_year (numeric): Year (YYYY) when the album was released
Each aforementioned sequence when referenced in an output template will
be replaced by the actual value corresponding to the sequence name. Note
Output template examples
-Note on Windows you may need to use double quotes instead of single.
+Note that on Windows you may need to use double quotes instead of
+single.
$ youtube-dl --get-filename -o '%(title)s.%(ext)s' BaW_jenozKc
youtube-dl test video ''_ä↭𝕐.mp4 # All kinds of weird characters
$ youtube-dl -u user -p password -o '~/MyVideos/%(playlist)s/%(chapter_number)s - %(chapter)s/%(title)s.%(ext)s' https://www.udemy.com/java-tutorial/
# Download entire series season keeping each series and each season in separate directory under C:/MyVideos
- $ youtube-dl -o "C:/MyVideos/%(series)s/%(season_number)s - %(season)s/%(episode_number)s - %(episode)s.%(ext)s" http://videomore.ru/kino_v_detalayah/5_sezon/367617
+ $ youtube-dl -o "C:/MyVideos/%(series)s/%(season_number)s - %(season)s/%(episode_number)s - %(episode)s.%(ext)s" https://videomore.ru/kino_v_detalayah/5_sezon/367617
# Stream the video being downloaded to stdout
$ youtube-dl -o - BaW_jenozKc
vcodec: Name of the video codec in use - container: Name of the
container format - protocol: The protocol that will be used for the
actual download, lower-case (http, https, rtsp, rtmp, rtmpe, mms, f4m,
-ism, m3u8, or m3u8_native) - format_id: A short description of the
-format
+ism, http_dash_segments, m3u8, or m3u8_native) - format_id: A short
+description of the format
Note that none of the aforementioned meta fields are guaranteed to be
present since this solely depends on the metadata obtained by a particular
Format selection examples
-Note on Windows you may need to use double quotes instead of single.
+Note that on Windows you may need to use double quotes instead of
+single.
# Download best mp4 format available or any other best if no mp4 available
$ youtube-dl -f 'bestvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]/best'
or _yum_, use the standard system update mechanism to update. Note that
distribution packages are often outdated. As a rule of thumb, youtube-dl
releases at least once a month, and often weekly or even daily. Simply
-go to http://yt-dl.org/ to find out the current version. Unfortunately,
+go to https://yt-dl.org to find out the current version. Unfortunately,
there is nothing we youtube-dl developers can do if your distribution
serves a really outdated version. You can (and should) complain to your
distribution in their bugtracker or support forum.
How do I download a video starting with a -?
-Either prepend http://www.youtube.com/watch?v= or separate the ID from
+Either prepend https://www.youtube.com/watch?v= or separate the ID from
the options with --:
youtube-dl -- -wNyEUrxzFU
- youtube-dl "http://www.youtube.com/watch?v=-wNyEUrxzFU"
+ youtube-dl "https://www.youtube.com/watch?v=-wNyEUrxzFU"
How do I pass cookies to youtube-dl?
capable of this for streaming) and then pipe former to latter. For
example, streaming to vlc can be achieved with:
- youtube-dl -o - "http://www.youtube.com/watch?v=BaW_jenozKcj" | vlc -
+ youtube-dl -o - "https://www.youtube.com/watch?v=BaW_jenozKcj" | vlc -
How do I download only new videos from a playlist?
For one, have a look at the list of supported sites. Note that it can
sometimes happen that the site changes its URL scheme (say, from
-http://example.com/video/1234567 to http://example.com/v/1234567 ) and
+https://example.com/video/1234567 to https://example.com/v/1234567) and
youtube-dl reports a URL of a service in that list as unsupported. In
that case, simply report a bug.
python test/test_download.py
nosetests
+See item 6 of new extractor tutorial for how to run extractor specific
+test cases.
+
If you want to create a build of youtube-dl yourself, you'll need
- python
class YourExtractorIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?yourextractor\.com/watch/(?P<id>[0-9]+)'
_TEST = {
- 'url': 'http://yourextractor.com/watch/42',
+ 'url': 'https://yourextractor.com/watch/42',
'md5': 'TODO: md5 sum of the first 10241 bytes of the video file (use --test)',
'info_dict': {
'id': '42',
_TEST to _TESTS and make it into a list of dictionaries. The tests
will then be named TestDownload.test_YourExtractor,
TestDownload.test_YourExtractor_1,
- TestDownload.test_YourExtractor_2, etc.
+ TestDownload.test_YourExtractor_2, etc. Note that tests with an
+ only_matching key in the test's dict are not counted.
7. Have a look at youtube_dl/extractor/common.py for possible helper
methods and a detailed description of what your extractor should and
may return. Add tests and code for as many as you want.
ydl_opts = {}
with youtube_dl.YoutubeDL(ydl_opts) as ydl:
- ydl.download(['http://www.youtube.com/watch?v=BaW_jenozKc'])
+ ydl.download(['https://www.youtube.com/watch?v=BaW_jenozKc'])
Most likely, you'll want to use various options. For a list of options
available, have a look at youtube_dl/YoutubeDL.py. For a start, if you
'progress_hooks': [my_hook],
}
with youtube_dl.YoutubeDL(ydl_opts) as ydl:
- ydl.download(['http://www.youtube.com/watch?v=BaW_jenozKc'])
+ ydl.download(['https://www.youtube.com/watch?v=BaW_jenozKc'])
$ youtube-dl -v <your command line>
[debug] System config: []
[debug] User config: []
- [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
+ [debug] Command-line args: [u'-v', u'https://www.youtube.com/watch?v=BaW_jenozKcj']
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
[debug] youtube-dl version 2015.12.06
[debug] Git HEAD: 135392e
SITE SUPPORT REQUESTS MUST CONTAIN AN EXAMPLE URL. An example URL is a
URL you might want to download, like
-http://www.youtube.com/watch?v=BaW_jenozKc. There should be an obvious
+https://www.youtube.com/watch?v=BaW_jenozKc. There should be an obvious
video present. Except under very special circumstances, the main page of
-a video service (e.g. http://www.youtube.com/) is _not_ an example URL.
+a video service (e.g. https://www.youtube.com/) is _not_ an example URL.
Are you using the latest version?
import sys
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-from test.helper import get_testcases
+from test.helper import gettestcases
from youtube_dl.utils import compat_urllib_parse_urlparse
from youtube_dl.utils import compat_urllib_request
else:
METHOD = 'EURISTIC'
-for test in get_testcases():
+for test in gettestcases():
if METHOD == 'EURISTIC':
try:
webpage = compat_urllib_request.urlopen(test['url'], timeout=10).read()
ROOT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
README_FILE = os.path.join(ROOT_DIR, 'README.md')
-PREFIX = '''%YOUTUBE-DL(1)
+PREFIX = r'''%YOUTUBE-DL(1)
# NAME
- **afreecatv**: afreecatv.com
- **afreecatv:global**: afreecatv.com
- **AirMozilla**
+ - **AliExpressLive**
- **AlJazeera**
- **Allocine**
- **AlphaPorno**
- **AMCNetworks**
- - **anderetijden**: npo.nl and ntr.nl
+ - **AmericasTestKitchen**
+ - **anderetijden**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl
- **AnimeOnDemand**
- **anitube.se**
- **Anvato**
- **arte.tv:info**
- **arte.tv:magazine**
- **arte.tv:playlist**
+ - **AsianCrush**
+ - **AsianCrushPlaylist**
- **AtresPlayer**
- **ATTTechChannel**
- **ATVAt**
- **bambuser:channel**
- **Bandcamp**
- **Bandcamp:album**
+ - **Bandcamp:weekly**
- **bangumi.bilibili.com**: BiliBili番剧
- **bbc**: BBC
- **bbc.co.uk**: BBC iPlayer
- **bbc.co.uk:article**: BBC articles
- **bbc.co.uk:iplayer:playlist**
- **bbc.co.uk:playlist**
- - **Beam:live**
- **Beatport**
- **Beeg**
- **BehindKink**
- **chirbit**
- **chirbit:profile**
- **Cinchcast**
- - **Clipfish**
+ - **CJSW**
- **cliphunter**
+ - **Clippit**
- **ClipRs**
- **Clipsyndicate**
- **CloserToTruth**
- **EbaumsWorld**
- **EchoMsk**
- **egghead:course**: egghead.io course
+ - **egghead:lesson**: egghead.io lesson
- **eHow**
- **Einthusan**
- **eitb.tv**
- **Funimation**
- **FunnyOrDie**
- **Fusion**
+ - **Fux**
- **FXNetworks**
- **GameInformer**
- **GameOne**
- **Go**
- **Go90**
- **GodTube**
- - **GodTV**
- **Golem**
- **GoogleDrive**
- **Goshgay**
- **IPrima**
- **iqiyi**: 爱奇艺
- **Ir90Tv**
+ - **ITTF**
- **ITV**
- **ivi**: ivi.ru
- **ivi:compilation**: ivi.ru compilations
- **Jamendo**
- **JamendoAlbum**
- **JeuxVideo**
+ - **Joj**
- **Jove**
- **jpopsuki.tv**
- **JWPlatform**
+ - **Kakao**
- **Kaltura**
- **Kamcord**
- **KanalPlay**: Kanal 5/9/11 Play
- **limelight:channel_list**
- **LiTV**
- **LiveLeak**
+ - **LiveLeakEmbed**
- **livestream**
- **livestream:original**
- **LnkGo**
- **MakerTV**
- **mangomolo:live**
- **mangomolo:video**
+ - **ManyVids**
- **MatchTV**
- **MDR**: MDR.DE and KiKA
- **media.ccc.de**
- **Medialaan**
- **Mediaset**
- **Medici**
+ - **megaphone.fm**: megaphone.fm embedded players
- **Meipai**: 美拍
- **MelonVOD**
- **META**
- **mixcloud:playlist**
- **mixcloud:stream**
- **mixcloud:user**
+ - **Mixer:live**
+ - **Mixer:vod**
- **MLB**
- **Mnet**
- **MoeVideo**: LetitBit video services: moevideo.net, playreplay.net and videochart.net
- **MovieFap**
- **Moviezine**
- **MovingImage**
- - **MPORA**
- **MSN**
- **mtg**: MTG services
- **mtv**
- **netease:song**: 网易云音乐
- **Netzkino**
- **Newgrounds**
+ - **NewgroundsPlaylist**
- **Newstube**
- **NextMedia**: 蘋果日報
- **NextMediaActionNews**: 蘋果日報 - 動新聞
- **NextTV**: 壹電視
+ - **Nexx**
+ - **NexxEmbed**
- **nfb**: National Film Board of Canada
- **nfl.com**
- **NhkVod**
- **nhl.com:videocenter:category**: NHL videocenter category
- **nick.com**
- **nick.de**
+ - **nickelodeonru**
- **nicknight**
 - **niconico**: ニコニコ動画
- **NiconicoPlaylist**
- **NowTVList**
- **nowvideo**: NowVideo
- **Noz**
- - **npo**: npo.nl and ntr.nl
+ - **npo**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl
- **npo.nl:live**
- **npo.nl:radio**
- **npo.nl:radio:fragment**
- **Openload**
- **OraTV**
- **orf:fm4**: radio FM4
+ - **orf:fm4:story**: fm4.orf.at stories
- **orf:iptv**: iptv.ORF.at
 - **orf:oe1**: Radio Österreich 1
- **orf:tvthek**: ORF TVthek
- **Patreon**
- **pbs**: Public Broadcasting Service (PBS) and member stations: PBS: Public Broadcasting Service, APT - Alabama Public Television (WBIQ), GPB/Georgia Public Broadcasting (WGTV), Mississippi Public Broadcasting (WMPN), Nashville Public Television (WNPT), WFSU-TV (WFSU), WSRE (WSRE), WTCI (WTCI), WPBA/Channel 30 (WPBA), Alaska Public Media (KAKM), Arizona PBS (KAET), KNME-TV/Channel 5 (KNME), Vegas PBS (KLVX), AETN/ARKANSAS ETV NETWORK (KETS), KET (WKLE), WKNO/Channel 10 (WKNO), LPB/LOUISIANA PUBLIC BROADCASTING (WLPB), OETA (KETA), Ozarks Public Television (KOZK), WSIU Public Broadcasting (WSIU), KEET TV (KEET), KIXE/Channel 9 (KIXE), KPBS San Diego (KPBS), KQED (KQED), KVIE Public Television (KVIE), PBS SoCal/KOCE (KOCE), ValleyPBS (KVPT), CONNECTICUT PUBLIC TELEVISION (WEDH), KNPB Channel 5 (KNPB), SOPTV (KSYS), Rocky Mountain PBS (KRMA), KENW-TV3 (KENW), KUED Channel 7 (KUED), Wyoming PBS (KCWC), Colorado Public Television / KBDI 12 (KBDI), KBYU-TV (KBYU), Thirteen/WNET New York (WNET), WGBH/Channel 2 (WGBH), WGBY (WGBY), NJTV Public Media NJ (WNJT), WLIW21 (WLIW), mpt/Maryland Public Television (WMPB), WETA Television and Radio (WETA), WHYY (WHYY), PBS 39 (WLVT), WVPT - Your Source for PBS and More! (WVPT), Howard University Television (WHUT), WEDU PBS (WEDU), WGCU Public Media (WGCU), WPBT2 (WPBT), WUCF TV (WUCF), WUFT/Channel 5 (WUFT), WXEL/Channel 42 (WXEL), WLRN/Channel 17 (WLRN), WUSF Public Broadcasting (WUSF), ETV (WRLK), UNC-TV (WUNC), PBS Hawaii - Oceanic Cable Channel 10 (KHET), Idaho Public Television (KAID), KSPS (KSPS), OPB (KOPB), KWSU/Channel 10 & KTNW/Channel 31 (KWSU), WILL-TV (WILL), Network Knowledge - WSEC/Springfield (WSEC), WTTW11 (WTTW), Iowa Public Television/IPTV (KDIN), Nine Network (KETC), PBS39 Fort Wayne (WFWA), WFYI Indianapolis (WFYI), Milwaukee Public Television (WMVS), WNIN (WNIN), WNIT Public Television (WNIT), WPT (WPNE), WVUT/Channel 22 (WVUT), WEIU/Channel 51 (WEIU), WQPT-TV (WQPT), WYCC PBS Chicago (WYCC), WIPB-TV (WIPB), WTIU (WTIU), CET (WCET), ThinkTVNetwork (WPTD), WBGU-TV (WBGU), WGVU TV (WGVU), NET1 (KUON), Pioneer Public Television (KWCM), SDPB Television (KUSD), TPT (KTCA), KSMQ (KSMQ), KPTS/Channel 8 (KPTS), KTWU/Channel 11 (KTWU), East Tennessee PBS (WSJK), WCTE-TV (WCTE), WLJT, Channel 11 (WLJT), WOSU TV (WOSU), WOUB/WOUC (WOUB), WVPB (WVPB), WKYU-PBS (WKYU), KERA 13 (KERA), MPBN (WCBB), Mountain Lake PBS (WCFE), NHPTV (WENH), Vermont PBS (WETK), witf (WITF), WQED Multimedia (WQED), WMHT Educational Telecommunications (WMHT), Q-TV (WDCQ), WTVS Detroit Public TV (WTVS), CMU Public Television (WCMU), WKAR-TV (WKAR), WNMU-TV Public TV 13 (WNMU), WDSE - WRPT (WDSE), WGTE TV (WGTE), Lakeland Public Television (KAWE), KMOS-TV - Channels 6.1, 6.2 and 6.3 (KMOS), MontanaPBS (KUSM), KRWG/Channel 22 (KRWG), KACV (KACV), KCOS/Channel 13 (KCOS), WCNY/Channel 24 (WCNY), WNED (WNED), WPBS (WPBS), WSKG Public TV (WSKG), WXXI (WXXI), WPSU (WPSU), WVIA Public Media Studios (WVIA), WTVI (WTVI), Western Reserve PBS (WNEO), WVIZ/PBS ideastream (WVIZ), KCTS 9 (KCTS), Basin PBS (KPBT), KUHT / Channel 8 (KUHT), KLRN (KLRN), KLRU (KLRU), WTJX Channel 12 (WTJX), WCVE PBS (WCVE), KBTC Public Television (KBTC)
- **pcmag**
+ - **PearVideo**
- **People**
- **periscope**: Periscope
- **periscope:user**: Periscope user videos
- **Pokemon**
- **PolskieRadio**
- **PolskieRadioCategory**
+ - **PopcornTV**
- **PornCom**
+ - **PornerBros**
- **PornFlip**
- **PornHd**
- **PornHub**: PornHub and Thumbzilla
- **Pornotube**
- **PornoVoisines**
- **PornoXO**
+ - **PornTube**
- **PressTV**
- **PrimeShareTV**
- **PromptFile**
- **RadioJavan**
- **Rai**
- **RaiPlay**
+ - **RaiPlayLive**
- **RBMARadio**
- **RDS**: RDS.ca
- **RedBullTV**
+ - **Reddit**
+ - **RedditR**
- **RedTube**
- **RegioTV**
- **RENTV**
- **rutube:embed**: Rutube embedded videos
- **rutube:movie**: Rutube movies
- **rutube:person**: Rutube person videos
+ - **rutube:playlist**: Rutube playlists
- **RUTV**: RUTV.RU
- **Ruutu**
+ - **Ruv**
- **safari**: safaribooksonline.com online video
- **safari:api**
- **safari:course**: safaribooksonline.com online courses
- **soundcloud:playlist**
- **soundcloud:search**: Soundcloud search
- **soundcloud:set**
+ - **soundcloud:trackstation**
- **soundcloud:user**
- **soundgasm**
- **soundgasm:profile**
- **Tagesschau**
- **tagesschau:player**
- **Tass**
- - **TBS**
+ - **TastyTrade**
+ - **TBS** (Currently broken)
- **TDSLifeway**
- **teachertube**: teachertube.com videos
- **teachertube:user:collection**: teachertube.com user and collection videos
- **TeachingChannel**
- **Teamcoco**
- - **TeamFourStar**
- **TechTalks**
- **techtv.mit.edu**
- **ted**
- **ToonGoggles**
- **Tosh**: Tosh.0
- **tou.tv**
- - **Toypics**: Toypics user profile
+ - **Toypics**: Toypics video
- **ToypicsUser**: Toypics user profile
- **TrailerAddict** (Currently broken)
- **Trilulilu**
- **TruTV**
- **Tube8**
- **TubiTv**
- - **tudou**
- - **tudou:album**
- - **tudou:playlist**
- **Tumblr**
- **tunein:clip**
- **tunein:program**
- **vk:wallpost**
- **vlive**
- **vlive:channel**
+ - **vlive:playlist**
- **Vodlocker**
- **VODPl**
- **VODPlatform**
- **VoiceRepublic**
+ - **Voot**
- **VoxMedia**
- **Vporn**
- - **vpro**: npo.nl and ntr.nl
+ - **vpro**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl
- **Vrak**
- **VRT**: deredactie.be, sporza.be, cobra.be and cobra.canvas.be
- **vrv**
- **washingtonpost**
- **washingtonpost:article**
- **wat.tv**
+ - **WatchBox**
- **WatchIndianPorn**: Watch Indian Porn
- **WDR**
- **wdr:mobile**
- **wholecloud**: WholeCloud
- **Wimp**
- **Wistia**
- - **wnl**: npo.nl and ntr.nl
+ - **wnl**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl
- **WorldStarHipHop**
- **wrzuta.pl**
- **wrzuta.pl:playlist**
- **WSJArticle**
- **XBef**
- **XboxClips**
- - **XFileShare**: XFileShare based sites: DaClips, FileHoot, GorillaVid, MovPod, PowerWatch, Rapidvideo.ws, TheVideoBee, Vidto, Streamin.To, XVIDSTAGE, Vid ABC, VidBom, vidlo
+ - **XFileShare**: XFileShare based sites: DaClips, FileHoot, GorillaVid, MovPod, PowerWatch, Rapidvideo.ws, TheVideoBee, Vidto, Streamin.To, XVIDSTAGE, Vid ABC, VidBom, vidlo, RapidVideo.TV, FastVideo.me
- **XHamster**
- **XHamsterEmbed**
 - **xiami:album**: 虾米音乐 - 专辑
- **XVideos**
- **XXXYMovies**
- **Yahoo**: Yahoo screen and movies
- - **Yam**: 蕃薯藤yam天空部落
+ - **YandexDisk**
 - **yandexmusic:album**: Яндекс.Музыка - Альбом
 - **yandexmusic:playlist**: Яндекс.Музыка - Плейлист
 - **yandexmusic:track**: Яндекс.Музыка - Трек
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from test.helper import FakeYDL, expect_dict, expect_value
+from youtube_dl.compat import compat_etree_fromstring
from youtube_dl.extractor.common import InfoExtractor
from youtube_dl.extractor import YoutubeIE, get_info_extractor
from youtube_dl.utils import encode_data_uri, strip_jsonp, ExtractorError, RegexNotFoundError
self.ie._sort_formats(formats)
expect_value(self, formats, expected_formats, None)
+ def test_parse_mpd_formats(self):
+ _TEST_CASES = [
+ (
+ # https://github.com/rg3/youtube-dl/issues/13919
+ 'float_duration',
+ 'http://unknown/manifest.mpd',
+ [{
+ 'manifest_url': 'http://unknown/manifest.mpd',
+ 'ext': 'mp4',
+ 'format_id': '318597',
+ 'format_note': 'DASH video',
+ 'protocol': 'http_dash_segments',
+ 'acodec': 'none',
+ 'vcodec': 'avc1.42001f',
+ 'tbr': 318.597,
+ 'width': 340,
+ 'height': 192,
+ }, {
+ 'manifest_url': 'http://unknown/manifest.mpd',
+ 'ext': 'mp4',
+ 'format_id': '638590',
+ 'format_note': 'DASH video',
+ 'protocol': 'http_dash_segments',
+ 'acodec': 'none',
+ 'vcodec': 'avc1.42001f',
+ 'tbr': 638.59,
+ 'width': 512,
+ 'height': 288,
+ }, {
+ 'manifest_url': 'http://unknown/manifest.mpd',
+ 'ext': 'mp4',
+ 'format_id': '1022565',
+ 'format_note': 'DASH video',
+ 'protocol': 'http_dash_segments',
+ 'acodec': 'none',
+ 'vcodec': 'avc1.4d001f',
+ 'tbr': 1022.565,
+ 'width': 688,
+ 'height': 384,
+ }, {
+ 'manifest_url': 'http://unknown/manifest.mpd',
+ 'ext': 'mp4',
+ 'format_id': '2046506',
+ 'format_note': 'DASH video',
+ 'protocol': 'http_dash_segments',
+ 'acodec': 'none',
+ 'vcodec': 'avc1.4d001f',
+ 'tbr': 2046.506,
+ 'width': 1024,
+ 'height': 576,
+ }, {
+ 'manifest_url': 'http://unknown/manifest.mpd',
+ 'ext': 'mp4',
+ 'format_id': '3998017',
+ 'format_note': 'DASH video',
+ 'protocol': 'http_dash_segments',
+ 'acodec': 'none',
+ 'vcodec': 'avc1.640029',
+ 'tbr': 3998.017,
+ 'width': 1280,
+ 'height': 720,
+ }, {
+ 'manifest_url': 'http://unknown/manifest.mpd',
+ 'ext': 'mp4',
+ 'format_id': '5997485',
+ 'format_note': 'DASH video',
+ 'protocol': 'http_dash_segments',
+ 'acodec': 'none',
+ 'vcodec': 'avc1.640032',
+ 'tbr': 5997.485,
+ 'width': 1920,
+ 'height': 1080,
+ }]
+ ),
+ ]
+
+ for mpd_file, mpd_url, expected_formats in _TEST_CASES:
+ with io.open('./test/testdata/mpd/%s.mpd' % mpd_file,
+ mode='r', encoding='utf-8') as f:
+ formats = self.ie._parse_mpd_formats(
+ compat_etree_fromstring(f.read().encode('utf-8')),
+ mpd_url=mpd_url)
+ self.ie._sort_formats(formats)
+ expect_value(self, formats, expected_formats, None)
+
if __name__ == '__main__':
unittest.main()
'id': 'testid',
'title': 'testttitle',
'extractor': 'testex',
+ 'extractor_key': 'TestEx',
}
res.update(**kwargs)
return res
ydl = YDL({'format': 'best[height>360]'})
self.assertRaises(ExtractorError, ydl.process_ie_result, info_dict.copy())
+ def test_format_selection_issue_10083(self):
+ # See https://github.com/rg3/youtube-dl/issues/10083
+ formats = [
+ {'format_id': 'regular', 'height': 360, 'url': TEST_URL},
+ {'format_id': 'video', 'height': 720, 'acodec': 'none', 'url': TEST_URL},
+ {'format_id': 'audio', 'vcodec': 'none', 'url': TEST_URL},
+ ]
+ info_dict = _make_result(formats)
+
+ ydl = YDL({'format': 'best[height>360]/bestvideo[height>360]+bestaudio'})
+ ydl.process_ie_result(info_dict.copy())
+ self.assertEqual(ydl.downloaded_info_dicts[0]['format_id'], 'video+audio')
+
def test_invalid_format_specs(self):
def assert_syntax_error(format_spec):
ydl = YDL({'format': format_spec})
pass
self.assertEqual(ydl.downloaded_info_dicts, [])
+ def test_default_format_spec(self):
+ ydl = YDL({'simulate': True})
+ self.assertEqual(ydl._default_format_spec({}), 'bestvideo+bestaudio/best')
+
+ ydl = YDL({'outtmpl': '-'})
+ self.assertEqual(ydl._default_format_spec({}), 'best')
+
+ ydl = YDL({})
+ self.assertEqual(ydl._default_format_spec({}, download=False), 'bestvideo+bestaudio/best')
+ self.assertEqual(ydl._default_format_spec({'is_live': True}), 'best')
+
class TestYoutubeDL(unittest.TestCase):
def test_subtitles(self):
'ext': 'mp4',
'width': None,
'height': 1080,
+ 'title1': '$PATH',
+ 'title2': '%PATH%',
}
def fname(templ):
self.assertEqual(fname('%(height)0 6d.%(ext)s'), ' 01080.mp4')
self.assertEqual(fname('%(height)0   6d.%(ext)s'), ' 01080.mp4')
self.assertEqual(fname('%(height)   0   6d.%(ext)s'), ' 01080.mp4')
+ self.assertEqual(fname('%%'), '%')
+ self.assertEqual(fname('%%%%'), '%%')
self.assertEqual(fname('%%(height)06d.%(ext)s'), '%(height)06d.mp4')
self.assertEqual(fname('%(width)06d.%(ext)s'), 'NA.mp4')
self.assertEqual(fname('%(width)06d.%%(ext)s'), 'NA.%(ext)s')
self.assertEqual(fname('%%(width)06d.%(ext)s'), '%(width)06d.mp4')
+ self.assertEqual(fname('Hello %(title1)s'), 'Hello $PATH')
+ self.assertEqual(fname('Hello %(title2)s'), 'Hello %PATH%')
def test_format_note(self):
ydl = YoutubeDL()
'_type': 'url_transparent',
'url': 'foo2:',
'ie_key': 'Foo2',
- 'title': 'foo1 title'
+ 'title': 'foo1 title',
+ 'id': 'foo1_id',
}
class Foo2IE(InfoExtractor):
downloaded = ydl.downloaded_info_dicts[0]
self.assertEqual(downloaded['url'], TEST_URL)
self.assertEqual(downloaded['title'], 'foo1 title')
+ self.assertEqual(downloaded['id'], 'testid')
+ self.assertEqual(downloaded['extractor'], 'testex')
+ self.assertEqual(downloaded['extractor_key'], 'TestEx')
if __name__ == '__main__':
--- /dev/null
+# coding: utf-8
+
+from __future__ import unicode_literals
+
+# Allow direct execution
+import os
+import sys
+import unittest
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from youtube_dl.options import _hide_login_info
+
+
+class TestOptions(unittest.TestCase):
+ def test_hide_login_info(self):
+ self.assertEqual(_hide_login_info(['-u', 'foo', '-p', 'bar']),
+ ['-u', 'PRIVATE', '-p', 'PRIVATE'])
+ self.assertEqual(_hide_login_info(['-u']), ['-u'])
+ self.assertEqual(_hide_login_info(['-u', 'foo', '-u', 'bar']),
+ ['-u', 'PRIVATE', '-u', 'PRIVATE'])
+ self.assertEqual(_hide_login_info(['--username=foo']),
+ ['--username=PRIVATE'])
+
+
+if __name__ == '__main__':
+ unittest.main()
compat_chr,
compat_etree_fromstring,
compat_getenv,
+ compat_os_name,
compat_setenv,
compat_urlparse,
compat_parse_qs,
self.assertEqual(unescapeHTML('&#47;'), '/')
self.assertEqual(unescapeHTML('&eacute;'), 'é')
self.assertEqual(unescapeHTML('&#2013266066;'), '&#2013266066;')
+ self.assertEqual(unescapeHTML('&a&quot;'), '&a"')
# HTML5 entities
self.assertEqual(unescapeHTML('&period;&apos;'), '.\'')
self.assertEqual(unified_timestamp('May 16, 2016 11:15 PM'), 1463440500)
self.assertEqual(unified_timestamp('Feb 7, 2016 at 6:35 pm'), 1454870100)
self.assertEqual(unified_timestamp('2017-03-30T17:52:41Q'), 1490896361)
+ self.assertEqual(unified_timestamp('Sep 11, 2013 | 5:49 AM'), 1378878540)
def test_determine_ext(self):
self.assertEqual(determine_ext('http://example.com/foo/bar.mp4/?download'), 'mp4')
def test_shell_quote(self):
args = ['ffmpeg', '-i', encodeFilename('ñ€ß\'.mp4')]
- self.assertEqual(shell_quote(args), """ffmpeg -i 'ñ€ß'"'"'.mp4'""")
+ self.assertEqual(
+ shell_quote(args),
+ """ffmpeg -i 'ñ€ß'"'"'.mp4'""" if compat_os_name != 'nt' else '''ffmpeg -i "ñ€ß'.mp4"''')
def test_str_to_int(self):
self.assertEqual(str_to_int('123,456'), 123456)
d = json.loads(stripped)
self.assertEqual(d, {'status': 'success'})
+ stripped = strip_jsonp('window.cb && window.cb({"status": "success"});')
+ d = json.loads(stripped)
+ self.assertEqual(d, {'status': 'success'})
+
+ stripped = strip_jsonp('window.cb && cb({"status": "success"});')
+ d = json.loads(stripped)
+ self.assertEqual(d, {'status': 'success'})
+
def test_uppercase_escape(self):
self.assertEqual(uppercase_escape('aä'), 'aä')
self.assertEqual(uppercase_escape('\\U0001d550'), '𝕐')
supports_outside_bmp = False
if supports_outside_bmp:
self.assertEqual(extract_attributes('<e x="Smile 😀!">'), {'x': 'Smile \U0001f600!'})
+ # Malformed HTML should not break attributes extraction on older Python
+ self.assertEqual(extract_attributes('<mal"formed/>'), {})
def test_clean_html(self):
self.assertEqual(clean_html('a:\nb'), 'a: b')
def test_args_to_str(self):
self.assertEqual(
args_to_str(['foo', 'ba/r', '-baz', '2 be', '']),
- 'foo ba/r -baz \'2 be\' \'\''
+ 'foo ba/r -baz \'2 be\' \'\'' if compat_os_name != 'nt' else 'foo ba/r -baz "2 be" ""'
)
def test_parse_filesize(self):
<p begin="3" dur="-1">Ignored, three</p>
</div>
</body>
- </tt>'''
+ </tt>'''.encode('utf-8')
srt_data = '''1
00:00:00,000 --> 00:00:01,000
The following line contains Chinese characters and special symbols
<p begin="0" end="1">The first line</p>
</div>
</body>
- </tt>'''
+ </tt>'''.encode('utf-8')
srt_data = '''1
00:00:00,000 --> 00:00:01,000
The first line
<p style="s1" tts:textDecoration="underline" begin="00:00:09.56" id="p2" end="00:00:12.36"><span style="s2" tts:color="lime">inner<br /> </span>style</p>
</div>
</body>
-</tt>'''
+</tt>'''.encode('utf-8')
srt_data = '''1
00:00:02,080 --> 00:00:05,839
<font color="white" face="sansSerif" size="16">default style<font color="red">custom style</font></font>
'''
self.assertEqual(dfxp2srt(dfxp_data_with_style), srt_data)
+ dfxp_data_non_utf8 = '''<?xml version="1.0" encoding="UTF-16"?>
+ <tt xmlns="http://www.w3.org/ns/ttml" xml:lang="en" xmlns:tts="http://www.w3.org/ns/ttml#parameter">
+ <body>
+ <div xml:lang="en">
+ <p begin="0" end="1">Line 1</p>
+ <p begin="1" end="2">ē¬¬äŗč”</p>
+ </div>
+ </body>
+ </tt>'''.encode('utf-16')
+ srt_data = '''1
+00:00:00,000 --> 00:00:01,000
+Line 1
+
+2
+00:00:01,000 --> 00:00:02,000
+第二行
+
+'''
+ self.assertEqual(dfxp2srt(dfxp_data_non_utf8), srt_data)
+
def test_cli_option(self):
self.assertEqual(cli_option({'proxy': '127.0.0.1:3128'}, '--proxy', 'proxy'), ['--proxy', '127.0.0.1:3128'])
self.assertEqual(cli_option({'proxy': None}, '--proxy', 'proxy'), [])
cli_bool_option(
{'nocheckcertificate': False}, '--check-certificate', 'nocheckcertificate', 'false', 'true', '='),
['--check-certificate=true'])
+ self.assertEqual(
+ cli_bool_option(
+ {}, '--check-certificate', 'nocheckcertificate', 'false', 'true', '='),
+ [])
def test_ohdave_rsa_encrypt(self):
N = 0xab86b6371b5318aaa1d3c9e612a9f1264f372323c8c0f19875b5fc3b3fd3afcc1e5bec527aa94bfa85bffc157e4245aebda05389a5357b75115ac94f074aefcd
self.assertEqual(get_element_by_attribute('class', 'foo', html), None)
self.assertEqual(get_element_by_attribute('class', 'no-such-foo', html), None)
+ html = '''
+ <div itemprop="author" itemscope>foo</div>
+ '''
+
+ self.assertEqual(get_element_by_attribute('itemprop', 'author', html), 'foo')
+
def test_get_elements_by_class(self):
html = '''
<span class="foo bar">nice</span><span class="foo bar">also nice</span>
'title': '3 - Из серпов луны...[Iz serpov luny]',
}]
),
+ (
+ # https://www.youtube.com/watch?v=xZW70zEasOk
+ # time point more than duration
+ '''● LCS Spring finals: Saturday and Sunday from <a href="#" onclick="yt.www.watch.player.seekTo(13*60+30);return false;">13:30</a> outside the venue! <br />● PAX East: Fri, Sat & Sun - more info in tomorrows video on the main channel!''',
+ 283,
+ []
+ ),
]
def test_youtube_chapters(self):
.TP
.B \-\-match\-filter \f[I]FILTER\f[]
Generic video filter.
-Specify any key (see help for \-o for a list of available keys) to match
-if the key is present, !key to check if the key is not present, key >
-NUMBER (like "comment_count > 12", also works with >=, <, <=, !=, =) to
-compare against a number, key = \[aq]LITERAL\[aq] (like "uploader =
-\[aq]Mike Smith\[aq]", also works with !=) to match against a string
+Specify any key (see the "OUTPUT TEMPLATE" for a list of available keys)
+to match if the key is present, !key to check if the key is not present,
+key > NUMBER (like "comment_count > 12", also works with >=, <, <=, !=,
+=) to compare against a number, key = \[aq]LITERAL\[aq] (like "uploader
+= \[aq]Mike Smith\[aq]", also works with !=) to match against a string
literal and & to require multiple matches.
Values which are not known are excluded unless you put a question mark
(?) after the operator.
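For example, to only match videos that have been liked more than 100
times and disliked less than 50 times (or the dislike functionality is
not available at the given service), but who also have a description,
use \-\-match\-filter "like_count > 100 & dislike_count <? 50 &
description".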
.TP
.B \-j, \-\-dump\-json
Simulate, quiet but print JSON information.
-See \-\-output for a description of available keys.
+See the "OUTPUT TEMPLATE" for a description of available keys.
.RS
.RE
.TP
.RE
.TP
.B \-\-convert\-subs \f[I]FORMAT\f[]
-Convert the subtitles to other format (currently supported: srt|ass|vtt)
+Convert the subtitles to other format (currently supported:
+srt|ass|vtt|lrc)
.RS
.RE
.SH CONFIGURATION
execution and prevent tracking plain text passwords in the shell command
history.
You can achieve this using a \f[C]\&.netrc\f[]
-file (http://stackoverflow.com/tags/.netrc/info) on a per extractor
+file (https://stackoverflow.com/tags/.netrc/info) on a per extractor
basis.
For that you will need to create a \f[C]\&.netrc\f[] file in your
\f[C]$HOME\f[] and restrict permissions to read/write by only you:
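.IP
.nf
\f[C]
touch\ $HOME/.netrc
chmod\ a\-rwx,u+rw\ $HOME/.netrc
\f[]
.fi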
.PP
On Windows you may also need to setup the \f[C]%HOME%\f[] environment
variable manually.
+For example:
+.IP
+.nf
+\f[C]
+set\ HOME=%USERPROFILE%
+\f[]
+.fi
.SH OUTPUT TEMPLATE
.PP
The \f[C]\-o\f[] option allows users to indicate a template for the
.PP
The basic usage is not to set any template arguments when downloading a
single file, like in
-\f[C]youtube\-dl\ \-o\ funny_video.flv\ "http://some/video"\f[].
+\f[C]youtube\-dl\ \-o\ funny_video.flv\ "https://some/video"\f[].
However, it may contain special sequences that will be replaced when
downloading each video.
The special sequences may be formatted according to python string
\f[C]playlist_title\f[] (string): Playlist title
.PP
Available for the video that belongs to some logical chapter or section:
-\- \f[C]chapter\f[] (string): Name or title of the chapter the video
-belongs to \- \f[C]chapter_number\f[] (numeric): Number of the chapter
-the video belongs to \- \f[C]chapter_id\f[] (string): Id of the chapter
-the video belongs to
+.IP \[bu] 2
+\f[C]chapter\f[] (string): Name or title of the chapter the video
+belongs to
+.IP \[bu] 2
+\f[C]chapter_number\f[] (numeric): Number of the chapter the video
+belongs to
+.IP \[bu] 2
+\f[C]chapter_id\f[] (string): Id of the chapter the video belongs to
.PP
Available for the video that is an episode of some series or programme:
-\- \f[C]series\f[] (string): Title of the series or programme the video
-episode belongs to \- \f[C]season\f[] (string): Title of the season the
-video episode belongs to \- \f[C]season_number\f[] (numeric): Number of
-the season the video episode belongs to \- \f[C]season_id\f[] (string):
-Id of the season the video episode belongs to \- \f[C]episode\f[]
-(string): Title of the video episode \- \f[C]episode_number\f[]
-(numeric): Number of the video episode within a season \-
+.IP \[bu] 2
+\f[C]series\f[] (string): Title of the series or programme the video
+episode belongs to
+.IP \[bu] 2
+\f[C]season\f[] (string): Title of the season the video episode belongs
+to
+.IP \[bu] 2
+\f[C]season_number\f[] (numeric): Number of the season the video episode
+belongs to
+.IP \[bu] 2
+\f[C]season_id\f[] (string): Id of the season the video episode belongs
+to
+.IP \[bu] 2
+\f[C]episode\f[] (string): Title of the video episode
+.IP \[bu] 2
+\f[C]episode_number\f[] (numeric): Number of the video episode within a
+season
+.IP \[bu] 2
\f[C]episode_id\f[] (string): Id of the video episode
.PP
-Available for the media that is a track or a part of a music album: \-
-\f[C]track\f[] (string): Title of the track \- \f[C]track_number\f[]
-(numeric): Number of the track within an album or a disc \-
-\f[C]track_id\f[] (string): Id of the track \- \f[C]artist\f[] (string):
-Artist(s) of the track \- \f[C]genre\f[] (string): Genre(s) of the track
-\- \f[C]album\f[] (string): Title of the album the track belongs to \-
-\f[C]album_type\f[] (string): Type of the album \- \f[C]album_artist\f[]
-(string): List of all artists appeared on the album \-
+Available for the media that is a track or a part of a music album:
+.IP \[bu] 2
+\f[C]track\f[] (string): Title of the track
+.IP \[bu] 2
+\f[C]track_number\f[] (numeric): Number of the track within an album or
+a disc
+.IP \[bu] 2
+\f[C]track_id\f[] (string): Id of the track
+.IP \[bu] 2
+\f[C]artist\f[] (string): Artist(s) of the track
+.IP \[bu] 2
+\f[C]genre\f[] (string): Genre(s) of the track
+.IP \[bu] 2
+\f[C]album\f[] (string): Title of the album the track belongs to
+.IP \[bu] 2
+\f[C]album_type\f[] (string): Type of the album
+.IP \[bu] 2
+\f[C]album_artist\f[] (string): List of all artists appeared on the
+album
+.IP \[bu] 2
\f[C]disc_number\f[] (numeric): Number of the disc or other physical
-medium the track belongs to \- \f[C]release_year\f[] (numeric): Year
-(YYYY) when the album was released
+medium the track belongs to
+.IP \[bu] 2
+\f[C]release_year\f[] (numeric): Year (YYYY) when the album was released
.PP
Each aforementioned sequence when referenced in an output template will
be replaced by the actual value corresponding to the sequence name.
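For example, for \f[C]\-o\ %(title)s\-%(id)s.%(ext)s\f[] and an mp4
video with title \f[C]youtube\-dl\ test\ video\f[] and id
\f[C]BaW_jenozKcj\f[], this will result in a
\f[C]youtube\-dl\ test\ video\-BaW_jenozKcj.mp4\f[] file created in the
current directory.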
\f[C]\-o\ "C:\\%HOMEPATH%\\Desktop\\%%(title)s.%%(ext)s"\f[].
.SS Output template examples
.PP
-Note on Windows you may need to use double quotes instead of single.
+Note that on Windows you may need to use double quotes instead of
+single.
.IP
.nf
\f[C]
$\ youtube\-dl\ \-u\ user\ \-p\ password\ \-o\ \[aq]~/MyVideos/%(playlist)s/%(chapter_number)s\ \-\ %(chapter)s/%(title)s.%(ext)s\[aq]\ https://www.udemy.com/java\-tutorial/
#\ Download\ entire\ series\ season\ keeping\ each\ series\ and\ each\ season\ in\ separate\ directory\ under\ C:/MyVideos
-$\ youtube\-dl\ \-o\ "C:/MyVideos/%(series)s/%(season_number)s\ \-\ %(season)s/%(episode_number)s\ \-\ %(episode)s.%(ext)s"\ http://videomore.ru/kino_v_detalayah/5_sezon/367617
+$\ youtube\-dl\ \-o\ "C:/MyVideos/%(series)s/%(season_number)s\ \-\ %(season)s/%(episode_number)s\ \-\ %(episode)s.%(ext)s"\ https://videomore.ru/kino_v_detalayah/5_sezon/367617
#\ Stream\ the\ video\ being\ downloaded\ to\ stdout
$\ youtube\-dl\ \-o\ \-\ BaW_jenozKc
\f[C]container\f[]: Name of the container format \- \f[C]protocol\f[]:
The protocol that will be used for the actual download, lower\-case
(\f[C]http\f[], \f[C]https\f[], \f[C]rtsp\f[], \f[C]rtmp\f[],
-\f[C]rtmpe\f[], \f[C]mms\f[], \f[C]f4m\f[], \f[C]ism\f[], \f[C]m3u8\f[],
-or \f[C]m3u8_native\f[]) \- \f[C]format_id\f[]: A short description of
-the format
+\f[C]rtmpe\f[], \f[C]mms\f[], \f[C]f4m\f[], \f[C]ism\f[],
+\f[C]http_dash_segments\f[], \f[C]m3u8\f[], or \f[C]m3u8_native\f[]) \-
+\f[C]format_id\f[]: A short description of the format
.PP
Note that none of the aforementioned meta fields are guaranteed to be
present since this solely depends on the metadata obtained by particular
order not to type it every time you run youtube\-dl.
.SS Format selection examples
.PP
-Note on Windows you may need to use double quotes instead of single.
+Note that on Windows you may need to use double quotes instead of
+single.
.IP
.nf
\f[C]
.SS How do I update youtube\-dl?
.PP
If you\[aq]ve followed our manual installation
-instructions (http://rg3.github.io/youtube-dl/download.html), you can
+instructions (https://rg3.github.io/youtube-dl/download.html), you can
simply run \f[C]youtube\-dl\ \-U\f[] (or, on Linux,
\f[C]sudo\ youtube\-dl\ \-U\f[]).
.PP
Note that distribution packages are often outdated.
As a rule of thumb, youtube\-dl releases at least once a month, and
often weekly or even daily.
-Simply go to http://yt\-dl.org/ to find out the current version.
+Simply go to https://yt\-dl.org to find out the current version.
Unfortunately, there is nothing we youtube\-dl developers can do if your
distribution serves a really outdated version.
You can (and should) complain to your distribution in their bugtracker
.fi
.PP
Afterwards, simply follow our manual installation
-instructions (http://rg3.github.io/youtube-dl/download.html):
+instructions (https://rg3.github.io/youtube-dl/download.html):
.IP
.nf
\f[C]
Videos or video formats streamed via RTMP protocol can only be
downloaded when rtmpdump (https://rtmpdump.mplayerhq.hu/) is installed.
Downloading MMS and RTSP videos requires either
-mplayer (http://mplayerhq.hu/) or mpv (https://mpv.io/) to be installed.
+mplayer (https://mplayerhq.hu/) or mpv (https://mpv.io/) to be
+installed.
.SS I have downloaded a video but how can I play it?
.PP
Once the video is fully downloaded, use any video player, such as
-mpv (https://mpv.io/), vlc (http://www.videolan.org/) or
-mplayer (http://www.mplayerhq.hu/).
+mpv (https://mpv.io/), vlc (https://www.videolan.org/) or
+mplayer (https://www.mplayerhq.hu/).
.SS I extracted a video URL with \f[C]\-g\f[], but it does not play on
another machine / in my web browser.
.PP
configuration file (#configuration).
.SS How do I download a video starting with a \f[C]\-\f[]?
.PP
-Either prepend \f[C]http://www.youtube.com/watch?v=\f[] or separate the
+Either prepend \f[C]https://www.youtube.com/watch?v=\f[] or separate the
ID from the options with \f[C]\-\-\f[]:
.IP
.nf
\f[C]
youtube\-dl\ \-\-\ \-wNyEUrxzFU
-youtube\-dl\ "http://www.youtube.com/watch?v=\-wNyEUrxzFU"
+youtube\-dl\ "https://www.youtube.com/watch?v=\-wNyEUrxzFU"
\f[]
.fi
.SS How do I pass cookies to youtube\-dl?
You will first need to tell youtube\-dl to stream media to stdout with
\f[C]\-o\ \-\f[], and also tell your media player to read from stdin (it
must be capable of this for streaming) and then pipe the former to the
latter.
-For example, streaming to vlc (http://www.videolan.org/) can be achieved
-with:
+For example, streaming to vlc (https://www.videolan.org/) can be
+achieved with:
.IP
.nf
\f[C]
-youtube\-dl\ \-o\ \-\ "http://www.youtube.com/watch?v=BaW_jenozKcj"\ |\ vlc\ \-
+youtube\-dl\ \-o\ \-\ "https://www.youtube.com/watch?v=BaW_jenozKcj"\ |\ vlc\ \-
\f[]
.fi
.SS How do I download only new videos from a playlist?
.PP
In particular, the generic extractor (used when your website is not in
the list of supported sites by
-youtube\-dl (http://rg3.github.io/youtube-dl/supportedsites.html) cannot
-mandate one specific downloader.
+youtube\-dl (https://rg3.github.io/youtube-dl/supportedsites.html)
+cannot mandate one specific downloader.
.PP
If you put either \f[C]\-\-hls\-prefer\-native\f[] or
\f[C]\-\-hls\-prefer\-ffmpeg\f[] into your configuration, a different
For one, have a look at the list of supported
sites (docs/supportedsites.md).
Note that it can sometimes happen that the site changes its URL scheme
-(say, from http://example.com/video/1234567 to
-http://example.com/v/1234567 ) and youtube\-dl reports an URL of a
+(say, from https://example.com/video/1234567 to
+https://example.com/v/1234567) and youtube\-dl reports a URL of a
service in that list as unsupported.
In that case, simply report a bug.
.PP
.SH DEVELOPER INSTRUCTIONS
.PP
Most users do not need to build youtube\-dl and can download the
-builds (http://rg3.github.io/youtube-dl/download.html) or get them from
+builds (https://rg3.github.io/youtube-dl/download.html) or get them from
their distribution.
.PP
To run youtube\-dl as a developer, you don\[aq]t need to build anything
\f[]
.fi
.PP
+See item 6 of new extractor tutorial (#adding-support-for-a-new-site)
+for how to run extractor specific test cases.
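+For example:
+.IP
+.nf
+\f[C]
+python\ test/test_download.py\ TestDownload.test_YourExtractor
+\f[]
+.fi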
+.PP
If you want to create a build of youtube\-dl yourself, you\[aq]ll need
.IP \[bu] 2
python
class\ YourExtractorIE(InfoExtractor):
\ \ \ \ _VALID_URL\ =\ r\[aq]https?://(?:www\\.)?yourextractor\\.com/watch/(?P<id>[0\-9]+)\[aq]
\ \ \ \ _TEST\ =\ {
-\ \ \ \ \ \ \ \ \[aq]url\[aq]:\ \[aq]http://yourextractor.com/watch/42\[aq],
+\ \ \ \ \ \ \ \ \[aq]url\[aq]:\ \[aq]https://yourextractor.com/watch/42\[aq],
\ \ \ \ \ \ \ \ \[aq]md5\[aq]:\ \[aq]TODO:\ md5\ sum\ of\ the\ first\ 10241\ bytes\ of\ the\ video\ file\ (use\ \-\-test)\[aq],
\ \ \ \ \ \ \ \ \[aq]info_dict\[aq]:\ {
\ \ \ \ \ \ \ \ \ \ \ \ \[aq]id\[aq]:\ \[aq]42\[aq],
The tests will then be named \f[C]TestDownload.test_YourExtractor\f[],
\f[C]TestDownload.test_YourExtractor_1\f[],
\f[C]TestDownload.test_YourExtractor_2\f[], etc.
+Note that tests with an \f[C]only_matching\f[] key in the test\[aq]s
+dict are not counted; see the sketch below.
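+A minimal sketch of such a test entry (the URL here is illustrative):
+.IP
+.nf
+\f[C]
+{
+\ \ \ \ \[aq]url\[aq]:\ \[aq]https://yourextractor.com/embed/42\[aq],
+\ \ \ \ \[aq]only_matching\[aq]:\ True,
+}
+\f[]
+.fi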
.IP " 7." 4
Have a look at
\f[C]youtube_dl/extractor/common.py\f[] (https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py)
Make sure your code follows youtube\-dl coding
conventions (#youtube-dl-coding-conventions) and check the code with
flake8 (https://pypi.python.org/pypi/flake8).
-Also make sure your code works under all Python (http://www.python.org/)
-versions claimed supported by youtube\-dl, namely 2.6, 2.7, and 3.2+.
+Also make sure your code works under all
+Python (https://www.python.org/) versions claimed supported by
+youtube\-dl, namely 2.6, 2.7, and 3.2+.
.IP " 9." 4
-When the tests pass, add (http://git-scm.com/docs/git-add) the new files
-and commit (http://git-scm.com/docs/git-commit) them and
-push (http://git-scm.com/docs/git-push) the result, like this:
+When the tests pass, add (https://git-scm.com/docs/git-add) the new
+files and commit (https://git-scm.com/docs/git-commit) them and
+push (https://git-scm.com/docs/git-push) the result, like this:
.RS 4
.IP
.nf
ydl_opts\ =\ {}
with\ youtube_dl.YoutubeDL(ydl_opts)\ as\ ydl:
-\ \ \ \ ydl.download([\[aq]http://www.youtube.com/watch?v=BaW_jenozKc\[aq]])
+\ \ \ \ ydl.download([\[aq]https://www.youtube.com/watch?v=BaW_jenozKc\[aq]])
\f[]
.fi
.PP
\ \ \ \ \[aq]progress_hooks\[aq]:\ [my_hook],
}
with\ youtube_dl.YoutubeDL(ydl_opts)\ as\ ydl:
-\ \ \ \ ydl.download([\[aq]http://www.youtube.com/watch?v=BaW_jenozKc\[aq]])
+\ \ \ \ ydl.download([\[aq]https://www.youtube.com/watch?v=BaW_jenozKc\[aq]])
\f[]
.fi
.SH BUGS
via personal email.
For discussions, join us in the IRC channel
#youtube\-dl (irc://chat.freenode.net/#youtube-dl) on freenode
-(webchat (http://webchat.freenode.net/?randomnick=1&channels=youtube-dl)).
+(webchat (https://webchat.freenode.net/?randomnick=1&channels=youtube-dl)).
.PP
\f[B]Please include the full output of youtube\-dl when run with
\f[C]\-v\f[]\f[], i.e.
$\ youtube\-dl\ \-v\ <your\ command\ line>
[debug]\ System\ config:\ []
[debug]\ User\ config:\ []
-[debug]\ Command\-line\ args:\ [u\[aq]\-v\[aq],\ u\[aq]http://www.youtube.com/watch?v=BaW_jenozKcj\[aq]]
+[debug]\ Command\-line\ args:\ [u\[aq]\-v\[aq],\ u\[aq]https://www.youtube.com/watch?v=BaW_jenozKcj\[aq]]
[debug]\ Encodings:\ locale\ cp1251,\ fs\ mbcs,\ out\ cp866,\ pref\ cp1251
[debug]\ youtube\-dl\ version\ 2015.12.06
[debug]\ Git\ HEAD:\ 135392e
.PP
\f[B]Site support requests must contain an example URL\f[].
An example URL is a URL you might want to download, like
-\f[C]http://www.youtube.com/watch?v=BaW_jenozKc\f[].
+\f[C]https://www.youtube.com/watch?v=BaW_jenozKc\f[].
There should be an obvious video present.
Except under very special circumstances, the main page of a video
service (e.g.
-\f[C]http://www.youtube.com/\f[]) is \f[I]not\f[] an example URL.
+\f[C]https://www.youtube.com/\f[]) is \f[I]not\f[] an example URL.
.SS Are you using the latest version?
.PP
Before reporting any issue, type \f[C]youtube\-dl\ \-U\f[].
complete --command youtube-dl --long-option dateafter --description 'Download only videos uploaded on or after this date (i.e. inclusive)'
complete --command youtube-dl --long-option min-views --description 'Do not download any videos with less than COUNT views'
complete --command youtube-dl --long-option max-views --description 'Do not download any videos with more than COUNT views'
-complete --command youtube-dl --long-option match-filter --description 'Generic video filter. Specify any key (see help for -o for a list of available keys) to match if the key is present, !key to check if the key is not present, key > NUMBER (like "comment_count > 12", also works with >=, <, <=, !=, =) to compare against a number, key = '"'"'LITERAL'"'"' (like "uploader = '"'"'Mike Smith'"'"'", also works with !=) to match against a string literal and & to require multiple matches. Values which are not known are excluded unless you put a question mark (?) after the operator. For example, to only match videos that have been liked more than 100 times and disliked less than 50 times (or the dislike functionality is not available at the given service), but who also have a description, use --match-filter "like_count > 100 & dislike_count <? 50 & description" .'
+complete --command youtube-dl --long-option match-filter --description 'Generic video filter. Specify any key (see the "OUTPUT TEMPLATE" for a list of available keys) to match if the key is present, !key to check if the key is not present, key > NUMBER (like "comment_count > 12", also works with >=, <, <=, !=, =) to compare against a number, key = '"'"'LITERAL'"'"' (like "uploader = '"'"'Mike Smith'"'"'", also works with !=) to match against a string literal and & to require multiple matches. Values which are not known are excluded unless you put a question mark (?) after the operator. For example, to only match videos that have been liked more than 100 times and disliked less than 50 times (or the dislike functionality is not available at the given service), but who also have a description, use --match-filter "like_count > 100 & dislike_count <? 50 & description" .'
complete --command youtube-dl --long-option no-playlist --description 'Download only the video, if the URL refers to a video and a playlist.'
complete --command youtube-dl --long-option yes-playlist --description 'Download the playlist, if the URL refers to a video and a playlist.'
complete --command youtube-dl --long-option age-limit --description 'Download only videos suitable for the given age'
complete --command youtube-dl --long-option get-duration --description 'Simulate, quiet but print video length'
complete --command youtube-dl --long-option get-filename --description 'Simulate, quiet but print output filename'
complete --command youtube-dl --long-option get-format --description 'Simulate, quiet but print output format'
-complete --command youtube-dl --long-option dump-json --short-option j --description 'Simulate, quiet but print JSON information. See --output for a description of available keys.'
+complete --command youtube-dl --long-option dump-json --short-option j --description 'Simulate, quiet but print JSON information. See the "OUTPUT TEMPLATE" for a description of available keys.'
complete --command youtube-dl --long-option dump-single-json --short-option J --description 'Simulate, quiet but print JSON information for each command-line argument. If the URL refers to a playlist, dump the whole playlist information in a single line.'
complete --command youtube-dl --long-option print-json --description 'Be quiet and print the video information as JSON (video is still being downloaded).'
complete --command youtube-dl --long-option newline --description 'Output progress bar as new lines'
complete --command youtube-dl --long-option prefer-ffmpeg --description 'Prefer ffmpeg over avconv for running the postprocessors'
complete --command youtube-dl --long-option ffmpeg-location --description 'Location of the ffmpeg/avconv binary; either the path to the binary or its containing directory.'
complete --command youtube-dl --long-option exec --description 'Execute a command on the file after downloading, similar to find'"'"'s -exec syntax. Example: --exec '"'"'adb push {} /sdcard/Music/ && rm {}'"'"''
-complete --command youtube-dl --long-option convert-subs --description 'Convert the subtitles to other format (currently supported: srt|ass|vtt)'
+complete --command youtube-dl --long-option convert-subs --description 'Convert the subtitles to other format (currently supported: srt|ass|vtt|lrc)'
complete --command youtube-dl --arguments ":ytfavorites :ytrecommended :ytsubscriptions :ytwatchlater :ythistory"
import traceback
import random
+from string import ascii_letters
+
from .compat import (
compat_basestring,
compat_cookiejar,
format_bytes,
formatSeconds,
GeoRestrictedError,
+ int_or_none,
ISO3166Utils,
locked_file,
make_HTTPS_handler,
)
from .cache import Cache
from .extractor import get_info_extractor, gen_extractor_classes, _LAZY_LOADER
+from .extractor.openload import PhantomJSwrapper
from .downloader import get_suitable_downloader
from .downloader.rtmp import rtmpdump_version
from .postprocessor import (
postprocessor.
"""
+ _NUMERIC_FIELDS = set((
+ 'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
+ 'timestamp', 'upload_year', 'upload_month', 'upload_day',
+ 'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
+ 'average_rating', 'comment_count', 'age_limit',
+ 'start_time', 'end_time',
+ 'chapter_number', 'season_number', 'episode_number',
+ 'track_number', 'disc_number', 'release_year',
+ 'playlist_index',
+ ))
+
params = None
_ies = []
_pps = []
def to_console_title(self, message):
if not self.params.get('consoletitle', False):
return
- if compat_os_name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
- # c_wchar_p() might not be necessary if `message` is
- # already of type unicode()
- ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
+ if compat_os_name == 'nt':
+ if ctypes.windll.kernel32.GetConsoleWindow():
+ # c_wchar_p() might not be necessary if `message` is
+ # already of type unicode()
+ ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
elif 'TERM' in os.environ:
self._write_string('\033]0;%s\007' % message, self._screen_file)
def save_console_title(self):
if not self.params.get('consoletitle', False):
return
- if 'TERM' in os.environ:
+ if compat_os_name != 'nt' and 'TERM' in os.environ:
# Save the title on stack
self._write_string('\033[22;0t', self._screen_file)
def restore_console_title(self):
if not self.params.get('consoletitle', False):
return
- if 'TERM' in os.environ:
+ if compat_os_name != 'nt' and 'TERM' in os.environ:
# Restore the title from stack
self._write_string('\033[23;0t', self._screen_file)
r'%%(\1)0%dd' % field_size_compat_map[mobj.group('field')],
outtmpl)
- NUMERIC_FIELDS = set((
- 'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
- 'timestamp', 'upload_year', 'upload_month', 'upload_day',
- 'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
- 'average_rating', 'comment_count', 'age_limit',
- 'start_time', 'end_time',
- 'chapter_number', 'season_number', 'episode_number',
- 'track_number', 'disc_number', 'release_year',
- 'playlist_index',
- ))
-
# Missing numeric fields used together with integer presentation types
# in format specification will break the argument substitution since
# string 'NA' is returned for missing fields. We will patch output
# template for missing fields to meet string presentation type.
- for numeric_field in NUMERIC_FIELDS:
+ for numeric_field in self._NUMERIC_FIELDS:
if numeric_field not in template_dict:
# As of [1] format syntax is:
# %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
FORMAT_RE.format(numeric_field),
r'%({0})s'.format(numeric_field), outtmpl)
- filename = expand_path(outtmpl % template_dict)
+ # expand_path translates '%%' into '%' and '$$' into '$'
+ # correspondingly that is not what we want since we need to keep
+ # '%%' intact for template dict substitution step. Working around
+ # with boundary-alike separator hack.
+ sep = ''.join([random.choice(ascii_letters) for _ in range(32)])
+ outtmpl = outtmpl.replace('%%', '%{0}%'.format(sep)).replace('$$', '${0}$'.format(sep))
+
+ # outtmpl should be expand_path'ed before template dict substitution
+ # because meta fields may contain env variables we don't want to
+ # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
+ # title "Hello $PATH", we don't want `$PATH` to be expanded.
+ filename = expand_path(outtmpl).replace(sep, '') % template_dict
+
# Temporary fix for #4787
# 'Treat' all problem characters by passing filename through preferredencoding
# to workaround encoding issues with subprocess on python2 @ Windows
force_properties = dict(
(k, v) for k, v in ie_result.items() if v is not None)
- for f in ('_type', 'url', 'ie_key'):
+ for f in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
if f in force_properties:
del force_properties[f]
new_result = info.copy()
return op(actual_value, comparison_value)
return _filter
+ def _default_format_spec(self, info_dict, download=True):
+ req_format_list = []
+
+ def can_have_partial_formats():
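+ # 'bestvideo+bestaudio' needs a merge step, so it is only offered when
+ # the merged result can actually be produced: not when streaming to
+ # stdout ('-'), not for live streams, and only if the ffmpeg/avconv
+ # merger is usable (simulation and download=False never merge).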
+ if self.params.get('simulate', False):
+ return True
+ if not download:
+ return True
+ if self.params.get('outtmpl', DEFAULT_OUTTMPL) == '-':
+ return False
+ if info_dict.get('is_live'):
+ return False
+ merger = FFmpegMergerPP(self)
+ return merger.available and merger.can_merge()
+ if can_have_partial_formats():
+ req_format_list.append('bestvideo+bestaudio')
+ req_format_list.append('best')
+ return '/'.join(req_format_list)
+
def build_format_selector(self, format_spec):
def syntax_error(note, start):
message = (
if 'title' not in info_dict:
raise ExtractorError('Missing "title" field in extractor result')
- if not isinstance(info_dict['id'], compat_str):
- self.report_warning('"id" field is not a string - forcing string conversion')
- info_dict['id'] = compat_str(info_dict['id'])
+ def report_force_conversion(field, field_not, conversion):
+ self.report_warning(
+ '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
+ % (field, field_not, conversion))
+
+ def sanitize_string_field(info, string_field):
+ field = info.get(string_field)
+ if field is None or isinstance(field, compat_str):
+ return
+ report_force_conversion(string_field, 'a string', 'string')
+ info[string_field] = compat_str(field)
+
+ def sanitize_numeric_fields(info):
+ for numeric_field in self._NUMERIC_FIELDS:
+ field = info.get(numeric_field)
+ if field is None or isinstance(field, compat_numeric_types):
+ continue
+ report_force_conversion(numeric_field, 'numeric', 'int')
+ info[numeric_field] = int_or_none(field)
+
+ sanitize_string_field(info_dict, 'id')
+ sanitize_numeric_fields(info_dict)
if 'playlist' not in info_dict:
# It isn't part of a playlist
if not formats:
raise ExtractorError('No video formats found!')
+ def is_wellformed(f):
+ url = f.get('url')
+ if not url:
+ self.report_warning(
+ '"url" field is missing or empty - skipping format, '
+ 'there is an error in extractor')
+ return False
+ if isinstance(url, bytes):
+ sanitize_string_field(f, 'url')
+ return True
+
+ # Filter out malformed formats for better extraction robustness
+ formats = list(filter(is_wellformed, formats))
+
formats_dict = {}
# We check that all the formats have the format and format_id fields
for i, format in enumerate(formats):
- if 'url' not in format:
- raise ExtractorError('Missing "url" key in result (index %d)' % i)
-
+ sanitize_string_field(format, 'format_id')
+ sanitize_numeric_fields(format)
format['url'] = sanitize_url(format['url'])
-
- if format.get('format_id') is None:
+ if not format.get('format_id'):
format['format_id'] = compat_str(i)
else:
# Sanitize format_id from characters used in format selector expression
req_format = self.params.get('format')
if req_format is None:
- req_format_list = []
- if (self.params.get('outtmpl', DEFAULT_OUTTMPL) != '-' and
- not info_dict.get('is_live')):
- merger = FFmpegMergerPP(self)
- if merger.available and merger.can_merge():
- req_format_list.append('bestvideo+bestaudio')
- req_format_list.append('best')
- req_format = '/'.join(req_format_list)
+ req_format = self._default_format_spec(info_dict, download=download)
+ if self.params.get('verbose'):
+ self.to_stdout('[debug] Default format spec: %s' % req_format)
+
format_selector = self.build_format_selector(req_format)
# While in format selection we may need to have an access to the original
if filename is None:
return
- try:
- dn = os.path.dirname(sanitize_path(encodeFilename(filename)))
- if dn and not os.path.exists(dn):
- os.makedirs(dn)
- except (OSError, IOError) as err:
- self.report_error('unable to create directory ' + error_to_compat_str(err))
+ def ensure_dir_exists(path):
+ try:
+ dn = os.path.dirname(path)
+ if dn and not os.path.exists(dn):
+ os.makedirs(dn)
+ return True
+ except (OSError, IOError) as err:
+ self.report_error('unable to create directory ' + error_to_compat_str(err))
+ return False
+
+ if not ensure_dir_exists(sanitize_path(encodeFilename(filename))):
return
if self.params.get('writedescription', False):
ie = self.get_info_extractor(info_dict['extractor_key'])
for sub_lang, sub_info in subtitles.items():
sub_format = sub_info['ext']
- if sub_info.get('data') is not None:
- sub_data = sub_info['data']
+ sub_filename = subtitles_filename(filename, sub_lang, sub_format)
+ if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
+ self.to_screen('[info] Video subtitle %s.%s is already present' % (sub_lang, sub_format))
else:
- try:
- sub_data = ie._download_webpage(
- sub_info['url'], info_dict['id'], note=False)
- except ExtractorError as err:
- self.report_warning('Unable to download subtitle for "%s": %s' %
- (sub_lang, error_to_compat_str(err.cause)))
- continue
- try:
- sub_filename = subtitles_filename(filename, sub_lang, sub_format)
- if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
- self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
+ self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
+ if sub_info.get('data') is not None:
+ try:
+ # Use newline='' to prevent conversion of newline characters
+ # See https://github.com/rg3/youtube-dl/issues/10268
+ with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile:
+ subfile.write(sub_info['data'])
+ except (OSError, IOError):
+ self.report_error('Cannot write subtitles file ' + sub_filename)
+ return
else:
- self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
- # Use newline='' to prevent conversion of newline characters
- # See https://github.com/rg3/youtube-dl/issues/10268
- with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile:
- subfile.write(sub_data)
- except (OSError, IOError):
- self.report_error('Cannot write subtitles file ' + sub_filename)
- return
+ try:
+ sub_data = ie._request_webpage(
+ sub_info['url'], info_dict['id'], note=False).read()
+ with io.open(encodeFilename(sub_filename), 'wb') as subfile:
+ subfile.write(sub_data)
+ except (ExtractorError, IOError, OSError, ValueError) as err:
+ self.report_warning('Unable to download subtitle for "%s": %s' %
+ (sub_lang, error_to_compat_str(err)))
+ continue
if self.params.get('writeinfojson', False):
infofn = replace_extension(filename, 'info.json', info_dict.get('ext'))
for f in requested_formats:
new_info = dict(info_dict)
new_info.update(f)
- fname = self.prepare_filename(new_info)
- fname = prepend_extension(fname, 'f%s' % f['format_id'], new_info['ext'])
+ fname = prepend_extension(
+ self.prepare_filename(new_info),
+ 'f%s' % f['format_id'], new_info['ext'])
+ if not ensure_dir_exists(fname):
+ return
downloaded.append(fname)
partial_success = dl(fname, new_info)
success = success and partial_success
info_dict.get('protocol') == 'm3u8' and
self.params.get('hls_prefer_native')):
if fixup_policy == 'warn':
- self.report_warning('%s: malformated aac bitstream.' % (
+ self.report_warning('%s: malformed AAC bitstream detected.' % (
info_dict['id']))
elif fixup_policy == 'detect_or_warn':
fixup_pp = FFmpegFixupM3u8PP(self)
info_dict['__postprocessors'].append(fixup_pp)
else:
self.report_warning(
- '%s: malformated aac bitstream. %s'
+ '%s: malformed AAC bitstream detected. %s'
% (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
else:
assert fixup_policy in ('ignore', 'never')
exe_versions = FFmpegPostProcessor.get_versions(self)
exe_versions['rtmpdump'] = rtmpdump_version()
+ exe_versions['phantomjs'] = PhantomJSwrapper._version()
exe_str = ', '.join(
'%s %s' % (exe, v)
for exe, v in sorted(exe_versions.items())
if opts.recodevideo not in ['mp4', 'flv', 'webm', 'ogg', 'mkv', 'avi']:
parser.error('invalid video recode format specified')
if opts.convertsubtitles is not None:
- if opts.convertsubtitles not in ['srt', 'vtt', 'ass']:
+ if opts.convertsubtitles not in ['srt', 'vtt', 'ass', 'lrc']:
parser.error('invalid subtitle format specified')
if opts.date is not None:
import email
import getpass
import io
+import itertools
import optparse
import os
import re
import struct
import subprocess
import sys
-import itertools
import xml.etree.ElementTree
except ImportError: # Python 2
from HTMLParser import HTMLParser as compat_HTMLParser
+try: # Python 2
+ from HTMLParser import HTMLParseError as compat_HTMLParseError
+except ImportError: # Python <3.4
+ try:
+ from html.parser import HTMLParseError as compat_HTMLParseError
+ except ImportError: # Python >3.4
+
+ # HTMLParseError has been deprecated in Python 3.3 and removed in
+ # Python 3.5. Introducing a dummy exception for Python >3.5 for
+ # compatible and uniform cross-version exception handling
+ class compat_HTMLParseError(Exception):
+ pass
+
try:
from subprocess import DEVNULL
compat_subprocess_get_DEVNULL = lambda: DEVNULL
parsed_result[name] = [value]
return parsed_result
-try:
- from shlex import quote as compat_shlex_quote
-except ImportError: # Python < 3.3
+
+compat_os_name = os._name if os.name == 'java' else os.name
+
+
+if compat_os_name == 'nt':
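+ # cmd.exe-style quoting: wrap the argument in double quotes and
+ # escape any embedded double quotes.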
def compat_shlex_quote(s):
- if re.match(r'^[-_\w./]+$', s):
- return s
- else:
- return "'" + s.replace("'", "'\"'\"'") + "'"
+ return s if re.match(r'^[-_\w./]+$', s) else '"%s"' % s.replace('"', '\\"')
+else:
+ try:
+ from shlex import quote as compat_shlex_quote
+ except ImportError: # Python < 3.3
+ def compat_shlex_quote(s):
+ if re.match(r'^[-_\w./]+$', s):
+ return s
+ else:
+ return "'" + s.replace("'", "'\"'\"'") + "'"
try:
return ord(c)
-compat_os_name = os._name if os.name == 'java' else os.name
-
-
if sys.version_info >= (3, 0):
compat_getenv = os.getenv
compat_expanduser = os.path.expanduser
compat_struct_pack = struct.pack
compat_struct_unpack = struct.unpack
+try:
+ from future_builtins import zip as compat_zip
+except ImportError: # not 2.6+ or is 3.x
+ try:
+ from itertools import izip as compat_zip # < 2.5 or 3.x
+ except ImportError:
+ compat_zip = zip
__all__ = [
+ 'compat_HTMLParseError',
'compat_HTMLParser',
'compat_HTTPError',
'compat_basestring',
'compat_urlretrieve',
'compat_xml_parse_error',
'compat_xpath',
+ 'compat_zip',
'workaround_optparse_bug9161',
]
from ..compat import compat_os_name
from ..utils import (
+ decodeArgument,
encodeFilename,
error_to_compat_str,
- decodeArgument,
format_bytes,
+ shell_quote,
timeconvert,
)
"""Report attempt to resume at given byte."""
self.to_screen('[download] Resuming download at byte %s' % resume_len)
- def report_retry(self, count, retries):
+ def report_retry(self, err, count, retries):
"""Report retry in case of HTTP error 5xx"""
self.to_screen(
- '[download] Got server HTTP error. Retrying (attempt %d of %s)...'
- % (count, self.format_retries(retries)))
+ '[download] Got server HTTP error: %s. Retrying (attempt %d of %s)...'
+ % (error_to_compat_str(err), count, self.format_retries(retries)))
def report_file_already_downloaded(self, file_name):
"""Report file has already been fully downloaded."""
if exe is None:
exe = os.path.basename(str_args[0])
- try:
- import pipes
- shell_quote = lambda args: ' '.join(map(pipes.quote, str_args))
- except ImportError:
- shell_quote = repr
self.to_screen('[debug] %s command line: %s' % (
exe, shell_quote(str_args)))
from .fragment import FragmentFD
from ..compat import compat_urllib_error
+from ..utils import urljoin
class DashSegmentsFD(FragmentFD):
FD_NAME = 'dashsegments'
def real_download(self, filename, info_dict):
- segments = info_dict['fragments'][:1] if self.params.get(
+ fragment_base_url = info_dict.get('fragment_base_url')
+ fragments = info_dict['fragments'][:1] if self.params.get(
'test', False) else info_dict['fragments']
ctx = {
'filename': filename,
- 'total_frags': len(segments),
+ 'total_frags': len(fragments),
}
self._prepare_and_start_frag_download(ctx)
skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True)
frag_index = 0
- for i, segment in enumerate(segments):
+ for i, fragment in enumerate(fragments):
frag_index += 1
if frag_index <= ctx['fragment_index']:
continue
count = 0
while count <= fragment_retries:
try:
- success, frag_content = self._download_fragment(ctx, segment['url'], info_dict)
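+ # A fragment may specify an absolute 'url' of its own or only a
+ # 'path' that has to be joined with the manifest-level
+ # fragment_base_url.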
+ fragment_url = fragment.get('url')
+ if not fragment_url:
+ assert fragment_base_url
+ fragment_url = urljoin(fragment_base_url, fragment['path'])
+ success, frag_content = self._download_fragment(ctx, fragment_url, info_dict)
if not success:
return False
self._append_fragment(ctx, frag_content)
args = [ffpp.executable, '-y']
+ for log_level in ('quiet', 'verbose'):
+ if self.params.get(log_level, False):
+ args += ['-loglevel', log_level]
+ break
+
seekable = info_dict.get('_seekable')
if seekable is not None:
# setting -seekable prevents ffmpeg from guessing if the server
if self.__do_ytdl_file(ctx):
if os.path.isfile(encodeFilename(self.ytdl_filename(ctx['filename']))):
self._read_ytdl_file(ctx)
+ if ctx['fragment_index'] > 0 and resume_len == 0:
+ self.report_error(
+ 'Inconsistent state of incomplete fragment download. '
+ 'Restarting from the beginning...')
+ ctx['fragment_index'] = resume_len = 0
+ self._write_ytdl_file(ctx)
else:
self._write_ytdl_file(ctx)
- if ctx['fragment_index'] > 0:
- assert resume_len > 0
+ assert ctx['fragment_index'] == 0
dest_stream, tmpfilename = sanitize_open(tmpfilename, open_mode)
man_url = info_dict['url']
self.to_screen('[%s] Downloading m3u8 manifest' % self.FD_NAME)
- manifest = self.ydl.urlopen(self._prepare_url(info_dict, man_url)).read()
-
- s = manifest.decode('utf-8', 'ignore')
+ urlh = self.ydl.urlopen(self._prepare_url(info_dict, man_url))
+ man_url = urlh.geturl()
+ s = urlh.read().decode('utf-8', 'ignore')
if not self.can_download(s, info_dict):
if info_dict.get('extra_param_to_segment_url'):
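
Keeping the post-redirect URL (urlh.geturl()) matters because relative segment URIs in the playlist are resolved against the manifest URL; a small sketch (URLs are illustrative):

    from youtube_dl.compat import compat_urlparse

    man_url = 'https://edge3.cdn.example.com/hls/index.m3u8'  # urlh.geturl()
    segment_uri = 'seg-00001.ts'
    assert compat_urlparse.urljoin(man_url, segment_uri) == \
        'https://edge3.cdn.example.com/hls/seg-00001.ts'
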
class HttpFD(FileDownloader):
def real_download(self, filename, info_dict):
url = info_dict['url']
- tmpfilename = self.temp_name(filename)
- stream = None
+
+ class DownloadContext(dict):
+ __getattr__ = dict.get
+ __setattr__ = dict.__setitem__
+ __delattr__ = dict.__delitem__
+
+ ctx = DownloadContext()
+ ctx.filename = filename
+ ctx.tmpfilename = self.temp_name(filename)
+ ctx.stream = None
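
DownloadContext is just a dict with attribute access, so download state can be threaded through the nested closures introduced below; a self-contained check of the behavior:

    class DownloadContext(dict):
        __getattr__ = dict.get
        __setattr__ = dict.__setitem__
        __delattr__ = dict.__delitem__

    ctx = DownloadContext()
    ctx.resume_len = 0
    assert ctx['resume_len'] == 0
    assert ctx.stream is None  # dict.get returns None for missing keys
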
# Do not include the Accept-Encoding header
headers = {'Youtubedl-no-compression': 'True'}
if is_test:
request.add_header('Range', 'bytes=0-%s' % str(self._TEST_FILE_SIZE - 1))
- # Establish possible resume length
- if os.path.isfile(encodeFilename(tmpfilename)):
- resume_len = os.path.getsize(encodeFilename(tmpfilename))
- else:
- resume_len = 0
-
- open_mode = 'wb'
- if resume_len != 0:
- if self.params.get('continuedl', True):
- self.report_resuming_byte(resume_len)
- request.add_header('Range', 'bytes=%d-' % resume_len)
- open_mode = 'ab'
- else:
- resume_len = 0
+ ctx.open_mode = 'wb'
+ ctx.resume_len = 0
+
+ if self.params.get('continuedl', True):
+ # Establish possible resume length
+ if os.path.isfile(encodeFilename(ctx.tmpfilename)):
+ ctx.resume_len = os.path.getsize(encodeFilename(ctx.tmpfilename))
count = 0
retries = self.params.get('retries', 0)
- while count <= retries:
+
+ class SucceedDownload(Exception):
+ pass
+
+ class RetryDownload(Exception):
+ def __init__(self, source_error):
+ self.source_error = source_error
+
+ def establish_connection():
+ if ctx.resume_len != 0:
+ self.report_resuming_byte(ctx.resume_len)
+ request.add_header('Range', 'bytes=%d-' % ctx.resume_len)
+ ctx.open_mode = 'ab'
# Establish connection
try:
- data = self.ydl.urlopen(request)
+ ctx.data = self.ydl.urlopen(request)
# When trying to resume, Content-Range HTTP header of response has to be checked
 # to match the value of requested Range HTTP header. This is due to webservers
 # that don't support resuming and serve a whole file with no Content-Range
 # set in response despite the requested Range (see
# https://github.com/rg3/youtube-dl/issues/6057#issuecomment-126129799)
- if resume_len > 0:
- content_range = data.headers.get('Content-Range')
+ if ctx.resume_len > 0:
+ content_range = ctx.data.headers.get('Content-Range')
if content_range:
content_range_m = re.search(r'bytes (\d+)-', content_range)
# Content-Range is present and matches requested Range, resume is possible
- if content_range_m and resume_len == int(content_range_m.group(1)):
- break
+ if content_range_m and ctx.resume_len == int(content_range_m.group(1)):
+ return
# Content-Range is either not present or invalid. Assuming remote webserver is
# trying to send the whole file, resume is not possible, so wiping the local file
# and performing entire redownload
self.report_unable_to_resume()
- resume_len = 0
- open_mode = 'wb'
- break
+ ctx.resume_len = 0
+ ctx.open_mode = 'wb'
+ return
except (compat_urllib_error.HTTPError, ) as err:
if (err.code < 500 or err.code >= 600) and err.code != 416:
# Unexpected HTTP error
# Unable to resume (requested range not satisfiable)
try:
# Open the connection again without the range header
- data = self.ydl.urlopen(basic_request)
- content_length = data.info()['Content-Length']
+ ctx.data = self.ydl.urlopen(basic_request)
+ content_length = ctx.data.info()['Content-Length']
except (compat_urllib_error.HTTPError, ) as err:
if err.code < 500 or err.code >= 600:
raise
else:
# Examine the reported length
if (content_length is not None and
- (resume_len - 100 < int(content_length) < resume_len + 100)):
+ (ctx.resume_len - 100 < int(content_length) < ctx.resume_len + 100)):
# The file had already been fully downloaded.
 # Explanation of the above condition: issue #175 revealed that YouTube
 # sometimes adds or removes a few bytes from the end of the file, so we
 # consider the file completely downloaded if its size differs by less
 # than 100 bytes from the one on the hard drive.
- self.report_file_already_downloaded(filename)
- self.try_rename(tmpfilename, filename)
+ self.report_file_already_downloaded(ctx.filename)
+ self.try_rename(ctx.tmpfilename, ctx.filename)
self._hook_progress({
- 'filename': filename,
+ 'filename': ctx.filename,
'status': 'finished',
- 'downloaded_bytes': resume_len,
- 'total_bytes': resume_len,
+ 'downloaded_bytes': ctx.resume_len,
+ 'total_bytes': ctx.resume_len,
})
- return True
+ raise SucceedDownload()
else:
# The length does not match, we start the download over
self.report_unable_to_resume()
- resume_len = 0
- open_mode = 'wb'
- break
- except socket.error as e:
- if e.errno != errno.ECONNRESET:
+ ctx.resume_len = 0
+ ctx.open_mode = 'wb'
+ return
+ raise RetryDownload(err)
+ except socket.error as err:
+ if err.errno != errno.ECONNRESET:
# Connection reset is no problem, just retry
raise
+ raise RetryDownload(err)
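
The Content-Range comparison that decides whether a resume is honored can be exercised in isolation (hypothetical helper; the regex is the one used above):

    import re

    def resume_confirmed(resume_len, content_range):
        m = re.search(r'bytes (\d+)-', content_range or '')
        return bool(m) and resume_len == int(m.group(1))

    assert resume_confirmed(1024, 'bytes 1024-2047/2048')
    assert not resume_confirmed(1024, None)  # no header: full redownload
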
+
+ def download():
+ data_len = ctx.data.info().get('Content-length', None)
+
+ # Range HTTP header may be ignored/unsupported by a webserver
+ # (e.g. extractor/scivee.py, extractor/bambuser.py).
+ # However, for a test we still would like to download just a piece of a file.
+ # To achieve this we limit data_len to _TEST_FILE_SIZE and manually control
+ # block size when downloading a file.
+ if is_test and (data_len is None or int(data_len) > self._TEST_FILE_SIZE):
+ data_len = self._TEST_FILE_SIZE
+
+ if data_len is not None:
+ data_len = int(data_len) + ctx.resume_len
+ min_data_len = self.params.get('min_filesize')
+ max_data_len = self.params.get('max_filesize')
+ if min_data_len is not None and data_len < min_data_len:
+ self.to_screen('\r[download] File is smaller than min-filesize (%s bytes < %s bytes). Aborting.' % (data_len, min_data_len))
+ return False
+ if max_data_len is not None and data_len > max_data_len:
+ self.to_screen('\r[download] File is larger than max-filesize (%s bytes > %s bytes). Aborting.' % (data_len, max_data_len))
+ return False
- # Retry
- count += 1
- if count <= retries:
- self.report_retry(count, retries)
-
- if count > retries:
- self.report_error('giving up after %s retries' % retries)
- return False
-
- data_len = data.info().get('Content-length', None)
-
- # Range HTTP header may be ignored/unsupported by a webserver
- # (e.g. extractor/scivee.py, extractor/bambuser.py).
- # However, for a test we still would like to download just a piece of a file.
- # To achieve this we limit data_len to _TEST_FILE_SIZE and manually control
- # block size when downloading a file.
- if is_test and (data_len is None or int(data_len) > self._TEST_FILE_SIZE):
- data_len = self._TEST_FILE_SIZE
-
- if data_len is not None:
- data_len = int(data_len) + resume_len
- min_data_len = self.params.get('min_filesize')
- max_data_len = self.params.get('max_filesize')
- if min_data_len is not None and data_len < min_data_len:
- self.to_screen('\r[download] File is smaller than min-filesize (%s bytes < %s bytes). Aborting.' % (data_len, min_data_len))
- return False
- if max_data_len is not None and data_len > max_data_len:
- self.to_screen('\r[download] File is larger than max-filesize (%s bytes > %s bytes). Aborting.' % (data_len, max_data_len))
- return False
-
- byte_counter = 0 + resume_len
- block_size = self.params.get('buffersize', 1024)
- start = time.time()
+ byte_counter = 0 + ctx.resume_len
+ block_size = self.params.get('buffersize', 1024)
+ start = time.time()
- # measure time over whole while-loop, so slow_down() and best_block_size() work together properly
- now = None # needed for slow_down() in the first loop run
- before = start # start measuring
- while True:
+ # measure time over whole while-loop, so slow_down() and best_block_size() work together properly
+ now = None # needed for slow_down() in the first loop run
+ before = start # start measuring
- # Download and write
- data_block = data.read(block_size if not is_test else min(block_size, data_len - byte_counter))
- byte_counter += len(data_block)
+ def retry(e):
+ if ctx.tmpfilename != '-':
+ ctx.stream.close()
+ ctx.stream = None
+ ctx.resume_len = os.path.getsize(encodeFilename(ctx.tmpfilename))
+ raise RetryDownload(e)
- # exit loop when download is finished
- if len(data_block) == 0:
- break
+ while True:
+ try:
+ # Download and write
+ data_block = ctx.data.read(block_size if not is_test else min(block_size, data_len - byte_counter))
+ # socket.timeout is a subclass of socket.error but may not have
+ # errno set
+ except socket.timeout as e:
+ retry(e)
+ except socket.error as e:
+ if e.errno not in (errno.ECONNRESET, errno.ETIMEDOUT):
+ raise
+ retry(e)
+
+ byte_counter += len(data_block)
+
+ # exit loop when download is finished
+ if len(data_block) == 0:
+ break
+
+ # Open destination file just in time
+ if ctx.stream is None:
+ try:
+ ctx.stream, ctx.tmpfilename = sanitize_open(
+ ctx.tmpfilename, ctx.open_mode)
+ assert ctx.stream is not None
+ ctx.filename = self.undo_temp_name(ctx.tmpfilename)
+ self.report_destination(ctx.filename)
+ except (OSError, IOError) as err:
+ self.report_error('unable to open for writing: %s' % str(err))
+ return False
+
+ if self.params.get('xattr_set_filesize', False) and data_len is not None:
+ try:
+ write_xattr(ctx.tmpfilename, 'user.ytdl.filesize', str(data_len).encode('utf-8'))
+ except (XAttrUnavailableError, XAttrMetadataError) as err:
+ self.report_error('unable to set filesize xattr: %s' % str(err))
- # Open destination file just in time
- if stream is None:
try:
- (stream, tmpfilename) = sanitize_open(tmpfilename, open_mode)
- assert stream is not None
- filename = self.undo_temp_name(tmpfilename)
- self.report_destination(filename)
- except (OSError, IOError) as err:
- self.report_error('unable to open for writing: %s' % str(err))
+ ctx.stream.write(data_block)
+ except (IOError, OSError) as err:
+ self.to_stderr('\n')
+ self.report_error('unable to write data: %s' % str(err))
return False
- if self.params.get('xattr_set_filesize', False) and data_len is not None:
- try:
- write_xattr(tmpfilename, 'user.ytdl.filesize', str(data_len).encode('utf-8'))
- except (XAttrUnavailableError, XAttrMetadataError) as err:
- self.report_error('unable to set filesize xattr: %s' % str(err))
-
- try:
- stream.write(data_block)
- except (IOError, OSError) as err:
+ # Apply rate limit
+ self.slow_down(start, now, byte_counter - ctx.resume_len)
+
+ # end measuring of one loop run
+ now = time.time()
+ after = now
+
+ # Adjust block size
+ if not self.params.get('noresizebuffer', False):
+ block_size = self.best_block_size(after - before, len(data_block))
+
+ before = after
+
+ # Progress message
+ speed = self.calc_speed(start, now, byte_counter - ctx.resume_len)
+ if data_len is None:
+ eta = None
+ else:
+ eta = self.calc_eta(start, time.time(), data_len - ctx.resume_len, byte_counter - ctx.resume_len)
+
+ self._hook_progress({
+ 'status': 'downloading',
+ 'downloaded_bytes': byte_counter,
+ 'total_bytes': data_len,
+ 'tmpfilename': ctx.tmpfilename,
+ 'filename': ctx.filename,
+ 'eta': eta,
+ 'speed': speed,
+ 'elapsed': now - start,
+ })
+
+ if is_test and byte_counter == data_len:
+ break
+
+ if ctx.stream is None:
self.to_stderr('\n')
- self.report_error('unable to write data: %s' % str(err))
+ self.report_error('Did not get any data blocks')
return False
+ if ctx.tmpfilename != '-':
+ ctx.stream.close()
- # Apply rate limit
- self.slow_down(start, now, byte_counter - resume_len)
+ if data_len is not None and byte_counter != data_len:
+ err = ContentTooShortError(byte_counter, int(data_len))
+ if count <= retries:
+ retry(err)
+ raise err
- # end measuring of one loop run
- now = time.time()
- after = now
+ self.try_rename(ctx.tmpfilename, ctx.filename)
- # Adjust block size
- if not self.params.get('noresizebuffer', False):
- block_size = self.best_block_size(after - before, len(data_block))
-
- before = after
-
- # Progress message
- speed = self.calc_speed(start, now, byte_counter - resume_len)
- if data_len is None:
- eta = None
- else:
- eta = self.calc_eta(start, time.time(), data_len - resume_len, byte_counter - resume_len)
+ # Update file modification time
+ if self.params.get('updatetime', True):
+ info_dict['filetime'] = self.try_utime(ctx.filename, ctx.data.info().get('last-modified', None))
self._hook_progress({
- 'status': 'downloading',
'downloaded_bytes': byte_counter,
- 'total_bytes': data_len,
- 'tmpfilename': tmpfilename,
- 'filename': filename,
- 'eta': eta,
- 'speed': speed,
- 'elapsed': now - start,
+ 'total_bytes': byte_counter,
+ 'filename': ctx.filename,
+ 'status': 'finished',
+ 'elapsed': time.time() - start,
})
- if is_test and byte_counter == data_len:
- break
-
- if stream is None:
- self.to_stderr('\n')
- self.report_error('Did not get any data blocks')
- return False
- if tmpfilename != '-':
- stream.close()
-
- if data_len is not None and byte_counter != data_len:
- raise ContentTooShortError(byte_counter, int(data_len))
- self.try_rename(tmpfilename, filename)
-
- # Update file modification time
- if self.params.get('updatetime', True):
- info_dict['filetime'] = self.try_utime(filename, data.info().get('last-modified', None))
-
- self._hook_progress({
- 'downloaded_bytes': byte_counter,
- 'total_bytes': byte_counter,
- 'filename': filename,
- 'status': 'finished',
- 'elapsed': time.time() - start,
- })
-
- return True
+ return True
+
+ while count <= retries:
+ try:
+ establish_connection()
+ return download()
+ except RetryDownload as e:
+ count += 1
+ if count <= retries:
+ self.report_retry(e.source_error, count, retries)
+ continue
+ except SucceedDownload:
+ return True
+
+ self.report_error('giving up after %s retries' % retries)
+ return False
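
The rewritten driver loop separates connection setup from body transfer and uses exceptions for control flow; its shape in miniature (stand-alone sketch; the real establish_connection and download close over ctx and self):

    class RetryDownload(Exception):
        def __init__(self, source_error):
            self.source_error = source_error

    class SucceedDownload(Exception):
        pass

    def run(establish_connection, download, retries, report_retry):
        count = 0
        while count <= retries:
            try:
                establish_connection()
                return download()
            except RetryDownload as e:
                count += 1
                if count <= retries:
                    report_retry(e.source_error, count, retries)
            except SucceedDownload:
                return True
        return False
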
if is_audio:
smhd_payload = s88.pack(0) # balance
- smhd_payload = u16.pack(0) # reserved
+ smhd_payload += u16.pack(0) # reserved
media_header_box = full_box(b'smhd', 0, 0, smhd_payload) # Sound Media Header
else:
vmhd_payload = u16.pack(0) # graphics mode
if fourcc == 'AACL':
sample_entry_box = box(b'mp4a', sample_entry_payload)
else:
- sample_entry_payload = sample_entry_payload
sample_entry_payload += u16.pack(0) # pre defined
sample_entry_payload += u16.pack(0) # reserved
sample_entry_payload += u32.pack(0) * 3 # pre defined
import re
from .common import InfoExtractor
+from ..compat import compat_str
from ..utils import (
ExtractorError,
js_to_json,
int_or_none,
parse_iso8601,
+ try_get,
)
title = video_params.get('title') or video_params['seriesTitle']
stream = next(s for s in video_params['playlist'] if s.get('type') == 'program')
- formats = self._extract_akamai_formats(stream['hds-unmetered'], video_id)
+ format_urls = [
+ try_get(stream, lambda x: x['hds-unmetered'], compat_str)]
+
+ # May have higher quality video
+ sd_url = try_get(
+ stream, lambda x: x['streams']['hds']['sd'], compat_str)
+ if sd_url:
+ format_urls.append(sd_url.replace('metered', 'um'))
+
+ formats = []
+ for format_url in format_urls:
+ if format_url:
+ formats.extend(
+ self._extract_akamai_formats(format_url, video_id))
self._sort_formats(formats)
subtitles = {}
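
try_get makes the optional hds-unmetered/sd lookups safe against missing keys; its behavior in short (data is illustrative):

    from youtube_dl.compat import compat_str
    from youtube_dl.utils import try_get

    stream = {'streams': {'hds': {'sd': 'https://example.com/metered/sd.f4m'}}}
    sd_url = try_get(stream, lambda x: x['streams']['hds']['sd'], compat_str)
    assert sd_url.replace('metered', 'um') == 'https://example.com/um/sd.f4m'
    assert try_get(stream, lambda x: x['hds-unmetered'], compat_str) is None
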
from .amp import AMPIE
from .common import InfoExtractor
+from .youtube import YoutubeIE
from ..compat import compat_urlparse
class AbcNewsVideoIE(AMPIE):
IE_NAME = 'abcnews:video'
- _VALID_URL = r'https?://abcnews\.go\.com/[^/]+/video/(?P<display_id>[0-9a-z-]+)-(?P<id>\d+)'
+ _VALID_URL = r'''(?x)
+ https?://
+ abcnews\.go\.com/
+ (?:
+ [^/]+/video/(?P<display_id>[0-9a-z-]+)-|
+ video/embed\?.*?\bid=
+ )
+ (?P<id>\d+)
+ '''
_TESTS = [{
'url': 'http://abcnews.go.com/ThisWeek/video/week-exclusive-irans-foreign-minister-zarif-20411932',
# m3u8 download
'skip_download': True,
},
+ }, {
+ 'url': 'http://abcnews.go.com/video/embed?id=46979033',
+ 'only_matching': True,
}, {
'url': 'http://abcnews.go.com/2020/video/2020-husband-stands-teacher-jail-student-affairs-26119478',
'only_matching': True,
r'window\.abcnvideo\.url\s*=\s*"([^"]+)"', webpage, 'video URL')
full_video_url = compat_urlparse.urljoin(url, video_url)
- youtube_url = self._html_search_regex(
- r'<iframe[^>]+src="(https://www\.youtube\.com/embed/[^"]+)"',
- webpage, 'YouTube URL', default=None)
+ youtube_url = YoutubeIE._extract_url(webpage)
timestamp = None
date_str = self._html_search_regex(
}
if youtube_url:
- entries = [entry, self.url_result(youtube_url, 'Youtube')]
+ entries = [entry, self.url_result(youtube_url, ie=YoutubeIE.ie_key())]
return self.playlist_result(entries)
return entry
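
The ad-hoc iframe regex is replaced by the shared YouTube embed detector; a minimal sketch of the call (HTML is made up):

    from youtube_dl.extractor.youtube import YoutubeIE

    webpage = '<iframe src="https://www.youtube.com/embed/dQw4w9WgXcQ"></iframe>'
    youtube_url = YoutubeIE._extract_url(webpage)  # the embed URL, or None
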
'display_id': 'east-bay-museum-celebrates-vintage-synthesizers',
'ext': 'mp4',
'title': 'East Bay museum celebrates vintage synthesizers',
- 'description': 'md5:a4f10fb2f2a02565c1749d4adbab4b10',
+ 'description': 'md5:24ed2bd527096ec2a5c67b9d5a9005f3',
'thumbnail': r're:^https?://.*\.jpg$',
'timestamp': 1421123075,
'upload_date': '20150113',
intlist_to_bytes,
srt_subtitles_timecode,
strip_or_none,
+ urljoin,
)
'description': 'md5:2f7b5aa76edbc1a7a92cedcda8a528d5',
}
}
+ _BASE_URL = 'http://animedigitalnetwork.fr'
def _get_subtitles(self, sub_path, video_id):
if not sub_path:
return None
enc_subtitles = self._download_webpage(
- 'http://animedigitalnetwork.fr/' + sub_path,
- video_id, fatal=False)
+ urljoin(self._BASE_URL, sub_path),
+ video_id, fatal=False, headers={
+ 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:53.0) Gecko/20100101 Firefox/53.0',
+ })
if not enc_subtitles:
return None
# http://animedigitalnetwork.fr/components/com_vodvideo/videojs/adn-vjs.min.js
dec_subtitles = intlist_to_bytes(aes_cbc_decrypt(
bytes_to_intlist(base64.b64decode(enc_subtitles[24:])),
- bytes_to_intlist(b'\nd\xaf\xd2J\xd0\xfc\xe1\xfc\xdf\xb61\xe8\xe1\xf0\xcc'),
+ bytes_to_intlist(b'\x1b\xe0\x29\x61\x38\x94\x24\x00\x12\xbd\xc5\x80\xac\xce\xbe\xb0'),
bytes_to_intlist(base64.b64decode(enc_subtitles[:24]))
))
subtitles_json = self._parse_json(
- dec_subtitles[:-compat_ord(dec_subtitles[-1])],
+ dec_subtitles[:-compat_ord(dec_subtitles[-1])].decode(),
None, fatal=False)
if not subtitles_json:
return None
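
The final slice strips PKCS#7-style padding: the last byte of the decrypted blob encodes how many padding bytes to drop. A tiny illustration (payload is made up):

    from youtube_dl.compat import compat_ord

    dec_subtitles = b'{"lines": []}' + b'\x03' * 3  # 3 bytes of padding
    unpadded = dec_subtitles[:-compat_ord(dec_subtitles[-1])]
    assert unpadded == b'{"lines": []}'
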
metas = options.get('metas') or {}
title = metas.get('title') or video_info['title']
links = player_config.get('links') or {}
+ error = None
+ if not links:
+ links_url = player_config['linksurl']
+ links_data = self._download_json(urljoin(
+ self._BASE_URL, links_url), video_id)
+ links = links_data.get('links') or {}
+ error = links_data.get('error')
formats = []
for format_id, qualities in links.items():
+ if not isinstance(qualities, dict):
+ continue
for load_balancer_url in qualities.values():
load_balancer_data = self._download_json(
load_balancer_url, video_id, fatal=False) or {}
for f in m3u8_formats:
f['language'] = 'fr'
formats.extend(m3u8_formats)
- error = options.get('error')
+ if not error:
+ error = options.get('error')
if not formats and error:
raise ExtractorError('%s said: %s' % (self.IE_NAME, error), expected=True)
self._sort_formats(formats)
import xml.etree.ElementTree as etree
from .common import InfoExtractor
-from ..compat import compat_urlparse
+from ..compat import (
+ compat_kwargs,
+ compat_urlparse,
+)
from ..utils import (
unescapeHTML,
urlencode_postdata,
unified_timestamp,
ExtractorError,
+ NO_DEFAULT,
)
'username_field': 'username',
'password_field': 'password',
},
+ 'ATTOTT': {
+ 'name': 'DIRECTV NOW',
+ 'username_field': 'email',
+ 'password_field': 'loginpassword',
+ },
'Rogers': {
'name': 'Rogers',
'username_field': 'UserName',
'username_field': 'Ecom_User_ID',
'password_field': 'Ecom_Password',
},
+ 'Brighthouse': {
+ 'name': 'Bright House Networks | Spectrum',
+ 'username_field': 'j_username',
+ 'password_field': 'j_password',
+ },
'Charter_Direct': {
'name': 'Charter Spectrum',
'username_field': 'IDToken1',
_USER_AGENT = 'Mozilla/5.0 (X11; Linux i686; rv:47.0) Gecko/20100101 Firefox/47.0'
_MVPD_CACHE = 'ap-mvpd'
+ _DOWNLOADING_LOGIN_PAGE = 'Downloading Provider Login Page'
+
def _download_webpage_handle(self, *args, **kwargs):
headers = kwargs.get('headers', {})
headers.update(self.geo_verification_headers())
kwargs['headers'] = headers
- return super(AdobePassIE, self)._download_webpage_handle(*args, **kwargs)
+ return super(AdobePassIE, self)._download_webpage_handle(
+ *args, **compat_kwargs(kwargs))
@staticmethod
def _get_mvpd_resource(provider_id, title, guid, rating):
'Use --ap-mso to specify Adobe Pass Multiple-system operator Identifier '
'and --ap-username and --ap-password or --netrc to provide account credentials.', expected=True)
+ def extract_redirect_url(html, url=None, fatal=False):
+ # TODO: eliminate code duplication with generic extractor and move
+ # redirection code into _download_webpage_handle
+ REDIRECT_REGEX = r'[0-9]{,2};\s*(?:URL|url)=\'?([^\'"]+)'
+ redirect_url = self._search_regex(
+ r'(?i)<meta\s+(?=(?:[a-z-]+="[^"]+"\s+)*http-equiv="refresh")'
+ r'(?:[a-z-]+="[^"]+"\s+)*?content="%s' % REDIRECT_REGEX,
+ html, 'meta refresh redirect',
+ default=NO_DEFAULT if fatal else None, fatal=fatal)
+ if not redirect_url:
+ return None
+ if url:
+ redirect_url = compat_urlparse.urljoin(url, unescapeHTML(redirect_url))
+ return redirect_url
+
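
The lookahead-based meta refresh regex tolerates other attributes before http-equiv; exercised stand-alone (HTML is illustrative):

    import re

    REDIRECT_REGEX = r'[0-9]{,2};\s*(?:URL|url)=\'?([^\'"]+)'
    html = '<meta http-equiv="refresh" content="0; url=https://idp.example.com/login">'
    m = re.search(
        r'(?i)<meta\s+(?=(?:[a-z-]+="[^"]+"\s+)*http-equiv="refresh")'
        r'(?:[a-z-]+="[^"]+"\s+)*?content="%s' % REDIRECT_REGEX, html)
    assert m.group(1) == 'https://idp.example.com/login'
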
mvpd_headers = {
'ap_42': 'anonymous',
'ap_11': 'Linux i686',
if '<form name="signin"' in provider_redirect_page:
provider_login_page_res = provider_redirect_page_res
elif 'http-equiv="refresh"' in provider_redirect_page:
- oauth_redirect_url = self._html_search_regex(
- r'content="0;\s*url=([^\'"]+)',
- provider_redirect_page, 'meta refresh redirect')
+ oauth_redirect_url = extract_redirect_url(
+ provider_redirect_page, fatal=True)
provider_login_page_res = self._download_webpage_handle(
oauth_redirect_url, video_id,
- 'Downloading Provider Login Page')
+ self._DOWNLOADING_LOGIN_PAGE)
else:
provider_login_page_res = post_form(
provider_redirect_page_res,
- 'Downloading Provider Login Page')
+ self._DOWNLOADING_LOGIN_PAGE)
mvpd_confirm_page_res = post_form(
provider_login_page_res, 'Logging in', {
'Content-Type': 'application/x-www-form-urlencoded'
})
else:
+ # Some providers (e.g. DIRECTV NOW) have another meta refresh
+ # based redirect that should be followed.
+ provider_redirect_page, urlh = provider_redirect_page_res
+ provider_refresh_redirect_url = extract_redirect_url(
+ provider_redirect_page, url=urlh.geturl())
+ if provider_refresh_redirect_url:
+ provider_redirect_page_res = self._download_webpage_handle(
+ provider_refresh_redirect_url, video_id,
+ 'Downloading Provider Redirect Page (meta refresh)')
provider_login_page_res = post_form(
- provider_redirect_page_res, 'Downloading Provider Login Page')
+ provider_redirect_page_res, self._DOWNLOADING_LOGIN_PAGE)
mvpd_confirm_page_res = post_form(provider_login_page_res, 'Logging in', {
mso_info.get('username_field', 'username'): username,
mso_info.get('password_field', 'password'): password,
--- /dev/null
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ float_or_none,
+ try_get,
+)
+
+
+class AliExpressLiveIE(InfoExtractor):
+ _VALID_URL = r'https?://live\.aliexpress\.com/live/(?P<id>\d+)'
+ _TEST = {
+ 'url': 'https://live.aliexpress.com/live/2800002704436634',
+ 'md5': 'e729e25d47c5e557f2630eaf99b740a5',
+ 'info_dict': {
+ 'id': '2800002704436634',
+ 'ext': 'mp4',
+ 'title': 'CASIMA7.22',
+ 'thumbnail': r're:http://.*\.jpg',
+ 'uploader': 'CASIMA Official Store',
+ 'timestamp': 1500717600,
+ 'upload_date': '20170722',
+ },
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
+ data = self._parse_json(
+ self._search_regex(
+ r'(?s)runParams\s*=\s*({.+?})\s*;?\s*var',
+ webpage, 'runParams'),
+ video_id)
+
+ title = data['title']
+
+ formats = self._extract_m3u8_formats(
+ data['replyStreamUrl'], video_id, 'mp4',
+ entry_protocol='m3u8_native', m3u8_id='hls')
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'thumbnail': data.get('coverUrl'),
+ 'uploader': try_get(
+ data, lambda x: x['followBar']['name'], compat_str),
+ 'timestamp': float_or_none(data.get('startTimeLong'), scale=1000),
+ 'formats': formats,
+ }
from .theplatform import ThePlatformIE
from ..utils import (
- update_url_query,
- parse_age_limit,
int_or_none,
+ parse_age_limit,
+ try_get,
+ update_url_query,
)
info = self._parse_theplatform_metadata(theplatform_metadata)
video_id = theplatform_metadata['pid']
title = theplatform_metadata['title']
- rating = theplatform_metadata['ratings'][0]['rating']
+ rating = try_get(
+ theplatform_metadata, lambda x: x['ratings'][0]['rating'])
auth_required = self._search_regex(
r'window\.authRequired\s*=\s*(true|false);',
webpage, 'auth required')
--- /dev/null
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ clean_html,
+ int_or_none,
+ try_get,
+ unified_strdate,
+)
+
+
+class AmericasTestKitchenIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?americastestkitchen\.com/(?:episode|videos)/(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://www.americastestkitchen.com/episode/548-summer-dinner-party',
+ 'md5': 'b861c3e365ac38ad319cfd509c30577f',
+ 'info_dict': {
+ 'id': '1_5g5zua6e',
+ 'title': 'Summer Dinner Party',
+ 'ext': 'mp4',
+ 'description': 'md5:858d986e73a4826979b6a5d9f8f6a1ec',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'timestamp': 1497285541,
+ 'upload_date': '20170612',
+ 'uploader_id': 'roger.metcalf@americastestkitchen.com',
+ 'release_date': '20170617',
+ 'series': "America's Test Kitchen",
+ 'season_number': 17,
+ 'episode': 'Summer Dinner Party',
+ 'episode_number': 24,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://www.americastestkitchen.com/videos/3420-pan-seared-salmon',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
+ partner_id = self._search_regex(
+ r'src=["\'](?:https?:)?//(?:[^/]+\.)kaltura\.com/(?:[^/]+/)*(?:p|partner_id)/(\d+)',
+ webpage, 'kaltura partner id')
+
+ video_data = self._parse_json(
+ self._search_regex(
+ r'window\.__INITIAL_STATE__\s*=\s*({.+?})\s*;\s*</script>',
+ webpage, 'initial context'),
+ video_id)
+
+ ep_data = try_get(
+ video_data,
+ (lambda x: x['episodeDetail']['content']['data'],
+ lambda x: x['videoDetail']['content']['data']), dict)
+ ep_meta = ep_data.get('full_video', {})
+ external_id = ep_data.get('external_id') or ep_meta['external_id']
+
+ title = ep_data.get('title') or ep_meta.get('title')
+ description = clean_html(ep_meta.get('episode_description') or ep_data.get(
+ 'description') or ep_meta.get('description'))
+ thumbnail = try_get(ep_meta, lambda x: x['photo']['image_url'])
+ release_date = unified_strdate(ep_data.get('aired_at'))
+
+ season_number = int_or_none(ep_meta.get('season_number'))
+ episode = ep_meta.get('title')
+ episode_number = int_or_none(ep_meta.get('episode_number'))
+
+ return {
+ '_type': 'url_transparent',
+ 'url': 'kaltura:%s:%s' % (partner_id, external_id),
+ 'ie_key': 'Kaltura',
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'release_date': release_date,
+ 'series': "America's Test Kitchen",
+ 'season_number': season_number,
+ 'episode': episode,
+ 'episode_number': episode_number,
+ }
import re
from .common import InfoExtractor
-from ..compat import (
- compat_urlparse,
- compat_str,
-)
+from ..compat import compat_str
from ..utils import (
determine_ext,
extract_attributes,
ExtractorError,
- sanitized_Request,
urlencode_postdata,
+ urljoin,
)
_LOGIN_URL = 'https://www.anime-on-demand.de/users/sign_in'
_APPLY_HTML5_URL = 'https://www.anime-on-demand.de/html5apply'
_NETRC_MACHINE = 'animeondemand'
+ # German-speaking countries of Europe
+ _GEO_COUNTRIES = ['AT', 'CH', 'DE', 'LI', 'LU']
_TESTS = [{
# jap, OmU
'url': 'https://www.anime-on-demand.de/anime/161',
# Full length film, non-series, ger/jap, Dub/OmU, account required
'url': 'https://www.anime-on-demand.de/anime/185',
'only_matching': True,
+ }, {
+ # Flash videos
+ 'url': 'https://www.anime-on-demand.de/anime/12',
+ 'only_matching': True,
}]
def _login(self):
'post url', default=self._LOGIN_URL, group='url')
if not post_url.startswith('http'):
- post_url = compat_urlparse.urljoin(self._LOGIN_URL, post_url)
-
- request = sanitized_Request(
- post_url, urlencode_postdata(login_form))
- request.add_header('Referer', self._LOGIN_URL)
+ post_url = urljoin(self._LOGIN_URL, post_url)
response = self._download_webpage(
- request, None, 'Logging in as %s' % username)
+ post_url, None, 'Logging in as %s' % username,
+ data=urlencode_postdata(login_form), headers={
+ 'Referer': self._LOGIN_URL,
+ })
if all(p not in response for p in ('>Logout<', 'href="/users/sign_out"')):
error = self._search_regex(
formats = []
for input_ in re.findall(
- r'<input[^>]+class=["\'].*?streamstarter_html5[^>]+>', html):
+ r'<input[^>]+class=["\'].*?streamstarter[^>]+>', html):
attributes = extract_attributes(input_)
+ title = attributes.get('data-dialog-header')
playlist_urls = []
- for playlist_key in ('data-playlist', 'data-otherplaylist'):
+ for playlist_key in ('data-playlist', 'data-otherplaylist', 'data-stream'):
playlist_url = attributes.get(playlist_key)
if isinstance(playlist_url, compat_str) and re.match(
r'/?[\da-zA-Z]+', playlist_url):
format_id_list.append(compat_str(num))
format_id = '-'.join(format_id_list)
format_note = ', '.join(filter(None, (kind, lang_note)))
- request = sanitized_Request(
- compat_urlparse.urljoin(url, playlist_url),
+ item_id_list = []
+ if format_id:
+ item_id_list.append(format_id)
+ item_id_list.append('videomaterial')
+ playlist = self._download_json(
+ urljoin(url, playlist_url), video_id,
+ 'Downloading %s JSON' % ' '.join(item_id_list),
headers={
'X-Requested-With': 'XMLHttpRequest',
'X-CSRF-Token': csrf_token,
'Referer': url,
'Accept': 'application/json, text/javascript, */*; q=0.01',
- })
- playlist = self._download_json(
- request, video_id, 'Downloading %s playlist JSON' % format_id,
- fatal=False)
+ }, fatal=False)
if not playlist:
continue
+ stream_url = playlist.get('streamurl')
+ if stream_url:
+ rtmp = re.search(
+ r'^(?P<url>rtmpe?://(?P<host>[^/]+)/(?P<app>.+/))(?P<playpath>mp[34]:.+)',
+ stream_url)
+ if rtmp:
+ formats.append({
+ 'url': rtmp.group('url'),
+ 'app': rtmp.group('app'),
+ 'play_path': rtmp.group('playpath'),
+ 'page_url': url,
+ 'player_url': 'https://www.anime-on-demand.de/assets/jwplayer.flash-55abfb34080700304d49125ce9ffb4a6.swf',
+ 'rtmp_real_time': True,
+ 'format_id': 'rtmp',
+ 'ext': 'flv',
+ })
+ continue
start_video = playlist.get('startvideo', 0)
playlist = playlist.get('playlist')
if not playlist or not isinstance(playlist, list):
f.update({
'id': '%s-%s' % (f['id'], m.group('kind').lower()),
'title': m.group('title'),
- 'url': compat_urlparse.urljoin(url, m.group('href')),
+ 'url': urljoin(url, m.group('href')),
})
entries.append(f)
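
The new RTMP branch splits a Flash stream URL into app and playpath for librtmp; the regex at work (URL is made up):

    import re

    stream_url = 'rtmpe://edge.example.com/vod/path/mp4:episode1.mp4'
    rtmp = re.search(
        r'^(?P<url>rtmpe?://(?P<host>[^/]+)/(?P<app>.+/))(?P<playpath>mp[34]:.+)',
        stream_url)
    assert rtmp.group('app') == 'vod/path/'
    assert rtmp.group('playpath') == 'mp4:episode1.mp4'
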
from .common import InfoExtractor
from ..utils import (
- ExtractorError,
- HEADRequest,
+ int_or_none,
+ mimetype2ext,
)
class AparatIE(InfoExtractor):
- _VALID_URL = r'^https?://(?:www\.)?aparat\.com/(?:v/|video/video/embed/videohash/)(?P<id>[a-zA-Z0-9]+)'
+ _VALID_URL = r'https?://(?:www\.)?aparat\.com/(?:v/|video/video/embed/videohash/)(?P<id>[a-zA-Z0-9]+)'
_TEST = {
'url': 'http://www.aparat.com/v/wP8On',
# Note: There is an easier-to-parse configuration at
# http://www.aparat.com/video/video/config/videohash/%video_id
# but the URL in there does not work
- embed_url = 'http://www.aparat.com/video/video/embed/vt/frame/showvideo/yes/videohash/' + video_id
- webpage = self._download_webpage(embed_url, video_id)
-
- file_list = self._parse_json(self._search_regex(
- r'fileList\s*=\s*JSON\.parse\(\'([^\']+)\'\)', webpage, 'file list'), video_id)
- for i, item in enumerate(file_list[0]):
- video_url = item['file']
- req = HEADRequest(video_url)
- res = self._request_webpage(
- req, video_id, note='Testing video URL %d' % i, errnote=False)
- if res:
- break
- else:
- raise ExtractorError('No working video URLs found')
+ webpage = self._download_webpage(
+ 'http://www.aparat.com/video/video/embed/vt/frame/showvideo/yes/videohash/' + video_id,
+ video_id)
title = self._search_regex(r'\s+title:\s*"([^"]+)"', webpage, 'title')
+
+ file_list = self._parse_json(
+ self._search_regex(
+ r'fileList\s*=\s*JSON\.parse\(\'([^\']+)\'\)', webpage,
+ 'file list'),
+ video_id)
+
+ formats = []
+ for item in file_list[0]:
+ file_url = item.get('file')
+ if not file_url:
+ continue
+ ext = mimetype2ext(item.get('type'))
+ label = item.get('label')
+ formats.append({
+ 'url': file_url,
+ 'ext': ext,
+ 'format_id': label or ext,
+ 'height': int_or_none(self._search_regex(
+ r'(\d+)[pP]', label or '', 'height', default=None)),
+ })
+ self._sort_formats(formats)
+
thumbnail = self._search_regex(
r'image:\s*"([^"]+)"', webpage, 'thumbnail', fatal=False)
return {
'id': video_id,
'title': title,
- 'url': video_url,
- 'ext': 'mp4',
'thumbnail': thumbnail,
'age_limit': self._family_friendly_search(webpage),
+ 'formats': formats,
}
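
Each entry of the file list now becomes a proper format, with the height guessed from labels like '480p'; the two helpers in isolation (label is illustrative):

    import re

    from youtube_dl.utils import int_or_none, mimetype2ext

    assert mimetype2ext('video/mp4') == 'mp4'
    label = '480p'
    height = int_or_none(re.search(r'(\d+)[pP]', label).group(1))
    assert height == 480
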
duration = int_or_none(media_info.get('_duration'))
thumbnail = media_info.get('_previewImage')
+ is_live = media_info.get('_isLive') is True
subtitles = {}
subtitle_url = media_info.get('_subtitleUrl')
'id': video_id,
'duration': duration,
'thumbnail': thumbnail,
+ 'is_live': is_live,
'formats': formats,
'subtitles': subtitles,
}
# determine video id from url
m = re.match(self._VALID_URL, url)
+ document_id = None
+
numid = re.search(r'documentId=([0-9]+)', url)
if numid:
- video_id = numid.group(1)
+ document_id = video_id = numid.group(1)
else:
video_id = m.group('video_id')
'formats': formats,
}
else: # request JSON file
+ if not document_id:
+ video_id = self._search_regex(
+ r'/play/(?:config|media)/(\d+)', webpage, 'media id')
info = self._extract_media_info(
- 'http://www.ardmediathek.de/play/media/%s' % video_id, webpage, video_id)
+ 'http://www.ardmediathek.de/play/media/%s' % video_id,
+ webpage, video_id)
info.update({
'id': video_id,
- 'title': title,
+ 'title': self._live_title(title) if info.get('is_live') else title,
'description': description,
'thumbnail': thumbnail,
})
compat_urllib_parse_urlparse,
)
from ..utils import (
+ ExtractorError,
find_xpath_attr,
- unified_strdate,
get_element_by_attribute,
int_or_none,
NO_DEFAULT,
qualities,
+ unified_strdate,
)
# There are different sources of video in arte.tv, the extraction process
info = self._download_json(json_url, video_id)
player_info = info['videoJsonPlayer']
+ vsr = player_info['VSR']
+
+ if not vsr:
+ raise ExtractorError(
+ 'Video %s is not available' % (player_info.get('VID') or video_id),
+ expected=True)
+
upload_date_str = player_info.get('shootingDate')
if not upload_date_str:
upload_date_str = (player_info.get('VRA') or player_info.get('VDA') or '').split(' ')[0]
langcode = LANGS.get(lang, lang)
formats = []
- for format_id, format_dict in player_info['VSR'].items():
+ for format_id, format_dict in vsr.items():
f = dict(format_dict)
versionCode = f.get('versionCode')
l = re.escape(langcode)
--- /dev/null
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from .kaltura import KalturaIE
+from ..utils import (
+ extract_attributes,
+ remove_end,
+ urlencode_postdata,
+)
+
+
+class AsianCrushIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?asiancrush\.com/video/(?:[^/]+/)?0+(?P<id>\d+)v\b'
+ _TESTS = [{
+ 'url': 'https://www.asiancrush.com/video/012869v/women-who-flirt/',
+ 'md5': 'c3b740e48d0ba002a42c0b72857beae6',
+ 'info_dict': {
+ 'id': '1_y4tmjm5r',
+ 'ext': 'mp4',
+ 'title': 'Women Who Flirt',
+ 'description': 'md5:3db14e9186197857e7063522cb89a805',
+ 'timestamp': 1496936429,
+ 'upload_date': '20170608',
+ 'uploader_id': 'craig@crifkin.com',
+ },
+ }, {
+ 'url': 'https://www.asiancrush.com/video/she-was-pretty/011886v-pretty-episode-3/',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ data = self._download_json(
+ 'https://www.asiancrush.com/wp-admin/admin-ajax.php', video_id,
+ data=urlencode_postdata({
+ 'postid': video_id,
+ 'action': 'get_channel_kaltura_vars',
+ }))
+
+ entry_id = data['entry_id']
+
+ return self.url_result(
+ 'kaltura:%s:%s' % (data['partner_id'], entry_id),
+ ie=KalturaIE.ie_key(), video_id=entry_id,
+ video_title=data.get('vid_label'))
+
+
+class AsianCrushPlaylistIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?asiancrush\.com/series/0+(?P<id>\d+)s\b'
+ _TEST = {
+ 'url': 'https://www.asiancrush.com/series/012481s/scholar-walks-night/',
+ 'info_dict': {
+ 'id': '12481',
+ 'title': 'Scholar Who Walks the Night',
+ 'description': 'md5:7addd7c5132a09fd4741152d96cce886',
+ },
+ 'playlist_count': 20,
+ }
+
+ def _real_extract(self, url):
+ playlist_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, playlist_id)
+
+ entries = []
+
+ for mobj in re.finditer(
+ r'<a[^>]+href=(["\'])(?P<url>%s.*?)\1[^>]*>' % AsianCrushIE._VALID_URL,
+ webpage):
+ attrs = extract_attributes(mobj.group(0))
+ if attrs.get('class') == 'clearfix':
+ entries.append(self.url_result(
+ mobj.group('url'), ie=AsianCrushIE.ie_key()))
+
+ title = remove_end(
+ self._html_search_regex(
+ r'(?s)<h1\b[^>]\bid=["\']movieTitle[^>]+>(.+?)</h1>', webpage,
+ 'title', default=None) or self._og_search_title(
+ webpage, default=None) or self._html_search_meta(
+ 'twitter:title', webpage, 'title',
+ default=None) or self._search_regex(
+ r'<title>([^<]+)</title>', webpage, 'title', fatal=False),
+ ' | AsianCrush')
+
+ description = self._og_search_description(
+ webpage, default=None) or self._html_search_meta(
+ 'twitter:description', webpage, 'description', fatal=False)
+
+ return self.playlist_result(entries, playlist_id, title, description)
def from_clip(field):
if clip:
- clip.get(field)
+ return clip.get(field)
audio_url = from_clip('clipURLPriorToLoading') or self._og_search_property(
'audio', webpage, 'audio url')
ExtractorError,
float_or_none,
int_or_none,
+ KNOWN_EXTENSIONS,
parse_filesize,
unescapeHTML,
update_url_query,
+ unified_strdate,
)
class BandcampIE(InfoExtractor):
- _VALID_URL = r'https?://.*?\.bandcamp\.com/track/(?P<title>.*)'
+ _VALID_URL = r'https?://.*?\.bandcamp\.com/track/(?P<title>[^/?#&]+)'
_TESTS = [{
'url': 'http://youtube-dl.bandcamp.com/track/youtube-dl-test-song',
'md5': 'c557841d5e50261777a6585648adf439',
class BandcampAlbumIE(InfoExtractor):
IE_NAME = 'Bandcamp:album'
- _VALID_URL = r'https?://(?:(?P<subdomain>[^.]+)\.)?bandcamp\.com(?:/album/(?P<album_id>[^?#]+)|/?(?:$|[?#]))'
+ _VALID_URL = r'https?://(?:(?P<subdomain>[^.]+)\.)?bandcamp\.com(?:/album/(?P<album_id>[^/?#&]+))?'
_TESTS = [{
'url': 'http://blazo.bandcamp.com/album/jazz-format-mixtape-vol-1',
'playlist_count': 2,
}]
+ @classmethod
+ def suitable(cls, url):
+ return (False
+ if BandcampWeeklyIE.suitable(url) or BandcampIE.suitable(url)
+ else super(BandcampAlbumIE, cls).suitable(url))
+
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
uploader_id = mobj.group('subdomain')
raise ExtractorError('The page doesn\'t contain any tracks')
# Only tracks with duration info have songs
entries = [
- self.url_result(compat_urlparse.urljoin(url, t_path), ie=BandcampIE.ie_key())
+ self.url_result(
+ compat_urlparse.urljoin(url, t_path),
+ ie=BandcampIE.ie_key(),
+ video_title=self._search_regex(
+ r'<span\b[^>]+\bitemprop=["\']name["\'][^>]*>([^<]+)',
+ elem_content, 'track title', fatal=False))
for elem_content, t_path in track_elements
if self._html_search_meta('duration', elem_content, default=None)]
'title': title,
'entries': entries,
}
+
+
+class BandcampWeeklyIE(InfoExtractor):
+ IE_NAME = 'Bandcamp:weekly'
+ _VALID_URL = r'https?://(?:www\.)?bandcamp\.com/?\?(?:.*?&)?show=(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://bandcamp.com/?show=224',
+ 'md5': 'b00df799c733cf7e0c567ed187dea0fd',
+ 'info_dict': {
+ 'id': '224',
+ 'ext': 'opus',
+ 'title': 'BC Weekly April 4th 2017 - Magic Moments',
+ 'description': 'md5:5d48150916e8e02d030623a48512c874',
+ 'duration': 5829.77,
+ 'release_date': '20170404',
+ 'series': 'Bandcamp Weekly',
+ 'episode': 'Magic Moments',
+ 'episode_number': 208,
+ 'episode_id': '224',
+ }
+ }, {
+ 'url': 'https://bandcamp.com/?blah/blah@&show=228',
+ 'only_matching': True
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ blob = self._parse_json(
+ self._search_regex(
+ r'data-blob=(["\'])(?P<blob>{.+?})\1', webpage,
+ 'blob', group='blob'),
+ video_id, transform_source=unescapeHTML)
+
+ show = blob['bcw_show']
+
+ # This is desired because any invalid show id redirects to `bandcamp.com`
+ # which happens to expose the latest Bandcamp Weekly episode.
+ show_id = int_or_none(show.get('show_id')) or int_or_none(video_id)
+
+ formats = []
+ for format_id, format_url in show['audio_stream'].items():
+ if not isinstance(format_url, compat_str):
+ continue
+ for known_ext in KNOWN_EXTENSIONS:
+ if known_ext in format_id:
+ ext = known_ext
+ break
+ else:
+ ext = None
+ formats.append({
+ 'format_id': format_id,
+ 'url': format_url,
+ 'ext': ext,
+ 'vcodec': 'none',
+ })
+ self._sort_formats(formats)
+
+ title = show.get('audio_title') or 'Bandcamp Weekly'
+ subtitle = show.get('subtitle')
+ if subtitle:
+ title += ' - %s' % subtitle
+
+ episode_number = None
+ seq = blob.get('bcw_seq')
+
+ if seq and isinstance(seq, list):
+ try:
+ episode_number = next(
+ int_or_none(e.get('episode_number'))
+ for e in seq
+ if isinstance(e, dict) and int_or_none(e.get('id')) == show_id)
+ except StopIteration:
+ pass
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': show.get('desc') or show.get('short_desc'),
+ 'duration': float_or_none(show.get('audio_duration')),
+ 'is_live': False,
+ 'release_date': unified_strdate(show.get('published_date')),
+ 'series': 'Bandcamp Weekly',
+ 'episode': show.get('subtitle'),
+ 'episode_number': episode_number,
+ 'episode_id': compat_str(video_id),
+ 'formats': formats
+ }
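
The for/else over KNOWN_EXTENSIONS picks the first recognized extension named in a stream's format id, falling back to None; in isolation (format id is made up):

    from youtube_dl.utils import KNOWN_EXTENSIONS

    format_id = 'opus-lo'
    for known_ext in KNOWN_EXTENSIONS:
        if known_ext in format_id:
            ext = known_ext
            break
    else:
        ext = None
    assert ext == 'opus'
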
from .common import InfoExtractor
from ..utils import (
+ clean_html,
dict_get,
ExtractorError,
float_or_none,
+ get_element_by_class,
int_or_none,
parse_duration,
parse_iso8601,
try_get,
unescapeHTML,
+ urlencode_postdata,
+ urljoin,
)
from ..compat import (
compat_etree_fromstring,
class BBCCoUkIE(InfoExtractor):
IE_NAME = 'bbc.co.uk'
IE_DESC = 'BBC iPlayer'
- _ID_REGEX = r'[pb][\da-z]{7}'
+ _ID_REGEX = r'[pbw][\da-z]{7}'
_VALID_URL = r'''(?x)
https?://
(?:www\.)?bbc\.co\.uk/
(?:
programmes/(?!articles/)|
iplayer(?:/[^/]+)?/(?:episode/|playlist/)|
- music/clips[/#]|
- radio/player/
+ music/(?:clips|audiovideo/popular)[/#]|
+ radio/player/|
+ events/[^/]+/play/[^/]+/
)
(?P<id>%s)(?!/(?:episodes|broadcasts|clips))
''' % _ID_REGEX
+ _LOGIN_URL = 'https://account.bbc.com/signin'
+ _NETRC_MACHINE = 'bbc'
+
_MEDIASELECTOR_URLS = [
# Provides HQ HLS streams with even better quality that pc mediaset but fails
# with geolocation in some cases when it's even not geo restricted at all (e.g.
}, {
'url': 'http://www.bbc.co.uk/radio/player/p03cchwf',
'only_matching': True,
- }
- ]
+ }, {
+ 'url': 'https://www.bbc.co.uk/music/audiovideo/popular#p055bc55',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.bbc.co.uk/programmes/w3csv1y9',
+ 'only_matching': True,
+ }]
_USP_RE = r'/([^/]+?)\.ism(?:\.hlsv2\.ism)?/[^/]+\.m3u8'
+ def _login(self):
+ username, password = self._get_login_info()
+ if username is None:
+ return
+
+ login_page = self._download_webpage(
+ self._LOGIN_URL, None, 'Downloading signin page')
+
+ login_form = self._hidden_inputs(login_page)
+
+ login_form.update({
+ 'username': username,
+ 'password': password,
+ })
+
+ post_url = urljoin(self._LOGIN_URL, self._search_regex(
+ r'<form[^>]+action=(["\'])(?P<url>.+?)\1', login_page,
+ 'post url', default=self._LOGIN_URL, group='url'))
+
+ response, urlh = self._download_webpage_handle(
+ post_url, None, 'Logging in', data=urlencode_postdata(login_form),
+ headers={'Referer': self._LOGIN_URL})
+
+ if self._LOGIN_URL in urlh.geturl():
+ error = clean_html(get_element_by_class('form-message', response))
+ if error:
+ raise ExtractorError(
+ 'Unable to login: %s' % error, expected=True)
+ raise ExtractorError('Unable to log in')
+
+ def _real_initialize(self):
+ self._login()
+
class MediaSelectionError(Exception):
def __init__(self, id):
self.id = id
webpage = self._download_webpage(url, group_id, 'Downloading video page')
+ error = self._search_regex(
+ r'<div\b[^>]+\bclass=["\']smp__message delta["\'][^>]*>([^<]+)<',
+ webpage, 'error', default=None)
+ if error:
+ raise ExtractorError(error, expected=True)
+
programme_id = None
duration = None
ExtractorError,
clean_html,
compat_str,
+ float_or_none,
int_or_none,
parse_iso8601,
try_get,
+ urljoin,
)
-class BeamProLiveIE(InfoExtractor):
- IE_NAME = 'Beam:live'
- _VALID_URL = r'https?://(?:\w+\.)?beam\.pro/(?P<id>[^/?#&]+)'
+class BeamProBaseIE(InfoExtractor):
+ _API_BASE = 'https://mixer.com/api/v1'
_RATINGS = {'family': 0, 'teen': 13, '18+': 18}
+
+ def _extract_channel_info(self, chan):
+ user_id = chan.get('userId') or try_get(chan, lambda x: x['user']['id'])
+ return {
+ 'uploader': chan.get('token') or try_get(
+ chan, lambda x: x['user']['username'], compat_str),
+ 'uploader_id': compat_str(user_id) if user_id else None,
+ 'age_limit': self._RATINGS.get(chan.get('audience')),
+ }
+
+
+class BeamProLiveIE(BeamProBaseIE):
+ IE_NAME = 'Mixer:live'
+ _VALID_URL = r'https?://(?:\w+\.)?(?:beam\.pro|mixer\.com)/(?P<id>[^/?#&]+)'
_TEST = {
- 'url': 'http://www.beam.pro/niterhayven',
+ 'url': 'http://mixer.com/niterhayven',
'info_dict': {
'id': '261562',
'ext': 'mp4',
},
}
+ _MANIFEST_URL_TEMPLATE = '%s/channels/%%s/manifest.%%s' % BeamProBaseIE._API_BASE
+
+ @classmethod
+ def suitable(cls, url):
+ return False if BeamProVodIE.suitable(url) else super(BeamProLiveIE, cls).suitable(url)
+
def _real_extract(self, url):
channel_name = self._match_id(url)
chan = self._download_json(
- 'https://beam.pro/api/v1/channels/%s' % channel_name, channel_name)
+ '%s/channels/%s' % (self._API_BASE, channel_name), channel_name)
if chan.get('online') is False:
raise ExtractorError(
channel_id = chan['id']
+ def manifest_url(kind):
+ return self._MANIFEST_URL_TEMPLATE % (channel_id, kind)
+
formats = self._extract_m3u8_formats(
- 'https://beam.pro/api/v1/channels/%s/manifest.m3u8' % channel_id,
- channel_name, ext='mp4', m3u8_id='hls', fatal=False)
+ manifest_url('m3u8'), channel_name, ext='mp4', m3u8_id='hls',
+ fatal=False)
+ formats.extend(self._extract_smil_formats(
+ manifest_url('smil'), channel_name, fatal=False))
self._sort_formats(formats)
- user_id = chan.get('userId') or try_get(chan, lambda x: x['user']['id'])
-
- return {
+ info = {
'id': compat_str(chan.get('id') or channel_name),
'title': self._live_title(chan.get('name') or channel_name),
'description': clean_html(chan.get('description')),
- 'thumbnail': try_get(chan, lambda x: x['thumbnail']['url'], compat_str),
+ 'thumbnail': try_get(
+ chan, lambda x: x['thumbnail']['url'], compat_str),
'timestamp': parse_iso8601(chan.get('updatedAt')),
- 'uploader': chan.get('token') or try_get(
- chan, lambda x: x['user']['username'], compat_str),
- 'uploader_id': compat_str(user_id) if user_id else None,
- 'age_limit': self._RATINGS.get(chan.get('audience')),
'is_live': True,
'view_count': int_or_none(chan.get('viewersTotal')),
'formats': formats,
}
+ info.update(self._extract_channel_info(chan))
+
+ return info
+
+
+class BeamProVodIE(BeamProBaseIE):
+ IE_NAME = 'Mixer:vod'
+ _VALID_URL = r'https?://(?:\w+\.)?(?:beam\.pro|mixer\.com)/[^/?#&]+\?.*?\bvod=(?P<id>\d+)'
+ _TEST = {
+ 'url': 'https://mixer.com/willow8714?vod=2259830',
+ 'md5': 'b2431e6e8347dc92ebafb565d368b76b',
+ 'info_dict': {
+ 'id': '2259830',
+ 'ext': 'mp4',
+ 'title': 'willow8714\'s Channel',
+ 'duration': 6828.15,
+ 'thumbnail': r're:https://.*source\.png$',
+ 'timestamp': 1494046474,
+ 'upload_date': '20170506',
+ 'uploader': 'willow8714',
+ 'uploader_id': '6085379',
+ 'age_limit': 13,
+ 'view_count': int,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }
+
+ @staticmethod
+ def _extract_format(vod, vod_type):
+ if not vod.get('baseUrl'):
+ return []
+
+ if vod_type == 'hls':
+ filename, protocol = 'manifest.m3u8', 'm3u8_native'
+ elif vod_type == 'raw':
+ filename, protocol = 'source.mp4', 'https'
+ else:
+ assert False
+
+ data = vod.get('data') if isinstance(vod.get('data'), dict) else {}
+
+ format_id = [vod_type]
+ if isinstance(data.get('Height'), compat_str):
+ format_id.append('%sp' % data['Height'])
+
+ return [{
+ 'url': urljoin(vod['baseUrl'], filename),
+ 'format_id': '-'.join(format_id),
+ 'ext': 'mp4',
+ 'protocol': protocol,
+ 'width': int_or_none(data.get('Width')),
+ 'height': int_or_none(data.get('Height')),
+ 'fps': int_or_none(data.get('Fps')),
+ 'tbr': int_or_none(data.get('Bitrate'), 1000),
+ }]
+
+ def _real_extract(self, url):
+ vod_id = self._match_id(url)
+
+ vod_info = self._download_json(
+ '%s/recordings/%s' % (self._API_BASE, vod_id), vod_id)
+
+ state = vod_info.get('state')
+ if state != 'AVAILABLE':
+ raise ExtractorError(
+ 'VOD %s is not available (state: %s)' % (vod_id, state),
+ expected=True)
+
+ formats = []
+ thumbnail_url = None
+
+ for vod in vod_info['vods']:
+ vod_type = vod.get('format')
+ if vod_type in ('hls', 'raw'):
+ formats.extend(self._extract_format(vod, vod_type))
+ elif vod_type == 'thumbnail':
+ thumbnail_url = urljoin(vod.get('baseUrl'), 'source.png')
+
+ self._sort_formats(formats)
+
+ info = {
+ 'id': vod_id,
+ 'title': vod_info.get('name') or vod_id,
+ 'duration': float_or_none(vod_info.get('duration')),
+ 'thumbnail': thumbnail_url,
+ 'timestamp': parse_iso8601(vod_info.get('createdAt')),
+ 'view_count': int_or_none(vod_info.get('viewsTotal')),
+ 'formats': formats,
+ }
+ info.update(self._extract_channel_info(vod_info.get('channel') or {}))
+
+ return info
from ..utils import (
int_or_none,
parse_iso8601,
+ urljoin,
)
webpage = self._download_webpage(url, video_id)
cpl_url = self._search_regex(
- r'<script[^>]+src=(["\'])(?P<url>(?:https?:)?//static\.beeg\.com/cpl/\d+\.js.*?)\1',
+ r'<script[^>]+src=(["\'])(?P<url>(?:/static|(?:https?:)?//static\.beeg\.com)/cpl/\d+\.js.*?)\1',
webpage, 'cpl', default=None, group='url')
+ cpl_url = urljoin(url, cpl_url)
+
beeg_version, beeg_salt = [None] * 2
if cpl_url:
r'beeg_salt\s*=\s*(["\'])(?P<beeg_salt>.+?)\1', cpl, 'beeg salt',
default=None, group='beeg_salt')
- beeg_version = beeg_version or '2000'
+ beeg_version = beeg_version or '2185'
beeg_salt = beeg_salt or 'pmweAkq8lAYKdfWcFCUj0yoVgoPlinamH5UE1CB3H'
video = self._download_json(
'description': '如果你是神明,并且能够让妄想成为现实。那你会进行怎么样的妄想?是淫靡的世界?独裁社会?毁灭性的制裁?还是……2015年,涩谷。从6年前发生的大灾害"涩谷地震"之后复兴了的这个街区里新设立的私立高中...',
},
'skip': 'Geo-restricted to China',
+ }, {
+ # Title with double quotes
+ 'url': 'http://www.bilibili.com/video/av8903802/',
+ 'info_dict': {
+ 'id': '8903802',
+ 'ext': 'mp4',
+ 'title': '阿滴英文|英文歌分享#6 "Closer',
+ 'description': '滴妹今天唱Closer給你聽! 有史以来,被推最多次也是最久的歌曲,其实歌词跟我原本想像差蛮多的,不过还是好听! 微博@阿滴英文',
+ 'uploader': '阿滴英文',
+ 'uploader_id': '65880958',
+ 'timestamp': 1488382620,
+ 'upload_date': '20170301',
+ },
+ 'params': {
+ 'skip_download': True, # Test metadata only
+ },
}]
_APP_KEY = '84956560bc028eb7'
'formats': formats,
})
- title = self._html_search_regex('<h1[^>]+title="([^"]+)">', webpage, 'title')
+ title = self._html_search_regex('<h1[^>]*>([^<]+)</h1>', webpage, 'title')
description = self._html_search_meta('description', webpage)
timestamp = unified_timestamp(self._html_search_regex(
r'<time[^>]+datetime="([^"]+)"', webpage, 'upload time', default=None))
title = self._html_search_regex(
r'<h2 class="white">(.*?)</h2>', webpage, 'title')
video_info_dicts = re.findall(
- r"({\s*src:\s*'http://film\.bpb\.de/[^}]+})", webpage)
+ r"({\s*src\s*:\s*'https?://film\.bpb\.de/[^}]+})", webpage)
formats = []
for video_info in video_info_dicts:
- video_info = self._parse_json(video_info, video_id, transform_source=js_to_json)
- quality = video_info['quality']
- video_url = video_info['src']
+ video_info = self._parse_json(
+ video_info, video_id, transform_source=js_to_json, fatal=False)
+ if not video_info:
+ continue
+ video_url = video_info.get('src')
+ if not video_url:
+ continue
+ quality = 'high' if '_high' in video_url else 'low'
formats.append({
'url': video_url,
'preference': 10 if quality == 'high' else 0,
continue
entries.append(self.url_result(video['url']))
- facebook_url = FacebookIE._extract_url(webpage)
- if facebook_url:
- entries.append(self.url_result(facebook_url))
+ facebook_urls = FacebookIE._extract_urls(webpage)
+ entries.extend([
+ self.url_result(facebook_url)
+ for facebook_url in facebook_urls])
return {
'_type': 'playlist',
'media': 'http://search.yahoo.com/mrss/',
'clearleap': 'http://www.clearleap.com/namespace/clearleap/1.0/',
}
+ _GEO_COUNTRIES = ['CA']
def _call_api(self, path, video_id):
url = path if path.startswith('http') else self._API_BASE_URL + path
class CBCWatchVideoIE(CBCWatchBaseIE):
IE_NAME = 'cbc.ca:watch:video'
_VALID_URL = r'https?://api-cbc\.cloud\.clearleap\.com/cloffice/client/web/play/?\?.*?\bcontentId=(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'
+ _TEST = {
+ # geo-restricted to Canada, bypassable
+ 'url': 'https://api-cbc.cloud.clearleap.com/cloffice/client/web/play/?contentId=3c84472a-1eea-4dee-9267-2655d5055dcf&categoryId=ebc258f5-ee40-4cca-b66b-ba6bd55b7235',
+ 'only_matching': True,
+ }
def _real_extract(self, url):
video_id = self._match_id(url)
IE_NAME = 'cbc.ca:watch'
_VALID_URL = r'https?://watch\.cbc\.ca/(?:[^/]+/)+(?P<id>[0-9a-f-]+)'
_TESTS = [{
+ # geo-restricted to Canada, bypassable
'url': 'http://watch.cbc.ca/doc-zone/season-6/customer-disservice/38e815a-009e3ab12e4',
'info_dict': {
- 'id': '38e815a-009e3ab12e4',
+ 'id': '9673749a-5e77-484c-8b62-a1092a6b5168',
'ext': 'mp4',
'title': 'Customer (Dis)Service',
'description': 'md5:8bdd6913a0fe03d4b2a17ebe169c7c87',
'skip_download': True,
'format': 'bestvideo',
},
- 'skip': 'Geo-restricted to Canada',
}, {
+ # geo-restricted to Canada, bypassable
'url': 'http://watch.cbc.ca/arthur/all/1ed4b385-cd84-49cf-95f0-80f004680057',
'info_dict': {
'id': '1ed4b385-cd84-49cf-95f0-80f004680057',
'description': 'Arthur, the sweetest 8-year-old aardvark, and his pals solve all kinds of problems with humour, kindness and teamwork.',
},
'playlist_mincount': 30,
- 'skip': 'Geo-restricted to Canada',
}]
def _real_extract(self, url):
'only_matching': True,
}]
- def _extract_video_info(self, content_id):
+ def _extract_video_info(self, content_id, site='cbs', mpx_acc=2198311517):
items_data = self._download_xml(
'http://can.cbs.com/thunder/player/videoPlayerService.php',
- content_id, query={'partner': 'cbs', 'contentId': content_id})
+ content_id, query={'partner': site, 'contentId': content_id})
video_data = xpath_element(items_data, './/item')
title = xpath_text(video_data, 'videoTitle', 'title', True)
- tp_path = 'dJ5BDC/media/guid/2198311517/%s' % content_id
+ tp_path = 'dJ5BDC/media/guid/%d/%s' % (mpx_acc, content_id)
tp_release_url = 'http://link.theplatform.com/s/' + tp_path
asset_types = []
import re
-from .theplatform import ThePlatformIE
+from .cbs import CBSIE
from ..utils import int_or_none
-class CBSInteractiveIE(ThePlatformIE):
- _VALID_URL = r'https?://(?:www\.)?(?P<site>cnet|zdnet)\.com/(?:videos|video/share)/(?P<id>[^/?]+)'
+class CBSInteractiveIE(CBSIE):
+ _VALID_URL = r'https?://(?:www\.)?(?P<site>cnet|zdnet)\.com/(?:videos|video(?:/share)?)/(?P<id>[^/?]+)'
_TESTS = [{
'url': 'http://www.cnet.com/videos/hands-on-with-microsofts-windows-8-1-update/',
'info_dict': {
- 'id': '56f4ea68-bd21-4852-b08c-4de5b8354c60',
- 'ext': 'flv',
+ 'id': 'R49SYt__yAfmlXR85z4f7gNmCBDcN_00',
+ 'display_id': 'hands-on-with-microsofts-windows-8-1-update',
+ 'ext': 'mp4',
'title': 'Hands-on with Microsoft Windows 8.1 Update',
'description': 'The new update to the Windows 8 OS brings improved performance for mouse and keyboard users.',
'uploader_id': '6085384d-619e-11e3-b231-14feb5ca9861',
'timestamp': 1396479627,
'upload_date': '20140402',
},
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
}, {
'url': 'http://www.cnet.com/videos/whiny-pothole-tweets-at-local-government-when-hit-by-cars-tomorrow-daily-187/',
+ 'md5': 'f11d27b2fa18597fbf92444d2a9ed386',
'info_dict': {
- 'id': '56527b93-d25d-44e3-b738-f989ce2e49ba',
- 'ext': 'flv',
+ 'id': 'kjOJd_OoVJqbg_ZD8MZCOk8Wekb9QccK',
+ 'display_id': 'whiny-pothole-tweets-at-local-government-when-hit-by-cars-tomorrow-daily-187',
+ 'ext': 'mp4',
'title': 'Whiny potholes tweet at local government when hit by cars (Tomorrow Daily 187)',
- 'description': 'Khail and Ashley wonder what other civic woes can be solved by self-tweeting objects, investigate a new kind of VR camera and watch an origami robot self-assemble, walk, climb, dig and dissolve. #TDPothole',
+ 'description': 'md5:d2b9a95a5ffe978ae6fbd4cf944d618f',
'uploader_id': 'b163284d-6b73-44fc-b3e6-3da66c392d40',
'uploader': 'Ashley Esqueda',
'duration': 1482,
}, {
'url': 'http://www.zdnet.com/video/share/video-keeping-android-smartphones-and-tablets-secure/',
'info_dict': {
- 'id': 'bc1af9f0-a2b5-4e54-880d-0d95525781c0',
+ 'id': 'k0r4T_ehht4xW_hAOqiVQPuBDPZ8SRjt',
+ 'display_id': 'video-keeping-android-smartphones-and-tablets-secure',
'ext': 'mp4',
'title': 'Video: Keeping Android smartphones and tablets secure',
'description': 'Here\'s the best way to keep Android devices secure, and what you do when they\'ve come to the end of their lives.',
'uploader_id': 'f2d97ea2-8175-11e2-9d12-0018fe8a00b0',
'uploader': 'Adrian Kingsley-Hughes',
- 'timestamp': 1448961720,
- 'upload_date': '20151201',
+ 'duration': 731,
+ 'timestamp': 1449129925,
+ 'upload_date': '20151203',
},
'params': {
# m3u8 download
'skip_download': True,
- }
+ },
+ }, {
+ 'url': 'http://www.zdnet.com/video/huawei-matebook-x-video/',
+ 'only_matching': True,
}]
- TP_RELEASE_URL_TEMPLATE = 'http://link.theplatform.com/s/kYEXFC/%s?mbr=true'
+
MPX_ACCOUNTS = {
- 'cnet': 2288573011,
+ 'cnet': 2198311517,
'zdnet': 2387448114,
}
data = self._parse_json(data_json, display_id)
vdata = data.get('video') or data['videos'][0]
- video_id = vdata['id']
+ video_id = vdata['mpxRefId']
+
title = vdata['title']
author = vdata.get('author')
if author:
uploader = None
uploader_id = None
- media_guid_path = 'media/guid/%d/%s' % (self.MPX_ACCOUNTS[site], vdata['mpxRefId'])
- formats, subtitles = [], {}
- for (fkey, vid) in vdata['files'].items():
- if fkey == 'hls_phone' and 'hls_tablet' in vdata['files']:
- continue
- release_url = self.TP_RELEASE_URL_TEMPLATE % vid
- if fkey == 'hds':
- release_url += '&manifest=f4m'
- tp_formats, tp_subtitles = self._extract_theplatform_smil(release_url, video_id, 'Downloading %s SMIL data' % fkey)
- formats.extend(tp_formats)
- subtitles = self._merge_subtitles(subtitles, tp_subtitles)
- self._sort_formats(formats)
-
- info = self._extract_theplatform_metadata('kYEXFC/%s' % media_guid_path, video_id)
+ info = self._extract_video_info(video_id, site, self.MPX_ACCOUNTS[site])
info.update({
'id': video_id,
'display_id': display_id,
'duration': int_or_none(vdata.get('duration')),
'uploader': uploader,
'uploader_id': uploader_id,
- 'subtitles': subtitles,
- 'formats': formats,
})
return info
_TESTS = [
{
- 'url': 'http://www.cbsnews.com/news/tesla-and-spacex-elon-musks-industrial-empire/',
+ # 60 minutes
+ 'url': 'http://www.cbsnews.com/news/artificial-intelligence-positioned-to-be-a-game-changer/',
'info_dict': {
- 'id': 'tesla-and-spacex-elon-musks-industrial-empire',
- 'ext': 'flv',
- 'title': 'Tesla and SpaceX: Elon Musk\'s industrial empire',
- 'thumbnail': 'http://beta.img.cbsnews.com/i/2014/03/30/60147937-2f53-4565-ad64-1bdd6eb64679/60-0330-pelley-640x360.jpg',
- 'duration': 791,
+ 'id': '_B6Ga3VJrI4iQNKsir_cdFo9Re_YJHE_',
+ 'ext': 'mp4',
+ 'title': 'Artificial Intelligence',
+ 'description': 'md5:8818145f9974431e0fb58a1b8d69613c',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 1606,
+ 'uploader': 'CBSI-NEW',
+ 'timestamp': 1498431900,
+ 'upload_date': '20170625',
},
'params': {
- # rtmp download
+ # m3u8 download
'skip_download': True,
},
- 'skip': 'Subscribers only',
},
{
'url': 'http://www.cbsnews.com/videos/fort-hood-shooting-army-downplays-mental-illness-as-cause-of-attack/',
'skip_download': True,
},
},
+ {
+ # 48 hours
+ 'url': 'http://www.cbsnews.com/news/maria-ridulph-murder-will-the-nations-oldest-cold-case-to-go-to-trial-ever-get-solved/',
+ 'info_dict': {
+ 'id': 'QpM5BJjBVEAUFi7ydR9LusS69DPLqPJ1',
+ 'ext': 'mp4',
+ 'title': 'Cold as Ice',
+ 'description': 'Can a childhood memory of a friend\'s murder solve a 1957 cold case? "48 Hours" correspondent Erin Moriarty has the latest.',
+ 'upload_date': '20170604',
+ 'timestamp': 1496538000,
+ 'uploader': 'CBSI-NEW',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
]
def _real_extract(self, url):
webpage = self._download_webpage(url, video_id)
video_info = self._parse_json(self._html_search_regex(
- r'(?:<ul class="media-list items" id="media-related-items"><li data-video-info|<div id="cbsNewsVideoPlayer" data-video-player-options)=\'({.+?})\'',
- webpage, 'video JSON info'), video_id)
+ r'(?:<ul class="media-list items" id="media-related-items"[^>]*><li data-video-info|<div id="cbsNewsVideoPlayer" data-video-player-options)=\'({.+?})\'',
+ webpage, 'video JSON info', default='{}'), video_id, fatal=False)
+
+ if video_info:
+ item = video_info['item'] if 'item' in video_info else video_info
+ else:
+ state = self._parse_json(self._search_regex(
+ r'data-cbsvideoui-options=(["\'])(?P<json>{.+?})\1', webpage,
+ 'playlist JSON info', group='json'), video_id)['state']
+ item = state['playlist'][state['pid']]
- item = video_info['item'] if 'item' in video_info else video_info
- guid = item['mpxRefId']
- return self._extract_video_info(guid)
+ return self._extract_video_info(item['mpxRefId'], 'cbsnews')
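# Rough shape of the data-cbsvideoui-options payload the new fallback parses;
# the keys follow the code above, the values are fabricated.
state = {
    'pid': 'pid0',
    'playlist': {
        'pid0': {'mpxRefId': 'QpM5BJjBVEAUFi7ydR9LusS69DPLqPJ1'},
    },
}
item = state['playlist'][state['pid']]
print(item['mpxRefId'])  # QpM5BJjBVEAUFi7ydR9LusS69DPLqPJ1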
class CBSNewsLiveVideoIE(InfoExtractor):
}
def extract_format(page, version):
- json_str = self._search_regex(
+ json_str = self._html_search_regex(
r'player_data=(\\?["\'])(?P<player_data>.+?)\1', page,
'%s player_json' % version, fatal=False, group='player_data')
if not json_str:
class CharlieRoseIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?charlierose\.com/video(?:s|/player)/(?P<id>\d+)'
+ _VALID_URL = r'https?://(?:www\.)?charlierose\.com/(?:video|episode)(?:s|/player)/(?P<id>\d+)'
_TESTS = [{
'url': 'https://charlierose.com/videos/27996',
'md5': 'fda41d49e67d4ce7c2411fd2c4702e09',
}, {
'url': 'https://charlierose.com/videos/27996',
'only_matching': True,
+ }, {
+ 'url': 'https://charlierose.com/episodes/30887?autoplay=true',
+ 'only_matching': True,
}]
_PLAYER_BASE = 'https://charlierose.com/video/player/%s'
import json
from .common import InfoExtractor
+from .youtube import YoutubeIE
from ..utils import (
clean_html,
ExtractorError
# If nativePlatform is None a fallback mechanism is used (i.e. youtube embed)
if native_platform is None:
- youtube_url = self._html_search_regex(
- r'<iframe.* src="((?:https?:)?//(?:[^.]+\.)?youtube\.com/.+?)"',
- webpage, 'fallback video URL', default=None)
- if youtube_url is not None:
- return self.url_result(youtube_url, ie='Youtube')
+ youtube_url = YoutubeIE._extract_url(webpage)
+ if youtube_url:
+ return self.url_result(youtube_url, ie=YoutubeIE.ie_key())
# Non Fallback: Decide to use native source (e.g. youtube or vimeo) or
# the own CDN
class CinchcastIE(InfoExtractor):
- _VALID_URL = r'https?://player\.cinchcast\.com/.*?assetId=(?P<id>[0-9]+)'
- _TEST = {
+ _VALID_URL = r'https?://player\.cinchcast\.com/.*?(?:assetId|show_id)=(?P<id>[0-9]+)'
+ _TESTS = [{
+ 'url': 'http://player.cinchcast.com/?show_id=5258197&platformId=1&assetType=single',
+ 'info_dict': {
+ 'id': '5258197',
+ 'ext': 'mp3',
+ 'title': 'Train Your Brain to Up Your Game with Coach Mandy',
+ 'upload_date': '20130816',
+ },
+ }, {
# The actual test is run in the generic extractor; look for undergroundwellness

'url': 'http://player.cinchcast.com/?platformId=1&assetType=single&assetId=7141703',
'only_matching': True,
- }
+ }]
def _real_extract(self, url):
video_id = self._match_id(url)
--- /dev/null
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ determine_ext,
+ unescapeHTML,
+)
+
+
+class CJSWIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?cjsw\.com/program/(?P<program>[^/]+)/episode/(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'http://cjsw.com/program/freshly-squeezed/episode/20170620',
+ 'md5': 'cee14d40f1e9433632c56e3d14977120',
+ 'info_dict': {
+ 'id': '91d9f016-a2e7-46c5-8dcb-7cbcd7437c41',
+ 'ext': 'mp3',
+ 'title': 'Freshly Squeezed – Episode June 20, 2017',
+ 'description': 'md5:c967d63366c3898a80d0c7b0ff337202',
+ 'series': 'Freshly Squeezed',
+ 'episode_id': '20170620',
+ },
+ }, {
+ # no description
+ 'url': 'http://cjsw.com/program/road-pops/episode/20170707/',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ program, episode_id = mobj.group('program', 'id')
+ audio_id = '%s/%s' % (program, episode_id)
+
+ webpage = self._download_webpage(url, episode_id)
+
+ title = unescapeHTML(self._search_regex(
+ (r'<h1[^>]+class=["\']episode-header__title["\'][^>]*>(?P<title>[^<]+)',
+ r'data-audio-title=(["\'])(?P<title>(?:(?!\1).)+)\1'),
+ webpage, 'title', group='title'))
+
+ audio_url = self._search_regex(
+ r'<button[^>]+data-audio-src=(["\'])(?P<url>(?:(?!\1).)+)\1',
+ webpage, 'audio url', group='url')
+
+ audio_id = self._search_regex(
+ r'/([\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})\.mp3',
+ audio_url, 'audio id', default=audio_id)
+
+ formats = [{
+ 'url': audio_url,
+ 'ext': determine_ext(audio_url, 'mp3'),
+ 'vcodec': 'none',
+ }]
+
+ description = self._html_search_regex(
+ r'<p>(?P<description>.+?)</p>', webpage, 'description',
+ default=None)
+ series = self._search_regex(
+ r'data-showname=(["\'])(?P<name>(?:(?!\1).)+)\1', webpage,
+ 'series', default=program, group='name')
+
+ return {
+ 'id': audio_id,
+ 'title': title,
+ 'description': description,
+ 'formats': formats,
+ 'series': series,
+ 'episode_id': episode_id,
+ }
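# Standalone check of the GUID fallback above; the audio URL is fabricated
# but follows the shape the regex expects.
import re

audio_url = 'https://cjsw.com/audio/91d9f016-a2e7-46c5-8dcb-7cbcd7437c41.mp3'
m = re.search(
    r'/([\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})\.mp3',
    audio_url)
print(m.group(1) if m else 'fall back to program/episode id')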
+++ /dev/null
-# coding: utf-8
-from __future__ import unicode_literals
-
-from .common import InfoExtractor
-from ..utils import (
- int_or_none,
- unified_strdate,
-)
-
-
-class ClipfishIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?clipfish\.de/(?:[^/]+/)+video/(?P<id>[0-9]+)'
- _TEST = {
- 'url': 'http://www.clipfish.de/special/ugly-americans/video/4343170/s01-e01-ugly-americans-date-in-der-hoelle/',
- 'md5': 'b9a5dc46294154c1193e2d10e0c95693',
- 'info_dict': {
- 'id': '4343170',
- 'ext': 'mp4',
- 'title': 'S01 E01 - Ugly Americans - Date in der Hölle',
- 'description': 'Mark Lilly arbeitet im Sozialdienst der Stadt New York und soll Immigranten bei ihrer Einbürgerung in die USA zur Seite stehen.',
- 'upload_date': '20161005',
- 'duration': 1291,
- 'view_count': int,
- }
- }
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
-
- video_info = self._download_json(
- 'http://www.clipfish.de/devapi/id/%s?format=json&apikey=hbbtv' % video_id,
- video_id)['items'][0]
-
- formats = []
-
- m3u8_url = video_info.get('media_videourl_hls')
- if m3u8_url:
- formats.append({
- 'url': m3u8_url.replace('de.hls.fra.clipfish.de', 'hls.fra.clipfish.de'),
- 'ext': 'mp4',
- 'format_id': 'hls',
- })
-
- mp4_url = video_info.get('media_videourl')
- if mp4_url:
- formats.append({
- 'url': mp4_url,
- 'format_id': 'mp4',
- 'width': int_or_none(video_info.get('width')),
- 'height': int_or_none(video_info.get('height')),
- 'tbr': int_or_none(video_info.get('bitrate')),
- })
-
- descr = video_info.get('descr')
- if descr:
- descr = descr.strip()
-
- return {
- 'id': video_id,
- 'title': video_info['title'],
- 'description': descr,
- 'formats': formats,
- 'thumbnail': video_info.get('media_content_thumbnail_large') or video_info.get('media_thumbnail'),
- 'duration': int_or_none(video_info.get('media_length')),
- 'upload_date': unified_strdate(video_info.get('pubDate')),
- 'view_count': int_or_none(video_info.get('media_views'))
- }
--- /dev/null
+# coding: utf-8
+
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ parse_iso8601,
+ qualities,
+)
+
+import re
+
+
+class ClippitIE(InfoExtractor):
+
+ _VALID_URL = r'https?://(?:www\.)?clippituser\.tv/c/(?P<id>[a-z]+)'
+ _TEST = {
+ 'url': 'https://www.clippituser.tv/c/evmgm',
+ 'md5': '963ae7a59a2ec4572ab8bf2f2d2c5f09',
+ 'info_dict': {
+ 'id': 'evmgm',
+ 'ext': 'mp4',
+ 'title': 'Bye bye Brutus. #BattleBots - Clippit',
+ 'uploader': 'lizllove',
+ 'uploader_url': 'https://www.clippituser.tv/p/lizllove',
+ 'timestamp': 1472183818,
+ 'upload_date': '20160826',
+ 'description': 'BattleBots | ABC',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ }
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ title = self._html_search_regex(r'<title.*>(.+?)</title>', webpage, 'title')
+
+ FORMATS = ('sd', 'hd')
+ quality = qualities(FORMATS)
+ formats = []
+ for format_id in FORMATS:
+ url = self._html_search_regex(r'data-%s-file="(.+?)"' % format_id,
+ webpage, 'url', fatal=False)
+ if not url:
+ continue
+ match = re.search(r'/(?P<height>\d+)\.mp4', url)
+ formats.append({
+ 'url': url,
+ 'format_id': format_id,
+ 'quality': quality(format_id),
+ 'height': int(match.group('height')) if match else None,
+ })
+
+ uploader = self._html_search_regex(r'class="username".*>\s+(.+?)\n',
+ webpage, 'uploader', fatal=False)
+ uploader_url = ('https://www.clippituser.tv/p/' + uploader
+ if uploader else None)
+
+ timestamp = self._html_search_regex(r'datetime="(.+?)"',
+ webpage, 'date', fatal=False)
+ thumbnail = self._html_search_regex(r'data-image="(.+?)"',
+ webpage, 'thumbnail', fatal=False)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'formats': formats,
+ 'uploader': uploader,
+ 'uploader_url': uploader_url,
+ 'timestamp': parse_iso8601(timestamp),
+ 'description': self._og_search_description(webpage),
+ 'thumbnail': thumbnail,
+ }
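# The qualities() helper used above, in isolation: it maps a format id to its
# index in the preference-ordered tuple (higher is better). Assumes youtube_dl
# is importable; output shown for Python 3.
from youtube_dl.utils import qualities

quality = qualities(('sd', 'hd'))
print(quality('sd'), quality('hd'))  # 0 1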
video_id = self._match_id(url)
webpage = self._download_webpage(
- 'http://www.cloudy.ec/embed.php?id=%s' % video_id, video_id)
+ 'https://www.cloudy.ec/embed.php', video_id, query={
+ 'id': video_id,
+ 'playerPage': 1,
+ 'autoplay': 1,
+ })
info = self._parse_html5_media_entries(url, webpage, video_id)[0]
compat_urllib_parse_urlencode,
compat_urllib_request,
compat_urlparse,
+ compat_xml_parse_error,
)
from ..downloader.f4m import remove_encrypted_media
from ..utils import (
cls._VALID_URL_RE = re.compile(cls._VALID_URL)
m = cls._VALID_URL_RE.match(url)
assert m
- return m.group('id')
+ return compat_str(m.group('id'))
@classmethod
def working(cls):
if country_code:
self._x_forwarded_for_ip = GeoUtils.random_ipv4(country_code)
if self._downloader.params.get('verbose', False):
- self._downloader.to_stdout(
+ self._downloader.to_screen(
'[debug] Using fake IP %s (%s) as X-Forwarded-For.'
% (self._x_forwarded_for_ip, country_code.upper()))
def _download_xml(self, url_or_request, video_id,
note='Downloading XML', errnote='Unable to download XML',
- transform_source=None, fatal=True, encoding=None, data=None, headers={}, query={}):
+ transform_source=None, fatal=True, encoding=None,
+ data=None, headers={}, query={}):
"""Return the xml as an xml.etree.ElementTree.Element"""
xml_string = self._download_webpage(
- url_or_request, video_id, note, errnote, fatal=fatal, encoding=encoding, data=data, headers=headers, query=query)
+ url_or_request, video_id, note, errnote, fatal=fatal,
+ encoding=encoding, data=data, headers=headers, query=query)
if xml_string is False:
return xml_string
+ return self._parse_xml(
+ xml_string, video_id, transform_source=transform_source,
+ fatal=fatal)
+
+ def _parse_xml(self, xml_string, video_id, transform_source=None, fatal=True):
if transform_source:
xml_string = transform_source(xml_string)
- return compat_etree_fromstring(xml_string.encode('utf-8'))
+ try:
+ return compat_etree_fromstring(xml_string.encode('utf-8'))
+ except compat_xml_parse_error as ve:
+ errmsg = '%s: Failed to parse XML ' % video_id
+ if fatal:
+ raise ExtractorError(errmsg, cause=ve)
+ else:
+ self.report_warning(errmsg + str(ve))
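# A self-contained sketch (plain stdlib, fabricated input) of the non-fatal
# path _parse_xml gains here: bad XML either raises or degrades to a warning.
import xml.etree.ElementTree as etree

def parse_xml(xml_string, fatal=True):
    try:
        return etree.fromstring(xml_string.encode('utf-8'))
    except etree.ParseError as ve:
        if fatal:
            raise
        print('WARNING: Failed to parse XML: %s' % ve)

parse_xml('<a><b/></a>')              # returns an Element
parse_xml('<a><b></a>', fatal=False)  # warns and returns None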
def _download_json(self, url_or_request, video_id,
note='Downloading JSON metadata',
video_info['title'] = video_title
return video_info
- def playlist_from_matches(self, matches, video_id, video_title, getter=None, ie=None):
- urlrs = orderedSet(
+ def playlist_from_matches(self, matches, playlist_id=None, playlist_title=None, getter=None, ie=None):
+ urls = orderedSet(
self.url_result(self._proto_relative_url(getter(m) if getter else m), ie)
for m in matches)
return self.playlist_result(
- urlrs, playlist_id=video_id, playlist_title=video_title)
+ urls, playlist_id=playlist_id, playlist_title=playlist_title)
@staticmethod
def playlist_result(entries, playlist_id=None, playlist_title=None, playlist_description=None):
def _family_friendly_search(self, html):
# See http://schema.org/VideoObject
- family_friendly = self._html_search_meta('isFamilyFriendly', html)
+ family_friendly = self._html_search_meta(
+ 'isFamilyFriendly', html, default=None)
if not family_friendly:
return None
item_type = e.get('@type')
if expected_type is not None and expected_type != item_type:
return info
- if item_type == 'TVEpisode':
+ if item_type in ('TVEpisode', 'Episode'):
info.update({
'episode': unescapeHTML(e.get('name')),
'episode_number': int_or_none(e.get('episodeNumber')),
'description': unescapeHTML(e.get('description')),
})
part_of_season = e.get('partOfSeason')
- if isinstance(part_of_season, dict) and part_of_season.get('@type') == 'TVSeason':
+ if isinstance(part_of_season, dict) and part_of_season.get('@type') in ('TVSeason', 'Season', 'CreativeWorkSeason'):
info['season_number'] = int_or_none(part_of_season.get('seasonNumber'))
part_of_series = e.get('partOfSeries') or e.get('partOfTVSeries')
- if isinstance(part_of_series, dict) and part_of_series.get('@type') == 'TVSeries':
+ if isinstance(part_of_series, dict) and part_of_series.get('@type') in ('TVSeries', 'Series', 'CreativeWorkSeries'):
info['series'] = unescapeHTML(part_of_series.get('name'))
elif item_type == 'Article':
info.update({
})
elif item_type == 'VideoObject':
extract_video_object(e)
- elif item_type == 'WebPage':
- video = e.get('video')
- if isinstance(video, dict) and video.get('@type') == 'VideoObject':
- extract_video_object(video)
+ continue
+ video = e.get('video')
+ if isinstance(video, dict) and video.get('@type') == 'VideoObject':
+ extract_video_object(video)
break
return dict((k, v) for k, v in info.items() if v is not None)
ms_info['timescale'] = int(timescale)
segment_duration = source.get('duration')
if segment_duration:
- ms_info['segment_duration'] = int(segment_duration)
+ ms_info['segment_duration'] = float(segment_duration)
def extract_Initialization(source):
initialization = source.find(_add_ns('Initialization'))
'Bandwidth': bandwidth,
}
+ def location_key(location):
+ return 'url' if re.match(r'^https?://', location) else 'path'
+
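# Why fragments are now keyed by 'url' vs 'path' (standalone sketch, assumed
# values): relative locations stay as 'path' and are later resolved against
# fragment_base_url by the downloader; absolute ones are used as-is.
import re

def location_key(location):
    return 'url' if re.match(r'^https?://', location) else 'path'

for seg in ('init.mp4', 'https://cdn.example.com/seg-1.m4s'):
    print({location_key(seg): seg})
# {'path': 'init.mp4'}
# {'url': 'https://cdn.example.com/seg-1.m4s'}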
if 'segment_urls' not in representation_ms_info and 'media' in representation_ms_info:
media_template = prepare_template('media', ('Number', 'Bandwidth', 'Time'))
+ media_location_key = location_key(media_template)
# As per [1, 5.3.9.4.4, Table 16, page 55] $Number$ and $Time$
# can't be used at the same time
segment_duration = float_or_none(representation_ms_info['segment_duration'], representation_ms_info['timescale'])
representation_ms_info['total_number'] = int(math.ceil(float(period_duration) / segment_duration))
representation_ms_info['fragments'] = [{
- 'url': media_template % {
+ media_location_key: media_template % {
'Number': segment_number,
'Bandwidth': bandwidth,
},
'Number': segment_number,
}
representation_ms_info['fragments'].append({
- 'url': segment_url,
+ media_location_key: segment_url,
'duration': float_or_none(segment_d, representation_ms_info['timescale']),
})
for s in representation_ms_info['s']:
duration = float_or_none(s['d'], timescale)
for r in range(s.get('r', 0) + 1):
+ segment_uri = representation_ms_info['segment_urls'][segment_index]
fragments.append({
- 'url': representation_ms_info['segment_urls'][segment_index],
+ location_key(segment_uri): segment_uri,
'duration': duration,
})
segment_index += 1
# No fragments key is present in this case.
if 'fragments' in representation_ms_info:
f.update({
+ 'fragment_base_url': base_url,
'fragments': [],
'protocol': 'http_dash_segments',
})
initialization_url = representation_ms_info['initialization_url']
if not f.get('url'):
f['url'] = initialization_url
- f['fragments'].append({'url': initialization_url})
+ f['fragments'].append({location_key(initialization_url): initialization_url})
f['fragments'].extend(representation_ms_info['fragments'])
- for fragment in f['fragments']:
- fragment['url'] = urljoin(base_url, fragment['url'])
try:
existing_format = next(
fo for fo in formats
return f
return {}
- def _media_formats(src, cur_media_type):
+ def _media_formats(src, cur_media_type, type_info={}):
full_url = absolute_url(src)
- ext = determine_ext(full_url)
+ ext = type_info.get('ext') or determine_ext(full_url)
if ext == 'm3u8':
is_plain_url = False
formats = self._extract_m3u8_formats(
full_url, video_id, ext='mp4',
entry_protocol=m3u8_entry_protocol, m3u8_id=m3u8_id,
- preference=preference)
+ preference=preference, fatal=False)
elif ext == 'mpd':
is_plain_url = False
formats = self._extract_mpd_formats(
- full_url, video_id, mpd_id=mpd_id)
+ full_url, video_id, mpd_id=mpd_id, fatal=False)
else:
is_plain_url = True
formats = [{
return is_plain_url, formats
entries = []
+ # amp-video and amp-audio are very similar to their HTML5 counterparts
+ # so we will include them right here (see
+ # https://www.ampproject.org/docs/reference/components/amp-video)
media_tags = [(media_tag, media_type, '')
for media_tag, media_type
- in re.findall(r'(?s)(<(video|audio)[^>]*/>)', webpage)]
+ in re.findall(r'(?s)(<(?:amp-)?(video|audio)[^>]*/>)', webpage)]
media_tags.extend(re.findall(
# We only allow video|audio followed by a whitespace or '>'.
# Allowing more characters may end up in significant slow down (see
# https://github.com/rg3/youtube-dl/issues/11979, example URL:
# http://www.porntrex.com/maps/videositemap.xml).
- r'(?s)(<(?P<tag>video|audio)(?:\s+[^>]*)?>)(.*?)</(?P=tag)>', webpage))
+ r'(?s)(<(?P<tag>(?:amp-)?(?:video|audio))(?:\s+[^>]*)?>)(.*?)</(?P=tag)>', webpage))
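# Fabricated AMP markup the widened regex is meant to pick up alongside the
# plain HTML5 tags (standalone sketch; not part of the extractor).
import re

amp_page = '<amp-video width="640" height="360" src="https://example.com/v.mp4"></amp-video>'
print(re.findall(
    r'(?s)(<(?P<tag>(?:amp-)?(?:video|audio))(?:\s+[^>]*)?>)(.*?)</(?P=tag)>',
    amp_page))
# [('<amp-video width="640" height="360" src="https://example.com/v.mp4">', 'amp-video', '')]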
for media_tag, media_type, media_content in media_tags:
media_info = {
'formats': [],
src = source_attributes.get('src')
if not src:
continue
- is_plain_url, formats = _media_formats(src, media_type)
+ f = parse_content_type(source_attributes.get('type'))
+ is_plain_url, formats = _media_formats(src, media_type, f)
if is_plain_url:
- f = parse_content_type(source_attributes.get('type'))
+ # res attribute is not standard but seen several times
+ # in the wild
+ f.update({
+ 'height': int_or_none(source_attributes.get('res')),
+ 'format_id': source_attributes.get('label'),
+ })
f.update(formats[0])
media_info['formats'].append(f)
else:
tracks = video_data.get('tracks')
if tracks and isinstance(tracks, list):
for track in tracks:
+ if not isinstance(track, dict):
+ continue
if track.get('kind') != 'captions':
continue
track_url = urljoin(base_url, track.get('file'))
urls = []
formats = []
for source in jwplayer_sources_data:
+ if not isinstance(source, dict):
+ continue
source_url = self._proto_relative_url(source.get('file'))
if not source_url:
continue
self._downloader.report_warning(msg)
return res
- def _set_cookie(self, domain, name, value, expire_time=None):
+ def _set_cookie(self, domain, name, value, expire_time=None, port=None,
+ path='/', secure=False, discard=False, rest={}, **kwargs):
cookie = compat_cookiejar.Cookie(
- 0, name, value, None, None, domain, None,
- None, '/', True, False, expire_time, '', None, None, None)
+ 0, name, value, port, port is not None, domain, True,
+ domain.startswith('.'), path, True, secure, expire_time,
+ discard, None, None, rest)
self._downloader.cookiejar.set_cookie(cookie)
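# Python 3 stdlib equivalent (assumed values) of the richer cookie the new
# signature can build; port, path, secure and rest were hardcoded before.
from http.cookiejar import Cookie, CookieJar

jar = CookieJar()
jar.set_cookie(Cookie(
    0, 'session', 'abc123', None, False, 'example.com', True, False,
    '/', True, False, None, False, None, None, {}))
print([c.name for c in jar])  # ['session']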
def _get_cookies(self, url):
entries = [self.url_result(build_url(path), 'CondeNast') for path in paths]
return self.playlist_result(entries, playlist_title=title)
- def _extract_video_params(self, webpage):
- query = {}
- params = self._search_regex(
- r'(?s)var params = {(.+?)}[;,]', webpage, 'player params', default=None)
- if params:
- query.update({
- 'videoId': self._search_regex(r'videoId: [\'"](.+?)[\'"]', params, 'video id'),
- 'playerId': self._search_regex(r'playerId: [\'"](.+?)[\'"]', params, 'player id'),
- 'target': self._search_regex(r'target: [\'"](.+?)[\'"]', params, 'target'),
- })
+ def _extract_video_params(self, webpage, display_id):
+ query = self._parse_json(
+ self._search_regex(
+ r'(?s)var\s+params\s*=\s*({.+?})[;,]', webpage, 'player params',
+ default='{}'),
+ display_id, transform_source=js_to_json, fatal=False)
+ if query:
+ query['videoId'] = self._search_regex(
+ r'(?:data-video-id=|currentVideoId\s*=\s*)["\']([\da-f]+)',
+ webpage, 'video id', default=None)
else:
params = extract_attributes(self._search_regex(
r'(<[^>]+data-js="video-player"[^>]+>)',
video_id = params['videoId']
video_info = None
- if params.get('playerId'):
- info_page = self._download_json(
- 'http://player.cnevids.com/player/video.js',
- video_id, 'Downloading video info', fatal=False, query=params)
- if info_page:
- video_info = info_page.get('video')
- if not video_info:
- info_page = self._download_webpage(
- 'http://player.cnevids.com/player/loader.js',
- video_id, 'Downloading loader info', query=params)
- else:
+
+ # New API path
+ query = params.copy()
+ query['embedType'] = 'inline'
+ info_page = self._download_json(
+ 'http://player.cnevids.com/embed-api.json', video_id,
+ 'Downloading embed info', fatal=False, query=query)
+
+ # Old fallbacks
+ if not info_page:
+ if params.get('playerId'):
+ info_page = self._download_json(
+ 'http://player.cnevids.com/player/video.js', video_id,
+ 'Downloading video info', fatal=False, query=params)
+ if info_page:
+ video_info = info_page.get('video')
+ if not video_info:
+ info_page = self._download_webpage(
+ 'http://player.cnevids.com/player/loader.js',
+ video_id, 'Downloading loader info', query=params)
+ if not video_info:
info_page = self._download_webpage(
'https://player.cnevids.com/inline/video/%s.js' % video_id,
video_id, 'Downloading inline info', query={
if url_type == 'series':
return self._extract_series(url, webpage)
else:
- params = self._extract_video_params(webpage)
+ params = self._extract_video_params(webpage, display_id)
info = self._search_json_ld(
webpage, display_id, fatal=False)
info.update(self._extract_video(params))
class CorusIE(ThePlatformFeedIE):
- _VALID_URL = r'https?://(?:www\.)?(?P<domain>(?:globaltv|etcanada)\.com|(?:hgtv|foodnetwork|slice)\.ca)/(?:video/|(?:[^/]+/)+(?:videos/[a-z0-9-]+-|video\.html\?.*?\bv=))(?P<id>\d+)'
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:www\.)?
+ (?P<domain>
+ (?:globaltv|etcanada)\.com|
+ (?:hgtv|foodnetwork|slice|history|showcase)\.ca
+ )
+ /(?:video/|(?:[^/]+/)+(?:videos/[a-z0-9-]+-|video\.html\?.*?\bv=))
+ (?P<id>\d+)
+ '''
_TESTS = [{
'url': 'http://www.hgtv.ca/shows/bryan-inc/videos/movie-night-popcorn-with-bryan-870923331648/',
'md5': '05dcbca777bf1e58c2acbb57168ad3a6',
}, {
'url': 'http://etcanada.com/video/873675331955/meet-the-survivor-game-changers-castaways-part-2/',
'only_matching': True,
+ }, {
+ 'url': 'http://www.history.ca/the-world-without-canada/video/full-episodes/natural-resources/video.html?v=955054659646#video',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.showcase.ca/eyewitness/video/eyewitness++106/video.html?v=955070531919&p=1&s=da#video',
+ 'only_matching': True,
}]
_TP_FEEDS = {
'feed_id': '5tUJLgV2YNJ5',
'account_id': 2414427935,
},
+ 'history': {
+ 'feed_id': 'tQFx_TyyEq4J',
+ 'account_id': 2369613659,
+ },
+ 'showcase': {
+ 'feed_id': '9H6qyshBZU3E',
+ 'account_id': 2414426607,
+ },
}
def _real_extract(self, url):
import re
from .common import InfoExtractor
+from .youtube import YoutubeIE
from ..utils import (
parse_iso8601,
str_to_int,
webpage = self._download_webpage(url, video_id)
- youtube_url = self._search_regex(
- r'<iframe[^>]+src="((?:https?:)?//www\.youtube\.com/embed/[^"]+)"',
- webpage, 'youtube url', default=None)
+ youtube_url = YoutubeIE._extract_url(webpage)
if youtube_url:
- return self.url_result(youtube_url, 'Youtube')
+ return self.url_result(youtube_url, ie=YoutubeIE.ie_key())
video_url = self._html_search_regex(
[r'var\s+CK_vidSrc\s*=\s*"([^"]+)"', r'<video\s+src="([^"]+)"'],
# webpage provide more accurate data than series_title from XML
series = self._html_search_regex(
- r'id=["\']showmedia_about_episode_num[^>]+>\s*<a[^>]+>([^<]+)',
+ r'(?s)<h\d[^>]+\bid=["\']showmedia_about_episode_num[^>]+>(.+?)</h\d',
webpage, 'series', fatal=False)
season = xpath_text(metadata, 'series_title')
episode_number = int_or_none(xpath_text(metadata, 'episode_number'))
season_number = int_or_none(self._search_regex(
- r'(?s)<h4[^>]+id=["\']showmedia_about_episode_num[^>]+>.+?</h4>\s*<h4>\s*Season (\d+)',
+ r'(?s)<h\d[^>]+id=["\']showmedia_about_episode_num[^>]+>.+?</h\d>\s*<h4>\s*Season (\d+)',
webpage, 'season number', default=None))
return {
# coding: utf-8
from __future__ import unicode_literals
+import re
+
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
class DailyMailIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?dailymail\.co\.uk/video/[^/]+/video-(?P<id>[0-9]+)'
- _TEST = {
+ _VALID_URL = r'https?://(?:www\.)?dailymail\.co\.uk/(?:video/[^/]+/video-|embed/video/)(?P<id>[0-9]+)'
+ _TESTS = [{
'url': 'http://www.dailymail.co.uk/video/tvshowbiz/video-1295863/The-Mountain-appears-sparkling-water-ad-Heavy-Bubbles.html',
'md5': 'f6129624562251f628296c3a9ffde124',
'info_dict': {
'title': 'The Mountain appears in sparkling water ad for \'Heavy Bubbles\'',
'description': 'md5:a93d74b6da172dd5dc4d973e0b766a84',
}
- }
+ }, {
+ 'url': 'http://www.dailymail.co.uk/embed/video/1295863.html',
+ 'only_matching': True,
+ }]
+
+ @staticmethod
+ def _extract_urls(webpage):
+ return re.findall(
+ r'<iframe\b[^>]+\bsrc=["\'](?P<url>(?:https?:)?//(?:www\.)?dailymail\.co\.uk/embed/video/\d+\.html)',
+ webpage)
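# The new _extract_urls hook against fabricated embed markup; this is what
# lets the generic extractor hand Daily Mail iframes to this extractor.
import re

page = '<iframe frameborder="0" src="http://www.dailymail.co.uk/embed/video/1295863.html"></iframe>'
print(re.findall(
    r'<iframe\b[^>]+\bsrc=["\'](?P<url>(?:https?:)?//(?:www\.)?dailymail\.co\.uk/embed/video/\d+\.html)',
    page))
# ['http://www.dailymail.co.uk/embed/video/1295863.html']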
def _real_extract(self, url):
video_id = self._match_id(url)
view_count_str = self._search_regex(
(r'<meta[^>]+itemprop="interactionCount"[^>]+content="UserPlays:([\s\d,.]+)"',
r'video_views_count[^>]+>\s+([\s\d\,.]+)'),
- webpage, 'view count', fatal=False)
+ webpage, 'view count', default=None)
if view_count_str:
view_count_str = re.sub(r'\s', '', view_count_str)
view_count = str_to_int(view_count_str)
[r'buildPlayer\(({.+?})\);\n', # See https://github.com/rg3/youtube-dl/issues/7826
r'playerV5\s*=\s*dmp\.create\([^,]+?,\s*({.+?})\);',
r'buildPlayer\(({.+?})\);',
- r'var\s+config\s*=\s*({.+?});'],
+ r'var\s+config\s*=\s*({.+?});',
+ # New layout regex (see https://github.com/rg3/youtube-dl/issues/13580)
+ r'__PLAYER_CONFIG__\s*=\s*({.+?});'],
webpage, 'player v5', default=None)
if player_v5:
player = self._parse_json(player_v5, video_id)
class DailymotionPlaylistIE(DailymotionBaseInfoExtractor):
IE_NAME = 'dailymotion:playlist'
- _VALID_URL = r'(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/playlist/(?P<id>.+?)/'
+ _VALID_URL = r'(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/playlist/(?P<id>[^/?#&]+)'
_MORE_PAGES_INDICATOR = r'(?s)<div class="pages[^"]*">.*?<a\s+class="[^"]*?icon-arrow_right[^"]*?"'
_PAGE_TEMPLATE = 'https://www.dailymotion.com/playlist/%s/%s'
_TESTS = [{
class DisneyIE(InfoExtractor):
_VALID_URL = r'''(?x)
- https?://(?P<domain>(?:[^/]+\.)?(?:disney\.[a-z]{2,3}(?:\.[a-z]{2})?|disney(?:(?:me|latino)\.com|turkiye\.com\.tr)|(?:starwars|marvelkids)\.com))/(?:(?:embed/|(?:[^/]+/)+[\w-]+-)(?P<id>[a-z0-9]{24})|(?:[^/]+/)?(?P<display_id>[^/?#]+))'''
+ https?://(?P<domain>(?:[^/]+\.)?(?:disney\.[a-z]{2,3}(?:\.[a-z]{2})?|disney(?:(?:me|latino)\.com|turkiye\.com\.tr|channel\.de)|(?:starwars|marvelkids)\.com))/(?:(?:embed/|(?:[^/]+/)+[\w-]+-)(?P<id>[a-z0-9]{24})|(?:[^/]+/)?(?P<display_id>[^/?#]+))'''
_TESTS = [{
# Disney.EmbedVideo
'url': 'http://video.disney.com/watch/moana-trailer-545ed1857afee5a0ec239977',
}, {
'url': 'http://disneyjunior.en.disneyme.com/dj/watch-my-friends-tigger-and-pooh-promo',
'only_matching': True,
+ }, {
+ 'url': 'http://disneychannel.de/sehen/soy-luna-folge-118-5518518987ba27f3cc729268',
+ 'only_matching': True,
}, {
'url': 'http://disneyjunior.disney.com/galactech-the-galactech-grab-galactech-an-admiral-rescue',
'only_matching': True,
class DigitallySpeakingIE(InfoExtractor):
- _VALID_URL = r'https?://(?:evt\.dispeak|events\.digitallyspeaking)\.com/(?:[^/]+/)+xml/(?P<id>[^.]+)\.xml'
+ _VALID_URL = r'https?://(?:s?evt\.dispeak|events\.digitallyspeaking)\.com/(?:[^/]+/)+xml/(?P<id>[^.]+)\.xml'
_TESTS = [{
# From http://gdcvault.com/play/1023460/Tenacious-Design-and-The-Interface
# From http://www.gdcvault.com/play/1014631/Classic-Game-Postmortem-PAC
'url': 'http://events.digitallyspeaking.com/gdc/sf11/xml/12396_1299111843500GMPX.xml',
'only_matching': True,
+ }, {
+ # From http://www.gdcvault.com/play/1013700/Advanced-Material
+ 'url': 'http://sevt.dispeak.com/ubm/gdc/eur10/xml/11256_1282118587281VNIT.xml',
+ 'only_matching': True,
}]
def _parse_mp4(self, metadata):
from .common import InfoExtractor
from ..compat import (
- compat_urlparse,
compat_HTTPError,
+ compat_str,
+ compat_urlparse,
)
from ..utils import (
- USER_AGENTS,
ExtractorError,
int_or_none,
- unified_strdate,
remove_end,
+ try_get,
+ unified_strdate,
update_url_query,
+ USER_AGENTS,
)
webpage = self._download_webpage(url, display_id)
- info_url = self._search_regex(
- r'url\s*:\s*["\']((?:https?:)?//[^/]+/playback/videoPlaybackInfo/\d+)',
- webpage, 'video id')
-
title = remove_end(self._og_search_title(webpage), ' | Dplay')
- try:
- info = self._download_json(
- info_url, display_id, headers={
- 'Authorization': 'Bearer %s' % self._get_cookies(url).get(
- 'dplayit_token').value,
- 'Referer': url,
- })
- except ExtractorError as e:
- if isinstance(e.cause, compat_HTTPError) and e.cause.code in (400, 403):
- info = self._parse_json(e.cause.read().decode('utf-8'), display_id)
- error = info['errors'][0]
- if error.get('code') == 'access.denied.geoblocked':
- self.raise_geo_restricted(
- msg=error.get('detail'), countries=self._GEO_COUNTRIES)
- raise ExtractorError(info['errors'][0]['detail'], expected=True)
- raise
+ video_id = None
+
+ info = self._search_regex(
+ r'playback_json\s*:\s*JSON\.parse\s*\(\s*("(?:\\.|[^"\\])+?")',
+ webpage, 'playback JSON', default=None)
+ if info:
+ for _ in range(2):
+ info = self._parse_json(info, display_id, fatal=False)
+ if not info:
+ break
+ else:
+ video_id = try_get(info, lambda x: x['data']['id'])
+
+ if not info:
+ info_url = self._search_regex(
+ r'url\s*[:=]\s*["\']((?:https?:)?//[^/]+/playback/videoPlaybackInfo/\d+)',
+ webpage, 'info url')
+
+ video_id = info_url.rpartition('/')[-1]
+
+ try:
+ info = self._download_json(
+ info_url, display_id, headers={
+ 'Authorization': 'Bearer %s' % self._get_cookies(url).get(
+ 'dplayit_token').value,
+ 'Referer': url,
+ })
+ except ExtractorError as e:
+ if isinstance(e.cause, compat_HTTPError) and e.cause.code in (400, 403):
+ info = self._parse_json(e.cause.read().decode('utf-8'), display_id)
+ error = info['errors'][0]
+ if error.get('code') == 'access.denied.geoblocked':
+ self.raise_geo_restricted(
+ msg=error.get('detail'), countries=self._GEO_COUNTRIES)
+ raise ExtractorError(info['errors'][0]['detail'], expected=True)
+ raise
hls_url = info['data']['attributes']['streaming']['hls']['url']
season_number = episode_number = upload_date = None
return {
- 'id': info_url.rpartition('/')[-1],
+ 'id': compat_str(video_id or display_id),
'display_id': display_id,
'title': title,
'description': self._og_search_description(webpage),
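# The double parse above, in isolation with a fabricated payload:
# playback_json is a JSON string literal whose value is itself JSON, so it
# must be decoded twice before the video id is reachable.
import json

raw = '"{\\"data\\": {\\"id\\": \\"12345\\"}}"'  # as scraped from the page
info = raw
for _ in range(2):
    info = json.loads(info)
print(info['data']['id'])  # 12345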
ExtractorError,
clean_html,
int_or_none,
+ remove_end,
sanitized_Request,
urlencode_postdata
)
'url': 'http://www.dramafever.com/drama/4512/1/Cooking_with_Shin/',
'info_dict': {
'id': '4512.1',
- 'ext': 'mp4',
- 'title': 'Cooking with Shin 4512.1',
+ 'ext': 'flv',
+ 'title': 'Cooking with Shin',
'description': 'md5:a8eec7942e1664a6896fcd5e1287bfd0',
'episode': 'Episode 1',
'episode_number': 1,
'thumbnail': r're:^https?://.*\.jpg',
'timestamp': 1404336058,
'upload_date': '20140702',
- 'duration': 343,
+ 'duration': 344,
},
'params': {
# m3u8 download
'url': 'http://www.dramafever.com/drama/4826/4/Mnet_Asian_Music_Awards_2015/?ap=1',
'info_dict': {
'id': '4826.4',
- 'ext': 'mp4',
- 'title': 'Mnet Asian Music Awards 2015 4826.4',
+ 'ext': 'flv',
+ 'title': 'Mnet Asian Music Awards 2015',
'description': 'md5:3ff2ee8fedaef86e076791c909cf2e91',
'episode': 'Mnet Asian Music Awards 2015 - Part 3',
'episode_number': 4,
'thumbnail': r're:^https?://.*\.jpg',
'timestamp': 1450213200,
'upload_date': '20151215',
- 'duration': 5602,
+ 'duration': 5359,
},
'params': {
# m3u8 download
countries=self._GEO_COUNTRIES)
raise
+ # The title is suffixed with the video id for some reason; strip it
+ if info.get('title'):
+ info['title'] = remove_end(info['title'], video_id).strip()
+
series_id, episode_number = video_id.split('.')
episode_info = self._download_json(
# We only need a single episode info, so restricting page size to one episode
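# remove_end() as applied above, with values taken from the updated test:
# the API returns 'Cooking with Shin 4512.1' for video id '4512.1'.
from youtube_dl.utils import remove_end

print(remove_end('Cooking with Shin 4512.1', '4512.1').strip())
# Cooking with Shin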
from __future__ import unicode_literals
-import json
import re
from .common import InfoExtractor
from ..utils import (
- int_or_none,
- parse_iso8601,
+ js_to_json,
+ parse_duration,
+ unescapeHTML,
)
class DRBonanzaIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?dr\.dk/bonanza/(?:[^/]+/)+(?:[^/])+?(?:assetId=(?P<id>\d+))?(?:[#&]|$)'
-
- _TESTS = [{
- 'url': 'http://www.dr.dk/bonanza/serie/portraetter/Talkshowet.htm?assetId=65517',
+ _VALID_URL = r'https?://(?:www\.)?dr\.dk/bonanza/[^/]+/\d+/[^/]+/(?P<id>\d+)/(?P<display_id>[^/?#&]+)'
+ _TEST = {
+ 'url': 'http://www.dr.dk/bonanza/serie/154/matador/40312/matador---0824-komme-fremmede-',
'info_dict': {
- 'id': '65517',
+ 'id': '40312',
+ 'display_id': 'matador---0824-komme-fremmede-',
'ext': 'mp4',
- 'title': 'Talkshowet - Leonard Cohen',
- 'description': 'md5:8f34194fb30cd8c8a30ad8b27b70c0ca',
- 'thumbnail': r're:^https?://.*\.(?:gif|jpg)$',
- 'timestamp': 1295537932,
- 'upload_date': '20110120',
- 'duration': 3664,
- },
- 'params': {
- 'skip_download': True, # requires rtmp
- },
- }, {
- 'url': 'http://www.dr.dk/bonanza/radio/serie/sport/fodbold.htm?assetId=59410',
- 'md5': '6dfe039417e76795fb783c52da3de11d',
- 'info_dict': {
- 'id': '59410',
- 'ext': 'mp3',
- 'title': 'EM fodbold 1992 Danmark - Tyskland finale Transmission',
- 'description': 'md5:501e5a195749480552e214fbbed16c4e',
+ 'title': 'MATADOR - 08:24. "Komme fremmede".',
+ 'description': 'md5:77b4c1ac4d4c1b9d610ab4395212ff84',
'thumbnail': r're:^https?://.*\.(?:gif|jpg)$',
- 'timestamp': 1223274900,
- 'upload_date': '20081006',
- 'duration': 7369,
+ 'duration': 4613,
},
- }]
+ }
def _real_extract(self, url):
- url_id = self._match_id(url)
- webpage = self._download_webpage(url, url_id)
-
- if url_id:
- info = json.loads(self._html_search_regex(r'({.*?%s.*})' % url_id, webpage, 'json'))
- else:
- # Just fetch the first video on that page
- info = json.loads(self._html_search_regex(r'bonanzaFunctions.newPlaylist\(({.*})\)', webpage, 'json'))
-
- asset_id = str(info['AssetId'])
- title = info['Title'].rstrip(' \'\"-,.:;!?')
- duration = int_or_none(info.get('Duration'), scale=1000)
- # First published online. "FirstPublished" contains the date for original airing.
- timestamp = parse_iso8601(
- re.sub(r'\.\d+$', '', info['Created']))
-
- def parse_filename_info(url):
- match = re.search(r'/\d+_(?P<width>\d+)x(?P<height>\d+)x(?P<bitrate>\d+)K\.(?P<ext>\w+)$', url)
- if match:
- return {
- 'width': int(match.group('width')),
- 'height': int(match.group('height')),
- 'vbr': int(match.group('bitrate')),
- 'ext': match.group('ext')
- }
- match = re.search(r'/\d+_(?P<bitrate>\d+)K\.(?P<ext>\w+)$', url)
- if match:
- return {
- 'vbr': int(match.group('bitrate')),
- 'ext': match.group(2)
- }
- return {}
+ mobj = re.match(self._VALID_URL, url)
+ video_id, display_id = mobj.group('id', 'display_id')
- video_types = ['VideoHigh', 'VideoMid', 'VideoLow']
- preferencemap = {
- 'VideoHigh': -1,
- 'VideoMid': -2,
- 'VideoLow': -3,
- 'Audio': -4,
- }
+ webpage = self._download_webpage(url, display_id)
- formats = []
- for file in info['Files']:
- if info['Type'] == 'Video':
- if file['Type'] in video_types:
- format = parse_filename_info(file['Location'])
- format.update({
- 'url': file['Location'],
- 'format_id': file['Type'].replace('Video', ''),
- 'preference': preferencemap.get(file['Type'], -10),
- })
- if format['url'].startswith('rtmp'):
- rtmp_url = format['url']
- format['rtmp_live'] = True # --resume does not work
- if '/bonanza/' in rtmp_url:
- format['play_path'] = rtmp_url.split('/bonanza/')[1]
- formats.append(format)
- elif file['Type'] == 'Thumb':
- thumbnail = file['Location']
- elif info['Type'] == 'Audio':
- if file['Type'] == 'Audio':
- format = parse_filename_info(file['Location'])
- format.update({
- 'url': file['Location'],
- 'format_id': file['Type'],
- 'vcodec': 'none',
- })
- formats.append(format)
- elif file['Type'] == 'Thumb':
- thumbnail = file['Location']
+ info = self._parse_html5_media_entries(
+ url, webpage, display_id, m3u8_id='hls',
+ m3u8_entry_protocol='m3u8_native')[0]
+ self._sort_formats(info['formats'])
- description = '%s\n%s\n%s\n' % (
- info['Description'], info['Actors'], info['Colophon'])
+ asset = self._parse_json(
+ self._search_regex(
+ r'(?s)currentAsset\s*=\s*({.+?})\s*</script', webpage, 'asset'),
+ display_id, transform_source=js_to_json)
- self._sort_formats(formats)
+ title = unescapeHTML(asset['AssetTitle']).strip()
- display_id = re.sub(r'[^\w\d-]', '', re.sub(r' ', '-', title.lower())) + '-' + asset_id
- display_id = re.sub(r'-+', '-', display_id)
+ def extract(field):
+ return self._search_regex(
+ r'<div[^>]+>\s*<p>%s:<p>\s*</div>\s*<div[^>]+>\s*<p>([^<]+)</p>' % field,
+ webpage, field, default=None)
- return {
- 'id': asset_id,
+ info.update({
+ 'id': asset.get('AssetId') or video_id,
'display_id': display_id,
'title': title,
- 'formats': formats,
- 'description': description,
- 'thumbnail': thumbnail,
- 'timestamp': timestamp,
- 'duration': duration,
- }
+ 'description': extract('Programinfo'),
+ 'duration': parse_duration(extract('Tid')),
+ 'thumbnail': asset.get('AssetImageUrl'),
+ })
+ return info
webpage = self._download_webpage(
'http://www.drtuber.com/video/%s' % video_id, display_id)
- video_url = self._html_search_regex(
- r'<source src="([^"]+)"', webpage, 'video URL')
+ video_data = self._download_json(
+ 'http://www.drtuber.com/player_config_json/', video_id, query={
+ 'vid': video_id,
+ 'embed': 0,
+ 'aid': 0,
+ 'domain_id': 0,
+ })
+
+ formats = []
+ for format_id, video_url in video_data['files'].items():
+ if video_url:
+ formats.append({
+ 'format_id': format_id,
+ 'quality': 2 if format_id == 'hq' else 1,
+ 'url': video_url
+ })
+ self._sort_formats(formats)
title = self._html_search_regex(
(r'class="title_watch"[^>]*><(?:p|h\d+)[^>]*>([^<]+)<',
return {
'id': video_id,
'display_id': display_id,
- 'url': video_url,
+ 'formats': formats,
'title': title,
'thumbnail': thumbnail,
'like_count': like_count,
if target == 'HDS':
f4m_formats = self._extract_f4m_formats(
uri + '?hdcore=3.3.0&plugin=aasp-3.3.0.99.43',
- video_id, preference, f4m_id=format_id)
+ video_id, preference, f4m_id=format_id, fatal=False)
if kind == 'AudioResource':
for f in f4m_formats:
f['vcodec'] = 'none'
elif target == 'HLS':
formats.extend(self._extract_m3u8_formats(
uri, video_id, 'mp4', entry_protocol='m3u8_native',
- preference=preference, m3u8_id=format_id))
+ preference=preference, m3u8_id=format_id,
+ fatal=False))
else:
bitrate = link.get('Bitrate')
if bitrate:
from .common import InfoExtractor
from ..utils import (
+ determine_ext,
+ ExtractorError,
+ int_or_none,
js_to_json,
+ mimetype2ext,
unescapeHTML,
- ExtractorError,
)
'id': 'dc0768de855511e49e4b0025900fea04',
'ext': 'mp4',
'title': 'Vondra o českém století: Při pohledu na Havla mi bylo trapně',
- }
- }, {
- 'url': 'http://video.aktualne.cz/dvtv/stropnicky-policie-vrbetice-preventivne-nekontrolovala/r~82ed4322849211e4a10c0025900fea04/',
- 'md5': '6388f1941b48537dbd28791f712af8bf',
- 'info_dict': {
- 'id': '72c02230849211e49f60002590604f2e',
- 'ext': 'mp4',
- 'title': 'Stropnický: Policie Vrbětice preventivně nekontrolovala',
+ 'duration': 1484,
}
}, {
'url': 'http://video.aktualne.cz/dvtv/dvtv-16-12-2014-utok-talibanu-boj-o-kliniku-uprchlici/r~973eb3bc854e11e498be002590604f2e/',
'info_dict': {
'id': 'b0b40906854d11e4bdad0025900fea04',
'ext': 'mp4',
- 'title': 'Drtinová Veselovský TV 16. 12. 2014: Témata dne'
+ 'title': 'Drtinová Veselovský TV 16. 12. 2014: Témata dne',
+ 'description': 'md5:0916925dea8e30fe84222582280b47a0',
+ 'timestamp': 1418760010,
+ 'upload_date': '20141216',
}
}, {
'md5': '5f7652a08b05009c1292317b449ffea2',
'info_dict': {
'id': '420ad9ec854a11e4bdad0025900fea04',
'ext': 'mp4',
- 'title': 'Školní masakr možná změní boj s Talibanem, říká novinářka'
+ 'title': 'Školní masakr možná změní boj s Talibanem, říká novinářka',
+ 'description': 'md5:ff2f9f6de73c73d7cef4f756c1c1af42',
+ 'timestamp': 1418760010,
+ 'upload_date': '20141216',
}
}, {
'md5': '498eb9dfa97169f409126c617e2a3d64',
'info_dict': {
'id': '95d35580846a11e4b6d20025900fea04',
'ext': 'mp4',
- 'title': 'Boj o kliniku: Veřejný zájem, nebo právo na majetek?'
+ 'title': 'Boj o kliniku: Veřejný zájem, nebo právo na majetek?',
+ 'description': 'md5:889fe610a70fee5511dc3326a089188e',
+ 'timestamp': 1418760010,
+ 'upload_date': '20141216',
}
}, {
'md5': 'b8dc6b744844032dab6ba3781a7274b9',
'info_dict': {
'id': '6fe14d66853511e4833a0025900fea04',
'ext': 'mp4',
- 'title': 'Pánek: Odmítání syrských uprchlíků je ostudou české vlády'
+ 'title': 'Pánek: Odmítání syrských uprchlíků je ostudou české vlády',
+ 'description': 'md5:544f86de6d20c4815bea11bf2ac3004f',
+ 'timestamp': 1418760010,
+ 'upload_date': '20141216',
}
}],
+ }, {
+ 'url': 'https://video.aktualne.cz/dvtv/zeman-si-jen-leci-mindraky-sobotku-nenavidi-a-babis-se-mu-te/r~960cdb3a365a11e7a83b0025900fea04/',
+ 'md5': 'f8efe9656017da948369aa099788c8ea',
+ 'info_dict': {
+ 'id': '3c496fec365911e7a6500025900fea04',
+ 'ext': 'mp4',
+ 'title': 'Zeman si jen léčí mindráky, Sobotku nenávidí a Babiš se mu teď hodí, tvrdí Kmenta',
+ 'duration': 1103,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
}, {
'url': 'http://video.aktualne.cz/v-cechach-poprve-zazni-zelenkova-zrestaurovana-mse/r~45b4b00483ec11e4883b002590604f2e/',
'only_matching': True,
}]
def _parse_video_metadata(self, js, video_id):
- metadata = self._parse_json(js, video_id, transform_source=js_to_json)
+ data = self._parse_json(js, video_id, transform_source=js_to_json)
- formats = []
- for video in metadata['sources']:
- ext = video['type'][6:]
- formats.append({
- 'url': video['file'],
- 'ext': ext,
- 'format_id': '%s-%s' % (ext, video['label']),
- 'height': int(video['label'].rstrip('p')),
- 'fps': 25,
- })
+ title = unescapeHTML(data['title'])
+ formats = []
+ for video in data['sources']:
+ video_url = video.get('file')
+ if not video_url:
+ continue
+ video_type = video.get('type')
+ ext = determine_ext(video_url, mimetype2ext(video_type))
+ if video_type == 'application/vnd.apple.mpegurl' or ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ video_url, video_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls', fatal=False))
+ elif video_type == 'application/dash+xml' or ext == 'mpd':
+ formats.extend(self._extract_mpd_formats(
+ video_url, video_id, mpd_id='dash', fatal=False))
+ else:
+ label = video.get('label')
+ height = self._search_regex(
+ r'^(\d+)[pP]', label or '', 'height', default=None)
+ format_id = ['http']
+ for f in (ext, label):
+ if f:
+ format_id.append(f)
+ formats.append({
+ 'url': video_url,
+ 'format_id': '-'.join(format_id),
+ 'height': int_or_none(height),
+ })
self._sort_formats(formats)
return {
- 'id': metadata['mediaid'],
- 'title': unescapeHTML(metadata['title']),
- 'thumbnail': self._proto_relative_url(metadata['image'], 'http:'),
+ 'id': data.get('mediaid') or video_id,
+ 'title': title,
+ 'description': data.get('description'),
+ 'thumbnail': data.get('image'),
+ 'duration': int_or_none(data.get('duration')),
+ 'timestamp': int_or_none(data.get('pubtime')),
'formats': formats
}
# single video
item = self._search_regex(
- r"(?s)embedData[0-9a-f]{32}\['asset'\]\s*=\s*(\{.+?\});",
+ r'(?s)embedData[0-9a-f]{32}\[["\']asset["\']\]\s*=\s*(\{.+?\});',
webpage, 'video', default=None, fatal=False)
if item:
items = re.findall(
r"(?s)BBX\.context\.assets\['[0-9a-f]{32}'\]\.push\(({.+?})\);",
webpage)
+ if not items:
+ items = re.findall(r'(?s)var\s+asset\s*=\s*({.+?});\n', webpage)
if items:
return {
from ..utils import (
ExtractorError,
int_or_none,
+ unsmuggle_url,
)
'view_count': int,
},
'skip': 'Georestricted',
+ }, {
+ # referrer protected video (https://tvrain.ru/lite/teleshow/kak_vse_nachinalos/namin-418921/)
+ 'url': 'eagleplatform:tvrainru.media.eagleplatform.com:582306',
+ 'only_matching': True,
}]
@staticmethod
webpage)
if mobj is not None:
return mobj.group('url')
- # Basic usage embedding (see http://dultonmedia.github.io/eplayer/)
+ PLAYER_JS_RE = r'''
+ <script[^>]+
+ src=(?P<qjs>["\'])(?:https?:)?//(?P<host>(?:(?!(?P=qjs)).)+\.media\.eagleplatform\.com)/player/player\.js(?P=qjs)
+ .+?
+ '''
+ # "Basic usage" embedding (see http://dultonmedia.github.io/eplayer/)
mobj = re.search(
r'''(?xs)
- <script[^>]+
- src=(?P<q1>["\'])(?:https?:)?//(?P<host>.+?\.media\.eagleplatform\.com)/player/player\.js(?P=q1)
- .+?
+ %s
<div[^>]+
- class=(?P<q2>["\'])eagleplayer(?P=q2)[^>]+
+ class=(?P<qclass>["\'])eagleplayer(?P=qclass)[^>]+
data-id=["\'](?P<id>\d+)
- ''', webpage)
+ ''' % PLAYER_JS_RE, webpage)
+ if mobj is not None:
+ return 'eagleplatform:%(host)s:%(id)s' % mobj.groupdict()
+ # Generalization of the "JavaScript code usage", "Combined usage" and
+ # "Usage without attaching to DOM" embeddings (see
+ # http://dultonmedia.github.io/eplayer/)
+ mobj = re.search(
+ r'''(?xs)
+ %s
+ <script>
+ .+?
+ new\s+EaglePlayer\(
+ (?:[^,]+\s*,\s*)?
+ {
+ .+?
+ \bid\s*:\s*["\']?(?P<id>\d+)
+ .+?
+ }
+ \s*\)
+ .+?
+ </script>
+ ''' % PLAYER_JS_RE, webpage)
if mobj is not None:
return 'eagleplatform:%(host)s:%(id)s' % mobj.groupdict()
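# Reduced, standalone version of the new "JavaScript code usage" detection;
# the markup is fabricated and only the EaglePlayer(...) id capture is shown.
import re

page = '''
<script src="//example.media.eagleplatform.com/player/player.js"></script>
<script>
    new EaglePlayer(null, {
        id: '582306',
        width: 640
    })
</script>
'''
mobj = re.search(
    r'''(?xs)
        new\s+EaglePlayer\(
        (?:[^,]+\s*,\s*)?
        {
        .+?
        \bid\s*:\s*["\']?(?P<id>\d+)
    ''', page)
print(mobj.group('id'))  # 582306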
if status != 200:
raise ExtractorError(' '.join(response['errors']), expected=True)
- def _download_json(self, url_or_request, video_id, note='Downloading JSON metadata', *args, **kwargs):
+ def _download_json(self, url_or_request, video_id, *args, **kwargs):
try:
- response = super(EaglePlatformIE, self)._download_json(url_or_request, video_id, note)
+ response = super(EaglePlatformIE, self)._download_json(
+ url_or_request, video_id, *args, **kwargs)
except ExtractorError as ee:
if isinstance(ee.cause, compat_HTTPError):
response = self._parse_json(ee.cause.read().decode('utf-8'), video_id)
return self._download_json(url_or_request, video_id, note)['data'][0]
def _real_extract(self, url):
+ url, smuggled_data = unsmuggle_url(url, {})
+
mobj = re.match(self._VALID_URL, url)
host, video_id = mobj.group('custom_host') or mobj.group('host'), mobj.group('id')
+ headers = {}
+ query = {
+ 'id': video_id,
+ }
+
+ referrer = smuggled_data.get('referrer')
+ if referrer:
+ headers['Referer'] = referrer
+ query['referrer'] = referrer
+
player_data = self._download_json(
- 'http://%s/api/player_data?id=%s' % (host, video_id), video_id)
+ 'http://%s/api/player_data' % host, video_id,
+ headers=headers, query=query)
media = player_data['data']['playlist']['viewports'][0]['medialist'][0]
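# How a referrer reaches this extractor (sketch, fabricated values): the
# embedding page smuggles it into the URL and _real_extract unsmuggles it.
from youtube_dl.utils import smuggle_url, unsmuggle_url

url = smuggle_url(
    'eagleplatform:tvrainru.media.eagleplatform.com:582306',
    {'referrer': 'https://tvrain.ru/'})
url, data = unsmuggle_url(url, {})
print(data['referrer'])  # https://tvrain.ru/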
# coding: utf-8
from __future__ import unicode_literals
-import re
-
from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ try_get,
+ unified_timestamp,
+)
class EggheadCourseIE(InfoExtractor):
IE_DESC = 'egghead.io course'
IE_NAME = 'egghead:course'
- _VALID_URL = r'https://egghead\.io/courses/(?P<id>[a-zA-Z_0-9-]+)'
+ _VALID_URL = r'https://egghead\.io/courses/(?P<id>[^/?#&]+)'
_TEST = {
'url': 'https://egghead.io/courses/professor-frisby-introduces-composable-functional-javascript',
'playlist_count': 29,
def _real_extract(self, url):
playlist_id = self._match_id(url)
- webpage = self._download_webpage(url, playlist_id)
- title = self._html_search_regex(r'<h1 class="title">([^<]+)</h1>', webpage, 'title')
- ul = self._search_regex(r'(?s)<ul class="series-lessons-list">(.*?)</ul>', webpage, 'session list')
+ course = self._download_json(
+ 'https://egghead.io/api/v1/series/%s' % playlist_id, playlist_id)
+
+ entries = [
+ self.url_result(
+ 'wistia:%s' % lesson['wistia_id'], ie='Wistia',
+ video_id=lesson['wistia_id'], video_title=lesson.get('title'))
+ for lesson in course['lessons'] if lesson.get('wistia_id')]
+
+ return self.playlist_result(
+ entries, playlist_id, course.get('title'),
+ course.get('description'))
+
+
+class EggheadLessonIE(InfoExtractor):
+ IE_DESC = 'egghead.io lesson'
+ IE_NAME = 'egghead:lesson'
+ _VALID_URL = r'https://egghead\.io/lessons/(?P<id>[^/?#&]+)'
+ _TEST = {
+ 'url': 'https://egghead.io/lessons/javascript-linear-data-flow-with-container-style-types-box',
+ 'info_dict': {
+ 'id': 'fv5yotjxcg',
+ 'ext': 'mp4',
+ 'title': 'Create linear data flow with container style types (Box)',
+ 'description': 'md5:9aa2cdb6f9878ed4c39ec09e85a8150e',
+ 'thumbnail': r're:^https?:.*\.jpg$',
+ 'timestamp': 1481296768,
+ 'upload_date': '20161209',
+ 'duration': 304,
+ 'view_count': 0,
+ 'tags': ['javascript', 'free'],
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }
+
+ def _real_extract(self, url):
+ lesson_id = self._match_id(url)
- found = re.findall(r'(?s)<a class="[^"]*"\s*href="([^"]+)">\s*<li class="item', ul)
- entries = [self.url_result(m) for m in found]
+ lesson = self._download_json(
+ 'https://egghead.io/api/v1/lessons/%s' % lesson_id, lesson_id)
return {
- '_type': 'playlist',
- 'id': playlist_id,
- 'title': title,
- 'description': self._og_search_description(webpage),
- 'entries': entries,
+ '_type': 'url_transparent',
+ 'ie_key': 'Wistia',
+ 'url': 'wistia:%s' % lesson['wistia_id'],
+ 'id': lesson['wistia_id'],
+ 'title': lesson.get('title'),
+ 'description': lesson.get('summary'),
+ 'thumbnail': lesson.get('thumb_nail'),
+ 'timestamp': unified_timestamp(lesson.get('published_at')),
+ 'duration': int_or_none(lesson.get('duration')),
+ 'view_count': int_or_none(lesson.get('plays_count')),
+ 'tags': try_get(lesson, lambda x: x['tag_list'], list),
}
class ESPNIE(InfoExtractor):
- _VALID_URL = r'https?://(?:espn\.go|(?:www\.)?espn)\.com/video/clip(?:\?.*?\bid=|/_/id/)(?P<id>\d+)'
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:
+ (?:(?:\w+\.)+)?espn\.go|
+ (?:www\.)?espn
+ )\.com/
+ (?:
+ (?:
+ video/clip|
+ watch/player
+ )
+ (?:
+ \?.*?\bid=|
+ /_/id/
+ )
+ )
+ (?P<id>\d+)
+ '''
+
_TESTS = [{
'url': 'http://espn.go.com/video/clip?id=10365079',
'info_dict': {
'skip_download': True,
},
}, {
- # intl video, from http://www.espnfc.us/video/mls-highlights/150/video/2743663/must-see-moments-best-of-the-mls-season
- 'url': 'http://espn.go.com/video/clip?id=2743663',
+ 'url': 'https://broadband.espn.go.com/video/clip?id=18910086',
'info_dict': {
- 'id': '2743663',
+ 'id': '18910086',
'ext': 'mp4',
- 'title': 'Must-See Moments: Best of the MLS season',
- 'description': 'md5:4c2d7232beaea572632bec41004f0aeb',
- 'timestamp': 1449446454,
- 'upload_date': '20151207',
+ 'title': 'Kyrie spins around defender for two',
+ 'description': 'md5:2b0f5bae9616d26fba8808350f0d2b9b',
+ 'timestamp': 1489539155,
+ 'upload_date': '20170315',
},
'params': {
'skip_download': True,
},
'expected_warnings': ['Unable to download f4m manifest'],
+ }, {
+ 'url': 'http://nonredline.sports.espn.go.com/video/clip?id=19744672',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://cdn.espn.go.com/video/clip/_/id/19771774',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.espn.com/watch/player?id=19141491',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.espn.com/watch/player?bucketId=257&id=19505875',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.espn.com/watch/player/_/id/19141491',
+ 'only_matching': True,
}, {
'url': 'http://www.espn.com/video/clip?id=10365079',
'only_matching': True,
from .aljazeera import AlJazeeraIE
from .alphaporno import AlphaPornoIE
from .amcnetworks import AMCNetworksIE
+from .americastestkitchen import AmericasTestKitchenIE
from .animeondemand import AnimeOnDemandIE
from .anitube import AnitubeIE
from .anvato import AnvatoIE
from .anysex import AnySexIE
from .aol import AolIE
from .allocine import AllocineIE
+from .aliexpress import AliExpressLiveIE
from .aparat import AparatIE
from .appleconnect import AppleConnectIE
from .appletrailers import (
TheOperaPlatformIE,
ArteTVPlaylistIE,
)
+from .asiancrush import (
+ AsianCrushIE,
+ AsianCrushPlaylistIE,
+)
from .atresplayer import AtresPlayerIE
from .atttechchannel import ATTTechChannelIE
from .atvat import ATVAtIE
)
from .baidu import BaiduVideoIE
from .bambuser import BambuserIE, BambuserChannelIE
-from .bandcamp import BandcampIE, BandcampAlbumIE
+from .bandcamp import BandcampIE, BandcampAlbumIE, BandcampWeeklyIE
from .bbc import (
BBCCoUkIE,
BBCCoUkArticleIE,
BBCCoUkPlaylistIE,
BBCIE,
)
-from .beampro import BeamProLiveIE
+from .beampro import (
+ BeamProLiveIE,
+ BeamProVodIE,
+)
from .beeg import BeegIE
from .behindkink import BehindKinkIE
from .bellmedia import BellMediaIE
ChirbitProfileIE,
)
from .cinchcast import CinchcastIE
-from .clipfish import ClipfishIE
+from .cjsw import CJSWIE
from .cliphunter import CliphunterIE
+from .clippit import ClippitIE
from .cliprs import ClipRsIE
from .clipsyndicate import ClipsyndicateIE
from .closertotruth import CloserToTruthIE
from .eagleplatform import EaglePlatformIE
from .ebaumsworld import EbaumsWorldIE
from .echomsk import EchoMskIE
-from .egghead import EggheadCourseIE
+from .egghead import (
+ EggheadCourseIE,
+ EggheadLessonIE,
+)
from .ehow import EHowIE
from .eighttracks import EightTracksIE
from .einthusan import EinthusanIE
from .folketinget import FolketingetIE
from .footyroom import FootyRoomIE
from .formula1 import Formula1IE
-from .fourtube import FourTubeIE
+from .fourtube import (
+ FourTubeIE,
+ PornTubeIE,
+ PornerBrosIE,
+ FuxIE,
+)
from .fox import FOXIE
from .fox9 import FOX9IE
from .foxgay import FoxgayIE
from .go import GoIE
from .go90 import Go90IE
from .godtube import GodTubeIE
-from .godtv import GodTVIE
from .golem import GolemIE
from .googledrive import GoogleDriveIE
from .googleplus import GooglePlusIE
)
from .jeuxvideo import JeuxVideoIE
from .jove import JoveIE
+from .joj import JojIE
from .jwplatform import JWPlatformIE
from .jpopsukitv import JpopsukiIE
+from .kakao import KakaoIE
from .kaltura import KalturaIE
from .kamcord import KamcordIE
from .kanalplay import KanalPlayIE
from .laola1tv import (
Laola1TvEmbedIE,
Laola1TvIE,
+ ITTFIE,
)
from .lci import LCIIE
from .lcp import (
LimelightChannelListIE,
)
from .litv import LiTVIE
-from .liveleak import LiveLeakIE
+from .liveleak import (
+ LiveLeakIE,
+ LiveLeakEmbedIE,
+)
from .livestream import (
LivestreamIE,
LivestreamOriginalIE,
MangomoloVideoIE,
MangomoloLiveIE,
)
+from .manyvids import ManyVidsIE
from .matchtv import MatchTVIE
from .mdr import MDRIE
from .mediaset import MediasetIE
from .medici import MediciIE
+from .megaphone import MegaphoneIE
from .meipai import MeipaiIE
from .melonvod import MelonVODIE
from .meta import METAIE
)
from .mlb import MLBIE
from .mnet import MnetIE
-from .mpora import MporaIE
from .moevideo import MoeVideoIE
from .mofosex import MofosexIE
from .mojvideo import MojvideoIE
NetEaseMusicProgramIE,
NetEaseMusicDjRadioIE,
)
-from .newgrounds import NewgroundsIE
+from .newgrounds import (
+ NewgroundsIE,
+ NewgroundsPlaylistIE,
+)
from .newstube import NewstubeIE
from .nextmedia import (
NextMediaIE,
AppleDailyIE,
NextTVIE,
)
+from .nexx import (
+ NexxIE,
+ NexxEmbedIE,
+)
from .nfb import NFBIE
from .nfl import NFLIE
from .nhk import NhkVodIE
NickIE,
NickDeIE,
NickNightIE,
+ NickRuIE,
)
from .niconico import NiconicoIE, NiconicoPlaylistIE
from .ninecninemedia import (
from .orf import (
ORFTVthekIE,
ORFFM4IE,
+ ORFFM4StoryIE,
ORFOE1IE,
ORFIPTVIE,
)
from .parliamentliveuk import ParliamentLiveUKIE
from .patreon import PatreonIE
from .pbs import PBSIE
+from .pearvideo import PearVideoIE
from .people import PeopleIE
from .periscope import (
PeriscopeIE,
PolskieRadioIE,
PolskieRadioCategoryIE,
)
+from .popcorntv import PopcornTVIE
from .porn91 import Porn91IE
from .porncom import PornComIE
from .pornflip import PornFlipIE
from .radiofrance import RadioFranceIE
from .rai import (
RaiPlayIE,
+ RaiPlayLiveIE,
RaiIE,
)
from .rbmaradio import RBMARadioIE
from .rds import RDSIE
from .redbulltv import RedBullTVIE
+from .reddit import (
+ RedditIE,
+ RedditRIE,
+)
from .redtube import RedTubeIE
from .regiotv import RegioTVIE
from .rentv import (
RutubeEmbedIE,
RutubeMovieIE,
RutubePersonIE,
+ RutubePlaylistIE,
)
from .rutv import RUTVIE
from .ruutu import RuutuIE
+from .ruv import RuvIE
from .sandia import SandiaIE
from .safari import (
SafariIE,
SoundcloudIE,
SoundcloudSetIE,
SoundcloudUserIE,
+ SoundcloudTrackStationIE,
SoundcloudPlaylistIE,
- SoundcloudSearchIE
+ SoundcloudSearchIE,
)
from .soundgasm import (
SoundgasmIE,
TagesschauIE,
)
from .tass import TassIE
+from .tastytrade import TastyTradeIE
from .tbs import TBSIE
from .tdslifeway import TDSLifewayIE
from .teachertube import (
)
from .teachingchannel import TeachingChannelIE
from .teamcoco import TeamcocoIE
-from .teamfourstar import TeamFourStarIE
from .techtalks import TechTalksIE
from .ted import TEDIE
from .tele13 import Tele13IE
from .trutv import TruTVIE
from .tube8 import Tube8IE
from .tubitv import TubiTvIE
-from .tudou import (
- TudouIE,
- TudouPlaylistIE,
- TudouAlbumIE,
-)
from .tumblr import TumblrIE
from .tunein import (
TuneInClipIE,
)
from .vlive import (
VLiveIE,
- VLiveChannelIE
+ VLiveChannelIE,
+ VLivePlaylistIE
)
from .vodlocker import VodlockerIE
from .vodpl import VODPlIE
from .vodplatform import VODPlatformIE
from .voicerepublic import VoiceRepublicIE
+from .voot import VootIE
from .voxmedia import VoxMediaIE
from .vporn import VpornIE
from .vrt import VRTIE
WashingtonPostArticleIE,
)
from .wat import WatIE
+from .watchbox import WatchBoxIE
from .watchindianporn import WatchIndianPornIE
from .wdr import (
WDRIE,
YahooIE,
YahooSearchIE,
)
-from .yam import YamIE
from .yandexmusic import (
YandexMusicTrackIE,
YandexMusicAlbumIE,
YandexMusicPlaylistIE,
)
+from .yandexdisk import YandexDiskIE
from .yesjapan import YesJapanIE
from .yinyuetai import YinYueTaiIE
from .ynet import YnetIE
}]
@staticmethod
- def _extract_url(webpage):
- mobj = re.search(
- r'<iframe[^>]+?src=(["\'])(?P<url>https://www\.facebook\.com/video/embed.+?)\1', webpage)
- if mobj is not None:
- return mobj.group('url')
-
+ def _extract_urls(webpage):
+ urls = []
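+ # <iframe> embeds: both the legacy /video/embed player and the plugins/video.php player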
+ for mobj in re.finditer(
+ r'<iframe[^>]+?src=(["\'])(?P<url>https?://www\.facebook\.com/(?:video/embed|plugins/video\.php).+?)\1',
+ webpage):
+ urls.append(mobj.group('url'))
# Facebook API embed
# see https://developers.facebook.com/docs/plugins/embedded-video-player
- mobj = re.search(r'''(?x)<div[^>]+
+ for mobj in re.finditer(r'''(?x)<div[^>]+
class=(?P<q1>[\'"])[^\'"]*\bfb-(?:video|post)\b[^\'"]*(?P=q1)[^>]+
- data-href=(?P<q2>[\'"])(?P<url>(?:https?:)?//(?:www\.)?facebook.com/.+?)(?P=q2)''', webpage)
- if mobj is not None:
- return mobj.group('url')
+ data-href=(?P<q2>[\'"])(?P<url>(?:https?:)?//(?:www\.)?facebook.com/.+?)(?P=q2)''', webpage):
+ urls.append(mobj.group('url'))
+ return urls
def _login(self):
(useremail, password) = self._get_login_info()
'format_id': f.get('name'),
'tbr': tbr,
'source_preference': quality(f.get('name')),
+ # quality metadata of http formats may be incorrect
+ 'preference': -1,
})
# m3u8 URL format is reverse engineered from [1] (search for
# master.m3u8). dashEdges (that is currently balancer-vod.1tv.ru)
'info_dict': {
'id': 'glavnoe',
'ext': 'mp4',
- 'title': 'Итоги недели с 8 по 14 июня 2015 года',
+ 'title': r're:^Итоги недели с \d+ по \d+ \w+ \d{4} года$',
'thumbnail': r're:^https?://.*\.jpg$',
},
}, {
webpage = self._download_webpage(url, video_id)
video_url = self._search_regex(
- r'<a[^>]+?href="([^"]+)"[^>]+?class="videoplayer"',
+ [r'<div[^>]+?class="flowplayer[^>]+?data-href="([^"]+)"',
+ r'<a[^>]+?href="([^"]+)"[^>]+?class="videoplayer"'],
webpage, 'video url')
title = self._og_search_title(webpage, default=None) or self._search_regex(
from __future__ import unicode_literals
from .common import InfoExtractor
-from ..compat import compat_urllib_parse_urlencode
+from ..compat import (
+ compat_str,
+ compat_urllib_parse_urlencode,
+)
from ..utils import (
ExtractorError,
int_or_none,
formats = []
for stream in streams['stream']:
- stream_type = str(stream.get('type'))
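+ # use compat_str rather than str so the value is text on both Python 2 and 3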
+ stream_type = compat_str(stream.get('type'))
formats.append({
'format_id': stream_type,
'url': stream['_content'],
import re
from .common import InfoExtractor
+from ..compat import compat_urlparse
from ..utils import (
parse_duration,
parse_iso8601,
- sanitized_Request,
str_to_int,
)
-class FourTubeIE(InfoExtractor):
- IE_NAME = '4tube'
- _VALID_URL = r'https?://(?:www\.)?4tube\.com/videos/(?P<id>\d+)'
+class FourTubeBaseIE(InfoExtractor):
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ kind, video_id, display_id = mobj.group('kind', 'id', 'display_id')
- _TEST = {
- 'url': 'http://www.4tube.com/videos/209733/hot-babe-holly-michaels-gets-her-ass-stuffed-by-black',
- 'md5': '6516c8ac63b03de06bc8eac14362db4f',
- 'info_dict': {
- 'id': '209733',
- 'ext': 'mp4',
- 'title': 'Hot Babe Holly Michaels gets her ass stuffed by black',
- 'uploader': 'WCP Club',
- 'uploader_id': 'wcp-club',
- 'upload_date': '20131031',
- 'timestamp': 1383263892,
- 'duration': 583,
- 'view_count': int,
- 'like_count': int,
- 'categories': list,
- 'age_limit': 18,
- }
- }
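+ # rewrite mobile (m.) and embed URLs to the canonical desktop page before downloading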
+ if kind == 'm' or not display_id:
+ url = self._URL_TEMPLATE % video_id
- def _real_extract(self, url):
- video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
title = self._html_search_meta('name', webpage)
'uploadDate', webpage))
thumbnail = self._html_search_meta('thumbnailUrl', webpage)
uploader_id = self._html_search_regex(
- r'<a class="item-to-subscribe" href="[^"]+/channels/([^/"]+)" title="Go to [^"]+ page">',
+ r'<a class="item-to-subscribe" href="[^"]+/(?:channel|user)s?/([^/"]+)" title="Go to [^"]+ page">',
webpage, 'uploader id', fatal=False)
uploader = self._html_search_regex(
- r'<a class="item-to-subscribe" href="[^"]+/channels/[^/"]+" title="Go to ([^"]+) page">',
+ r'<a class="item-to-subscribe" href="[^"]+/(?:channel|user)s?/[^/"]+" title="Go to ([^"]+) page">',
webpage, 'uploader', fatal=False)
categories_html = self._search_regex(
view_count = str_to_int(self._search_regex(
r'<meta[^>]+itemprop="interactionCount"[^>]+content="UserPlays:([0-9,]+)">',
- webpage, 'view count', fatal=False))
+ webpage, 'view count', default=None))
like_count = str_to_int(self._search_regex(
r'<meta[^>]+itemprop="interactionCount"[^>]+content="UserLikes:([0-9,]+)">',
- webpage, 'like count', fatal=False))
+ webpage, 'like count', default=None))
duration = parse_duration(self._html_search_meta('duration', webpage))
media_id = self._search_regex(
media_id = params[0]
sources = ['%s' % p for p in params[2]]
- token_url = 'http://tkn.4tube.com/{0}/desktop/{1}'.format(
+ token_url = 'https://tkn.kodicdn.com/{0}/desktop/{1}'.format(
media_id, '+'.join(sources))
- headers = {
- b'Content-Type': b'application/x-www-form-urlencoded',
- b'Origin': b'http://www.4tube.com',
- }
- token_req = sanitized_Request(token_url, b'{}', headers)
- tokens = self._download_json(token_req, video_id)
+
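+ # the token endpoint expects an empty POST with Origin and Referer matching the requesting page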
+ parsed_url = compat_urlparse.urlparse(url)
+ tokens = self._download_json(token_url, video_id, data=b'', headers={
+ 'Origin': '%s://%s' % (parsed_url.scheme, parsed_url.hostname),
+ 'Referer': url,
+ })
formats = [{
'url': tokens[format]['token'],
'format_id': format + 'p',
'duration': duration,
'age_limit': 18,
}
+
+
+class FourTubeIE(FourTubeBaseIE):
+ IE_NAME = '4tube'
+ _VALID_URL = r'https?://(?:(?P<kind>www|m)\.)?4tube\.com/(?:videos|embed)/(?P<id>\d+)(?:/(?P<display_id>[^/?#&]+))?'
+ _URL_TEMPLATE = 'https://www.4tube.com/videos/%s/video'
+ _TESTS = [{
+ 'url': 'http://www.4tube.com/videos/209733/hot-babe-holly-michaels-gets-her-ass-stuffed-by-black',
+ 'md5': '6516c8ac63b03de06bc8eac14362db4f',
+ 'info_dict': {
+ 'id': '209733',
+ 'ext': 'mp4',
+ 'title': 'Hot Babe Holly Michaels gets her ass stuffed by black',
+ 'uploader': 'WCP Club',
+ 'uploader_id': 'wcp-club',
+ 'upload_date': '20131031',
+ 'timestamp': 1383263892,
+ 'duration': 583,
+ 'view_count': int,
+ 'like_count': int,
+ 'categories': list,
+ 'age_limit': 18,
+ },
+ }, {
+ 'url': 'http://www.4tube.com/embed/209733',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://m.4tube.com/videos/209733/hot-babe-holly-michaels-gets-her-ass-stuffed-by-black',
+ 'only_matching': True,
+ }]
+
+
+class FuxIE(FourTubeBaseIE):
+ _VALID_URL = r'https?://(?:(?P<kind>www|m)\.)?fux\.com/(?:video|embed)/(?P<id>\d+)(?:/(?P<display_id>[^/?#&]+))?'
+ _URL_TEMPLATE = 'https://www.fux.com/video/%s/video'
+ _TESTS = [{
+ 'url': 'https://www.fux.com/video/195359/awesome-fucking-kitchen-ends-cum-swallow',
+ 'info_dict': {
+ 'id': '195359',
+ 'ext': 'mp4',
+ 'title': 'Awesome fucking in the kitchen ends with cum swallow',
+ 'uploader': 'alenci2342',
+ 'uploader_id': 'alenci2342',
+ 'upload_date': '20131230',
+ 'timestamp': 1388361660,
+ 'duration': 289,
+ 'view_count': int,
+ 'like_count': int,
+ 'categories': list,
+ 'age_limit': 18,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://www.fux.com/embed/195359',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.fux.com/video/195359/awesome-fucking-kitchen-ends-cum-swallow',
+ 'only_matching': True,
+ }]
+
+
+class PornTubeIE(FourTubeBaseIE):
+ _VALID_URL = r'https?://(?:(?P<kind>www|m)\.)?porntube\.com/(?:videos/(?P<display_id>[^/]+)_|embed/)(?P<id>\d+)'
+ _URL_TEMPLATE = 'https://www.porntube.com/videos/video_%s'
+ _TESTS = [{
+ 'url': 'https://www.porntube.com/videos/teen-couple-doing-anal_7089759',
+ 'info_dict': {
+ 'id': '7089759',
+ 'ext': 'mp4',
+ 'title': 'Teen couple doing anal',
+ 'uploader': 'Alexy',
+ 'uploader_id': 'Alexy',
+ 'upload_date': '20150606',
+ 'timestamp': 1433595647,
+ 'duration': 5052,
+ 'view_count': int,
+ 'like_count': int,
+ 'categories': list,
+ 'age_limit': 18,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://www.porntube.com/embed/7089759',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://m.porntube.com/videos/teen-couple-doing-anal_7089759',
+ 'only_matching': True,
+ }]
+
+
+class PornerBrosIE(FourTubeBaseIE):
+ _VALID_URL = r'https?://(?:(?P<kind>www|m)\.)?pornerbros\.com/(?:videos/(?P<display_id>[^/]+)_|embed/)(?P<id>\d+)'
+ _URL_TEMPLATE = 'https://www.pornerbros.com/videos/video_%s'
+ _TESTS = [{
+ 'url': 'https://www.pornerbros.com/videos/skinny-brunette-takes-big-cock-down-her-anal-hole_181369',
+ 'md5': '6516c8ac63b03de06bc8eac14362db4f',
+ 'info_dict': {
+ 'id': '181369',
+ 'ext': 'mp4',
+ 'title': 'Skinny brunette takes big cock down her anal hole',
+ 'uploader': 'PornerBros HD',
+ 'uploader_id': 'pornerbros-hd',
+ 'upload_date': '20130130',
+ 'timestamp': 1359527401,
+ 'duration': 1224,
+ 'view_count': int,
+ 'categories': list,
+ 'age_limit': 18,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://www.pornerbros.com/embed/181369',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://m.pornerbros.com/videos/skinny-brunette-takes-big-cock-down-her-anal-hole_181369',
+ 'only_matching': True,
+ }]
from .adobepass import AdobePassIE
from ..utils import (
- smuggle_url,
- update_url_query,
+ int_or_none,
+ parse_age_limit,
+ parse_duration,
+ try_get,
+ unified_timestamp,
)
class FOXIE(AdobePassIE):
- _VALID_URL = r'https?://(?:www\.)?fox\.com/watch/(?P<id>[0-9]+)'
- _TEST = {
- 'url': 'http://www.fox.com/watch/255180355939/7684182528',
+ _VALID_URL = r'https?://(?:www\.)?fox\.com/watch/(?P<id>[\da-fA-F]+)'
+ _TESTS = [{
+ # clip
+ 'url': 'https://www.fox.com/watch/4b765a60490325103ea69888fb2bd4e8/',
'md5': 'ebd296fcc41dd4b19f8115d8461a3165',
'info_dict': {
- 'id': '255180355939',
+ 'id': '4b765a60490325103ea69888fb2bd4e8',
'ext': 'mp4',
- 'title': 'Official Trailer: Gotham',
- 'description': 'Tracing the rise of the great DC Comics Super-Villains and vigilantes, Gotham reveals an entirely new chapter that has never been told.',
- 'duration': 129,
- 'timestamp': 1400020798,
- 'upload_date': '20140513',
- 'uploader': 'NEWA-FNG-FOXCOM',
+ 'title': 'Aftermath: Bruce Wayne Develops Into The Dark Knight',
+ 'description': 'md5:549cd9c70d413adb32ce2a779b53b486',
+ 'duration': 102,
+ 'timestamp': 1504291893,
+ 'upload_date': '20170901',
+ 'creator': 'FOX',
+ 'series': 'Gotham',
},
- 'add_ie': ['ThePlatform'],
- }
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ # episode, geo-restricted
+ 'url': 'https://www.fox.com/watch/087036ca7f33c8eb79b08152b4dd75c1/',
+ 'only_matching': True,
+ }, {
+ # episode, geo-restricted, tv provider required
+ 'url': 'https://www.fox.com/watch/30056b295fb57f7452aeeb4920bc3024/',
+ 'only_matching': True,
+ }]
def _real_extract(self, url):
video_id = self._match_id(url)
- webpage = self._download_webpage(url, video_id)
-
- settings = self._parse_json(self._search_regex(
- r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);',
- webpage, 'drupal settings'), video_id)
- fox_pdk_player = settings['fox_pdk_player']
- release_url = fox_pdk_player['release_url']
- query = {
- 'mbr': 'true',
- 'switch': 'http'
- }
- if fox_pdk_player.get('access') == 'locked':
- ap_p = settings['foxAdobePassProvider']
- rating = ap_p.get('videoRating')
- if rating == 'n/a':
- rating = None
- resource = self._get_mvpd_resource('fbc-fox', None, ap_p['videoGUID'], rating)
- query['auth'] = self._extract_mvpd_auth(url, video_id, 'fbc-fox', resource)
-
- info = self._search_json_ld(webpage, video_id, fatal=False)
- info.update({
- '_type': 'url_transparent',
- 'ie_key': 'ThePlatform',
- 'url': smuggle_url(update_url_query(release_url, query), {'force_smil_url': True}),
- 'id': video_id,
- })
- return info
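+ # metadata now comes from the fbc-content JSON API instead of scraped Drupal settings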
+ video = self._download_json(
+ 'https://api.fox.com/fbc-content/v1_4/video/%s' % video_id,
+ video_id, headers={
+ 'apikey': 'abdcbed02c124d393b39e818a4312055',
+ 'Content-Type': 'application/json',
+ 'Referer': url,
+ })
+
+ title = video['name']
+
+ m3u8_url = self._download_json(
+ video['videoRelease']['url'], video_id)['playURL']
+
+ formats = self._extract_m3u8_formats(
+ m3u8_url, video_id, 'mp4',
+ entry_protocol='m3u8_native', m3u8_id='hls')
+ self._sort_formats(formats)
+
+ description = video.get('description')
+ duration = int_or_none(video.get('durationInSeconds')) or int_or_none(
+ video.get('duration')) or parse_duration(video.get('duration'))
+ timestamp = unified_timestamp(video.get('datePublished'))
+ age_limit = parse_age_limit(video.get('contentRating'))
+
+ data = try_get(
+ video, lambda x: x['trackingData']['properties'], dict) or {}
+
+ creator = data.get('brand') or data.get('network') or video.get('network')
+
+ series = video.get('seriesName') or data.get(
+ 'seriesName') or data.get('show')
+ season_number = int_or_none(video.get('seasonNumber'))
+ episode = video.get('name')
+ episode_number = int_or_none(video.get('episodeNumber'))
+ release_year = int_or_none(video.get('releaseYear'))
+
+ if data.get('authRequired'):
+ # TODO: AP
+ pass
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'duration': duration,
+ 'timestamp': timestamp,
+ 'age_limit': age_limit,
+ 'creator': creator,
+ 'series': series,
+ 'season_number': season_number,
+ 'episode': episode,
+ 'episode_number': episode_number,
+ 'release_year': release_year,
+ 'formats': formats,
+ }
from .common import InfoExtractor
from ..utils import (
get_element_by_id,
+ int_or_none,
remove_end,
)
formats = [{
'url': source,
- 'height': resolution,
+ 'height': int_or_none(resolution),
} for source, resolution in zip(
video_data['sources'], video_data.get('resolutions', itertools.repeat(None)))]
class FranceTVIE(FranceTVBaseInfoExtractor):
- _VALID_URL = r'https?://(?:(?:www\.)?france\.tv|mobile\.france\.tv)/(?:[^/]+/)+(?P<id>[^/]+)\.html'
+ _VALID_URL = r'https?://(?:(?:www\.)?france\.tv|mobile\.france\.tv)/(?:[^/]+/)*(?P<id>[^/]+)\.html'
_TESTS = [{
'url': 'https://www.france.tv/france-2/13h15-le-dimanche/140921-les-mysteres-de-jesus.html',
}, {
'url': 'https://mobile.france.tv/france-5/c-dans-l-air/137347-emission-du-vendredi-12-mai-2017.html',
'only_matching': True,
+ }, {
+ 'url': 'https://www.france.tv/142749-rouge-sang.html',
+ 'only_matching': True,
}]
def _real_extract(self, url):
from __future__ import unicode_literals
-import json
import re
from .common import InfoExtractor
-from ..utils import ExtractorError
+from ..utils import (
+ ExtractorError,
+ float_or_none,
+ int_or_none,
+ unified_timestamp,
+)
class FunnyOrDieIE(InfoExtractor):
'title': 'Heart-Shaped Box: Literal Video Version',
'description': 'md5:ea09a01bc9a1c46d9ab696c01747c338',
'thumbnail': r're:^http:.*\.jpg$',
+ 'uploader': 'DASjr',
+ 'timestamp': 1317904928,
+ 'upload_date': '20111006',
+ 'duration': 318.3,
},
}, {
'url': 'http://www.funnyordie.com/embed/e402820827',
'title': 'Please Use This Song (Jon Lajoie)',
'description': 'Please use this to sell something. www.jonlajoie.com',
'thumbnail': r're:^http:.*\.jpg$',
+ 'timestamp': 1398988800,
+ 'upload_date': '20140502',
},
'params': {
'skip_download': True,
'url': 'http://www.funnyordie.com%s' % src,
}]
- post_json = self._search_regex(
- r'fb_post\s*=\s*(\{.*?\});', webpage, 'post details')
- post = json.loads(post_json)
+ timestamp = unified_timestamp(self._html_search_meta(
+ 'uploadDate', webpage, 'timestamp', default=None))
+
+ uploader = self._html_search_regex(
+ r'<h\d[^>]+\bclass=["\']channel-preview-name[^>]+>(.+?)</h',
+ webpage, 'uploader', default=None)
+
+ title, description, thumbnail, duration = [None] * 4
+
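+ # metadata lives in two inline JSON blobs: "jsonMedium" (title, duration) and "fb_post" (description, thumbnail)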
+ medium = self._parse_json(
+ self._search_regex(
+ r'jsonMedium\s*=\s*({.+?});', webpage, 'JSON medium',
+ default='{}'),
+ video_id, fatal=False)
+ if medium:
+ title = medium.get('title')
+ duration = float_or_none(medium.get('duration'))
+ if not timestamp:
+ timestamp = unified_timestamp(medium.get('publishDate'))
+
+ post = self._parse_json(
+ self._search_regex(
+ r'fb_post\s*=\s*(\{.*?\});', webpage, 'post details',
+ default='{}'),
+ video_id, fatal=False)
+ if post:
+ if not title:
+ title = post.get('name')
+ description = post.get('description')
+ thumbnail = post.get('picture')
+
+ if not title:
+ title = self._og_search_title(webpage)
+ if not description:
+ description = self._og_search_description(webpage)
+ if not duration:
+ duration = int_or_none(self._html_search_meta(
+ ('video:duration', 'duration'), webpage, 'duration', default=False))
return {
'id': video_id,
- 'title': post['name'],
- 'description': post.get('description'),
- 'thumbnail': post.get('picture'),
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'uploader': uploader,
+ 'timestamp': timestamp,
+ 'duration': duration,
'formats': formats,
'subtitles': subtitles,
}
from ..utils import (
float_or_none,
int_or_none,
- js_to_json,
unified_strdate,
)
class GaskrankIE(InfoExtractor):
- """InfoExtractor for gaskrank.tv"""
- _VALID_URL = r'https?://(?:www\.)?gaskrank\.tv/tv/(?P<categories>[^/]+)/(?P<id>[^/]+)\.html?'
- _TESTS = [
- {
- 'url': 'http://www.gaskrank.tv/tv/motorrad-fun/strike-einparken-durch-anfaenger-crash-mit-groesserem-flurschaden.htm',
- 'md5': '1ae88dbac97887d85ebd1157a95fc4f9',
- 'info_dict': {
- 'id': '201601/26955',
- 'ext': 'mp4',
- 'title': 'Strike! Einparken können nur Männer - Flurschaden hält sich in Grenzen *lol*',
- 'thumbnail': r're:^https?://.*\.jpg$',
- 'categories': ['motorrad-fun'],
- 'display_id': 'strike-einparken-durch-anfaenger-crash-mit-groesserem-flurschaden',
- 'uploader_id': 'Bikefun',
- 'upload_date': '20170110',
- 'uploader_url': None,
- }
- },
- {
- 'url': 'http://www.gaskrank.tv/tv/racing/isle-of-man-tt-2011-michael-du-15920.htm',
- 'md5': 'c33ee32c711bc6c8224bfcbe62b23095',
- 'info_dict': {
- 'id': '201106/15920',
- 'ext': 'mp4',
- 'title': 'Isle of Man - Michael Dunlop vs Guy Martin - schwindelig kucken',
- 'thumbnail': r're:^https?://.*\.jpg$',
- 'categories': ['racing'],
- 'display_id': 'isle-of-man-tt-2011-michael-du-15920',
- 'uploader_id': 'IOM',
- 'upload_date': '20160506',
- 'uploader_url': 'www.iomtt.com',
- }
+ _VALID_URL = r'https?://(?:www\.)?gaskrank\.tv/tv/(?P<categories>[^/]+)/(?P<id>[^/]+)\.htm'
+ _TESTS = [{
+ 'url': 'http://www.gaskrank.tv/tv/motorrad-fun/strike-einparken-durch-anfaenger-crash-mit-groesserem-flurschaden.htm',
+ 'md5': '1ae88dbac97887d85ebd1157a95fc4f9',
+ 'info_dict': {
+ 'id': '201601/26955',
+ 'ext': 'mp4',
+ 'title': 'Strike! Einparken können nur Männer - Flurschaden hält sich in Grenzen *lol*',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'categories': ['motorrad-fun'],
+ 'display_id': 'strike-einparken-durch-anfaenger-crash-mit-groesserem-flurschaden',
+ 'uploader_id': 'Bikefun',
+ 'upload_date': '20170110',
+ 'uploader_url': None,
}
- ]
+ }, {
+ 'url': 'http://www.gaskrank.tv/tv/racing/isle-of-man-tt-2011-michael-du-15920.htm',
+ 'md5': 'c33ee32c711bc6c8224bfcbe62b23095',
+ 'info_dict': {
+ 'id': '201106/15920',
+ 'ext': 'mp4',
+ 'title': 'Isle of Man - Michael Dunlop vs Guy Martin - schwindelig kucken',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'categories': ['racing'],
+ 'display_id': 'isle-of-man-tt-2011-michael-du-15920',
+ 'uploader_id': 'IOM',
+ 'upload_date': '20170523',
+ 'uploader_url': 'www.iomtt.com',
+ }
+ }]
def _real_extract(self, url):
- """extract information from gaskrank.tv"""
- def fix_json(code):
- """Removes trailing comma in json: {{},} --> {{}}"""
- return re.sub(r',\s*}', r'}', js_to_json(code))
-
display_id = self._match_id(url)
+
webpage = self._download_webpage(url, display_id)
+
+ title = self._og_search_title(
+ webpage, default=None) or self._html_search_meta(
+ 'title', webpage, fatal=True)
+
categories = [re.match(self._VALID_URL, url).group('categories')]
- title = self._search_regex(
- r'movieName\s*:\s*\'([^\']*)\'',
- webpage, 'title')
- thumbnail = self._search_regex(
- r'poster\s*:\s*\'([^\']*)\'',
- webpage, 'thumbnail', default=None)
mobj = re.search(
r'Video von:\s*(?P<uploader_id>[^|]*?)\s*\|\s*vom:\s*(?P<upload_date>[0-9][0-9]\.[0-9][0-9]\.[0-9][0-9][0-9][0-9])',
if average_rating:
average_rating = float_or_none(average_rating.replace(',', '.'))
- playlist = self._parse_json(
- self._search_regex(
- r'playlist\s*:\s*\[([^\]]*)\]',
- webpage, 'playlist', default='{}'),
- display_id, transform_source=fix_json, fatal=False)
-
video_id = self._search_regex(
r'https?://movies\.gaskrank\.tv/([^-]*?)(-[^\.]*)?\.mp4',
- playlist.get('0').get('src'), 'video id')
-
- formats = []
- for key in playlist:
- formats.append({
- 'url': playlist[key]['src'],
- 'format_id': key,
- 'quality': playlist[key].get('quality')})
- self._sort_formats(formats, field_preference=['format_id'])
+ webpage, 'video id', default=display_id)
- return {
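+ # formats now come from the HTML5 <video> element instead of the inline playlist JSON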
+ entry = self._parse_html5_media_entries(url, webpage, video_id)[0]
+ entry.update({
'id': video_id,
'title': title,
- 'formats': formats,
- 'thumbnail': thumbnail,
'categories': categories,
'display_id': display_id,
'uploader_id': uploader_id,
'tags': tags,
'view_count': view_count,
'average_rating': average_rating,
- }
+ })
+ self._sort_formats(entry['formats'])
+
+ return entry
from .youtube import YoutubeIE
from ..compat import (
compat_etree_fromstring,
+ compat_str,
compat_urllib_parse_unquote,
compat_urlparse,
compat_xml_parse_error,
BrightcoveLegacyIE,
BrightcoveNewIE,
)
+from .nexx import (
+ NexxIE,
+ NexxEmbedIE,
+)
from .nbc import NBCSportsVPlayerIE
from .ooyala import OoyalaIE
from .rutv import RUTVIE
DailymotionIE,
DailymotionCloudIE,
)
+from .dailymail import DailyMailIE
from .onionstudios import OnionStudiosIE
from .viewlift import ViewLiftEmbedIE
from .mtv import MTVServicesEmbeddedIE
from .washingtonpost import WashingtonPostIE
from .wistia import WistiaIE
from .mediaset import MediasetIE
+from .joj import JojIE
+from .megaphone import MegaphoneIE
+from .vzaar import VzaarIE
class GenericIE(InfoExtractor):
},
'skip': 'movie expired',
},
+ # ooyala video embedded with http://player.ooyala.com/static/v4/production/latest/core.min.js
+ {
+ 'url': 'http://wnep.com/2017/07/22/steampunk-fest-comes-to-honesdale/',
+ 'info_dict': {
+ 'id': 'lwYWYxYzE6V5uJMjNGyKtwwiw9ZJD7t2',
+ 'ext': 'mp4',
+ 'title': 'Steampunk Fest Comes to Honesdale',
+ 'duration': 43.276,
+ },
+ 'params': {
+ 'skip_download': True,
+ }
+ },
# embed.ly video
{
'url': 'http://www.tested.com/science/weird/460206-tested-grinding-coffee-2000-frames-second/',
},
'add_ie': ['Dailymotion'],
},
+ # DailyMail embed
+ {
+ 'url': 'http://www.bumm.sk/krimi/2017/07/05/biztonsagi-kamera-buktatta-le-az-agg-ferfit-utlegelo-apolot',
+ 'info_dict': {
+ 'id': '1495629',
+ 'ext': 'mp4',
+ 'title': 'Care worker punches elderly dementia patient in head 11 times',
+ 'description': 'md5:3a743dee84e57e48ec68bf67113199a5',
+ },
+ 'add_ie': ['DailyMail'],
+ 'params': {
+ 'skip_download': True,
+ },
+ },
# YouTube embed
{
'url': 'http://www.badzine.de/ansicht/datum/2014/06/09/so-funktioniert-die-neue-englische-badminton-liga.html',
},
'add_ie': ['Kaltura'],
},
- # Eagle.Platform embed (generic URL)
+ # EaglePlatform embed (generic URL)
{
'url': 'http://lenta.ru/news/2015/03/06/navalny/',
# Not checking MD5 as sometimes the direct HTTP link results in 404 and HLS is used
'view_count': int,
'age_limit': 0,
},
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ # referrer protected EaglePlatform embed
+ {
+ 'url': 'https://tvrain.ru/lite/teleshow/kak_vse_nachinalos/namin-418921/',
+ 'info_dict': {
+ 'id': '582306',
+ 'ext': 'mp4',
+ 'title': 'Стас Намин: «Мы нарушили девственность Кремля»',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 3382,
+ 'view_count': int,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
},
- # ClipYou (Eagle.Platform) embed (custom URL)
+ # ClipYou (EaglePlatform) embed (custom URL)
{
'url': 'http://muz-tv.ru/play/7129/',
# Not checking MD5 as sometimes the direct HTTP link results in 404 and HLS is used
'duration': 216,
'view_count': int,
},
+ 'params': {
+ 'skip_download': True,
+ },
},
# Pladform embed
{
# LiveLeak embed
{
'url': 'http://www.wykop.pl/link/3088787/',
- 'md5': 'ace83b9ed19b21f68e1b50e844fdf95d',
+ 'md5': '7619da8c820e835bef21a1efa2a0fc71',
'info_dict': {
'id': '874_1459135191',
'ext': 'mp4',
'title': 'Man shows poor quality of new apartment building',
'description': 'The wall is like a sand pile.',
'uploader': 'Lake8737',
- }
+ },
+ 'add_ie': [LiveLeakIE.ie_key()],
+ },
+ # Another LiveLeak embed pattern (#13336)
+ {
+ 'url': 'https://milo.yiannopoulos.net/2017/06/concealed-carry-robbery/',
+ 'info_dict': {
+ 'id': '2eb_1496309988',
+ 'ext': 'mp4',
+ 'title': 'Thief robs place where everyone was armed',
+ 'description': 'md5:694d73ee79e535953cf2488562288eee',
+ 'uploader': 'brazilwtf',
+ },
+ 'add_ie': [LiveLeakIE.ie_key()],
},
# Duplicated embedded video URLs
{
},
'add_ie': ['BrightcoveLegacy'],
},
+ # Nexx embed
+ {
+ 'url': 'https://www.funk.net/serien/5940e15073f6120001657956/items/593efbb173f6120001657503',
+ 'info_dict': {
+ 'id': '247746',
+ 'ext': 'mp4',
+ 'title': "Yesterday's Jam (OV)",
+ 'description': 'md5:09bc0984723fed34e2581624a84e05f0',
+ 'timestamp': 1492594816,
+ 'upload_date': '20170419',
+ },
+ 'params': {
+ 'format': 'bestvideo',
+ 'skip_download': True,
+ },
+ },
# Facebook <iframe> embed
{
'url': 'https://www.hostblogger.de/blog/archives/6181-Auto-jagt-Betonmischer.html',
'title': 'Facebook video #599637780109885',
},
},
+ # Facebook <iframe> embed, plugin video
+ {
+ 'url': 'http://5pillarsuk.com/2017/06/07/tariq-ramadan-disagrees-with-pr-exercise-by-imams-refusing-funeral-prayers-for-london-attackers/',
+ 'info_dict': {
+ 'id': '1754168231264132',
+ 'ext': 'mp4',
+ 'title': 'About the Imams and Religious leaders refusing to perform funeral prayers for...',
+ 'uploader': 'Tariq Ramadan (official)',
+ 'timestamp': 1496758379,
+ 'upload_date': '20170606',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
# Facebook API embed
{
'url': 'http://www.lothype.com/blue-stars-2016-preview-standstill-full-show/',
},
'playlist_mincount': 5,
},
+ {
+ # Limelight embed (LimelightPlayerUtil.embed)
+ 'url': 'https://tv5.ca/videos?v=xuu8qowr291ri',
+ 'info_dict': {
+ 'id': '95d035dc5c8a401588e9c0e6bd1e9c92',
+ 'ext': 'mp4',
+ 'title': '07448641',
+ 'timestamp': 1499890639,
+ 'upload_date': '20170712',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'add_ie': ['LimelightMedia'],
+ },
{
'url': 'http://kron4.com/2017/04/28/standoff-with-walnut-creek-murder-suspect-ends-with-arrest/',
'info_dict': {
},
'add_ie': [MediasetIE.ie_key()],
},
+ {
+ # JOJ.sk embeds
+ 'url': 'https://www.noviny.sk/slovensko/238543-slovenskom-sa-prehnala-vlna-silnych-burok',
+ 'info_dict': {
+ 'id': '238543-slovenskom-sa-prehnala-vlna-silnych-burok',
+ 'title': 'Slovenskom sa prehnala vlna silných búrok',
+ },
+ 'playlist_mincount': 5,
+ 'add_ie': [JojIE.ie_key()],
+ },
+ {
+ # AMP embed (see https://www.ampproject.org/docs/reference/components/amp-video)
+ 'url': 'https://tvrain.ru/amp/418921/',
+ 'md5': 'cc00413936695987e8de148b67d14f1d',
+ 'info_dict': {
+ 'id': '418921',
+ 'ext': 'mp4',
+ 'title': 'Стас Намин: «Мы нарушили девственность Кремля»',
+ },
+ },
+ {
+ # vzaar embed
+ 'url': 'http://help.vzaar.com/article/165-embedding-video',
+ 'md5': '7e3919d9d2620b89e3e00bec7fe8c9d4',
+ 'info_dict': {
+ 'id': '8707641',
+ 'ext': 'mp4',
+ 'title': 'Building A Business Online: Principal Chairs Q & A',
+ },
+ },
+ {
+ # multiple HTML5 videos on one page
+ 'url': 'https://www.paragon-software.com/home/rk-free/keyscenarios.html',
+ 'info_dict': {
+ 'id': 'keyscenarios',
+ 'title': 'Rescue Kit 14 Free Edition - Getting started',
+ },
+ 'playlist_count': 4,
+ }
# {
# # TODO: find another test
# # http://schema.org/VideoObject
if head_response is not False:
# Check for redirect
- new_url = head_response.geturl()
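+ # geturl() may return bytes on Python 2; normalize to a text URL before comparing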
+ new_url = compat_str(head_response.geturl())
if url != new_url:
self.report_following_redirect(new_url)
if force_videoid:
content_type = head_response.headers.get('Content-Type', '').lower()
m = re.match(r'^(?P<type>audio|video|application(?=/(?:ogg$|(?:vnd\.apple\.|x-)?mpegurl)))/(?P<format_id>[^;\s]+)', content_type)
if m:
- format_id = m.group('format_id')
+ format_id = compat_str(m.group('format_id'))
if format_id.endswith('mpegurl'):
formats = self._extract_m3u8_formats(url, video_id, 'mp4')
elif format_id == 'f4m':
formats = self._extract_f4m_formats(url, video_id)
else:
formats = [{
- 'format_id': m.group('format_id'),
+ 'format_id': format_id,
'url': url,
'vcodec': 'none' if m.group('type') == 'audio' else None
}]
elif re.match(r'(?i)^(?:{[^}]+})?MPD$', doc.tag):
info_dict['formats'] = self._parse_mpd_formats(
doc, video_id,
- mpd_base_url=full_response.geturl().rpartition('/')[0],
+ mpd_base_url=compat_str(full_response.geturl()).rpartition('/')[0],
mpd_url=url)
self._sort_formats(info_dict['formats'])
return info_dict
video_description = self._og_search_description(webpage, default=None)
video_thumbnail = self._og_search_thumbnail(webpage, default=None)
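+ # collect page-level metadata up front so the embed handlers below can merge into it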
+ info_dict.update({
+ 'title': video_title,
+ 'description': video_description,
+ 'thumbnail': video_thumbnail,
+ 'age_limit': age_limit,
+ })
+
# Look for Brightcove Legacy Studio embeds
bc_urls = BrightcoveLegacyIE._extract_brightcove_urls(webpage)
if bc_urls:
if bc_urls:
return self.playlist_from_matches(bc_urls, video_id, video_title, ie='BrightcoveNew')
+ # Look for Nexx embeds
+ nexx_urls = NexxIE._extract_urls(webpage)
+ if nexx_urls:
+ return self.playlist_from_matches(nexx_urls, video_id, video_title, ie=NexxIE.ie_key())
+
+ # Look for Nexx iFrame embeds
+ nexx_embed_urls = NexxEmbedIE._extract_urls(webpage)
+ if nexx_embed_urls:
+ return self.playlist_from_matches(nexx_embed_urls, video_id, video_title, ie=NexxEmbedIE.ie_key())
+
# Look for ThePlatform embeds
tp_urls = ThePlatformIE._extract_urls(webpage)
if tp_urls:
if vid_me_embed_url is not None:
return self.url_result(vid_me_embed_url, 'Vidme')
- # Look for embedded YouTube player
- matches = re.findall(r'''(?x)
- (?:
- <iframe[^>]+?src=|
- data-video-url=|
- <embed[^>]+?src=|
- embedSWF\(?:\s*|
- <object[^>]+data=|
- new\s+SWFObject\(
- )
- (["\'])
- (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
- (?:embed|v|p)/.+?)
- \1''', webpage)
- if matches:
+ # Look for YouTube embeds
+ youtube_urls = YoutubeIE._extract_urls(webpage)
+ if youtube_urls:
return self.playlist_from_matches(
- matches, video_id, video_title, lambda m: unescapeHTML(m[1]))
-
- # Look for lazyYT YouTube embed
- matches = re.findall(
- r'class="lazyYT" data-youtube-id="([^"]+)"', webpage)
- if matches:
- return self.playlist_from_matches(matches, video_id, video_title, lambda m: unescapeHTML(m))
-
- # Look for Wordpress "YouTube Video Importer" plugin
- matches = re.findall(r'''(?x)<div[^>]+
- class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
- data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
- if matches:
- return self.playlist_from_matches(matches, video_id, video_title, lambda m: m[-1])
+ youtube_urls, video_id, video_title, ie=YoutubeIE.ie_key())
matches = DailymotionIE._extract_urls(webpage)
if matches:
return self.playlist_from_matches(
playlists, video_id, video_title, lambda p: '//dailymotion.com/playlist/%s' % p)
+ # Look for DailyMail embeds
+ dailymail_urls = DailyMailIE._extract_urls(webpage)
+ if dailymail_urls:
+ return self.playlist_from_matches(
+ dailymail_urls, video_id, video_title, ie=DailyMailIE.ie_key())
+
# Look for embedded Wistia player
wistia_url = WistiaIE._extract_url(webpage)
if wistia_url:
# Look for Ooyala videos
mobj = (re.search(r'player\.ooyala\.com/[^"?]+[?#][^"]*?(?:embedCode|ec)=(?P<ec>[^"&]+)', webpage) or
re.search(r'OO\.Player\.create\([\'"].*?[\'"],\s*[\'"](?P<ec>.{32})[\'"]', webpage) or
+ re.search(r'OO\.Player\.create\.apply\(\s*OO\.Player\s*,\s*op\(\s*\[\s*[\'"][^\'"]*[\'"]\s*,\s*[\'"](?P<ec>.{32})[\'"]', webpage) or
re.search(r'SBN\.VideoLinkset\.ooyala\([\'"](?P<ec>.{32})[\'"]\)', webpage) or
re.search(r'data-ooyala-video-id\s*=\s*[\'"](?P<ec>.{32})[\'"]', webpage))
if mobj is not None:
return self.url_result(mobj.group('url'))
# Look for embedded Facebook player
- facebook_url = FacebookIE._extract_url(webpage)
- if facebook_url is not None:
- return self.url_result(facebook_url, 'Facebook')
+ facebook_urls = FacebookIE._extract_urls(webpage)
+ if facebook_urls:
+ return self.playlist_from_matches(facebook_urls, video_id, video_title)
# Look for embedded VK player
mobj = re.search(r'<iframe[^>]+?src=(["\'])(?P<url>https?://vk\.com/video_ext\.php.+?)\1', webpage)
if kaltura_url:
return self.url_result(smuggle_url(kaltura_url, {'source_url': url}), KalturaIE.ie_key())
- # Look for Eagle.Platform embeds
+ # Look for EaglePlatform embeds
eagleplatform_url = EaglePlatformIE._extract_url(webpage)
if eagleplatform_url:
- return self.url_result(eagleplatform_url, EaglePlatformIE.ie_key())
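+ # smuggle the embedding page URL so referrer-protected embeds can send a matching Referer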
+ return self.url_result(smuggle_url(eagleplatform_url, {'referrer': url}), EaglePlatformIE.ie_key())
- # Look for ClipYou (uses Eagle.Platform) embeds
+ # Look for ClipYou (uses EaglePlatform) embeds
mobj = re.search(
r'<iframe[^>]+src="https?://(?P<host>media\.clipyou\.ru)/index/player\?.*\brecord_id=(?P<id>\d+).*"', webpage)
if mobj is not None:
self._proto_relative_url(instagram_embed_url), InstagramIE.ie_key())
# Look for LiveLeak embeds
- liveleak_url = LiveLeakIE._extract_url(webpage)
- if liveleak_url:
- return self.url_result(liveleak_url, 'LiveLeak')
+ liveleak_urls = LiveLeakIE._extract_urls(webpage)
+ if liveleak_urls:
+ return self.playlist_from_matches(liveleak_urls, video_id, video_title)
# Look for 3Q SDN embeds
threeqsdn_url = ThreeQSDNIE._extract_url(webpage)
rutube_urls = RutubeIE._extract_urls(webpage)
if rutube_urls:
return self.playlist_from_matches(
- rutube_urls, ie=RutubeIE.ie_key())
+ rutube_urls, video_id, video_title, ie=RutubeIE.ie_key())
# Look for WashingtonPost embeds
wapo_urls = WashingtonPostIE._extract_urls(webpage)
return self.playlist_from_matches(
mediaset_urls, video_id, video_title, ie=MediasetIE.ie_key())
- # Looking for http://schema.org/VideoObject
- json_ld = self._search_json_ld(
- webpage, video_id, default={}, expected_type='VideoObject')
- if json_ld.get('url'):
- info_dict.update({
- 'title': video_title or info_dict['title'],
- 'description': video_description,
- 'thumbnail': video_thumbnail,
- 'age_limit': age_limit
- })
- info_dict.update(json_ld)
- return info_dict
+ # Look for JOJ.sk embeds
+ joj_urls = JojIE._extract_urls(webpage)
+ if joj_urls:
+ return self.playlist_from_matches(
+ joj_urls, video_id, video_title, ie=JojIE.ie_key())
+
+ # Look for megaphone.fm embeds
+ mpfn_urls = MegaphoneIE._extract_urls(webpage)
+ if mpfn_urls:
+ return self.playlist_from_matches(
+ mpfn_urls, video_id, video_title, ie=MegaphoneIE.ie_key())
+
+ # Look for vzaar embeds
+ vzaar_urls = VzaarIE._extract_urls(webpage)
+ if vzaar_urls:
+ return self.playlist_from_matches(
+ vzaar_urls, video_id, video_title, ie=VzaarIE.ie_key())
+
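+ # values from dict1 win; dict2 only fills keys that are missing or hold an empty string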
+ def merge_dicts(dict1, dict2):
+ merged = {}
+ for k, v in dict1.items():
+ if v is not None:
+ merged[k] = v
+ for k, v in dict2.items():
+ if v is None:
+ continue
+ if (k not in merged or
+ (isinstance(v, compat_str) and v and
+ isinstance(merged[k], compat_str) and
+ not merged[k])):
+ merged[k] = v
+ return merged
# Look for HTML5 media
entries = self._parse_html5_media_entries(url, webpage, video_id, m3u8_id='hls')
if entries:
- for entry in entries:
- entry.update({
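+ # a page may embed several HTML5 videos; number ids and titles so each entry stays distinct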
+ if len(entries) == 1:
+ entries[0].update({
'id': video_id,
'title': video_title,
})
+ else:
+ for num, entry in enumerate(entries, start=1):
+ entry.update({
+ 'id': '%s-%s' % (video_id, num),
+ 'title': '%s (%d)' % (video_title, num),
+ })
+ for entry in entries:
self._sort_formats(entry['formats'])
- return self.playlist_result(entries)
+ return self.playlist_result(entries, video_id, video_title)
jwplayer_data = self._find_jwplayer_data(
webpage, video_id, transform_source=js_to_json)
if jwplayer_data:
info = self._parse_jwplayer_data(
jwplayer_data, video_id, require_title=False, base_url=url)
- if not info.get('title'):
- info['title'] = video_title
- return info
+ return merge_dicts(info, info_dict)
+
+ # Looking for http://schema.org/VideoObject
+ json_ld = self._search_json_ld(
+ webpage, video_id, default={}, expected_type='VideoObject')
+ if json_ld.get('url'):
+ return merge_dicts(json_ld, info_dict)
def check_video(vurl):
if YoutubeIE.suitable(vurl):
video_url = gfy.get('%sUrl' % format_id)
if not video_url:
continue
- filesize = gfy.get('%sSize' % format_id)
+ filesize = int_or_none(gfy.get('%sSize' % format_id))
formats.append({
'url': video_url,
'format_id': format_id,
from .common import InfoExtractor
from ..utils import (
- unescapeHTML,
- qualities,
+ determine_ext,
int_or_none,
+ qualities,
+ unescapeHTML,
)
_VALID_URL = r'https?://(?:www\.)?giantbomb\.com/videos/(?P<display_id>[^/]+)/(?P<id>\d+-\d+)'
_TEST = {
'url': 'http://www.giantbomb.com/videos/quick-look-destiny-the-dark-below/2300-9782/',
- 'md5': '57badeface303ecf6b98b812de1b9018',
+ 'md5': 'c8ea694254a59246a42831155dec57ac',
'info_dict': {
'id': '2300-9782',
'display_id': 'quick-look-destiny-the-dark-below',
for format_id, video_url in video['videoStreams'].items():
if format_id == 'f4m_stream':
continue
- if video_url.endswith('.f4m'):
+ ext = determine_ext(video_url)
+ if ext == 'f4m':
f4m_formats = self._extract_f4m_formats(video_url + '?hdcore=3.3.1', display_id)
if f4m_formats:
f4m_formats[0]['quality'] = quality(format_id)
formats.extend(f4m_formats)
+ elif ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ video_url, display_id, ext='mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls', fatal=False))
else:
formats.append({
'url': video_url,
+++ /dev/null
-from __future__ import unicode_literals
-
-from .common import InfoExtractor
-from .ooyala import OoyalaIE
-from ..utils import js_to_json
-
-
-class GodTVIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?god\.tv(?:/[^/]+)*/(?P<id>[^/?#&]+)'
- _TESTS = [{
- 'url': 'http://god.tv/jesus-image/video/jesus-conference-2016/randy-needham',
- 'info_dict': {
- 'id': 'lpd3g2MzE6D1g8zFAKz8AGpxWcpu6o_3',
- 'ext': 'mp4',
- 'title': 'Randy Needham',
- 'duration': 3615.08,
- },
- 'params': {
- 'skip_download': True,
- }
- }, {
- 'url': 'http://god.tv/playlist/bible-study',
- 'info_dict': {
- 'id': 'bible-study',
- },
- 'playlist_mincount': 37,
- }, {
- 'url': 'http://god.tv/node/15097',
- 'only_matching': True,
- }, {
- 'url': 'http://god.tv/live/africa',
- 'only_matching': True,
- }, {
- 'url': 'http://god.tv/liveevents',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- display_id = self._match_id(url)
-
- webpage = self._download_webpage(url, display_id)
-
- settings = self._parse_json(
- self._search_regex(
- r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);',
- webpage, 'settings', default='{}'),
- display_id, transform_source=js_to_json, fatal=False)
-
- ooyala_id = None
-
- if settings:
- playlist = settings.get('playlist')
- if playlist and isinstance(playlist, list):
- entries = [
- OoyalaIE._build_url_result(video['content_id'])
- for video in playlist if video.get('content_id')]
- if entries:
- return self.playlist_result(entries, display_id)
- ooyala_id = settings.get('ooyala', {}).get('content_id')
-
- if not ooyala_id:
- ooyala_id = self._search_regex(
- r'["\']content_id["\']\s*:\s*(["\'])(?P<id>[\w-]+)\1',
- webpage, 'ooyala id', group='id')
-
- return OoyalaIE._build_url_result(ooyala_id)
from .common import InfoExtractor
from ..compat import (
+ compat_str,
compat_urlparse,
)
from ..utils import (
continue
formats.append({
- 'format_id': e.tag,
+ 'format_id': compat_str(e.tag),
'url': compat_urlparse.urljoin(self._PREFIX, url),
'height': self._int(e.get('height'), 'height'),
'width': self._int(e.get('width'), 'width'),
from .common import InfoExtractor
from ..utils import (
+ determine_ext,
ExtractorError,
int_or_none,
lowercase_escape,
+ update_url_query,
)
class GoogleDriveIE(InfoExtractor):
- _VALID_URL = r'https?://(?:(?:docs|drive)\.google\.com/(?:uc\?.*?id=|file/d/)|video\.google\.com/get_player\?.*?docid=)(?P<id>[a-zA-Z0-9_-]{28,})'
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:
+ (?:docs|drive)\.google\.com/
+ (?:
+ (?:uc|open)\?.*?id=|
+ file/d/
+ )|
+ video\.google\.com/get_player\?.*?docid=
+ )
+ (?P<id>[a-zA-Z0-9_-]{28,})
+ '''
_TESTS = [{
'url': 'https://drive.google.com/file/d/0ByeS4oOUV-49Zzh4R1J6R09zazQ/edit?pli=1',
- 'md5': 'd109872761f7e7ecf353fa108c0dbe1e',
+ 'md5': '5c602afbbf2c1db91831f5d82f678554',
'info_dict': {
'id': '0ByeS4oOUV-49Zzh4R1J6R09zazQ',
'ext': 'mp4',
'title': 'Big Buck Bunny.mp4',
'duration': 45,
}
+ }, {
+ # video can't be watched anonymously due to view count limit reached,
+ # but can be downloaded (see https://github.com/rg3/youtube-dl/issues/14046)
+ 'url': 'https://drive.google.com/file/d/0B-vUyvmDLdWDcEt4WjBqcmI2XzQ/view',
+ 'md5': 'bfbd670d03a470bb1e6d4a257adec12e',
+ 'info_dict': {
+ 'id': '0B-vUyvmDLdWDcEt4WjBqcmI2XzQ',
+ 'ext': 'mp4',
+ 'title': 'Annabelle Creation (2017)- Z.V1 [TH].MP4',
+ }
}, {
# video id is longer than 28 characters
'url': 'https://drive.google.com/file/d/1ENcQ_jeCuj7y19s66_Ou9dRP4GKGsodiDQ/edit',
+ 'info_dict': {
+ 'id': '1ENcQ_jeCuj7y19s66_Ou9dRP4GKGsodiDQ',
+ 'ext': 'mp4',
+ 'title': 'Andreea Banica feat Smiley - Hooky Song (Official Video).mp4',
+ 'duration': 189,
+ },
+ 'only_matching': True,
+ }, {
+ 'url': 'https://drive.google.com/open?id=0B2fjwgkl1A_CX083Tkowdmt6d28',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://drive.google.com/uc?id=0B2fjwgkl1A_CX083Tkowdmt6d28',
'only_matching': True,
}]
_FORMATS_EXT = {
'46': 'webm',
'59': 'mp4',
}
+ _BASE_URL_CAPTIONS = 'https://drive.google.com/timedtext'
+ _CAPTIONS_ENTRY_TAG = {
+ 'subtitles': 'track',
+ 'automatic_captions': 'target',
+ }
+ _caption_formats_ext = []
+ _captions_xml = None
@staticmethod
def _extract_url(webpage):
if mobj:
return 'https://drive.google.com/file/d/%s' % mobj.group('id')
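+ # the timedtext endpoint is queried once; it lists subtitle tracks, translation targets and the available formats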
+ def _download_subtitles_xml(self, video_id, subtitles_id, hl):
+ if self._captions_xml:
+ return
+ self._captions_xml = self._download_xml(
+ self._BASE_URL_CAPTIONS, video_id, query={
+ 'id': video_id,
+ 'vid': subtitles_id,
+ 'hl': hl,
+ 'v': video_id,
+ 'type': 'list',
+ 'tlangs': '1',
+ 'fmts': '1',
+ 'vssids': '1',
+ }, note='Downloading subtitles XML',
+ errnote='Unable to download subtitles XML', fatal=False)
+ if self._captions_xml:
+ for f in self._captions_xml.findall('format'):
+ if f.attrib.get('fmt_code') and not f.attrib.get('default'):
+ self._caption_formats_ext.append(f.attrib['fmt_code'])
+
+ def _get_captions_by_type(self, video_id, subtitles_id, caption_type,
+ origin_lang_code=None):
+ if not subtitles_id or not caption_type:
+ return
+ captions = {}
+ for caption_entry in self._captions_xml.findall(
+ self._CAPTIONS_ENTRY_TAG[caption_type]):
+ caption_lang_code = caption_entry.attrib.get('lang_code')
+ if not caption_lang_code:
+ continue
+ caption_format_data = []
+ for caption_format in self._caption_formats_ext:
+ query = {
+ 'vid': subtitles_id,
+ 'v': video_id,
+ 'fmt': caption_format,
+ 'lang': (caption_lang_code if origin_lang_code is None
+ else origin_lang_code),
+ 'type': 'track',
+ 'name': '',
+ 'kind': '',
+ }
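+ # automatic captions are fetched as translations: request the original language and pass the target as tlang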
+ if origin_lang_code is not None:
+ query.update({'tlang': caption_lang_code})
+ caption_format_data.append({
+ 'url': update_url_query(self._BASE_URL_CAPTIONS, query),
+ 'ext': caption_format,
+ })
+ captions[caption_lang_code] = caption_format_data
+ return captions
+
+ def _get_subtitles(self, video_id, subtitles_id, hl):
+ if not subtitles_id or not hl:
+ return
+ self._download_subtitles_xml(video_id, subtitles_id, hl)
+ if not self._captions_xml:
+ return
+ return self._get_captions_by_type(video_id, subtitles_id, 'subtitles')
+
+ def _get_automatic_captions(self, video_id, subtitles_id, hl):
+ if not subtitles_id or not hl:
+ return
+ self._download_subtitles_xml(video_id, subtitles_id, hl)
+ if not self._captions_xml:
+ return
+ track = self._captions_xml.find('track')
+ if track is None:
+ return
+ origin_lang_code = track.attrib.get('lang_code')
+ if not origin_lang_code:
+ return
+ return self._get_captions_by_type(
+ video_id, subtitles_id, 'automatic_captions', origin_lang_code)
+
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(
'http://docs.google.com/file/d/%s' % video_id, video_id)
- reason = self._search_regex(r'"reason"\s*,\s*"([^"]+)', webpage, 'reason', default=None)
- if reason:
- raise ExtractorError(reason)
-
- title = self._search_regex(r'"title"\s*,\s*"([^"]+)', webpage, 'title')
+ title = self._search_regex(
+ r'"title"\s*,\s*"([^"]+)', webpage, 'title',
+ default=None) or self._og_search_title(webpage)
duration = int_or_none(self._search_regex(
- r'"length_seconds"\s*,\s*"([^"]+)', webpage, 'length seconds', default=None))
- fmt_stream_map = self._search_regex(
- r'"fmt_stream_map"\s*,\s*"([^"]+)', webpage, 'fmt stream map').split(',')
- fmt_list = self._search_regex(r'"fmt_list"\s*,\s*"([^"]+)', webpage, 'fmt_list').split(',')
+ r'"length_seconds"\s*,\s*"([^"]+)', webpage, 'length seconds',
+ default=None))
formats = []
- for fmt, fmt_stream in zip(fmt_list, fmt_stream_map):
- fmt_id, fmt_url = fmt_stream.split('|')
- resolution = fmt.split('/')[1]
- width, height = resolution.split('x')
- formats.append({
- 'url': lowercase_escape(fmt_url),
- 'format_id': fmt_id,
- 'resolution': resolution,
- 'width': int_or_none(width),
- 'height': int_or_none(height),
- 'ext': self._FORMATS_EXT[fmt_id],
+ fmt_stream_map = self._search_regex(
+ r'"fmt_stream_map"\s*,\s*"([^"]+)', webpage,
+ 'fmt stream map', default='').split(',')
+ fmt_list = self._search_regex(
+ r'"fmt_list"\s*,\s*"([^"]+)', webpage,
+ 'fmt_list', default='').split(',')
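+ # build an id -> (width, height) map from fmt_list, then attach resolutions to each stream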
+ if fmt_stream_map and fmt_list:
+ resolutions = {}
+ for fmt in fmt_list:
+ mobj = re.search(
+ r'^(?P<format_id>\d+)/(?P<width>\d+)[xX](?P<height>\d+)', fmt)
+ if mobj:
+ resolutions[mobj.group('format_id')] = (
+ int(mobj.group('width')), int(mobj.group('height')))
+
+ for fmt_stream in fmt_stream_map:
+ fmt_stream_split = fmt_stream.split('|')
+ if len(fmt_stream_split) < 2:
+ continue
+ format_id, format_url = fmt_stream_split[:2]
+ f = {
+ 'url': lowercase_escape(format_url),
+ 'format_id': format_id,
+ 'ext': self._FORMATS_EXT[format_id],
+ }
+ resolution = resolutions.get(format_id)
+ if resolution:
+ f.update({
+ 'width': resolution[0],
+ 'height': resolution[1],
+ })
+ formats.append(f)
+
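+ # the originally uploaded file may also be available via the uc endpoint (exposed as the "source" format)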
+ source_url = update_url_query(
+ 'https://drive.google.com/uc', {
+ 'id': video_id,
+ 'export': 'download',
})
+ urlh = self._request_webpage(
+ source_url, video_id, note='Requesting source file',
+ errnote='Unable to request source file', fatal=False)
+ if urlh:
+ def add_source_format(src_url):
+ formats.append({
+ 'url': src_url,
+ 'ext': determine_ext(title, 'mp4').lower(),
+ 'format_id': 'source',
+ 'quality': 1,
+ })
+ if urlh.headers.get('Content-Disposition'):
+ add_source_format(source_url)
+ else:
+ confirmation_webpage = self._webpage_read_content(
+ urlh, url, video_id, note='Downloading confirmation page',
+ errnote='Unable to confirm download', fatal=False)
+ if confirmation_webpage:
+ confirm = self._search_regex(
+ r'confirm=([^&"\']+)', confirmation_webpage,
+ 'confirmation code', fatal=False)
+ if confirm:
+ add_source_format(update_url_query(source_url, {
+ 'confirm': confirm,
+ }))
+
+ if not formats:
+ reason = self._search_regex(
+ r'"reason"\s*,\s*"([^"]+)', webpage, 'reason', default=None)
+ if reason:
+ raise ExtractorError(reason, expected=True)
+
self._sort_formats(formats)
+ hl = self._search_regex(
+ r'"hl"\s*,\s*"([^"]+)', webpage, 'hl', default=None)
+ subtitles_id = None
+ ttsurl = self._search_regex(
+ r'"ttsurl"\s*,\s*"([^"]+)', webpage, 'ttsurl', default=None)
+ if ttsurl:
+ # the video Id for subtitles will be the last value in the ttsurl
+ # query string
+ subtitles_id = ttsurl.encode('utf-8').decode(
+ 'unicode_escape').split('=')[-1]
+
return {
'id': video_id,
'title': title,
'thumbnail': self._og_search_thumbnail(webpage, default=None),
'duration': duration,
'formats': formats,
+ 'subtitles': self.extract_subtitles(video_id, subtitles_id, hl),
+ 'automatic_captions': self.extract_automatic_captions(
+ video_id, subtitles_id, hl),
}
from __future__ import unicode_literals
from .common import InfoExtractor
+from .youtube import YoutubeIE
from ..utils import (
determine_ext,
int_or_none,
'description': 'md5:c934cbfb326c669c2bcabcbe3d3fcd20',
'thumbnail': r're:^https?://.*/gallery/$',
}
+ }, {
+ # YouTube embed
+ 'url': 'http://www.heise.de/newsticker/meldung/Netflix-In-20-Jahren-vom-Videoverleih-zum-TV-Revolutionaer-3814130.html',
+ 'md5': 'e403d2b43fea8e405e88e3f8623909f1',
+ 'info_dict': {
+ 'id': '6kmWbXleKW4',
+ 'ext': 'mp4',
+ 'title': 'NEU IM SEPTEMBER | Netflix',
+ 'description': 'md5:2131f3c7525e540d5fd841de938bd452',
+ 'upload_date': '20170830',
+ 'uploader': 'Netflix Deutschland, Österreich und Schweiz',
+ 'uploader_id': 'netflixdach',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
}, {
'url': 'http://www.heise.de/ct/artikel/c-t-uplink-3-3-Owncloud-Tastaturen-Peilsender-Smartphone-2403911.html',
'only_matching': True,
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
+ title = self._html_search_meta('fulltitle', webpage, default=None)
+ if not title or title == "c't":
+ title = self._search_regex(
+ r'<div[^>]+class="videoplayerjw"[^>]+data-title="([^"]+)"',
+ webpage, 'title')
+
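+ # the title is resolved first so that pages with YouTube embeds yield a properly titled playlist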
+ yt_urls = YoutubeIE._extract_urls(webpage)
+ if yt_urls:
+ return self.playlist_from_matches(yt_urls, video_id, title, ie=YoutubeIE.ie_key())
+
container_id = self._search_regex(
r'<div class="videoplayerjw"[^>]+data-container="([0-9]+)"',
webpage, 'container ID')
r'<div class="videoplayerjw"[^>]+data-sequenz="([0-9]+)"',
webpage, 'sequenz ID')
- title = self._html_search_meta('fulltitle', webpage, default=None)
- if not title or title == "c't":
- title = self._search_regex(
- r'<div[^>]+class="videoplayerjw"[^>]+data-title="([^"]+)"',
- webpage, 'title')
-
doc = self._download_xml(
'http://www.heise.de/videout/feed', video_id, query={
'container': container_id,
class HGTVComShowIE(InfoExtractor):
IE_NAME = 'hgtv.com:show'
_VALID_URL = r'https?://(?:www\.)?hgtv\.com/shows/[^/]+/(?P<id>[^/?#&]+)'
- _TEST = {
- 'url': 'http://www.hgtv.com/shows/flip-or-flop/flip-or-flop-full-episodes-videos',
+ _TESTS = [{
+ # data-module="video"
+ 'url': 'http://www.hgtv.com/shows/flip-or-flop/flip-or-flop-full-episodes-season-4-videos',
'info_dict': {
- 'id': 'flip-or-flop-full-episodes-videos',
+ 'id': 'flip-or-flop-full-episodes-season-4-videos',
'title': 'Flip or Flop Full Episodes',
},
'playlist_mincount': 15,
- }
+ }, {
+ # data-deferred-module="video"
+ 'url': 'http://www.hgtv.com/shows/good-bones/episodes/an-old-victorian-house-gets-a-new-facelift',
+ 'only_matching': True,
+ }]
def _real_extract(self, url):
display_id = self._match_id(url)
config = self._parse_json(
self._search_regex(
- r'(?s)data-module=["\']video["\'][^>]*>.*?<script[^>]+type=["\']text/x-config["\'][^>]*>(.+?)</script',
+ r'(?s)data-(?:deferred-)?module=["\']video["\'][^>]*>.*?<script[^>]+type=["\']text/x-config["\'][^>]*>(.+?)</script',
webpage, 'video config'),
display_id)['channels'][0]
class HitboxIE(InfoExtractor):
IE_NAME = 'hitbox'
- _VALID_URL = r'https?://(?:www\.)?hitbox\.tv/video/(?P<id>[0-9]+)'
- _TEST = {
+ _VALID_URL = r'https?://(?:www\.)?(?:hitbox|smashcast)\.tv/(?:[^/]+/)*videos?/(?P<id>[0-9]+)'
+ _TESTS = [{
'url': 'http://www.hitbox.tv/video/203213',
'info_dict': {
'id': '203213',
# m3u8 download
'skip_download': True,
},
- }
+ }, {
+ 'url': 'https://www.smashcast.tv/hitboxlive/videos/203213',
+ 'only_matching': True,
+ }]
def _extract_metadata(self, url, video_id):
thumb_base = 'https://edge.sf.hitbox.tv'
metadata = self._download_json(
- '%s/%s' % (url, video_id), video_id,
- 'Downloading metadata JSON')
+ '%s/%s' % (url, video_id), video_id, 'Downloading metadata JSON')
date = 'media_live_since'
media_type = 'livestream'
views = int_or_none(video_meta.get('media_views'))
timestamp = parse_iso8601(video_meta.get(date), ' ')
categories = [video_meta.get('category_name')]
- thumbs = [
- {'url': thumb_base + video_meta.get('media_thumbnail'),
- 'width': 320,
- 'height': 180},
- {'url': thumb_base + video_meta.get('media_thumbnail_large'),
- 'width': 768,
- 'height': 432},
- ]
+ thumbs = [{
+ 'url': thumb_base + video_meta.get('media_thumbnail'),
+ 'width': 320,
+ 'height': 180
+ }, {
+ 'url': thumb_base + video_meta.get('media_thumbnail_large'),
+ 'width': 768,
+ 'height': 432
+ }]
return {
'id': video_id,
video_id = self._match_id(url)
player_config = self._download_json(
- 'https://www.hitbox.tv/api/player/config/video/%s' % video_id,
+ 'https://www.smashcast.tv/api/player/config/video/%s' % video_id,
video_id, 'Downloading video JSON')
formats = []
self._sort_formats(formats)
metadata = self._extract_metadata(
- 'https://www.hitbox.tv/api/media/video',
- video_id)
+ 'https://www.smashcast.tv/api/media/video', video_id)
metadata['formats'] = formats
return metadata
class HitboxLiveIE(HitboxIE):
IE_NAME = 'hitbox:live'
- _VALID_URL = r'https?://(?:www\.)?hitbox\.tv/(?!video)(?P<id>.+)'
- _TEST = {
+ _VALID_URL = r'https?://(?:www\.)?(?:hitbox|smashcast)\.tv/(?P<id>[^/?#&]+)'
+ _TESTS = [{
'url': 'http://www.hitbox.tv/dimak',
'info_dict': {
'id': 'dimak',
# live
'skip_download': True,
},
- }
+ }, {
+ 'url': 'https://www.smashcast.tv/dimak',
+ 'only_matching': True,
+ }]
+
+ @classmethod
+ def suitable(cls, url):
+ return False if HitboxIE.suitable(url) else super(HitboxLiveIE, cls).suitable(url)
def _real_extract(self, url):
video_id = self._match_id(url)
player_config = self._download_json(
- 'https://www.hitbox.tv/api/player/config/live/%s' % video_id,
+ 'https://www.smashcast.tv/api/player/config/live/%s' % video_id,
video_id)
formats = []
self._sort_formats(formats)
metadata = self._extract_metadata(
- 'https://www.hitbox.tv/api/media/live',
- video_id)
+ 'https://www.smashcast.tv/api/media/live', video_id)
metadata['formats'] = formats
metadata['is_live'] = True
metadata['title'] = self._live_title(metadata.get('title'))
'url': 'http://me.ign.com/ar/angry-birds-2/106533/video/lrd-ldyy-lwl-lfylm-angry-birds',
'only_matching': True,
},
+ {
+ # videoId pattern
+ 'url': 'http://www.ign.com/articles/2017/06/08/new-ducktales-short-donalds-birthday-doesnt-go-as-planned',
+ 'only_matching': True,
+ },
]
def _find_video_id(self, webpage):
r'data-video-id="(.+?)"',
r'<object id="vid_(.+?)"',
r'<meta name="og:image" content=".*/(.+?)-(.+?)/.+.jpg"',
+ r'videoId"\s*:\s*"(.+?)"',
+ r'videoId["\']\s*:\s*["\']([^"\']+?)["\']',
]
return self._search_regex(res_id, webpage, 'video id', default=None)
def _add_sub_element(element, name):
return etree.SubElement(element, _add_ns(name))
+ production_id = (
+ params.get('data-video-autoplay-id') or
+ '%s#001' % (
+ params.get('data-video-episode-id') or
+ video_id.replace('a', '/')))
+
req_env = etree.Element(_add_ns('soapenv:Envelope'))
_add_sub_element(req_env, 'soapenv:Header')
body = _add_sub_element(req_env, 'soapenv:Body')
get_playlist = _add_sub_element(body, ('tem:GetPlaylist'))
request = _add_sub_element(get_playlist, 'tem:request')
- _add_sub_element(request, 'itv:ProductionId').text = params['data-video-id']
+ _add_sub_element(request, 'itv:ProductionId').text = production_id
_add_sub_element(request, 'itv:RequestGuid').text = compat_str(uuid.uuid4()).upper()
vodcrid = _add_sub_element(request, 'itv:Vodcrid')
_add_sub_element(vodcrid, 'com:Id')
--- /dev/null
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ int_or_none,
+ js_to_json,
+ try_get,
+)
+
+
+class JojIE(InfoExtractor):
+ _VALID_URL = r'''(?x)
+ (?:
+ joj:|
+ https?://media\.joj\.sk/embed/
+ )
+ (?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})
+ '''
+ _TESTS = [{
+ 'url': 'https://media.joj.sk/embed/a388ec4c-6019-4a4a-9312-b1bee194e932',
+ 'info_dict': {
+ 'id': 'a388ec4c-6019-4a4a-9312-b1bee194e932',
+ 'ext': 'mp4',
+ 'title': 'NOVÉ BÝVANIE',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 3118,
+ }
+ }, {
+ 'url': 'joj:a388ec4c-6019-4a4a-9312-b1bee194e932',
+ 'only_matching': True,
+ }]
+
+ @staticmethod
+ def _extract_urls(webpage):
+ return re.findall(
+ r'<iframe\b[^>]+\bsrc=["\'](?P<url>(?:https?:)?//media\.joj\.sk/embed/[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})',
+ webpage)
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(
+ 'https://media.joj.sk/embed/%s' % video_id, video_id)
+
+ title = self._search_regex(
+ (r'videoTitle\s*:\s*(["\'])(?P<title>(?:(?!\1).)+)\1',
+ r'<title>(?P<title>[^<]+)'), webpage, 'title',
+ default=None, group='title') or self._og_search_title(webpage)
+
+ bitrates = self._parse_json(
+ self._search_regex(
+ r'(?s)bitrates\s*=\s*({.+?});', webpage, 'bitrates',
+ default='{}'),
+ video_id, transform_source=js_to_json, fatal=False)
+
+ formats = []
+ for format_url in try_get(bitrates, lambda x: x['mp4'], list) or []:
+ if isinstance(format_url, compat_str):
+ height = self._search_regex(
+ r'(\d+)[pP]\.', format_url, 'height', default=None)
+ formats.append({
+ 'url': format_url,
+ 'format_id': '%sp' % height if height else None,
+ 'height': int_or_none(height),
+ })
+ if not formats:
+ playlist = self._download_xml(
+ 'https://media.joj.sk/services/Video.php?clip=%s' % video_id,
+ video_id)
+ for file_el in playlist.findall('./files/file'):
+ path = file_el.get('path')
+ if not path:
+ continue
+ format_id = file_el.get('id') or file_el.get('label')
+ formats.append({
+ 'url': 'http://n16.joj.sk/storage/%s' % path.replace(
+ 'dat/', '', 1),
+ 'format_id': format_id,
+ 'height': int_or_none(self._search_regex(
+ r'(\d+)[pP]', format_id or path, 'height',
+ default=None)),
+ })
+ self._sort_formats(formats)
+
+ thumbnail = self._og_search_thumbnail(webpage)
+
+ duration = int_or_none(self._search_regex(
+ r'videoDuration\s*:\s*(\d+)', webpage, 'duration', fatal=False))
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'thumbnail': thumbnail,
+ 'duration': duration,
+ 'formats': formats,
+ }
webpage, 'description', fatal=False)
publish_date = unified_strdate(self._html_search_meta(
'citation_publication_date', webpage, 'publish date', fatal=False))
- comment_count = self._html_search_regex(
+ comment_count = int(self._html_search_regex(
r'<meta name="num_comments" content="(\d+) Comments?"',
- webpage, 'comment count', fatal=False)
+ webpage, 'comment count', fatal=False))
return {
'id': video_id,
--- /dev/null
+# coding: utf-8
+
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ int_or_none,
+ unified_timestamp,
+ update_url_query,
+)
+
+
+class KakaoIE(InfoExtractor):
+ _VALID_URL = r'https?://tv\.kakao\.com/channel/(?P<channel>\d+)/cliplink/(?P<id>\d+)'
+ _API_BASE = 'http://tv.kakao.com/api/v1/ft/cliplinks'
+
+ _TESTS = [{
+ 'url': 'http://tv.kakao.com/channel/2671005/cliplink/301965083',
+ 'md5': '702b2fbdeb51ad82f5c904e8c0766340',
+ 'info_dict': {
+ 'id': '301965083',
+ 'ext': 'mp4',
+ 'title': '乃木坂46 バナナマン 「3期生紹介コーナーが始動！顔高低差GPも！」 『乃木坂工事中』',
+ 'uploader_id': 2671005,
+ 'uploader': '그랑그랑이',
+ 'timestamp': 1488160199,
+ 'upload_date': '20170227',
+ }
+ }, {
+ 'url': 'http://tv.kakao.com/channel/2653210/cliplink/300103180',
+ 'md5': 'a8917742069a4dd442516b86e7d66529',
+ 'info_dict': {
+ 'id': '300103180',
+ 'ext': 'mp4',
+ 'description': '러블리즈 - Destiny (나의 지구) (Lovelyz - Destiny)\r\n\r\n[쇼! 음악중심] 20160611, 507회',
+ 'title': '러블리즈 - Destiny (나의 지구) (Lovelyz - Destiny)',
+ 'uploader_id': 2653210,
+ 'uploader': '쇼 음악중심',
+ 'timestamp': 1485684628,
+ 'upload_date': '20170129',
+ }
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ player_header = {
+ 'Referer': update_url_query(
+ 'http://tv.kakao.com/embed/player/cliplink/%s' % video_id, {
+ 'service': 'kakao_tv',
+ 'autoplay': '1',
+ 'profile': 'HIGH',
+ 'wmode': 'transparent',
+ })
+ }
+
+ QUERY_COMMON = {
+ 'player': 'monet_html5',
+ 'referer': url,
+ 'uuid': '',
+ 'service': 'kakao_tv',
+ 'section': '',
+ 'dteType': 'PC',
+ }
+
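+ # The clip is resolved in three steps: /impress returns the clip and
+ # channel metadata, /raw lists the available output profiles, and
+ # /raw/videolocation resolves the playable URL for each profile.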
+ query = QUERY_COMMON.copy()
+ query['fields'] = 'clipLink,clip,channel,hasPlusFriend,-service,-tagList'
+ impress = self._download_json(
+ '%s/%s/impress' % (self._API_BASE, video_id),
+ video_id, 'Downloading video info',
+ query=query, headers=player_header)
+
+ clip_link = impress['clipLink']
+ clip = clip_link['clip']
+
+ title = clip.get('title') or clip_link.get('displayTitle')
+
+ tid = impress.get('tid', '')
+
+ query = QUERY_COMMON.copy()
+ query.update({
+ 'tid': tid,
+ 'profile': 'HIGH',
+ })
+ raw = self._download_json(
+ '%s/%s/raw' % (self._API_BASE, video_id),
+ video_id, 'Downloading video formats info',
+ query=query, headers=player_header)
+
+ formats = []
+ for fmt in raw.get('outputList', []):
+ try:
+ profile_name = fmt['profile']
+ fmt_url_json = self._download_json(
+ '%s/%s/raw/videolocation' % (self._API_BASE, video_id),
+ video_id,
+ 'Downloading video URL for profile %s' % profile_name,
+ query={
+ 'service': 'kakao_tv',
+ 'section': '',
+ 'tid': tid,
+ 'profile': profile_name
+ }, headers=player_header, fatal=False)
+
+ if fmt_url_json is None:
+ continue
+
+ fmt_url = fmt_url_json['url']
+ formats.append({
+ 'url': fmt_url,
+ 'format_id': profile_name,
+ 'width': int_or_none(fmt.get('width')),
+ 'height': int_or_none(fmt.get('height')),
+ 'format_note': fmt.get('label'),
+ 'filesize': int_or_none(fmt.get('filesize'))
+ })
+ except KeyError:
+ pass
+ self._sort_formats(formats)
+
+ thumbs = []
+ for thumb in clip.get('clipChapterThumbnailList', []):
+ thumbs.append({
+ 'url': thumb.get('thumbnailUrl'),
+ 'id': compat_str(thumb.get('timeInSec')),
+ 'preference': -1 if thumb.get('isDefault') else 0
+ })
+ top_thumbnail = clip.get('thumbnailUrl')
+ if top_thumbnail:
+ thumbs.append({
+ 'url': top_thumbnail,
+ 'preference': 10,
+ })
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': clip.get('description'),
+ 'uploader': clip_link.get('channel', {}).get('name'),
+ 'uploader_id': clip_link.get('channelId'),
+ 'thumbnails': thumbs,
+ 'timestamp': unified_timestamp(clip_link.get('createTime')),
+ 'duration': int_or_none(clip.get('duration')),
+ 'view_count': int_or_none(clip.get('playCount')),
+ 'like_count': int_or_none(clip.get('likeCount')),
+ 'comment_count': int_or_none(clip.get('commentCount')),
+ 'formats': formats,
+ }
if captions:
for caption in captions.get('objects', []):
# Continue if caption is not ready
- if f.get('status') != 2:
+ if caption.get('status') != 2:
continue
if not caption.get('id'):
continue
webpage = self._download_webpage(url, video_id)
title = (self._html_search_meta('title', webpage, default=None) or
- self._search_regex(r'<h1 class="title">([^<]+)</h1>'))
+ self._search_regex(r'<h1 class="title">([^<]+)</h1>', webpage, 'video title'))
video_id = self._search_regex(
r'/config/video/(.+?)\.xml', webpage, 'video id')
'formats': formats,
'is_live': is_live,
}
+
+
+class ITTFIE(InfoExtractor):
+ _VALID_URL = r'https?://tv\.ittf\.com/video/[^/]+/(?P<id>\d+)'
+ _TEST = {
+ 'url': 'https://tv.ittf.com/video/peng-wang-wei-matsudaira-kenta/951802',
+ 'only_matching': True,
+ }
+
+ def _real_extract(self, url):
+ return self.url_result(
+ update_url_query('https://www.laola1.tv/titanplayer.php', {
+ 'videoid': self._match_id(url),
+ 'type': 'V',
+ 'lang': 'en',
+ 'portal': 'int',
+ 'customer': 1024,
+ }), Laola1TvEmbedIE.ie_key())
'Channel': 'channel',
'ChannelList': 'channel_list',
}
+
+ def smuggle(url):
+ return smuggle_url(url, {'source_url': source_url})
+
entries = []
for kind, video_id in re.findall(
r'LimelightPlayer\.doLoad(Media|Channel|ChannelList)\(["\'](?P<id>[a-z0-9]{32})',
webpage):
entries.append(cls.url_result(
- smuggle_url(
- 'limelight:%s:%s' % (lm[kind], video_id),
- {'source_url': source_url}),
+ smuggle('limelight:%s:%s' % (lm[kind], video_id)),
'Limelight%s' % kind, video_id))
for mobj in re.finditer(
# As per [1] class attribute should be exactly equal to
''', webpage):
kind, video_id = mobj.group('kind'), mobj.group('id')
entries.append(cls.url_result(
- smuggle_url(
- 'limelight:%s:%s' % (kind, video_id),
- {'source_url': source_url}),
+ smuggle('limelight:%s:%s' % (kind, video_id)),
'Limelight%s' % kind.capitalize(), video_id))
+ # http://support.3playmedia.com/hc/en-us/articles/115009517327-Limelight-Embedding-the-Audio-Description-Plugin-with-the-Limelight-Player-on-Your-Web-Page)
+ for video_id in re.findall(
+ r'(?s)LimelightPlayerUtil\.embed\s*\(\s*{.*?\bmediaId["\']\s*:\s*["\'](?P<id>[a-z0-9]{32})',
+ webpage):
+ entries.append(cls.url_result(
+ smuggle('limelight:media:%s' % video_id),
+ LimelightMediaIE.ie_key(), video_id))
return entries
def _call_playlist_service(self, item_id, method, fatal=True, referer=None):
'params': {
'skip_download': True,
},
+ }, {
+ 'url': 'https://www.liveleak.com/view?i=677_1439397581',
+ 'info_dict': {
+ 'id': '677_1439397581',
+ 'title': 'Fuel Depot in China Explosion caught on video',
+ },
+ 'playlist_count': 3,
}]
@staticmethod
- def _extract_url(webpage):
- mobj = re.search(
- r'<iframe[^>]+src="https?://(?:\w+\.)?liveleak\.com/ll_embed\?(?:.*?)i=(?P<id>[\w_]+)(?:.*)',
+ def _extract_urls(webpage):
+ return re.findall(
+ r'<iframe[^>]+src="(https?://(?:\w+\.)?liveleak\.com/ll_embed\?[^"]*[if]=[\w_]+[^"]+)"',
webpage)
- if mobj:
- return 'http://www.liveleak.com/view?i=%s' % mobj.group('id')
def _real_extract(self, url):
video_id = self._match_id(url)
'age_limit': age_limit,
}
- info_dict = entries[0]
+ for idx, info_dict in enumerate(entries):
+ for a_format in info_dict['formats']:
+ if not a_format.get('height'):
+ a_format['height'] = int_or_none(self._search_regex(
+ r'([0-9]+)p\.mp4', a_format['url'], 'height label',
+ default=None))
+
+ self._sort_formats(info_dict['formats'])
+
+ # Don't append entry ID for one-video pages to keep backward compatibility
+ if len(entries) > 1:
+ info_dict['id'] = '%s_%s' % (video_id, idx + 1)
+ else:
+ info_dict['id'] = video_id
- for a_format in info_dict['formats']:
- if not a_format.get('height'):
- a_format['height'] = self._search_regex(
- r'([0-9]+)p\.mp4', a_format['url'], 'height label', default=None)
+ info_dict.update({
+ 'title': video_title,
+ 'description': video_description,
+ 'uploader': video_uploader,
+ 'age_limit': age_limit,
+ 'thumbnail': video_thumbnail,
+ })
- self._sort_formats(info_dict['formats'])
+ return self.playlist_result(entries, video_id, video_title)
+
+
+class LiveLeakEmbedIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?liveleak\.com/ll_embed\?.*?\b(?P<kind>[if])=(?P<id>[\w_]+)'
+
+ # See generic.py for actual test cases
+ _TESTS = [{
+ 'url': 'https://www.liveleak.com/ll_embed?i=874_1459135191',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.liveleak.com/ll_embed?f=ab065df993c1',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ kind, video_id = mobj.group('kind', 'id')
- info_dict.update({
- 'id': video_id,
- 'title': video_title,
- 'description': video_description,
- 'uploader': video_uploader,
- 'age_limit': age_limit,
- 'thumbnail': video_thumbnail,
- })
+ if kind == 'f':
+ webpage = self._download_webpage(url, video_id)
+ liveleak_url = self._search_regex(
+ r'logourl\s*:\s*(?P<q1>[\'"])(?P<url>%s)(?P=q1)' % LiveLeakIE._VALID_URL,
+ webpage, 'LiveLeak URL', group='url')
+ elif kind == 'i':
+ liveleak_url = 'http://www.liveleak.com/view?i=%s' % video_id
- return info_dict
+ return self.url_result(liveleak_url, ie=LiveLeakIE.ie_key())
class LyndaIE(LyndaBaseIE):
IE_NAME = 'lynda'
IE_DESC = 'lynda.com videos'
- _VALID_URL = r'https?://(?:www\.)?lynda\.com/(?:[^/]+/[^/]+/(?P<course_id>\d+)|player/embed)/(?P<id>\d+)'
+ _VALID_URL = r'https?://(?:www\.)?(?:lynda\.com|educourse\.ga)/(?:[^/]+/[^/]+/(?P<course_id>\d+)|player/embed)/(?P<id>\d+)'
_TIMECODE_REGEX = r'\[(?P<timecode>\d+:\d+:\d+[\.,]\d+)\]'
}, {
'url': 'https://www.lynda.com/player/embed/133770?tr=foo=1;bar=g;fizz=rt&fs=0',
'only_matching': True,
+ }, {
+ 'url': 'https://educourse.ga/Bootstrap-tutorials/Using-exercise-files/110885/114408-4.html',
+ 'only_matching': True,
}]
def _raise_unavailable(self, video_id):
# Course link equals to welcome/introduction video link of same course
# We will recognize it as course link
- _VALID_URL = r'https?://(?:www|m)\.lynda\.com/(?P<coursepath>[^/]+/[^/]+/(?P<courseid>\d+))-\d\.html'
+ _VALID_URL = r'https?://(?:www|m)\.(?:lynda\.com|educourse\.ga)/(?P<coursepath>[^/]+/[^/]+/(?P<courseid>\d+))-\d\.html'
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
--- /dev/null
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import int_or_none
+
+
+class ManyVidsIE(InfoExtractor):
+ _VALID_URL = r'(?i)https?://(?:www\.)?manyvids\.com/video/(?P<id>\d+)'
+ _TEST = {
+ 'url': 'https://www.manyvids.com/Video/133957/everthing-about-me/',
+ 'md5': '03f11bb21c52dd12a05be21a5c7dcc97',
+ 'info_dict': {
+ 'id': '133957',
+ 'ext': 'mp4',
+ 'title': 'everthing about me (Preview)',
+ 'view_count': int,
+ 'like_count': int,
+ },
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
+ video_url = self._search_regex(
+ r'data-(?:video-filepath|meta-video)\s*=\s*(["\'])(?P<url>(?:(?!\1).)+)\1',
+ webpage, 'video URL', group='url')
+
+ title = '%s (Preview)' % self._html_search_regex(
+ r'<h2[^>]+class="m-a-0"[^>]*>([^<]+)', webpage, 'title')
+
+ like_count = int_or_none(self._search_regex(
+ r'data-likes=["\'](\d+)', webpage, 'like count', default=None))
+ view_count = int_or_none(self._html_search_regex(
+ r'(?s)<span[^>]+class="views-wrapper"[^>]*>(.+?)</span', webpage,
+ 'view count', default=None))
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'view_count': view_count,
+ 'like_count': like_count,
+ 'formats': [{
+ 'url': video_url,
+ }],
+ }
class MedialaanIE(InfoExtractor):
_VALID_URL = r'''(?x)
https?://
- (?:www\.)?
+ (?:www\.|nieuws\.)?
(?:
(?P<site_id>vtm|q2|vtmkzoom)\.be/
(?:
# clip
'url': 'http://vtmkzoom.be/k3-dansstudio/een-nieuw-seizoen-van-k3-dansstudio',
'only_matching': True,
+ }, {
+ # http/s redirect
+ 'url': 'https://vtmkzoom.be/video?aid=45724',
+ 'info_dict': {
+ 'id': '257136373657000',
+ 'ext': 'mp4',
+ 'title': 'K3 Dansstudio Ushuaia afl.6',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'skip': 'Requires account credentials',
+ }, {
+ # nieuws.vtm.be
+ 'url': 'https://nieuws.vtm.be/stadion/stadion/genk-nog-moeilijk-programma',
+ 'only_matching': True,
}]
def _real_initialize(self):
video_id, transform_source=lambda s: '[%s]' % s, fatal=False)
if player:
video = player[-1]
+ if video['videoUrl'] in ('http', 'https'):
+ return self.url_result(video['url'], MedialaanIE.ie_key())
info = {
'id': video_id,
'url': video['videoUrl'],
--- /dev/null
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import js_to_json
+
+
+class MegaphoneIE(InfoExtractor):
+ IE_NAME = 'megaphone.fm'
+ IE_DESC = 'megaphone.fm embedded players'
+ _VALID_URL = r'https://player\.megaphone\.fm/(?P<id>[A-Z0-9]+)'
+ _TEST = {
+ 'url': 'https://player.megaphone.fm/GLT9749789991?"',
+ 'md5': '4816a0de523eb3e972dc0dda2c191f96',
+ 'info_dict': {
+ 'id': 'GLT9749789991',
+ 'ext': 'mp3',
+ 'title': '#97 What Kind Of Idiot Gets Phished?',
+ 'thumbnail': r're:^https://.*\.png.*$',
+ 'duration': 1776.26375,
+ 'author': 'Reply All',
+ },
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ title = self._og_search_property('audio:title', webpage)
+ author = self._og_search_property('audio:artist', webpage)
+ thumbnail = self._og_search_thumbnail(webpage)
+
+ episode_json = self._search_regex(r'(?s)var\s+episode\s*=\s*(\{.+?\});', webpage, 'episode JSON')
+ episode_data = self._parse_json(episode_json, video_id, js_to_json)
+ video_url = self._proto_relative_url(episode_data['mediaUrl'], 'https:')
+
+ formats = [{
+ 'url': video_url,
+ }]
+
+ return {
+ 'id': video_id,
+ 'thumbnail': thumbnail,
+ 'title': title,
+ 'author': author,
+ 'duration': episode_data['duration'],
+ 'formats': formats,
+ }
+
+ @classmethod
+ def _extract_urls(cls, webpage):
+ return [m[0] for m in re.findall(
+ r'<iframe[^>]*?\ssrc=["\'](%s)' % cls._VALID_URL, webpage)]
video_id, 'Downloading gigya script')
# Get a appKey/uuid for getting the session key
- appKey_var = self._search_regex(
- r'value\s*\(\s*["\']appGridApplicationKey["\']\s*,\s*([0-9a-f]+)',
- gigya_sc, 'appKey variable')
appKey = self._search_regex(
- r'var\s+%s\s*=\s*["\']([0-9a-f]+)' % appKey_var, gigya_sc, 'appKey')
+ r'constant\s*\(\s*["\']_appGridApplicationKey["\']\s*,\s*["\']([0-9a-f]+)',
+ gigya_sc, 'appKey')
session_json = self._download_json(
'https://appgrid-api.cloud.accedo.tv/session',
from ..compat import (
compat_chr,
compat_ord,
+ compat_str,
compat_urllib_parse_unquote,
compat_urlparse,
+ compat_zip
)
from ..utils import (
clean_html,
ExtractorError,
+ int_or_none,
OnDemandPagedList,
str_to_int,
+ try_get,
+ urljoin,
)
'only_matching': True,
}]
- # See https://www.mixcloud.com/media/js2/www_js_2.9e23256562c080482435196ca3975ab5.js
@staticmethod
- def _decrypt_play_info(play_info):
- KEY = 'pleasedontdownloadourmusictheartistswontgetpaid'
-
- play_info = base64.b64decode(play_info.encode('ascii'))
-
+ def _decrypt_xor_cipher(key, ciphertext):
+ """Encrypt/Decrypt XOR cipher. Both ways are possible because it's XOR."""
return ''.join([
- compat_chr(compat_ord(ch) ^ compat_ord(KEY[idx % len(KEY)]))
- for idx, ch in enumerate(play_info)])
+ compat_chr(compat_ord(ch) ^ compat_ord(k))
+ for ch, k in compat_zip(ciphertext, itertools.cycle(key))])
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
webpage = self._download_webpage(url, track_id)
+ # Legacy path
+ encrypted_play_info = self._search_regex(
+ r'm-play-info="([^"]+)"', webpage, 'play info', default=None)
+
+ if encrypted_play_info is not None:
+ # Decode
+ encrypted_play_info = base64.b64decode(encrypted_play_info)
+ else:
+ # New path
+ full_info_json = self._parse_json(self._html_search_regex(
+ r'<script id="relay-data" type="text/x-mixcloud">([^<]+)</script>',
+ webpage, 'play info'), 'play info')
+ for item in full_info_json:
+ item_data = try_get(
+ item, lambda x: x['cloudcast']['data']['cloudcastLookup'],
+ dict)
+ if try_get(item_data, lambda x: x['streamInfo']['url']):
+ info_json = item_data
+ break
+ else:
+ raise ExtractorError('Failed to extract matching stream info')
+
message = self._html_search_regex(
r'(?s)<div[^>]+class="global-message cloudcast-disabled-notice-light"[^>]*>(.+?)<(?:a|/div)',
webpage, 'error message', default=None)
- encrypted_play_info = self._search_regex(
- r'm-play-info="([^"]+)"', webpage, 'play info')
- play_info = self._parse_json(
- self._decrypt_play_info(encrypted_play_info), track_id)
-
- if message and 'stream_url' not in play_info:
- raise ExtractorError('%s said: %s' % (self.IE_NAME, message), expected=True)
-
- song_url = play_info['stream_url']
-
- title = self._html_search_regex(r'm-title="([^"]+)"', webpage, 'title')
- thumbnail = self._proto_relative_url(self._html_search_regex(
- r'm-thumbnail-url="([^"]+)"', webpage, 'thumbnail', fatal=False))
- uploader = self._html_search_regex(
- r'm-owner-name="([^"]+)"', webpage, 'uploader', fatal=False)
- uploader_id = self._search_regex(
- r'\s+"profile": "([^"]+)",', webpage, 'uploader id', fatal=False)
- description = self._og_search_description(webpage)
- view_count = str_to_int(self._search_regex(
- [r'<meta itemprop="interactionCount" content="UserPlays:([0-9]+)"',
- r'/listeners/?">([0-9,.]+)</a>',
- r'(?:m|data)-tooltip=["\']([\d,.]+) plays'],
- webpage, 'play count', default=None))
+ js_url = self._search_regex(
+ r'<script[^>]+\bsrc=["\'](https://(?:www\.)?mixcloud\.com/media/(?:js2/www_js_4|js/www)\.[^>]+\.js)',
+ webpage, 'js url')
+ js = self._download_webpage(js_url, track_id, 'Downloading JS')
+ # Known plaintext attack
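+ # XOR-ing the ciphertext with a known plaintext prefix (the start of the
+ # play info JSON or of a stream URL) yields the matching prefix of the
+ # repeating key; the full key is then located as a string literal in the
+ # player JS.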
+ if encrypted_play_info:
+ kps = ['{"stream_url":']
+ kpa_target = encrypted_play_info
+ else:
+ kps = ['https://', 'http://']
+ kpa_target = base64.b64decode(info_json['streamInfo']['url'])
+ for kp in kps:
+ partial_key = self._decrypt_xor_cipher(kpa_target, kp)
+ for quote in ["'", '"']:
+ key = self._search_regex(
+ r'{0}({1}[^{0}]*){0}'.format(quote, re.escape(partial_key)),
+ js, 'encryption key', default=None)
+ if key is not None:
+ break
+ else:
+ continue
+ break
+ else:
+ raise ExtractorError('Failed to extract encryption key')
+
+ if encrypted_play_info is not None:
+ play_info = self._parse_json(self._decrypt_xor_cipher(key, encrypted_play_info), 'play info')
+ if message and 'stream_url' not in play_info:
+ raise ExtractorError('%s said: %s' % (self.IE_NAME, message), expected=True)
+ song_url = play_info['stream_url']
+ formats = [{
+ 'format_id': 'normal',
+ 'url': song_url
+ }]
+
+ title = self._html_search_regex(r'm-title="([^"]+)"', webpage, 'title')
+ thumbnail = self._proto_relative_url(self._html_search_regex(
+ r'm-thumbnail-url="([^"]+)"', webpage, 'thumbnail', fatal=False))
+ uploader = self._html_search_regex(
+ r'm-owner-name="([^"]+)"', webpage, 'uploader', fatal=False)
+ uploader_id = self._search_regex(
+ r'\s+"profile": "([^"]+)",', webpage, 'uploader id', fatal=False)
+ description = self._og_search_description(webpage)
+ view_count = str_to_int(self._search_regex(
+ [r'<meta itemprop="interactionCount" content="UserPlays:([0-9]+)"',
+ r'/listeners/?">([0-9,.]+)</a>',
+ r'(?:m|data)-tooltip=["\']([\d,.]+) plays'],
+ webpage, 'play count', default=None))
+
+ else:
+ title = info_json['name']
+ thumbnail = urljoin(
+ 'https://thumbnailer.mixcloud.com/unsafe/600x600/',
+ try_get(info_json, lambda x: x['picture']['urlRoot'], compat_str))
+ uploader = try_get(info_json, lambda x: x['owner']['displayName'])
+ uploader_id = try_get(info_json, lambda x: x['owner']['username'])
+ description = try_get(info_json, lambda x: x['description'])
+ view_count = int_or_none(try_get(info_json, lambda x: x['plays']))
+
+ stream_info = info_json['streamInfo']
+ formats = []
+
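+ # Each streamInfo URL is base64-encoded XOR ciphertext; decrypt it with
+ # the recovered key before handing it to the HLS/DASH/HTTP handlers.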
+ for url_key in ('url', 'hlsUrl', 'dashUrl'):
+ format_url = stream_info.get(url_key)
+ if not format_url:
+ continue
+ decrypted = self._decrypt_xor_cipher(key, base64.b64decode(format_url))
+ if not decrypted:
+ continue
+ if url_key == 'hlsUrl':
+ formats.extend(self._extract_m3u8_formats(
+ decrypted, track_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls', fatal=False))
+ elif url_key == 'dashUrl':
+ formats.extend(self._extract_mpd_formats(
+ decrypted, track_id, mpd_id='dash', fatal=False))
+ else:
+ formats.append({
+ 'format_id': 'http',
+ 'url': decrypted,
+ })
+ self._sort_formats(formats)
return {
'id': track_id,
'title': title,
- 'url': song_url,
+ 'formats': formats,
'description': description,
'thumbnail': thumbnail,
'uploader': uploader,
(?:[\da-z_-]+\.)*mlb\.com/
(?:
(?:
- (?:.*?/)?video/(?:topic/[\da-z_-]+/)?v|
+ (?:.*?/)?video/(?:topic/[\da-z_-]+/)?(?:v|.*?/c-)|
(?:
shared/video/embed/(?:embed|m-internal-embed)\.html|
(?:[^/]+/)+(?:play|index)\.jsp|
},
{
'url': 'http://m.mlb.com/news/article/118550098/blue-jays-kevin-pillar-goes-spidey-up-the-wall-to-rob-tim-beckham-of-a-homer',
- 'md5': 'b190e70141fb9a1552a85426b4da1b5d',
+ 'md5': 'aafaf5b0186fee8f32f20508092f8111',
'info_dict': {
'id': '75609783',
'ext': 'mp4',
'upload_date': '20150415',
}
},
+ {
+ 'url': 'https://www.mlb.com/video/hargrove-homers-off-caldwell/c-1352023483?tid=67793694',
+ 'only_matching': True,
+ },
{
'url': 'http://m.mlb.com/shared/video/embed/embed.html?content_id=35692085&topic_id=6479266&width=400&height=224&property=mlb',
'only_matching': True,
class MorningstarIE(InfoExtractor):
IE_DESC = 'morningstar.com'
- _VALID_URL = r'https?://(?:www\.)?morningstar\.com/[cC]over/video[cC]enter\.aspx\?id=(?P<id>[0-9]+)'
- _TEST = {
+ _VALID_URL = r'https?://(?:(?:www|news)\.)?morningstar\.com/[cC]over/video[cC]enter\.aspx\?id=(?P<id>[0-9]+)'
+ _TESTS = [{
'url': 'http://www.morningstar.com/cover/videocenter.aspx?id=615869',
'md5': '6c0acface7a787aadc8391e4bbf7b0f5',
'info_dict': {
'description': "Vanguard's Joel Dickson on managing higher tax rates for high-income earners and fund capital-gain distributions in 2013.",
'thumbnail': r're:^https?://.*m(?:orning)?star\.com/.+thumb\.jpg$'
}
- }
+ }, {
+ 'url': 'http://news.morningstar.com/cover/videocenter.aspx?id=825556',
+ 'only_matching': True,
+ }]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
+++ /dev/null
-from __future__ import unicode_literals
-
-from .common import InfoExtractor
-from ..utils import int_or_none
-
-
-class MporaIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?mpora\.(?:com|de)/videos/(?P<id>[^?#/]+)'
- IE_NAME = 'MPORA'
-
- _TEST = {
- 'url': 'http://mpora.de/videos/AAdo8okx4wiz/embed?locale=de',
- 'md5': 'a7a228473eedd3be741397cf452932eb',
- 'info_dict': {
- 'id': 'AAdo8okx4wiz',
- 'ext': 'mp4',
- 'title': 'Katy Curd - Winter in the Forest',
- 'duration': 416,
- 'uploader': 'Peter Newman Media',
- },
- }
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
- webpage = self._download_webpage(url, video_id)
-
- data_json = self._search_regex(
- [r"new FM\.Player\('[^']+',\s*(\{.*?)\).player;",
- r"new\s+FM\.Kaltura\.Player\('[^']+'\s*,\s*({.+?})\);"],
- webpage, 'json')
- data = self._parse_json(data_json, video_id)
-
- uploader = data['info_overlay'].get('username')
- duration = data['video']['duration'] // 1000
- thumbnail = data['video']['encodings']['sd']['poster']
- title = data['info_overlay']['title']
-
- formats = []
- for encoding_id, edata in data['video']['encodings'].items():
- for src in edata['sources']:
- width_str = self._search_regex(
- r'_([0-9]+)\.[a-zA-Z0-9]+$', src['src'],
- False, default=None)
- vcodec = src['type'].partition('/')[2]
-
- formats.append({
- 'format_id': encoding_id + '-' + vcodec,
- 'url': src['src'],
- 'vcodec': vcodec,
- 'width': int_or_none(width_str),
- })
-
- self._sort_formats(formats)
-
- return {
- 'id': video_id,
- 'title': title,
- 'formats': formats,
- 'uploader': uploader,
- 'duration': duration,
- 'thumbnail': thumbnail,
- }
format_url = file_.get('url')
if not format_url:
continue
- ext = determine_ext(format_url)
- if ext == 'ism':
- formats.extend(self._extract_ism_formats(
- format_url + '/Manifest', display_id, 'mss', fatal=False))
if 'm3u8' in format_url:
# m3u8_native should not be used here until
# https://github.com/rg3/youtube-dl/issues/9913 is fixed
format_url, display_id, 'mp4',
m3u8_id='hls', fatal=False)
formats.extend(m3u8_formats)
+ elif determine_ext(format_url) == 'ism':
+ formats.extend(self._extract_ism_formats(
+ format_url + '/Manifest', display_id, 'mss', fatal=False))
else:
formats.append({
'url': format_url,
thumb_node = itemdoc.find(search_path)
if thumb_node is None:
return None
- else:
- return thumb_node.attrib['url']
+ return thumb_node.get('url') or thumb_node.text or None
def _extract_mobile_video_formats(self, mtvn_id):
webpage_url = self._MOBILE_TEMPLATE % mtvn_id
hls_url = rendition.find('./src').text
formats.extend(self._extract_m3u8_formats(
hls_url, video_id, ext='mp4', entry_protocol='m3u8_native',
- m3u8_id='hls'))
+ m3u8_id='hls', fatal=False))
else:
# fms
try:
}])
except (KeyError, TypeError):
raise ExtractorError('Invalid rendition field.')
- self._sort_formats(formats)
+ if formats:
+ self._sort_formats(formats)
return formats
def _extract_subtitles(self, mdoc, mtvn_id):
mediagen_url += 'acceptMethods='
mediagen_url += 'hls' if use_hls else 'fms'
- mediagen_doc = self._download_xml(mediagen_url, video_id,
- 'Downloading video urls')
+ mediagen_doc = self._download_xml(
+ mediagen_url, video_id, 'Downloading video urls', fatal=False)
+
+ if mediagen_doc is False:
+ return None
item = mediagen_doc.find('./video/item')
if item is not None and item.get('type') == 'text':
formats = self._extract_video_formats(mediagen_doc, mtvn_id, video_id)
+ # Some parts of complete video may be missing (e.g. missing Act 3 in
+ # http://www.southpark.de/alle-episoden/s14e01-sexual-healing)
+ if not formats:
+ return None
+
+ self._sort_formats(formats)
+
return {
'title': title,
'formats': formats,
title = xpath_text(idoc, './channel/title')
description = xpath_text(idoc, './channel/description')
+ entries = []
+ for item in idoc.findall('.//item'):
+ info = self._get_video_info(item, use_hls)
+ if info:
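+ # Everything that is not a VOD URL is treated as a live channel, so
+ # defer to HitboxIE whenever its more specific pattern matches.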
+ entries.append(info)
+
return self.playlist_result(
- [self._get_video_info(item, use_hls) for item in idoc.findall('.//item')],
- playlist_title=title, playlist_description=description)
+ entries, playlist_title=title, playlist_description=description)
def _extract_triforce_mgid(self, webpage, data_zone=None, video_id=None):
triforce_feed = self._parse_json(self._search_regex(
_VALID_URL = r'https?://vplayer\.nbcsports\.com/(?:[^/]+/)+(?P<id>[0-9a-zA-Z_]+)'
_TESTS = [{
- 'url': 'https://vplayer.nbcsports.com/p/BxmELC/nbcsports_share/select/9CsDKds0kvHI',
+ 'url': 'https://vplayer.nbcsports.com/p/BxmELC/nbcsports_embed/select/9CsDKds0kvHI',
'info_dict': {
'id': '9CsDKds0kvHI',
- 'ext': 'flv',
+ 'ext': 'mp4',
'description': 'md5:df390f70a9ba7c95ff1daace988f0d8d',
'title': 'Tyler Kalinoski hits buzzer-beater to lift Davidson',
'timestamp': 1426270238,
'uploader': 'NBCU-SPORTS',
}
}, {
- 'url': 'http://vplayer.nbcsports.com/p/BxmELC/nbc_embedshare/select/_hqLjQ95yx8Z',
+ 'url': 'https://vplayer.nbcsports.com/p/BxmELC/nbcsports_embed/select/media/_hqLjQ95yx8Z',
'only_matching': True,
}]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
- theplatform_url = self._og_search_video_url(webpage)
+ theplatform_url = self._og_search_video_url(webpage).replace(
+ 'vplayer.nbcsports.com', 'player.theplatform.com')
return self.url_result(theplatform_url, 'ThePlatform')
from __future__ import unicode_literals
+import re
+
from .common import InfoExtractor
+from ..utils import (
+ extract_attributes,
+ int_or_none,
+ parse_duration,
+ parse_filesize,
+ unified_timestamp,
+)
class NewgroundsIE(InfoExtractor):
'ext': 'mp3',
'title': 'B7 - BusMode',
'uploader': 'Burn7',
- }
+ 'timestamp': 1378878540,
+ 'upload_date': '20130911',
+ 'duration': 143,
+ },
}, {
'url': 'https://www.newgrounds.com/portal/view/673111',
'md5': '3394735822aab2478c31b1004fe5e5bc',
'ext': 'mp4',
'title': 'Dancin',
'uploader': 'Squirrelman82',
+ 'timestamp': 1460256780,
+ 'upload_date': '20160410',
+ },
+ }, {
+ # source format unavailable, additional mp4 formats
+ 'url': 'http://www.newgrounds.com/portal/view/689400',
+ 'info_dict': {
+ 'id': '689400',
+ 'ext': 'mp4',
+ 'title': 'ZTV News Episode 8',
+ 'uploader': 'BennettTheSage',
+ 'timestamp': 1487965140,
+ 'upload_date': '20170224',
+ },
+ 'params': {
+ 'skip_download': True,
},
}]
def _real_extract(self, url):
media_id = self._match_id(url)
+
webpage = self._download_webpage(url, media_id)
title = self._html_search_regex(
r'<title>([^>]+)</title>', webpage, 'title')
- uploader = self._html_search_regex(
- r'Author\s*<a[^>]+>([^<]+)', webpage, 'uploader', fatal=False)
+ media_url = self._parse_json(self._search_regex(
+ r'"url"\s*:\s*("[^"]+"),', webpage, ''), media_id)
+
+ formats = [{
+ 'url': media_url,
+ 'format_id': 'source',
+ 'quality': 1,
+ }]
+
+ max_resolution = int_or_none(self._search_regex(
+ r'max_resolution["\']\s*:\s*(\d+)', webpage, 'max resolution',
+ default=None))
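+ # Additional MP4 renditions sit next to the source file under a
+ # resolution suffix, e.g. <base>.mp4 -> <base>.720p.mp4, up to the
+ # advertised max_resolution; _check_formats below drops dead URLs.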
+ if max_resolution:
+ url_base = media_url.rpartition('.')[0]
+ for resolution in (360, 720, 1080):
+ if resolution > max_resolution:
+ break
+ formats.append({
+ 'url': '%s.%dp.mp4' % (url_base, resolution),
+ 'format_id': '%dp' % resolution,
+ 'height': resolution,
+ })
+
+ self._check_formats(formats, media_id)
+ self._sort_formats(formats)
- music_url = self._parse_json(self._search_regex(
- r'"url":("[^"]+"),', webpage, ''), media_id)
+ uploader = self._search_regex(
+ r'(?:Author|Writer)\s*<a[^>]+>([^<]+)', webpage, 'uploader',
+ fatal=False)
+
+ timestamp = unified_timestamp(self._search_regex(
+ r'<dt>Uploaded</dt>\s*<dd>([^<]+)', webpage, 'timestamp',
+ default=None))
+ duration = parse_duration(self._search_regex(
+ r'<dd>Song\s*</dd><dd>.+?</dd><dd>([^<]+)', webpage, 'duration',
+ default=None))
+
+ filesize_approx = parse_filesize(self._html_search_regex(
+ r'<dd>Song\s*</dd><dd>(.+?)</dd>', webpage, 'filesize',
+ default=None))
+ if len(formats) == 1:
+ formats[0]['filesize_approx'] = filesize_approx
+
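+ # A "Song" details block means this is an audio submission, so the
+ # single source format carries no video stream.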
+ if '<dd>Song' in webpage:
+ formats[0]['vcodec'] = 'none'
return {
'id': media_id,
'title': title,
- 'url': music_url,
'uploader': uploader,
+ 'timestamp': timestamp,
+ 'duration': duration,
+ 'formats': formats,
}
+
+
+class NewgroundsPlaylistIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?newgrounds\.com/(?:collection|[^/]+/search/[^/]+)/(?P<id>[^/?#&]+)'
+ _TESTS = [{
+ 'url': 'https://www.newgrounds.com/collection/cats',
+ 'info_dict': {
+ 'id': 'cats',
+ 'title': 'Cats',
+ },
+ 'playlist_mincount': 46,
+ }, {
+ 'url': 'http://www.newgrounds.com/portal/search/author/ZONE-SAMA',
+ 'info_dict': {
+ 'id': 'ZONE-SAMA',
+ 'title': 'Portal Search: ZONE-SAMA',
+ },
+ 'playlist_mincount': 47,
+ }, {
+ 'url': 'http://www.newgrounds.com/audio/search/title/cats',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ playlist_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, playlist_id)
+
+ title = self._search_regex(
+ r'<title>([^>]+)</title>', webpage, 'title', default=None)
+
+ # cut left menu
+ webpage = self._search_regex(
+ r'(?s)<div[^>]+\bclass=["\']column wide(.+)',
+ webpage, 'wide column', default=webpage)
+
+ entries = []
+ for a, path, media_id in re.findall(
+ r'(<a[^>]+\bhref=["\']/?((?:portal/view|audio/listen)/(\d+))[^>]+>)',
+ webpage):
+ a_class = extract_attributes(a).get('class')
+ if a_class not in ('item-portalsubmission', 'item-audiosubmission'):
+ continue
+ entries.append(
+ self.url_result(
+ 'https://www.newgrounds.com/%s' % path,
+ ie=NewgroundsIE.ie_key(), video_id=media_id))
+
+ return self.playlist_result(entries, playlist_id, title)
--- /dev/null
+# coding: utf-8
+from __future__ import unicode_literals
+
+import hashlib
+import random
+import re
+import time
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+ parse_duration,
+ try_get,
+ urlencode_postdata,
+)
+
+
+class NexxIE(InfoExtractor):
+ _VALID_URL = r'https?://api\.nexx(?:\.cloud|cdn\.com)/v3/(?P<domain_id>\d+)/videos/byid/(?P<id>\d+)'
+ _TESTS = [{
+ # movie
+ 'url': 'https://api.nexx.cloud/v3/748/videos/byid/128907',
+ 'md5': '16746bfc28c42049492385c989b26c4a',
+ 'info_dict': {
+ 'id': '128907',
+ 'ext': 'mp4',
+ 'title': 'Stiftung Warentest',
+ 'alt_title': 'Wie ein Test ablƤuft',
+ 'description': 'md5:d1ddb1ef63de721132abd38639cc2fd2',
+ 'release_year': 2013,
+ 'creator': 'SPIEGEL TV',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 2509,
+ 'timestamp': 1384264416,
+ 'upload_date': '20131112',
+ },
+ 'params': {
+ 'format': 'bestvideo',
+ },
+ }, {
+ # episode
+ 'url': 'https://api.nexx.cloud/v3/741/videos/byid/247858',
+ 'info_dict': {
+ 'id': '247858',
+ 'ext': 'mp4',
+ 'title': 'Return of the Golden Child (OV)',
+ 'description': 'md5:5d969537509a92b733de21bae249dc63',
+ 'release_year': 2017,
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 1397,
+ 'timestamp': 1495033267,
+ 'upload_date': '20170517',
+ 'episode_number': 2,
+ 'season_number': 2,
+ },
+ 'params': {
+ 'format': 'bestvideo',
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://api.nexxcdn.com/v3/748/videos/byid/128907',
+ 'only_matching': True,
+ }]
+
+ @staticmethod
+ def _extract_urls(webpage):
+ # Reference:
+ # 1. https://nx-s.akamaized.net/files/201510/44.pdf
+
+ entries = []
+
+ # JavaScript Integration
+ mobj = re.search(
+ r'<script\b[^>]+\bsrc=["\']https?://require\.nexx(?:\.cloud|cdn\.com)/(?P<id>\d+)',
+ webpage)
+ if mobj:
+ domain_id = mobj.group('id')
+ for video_id in re.findall(
+ r'(?is)onPLAYReady.+?_play\.init\s*\(.+?\s*,\s*["\']?(\d+)',
+ webpage):
+ entries.append(
+ 'https://api.nexx.cloud/v3/%s/videos/byid/%s'
+ % (domain_id, video_id))
+
+ # TODO: support more embed formats
+
+ return entries
+
+ @staticmethod
+ def _extract_url(webpage):
+ return NexxIE._extract_urls(webpage)[0]
+
+ def _handle_error(self, response):
+ status = int_or_none(try_get(
+ response, lambda x: x['metadata']['status']) or 200)
+ if 200 <= status < 300:
+ return
+ raise ExtractorError(
+ '%s said: %s' % (self.IE_NAME, response['metadata']['errorhint']),
+ expected=True)
+
+ def _call_api(self, domain_id, path, video_id, data=None, headers={}):
+ headers['Content-Type'] = 'application/x-www-form-urlencoded; charset=UTF-8'
+ result = self._download_json(
+ 'https://api.nexx.cloud/v3/%s/%s' % (domain_id, path), video_id,
+ 'Downloading %s JSON' % path, data=urlencode_postdata(data),
+ headers=headers)
+ self._handle_error(result)
+ return result['result']
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ domain_id, video_id = mobj.group('domain_id', 'id')
+
+ # Reverse engineered from JS code (see getDeviceID function)
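+ # Produces something like '3:1506345600:543217' (illustrative value only)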
+ device_id = '%d:%d:%d%d' % (
+ random.randint(1, 4), int(time.time()),
+ random.randint(10000, 99999), random.randint(1, 9))
+
+ result = self._call_api(domain_id, 'session/init', video_id, data={
+ 'nxp_devh': device_id,
+ 'nxp_userh': '',
+ 'precid': '0',
+ 'playlicense': '0',
+ 'screenx': '1920',
+ 'screeny': '1080',
+ 'playerversion': '6.0.00',
+ 'gateway': 'html5',
+ 'adGateway': '',
+ 'explicitlanguage': 'en-US',
+ 'addTextTemplates': '1',
+ 'addDomainData': '1',
+ 'addAdModel': '1',
+ }, headers={
+ 'X-Request-Enable-Auth-Fallback': '1',
+ })
+
+ cid = result['general']['cid']
+
+ # As described in [1] X-Request-Token generation algorithm is
+ # as follows:
+ # md5( operation + domain_id + domain_secret )
+ # where domain_secret is a static value that will be given by nexx.tv
+ # as per [1]. Here is how this "secret" is generated (reversed
+ # from _play.api.init function, search for clienttoken). So it's
+ # actually not static and not that much of a secret.
+ # 1. https://nexxtvstorage.blob.core.windows.net/files/201610/27.pdf
+ secret = result['device']['clienttoken'][int(device_id[0]):]
+ secret = secret[0:len(secret) - int(device_id[-1])]
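+ # i.e. the secret is clienttoken with its first int(device_id[0]) and
+ # last int(device_id[-1]) characters stripped.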
+
+ op = 'byid'
+
+ # Reversed from JS code for _play.api.call function (search for
+ # X-Request-Token)
+ request_token = hashlib.md5(
+ ''.join((op, domain_id, secret)).encode('utf-8')).hexdigest()
+
+ video = self._call_api(
+ domain_id, 'videos/%s/%s' % (op, video_id), video_id, data={
+ 'additionalfields': 'language,channel,actors,studio,licenseby,slug,subtitle,teaser,description',
+ 'addInteractionOptions': '1',
+ 'addStatusDetails': '1',
+ 'addStreamDetails': '1',
+ 'addCaptions': '1',
+ 'addScenes': '1',
+ 'addHotSpots': '1',
+ 'addBumpers': '1',
+ 'captionFormat': 'data',
+ }, headers={
+ 'X-Request-CID': cid,
+ 'X-Request-Token': request_token,
+ })
+
+ general = video['general']
+ title = general['title']
+
+ stream_data = video['streamdata']
+ language = general.get('language_raw') or ''
+
+ # TODO: reverse more cdns and formats
+
+ cdn = stream_data['cdnType']
+ assert cdn == 'azure'
+
+ azure_locator = stream_data['azureLocator']
+
+ AZURE_URL = 'http://nx-p%02d.akamaized.net/'
+
+ for secure in ('s', ''):
+ cdn_shield = stream_data.get('cdnShieldHTTP%s' % secure.upper())
+ if cdn_shield:
+ azure_base = 'http%s://%s' % (secure, cdn_shield)
+ break
+ else:
+ azure_base = AZURE_URL % int(stream_data['azureAccount'].replace('nexxplayplus', ''))
+
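+ # Multi-language clips (comma-separated language_raw) are served from a
+ # combined manifest carrying a _manifest suffix.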
+ is_ml = ',' in language
+ azure_m3u8_url = '%s%s/%s_src%s.ism/Manifest(format=m3u8-aapl)' % (
+ azure_base, azure_locator, video_id, ('_manifest' if is_ml else ''))
+
+ protection_token = try_get(
+ video, lambda x: x['protectiondata']['token'], compat_str)
+ if protection_token:
+ azure_m3u8_url += '?hdnts=%s' % protection_token
+
+ formats = self._extract_m3u8_formats(
+ azure_m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id='%s-hls' % cdn)
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'alt_title': general.get('subtitle'),
+ 'description': general.get('description'),
+ 'release_year': int_or_none(general.get('year')),
+ 'creator': general.get('studio') or general.get('studio_adref'),
+ 'thumbnail': try_get(
+ video, lambda x: x['imagedata']['thumb'], compat_str),
+ 'duration': parse_duration(general.get('runtime')),
+ 'timestamp': int_or_none(general.get('uploaded')),
+ 'episode_number': int_or_none(try_get(
+ video, lambda x: x['episodedata']['episode'])),
+ 'season_number': int_or_none(try_get(
+ video, lambda x: x['episodedata']['season'])),
+ 'formats': formats,
+ }
+
+
+class NexxEmbedIE(InfoExtractor):
+ _VALID_URL = r'https?://embed\.nexx(?:\.cloud|cdn\.com)/\d+/(?P<id>[^/?#&]+)'
+ _TEST = {
+ 'url': 'http://embed.nexx.cloud/748/KC1614647Z27Y7T?autoplay=1',
+ 'md5': '16746bfc28c42049492385c989b26c4a',
+ 'info_dict': {
+ 'id': '161464',
+ 'ext': 'mp4',
+ 'title': 'Nervenkitzel Achterbahn',
+ 'alt_title': 'Karussellbauer in Deutschland',
+ 'description': 'md5:ffe7b1cc59a01f585e0569949aef73cc',
+ 'release_year': 2005,
+ 'creator': 'SPIEGEL TV',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 2761,
+ 'timestamp': 1394021479,
+ 'upload_date': '20140305',
+ },
+ 'params': {
+ 'format': 'bestvideo',
+ 'skip_download': True,
+ },
+ }
+
+ @staticmethod
+ def _extract_urls(webpage):
+ # Reference:
+ # 1. https://nx-s.akamaized.net/files/201510/44.pdf
+
+ # iFrame Embed Integration
+ return [mobj.group('url') for mobj in re.finditer(
+ r'<iframe[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//embed\.nexx(?:\.cloud|cdn\.com)/\d+/(?:(?!\1).)+)\1',
+ webpage)]
+
+ def _real_extract(self, url):
+ embed_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, embed_id)
+
+ return self.url_result(NexxIE._extract_url(webpage), ie=NexxIE.ie_key())
IE_NAME = 'nick.com'
_VALID_URL = r'https?://(?:(?:www|beta)\.)?nick(?:jr)?\.com/(?:[^/]+/)?(?:videos/clip|[^/]+/videos)/(?P<id>[^/?#.]+)'
_FEED_URL = 'http://udat.mtvnservices.com/service1/dispatch.htm'
+ _GEO_COUNTRIES = ['US']
_TESTS = [{
'url': 'http://www.nick.com/videos/clip/alvinnn-and-the-chipmunks-112-full-episode.html',
'playlist': [
class NickDeIE(MTVServicesInfoExtractor):
IE_NAME = 'nick.de'
- _VALID_URL = r'https?://(?:www\.)?(?P<host>nick\.de|nickelodeon\.(?:nl|at))/(?:playlist|shows)/(?:[^/]+/)*(?P<id>[^/?#&]+)'
+ _VALID_URL = r'https?://(?:www\.)?(?P<host>nick\.(?:de|com\.pl)|nickelodeon\.(?:nl|at))/[^/]+/(?:[^/]+/)*(?P<id>[^/?#&]+)'
_TESTS = [{
'url': 'http://www.nick.de/playlist/3773-top-videos/videos/episode/17306-zu-wasser-und-zu-land-rauchende-erdnusse',
'only_matching': True,
}, {
'url': 'http://www.nickelodeon.at/playlist/3773-top-videos/videos/episode/77993-das-letzte-gefecht',
'only_matching': True,
+ }, {
+ 'url': 'http://www.nick.com.pl/seriale/474-spongebob-kanciastoporty/wideo/17412-teatr-to-jest-to-rodeo-oszolom',
+ 'only_matching': True,
}]
def _extract_mrss_url(self, webpage, host):
return self._search_regex(
r'mrss\s*:\s*(["\'])(?P<url>http.+?)\1', webpage,
'mrss url', group='url')
+
+
+class NickRuIE(MTVServicesInfoExtractor):
+ IE_NAME = 'nickelodeonru'
+ _VALID_URL = r'https?://(?:www\.)?nickelodeon\.ru/(?:playlist|shows|videos)/(?:[^/]+/)*(?P<id>[^/?#&]+)'
+ _TESTS = [{
+ 'url': 'http://www.nickelodeon.ru/shows/henrydanger/videos/episodes/3-sezon-15-seriya-licenziya-na-polyot/pmomfb#playlist/7airc6',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.nickelodeon.ru/videos/smotri-na-nickelodeon-v-iyule/g9hvh7',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+ mgid = self._extract_mgid(webpage)
+ return self.url_result('http://media.mtvnservices.com/embed/%s' % mgid)
# coding: utf-8
from __future__ import unicode_literals
-import re
import json
import datetime
from .common import InfoExtractor
from ..compat import (
+ compat_parse_qs,
compat_urlparse,
)
from ..utils import (
+ determine_ext,
+ dict_get,
ExtractorError,
int_or_none,
+ float_or_none,
parse_duration,
parse_iso8601,
- sanitized_Request,
- xpath_text,
- determine_ext,
+ remove_start,
+ try_get,
+ unified_timestamp,
urlencode_postdata,
+ xpath_text,
)
'id': 'sm22312215',
'ext': 'mp4',
'title': 'Big Buck Bunny',
+ 'thumbnail': r're:https?://.*',
'uploader': 'takuya0301',
'uploader_id': '2698420',
'upload_date': '20131123',
'timestamp': 1385182762,
'description': '(c) copyright 2008, Blender Foundation / www.bigbuckbunny.org',
'duration': 33,
+ 'view_count': int,
+ 'comment_count': int,
},
'skip': 'Requires an account',
}, {
'ext': 'swf',
'title': '【鏡音リン】Dance on media【オリジナル】take2!',
'description': 'md5:689f066d74610b3b22e0f1739add0f58',
+ 'thumbnail': r're:https?://.*',
'uploader': 'りょうた',
'uploader_id': '18822557',
'upload_date': '20110429',
'ext': 'unknown_video',
'description': 'deleted',
'title': 'ドラえもんエターナル第3話「決戦第3新東京市」＜前編＞',
+ 'thumbnail': r're:https?://.*',
'upload_date': '20071224',
'timestamp': int, # timestamp field has different value if logged in
'duration': 304,
+ 'view_count': int,
},
'skip': 'Requires an account',
}, {
'ext': 'mp4',
'title': '【第1回】RADIOアニメロミックス ラブライブ！～のぞえりRadio Garden～',
'description': 'md5:b27d224bb0ff53d3c8269e9f8b561cf1',
+ 'thumbnail': r're:https?://.*',
'timestamp': 1388851200,
'upload_date': '20140104',
'uploader': 'アニメロチャンネル',
'uploader_id': '312',
},
'skip': 'The viewing period of the video you were searching for has expired.',
+ }, {
+ # video not available via `getflv`; "old" HTML5 video
+ 'url': 'http://www.nicovideo.jp/watch/sm1151009',
+ 'md5': '8fa81c364eb619d4085354eab075598a',
+ 'info_dict': {
+ 'id': 'sm1151009',
+ 'ext': 'mp4',
+ 'title': 'マスターシステム本体内蔵のスペハリのメインテーマ（ＰＳＧ版）',
+ 'description': 'md5:6ee077e0581ff5019773e2e714cdd0b7',
+ 'thumbnail': r're:https?://.*',
+ 'duration': 184,
+ 'timestamp': 1190868283,
+ 'upload_date': '20070927',
+ 'uploader': 'denden2',
+ 'uploader_id': '1392194',
+ 'view_count': int,
+ 'comment_count': int,
+ },
+ 'skip': 'Requires an account',
+ }, {
+ # "New" HTML5 video
+ 'url': 'http://www.nicovideo.jp/watch/sm31464864',
+ 'md5': '351647b4917660986dc0fa8864085135',
+ 'info_dict': {
+ 'id': 'sm31464864',
+ 'ext': 'mp4',
+ 'title': '新作TVアニメ「戦姫絶唱シンフォギアAXZ」PV 最高画質',
+ 'description': 'md5:e52974af9a96e739196b2c1ca72b5feb',
+ 'timestamp': 1498514060,
+ 'upload_date': '20170626',
+ 'uploader': 'ゲスト',
+ 'uploader_id': '40826363',
+ 'thumbnail': r're:https?://.*',
+ 'duration': 198,
+ 'view_count': int,
+ 'comment_count': int,
+ },
+ 'skip': 'Requires an account',
+ }, {
+ 'url': 'http://sp.nicovideo.jp/watch/sm28964488?ss_pos=1&cp_in=wt_tg',
+ 'only_matching': True,
}]
- _VALID_URL = r'https?://(?:www\.|secure\.)?nicovideo\.jp/watch/(?P<id>(?:[a-z]{2})?[0-9]+)'
+ _VALID_URL = r'https?://(?:www\.|secure\.|sp\.)?nicovideo\.jp/watch/(?P<id>(?:[a-z]{2})?[0-9]+)'
_NETRC_MACHINE = 'niconico'
def _real_initialize(self):
return True
# Log in
+ login_ok = True
login_form_strs = {
- 'mail': username,
+ 'mail_tel': username,
'password': password,
}
- login_data = urlencode_postdata(login_form_strs)
- request = sanitized_Request(
- 'https://secure.nicovideo.jp/secure/login', login_data)
- login_results = self._download_webpage(
- request, None, note='Logging in', errnote='Unable to log in')
- if re.search(r'(?i)<h1 class="mb8p4">Log in error</h1>', login_results) is not None:
+ urlh = self._request_webpage(
+ 'https://account.nicovideo.jp/api/v1/login', None,
+ note='Logging in', errnote='Unable to log in',
+ data=urlencode_postdata(login_form_strs))
+ if urlh is False:
+ login_ok = False
+ else:
+ parts = compat_urlparse.urlparse(urlh.geturl())
+ if compat_parse_qs(parts.query).get('message', [None])[0] == 'cant_login':
+ login_ok = False
+ if not login_ok:
self._downloader.report_warning('unable to log in: bad username or password')
- return False
- return True
+ return login_ok
+
+ def _extract_format_for_quality(self, api_data, video_id, audio_quality, video_quality):
+ def yesno(boolean):
+ return 'yes' if boolean else 'no'
+
+ session_api_data = api_data['video']['dmcInfo']['session_api']
+ session_api_endpoint = session_api_data['urls'][0]
+
+ format_id = '-'.join(map(lambda s: remove_start(s['id'], 'archive_'), [video_quality, audio_quality]))
+
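+ # Create a DMC session by POSTing the desired audio/video source ids;
+ # the response carries the direct content_uri for this quality pair.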
+ session_response = self._download_json(
+ session_api_endpoint['url'], video_id,
+ query={'_format': 'json'},
+ headers={'Content-Type': 'application/json'},
+ note='Downloading JSON metadata for %s' % format_id,
+ data=json.dumps({
+ 'session': {
+ 'client_info': {
+ 'player_id': session_api_data['player_id'],
+ },
+ 'content_auth': {
+ 'auth_type': session_api_data['auth_types'][session_api_data['protocols'][0]],
+ 'content_key_timeout': session_api_data['content_key_timeout'],
+ 'service_id': 'nicovideo',
+ 'service_user_id': session_api_data['service_user_id']
+ },
+ 'content_id': session_api_data['content_id'],
+ 'content_src_id_sets': [{
+ 'content_src_ids': [{
+ 'src_id_to_mux': {
+ 'audio_src_ids': [audio_quality['id']],
+ 'video_src_ids': [video_quality['id']],
+ }
+ }]
+ }],
+ 'content_type': 'movie',
+ 'content_uri': '',
+ 'keep_method': {
+ 'heartbeat': {
+ 'lifetime': session_api_data['heartbeat_lifetime']
+ }
+ },
+ 'priority': session_api_data['priority'],
+ 'protocol': {
+ 'name': 'http',
+ 'parameters': {
+ 'http_parameters': {
+ 'parameters': {
+ 'http_output_download_parameters': {
+ 'use_ssl': yesno(session_api_endpoint['is_ssl']),
+ 'use_well_known_port': yesno(session_api_endpoint['is_well_known_port']),
+ }
+ }
+ }
+ }
+ },
+ 'recipe_id': session_api_data['recipe_id'],
+ 'session_operation_auth': {
+ 'session_operation_auth_by_signature': {
+ 'signature': session_api_data['signature'],
+ 'token': session_api_data['token'],
+ }
+ },
+ 'timing_constraint': 'unlimited'
+ }
+ }))
+
+ resolution = video_quality.get('resolution', {})
+
+ return {
+ 'url': session_response['data']['session']['content_uri'],
+ 'format_id': format_id,
+ 'ext': 'mp4', # the Session API is used by the HTML5 player, which always serves mp4
+ 'abr': float_or_none(audio_quality.get('bitrate'), 1000),
+ 'vbr': float_or_none(video_quality.get('bitrate'), 1000),
+ 'height': resolution.get('height'),
+ 'width': resolution.get('width'),
+ }
def _real_extract(self, url):
video_id = self._match_id(url)
if video_id.startswith('so'):
video_id = self._match_id(handle.geturl())
- video_info = self._download_xml(
- 'http://ext.nicovideo.jp/api/getthumbinfo/' + video_id, video_id,
- note='Downloading video info page')
-
- # Get flv info
- flv_info_webpage = self._download_webpage(
- 'http://flapi.nicovideo.jp/api/getflv/' + video_id + '?as3=1',
- video_id, 'Downloading flv info')
-
- flv_info = compat_urlparse.parse_qs(flv_info_webpage)
- if 'url' not in flv_info:
- if 'deleted' in flv_info:
- raise ExtractorError('The video has been deleted.',
- expected=True)
- elif 'closed' in flv_info:
- raise ExtractorError('Niconico videos now require logging in',
- expected=True)
- else:
- raise ExtractorError('Unable to find video URL')
-
- video_real_url = flv_info['url'][0]
+ api_data = self._parse_json(self._html_search_regex(
+ 'data-api-data="([^"]+)"', webpage,
+ 'API data', default='{}'), video_id)
+
+ def _format_id_from_url(video_url):
+ return 'economy' if video_url.endswith('low') else 'normal'
+
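+ # HTML5 watch pages expose the media URL via smileInfo in the API data;
+ # Flash-era pages lack it and still go through the legacy getflv API.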
+ try:
+ video_real_url = api_data['video']['smileInfo']['url']
+ except KeyError: # Flash videos
+ # Get flv info
+ flv_info_webpage = self._download_webpage(
+ 'http://flapi.nicovideo.jp/api/getflv/' + video_id + '?as3=1',
+ video_id, 'Downloading flv info')
+
+ flv_info = compat_urlparse.parse_qs(flv_info_webpage)
+ if 'url' not in flv_info:
+ if 'deleted' in flv_info:
+ raise ExtractorError('The video has been deleted.',
+ expected=True)
+ elif 'closed' in flv_info:
+ raise ExtractorError('Niconico videos now require logging in',
+ expected=True)
+ elif 'error' in flv_info:
+ raise ExtractorError('%s reports error: %s' % (
+ self.IE_NAME, flv_info['error'][0]), expected=True)
+ else:
+ raise ExtractorError('Unable to find video URL')
+
+ video_info_xml = self._download_xml(
+ 'http://ext.nicovideo.jp/api/getthumbinfo/' + video_id,
+ video_id, note='Downloading video info page')
+
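+ # Return the first non-empty text among the given tag names in the
+ # getthumbinfo XML response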
+ def get_video_info(items):
+ if not isinstance(items, list):
+ items = [items]
+ for item in items:
+ ret = xpath_text(video_info_xml, './/' + item)
+ if ret:
+ return ret
+
+ video_real_url = flv_info['url'][0]
+
+ extension = get_video_info('movie_type')
+ if not extension:
+ extension = determine_ext(video_real_url)
+
+ formats = [{
+ 'url': video_real_url,
+ 'ext': extension,
+ 'format_id': _format_id_from_url(video_real_url),
+ }]
+ else:
+ formats = []
+
+ dmc_info = api_data['video'].get('dmcInfo')
+ if dmc_info: # "New" HTML5 videos
+ quality_info = dmc_info['quality']
+ for audio_quality in quality_info['audios']:
+ for video_quality in quality_info['videos']:
+ if not audio_quality['available'] or not video_quality['available']:
+ continue
+ formats.append(self._extract_format_for_quality(
+ api_data, video_id, audio_quality, video_quality))
+
+ self._sort_formats(formats)
+ else: # "Old" HTML5 videos
+ formats = [{
+ 'url': video_real_url,
+ 'ext': 'mp4',
+ 'format_id': _format_id_from_url(video_real_url),
+ }]
+
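+ # For HTML5 pages further metadata comes from the JSON API data
+ # rather than the XML thumb info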
+ def get_video_info(items):
+ return dict_get(api_data['video'], items)
# Start extracting information
- title = xpath_text(video_info, './/title')
+ title = get_video_info('title')
if not title:
title = self._og_search_title(webpage, default=None)
if not title:
watch_api_data = self._parse_json(watch_api_data_string, video_id) if watch_api_data_string else {}
video_detail = watch_api_data.get('videoDetail', {})
- extension = xpath_text(video_info, './/movie_type')
- if not extension:
- extension = determine_ext(video_real_url)
-
thumbnail = (
- xpath_text(video_info, './/thumbnail_url') or
+ get_video_info(['thumbnail_url', 'thumbnailURL']) or
self._html_search_meta('image', webpage, 'thumbnail', default=None) or
video_detail.get('thumbnail'))
- description = xpath_text(video_info, './/description')
+ description = get_video_info('description')
- timestamp = parse_iso8601(xpath_text(video_info, './/first_retrieve'))
+ timestamp = (parse_iso8601(get_video_info('first_retrieve')) or
+ unified_timestamp(get_video_info('postedDateTime')))
if not timestamp:
match = self._html_search_meta('datePublished', webpage, 'date published', default=None)
if match:
video_detail['postedAt'].replace('/', '-'),
delimiter=' ', timezone=datetime.timedelta(hours=9))
- view_count = int_or_none(xpath_text(video_info, './/view_counter'))
+ view_count = int_or_none(get_video_info(['view_counter', 'viewCount']))
if not view_count:
match = self._html_search_regex(
r'>Views: <strong[^>]*>([^<]+)</strong>',
view_count = int_or_none(match.replace(',', ''))
view_count = view_count or video_detail.get('viewCount')
- comment_count = int_or_none(xpath_text(video_info, './/comment_num'))
+ comment_count = (int_or_none(get_video_info('comment_num')) or
+ video_detail.get('commentCount') or
+ try_get(api_data, lambda x: x['thread']['commentCount']))
if not comment_count:
match = self._html_search_regex(
r'>Comments: <strong[^>]*>([^<]+)</strong>',
webpage, 'comment count', default=None)
if match:
comment_count = int_or_none(match.replace(',', ''))
- comment_count = comment_count or video_detail.get('commentCount')
duration = (parse_duration(
- xpath_text(video_info, './/length') or
+ get_video_info('length') or
self._html_search_meta(
'video:duration', webpage, 'video duration', default=None)) or
- video_detail.get('length'))
+ video_detail.get('length') or
+ get_video_info('duration'))
- webpage_url = xpath_text(video_info, './/watch_url') or url
+ webpage_url = get_video_info('watch_url') or url
- if video_info.find('.//ch_id') is not None:
- uploader_id = video_info.find('.//ch_id').text
- uploader = video_info.find('.//ch_name').text
- elif video_info.find('.//user_id') is not None:
- uploader_id = video_info.find('.//user_id').text
- uploader = video_info.find('.//user_nickname').text
- else:
- uploader_id = uploader = None
+ owner = api_data.get('owner', {})
+ uploader_id = get_video_info(['ch_id', 'user_id']) or owner.get('id')
+ uploader = get_video_info(['ch_name', 'user_nickname']) or owner.get('nickname')
return {
'id': video_id,
- 'url': video_real_url,
'title': title,
- 'ext': extension,
- 'format_id': 'economy' if video_real_url.endswith('low') else 'normal',
+ 'formats': formats,
'thumbnail': thumbnail,
'description': description,
'uploader': uploader,
from .common import InfoExtractor
from ..compat import compat_urlparse
from ..utils import (
+ extract_attributes,
get_element_by_class,
urlencode_postdata,
)
webpage = self._download_webpage(url, video_id)
formats = []
- for player_url, kind in re.findall(r'<a[^>]+href="(/player[^"]+)".+?<img[^>]+src="[^"]+qf_btn_([^".]+)', webpage):
- player_url = compat_urlparse.urljoin(url, player_url)
-
+ for mobj in re.finditer(r'<a[^>]+\bhref=(["\'])/player.+?[^>]*>', webpage):
+ player = extract_attributes(mobj.group(0))
+ player_path = player.get('href')
+ if not player_path:
+ continue
+ kind = self._search_regex(
+ r'(low|high)$', player.get('class') or '', 'kind',
+ default='low')
+ player_url = compat_urlparse.urljoin(url, player_path)
player_page = self._download_webpage(
player_url, video_id, note='Downloading player page')
-
entries = self._parse_html5_media_entries(
player_url, player_page, video_id, m3u8_id='hls-%s' % kind,
- m3u8_entry_protocol='m3u8_native',
- preference=2 if 'hq' in kind else 1)
- formats.extend(entries[0]['formats'])
+ m3u8_entry_protocol='m3u8_native')
+ kind_formats = entries[0]['formats']
+ for f in kind_formats:
+ f['quality'] = 2 if kind == 'high' else 1
+ formats.extend(kind_formats)
self._sort_formats(formats)
from ..compat import compat_str
from ..utils import (
int_or_none,
+ js_to_json,
smuggle_url,
try_get,
)
'timestamp': 1491399228,
'upload_date': '20170405',
'uploader_id': '618566855001',
- 'creator': 'vtele',
- 'view_count': int,
'series': 'RPM+',
},
'params': {
'info_dict': {
'id': '5395865725001',
'title': 'Épisode 13 : Les retrouvailles',
- 'description': 'md5:336d5ebc5436534e61d16e63ddfca327',
+ 'description': 'md5:888c3330f0c1b4476c5bc99a1c040473',
'ext': 'mp4',
'timestamp': 1492019320,
'upload_date': '20170412',
'uploader_id': '618566855001',
- 'creator': 'vtele',
- 'view_count': int,
'series': "L'amour est dans le pré",
'season_number': 5,
'episode': 'Épisode 13',
def _real_extract(self, url):
video_id = self._match_id(url)
- data = self._download_json(
- 'http://api.noovo.ca/api/v1/pages/single-episode/%s' % video_id,
- video_id)['data']
+ webpage = self._download_webpage(url, video_id)
- content = try_get(data, lambda x: x['contents'][0])
+ bc_url = BrightcoveNewIE._extract_url(self, webpage)
- brightcove_id = data.get('brightcoveId') or content['brightcoveId']
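+ # Page metadata lives in a dataLayer push; fall back to meta tags
+ # below when it is absent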
+ data = self._parse_json(
+ self._search_regex(
+ r'(?s)dataLayer\.push\(\s*({.+?})\s*\);', webpage, 'data',
+ default='{}'),
+ video_id, transform_source=js_to_json, fatal=False)
+
+ title = try_get(
+ data, lambda x: x['video']['nom'],
+ compat_str) or self._html_search_meta(
+ 'dcterms.Title', webpage, 'title', fatal=True)
+
+ description = self._html_search_meta(
+ ('dcterms.Description', 'description'), webpage, 'description')
series = try_get(
- data, (
- lambda x: x['show']['title'],
- lambda x: x['season']['show']['title']),
- compat_str)
+ data, lambda x: x['emission']['nom']) or self._search_regex(
+ r'<div[^>]+class="banner-card__subtitle h4"[^>]*>([^<]+)',
+ webpage, 'series', default=None)
- episode = None
- og = data.get('og')
- if isinstance(og, dict) and og.get('type') == 'video.episode':
- episode = og.get('title')
+ season_el = try_get(data, lambda x: x['emission']['saison'], dict) or {}
+ season = try_get(season_el, lambda x: x['nom'], compat_str)
+ season_number = int_or_none(try_get(season_el, lambda x: x['numero']))
- video = content or data
+ episode_el = try_get(season_el, lambda x: x['episode'], dict) or {}
+ episode = try_get(episode_el, lambda x: x['nom'], compat_str)
+ episode_number = int_or_none(try_get(episode_el, lambda x: x['numero']))
return {
'_type': 'url_transparent',
'ie_key': BrightcoveNewIE.ie_key(),
- 'url': smuggle_url(
- self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id,
- {'geo_countries': ['CA']}),
- 'id': brightcove_id,
- 'title': video.get('title'),
- 'creator': video.get('source'),
- 'view_count': int_or_none(video.get('viewsCount')),
+ 'url': smuggle_url(bc_url, {'geo_countries': ['CA']}),
+ 'title': title,
+ 'description': description,
'series': series,
- 'season_number': int_or_none(try_get(
- data, lambda x: x['season']['seasonNumber'])),
+ 'season': season,
+ 'season_number': season_number,
'episode': episode,
- 'episode_number': int_or_none(data.get('episodeNumber')),
+ 'episode_number': episode_number,
}
class NPOIE(NPOBaseIE):
IE_NAME = 'npo'
- IE_DESC = 'npo.nl and ntr.nl'
+ IE_DESC = 'npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl'
_VALID_URL = r'''(?x)
(?:
npo:|
https?://
(?:www\.)?
(?:
- npo\.nl/(?!live|radio)(?:[^/]+/){2}|
+ npo\.nl/(?!(?:live|radio)/)(?:[^/]+/){2}|
ntr\.nl/(?:[^/]+/){2,}|
omroepwnl\.nl/video/fragment/[^/]+__|
- zapp\.nl/[^/]+/[^/]+/
+ (?:zapp|npo3)\.nl/(?:[^/]+/){2}
)
)
(?P<id>[^/?#]+)
}, {
'url': 'http://www.zapp.nl/beste-vrienden-quiz/extra-video-s/WO_NTR_1067990',
'only_matching': True,
+ }, {
+ 'url': 'https://www.npo3.nl/3onderzoekt/16-09-2015/VPWON_1239870',
+ 'only_matching': True,
}, {
# live stream
'url': 'npo:LI_NL1_4188102',
'only_matching': True,
+ }, {
+ 'url': 'http://www.npo.nl/radio-gaga/13-06-2017/BNN_101383373',
+ 'only_matching': True,
}]
def _real_extract(self, url):
webpage = self._download_webpage(url, display_id)
live_id = self._search_regex(
- r'data-prid="([^"]+)"', webpage, 'live id')
+ [r'media-id="([^"]+)"', r'data-prid="([^"]+)"'], webpage, 'live id')
return {
'_type': 'url_transparent',
(?:/\d{2}-\d{2}-\d{4})?
(?:\#del=(?P<part_id>\d+))?
''' % _EPISODE_RE
- _API_HOST = 'psapi-we.nrk.no'
+ _API_HOST = 'psapi-ne.nrk.no'
_TESTS = [{
'url': 'https://tv.nrk.no/serie/20-spoersmaal-tv/MUHH48000314/23-05-2014',
get_element_by_class,
int_or_none,
js_to_json,
+ NO_DEFAULT,
parse_iso8601,
remove_start,
strip_or_none,
'upload_date': '20170214',
'timestamp': 1487078046,
},
+ }, {
+ # embedded via pulsembed
+ 'url': 'http://film.onet.pl/pensjonat-nad-rozlewiskiem-relacja-z-planu-serialu/y428n0',
+ 'info_dict': {
+ 'id': '501235.965429946',
+ 'ext': 'mp4',
+ 'title': '"Pensjonat nad rozlewiskiem": relacja z planu serialu',
+ 'upload_date': '20170622',
+ 'timestamp': 1498159955,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
}, {
'url': 'http://film.onet.pl/zwiastuny/ghost-in-the-shell-drugi-zwiastun-pl/5q6yl3',
'only_matching': True,
'only_matching': True,
}]
+ def _search_mvp_id(self, webpage, default=NO_DEFAULT):
+ return self._search_regex(
+ r'data-(?:params-)?mvp=["\'](\d+\.\d+)', webpage, 'mvp id',
+ default=default)
+
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
- mvp_id = self._search_regex(
- r'data-params-mvp=["\'](\d+\.\d+)', webpage, 'mvp id')
+ mvp_id = self._search_mvp_id(webpage, default=None)
+
+ if not mvp_id:
+ pulsembed_url = self._search_regex(
+ r'data-src=(["\'])(?P<url>(?:https?:)?//pulsembed\.eu/.+?)\1',
+ webpage, 'pulsembed url', group='url')
+ webpage = self._download_webpage(
+ pulsembed_url, video_id, 'Downloading pulsembed webpage')
+ mvp_id = self._search_mvp_id(webpage)
return self.url_result(
'onetmvp:%s' % mvp_id, OnetMVPIE.ie_key(), video_id=mvp_id)
import base64
from .common import InfoExtractor
+from ..compat import compat_str
from ..utils import (
- int_or_none,
- float_or_none,
+ determine_ext,
ExtractorError,
+ float_or_none,
+ int_or_none,
+ try_get,
unsmuggle_url,
- determine_ext,
)
from ..compat import compat_urllib_parse_urlencode
formats = []
if cur_auth_data['authorized']:
for stream in cur_auth_data['streams']:
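+ # Stream URLs come base64-encoded; skip entries with missing data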
- s_url = base64.b64decode(
- stream['url']['data'].encode('ascii')).decode('utf-8')
- if s_url in urls:
+ url_data = try_get(stream, lambda x: x['url']['data'], compat_str)
+ if not url_data:
+ continue
+ s_url = base64.b64decode(url_data.encode('ascii')).decode('utf-8')
+ if not s_url or s_url in urls:
continue
urls.append(s_url)
ext = determine_ext(s_url, None)
- delivery_type = stream['delivery_type']
+ delivery_type = stream.get('delivery_type')
if delivery_type == 'hls' or ext == 'm3u8':
formats.extend(self._extract_m3u8_formats(
re.sub(r'/ip(?:ad|hone)/', '/all/', s_url), embed_code, 'mp4', 'm3u8_native',
else:
formats.append({
'url': s_url,
- 'ext': ext or stream.get('delivery_type'),
+ 'ext': ext or delivery_type,
'vcodec': stream.get('video_codec'),
'format_id': delivery_type,
'width': int_or_none(stream.get('width')),
'title': 'Divide Tool Path.mp4',
'duration': 204.405,
}
+ },
+ {
+ # empty stream['url']['data']
+ 'url': 'http://player.ooyala.com/player.js?embedCode=w2bnZtYjE6axZ_dw1Cd0hQtXd_ige2Is',
+ 'only_matching': True,
}
]
# coding: utf-8
from __future__ import unicode_literals
+import json
+import os
import re
+import subprocess
+import tempfile
from .common import InfoExtractor
-from ..compat import compat_chr
+from ..compat import (
+ compat_urlparse,
+ compat_kwargs,
+)
from ..utils import (
+ check_executable,
determine_ext,
+ encodeArgument,
ExtractorError,
+ get_element_by_id,
+ get_exe_version,
+ is_outdated_version,
+ std_headers,
)
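+# Converts a cookielib.Cookie into the dict format accepted by
+# PhantomJS's phantom.addCookie()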
+def cookie_to_dict(cookie):
+ cookie_dict = {
+ 'name': cookie.name,
+ 'value': cookie.value,
+ }
+ if cookie.port_specified:
+ cookie_dict['port'] = cookie.port
+ if cookie.domain_specified:
+ cookie_dict['domain'] = cookie.domain
+ if cookie.path_specified:
+ cookie_dict['path'] = cookie.path
+ if cookie.expires is not None:
+ cookie_dict['expires'] = cookie.expires
+ if cookie.secure is not None:
+ cookie_dict['secure'] = cookie.secure
+ if cookie.discard is not None:
+ cookie_dict['discard'] = cookie.discard
+ try:
+ if (cookie.has_nonstandard_attr('httpOnly') or
+ cookie.has_nonstandard_attr('httponly') or
+ cookie.has_nonstandard_attr('HttpOnly')):
+ cookie_dict['httponly'] = True
+ except TypeError:
+ pass
+ return cookie_dict
+
+
+def cookie_jar_to_list(cookie_jar):
+ return [cookie_to_dict(cookie) for cookie in cookie_jar]
+
+
+class PhantomJSwrapper(object):
+ """PhantomJS wrapper class
+
+ This class is experimental.
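+
+ Example (a minimal sketch; assumes `self` is an InfoExtractor):
+
+ phantom = PhantomJSwrapper(self, required_version='2.0')
+ webpage, js_log = phantom.get(url, video_id=video_id)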
+ """
+
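+ # The script below runs inside PhantomJS: it restores saved cookies and
+ # the downloaded HTML from temp files, runs {jscode} once the page has
+ # loaded, and saveAndExit() writes the rendered DOM and cookies back.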
+ _TEMPLATE = r'''
+ phantom.onError = function(msg, trace) {{
+ var msgStack = ['PHANTOM ERROR: ' + msg];
+ if(trace && trace.length) {{
+ msgStack.push('TRACE:');
+ trace.forEach(function(t) {{
+ msgStack.push(' -> ' + (t.file || t.sourceURL) + ': ' + t.line
+ + (t.function ? ' (in function ' + t.function +')' : ''));
+ }});
+ }}
+ console.error(msgStack.join('\n'));
+ phantom.exit(1);
+ }};
+ var page = require('webpage').create();
+ var fs = require('fs');
+ var read = {{ mode: 'r', charset: 'utf-8' }};
+ var write = {{ mode: 'w', charset: 'utf-8' }};
+ JSON.parse(fs.read("{cookies}", read)).forEach(function(x) {{
+ phantom.addCookie(x);
+ }});
+ page.settings.resourceTimeout = {timeout};
+ page.settings.userAgent = "{ua}";
+ page.onLoadStarted = function() {{
+ page.evaluate(function() {{
+ delete window._phantom;
+ delete window.callPhantom;
+ }});
+ }};
+ var saveAndExit = function() {{
+ fs.write("{html}", page.content, write);
+ fs.write("{cookies}", JSON.stringify(phantom.cookies), write);
+ phantom.exit();
+ }};
+ page.onLoadFinished = function(status) {{
+ if(page.url === "") {{
+ page.setContent(fs.read("{html}", read), "{url}");
+ }}
+ else {{
+ {jscode}
+ }}
+ }};
+ page.open("");
+ '''
+
+ _TMP_FILE_NAMES = ['script', 'html', 'cookies']
+
+ @staticmethod
+ def _version():
+ return get_exe_version('phantomjs', version_re=r'([0-9.]+)')
+
+ def __init__(self, extractor, required_version=None, timeout=10000):
+ self.exe = check_executable('phantomjs', ['-v'])
+ if not self.exe:
+ raise ExtractorError('PhantomJS executable not found in PATH, '
+ 'download it from http://phantomjs.org',
+ expected=True)
+
+ self.extractor = extractor
+
+ if required_version:
+ version = self._version()
+ if is_outdated_version(version, required_version):
+ self.extractor._downloader.report_warning(
+ 'Your copy of PhantomJS is outdated, update it to version '
+ '%s or newer if you encounter any errors.' % required_version)
+
+ self.options = {
+ 'timeout': timeout,
+ }
+ self._TMP_FILES = {}
+ for name in self._TMP_FILE_NAMES:
+ tmp = tempfile.NamedTemporaryFile(delete=False)
+ tmp.close()
+ self._TMP_FILES[name] = tmp
+
+ def __del__(self):
+ for name in self._TMP_FILE_NAMES:
+ try:
+ os.remove(self._TMP_FILES[name].name)
+ except (IOError, OSError, KeyError):
+ pass
+
+ def _save_cookies(self, url):
+ cookies = cookie_jar_to_list(self.extractor._downloader.cookiejar)
+ for cookie in cookies:
+ if 'path' not in cookie:
+ cookie['path'] = '/'
+ if 'domain' not in cookie:
+ cookie['domain'] = compat_urlparse.urlparse(url).netloc
+ with open(self._TMP_FILES['cookies'].name, 'wb') as f:
+ f.write(json.dumps(cookies).encode('utf-8'))
+
+ def _load_cookies(self):
+ with open(self._TMP_FILES['cookies'].name, 'rb') as f:
+ cookies = json.loads(f.read().decode('utf-8'))
+ for cookie in cookies:
+ if cookie.get('httponly') is True:
+ cookie['rest'] = {'httpOnly': None}
+ if 'expiry' in cookie:
+ cookie['expire_time'] = cookie['expiry']
+ self.extractor._set_cookie(**compat_kwargs(cookie))
+
+ def get(self, url, html=None, video_id=None, note=None, note2='Executing JS on webpage', headers={}, jscode='saveAndExit();'):
+ """
+ Downloads webpage (if needed) and executes JS
+
+ Params:
+ url: website url
+ html: optional, html code of website
+ video_id: video id
+ note: optional, displayed when downloading webpage
+ note2: optional, displayed when executing JS
+ headers: custom http headers
+ jscode: code to be executed when page is loaded
+
+ Returns a tuple with:
+ * downloaded website (after JS execution)
+ * anything you print with `console.log` (but not inside `page.evaluate`!)
+
+ In most cases you don't need to add any `jscode`.
+ It is executed in `page.onLoadFinished`.
+ `saveAndExit();` is mandatory; use it instead of `phantom.exit()`.
+ It is possible to wait for some element on the webpage, for example:
+ var check = function() {
+ var elementFound = page.evaluate(function() {
+ return document.querySelector('#b.done') !== null;
+ });
+ if(elementFound)
+ saveAndExit();
+ else
+ window.setTimeout(check, 500);
+ }
+
+ page.evaluate(function(){
+ document.querySelector('#a').click();
+ });
+ check();
+ """
+ if 'saveAndExit();' not in jscode:
+ raise ExtractorError('`saveAndExit();` not found in `jscode`')
+ if not html:
+ html = self.extractor._download_webpage(url, video_id, note=note, headers=headers)
+ with open(self._TMP_FILES['html'].name, 'wb') as f:
+ f.write(html.encode('utf-8'))
+
+ self._save_cookies(url)
+
+ replaces = self.options
+ replaces['url'] = url
+ user_agent = headers.get('User-Agent') or std_headers['User-Agent']
+ replaces['ua'] = user_agent.replace('"', '\\"')
+ replaces['jscode'] = jscode
+
+ for x in self._TMP_FILE_NAMES:
+ replaces[x] = self._TMP_FILES[x].name.replace('\\', '\\\\').replace('"', '\\"')
+
+ with open(self._TMP_FILES['script'].name, 'wb') as f:
+ f.write(self._TEMPLATE.format(**replaces).encode('utf-8'))
+
+ if video_id is None:
+ self.extractor.to_screen('%s' % (note2,))
+ else:
+ self.extractor.to_screen('%s: %s' % (video_id, note2))
+
+ p = subprocess.Popen([
+ self.exe, '--ssl-protocol=any',
+ self._TMP_FILES['script'].name
+ ], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+ out, err = p.communicate()
+ if p.returncode != 0:
+ raise ExtractorError(
+ 'Executing JS failed:\n' + encodeArgument(err))
+ with open(self._TMP_FILES['html'].name, 'rb') as f:
+ html = f.read().decode('utf-8')
+
+ self._load_cookies()
+
+ return (html, encodeArgument(out))
+
+
class OpenloadIE(InfoExtractor):
_VALID_URL = r'https?://(?:openload\.(?:co|io)|oload\.tv)/(?:f|embed)/(?P<id>[a-zA-Z0-9-_]+)'
'only_matching': True,
}]
+ _USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36'
+
@staticmethod
def _extract_urls(webpage):
return re.findall(
def _real_extract(self, url):
video_id = self._match_id(url)
- webpage = self._download_webpage('https://openload.co/embed/%s/' % video_id, video_id)
+ url = 'https://openload.co/embed/%s/' % video_id
+ headers = {
+ 'User-Agent': self._USER_AGENT,
+ }
+
+ webpage = self._download_webpage(url, video_id, headers=headers)
if 'File not found' in webpage or 'deleted by the owner' in webpage:
- raise ExtractorError('File not found', expected=True)
-
- ol_id = self._search_regex(
- '<span[^>]+id="[^"]+"[^>]*>([0-9A-Za-z]+)</span>',
- webpage, 'openload ID')
-
- decoded = ''
- a = ol_id[0:24]
- b = []
- for i in range(0, len(a), 8):
- b.append(int(a[i:i + 8] or '0', 16))
- ol_id = ol_id[24:]
- j = 0
- k = 0
- while j < len(ol_id):
- c = 128
- d = 0
- e = 0
- f = 0
- _more = True
- while _more:
- if j + 1 >= len(ol_id):
- c = 143
- f = int(ol_id[j:j + 2] or '0', 16)
- j += 2
- d += (f & 127) << e
- e += 7
- _more = f >= c
- g = d ^ b[k % 3]
- for i in range(4):
- char_dec = (g >> 8 * i) & (c + 127)
- char = compat_chr(char_dec)
- if char != '#':
- decoded += char
- k += 1
-
- video_url = 'https://openload.co/stream/%s?mime=true'
- video_url = video_url % decoded
+ raise ExtractorError('File not found', expected=True, video_id=video_id)
+
+ phantom = PhantomJSwrapper(self, required_version='2.0')
+ webpage, _ = phantom.get(url, html=webpage, video_id=video_id, headers=headers)
+
+ decoded_id = get_element_by_id('streamurl', webpage)
+
+ video_url = 'https://openload.co/stream/%s?mime=true' % decoded_id
title = self._og_search_title(webpage, default=None) or self._search_regex(
r'<span[^>]+class=["\']title["\'][^>]*>([^<]+)', webpage,
'description', webpage, 'title', fatal=True)
entries = self._parse_html5_media_entries(url, webpage, video_id)
- subtitles = entries[0]['subtitles'] if entries else None
+ entry = entries[0] if entries else {}
+ subtitles = entry.get('subtitles')
info_dict = {
'id': video_id,
'title': title,
- 'thumbnail': self._og_search_thumbnail(webpage, default=None),
+ 'thumbnail': entry.get('thumbnail') or self._og_search_thumbnail(webpage, default=None),
'url': video_url,
# Seems all videos have extensions in their titles
'ext': determine_ext(title, 'mp4'),
'subtitles': subtitles,
+ 'http_headers': headers,
}
return info_dict
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
+ determine_ext,
+ float_or_none,
HEADRequest,
- unified_strdate,
- strip_jsonp,
int_or_none,
- float_or_none,
- determine_ext,
+ orderedSet,
remove_end,
+ strip_jsonp,
unescapeHTML,
+ unified_strdate,
)
'upload_date': upload_date,
'formats': formats,
}
+
+
+class ORFFM4StoryIE(InfoExtractor):
+ IE_NAME = 'orf:fm4:story'
+ IE_DESC = 'fm4.orf.at stories'
+ _VALID_URL = r'https?://fm4\.orf\.at/stories/(?P<id>\d+)'
+
+ _TEST = {
+ 'url': 'http://fm4.orf.at/stories/2865738/',
+ 'playlist': [{
+ 'md5': 'e1c2c706c45c7b34cf478bbf409907ca',
+ 'info_dict': {
+ 'id': '547792',
+ 'ext': 'flv',
+ 'title': 'Manu Delago und Inner Tongue live',
+ 'description': 'Manu Delago und Inner Tongue haben bei der FM4 Soundpark Session live alles gegeben. Hier gibt es Fotos und die gesamte Session als Video.',
+ 'duration': 1748.52,
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'upload_date': '20170913',
+ },
+ }, {
+ 'md5': 'c6dd2179731f86f4f55a7b49899d515f',
+ 'info_dict': {
+ 'id': '547798',
+ 'ext': 'flv',
+ 'title': 'Manu Delago und Inner Tongue live (2)',
+ 'duration': 1504.08,
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'upload_date': '20170913',
+ 'description': 'Manu Delago und Inner Tongue haben bei der FM4 Soundpark Session live alles gegeben. Hier gibt es Fotos und die gesamte Session als Video.',
+ },
+ }],
+ }
+
+ def _real_extract(self, url):
+ story_id = self._match_id(url)
+ webpage = self._download_webpage(url, story_id)
+
+ entries = []
+ all_ids = orderedSet(re.findall(r'data-video(?:id)?="(\d+)"', webpage))
+ for idx, video_id in enumerate(all_ids):
+ data = self._download_json(
+ 'http://bits.orf.at/filehandler/static-api/json/current/data.json?file=%s' % video_id,
+ video_id)[0]
+
+ duration = float_or_none(data['duration'], 1000)
+
+ video = data['sources']['q8c']
+ load_balancer_url = video['loadBalancerUrl']
+ abr = int_or_none(video.get('audioBitrate'))
+ vbr = int_or_none(video.get('bitrate'))
+ fps = int_or_none(video.get('videoFps'))
+ width = int_or_none(video.get('videoWidth'))
+ height = int_or_none(video.get('videoHeight'))
+ thumbnail = video.get('preview')
+
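+ # The load balancer responds with JSONP mapping delivery protocols
+ # (rtmp, f4m, m3u8) to concrete URLs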
+ rendition = self._download_json(
+ load_balancer_url, video_id, transform_source=strip_jsonp)
+
+ f = {
+ 'abr': abr,
+ 'vbr': vbr,
+ 'fps': fps,
+ 'width': width,
+ 'height': height,
+ }
+
+ formats = []
+ for format_id, format_url in rendition['redirect'].items():
+ if format_id == 'rtmp':
+ ff = f.copy()
+ ff.update({
+ 'url': format_url,
+ 'format_id': format_id,
+ })
+ formats.append(ff)
+ elif determine_ext(format_url) == 'f4m':
+ formats.extend(self._extract_f4m_formats(
+ format_url, video_id, f4m_id=format_id))
+ elif determine_ext(format_url) == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ format_url, video_id, 'mp4', m3u8_id=format_id))
+ else:
+ continue
+ self._sort_formats(formats)
+
+ title = remove_end(self._og_search_title(webpage), ' - fm4.ORF.at')
+ if idx >= 1:
+ # Titles are duplicates, make them unique
+ title += ' (' + str(idx + 1) + ')'
+ description = self._og_search_description(webpage)
+ upload_date = unified_strdate(self._html_search_meta(
+ 'dc.date', webpage, 'upload date'))
+
+ entries.append({
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'duration': duration,
+ 'thumbnail': thumbnail,
+ 'upload_date': upload_date,
+ 'formats': formats,
+ })
+
+ return self.playlist_result(entries)
from __future__ import unicode_literals
+import json
import re
from .common import InfoExtractor
strip_or_none,
unified_timestamp,
urljoin,
- urlencode_postdata,
)
(username, password) = self._get_login_info()
if username is None:
return
- webpage = self._download_webpage(self._PACKT_BASE, None)
- login_form = self._form_hidden_inputs(
- 'packt-user-login-form', webpage)
- login_form.update({
- 'email': username,
- 'password': password,
- })
- self._download_webpage(
- self._PACKT_BASE, None, 'Logging in as %s' % username,
- data=urlencode_postdata(login_form))
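+ # Authenticate directly against the REST API; the response carries the
+ # access token that is later sent as a Bearer Authorization header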
try:
self._TOKEN = self._download_json(
- '%s/users/tokens/sessions' % self._MAPT_REST, None,
- 'Downloading Authorization Token')['data']['token']
+ self._MAPT_REST + '/users/tokens', None,
+ 'Downloading Authorization Token', data=json.dumps({
+ 'email': username,
+ 'password': password,
+ }).encode())['data']['access']
except ExtractorError as e:
- if isinstance(e.cause, compat_HTTPError) and e.cause.code in (401, 404):
+ if isinstance(e.cause, compat_HTTPError) and e.cause.code in (400, 401, 404):
message = self._parse_json(e.cause.read().decode(), None)['message']
raise ExtractorError(message, expected=True)
raise
headers = {}
if self._TOKEN:
- headers['Authorization'] = self._TOKEN
+ headers['Authorization'] = 'Bearer ' + self._TOKEN
video = self._download_json(
'%s/users/me/products/%s/chapters/%s/sections/%s'
% (self._MAPT_REST, course_id, chapter_id, video_id), video_id,
class PandaTVIE(InfoExtractor):
IE_DESC = '熊猫TV'
- _VALID_URL = r'http://(?:www\.)?panda\.tv/(?P<id>[0-9]+)'
- _TEST = {
- 'url': 'http://www.panda.tv/10091',
+ _VALID_URL = r'https?://(?:www\.)?panda\.tv/(?P<id>[0-9]+)'
+ _TESTS = [{
+ 'url': 'http://www.panda.tv/66666',
'info_dict': {
- 'id': '10091',
+ 'id': '66666',
'title': 're:.+',
- 'uploader': '囚徒',
+ 'uploader': '刘杀鸡',
'ext': 'flv',
'is_live': True,
},
'skip_download': True,
},
'skip': 'Live stream is offline',
- }
+ }, {
+ 'url': 'https://www.panda.tv/66666',
+ 'only_matching': True,
+ }]
def _real_extract(self, url):
video_id = self._match_id(url)
config = self._download_json(
- 'http://www.panda.tv/api_room?roomid=%s' % video_id, video_id)
+ 'https://www.panda.tv/api_room?roomid=%s' % video_id, video_id)
error_code = config.get('errno', 0)
if error_code != 0:
continue
for pref, (ext, pl) in enumerate((('m3u8', '-hls'), ('flv', ''))):
formats.append({
- 'url': 'http://pl%s%s.live.panda.tv/live_panda/%s%s%s.%s'
+ 'url': 'https://pl%s%s.live.panda.tv/live_panda/%s%s%s.%s'
% (pl, plflag1, room_key, live_panda, suffix[quality], ext),
'format_id': '%s-%s' % (k, ext),
'quality': quality,
IE_NAME = 'pandora.tv'
IE_DESC = '판도라TV'
_VALID_URL = r'https?://(?:.+?\.)?channel\.pandora\.tv/channel/video\.ptv\?'
- _TEST = {
+ _TESTS = [{
'url': 'http://jp.channel.pandora.tv/channel/video.ptv?c1=&prgid=53294230&ch_userid=mikakim&ref=main&lot=cate_01_2',
'info_dict': {
'id': '53294230',
'view_count': int,
'like_count': int,
}
- }
+ }, {
+ 'url': 'http://channel.pandora.tv/channel/video.ptv?ch_userid=gogoucc&prgid=54721744',
+ 'info_dict': {
+ 'id': '54721744',
+ 'ext': 'flv',
+ 'title': '[HD] JAPAN COUNTDOWN 170423',
+ 'description': '[HD] JAPAN COUNTDOWN 170423',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 1704.9,
+ 'upload_date': '20170423',
+ 'uploader': 'GOGO_UCC',
+ 'uploader_id': 'gogoucc',
+ 'view_count': int,
+ 'like_count': int,
+ },
+ 'params': {
+ # Test metadata only
+ 'skip_download': True,
+ },
+ }]
def _real_extract(self, url):
qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
'description': info.get('body'),
'thumbnail': info.get('thumbnail') or info.get('poster'),
'duration': float_or_none(info.get('runtime'), 1000) or parse_duration(info.get('time')),
- 'upload_date': info['fid'][:8] if isinstance(info.get('fid'), compat_str) else None,
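+ # fid may be a path whose last component starts with the YYYYMMDD upload date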
+ 'upload_date': info['fid'].split('/')[-1][:8] if isinstance(info.get('fid'), compat_str) else None,
'uploader': info.get('nickname'),
'uploader_id': info.get('upload_userid'),
'view_count': str_to_int(info.get('hit')),
# Direct video URL
(?:%s)/(?:viralplayer|video)/(?P<id>[0-9]+)/? |
# Article with embedded player (or direct video)
- (?:www\.)?pbs\.org/(?:[^/]+/){2,5}(?P<presumptive_id>[^/]+?)(?:\.html)?/?(?:$|[?\#]) |
+ (?:www\.)?pbs\.org/(?:[^/]+/){1,5}(?P<presumptive_id>[^/]+?)(?:\.html)?/?(?:$|[?\#]) |
# Player
(?:video|player)\.pbs\.org/(?:widget/)?partnerplayer/(?P<player_id>[^/]+)/
)
'formats': 'mincount:8',
},
},
+ {
+ # https://github.com/rg3/youtube-dl/issues/13801
+ 'url': 'https://www.pbs.org/video/pbs-newshour-full-episode-july-31-2017-1501539057/',
+ 'info_dict': {
+ 'id': '3003333873',
+ 'ext': 'mp4',
+ 'title': 'PBS NewsHour - full episode July 31, 2017',
+ 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
+ 'duration': 3265,
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
{
'url': 'http://player.pbs.org/widget/partnerplayer/2365297708/?start=0&end=0&chapterbar=false&endscreen=false&topbar=true',
'only_matching': True,
if url:
break
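+ # No explicit player URL found; fall back to the canonical og:url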
+ if not url:
+ url = self._og_search_url(webpage)
+
mobj = re.match(self._VALID_URL, url)
player_id = mobj.group('player_id')
--- /dev/null
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ qualities,
+ unified_timestamp,
+)
+
+
+class PearVideoIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?pearvideo\.com/video_(?P<id>\d+)'
+ _TEST = {
+ 'url': 'http://www.pearvideo.com/video_1076290',
+ 'info_dict': {
+ 'id': '1076290',
+ 'ext': 'mp4',
+ 'title': '小浣熊在主人家玻璃上滚石头：没砸',
+ 'description': 'md5:01d576b747de71be0ee85eb7cac25f9d',
+ 'timestamp': 1494275280,
+ 'upload_date': '20170508',
+ }
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
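+ # Format ids sorted from worst to best, the order the qualities()
+ # helper expects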
+ quality = qualities(
+ ('ldflv', 'ld', 'sdflv', 'sd', 'hdflv', 'hd', 'src'))
+
+ formats = [{
+ 'url': mobj.group('url'),
+ 'format_id': mobj.group('id'),
+ 'quality': quality(mobj.group('id')),
+ } for mobj in re.finditer(
+ r'(?P<id>[a-zA-Z]+)Url\s*=\s*(["\'])(?P<url>(?:https?:)?//.+?)\2',
+ webpage)]
+ self._sort_formats(formats)
+
+ title = self._search_regex(
+ (r'<h1[^>]+\bclass=(["\'])video-tt\1[^>]*>(?P<value>[^<]+)',
+ r'<[^>]+\bdata-title=(["\'])(?P<value>(?:(?!\1).)+)\1'),
+ webpage, 'title', group='value')
+ description = self._search_regex(
+ (r'<div[^>]+\bclass=(["\'])summary\1[^>]*>(?P<value>[^<]+)',
+ r'<[^>]+\bdata-summary=(["\'])(?P<value>(?:(?!\1).)+)\1'),
+ webpage, 'description', default=None,
+ group='value') or self._html_search_meta('Description', webpage)
+ timestamp = unified_timestamp(self._search_regex(
+ r'<div[^>]+\bclass=["\']date["\'][^>]*>([^<]+)',
+ webpage, 'timestamp', fatal=False))
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'timestamp': timestamp,
+ 'formats': formats,
+ }
@staticmethod
def _extract_url(webpage):
mobj = re.search(
- r'<iframe[^>]+src=([\'"])(?P<url>(?:https?:)?//(?:www\.)?periscope\.tv/(?:(?!\1).)+)\1', webpage)
+ r'<iframe[^>]+src=([\'"])(?P<url>(?:https?:)?//(?:www\.)?(?:periscope|pscp)\.tv/(?:(?!\1).)+)\1', webpage)
if mobj:
return mobj.group('url')
stream = self._call_api(
'getAccessPublic', {'broadcast_id': token}, token)
+ video_urls = set()
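+ # Several format ids may resolve to the same manifest; keep track of
+ # seen URLs to avoid duplicate formats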
formats = []
- for format_id in ('replay', 'rtmp', 'hls', 'https_hls'):
+ for format_id in ('replay', 'rtmp', 'hls', 'https_hls', 'lhls', 'lhlsweb'):
video_url = stream.get(format_id + '_url')
- if not video_url:
+ if not video_url or video_url in video_urls:
continue
- f = {
+ video_urls.add(video_url)
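+ # Finished broadcasts can be downloaded with the native HLS
+ # downloader; live ones are delegated to ffmpeg via m3u8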
+ if format_id != 'rtmp':
+ formats.extend(self._extract_m3u8_formats(
+ video_url, token, 'mp4',
+ entry_protocol='m3u8_native'
+ if state in ('ended', 'timed_out') else 'm3u8',
+ m3u8_id=format_id, fatal=False))
+ continue
+ formats.append({
'url': video_url,
'ext': 'flv' if format_id == 'rtmp' else 'mp4',
- }
- if format_id != 'rtmp':
- f['protocol'] = 'm3u8_native' if state in ('ended', 'timed_out') else 'm3u8'
- formats.append(f)
+ })
self._sort_formats(formats)
return {
parse_duration,
qualities,
srt_subtitles_timecode,
+ try_get,
update_url_query,
urlencode_postdata,
)
class PluralsightBaseIE(InfoExtractor):
_API_BASE = 'https://app.pluralsight.com'
+ def _download_course(self, course_id, url, display_id):
+ try:
+ return self._download_course_rpc(course_id, url, display_id)
+ except ExtractorError:
+ # Old API fallback
+ return self._download_json(
+ 'https://app.pluralsight.com/player/user/api/v1/player/payload',
+ display_id, data=urlencode_postdata({'courseId': course_id}),
+ headers={'Referer': url})
+
+ def _download_course_rpc(self, course_id, url, display_id):
+ response = self._download_json(
+ '%s/player/functions/rpc' % self._API_BASE, display_id,
+ 'Downloading course JSON',
+ data=json.dumps({
+ 'fn': 'bootstrapPlayer',
+ 'payload': {
+ 'courseId': course_id,
+ },
+ }).encode('utf-8'),
+ headers={
+ 'Content-Type': 'application/json;charset=utf-8',
+ 'Referer': url,
+ })
+
+ course = try_get(response, lambda x: x['payload']['course'], dict)
+ if course:
+ return course
+
+ raise ExtractorError(
+ '%s said: %s' % (self.IE_NAME, response['error']['message']),
+ expected=True)
+
class PluralsightIE(PluralsightBaseIE):
IE_NAME = 'pluralsight'
display_id = '%s-%s' % (name, clip_id)
- course = self._download_json(
- 'https://app.pluralsight.com/player/user/api/v1/player/payload',
- display_id, data=urlencode_postdata({'courseId': course_name}),
- headers={'Referer': url})
+ course = self._download_course(course_name, url, display_id)
collection = course['modules']
req_format_split = req_format.split('-', 1)
if len(req_format_split) > 1:
req_ext, req_quality = req_format_split
+ req_quality = '-'.join(req_quality.split('-')[:2])
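+ # Quality may carry an extra suffix (e.g. an aspect ratio); keep only
+ # its first two components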
for allowed_quality in ALLOWED_QUALITIES:
if req_ext == allowed_quality.ext and req_quality in allowed_quality.qualities:
return (AllowedQuality(req_ext, (req_quality, )), )
# TODO: PSM cookie
- course = self._download_json(
- '%s/player/functions/rpc' % self._API_BASE, course_id,
- 'Downloading course JSON',
- data=json.dumps({
- 'fn': 'bootstrapPlayer',
- 'payload': {
- 'courseId': course_id,
- }
- }).encode('utf-8'),
- headers={
- 'Content-Type': 'application/json;charset=utf-8'
- })['payload']['course']
+ course = self._download_course(course_id, url, course_id)
title = course['title']
course_name = course['name']
class PodomaticIE(InfoExtractor):
IE_NAME = 'podomatic'
- _VALID_URL = r'^(?P<proto>https?)://(?P<channel>[^.]+)\.podomatic\.com/entry/(?P<id>[^?]+)'
+ _VALID_URL = r'''(?x)
+ (?P<proto>https?)://
+ (?:
+ (?P<channel>[^.]+)\.podomatic\.com/entry|
+ (?:www\.)?podomatic\.com/podcasts/(?P<channel_2>[^/]+)/episodes
+ )/
+ (?P<id>[^/?#&]+)
+ '''
- _TESTS = [
- {
- 'url': 'http://scienceteachingtips.podomatic.com/entry/2009-01-02T16_03_35-08_00',
- 'md5': '84bb855fcf3429e6bf72460e1eed782d',
- 'info_dict': {
- 'id': '2009-01-02T16_03_35-08_00',
- 'ext': 'mp3',
- 'uploader': 'Science Teaching Tips',
- 'uploader_id': 'scienceteachingtips',
- 'title': '64. When the Moon Hits Your Eye',
- 'duration': 446,
- }
- },
- {
- 'url': 'http://ostbahnhof.podomatic.com/entry/2013-11-15T16_31_21-08_00',
- 'md5': 'd2cf443931b6148e27638650e2638297',
- 'info_dict': {
- 'id': '2013-11-15T16_31_21-08_00',
- 'ext': 'mp3',
- 'uploader': 'Ostbahnhof / Techno Mix',
- 'uploader_id': 'ostbahnhof',
- 'title': 'Einunddreizig',
- 'duration': 3799,
- }
- },
- ]
+ _TESTS = [{
+ 'url': 'http://scienceteachingtips.podomatic.com/entry/2009-01-02T16_03_35-08_00',
+ 'md5': '84bb855fcf3429e6bf72460e1eed782d',
+ 'info_dict': {
+ 'id': '2009-01-02T16_03_35-08_00',
+ 'ext': 'mp3',
+ 'uploader': 'Science Teaching Tips',
+ 'uploader_id': 'scienceteachingtips',
+ 'title': '64. When the Moon Hits Your Eye',
+ 'duration': 446,
+ }
+ }, {
+ 'url': 'http://ostbahnhof.podomatic.com/entry/2013-11-15T16_31_21-08_00',
+ 'md5': 'd2cf443931b6148e27638650e2638297',
+ 'info_dict': {
+ 'id': '2013-11-15T16_31_21-08_00',
+ 'ext': 'mp3',
+ 'uploader': 'Ostbahnhof / Techno Mix',
+ 'uploader_id': 'ostbahnhof',
+ 'title': 'Einunddreizig',
+ 'duration': 3799,
+ }
+ }, {
+ 'url': 'https://www.podomatic.com/podcasts/scienceteachingtips/episodes/2009-01-02T16_03_35-08_00',
+ 'only_matching': True,
+ }]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
- channel = mobj.group('channel')
+ channel = mobj.group('channel') or mobj.group('channel_2')
json_url = (('%s://%s.podomatic.com/entry/embed_params/%s' +
'?permalink=true&rtmp=0') %
webpage = self._download_webpage(url, playlist_id)
content = self._search_regex(
- r'(?s)<div[^>]+class="audio atarticle"[^>]*>(.+?)<script>',
+ r'(?s)<div[^>]+class="\s*this-article\s*"[^>]*>(.+?)<div[^>]+class="tags"[^>]*>',
webpage, 'content')
timestamp = unified_timestamp(self._html_search_regex(
--- /dev/null
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ extract_attributes,
+ int_or_none,
+ unified_timestamp,
+)
+
+
+class PopcornTVIE(InfoExtractor):
+ _VALID_URL = r'https?://[^/]+\.popcorntv\.it/guarda/(?P<display_id>[^/]+)/(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://animemanga.popcorntv.it/guarda/food-wars-battaglie-culinarie-episodio-01/9183',
+ 'md5': '47d65a48d147caf692ab8562fe630b45',
+ 'info_dict': {
+ 'id': '9183',
+ 'display_id': 'food-wars-battaglie-culinarie-episodio-01',
+ 'ext': 'mp4',
+ 'title': 'Food Wars, Battaglie Culinarie | Episodio 01',
+ 'description': 'md5:b8bea378faae4651d3b34c6e112463d0',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'timestamp': 1497610857,
+ 'upload_date': '20170616',
+ 'duration': 1440,
+ 'view_count': int,
+ },
+ }, {
+ 'url': 'https://cinema.popcorntv.it/guarda/smash-cut/10433',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ display_id, video_id = mobj.group('display_id', 'id')
+
+ webpage = self._download_webpage(url, display_id)
+
+ m3u8_url = extract_attributes(
+ self._search_regex(
+ r'(<link[^>]+itemprop=["\'](?:content|embed)Url[^>]*>)',
+ webpage, 'content'
+ ))['href']
+
+ formats = self._extract_m3u8_formats(
+ m3u8_url, display_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls')
+
+ title = self._search_regex(
+ r'<h1[^>]+itemprop=["\']name[^>]*>([^<]+)', webpage,
+ 'title', default=None) or self._og_search_title(webpage)
+
+ description = self._html_search_regex(
+ r'(?s)<article[^>]+itemprop=["\']description[^>]*>(.+?)</article>',
+ webpage, 'description', fatal=False)
+ thumbnail = self._og_search_thumbnail(webpage)
+ timestamp = unified_timestamp(self._html_search_meta(
+ 'uploadDate', webpage, 'timestamp'))
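+ # The 'duration' meta appears to be expressed in minutes, hence invscale=60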
+ duration = int_or_none(self._html_search_meta(
+ 'duration', webpage), invscale=60)
+ view_count = int_or_none(self._html_search_meta(
+ 'interactionCount', webpage, 'view count'))
+
+ return {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'timestamp': timestamp,
+ 'duration': duration,
+ 'view_count': view_count,
+ 'formats': formats,
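+ # No inline mvp id: the video is embedded via pulsembed, so fetch the
+ # embed page and look for the id there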
+ }
r'<title>(.+?) - .*?[Pp]ornHD.*?</title>'], webpage, 'title')
sources = self._parse_json(js_to_json(self._search_regex(
- r"(?s)'sources'\s*:\s*(\{.+?\})\s*\}[;,)]",
+ r"(?s)sources'?\s*[:=]\s*(\{.+?\})",
webpage, 'sources', default='{}')), video_id)
if not sources:
view_count = int_or_none(self._html_search_regex(
r'(\d+) views\s*<', webpage, 'view count', fatal=False))
thumbnail = self._search_regex(
- r"'poster'\s*:\s*'([^']+)'", webpage, 'thumbnail', fatal=False)
+ r"poster'?\s*:\s*([\"'])(?P<url>(?:(?!\1).)+)\1", webpage,
+ 'thumbnail', fatal=False, group='url')
return {
'id': video_id,
title, thumbnail, duration = [None] * 3
video_uploader = self._html_search_regex(
- r'(?s)From: .+?<(?:a href="/users/|a href="/channels/|span class="username)[^>]+>(.+?)<',
+ r'(?s)From: .+?<(?:a\b[^>]+\bhref=["\']/(?:user|channel)s/|span\b[^>]+\bclass=["\']username)[^>]+>(.+?)<',
webpage, 'uploader', fatal=False)
view_count = self._extract_count(
class PornHubPlaylistBaseIE(InfoExtractor):
def _extract_entries(self, webpage):
+ # Only process the container div with the main playlist content, skipping
+ # the drop-down menu that uses a similar pattern for videos (see
+ # https://github.com/rg3/youtube-dl/issues/11594).
+ container = self._search_regex(
+ r'(?s)(<div[^>]+class=["\']container.+)', webpage,
+ 'container', default=webpage)
+
return [
self.url_result(
'http://www.pornhub.com/%s' % video_url,
PornHubIE.ie_key(), video_title=title)
for video_url, title in orderedSet(re.findall(
r'href="/?(view_video\.php\?.*\bviewkey=[\da-z]+[^"]*)"[^>]*\s+title="([^"]+)"',
- webpage))
+ container))
]
def _real_extract(self, url):
webpage = self._download_webpage(url, playlist_id)
- # Only process container div with main playlist content skipping
- # drop-down menu that uses similar pattern for videos (see
- # https://github.com/rg3/youtube-dl/issues/11594).
- container = self._search_regex(
- r'(?s)(<div[^>]+class=["\']container.+)', webpage,
- 'container', default=webpage)
-
- entries = self._extract_entries(container)
+ entries = self._extract_entries(webpage)
playlist = self._parse_json(
self._search_regex(
- r'playlistObject\s*=\s*({.+?});', webpage, 'playlist'),
- playlist_id)
+ r'(?:playlistObject|PLAYLIST_VIEW)\s*=\s*({.+?});', webpage,
+ 'playlist', default='{}'),
+ playlist_id, fatal=False)
+ title = playlist.get('title') or self._search_regex(
+ r'>Videos\s+in\s+(.+?)\s+[Pp]laylist<', webpage, 'title', fatal=False)
return self.playlist_result(
- entries, playlist_id, playlist.get('title'), playlist.get('description'))
+ entries, playlist_id, title, playlist.get('description'))
class PornHubPlaylistIE(PornHubPlaylistBaseIE):
except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 404:
break
+ raise
page_entries = self._extract_entries(webpage)
if not page_entries:
break
from __future__ import unicode_literals
import random
-import time
import re
+import time
from .common import InfoExtractor
from ..utils import (
- sanitized_Request,
- strip_jsonp,
- unescapeHTML,
clean_html,
ExtractorError,
+ strip_jsonp,
+ unescapeHTML,
)
class QQMusicIE(InfoExtractor):
IE_NAME = 'qqmusic'
IE_DESC = 'QQ音乐'
- _VALID_URL = r'https?://y\.qq\.com/#type=song&mid=(?P<id>[0-9A-Za-z]+)'
+ _VALID_URL = r'https?://y\.qq\.com/n/yqq/song/(?P<id>[0-9A-Za-z]+)\.html'
_TESTS = [{
- 'url': 'http://y.qq.com/#type=song&mid=004295Et37taLD',
- 'md5': '9ce1c1c8445f561506d2e3cfb0255705',
+ 'url': 'https://y.qq.com/n/yqq/song/004295Et37taLD.html',
+ 'md5': '5f1e6cea39e182857da7ffc5ef5e6bb8',
'info_dict': {
'id': '004295Et37taLD',
'ext': 'mp3',
'title': '可惜没如果',
'release_date': '20141227',
'creator': '林俊杰',
- 'description': 'md5:d327722d0361576fde558f1ac68a7065',
+ 'description': 'md5:d85afb3051952ecc50a1ee8a286d1eac',
'thumbnail': r're:^https?://.*\.jpg$',
}
}, {
'note': 'There is no mp3-320 version of this song.',
- 'url': 'http://y.qq.com/#type=song&mid=004MsGEo3DdNxV',
+ 'url': 'https://y.qq.com/n/yqq/song/004MsGEo3DdNxV.html',
'md5': 'fa3926f0c585cda0af8fa4f796482e3e',
'info_dict': {
'id': '004MsGEo3DdNxV',
}
}, {
'note': 'lyrics not in .lrc format',
- 'url': 'http://y.qq.com/#type=song&mid=001JyApY11tIp6',
+ 'url': 'https://y.qq.com/n/yqq/song/001JyApY11tIp6.html',
'info_dict': {
'id': '001JyApY11tIp6',
'ext': 'mp3',
'title': 'Shadows Over Transylvania',
'release_date': '19970225',
'creator': 'Dark Funeral',
- 'description': 'md5:ed14d5bd7ecec19609108052c25b2c11',
+ 'description': 'md5:c9b20210587cbcd6836a1c597bab4525',
'thumbnail': r're:^https?://.*\.jpg$',
},
'params': {
[r'albummid:\'([0-9a-zA-Z]+)\'', r'"albummid":"([0-9a-zA-Z]+)"'],
detail_info_page, 'album mid', default=None)
if albummid:
- thumbnail_url = "http://i.gtimg.cn/music/photo/mid_album_500/%s/%s/%s.jpg" \
+ thumbnail_url = 'http://i.gtimg.cn/music/photo/mid_album_500/%s/%s/%s.jpg' \
% (albummid[-2:-1], albummid[-1], albummid)
guid = self.m_r_get_ruin()
def qq_static_url(category, mid):
return 'http://y.qq.com/y/static/%s/%s/%s/%s.html' % (category, mid[-2], mid[-1], mid)
- @classmethod
- def get_entries_from_page(cls, page):
+ def get_singer_all_songs(self, singmid, num):
+ return self._download_webpage(
+ 'https://c.y.qq.com/v8/fcg-bin/fcg_v8_singer_track_cp.fcg', singmid,
+ query={
+ 'format': 'json',
+ 'inCharset': 'utf8',
+ 'outCharset': 'utf-8',
+ 'platform': 'yqq',
+ 'needNewCode': 0,
+ 'singermid': singmid,
+ 'order': 'listen',
+ 'begin': 0,
+ 'num': num,
+ 'songstatus': 1,
+ })
+
+ def get_entries_from_page(self, singmid):
entries = []
- for item in re.findall(r'class="data"[^<>]*>([^<>]+)</', page):
- song_mid = unescapeHTML(item).split('|')[-5]
- entries.append(cls.url_result(
- 'http://y.qq.com/#type=song&mid=' + song_mid, 'QQMusic',
- song_mid))
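+ # Request a single song first to learn the total count, then fetch
+ # the complete list in one go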
+ default_num = 1
+ json_text = self.get_singer_all_songs(singmid, default_num)
+ json_obj_all_songs = self._parse_json(json_text, singmid)
+
+ if json_obj_all_songs['code'] == 0:
+ total = json_obj_all_songs['data']['total']
+ json_text = self.get_singer_all_songs(singmid, total)
+ json_obj_all_songs = self._parse_json(json_text, singmid)
+
+ for item in json_obj_all_songs['data']['list']:
+ if item['musicData'].get('songmid') is not None:
+ songmid = item['musicData']['songmid']
+ entries.append(self.url_result(
+ 'https://y.qq.com/n/yqq/song/%s.html' % songmid, 'QQMusic', songmid))
return entries
class QQMusicSingerIE(QQPlaylistBaseIE):
IE_NAME = 'qqmusic:singer'
IE_DESC = 'QQ音乐 - 歌手'
- _VALID_URL = r'https?://y\.qq\.com/#type=singer&mid=(?P<id>[0-9A-Za-z]+)'
+ _VALID_URL = r'https?://y\.qq\.com/n/yqq/singer/(?P<id>[0-9A-Za-z]+)\.html'
_TEST = {
- 'url': 'http://y.qq.com/#type=singer&mid=001BLpXF2DyJe2',
+ 'url': 'https://y.qq.com/n/yqq/singer/001BLpXF2DyJe2.html',
'info_dict': {
'id': '001BLpXF2DyJe2',
'title': '林俊杰',
'description': 'md5:870ec08f7d8547c29c93010899103751',
},
- 'playlist_count': 12,
+ 'playlist_mincount': 12,
}
def _real_extract(self, url):
mid = self._match_id(url)
- singer_page = self._download_webpage(
- self.qq_static_url('singer', mid), mid, 'Download singer page')
-
- entries = self.get_entries_from_page(singer_page)
-
+ entries = self.get_entries_from_page(mid)
+ singer_page = self._download_webpage(url, mid, 'Download singer page')
singer_name = self._html_search_regex(
- r"singername\s*:\s*'([^']+)'", singer_page, 'singer name',
- default=None)
-
- singer_id = self._html_search_regex(
- r"singerid\s*:\s*'([0-9]+)'", singer_page, 'singer id',
- default=None)
-
+ r"singername\s*:\s*'(.*?)'", singer_page, 'singer name', default=None)
singer_desc = None
- if singer_id:
- req = sanitized_Request(
- 'http://s.plcloud.music.qq.com/fcgi-bin/fcg_get_singer_desc.fcg?utf8=1&outCharset=utf-8&format=xml&singerid=%s' % singer_id)
- req.add_header(
- 'Referer', 'http://s.plcloud.music.qq.com/xhr_proxy_utf8.html')
+ if mid:
singer_desc_page = self._download_xml(
- req, mid, 'Donwload singer description XML')
+ 'http://s.plcloud.music.qq.com/fcgi-bin/fcg_get_singer_desc.fcg', mid,
+ 'Download singer description XML',
+ query={'utf8': 1, 'outCharset': 'utf-8', 'format': 'xml', 'singermid': mid},
+ headers={'Referer': 'https://y.qq.com/n/yqq/singer/'})
singer_desc = singer_desc_page.find('./data/info/desc').text
class QQMusicAlbumIE(QQPlaylistBaseIE):
IE_NAME = 'qqmusic:album'
IE_DESC = 'QQ音乐 - 专辑'
- _VALID_URL = r'https?://y\.qq\.com/#type=album&mid=(?P<id>[0-9A-Za-z]+)'
+ _VALID_URL = r'https?://y\.qq\.com/n/yqq/album/(?P<id>[0-9A-Za-z]+)\.html'
_TESTS = [{
- 'url': 'http://y.qq.com/#type=album&mid=000gXCTb2AhRR1',
+ 'url': 'https://y.qq.com/n/yqq/album/000gXCTb2AhRR1.html',
'info_dict': {
'id': '000gXCTb2AhRR1',
'title': '我们都是这样长大的',
},
'playlist_count': 4,
}, {
- 'url': 'http://y.qq.com/#type=album&mid=002Y5a3b3AlCu3',
+ 'url': 'https://y.qq.com/n/yqq/album/002Y5a3b3AlCu3.html',
'info_dict': {
'id': '002Y5a3b3AlCu3',
'title': '그리고...',
entries = [
self.url_result(
- 'http://y.qq.com/#type=song&mid=' + song['songmid'], 'QQMusic', song['songmid']
+ 'https://y.qq.com/n/yqq/song/' + song['songmid'] + '.html', 'QQMusic', song['songmid']
) for song in album['list']
]
album_name = album.get('name')
class QQMusicToplistIE(QQPlaylistBaseIE):
IE_NAME = 'qqmusic:toplist'
IE_DESC = 'QQ音乐 - 排行榜'
- _VALID_URL = r'https?://y\.qq\.com/#type=toplist&p=(?P<id>(top|global)_[0-9]+)'
+ _VALID_URL = r'https?://y\.qq\.com/n/yqq/toplist/(?P<id>[0-9]+)\.html'
_TESTS = [{
- 'url': 'http://y.qq.com/#type=toplist&p=global_123',
+ 'url': 'https://y.qq.com/n/yqq/toplist/123.html',
'info_dict': {
- 'id': 'global_123',
+ 'id': '123',
'title': '美国iTunes榜',
+ 'description': 'md5:89db2335fdbb10678dee2d43fe9aba08',
},
- 'playlist_count': 10,
+ 'playlist_count': 100,
}, {
- 'url': 'http://y.qq.com/#type=toplist&p=top_3',
+ 'url': 'https://y.qq.com/n/yqq/toplist/3.html',
'info_dict': {
- 'id': 'top_3',
+ 'id': '3',
+ 'title': '巅峰榜·欧美',
- 'description': 'QQ音乐巅峰榜·欧美根据用户收听行为自动生成，集结当下最流行的欧美新歌！:更新时间：每周四22点|统'
- '计周期：一周（上周四至本周三）|统计对象：三个月内发行的欧美歌曲|统计数量：100首|统计算法：根据'
- '歌曲在一周内的有效播放次数，由高到低取前100名（同一歌手最多允许5首歌曲同时上榜）|有效播放次数：'
- '登录用户完整播放一首歌曲，记为一次有效播放；同一用户收听同一首歌曲，每天记录为1次有效播放',
+ 'description': 'md5:5a600d42c01696b26b71f8c4d43407da',
},
'playlist_count': 100,
}, {
- 'url': 'http://y.qq.com/#type=toplist&p=global_106',
+ 'url': 'https://y.qq.com/n/yqq/toplist/106.html',
'info_dict': {
- 'id': 'global_106',
+ 'id': '106',
'title': '韩国Mnet榜',
+ 'description': 'md5:cb84b325215e1d21708c615cac82a6e7',
},
'playlist_count': 50,
}]
def _real_extract(self, url):
list_id = self._match_id(url)
- list_type, num_id = list_id.split("_")
-
toplist_json = self._download_json(
- 'http://i.y.qq.com/v8/fcg-bin/fcg_v8_toplist_cp.fcg?type=%s&topid=%s&format=json'
- % (list_type, num_id),
- list_id, 'Download toplist page')
+ 'http://i.y.qq.com/v8/fcg-bin/fcg_v8_toplist_cp.fcg', list_id,
+ note='Download toplist page',
+ query={'type': 'toplist', 'topid': list_id, 'format': 'json'})
- entries = [
- self.url_result(
- 'http://y.qq.com/#type=song&mid=' + song['data']['songmid'], 'QQMusic', song['data']['songmid']
- ) for song in toplist_json['songlist']
- ]
+ entries = [self.url_result(
+ 'https://y.qq.com/n/yqq/song/' + song['data']['songmid'] + '.html', 'QQMusic',
+ song['data']['songmid'])
+ for song in toplist_json['songlist']]
topinfo = toplist_json.get('topinfo', {})
list_name = topinfo.get('ListName')
class QQMusicPlaylistIE(QQPlaylistBaseIE):
IE_NAME = 'qqmusic:playlist'
IE_DESC = 'QQ音乐 - 歌单'
- _VALID_URL = r'https?://y\.qq\.com/#type=taoge&id=(?P<id>[0-9]+)'
+ _VALID_URL = r'https?://y\.qq\.com/n/yqq/playlist/(?P<id>[0-9]+)\.html'
_TESTS = [{
- 'url': 'http://y.qq.com/#type=taoge&id=3462654915',
+ 'url': 'http://y.qq.com/n/yqq/playlist/3462654915.html',
'info_dict': {
'id': '3462654915',
'title': '韩国5月新歌精选下旬',
'playlist_count': 40,
'skip': 'playlist gone',
}, {
- 'url': 'http://y.qq.com/#type=taoge&id=1374105607',
+ 'url': 'https://y.qq.com/n/yqq/playlist/1374105607.html',
'info_dict': {
'id': '1374105607',
'title': '易入人心的华语民谣',
list_id = self._match_id(url)
list_json = self._download_json(
- 'http://i.y.qq.com/qzone-music/fcg-bin/fcg_ucc_getcdinfo_byids_cp.fcg?type=1&json=1&utf8=1&onlysong=0&disstid=%s'
- % list_id, list_id, 'Download list page',
+ 'http://i.y.qq.com/qzone-music/fcg-bin/fcg_ucc_getcdinfo_byids_cp.fcg',
+ list_id, 'Download list page',
+ query={'type': 1, 'json': 1, 'utf8': 1, 'onlysong': 0, 'disstid': list_id},
transform_source=strip_jsonp)
if not list_json.get('cdlist'):
if list_json.get('code'):
raise ExtractorError('Unable to get playlist info')
cdlist = list_json['cdlist'][0]
- entries = [
- self.url_result(
- 'http://y.qq.com/#type=song&mid=' + song['songmid'], 'QQMusic', song['songmid']
- ) for song in cdlist['songlist']
- ]
+ entries = [self.url_result(
+ 'https://y.qq.com/n/yqq/song/' + song['songmid'] + '.html', 'QQMusic', song['songmid'])
+ for song in cdlist['songlist']]
list_name = cdlist.get('dissname')
list_description = clean_html(unescapeHTML(cdlist.get('desc')))
class RadioCanadaIE(InfoExtractor):
IE_NAME = 'radiocanada'
_VALID_URL = r'(?:radiocanada:|https?://ici\.radio-canada\.ca/widgets/mediaconsole/)(?P<app_code>[^:/]+)[:/](?P<id>[0-9]+)'
- _TEST = {
- 'url': 'http://ici.radio-canada.ca/widgets/mediaconsole/medianet/7184272',
- 'info_dict': {
- 'id': '7184272',
- 'ext': 'mp4',
- 'title': 'Le parcours du tireur captƩ sur vidƩo',
- 'description': 'Images des camƩras de surveillance fournies par la GRC montrant le parcours du tireur d\'Ottawa',
- 'upload_date': '20141023',
- },
- 'params': {
- # m3u8 download
- 'skip_download': True,
+ _TESTS = [
+ {
+ 'url': 'http://ici.radio-canada.ca/widgets/mediaconsole/medianet/7184272',
+ 'info_dict': {
+ 'id': '7184272',
+ 'ext': 'mp4',
+ 'title': 'Le parcours du tireur captƩ sur vidƩo',
+ 'description': 'Images des camƩras de surveillance fournies par la GRC montrant le parcours du tireur d\'Ottawa',
+ 'upload_date': '20141023',
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ }
},
- }
+ {
+ # empty Title
+ 'url': 'http://ici.radio-canada.ca/widgets/mediaconsole/medianet/7754998/',
+ 'info_dict': {
+ 'id': '7754998',
+ 'ext': 'mp4',
+ 'title': 'letelejournal22h',
+ 'description': 'INTEGRALE WEB 22H-TJ',
+ 'upload_date': '20170720',
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }
+ ]
def _real_extract(self, url):
url, smuggled_data = unsmuggle_url(url, {})
device_types.append('android')
formats = []
+ error = None
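+ # Remember API error messages and raise only if no device type
+ # yields any formats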
# TODO: extract f4m formats
# f4m formats can be extracted using flashhd device_type but they produce unplayable file
for device_type in device_types:
if not v_url:
continue
if v_url == 'null':
- raise ExtractorError('%s said: %s' % (
- self.IE_NAME, xpath_text(v_data, 'message')), expected=True)
+ error = xpath_text(v_data, 'message')
+ continue
ext = determine_ext(v_url)
if ext == 'm3u8':
formats.extend(self._extract_m3u8_formats(
formats.extend(self._extract_f4m_formats(
base_url + '/manifest.f4m', video_id,
f4m_id='hds', fatal=False))
+ if not formats and error:
+ raise ExtractorError(
+ '%s said: %s' % (self.IE_NAME, error), expected=True)
self._sort_formats(formats)
subtitles = {}
return {
'id': video_id,
- 'title': get_meta('Title'),
+ 'title': get_meta('Title') or get_meta('AV-nomEmission'),
'description': get_meta('Description') or get_meta('ShortDescription'),
'thumbnail': get_meta('imageHR') or get_meta('imageMR') or get_meta('imageBR'),
'duration': int_or_none(get_meta('length')),
info = {
'id': video_id,
- 'title': title,
+ 'title': self._live_title(title) if relinker_info.get(
+ 'is_live') else title,
'alt_title': media.get('subtitle'),
'description': media.get('description'),
- 'uploader': media.get('channel'),
- 'creator': media.get('editor'),
+ 'uploader': strip_or_none(media.get('channel')),
+ 'creator': strip_or_none(media.get('editor')),
'duration': parse_duration(video.get('duration')),
'timestamp': timestamp,
'thumbnails': thumbnails,
}
info.update(relinker_info)
-
return info
+class RaiPlayLiveIE(RaiBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?raiplay\.it/dirette/(?P<id>[^/?#&]+)'
+ _TEST = {
+ 'url': 'http://www.raiplay.it/dirette/rainews24',
+ 'info_dict': {
+ 'id': 'd784ad40-e0ae-4a69-aa76-37519d238a9c',
+ 'display_id': 'rainews24',
+ 'ext': 'mp4',
+ 'title': 're:^Diretta di Rai News 24 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
+ 'description': 'md5:6eca31500550f9376819f174e5644754',
+ 'uploader': 'Rai News 24',
+ 'creator': 'Rai News 24',
+ 'is_live': True,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, display_id)
+
+ video_id = self._search_regex(
+ r'data-uniquename=["\']ContentItem-(%s)' % RaiBaseIE._UUID_RE,
+ webpage, 'content id')
+
+ return {
+ '_type': 'url_transparent',
+ 'ie_key': RaiPlayIE.ie_key(),
+ 'url': 'http://www.raiplay.it/dirette/ContentItem-%s.html' % video_id,
+ 'id': video_id,
+ 'display_id': display_id,
+ }
+
+
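
# How the url_transparent result above behaves (hedged summary): youtube-dl
# re-dispatches the returned URL to RaiPlayIE, then overlays the fields given
# here (id, display_id) on that extractor's output, so the live page keeps its
# slug while the metadata comes from the resolved ContentItem.
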
class RaiIE(RaiBaseIE):
_VALID_URL = r'https?://[^/]+\.(?:rai\.(?:it|tv)|rainews\.it)/dl/.+?-(?P<id>%s)(?:-.+?)?\.html' % RaiBaseIE._UUID_RE
_TESTS = [{
media_type = media['type']
if 'Audio' in media_type:
relinker_info = {
- 'formats': {
+ 'formats': [{
'format_id': media.get('formatoAudio'),
'url': media['audioUrl'],
'ext': media.get('formatoAudio'),
- }
+ }]
}
elif 'Video' in media_type:
relinker_info = self._extract_relinker_info(media['mediaUri'], content_id)
class RedBullTVIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?redbull\.tv/(?:video|film)/(?P<id>AP-\w+)'
+ _VALID_URL = r'https?://(?:www\.)?redbull\.tv/(?:video|film|live)/(?:AP-\w+/segment/)?(?P<id>AP-\w+)'
_TESTS = [{
# film
'url': 'https://www.redbull.tv/video/AP-1Q756YYX51W11/abc-of-wrc',
'season_number': 2,
'episode_number': 4,
},
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ # segment
+ 'url': 'https://www.redbull.tv/live/AP-1R5DX49XS1W11/segment/AP-1QSAQJ6V52111/semi-finals',
+ 'info_dict': {
+ 'id': 'AP-1QSAQJ6V52111',
+ 'ext': 'mp4',
+ 'title': 'Semi Finals - Vans Park Series Pro Tour',
+ 'description': 'md5:306a2783cdafa9e65e39aa62f514fd97',
+ 'duration': 11791.991,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
}, {
'url': 'https://www.redbull.tv/film/AP-1MSKKF5T92111/in-motion',
'only_matching': True,
title = info['title'].strip()
formats = self._extract_m3u8_formats(
- video['url'], video_id, 'mp4', 'm3u8_native')
+ video['url'], video_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls')
self._sort_formats(formats)
subtitles = {}
--- /dev/null
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+ float_or_none,
+)
+
+
+class RedditIE(InfoExtractor):
+ _VALID_URL = r'https?://v\.redd\.it/(?P<id>[^/?#&]+)'
+ _TEST = {
+ # from https://www.reddit.com/r/videos/comments/6rrwyj/that_small_heart_attack/
+ 'url': 'https://v.redd.it/zv89llsvexdz',
+ 'md5': '655d06ace653ea3b87bccfb1b27ec99d',
+ 'info_dict': {
+ 'id': 'zv89llsvexdz',
+ 'ext': 'mp4',
+ 'title': 'zv89llsvexdz',
+ },
+ 'params': {
+ 'format': 'bestvideo',
+ },
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ formats = self._extract_m3u8_formats(
+ 'https://v.redd.it/%s/HLSPlaylist.m3u8' % video_id, video_id,
+ 'mp4', entry_protocol='m3u8_native', m3u8_id='hls', fatal=False)
+
+ formats.extend(self._extract_mpd_formats(
+ 'https://v.redd.it/%s/DASHPlaylist.mpd' % video_id, video_id,
+ mpd_id='dash', fatal=False))
+
+ return {
+ 'id': video_id,
+ 'title': video_id,
+ 'formats': formats,
+ }
+
+
+class RedditRIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?reddit\.com/r/[^/]+/comments/(?P<id>[^/]+)'
+ _TESTS = [{
+ 'url': 'https://www.reddit.com/r/videos/comments/6rrwyj/that_small_heart_attack/',
+ 'info_dict': {
+ 'id': 'zv89llsvexdz',
+ 'ext': 'mp4',
+ 'title': 'That small heart attack.',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'timestamp': 1501941939,
+ 'upload_date': '20170805',
+ 'uploader': 'Antw87',
+ 'like_count': int,
+ 'dislike_count': int,
+ 'comment_count': int,
+ 'age_limit': 0,
+ },
+ 'params': {
+ 'format': 'bestvideo',
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://www.reddit.com/r/videos/comments/6rrwyj',
+ 'only_matching': True,
+ }, {
+ # imgur
+ 'url': 'https://www.reddit.com/r/MadeMeSmile/comments/6t7wi5/wait_for_it/',
+ 'only_matching': True,
+ }, {
+ # streamable
+ 'url': 'https://www.reddit.com/r/videos/comments/6t7sg9/comedians_hilarious_joke_about_the_guam_flag/',
+ 'only_matching': True,
+ }, {
+ # youtube
+ 'url': 'https://www.reddit.com/r/videos/comments/6t75wq/southern_man_tries_to_speak_without_an_accent/',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ data = self._download_json(
+ url + '.json', video_id)[0]['data']['children'][0]['data']
+
+ video_url = data['url']
+
+ # Avoid recursing into the same reddit URL
+ if 'reddit.com/' in video_url and '/%s/' % video_id in video_url:
+ raise ExtractorError('No media found', expected=True)
+
+ over_18 = data.get('over_18')
+ if over_18 is True:
+ age_limit = 18
+ elif over_18 is False:
+ age_limit = 0
+ else:
+ age_limit = None
+
+ return {
+ '_type': 'url_transparent',
+ 'url': video_url,
+ 'title': data.get('title'),
+ 'thumbnail': data.get('thumbnail'),
+ 'timestamp': float_or_none(data.get('created_utc')),
+ 'uploader': data.get('author'),
+ 'like_count': int_or_none(data.get('ups')),
+ 'dislike_count': int_or_none(data.get('downs')),
+ 'comment_count': int_or_none(data.get('num_comments')),
+ 'age_limit': age_limit,
+ }
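
# Standalone sketch of the endpoint RedditRIE consumes (hedged; no youtube-dl
# helpers): appending '.json' to a comments URL returns the listing as JSON,
# and the submission sits at [0]['data']['children'][0]['data'], exactly as
# _real_extract indexes it. Reddit may require a User-Agent header in practice.
import json
import sys

if sys.version_info[0] >= 3:
    from urllib.request import urlopen
else:
    from urllib2 import urlopen

def fetch_post_data(post_url):
    raw = urlopen(post_url + '.json').read().decode('utf-8')
    return json.loads(raw)[0]['data']['children'][0]['data']
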
import re
from .common import InfoExtractor
+from ..compat import compat_str
from ..utils import (
ExtractorError,
int_or_none,
'format_id': format_id,
'height': int_or_none(format_id),
})
- else:
+ medias = self._parse_json(
+ self._search_regex(
+ r'mediaDefinition\s*:\s*(\[.+?\])', webpage,
+ 'media definitions', default='{}'),
+ video_id, fatal=False)
+ if medias and isinstance(medias, list):
+ for media in medias:
+ format_url = media.get('videoUrl')
+ if not format_url or not isinstance(format_url, compat_str):
+ continue
+ format_id = media.get('quality')
+ formats.append({
+ 'url': format_url,
+ 'format_id': format_id,
+ 'height': int_or_none(format_id),
+ })
+ if not formats:
video_url = self._html_search_regex(
r'<source src="(.+?)" type="video/mp4">', webpage, 'video URL')
formats.append({'url': video_url})
r'<span[^>]+class="added-time"[^>]*>ADDED ([^<]+)<',
webpage, 'upload date', fatal=False))
duration = int_or_none(self._search_regex(
- r'videoDuration\s*:\s*(\d+)', webpage, 'duration', fatal=False))
+ r'videoDuration\s*:\s*(\d+)', webpage, 'duration', default=None))
view_count = str_to_int(self._search_regex(
r'<span[^>]*>VIEWS</span></td>\s*<td>([\d,.]+)',
webpage, 'view count', fatal=False))
https?://(?:www\.)?
(?:
rtlxl\.nl/[^\#]*\#!/[^/]+/|
- rtl\.nl/system/videoplayer/(?:[^/]+/)+(?:video_)?embed\.html\b.+?\buuid=
+ rtl\.nl/(?:system/videoplayer/(?:[^/]+/)+(?:video_)?embed\.html\b.+?\buuid=|video/)
)
(?P<id>[0-9a-f-]+)'''
}, {
'url': 'http://rtlxl.nl/?_ga=1.204735956.572365465.1466978370#!/rtl-nieuws-132237/3c487912-023b-49ac-903e-2c5d79f8410f',
'only_matching': True,
+ }, {
+ 'url': 'https://www.rtl.nl/video/c603c9c2-601d-4b5e-8175-64f1e942dc7d/',
+ 'only_matching': True,
}]
def _real_extract(self, url):
from .common import InfoExtractor
from ..compat import (
compat_str,
+ compat_parse_qs,
+ compat_urllib_parse_urlparse,
)
from ..utils import (
determine_ext,
- unified_strdate,
+ bool_or_none,
+ int_or_none,
+ try_get,
+ unified_timestamp,
)
-class RutubeIE(InfoExtractor):
+class RutubeBaseIE(InfoExtractor):
+ def _extract_video(self, video, video_id=None, require_title=True):
+ title = video['title'] if require_title else video.get('title')
+
+ age_limit = video.get('is_adult')
+ if age_limit is not None:
+ age_limit = 18 if age_limit is True else 0
+
+ uploader_id = try_get(video, lambda x: x['author']['id'])
+ category = try_get(video, lambda x: x['category']['name'])
+
+ return {
+ 'id': video.get('id') or video_id,
+ 'title': title,
+ 'description': video.get('description'),
+ 'thumbnail': video.get('thumbnail_url'),
+ 'duration': int_or_none(video.get('duration')),
+ 'uploader': try_get(video, lambda x: x['author']['name']),
+ 'uploader_id': compat_str(uploader_id) if uploader_id else None,
+ 'timestamp': unified_timestamp(video.get('created_ts')),
+ 'category': [category] if category else None,
+ 'age_limit': age_limit,
+ 'view_count': int_or_none(video.get('hits')),
+ 'comment_count': int_or_none(video.get('comments_count')),
+ 'is_live': bool_or_none(video.get('is_livestream')),
+ }
+
+
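
# try_get, used throughout _extract_video above, degrades missing or
# odd-shaped JSON to None instead of raising. A rough stdlib-only equivalent
# (the real helper lives in youtube_dl.utils and can also enforce a type):
def try_get_sketch(src, getter):
    try:
        return getter(src)
    except (AttributeError, KeyError, TypeError, IndexError):
        return None

assert try_get_sketch({'author': {'id': 42}}, lambda x: x['author']['id']) == 42
assert try_get_sketch({}, lambda x: x['author']['id']) is None
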
+class RutubeIE(RutubeBaseIE):
IE_NAME = 'rutube'
IE_DESC = 'Rutube videos'
_VALID_URL = r'https?://rutube\.ru/(?:video|(?:play/)?embed)/(?P<id>[\da-z]{32})'
_TESTS = [{
'url': 'http://rutube.ru/video/3eac3b4561676c17df9132a9a1e62e3e/',
+ 'md5': '79938ade01294ef7e27574890d0d3769',
'info_dict': {
'id': '3eac3b4561676c17df9132a9a1e62e3e',
- 'ext': 'mp4',
+ 'ext': 'flv',
'title': 'Раненый кенгуру забежал в аптеку',
'description': 'http://www.ntdtv.ru ',
'duration': 80,
'uploader': 'NTDRussian',
'uploader_id': '29790',
+ 'timestamp': 1381943602,
'upload_date': '20131016',
'age_limit': 0,
},
- 'params': {
- # It requires ffmpeg (m3u8 download)
- 'skip_download': True,
- },
}, {
'url': 'http://rutube.ru/play/embed/a10e53b86e8f349080f718582ce4c661',
'only_matching': True,
}, {
'url': 'http://rutube.ru/embed/a10e53b86e8f349080f718582ce4c661',
'only_matching': True,
+ }, {
+ 'url': 'http://rutube.ru/video/3eac3b4561676c17df9132a9a1e62e3e/?pl_id=4252',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://rutube.ru/video/10b3a03fc01d5bbcc632a2f3514e8aab/?pl_type=source',
+ 'only_matching': True,
}]
+ @classmethod
+ def suitable(cls, url):
+ return False if RutubePlaylistIE.suitable(url) else super(RutubeIE, cls).suitable(url)
+
@staticmethod
def _extract_urls(webpage):
return [mobj.group('url') for mobj in re.finditer(
def _real_extract(self, url):
video_id = self._match_id(url)
+
video = self._download_json(
'http://rutube.ru/api/video/%s/?format=json' % video_id,
video_id, 'Downloading video JSON')
- # Some videos don't have the author field
- author = video.get('author') or {}
+ info = self._extract_video(video, video_id)
options = self._download_json(
'http://rutube.ru/api/play/options/%s/?format=json' % video_id,
})
self._sort_formats(formats)
- return {
- 'id': video['id'],
- 'title': video['title'],
- 'description': video['description'],
- 'duration': video['duration'],
- 'view_count': video['hits'],
- 'formats': formats,
- 'thumbnail': video['thumbnail_url'],
- 'uploader': author.get('name'),
- 'uploader_id': compat_str(author['id']) if author else None,
- 'upload_date': unified_strdate(video['created_ts']),
- 'age_limit': 18 if video['is_adult'] else 0,
- }
+ info['formats'] = formats
+ return info
class RutubeEmbedIE(InfoExtractor):
'url': 'http://rutube.ru/video/embed/6722881?vk_puid37=&vk_puid38=',
'info_dict': {
'id': 'a10e53b86e8f349080f718582ce4c661',
- 'ext': 'mp4',
+ 'ext': 'flv',
+ 'timestamp': 1387830582,
'upload_date': '20131223',
'uploader_id': '297833',
'description': 'Видео группы http://vk.com/foxkidsreset музей Fox Kids и Jetix<br/><br/> восстановлено и сделано в шикоформате subziro89 http://vk.com/subziro89',
'title': 'Мистический городок Эйри в Индиан 5 серия озвучка subziro89',
},
'params': {
- 'skip_download': 'Requires ffmpeg',
+ 'skip_download': True,
},
}, {
'url': 'http://rutube.ru/play/embed/8083783',
canonical_url = self._html_search_regex(
r'<link\s+rel="canonical"\s+href="([^"]+?)"', webpage,
'Canonical URL')
- return self.url_result(canonical_url, 'Rutube')
+ return self.url_result(canonical_url, RutubeIE.ie_key())
+
+
+class RutubePlaylistBaseIE(RutubeBaseIE):
+ def _next_page_url(self, page_num, playlist_id, *args, **kwargs):
+ return self._PAGE_TEMPLATE % (playlist_id, page_num)
+ def _entries(self, playlist_id, *args, **kwargs):
+ next_page_url = None
+ for pagenum in itertools.count(1):
+ page = self._download_json(
+ next_page_url or self._next_page_url(
+ pagenum, playlist_id, *args, **kwargs),
+ playlist_id, 'Downloading page %s' % pagenum)
+
+ results = page.get('results')
+ if not results or not isinstance(results, list):
+ break
+
+ for result in results:
+ video_url = result.get('video_url')
+ if not video_url or not isinstance(video_url, compat_str):
+ continue
+ entry = self._extract_video(result, require_title=False)
+ entry.update({
+ '_type': 'url',
+ 'url': video_url,
+ 'ie_key': RutubeIE.ie_key(),
+ })
+ yield entry
+ next_page_url = page.get('next')
+ if not next_page_url or not page.get('has_next'):
+ break
+
+ def _extract_playlist(self, playlist_id, *args, **kwargs):
+ return self.playlist_result(
+ self._entries(playlist_id, *args, **kwargs),
+ playlist_id, kwargs.get('playlist_name'))
+
+ def _real_extract(self, url):
+ return self._extract_playlist(self._match_id(url))
+
+
-class RutubeChannelIE(InfoExtractor):
+class RutubeChannelIE(RutubePlaylistBaseIE):
IE_NAME = 'rutube:channel'
IE_DESC = 'Rutube channels'
_VALID_URL = r'https?://rutube\.ru/tags/video/(?P<id>\d+)'
_PAGE_TEMPLATE = 'http://rutube.ru/api/tags/video/%s/?page=%s&format=json'
- def _extract_videos(self, channel_id, channel_title=None):
- entries = []
- for pagenum in itertools.count(1):
- page = self._download_json(
- self._PAGE_TEMPLATE % (channel_id, pagenum),
- channel_id, 'Downloading page %s' % pagenum)
- results = page['results']
- if not results:
- break
- entries.extend(self.url_result(result['video_url'], 'Rutube') for result in results)
- if not page['has_next']:
- break
- return self.playlist_result(entries, channel_id, channel_title)
-
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- channel_id = mobj.group('id')
- return self._extract_videos(channel_id)
-
-class RutubeMovieIE(RutubeChannelIE):
+class RutubeMovieIE(RutubePlaylistBaseIE):
IE_NAME = 'rutube:movie'
IE_DESC = 'Rutube movies'
_VALID_URL = r'https?://rutube\.ru/metainfo/tv/(?P<id>\d+)'
movie = self._download_json(
self._MOVIE_TEMPLATE % movie_id, movie_id,
'Downloading movie JSON')
- movie_name = movie['name']
- return self._extract_videos(movie_id, movie_name)
+ return self._extract_playlist(
+ movie_id, playlist_name=movie.get('name'))
-class RutubePersonIE(RutubeChannelIE):
+class RutubePersonIE(RutubePlaylistBaseIE):
IE_NAME = 'rutube:person'
IE_DESC = 'Rutube person videos'
_VALID_URL = r'https?://rutube\.ru/video/person/(?P<id>\d+)'
}]
_PAGE_TEMPLATE = 'http://rutube.ru/api/video/person/%s/?page=%s&format=json'
+
+
+class RutubePlaylistIE(RutubePlaylistBaseIE):
+ IE_NAME = 'rutube:playlist'
+ IE_DESC = 'Rutube playlists'
+ _VALID_URL = r'https?://rutube\.ru/(?:video|(?:play/)?embed)/[\da-z]{32}/\?.*?\bpl_id=(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://rutube.ru/video/cecd58ed7d531fc0f3d795d51cee9026/?pl_id=3097&pl_type=tag',
+ 'info_dict': {
+ 'id': '3097',
+ },
+ 'playlist_count': 27,
+ }, {
+ 'url': 'https://rutube.ru/video/10b3a03fc01d5bbcc632a2f3514e8aab/?pl_id=4252&pl_type=source',
+ 'only_matching': True,
+ }]
+
+ _PAGE_TEMPLATE = 'http://rutube.ru/api/playlist/%s/%s/?page=%s&format=json'
+
+ @classmethod
+ def suitable(cls, url):
+ if not super(RutubePlaylistIE, cls).suitable(url):
+ return False
+ params = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
+ return params.get('pl_type', [None])[0] and int_or_none(params.get('pl_id', [None])[0])
+
+ def _next_page_url(self, page_num, playlist_id, item_kind):
+ return self._PAGE_TEMPLATE % (item_kind, playlist_id, page_num)
+
+ def _real_extract(self, url):
+ qs = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
+ playlist_kind = qs['pl_type'][0]
+ playlist_id = qs['pl_id'][0]
+ return self._extract_playlist(playlist_id, item_kind=playlist_kind)
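
# The suitable() gate above in stdlib terms (hedged sketch): parse_qs maps
# every key to a list of values, hence the [None] defaults and the [0]
# indexing; both pl_type and a numeric pl_id must be present for this
# extractor to claim the URL.
try:
    from urllib.parse import urlparse, parse_qs  # Python 3
except ImportError:
    from urlparse import urlparse, parse_qs  # Python 2

sample = 'https://rutube.ru/video/cecd58ed7d531fc0f3d795d51cee9026/?pl_id=3097&pl_type=tag'
params = parse_qs(urlparse(sample).query)
assert params.get('pl_type', [None])[0] == 'tag'
assert params.get('pl_id', [None])[0] == '3097'
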
class RUTVIE(InfoExtractor):
IE_DESC = 'RUTV.RU'
_VALID_URL = r'''(?x)
- https?://player\.(?:rutv\.ru|vgtrk\.com)/
- (?P<path>flash\d+v/container\.swf\?id=
- |iframe/(?P<type>swf|video|live)/id/
- |index/iframe/cast_id/)
- (?P<id>\d+)'''
+ https?://
+ (?:test)?player\.(?:rutv\.ru|vgtrk\.com)/
+ (?P<path>
+ flash\d+v/container\.swf\?id=|
+ iframe/(?P<type>swf|video|live)/id/|
+ index/iframe/cast_id/
+ )
+ (?P<id>\d+)
+ '''
_TESTS = [
{
'skip_download': True,
},
},
+ {
+ 'url': 'https://testplayer.vgtrk.com/iframe/live/id/19201/showZoomBtn/false/isPlay/true/',
+ 'only_matching': True,
+ },
]
@classmethod
def _extract_url(cls, webpage):
mobj = re.search(
- r'<iframe[^>]+?src=(["\'])(?P<url>https?://player\.(?:rutv\.ru|vgtrk\.com)/(?:iframe/(?:swf|video|live)/id|index/iframe/cast_id)/.+?)\1', webpage)
+ r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:test)?player\.(?:rutv\.ru|vgtrk\.com)/(?:iframe/(?:swf|video|live)/id|index/iframe/cast_id)/.+?)\1', webpage)
if mobj:
return mobj.group('url')
mobj = re.search(
- r'<meta[^>]+?property=(["\'])og:video\1[^>]+?content=(["\'])(?P<url>https?://player\.(?:rutv\.ru|vgtrk\.com)/flash\d+v/container\.swf\?id=.+?\2)',
+ r'<meta[^>]+?property=(["\'])og:video\1[^>]+?content=(["\'])(?P<url>https?://(?:test)?player\.(?:rutv\.ru|vgtrk\.com)/flash\d+v/container\.swf\?id=.+?\2)',
webpage)
if mobj:
return mobj.group('url')
--- /dev/null
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ determine_ext,
+ unified_timestamp,
+)
+
+
+class RuvIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?ruv\.is/(?:sarpurinn/[^/]+|node)/(?P<id>[^/]+(?:/\d+)?)'
+ _TESTS = [{
+ # m3u8
+ 'url': 'http://ruv.is/sarpurinn/ruv-aukaras/fh-valur/20170516',
+ 'md5': '66347652f4e13e71936817102acc1724',
+ 'info_dict': {
+ 'id': '1144499',
+ 'display_id': 'fh-valur/20170516',
+ 'ext': 'mp4',
+ 'title': 'FH - Valur',
+ 'description': 'Bein útsending frá 3. leik FH og Vals í úrslitum Olísdeildar karla í handbolta.',
+ 'timestamp': 1494963600,
+ 'upload_date': '20170516',
+ },
+ }, {
+ # mp3
+ 'url': 'http://ruv.is/sarpurinn/ras-2/morgunutvarpid/20170619',
+ 'md5': '395ea250c8a13e5fdb39d4670ef85378',
+ 'info_dict': {
+ 'id': '1153630',
+ 'display_id': 'morgunutvarpid/20170619',
+ 'ext': 'mp3',
+ 'title': 'Morgunútvarpið',
+ 'description': 'md5:a4cf1202c0a1645ca096b06525915418',
+ 'timestamp': 1497855000,
+ 'upload_date': '20170619',
+ },
+ }, {
+ 'url': 'http://ruv.is/sarpurinn/ruv/frettir/20170614',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.ruv.is/node/1151854',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://ruv.is/sarpurinn/klippa/secret-soltice-hefst-a-morgun',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://ruv.is/sarpurinn/ras-1/morgunvaktin/20170619',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, display_id)
+
+ title = self._og_search_title(webpage)
+
+ FIELD_RE = r'video\.%s\s*=\s*(["\'])(?P<url>(?:(?!\1).)+)\1'
+
+ media_url = self._html_search_regex(
+ FIELD_RE % 'src', webpage, 'video URL', group='url')
+
+ video_id = self._search_regex(
+ r'<link\b[^>]+\bhref=["\']https?://www\.ruv\.is/node/(\d+)',
+ webpage, 'video id', default=display_id)
+
+ ext = determine_ext(media_url)
+
+ if ext == 'm3u8':
+ formats = self._extract_m3u8_formats(
+ media_url, video_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls')
+ elif ext == 'mp3':
+ formats = [{
+ 'format_id': 'mp3',
+ 'url': media_url,
+ 'vcodec': 'none',
+ }]
+ else:
+ formats = [{
+ 'url': media_url,
+ }]
+
+ description = self._og_search_description(webpage, default=None)
+ thumbnail = self._og_search_thumbnail(
+ webpage, default=None) or self._search_regex(
+ FIELD_RE % 'poster', webpage, 'thumbnail', fatal=False)
+ timestamp = unified_timestamp(self._html_search_meta(
+ 'article:published_time', webpage, 'timestamp', fatal=False))
+
+ return {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'timestamp': timestamp,
+ 'formats': formats,
+ }
class SafariBaseIE(InfoExtractor):
_LOGIN_URL = 'https://www.safaribooksonline.com/accounts/login/'
- _SUCCESSFUL_LOGIN_REGEX = r'<a href="/accounts/logout/"[^>]*>Sign Out</a>'
_NETRC_MACHINE = 'safari'
_API_BASE = 'https://www.safaribooksonline.com/api/v1'
self._login()
def _login(self):
- # We only need to log in once for courses or individual videos
- if self.LOGGED_IN:
- return
-
(username, password) = self._get_login_info()
if username is None:
return
headers = std_headers.copy()
if 'Referer' not in headers:
headers['Referer'] = self._LOGIN_URL
- login_page_request = sanitized_Request(self._LOGIN_URL, headers=headers)
login_page = self._download_webpage(
- login_page_request, None,
- 'Downloading login form')
+ self._LOGIN_URL, None, 'Downloading login form', headers=headers)
+
+ def is_logged(webpage):
+ return any(re.search(p, webpage) for p in (
+ r'href=["\']/accounts/logout/', r'>Sign Out<'))
+
+ if is_logged(login_page):
+ self.LOGGED_IN = True
+ return
csrf = self._html_search_regex(
r"name='csrfmiddlewaretoken'\s+value='([^']+)'",
login_page = self._download_webpage(
request, None, 'Logging in as %s' % username)
- if re.search(self._SUCCESSFUL_LOGIN_REGEX, login_page) is None:
+ if not is_logged(login_page):
raise ExtractorError(
'Login failed; make sure your credentials are correct and try again.',
expected=True)
- SafariBaseIE.LOGGED_IN = True
-
- self.to_screen('Login successful')
+ self.LOGGED_IN = True
class SafariIE(SafariBaseIE):
formats = [{
'url': source['file'].replace('\\', ''),
'format_id': source.get('label'),
- 'height': self._search_regex(
- r'^(\d+)[pP]', source.get('label', ''), 'height', default=None),
+ 'height': int(self._search_regex(
+ r'^(\d+)[pP]', source.get('label', ''), 'height',
+ default=None)),
} for source in sources if source.get('file')]
self._sort_formats(formats)
page_title = mobj.group('title')
webpage = self._download_webpage(url, page_title)
slideshare_obj = self._search_regex(
- r'\$\.extend\(slideshare_object,\s*(\{.*?\})\);',
+ r'\$\.extend\(.*?slideshare_object,\s*(\{.*?\})\);',
webpage, 'slideshare object')
info = json.loads(slideshare_obj)
if info['slideshow']['type'] != 'video':
compat_str,
compat_urllib_parse_urlencode,
)
-from ..utils import ExtractorError
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+ try_get,
+)
class SohuIE(InfoExtractor):
formats.append({
'url': video_url,
'format_id': format_id,
- 'filesize': data['clipsBytes'][i],
- 'width': data['width'],
- 'height': data['height'],
- 'fps': data['fps'],
+ 'filesize': int_or_none(
+ try_get(data, lambda x: x['clipsBytes'][i])),
+ 'width': int_or_none(data.get('width')),
+ 'height': int_or_none(data.get('height')),
+ 'fps': int_or_none(data.get('fps')),
})
self._sort_formats(formats)
# coding: utf-8
from __future__ import unicode_literals
-import re
import itertools
+import re
from .common import (
InfoExtractor,
ExtractorError,
int_or_none,
unified_strdate,
+ update_url_query,
)
_VALID_URL = r'''(?x)^(?:https?://)?
(?:(?:(?:www\.|m\.)?soundcloud\.com/
+ (?!stations/track)
(?P<uploader>[\w\d-]+)/
(?!(?:tracks|sets(?:/.+?)?|reposts|likes|spotlight)/?(?:$|[?#]))
(?P<title>[\w\d-]+)/?
'license': 'cc-by-sa',
},
},
+ # private link, downloadable format
+ {
+ 'url': 'https://soundcloud.com/oriuplift/uponly-238-no-talking-wav/s-AyZUd',
+ 'md5': '64a60b16e617d41d0bef032b7f55441e',
+ 'info_dict': {
+ 'id': '340344461',
+ 'ext': 'wav',
+ 'title': 'Uplifting Only 238 [No Talking] (incl. Alex Feed Guestmix) (Aug 31, 2017) [wav]',
+ 'description': 'md5:fa20ee0fca76a3d6df8c7e57f3715366',
+ 'uploader': 'Ori Uplift Music',
+ 'upload_date': '20170831',
+ 'duration': 7449,
+ 'license': 'all-rights-reserved',
+ },
+ },
]
- _CLIENT_ID = '2t9loNQH90kzJcsFCODdigxfp325aq4z'
+ _CLIENT_ID = 'JlZIsxg2hY5WnBgtn3jfS0UYCl0K8DOg'
_IPHONE_CLIENT_ID = '376f225bf427445fc4bfb6b99b72e0bf'
@staticmethod
@classmethod
def _resolv_url(cls, url):
- return 'http://api.soundcloud.com/resolve.json?url=' + url + '&client_id=' + cls._CLIENT_ID
+ return 'https://api.soundcloud.com/resolve.json?url=' + url + '&client_id=' + cls._CLIENT_ID
def _extract_info_dict(self, info, full_title=None, quiet=False, secret_token=None):
track_id = compat_str(info['id'])
'license': info.get('license'),
}
formats = []
+ query = {'client_id': self._CLIENT_ID}
+ if secret_token is not None:
+ query['secret_token'] = secret_token
if info.get('downloadable', False):
# We can build a direct link to the song
- format_url = (
- 'https://api.soundcloud.com/tracks/{0}/download?client_id={1}'.format(
- track_id, self._CLIENT_ID))
+ format_url = update_url_query(
+ 'https://api.soundcloud.com/tracks/%s/download' % track_id, query)
formats.append({
'format_id': 'download',
'ext': info.get('original_format', 'mp3'),
# We have to retrieve the url
format_dict = self._download_json(
- 'http://api.soundcloud.com/i1/tracks/%s/streams' % track_id,
- track_id, 'Downloading track url', query={
- 'client_id': self._CLIENT_ID,
- 'secret_token': secret_token,
- })
+ 'https://api.soundcloud.com/i1/tracks/%s/streams' % track_id,
+ track_id, 'Downloading track url', query=query)
for key, stream_url in format_dict.items():
abr = int_or_none(self._search_regex(
# cannot always be used, sometimes it can give an HTTP 404 error
formats.append({
'format_id': 'fallback',
- 'url': info['stream_url'] + '?client_id=' + self._CLIENT_ID,
+ 'url': update_url_query(info['stream_url'], query),
'ext': ext,
})
track_id = mobj.group('track_id')
if track_id is not None:
- info_json_url = 'http://api.soundcloud.com/tracks/' + track_id + '.json?client_id=' + self._CLIENT_ID
+ info_json_url = 'https://api.soundcloud.com/tracks/' + track_id + '.json?client_id=' + self._CLIENT_ID
full_title = track_id
token = mobj.group('secret_token')
if token:
self.report_resolve(full_title)
- url = 'http://soundcloud.com/%s' % resolve_title
+ url = 'https://soundcloud.com/%s' % resolve_title
info_json_url = self._resolv_url(url)
info = self._download_json(info_json_url, full_title, 'Downloading info JSON')
'id': '2284613',
'title': 'The Royal Concept EP',
},
- 'playlist_mincount': 6,
+ 'playlist_mincount': 5,
}, {
'url': 'https://soundcloud.com/the-concept-band/sets/the-royal-concept-ep/token',
'only_matching': True,
# extract simple title (uploader + slug of song title)
slug_title = mobj.group('slug_title')
full_title = '%s/sets/%s' % (uploader, slug_title)
- url = 'http://soundcloud.com/%s/sets/%s' % (uploader, slug_title)
+ url = 'https://soundcloud.com/%s/sets/%s' % (uploader, slug_title)
token = mobj.group('token')
if token:
}
-class SoundcloudUserIE(SoundcloudPlaylistBaseIE):
+class SoundcloudPagedPlaylistBaseIE(SoundcloudPlaylistBaseIE):
+ _API_BASE = 'https://api.soundcloud.com'
+ _API_V2_BASE = 'https://api-v2.soundcloud.com'
+
+ def _extract_playlist(self, base_url, playlist_id, playlist_title):
+ COMMON_QUERY = {
+ 'limit': 50,
+ 'client_id': self._CLIENT_ID,
+ 'linked_partitioning': '1',
+ }
+
+ query = COMMON_QUERY.copy()
+ query['offset'] = 0
+
+ next_href = base_url + '?' + compat_urllib_parse_urlencode(query)
+
+ entries = []
+ for i in itertools.count():
+ response = self._download_json(
+ next_href, playlist_id, 'Downloading track page %s' % (i + 1))
+
+ collection = response['collection']
+ if not collection:
+ break
+
+ def resolve_permalink_url(candidates):
+ for cand in candidates:
+ if isinstance(cand, dict):
+ permalink_url = cand.get('permalink_url')
+ entry_id = self._extract_id(cand)
+ if permalink_url and permalink_url.startswith('http'):
+ return permalink_url, entry_id
+ # nothing matched: return an explicit pair so the tuple unpacking
+ # in the caller below cannot crash on None
+ return None, None
+
+ for e in collection:
+ permalink_url, entry_id = resolve_permalink_url((e, e.get('track'), e.get('playlist')))
+ if permalink_url:
+ entries.append(self.url_result(permalink_url, video_id=entry_id))
+
+ next_href = response.get('next_href')
+ if not next_href:
+ break
+
+ parsed_next_href = compat_urlparse.urlparse(response['next_href'])
+ qs = compat_urlparse.parse_qs(parsed_next_href.query)
+ qs.update(COMMON_QUERY)
+ next_href = compat_urlparse.urlunparse(
+ parsed_next_href._replace(query=compat_urllib_parse_urlencode(qs, True)))
+
+ return {
+ '_type': 'playlist',
+ 'id': playlist_id,
+ 'title': playlist_title,
+ 'entries': entries,
+ }
+
+
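
# Pagination contract assumed by _extract_playlist above (hedged): with
# linked_partitioning=1 the API answers {'collection': [...], 'next_href': ...}.
# A generic walker over such pages, with fetch_json standing in for
# _download_json (it is not a real helper of this codebase):
def walk_pages(first_url, fetch_json):
    next_href = first_url
    while next_href:
        page = fetch_json(next_href)
        for item in page.get('collection') or []:
            yield item
        next_href = page.get('next_href')
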
+class SoundcloudUserIE(SoundcloudPagedPlaylistBaseIE):
_VALID_URL = r'''(?x)
https?://
(?:(?:www|m)\.)?soundcloud\.com/
'url': 'https://soundcloud.com/grynpyret/spotlight',
'info_dict': {
'id': '7098329',
- 'title': 'GRYNPYRET (Spotlight)',
+ 'title': 'Grynpyret (Spotlight)',
},
'playlist_mincount': 1,
}]
- _API_BASE = 'https://api.soundcloud.com'
- _API_V2_BASE = 'https://api-v2.soundcloud.com'
-
_BASE_URL_MAP = {
- 'all': '%s/profile/soundcloud:users:%%s' % _API_V2_BASE,
- 'tracks': '%s/users/%%s/tracks' % _API_BASE,
- 'sets': '%s/users/%%s/playlists' % _API_V2_BASE,
- 'reposts': '%s/profile/soundcloud:users:%%s/reposts' % _API_V2_BASE,
- 'likes': '%s/users/%%s/likes' % _API_V2_BASE,
- 'spotlight': '%s/users/%%s/spotlight' % _API_V2_BASE,
+ 'all': '%s/profile/soundcloud:users:%%s' % SoundcloudPagedPlaylistBaseIE._API_V2_BASE,
+ 'tracks': '%s/users/%%s/tracks' % SoundcloudPagedPlaylistBaseIE._API_BASE,
+ 'sets': '%s/users/%%s/playlists' % SoundcloudPagedPlaylistBaseIE._API_V2_BASE,
+ 'reposts': '%s/profile/soundcloud:users:%%s/reposts' % SoundcloudPagedPlaylistBaseIE._API_V2_BASE,
+ 'likes': '%s/users/%%s/likes' % SoundcloudPagedPlaylistBaseIE._API_V2_BASE,
+ 'spotlight': '%s/users/%%s/spotlight' % SoundcloudPagedPlaylistBaseIE._API_V2_BASE,
}
_TITLE_MAP = {
mobj = re.match(self._VALID_URL, url)
uploader = mobj.group('user')
- url = 'http://soundcloud.com/%s/' % uploader
+ url = 'https://soundcloud.com/%s/' % uploader
resolv_url = self._resolv_url(url)
user = self._download_json(
resolv_url, uploader, 'Downloading user info')
resource = mobj.group('rsrc') or 'all'
- base_url = self._BASE_URL_MAP[resource] % user['id']
- COMMON_QUERY = {
- 'limit': 50,
- 'client_id': self._CLIENT_ID,
- 'linked_partitioning': '1',
- }
- query = COMMON_QUERY.copy()
- query['offset'] = 0
-
- next_href = base_url + '?' + compat_urllib_parse_urlencode(query)
- entries = []
- for i in itertools.count():
- response = self._download_json(
- next_href, uploader, 'Downloading track page %s' % (i + 1))
-
- collection = response['collection']
- if not collection:
- break
-
- def resolve_permalink_url(candidates):
- for cand in candidates:
- if isinstance(cand, dict):
- permalink_url = cand.get('permalink_url')
- entry_id = self._extract_id(cand)
- if permalink_url and permalink_url.startswith('http'):
- return permalink_url, entry_id
- for e in collection:
- permalink_url, entry_id = resolve_permalink_url((e, e.get('track'), e.get('playlist')))
- if permalink_url:
- entries.append(self.url_result(permalink_url, video_id=entry_id))
- next_href = response.get('next_href')
- if not next_href:
- break
- parsed_next_href = compat_urlparse.urlparse(response['next_href'])
- qs = compat_urlparse.parse_qs(parsed_next_href.query)
- qs.update(COMMON_QUERY)
- next_href = compat_urlparse.urlunparse(
- parsed_next_href._replace(query=compat_urllib_parse_urlencode(qs, True)))
- return {
- '_type': 'playlist',
- 'id': compat_str(user['id']),
- 'title': '%s (%s)' % (user['username'], self._TITLE_MAP[resource]),
- 'entries': entries,
- }
+ return self._extract_playlist(
+ self._BASE_URL_MAP[resource] % user['id'], compat_str(user['id']),
+ '%s (%s)' % (user['username'], self._TITLE_MAP[resource]))
+
+
+class SoundcloudTrackStationIE(SoundcloudPagedPlaylistBaseIE):
+ _VALID_URL = r'https?://(?:(?:www|m)\.)?soundcloud\.com/stations/track/[^/]+/(?P<id>[^/?#&]+)'
+ IE_NAME = 'soundcloud:trackstation'
+ _TESTS = [{
+ 'url': 'https://soundcloud.com/stations/track/officialsundial/your-text',
+ 'info_dict': {
+ 'id': '286017854',
+ 'title': 'Track station: your-text',
+ },
+ 'playlist_mincount': 47,
+ }]
+
+ def _real_extract(self, url):
+ track_name = self._match_id(url)
+
+ webpage = self._download_webpage(url, track_name)
+
+ track_id = self._search_regex(
+ r'soundcloud:track-stations:(\d+)', webpage, 'track id')
+
+ return self._extract_playlist(
+ '%s/stations/soundcloud:track-stations:%s/tracks'
+ % (self._API_V2_BASE, track_id),
+ track_id, 'Track station: %s' % track_name)
class SoundcloudPlaylistIE(SoundcloudPlaylistBaseIE):
_VALID_URL = r'https?://api\.soundcloud\.com/playlists/(?P<id>[0-9]+)(?:/?\?secret_token=(?P<token>[^&]+?))?$'
IE_NAME = 'soundcloud:playlist'
_TESTS = [{
- 'url': 'http://api.soundcloud.com/playlists/4110309',
+ 'url': 'https://api.soundcloud.com/playlists/4110309',
'info_dict': {
'id': '4110309',
'title': 'TILT Brass - Bowery Poetry Club, August \'03 [Non-Site SCR 02]',
import re
from .common import InfoExtractor
+from .nexx import NexxEmbedIE
from .spiegeltv import SpiegeltvIE
from ..compat import compat_urlparse
from ..utils import (
},
'playlist_count': 6,
+ }, {
+ # Nexx iFrame embed
+ 'url': 'http://www.spiegel.de/sptv/spiegeltv/spiegel-tv-ueber-schnellste-katapult-achterbahn-der-welt-taron-a-1137884.html',
+ 'info_dict': {
+ 'id': '161464',
+ 'ext': 'mp4',
+ 'title': 'Nervenkitzel Achterbahn',
+ 'alt_title': 'Karussellbauer in Deutschland',
+ 'description': 'md5:ffe7b1cc59a01f585e0569949aef73cc',
+ 'release_year': 2005,
+ 'creator': 'SPIEGEL TV',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 2761,
+ 'timestamp': 1394021479,
+ 'upload_date': '20140305',
+ },
+ 'params': {
+ 'format': 'bestvideo',
+ 'skip_download': True,
+ },
}]
def _real_extract(self, url):
entries = [
self.url_result(compat_urlparse.urljoin(
self.http_scheme() + '//spiegel.de/', embed_path))
- for embed_path in embeds
- ]
- return self.playlist_result(entries)
+ for embed_path in embeds]
+ if embeds:
+ return self.playlist_result(entries)
+
+ return self.playlist_from_matches(
+ NexxEmbedIE._extract_urls(webpage), ie=NexxEmbedIE.ie_key())
-# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
-from ..compat import compat_urllib_parse_urlparse
-from ..utils import (
- determine_ext,
- float_or_none,
-)
+from .nexx import NexxIE
class SpiegeltvIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?spiegel\.tv/(?:#/)?filme/(?P<id>[\-a-z0-9]+)'
- _TESTS = [{
- 'url': 'http://www.spiegel.tv/filme/flug-mh370/',
- 'info_dict': {
- 'id': 'flug-mh370',
- 'ext': 'm4v',
- 'title': 'Flug MH370',
- 'description': 'Das RƤtsel um die Boeing 777 der Malaysia-Airlines',
- 'thumbnail': r're:http://.*\.jpg$',
- },
- 'params': {
- # m3u8 download
- 'skip_download': True,
- }
- }, {
- 'url': 'http://www.spiegel.tv/#/filme/alleskino-die-wahrheit-ueber-maenner/',
+ _VALID_URL = r'https?://(?:www\.)?spiegel\.tv/videos/(?P<id>\d+)'
+ _TEST = {
+ 'url': 'http://www.spiegel.tv/videos/161681-flug-mh370/',
'only_matching': True,
- }]
+ }
def _real_extract(self, url):
- if '/#/' in url:
- url = url.replace('/#/', '/')
- video_id = self._match_id(url)
- webpage = self._download_webpage(url, video_id)
- title = self._html_search_regex(r'<h1.*?>(.*?)</h1>', webpage, 'title')
-
- apihost = 'http://spiegeltv-ivms2-restapi.s3.amazonaws.com'
- version_json = self._download_json(
- '%s/version.json' % apihost, video_id,
- note='Downloading version information')
- version_name = version_json['version_name']
-
- slug_json = self._download_json(
- '%s/%s/restapi/slugs/%s.json' % (apihost, version_name, video_id),
- video_id,
- note='Downloading object information')
- oid = slug_json['object_id']
-
- media_json = self._download_json(
- '%s/%s/restapi/media/%s.json' % (apihost, version_name, oid),
- video_id, note='Downloading media information')
- uuid = media_json['uuid']
- is_wide = media_json['is_wide']
-
- server_json = self._download_json(
- 'http://spiegeltv-prod-static.s3.amazonaws.com/projectConfigs/projectConfig.json',
- video_id, note='Downloading server information')
-
- format = '16x9' if is_wide else '4x3'
-
- formats = []
- for streamingserver in server_json['streamingserver']:
- endpoint = streamingserver.get('endpoint')
- if not endpoint:
- continue
- play_path = 'mp4:%s_spiegeltv_0500_%s.m4v' % (uuid, format)
- if endpoint.startswith('rtmp'):
- formats.append({
- 'url': endpoint,
- 'format_id': 'rtmp',
- 'app': compat_urllib_parse_urlparse(endpoint).path[1:],
- 'play_path': play_path,
- 'player_path': 'http://prod-static.spiegel.tv/frontend-076.swf',
- 'ext': 'flv',
- 'rtmp_live': True,
- })
- elif determine_ext(endpoint) == 'm3u8':
- formats.append({
- 'url': endpoint.replace('[video]', play_path),
- 'ext': 'm4v',
- 'format_id': 'hls', # Prefer hls since it allows to workaround georestriction
- 'protocol': 'm3u8',
- 'preference': 1,
- 'http_headers': {
- 'Accept-Encoding': 'deflate', # gzip causes trouble on the server side
- },
- })
- else:
- formats.append({
- 'url': endpoint,
- })
- self._check_formats(formats, video_id)
-
- thumbnails = []
- for image in media_json['images']:
- thumbnails.append({
- 'url': image['url'],
- 'width': image['width'],
- 'height': image['height'],
- })
-
- description = media_json['subtitle']
- duration = float_or_none(media_json.get('duration_in_ms'), scale=1000)
-
- return {
- 'id': video_id,
- 'title': title,
- 'description': description,
- 'duration': duration,
- 'thumbnails': thumbnails,
- 'formats': formats,
- }
+ return self.url_result(
+ 'https://api.nexx.cloud/v3/748/videos/byid/%s'
+ % self._match_id(url), ie=NexxIE.ie_key())
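
# Net effect of the rewrite above (hedged reading of the diff): SpiegeltvIE is
# now a thin alias that feeds the numeric id into the Nexx domain-748 API URL,
# so format and metadata extraction is delegated entirely to NexxIE instead of
# the removed RTMP/HLS plumbing.
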
import re
from .common import InfoExtractor
-from ..utils import js_to_json
+from ..utils import (
+ determine_ext,
+ int_or_none,
+ js_to_json,
+)
class SportBoxEmbedIE(InfoExtractor):
'info_dict': {
'id': '211355',
'ext': 'mp4',
- 'title': 'В Новороссийске прошел детский турнир «Поле славы боевой»',
+ 'title': '211355',
'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 292,
+ 'view_count': int,
},
'params': {
# m3u8 download
}, {
'url': 'http://news.sportbox.ru/vdl/player?nid=370908&only_player=1&autostart=false&playeri=2&height=340&width=580',
'only_matching': True,
+ }, {
+ 'url': 'https://news.sportbox.ru/vdl/player/media/193095',
+ 'only_matching': True,
}]
@staticmethod
webpage = self._download_webpage(url, video_id)
- formats = []
-
- def cleanup_js(code):
- # desktop_advert_config contains complex JavaScript and we don't need it
- return js_to_json(re.sub(r'desktop_advert_config.*', '', code))
-
- jwplayer_data = self._parse_json(self._search_regex(
- r'(?s)player\.setup\(({.+?})\);', webpage, 'jwplayer settings'), video_id,
- transform_source=cleanup_js)
-
- hls_url = jwplayer_data.get('hls_url')
- if hls_url:
- formats.extend(self._extract_m3u8_formats(
- hls_url, video_id, ext='mp4', m3u8_id='hls'))
-
- rtsp_url = jwplayer_data.get('rtsp_url')
- if rtsp_url:
- formats.append({
- 'url': rtsp_url,
- 'format_id': 'rtsp',
- })
+ wjplayer_data = self._parse_json(
+ self._search_regex(
+ r'(?s)wjplayer\(({.+?})\);', webpage, 'wjplayer settings'),
+ video_id, transform_source=js_to_json)
+ formats = []
+ for source in wjplayer_data['sources']:
+ src = source.get('src')
+ if not src:
+ continue
+ if determine_ext(src) == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ src, video_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls', fatal=False))
+ else:
+ formats.append({
+ 'url': src,
+ })
self._sort_formats(formats)
- title = jwplayer_data['node_title']
- thumbnail = jwplayer_data.get('image_url')
+ view_count = int_or_none(self._search_regex(
+ r'Просмотров\s*:\s*(\d+)', webpage, 'view count', default=None))
return {
'id': video_id,
- 'title': title,
- 'thumbnail': thumbnail,
+ 'title': video_id,
+ 'thumbnail': wjplayer_data.get('poster'),
+ 'duration': int_or_none(wjplayer_data.get('duration')),
+ 'view_count': view_count,
'formats': formats,
}
'ext': 'mp4',
'title': '20170315_150006.mp4',
}
+ }, {
+ # no og:title
+ 'url': 'https://streamango.com/embed/foqebrpftarclpob/asdf_asd_2_mp4',
+ 'info_dict': {
+ 'id': 'foqebrpftarclpob',
+ 'ext': 'mp4',
+ 'title': 'foqebrpftarclpob',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
}, {
'url': 'https://streamango.com/embed/clapasobsptpkdfe/20170315_150006_mp4',
'only_matching': True,
webpage = self._download_webpage(url, video_id)
- title = self._og_search_title(webpage)
+ title = self._og_search_title(webpage, default=video_id)
formats = []
for format_ in re.findall(r'({[^}]*\bsrc\s*:\s*[^}]*})', webpage):
_TESTS = [{
'url': 'http://www.stream.cz/peklonataliri/765767-ecka-pro-deti',
- 'md5': '6d3ca61a8d0633c9c542b92fcb936b0c',
+ 'md5': '934bb6a6d220d99c010783c9719960d5',
'info_dict': {
'id': '765767',
'ext': 'mp4',
},
}, {
'url': 'http://www.stream.cz/blanik/10002447-tri-roky-pro-mazanka',
- 'md5': 'e54a254fb8b871968fd8403255f28589',
+ 'md5': '849a88c1e1ca47d41403c2ba5e59e261',
'info_dict': {
'id': '10002447',
'ext': 'mp4',
else:
title = data['name']
+ subtitles = {}
+ srt_url = data.get('subtitles_srt')
+ if srt_url:
+ subtitles['cs'] = [{
+ 'ext': 'srt',
+ 'url': srt_url,
+ }]
+
return {
'id': video_id,
'title': title,
'description': data.get('web_site_text'),
'duration': int_or_none(data.get('duration')),
'view_count': int_or_none(data.get('views')),
+ 'subtitles': subtitles,
}
if video_id:
data = self._download_json(
- 'http://www.svt.se/videoplayer-api/video/%s' % video_id, video_id)
+ 'https://api.svt.se/videoplayer-api/video/%s' % video_id,
+ video_id, headers=self.geo_verification_headers())
info_dict = self._extract_video(data, video_id)
if not info_dict.get('title'):
info_dict['title'] = re.sub(
--- /dev/null
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from .ooyala import OoyalaIE
+
+
+class TastyTradeIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?tastytrade\.com/tt/shows/[^/]+/episodes/(?P<id>[^/?#&]+)'
+
+ _TESTS = [{
+ 'url': 'https://www.tastytrade.com/tt/shows/market-measures/episodes/correlation-in-short-volatility-06-28-2017',
+ 'info_dict': {
+ 'id': 'F3bnlzbToeI6pLEfRyrlfooIILUjz4nM',
+ 'ext': 'mp4',
+ 'title': 'A History of Teaming',
+ 'description': 'md5:2a9033db8da81f2edffa4c99888140b3',
+ 'duration': 422.255,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'add_ie': ['Ooyala'],
+ }, {
+ 'url': 'https://www.tastytrade.com/tt/shows/daily-dose/episodes/daily-dose-06-30-2017',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+
+ ooyala_code = self._search_regex(
+ r'data-media-id=(["\'])(?P<code>(?:(?!\1).)+)\1',
+ webpage, 'ooyala code', group='code')
+
+ info = self._search_json_ld(webpage, display_id, fatal=False)
+ info.update({
+ '_type': 'url_transparent',
+ 'ie_key': OoyalaIE.ie_key(),
+ 'url': 'ooyala:%s' % ooyala_code,
+ 'display_id': display_id,
+ })
+ return info
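
# Hedged note on the flow above: _search_json_ld(..., fatal=False) returns
# whatever metadata the page's JSON-LD block yields (possibly nothing), and
# update() layers the Ooyala delegation keys on top, so OoyalaIE resolves
# 'ooyala:<code>' while any JSON-LD fields survive in the final info dict.
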
class TBSIE(TurnerBaseIE):
+ # https://github.com/rg3/youtube-dl/issues/13658
+ _WORKING = False
+
_VALID_URL = r'https?://(?:www\.)?(?P<site>tbs|tntdrama)\.com/videos/(?:[^/]+/)+(?P<id>[^/?#]+)\.html'
_TESTS = [{
'url': 'http://www.tbs.com/videos/people-of-earth/season-1/extras/2007318/theatrical-trailer.html',
'ext': 'mp4',
'title': 'Theatrical Trailer',
'description': 'Catch the latest comedy from TBS, People of Earth, premiering Halloween night--Monday, October 31, at 9/8c.',
- }
+ },
+ 'skip': 'TBS videos are deleted after a while',
}, {
'url': 'http://www.tntdrama.com/videos/good-behavior/season-1/extras/1538823/you-better-run.html',
'md5': 'ce53c6ead5e9f3280b4ad2031a6fab56',
'ext': 'mp4',
'title': 'You Better Run',
'description': 'Letty Raines must figure out what she\'s running toward while running away from her past. Good Behavior premieres November 15 at 9/8c.',
- }
+ },
+ 'skip': 'TBS videos are deleted after a while',
}]
def _real_extract(self, url):
+++ /dev/null
-# coding: utf-8
-from __future__ import unicode_literals
-
-from .common import InfoExtractor
-from .jwplatform import JWPlatformIE
-from ..utils import unified_strdate
-
-
-class TeamFourStarIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?teamfourstar\.com/(?P<id>[a-z0-9\-]+)'
- _TEST = {
- 'url': 'http://teamfourstar.com/tfs-abridged-parody-episode-1-2/',
- 'info_dict': {
- 'id': '0WdZO31W',
- 'title': 'TFS Abridged Parody Episode 1',
- 'description': 'md5:d60bc389588ebab2ee7ad432bda953ae',
- 'ext': 'mp4',
- 'timestamp': 1394168400,
- 'upload_date': '20080508',
- },
- }
-
- def _real_extract(self, url):
- display_id = self._match_id(url)
- webpage = self._download_webpage(url, display_id)
-
- jwplatform_url = JWPlatformIE._extract_url(webpage)
-
- video_title = self._html_search_regex(
- r'<h1[^>]+class="entry-title"[^>]*>(?P<title>.+?)</h1>',
- webpage, 'title')
- video_date = unified_strdate(self._html_search_regex(
- r'<span[^>]+class="meta-date date updated"[^>]*>(?P<date>.+?)</span>',
- webpage, 'date', fatal=False))
- video_description = self._html_search_regex(
- r'(?s)<div[^>]+class="content-inner"[^>]*>.*?(?P<description><p>.+?)</div>',
- webpage, 'description', fatal=False)
- video_thumbnail = self._og_search_thumbnail(webpage)
-
- return {
- '_type': 'url_transparent',
- 'display_id': display_id,
- 'title': video_title,
- 'description': video_description,
- 'upload_date': video_date,
- 'thumbnail': video_thumbnail,
- 'url': jwplatform_url,
- }
from .common import InfoExtractor
from ..compat import compat_str
-from ..utils import int_or_none
+from ..utils import (
+ int_or_none,
+ try_get,
+)
class TEDIE(InfoExtractor):
}
def _extract_info(self, webpage):
- info_json = self._search_regex(r'q\("\w+.init",({.+})\)</script>',
- webpage, 'info json')
+ info_json = self._search_regex(
+ r'(?s)q\(\s*"\w+.init"\s*,\s*({.+})\)\s*</script>',
+ webpage, 'info json')
return json.loads(info_json)
def _real_extract(self, url):
webpage = self._download_webpage(url, name,
'Downloading playlist webpage')
info = self._extract_info(webpage)
- playlist_info = info['playlist']
+
+ playlist_info = try_get(
+ info, lambda x: x['__INITIAL_DATA__']['playlist'],
+ dict) or info['playlist']
playlist_entries = [
self.url_result('http://www.ted.com/talks/' + talk['slug'], self.ie_key())
- for talk in info['talks']
+ for talk in try_get(
+ info, lambda x: x['__INITIAL_DATA__']['talks'],
+ dict) or info['talks']
]
return self.playlist_result(
playlist_entries,
def _talk_info(self, url, video_name):
webpage = self._download_webpage(url, video_name)
- self.report_extraction(video_name)
- talk_info = self._extract_info(webpage)['talks'][0]
+ info = self._extract_info(webpage)
+
+ talk_info = try_get(
+ info, lambda x: x['__INITIAL_DATA__']['talks'][0],
+ dict) or info['talks'][0]
+
+ title = talk_info['title'].strip()
external = talk_info.get('external')
if external:
'url': ext_url or external['uri'],
}
+ native_downloads = try_get(
+ talk_info, lambda x: x['downloads']['nativeDownloads'],
+ dict) or talk_info['nativeDownloads']
+
formats = [{
'url': format_url,
'format_id': format_id,
'format': format_id,
- } for (format_id, format_url) in talk_info['nativeDownloads'].items() if format_url is not None]
+ } for (format_id, format_url) in native_downloads.items() if format_url is not None]
if formats:
for f in formats:
finfo = self._NATIVE_FORMATS.get(f['format_id'])
if finfo:
f.update(finfo)
+ player_talk = talk_info['player_talks'][0]
+
+ resources_ = player_talk.get('resources') or talk_info.get('resources')
+
http_url = None
- for format_id, resources in talk_info['resources'].items():
+ for format_id, resources in resources_.items():
if format_id == 'h264':
for resource in resources:
h264_url = resource.get('file')
video_id = compat_str(talk_info['id'])
- thumbnail = talk_info['thumb']
- if not thumbnail.startswith('http'):
- thumbnail = 'http://' + thumbnail
return {
'id': video_id,
- 'title': talk_info['title'].strip(),
- 'uploader': talk_info['speaker'],
- 'thumbnail': thumbnail,
+ 'title': title,
+ 'uploader': player_talk.get('speaker') or talk_info.get('speaker'),
+ 'thumbnail': player_talk.get('thumb') or talk_info.get('thumb'),
'description': self._og_search_description(webpage),
'subtitles': self._get_subtitles(video_id, talk_info),
'formats': formats,
}
def _get_subtitles(self, video_id, talk_info):
- languages = [lang['languageCode'] for lang in talk_info.get('languages', [])]
- if languages:
- sub_lang_list = {}
- for l in languages:
- sub_lang_list[l] = [
- {
- 'url': 'http://www.ted.com/talks/subtitles/id/%s/lang/%s/format/%s' % (video_id, l, ext),
- 'ext': ext,
- }
- for ext in ['ted', 'srt']
- ]
- return sub_lang_list
- else:
- return {}
+ sub_lang_list = {}
+ for language in try_get(
+ talk_info,
+ (lambda x: x['downloads']['languages'],
+ lambda x: x['languages']), list) or []:
+ lang_code = language.get('languageCode') or language.get('ianaCode')
+ if not lang_code:
+ continue
+ sub_lang_list[lang_code] = [
+ {
+ 'url': 'http://www.ted.com/talks/subtitles/id/%s/lang/%s/format/%s' % (video_id, lang_code, ext),
+ 'ext': ext,
+ }
+ for ext in ['ted', 'srt']
+ ]
+ return sub_lang_list
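
# Shape of the mapping returned above (illustrative values; this is the
# standard youtube-dl subtitles structure: one list of format dicts per
# language code):
sample_subtitles = {
    'en': [
        {'url': 'http://www.ted.com/talks/subtitles/id/1/lang/en/format/ted', 'ext': 'ted'},
        {'url': 'http://www.ted.com/talks/subtitles/id/1/lang/en/format/srt', 'ext': 'srt'},
    ],
}
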
def _watch_info(self, url, name):
webpage = self._download_webpage(url, name)
from __future__ import unicode_literals
from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import try_get
class ThisOldHouseIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?thisoldhouse\.com/(?:watch|how-to|tv-episode)/(?P<id>[^/?#]+)'
_TESTS = [{
'url': 'https://www.thisoldhouse.com/how-to/how-to-build-storage-bench',
- 'md5': '946f05bbaa12a33f9ae35580d2dfcfe3',
+ 'md5': '568acf9ca25a639f0c4ff905826b662f',
'info_dict': {
'id': '2REGtUDQ',
'ext': 'mp4',
def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
- drupal_settings = self._parse_json(self._search_regex(
- r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);',
- webpage, 'drupal settings'), display_id)
- video_id = drupal_settings['jwplatform']['video_id']
+ video_id = self._search_regex(
+ (r'data-mid=(["\'])(?P<id>(?:(?!\1).)+)\1',
+ r'id=(["\'])inline-video-player-(?P<id>(?:(?!\1).)+)\1'),
+ webpage, 'video id', default=None, group='id')
+ if not video_id:
+ drupal_settings = self._parse_json(self._search_regex(
+ r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);',
+ webpage, 'drupal settings'), display_id)
+ video_id = try_get(
+ drupal_settings, lambda x: x['jwplatform']['video_id'],
+ compat_str) or list(drupal_settings['comScore'])[0]
return self.url_result('jwplatform:' + video_id, 'JWPlatform', video_id)
class ToggleIE(InfoExtractor):
IE_NAME = 'toggle'
- _VALID_URL = r'https?://video\.toggle\.sg/(?:en|zh)/(?:series|clips|movies)/(?:[^/]+/)+(?P<id>[0-9]+)'
+ _VALID_URL = r'https?://video\.toggle\.sg/(?:en|zh)/(?:[^/]+/){2,}(?P<id>[0-9]+)'
_TESTS = [{
'url': 'http://video.toggle.sg/en/series/lion-moms-tif/trailers/lion-moms-premier/343115',
'info_dict': {
}, {
'url': 'http://video.toggle.sg/en/movies/seven-days/321936',
'only_matching': True,
+ }, {
+ 'url': 'https://video.toggle.sg/en/tv-show/news/may-2017-cna-singapore-tonight/fri-19-may-2017/512456',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://video.toggle.sg/en/channels/eleven-plus/401585',
+ 'only_matching': True,
}]
_FORMAT_PREFERENCES = {
from ..utils import (
int_or_none,
js_to_json,
- ExtractorError,
urlencode_postdata,
extract_attributes,
smuggle_url,
def _real_extract(self, url):
path = self._match_id(url)
metadata = self._download_json('http://ici.tou.tv/presentation/%s' % path, path)
+ # IsDrm does not necessarily mean the video is DRM protected (see
+ # https://github.com/rg3/youtube-dl/issues/13994).
if metadata.get('IsDrm'):
- raise ExtractorError('This video is DRM protected.', expected=True)
+ self.report_warning('This video is probably DRM protected.', path)
video_id = metadata['IdMedia']
details = metadata['Details']
title = details['OriginalTitle']
class ToypicsIE(InfoExtractor):
- IE_DESC = 'Toypics user profile'
- _VALID_URL = r'https?://videos\.toypics\.net/view/(?P<id>[0-9]+)/.*'
+ IE_DESC = 'Toypics video'
+ _VALID_URL = r'https?://videos\.toypics\.net/view/(?P<id>[0-9]+)'
_TEST = {
'url': 'http://videos.toypics.net/view/514/chancebulged,-2-1/',
'md5': '16e806ad6d6f58079d210fe30985e08b',
'info_dict': {
'id': '514',
'ext': 'mp4',
- 'title': 'Chance-Bulge\'d, 2',
+ 'title': "Chance-Bulge'd, 2",
'age_limit': 18,
'uploader': 'kidsune',
}
}
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('id')
- page = self._download_webpage(url, video_id)
- video_url = self._html_search_regex(
- r'src:\s+"(http://static[0-9]+\.toypics\.net/flvideo/[^"]+)"', page, 'video URL')
- title = self._html_search_regex(
- r'<title>Toypics - ([^<]+)</title>', page, 'title')
- username = self._html_search_regex(
- r'toypics.net/([^/"]+)" class="user-name">', page, 'username')
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
+ formats = self._parse_html5_media_entries(
+ url, webpage, video_id)[0]['formats']
+ title = self._html_search_regex([
+ r'<h1[^>]+class=["\']view-video-title[^>]+>([^<]+)</h',
+ r'<title>([^<]+) - Toypics</title>',
+ ], webpage, 'title')
+
+ uploader = self._html_search_regex(
+ r'More videos from <strong>([^<]+)</strong>', webpage, 'uploader',
+ fatal=False)
+
return {
'id': video_id,
- 'url': video_url,
+ 'formats': formats,
'title': title,
- 'uploader': username,
+ 'uploader': uploader,
'age_limit': 18,
}
class ToypicsUserIE(InfoExtractor):
IE_DESC = 'Toypics user profile'
- _VALID_URL = r'https?://videos\.toypics\.net/(?P<username>[^/?]+)(?:$|[?#])'
+ _VALID_URL = r'https?://videos\.toypics\.net/(?!view)(?P<id>[^/?#&]+)'
_TEST = {
'url': 'http://videos.toypics.net/Mikey',
'info_dict': {
}
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- username = mobj.group('username')
+ username = self._match_id(url)
profile_page = self._download_webpage(
url, username, note='Retrieving profile page')
note='Downloading page %d/%d' % (n, page_count))
urls.extend(
re.findall(
- r'<p class="video-entry-title">\s+<a href="(https?://videos.toypics.net/view/[^"]+)">',
+ r'<div[^>]+class=["\']preview[^>]+>\s*<a[^>]+href="(https?://videos\.toypics\.net/view/[^"]+)"',
lpage))
return {
from __future__ import unicode_literals
from .common import InfoExtractor
-from ..compat import compat_str
-from ..utils import (
- ExtractorError,
- int_or_none,
- InAdvancePagedList,
- float_or_none,
- unescapeHTML,
-)
-
-
-class TudouIE(InfoExtractor):
- IE_NAME = 'tudou'
- _VALID_URL = r'https?://(?:www\.)?tudou\.com/(?:(?:programs|wlplay)/view|(?:listplay|albumplay)/[\w-]{11})/(?P<id>[\w-]{11})'
- _TESTS = [{
- 'url': 'http://www.tudou.com/listplay/zzdE77v6Mmo/2xN2duXMxmw.html',
- 'md5': '140a49ed444bd22f93330985d8475fcb',
- 'info_dict': {
- 'id': '159448201',
- 'ext': 'f4v',
- 'title': '卡马乔国足开大脚长传冲吊集锦',
- 'thumbnail': r're:^https?://.*\.jpg$',
- 'timestamp': 1372113489000,
- 'description': '卡马乔卡家军，开大脚先进战术不完全集锦！',
- 'duration': 289.04,
- 'view_count': int,
- 'filesize': int,
- }
- }, {
- 'url': 'http://www.tudou.com/programs/view/ajX3gyhL0pc/',
- 'info_dict': {
- 'id': '117049447',
- 'ext': 'f4v',
- 'title': 'La Sylphide-Bolshoi-Ekaterina Krysanova & Vyacheslav Lopatin 2012',
- 'thumbnail': r're:^https?://.*\.jpg$',
- 'timestamp': 1349207518000,
- 'description': 'md5:294612423894260f2dcd5c6c04fe248b',
- 'duration': 5478.33,
- 'view_count': int,
- 'filesize': int,
- }
- }]
-
- _PLAYER_URL = 'http://js.tudouui.com/bin/lingtong/PortalPlayer_177.swf'
-
- # Translated from tudou/tools/TVCHelper.as in PortalPlayer_193.swf
- # 0001, 0002 and 4001 are not included as they indicate temporary issues
- TVC_ERRORS = {
- '0003': 'The video is deleted or does not exist',
- '1001': 'This video is unavailable due to licensing issues',
- '1002': 'This video is unavailable as it\'s under review',
- '1003': 'This video is unavailable as it\'s under review',
- '3001': 'Password required',
- '5001': 'This video is available in Mainland China only due to licensing issues',
- '7001': 'This video is unavailable',
- '8001': 'This video is unavailable due to licensing issues',
- }
-
- def _url_for_id(self, video_id, quality=None):
- info_url = 'http://v2.tudou.com/f?id=' + compat_str(video_id)
- if quality:
- info_url += '&hd' + quality
- xml_data = self._download_xml(info_url, video_id, 'Opening the info XML page')
- error = xml_data.attrib.get('error')
- if error is not None:
- raise ExtractorError('Tudou said: %s' % error, expected=True)
- final_url = xml_data.text
- return final_url
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
- item_data = self._download_json(
- 'http://www.tudou.com/tvp/getItemInfo.action?ic=%s' % video_id, video_id)
-
- youku_vcode = item_data.get('vcode')
- if youku_vcode:
- return self.url_result('youku:' + youku_vcode, ie='Youku')
-
- if not item_data.get('itemSegs'):
- tvc_code = item_data.get('tvcCode')
- if tvc_code:
- err_msg = self.TVC_ERRORS.get(tvc_code)
- if err_msg:
- raise ExtractorError('Tudou said: %s' % err_msg, expected=True)
- raise ExtractorError('Unexpected error %s returned from Tudou' % tvc_code)
- raise ExtractorError('Unxpected error returned from Tudou')
-
- title = unescapeHTML(item_data['kw'])
- description = item_data.get('desc')
- thumbnail_url = item_data.get('pic')
- view_count = int_or_none(item_data.get('playTimes'))
- timestamp = int_or_none(item_data.get('pt'))
-
- segments = self._parse_json(item_data['itemSegs'], video_id)
- # It looks like the keys are the arguments that have to be passed as
- # the hd field in the request url, we pick the higher
- # Also, filter non-number qualities (see issue #3643).
- quality = sorted(filter(lambda k: k.isdigit(), segments.keys()),
- key=lambda k: int(k))[-1]
- parts = segments[quality]
- len_parts = len(parts)
- if len_parts > 1:
- self.to_screen('%s: found %s parts' % (video_id, len_parts))
-
- def part_func(partnum):
- part = parts[partnum]
- part_id = part['k']
- final_url = self._url_for_id(part_id, quality)
- ext = (final_url.split('?')[0]).split('.')[-1]
- return [{
- 'id': '%s' % part_id,
- 'url': final_url,
- 'ext': ext,
- 'title': title,
- 'thumbnail': thumbnail_url,
- 'description': description,
- 'view_count': view_count,
- 'timestamp': timestamp,
- 'duration': float_or_none(part.get('seconds'), 1000),
- 'filesize': int_or_none(part.get('size')),
- 'http_headers': {
- 'Referer': self._PLAYER_URL,
- },
- }]
-
- entries = InAdvancePagedList(part_func, len_parts, 1)
-
- return {
- '_type': 'multi_video',
- 'entries': entries,
- 'id': video_id,
- 'title': title,
- }
class TudouPlaylistIE(InfoExtractor):
import re
from .common import InfoExtractor
+from ..compat import compat_str
from ..utils import (
ExtractorError,
int_or_none,
for child in item:
m = re.search(r'url_video_(?P<quality>.+)', child.tag)
if m:
- quality = m.group('quality')
+ quality = compat_str(m.group('quality'))
formats.append({
'format_id': quality,
'url': child.text,
tv4\.se/(?:[^/]+)/klipp/(?:.*)-|
tv4play\.se/
(?:
- (?:program|barn)/(?:[^\?]+)\?video_id=|
+ (?:program|barn)/(?:[^/]+/|(?:[^\?]+)\?video_id=)|
iframe/video/|
film/|
sport/|
'url': 'http://www.tv4play.se/barn/looney-tunes?video_id=3062412',
'only_matching': True,
},
+ {
+ 'url': 'http://www.tv4play.se/program/farang/3922081',
+ 'only_matching': True,
+ }
]
def _real_extract(self, url):
int_or_none,
parse_iso8601,
qualities,
+ smuggle_url,
try_get,
+ unsmuggle_url,
update_url_query,
)
]
def _real_extract(self, url):
+ url, smuggled_data = unsmuggle_url(url, {})
+ self._initialize_geo_bypass(smuggled_data.get('geo_countries'))
+
video_id = self._match_id(url)
geo_country = self._search_regex(
r'https?://[^/]+\.([a-z]{2})', url,
r'currentVideo["\']\s*:\s*.+?["\']id["\']\s*:\s*["\'](\d{6,})',
webpage, 'video id')
- return self.url_result('mtg:%s' % video_id, TVPlayIE.ie_key())
+ return self.url_result(
+ smuggle_url(
+ 'mtg:%s' % video_id,
+ {'geo_countries': [
+ compat_urlparse.urlparse(url).netloc.rsplit('.', 1)[-1]]}),
+ ie=TVPlayIE.ie_key(), video_id=video_id)
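+ # Editorial note: the smuggled geo country is simply the TLD of the
+ # embedding page (a .se URL implies SE), mirroring the
+ # unsmuggle_url()/_initialize_geo_bypass() pair added to _real_extract
+ # above.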
'https://tvplayer.com/watch/context', display_id,
'Downloading JSON context', query={
'resource': resource_id,
- 'nonce': token,
+ 'gen': token,
})
validate = context['validate']
class TwentyFourVideoIE(InfoExtractor):
IE_NAME = '24video'
- _VALID_URL = r'https?://(?P<host>(?:www\.)?24video\.(?:net|me|xxx|sex|tube))/(?:video/(?:view|xml)/|player/new24_play\.swf\?id=)(?P<id>\d+)'
+ _VALID_URL = r'https?://(?P<host>(?:www\.)?24video\.(?:net|me|xxx|sex|tube|adult))/(?:video/(?:view|xml)/|player/new24_play\.swf\?id=)(?P<id>\d+)'
_TESTS = [{
'url': 'http://www.24video.net/video/view/1044982',
duration = int_or_none(self._og_search_property(
'duration', webpage, 'duration', fatal=False))
timestamp = parse_iso8601(self._search_regex(
- r'<time id="video-timeago" datetime="([^"]+)" itemprop="uploadDate">',
- webpage, 'upload date'))
+ r'<time[^>]+\bdatetime="([^"]+)"[^>]+itemprop="uploadDate"',
+ webpage, 'upload date', fatal=False))
uploader = self._html_search_regex(
r'class="video-uploaded"[^>]*>\s*<a href="/jsecUser/movies/[^"]+"[^>]*>([^<]+)</a>',
webpage, 'view count', fatal=False))
comment_count = int_or_none(self._html_search_regex(
r'<a[^>]+href="#tab-comments"[^>]*>(\d+) ŠŗŠ¾Š¼Š¼ŠµŠ½ŃŠ°ŃŠø',
- webpage, 'comment count', fatal=False))
+ webpage, 'comment count', default=None))
# Sets some cookies
self._download_xml(
@staticmethod
def _extract_urls(webpage):
return [m.group('url') for m in re.finditer(
- r'<iframe[^>]+src=(["\'])(?P<url>(?:https?://)?(?:www\.)?20min\.ch/videoplayer/videoplayer.html\?.*?\bvideoId@\d+.*?)\1',
+ r'<iframe[^>]+src=(["\'])(?P<url>(?:(?:https?:)?//)?(?:www\.)?20min\.ch/videoplayer/videoplayer.html\?.*?\bvideoId@\d+.*?)\1',
webpage)]
def _real_extract(self, url):
class TwitchBaseIE(InfoExtractor):
- _VALID_URL_BASE = r'https?://(?:www\.)?twitch\.tv'
+ _VALID_URL_BASE = r'https?://(?:(?:www|go)\.)?twitch\.tv'
_API_BASE = 'https://api.twitch.tv'
_USHER_BASE = 'https://usher.ttvnw.net'
_VALID_URL = r'''(?x)
https?://
(?:
- (?:www\.)?twitch\.tv/(?:[^/]+/v|videos)/|
+ (?:(?:www|go)\.)?twitch\.tv/(?:[^/]+/v|videos)/|
player\.twitch\.tv/\?.*?\bvideo=v
)
(?P<id>\d+)
_VALID_URL = r'''(?x)
https?://
(?:
- (?:www\.)?twitch\.tv/|
+ (?:(?:www|go)\.)?twitch\.tv/|
player\.twitch\.tv/\?.*?\bchannel=
)
(?P<id>[^/#?]+)
}, {
'url': 'https://player.twitch.tv/?channel=lotsofs',
'only_matching': True,
+ }, {
+ 'url': 'https://go.twitch.tv/food',
+ 'only_matching': True,
}]
@classmethod
from ..compat import compat_urlparse
from ..utils import (
determine_ext,
+ dict_get,
+ ExtractorError,
float_or_none,
- xpath_text,
- remove_end,
int_or_none,
- ExtractorError,
+ remove_end,
+ try_get,
+ xpath_text,
)
from .periscope import PeriscopeIE
class TwitterBaseIE(InfoExtractor):
- def _get_vmap_video_url(self, vmap_url, video_id):
+ def _extract_formats_from_vmap_url(self, vmap_url, video_id):
vmap_data = self._download_xml(vmap_url, video_id)
- return xpath_text(vmap_data, './/MediaFile').strip()
+ video_url = xpath_text(vmap_data, './/MediaFile').strip()
+ if determine_ext(video_url) == 'm3u8':
+ return self._extract_m3u8_formats(
+ video_url, video_id, ext='mp4', m3u8_id='hls',
+ entry_protocol='m3u8_native')
+ return [{
+ 'url': video_url,
+ }]
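+ # The VMAP document is assumed to hold a single <MediaFile>; its text is
+ # either a progressive video URL or an HLS master playlist, and only the
+ # latter is expanded into per-rendition formats.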
+
+ @staticmethod
+ def _search_dimensions_in_video_url(a_format, video_url):
+ m = re.search(r'/(?P<width>\d+)x(?P<height>\d+)/', video_url)
+ if m:
+ a_format.update({
+ 'width': int(m.group('width')),
+ 'height': int(m.group('height')),
+ })
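+ # Illustrative only (not a real tweet asset): a variant URL such as
+ # https://video.twimg.com/ext_tw_video/1/pu/vid/640x360/a.mp4 yields
+ # width=640/height=360 from its /640x360/ path segment.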
class TwitterCardIE(TwitterBaseIE):
'title': 'Twitter Card',
'thumbnail': r're:^https?://.*\.jpg$',
'duration': 30.033,
- }
+ },
+ 'skip': 'Video gone',
},
{
'url': 'https://twitter.com/i/cards/tfw/v1/623160978427936768',
'thumbnail': r're:^https?://.*\.jpg',
'duration': 80.155,
},
+ 'skip': 'Video gone',
},
{
'url': 'https://twitter.com/i/cards/tfw/v1/654001591733886977',
},
{
'url': 'https://twitter.com/i/cards/tfw/v1/665289828897005568',
- 'md5': 'ab2745d0b0ce53319a534fccaa986439',
+ 'md5': '6dabeaca9e68cbb71c99c322a4b42a11',
'info_dict': {
'id': 'iBb2x00UVlv',
'ext': 'mp4',
'uploader_id': '1189339351084113920',
'uploader': 'ArsenalTerje',
'title': 'Vine by ArsenalTerje',
+ 'timestamp': 1447451307,
},
'add_ie': ['Vine'],
}, {
'url': 'https://twitter.com/i/videos/tweet/705235433198714880',
- 'md5': '3846d0a07109b5ab622425449b59049d',
+ 'md5': '884812a2adc8aaf6fe52b15ccbfa3b88',
'info_dict': {
'id': '705235433198714880',
'ext': 'mp4',
'title': 'Twitter web player',
- 'thumbnail': r're:^https?://.*\.jpg',
+ 'thumbnail': r're:^https?://.*',
},
}, {
'url': 'https://twitter.com/i/videos/752274308186120192',
},
]
+ def _parse_media_info(self, media_info, video_id):
+ formats = []
+ for media_variant in media_info.get('variants', []):
+ media_url = media_variant['url']
+ if media_url.endswith('.m3u8'):
+ formats.extend(self._extract_m3u8_formats(media_url, video_id, ext='mp4', m3u8_id='hls'))
+ elif media_url.endswith('.mpd'):
+ formats.extend(self._extract_mpd_formats(media_url, video_id, mpd_id='dash'))
+ else:
+ vbr = int_or_none(dict_get(media_variant, ('bitRate', 'bitrate')), scale=1000)
+ a_format = {
+ 'url': media_url,
+ 'format_id': 'http-%d' % vbr if vbr else 'http',
+ 'vbr': vbr,
+ }
+ # Reported bitRate may be zero
+ if not a_format['vbr']:
+ del a_format['vbr']
+
+ self._search_dimensions_in_video_url(a_format, media_url)
+
+ formats.append(a_format)
+ return formats
+
+ def _extract_mobile_formats(self, username, video_id):
+ webpage = self._download_webpage(
+ 'https://mobile.twitter.com/%s/status/%s' % (username, video_id),
+ video_id, 'Downloading mobile webpage',
+ headers={
+ # A recent mobile UA is necessary for `gt` cookie
+ 'User-Agent': 'Mozilla/5.0 (Android 6.0.1; Mobile; rv:54.0) Gecko/54.0 Firefox/54.0',
+ })
+ main_script_url = self._html_search_regex(
+ r'<script[^>]+src="([^"]+main\.[^"]+)"', webpage, 'main script URL')
+ main_script = self._download_webpage(
+ main_script_url, video_id, 'Downloading main script')
+ bearer_token = self._search_regex(
+ r'BEARER_TOKEN\s*:\s*"([^"]+)"',
+ main_script, 'bearer token')
+ guest_token = self._search_regex(
+ r'document\.cookie\s*=\s*decodeURIComponent\("gt=(\d+)',
+ webpage, 'guest token')
+ api_data = self._download_json(
+ 'https://api.twitter.com/2/timeline/conversation/%s.json' % video_id,
+ video_id, 'Downloading mobile API data',
+ headers={
+ 'Authorization': 'Bearer ' + bearer_token,
+ 'x-guest-token': guest_token,
+ })
+ media_info = try_get(api_data, lambda o: o['globalObjects']['tweets'][video_id]
+ ['extended_entities']['media'][0]['video_info']) or {}
+ return self._parse_media_info(media_info, video_id)
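+ # Sketch of the mobile fallback implemented above: (1) the mobile status
+ # page sets a "gt" guest token via document.cookie, (2) the referenced
+ # main.*.js bundle exposes a public BEARER_TOKEN, (3) both are sent to
+ # the api.twitter.com/2/timeline/conversation endpoint, whose tweet
+ # object is fed back through _parse_media_info().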
+
def _real_extract(self, url):
video_id = self._match_id(url)
if periscope_url:
return self.url_result(periscope_url, PeriscopeIE.ie_key())
- def _search_dimensions_in_video_url(a_format, video_url):
- m = re.search(r'/(?P<width>\d+)x(?P<height>\d+)/', video_url)
- if m:
- a_format.update({
- 'width': int(m.group('width')),
- 'height': int(m.group('height')),
- })
-
video_url = config.get('video_url') or config.get('playlist', [{}])[0].get('source')
if video_url:
'url': video_url,
}
- _search_dimensions_in_video_url(f, video_url)
+ self._search_dimensions_in_video_url(f, video_url)
formats.append(f)
vmap_url = config.get('vmapUrl') or config.get('vmap_url')
if vmap_url:
- formats.append({
- 'url': self._get_vmap_video_url(vmap_url, video_id),
- })
+ formats.extend(
+ self._extract_formats_from_vmap_url(vmap_url, video_id))
media_info = None
media_info = entity['mediaInfo']
if media_info:
- for media_variant in media_info['variants']:
- media_url = media_variant['url']
- if media_url.endswith('.m3u8'):
- formats.extend(self._extract_m3u8_formats(media_url, video_id, ext='mp4', m3u8_id='hls'))
- elif media_url.endswith('.mpd'):
- formats.extend(self._extract_mpd_formats(media_url, video_id, mpd_id='dash'))
- else:
- vbr = int_or_none(media_variant.get('bitRate'), scale=1000)
- a_format = {
- 'url': media_url,
- 'format_id': 'http-%d' % vbr if vbr else 'http',
- 'vbr': vbr,
- }
- # Reported bitRate may be zero
- if not a_format['vbr']:
- del a_format['vbr']
-
- _search_dimensions_in_video_url(a_format, media_url)
-
- formats.append(a_format)
-
+ formats.extend(self._parse_media_info(media_info, video_id))
duration = float_or_none(media_info.get('duration', {}).get('nanos'), scale=1e9)
+ username = config.get('user', {}).get('screen_name')
+ if username:
+ formats.extend(self._extract_mobile_formats(username, video_id))
+
+ self._remove_duplicate_formats(formats)
self._sort_formats(formats)
title = self._search_regex(r'<title>([^<]+)</title>', webpage, 'title')
thumbnail = config.get('posterImageUrl') or config.get('image_src')
- duration = float_or_none(config.get('duration')) or duration
+ duration = float_or_none(config.get('duration'), scale=1000) or duration
return {
'id': video_id,
class TwitterIE(InfoExtractor):
IE_NAME = 'twitter'
- _VALID_URL = r'https?://(?:www\.|m\.|mobile\.)?twitter\.com/(?P<user_id>[^/]+)/status/(?P<id>\d+)'
+ _VALID_URL = r'https?://(?:www\.|m\.|mobile\.)?twitter\.com/(?:i/web|(?P<user_id>[^/]+))/status/(?P<id>\d+)'
_TEMPLATE_URL = 'https://twitter.com/%s/status/%s'
+ _TEMPLATE_STATUSES_URL = 'https://twitter.com/statuses/%s'
_TESTS = [{
'url': 'https://twitter.com/freethenipple/status/643211948184596480',
'description': 'FREE THE NIPPLE on Twitter: "FTN supporters on Hollywood Blvd today! http://t.co/c7jHH749xJ"',
'uploader': 'FREE THE NIPPLE',
'uploader_id': 'freethenipple',
+ 'duration': 12.922,
},
'params': {
'skip_download': True, # requires ffmpeg
'info_dict': {
'id': '700207533655363584',
'ext': 'mp4',
- 'title': 'JG - BEAT PROD: @suhmeduh #Damndaniel',
- 'description': 'JG on Twitter: "BEAT PROD: @suhmeduh https://t.co/HBrQ4AfpvZ #Damndaniel https://t.co/byBooq2ejZ"',
+ 'title': 'あかさ - BEAT PROD: @suhmeduh #Damndaniel',
+ 'description': 'あかさ on Twitter: "BEAT PROD: @suhmeduh https://t.co/HBrQ4AfpvZ #Damndaniel https://t.co/byBooq2ejZ"',
'thumbnail': r're:^https?://.*\.jpg',
- 'uploader': 'JG',
+ 'uploader': 'あかさ',
'uploader_id': 'jaydingeer',
+ 'duration': 30.0,
},
'params': {
'skip_download': True, # requires ffmpeg
'info_dict': {
'id': 'MIOxnrUteUd',
'ext': 'mp4',
- 'title': 'Dr.Pepperの飲み方 #japanese #バカ #ドクペ #電動ガン',
- 'uploader': 'TAKUMA',
- 'uploader_id': '1004126642786242560',
+ 'title': 'Vince Mancini - Vine of the day',
+ 'description': 'Vince Mancini on Twitter: "Vine of the day https://t.co/xmTvRdqxWf"',
+ 'uploader': 'Vince Mancini',
+ 'uploader_id': 'Filmdrunk',
+ 'timestamp': 1402826626,
'upload_date': '20140615',
},
'add_ie': ['Vine'],
'description': 'Captain America on Twitter: "@King0fNerd Are you sure you made the right choice? Find out in theaters. https://t.co/GpgYi9xMJI"',
'uploader_id': 'captainamerica',
'uploader': 'Captain America',
+ 'duration': 3.17,
},
'params': {
'skip_download': True, # requires ffmpeg
'info_dict': {
'id': '1zqKVVlkqLaKB',
'ext': 'mp4',
- 'title': 'Sgt Kerry Schmidt - Ontario Provincial Police - Road rage, mischief, assault, rollover and fire in one occurrence',
+ 'title': 'Sgt Kerry Schmidt - LIVE on #Periscope: Road rage, mischief, assault, rollover and fire in one occurrence',
+ 'description': 'Sgt Kerry Schmidt on Twitter: "LIVE on #Periscope: Road rage, mischief, assault, rollover and fire in one occurrence https://t.co/EKrVgIXF3s"',
'upload_date': '20160923',
'uploader_id': 'OPP_HSD',
- 'uploader': 'Sgt Kerry Schmidt - Ontario Provincial Police',
+ 'uploader': 'Sgt Kerry Schmidt',
'timestamp': 1474613214,
},
'add_ie': ['Periscope'],
+ }, {
+ # has mp4 formats via mobile API
+ 'url': 'https://twitter.com/news_al3alm/status/852138619213144067',
+ 'info_dict': {
+ 'id': '852138619213144067',
+ 'ext': 'mp4',
+ 'title': 'عالم الأخبار - كلمة تاريخية بجلسة الجناسي التاريخية.. النائب خالد مؤنس العتيبي للمعارضين : اتقوا الله .. الظلم ظلمات يوم القيامة',
+ 'description': 'عالم الأخبار on Twitter: "كلمة تاريخية بجلسة الجناسي التاريخية.. النائب خالد مؤنس العتيبي للمعارضين : اتقوا الله .. الظلم ظلمات يوم القيامة https://t.co/xg6OhpyKfN"',
+ 'uploader': 'عالم الأخبار',
+ 'uploader_id': 'news_al3alm',
+ 'duration': 277.4,
+ },
+ 'params': {
+ 'format': 'best[format_id^=http-]',
+ },
+ }, {
+ 'url': 'https://twitter.com/i/web/status/910031516746514432',
+ 'info_dict': {
+ 'id': '910031516746514432',
+ 'ext': 'mp4',
+ 'title': 'Préfet de Guadeloupe - [Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terre. Restez confinés. Réfugiez-vous dans la pièce la + sûre.',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'description': 'Préfet de Guadeloupe on Twitter: "[Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terre. Restez confinés. Réfugiez-vous dans la pièce la + sûre. https://t.co/mwx01Rs4lo"',
+ 'uploader': 'Préfet de Guadeloupe',
+ 'uploader_id': 'Prefet971',
+ 'duration': 47.48,
+ },
+ 'params': {
+ 'skip_download': True, # requires ffmpeg
+ },
}]
def _real_extract(self, url):
twid = mobj.group('id')
webpage, urlh = self._download_webpage_handle(
- self._TEMPLATE_URL % (user_id, twid), twid)
+ self._TEMPLATE_STATUSES_URL % twid, twid)
if 'twitter.com/account/suspended' in urlh.geturl():
raise ExtractorError('Account suspended by Twitter.', expected=True)
+ if user_id is None:
+ mobj = re.match(self._VALID_URL, urlh.geturl())
+ user_id = mobj.group('user_id')
+
username = remove_end(self._og_search_title(webpage), ' on Twitter')
title = description = self._og_search_description(webpage).strip('').replace('\n', ' ').strip('“”')
vmap_url = self._html_search_meta(
'twitter:amplify:vmap', webpage, 'vmap url')
- video_url = self._get_vmap_video_url(vmap_url, video_id)
+ formats = self._extract_formats_from_vmap_url(vmap_url, video_id)
thumbnails = []
thumbnail = self._html_search_meta(
})
video_w, video_h = _find_dimension('player')
- formats = [{
- 'url': video_url,
+ formats[0].update({
'width': video_w,
'height': video_h,
- }]
+ })
return {
'id': video_id,
ExtractorError,
float_or_none,
int_or_none,
+ js_to_json,
sanitized_Request,
unescapeHTML,
urlencode_postdata,
# new URL schema
'url': 'https://www.udemy.com/electric-bass-right-from-the-start/learn/v4/t/lecture/4580906',
'only_matching': True,
+ }, {
+ # no url in outputs format entry
+ 'url': 'https://www.udemy.com/learn-web-development-complete-step-by-step-guide-to-success/learn/v4/t/lecture/4125812',
+ 'only_matching': True,
}]
def _extract_course_info(self, webpage, video_id):
return compat_urlparse.urljoin(base_url, url) if not url.startswith('http') else url
checkout_url = unescapeHTML(self._search_regex(
- r'href=(["\'])(?P<url>(?:https?://(?:www\.)?udemy\.com)?/payment/checkout/.+?)\1',
+ r'href=(["\'])(?P<url>(?:https?://(?:www\.)?udemy\.com)?/(?:payment|cart)/checkout/.+?)\1',
webpage, 'checkout url', group='url', default=None))
if checkout_url:
raise ExtractorError(
def extract_output_format(src, f_id):
return {
- 'url': src['url'],
+ 'url': src.get('url'),
'format_id': '%sp' % (src.get('height') or f_id),
'width': int_or_none(src.get('width')),
'height': int_or_none(src.get('height')),
f = add_output_format_meta(f, format_id)
formats.append(f)
+ def extract_subtitles(track_list):
+ if not isinstance(track_list, list):
+ return
+ for track in track_list:
+ if not isinstance(track, dict):
+ continue
+ if track.get('kind') != 'captions':
+ continue
+ src = track.get('src')
+ if not src or not isinstance(src, compat_str):
+ continue
+ lang = track.get('language') or track.get(
+ 'srclang') or track.get('label')
+ sub_dict = automatic_captions if track.get(
+ 'autogenerated') is True else subtitles
+ sub_dict.setdefault(lang, []).append({
+ 'url': src,
+ })
+
download_urls = asset.get('download_urls')
if isinstance(download_urls, dict):
extract_formats(download_urls.get('Video'))
extract_formats(data.get('sources'))
if not duration:
duration = int_or_none(data.get('duration'))
- tracks = data.get('tracks')
- if isinstance(tracks, list):
- for track in tracks:
- if not isinstance(track, dict):
- continue
- if track.get('kind') != 'captions':
- continue
- src = track.get('src')
- if not src or not isinstance(src, compat_str):
- continue
- lang = track.get('language') or track.get(
- 'srclang') or track.get('label')
- sub_dict = automatic_captions if track.get(
- 'autogenerated') is True else subtitles
- sub_dict.setdefault(lang, []).append({
- 'url': src,
- })
+ extract_subtitles(data.get('tracks'))
+
+ if not subtitles and not automatic_captions:
+ text_tracks = self._parse_json(
+ self._search_regex(
+ r'text-tracks=(["\'])(?P<data>\[.+?\])\1', view_html,
+ 'text tracks', default='{}', group='data'), video_id,
+ transform_source=lambda s: js_to_json(unescapeHTML(s)),
+ fatal=False)
+ extract_subtitles(text_tracks)
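+ # The text-tracks attribute holds HTML-escaped JavaScript, hence the
+ # chained transform: unescapeHTML() first, then js_to_json(), so that
+ # _parse_json() receives valid JSON before the tracks are reused by
+ # extract_subtitles().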
self._sort_formats(formats, field_preference=('height', 'width', 'tbr', 'format_id'))
class VeohIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?veoh\.com/(?:watch|iphone/#_Watch)/(?P<id>(?:v|yapi-)[\da-zA-Z]+)'
+ _VALID_URL = r'https?://(?:www\.)?veoh\.com/(?:watch|iphone/#_Watch)/(?P<id>(?:v|e|yapi-)[\da-zA-Z]+)'
- _TESTS = [
- {
- 'url': 'http://www.veoh.com/watch/v56314296nk7Zdmz3',
- 'md5': '620e68e6a3cff80086df3348426c9ca3',
- 'info_dict': {
- 'id': '56314296',
- 'ext': 'mp4',
- 'title': 'Straight Backs Are Stronger',
- 'uploader': 'LUMOback',
- 'description': 'At LUMOback, we believe straight backs are stronger. The LUMOback Posture & Movement Sensor: It gently vibrates when you slouch, inspiring improved posture and mobility. Use the app to track your data and improve your posture over time. ',
- },
+ _TESTS = [{
+ 'url': 'http://www.veoh.com/watch/v56314296nk7Zdmz3',
+ 'md5': '620e68e6a3cff80086df3348426c9ca3',
+ 'info_dict': {
+ 'id': '56314296',
+ 'ext': 'mp4',
+ 'title': 'Straight Backs Are Stronger',
+ 'uploader': 'LUMOback',
+ 'description': 'At LUMOback, we believe straight backs are stronger. The LUMOback Posture & Movement Sensor: It gently vibrates when you slouch, inspiring improved posture and mobility. Use the app to track your data and improve your posture over time. ',
},
- {
- 'url': 'http://www.veoh.com/watch/v27701988pbTc4wzN?h1=Chile+workers+cover+up+to+avoid+skin+damage',
- 'md5': '4a6ff84b87d536a6a71e6aa6c0ad07fa',
- 'info_dict': {
- 'id': '27701988',
- 'ext': 'mp4',
- 'title': 'Chile workers cover up to avoid skin damage',
- 'description': 'md5:2bd151625a60a32822873efc246ba20d',
- 'uploader': 'afp-news',
- 'duration': 123,
- },
- 'skip': 'This video has been deleted.',
+ }, {
+ 'url': 'http://www.veoh.com/watch/v27701988pbTc4wzN?h1=Chile+workers+cover+up+to+avoid+skin+damage',
+ 'md5': '4a6ff84b87d536a6a71e6aa6c0ad07fa',
+ 'info_dict': {
+ 'id': '27701988',
+ 'ext': 'mp4',
+ 'title': 'Chile workers cover up to avoid skin damage',
+ 'description': 'md5:2bd151625a60a32822873efc246ba20d',
+ 'uploader': 'afp-news',
+ 'duration': 123,
},
- {
- 'url': 'http://www.veoh.com/watch/v69525809F6Nc4frX',
- 'md5': '4fde7b9e33577bab2f2f8f260e30e979',
- 'note': 'Embedded ooyala video',
- 'info_dict': {
- 'id': '69525809',
- 'ext': 'mp4',
- 'title': 'Doctors Alter Plan For Preteen\'s Weight Loss Surgery',
- 'description': 'md5:f5a11c51f8fb51d2315bca0937526891',
- 'uploader': 'newsy-videos',
- },
- 'skip': 'This video has been deleted.',
+ 'skip': 'This video has been deleted.',
+ }, {
+ 'url': 'http://www.veoh.com/watch/v69525809F6Nc4frX',
+ 'md5': '4fde7b9e33577bab2f2f8f260e30e979',
+ 'note': 'Embedded ooyala video',
+ 'info_dict': {
+ 'id': '69525809',
+ 'ext': 'mp4',
+ 'title': 'Doctors Alter Plan For Preteen\'s Weight Loss Surgery',
+ 'description': 'md5:f5a11c51f8fb51d2315bca0937526891',
+ 'uploader': 'newsy-videos',
},
- ]
+ 'skip': 'This video has been deleted.',
+ }, {
+ 'url': 'http://www.veoh.com/watch/e152215AJxZktGS',
+ 'only_matching': True,
+ }]
def _extract_formats(self, source):
formats = []
)
/?
(?:
- \#!/(?:video|live)/|
+ (?:\#!/)?(?:video|live)/|
embed?.*id=|
articles/
)|
{
'url': 'abtv:140026',
'only_matching': True,
- }
+ },
+ {
+ 'url': 'http://www.vgtv.no/video/84196/hevnen-er-soet-episode-10-abu',
+ 'only_matching': True,
+ },
]
def _real_extract(self, url):
idoc = self._download_xml(
doc_url, video_id,
'Downloading info', transform_source=fix_xml_ampersands)
- return self.playlist_result(
- [self._get_video_info(item) for item in idoc.findall('.//item')],
- playlist_id=video_id,
- )
+
+ entries = []
+ for item in idoc.findall('.//item'):
+ info = self._get_video_info(item)
+ if info:
+ entries.append(info)
+
+ return self.playlist_result(entries, playlist_id=video_id)
import json
from .adobepass import AdobePassIE
+from .youtube import YoutubeIE
from .common import InfoExtractor
from ..compat import compat_HTTPError
from ..utils import (
if embed_code:
return _url_res('ooyala:%s' % embed_code, 'Ooyala')
- youtube_url = self._html_search_regex(
- r'<iframe[^>]+src="(.*youtube\.com/.*)"',
- body, 'YouTube URL', default=None)
+ youtube_url = YoutubeIE._extract_url(body)
if youtube_url:
- return _url_res(youtube_url, 'Youtube')
+ return _url_res(youtube_url, YoutubeIE.ie_key())
video_url = self._html_search_regex(
r'data-video-url="([^"]+)"',
self._sort_formats(formats)
duration = int_or_none(duration or self._search_regex(
- r'data-video-duration=(["\'])(?P<duartion>\d+)\1', webpage, 'duration'))
+ r'data-video-duration=(["\'])(?P<duration>\d+)\1', webpage,
+ 'duration', fatal=False, group='duration'))
thumbnail = thumbnail or self._og_search_thumbnail(webpage)
like_count = int_or_none(self._search_regex(
import itertools
from .common import InfoExtractor
-from ..compat import compat_HTTPError
+from ..compat import (
+ compat_HTTPError,
+ compat_str,
+)
from ..utils import (
ExtractorError,
int_or_none,
'or for violating the terms of use.',
expected=True)
- formats = [{
- 'format_id': f.get('type'),
- 'url': f['uri'],
- 'width': int_or_none(f.get('width')),
- 'height': int_or_none(f.get('height')),
- 'preference': 0 if f.get('type', '').endswith('clip') else 1,
- } for f in video.get('formats', []) if f.get('uri')]
+ formats = []
+ for f in video.get('formats', []):
+ format_url = f.get('uri')
+ if not format_url or not isinstance(format_url, compat_str):
+ continue
+ format_type = f.get('type')
+ if format_type == 'dash':
+ formats.extend(self._extract_mpd_formats(
+ format_url, video_id, mpd_id='dash', fatal=False))
+ elif format_type == 'hls':
+ formats.extend(self._extract_m3u8_formats(
+ format_url, video_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls', fatal=False))
+ else:
+ formats.append({
+ 'format_id': f.get('type'),
+ 'url': format_url,
+ 'width': int_or_none(f.get('width')),
+ 'height': int_or_none(f.get('height')),
+ 'preference': 0 if f.get('type', '').endswith(
+ 'clip') else 1,
+ })
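+ # Entries are dispatched on their "type": DASH manifests and HLS
+ # playlists are expanded into individual formats, anything else is kept
+ # as a progressive URL. Clip formats keep preference 0, so complete
+ # videos (preference 1) sort ahead of them.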
if not formats and video.get('complete_url'):
formats.append({
class VidmeUserIE(VidmeListBaseIE):
IE_NAME = 'vidme:user'
- _VALID_URL = r'https?://vid\.me/(?:e/)?(?P<id>[\da-zA-Z]{6,})(?!/likes)(?:[^\da-zA-Z]|$)'
+ _VALID_URL = r'https?://vid\.me/(?:e/)?(?P<id>[\da-zA-Z_-]{6,})(?!/likes)(?:[^\da-zA-Z_-]|$)'
_API_ITEM = 'list'
_TITLE = 'Videos'
- _TEST = {
- 'url': 'https://vid.me/EFARCHIVE',
+ _TESTS = [{
+ 'url': 'https://vid.me/MasakoX',
'info_dict': {
- 'id': '3834632',
- 'title': 'EFARCHIVE - %s' % _TITLE,
+ 'id': '16112341',
+ 'title': 'MasakoX - %s' % _TITLE,
},
- 'playlist_mincount': 238,
- }
+ 'playlist_mincount': 191,
+ }, {
+ 'url': 'https://vid.me/unsQuare_netWork',
+ 'only_matching': True,
+ }]
class VidmeUserLikesIE(VidmeListBaseIE):
IE_NAME = 'vidme:user:likes'
- _VALID_URL = r'https?://vid\.me/(?:e/)?(?P<id>[\da-zA-Z]{6,})/likes'
+ _VALID_URL = r'https?://vid\.me/(?:e/)?(?P<id>[\da-zA-Z_-]{6,})/likes'
_API_ITEM = 'likes'
_TITLE = 'Likes'
- _TEST = {
+ _TESTS = [{
'url': 'https://vid.me/ErinAlexis/likes',
'info_dict': {
'id': '6483530',
'title': 'ErinAlexis - %s' % _TITLE,
},
'playlist_mincount': 415,
- }
+ }, {
+ 'url': 'https://vid.me/Kaleidoscope-Ish/likes',
+ 'only_matching': True,
+ }]
class VierIE(InfoExtractor):
IE_NAME = 'vier'
IE_DESC = 'vier.be and vijf.be'
- _VALID_URL = r'https?://(?:www\.)?(?P<site>vier|vijf)\.be/(?:[^/]+/videos/(?P<display_id>[^/]+)(?:/(?P<id>\d+))?|video/v3/embed/(?P<embed_id>\d+))'
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:www\.)?(?P<site>vier|vijf)\.be/
+ (?:
+ (?:
+ [^/]+/videos|
+ video(?:/[^/]+)*
+ )/
+ (?P<display_id>[^/]+)(?:/(?P<id>\d+))?|
+ (?:
+ video/v3/embed|
+ embed/video/public
+ )/(?P<embed_id>\d+)
+ )
+ '''
_NETRC_MACHINE = 'vier'
_TESTS = [{
'url': 'http://www.vier.be/planb/videos/het-wordt-warm-de-moestuin/16129',
}, {
'url': 'http://www.vier.be/video/v3/embed/16129',
'only_matching': True,
+ }, {
+ 'url': 'https://www.vijf.be/embed/video/public/4093',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.vier.be/video/blockbusters/in-juli-en-augustus-summer-classics',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.vier.be/video/achter-de-rug/2017/achter-de-rug-seizoen-1-aflevering-6',
+ 'only_matching': True,
}]
def _real_initialize(self):
video_id = self._search_regex(
[r'data-nid="(\d+)"', r'"nid"\s*:\s*"(\d+)"'],
webpage, 'video id', default=video_id or display_id)
- application = self._search_regex(
- [r'data-application="([^"]+)"', r'"application"\s*:\s*"([^"]+)"'],
- webpage, 'application', default=site + '_vod')
- filename = self._search_regex(
- [r'data-filename="([^"]+)"', r'"filename"\s*:\s*"([^"]+)"'],
- webpage, 'filename')
-
- playlist_url = 'http://vod.streamcloud.be/%s/_definst_/mp4:%s.mp4/playlist.m3u8' % (application, filename)
+
+ playlist_url = self._search_regex(
+ r'data-file=(["\'])(?P<url>(?:https?:)?//[^/]+/.+?\.m3u8.*?)\1',
+ webpage, 'm3u8 url', default=None, group='url')
+
+ if not playlist_url:
+ application = self._search_regex(
+ [r'data-application="([^"]+)"', r'"application"\s*:\s*"([^"]+)"'],
+ webpage, 'application', default=site + '_vod')
+ filename = self._search_regex(
+ [r'data-filename="([^"]+)"', r'"filename"\s*:\s*"([^"]+)"'],
+ webpage, 'filename')
+ playlist_url = 'http://vod.streamcloud.be/%s/_definst_/mp4:%s.mp4/playlist.m3u8' % (application, filename)
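+ # The m3u8 URL published in the page's data-file attribute is preferred;
+ # the legacy vod.streamcloud.be playlist URL is only assembled from the
+ # application/filename fields when that attribute is absent.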
+
formats = self._extract_wowza_formats(
playlist_url, display_id, skip_protocols=['dash'])
self._sort_formats(formats)
from .common import InfoExtractor
from ..compat import (
- compat_urlparse,
+ compat_HTTPError,
compat_str,
+ compat_urlparse,
)
from ..utils import (
- parse_duration,
+ ExtractorError,
js_to_json,
+ parse_duration,
parse_iso8601,
)
base_url = self._proto_relative_url(cfg['livepipe'], 'http:')
- lecture_data = self._download_json(
- '%s/site/api/lecture/%s?format=json' % (base_url, lecture_id),
- lecture_id)['lecture'][0]
+ try:
+ lecture_data = self._download_json(
+ '%s/site/api/lecture/%s?format=json' % (base_url, lecture_id),
+ lecture_id)['lecture'][0]
+ except ExtractorError as e:
+ if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
+ msg = self._parse_json(
+ e.cause.read().decode('utf-8'), lecture_id)
+ raise ExtractorError(msg['detail'], expected=True)
+ raise
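+ # A 403 from the lecture API is assumed to carry a JSON body whose
+ # "detail" field explains the refusal; it is surfaced verbatim as an
+ # expected ExtractorError instead of a generic HTTP error.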
lecture_info = {
'id': lecture_id,
_API_QUERY_TEMPLATE = '/v4/%sapp=%s&t=%s&site=www.viki.com'
_API_URL_TEMPLATE = 'http://api.viki.io%s&sig=%s'
- _APP = '65535a'
+ _APP = '100005a'
_APP_VERSION = '2.2.5.1428709186'
- _APP_SECRET = '-$iJ}@p7!G@SyU/je1bEyWg}upLu-6V6-Lg9VD(]siH,r.,m-r|ulZ,U4LC/SeR)'
+ _APP_SECRET = 'MM_d*yP@`&1@]@!AVrXf_o-HVEnoTnm$O-ti4[G~$JDI/Dc-&piU&z&5.;:}95=Iad'
_GEO_BYPASS = False
_NETRC_MACHINE = 'viki'
else:
mpd_manifest_urls = [(format_id, manifest_url)]
for f_id, m_url in mpd_manifest_urls:
- formats.extend(self._extract_mpd_formats(
+ mpd_formats = self._extract_mpd_formats(
m_url.replace('/master.json', '/master.mpd'), video_id, f_id,
'Downloading %s MPD information' % cdn_name,
- fatal=False))
+ fatal=False)
+ for f in mpd_formats:
+ if f.get('vcodec') == 'none':
+ f['preference'] = -50
+ elif f.get('acodec') == 'none':
+ f['preference'] = -40
+ formats.extend(mpd_formats)
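+ # youtube-dl sorts formats with higher preference first, so the
+ # audio-only (-50) and video-only (-40) MPD renditions rank below the
+ # default muxed formats (preference 0).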
subtitles = {}
text_tracks = config['request'].get('text_tracks')
if download_url and not source_file.get('is_cold') and not source_file.get('is_defrosting'):
source_name = source_file.get('public_name', 'Original')
if self._is_valid_url(download_url, video_id, '%s video' % source_name):
- ext = source_file.get('extension', determine_ext(download_url)).lower()
+ ext = (try_get(
+ source_file, lambda x: x['extension'],
+ compat_str) or determine_ext(
+ download_url, None) or 'mp4').lower()
formats.append({
'url': download_url,
'ext': ext,
username = data.get('username')
+ alt_title = 'Vine by %s' % username if username else None
+
return {
'id': video_id,
- 'title': data.get('description'),
- 'alt_title': 'Vine by %s' % username if username else None,
+ 'title': data.get('description') or alt_title or 'Vine video',
+ 'alt_title': alt_title,
'thumbnail': data.get('thumbnailUrl'),
'timestamp': unified_timestamp(data.get('created')),
'uploader': username,
import re
from .common import InfoExtractor
-from ..compat import compat_str
+from ..compat import (
+ compat_kwargs,
+ compat_str,
+)
from ..utils import (
ExtractorError,
int_or_none,
headers.update(kwargs.get('headers', {}))
kwargs['headers'] = headers
response = self._download_json(
- 'https://www.viu.com/api/' + path, *args, **kwargs)['response']
+ 'https://www.viu.com/api/' + path, *args,
+ **compat_kwargs(kwargs))['response']
if response.get('status') != 'success':
raise ExtractorError('%s said: %s' % (
self.IE_NAME, response['message']), expected=True)
from .dailymotion import DailymotionIE
from .pladform import PladformIE
from .vimeo import VimeoIE
+from .youtube import YoutubeIE
class VKBaseIE(InfoExtractor):
if re.search(error_re, info_page):
raise ExtractorError(error_msg % video_id, expected=True)
- youtube_url = self._search_regex(
- r'<iframe[^>]+src="((?:https?:)?//www.youtube.com/embed/[^"]+)"',
- info_page, 'youtube iframe', default=None)
+ youtube_url = YoutubeIE._extract_url(info_page)
if youtube_url:
- return self.url_result(youtube_url, 'Youtube')
+ return self.url_result(youtube_url, ie=YoutubeIE.ie_key())
vimeo_url = VimeoIE._extract_url(url, info_page)
if vimeo_url is not None:
},
}]
+ @classmethod
+ def suitable(cls, url):
+ return False if VLivePlaylistIE.suitable(url) else super(VLiveIE, cls).suitable(url)
+
def _real_extract(self, url):
video_id = self._match_id(url)
query={
'app_id': app_id,
'channelSeq': channel_seq,
- 'maxNumOfRows': 1000,
+ # Large values of maxNumOfRows (~300 or above) may cause
+ # empty responses (see [1]), e.g. this happens for [2] that
+ # has more than 300 videos.
+ # 1. https://github.com/rg3/youtube-dl/issues/13830
+ # 2. http://channels.vlive.tv/EDBF.
+ 'maxNumOfRows': 100,
'_': int(time.time()),
'pageNo': page_num
}
return self.playlist_result(
entries, channel_code, channel_name)
+
+
+class VLivePlaylistIE(InfoExtractor):
+ IE_NAME = 'vlive:playlist'
+ _VALID_URL = r'https?://(?:(?:www|m)\.)?vlive\.tv/video/(?P<video_id>[0-9]+)/playlist/(?P<id>[0-9]+)'
+ _TEST = {
+ 'url': 'http://www.vlive.tv/video/22867/playlist/22912',
+ 'info_dict': {
+ 'id': '22912',
+ 'title': 'Valentine Day Message from TWICE'
+ },
+ 'playlist_mincount': 9
+ }
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id, playlist_id = mobj.group('video_id', 'id')
+
+ VIDEO_URL_TEMPLATE = 'http://www.vlive.tv/video/%s'
+ if self._downloader.params.get('noplaylist'):
+ self.to_screen(
+ 'Downloading just video %s because of --no-playlist' % video_id)
+ return self.url_result(
+ VIDEO_URL_TEMPLATE % video_id,
+ ie=VLiveIE.ie_key(), video_id=video_id)
+
+ self.to_screen(
+ 'Downloading playlist %s - add --no-playlist to just download video'
+ % playlist_id)
+
+ webpage = self._download_webpage(
+ 'http://www.vlive.tv/video/%s/playlist/%s'
+ % (video_id, playlist_id), playlist_id)
+
+ item_ids = self._parse_json(
+ self._search_regex(
+ r'playlistVideoSeqs\s*=\s*(\[[^]]+\])', webpage,
+ 'playlist video seqs'),
+ playlist_id)
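+ # playlistVideoSeqs is expected to be a plain JS array of video sequence
+ # numbers, e.g. playlistVideoSeqs = [22867, 22912]; (illustrative
+ # values), each of which maps onto a www.vlive.tv/video/<seq> URL below.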
+
+ entries = [
+ self.url_result(
+ VIDEO_URL_TEMPLATE % item_id, ie=VLiveIE.ie_key(),
+ video_id=compat_str(item_id))
+ for item_id in item_ids]
+
+ playlist_name = self._html_search_regex(
+ r'<div[^>]+class="[^"]*multicam_playlist[^>]*>\s*<h3[^>]+>([^<]+)',
+ webpage, 'playlist title', fatal=False)
+
+ return self.playlist_result(entries, playlist_id, playlist_name)
--- /dev/null
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from .kaltura import KalturaIE
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+ try_get,
+ unified_timestamp,
+)
+
+
+class VootIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?voot\.com/(?:[^/]+/)+(?P<id>\d+)'
+ _GEO_COUNTRIES = ['IN']
+ _TESTS = [{
+ 'url': 'https://www.voot.com/shows/ishq-ka-rang-safed/1/360558/is-this-the-end-of-kamini-/441353',
+ 'info_dict': {
+ 'id': '0_8ledb18o',
+ 'ext': 'mp4',
+ 'title': 'Ishq Ka Rang Safed - Season 01 - Episode 340',
+ 'description': 'md5:06291fbbbc4dcbe21235c40c262507c1',
+ 'uploader_id': 'batchUser',
+ 'timestamp': 1472162937,
+ 'upload_date': '20160825',
+ 'duration': 1146,
+ 'series': 'Ishq Ka Rang Safed',
+ 'season_number': 1,
+ 'episode': 'Is this the end of Kamini?',
+ 'episode_number': 340,
+ 'view_count': int,
+ 'like_count': int,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'expected_warnings': ['Failed to download m3u8 information'],
+ }, {
+ 'url': 'https://www.voot.com/kids/characters/mighty-cat-masked-niyander-e-/400478/school-bag-disappears/440925',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.voot.com/movies/pandavas-5/424627',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ media_info = self._download_json(
+ 'https://wapi.voot.com/ws/ott/getMediaInfo.json', video_id,
+ query={
+ 'platform': 'Web',
+ 'pId': 2,
+ 'mediaId': video_id,
+ })
+
+ status_code = try_get(media_info, lambda x: x['status']['code'], int)
+ if status_code != 0:
+ raise ExtractorError(media_info['status']['message'], expected=True)
+
+ media = media_info['assets']
+
+ entry_id = media['EntryId']
+ title = media['MediaName']
+
+ description, series, season_number, episode, episode_number = [None] * 5
+
+ for meta in try_get(media, lambda x: x['Metas'], list) or []:
+ key, value = meta.get('Key'), meta.get('Value')
+ if not key or not value:
+ continue
+ if key == 'ContentSynopsis':
+ description = value
+ elif key == 'RefSeriesTitle':
+ series = value
+ elif key == 'RefSeriesSeason':
+ season_number = int_or_none(value)
+ elif key == 'EpisodeMainTitle':
+ episode = value
+ elif key == 'EpisodeNo':
+ episode_number = int_or_none(value)
+
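+ # The media itself lives on Kaltura (partner id 1982551, per the URL
+ # template below); url_transparent delegates format extraction to
+ # KalturaIE while keeping the metadata assembled here.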
+ return {
+ '_type': 'url_transparent',
+ 'url': 'kaltura:1982551:%s' % entry_id,
+ 'ie_key': KalturaIE.ie_key(),
+ 'title': title,
+ 'description': description,
+ 'series': series,
+ 'season_number': season_number,
+ 'episode': episode,
+ 'episode_number': episode_number,
+ 'timestamp': unified_timestamp(media.get('CreationDate')),
+ 'duration': int_or_none(media.get('Duration')),
+ 'view_count': int_or_none(media.get('ViewCounter')),
+ 'like_count': int_or_none(media.get('like_counter')),
+ }
# coding: utf-8
from __future__ import unicode_literals
+import re
+
from .common import InfoExtractor
from ..utils import (
int_or_none,
},
}]
+ @staticmethod
+ def _extract_urls(webpage):
+ return re.findall(
+ r'<iframe[^>]+src=["\']((?:https?:)?//(?:view\.vzaar\.com)/[0-9]+)',
+ webpage)
+
def _real_extract(self, url):
video_id = self._match_id(url)
video_data = self._download_json(
--- /dev/null
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ int_or_none,
+ js_to_json,
+ strip_or_none,
+ try_get,
+ unified_timestamp,
+)
+
+
+class WatchBoxIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?watchbox\.de/(?P<kind>serien|filme)/(?:[^/]+/)*[^/]+-(?P<id>\d+)'
+ _TESTS = [{
+ # film
+ 'url': 'https://www.watchbox.de/filme/free-jimmy-12325.html',
+ 'info_dict': {
+ 'id': '341368',
+ 'ext': 'mp4',
+ 'title': 'Free Jimmy',
+ 'description': 'md5:bcd8bafbbf9dc0ef98063d344d7cc5f6',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 4890,
+ 'age_limit': 16,
+ 'release_year': 2009,
+ },
+ 'params': {
+ 'format': 'bestvideo',
+ 'skip_download': True,
+ },
+ 'expected_warnings': ['Failed to download m3u8 information'],
+ }, {
+ # episode
+ 'url': 'https://www.watchbox.de/serien/ugly-americans-12231/staffel-1/date-in-der-hoelle-328286.html',
+ 'info_dict': {
+ 'id': '328286',
+ 'ext': 'mp4',
+ 'title': 'S01 E01 - Date in der Hölle',
+ 'description': 'md5:2f31c74a8186899f33cb5114491dae2b',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 1291,
+ 'age_limit': 12,
+ 'release_year': 2010,
+ 'series': 'Ugly Americans',
+ 'season_number': 1,
+ 'episode': 'Date in der Hölle',
+ 'episode_number': 1,
+ },
+ 'params': {
+ 'format': 'bestvideo',
+ 'skip_download': True,
+ },
+ 'expected_warnings': ['Failed to download m3u8 information'],
+ }, {
+ 'url': 'https://www.watchbox.de/serien/ugly-americans-12231/staffel-2/der-ring-des-powers-328270',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ kind, video_id = mobj.group('kind', 'id')
+
+ webpage = self._download_webpage(url, video_id)
+
+ source = self._parse_json(
+ self._search_regex(
+ r'(?s)source\s*:\s*({.+?})\s*,\s*\n', webpage, 'source',
+ default='{}'),
+ video_id, transform_source=js_to_json, fatal=False) or {}
+
+ video_id = compat_str(source.get('videoId') or video_id)
+
+ devapi = self._download_json(
+ 'http://api.watchbox.de/devapi/id/%s' % video_id, video_id, query={
+ 'format': 'json',
+ 'apikey': 'hbbtv',
+ }, fatal=False)
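+ # The devapi lookup is non-fatal on purpose: the page-embedded "source"
+ # object can stand in for most fields when the hbbtv-keyed endpoint
+ # fails, as the fallbacks below show.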
+
+ item = try_get(devapi, lambda x: x['items'][0], dict) or {}
+
+ title = item.get('title') or try_get(
+ item, lambda x: x['movie']['headline_movie'],
+ compat_str) or source['title']
+
+ formats = []
+ hls_url = item.get('media_videourl_hls') or source.get('hls')
+ if hls_url:
+ formats.extend(self._extract_m3u8_formats(
+ hls_url, video_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls', fatal=False))
+ dash_url = item.get('media_videourl_wv') or source.get('dash')
+ if dash_url:
+ formats.extend(self._extract_mpd_formats(
+ dash_url, video_id, mpd_id='dash', fatal=False))
+ mp4_url = item.get('media_videourl')
+ if mp4_url:
+ formats.append({
+ 'url': mp4_url,
+ 'format_id': 'mp4',
+ 'width': int_or_none(item.get('width')),
+ 'height': int_or_none(item.get('height')),
+ 'tbr': int_or_none(item.get('bitrate')),
+ })
+ self._sort_formats(formats)
+
+ description = strip_or_none(item.get('descr'))
+ thumbnail = item.get('media_content_thumbnail_large') or source.get('poster') or item.get('media_thumbnail')
+ duration = int_or_none(item.get('media_length') or source.get('length'))
+ timestamp = unified_timestamp(item.get('pubDate'))
+ view_count = int_or_none(item.get('media_views'))
+ age_limit = int_or_none(try_get(item, lambda x: x['movie']['fsk']))
+ release_year = int_or_none(try_get(item, lambda x: x['movie']['rel_year']))
+
+ info = {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'duration': duration,
+ 'timestamp': timestamp,
+ 'view_count': view_count,
+ 'age_limit': age_limit,
+ 'release_year': release_year,
+ 'formats': formats,
+ }
+
+ if kind.lower() == 'serien':
+ series = try_get(
+ item, lambda x: x['special']['title'],
+ compat_str) or source.get('format')
+ season_number = int_or_none(self._search_regex(
+ r'^S(\d{1,2})\s*E\d{1,2}', title, 'season number',
+ default=None) or self._search_regex(
+ r'/staffel-(\d+)/', url, 'season number', default=None))
+ episode = source.get('title')
+ episode_number = int_or_none(self._search_regex(
+ r'^S\d{1,2}\s*E(\d{1,2})', title, 'episode number',
+ default=None))
+ info.update({
+ 'series': series,
+ 'season_number': season_number,
+ 'episode': episode,
+ 'episode_number': episode_number,
+ })
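+ # Season and episode numbers are read from the "S01 E01 - ..." title
+ # prefix, with the /staffel-N/ URL segment as a season fallback; both
+ # shapes appear in the tests above.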
+
+ return info
import re
from .common import InfoExtractor
-from ..utils import (
- unified_strdate,
- parse_duration,
- int_or_none,
-)
+from ..utils import parse_duration
class WatchIndianPornIE(InfoExtractor):
'ext': 'mp4',
'title': 'Hot milf from kerala shows off her gorgeous large breasts on camera',
'thumbnail': r're:^https?://.*\.jpg$',
- 'uploader': 'LoveJay',
- 'upload_date': '20160428',
'duration': 226,
'view_count': int,
- 'comment_count': int,
'categories': list,
'age_limit': 18,
}
webpage = self._download_webpage(url, display_id)
- video_url = self._html_search_regex(
- r"url: escape\('([^']+)'\)", webpage, 'url')
+ info_dict = self._parse_html5_media_entries(url, webpage, video_id)[0]
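+ # _parse_html5_media_entries() collects the page's <video>/<source>
+ # tags, replacing the old escape()-based URL scraping; its first (and
+ # only expected) entry is enriched with the fields extracted below.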
- title = self._html_search_regex(
- r'<h2 class="he2"><span>(.*?)</span>',
- webpage, 'title')
- thumbnail = self._html_search_regex(
- r'<span id="container"><img\s+src="([^"]+)"',
- webpage, 'thumbnail', fatal=False)
-
- uploader = self._html_search_regex(
- r'class="aupa">\s*(.*?)</a>',
- webpage, 'uploader')
- upload_date = unified_strdate(self._html_search_regex(
- r'Added: <strong>(.+?)</strong>', webpage, 'upload date', fatal=False))
+ title = self._html_search_regex((
+ r'<title>(.+?)\s*-\s*Indian\s+Porn</title>',
+ r'<h4>(.+?)</h4>'
+ ), webpage, 'title')
duration = parse_duration(self._search_regex(
- r'<td>Time:\s*</td>\s*<td align="right"><span>\s*(.+?)\s*</span>',
+ r'Time:\s*<strong>\s*(.+?)\s*</strong>',
webpage, 'duration', fatal=False))
- view_count = int_or_none(self._search_regex(
- r'<td>Views:\s*</td>\s*<td align="right"><span>\s*(\d+)\s*</span>',
+ view_count = int(self._search_regex(
+ r'(?s)Time:\s*<strong>.*?</strong>.*?<strong>\s*(\d+)\s*</strong>',
webpage, 'view count', fatal=False))
- comment_count = int_or_none(self._search_regex(
- r'<td>Comments:\s*</td>\s*<td align="right"><span>\s*(\d+)\s*</span>',
- webpage, 'comment count', fatal=False))
categories = re.findall(
- r'<a href="[^"]+/search/video/desi"><span>([^<]+)</span></a>',
+ r'<a[^>]+class=[\'"]categories[\'"][^>]*>\s*([^<]+)\s*</a>',
webpage)
- return {
+ info_dict.update({
'id': video_id,
'display_id': display_id,
- 'url': video_url,
'http_headers': {
'Referer': url,
},
'title': title,
- 'thumbnail': thumbnail,
- 'uploader': uploader,
- 'upload_date': upload_date,
'duration': duration,
'view_count': view_count,
- 'comment_count': comment_count,
'categories': categories,
'age_limit': 18,
- }
+ })
+
+ return info_dict
_VALID_URL = r'''(?x)
(?:
https?://video-api\.wsj\.com/api-video/player/iframe\.html\?.*?\bguid=|
- https?://(?:www\.)?wsj\.com/video/[^/]+/|
+ https?://(?:www\.)?(?:wsj|barrons)\.com/video/[^/]+/|
wsj:
)
(?P<id>[a-fA-F0-9-]{36})
}, {
'url': 'http://www.wsj.com/video/can-alphabet-build-a-smarter-city/359DDAA8-9AC1-489C-82E6-0429C1E430E0.html',
'only_matching': True,
+ }, {
+ 'url': 'http://www.barrons.com/video/capitalism-deserves-more-respect-from-millennials/F301217E-6F46-43AE-B8D2-B7180D642EE9.html',
+ 'only_matching': True,
}]
def _real_extract(self, url):
ExtractorError,
int_or_none,
NO_DEFAULT,
- sanitized_Request,
urlencode_postdata,
)
(r'vidabc\.com', 'Vid ABC'),
(r'vidbom\.com', 'VidBom'),
(r'vidlo\.us', 'vidlo'),
+ (r'rapidvideo\.(?:cool|org)', 'RapidVideo.TV'),
+ (r'fastvideo\.me', 'FastVideo.me'),
)
IE_DESC = 'XFileShare based sites: %s' % ', '.join(list(zip(*_SITES))[1])
'params': {
'skip_download': True,
},
+ }, {
+ 'url': 'http://www.rapidvideo.cool/b667kprndr8w',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.fastvideo.me/k8604r8nk8sn/FAST_FURIOUS_8_-_Trailer_italiano_ufficiale.mp4.html',
+ 'only_matching': True
}]
def _real_extract(self, url):
if countdown:
self._sleep(countdown, video_id)
- post = urlencode_postdata(fields)
-
- req = sanitized_Request(url, post)
- req.add_header('Content-type', 'application/x-www-form-urlencoded')
-
- webpage = self._download_webpage(req, video_id, 'Downloading video page')
+ webpage = self._download_webpage(
+ url, video_id, 'Downloading video page',
+ data=urlencode_postdata(fields), headers={
+ 'Referer': url,
+ 'Content-type': 'application/x-www-form-urlencoded',
+ })
title = (self._search_regex(
(r'style="z-index: [0-9]+;">([^<]+)</span>',
def extract_formats(default=NO_DEFAULT):
urls = []
for regex in (
- r'file\s*:\s*(["\'])(?P<url>http(?:(?!\1).)+\.(?:m3u8|mp4|flv)(?:(?!\1).)*)\1',
+ r'(?:file|src)\s*:\s*(["\'])(?P<url>http(?:(?!\1).)+\.(?:m3u8|mp4|flv)(?:(?!\1).)*)\1',
r'file_link\s*=\s*(["\'])(?P<url>http(?:(?!\1).)+)\1',
r'addVariable\((\\?["\'])file\1\s*,\s*(\\?["\'])(?P<url>http(?:(?!\2).)+)\2\)',
r'<embed[^>]+src=(["\'])(?P<url>http(?:(?!\1).)+\.(?:m3u8|mp4|flv)(?:(?!\1).)*)\1'):
import re
from .common import InfoExtractor
+from ..compat import compat_str
from ..utils import (
+ clean_html,
dict_get,
ExtractorError,
int_or_none,
class XHamsterIE(InfoExtractor):
- _VALID_URL = r'(?P<proto>https?)://(?:.+?\.)?xhamster\.com/movies/(?P<id>[0-9]+)/(?P<seo>.*?)\.html(?:\?.*)?'
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:.+?\.)?xhamster\.com/
+ (?:
+ movies/(?P<id>\d+)/(?P<display_id>[^/]*)\.html|
+ videos/(?P<display_id_2>[^/]*)-(?P<id_2>\d+)
+ )
+ '''
+
_TESTS = [{
'url': 'http://xhamster.com/movies/1509445/femaleagent_shy_beauty_takes_the_bait.html',
'md5': '8281348b8d3c53d39fffb377d24eac4e',
'info_dict': {
'id': '1509445',
+ 'display_id': 'femaleagent_shy_beauty_takes_the_bait',
'ext': 'mp4',
'title': 'FemaleAgent Shy beauty takes the bait',
'upload_date': '20121014',
'uploader': 'Ruseful2011',
'duration': 893,
'age_limit': 18,
+ 'categories': ['Fake Hub', 'Amateur', 'MILFs', 'POV', 'Boss', 'Office', 'Oral', 'Reality', 'Sexy'],
},
}, {
'url': 'http://xhamster.com/movies/2221348/britney_spears_sexy_booty.html?hd',
'info_dict': {
'id': '2221348',
+ 'display_id': 'britney_spears_sexy_booty',
'ext': 'mp4',
'title': 'Britney Spears Sexy Booty',
'upload_date': '20130914',
'uploader': 'jojo747400',
'duration': 200,
'age_limit': 18,
+ 'categories': ['Britney Spears', 'Celebrities', 'HD Videos', 'Sexy', 'Sexy Booty'],
},
'params': {
'skip_download': True,
'uploader': 'parejafree',
'duration': 72,
'age_limit': 18,
+ 'categories': ['Amateur', 'Blowjobs'],
},
'params': {
'skip_download': True,
# This video is visible for marcoalfa123456's friends only
'url': 'https://it.xhamster.com/movies/7263980/la_mia_vicina.html',
'only_matching': True,
+ }, {
+ # new URL schema
+ 'url': 'https://pt.xhamster.com/videos/euro-pedal-pumping-7937821',
+ 'only_matching': True,
}]
def _real_extract(self, url):
- def extract_video_url(webpage, name):
- return self._search_regex(
- [r'''file\s*:\s*(?P<q>["'])(?P<mp4>.+?)(?P=q)''',
- r'''<a\s+href=(?P<q>["'])(?P<mp4>.+?)(?P=q)\s+class=["']mp4Thumb''',
- r'''<video[^>]+file=(?P<q>["'])(?P<mp4>.+?)(?P=q)[^>]*>'''],
- webpage, name, group='mp4')
-
- def is_hd(webpage):
- return '<div class=\'icon iconHD\'' in webpage
-
mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id') or mobj.group('id_2')
+ display_id = mobj.group('display_id') or mobj.group('display_id_2')
- video_id = mobj.group('id')
- seo = mobj.group('seo')
- proto = mobj.group('proto')
- mrss_url = '%s://xhamster.com/movies/%s/%s.html' % (proto, video_id, seo)
- webpage = self._download_webpage(mrss_url, video_id)
+ webpage = self._download_webpage(url, video_id)
error = self._html_search_regex(
r'<div[^>]+id=["\']videoClosed["\'][^>]*>(.+?)</div>',
r'<title[^>]*>(.+?)(?:,\s*[^,]*?\s*Porn\s*[^,]*?:\s*xHamster[^<]*| - xHamster\.com)</title>'],
webpage, 'title')
+ formats = []
+ format_urls = set()
+
+ sources = self._parse_json(
+ self._search_regex(
+ r'sources\s*:\s*({.+?})\s*,?\s*\n', webpage, 'sources',
+ default='{}'),
+ video_id, fatal=False)
+ for format_id, format_url in sources.items():
+ if not isinstance(format_url, compat_str):
+ continue
+ if format_url in format_urls:
+ continue
+ format_urls.add(format_url)
+ formats.append({
+ 'format_id': format_id,
+ 'url': format_url,
+ 'height': int_or_none(self._search_regex(
+ r'^(\d+)[pP]', format_id, 'height', default=None))
+ })
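+ # "sources" is assumed to be a flat quality-to-URL map embedded in the
+ # page JS, e.g. sources: {"720p": "https://...", "480p": "https://..."}
+ # (illustrative shape); the digits of each key double as the format
+ # height.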
+
+ video_url = self._search_regex(
+ [r'''file\s*:\s*(?P<q>["'])(?P<mp4>.+?)(?P=q)''',
+ r'''<a\s+href=(?P<q>["'])(?P<mp4>.+?)(?P=q)\s+class=["']mp4Thumb''',
+ r'''<video[^>]+file=(?P<q>["'])(?P<mp4>.+?)(?P=q)[^>]*>'''],
+ webpage, 'video url', group='mp4', default=None)
+ if video_url and video_url not in format_urls:
+ formats.append({
+ 'url': video_url,
+ })
+
+ self._sort_formats(formats)
+
+ # Only a few videos have a description
mobj = re.search(r'<span>Description: </span>([^<]+)', webpage)
description = mobj.group(1) if mobj else None
webpage, 'upload date', fatal=False))
uploader = self._html_search_regex(
- r'<span[^>]+itemprop=["\']author[^>]+><a[^>]+href=["\'].+?xhamster\.com/user/[^>]+>(?P<uploader>.+?)</a>',
+ r'<span[^>]+itemprop=["\']author[^>]+><a[^>]+><span[^>]+>([^<]+)',
webpage, 'uploader', default='anonymous')
thumbnail = self._search_regex(
webpage, 'thumbnail', fatal=False, group='thumbnail')
duration = parse_duration(self._search_regex(
- r'Runtime:\s*</span>\s*([\d:]+)', webpage,
+ [r'<[^<]+\bitemprop=["\']duration["\'][^<]+\bcontent=["\'](.+?)["\']',
+ r'Runtime:\s*</span>\s*([\d:]+)'], webpage,
'duration', fatal=False))
view_count = int_or_none(self._search_regex(
r'content=["\']User(?:View|Play)s:(\d+)',
webpage, 'view count', fatal=False))
- mobj = re.search(r"hint='(?P<likecount>\d+) Likes / (?P<dislikecount>\d+) Dislikes'", webpage)
+ mobj = re.search(r'hint=[\'"](?P<likecount>\d+) Likes / (?P<dislikecount>\d+) Dislikes', webpage)
(like_count, dislike_count) = (mobj.group('likecount'), mobj.group('dislikecount')) if mobj else (None, None)
mobj = re.search(r'</label>Comments \((?P<commentcount>\d+)\)</div>', webpage)
age_limit = self._rta_search(webpage)
- hd = is_hd(webpage)
-
- format_id = 'hd' if hd else 'sd'
-
- video_url = extract_video_url(webpage, format_id)
- formats = [{
- 'url': video_url,
- 'format_id': 'hd' if hd else 'sd',
- 'preference': 1,
- }]
-
- if not hd:
- mrss_url = self._search_regex(r'<link rel="canonical" href="([^"]+)', webpage, 'mrss_url')
- webpage = self._download_webpage(mrss_url + '?hd', video_id, note='Downloading HD webpage')
- if is_hd(webpage):
- video_url = extract_video_url(webpage, 'hd')
- formats.append({
- 'url': video_url,
- 'format_id': 'hd',
- 'preference': 2,
- })
-
- self._sort_formats(formats)
+ categories_html = self._search_regex(
+ r'(?s)<table.+?(<span>Categories:.+?)</table>', webpage,
+ 'categories', default=None)
+ categories = [clean_html(category) for category in re.findall(
+ r'<a[^>]+>(.+?)</a>', categories_html)] if categories_html else None
return {
'id': video_id,
+ 'display_id': display_id,
'title': title,
'description': description,
'upload_date': upload_date,
'dislike_count': int_or_none(dislike_count),
'comment_count': int_or_none(comment_count),
'age_limit': age_limit,
+ 'categories': categories,
'formats': formats,
}
# coding: utf-8
from __future__ import unicode_literals
-import base64
-
from .common import InfoExtractor
-from ..compat import compat_urllib_parse_unquote
from ..utils import (
ExtractorError,
+ float_or_none,
+ get_element_by_attribute,
parse_iso8601,
- parse_duration,
+ remove_end,
)
'id': '3860914',
'ext': 'mp3',
'title': '孤單南半球-歐德陽',
+ 'description': '孤單南半球-歐德陽',
'thumbnail': r're:^https?://.*\.jpg$',
'duration': 247.246,
'timestamp': 1314932940,
'duration': 596.458,
'timestamp': 1454242500,
'upload_date': '20160131',
- 'uploader': 'yan12125',
+ 'uploader': '屁姥',
'uploader_id': '12158353',
'categories': ['個人短片'],
'description': 'http://download.blender.org/peach/bigbuckbunny_movies/BigBuckBunny_320x180.mp4',
# from http://forgetfulbc.blogspot.com/2016/06/date.html
'url': 'http://vlog.xuite.net/embed/cE1xbENoLTI3NDQ3MzM2LmZsdg==?ar=0&as=0',
'info_dict': {
- 'id': 'cE1xbENoLTI3NDQ3MzM2LmZsdg==',
+ 'id': '27447336',
'ext': 'mp4',
'title': '男女平權只是口號?專家解釋約會時男生是否該幫女生付錢 (中字)',
- 'description': 'md5:f0abdcb69df300f522a5442ef3146f2a',
+ 'description': 'md5:1223810fa123b179083a3aed53574706',
'timestamp': 1466160960,
'upload_date': '20160617',
'uploader': 'B.C. & Lowy',
'only_matching': True,
}]
- @staticmethod
- def base64_decode_utf8(data):
- return base64.b64decode(data.encode('utf-8')).decode('utf-8')
-
- @staticmethod
- def base64_encode_utf8(data):
- return base64.b64encode(data.encode('utf-8')).decode('utf-8')
-
- def _extract_flv_config(self, encoded_media_id):
- flv_config = self._download_xml(
- 'http://vlog.xuite.net/flash/player?media=%s' % encoded_media_id,
- 'flv config')
- prop_dict = {}
- for prop in flv_config.findall('./property'):
- prop_id = self.base64_decode_utf8(prop.attrib['id'])
- # CDATA may be empty in flv config
- if not prop.text:
- continue
- encoded_content = self.base64_decode_utf8(prop.text)
- prop_dict[prop_id] = compat_urllib_parse_unquote(encoded_content)
- return prop_dict
-
def _real_extract(self, url):
+ # /play/ URLs provide embedded video URL and more metadata
+ url = url.replace('/embed/', '/play/')
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
'%s returned error: %s' % (self.IE_NAME, error_msg),
expected=True)
- encoded_media_id = self._search_regex(
- r'attributes\.name\s*=\s*"([^"]+)"', webpage,
- 'encoded media id', default=None)
- if encoded_media_id is None:
- video_id = self._html_search_regex(
- r'data-mediaid="(\d+)"', webpage, 'media id')
- encoded_media_id = self.base64_encode_utf8(video_id)
- flv_config = self._extract_flv_config(encoded_media_id)
+ media_info = self._parse_json(self._search_regex(
+ r'var\s+mediaInfo\s*=\s*({.*});', webpage, 'media info'), video_id)
- FORMATS = {
- 'audio': 'mp3',
- 'video': 'mp4',
- }
+ video_id = media_info['MEDIA_ID']
formats = []
- for format_tag in ('src', 'hq_src'):
- video_url = flv_config.get(format_tag)
+ for key in ('html5Url', 'html5HQUrl'):
+ video_url = media_info.get(key)
if not video_url:
continue
format_id = self._search_regex(
- r'\bq=(.+?)\b', video_url, 'format id', default=format_tag)
+ r'\bq=(.+?)\b', video_url, 'format id', default=None)
formats.append({
'url': video_url,
- 'ext': FORMATS.get(flv_config['type'], 'mp4'),
+ 'ext': 'mp4' if format_id.isnumeric() else format_id,
'format_id': format_id,
'height': int(format_id) if format_id.isnumeric() else None,
})
self._sort_formats(formats)
- timestamp = flv_config.get('publish_datetime')
+ timestamp = media_info.get('PUBLISH_DATETIME')
if timestamp:
timestamp = parse_iso8601(timestamp + ' +0800', ' ')
- category = flv_config.get('category')
+ category = media_info.get('catName')
categories = [category] if category else []
+ uploader = media_info.get('NICKNAME')
+ uploader_url = None
+
+ author_div = get_element_by_attribute('itemprop', 'author', webpage)
+ if author_div:
+ uploader = uploader or self._html_search_meta('name', author_div)
+ uploader_url = self._html_search_regex(
+ r'<link[^>]+itemprop="url"[^>]+href="([^"]+)"', author_div,
+ 'uploader URL', fatal=False)
+
return {
'id': video_id,
- 'title': flv_config['title'],
- 'description': flv_config.get('description'),
- 'thumbnail': flv_config.get('thumb'),
+ 'title': media_info['TITLE'],
+ 'description': remove_end(media_info.get('metaDesc'), ' (Xuite å½±é³)'),
+ 'thumbnail': media_info.get('ogImageUrl'),
'timestamp': timestamp,
- 'uploader': flv_config.get('author_name'),
- 'uploader_id': flv_config.get('author_id'),
- 'duration': parse_duration(flv_config.get('duration')),
+ 'uploader': uploader,
+ 'uploader_id': media_info.get('MEMBER_ID'),
+ 'uploader_url': uploader_url,
+ 'duration': float_or_none(media_info.get('MEDIA_DURATION'), 1000000),
'categories': categories,
'formats': formats,
}
r"video_url\s*:\s*'([^']+)'", webpage, 'video URL')
title = self._html_search_regex(
- [r'<div class="block_header">\s*<h1>([^<]+)</h1>',
- r'<title>(.*?)\s*-\s*XXXYMovies\.com</title>'],
+ [r'<div[^>]+\bclass="block_header"[^>]*>\s*<h1>([^<]+)<',
+ r'<title>(.*?)\s*-\s*(?:XXXYMovies\.com|XXX\s+Movies)</title>'],
webpage, 'title')
thumbnail = self._search_regex(
+++ /dev/null
-# coding: utf-8
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-from ..compat import compat_urlparse
-from ..utils import (
- float_or_none,
- month_by_abbreviation,
- ExtractorError,
- get_element_by_attribute,
-)
-
-
-class YamIE(InfoExtractor):
- IE_DESC = '蕃薯藤yam天空部落'
- _VALID_URL = r'https?://mymedia\.yam\.com/m/(?P<id>\d+)'
-
- _TESTS = [{
- # An audio hosted on Yam
- 'url': 'http://mymedia.yam.com/m/2283921',
- 'md5': 'c011b8e262a52d5473d9c2e3c9963b9c',
- 'info_dict': {
- 'id': '2283921',
- 'ext': 'mp3',
- 'title': '發現 - 趙薇 京華煙雲主題曲',
- 'description': '發現 - 趙薇 京華煙雲主題曲',
- 'uploader_id': 'princekt',
- 'upload_date': '20080807',
- 'duration': 313.0,
- }
- }, {
- # An external video hosted on YouTube
- 'url': 'http://mymedia.yam.com/m/3599430',
- 'md5': '03127cf10d8f35d120a9e8e52e3b17c6',
- 'info_dict': {
- 'id': 'CNpEoQlrIgA',
- 'ext': 'mp4',
- 'upload_date': '20150306',
- 'uploader': '新莊社大瑜伽社',
- 'description': 'md5:11e2e405311633ace874f2e6226c8b17',
- 'uploader_id': '2323agoy',
- 'title': '20090412陽明山二子坪-1',
- },
- 'skip': 'Video does not exist',
- }, {
- 'url': 'http://mymedia.yam.com/m/3598173',
- 'info_dict': {
- 'id': '3598173',
- 'ext': 'mp4',
- },
- 'skip': 'cause Yam system error',
- }, {
- 'url': 'http://mymedia.yam.com/m/3599437',
- 'info_dict': {
- 'id': '3599437',
- 'ext': 'mp4',
- },
- 'skip': 'invalid YouTube URL',
- }, {
- 'url': 'http://mymedia.yam.com/m/2373534',
- 'md5': '7ff74b91b7a817269d83796f8c5890b1',
- 'info_dict': {
- 'id': '2373534',
- 'ext': 'mp3',
- 'title': '林俊傑&蔡卓妍-小酒窩',
- 'description': 'md5:904003395a0fcce6cfb25028ff468420',
- 'upload_date': '20080928',
- 'uploader_id': 'onliner2',
- }
- }]
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
- page = self._download_webpage(url, video_id)
-
- # Check for errors
- system_msg = self._html_search_regex(
- r'系統訊息(?:<br>|\n|\r)*([^<>]+)<br>', page, 'system message',
- default=None)
- if system_msg:
- raise ExtractorError(system_msg, expected=True)
-
- # Is it hosted externally on YouTube?
- youtube_url = self._html_search_regex(
- r'<embed src="(http://www.youtube.com/[^"]+)"',
- page, 'YouTube url', default=None)
- if youtube_url:
- return self.url_result(youtube_url, 'Youtube')
-
- title = self._html_search_regex(
- r'<h1[^>]+class="heading"[^>]*>\s*(.+)\s*</h1>', page, 'title')
-
- api_page = self._download_webpage(
- 'http://mymedia.yam.com/api/a/?pID=' + video_id, video_id,
- note='Downloading API page')
- api_result_obj = compat_urlparse.parse_qs(api_page)
-
- info_table = get_element_by_attribute('class', 'info', page)
- uploader_id = self._html_search_regex(
- r'<!-- 發表作者 -->：[\n ]+<a href="/([a-z0-9]+)"',
- info_table, 'uploader id', fatal=False)
- mobj = re.search(r'<!-- 發表於 -->(?P<mon>[A-Z][a-z]{2})\s+' +
- r'(?P<day>\d{1,2}), (?P<year>\d{4})', page)
- if mobj:
- upload_date = '%s%02d%02d' % (
- mobj.group('year'),
- month_by_abbreviation(mobj.group('mon')),
- int(mobj.group('day')))
- else:
- upload_date = None
- duration = float_or_none(api_result_obj['totaltime'][0], scale=1000)
-
- return {
- 'id': video_id,
- 'url': api_result_obj['mp3file'][0],
- 'title': title,
- 'description': self._html_search_meta('description', page),
- 'duration': duration,
- 'uploader_id': uploader_id,
- 'upload_date': upload_date,
- }
--- /dev/null
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ determine_ext,
+ float_or_none,
+ int_or_none,
+ try_get,
+ urlencode_postdata,
+)
+
+
+class YandexDiskIE(InfoExtractor):
+ _VALID_URL = r'https?://yadi\.sk/[di]/(?P<id>[^/?#&]+)'
+
+ _TESTS = [{
+ 'url': 'https://yadi.sk/i/VdOeDou8eZs6Y',
+ 'md5': '33955d7ae052f15853dc41f35f17581c',
+ 'info_dict': {
+ 'id': 'VdOeDou8eZs6Y',
+ 'ext': 'mp4',
+ 'title': '4.mp4',
+ 'duration': 168.6,
+ 'uploader': 'y.botova',
+ 'uploader_id': '300043621',
+ 'view_count': int,
+ },
+ }, {
+ 'url': 'https://yadi.sk/d/h3WAXvDS3Li3Ce',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ status = self._download_webpage(
+ 'https://disk.yandex.com/auth/status', video_id, query={
+ 'urlOrigin': url,
+ 'source': 'public',
+ 'md5': 'false',
+ })
+
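+ # The auth status page embeds an "sk" (or "skExternal") session token,
+ # which the /models/ API below requires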
+ sk = self._search_regex(
+ r'(["\'])sk(?:External)?\1\s*:\s*(["\'])(?P<value>(?:(?!\2).)+)\2',
+ status, 'sk', group='value')
+
+ webpage = self._download_webpage(url, video_id)
+
+ models = self._parse_json(
+ self._search_regex(
+ r'<script[^>]+id=["\']models-client[^>]+>\s*(\[.+?\])\s*</script',
+ webpage, 'video JSON'),
+ video_id)
+
+ data = next(
+ model['data'] for model in models
+ if model.get('model') == 'resource')
+
+ video_hash = data['id']
+ title = data['name']
+
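+ # A single POST to /models/ batches two model requests: videoInfo (the list
+ # of stream variants) and do-get-resource-url (a direct link to the
+ # originally uploaded file)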
+ models = self._download_json(
+ 'https://disk.yandex.com/models/', video_id,
+ data=urlencode_postdata({
+ '_model.0': 'videoInfo',
+ 'id.0': video_hash,
+ '_model.1': 'do-get-resource-url',
+ 'id.1': video_hash,
+ 'version': '13.6',
+ 'sk': sk,
+ }), query={'_m': 'videoInfo'})['models']
+
+ videos = try_get(models, lambda x: x[0]['data']['videos'], list) or []
+ source_url = try_get(
+ models, lambda x: x[1]['data']['file'], compat_str)
+
+ formats = []
+ if source_url:
+ formats.append({
+ 'url': source_url,
+ 'format_id': 'source',
+ 'ext': determine_ext(title, 'mp4'),
+ 'quality': 1,
+ })
+ for video in videos:
+ format_url = video.get('url')
+ if not format_url:
+ continue
+ if determine_ext(format_url) == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ format_url, video_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls', fatal=False))
+ else:
+ formats.append({
+ 'url': format_url,
+ })
+ self._sort_formats(formats)
+
+ duration = float_or_none(try_get(
+ models, lambda x: x[0]['data']['duration']), 1000)
+ uploader = try_get(
+ data, lambda x: x['user']['display_name'], compat_str)
+ uploader_id = try_get(
+ data, lambda x: x['user']['uid'], compat_str)
+ view_count = int_or_none(try_get(
+ data, lambda x: x['meta']['views_counter']))
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'duration': duration,
+ 'uploader': uploader,
+ 'uploader_id': uploader_id,
+ 'view_count': view_count,
+ 'formats': formats,
+ }
from __future__ import unicode_literals
+import re
+
from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ determine_ext,
+ int_or_none,
+ parse_duration,
+)
class YouJizzIE(InfoExtractor):
- _VALID_URL = r'https?://(?:\w+\.)?youjizz\.com/videos/(?:[^/#?]+)?-(?P<id>[0-9]+)\.html(?:$|[?#])'
+ _VALID_URL = r'https?://(?:\w+\.)?youjizz\.com/videos/(?:[^/#?]*-(?P<id>\d+)\.html|embed/(?P<embed_id>\d+))'
_TESTS = [{
'url': 'http://www.youjizz.com/videos/zeichentrick-1-2189178.html',
- 'md5': '78fc1901148284c69af12640e01c6310',
+ 'md5': 'b1e1dfaa8bb9537d8b84eeda9cf4acf4',
'info_dict': {
'id': '2189178',
'ext': 'mp4',
'title': 'Zeichentrick 1',
'age_limit': 18,
+ 'duration': 2874,
}
}, {
'url': 'http://www.youjizz.com/videos/-2189178.html',
'only_matching': True,
+ }, {
+ 'url': 'https://www.youjizz.com/videos/embed/31991001',
+ 'only_matching': True,
}]
def _real_extract(self, url):
- video_id = self._match_id(url)
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id') or mobj.group('embed_id')
+
webpage = self._download_webpage(url, video_id)
- # YouJizz's HTML5 player has invalid HTML
- webpage = webpage.replace('"controls', '" controls')
- age_limit = self._rta_search(webpage)
- video_title = self._html_search_regex(
- r'<title>\s*(.*)\s*</title>', webpage, 'title')
- info_dict = self._parse_html5_media_entries(url, webpage, video_id)[0]
+ title = self._html_search_regex(
+ r'<title>(.+?)</title>', webpage, 'title')
+
+ formats = []
+
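+ # The page defines a JS "encodings" array whose entries carry a filename
+ # (a direct MP4 or an HLS manifest) and a quality label such as "720p"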
+ encodings = self._parse_json(
+ self._search_regex(
+ r'encodings\s*=\s*(\[.+?\]);\n', webpage, 'encodings',
+ default='[]'),
+ video_id, fatal=False)
+ for encoding in encodings:
+ if not isinstance(encoding, dict):
+ continue
+ format_url = encoding.get('filename')
+ if not isinstance(format_url, compat_str):
+ continue
+ if determine_ext(format_url) == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ format_url, video_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls', fatal=False))
+ else:
+ format_id = encoding.get('name') or encoding.get('quality')
+ height = int_or_none(self._search_regex(
+ r'^(\d+)[pP]', format_id, 'height', default=None))
+ formats.append({
+ 'url': format_url,
+ 'format_id': format_id,
+ 'height': height,
+ })
+
+ if formats:
+ info_dict = {
+ 'formats': formats,
+ }
+ else:
+ # YouJizz's HTML5 player has invalid HTML
+ webpage = webpage.replace('"controls', '" controls')
+ info_dict = self._parse_html5_media_entries(
+ url, webpage, video_id)[0]
+
+ duration = parse_duration(self._search_regex(
+ r'<strong>Runtime:</strong>([^<]+)', webpage, 'duration',
+ default=None))
+ uploader = self._search_regex(
+ r'<strong>Uploaded By:.*?<a[^>]*>([^<]+)', webpage, 'uploader',
+ default=None)
info_dict.update({
'id': video_id,
- 'title': video_title,
- 'age_limit': age_limit,
+ 'title': title,
+ 'age_limit': self._rta_search(webpage),
+ 'duration': duration,
+ 'uploader': uploader,
})
return info_dict
# coding: utf-8
from __future__ import unicode_literals
-import base64
-import itertools
import random
import re
import string
import time
from .common import InfoExtractor
-from ..compat import (
- compat_ord,
- compat_str,
- compat_urllib_parse_urlencode,
-)
from ..utils import (
ExtractorError,
- get_element_by_attribute,
- try_get,
+ get_element_by_class,
+ js_to_json,
+ str_or_none,
+ strip_jsonp,
)
IE_DESC = '优酷'
_VALID_URL = r'''(?x)
(?:
- http://(?:v|player)\.youku\.com/(?:v_show/id_|player\.php/sid/)|
+ https?://(
+ (?:v|player)\.youku\.com/(?:v_show/id_|player\.php/sid/)|
+ video\.tudou\.com/v/)|
youku:)
(?P<id>[A-Za-z0-9]+)(?:\.html|/v\.swf|)
'''
# MD5 is unstable
'url': 'http://v.youku.com/v_show/id_XMTc1ODE5Njcy.html',
'info_dict': {
- 'id': 'XMTc1ODE5Njcy_part1',
+ 'id': 'XMTc1ODE5Njcy',
'title': '★Smile﹗♡ Git Fresh -Booty Music舞蹈.',
- 'ext': 'flv'
+ 'ext': 'mp4',
+ 'duration': 74.73,
+ 'thumbnail': r're:^https?://.*',
+ 'uploader': '。躲猫猫、',
+ 'uploader_id': '36017967',
+ 'uploader_url': 'http://i.youku.com/u/UMTQ0MDcxODY4',
+ 'tags': list,
}
}, {
'url': 'http://player.youku.com/player.php/sid/XNDgyMDQ2NTQw/v.swf',
'url': 'http://v.youku.com/v_show/id_XODgxNjg1Mzk2_ev_1.html',
'info_dict': {
'id': 'XODgxNjg1Mzk2',
+ 'ext': 'mp4',
'title': '武媚娘传奇 85',
+ 'duration': 1999.61,
+ 'thumbnail': r're:^https?://.*',
+ 'uploader': '疯狂豆花',
+ 'uploader_id': '62583473',
+ 'uploader_url': 'http://i.youku.com/u/UMjUwMzMzODky',
+ 'tags': list,
},
- 'playlist_count': 11,
- 'skip': 'Available in China only',
}, {
'url': 'http://v.youku.com/v_show/id_XMTI1OTczNDM5Mg==.html',
'info_dict': {
'id': 'XMTI1OTczNDM5Mg',
+ 'ext': 'mp4',
'title': '花千骨 04',
+ 'duration': 2363,
+ 'thumbnail': r're:^https?://.*',
+ 'uploader': '放剧场-花千骨',
+ 'uploader_id': '772849359',
+ 'uploader_url': 'http://i.youku.com/u/UMzA5MTM5NzQzNg==',
+ 'tags': list,
},
- 'playlist_count': 13,
}, {
'url': 'http://v.youku.com/v_show/id_XNjA1NzA2Njgw.html',
'note': 'Video protected with password',
'info_dict': {
'id': 'XNjA1NzA2Njgw',
+ 'ext': 'mp4',
'title': '邢義田复旦讲座之想象中的胡人—从“左衽孔子”说起',
+ 'duration': 7264.5,
+ 'thumbnail': r're:^https?://.*',
+ 'uploader': 'FoxJin1006',
+ 'uploader_id': '322014285',
+ 'uploader_url': 'http://i.youku.com/u/UMTI4ODA1NzE0MA==',
+ 'tags': list,
},
- 'playlist_count': 19,
'params': {
'videopassword': '100600',
},
'url': 'http://v.youku.com/v_show/id_XOTUxMzg4NDMy.html',
'info_dict': {
'id': 'XOTUxMzg4NDMy',
+ 'ext': 'mp4',
'title': '我的世界☆明月庄主☆车震猎杀☆杀人艺术Minecraft',
+ 'duration': 702.08,
+ 'thumbnail': r're:^https?://.*',
+ 'uploader': '明月庄主moon',
+ 'uploader_id': '38465621',
+ 'uploader_url': 'http://i.youku.com/u/UMTUzODYyNDg0',
+ 'tags': list,
+ },
+ }, {
+ 'url': 'http://video.tudou.com/v/XMjIyNzAzMTQ4NA==.html?f=46177805',
+ 'info_dict': {
+ 'id': 'XMjIyNzAzMTQ4NA',
+ 'ext': 'mp4',
+ 'title': '卡马乔国足开大脚长传冲吊集锦',
+ 'duration': 289,
+ 'thumbnail': r're:^https?://.*',
+ 'uploader': '阿卜杜拉之星',
+ 'uploader_id': '2382249',
+ 'uploader_url': 'http://i.youku.com/u/UOTUyODk5Ng==',
+ 'tags': list,
},
- 'playlist_count': 6,
+ }, {
+ 'url': 'http://video.tudou.com/v/XMjE4ODI3OTg2MA==.html',
+ 'only_matching': True,
}]
- def construct_video_urls(self, data):
- # get sid, token
- def yk_t(s1, s2):
- ls = list(range(256))
- t = 0
- for i in range(256):
- t = (t + ls[i] + compat_ord(s1[i % len(s1)])) % 256
- ls[i], ls[t] = ls[t], ls[i]
- s = bytearray()
- x, y = 0, 0
- for i in range(len(s2)):
- y = (y + 1) % 256
- x = (x + ls[y]) % 256
- ls[x], ls[y] = ls[y], ls[x]
- s.append(compat_ord(s2[i]) ^ ls[(ls[x] + ls[y]) % 256])
- return bytes(s)
-
- sid, token = yk_t(
- b'becaf9be', base64.b64decode(data['security']['encrypt_string'].encode('ascii'))
- ).decode('ascii').split('_')
-
- # get oip
- oip = data['security']['ip']
-
- fileid_dict = {}
- for stream in data['stream']:
- if stream.get('channel_type') == 'tail':
- continue
- format = stream.get('stream_type')
- fileid = try_get(
- stream, lambda x: x['segs'][0]['fileid'],
- compat_str) or stream['stream_fileid']
- fileid_dict[format] = fileid
-
- def get_fileid(format, n):
- number = hex(int(str(n), 10))[2:].upper()
- if len(number) == 1:
- number = '0' + number
- streamfileids = fileid_dict[format]
- fileid = streamfileids[0:8] + number + streamfileids[10:]
- return fileid
-
- # get ep
- def generate_ep(format, n):
- fileid = get_fileid(format, n)
- ep_t = yk_t(
- b'bf7e5f01',
- ('%s_%s_%s' % (sid, fileid, token)).encode('ascii')
- )
- ep = base64.b64encode(ep_t).decode('ascii')
- return ep
-
- # generate video_urls
- video_urls_dict = {}
- for stream in data['stream']:
- if stream.get('channel_type') == 'tail':
- continue
- format = stream.get('stream_type')
- video_urls = []
- for dt in stream['segs']:
- n = str(stream['segs'].index(dt))
- param = {
- 'K': dt['key'],
- 'hd': self.get_hd(format),
- 'myp': 0,
- 'ypp': 0,
- 'ctype': 12,
- 'ev': 1,
- 'token': token,
- 'oip': oip,
- 'ep': generate_ep(format, n)
- }
- video_url = \
- 'http://k.youku.com/player/getFlvPath/' + \
- 'sid/' + sid + \
- '_00' + \
- '/st/' + self.parse_ext_l(format) + \
- '/fileid/' + get_fileid(format, n) + '?' + \
- compat_urllib_parse_urlencode(param)
- video_urls.append(video_url)
- video_urls_dict[format] = video_urls
-
- return video_urls_dict
-
@staticmethod
def get_ysuid():
return '%d%s' % (int(time.time()), ''.join([
random.choice(string.ascii_letters) for i in range(3)]))
- def get_hd(self, fm):
- hd_id_dict = {
- '3gp': '0',
- '3gphd': '1',
- 'flv': '0',
- 'flvhd': '0',
- 'mp4': '1',
- 'mp4hd': '1',
- 'mp4hd2': '1',
- 'mp4hd3': '1',
- 'hd2': '2',
- 'hd3': '3',
- }
- return hd_id_dict[fm]
-
- def parse_ext_l(self, fm):
- ext_dict = {
- '3gp': 'flv',
- '3gphd': 'mp4',
- 'flv': 'flv',
- 'flvhd': 'flv',
- 'mp4': 'mp4',
- 'mp4hd': 'mp4',
- 'mp4hd2': 'flv',
- 'mp4hd3': 'flv',
- 'hd2': 'flv',
- 'hd3': 'flv',
- }
- return ext_dict[fm]
-
def get_format_name(self, fm):
_dict = {
'3gp': 'h6',
'hd2': 'h2',
'hd3': 'h1',
}
- return _dict[fm]
+ return _dict.get(fm)
def _real_extract(self, url):
video_id = self._match_id(url)
self._set_cookie('youku.com', '__ysuid', self.get_ysuid())
+ self._set_cookie('youku.com', 'xreferrer', 'http://www.youku.com')
- def retrieve_data(req_url, note):
- headers = {
- 'Referer': req_url,
- }
- headers.update(self.geo_verification_headers())
- self._set_cookie('youku.com', 'xreferrer', 'http://www.youku.com')
+ _, urlh = self._download_webpage_handle(
+ 'https://log.mmstat.com/eg.js', video_id, 'Retrieving cna info')
+ # The etag header is '"foobar"'; let's remove the double quotes
+ cna = urlh.headers['etag'][1:-1]
- raw_data = self._download_json(req_url, video_id, note=note, headers=headers)
-
- return raw_data['data']
+ # request basic data
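+ # (tudou pages are served by a player with a different client code,
+ # hence ccode 0402 instead of 0401)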
+ basic_data_params = {
+ 'vid': video_id,
+ 'ccode': '0402' if 'tudou.com' in url else '0401',
+ 'client_ip': '192.168.1.1',
+ 'utid': cna,
+ 'client_ts': time.time() / 1000,
+ }
video_password = self._downloader.params.get('videopassword')
-
- # request basic data
- basic_data_url = 'http://play.youku.com/play/get.json?vid=%s&ct=12' % video_id
if video_password:
- basic_data_url += '&pwd=%s' % video_password
+ basic_data_params['password'] = video_password
- data = retrieve_data(basic_data_url, 'Downloading JSON metadata')
+ headers = {
+ 'Referer': url,
+ }
+ headers.update(self.geo_verification_headers())
+ data = self._download_json(
+ 'https://ups.youku.com/ups/get.json', video_id,
+ 'Downloading JSON metadata',
+ query=basic_data_params, headers=headers)['data']
error = data.get('error')
if error:
raise ExtractorError(msg)
# get video title
- title = data['video']['title']
-
- # generate video_urls_dict
- video_urls_dict = self.construct_video_urls(data)
-
- # construct info
- entries = [{
- 'id': '%s_part%d' % (video_id, i + 1),
- 'title': title,
- 'formats': [],
- # some formats are not available for all parts, we have to detect
- # which one has all
- } for i in range(max(len(v.get('segs')) for v in data['stream']))]
- for stream in data['stream']:
- if stream.get('channel_type') == 'tail':
- continue
- fm = stream.get('stream_type')
- video_urls = video_urls_dict[fm]
- for video_url, seg, entry in zip(video_urls, stream['segs'], entries):
- entry['formats'].append({
- 'url': video_url,
- 'format_id': self.get_format_name(fm),
- 'ext': self.parse_ext_l(fm),
- 'filesize': int(seg['size']),
- 'width': stream.get('width'),
- 'height': stream.get('height'),
- })
+ video_data = data['video']
+ title = video_data['title']
+
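+ # Every stream in the ups response exposes a ready-made HLS manifest
+ # (m3u8_url), so segment URLs no longer have to be constructed by hand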
+ formats = [{
+ 'url': stream['m3u8_url'],
+ 'format_id': self.get_format_name(stream.get('stream_type')),
+ 'ext': 'mp4',
+ 'protocol': 'm3u8_native',
+ 'filesize': int(stream.get('size')),
+ 'width': stream.get('width'),
+ 'height': stream.get('height'),
+ } for stream in data['stream'] if stream.get('channel_type') != 'tail']
+ self._sort_formats(formats)
return {
- '_type': 'multi_video',
'id': video_id,
'title': title,
- 'entries': entries,
+ 'formats': formats,
+ 'duration': video_data.get('seconds'),
+ 'thumbnail': video_data.get('logo'),
+ 'uploader': video_data.get('username'),
+ 'uploader_id': str_or_none(video_data.get('userid')),
+ 'uploader_url': data.get('uploader', {}).get('homepage'),
+ 'tags': video_data.get('tags'),
}
class YoukuShowIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?youku\.com/show_page/id_(?P<id>[0-9a-z]+)\.html'
+ _VALID_URL = r'https?://list\.youku\.com/show/id_(?P<id>[0-9a-z]+)\.html'
IE_NAME = 'youku:show'
- _TEST = {
- 'url': 'http://www.youku.com/show_page/id_zc7c670be07ff11e48b3f.html',
+ _TESTS = [{
+ 'url': 'http://list.youku.com/show/id_zc7c670be07ff11e48b3f.html',
'info_dict': {
'id': 'zc7c670be07ff11e48b3f',
- 'title': '花千骨 未删减版',
- 'description': 'md5:578d4f2145ae3f9128d9d4d863312910',
+ 'title': '花千骨 DVD版',
+ 'description': 'md5:a1ae6f5618571bbeb5c9821f9c81b558',
},
'playlist_count': 50,
- }
-
- _PAGE_SIZE = 40
+ }, {
+ # Episode number not starting from 1
+ 'url': 'http://list.youku.com/show/id_zefbfbd70efbfbd780bef.html',
+ 'info_dict': {
+ 'id': 'zefbfbd70efbfbd780bef',
+ 'title': '超级飞侠3',
+ 'description': 'md5:275715156abebe5ccc2a1992e9d56b98',
+ },
+ 'playlist_count': 24,
+ }, {
+ # Ongoing playlist. The initial page is the last one
+ 'url': 'http://list.youku.com/show/id_za7c275ecd7b411e1a19e.html',
+ 'only_matching': True,
+ }]
- def _find_videos_in_page(self, webpage):
- videos = re.findall(
- r'<li><a[^>]+href="(?P<url>https?://v\.youku\.com/[^"]+)"[^>]+title="(?P<title>[^"]+)"', webpage)
- return [
- self.url_result(video_url, YoukuIE.ie_key(), title)
- for video_url, title in videos]
+ def _extract_entries(self, playlist_data_url, show_id, note, query):
+ query['callback'] = 'cb'
+ playlist_data = self._download_json(
+ playlist_data_url, show_id, query=query, note=note,
+ transform_source=lambda s: js_to_json(strip_jsonp(s)))['html']
+ drama_list = (get_element_by_class('p-drama-grid', playlist_data) or
+ get_element_by_class('p-drama-half-row', playlist_data))
+ if drama_list is None:
+ raise ExtractorError('No episodes found')
+ video_urls = re.findall(r'<a[^>]+href="([^"]+)"', drama_list)
+ return playlist_data, [
+ self.url_result(self._proto_relative_url(video_url, 'http:'), YoukuIE.ie_key())
+ for video_url in video_urls]
def _real_extract(self, url):
show_id = self._match_id(url)
webpage = self._download_webpage(url, show_id)
- entries = self._find_videos_in_page(webpage)
-
- playlist_title = self._html_search_regex(
- r'<span[^>]+class="name">([^<]+)</span>', webpage, 'playlist title', fatal=False)
- detail_div = get_element_by_attribute('class', 'detail', webpage) or ''
- playlist_description = self._html_search_regex(
- r'<span[^>]+style="display:none"[^>]*>([^<]+)</span>',
- detail_div, 'playlist description', fatal=False)
-
- for idx in itertools.count(1):
- episodes_page = self._download_webpage(
- 'http://www.youku.com/show_episode/id_%s.html' % show_id,
- show_id, query={'divid': 'reload_%d' % (idx * self._PAGE_SIZE + 1)},
- note='Downloading episodes page %d' % idx)
- new_entries = self._find_videos_in_page(episodes_page)
+ entries = []
+ page_config = self._parse_json(self._search_regex(
+ r'var\s+PageConfig\s*=\s*({.+});', webpage, 'page config'),
+ show_id, transform_source=js_to_json)
+ first_page, initial_entries = self._extract_entries(
+ 'http://list.youku.com/show/module', show_id,
+ note='Downloading initial playlist data page',
+ query={
+ 'id': page_config['showid'],
+ 'tab': 'showInfo',
+ })
+ first_page_reload_id = self._html_search_regex(
+ r'<div[^>]+id="(reload_\d+)', first_page, 'first page reload id')
+ # The first reload_id has the same items as first_page
+ reload_ids = re.findall('<li[^>]+data-id="([^"]+)">', first_page)
+ for idx, reload_id in enumerate(reload_ids):
+ if reload_id == first_page_reload_id:
+ entries.extend(initial_entries)
+ continue
+ _, new_entries = self._extract_entries(
+ 'http://list.youku.com/show/episode', show_id,
+ note='Downloading playlist data page %d' % (idx + 1),
+ query={
+ 'id': page_config['showid'],
+ 'stage': reload_id,
+ })
entries.extend(new_entries)
- if len(new_entries) < self._PAGE_SIZE:
- break
- return self.playlist_result(entries, show_id, playlist_title, playlist_description)
+ desc = self._html_search_meta('description', webpage, fatal=False)
+ playlist_title = desc.split(',')[0] if desc else None
+ detail_li = get_element_by_class('p-intro', webpage)
+ playlist_description = get_element_by_class(
+ 'intro-more', detail_li) if detail_li else None
+
+ return self.playlist_result(
+ entries, show_id, playlist_title, playlist_description)
import re
from .common import InfoExtractor
+from ..compat import compat_str
from ..utils import (
int_or_none,
sanitized_Request,
'description': 'Love & Sex Answers: http://bit.ly/DanAndJenn -- Is It Unhealthy To Masturbate Daily?',
'thumbnail': r're:^https?://.*\.jpg$',
'uploader': 'Ask Dan And Jennifer',
- 'upload_date': '20101221',
+ 'upload_date': '20101217',
'average_rating': int,
'view_count': int,
'comment_count': int,
'description': 'http://sweetlivegirls.com Big Tits Awesome Brunette On amazing webcam show.mp4',
'thumbnail': r're:^https?://.*\.jpg$',
'uploader': 'Unknown',
- 'upload_date': '20111125',
+ 'upload_date': '20110418',
'average_rating': int,
'view_count': int,
'comment_count': int,
webpage = self._download_webpage(request, display_id)
title = self._search_regex(
- [r'(?:video_titles|videoTitle)\s*[:=]\s*(["\'])(?P<title>.+?)\1',
- r'<h1[^>]+class=["\']heading\d?["\'][^>]*>([^<])<'],
- webpage, 'title', group='title')
+ [r'(?:video_titles|videoTitle)\s*[:=]\s*(["\'])(?P<title>(?:(?!\1).)+)\1',
+ r'<h1[^>]+class=["\']heading\d?["\'][^>]*>(?P<title>[^<]+)<'],
+ webpage, 'title', group='title',
+ default=None) or self._og_search_title(
+ webpage, default=None) or self._html_search_meta(
+ 'title', webpage, fatal=True)
links = []
+ # Main source
+ definitions = self._parse_json(
+ self._search_regex(
+ r'mediaDefinition\s*=\s*(\[.+?\]);', webpage,
+ 'media definitions', default='[]'),
+ video_id, fatal=False)
+ if definitions:
+ for definition in definitions:
+ if not isinstance(definition, dict):
+ continue
+ video_url = definition.get('videoUrl')
+ if isinstance(video_url, compat_str) and video_url:
+ links.append(video_url)
+
+ # Fallback #1, this also contains extra low quality 180p format
+ for _, link in re.findall(r'<a[^>]+href=(["\'])(http.+?)\1[^>]+title=["\']Download [Vv]ideo', webpage):
+ links.append(link)
+
+ # Fallback #2 (unavailable as of 22.06.2017)
sources = self._search_regex(
r'(?s)sources\s*:\s*({.+?})', webpage, 'sources', default=None)
if sources:
for _, link in re.findall(r'[^:]+\s*:\s*(["\'])(http.+?)\1', sources):
links.append(link)
- # Fallback #1
+ # Fallback #3 (unavailable as of 22.06.2017)
for _, link in re.findall(
- r'(?:videoUrl|videoSrc|videoIpadUrl|html5PlayerSrc)\s*[:=]\s*(["\'])(http.+?)\1', webpage):
- links.append(link)
-
- # Fallback #2, this also contains extra low quality 180p format
- for _, link in re.findall(r'<a[^>]+href=(["\'])(http.+?)\1[^>]+title=["\']Download [Vv]ideo', webpage):
+ r'(?:videoSrc|videoIpadUrl|html5PlayerSrc)\s*[:=]\s*(["\'])(http.+?)\1', webpage):
links.append(link)
- # Fallback #3, encrypted links
+ # Fallback #4, encrypted links (unavailable as of 22.06.2017)
for _, encrypted_link in re.findall(
r'encryptedQuality\d{3,4}URL\s*=\s*(["\'])([\da-zA-Z+/=]+)\1', webpage):
links.append(aes_decrypt_text(encrypted_link, title, 32).decode('utf-8'))
r'(?s)<div[^>]+class=["\']submitByLink["\'][^>]*>(.+?)</div>',
webpage, 'uploader', fatal=False)
upload_date = unified_strdate(self._html_search_regex(
- r'(?s)<div[^>]+class=["\']videoInfo(?:Date|Time)["\'][^>]*>(.+?)</div>',
+ [r'Date\s+[Aa]dded:\s*<span>([^<]+)',
+ r'(?s)<div[^>]+class=["\']videoInfo(?:Date|Time)["\'][^>]*>(.+?)</div>'],
webpage, 'upload date', fatal=False))
age_limit = self._rta_search(webpage)
from ..swfinterp import SWFInterpreter
from ..compat import (
compat_chr,
+ compat_kwargs,
compat_parse_qs,
compat_urllib_parse_unquote,
compat_urllib_parse_unquote_plus,
return True
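+ # The polymer redesign of YouTube pages breaks the regex-based extraction
+ # below, so the old layout is requested for every webpage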
+ def _download_webpage(self, *args, **kwargs):
+ kwargs.setdefault('query', {})['disable_polymer'] = 'true'
+ return super(YoutubeBaseInfoExtractor, self)._download_webpage(
+ *args, **compat_kwargs(kwargs))
+
def _real_initialize(self):
if self._downloader is None:
return
},
},
# video_info is None (https://github.com/rg3/youtube-dl/issues/4421)
+ # YouTube Red ad is not captured for creator
{
'url': '__2ABJjxzNo',
'info_dict': {
'Skipping DASH manifest',
],
},
+ {
+ # The following content has been identified by the YouTube community
+ # as inappropriate or offensive to some audiences.
+ 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
+ 'info_dict': {
+ 'id': '6SJNVb0GnPI',
+ 'ext': 'mp4',
+ 'title': 'Race Differences in Intelligence',
+ 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
+ 'duration': 965,
+ 'upload_date': '20140124',
+ 'uploader': 'New Century Foundation',
+ 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
+ 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
+ 'license': 'Standard YouTube License',
+ 'view_count': int,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
{
# itag 212
'url': '1t24XAntNCY',
sub_lang_list[sub_lang] = sub_formats
return sub_lang_list
+ def make_captions(sub_url, sub_langs):
+ parsed_sub_url = compat_urllib_parse_urlparse(sub_url)
+ caption_qs = compat_parse_qs(parsed_sub_url.query)
+ captions = {}
+ for sub_lang in sub_langs:
+ sub_formats = []
+ for ext in self._SUBTITLE_FORMATS:
+ caption_qs.update({
+ 'tlang': [sub_lang],
+ 'fmt': [ext],
+ })
+ sub_url = compat_urlparse.urlunparse(parsed_sub_url._replace(
+ query=compat_urllib_parse_urlencode(caption_qs, True)))
+ sub_formats.append({
+ 'url': sub_url,
+ 'ext': ext,
+ })
+ captions[sub_lang] = sub_formats
+ return captions
+
+ # New captions format as of 22.06.2017
+ player_response = args.get('player_response')
+ if player_response and isinstance(player_response, compat_str):
+ player_response = self._parse_json(
+ player_response, video_id, fatal=False)
+ if player_response:
+ renderer = player_response['captions']['playerCaptionsTracklistRenderer']
+ base_url = renderer['captionTracks'][0]['baseUrl']
+ sub_lang_list = []
+ for lang in renderer['translationLanguages']:
+ lang_code = lang.get('languageCode')
+ if lang_code:
+ sub_lang_list.append(lang_code)
+ return make_captions(base_url, sub_lang_list)
+
# Some videos don't provide ttsurl but rather caption_tracks and
# caption_translation_languages (e.g. 20LmZk1hakA)
+ # Not used anymore as of 22.06.2017
caption_tracks = args['caption_tracks']
caption_translation_languages = args['caption_translation_languages']
caption_url = compat_parse_qs(caption_tracks.split(',')[0])['u'][0]
- parsed_caption_url = compat_urllib_parse_urlparse(caption_url)
- caption_qs = compat_parse_qs(parsed_caption_url.query)
-
- sub_lang_list = {}
+ sub_lang_list = []
for lang in caption_translation_languages.split(','):
lang_qs = compat_parse_qs(compat_urllib_parse_unquote_plus(lang))
sub_lang = lang_qs.get('lc', [None])[0]
- if not sub_lang:
- continue
- sub_formats = []
- for ext in self._SUBTITLE_FORMATS:
- caption_qs.update({
- 'tlang': [sub_lang],
- 'fmt': [ext],
- })
- sub_url = compat_urlparse.urlunparse(parsed_caption_url._replace(
- query=compat_urllib_parse_urlencode(caption_qs, True)))
- sub_formats.append({
- 'url': sub_url,
- 'ext': ext,
- })
- sub_lang_list[sub_lang] = sub_formats
- return sub_lang_list
+ if sub_lang:
+ sub_lang_list.append(sub_lang)
+ return make_captions(caption_url, sub_lang_list)
# An extractor error can be raised by the download process if there are
# no automatic captions but there are subtitles
- except (KeyError, ExtractorError):
+ except (KeyError, IndexError, ExtractorError):
self._downloader.report_warning(err_msg)
return {}
playback_url, video_id, 'Marking watched',
'Unable to mark watched', fatal=False)
+ @staticmethod
+ def _extract_urls(webpage):
+ # Embedded YouTube player
+ entries = [
+ unescapeHTML(mobj.group('url'))
+ for mobj in re.finditer(r'''(?x)
+ (?:
+ <iframe[^>]+?src=|
+ data-video-url=|
+ <embed[^>]+?src=|
+ embedSWF\(?:\s*|
+ <object[^>]+data=|
+ new\s+SWFObject\(
+ )
+ (["\'])
+ (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
+ (?:embed|v|p)/.+?)
+ \1''', webpage)]
+
+ # lazyYT YouTube embed
+ entries.extend(list(map(
+ unescapeHTML,
+ re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))))
+
+ # Wordpress "YouTube Video Importer" plugin
+ matches = re.findall(r'''(?x)<div[^>]+
+ class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
+ data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
+ entries.extend(m[-1] for m in matches)
+
+ return entries
+
+ @staticmethod
+ def _extract_url(webpage):
+ urls = YoutubeIE._extract_urls(webpage)
+ return urls[0] if urls else None
+
@classmethod
def extract_id(cls, url):
mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
start_time = parse_duration(time_point)
if start_time is None:
continue
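+ # Description timestamps are expected in ascending order, so a start
+ # time past the video duration means the remaining candidates are bogus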
+ if start_time > duration:
+ break
end_time = (duration if next_num == len(chapter_lines)
else parse_duration(chapter_lines[next_num][1]))
if end_time is None:
continue
+ if end_time > duration:
+ end_time = duration
+ if start_time > end_time:
+ break
chapter_title = re.sub(
r'<a[^>]+>[^<]+</a>', '', chapter_line).strip(' \t-')
chapter_title = re.sub(r'\s+', ' ', chapter_title)
if dash_mpd and dash_mpd[0] not in dash_mpds:
dash_mpds.append(dash_mpd[0])
+ is_live = None
+ view_count = None
+
+ def extract_view_count(v_info):
+ return int_or_none(try_get(v_info, lambda x: x['view_count'][0]))
+
# Get video info
embed_webpage = None
- is_live = None
if re.search(r'player-age-gate-content">', video_webpage) is not None:
age_gate = True
# We simulate the access to the video from www.youtube.com/v/{video_id}
else:
age_gate = False
video_info = None
+ sts = None
# Try looking directly into the video webpage
ytplayer_config = self._get_ytplayer_config(video_id, video_webpage)
if ytplayer_config:
args['ypc_vid'], YoutubeIE.ie_key(), video_id=args['ypc_vid'])
if args.get('livestream') == '1' or args.get('live_playback') == 1:
is_live = True
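+ # sts is the player's signature timestamp; passing it to get_video_info
+ # makes the returned signatures match this player version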
+ sts = ytplayer_config.get('sts')
if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
# We also try looking in get_video_info since it may contain different dashmpd
# URL that points to a DASH manifest with possibly different itag set (some itags
# The general idea is to take a union of itags of both DASH manifests (for example
# video with such 'manifest behavior' see https://github.com/rg3/youtube-dl/issues/6093)
self.report_video_info_webpage_download(video_id)
- for el_type in ['&el=info', '&el=embedded', '&el=detailpage', '&el=vevo', '']:
- video_info_url = (
- '%s://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
- % (proto, video_id, el_type))
+ for el in ('info', 'embedded', 'detailpage', 'vevo', ''):
+ query = {
+ 'video_id': video_id,
+ 'ps': 'default',
+ 'eurl': '',
+ 'gl': 'US',
+ 'hl': 'en',
+ }
+ if el:
+ query['el'] = el
+ if sts:
+ query['sts'] = sts
video_info_webpage = self._download_webpage(
- video_info_url,
+ '%s://www.youtube.com/get_video_info' % proto,
video_id, note=False,
- errnote='unable to download video info webpage')
+ errnote='unable to download video info webpage',
+ fatal=False, query=query)
+ if not video_info_webpage:
+ continue
get_video_info = compat_parse_qs(video_info_webpage)
- if get_video_info.get('use_cipher_signature') != ['True']:
- add_dash_mpd(get_video_info)
+ add_dash_mpd(get_video_info)
+ if view_count is None:
+ view_count = extract_view_count(get_video_info)
if not video_info:
video_info = get_video_info
if 'token' in get_video_info:
return self.playlist_result(entries, video_id, video_title, video_description)
self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
- if 'view_count' in video_info:
- view_count = int(video_info['view_count'][0])
- else:
- view_count = None
+ if view_count is None:
+ view_count = extract_view_count(video_info)
# Check for "rental" videos
if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
if not upload_date:
upload_date = self._search_regex(
[r'(?s)id="eow-date.*?>(.*?)</span>',
- r'id="watch-uploader-info".*?>.*?(?:Published|Uploaded|Streamed live|Started) on (.+?)</strong>'],
+ r'(?:id="watch-uploader-info".*?>.*?|["\']simpleText["\']\s*:\s*["\'])(?:Published|Uploaded|Streamed live|Started) on (.+?)[<"\']'],
video_webpage, 'upload date', default=None)
- if upload_date:
- upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())
upload_date = unified_strdate(upload_date)
video_license = self._html_search_regex(
video_webpage, 'license', default=None)
m_music = re.search(
- r'<h4[^>]+class="title"[^>]*>\s*Music\s*</h4>\s*<ul[^>]*>\s*<li>(?P<title>.+?) by (?P<creator>.+?)(?:\(.+?\))?</li',
+ r'''(?x)
+ <h4[^>]+class="title"[^>]*>\s*Music\s*</h4>\s*
+ <ul[^>]*>\s*
+ <li>(?P<title>.+?)
+ by (?P<creator>.+?)
+ (?:
+ \(.+?\)|
+ <a[^>]*
+ (?:
+ \bhref=["\']/red[^>]*>| # drop possible
+ >\s*Listen ad-free with YouTube Red # YouTube Red ad
+ )
+ .*?
+ )?</li
+ ''',
video_webpage)
if m_music:
video_alt_title = remove_quotes(unescapeHTML(m_music.group('title')))
format_id = url_data['itag'][0]
url = url_data['url'][0]
- if 'sig' in url_data:
- url += '&signature=' + url_data['sig'][0]
- elif 's' in url_data:
- encrypted_sig = url_data['s'][0]
+ if 's' in url_data or self._downloader.params.get('youtube_include_dash_manifest', True):
ASSETS_RE = r'"assets":.+?"js":\s*("[^"]+")'
-
jsplayer_url_json = self._search_regex(
ASSETS_RE,
embed_webpage if age_gate else video_webpage,
video_webpage, 'age gate player URL')
player_url = json.loads(player_url_json)
+ if 'sig' in url_data:
+ url += '&signature=' + url_data['sig'][0]
+ elif 's' in url_data:
+ encrypted_sig = url_data['s'][0]
+
if self._downloader.params.get('verbose'):
if player_url is None:
player_version = 'unknown'
|
(%(playlist_id)s)
)""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
- _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s&disable_polymer=true'
+ _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s'
_VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&[^"]*?index=(?P<index>\d+)(?:[^>]+>(?P<title>[^<]+))?'
IE_NAME = 'youtube:playlist'
_TESTS = [{
from .version import __version__
+def _hide_login_info(opts):
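+ # Mask the values of credential options so that dumped argument lists do
+ # not leak secrets, e.g.
+ # ['-u', 'john', '--password=secret'] -> ['-u', 'PRIVATE', '--password=PRIVATE']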
+ PRIVATE_OPTS = set(['-p', '--password', '-u', '--username', '--video-password', '--ap-password', '--ap-username'])
+ eqre = re.compile('^(?P<key>' + ('|'.join(re.escape(po) for po in PRIVATE_OPTS)) + ')=.+$')
+
+ def _scrub_eq(o):
+ m = eqre.match(o)
+ if m:
+ return m.group('key') + '=PRIVATE'
+ else:
+ return o
+
+ opts = list(map(_scrub_eq, opts))
+ for idx, opt in enumerate(opts):
+ if opt in PRIVATE_OPTS and idx + 1 < len(opts):
+ opts[idx + 1] = 'PRIVATE'
+ return opts
+
+
def parseOpts(overrideArguments=None):
def _readOptions(filename_bytes, default=[]):
try:
def _comma_separated_values_options_callback(option, opt_str, value, parser):
setattr(parser.values, option.dest, value.split(','))
- def _hide_login_info(opts):
- PRIVATE_OPTS = ['-p', '--password', '-u', '--username', '--video-password', '--ap-password', '--ap-username']
- eqre = re.compile('^(?P<key>' + ('|'.join(re.escape(po) for po in PRIVATE_OPTS)) + ')=.+$')
-
- def _scrub_eq(o):
- m = eqre.match(o)
- if m:
- return m.group('key') + '=PRIVATE'
- else:
- return o
-
- opts = list(map(_scrub_eq, opts))
- for private_opt in PRIVATE_OPTS:
- try:
- i = opts.index(private_opt)
- opts[i + 1] = 'PRIVATE'
- except ValueError:
- pass
- return opts
-
# No need to wrap help messages if we're on a wide console
columns = compat_get_terminal_size().columns
max_width = columns if columns else 80
metavar='FILTER', dest='match_filter', default=None,
help=(
'Generic video filter. '
- 'Specify any key (see help for -o for a list of available keys) to '
+ 'Specify any key (see the "OUTPUT TEMPLATE" for a list of available keys) to '
'match if the key is present, '
'!key to check if the key is not present, '
'key > NUMBER (like "comment_count > 12", also works with '
verbosity.add_option(
'-j', '--dump-json',
action='store_true', dest='dumpjson', default=False,
- help='Simulate, quiet but print JSON information. See --output for a description of available keys.')
+ help='Simulate, quiet but print JSON information. See the "OUTPUT TEMPLATE" for a description of available keys.')
verbosity.add_option(
'-J', '--dump-single-json',
action='store_true', dest='dump_single_json', default=False,
postproc.add_option(
'--convert-subs', '--convert-subtitles',
metavar='FORMAT', dest='convertsubtitles', default=None,
- help='Convert the subtitles to other format (currently supported: srt|ass|vtt)')
+ help='Convert the subtitles to other format (currently supported: srt|ass|vtt|lrc)')
parser.add_option_group(general)
parser.add_option_group(network)
from .common import PostProcessor
from ..compat import compat_shlex_quote
-from ..utils import PostProcessingError
+from ..utils import (
+ encodeArgument,
+ PostProcessingError,
+)
class ExecAfterDownloadPP(PostProcessor):
cmd = cmd.replace('{}', compat_shlex_quote(information['filepath']))
self._downloader.to_screen('[exec] Executing command: %s' % cmd)
- retCode = subprocess.call(cmd, shell=True)
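+ # encodeArgument encodes the command on Python 2, avoiding a
+ # UnicodeEncodeError when the filepath contains non-ASCII characters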
+ retCode = subprocess.call(encodeArgument(cmd), shell=True)
if retCode != 0:
raise PostProcessingError(
'Command returned error code %d' % retCode)
chapters = info.get('chapters', [])
if chapters:
- metadata_filename = encodeFilename(replace_extension(filename, 'meta'))
+ metadata_filename = replace_extension(filename, 'meta')
with io.open(metadata_filename, 'wt', encoding='utf-8') as f:
def ffmpeg_escape(text):
return re.sub(r'(=|;|#|\\|\n)', r'\\\1', text)
temp_filename = prepend_extension(filename, 'temp')
options = ['-c', 'copy', '-f', 'mp4', '-bsf:a', 'aac_adtstoasc']
- self._downloader.to_screen('[ffmpeg] Fixing malformated aac bitstream in "%s"' % filename)
+ self._downloader.to_screen('[ffmpeg] Fixing malformed AAC bitstream in "%s"' % filename)
self.run_ffmpeg(filename, temp_filename, options)
os.remove(encodeFilename(filename))
dfxp_file = old_file
srt_file = subtitles_filename(filename, lang, 'srt')
- with io.open(dfxp_file, 'rt', encoding='utf-8') as f:
+ with open(dfxp_file, 'rb') as f:
srt_data = dfxp2srt(f.read())
with io.open(srt_file, 'wt', encoding='utf-8') as f:
title = info['title']
match = re.match(self._titleregex, title)
if match is None:
- self._downloader.to_screen('[fromtitle] Could not interpret title of video as "%s"' % self._titleformat)
+ self._downloader.to_screen(
+ '[fromtitle] Could not interpret title of video as "%s"'
+ % self._titleformat)
return [], info
for attribute, value in match.groupdict().items():
- value = match.group(attribute)
info[attribute] = value
- self._downloader.to_screen('[fromtitle] parsed ' + attribute + ': ' + value)
+ self._downloader.to_screen(
+ '[fromtitle] parsed %s: %s'
+ % (attribute, value if value is not None else 'NA'))
return [], info
import math
import operator
import os
-import pipes
import platform
import random
import re
import zlib
from .compat import (
+ compat_HTMLParseError,
compat_HTMLParser,
compat_basestring,
compat_chr,
retlist = []
for m in re.finditer(r'''(?xs)
<([a-zA-Z0-9:._-]+)
- (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'))*?
+ (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
\s+%s=['"]?%s['"]?
- (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'))*?
+ (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
\s*>
(?P<content>.*?)
</\1>
but the cases in the unit test will work for all of 2.6, 2.7, 3.2-3.5.
"""
parser = HTMLAttributeParser()
- parser.feed(html_element)
- parser.close()
+ try:
+ parser.feed(html_element)
+ parser.close()
+ # Older Python may throw HTMLParseError in case of malformed HTML
+ except compat_HTMLParseError:
+ pass
return parser.attrs
assert type(s) == compat_str
return re.sub(
- r'&([^;]+;)', lambda m: _htmlentity_transform(m.group(1)), s)
+ r'&([^&;]+;)', lambda m: _htmlentity_transform(m.group(1)), s)
def get_subprocess_encoding():
except zlib.error:
return zlib.decompress(data)
- @staticmethod
- def addinfourl_wrapper(stream, headers, url, code):
- if hasattr(compat_urllib_request.addinfourl, 'getcode'):
- return compat_urllib_request.addinfourl(stream, headers, url, code)
- ret = compat_urllib_request.addinfourl(stream, headers, url)
- ret.code = code
- return ret
-
def http_request(self, req):
# According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
# always respected by websites, some tend to give out URLs with non percent-encoded
break
else:
raise original_ioerror
- resp = self.addinfourl_wrapper(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
+ resp = compat_urllib_request.addinfourl(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
resp.msg = old_resp.msg
del resp.headers['Content-encoding']
# deflate
if resp.headers.get('Content-encoding', '') == 'deflate':
gz = io.BytesIO(self.deflate(resp.read()))
- resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code)
+ resp = compat_urllib_request.addinfourl(gz, old_resp.headers, old_resp.url, old_resp.code)
resp.msg = old_resp.msg
del resp.headers['Content-encoding']
# Percent-encode redirect URL of Location HTTP header to satisfy RFC 3986 (see
if date_str is None:
return None
- date_str = date_str.replace(',', ' ')
+ date_str = re.sub(r'[,|]', '', date_str)
pm_delta = 12 if re.search(r'(?i)PM', date_str) else 0
timezone, date_str = extract_timezone(date_str)
if isinstance(a, bytes):
# We may get a filename encoded with 'encodeFilename'
a = a.decode(encoding)
- quoted_args.append(pipes.quote(a))
+ quoted_args.append(compat_shlex_quote(a))
return ' '.join(quoted_args)
return default
+def bool_or_none(v, default=None):
+ return v if isinstance(v, bool) else default
+
+
def strip_or_none(v):
return None if v is None else v.strip()
def strip_jsonp(code):
return re.sub(
- r'(?s)^[a-zA-Z0-9_.$]+\s*\(\s*(.*)\);?\s*?(?://[^\n]*)*$', r'\1', code)
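+ # Also tolerate an optional "window." prefix and "func && func(...)"-style
+ # existence guards, e.g. 'window.cb && cb({"a": 1}); // noise' -> '{"a": 1}'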
+ r'''(?sx)^
+ (?:window\.)?(?P<func_name>[a-zA-Z0-9_.$]+)
+ (?:\s*&&\s*(?P=func_name))?
+ \s*\(\s*(?P<callback_data>.*)\);?
+ \s*?(?://[^\n]*)*$''',
+ r'\g<callback_data>', code)
def js_to_json(code):
def dfxp2srt(dfxp_data):
+ '''
+ @param dfxp_data A bytes-like object containing DFXP data
+ @returns A unicode object containing converted SRT data
+ '''
LEGACY_NAMESPACES = (
- ('http://www.w3.org/ns/ttml', [
- 'http://www.w3.org/2004/11/ttaf1',
- 'http://www.w3.org/2006/04/ttaf1',
- 'http://www.w3.org/2006/10/ttaf1',
+ (b'http://www.w3.org/ns/ttml', [
+ b'http://www.w3.org/2004/11/ttaf1',
+ b'http://www.w3.org/2006/04/ttaf1',
+ b'http://www.w3.org/2006/10/ttaf1',
]),
- ('http://www.w3.org/ns/ttml#styling', [
- 'http://www.w3.org/ns/ttml#style',
+ (b'http://www.w3.org/ns/ttml#styling', [
+ b'http://www.w3.org/ns/ttml#style',
]),
)
for ns in v:
dfxp_data = dfxp_data.replace(ns, k)
- dfxp = compat_etree_fromstring(dfxp_data.encode('utf-8'))
+ dfxp = compat_etree_fromstring(dfxp_data)
out = []
paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall('.//p')
def cli_bool_option(params, command_option, param, true_value='true', false_value='false', separator=None):
param = params.get(param)
+ if param is None:
+ return []
assert isinstance(param, bool)
if separator:
return [command_option + separator + (true_value if param else false_value)]
from __future__ import unicode_literals
-__version__ = '2017.05.18.1'
+__version__ = '2017.09.24'