Raphaël G. Git Repositories - youtubedl/commitdiff
New upstream version 2017.02.24.1
author Rogério Brito <rbrito@ime.usp.br>
Sat, 25 Feb 2017 00:07:40 +0000 (21:07 -0300)
committer Rogério Brito <rbrito@ime.usp.br>
Sat, 25 Feb 2017 00:07:40 +0000 (21:07 -0300)
113 files changed:
ChangeLog
README.md
README.txt
devscripts/make_lazy_extractors.py
devscripts/run_tests.sh [new file with mode: 0755]
docs/supportedsites.md
setup.py
test/test_YoutubeDL.py
test/test_download.py
test/test_utils.py
youtube-dl
youtube-dl.1
youtube-dl.bash-completion
youtube-dl.fish
youtube-dl.zsh
youtube_dl/YoutubeDL.py
youtube_dl/__init__.py
youtube_dl/compat.py
youtube_dl/downloader/dash.py
youtube_dl/downloader/external.py
youtube_dl/downloader/ism.py
youtube_dl/extractor/adobepass.py
youtube_dl/extractor/aenetworks.py
youtube_dl/extractor/amcnetworks.py
youtube_dl/extractor/archiveorg.py
youtube_dl/extractor/bbc.py
youtube_dl/extractor/bellmedia.py
youtube_dl/extractor/bloomberg.py
youtube_dl/extractor/brightcove.py
youtube_dl/extractor/ceskatelevize.py
youtube_dl/extractor/common.py
youtube_dl/extractor/commonmistakes.py
youtube_dl/extractor/corus.py [new file with mode: 0644]
youtube_dl/extractor/crackle.py
youtube_dl/extractor/crunchyroll.py
youtube_dl/extractor/dailymotion.py
youtube_dl/extractor/disney.py
youtube_dl/extractor/dramafever.py
youtube_dl/extractor/einthusan.py
youtube_dl/extractor/ellentv.py
youtube_dl/extractor/elpais.py
youtube_dl/extractor/extractors.py
youtube_dl/extractor/facebook.py
youtube_dl/extractor/generic.py
youtube_dl/extractor/go.py
youtube_dl/extractor/heise.py
youtube_dl/extractor/hgtv.py
youtube_dl/extractor/hotstar.py
youtube_dl/extractor/instagram.py
youtube_dl/extractor/iprima.py
youtube_dl/extractor/iqiyi.py
youtube_dl/extractor/itv.py
youtube_dl/extractor/ivi.py
youtube_dl/extractor/jwplatform.py
youtube_dl/extractor/kaltura.py
youtube_dl/extractor/leeco.py
youtube_dl/extractor/lemonde.py
youtube_dl/extractor/limelight.py
youtube_dl/extractor/lynda.py
youtube_dl/extractor/metacafe.py
youtube_dl/extractor/mgtv.py
youtube_dl/extractor/nbc.py
youtube_dl/extractor/ninecninemedia.py
youtube_dl/extractor/noco.py
youtube_dl/extractor/nrk.py
youtube_dl/extractor/ondemandkorea.py
youtube_dl/extractor/onet.py
youtube_dl/extractor/openload.py
youtube_dl/extractor/pbs.py
youtube_dl/extractor/pinkbike.py
youtube_dl/extractor/pluralsight.py
youtube_dl/extractor/pornhub.py
youtube_dl/extractor/pornoxo.py
youtube_dl/extractor/prosiebensat1.py
youtube_dl/extractor/rentv.py
youtube_dl/extractor/rudo.py
youtube_dl/extractor/screencastomatic.py
youtube_dl/extractor/scrippsnetworks.py [new file with mode: 0644]
youtube_dl/extractor/sendtonews.py
youtube_dl/extractor/sixplay.py
youtube_dl/extractor/skylinewebcams.py [new file with mode: 0644]
youtube_dl/extractor/sohu.py
youtube_dl/extractor/spankbang.py
youtube_dl/extractor/sprout.py [new file with mode: 0644]
youtube_dl/extractor/srgssr.py
youtube_dl/extractor/svt.py
youtube_dl/extractor/telequebec.py
youtube_dl/extractor/tfo.py
youtube_dl/extractor/theplatform.py
youtube_dl/extractor/thescene.py
youtube_dl/extractor/thisav.py
youtube_dl/extractor/tubitv.py
youtube_dl/extractor/tv4.py
youtube_dl/extractor/tvn24.py [new file with mode: 0644]
youtube_dl/extractor/tvnoe.py
youtube_dl/extractor/tvplayer.py [new file with mode: 0644]
youtube_dl/extractor/twentyfourvideo.py
youtube_dl/extractor/vbox7.py
youtube_dl/extractor/vgtv.py
youtube_dl/extractor/vice.py
youtube_dl/extractor/viceland.py
youtube_dl/extractor/vidzi.py
youtube_dl/extractor/viewster.py
youtube_dl/extractor/viki.py
youtube_dl/extractor/vodpl.py [new file with mode: 0644]
youtube_dl/extractor/wimp.py
youtube_dl/extractor/xtube.py
youtube_dl/extractor/youtube.py
youtube_dl/extractor/zdf.py
youtube_dl/options.py
youtube_dl/postprocessor/ffmpeg.py
youtube_dl/utils.py
youtube_dl/version.py

index 7e2afaacf411c744af2ba54530fb28ab91d8b338..add8a6758a4d46a916fe77a59599c14b5c873157 100644 (file)
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,178 @@
+version 2017.02.24.1
+
+Extractors
+* [noco] Modernize
+* [noco] Switch login URL to https (#12246)
++ [thescene] Extract more metadata
+* [thescene] Fix extraction (#12235)
++ [tubitv] Use geo bypass mechanism
+* [openload] Fix extraction (#10408)
++ [ivi] Raise GeoRestrictedError
+
+
+version 2017.02.24
+
+Core
+* [options] Hide deprecated options from --help
+* [options] Deprecate --autonumber-size
++ [YoutubeDL] Add support for string formatting operations in output template
+  (#5185, #5748, #6841, #9929, #9966, #9978, #12189)
+
+Extractors
++ [lynda:course] Add webpage extraction fallback (#12238)
+* [go] Sign all uplynk URLs and use geo bypass only for free videos
+  (#12087, #12210)
++ [skylinewebcams] Add support for skylinewebcams.com (#12221)
++ [instagram] Add support for multi video posts (#12226)
++ [crunchyroll] Extract playlist entries ids
+* [mgtv] Fix extraction
++ [sohu] Raise GeoRestrictedError
++ [leeco] Raise GeoRestrictedError and use geo bypass mechanism
+
+
+version 2017.02.22
+
+Extractors
+* [crunchyroll] Fix descriptions with double quotes (#12124)
+* [dailymotion] Make comment count optional (#12209)
++ [vidzi] Add support for vidzi.cc (#12213)
++ [24video] Add support for 24video.tube (#12217)
++ [crackle] Use geo bypass mechanism
++ [viewster] Use geo verification headers
++ [tfo] Improve geo restriction detection and use geo bypass mechanism
++ [telequebec] Use geo bypass mechanism
++ [limelight] Extract PlaylistService errors and improve geo restriction
+  detection
+
+
+version 2017.02.21
+
+Core
+* [extractor/common] Allow calling _initialize_geo_bypass from extractors
+  (#11970)
++ [adobepass] Add support for Time Warner Cable (#12191)
++ [travis] Run tests in parallel
++ [downloader/ism] Honor HTTP headers when downloading fragments
++ [downloader/dash] Honor HTTP headers when downloading fragments
++ [utils] Add GeoUtils class for working with geo tools and GeoUtils.random_ipv4
++ Add option --geo-bypass-country for explicit geo bypass on behalf of
+  specified country
++ Add options to control geo bypass mechanism --geo-bypass and --no-geo-bypass
++ Add experimental geo restriction bypass mechanism based on faking
+  X-Forwarded-For HTTP header
++ [utils] Introduce GeoRestrictedError for geo restricted videos
++ [utils] Introduce YoutubeDLError base class for all youtube-dl exceptions
+
+Extractors
++ [ninecninemedia] Use geo bypass mechanism
+* [spankbang] Make uploader optional (#12193)
++ [iprima] Improve geo restriction detection and disable geo bypass
+* [iprima] Modernize
+* [commonmistakes] Disable UnicodeBOM extractor test for python 3.2
++ [prosiebensat1] Throw ExtractionError on unsupported page type (#12180)
+* [nrk] Update _API_HOST and relax _VALID_URL
++ [tv4] Bypass geo restriction and improve detection
+* [tv4] Switch to hls3 protocol (#12177)
++ [viki] Improve geo restriction detection
++ [vgtv] Improve geo restriction detection
++ [srgssr] Improve geo restriction detection
++ [vbox7] Improve geo restriction detection and use geo bypass mechanism
++ [svt] Improve geo restriction detection and use geo bypass mechanism
++ [pbs] Improve geo restriction detection and use geo bypass mechanism
++ [ondemandkorea] Improve geo restriction detection and use geo bypass mechanism
++ [nrk] Improve geo restriction detection and use geo bypass mechanism
++ [itv] Improve geo restriction detection and use geo bypass mechanism
++ [go] Improve geo restriction detection and use geo bypass mechanism
++ [dramafever] Improve geo restriction detection and use geo bypass mechanism
+* [brightcove:legacy] Restrict videoPlayer value (#12040)
++ [tvn24] Add support for tvn24.pl and tvn24bis.pl (#11679)
++ [thisav] Add support for HTML5 media (#11771)
+* [metacafe] Bypass family filter (#10371)
+* [viceland] Improve info extraction
+
+
+version 2017.02.17
+
+Extractors
+* [heise] Improve extraction (#9725)
+* [ellentv] Improve (#11653)
+* [openload] Fix extraction (#10408, #12002)
++ [theplatform] Recognize URLs with whitespaces (#12044)
+* [einthusan] Relax URL regular expression (#12141, #12159)
++ [generic] Support complex JWPlayer embedded videos (#12030)
+* [elpais] Improve extraction (#12139)
+
+
+version 2017.02.16
+
+Core
++ [utils] Add support for quoted string literals in --match-filter (#8050,
+  #12142, #12144)
+
+Extractors
+* [ceskatelevize] Lower priority for audio description sources (#12119)
+* [amcnetworks] Fix extraction (#12127)
+* [pinkbike] Fix uploader extraction (#12054)
++ [onetpl] Add support for businessinsider.com.pl and plejada.pl
++ [onetpl] Add support for onet.pl (#10507)
++ [onetmvp] Add shortcut extractor
++ [vodpl] Add support for vod.pl (#12122)
++ [pornhub] Extract video URL from tv platform site (#12007, #12129)
++ [ceskatelevize] Extract DASH formats (#12119, #12133)
+
+
+version 2017.02.14
+
+Core
+* TypeError is fixed with Python 2.7.13 on Windows (#11540, #12085)
+
+Extractors
+* [zdf] Fix extraction (#12117)
+* [xtube] Fix extraction for both kinds of video id (#12088)
+* [xtube] Improve title extraction (#12088)
++ [lemonde] Fallback delegate extraction to generic extractor (#12115, #12116)
+* [bellmedia] Allow video id longer than 6 characters (#12114)
++ [limelight] Add support for referer protected videos
+* [disney] Improve extraction (#4975, #11000, #11882, #11936)
+* [hotstar] Improve extraction (#12096)
+* [einthusan] Fix extraction (#11416)
++ [aenetworks] Add support for lifetimemovieclub.com (#12097)
+* [youtube] Fix parsing codecs (#12091)
+
+
+version 2017.02.11
+
+Core
++ [utils] Introduce get_elements_by_class and get_elements_by_attribute
+  utility functions
++ [extractor/common] Skip m3u8 manifests protected with Adobe Flash Access
+
+Extractors
+* [pluralsight:course] Fix extraction (#12075)
++ [bbc] Extract m3u8 formats with 320k audio
+* [facebook] Relax video id matching (#11017, #12055, #12056)
++ [corus] Add support for Corus Entertainment sites (#12060, #9164)
++ [pluralsight] Detect blocked account error message (#12070)
++ [bloomberg] Add another video id pattern (#12062)
+* [extractor/commonmistakes] Restrict URL regular expression (#12050)
++ [tvplayer] Add support for tvplayer.com
+
+
+version 2017.02.10
+
+Extractors
+* [xtube] Fix extraction (#12023)
+* [pornhub] Fix extraction (#12007, #12018)
+* [facebook] Improve JS data regular expression (#12042)
+* [kaltura] Improve embed partner id extraction (#12041)
++ [sprout] Add support for sproutonline.com
+* [6play] Improve extraction
++ [scrippsnetworks:watch] Add support for Scripps Networks sites (#10765)
++ [go] Add support for Adobe Pass authentication (#11468, #10831)
+* [6play] Fix extraction (#12011)
++ [nbc] Add support for Adobe Pass authentication (#12006)
+
+
 version 2017.02.07
 
 Core
index 89876bd7adcaab93bc30268f3b77632412ba47d7..0fc5984dc4781ae46b35afdd1d7c8e4426562274 100644 (file)
--- a/README.md
+++ b/README.md
@@ -99,11 +99,21 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo
     --source-address IP              Client-side IP address to bind to
     -4, --force-ipv4                 Make all connections via IPv4
     -6, --force-ipv6                 Make all connections via IPv6
+
+## Geo Restriction:
     --geo-verification-proxy URL     Use this proxy to verify the IP address for
                                      some geo-restricted sites. The default
                                      proxy specified by --proxy (or none, if the
                                      option is not present) is used for the
                                      actual downloading.
+    --geo-bypass                     Bypass geographic restriction via faking
+                                     X-Forwarded-For HTTP header (experimental)
+    --no-geo-bypass                  Do not bypass geographic restriction via
+                                     faking X-Forwarded-For HTTP header
+                                     (experimental)
+    --geo-bypass-country CODE        Force bypass geographic restriction with
+                                     explicitly provided two-letter ISO 3166-1
+                                     alpha-2 country code (experimental)
 
 ## Video Selection:
     --playlist-start NUMBER          Playlist video to start at (default is 1)
@@ -137,20 +147,22 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo
     --match-filter FILTER            Generic video filter. Specify any key (see
                                      help for -o for a list of available keys)
                                      to match if the key is present, !key to
-                                     check if the key is not present,key >
+                                     check if the key is not present, key >
                                      NUMBER (like "comment_count > 12", also
                                      works with >=, <, <=, !=, =) to compare
-                                     against a number, and & to require multiple
-                                     matches. Values which are not known are
-                                     excluded unless you put a question mark (?)
-                                     after the operator.For example, to only
-                                     match videos that have been liked more than
-                                     100 times and disliked less than 50 times
-                                     (or the dislike functionality is not
-                                     available at the given service), but who
-                                     also have a description, use --match-filter
-                                     "like_count > 100 & dislike_count <? 50 &
-                                     description" .
+                                     against a number, key = 'LITERAL' (like
+                                     "uploader = 'Mike Smith'", also works with
+                                     !=) to match against a string literal and &
+                                     to require multiple matches. Values which
+                                     are not known are excluded unless you put a
+                                     question mark (?) after the operator. For
+                                     example, to only match videos that have
+                                     been liked more than 100 times and disliked
+                                     less than 50 times (or the dislike
+                                     functionality is not available at the given
+                                     service), but who also have a description,
+                                     use --match-filter "like_count > 100 &
+                                     dislike_count <? 50 & description" .
     --no-playlist                    Download only the video, if the URL refers
                                      to a video and a playlist.
     --yes-playlist                   Download the playlist, if the URL refers to
@@ -205,21 +217,11 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo
     --id                             Use only video ID in file name
     -o, --output TEMPLATE            Output filename template, see the "OUTPUT
                                      TEMPLATE" for all the info
-    --autonumber-size NUMBER         Specify the number of digits in
-                                     %(autonumber)s when it is present in output
-                                     filename template or --auto-number option
-                                     is given (default is 5)
     --autonumber-start NUMBER        Specify the start value for %(autonumber)s
                                      (default is 1)
     --restrict-filenames             Restrict filenames to only ASCII
                                      characters, and avoid "&" and spaces in
                                      filenames
-    -A, --auto-number                [deprecated; use -o
-                                     "%(autonumber)s-%(title)s.%(ext)s" ] Number
-                                     downloaded files starting from 00000
-    -t, --title                      [deprecated] Use title in file name
-                                     (default)
-    -l, --literal                    [deprecated] Alias of --title
     -w, --no-overwrites              Do not overwrite files
     -c, --continue                   Force resume of partially downloaded files.
                                      By default, youtube-dl will resume
@@ -474,87 +476,89 @@ The `-o` option allows users to indicate a template for the output file names.
 
 **tl;dr:** [navigate me to examples](#output-template-examples).
 
-The basic usage is not to set any template arguments when downloading a single file, like in `youtube-dl -o funny_video.flv "http://some/video"`. However, it may contain special sequences that will be replaced when downloading each video. The special sequences have the format `%(NAME)s`. To clarify, that is a percent symbol followed by a name in parentheses, followed by a lowercase S. Allowed names are:
-
- - `id`: Video identifier
- - `title`: Video title
- - `url`: Video URL
- - `ext`: Video filename extension
- - `alt_title`: A secondary title of the video
- - `display_id`: An alternative identifier for the video
- - `uploader`: Full name of the video uploader
- - `license`: License name the video is licensed under
- - `creator`: The creator of the video
- - `release_date`: The date (YYYYMMDD) when the video was released
- - `timestamp`: UNIX timestamp of the moment the video became available
- - `upload_date`: Video upload date (YYYYMMDD)
- - `uploader_id`: Nickname or id of the video uploader
- - `location`: Physical location where the video was filmed
- - `duration`: Length of the video in seconds
- - `view_count`: How many users have watched the video on the platform
- - `like_count`: Number of positive ratings of the video
- - `dislike_count`: Number of negative ratings of the video
- - `repost_count`: Number of reposts of the video
- - `average_rating`: Average rating give by users, the scale used depends on the webpage
- - `comment_count`: Number of comments on the video
- - `age_limit`: Age restriction for the video (years)
- - `format`: A human-readable description of the format 
- - `format_id`: Format code specified by `--format`
- - `format_note`: Additional info about the format
- - `width`: Width of the video
- - `height`: Height of the video
- - `resolution`: Textual description of width and height
- - `tbr`: Average bitrate of audio and video in KBit/s
- - `abr`: Average audio bitrate in KBit/s
- - `acodec`: Name of the audio codec in use
- - `asr`: Audio sampling rate in Hertz
- - `vbr`: Average video bitrate in KBit/s
- - `fps`: Frame rate
- - `vcodec`: Name of the video codec in use
- - `container`: Name of the container format
- - `filesize`: The number of bytes, if known in advance
- - `filesize_approx`: An estimate for the number of bytes
- - `protocol`: The protocol that will be used for the actual download
- - `extractor`: Name of the extractor
- - `extractor_key`: Key name of the extractor
- - `epoch`: Unix epoch when creating the file
- - `autonumber`: Five-digit number that will be increased with each download, starting at zero
- - `playlist`: Name or id of the playlist that contains the video
- - `playlist_index`: Index of the video in the playlist padded with leading zeros according to the total length of the playlist
- - `playlist_id`: Playlist identifier
- - `playlist_title`: Playlist title
+The basic usage is not to set any template arguments when downloading a single file, like in `youtube-dl -o funny_video.flv "http://some/video"`. However, it may contain special sequences that will be replaced when downloading each video. The special sequences may be formatted according to [python string formatting operations](https://docs.python.org/2/library/stdtypes.html#string-formatting). For example, `%(NAME)s` or `%(NAME)05d`. To clarify, that is a percent symbol followed by a name in parentheses, followed by a formatting operation. Allowed names along with sequence type are:
+
+ - `id` (string): Video identifier
+ - `title` (string): Video title
+ - `url` (string): Video URL
+ - `ext` (string): Video filename extension
+ - `alt_title` (string): A secondary title of the video
+ - `display_id` (string): An alternative identifier for the video
+ - `uploader` (string): Full name of the video uploader
+ - `license` (string): License name the video is licensed under
+ - `creator` (string): The creator of the video
+ - `release_date` (string): The date (YYYYMMDD) when the video was released
+ - `timestamp` (numeric): UNIX timestamp of the moment the video became available
+ - `upload_date` (string): Video upload date (YYYYMMDD)
+ - `uploader_id` (string): Nickname or id of the video uploader
+ - `location` (string): Physical location where the video was filmed
+ - `duration` (numeric): Length of the video in seconds
+ - `view_count` (numeric): How many users have watched the video on the platform
+ - `like_count` (numeric): Number of positive ratings of the video
+ - `dislike_count` (numeric): Number of negative ratings of the video
+ - `repost_count` (numeric): Number of reposts of the video
+ - `average_rating` (numeric): Average rating given by users, the scale used depends on the webpage
+ - `comment_count` (numeric): Number of comments on the video
+ - `age_limit` (numeric): Age restriction for the video (years)
+ - `format` (string): A human-readable description of the format 
+ - `format_id` (string): Format code specified by `--format`
+ - `format_note` (string): Additional info about the format
+ - `width` (numeric): Width of the video
+ - `height` (numeric): Height of the video
+ - `resolution` (string): Textual description of width and height
+ - `tbr` (numeric): Average bitrate of audio and video in KBit/s
+ - `abr` (numeric): Average audio bitrate in KBit/s
+ - `acodec` (string): Name of the audio codec in use
+ - `asr` (numeric): Audio sampling rate in Hertz
+ - `vbr` (numeric): Average video bitrate in KBit/s
+ - `fps` (numeric): Frame rate
+ - `vcodec` (string): Name of the video codec in use
+ - `container` (string): Name of the container format
+ - `filesize` (numeric): The number of bytes, if known in advance
+ - `filesize_approx` (numeric): An estimate for the number of bytes
+ - `protocol` (string): The protocol that will be used for the actual download
+ - `extractor` (string): Name of the extractor
+ - `extractor_key` (string): Key name of the extractor
+ - `epoch` (numeric): Unix epoch when creating the file
+ - `autonumber` (numeric): Five-digit number that will be increased with each download, starting at zero
+ - `playlist` (string): Name or id of the playlist that contains the video
+ - `playlist_index` (numeric): Index of the video in the playlist padded with leading zeros according to the total length of the playlist
+ - `playlist_id` (string): Playlist identifier
+ - `playlist_title` (string): Playlist title
 
 
 Available for the video that belongs to some logical chapter or section:
- - `chapter`: Name or title of the chapter the video belongs to
- - `chapter_number`: Number of the chapter the video belongs to
- - `chapter_id`: Id of the chapter the video belongs to
+ - `chapter` (string): Name or title of the chapter the video belongs to
+ - `chapter_number` (numeric): Number of the chapter the video belongs to
+ - `chapter_id` (string): Id of the chapter the video belongs to
 
 Available for the video that is an episode of some series or programme:
- - `series`: Title of the series or programme the video episode belongs to
- - `season`: Title of the season the video episode belongs to
- - `season_number`: Number of the season the video episode belongs to
- - `season_id`: Id of the season the video episode belongs to
- - `episode`: Title of the video episode
- - `episode_number`: Number of the video episode within a season
- - `episode_id`: Id of the video episode
+ - `series` (string): Title of the series or programme the video episode belongs to
+ - `season` (string): Title of the season the video episode belongs to
+ - `season_number` (numeric): Number of the season the video episode belongs to
+ - `season_id` (string): Id of the season the video episode belongs to
+ - `episode` (string): Title of the video episode
+ - `episode_number` (numeric): Number of the video episode within a season
+ - `episode_id` (string): Id of the video episode
 
 Available for the media that is a track or a part of a music album:
- - `track`: Title of the track
- - `track_number`: Number of the track within an album or a disc
- - `track_id`: Id of the track
- - `artist`: Artist(s) of the track
- - `genre`: Genre(s) of the track
- - `album`: Title of the album the track belongs to
- - `album_type`: Type of the album
- - `album_artist`: List of all artists appeared on the album
- - `disc_number`: Number of the disc or other physical medium the track belongs to
- - `release_year`: Year (YYYY) when the album was released
+ - `track` (string): Title of the track
+ - `track_number` (numeric): Number of the track within an album or a disc
+ - `track_id` (string): Id of the track
+ - `artist` (string): Artist(s) of the track
+ - `genre` (string): Genre(s) of the track
+ - `album` (string): Title of the album the track belongs to
+ - `album_type` (string): Type of the album
+ - `album_artist` (string): List of all artists that appeared on the album
+ - `disc_number` (numeric): Number of the disc or other physical medium the track belongs to
+ - `release_year` (numeric): Year (YYYY) when the album was released
 
 Each aforementioned sequence when referenced in an output template will be replaced by the actual value corresponding to the sequence name. Note that some of the sequences are not guaranteed to be present since they depend on the metadata obtained by a particular extractor. Such sequences will be replaced with `NA`.
 
 For example for `-o %(title)s-%(id)s.%(ext)s` and an mp4 video with title `youtube-dl test video` and id `BaW_jenozKcj`, this will result in a `youtube-dl test video-BaW_jenozKcj.mp4` file created in the current directory.
 
+For numeric sequences you can use numeric related formatting, for example, `%(view_count)05d` will result in a string with view count padded with zeros up to 5 characters, like in `00042`.
+
 Output templates can also contain arbitrary hierarchical path, e.g. `-o '%(playlist)s/%(playlist_index)s - %(title)s.%(ext)s'` which will result in downloading each video in a directory corresponding to this path template. Any missing directory will be automatically created for you.
 
 To use percent literals in an output template use `%%`. To output to stdout use `-o -`.
index 24d43149bf85ef1ccf1a0b96e114ec06aec10ad3..0d89a04729d2854eacc93564ad365fa05c6cd608 100644 (file)
@@ -122,11 +122,23 @@ Network Options:
     --source-address IP              Client-side IP address to bind to
     -4, --force-ipv4                 Make all connections via IPv4
     -6, --force-ipv6                 Make all connections via IPv6
+
+
+Geo Restriction:
+
     --geo-verification-proxy URL     Use this proxy to verify the IP address for
                                      some geo-restricted sites. The default
                                      proxy specified by --proxy (or none, if the
                                      option is not present) is used for the
                                      actual downloading.
+    --geo-bypass                     Bypass geographic restriction via faking
+                                     X-Forwarded-For HTTP header (experimental)
+    --no-geo-bypass                  Do not bypass geographic restriction via
+                                     faking X-Forwarded-For HTTP header
+                                     (experimental)
+    --geo-bypass-country CODE        Force bypass geographic restriction with
+                                     explicitly provided two-letter ISO 3166-1
+                                     alpha-2 country code (experimental)
 
 
 Video Selection:
@@ -162,20 +174,22 @@ Video Selection:
     --match-filter FILTER            Generic video filter. Specify any key (see
                                      help for -o for a list of available keys)
                                      to match if the key is present, !key to
-                                     check if the key is not present,key >
+                                     check if the key is not present, key >
                                      NUMBER (like "comment_count > 12", also
                                      works with >=, <, <=, !=, =) to compare
-                                     against a number, and & to require multiple
-                                     matches. Values which are not known are
-                                     excluded unless you put a question mark (?)
-                                     after the operator.For example, to only
-                                     match videos that have been liked more than
-                                     100 times and disliked less than 50 times
-                                     (or the dislike functionality is not
-                                     available at the given service), but who
-                                     also have a description, use --match-filter
-                                     "like_count > 100 & dislike_count <? 50 &
-                                     description" .
+                                     against a number, key = 'LITERAL' (like
+                                     "uploader = 'Mike Smith'", also works with
+                                     !=) to match against a string literal and &
+                                     to require multiple matches. Values which
+                                     are not known are excluded unless you put a
+                                     question mark (?) after the operator. For
+                                     example, to only match videos that have
+                                     been liked more than 100 times and disliked
+                                     less than 50 times (or the dislike
+                                     functionality is not available at the given
+                                     service), but who also have a description,
+                                     use --match-filter "like_count > 100 &
+                                     dislike_count <? 50 & description" .
     --no-playlist                    Download only the video, if the URL refers
                                      to a video and a playlist.
     --yes-playlist                   Download the playlist, if the URL refers to
@@ -234,21 +248,11 @@ Filesystem Options:
     --id                             Use only video ID in file name
     -o, --output TEMPLATE            Output filename template, see the "OUTPUT
                                      TEMPLATE" for all the info
-    --autonumber-size NUMBER         Specify the number of digits in
-                                     %(autonumber)s when it is present in output
-                                     filename template or --auto-number option
-                                     is given (default is 5)
     --autonumber-start NUMBER        Specify the start value for %(autonumber)s
                                      (default is 1)
     --restrict-filenames             Restrict filenames to only ASCII
                                      characters, and avoid "&" and spaces in
                                      filenames
-    -A, --auto-number                [deprecated; use -o
-                                     "%(autonumber)s-%(title)s.%(ext)s" ] Number
-                                     downloaded files starting from 00000
-    -t, --title                      [deprecated] Use title in file name
-                                     (default)
-    -l, --literal                    [deprecated] Alias of --title
     -w, --no-overwrites              Do not overwrite files
     -c, --continue                   Force resume of partially downloaded files.
                                      By default, youtube-dl will resume
@@ -550,82 +554,91 @@ TL;DR: navigate me to examples.
 The basic usage is not to set any template arguments when downloading a
 single file, like in youtube-dl -o funny_video.flv "http://some/video".
 However, it may contain special sequences that will be replaced when
-downloading each video. The special sequences have the format %(NAME)s.
-To clarify, that is a percent symbol followed by a name in parentheses,
-followed by a lowercase S. Allowed names are:
-
--   id: Video identifier
--   title: Video title
--   url: Video URL
--   ext: Video filename extension
--   alt_title: A secondary title of the video
--   display_id: An alternative identifier for the video
--   uploader: Full name of the video uploader
--   license: License name the video is licensed under
--   creator: The creator of the video
--   release_date: The date (YYYYMMDD) when the video was released
--   timestamp: UNIX timestamp of the moment the video became available
--   upload_date: Video upload date (YYYYMMDD)
--   uploader_id: Nickname or id of the video uploader
--   location: Physical location where the video was filmed
--   duration: Length of the video in seconds
--   view_count: How many users have watched the video on the platform
--   like_count: Number of positive ratings of the video
--   dislike_count: Number of negative ratings of the video
--   repost_count: Number of reposts of the video
--   average_rating: Average rating give by users, the scale used depends
-    on the webpage
--   comment_count: Number of comments on the video
--   age_limit: Age restriction for the video (years)
--   format: A human-readable description of the format
--   format_id: Format code specified by --format
--   format_note: Additional info about the format
--   width: Width of the video
--   height: Height of the video
--   resolution: Textual description of width and height
--   tbr: Average bitrate of audio and video in KBit/s
--   abr: Average audio bitrate in KBit/s
--   acodec: Name of the audio codec in use
--   asr: Audio sampling rate in Hertz
--   vbr: Average video bitrate in KBit/s
--   fps: Frame rate
--   vcodec: Name of the video codec in use
--   container: Name of the container format
--   filesize: The number of bytes, if known in advance
--   filesize_approx: An estimate for the number of bytes
--   protocol: The protocol that will be used for the actual download
--   extractor: Name of the extractor
--   extractor_key: Key name of the extractor
--   epoch: Unix epoch when creating the file
--   autonumber: Five-digit number that will be increased with each
-    download, starting at zero
--   playlist: Name or id of the playlist that contains the video
--   playlist_index: Index of the video in the playlist padded with
-    leading zeros according to the total length of the playlist
--   playlist_id: Playlist identifier
--   playlist_title: Playlist title
+downloading each video. The special sequences may be formatted according
+to python string formatting operations. For example, %(NAME)s or
+%(NAME)05d. To clarify, that is a percent symbol followed by a name in
+parentheses, followed by a formatting operation. Allowed names along
+with sequence type are:
+
+-   id (string): Video identifier
+-   title (string): Video title
+-   url (string): Video URL
+-   ext (string): Video filename extension
+-   alt_title (string): A secondary title of the video
+-   display_id (string): An alternative identifier for the video
+-   uploader (string): Full name of the video uploader
+-   license (string): License name the video is licensed under
+-   creator (string): The creator of the video
+-   release_date (string): The date (YYYYMMDD) when the video was
+    released
+-   timestamp (numeric): UNIX timestamp of the moment the video became
+    available
+-   upload_date (string): Video upload date (YYYYMMDD)
+-   uploader_id (string): Nickname or id of the video uploader
+-   location (string): Physical location where the video was filmed
+-   duration (numeric): Length of the video in seconds
+-   view_count (numeric): How many users have watched the video on the
+    platform
+-   like_count (numeric): Number of positive ratings of the video
+-   dislike_count (numeric): Number of negative ratings of the video
+-   repost_count (numeric): Number of reposts of the video
+-   average_rating (numeric): Average rating given by users, the scale
+    used depends on the webpage
+-   comment_count (numeric): Number of comments on the video
+-   age_limit (numeric): Age restriction for the video (years)
+-   format (string): A human-readable description of the format
+-   format_id (string): Format code specified by --format
+-   format_note (string): Additional info about the format
+-   width (numeric): Width of the video
+-   height (numeric): Height of the video
+-   resolution (string): Textual description of width and height
+-   tbr (numeric): Average bitrate of audio and video in KBit/s
+-   abr (numeric): Average audio bitrate in KBit/s
+-   acodec (string): Name of the audio codec in use
+-   asr (numeric): Audio sampling rate in Hertz
+-   vbr (numeric): Average video bitrate in KBit/s
+-   fps (numeric): Frame rate
+-   vcodec (string): Name of the video codec in use
+-   container (string): Name of the container format
+-   filesize (numeric): The number of bytes, if known in advance
+-   filesize_approx (numeric): An estimate for the number of bytes
+-   protocol (string): The protocol that will be used for the actual
+    download
+-   extractor (string): Name of the extractor
+-   extractor_key (string): Key name of the extractor
+-   epoch (numeric): Unix epoch when creating the file
+-   autonumber (numeric): Five-digit number that will be increased with
+    each download, starting at --autonumber-start (default is 1)
+-   playlist (string): Name or id of the playlist that contains the
+    video
+-   playlist_index (numeric): Index of the video in the playlist padded
+    with leading zeros according to the total length of the playlist
+-   playlist_id (string): Playlist identifier
+-   playlist_title (string): Playlist title
 
 Available for the video that belongs to some logical chapter or section:
-- chapter: Name or title of the chapter the video belongs to -
-chapter_number: Number of the chapter the video belongs to - chapter_id:
-Id of the chapter the video belongs to
+- chapter (string): Name or title of the chapter the video belongs to -
+chapter_number (numeric): Number of the chapter the video belongs to -
+chapter_id (string): Id of the chapter the video belongs to
 
 Available for the video that is an episode of some series or programme:
-- series: Title of the series or programme the video episode belongs to
-- season: Title of the season the video episode belongs to -
-season_number: Number of the season the video episode belongs to -
-season_id: Id of the season the video episode belongs to - episode:
-Title of the video episode - episode_number: Number of the video episode
-within a season - episode_id: Id of the video episode
+- series (string): Title of the series or programme the video episode
+belongs to - season (string): Title of the season the video episode
+belongs to - season_number (numeric): Number of the season the video
+episode belongs to - season_id (string): Id of the season the video
+episode belongs to - episode (string): Title of the video episode -
+episode_number (numeric): Number of the video episode within a season -
+episode_id (string): Id of the video episode
 
 Available for the media that is a track or a part of a music album: -
-track: Title of the track - track_number: Number of the track within an
-album or a disc - track_id: Id of the track - artist: Artist(s) of the
-track - genre: Genre(s) of the track - album: Title of the album the
-track belongs to - album_type: Type of the album - album_artist: List of
-all artists appeared on the album - disc_number: Number of the disc or
-other physical medium the track belongs to - release_year: Year (YYYY)
-when the album was released
+track (string): Title of the track - track_number (numeric): Number of
+the track within an album or a disc - track_id (string): Id of the track
+- artist (string): Artist(s) of the track - genre (string): Genre(s) of
+the track - album (string): Title of the album the track belongs to -
+album_type (string): Type of the album - album_artist (string): List of
+all artists appeared on the album - disc_number (numeric): Number of the
+disc or other physical medium the track belongs to - release_year
+(numeric): Year (YYYY) when the album was released
 
 Each aforementioned sequence when referenced in an output template will
 be replaced by the actual value corresponding to the sequence name. Note
@@ -638,6 +651,10 @@ youtube-dl test video and id BaW_jenozKcj, this will result in a
 youtube-dl test video-BaW_jenozKcj.mp4 file created in the current
 directory.
 
+For numeric sequences you can use numeric related formatting, for
+example, %(view_count)05d will result in a string with view count padded
+with zeros up to 5 characters, like in 00042.
+
 Output templates can also contain arbitrary hierarchical path, e.g.
 -o '%(playlist)s/%(playlist_index)s - %(title)s.%(ext)s' which will
 result in downloading each video in a directory corresponding to this
index 19114d30d1aa59e394e0c35e7ec9446eb4969c56..0a1762dbce85adf9049529ec15bdb61510787d0a 100644 (file)
@@ -1,6 +1,7 @@
 from __future__ import unicode_literals, print_function
 
 from inspect import getsource
+import io
 import os
 from os.path import dirname as dirn
 import sys
@@ -95,5 +96,5 @@ module_contents.append(
 
 module_src = '\n'.join(module_contents) + '\n'
 
-with open(lazy_extractors_filename, 'wt') as f:
+with io.open(lazy_extractors_filename, 'wt', encoding='utf-8') as f:
     f.write(module_src)
diff --git a/devscripts/run_tests.sh b/devscripts/run_tests.sh
new file mode 100755 (executable)
index 0000000..6ba2672
--- /dev/null
@@ -0,0 +1,21 @@
+#!/bin/bash
+
+DOWNLOAD_TESTS="age_restriction|download|subtitles|write_annotations|iqiyi_sdk_interpreter|youtube_lists"
+
+test_set=""
+multiprocess_args=""
+
+case "$YTDL_TEST_SET" in
+    core)
+        test_set="-I test_($DOWNLOAD_TESTS)\.py"
+    ;;
+    download)
+        test_set="-I test_(?!$DOWNLOAD_TESTS).+\.py"
+        multiprocess_args="--processes=4 --process-timeout=540"
+    ;;
+    *)
+        break
+    ;;
+esac
+
+nosetests test --verbose $test_set $multiprocess_args
index 2d82cc321cbbc4e608382ab0160a4bda50b282fe..f973973312318e3524e59b30871a84607fc79a35 100644 (file)
@@ -11,6 +11,7 @@
  - **4tube**
  - **56.com**
  - **5min**
+ - **6play**
  - **8tracks**
  - **91porn**
  - **9c9media**
  - **ComedyCentralShortname**
  - **ComedyCentralTV**
  - **CondeNast**: Condé Nast media group: Allure, Architectural Digest, Ars Technica, Bon Appétit, Brides, Condé Nast, Condé Nast Traveler, Details, Epicurious, GQ, Glamour, Golf Digest, SELF, Teen Vogue, The New Yorker, Vanity Fair, Vogue, W Magazine, WIRED
+ - **Corus**
  - **Coub**
  - **Cracked**
  - **Crackle**
  - **HellPorno**
  - **Helsinki**: helsinki.fi
  - **HentaiStigma**
- - **HGTV**
  - **hgtv.com:show**
  - **HistoricFilms**
  - **history:topic**: History.com Topic
  - **OktoberfestTV**
  - **on.aol.com**
  - **OnDemandKorea**
+ - **onet.pl**
  - **onet.tv**
  - **onet.tv:channel**
+ - **OnetMVP**
  - **OnionStudios**
  - **Ooyala**
  - **OoyalaExternal**
  - **screen.yahoo:search**: Yahoo screen search
  - **Screencast**
  - **ScreencastOMatic**
+ - **scrippsnetworks:watch**
  - **Seeker**
  - **SenateISVP**
  - **SendtoNews**
  - **Shared**: shared.sx
  - **ShowRoomLive**
  - **Sina**
- - **SixPlay**
+ - **SkylineWebcams**
  - **skynewsarabia:article**
  - **skynewsarabia:video**
  - **SkySports**
  - **SportBoxEmbed**
  - **SportDeutschland**
  - **Sportschau**
+ - **Sprout**
  - **sr:mediathek**: Saarländischer Rundfunk
  - **SRGSSR**
  - **SRGSSRPlay**: srf.ch, rts.ch, rsi.ch, rtr.ch and swissinfo.ch play sites
  - **TVCArticle**
  - **tvigle**: Интернет-телевидение Tvigle.ru
  - **tvland.com**
+ - **TVN24**
  - **TVNoe**
  - **tvp**: Telewizja Polska
  - **tvp:embed**: Telewizja Polska
  - **tvp:series**
+ - **TVPlayer**
  - **Tweakers**
  - **twitch:chapter**
  - **twitch:clips**
  - **vlive**
  - **vlive:channel**
  - **Vodlocker**
+ - **VODPl**
  - **VODPlatform**
  - **VoiceRepublic**
  - **VoxMedia**
index ce6dd1870bc52951d268f96aa4dc68ea6f92e04d..67d6633ed6f8301f6fdd31a079a6e4dd1ce0b6b3 100644 (file)
--- a/setup.py
+++ b/setup.py
@@ -107,8 +107,8 @@ setup(
     url='https://github.com/rg3/youtube-dl',
     author='Ricardo Garcia',
     author_email='ytdl@yt-dl.org',
-    maintainer='Philipp Hagemeister',
-    maintainer_email='phihag@phihag.de',
+    maintainer='Sergey M.',
+    maintainer_email='dstftw@gmail.com',
     packages=[
         'youtube_dl',
         'youtube_dl.extractor', 'youtube_dl.downloader',
@@ -130,6 +130,7 @@ setup(
         'Programming Language :: Python :: 3.3',
         'Programming Language :: Python :: 3.4',
         'Programming Language :: Python :: 3.5',
+        'Programming Language :: Python :: 3.6',
     ],
 
     cmdclass={'build_lazy_extractors': build_lazy_extractors},
index 8bf00bea9818f6b91fa7b91e89fcbbf8a6cc0dd3..8491a88bd2f4730cd1af4e39d936966d6e1d3bb1 100644 (file)
@@ -1,4 +1,5 @@
 #!/usr/bin/env python
+# coding: utf-8
 
 from __future__ import unicode_literals
 
@@ -525,6 +526,7 @@ class TestYoutubeDL(unittest.TestCase):
             'id': '1234',
             'ext': 'mp4',
             'width': None,
+            'height': 1080,
         }
 
         def fname(templ):
@@ -534,16 +536,29 @@ class TestYoutubeDL(unittest.TestCase):
         self.assertEqual(fname('%(id)s-%(width)s.%(ext)s'), '1234-NA.mp4')
         # Replace missing fields with 'NA'
         self.assertEqual(fname('%(uploader_date)s-%(id)s.%(ext)s'), 'NA-1234.mp4')
+        self.assertEqual(fname('%(height)d.%(ext)s'), '1080.mp4')
+        self.assertEqual(fname('%(height)6d.%(ext)s'), '  1080.mp4')
+        self.assertEqual(fname('%(height)-6d.%(ext)s'), '1080  .mp4')
+        self.assertEqual(fname('%(height)06d.%(ext)s'), '001080.mp4')
+        self.assertEqual(fname('%(height) 06d.%(ext)s'), ' 01080.mp4')
+        self.assertEqual(fname('%(height)   06d.%(ext)s'), ' 01080.mp4')
+        self.assertEqual(fname('%(height)0 6d.%(ext)s'), ' 01080.mp4')
+        self.assertEqual(fname('%(height)0   6d.%(ext)s'), ' 01080.mp4')
+        self.assertEqual(fname('%(height)   0   6d.%(ext)s'), ' 01080.mp4')
+        self.assertEqual(fname('%%(height)06d.%(ext)s'), '%(height)06d.mp4')
+        self.assertEqual(fname('%(width)06d.%(ext)s'), 'NA.mp4')
+        self.assertEqual(fname('%(width)06d.%%(ext)s'), 'NA.%(ext)s')
+        self.assertEqual(fname('%%(width)06d.%(ext)s'), '%(width)06d.mp4')
 
     def test_format_note(self):
         ydl = YoutubeDL()
         self.assertEqual(ydl._format_note({}), '')
         assertRegexpMatches(self, ydl._format_note({
             'vbr': 10,
-        }), '^\s*10k$')
+        }), r'^\s*10k$')
         assertRegexpMatches(self, ydl._format_note({
             'fps': 30,
-        }), '^30fps$')
+        }), r'^30fps$')
 
     def test_postprocessors(self):
         filename = 'post-processor-testfile.mp4'
@@ -606,6 +621,8 @@ class TestYoutubeDL(unittest.TestCase):
             'duration': 30,
             'filesize': 10 * 1024,
             'playlist_id': '42',
+            'uploader': "變態妍字幕版 太妍 тест",
+            'creator': "тест ' 123 ' тест--",
         }
         second = {
             'id': '2',
@@ -616,6 +633,7 @@ class TestYoutubeDL(unittest.TestCase):
             'description': 'foo',
             'filesize': 5 * 1024,
             'playlist_id': '43',
+            'uploader': "тест 123",
         }
         videos = [first, second]
 
@@ -656,6 +674,26 @@ class TestYoutubeDL(unittest.TestCase):
         res = get_videos(f)
         self.assertEqual(res, ['1'])
 
+        f = match_filter_func('uploader = "變態妍字幕版 太妍 тест"')
+        res = get_videos(f)
+        self.assertEqual(res, ['1'])
+
+        f = match_filter_func('uploader != "變態妍字幕版 太妍 тест"')
+        res = get_videos(f)
+        self.assertEqual(res, ['2'])
+
+        f = match_filter_func('creator = "тест \' 123 \' тест--"')
+        res = get_videos(f)
+        self.assertEqual(res, ['1'])
+
+        f = match_filter_func("creator = 'тест \\' 123 \\' тест--'")
+        res = get_videos(f)
+        self.assertEqual(res, ['1'])
+
+        f = match_filter_func(r"creator = 'тест \' 123 \' тест--' & duration > 30")
+        res = get_videos(f)
+        self.assertEqual(res, [])
+
     def test_playlist_items_selection(self):
         entries = [{
             'id': compat_str(i),
index 4639529897967ebc49883e488f5624a038c70c44..30034f9782410b1f0e9300916fb101e02f42050c 100644 (file)
@@ -65,6 +65,10 @@ defs = gettestcases()
 
 
 class TestDownload(unittest.TestCase):
+    # Parallel testing in nosetests. See
+    # http://nose.readthedocs.org/en/latest/doc_tests/test_multiprocess/multiprocess.html
+    _multiprocess_shared_ = True
+
     maxDiff = None
 
     def setUp(self):
@@ -73,7 +77,7 @@ class TestDownload(unittest.TestCase):
 # Dynamically generate tests
 
 
-def generator(test_case):
+def generator(test_case, tname):
 
     def test_template(self):
         ie = youtube_dl.extractor.get_info_extractor(test_case['name'])
@@ -102,6 +106,7 @@ def generator(test_case):
                 return
 
         params = get_params(test_case.get('params', {}))
+        params['outtmpl'] = tname + '_' + params['outtmpl']
         if is_playlist and 'playlist' not in test_case:
             params.setdefault('extract_flat', 'in_playlist')
             params.setdefault('skip_download', True)
@@ -146,7 +151,7 @@ def generator(test_case):
                         raise
 
                     if try_num == RETRIES:
-                        report_warning('Failed due to network errors, skipping...')
+                        report_warning('%s failed due to network errors, skipping...' % tname)
                         return
 
                     print('Retrying: {0} failed tries\n\n##########\n\n'.format(try_num))
@@ -221,12 +226,12 @@ def generator(test_case):
 
 # And add them to TestDownload
 for n, test_case in enumerate(defs):
-    test_method = generator(test_case)
     tname = 'test_' + str(test_case['name'])
     i = 1
     while hasattr(TestDownload, tname):
         tname = 'test_%s_%d' % (test_case['name'], i)
         i += 1
+    test_method = generator(test_case, tname)
     test_method.__name__ = str(tname)
     setattr(TestDownload, test_method.__name__, test_method)
     del test_method
index edc712f0741576c852be2b528f95dcf81f309bfc..3cdb21d409b97c4dec4a1c205960ae133642c620 100644 (file)
@@ -34,6 +34,9 @@ from youtube_dl.utils import (
     find_xpath_attr,
     fix_xml_ampersands,
     get_element_by_class,
+    get_element_by_attribute,
+    get_elements_by_class,
+    get_elements_by_attribute,
     InAdvancePagedList,
     intlist_to_bytes,
     is_html,
@@ -1124,6 +1127,32 @@ The first line
         self.assertEqual(get_element_by_class('foo', html), 'nice')
         self.assertEqual(get_element_by_class('no-such-class', html), None)
 
+    def test_get_element_by_attribute(self):
+        html = '''
+            <span class="foo bar">nice</span>
+        '''
+
+        self.assertEqual(get_element_by_attribute('class', 'foo bar', html), 'nice')
+        self.assertEqual(get_element_by_attribute('class', 'foo', html), None)
+        self.assertEqual(get_element_by_attribute('class', 'no-such-foo', html), None)
+
+    def test_get_elements_by_class(self):
+        html = '''
+            <span class="foo bar">nice</span><span class="foo bar">also nice</span>
+        '''
+
+        self.assertEqual(get_elements_by_class('foo', html), ['nice', 'also nice'])
+        self.assertEqual(get_elements_by_class('no-such-class', html), [])
+
+    def test_get_elements_by_attribute(self):
+        html = '''
+            <span class="foo bar">nice</span><span class="foo bar">also nice</span>
+        '''
+
+        self.assertEqual(get_elements_by_attribute('class', 'foo bar', html), ['nice', 'also nice'])
+        self.assertEqual(get_elements_by_attribute('class', 'foo', html), [])
+        self.assertEqual(get_elements_by_attribute('class', 'no-such-foo', html), [])
+
 
 if __name__ == '__main__':
     unittest.main()
index 2b3870d61762bd1c1cb731d06e7882f1b272a60d..bc236d23fe71b2a738cbb0fcbdff3ec91534a010 100755 (executable)
Binary files a/youtube-dl and b/youtube-dl differ
index 38c41e25d298435f70a215988ffaaf5638e98a02..869d891d1d41a6e76bfefe2ca1c17bfbb313df2e 100644 (file)
@@ -138,6 +138,7 @@ Make all connections via IPv4
 Make all connections via IPv6
 .RS
 .RE
+.SS Geo Restriction:
 .TP
 .B \-\-geo\-verification\-proxy \f[I]URL\f[]
 Use this proxy to verify the IP address for some geo\-restricted sites.
@@ -145,6 +146,24 @@ The default proxy specified by \-\-proxy (or none, if the options is not
 present) is used for the actual downloading.
 .RS
 .RE
+.TP
+.B \-\-geo\-bypass
+Bypass geographic restriction via faking X\-Forwarded\-For HTTP header
+(experimental)
+.RS
+.RE
+.TP
+.B \-\-no\-geo\-bypass
+Do not bypass geographic restriction via faking X\-Forwarded\-For HTTP
+header (experimental)
+.RS
+.RE
+.TP
+.B \-\-geo\-bypass\-country \f[I]CODE\f[]
+Force bypass geographic restriction with explicitly provided two\-letter
+ISO 3166\-1 alpha\-2 country code (experimental)
+.RS
+.RE
 .SS Video Selection:
 .TP
 .B \-\-playlist\-start \f[I]NUMBER\f[]
@@ -224,15 +243,17 @@ Do not download any videos with more than COUNT views
 .B \-\-match\-filter \f[I]FILTER\f[]
 Generic video filter.
 Specify any key (see help for \-o for a list of available keys) to match
-if the key is present, !key to check if the key is not present,key >
+if the key is present, !key to check if the key is not present, key >
 NUMBER (like "comment_count > 12", also works with >=, <, <=, !=, =) to
-compare against a number, and & to require multiple matches.
+compare against a number, key = \[aq]LITERAL\[aq] (like "uploader =
+\[aq]Mike Smith\[aq]", also works with !=) to match against a string
+literal and & to require multiple matches.
 Values which are not known are excluded unless you put a question mark
-(?) after the operator.For example, to only match videos that have been
-liked more than 100 times and disliked less than 50 times (or the
-dislike functionality is not available at the given service), but who
-also have a description, use \-\-match\-filter "like_count > 100 &
-dislike_count <?
+(?) after the operator.
+For example, to only match videos that have been liked more than 100
+times and disliked less than 50 times (or the dislike functionality is
+not available at the given service), but who also have a description,
+use \-\-match\-filter "like_count > 100 & dislike_count <?
 50 & description" .
 .RS
 .RE
@@ -362,13 +383,6 @@ Output filename template, see the "OUTPUT TEMPLATE" for all the info
 .RS
 .RE
 .TP
-.B \-\-autonumber\-size \f[I]NUMBER\f[]
-Specify the number of digits in %(autonumber)s when it is present in
-output filename template or \-\-auto\-number option is given (default is
-5)
-.RS
-.RE
-.TP
 .B \-\-autonumber\-start \f[I]NUMBER\f[]
 Specify the start value for %(autonumber)s (default is 1)
 .RS
@@ -380,22 +394,6 @@ filenames
 .RS
 .RE
 .TP
-.B \-A, \-\-auto\-number
-[deprecated; use \-o "%(autonumber)s\-%(title)s.%(ext)s" ] Number
-downloaded files starting from 00000
-.RS
-.RE
-.TP
-.B \-t, \-\-title
-[deprecated] Use title in file name (default)
-.RS
-.RE
-.TP
-.B \-l, \-\-literal
-[deprecated] Alias of \-\-title
-.RS
-.RE
-.TP
 .B \-w, \-\-no\-overwrites
 Do not overwrite files
 .RS
@@ -995,135 +993,145 @@ single file, like in
 \f[C]youtube\-dl\ \-o\ funny_video.flv\ "http://some/video"\f[].
 However, it may contain special sequences that will be replaced when
 downloading each video.
-The special sequences have the format \f[C]%(NAME)s\f[].
+The special sequences may be formatted according to python string
+formatting
+operations (https://docs.python.org/2/library/stdtypes.html#string-formatting).
+For example, \f[C]%(NAME)s\f[] or \f[C]%(NAME)05d\f[].
 To clarify, that is a percent symbol followed by a name in parentheses,
-followed by a lowercase S.
-Allowed names are:
+followed by a formatting operation.
+Allowed names along with sequence type are:
 .IP \[bu] 2
-\f[C]id\f[]: Video identifier
+\f[C]id\f[] (string): Video identifier
 .IP \[bu] 2
-\f[C]title\f[]: Video title
+\f[C]title\f[] (string): Video title
 .IP \[bu] 2
-\f[C]url\f[]: Video URL
+\f[C]url\f[] (string): Video URL
 .IP \[bu] 2
-\f[C]ext\f[]: Video filename extension
+\f[C]ext\f[] (string): Video filename extension
 .IP \[bu] 2
-\f[C]alt_title\f[]: A secondary title of the video
+\f[C]alt_title\f[] (string): A secondary title of the video
 .IP \[bu] 2
-\f[C]display_id\f[]: An alternative identifier for the video
+\f[C]display_id\f[] (string): An alternative identifier for the video
 .IP \[bu] 2
-\f[C]uploader\f[]: Full name of the video uploader
+\f[C]uploader\f[] (string): Full name of the video uploader
 .IP \[bu] 2
-\f[C]license\f[]: License name the video is licensed under
+\f[C]license\f[] (string): License name the video is licensed under
 .IP \[bu] 2
-\f[C]creator\f[]: The creator of the video
+\f[C]creator\f[] (string): The creator of the video
 .IP \[bu] 2
-\f[C]release_date\f[]: The date (YYYYMMDD) when the video was released
+\f[C]release_date\f[] (string): The date (YYYYMMDD) when the video was
+released
 .IP \[bu] 2
-\f[C]timestamp\f[]: UNIX timestamp of the moment the video became
-available
+\f[C]timestamp\f[] (numeric): UNIX timestamp of the moment the video
+became available
 .IP \[bu] 2
-\f[C]upload_date\f[]: Video upload date (YYYYMMDD)
+\f[C]upload_date\f[] (string): Video upload date (YYYYMMDD)
 .IP \[bu] 2
-\f[C]uploader_id\f[]: Nickname or id of the video uploader
+\f[C]uploader_id\f[] (string): Nickname or id of the video uploader
 .IP \[bu] 2
-\f[C]location\f[]: Physical location where the video was filmed
+\f[C]location\f[] (string): Physical location where the video was filmed
 .IP \[bu] 2
-\f[C]duration\f[]: Length of the video in seconds
+\f[C]duration\f[] (numeric): Length of the video in seconds
 .IP \[bu] 2
-\f[C]view_count\f[]: How many users have watched the video on the
-platform
+\f[C]view_count\f[] (numeric): How many users have watched the video on
+the platform
 .IP \[bu] 2
-\f[C]like_count\f[]: Number of positive ratings of the video
+\f[C]like_count\f[] (numeric): Number of positive ratings of the video
 .IP \[bu] 2
-\f[C]dislike_count\f[]: Number of negative ratings of the video
+\f[C]dislike_count\f[] (numeric): Number of negative ratings of the
+video
 .IP \[bu] 2
-\f[C]repost_count\f[]: Number of reposts of the video
+\f[C]repost_count\f[] (numeric): Number of reposts of the video
 .IP \[bu] 2
-\f[C]average_rating\f[]: Average rating give by users, the scale used
-depends on the webpage
+\f[C]average_rating\f[] (numeric): Average rating given by users, the
+scale used depends on the webpage
 .IP \[bu] 2
-\f[C]comment_count\f[]: Number of comments on the video
+\f[C]comment_count\f[] (numeric): Number of comments on the video
 .IP \[bu] 2
-\f[C]age_limit\f[]: Age restriction for the video (years)
+\f[C]age_limit\f[] (numeric): Age restriction for the video (years)
 .IP \[bu] 2
-\f[C]format\f[]: A human\-readable description of the format
+\f[C]format\f[] (string): A human\-readable description of the format
 .IP \[bu] 2
-\f[C]format_id\f[]: Format code specified by \f[C]\-\-format\f[]
+\f[C]format_id\f[] (string): Format code specified by
+\f[C]\-\-format\f[]
 .IP \[bu] 2
-\f[C]format_note\f[]: Additional info about the format
+\f[C]format_note\f[] (string): Additional info about the format
 .IP \[bu] 2
-\f[C]width\f[]: Width of the video
+\f[C]width\f[] (numeric): Width of the video
 .IP \[bu] 2
-\f[C]height\f[]: Height of the video
+\f[C]height\f[] (numeric): Height of the video
 .IP \[bu] 2
-\f[C]resolution\f[]: Textual description of width and height
+\f[C]resolution\f[] (string): Textual description of width and height
 .IP \[bu] 2
-\f[C]tbr\f[]: Average bitrate of audio and video in KBit/s
+\f[C]tbr\f[] (numeric): Average bitrate of audio and video in KBit/s
 .IP \[bu] 2
-\f[C]abr\f[]: Average audio bitrate in KBit/s
+\f[C]abr\f[] (numeric): Average audio bitrate in KBit/s
 .IP \[bu] 2
-\f[C]acodec\f[]: Name of the audio codec in use
+\f[C]acodec\f[] (string): Name of the audio codec in use
 .IP \[bu] 2
-\f[C]asr\f[]: Audio sampling rate in Hertz
+\f[C]asr\f[] (numeric): Audio sampling rate in Hertz
 .IP \[bu] 2
-\f[C]vbr\f[]: Average video bitrate in KBit/s
+\f[C]vbr\f[] (numeric): Average video bitrate in KBit/s
 .IP \[bu] 2
-\f[C]fps\f[]: Frame rate
+\f[C]fps\f[] (numeric): Frame rate
 .IP \[bu] 2
-\f[C]vcodec\f[]: Name of the video codec in use
+\f[C]vcodec\f[] (string): Name of the video codec in use
 .IP \[bu] 2
-\f[C]container\f[]: Name of the container format
+\f[C]container\f[] (string): Name of the container format
 .IP \[bu] 2
-\f[C]filesize\f[]: The number of bytes, if known in advance
+\f[C]filesize\f[] (numeric): The number of bytes, if known in advance
 .IP \[bu] 2
-\f[C]filesize_approx\f[]: An estimate for the number of bytes
+\f[C]filesize_approx\f[] (numeric): An estimate for the number of bytes
 .IP \[bu] 2
-\f[C]protocol\f[]: The protocol that will be used for the actual
-download
+\f[C]protocol\f[] (string): The protocol that will be used for the
+actual download
 .IP \[bu] 2
-\f[C]extractor\f[]: Name of the extractor
+\f[C]extractor\f[] (string): Name of the extractor
 .IP \[bu] 2
-\f[C]extractor_key\f[]: Key name of the extractor
+\f[C]extractor_key\f[] (string): Key name of the extractor
 .IP \[bu] 2
-\f[C]epoch\f[]: Unix epoch when creating the file
+\f[C]epoch\f[] (numeric): Unix epoch when creating the file
 .IP \[bu] 2
-\f[C]autonumber\f[]: Five\-digit number that will be increased with each
-download, starting at zero
+\f[C]autonumber\f[] (numeric): Five\-digit number that will be increased
+with each download, starting at \f[C]\-\-autonumber\-start\f[] (default is 1)
 .IP \[bu] 2
-\f[C]playlist\f[]: Name or id of the playlist that contains the video
+\f[C]playlist\f[] (string): Name or id of the playlist that contains the
+video
 .IP \[bu] 2
-\f[C]playlist_index\f[]: Index of the video in the playlist padded with
-leading zeros according to the total length of the playlist
+\f[C]playlist_index\f[] (numeric): Index of the video in the playlist
+padded with leading zeros according to the total length of the playlist
 .IP \[bu] 2
-\f[C]playlist_id\f[]: Playlist identifier
+\f[C]playlist_id\f[] (string): Playlist identifier
 .IP \[bu] 2
-\f[C]playlist_title\f[]: Playlist title
+\f[C]playlist_title\f[] (string): Playlist title
 .PP
 Available for the video that belongs to some logical chapter or section:
-\- \f[C]chapter\f[]: Name or title of the chapter the video belongs to
-\- \f[C]chapter_number\f[]: Number of the chapter the video belongs to
-\- \f[C]chapter_id\f[]: Id of the chapter the video belongs to
+\- \f[C]chapter\f[] (string): Name or title of the chapter the video
+belongs to \- \f[C]chapter_number\f[] (numeric): Number of the chapter
+the video belongs to \- \f[C]chapter_id\f[] (string): Id of the chapter
+the video belongs to
 .PP
 Available for the video that is an episode of some series or programme:
-\- \f[C]series\f[]: Title of the series or programme the video episode
-belongs to \- \f[C]season\f[]: Title of the season the video episode
-belongs to \- \f[C]season_number\f[]: Number of the season the video
-episode belongs to \- \f[C]season_id\f[]: Id of the season the video
-episode belongs to \- \f[C]episode\f[]: Title of the video episode \-
-\f[C]episode_number\f[]: Number of the video episode within a season \-
-\f[C]episode_id\f[]: Id of the video episode
+\- \f[C]series\f[] (string): Title of the series or programme the video
+episode belongs to \- \f[C]season\f[] (string): Title of the season the
+video episode belongs to \- \f[C]season_number\f[] (numeric): Number of
+the season the video episode belongs to \- \f[C]season_id\f[] (string):
+Id of the season the video episode belongs to \- \f[C]episode\f[]
+(string): Title of the video episode \- \f[C]episode_number\f[]
+(numeric): Number of the video episode within a season \-
+\f[C]episode_id\f[] (string): Id of the video episode
 .PP
 Available for the media that is a track or a part of a music album: \-
-\f[C]track\f[]: Title of the track \- \f[C]track_number\f[]: Number of
-the track within an album or a disc \- \f[C]track_id\f[]: Id of the
-track \- \f[C]artist\f[]: Artist(s) of the track \- \f[C]genre\f[]:
-Genre(s) of the track \- \f[C]album\f[]: Title of the album the track
-belongs to \- \f[C]album_type\f[]: Type of the album \-
-\f[C]album_artist\f[]: List of all artists appeared on the album \-
-\f[C]disc_number\f[]: Number of the disc or other physical medium the
-track belongs to \- \f[C]release_year\f[]: Year (YYYY) when the album
-was released
+\f[C]track\f[] (string): Title of the track \- \f[C]track_number\f[]
+(numeric): Number of the track within an album or a disc \-
+\f[C]track_id\f[] (string): Id of the track \- \f[C]artist\f[] (string):
+Artist(s) of the track \- \f[C]genre\f[] (string): Genre(s) of the track
+\- \f[C]album\f[] (string): Title of the album the track belongs to \-
+\f[C]album_type\f[] (string): Type of the album \- \f[C]album_artist\f[]
+(string): List of all artists appeared on the album \-
+\f[C]disc_number\f[] (numeric): Number of the disc or other physical
+medium the track belongs to \- \f[C]release_year\f[] (numeric): Year
+(YYYY) when the album was released
 .PP
 Each aforementioned sequence when referenced in an output template will
 be replaced by the actual value corresponding to the sequence name.
@@ -1137,6 +1145,10 @@ with title \f[C]youtube\-dl\ test\ video\f[] and id
 \f[C]youtube\-dl\ test\ video\-BaW_jenozKcj.mp4\f[] file created in the
 current directory.
 .PP
+For numeric sequences you can use numeric\-related formatting, for
+example, \f[C]%(view_count)05d\f[] will result in a string with view
+count padded with zeros up to 5 characters, like in \f[C]00042\f[].
+.PP
 Output templates can also contain arbitrary hierarchical path, e.g.
 \f[C]\-o\ \[aq]%(playlist)s/%(playlist_index)s\ \-\ %(title)s.%(ext)s\[aq]\f[]
 which will result in downloading each video in a directory corresponding
index b9e0d2e5f3318dc5d6983a81ea84e017bb86cec4..cf81e2c46be137aeaf96db9a4d0f895b78dda730 100644 (file)
@@ -4,7 +4,7 @@ __youtube_dl()
     COMPREPLY=()
     cur="${COMP_WORDS[COMP_CWORD]}"
     prev="${COMP_WORDS[COMP_CWORD-1]}"
-    opts="--help --version --update --ignore-errors --abort-on-error --dump-user-agent --list-extractors --extractor-descriptions --force-generic-extractor --default-search --ignore-config --config-location --flat-playlist --mark-watched --no-mark-watched --no-color --proxy --socket-timeout --source-address --force-ipv4 --force-ipv6 --geo-verification-proxy --cn-verification-proxy --playlist-start --playlist-end --playlist-items --match-title --reject-title --max-downloads --min-filesize --max-filesize --date --datebefore --dateafter --min-views --max-views --match-filter --no-playlist --yes-playlist --age-limit --download-archive --include-ads --limit-rate --retries --fragment-retries --skip-unavailable-fragments --abort-on-unavailable-fragment --buffer-size --no-resize-buffer --test --playlist-reverse --playlist-random --xattr-set-filesize --hls-prefer-native --hls-prefer-ffmpeg --hls-use-mpegts --external-downloader --external-downloader-args --batch-file --id --output --autonumber-size --autonumber-start --restrict-filenames --auto-number --title --literal --no-overwrites --continue --no-continue --no-part --no-mtime --write-description --write-info-json --write-annotations --load-info-json --cookies --cache-dir --no-cache-dir --rm-cache-dir --write-thumbnail --write-all-thumbnails --list-thumbnails --quiet --no-warnings --simulate --skip-download --get-url --get-title --get-id --get-thumbnail --get-description --get-duration --get-filename --get-format --dump-json --dump-single-json --print-json --newline --no-progress --console-title --verbose --dump-pages --write-pages --youtube-print-sig-code --print-traffic --call-home --no-call-home --encoding --no-check-certificate --prefer-insecure --user-agent --referer --add-header --bidi-workaround --sleep-interval --max-sleep-interval --format --all-formats --prefer-free-formats --list-formats --youtube-include-dash-manifest --youtube-skip-dash-manifest --merge-output-format --write-sub --write-auto-sub --all-subs 
--list-subs --sub-format --sub-lang --username --password --twofactor --netrc --video-password --ap-mso --ap-username --ap-password --ap-list-mso --extract-audio --audio-format --audio-quality --recode-video --postprocessor-args --keep-video --no-post-overwrites --embed-subs --embed-thumbnail --add-metadata --metadata-from-title --xattrs --fixup --prefer-avconv --prefer-ffmpeg --ffmpeg-location --exec --convert-subs"
+    opts="--help --version --update --ignore-errors --abort-on-error --dump-user-agent --list-extractors --extractor-descriptions --force-generic-extractor --default-search --ignore-config --config-location --flat-playlist --mark-watched --no-mark-watched --no-color --proxy --socket-timeout --source-address --force-ipv4 --force-ipv6 --geo-verification-proxy --cn-verification-proxy --geo-bypass --no-geo-bypass --geo-bypass-country --playlist-start --playlist-end --playlist-items --match-title --reject-title --max-downloads --min-filesize --max-filesize --date --datebefore --dateafter --min-views --max-views --match-filter --no-playlist --yes-playlist --age-limit --download-archive --include-ads --limit-rate --retries --fragment-retries --skip-unavailable-fragments --abort-on-unavailable-fragment --buffer-size --no-resize-buffer --test --playlist-reverse --playlist-random --xattr-set-filesize --hls-prefer-native --hls-prefer-ffmpeg --hls-use-mpegts --external-downloader --external-downloader-args --batch-file --id --output --autonumber-size --autonumber-start --restrict-filenames --auto-number --title --literal --no-overwrites --continue --no-continue --no-part --no-mtime --write-description --write-info-json --write-annotations --load-info-json --cookies --cache-dir --no-cache-dir --rm-cache-dir --write-thumbnail --write-all-thumbnails --list-thumbnails --quiet --no-warnings --simulate --skip-download --get-url --get-title --get-id --get-thumbnail --get-description --get-duration --get-filename --get-format --dump-json --dump-single-json --print-json --newline --no-progress --console-title --verbose --dump-pages --write-pages --youtube-print-sig-code --print-traffic --call-home --no-call-home --encoding --no-check-certificate --prefer-insecure --user-agent --referer --add-header --bidi-workaround --sleep-interval --max-sleep-interval --format --all-formats --prefer-free-formats --list-formats --youtube-include-dash-manifest --youtube-skip-dash-manifest 
--merge-output-format --write-sub --write-auto-sub --all-subs --list-subs --sub-format --sub-lang --username --password --twofactor --netrc --video-password --ap-mso --ap-username --ap-password --ap-list-mso --extract-audio --audio-format --audio-quality --recode-video --postprocessor-args --keep-video --no-post-overwrites --embed-subs --embed-thumbnail --add-metadata --metadata-from-title --xattrs --fixup --prefer-avconv --prefer-ffmpeg --ffmpeg-location --exec --convert-subs"
     keywords=":ytfavorites :ytrecommended :ytsubscriptions :ytwatchlater :ythistory"
     fileopts="-a|--batch-file|--download-archive|--cookies|--load-info"
     diropts="--cache-dir"
index 067680fc3dea80a4609fd8ffc54c5e331bba998c..a94cf7a8b1a93ea301aecc846768dcd9fa69f37c 100644 (file)
@@ -22,6 +22,9 @@ complete --command youtube-dl --long-option force-ipv4 --short-option 4 --descri
 complete --command youtube-dl --long-option force-ipv6 --short-option 6 --description 'Make all connections via IPv6'
 complete --command youtube-dl --long-option geo-verification-proxy --description 'Use this proxy to verify the IP address for some geo-restricted sites. The default proxy specified by --proxy (or none, if the options is not present) is used for the actual downloading.'
 complete --command youtube-dl --long-option cn-verification-proxy
+complete --command youtube-dl --long-option geo-bypass --description 'Bypass geographic restriction via faking X-Forwarded-For HTTP header (experimental)'
+complete --command youtube-dl --long-option no-geo-bypass --description 'Do not bypass geographic restriction via faking X-Forwarded-For HTTP header (experimental)'
+complete --command youtube-dl --long-option geo-bypass-country --description 'Force bypass geographic restriction with explicitly provided two-letter ISO 3166-2 country code (experimental)'
 complete --command youtube-dl --long-option playlist-start --description 'Playlist video to start at (default is %default)'
 complete --command youtube-dl --long-option playlist-end --description 'Playlist video to end at (default is last)'
 complete --command youtube-dl --long-option playlist-items --description 'Playlist video items to download. Specify indices of the videos in the playlist separated by commas like: "--playlist-items 1,2,5,8" if you want to download videos indexed 1, 2, 5, 8 in the playlist. You can specify range: "--playlist-items 1-3,7,10-13", it will download the videos at index 1, 2, 3, 7, 10, 11, 12 and 13.'
@@ -35,7 +38,7 @@ complete --command youtube-dl --long-option datebefore --description 'Download o
 complete --command youtube-dl --long-option dateafter --description 'Download only videos uploaded on or after this date (i.e. inclusive)'
 complete --command youtube-dl --long-option min-views --description 'Do not download any videos with less than COUNT views'
 complete --command youtube-dl --long-option max-views --description 'Do not download any videos with more than COUNT views'
-complete --command youtube-dl --long-option match-filter --description 'Generic video filter. Specify any key (see help for -o for a list of available keys) to match if the key is present, !key to check if the key is not present,key > NUMBER (like "comment_count > 12", also works with >=, <, <=, !=, =) to compare against a number, and & to require multiple matches. Values which are not known are excluded unless you put a question mark (?) after the operator.For example, to only match videos that have been liked more than 100 times and disliked less than 50 times (or the dislike functionality is not available at the given service), but who also have a description, use --match-filter "like_count > 100 & dislike_count <? 50 & description" .'
+complete --command youtube-dl --long-option match-filter --description 'Generic video filter. Specify any key (see help for -o for a list of available keys) to match if the key is present, !key to check if the key is not present, key > NUMBER (like "comment_count > 12", also works with >=, <, <=, !=, =) to compare against a number, key = '"'"'LITERAL'"'"' (like "uploader = '"'"'Mike Smith'"'"'", also works with !=) to match against a string literal and & to require multiple matches. Values which are not known are excluded unless you put a question mark (?) after the operator. For example, to only match videos that have been liked more than 100 times and disliked less than 50 times (or the dislike functionality is not available at the given service), but who also have a description, use --match-filter "like_count > 100 & dislike_count <? 50 & description" .'
 complete --command youtube-dl --long-option no-playlist --description 'Download only the video, if the URL refers to a video and a playlist.'
 complete --command youtube-dl --long-option yes-playlist --description 'Download the playlist, if the URL refers to a video and a playlist.'
 complete --command youtube-dl --long-option age-limit --description 'Download only videos suitable for the given age'
@@ -60,12 +63,12 @@ complete --command youtube-dl --long-option external-downloader-args --descripti
 complete --command youtube-dl --long-option batch-file --short-option a --description 'File containing URLs to download ('"'"'-'"'"' for stdin)' --require-parameter
 complete --command youtube-dl --long-option id --description 'Use only video ID in file name'
 complete --command youtube-dl --long-option output --short-option o --description 'Output filename template, see the "OUTPUT TEMPLATE" for all the info'
-complete --command youtube-dl --long-option autonumber-size --description 'Specify the number of digits in %(autonumber)s when it is present in output filename template or --auto-number option is given (default is %default)'
+complete --command youtube-dl --long-option autonumber-size
 complete --command youtube-dl --long-option autonumber-start --description 'Specify the start value for %(autonumber)s (default is %default)'
 complete --command youtube-dl --long-option restrict-filenames --description 'Restrict filenames to only ASCII characters, and avoid "&" and spaces in filenames'
-complete --command youtube-dl --long-option auto-number --short-option A --description '[deprecated; use -o "%(autonumber)s-%(title)s.%(ext)s" ] Number downloaded files starting from 00000'
-complete --command youtube-dl --long-option title --short-option t --description '[deprecated] Use title in file name (default)'
-complete --command youtube-dl --long-option literal --short-option l --description '[deprecated] Alias of --title'
+complete --command youtube-dl --long-option auto-number --short-option A
+complete --command youtube-dl --long-option title --short-option t
+complete --command youtube-dl --long-option literal --short-option l
 complete --command youtube-dl --long-option no-overwrites --short-option w --description 'Do not overwrite files'
 complete --command youtube-dl --long-option continue --short-option c --description 'Force resume of partially downloaded files. By default, youtube-dl will resume downloads if possible.'
 complete --command youtube-dl --long-option no-continue --description 'Do not resume partially downloaded files (restart from beginning)'
index a0fe383f805be2d25f1d61dcf7e4a5df9e4d6166..6b060d7d1b24f369271d00ad51c1222dbb273372 100644 (file)
@@ -19,7 +19,7 @@ __youtube_dl() {
             elif [[ ${prev} == "--recode-video" ]]; then
                 _arguments '*: :(mp4 flv ogg webm mkv)'
             else
-                _arguments '*: :(--help --version --update --ignore-errors --abort-on-error --dump-user-agent --list-extractors --extractor-descriptions --force-generic-extractor --default-search --ignore-config --config-location --flat-playlist --mark-watched --no-mark-watched --no-color --proxy --socket-timeout --source-address --force-ipv4 --force-ipv6 --geo-verification-proxy --cn-verification-proxy --playlist-start --playlist-end --playlist-items --match-title --reject-title --max-downloads --min-filesize --max-filesize --date --datebefore --dateafter --min-views --max-views --match-filter --no-playlist --yes-playlist --age-limit --download-archive --include-ads --limit-rate --retries --fragment-retries --skip-unavailable-fragments --abort-on-unavailable-fragment --buffer-size --no-resize-buffer --test --playlist-reverse --playlist-random --xattr-set-filesize --hls-prefer-native --hls-prefer-ffmpeg --hls-use-mpegts --external-downloader --external-downloader-args --batch-file --id --output --autonumber-size --autonumber-start --restrict-filenames --auto-number --title --literal --no-overwrites --continue --no-continue --no-part --no-mtime --write-description --write-info-json --write-annotations --load-info-json --cookies --cache-dir --no-cache-dir --rm-cache-dir --write-thumbnail --write-all-thumbnails --list-thumbnails --quiet --no-warnings --simulate --skip-download --get-url --get-title --get-id --get-thumbnail --get-description --get-duration --get-filename --get-format --dump-json --dump-single-json --print-json --newline --no-progress --console-title --verbose --dump-pages --write-pages --youtube-print-sig-code --print-traffic --call-home --no-call-home --encoding --no-check-certificate --prefer-insecure --user-agent --referer --add-header --bidi-workaround --sleep-interval --max-sleep-interval --format --all-formats --prefer-free-formats --list-formats --youtube-include-dash-manifest --youtube-skip-dash-manifest --merge-output-format --write-sub 
--write-auto-sub --all-subs --list-subs --sub-format --sub-lang --username --password --twofactor --netrc --video-password --ap-mso --ap-username --ap-password --ap-list-mso --extract-audio --audio-format --audio-quality --recode-video --postprocessor-args --keep-video --no-post-overwrites --embed-subs --embed-thumbnail --add-metadata --metadata-from-title --xattrs --fixup --prefer-avconv --prefer-ffmpeg --ffmpeg-location --exec --convert-subs)'
+                _arguments '*: :(--help --version --update --ignore-errors --abort-on-error --dump-user-agent --list-extractors --extractor-descriptions --force-generic-extractor --default-search --ignore-config --config-location --flat-playlist --mark-watched --no-mark-watched --no-color --proxy --socket-timeout --source-address --force-ipv4 --force-ipv6 --geo-verification-proxy --cn-verification-proxy --geo-bypass --no-geo-bypass --geo-bypass-country --playlist-start --playlist-end --playlist-items --match-title --reject-title --max-downloads --min-filesize --max-filesize --date --datebefore --dateafter --min-views --max-views --match-filter --no-playlist --yes-playlist --age-limit --download-archive --include-ads --limit-rate --retries --fragment-retries --skip-unavailable-fragments --abort-on-unavailable-fragment --buffer-size --no-resize-buffer --test --playlist-reverse --playlist-random --xattr-set-filesize --hls-prefer-native --hls-prefer-ffmpeg --hls-use-mpegts --external-downloader --external-downloader-args --batch-file --id --output --autonumber-size --autonumber-start --restrict-filenames --auto-number --title --literal --no-overwrites --continue --no-continue --no-part --no-mtime --write-description --write-info-json --write-annotations --load-info-json --cookies --cache-dir --no-cache-dir --rm-cache-dir --write-thumbnail --write-all-thumbnails --list-thumbnails --quiet --no-warnings --simulate --skip-download --get-url --get-title --get-id --get-thumbnail --get-description --get-duration --get-filename --get-format --dump-json --dump-single-json --print-json --newline --no-progress --console-title --verbose --dump-pages --write-pages --youtube-print-sig-code --print-traffic --call-home --no-call-home --encoding --no-check-certificate --prefer-insecure --user-agent --referer --add-header --bidi-workaround --sleep-interval --max-sleep-interval --format --all-formats --prefer-free-formats --list-formats --youtube-include-dash-manifest 
--youtube-skip-dash-manifest --merge-output-format --write-sub --write-auto-sub --all-subs --list-subs --sub-format --sub-lang --username --password --twofactor --netrc --video-password --ap-mso --ap-username --ap-password --ap-list-mso --extract-audio --audio-format --audio-quality --recode-video --postprocessor-args --keep-video --no-post-overwrites --embed-subs --embed-thumbnail --add-metadata --metadata-from-title --xattrs --fixup --prefer-avconv --prefer-ffmpeg --ffmpeg-location --exec --convert-subs)'
             fi
         ;;
     esac
index a7bf5a1b06766094cc06e85fd40c6fa64c2cc64b..f7254560c04c87549cd65488408ce3ddfcd4bf5f 100755 (executable)
@@ -33,6 +33,7 @@ from .compat import (
     compat_get_terminal_size,
     compat_http_client,
     compat_kwargs,
+    compat_numeric_types,
     compat_os_name,
     compat_str,
     compat_tokenize_tokenize,
@@ -56,6 +57,8 @@ from .utils import (
     ExtractorError,
     format_bytes,
     formatSeconds,
+    GeoRestrictedError,
+    ISO3166Utils,
     locked_file,
     make_HTTPS_handler,
     MaxDownloadsReached,
@@ -272,6 +275,12 @@ class YoutubeDL(object):
                        If it returns None, the video is downloaded.
                        match_filter_func in utils.py is one example for this.
     no_color:          Do not emit color codes in output.
+    geo_bypass:        Bypass geographic restriction via faking X-Forwarded-For
+                       HTTP header (experimental)
+    geo_bypass_country:
+                       Two-letter ISO 3166-1 alpha-2 country code that will be
+                       used for explicit geographic restriction bypassing via
+                       faking X-Forwarded-For HTTP header (experimental)
 
     The following options determine which downloader is picked:
     external_downloader: Executable of the external downloader to call.
@@ -319,11 +328,21 @@ class YoutubeDL(object):
         self.params.update(params)
         self.cache = Cache(self)
 
-        if self.params.get('cn_verification_proxy') is not None:
-            self.report_warning('--cn-verification-proxy is deprecated. Use --geo-verification-proxy instead.')
+        def check_deprecated(param, option, suggestion):
+            if self.params.get(param) is not None:
+                self.report_warning(
+                    '%s is deprecated. Use %s instead.' % (option, suggestion))
+                return True
+            return False
+
+        if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
             if self.params.get('geo_verification_proxy') is None:
                 self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']
 
+        check_deprecated('autonumber_size', '--autonumber-size', 'output template with %(autonumber)0Nd, where N in the number of digits')
+        check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
+        check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
+
         if params.get('bidi_workaround', False):
             try:
                 import pty
@@ -585,10 +604,7 @@ class YoutubeDL(object):
             autonumber_size = self.params.get('autonumber_size')
             if autonumber_size is None:
                 autonumber_size = 5
-            autonumber_templ = '%0' + str(autonumber_size) + 'd'
-            template_dict['autonumber'] = autonumber_templ % (self.params.get('autonumber_start', 1) - 1 + self._num_downloads)
-            if template_dict.get('playlist_index') is not None:
-                template_dict['playlist_index'] = '%0*d' % (len(str(template_dict['n_entries'])), template_dict['playlist_index'])
+            template_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
             if template_dict.get('resolution') is None:
                 if template_dict.get('width') and template_dict.get('height'):
                     template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
@@ -601,12 +617,61 @@ class YoutubeDL(object):
                 compat_str(v),
                 restricted=self.params.get('restrictfilenames'),
                 is_id=(k == 'id'))
-            template_dict = dict((k, sanitize(k, v))
+            template_dict = dict((k, v if isinstance(v, compat_numeric_types) else sanitize(k, v))
                                  for k, v in template_dict.items()
                                  if v is not None and not isinstance(v, (list, tuple, dict)))
             template_dict = collections.defaultdict(lambda: 'NA', template_dict)
 
             outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
+
+            # For fields playlist_index and autonumber convert all occurrences
+            # of %(field)s to %(field)0Nd for backward compatibility
+            field_size_compat_map = {
+                'playlist_index': len(str(template_dict['n_entries'])),
+                'autonumber': autonumber_size,
+            }
+            FIELD_SIZE_COMPAT_RE = r'(?<!%)%\((?P<field>autonumber|playlist_index)\)s'
+            mobj = re.search(FIELD_SIZE_COMPAT_RE, outtmpl)
+            if mobj:
+                outtmpl = re.sub(
+                    FIELD_SIZE_COMPAT_RE,
+                    r'%%(\1)0%dd' % field_size_compat_map[mobj.group('field')],
+                    outtmpl)
+
+            NUMERIC_FIELDS = set((
+                'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
+                'upload_year', 'upload_month', 'upload_day',
+                'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
+                'average_rating', 'comment_count', 'age_limit',
+                'start_time', 'end_time',
+                'chapter_number', 'season_number', 'episode_number',
+                'track_number', 'disc_number', 'release_year',
+                'playlist_index',
+            ))
+
+            # Missing numeric fields used together with integer presentation types
+            # in format specification will break the argument substitution since
+            # string 'NA' is returned for missing fields. We will patch output
+            # template for missing fields to meet string presentation type.
+            for numeric_field in NUMERIC_FIELDS:
+                if numeric_field not in template_dict:
+                    # As per [1], the format syntax is:
+                    #  %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
+                    # 1. https://docs.python.org/2/library/stdtypes.html#string-formatting
+                    FORMAT_RE = r'''(?x)
+                        (?<!%)
+                        %
+                        \({0}\)  # mapping key
+                        (?:[#0\-+ ]+)?  # conversion flags (optional)
+                        (?:\d+)?  # minimum field width (optional)
+                        (?:\.\d+)?  # precision (optional)
+                        [hlL]?  # length modifier (optional)
+                        [diouxXeEfFgGcrs%]  # conversion type
+                    '''
+                    outtmpl = re.sub(
+                        FORMAT_RE.format(numeric_field),
+                        r'%({0})s'.format(numeric_field), outtmpl)
+
             tmpl = compat_expanduser(outtmpl)
             filename = tmpl % template_dict
             # Temporary fix for #4787
@@ -707,6 +772,14 @@ class YoutubeDL(object):
                     return self.process_ie_result(ie_result, download, extra_info)
                 else:
                     return ie_result
+            except GeoRestrictedError as e:
+                msg = e.msg
+                if e.countries:
+                    msg += '\nThis video is available in %s.' % ', '.join(
+                        map(ISO3166Utils.short2full, e.countries))
+                msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
+                self.report_error(msg)
+                break
             except ExtractorError as e:  # An error we somewhat expected
                 self.report_error(compat_str(e), e.format_traceback())
                 break
@@ -847,8 +920,14 @@ class YoutubeDL(object):
             if self.params.get('playlistrandom', False):
                 random.shuffle(entries)
 
+            x_forwarded_for = ie_result.get('__x_forwarded_for_ip')
+
             for i, entry in enumerate(entries, 1):
                 self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
+                # This __x_forwarded_for_ip thing is a bit ugly but requires
+                # minimal changes
+                if x_forwarded_for:
+                    entry['__x_forwarded_for_ip'] = x_forwarded_for
                 extra = {
                     'n_entries': n_entries,
                     'playlist': playlist,
@@ -1233,6 +1312,11 @@ class YoutubeDL(object):
         if cookies:
             res['Cookie'] = cookies
 
+        if 'X-Forwarded-For' not in res:
+            x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
+            if x_forwarded_for_ip:
+                res['X-Forwarded-For'] = x_forwarded_for_ip
+
         return res
 
     def _calc_cookies(self, info_dict):
@@ -1375,6 +1459,9 @@ class YoutubeDL(object):
             full_format_info = info_dict.copy()
             full_format_info.update(format)
             format['http_headers'] = self._calc_headers(full_format_info)
+        # Remove private housekeeping stuff
+        if '__x_forwarded_for_ip' in info_dict:
+            del info_dict['__x_forwarded_for_ip']
 
         # TODO Central sorting goes here
 
index 5c5b8094bc1b6cbc68b09eba85b06a63a2ee785d..0c401baa6640fc7aaef83cd4ca11a1a4462919b1 100644 (file)
@@ -414,6 +414,11 @@ def _real_main(argv=None):
         'cn_verification_proxy': opts.cn_verification_proxy,
         'geo_verification_proxy': opts.geo_verification_proxy,
         'config_location': opts.config_location,
+        'geo_bypass': opts.geo_bypass,
+        'geo_bypass_country': opts.geo_bypass_country,
+        # just for deprecation check
+        'autonumber': opts.autonumber if opts.autonumber is True else None,
+        'usetitle': opts.usetitle if opts.usetitle is True else None,
     }
 
     with YoutubeDL(ydl_opts) as ydl:
index 7189020192601c289f47eafbda40feefd14cde6c..b257e2e8155c72ea6f560c2f7310ed8f5d0bb234 100644 (file)
@@ -2760,6 +2760,10 @@ else:
     compat_kwargs = lambda kwargs: kwargs
 
 
+compat_numeric_types = ((int, float, long, complex) if sys.version_info[0] < 3
+                        else (int, float, complex))
+
+
 if sys.version_info < (2, 7):
     def compat_socket_create_connection(address, timeout, source_address=None):
         host, port = address
@@ -2895,6 +2899,7 @@ __all__ = [
     'compat_input',
     'compat_itertools_count',
     'compat_kwargs',
+    'compat_numeric_types',
     'compat_ord',
     'compat_os_name',
     'compat_parse_qs',
index 8437dde30ca2afe031afb1ff2882ed12ac4b49b5..e2ddc369e03d03e46bde671c751018f290bf6e59 100644 (file)
@@ -43,7 +43,10 @@ class DashSegmentsFD(FragmentFD):
             count = 0
             while count <= fragment_retries:
                 try:
-                    success = ctx['dl'].download(target_filename, {'url': segment_url})
+                    success = ctx['dl'].download(target_filename, {
+                        'url': segment_url,
+                        'http_headers': info_dict.get('http_headers'),
+                    })
                     if not success:
                         return False
                     down, target_sanitized = sanitize_open(target_filename, 'rb')
index 41e37261d034bbb61dc1932fd08283952e4bee25..bdd3545a2f17a731f2b9326be9de68034b4fb86e 100644 (file)
@@ -275,7 +275,7 @@ class FFmpegFD(ExternalFD):
                 args += ['-f', 'mpegts']
             else:
                 args += ['-f', 'mp4']
-                if (ffpp.basename == 'ffmpeg' and is_outdated_version(ffpp._versions['ffmpeg'], '3.2')) and (not info_dict.get('acodec') or info_dict['acodec'].split('.')[0] in ('aac', 'mp4a')):
+                if (ffpp.basename == 'ffmpeg' and is_outdated_version(ffpp._versions['ffmpeg'], '3.2', False)) and (not info_dict.get('acodec') or info_dict['acodec'].split('.')[0] in ('aac', 'mp4a')):
                     args += ['-bsf:a', 'aac_adtstoasc']
         elif protocol == 'rtmp':
             args += ['-f', 'flv']
index 93cac5e9845db6f00efdd2053fb63b3f3c35e52b..63a636cb70079f81516da8b9e37c8e21cb9f54a2 100644 (file)
@@ -238,7 +238,10 @@ class IsmFD(FragmentFD):
             count = 0
             while count <= fragment_retries:
                 try:
-                    success = ctx['dl'].download(target_filename, {'url': segment_url})
+                    success = ctx['dl'].download(target_filename, {
+                        'url': segment_url,
+                        'http_headers': info_dict.get('http_headers'),
+                    })
                     if not success:
                         return False
                     down, target_sanitized = sanitize_open(target_filename, 'rb')
index 12eeab271c29f3dae271912969317a67fee826ae..4d655bd5e1c3a6c4afae98711c3b7a80c5ac58fc 100644 (file)
@@ -31,6 +31,11 @@ MSO_INFO = {
         'username_field': 'user',
         'password_field': 'passwd',
     },
+    'TWC': {
+        'name': 'Time Warner Cable | Spectrum',
+        'username_field': 'Ecom_User_ID',
+        'password_field': 'Ecom_Password',
+    },
     'thr030': {
         'name': '3 Rivers Communications'
     },
index c97317400ea1f660674d34ca22f4177365198d59..dd96a47cecd787de1dd3b071fd5dd49f7306560f 100644 (file)
@@ -23,7 +23,7 @@ class AENetworksBaseIE(ThePlatformIE):
 class AENetworksIE(AENetworksBaseIE):
     IE_NAME = 'aenetworks'
     IE_DESC = 'A+E Networks: A&E, Lifetime, History.com, FYI Network'
-    _VALID_URL = r'https?://(?:www\.)?(?P<domain>(?:history|aetv|mylifetime)\.com|fyi\.tv)/(?:shows/(?P<show_path>[^/]+(?:/[^/]+){0,2})|movies/(?P<movie_display_id>[^/]+)/full-movie)'
+    _VALID_URL = r'https?://(?:www\.)?(?P<domain>(?:history|aetv|mylifetime|lifetimemovieclub)\.com|fyi\.tv)/(?:shows/(?P<show_path>[^/]+(?:/[^/]+){0,2})|movies/(?P<movie_display_id>[^/]+)(?:/full-movie)?)'
     _TESTS = [{
         'url': 'http://www.history.com/shows/mountain-men/season-1/episode-1',
         'md5': 'a97a65f7e823ae10e9244bc5433d5fe6',
@@ -62,11 +62,15 @@ class AENetworksIE(AENetworksBaseIE):
     }, {
         'url': 'http://www.mylifetime.com/movies/center-stage-on-pointe/full-movie',
         'only_matching': True
+    }, {
+        'url': 'https://www.lifetimemovieclub.com/movies/a-killer-among-us',
+        'only_matching': True
     }]
     _DOMAIN_TO_REQUESTOR_ID = {
         'history.com': 'HISTORY',
         'aetv.com': 'AETV',
         'mylifetime.com': 'LIFETIME',
+        'lifetimemovieclub.com': 'LIFETIMEMOVIECLUB',
         'fyi.tv': 'FYI',
     }
 
index 87c803e948fd2e04cde6b0b43251d3f804b952a0..b71d1a093463c0e83dd319a2397a2c78c5b12e7f 100644 (file)
@@ -53,20 +53,30 @@ class AMCNetworksIE(ThePlatformIE):
             'mbr': 'true',
             'manifest': 'm3u',
         }
-        media_url = self._search_regex(r'window\.platformLinkURL\s*=\s*[\'"]([^\'"]+)', webpage, 'media url')
+        media_url = self._search_regex(
+            r'window\.platformLinkURL\s*=\s*[\'"]([^\'"]+)',
+            webpage, 'media url')
         theplatform_metadata = self._download_theplatform_metadata(self._search_regex(
-            r'https?://link.theplatform.com/s/([^?]+)', media_url, 'theplatform_path'), display_id)
+            r'link\.theplatform\.com/s/([^?]+)',
+            media_url, 'theplatform_path'), display_id)
         info = self._parse_theplatform_metadata(theplatform_metadata)
         video_id = theplatform_metadata['pid']
         title = theplatform_metadata['title']
         rating = theplatform_metadata['ratings'][0]['rating']
-        auth_required = self._search_regex(r'window\.authRequired\s*=\s*(true|false);', webpage, 'auth required')
+        auth_required = self._search_regex(
+            r'window\.authRequired\s*=\s*(true|false);',
+            webpage, 'auth required')
         if auth_required == 'true':
-            requestor_id = self._search_regex(r'window\.requestor_id\s*=\s*[\'"]([^\'"]+)', webpage, 'requestor id')
-            resource = self._get_mvpd_resource(requestor_id, title, video_id, rating)
-            query['auth'] = self._extract_mvpd_auth(url, video_id, requestor_id, resource)
+            requestor_id = self._search_regex(
+                r'window\.requestor_id\s*=\s*[\'"]([^\'"]+)',
+                webpage, 'requestor id')
+            resource = self._get_mvpd_resource(
+                requestor_id, title, video_id, rating)
+            query['auth'] = self._extract_mvpd_auth(
+                url, video_id, requestor_id, resource)
         media_url = update_url_query(media_url, query)
-        formats, subtitles = self._extract_theplatform_smil(media_url, video_id)
+        formats, subtitles = self._extract_theplatform_smil(
+            media_url, video_id)
         self._sort_formats(formats)
         info.update({
             'id': video_id,
@@ -78,9 +88,11 @@ class AMCNetworksIE(ThePlatformIE):
         if ns_keys:
             ns = list(ns_keys)[0]
             series = theplatform_metadata.get(ns + '$show')
-            season_number = int_or_none(theplatform_metadata.get(ns + '$season'))
+            season_number = int_or_none(
+                theplatform_metadata.get(ns + '$season'))
             episode = theplatform_metadata.get(ns + '$episodeTitle')
-            episode_number = int_or_none(theplatform_metadata.get(ns + '$episode'))
+            episode_number = int_or_none(
+                theplatform_metadata.get(ns + '$episode'))
             if season_number:
                 title = 'Season %d - %s' % (season_number, title)
             if series:
index 486dff82d00a44a13384e3b7d8ff1b0189da8451..e21045bed9f7eb161d2dbacd02008e8ac11f9ec3 100644 (file)
@@ -1,13 +1,13 @@
 from __future__ import unicode_literals
 
-from .jwplatform import JWPlatformBaseIE
+from .common import InfoExtractor
 from ..utils import (
     unified_strdate,
     clean_html,
 )
 
 
-class ArchiveOrgIE(JWPlatformBaseIE):
+class ArchiveOrgIE(InfoExtractor):
     IE_NAME = 'archive.org'
     IE_DESC = 'archive.org videos'
     _VALID_URL = r'https?://(?:www\.)?archive\.org/(?:details|embed)/(?P<id>[^/?#]+)(?:[?].*)?$'
index b17916137ec51808e8c0c869142d37bf083c90e0..8a2ed0ab6851e3ee128aea4fbb1254c0d37cdd7b 100644 (file)
@@ -225,6 +225,8 @@ class BBCCoUkIE(InfoExtractor):
         }
     ]
 
+    _USP_RE = r'/([^/]+?)\.ism(?:\.hlsv2\.ism)?/[^/]+\.m3u8'
+
     class MediaSelectionError(Exception):
         def __init__(self, id):
             self.id = id
@@ -336,6 +338,15 @@ class BBCCoUkIE(InfoExtractor):
                         formats.extend(self._extract_m3u8_formats(
                             href, programme_id, ext='mp4', entry_protocol='m3u8_native',
                             m3u8_id=format_id, fatal=False))
+                        if re.search(self._USP_RE, href):
+                            usp_formats = self._extract_m3u8_formats(
+                                re.sub(self._USP_RE, r'/\1.ism/\1.m3u8', href),
+                                programme_id, ext='mp4', entry_protocol='m3u8_native',
+                                m3u8_id=format_id, fatal=False)
+                            for f in usp_formats:
+                                if f.get('height') and f['height'] > 720:
+                                    continue
+                                formats.append(f)
                     elif transfer_format == 'hds':
                         formats.extend(self._extract_f4m_formats(
                             href, programme_id, f4m_id=format_id, fatal=False))
index 32326ed9e3b857fa38bd2b32eca4c9bbaf422ee8..1f5b6ed92a2fa4c79b07361eb02797c118241832 100644 (file)
@@ -24,7 +24,7 @@ class BellMediaIE(InfoExtractor):
                 space
             )\.ca|
             much\.com
-        )/.*?(?:\bvid=|-vid|~|%7E|/(?:episode)?)(?P<id>[0-9]{6})'''
+        )/.*?(?:\bvid=|-vid|~|%7E|/(?:episode)?)(?P<id>[0-9]{6,})'''
     _TESTS = [{
         'url': 'http://www.ctv.ca/video/player?vid=706966',
         'md5': 'ff2ebbeae0aa2dcc32a830c3fd69b7b0',
@@ -55,6 +55,9 @@ class BellMediaIE(InfoExtractor):
     }, {
         'url': 'http://www.much.com/shows/the-almost-impossible-gameshow/928979/episode-6',
         'only_matching': True,
+    }, {
+        'url': 'http://www.ctv.ca/DCs-Legends-of-Tomorrow/Video/S2E11-Turncoat-vid1051430',
+        'only_matching': True,
     }]
     _DOMAINS = {
         'thecomedynetwork': 'comedy',
index c5e11e8eb81151ca8dd07be04c1fe2005a26bfa9..2fbfad1ba04adf915c41441173a6464c6b96b100 100644 (file)
@@ -33,6 +33,10 @@ class BloombergIE(InfoExtractor):
         'params': {
             'format': 'best[format_id^=hds]',
         },
+    }, {
+        # data-bmmrid=
+        'url': 'https://www.bloomberg.com/politics/articles/2017-02-08/le-pen-aide-briefed-french-central-banker-on-plan-to-print-money',
+        'only_matching': True,
     }, {
         'url': 'http://www.bloomberg.com/news/articles/2015-11-12/five-strange-things-that-have-been-happening-in-financial-markets',
         'only_matching': True,
@@ -45,9 +49,10 @@ class BloombergIE(InfoExtractor):
         name = self._match_id(url)
         webpage = self._download_webpage(url, name)
         video_id = self._search_regex(
-            (r'["\']bmmrId["\']\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1',
-             r'videoId\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1'),
-            webpage, 'id', group='url', default=None)
+            (r'["\']bmmrId["\']\s*:\s*(["\'])(?P<id>(?:(?!\1).)+)\1',
+             r'videoId\s*:\s*(["\'])(?P<id>(?:(?!\1).)+)\1',
+             r'data-bmmrid=(["\'])(?P<id>(?:(?!\1).)+)\1'),
+            webpage, 'id', group='id', default=None)
         if not video_id:
             bplayer_data = self._parse_json(self._search_regex(
                 r'BPlayer\(null,\s*({[^;]+})\);', webpage, 'id'), name)
index 5c6e99da134efe150962dd979cbf36e391527d3a..27685eed0188312154463066293018496e875e5c 100644 (file)
@@ -191,6 +191,10 @@ class BrightcoveLegacyIE(InfoExtractor):
         # These fields hold the id of the video
         videoPlayer = find_param('@videoPlayer') or find_param('videoId') or find_param('videoID') or find_param('@videoList')
         if videoPlayer is not None:
+            if isinstance(videoPlayer, list):
+                videoPlayer = videoPlayer[0]
+            if not (videoPlayer.isdigit() or videoPlayer.startswith('ref:')):
+                return None
             params['@videoPlayer'] = videoPlayer
         linkBase = find_param('linkBaseURL')
         if linkBase is not None:
index 4f88c31ad2af53fe07df449e384137689f65c17d..b1dfacf8094f92493ad6cc95b6fe758b3b81f4fc 100644 (file)
@@ -13,6 +13,7 @@ from ..utils import (
     float_or_none,
     sanitized_Request,
     urlencode_postdata,
+    USER_AGENTS,
 )
 
 
@@ -21,10 +22,10 @@ class CeskaTelevizeIE(InfoExtractor):
     _TESTS = [{
         'url': 'http://www.ceskatelevize.cz/ivysilani/ivysilani/10441294653-hyde-park-civilizace/214411058091220',
         'info_dict': {
-            'id': '61924494876951776',
+            'id': '61924494877246241',
             'ext': 'mp4',
-            'title': 'Hyde Park Civilizace',
-            'description': 'md5:fe93f6eda372d150759d11644ebbfb4a',
+            'title': 'Hyde Park Civilizace: Život v Grónsku',
+            'description': 'md5:3fec8f6bb497be5cdb0c9e8781076626',
             'thumbnail': r're:^https?://.*\.jpg',
             'duration': 3350,
         },
@@ -114,70 +115,100 @@ class CeskaTelevizeIE(InfoExtractor):
             'requestSource': 'iVysilani',
         }
 
-        req = sanitized_Request(
-            'http://www.ceskatelevize.cz/ivysilani/ajax/get-client-playlist',
-            data=urlencode_postdata(data))
-
-        req.add_header('Content-type', 'application/x-www-form-urlencoded')
-        req.add_header('x-addr', '127.0.0.1')
-        req.add_header('X-Requested-With', 'XMLHttpRequest')
-        req.add_header('Referer', url)
-
-        playlistpage = self._download_json(req, playlist_id)
-
-        playlist_url = playlistpage['url']
-        if playlist_url == 'error_region':
-            raise ExtractorError(NOT_AVAILABLE_STRING, expected=True)
-
-        req = sanitized_Request(compat_urllib_parse_unquote(playlist_url))
-        req.add_header('Referer', url)
-
-        playlist_title = self._og_search_title(webpage, default=None)
-        playlist_description = self._og_search_description(webpage, default=None)
-
-        playlist = self._download_json(req, playlist_id)['playlist']
-        playlist_len = len(playlist)
-
         entries = []
-        for item in playlist:
-            is_live = item.get('type') == 'LIVE'
-            formats = []
-            for format_id, stream_url in item['streamUrls'].items():
-                formats.extend(self._extract_m3u8_formats(
-                    stream_url, playlist_id, 'mp4',
-                    entry_protocol='m3u8' if is_live else 'm3u8_native',
-                    fatal=False))
-            self._sort_formats(formats)
-
-            item_id = item.get('id') or item['assetId']
-            title = item['title']
-
-            duration = float_or_none(item.get('duration'))
-            thumbnail = item.get('previewImageUrl')
-
-            subtitles = {}
-            if item.get('type') == 'VOD':
-                subs = item.get('subtitles')
-                if subs:
-                    subtitles = self.extract_subtitles(episode_id, subs)
-
-            if playlist_len == 1:
-                final_title = playlist_title or title
-                if is_live:
-                    final_title = self._live_title(final_title)
-            else:
-                final_title = '%s (%s)' % (playlist_title, title)
-
-            entries.append({
-                'id': item_id,
-                'title': final_title,
-                'description': playlist_description if playlist_len == 1 else None,
-                'thumbnail': thumbnail,
-                'duration': duration,
-                'formats': formats,
-                'subtitles': subtitles,
-                'is_live': is_live,
-            })
+
+        for user_agent in (None, USER_AGENTS['Safari']):
+            req = sanitized_Request(
+                'http://www.ceskatelevize.cz/ivysilani/ajax/get-client-playlist',
+                data=urlencode_postdata(data))
+
+            req.add_header('Content-type', 'application/x-www-form-urlencoded')
+            req.add_header('x-addr', '127.0.0.1')
+            req.add_header('X-Requested-With', 'XMLHttpRequest')
+            if user_agent:
+                req.add_header('User-Agent', user_agent)
+            req.add_header('Referer', url)
+
+            playlistpage = self._download_json(req, playlist_id, fatal=False)
+
+            if not playlistpage:
+                continue
+
+            playlist_url = playlistpage['url']
+            if playlist_url == 'error_region':
+                raise ExtractorError(NOT_AVAILABLE_STRING, expected=True)
+
+            req = sanitized_Request(compat_urllib_parse_unquote(playlist_url))
+            req.add_header('Referer', url)
+
+            playlist_title = self._og_search_title(webpage, default=None)
+            playlist_description = self._og_search_description(webpage, default=None)
+
+            playlist = self._download_json(req, playlist_id, fatal=False)
+            if not playlist:
+                continue
+
+            playlist = playlist.get('playlist')
+            if not isinstance(playlist, list):
+                continue
+
+            playlist_len = len(playlist)
+
+            for num, item in enumerate(playlist):
+                is_live = item.get('type') == 'LIVE'
+                formats = []
+                for format_id, stream_url in item.get('streamUrls', {}).items():
+                    if 'playerType=flash' in stream_url:
+                        stream_formats = self._extract_m3u8_formats(
+                            stream_url, playlist_id, 'mp4',
+                            entry_protocol='m3u8' if is_live else 'm3u8_native',
+                            m3u8_id='hls-%s' % format_id, fatal=False)
+                    else:
+                        stream_formats = self._extract_mpd_formats(
+                            stream_url, playlist_id,
+                            mpd_id='dash-%s' % format_id, fatal=False)
+                    # See https://github.com/rg3/youtube-dl/issues/12119#issuecomment-280037031
+                    if format_id == 'audioDescription':
+                        for f in stream_formats:
+                            f['source_preference'] = -10
+                    formats.extend(stream_formats)
+
+                if user_agent and len(entries) == playlist_len:
+                    entries[num]['formats'].extend(formats)
+                    continue
+
+                item_id = item.get('id') or item['assetId']
+                title = item['title']
+
+                duration = float_or_none(item.get('duration'))
+                thumbnail = item.get('previewImageUrl')
+
+                subtitles = {}
+                if item.get('type') == 'VOD':
+                    subs = item.get('subtitles')
+                    if subs:
+                        subtitles = self.extract_subtitles(episode_id, subs)
+
+                if playlist_len == 1:
+                    final_title = playlist_title or title
+                    if is_live:
+                        final_title = self._live_title(final_title)
+                else:
+                    final_title = '%s (%s)' % (playlist_title, title)
+
+                entries.append({
+                    'id': item_id,
+                    'title': final_title,
+                    'description': playlist_description if playlist_len == 1 else None,
+                    'thumbnail': thumbnail,
+                    'duration': duration,
+                    'formats': formats,
+                    'subtitles': subtitles,
+                    'is_live': is_live,
+                })
+
+        for e in entries:
+            self._sort_formats(e['formats'])
 
         return self.playlist_result(entries, playlist_id, playlist_title, playlist_description)
 
index 0b4e2ac207b049d934c6b65fdf9317f856466b0f..4252d682563f9831a33225ff3dc906aa27f0eb01 100644 (file)
@@ -6,6 +6,7 @@ import hashlib
 import json
 import netrc
 import os
+import random
 import re
 import socket
 import sys
@@ -39,7 +40,10 @@ from ..utils import (
     ExtractorError,
     fix_xml_ampersands,
     float_or_none,
+    GeoRestrictedError,
+    GeoUtils,
     int_or_none,
+    js_to_json,
     parse_iso8601,
     RegexNotFoundError,
     sanitize_filename,
@@ -319,17 +323,34 @@ class InfoExtractor(object):
     _real_extract() methods and define a _VALID_URL regexp.
     Probably, they should also be added to the list of extractors.
 
+    _GEO_BYPASS attribute may be set to False in order to disable
+    geo restriction bypass mechanisms for a particular extractor.
+    Note that this won't disable the explicit geo restriction bypass based
+    on the country code provided with geo_bypass_country. (experimental)
+
+    _GEO_COUNTRIES attribute may contain a list of presumably geo unrestricted
+    countries for this extractor. One of these countries will be used by
+    the geo restriction bypass mechanism right away in order to bypass
+    geo restriction, provided the mechanism is not disabled. (experimental)
+
+    NB: both these geo attributes are experimental and may change in future
+    or be completely removed.
+
     Finally, the _WORKING attribute should be set to False for broken IEs
     in order to warn the users and skip the tests.
     """
 
     _ready = False
     _downloader = None
+    _x_forwarded_for_ip = None
+    _GEO_BYPASS = True
+    _GEO_COUNTRIES = None
     _WORKING = True
 
     def __init__(self, downloader=None):
         """Constructor. Receives an optional downloader."""
         self._ready = False
+        self._x_forwarded_for_ip = None
         self.set_downloader(downloader)
 
     @classmethod
@@ -358,15 +379,59 @@ class InfoExtractor(object):
 
     def initialize(self):
         """Initializes an instance (authentication, etc)."""
+        self._initialize_geo_bypass(self._GEO_COUNTRIES)
         if not self._ready:
             self._real_initialize()
             self._ready = True
 
+    def _initialize_geo_bypass(self, countries):
+        """
+        Initialize geo restriction bypass mechanism.
+
+        This method is used to initialize the geo bypass mechanism based on
+        faking the X-Forwarded-For HTTP header. A random country from the
+        provided country list is selected and a random IP belonging to this
+        country is generated. This IP will be passed as the X-Forwarded-For
+        HTTP header in all subsequent HTTP requests.
+
+        This method will be used for initial geo bypass mechanism initialization
+        during the instance initialization with _GEO_COUNTRIES.
+
+        You may also call it manually from the extractor's code if the geo
+        countries information is not available beforehand (e.g. obtained
+        during extraction) or for some other reason.
+        """
+        if not self._x_forwarded_for_ip:
+            country_code = self._downloader.params.get('geo_bypass_country', None)
+            # If there is no explicit country for geo bypass specified and
+            # the extractor is known to be geo restricted let's fake IP
+            # as X-Forwarded-For right away.
+            if (not country_code and
+                    self._GEO_BYPASS and
+                    self._downloader.params.get('geo_bypass', True) and
+                    countries):
+                country_code = random.choice(countries)
+            if country_code:
+                self._x_forwarded_for_ip = GeoUtils.random_ipv4(country_code)
+                if self._downloader.params.get('verbose', False):
+                    self._downloader.to_stdout(
+                        '[debug] Using fake IP %s (%s) as X-Forwarded-For.'
+                        % (self._x_forwarded_for_ip, country_code.upper()))
+
     def extract(self, url):
         """Extracts URL information and returns it in list of dicts."""
         try:
-            self.initialize()
-            return self._real_extract(url)
+            for _ in range(2):
+                try:
+                    self.initialize()
+                    ie_result = self._real_extract(url)
+                    if self._x_forwarded_for_ip:
+                        ie_result['__x_forwarded_for_ip'] = self._x_forwarded_for_ip
+                    return ie_result
+                except GeoRestrictedError as e:
+                    if self.__maybe_fake_ip_and_retry(e.countries):
+                        continue
+                    raise
         except ExtractorError:
             raise
         except compat_http_client.IncompleteRead as e:
@@ -374,6 +439,21 @@ class InfoExtractor(object):
         except (KeyError, StopIteration) as e:
             raise ExtractorError('An extractor error has occurred.', cause=e)
 
+    def __maybe_fake_ip_and_retry(self, countries):
+        if (not self._downloader.params.get('geo_bypass_country', None) and
+                self._GEO_BYPASS and
+                self._downloader.params.get('geo_bypass', True) and
+                not self._x_forwarded_for_ip and
+                countries):
+            country_code = random.choice(countries)
+            self._x_forwarded_for_ip = GeoUtils.random_ipv4(country_code)
+            if self._x_forwarded_for_ip:
+                self.report_warning(
+                    'Video is geo restricted. Retrying extraction with fake IP %s (%s) as X-Forwarded-For.'
+                    % (self._x_forwarded_for_ip, country_code.upper()))
+                return True
+        return False
+
     def set_downloader(self, downloader):
         """Sets the downloader for this IE."""
         self._downloader = downloader
@@ -433,6 +513,15 @@ class InfoExtractor(object):
         if isinstance(url_or_request, (compat_str, str)):
             url_or_request = url_or_request.partition('#')[0]
 
+        # Some sites check the X-Forwarded-For HTTP header in order to figure
+        # out the origin of a client behind a proxy. This allows bypassing geo
+        # restriction by faking this header's value with an IP that belongs to
+        # some geo unrestricted country. We do so whenever a fake IP has been
+        # selected, unless the caller already supplied this header.
+        if self._x_forwarded_for_ip:
+            if 'X-Forwarded-For' not in headers:
+                headers['X-Forwarded-For'] = self._x_forwarded_for_ip
+
         urlh = self._request_webpage(url_or_request, video_id, note, errnote, fatal, data=data, headers=headers, query=query)
         if urlh is False:
             assert not fatal
@@ -608,10 +697,8 @@ class InfoExtractor(object):
             expected=True)
 
     @staticmethod
-    def raise_geo_restricted(msg='This video is not available from your location due to geo restriction'):
-        raise ExtractorError(
-            '%s. You might want to use --proxy to workaround.' % msg,
-            expected=True)
+    def raise_geo_restricted(msg='This video is not available from your location due to geo restriction', countries=None):
+        raise GeoRestrictedError(msg, countries=countries)
 
     # Methods for following #608
     @staticmethod
@@ -1208,6 +1295,9 @@ class InfoExtractor(object):
         m3u8_doc, urlh = res
         m3u8_url = urlh.geturl()
 
+        if '#EXT-X-FAXS-CM:' in m3u8_doc:  # Adobe Flash Access
+            return []
+
         formats = [self._m3u8_meta_format(m3u8_url, ext, preference, m3u8_id)]
 
         format_url = lambda u: (
@@ -2070,6 +2160,123 @@ class InfoExtractor(object):
                     })
         return formats
 
+    @staticmethod
+    def _find_jwplayer_data(webpage):
+        mobj = re.search(
+            r'jwplayer\((?P<quote>[\'"])[^\'" ]+(?P=quote)\)\.setup\s*\((?P<options>[^)]+)\)',
+            webpage)
+        if mobj:
+            return mobj.group('options')
+
+    def _extract_jwplayer_data(self, webpage, video_id, *args, **kwargs):
+        jwplayer_data = self._parse_json(
+            self._find_jwplayer_data(webpage), video_id,
+            transform_source=js_to_json)
+        return self._parse_jwplayer_data(
+            jwplayer_data, video_id, *args, **kwargs)
+
+    def _parse_jwplayer_data(self, jwplayer_data, video_id=None, require_title=True,
+                             m3u8_id=None, mpd_id=None, rtmp_params=None, base_url=None):
+        # JWPlayer backward compatibility: flattened playlists
+        # https://github.com/jwplayer/jwplayer/blob/v7.4.3/src/js/api/config.js#L81-L96
+        if 'playlist' not in jwplayer_data:
+            jwplayer_data = {'playlist': [jwplayer_data]}
+
+        entries = []
+
+        # JWPlayer backward compatibility: single playlist item
+        # https://github.com/jwplayer/jwplayer/blob/v7.7.0/src/js/playlist/playlist.js#L10
+        if not isinstance(jwplayer_data['playlist'], list):
+            jwplayer_data['playlist'] = [jwplayer_data['playlist']]
+
+        for video_data in jwplayer_data['playlist']:
+            # JWPlayer backward compatibility: flattened sources
+            # https://github.com/jwplayer/jwplayer/blob/v7.4.3/src/js/playlist/item.js#L29-L35
+            if 'sources' not in video_data:
+                video_data['sources'] = [video_data]
+
+            this_video_id = video_id or video_data['mediaid']
+
+            formats = []
+            for source in video_data['sources']:
+                source_url = self._proto_relative_url(source['file'])
+                if base_url:
+                    source_url = compat_urlparse.urljoin(base_url, source_url)
+                source_type = source.get('type') or ''
+                ext = mimetype2ext(source_type) or determine_ext(source_url)
+                if source_type == 'hls' or ext == 'm3u8':
+                    formats.extend(self._extract_m3u8_formats(
+                        source_url, this_video_id, 'mp4', 'm3u8_native', m3u8_id=m3u8_id, fatal=False))
+                elif ext == 'mpd':
+                    formats.extend(self._extract_mpd_formats(
+                        source_url, this_video_id, mpd_id=mpd_id, fatal=False))
+                # https://github.com/jwplayer/jwplayer/blob/master/src/js/providers/default.js#L67
+                elif source_type.startswith('audio') or ext in ('oga', 'aac', 'mp3', 'mpeg', 'vorbis'):
+                    formats.append({
+                        'url': source_url,
+                        'vcodec': 'none',
+                        'ext': ext,
+                    })
+                else:
+                    height = int_or_none(source.get('height'))
+                    if height is None:
+                        # Often no height is provided but there is a label in
+                        # format like 1080p.
+                        height = int_or_none(self._search_regex(
+                            r'^(\d{3,})[pP]$', source.get('label') or '',
+                            'height', default=None))
+                    a_format = {
+                        'url': source_url,
+                        'width': int_or_none(source.get('width')),
+                        'height': height,
+                        'ext': ext,
+                    }
+                    if source_url.startswith('rtmp'):
+                        a_format['ext'] = 'flv'
+
+                        # See com/longtailvideo/jwplayer/media/RTMPMediaProvider.as
+                        # of jwplayer.flash.swf
+                        rtmp_url_parts = re.split(
+                            r'((?:mp4|mp3|flv):)', source_url, 1)
+                        if len(rtmp_url_parts) == 3:
+                            rtmp_url, prefix, play_path = rtmp_url_parts
+                            a_format.update({
+                                'url': rtmp_url,
+                                'play_path': prefix + play_path,
+                            })
+                        if rtmp_params:
+                            a_format.update(rtmp_params)
+                    formats.append(a_format)
+            self._sort_formats(formats)
+
+            subtitles = {}
+            tracks = video_data.get('tracks')
+            if tracks and isinstance(tracks, list):
+                for track in tracks:
+                    if track.get('kind') != 'captions':
+                        continue
+                    track_url = urljoin(base_url, track.get('file'))
+                    if not track_url:
+                        continue
+                    subtitles.setdefault(track.get('label') or 'en', []).append({
+                        'url': self._proto_relative_url(track_url)
+                    })
+
+            entries.append({
+                'id': this_video_id,
+                'title': video_data['title'] if require_title else video_data.get('title'),
+                'description': video_data.get('description'),
+                'thumbnail': self._proto_relative_url(video_data.get('image')),
+                'timestamp': int_or_none(video_data.get('pubdate')),
+                'duration': float_or_none(jwplayer_data.get('duration') or video_data.get('duration')),
+                'subtitles': subtitles,
+                'formats': formats,
+            })
+        if len(entries) == 1:
+            return entries[0]
+        else:
+            return self.playlist_result(entries)
+
     def _live_title(self, name):
         """ Generate the title for a live video """
         now = datetime.datetime.now()
index 2f86e2381f447faa7b42d6056e685bc04f101f9c..79f7a9cd1192302ada53c49a82f618864fae39ed 100644 (file)
@@ -1,5 +1,7 @@
 from __future__ import unicode_literals
 
+import sys
+
 from .common import InfoExtractor
 from ..utils import ExtractorError
 
@@ -7,7 +9,7 @@ from ..utils import ExtractorError
 class CommonMistakesIE(InfoExtractor):
     IE_DESC = False  # Do not list
     _VALID_URL = r'''(?x)
-        (?:url|URL)
+        (?:url|URL)$
     '''
 
     _TESTS = [{
@@ -33,7 +35,9 @@ class UnicodeBOMIE(InfoExtractor):
         IE_DESC = False
         _VALID_URL = r'(?P<bom>\ufeff)(?P<id>.*)$'
 
-        _TESTS = [{
+        # Disable test for python 3.2 since BOM is broken in re in this version
+        # (see https://github.com/rg3/youtube-dl/issues/9751)
+        _TESTS = [] if (3, 0) < sys.version_info <= (3, 3) else [{
             'url': '\ufeffhttp://www.youtube.com/watch?v=BaW_jenozKc',
             'only_matching': True,
         }]
diff --git a/youtube_dl/extractor/corus.py b/youtube_dl/extractor/corus.py
new file mode 100644 (file)
index 0000000..7b2f500
--- /dev/null
@@ -0,0 +1,72 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .theplatform import ThePlatformFeedIE
+from ..utils import int_or_none
+
+
+class CorusIE(ThePlatformFeedIE):
+    _VALID_URL = r'https?://(?:www\.)?(?P<domain>(?:globaltv|etcanada)\.com|(?:hgtv|foodnetwork|slice)\.ca)/(?:video/|(?:[^/]+/)+(?:videos/[a-z0-9-]+-|video\.html\?.*?\bv=))(?P<id>\d+)'
+    _TESTS = [{
+        'url': 'http://www.hgtv.ca/shows/bryan-inc/videos/movie-night-popcorn-with-bryan-870923331648/',
+        'md5': '05dcbca777bf1e58c2acbb57168ad3a6',
+        'info_dict': {
+            'id': '870923331648',
+            'ext': 'mp4',
+            'title': 'Movie Night Popcorn with Bryan',
+            'description': 'Bryan whips up homemade popcorn, the old fashion way for Jojo and Lincoln.',
+            'uploader': 'SHWM-NEW',
+            'upload_date': '20170206',
+            'timestamp': 1486392197,
+        },
+    }, {
+        'url': 'http://www.foodnetwork.ca/shows/chopped/video/episode/chocolate-obsession/video.html?v=872683587753',
+        'only_matching': True,
+    }, {
+        'url': 'http://etcanada.com/video/873675331955/meet-the-survivor-game-changers-castaways-part-2/',
+        'only_matching': True,
+    }]
+
+    _TP_FEEDS = {
+        'globaltv': {
+            'feed_id': 'ChQqrem0lNUp',
+            'account_id': 2269680845,
+        },
+        'etcanada': {
+            'feed_id': 'ChQqrem0lNUp',
+            'account_id': 2269680845,
+        },
+        'hgtv': {
+            'feed_id': 'L0BMHXi2no43',
+            'account_id': 2414428465,
+        },
+        'foodnetwork': {
+            'feed_id': 'ukK8o58zbRmJ',
+            'account_id': 2414429569,
+        },
+        'slice': {
+            'feed_id': '5tUJLgV2YNJ5',
+            'account_id': 2414427935,
+        },
+    }
+
+    def _real_extract(self, url):
+        domain, video_id = re.match(self._VALID_URL, url).groups()
+        feed_info = self._TP_FEEDS[domain.split('.')[0]]
+        return self._extract_feed_info('dtjsEC', feed_info['feed_id'], 'byId=' + video_id, video_id, lambda e: {
+            'episode_number': int_or_none(e.get('pl1$episode')),
+            'season_number': int_or_none(e.get('pl1$season')),
+            'series': e.get('pl1$show'),
+        }, {
+            'HLS': {
+                'manifest': 'm3u',
+            },
+            'DesktopHLS Default': {
+                'manifest': 'm3u',
+            },
+            'MP4 MBR': {
+                'manifest': 'm3u',
+            },
+        }, feed_info['account_id'])
index 377fb45e9d2bcd70c1a6aa6d835331636708c215..f919ed208d16d0cb8e8299ceacec1d96b0237c81 100644 (file)
@@ -6,6 +6,7 @@ from ..utils import int_or_none
 
 
 class CrackleIE(InfoExtractor):
+    _GEO_COUNTRIES = ['US']
     _VALID_URL = r'(?:crackle:|https?://(?:(?:www|m)\.)?crackle\.com/(?:playlist/\d+/|(?:[^/]+/)+))(?P<id>\d+)'
     _TEST = {
         'url': 'http://www.crackle.com/comedians-in-cars-getting-coffee/2498934',
index 109d1c5a864f283a01b2b2baaed784384776a5c1..a1fc6a75618cecada911953fefd753db2d3a5632 100644 (file)
@@ -123,7 +123,7 @@ class CrunchyrollIE(CrunchyrollBaseIE):
         'url': 'http://www.crunchyroll.com/wanna-be-the-strongest-in-the-world/episode-1-an-idol-wrestler-is-born-645513',
         'info_dict': {
             'id': '645513',
-            'ext': 'flv',
+            'ext': 'mp4',
             'title': 'Wanna be the Strongest in the World Episode 1 – An Idol-Wrestler is Born!',
             'description': 'md5:2d17137920c64f2f49981a7797d275ef',
             'thumbnail': 'http://img1.ak.crunchyroll.com/i/spire1-tmb/20c6b5e10f1a47b10516877d3c039cae1380951166_full.jpg',
@@ -192,6 +192,21 @@ class CrunchyrollIE(CrunchyrollBaseIE):
         # geo-restricted (US), 18+ maturity wall, non-premium available
         'url': 'http://www.crunchyroll.com/cosplay-complex-ova/episode-1-the-birth-of-the-cosplay-club-565617',
         'only_matching': True,
+    }, {
+        # A description with double quotes
+        'url': 'http://www.crunchyroll.com/11eyes/episode-1-piros-jszaka-red-night-535080',
+        'info_dict': {
+            'id': '535080',
+            'ext': 'mp4',
+            'title': '11eyes Episode 1 – Piros éjszaka - Red Night',
+            'description': 'Kakeru and Yuka are thrown into an alternate nightmarish world they call "Red Night".',
+            'uploader': 'Marvelous AQL Inc.',
+            'upload_date': '20091021',
+        },
+        'params': {
+            # Just test metadata extraction
+            'skip_download': True,
+        },
     }]
 
     _FORMAT_IDS = {
@@ -362,9 +377,9 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
             r'(?s)<h1[^>]*>((?:(?!<h1).)*?<span[^>]+itemprop=["\']title["\'][^>]*>(?:(?!<h1).)+?)</h1>',
             webpage, 'video_title')
         video_title = re.sub(r' {2,}', ' ', video_title)
-        video_description = self._html_search_regex(
-            r'<script[^>]*>\s*.+?\[media_id=%s\].+?"description"\s*:\s*"([^"]+)' % video_id,
-            webpage, 'description', default=None)
+        video_description = self._parse_json(self._html_search_regex(
+            r'<script[^>]*>\s*.+?\[media_id=%s\].+?({.+?"description"\s*:.+?})\);' % video_id,
+            webpage, 'description', default='{}'), video_id).get('description')
         if video_description:
             video_description = lowercase_escape(video_description.replace(r'\r\n', '\n'))
         video_upload_date = self._html_search_regex(
@@ -519,11 +534,11 @@ class CrunchyrollShowPlaylistIE(CrunchyrollBaseIE):
             r'(?s)<h1[^>]*>\s*<span itemprop="name">(.*?)</span>',
             webpage, 'title')
         episode_paths = re.findall(
-            r'(?s)<li id="showview_videos_media_[0-9]+"[^>]+>.*?<a href="([^"]+)"',
+            r'(?s)<li id="showview_videos_media_(\d+)"[^>]+>.*?<a href="([^"]+)"',
             webpage)
         entries = [
-            self.url_result('http://www.crunchyroll.com' + ep, 'Crunchyroll')
-            for ep in episode_paths
+            self.url_result('http://www.crunchyroll.com' + ep, 'Crunchyroll', ep_id)
+            for ep_id, ep in episode_paths
         ]
         entries.reverse()
 
index 31bf5faf6605553cdcd79f670285a554e711364f..b312401dc69b9a9ca35cebca767b11f10174e1c2 100644 (file)
@@ -66,7 +66,6 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
                 'uploader_id': 'xijv66',
                 'age_limit': 0,
                 'view_count': int,
-                'comment_count': int,
             }
         },
         # Vevo video
@@ -140,7 +139,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
         view_count = str_to_int(view_count_str)
         comment_count = int_or_none(self._search_regex(
             r'<meta[^>]+itemprop="interactionCount"[^>]+content="UserComments:(\d+)"',
-            webpage, 'comment count', fatal=False))
+            webpage, 'comment count', default=None))
 
         player_v5 = self._search_regex(
             [r'buildPlayer\(({.+?})\);\n',  # See https://github.com/rg3/youtube-dl/issues/7826
index 396873c6deea69791d517c1e111074d8d28d4b5d..939d1338c2ec53933b23997110845e99c95217f8 100644 (file)
@@ -9,13 +9,15 @@ from ..utils import (
     unified_strdate,
     compat_str,
     determine_ext,
+    ExtractorError,
 )
 
 
 class DisneyIE(InfoExtractor):
     _VALID_URL = r'''(?x)
-        https?://(?P<domain>(?:[^/]+\.)?(?:disney\.[a-z]{2,3}(?:\.[a-z]{2})?|disney(?:(?:me|latino)\.com|turkiye\.com\.tr)|starwars\.com))/(?:embed/|(?:[^/]+/)+[\w-]+-)(?P<id>[a-z0-9]{24})'''
+        https?://(?P<domain>(?:[^/]+\.)?(?:disney\.[a-z]{2,3}(?:\.[a-z]{2})?|disney(?:(?:me|latino)\.com|turkiye\.com\.tr)|(?:starwars|marvelkids)\.com))/(?:(?:embed/|(?:[^/]+/)+[\w-]+-)(?P<id>[a-z0-9]{24})|(?:[^/]+/)?(?P<display_id>[^/?#]+))'''
     _TESTS = [{
+        # Disney.EmbedVideo
         'url': 'http://video.disney.com/watch/moana-trailer-545ed1857afee5a0ec239977',
         'info_dict': {
             'id': '545ed1857afee5a0ec239977',
@@ -28,6 +30,20 @@ class DisneyIE(InfoExtractor):
             # m3u8 download
             'skip_download': True,
         }
+    }, {
+        # Grill.burger
+        'url': 'http://www.starwars.com/video/rogue-one-a-star-wars-story-intro-featurette',
+        'info_dict': {
+            'id': '5454e9f4e9804a552e3524c8',
+            'ext': 'mp4',
+            'title': '"Intro" Featurette: Rogue One: A Star Wars Story',
+            'upload_date': '20170104',
+            'description': 'Go behind-the-scenes of Rogue One: A Star Wars Story in this featurette with Director Gareth Edwards and the cast of the film.',
+        },
+        'params': {
+            # m3u8 download
+            'skip_download': True,
+        }
     }, {
         'url': 'http://videos.disneylatino.com/ver/spider-man-de-regreso-a-casa-primer-adelanto-543a33a1850bdcfcca13bae2',
         'only_matching': True,
@@ -43,31 +59,55 @@ class DisneyIE(InfoExtractor):
     }, {
         'url': 'http://www.starwars.com/embed/54690d1e6c42e5f09a0fb097',
         'only_matching': True,
+    }, {
+        'url': 'http://spiderman.marvelkids.com/embed/522900d2ced3c565e4cc0677',
+        'only_matching': True,
+    }, {
+        'url': 'http://spiderman.marvelkids.com/videos/contest-of-champions-part-four-clip-1',
+        'only_matching': True,
+    }, {
+        'url': 'http://disneyjunior.en.disneyme.com/dj/watch-my-friends-tigger-and-pooh-promo',
+        'only_matching': True,
+    }, {
+        'url': 'http://disneyjunior.disney.com/galactech-the-galactech-grab-galactech-an-admiral-rescue',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
-        domain, video_id = re.match(self._VALID_URL, url).groups()
-        webpage = self._download_webpage(
-            'http://%s/embed/%s' % (domain, video_id), video_id)
-        video_data = self._parse_json(self._search_regex(
-            r'Disney\.EmbedVideo=({.+});', webpage, 'embed data'), video_id)['video']
+        domain, video_id, display_id = re.match(self._VALID_URL, url).groups()
+        if not video_id:
+            webpage = self._download_webpage(url, display_id)
+            grill = re.sub(r'"\s*\+\s*"', '', self._search_regex(
+                r'Grill\.burger\s*=\s*({.+})\s*:',
+                webpage, 'grill data'))
+            page_data = next(s for s in self._parse_json(grill, display_id)['stack'] if s.get('type') == 'video')
+            video_data = page_data['data'][0]
+        else:
+            webpage = self._download_webpage(
+                'http://%s/embed/%s' % (domain, video_id), video_id)
+            page_data = self._parse_json(self._search_regex(
+                r'Disney\.EmbedVideo\s*=\s*({.+});',
+                webpage, 'embed data'), video_id)
+            video_data = page_data['video']
 
         for external in video_data.get('externals', []):
             if external.get('source') == 'vevo':
                 return self.url_result('vevo:' + external['data_id'], 'Vevo')
 
+        video_id = video_data['id']
         title = video_data['title']
 
         formats = []
         for flavor in video_data.get('flavors', []):
             flavor_format = flavor.get('format')
             flavor_url = flavor.get('url')
-            if not flavor_url or not re.match(r'https?://', flavor_url):
+            if not flavor_url or not re.match(r'https?://', flavor_url) or flavor_format == 'mp4_access':
                 continue
             tbr = int_or_none(flavor.get('bitrate'))
             if tbr == 99999:
                 formats.extend(self._extract_m3u8_formats(
-                    flavor_url, video_id, 'mp4', m3u8_id=flavor_format, fatal=False))
+                    flavor_url, video_id, 'mp4',
+                    m3u8_id=flavor_format, fatal=False))
                 continue
             format_id = []
             if flavor_format:
@@ -88,6 +128,10 @@ class DisneyIE(InfoExtractor):
                 'ext': ext,
                 'vcodec': 'none' if (width == 0 and height == 0) else None,
             })
+        if not formats and video_data.get('expired'):
+            raise ExtractorError(
+                '%s said: %s' % (self.IE_NAME, page_data['translations']['video_expired']),
+                expected=True)
         self._sort_formats(formats)
 
         subtitles = {}
index bcd9fe2a039550d36af3f1a63cb3cf8cc583cb2a..e7abc888988e9807ee46abc4d4e44f8276b9a084 100644 (file)
@@ -20,6 +20,7 @@ from ..utils import (
 class DramaFeverBaseIE(AMPIE):
     _LOGIN_URL = 'https://www.dramafever.com/accounts/login/'
     _NETRC_MACHINE = 'dramafever'
+    _GEO_COUNTRIES = ['US', 'CA']
 
     _CONSUMER_SECRET = 'DA59dtVXYLxajktV'
 
@@ -116,8 +117,9 @@ class DramaFeverIE(DramaFeverBaseIE):
                 'http://www.dramafever.com/amp/episode/feed.json?guid=%s' % video_id)
         except ExtractorError as e:
             if isinstance(e.cause, compat_HTTPError):
-                raise ExtractorError(
-                    'Currently unavailable in your country.', expected=True)
+                self.raise_geo_restricted(
+                    msg='Currently unavailable in your country',
+                    countries=self._GEO_COUNTRIES)
             raise
 
         series_id, episode_number = video_id.split('.')
index 6ca07a13d736b3909269aa1314d6e868150f8aa0..3f6268637c87a55d658b59f8a7f65a1d22c000f0 100644 (file)
@@ -1,67 +1,97 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
+import base64
+import json
+
 from .common import InfoExtractor
-from ..compat import compat_urlparse
+from ..compat import (
+    compat_urlparse,
+    compat_str,
+)
 from ..utils import (
-    remove_start,
-    sanitized_Request,
+    extract_attributes,
+    ExtractorError,
+    get_elements_by_class,
+    urlencode_postdata,
 )
 
 
 class EinthusanIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?einthusan\.com/movies/watch.php\?([^#]*?)id=(?P<id>[0-9]+)'
-    _TESTS = [
-        {
-            'url': 'http://www.einthusan.com/movies/watch.php?id=2447',
-            'md5': 'd71379996ff5b7f217eca034c34e3461',
-            'info_dict': {
-                'id': '2447',
-                'ext': 'mp4',
-                'title': 'Ek Villain',
-                'thumbnail': r're:^https?://.*\.jpg$',
-                'description': 'md5:9d29fc91a7abadd4591fb862fa560d93',
-            }
-        },
-        {
-            'url': 'http://www.einthusan.com/movies/watch.php?id=1671',
-            'md5': 'b16a6fd3c67c06eb7c79c8a8615f4213',
-            'info_dict': {
-                'id': '1671',
-                'ext': 'mp4',
-                'title': 'Soodhu Kavvuum',
-                'thumbnail': r're:^https?://.*\.jpg$',
-                'description': 'md5:b40f2bf7320b4f9414f3780817b2af8c',
-            }
-        },
-    ]
+    _VALID_URL = r'https?://einthusan\.tv/movie/watch/(?P<id>[^/?#&]+)'
+    _TESTS = [{
+        'url': 'https://einthusan.tv/movie/watch/9097/',
+        'md5': 'ff0f7f2065031b8a2cf13a933731c035',
+        'info_dict': {
+            'id': '9097',
+            'ext': 'mp4',
+            'title': 'Ae Dil Hai Mushkil',
+            'description': 'md5:33ef934c82a671a94652a9b4e54d931b',
+            'thumbnail': r're:^https?://.*\.jpg$',
+        }
+    }, {
+        'url': 'https://einthusan.tv/movie/watch/51MZ/?lang=hindi',
+        'only_matching': True,
+    }]
+
+    # reversed from jsoncrypto.prototype.decrypt() in einthusan-PGMovieWatcher.js
+    def _decrypt(self, encrypted_data, video_id):
+        return self._parse_json(base64.b64decode((
+            encrypted_data[:10] + encrypted_data[-1] + encrypted_data[12:-1]
+        ).encode('ascii')).decode('utf-8'), video_id)
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
 
-        request = sanitized_Request(url)
-        request.add_header('User-Agent', 'Mozilla/5.0 (Windows NT 5.2; WOW64; rv:43.0) Gecko/20100101 Firefox/43.0')
-        webpage = self._download_webpage(request, video_id)
+        webpage = self._download_webpage(url, video_id)
+
+        title = self._html_search_regex(r'<h3>([^<]+)</h3>', webpage, 'title')
+
+        player_params = extract_attributes(self._search_regex(
+            r'(<section[^>]+id="UIVideoPlayer"[^>]+>)', webpage, 'player parameters'))
+
+        page_id = self._html_search_regex(
+            '<html[^>]+data-pageid="([^"]+)"', webpage, 'page ID')
+        video_data = self._download_json(
+            'https://einthusan.tv/ajax/movie/watch/%s/' % video_id, video_id,
+            data=urlencode_postdata({
+                'xEvent': 'UIVideoPlayer.PingOutcome',
+                'xJson': json.dumps({
+                    'EJOutcomes': player_params['data-ejpingables'],
+                    'NativeHLS': False
+                }),
+                'arcVersion': 3,
+                'appVersion': 59,
+                'gorilla.csrf.Token': page_id,
+            }))['Data']
+
+        if isinstance(video_data, compat_str) and video_data.startswith('/ratelimited/'):
+            raise ExtractorError(
+                'Download rate reached. Please try again later.', expected=True)
+
+        ej_links = self._decrypt(video_data['EJLinks'], video_id)
+
+        formats = []
 
-        title = self._html_search_regex(
-            r'<h1><a[^>]+class=["\']movie-title["\'][^>]*>(.+?)</a></h1>',
-            webpage, 'title')
+        m3u8_url = ej_links.get('HLSLink')
+        if m3u8_url:
+            formats.extend(self._extract_m3u8_formats(
+                m3u8_url, video_id, ext='mp4', entry_protocol='m3u8_native'))
 
-        video_id = self._search_regex(
-            r'data-movieid=["\'](\d+)', webpage, 'video id', default=video_id)
+        mp4_url = ej_links.get('MP4Link')
+        if mp4_url:
+            formats.append({
+                'url': mp4_url,
+            })
 
-        m3u8_url = self._download_webpage(
-            'http://cdn.einthusan.com/geturl/%s/hd/London,Washington,Toronto,Dallas,San,Sydney/'
-            % video_id, video_id, headers={'Referer': url})
-        formats = self._extract_m3u8_formats(
-            m3u8_url, video_id, ext='mp4', entry_protocol='m3u8_native')
+        self._sort_formats(formats)
 
-        description = self._html_search_meta('description', webpage)
+        description = get_elements_by_class('synopsis', webpage)[0]
         thumbnail = self._html_search_regex(
-            r'''<a class="movie-cover-wrapper".*?><img src=["'](.*?)["'].*?/></a>''',
-            webpage, "thumbnail url", fatal=False)
+            r'''<img[^>]+src=(["'])(?P<url>(?!\1).+?/moviecovers/(?!\1).+?)\1''',
+            webpage, 'thumbnail url', fatal=False, group='url')
         if thumbnail is not None:
-            thumbnail = compat_urlparse.urljoin(url, remove_start(thumbnail, '..'))
+            thumbnail = compat_urlparse.urljoin(url, thumbnail)
 
         return {
             'id': video_id,
index 74bbc5c51c576880465e76453604891b6b5f48ca..e0a13dd76cd4069ff40bcc4c771fd5b9eeb75498 100644 (file)
@@ -1,13 +1,9 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
-import json
-
 from .common import InfoExtractor
-from ..utils import (
-    ExtractorError,
-    NO_DEFAULT,
-)
+from .kaltura import KalturaIE
+from ..utils import NO_DEFAULT
 
 
 class EllenTVIE(InfoExtractor):
@@ -65,7 +61,7 @@ class EllenTVIE(InfoExtractor):
             if partner_id and kaltura_id:
                 break
 
-        return self.url_result('kaltura:%s:%s' % (partner_id, kaltura_id), 'Kaltura')
+        return self.url_result('kaltura:%s:%s' % (partner_id, kaltura_id), KalturaIE.ie_key())
 
 
 class EllenTVClipsIE(InfoExtractor):
@@ -77,14 +73,14 @@ class EllenTVClipsIE(InfoExtractor):
             'id': 'meryl-streep-vanessa-hudgens',
             'title': 'Meryl Streep, Vanessa Hudgens',
         },
-        'playlist_mincount': 7,
+        'playlist_mincount': 5,
     }
 
     def _real_extract(self, url):
         playlist_id = self._match_id(url)
 
         webpage = self._download_webpage(url, playlist_id)
-        playlist = self._extract_playlist(webpage)
+        playlist = self._extract_playlist(webpage, playlist_id)
 
         return {
             '_type': 'playlist',
@@ -93,16 +89,13 @@ class EllenTVClipsIE(InfoExtractor):
             'entries': self._extract_entries(playlist)
         }
 
-    def _extract_playlist(self, webpage):
+    def _extract_playlist(self, webpage, playlist_id):
         json_string = self._search_regex(r'playerView.addClips\(\[\{(.*?)\}\]\);', webpage, 'json')
-        try:
-            return json.loads('[{' + json_string + '}]')
-        except ValueError as ve:
-            raise ExtractorError('Failed to download JSON', cause=ve)
+        return self._parse_json('[{' + json_string + '}]', playlist_id)
 
     def _extract_entries(self, playlist):
         return [
             self.url_result(
                 'kaltura:%s:%s' % (item['kaltura_partner_id'], item['kaltura_entry_id']),
-                'Kaltura')
+                KalturaIE.ie_key(), video_id=item['kaltura_entry_id'])
             for item in playlist]
index 99e00cf3c68ea93fc00d5301e1e6be5567a72bff..b89f6db62fb283b360632200319d615ab0c2dfff 100644 (file)
@@ -39,6 +39,18 @@ class ElPaisIE(InfoExtractor):
             'description': 'La nave portaba cientos de ánforas y se hundió cerca de la isla de Cabrera por razones desconocidas',
             'upload_date': '20170127',
         },
+    }, {
+        'url': 'http://epv.elpais.com/epv/2017/02/14/programa_la_voz_de_inaki/1487062137_075943.html',
+        'info_dict': {
+            'id': '1487062137_075943',
+            'ext': 'mp4',
+            'title': 'Disyuntivas',
+            'description': 'md5:a0fb1485c4a6a8a917e6f93878e66218',
+            'upload_date': '20170214',
+        },
+        'params': {
+            'skip_download': True,
+        },
     }]
 
     def _real_extract(self, url):
@@ -59,14 +71,15 @@ class ElPaisIE(InfoExtractor):
         video_url = prefix + video_suffix
         thumbnail_suffix = self._search_regex(
             r"(?:URLMediaStill|urlFotogramaFijo_\d+)\s*=\s*url_cache\s*\+\s*'([^']+)'",
-            webpage, 'thumbnail URL', fatal=False)
+            webpage, 'thumbnail URL', default=None)
         thumbnail = (
             None if thumbnail_suffix is None
-            else prefix + thumbnail_suffix)
+            else prefix + thumbnail_suffix) or self._og_search_thumbnail(webpage)
         title = self._html_search_regex(
-            (r"tituloVideo\s*=\s*'([^']+)'", webpage, 'title',
-             r'<h2 class="entry-header entry-title.*?>(.*?)</h2>'),
-            webpage, 'title')
+            (r"tituloVideo\s*=\s*'([^']+)'",
+             r'<h2 class="entry-header entry-title.*?>(.*?)</h2>',
+             r'<h1[^>]+class="titulo"[^>]*>([^<]+)'),
+            webpage, 'title', default=None) or self._og_search_title(webpage)
         upload_date = unified_strdate(self._search_regex(
             r'<p class="date-header date-int updated"\s+title="([^"]+)">',
             webpage, 'upload date', default=None) or self._html_search_meta(
index 12cda36ccfc1088274a93377b976c0084f0e6c33..83a170fa708aa1c745f5abe9f095fba25a4e71e4 100644 (file)
@@ -202,6 +202,7 @@ from .commonprotocols import (
     RtmpIE,
 )
 from .condenast import CondeNastIE
+from .corus import CorusIE
 from .cracked import CrackedIE
 from .crackle import CrackleIE
 from .criterion import CriterionIE
@@ -381,10 +382,7 @@ from .heise import HeiseIE
 from .hellporno import HellPornoIE
 from .helsinki import HelsinkiIE
 from .hentaistigma import HentaiStigmaIE
-from .hgtv import (
-    HGTVIE,
-    HGTVComShowIE,
-)
+from .hgtv import HGTVComShowIE
 from .historicfilms import HistoricFilmsIE
 from .hitbox import HitboxIE, HitboxLiveIE
 from .hitrecord import HitRecordIE
@@ -696,6 +694,8 @@ from .ondemandkorea import OnDemandKoreaIE
 from .onet import (
     OnetIE,
     OnetChannelIE,
+    OnetMVPIE,
+    OnetPlIE,
 )
 from .onionstudios import OnionStudiosIE
 from .ooyala import (
@@ -838,6 +838,7 @@ from .sbs import SBSIE
 from .scivee import SciVeeIE
 from .screencast import ScreencastIE
 from .screencastomatic import ScreencastOMaticIE
+from .scrippsnetworks import ScrippsNetworksWatchIE
 from .seeker import SeekerIE
 from .senateisvp import SenateISVPIE
 from .sendtonews import SendtoNewsIE
@@ -851,6 +852,7 @@ from .shared import (
 from .showroomlive import ShowRoomLiveIE
 from .sina import SinaIE
 from .sixplay import SixPlayIE
+from .skylinewebcams import SkylineWebcamsIE
 from .skynewsarabia import (
     SkyNewsArabiaIE,
     SkyNewsArabiaArticleIE,
@@ -895,6 +897,7 @@ from .sport5 import Sport5IE
 from .sportbox import SportBoxEmbedIE
 from .sportdeutschland import SportDeutschlandIE
 from .sportschau import SportschauIE
+from .sprout import SproutIE
 from .srgssr import (
     SRGSSRIE,
     SRGSSRPlayIE,
@@ -1007,6 +1010,7 @@ from .tvc import (
 )
 from .tvigle import TvigleIE
 from .tvland import TVLandIE
+from .tvn24 import TVN24IE
 from .tvnoe import TVNoeIE
 from .tvp import (
     TVPEmbedIE,
@@ -1017,6 +1021,7 @@ from .tvplay import (
     TVPlayIE,
     ViafreeIE,
 )
+from .tvplayer import TVPlayerIE
 from .tweakers import TweakersIE
 from .twentyfourvideo import TwentyFourVideoIE
 from .twentymin import TwentyMinutenIE
@@ -1146,6 +1151,7 @@ from .vlive import (
     VLiveChannelIE
 )
 from .vodlocker import VodlockerIE
+from .vodpl import VODPlIE
 from .vodplatform import VODPlatformIE
 from .voicerepublic import VoiceRepublicIE
 from .voxmedia import VoxMediaIE
index b325c82004b8aedc612cf3656c54816dcaf48e94..70b8c95c5074dda5c553f1c1d1869165320a4707 100644 (file)
@@ -1,3 +1,4 @@
+# coding: utf-8
 from __future__ import unicode_literals
 
 import re
@@ -134,6 +135,46 @@ class FacebookIE(InfoExtractor):
             'upload_date': '20161030',
             'uploader': 'CNN',
         },
+    }, {
+        # bigPipe.onPageletArrive ... onPageletArrive pagelet_group_mall
+        'url': 'https://www.facebook.com/yaroslav.korpan/videos/1417995061575415/',
+        'info_dict': {
+            'id': '1417995061575415',
+            'ext': 'mp4',
+            'title': 'md5:a7b86ca673f51800cd54687b7f4012fe',
+            'timestamp': 1486648217,
+            'upload_date': '20170209',
+            'uploader': 'Yaroslav Korpan',
+        },
+        'params': {
+            'skip_download': True,
+        },
+    }, {
+        'url': 'https://www.facebook.com/LaGuiaDelVaron/posts/1072691702860471',
+        'info_dict': {
+            'id': '1072691702860471',
+            'ext': 'mp4',
+            'title': 'md5:ae2d22a93fbb12dad20dc393a869739d',
+            'timestamp': 1477305000,
+            'upload_date': '20161024',
+            'uploader': 'La Guía Del Varón',
+        },
+        'params': {
+            'skip_download': True,
+        },
+    }, {
+        'url': 'https://www.facebook.com/groups/1024490957622648/permalink/1396382447100162/',
+        'info_dict': {
+            'id': '1396382447100162',
+            'ext': 'mp4',
+            'title': 'md5:e2d2700afdf84e121f5d0f999bad13a3',
+            'timestamp': 1486035494,
+            'upload_date': '20170202',
+            'uploader': 'Elisabeth Ahtn',
+        },
+        'params': {
+            'skip_download': True,
+        },
     }, {
         'url': 'https://www.facebook.com/video.php?v=10204634152394104',
         'only_matching': True,
@@ -249,7 +290,7 @@ class FacebookIE(InfoExtractor):
             for item in instances:
                 if item[1][0] == 'VideoConfig':
                     video_item = item[2][0]
-                    if video_item.get('video_id') == video_id:
+                    if video_item.get('video_id'):
                         return video_item['videoData']
 
         server_js_data = self._parse_json(self._search_regex(
@@ -262,7 +303,7 @@ class FacebookIE(InfoExtractor):
         if not video_data:
             server_js_data = self._parse_json(
                 self._search_regex(
-                    r'bigPipe\.onPageletArrive\(({.+?})\)\s*;\s*}\s*\)\s*,\s*["\']onPageletArrive\s+stream_pagelet',
+                    r'bigPipe\.onPageletArrive\(({.+?})\)\s*;\s*}\s*\)\s*,\s*["\']onPageletArrive\s+(?:stream_pagelet|pagelet_group_mall)',
                     webpage, 'js data', default='{}'),
                 video_id, transform_source=js_to_json, fatal=False)
             if server_js_data:
index 1c233f038143bbdcda4630dc31c02faa0428bd6c..9868ca6d0b80397e6490315e8d4b3dab0b3c1bf2 100644 (file)
@@ -20,6 +20,7 @@ from ..utils import (
     float_or_none,
     HEADRequest,
     is_html,
+    js_to_json,
     orderedSet,
     sanitized_Request,
     smuggle_url,
@@ -961,6 +962,16 @@ class GenericIE(InfoExtractor):
                 'skip_download': True,
             }
         },
+        # Complex jwplayer
+        {
+            'url': 'http://www.indiedb.com/games/king-machine/videos',
+            'info_dict': {
+                'id': 'videos',
+                'ext': 'mp4',
+                'title': 'king machine trailer 1',
+                'thumbnail': r're:^https?://.*\.jpg$',
+            },
+        },
         # rtl.nl embed
         {
             'url': 'http://www.rtlnieuws.nl/nieuws/buitenland/aanslagen-kopenhagen',
@@ -991,19 +1002,6 @@ class GenericIE(InfoExtractor):
                 'title': 'Os Guinness // Is It Fools Talk? // Unbelievable? Conference 2014',
             },
         },
-        # Kaltura embed protected with referrer
-        {
-            'url': 'http://www.disney.nl/disney-channel/filmpjes/achter-de-schermen#/videoId/violetta-achter-de-schermen-ruggero',
-            'info_dict': {
-                'id': '1_g4fbemnq',
-                'ext': 'mp4',
-                'title': 'Violetta - Achter De Schermen - Ruggero',
-                'description': 'Achter de schermen met Ruggero',
-                'timestamp': 1435133761,
-                'upload_date': '20150624',
-                'uploader_id': 'echojecka',
-            },
-        },
         # Kaltura embed with single quotes
         {
             'url': 'http://fod.infobase.com/p_ViewPlaylist.aspx?AssignmentID=NUN8ZY',
@@ -1503,7 +1501,12 @@ class GenericIE(InfoExtractor):
                 'skip_download': True,
             },
             'add_ie': [VideoPressIE.ie_key()],
-        }
+        },
+        {
+            # ThePlatform embedded with whitespaces in URLs
+            'url': 'http://www.golfchannel.com/topics/shows/golftalkcentral.htm',
+            'only_matching': True,
+        },
         # {
         #     # TODO: find another test
         #     # http://schema.org/VideoObject
@@ -2350,8 +2353,9 @@ class GenericIE(InfoExtractor):
                 'Channel': 'channel',
                 'ChannelList': 'channel_list',
             }
-            return self.url_result('limelight:%s:%s' % (
-                lm[mobj.group(1)], mobj.group(2)), 'Limelight%s' % mobj.group(1), mobj.group(2))
+            return self.url_result(smuggle_url('limelight:%s:%s' % (
+                lm[mobj.group(1)], mobj.group(2)), {'source_url': url}),
+                'Limelight%s' % mobj.group(1), mobj.group(2))
 
         mobj = re.search(
             r'''(?sx)
@@ -2361,7 +2365,9 @@ class GenericIE(InfoExtractor):
                         value=(["\'])(?:(?!\3).)*mediaId=(?P<id>[a-z0-9]{32})
             ''', webpage)
         if mobj:
-            return self.url_result('limelight:media:%s' % mobj.group('id'))
+            return self.url_result(smuggle_url(
+                'limelight:media:%s' % mobj.group('id'),
+                {'source_url': url}), 'LimelightMedia', mobj.group('id'))
 
         # Look for AdobeTVVideo embeds
         mobj = re.search(
@@ -2498,6 +2504,15 @@ class GenericIE(InfoExtractor):
                 self._sort_formats(entry['formats'])
             return self.playlist_result(entries)
 
+        jwplayer_data_str = self._find_jwplayer_data(webpage)
+        if jwplayer_data_str:
+            try:
+                jwplayer_data = self._parse_json(
+                    jwplayer_data_str, video_id, transform_source=js_to_json)
+                return self._parse_jwplayer_data(jwplayer_data, video_id)
+            except ExtractorError:
+                pass
+
         def check_video(vurl):
             if YoutubeIE.suitable(vurl):
                 return True
index a34779b169ddf852d3378389f07189c1b051d38c..21ed846b25c16df23de22897b1110fb0ab6ff6dd 100644 (file)
@@ -3,7 +3,7 @@ from __future__ import unicode_literals
 
 import re
 
-from .common import InfoExtractor
+from .adobepass import AdobePassIE
 from ..utils import (
     int_or_none,
     determine_ext,
@@ -13,15 +13,30 @@ from ..utils import (
 )
 
 
-class GoIE(InfoExtractor):
-    _BRANDS = {
-        'abc': '001',
-        'freeform': '002',
-        'watchdisneychannel': '004',
-        'watchdisneyjunior': '008',
-        'watchdisneyxd': '009',
+class GoIE(AdobePassIE):
+    _SITE_INFO = {
+        'abc': {
+            'brand': '001',
+            'requestor_id': 'ABC',
+        },
+        'freeform': {
+            'brand': '002',
+            'requestor_id': 'ABCFamily',
+        },
+        'watchdisneychannel': {
+            'brand': '004',
+            'requestor_id': 'Disney',
+        },
+        'watchdisneyjunior': {
+            'brand': '008',
+            'requestor_id': 'DisneyJunior',
+        },
+        'watchdisneyxd': {
+            'brand': '009',
+            'requestor_id': 'DisneyXD',
+        }
     }
-    _VALID_URL = r'https?://(?:(?P<sub_domain>%s)\.)?go\.com/(?:[^/]+/)*(?:vdka(?P<id>\w+)|season-\d+/\d+-(?P<display_id>[^/?#]+))' % '|'.join(_BRANDS.keys())
+    _VALID_URL = r'https?://(?:(?P<sub_domain>%s)\.)?go\.com/(?:[^/]+/)*(?:vdka(?P<id>\w+)|season-\d+/\d+-(?P<display_id>[^/?#]+))' % '|'.join(_SITE_INFO.keys())
     _TESTS = [{
         'url': 'http://abc.go.com/shows/castle/video/most-recent/vdka0_g86w5onx',
         'info_dict': {
@@ -47,7 +62,8 @@ class GoIE(InfoExtractor):
                 # There may be inner quotes, e.g. data-video-id="'VDKA3609139'"
                 # from http://freeform.go.com/shows/shadowhunters/episodes/season-2/1-this-guilty-blood
                 r'data-video-id=["\']*VDKA(\w+)', webpage, 'video id')
-        brand = self._BRANDS[sub_domain]
+        site_info = self._SITE_INFO[sub_domain]
+        brand = site_info['brand']
         video_data = self._download_json(
             'http://api.contents.watchabc.go.com/vp2/ws/contents/3000/videos/%s/001/-1/-1/-1/%s/-1/-1.json' % (brand, video_id),
             video_id)['video'][0]
@@ -62,28 +78,60 @@ class GoIE(InfoExtractor):
             ext = determine_ext(asset_url)
             if ext == 'm3u8':
                 video_type = video_data.get('type')
-                if video_type == 'lf':
-                    entitlement = self._download_json(
-                        'https://api.entitlement.watchabc.go.com/vp2/ws-secure/entitlement/2020/authorize.json',
-                        video_id, data=urlencode_postdata({
-                            'video_id': video_data['id'],
-                            'video_type': video_type,
-                            'brand': brand,
-                            'device': '001',
-                        }))
-                    errors = entitlement.get('errors', {}).get('errors', [])
-                    if errors:
-                        error_message = ', '.join([error['message'] for error in errors])
-                        raise ExtractorError('%s said: %s' % (self.IE_NAME, error_message), expected=True)
-                    asset_url += '?' + entitlement['uplynkData']['sessionKey']
+                data = {
+                    'video_id': video_data['id'],
+                    'video_type': video_type,
+                    'brand': brand,
+                    'device': '001',
+                }
+                if video_data.get('accesslevel') == '1':
+                    requestor_id = site_info['requestor_id']
+                    resource = self._get_mvpd_resource(
+                        requestor_id, title, video_id, None)
+                    auth = self._extract_mvpd_auth(
+                        url, video_id, requestor_id, resource)
+                    data.update({
+                        'token': auth,
+                        'token_type': 'ap',
+                        'adobe_requestor_id': requestor_id,
+                    })
+                else:
+                    self._initialize_geo_bypass(['US'])
+                entitlement = self._download_json(
+                    'https://api.entitlement.watchabc.go.com/vp2/ws-secure/entitlement/2020/authorize.json',
+                    video_id, data=urlencode_postdata(data), headers=self.geo_verification_headers())
+                errors = entitlement.get('errors', {}).get('errors', [])
+                if errors:
+                    for error in errors:
+                        if error.get('code') == 1002:
+                            self.raise_geo_restricted(
+                                error['message'], countries=['US'])
+                    error_message = ', '.join([error['message'] for error in errors])
+                    raise ExtractorError('%s said: %s' % (self.IE_NAME, error_message), expected=True)
+                asset_url += '?' + entitlement['uplynkData']['sessionKey']
                 formats.extend(self._extract_m3u8_formats(
                     asset_url, video_id, 'mp4', m3u8_id=format_id or 'hls', fatal=False))
             else:
-                formats.append({
+                f = {
                     'format_id': format_id,
                     'url': asset_url,
                     'ext': ext,
-                })
+                }
+                if re.search(r'(?:/mp4/source/|_source\.mp4)', asset_url):
+                    f.update({
+                        'format_id': ('%s-' % format_id if format_id else '') + 'SOURCE',
+                        'preference': 1,
+                    })
+                else:
+                    mobj = re.search(r'/(\d+)x(\d+)/', asset_url)
+                    if mobj:
+                        height = int(mobj.group(2))
+                        f.update({
+                            'format_id': ('%s-' % format_id if format_id else '') + '%dP' % height,
+                            'width': int(mobj.group(1)),
+                            'height': height,
+                        })
+                formats.append(f)
         self._sort_formats(formats)
 
         subtitles = {}
index 1629cdb8d5a7ca584321474cb160f9907884dd69..382f32771db513fc1c41e81f14ce42308ad50276 100644 (file)
@@ -6,59 +6,58 @@ from ..utils import (
     determine_ext,
     int_or_none,
     parse_iso8601,
+    xpath_text,
 )
 
 
 class HeiseIE(InfoExtractor):
-    _VALID_URL = r'''(?x)
-        https?://(?:www\.)?heise\.de/video/artikel/
-        .+?(?P<id>[0-9]+)\.html(?:$|[?#])
-    '''
-    _TEST = {
-        'url': (
-            'http://www.heise.de/video/artikel/Podcast-c-t-uplink-3-3-Owncloud-Tastaturen-Peilsender-Smartphone-2404147.html'
-        ),
+    _VALID_URL = r'https?://(?:www\.)?heise\.de/(?:[^/]+/)+[^/]+-(?P<id>[0-9]+)\.html'
+    _TESTS = [{
+        'url': 'http://www.heise.de/video/artikel/Podcast-c-t-uplink-3-3-Owncloud-Tastaturen-Peilsender-Smartphone-2404147.html',
         'md5': 'ffed432483e922e88545ad9f2f15d30e',
         'info_dict': {
             'id': '2404147',
             'ext': 'mp4',
-            'title': (
-                "Podcast: c't uplink 3.3 – Owncloud / Tastaturen / Peilsender Smartphone"
-            ),
+            'title': "Podcast: c't uplink 3.3 – Owncloud / Tastaturen / Peilsender Smartphone",
             'format_id': 'mp4_720p',
             'timestamp': 1411812600,
             'upload_date': '20140927',
-            'description': 'In uplink-Episode 3.3 geht es darum, wie man sich von Cloud-Anbietern emanzipieren kann, worauf man beim Kauf einer Tastatur achten sollte und was Smartphones über uns verraten.',
-            'thumbnail': r're:^https?://.*\.jpe?g$',
+            'description': 'md5:c934cbfb326c669c2bcabcbe3d3fcd20',
+            'thumbnail': r're:^https?://.*/gallery/$',
         }
-    }
+    }, {
+        'url': 'http://www.heise.de/ct/artikel/c-t-uplink-3-3-Owncloud-Tastaturen-Peilsender-Smartphone-2403911.html',
+        'only_matching': True,
+    }, {
+        'url': 'http://www.heise.de/newsticker/meldung/c-t-uplink-Owncloud-Tastaturen-Peilsender-Smartphone-2404251.html?wt_mc=rss.ho.beitrag.atom',
+        'only_matching': True,
+    }, {
+        'url': 'http://www.heise.de/ct/ausgabe/2016-12-Spiele-3214137.html',
+        'only_matching': True,
+    }]
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
 
         container_id = self._search_regex(
-            r'<div class="videoplayerjw".*?data-container="([0-9]+)"',
+            r'<div class="videoplayerjw"[^>]+data-container="([0-9]+)"',
             webpage, 'container ID')
         sequenz_id = self._search_regex(
-            r'<div class="videoplayerjw".*?data-sequenz="([0-9]+)"',
+            r'<div class="videoplayerjw"[^>]+data-sequenz="([0-9]+)"',
             webpage, 'sequenz ID')
-        data_url = 'http://www.heise.de/videout/feed?container=%s&sequenz=%s' % (container_id, sequenz_id)
-        doc = self._download_xml(data_url, video_id)
 
-        info = {
-            'id': video_id,
-            'thumbnail': self._og_search_thumbnail(webpage),
-            'timestamp': parse_iso8601(
-                self._html_search_meta('date', webpage)),
-            'description': self._og_search_description(webpage),
-        }
+        title = self._html_search_meta('fulltitle', webpage, default=None)
+        if not title or title == "c't":
+            title = self._search_regex(
+                r'<div[^>]+class="videoplayerjw"[^>]+data-title="([^"]+)"',
+                webpage, 'title')
 
-        title = self._html_search_meta('fulltitle', webpage)
-        if title:
-            info['title'] = title
-        else:
-            info['title'] = self._og_search_title(webpage)
+        doc = self._download_xml(
+            'http://www.heise.de/videout/feed', video_id, query={
+                'container': container_id,
+                'sequenz': sequenz_id,
+            })
 
         formats = []
         for source_node in doc.findall('.//{http://rss.jwpcdn.com/}source'):
@@ -74,6 +73,18 @@ class HeiseIE(InfoExtractor):
                 'height': height,
             })
         self._sort_formats(formats)
-        info['formats'] = formats
 
-        return info
+        description = self._og_search_description(
+            webpage, default=None) or self._html_search_meta(
+            'description', webpage)
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': description,
+            'thumbnail': (xpath_text(doc, './/{http://rss.jwpcdn.com/}image') or
+                          self._og_search_thumbnail(webpage)),
+            'timestamp': parse_iso8601(
+                self._html_search_meta('date', webpage)),
+            'formats': formats,
+        }
index 69543bff2cb3c844b8ebe82d38cd33672e168bbe..e854300c71b6b5c2592f9c9c84b386831578c482 100644 (file)
@@ -2,50 +2,6 @@
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
-from ..utils import (
-    int_or_none,
-    js_to_json,
-    smuggle_url,
-)
-
-
-class HGTVIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?hgtv\.ca/[^/]+/video/(?P<id>[^/]+)/video.html'
-    _TEST = {
-        'url': 'http://www.hgtv.ca/homefree/video/overnight-success/video.html?v=738081859718&p=1&s=da#video',
-        'md5': '',
-        'info_dict': {
-            'id': 'aFH__I_5FBOX',
-            'ext': 'mp4',
-            'title': 'Overnight Success',
-            'description': 'After weeks of hard work, high stakes, breakdowns and pep talks, the final 2 contestants compete to win the ultimate dream.',
-            'uploader': 'SHWM-NEW',
-            'timestamp': 1470320034,
-            'upload_date': '20160804',
-        },
-        'params': {
-            # m3u8 download
-            'skip_download': True,
-        },
-    }
-
-    def _real_extract(self, url):
-        display_id = self._match_id(url)
-        webpage = self._download_webpage(url, display_id)
-        embed_vars = self._parse_json(self._search_regex(
-            r'(?s)embed_vars\s*=\s*({.*?});',
-            webpage, 'embed vars'), display_id, js_to_json)
-        return {
-            '_type': 'url_transparent',
-            'url': smuggle_url(
-                'http://link.theplatform.com/s/dtjsEC/%s?mbr=true&manifest=m3u' % embed_vars['pid'], {
-                    'force_smil_url': True
-                }),
-            'series': embed_vars.get('show'),
-            'season_number': int_or_none(embed_vars.get('season')),
-            'episode_number': int_or_none(embed_vars.get('episode')),
-            'ie_key': 'ThePlatform',
-        }
 
 
 class HGTVComShowIE(InfoExtractor):
index f05d765d650f540abe7f684037cc957ff2ab0b77..3a7a66a343992879992d677af8f9c318dbd433ff 100644 (file)
@@ -34,11 +34,9 @@ class HotStarIE(InfoExtractor):
         'only_matching': True,
     }]
 
-    _GET_CONTENT_TEMPLATE = 'http://account.hotstar.com/AVS/besc?action=GetAggregatedContentDetails&channel=PCTV&contentId=%s'
-    _GET_CDN_TEMPLATE = 'http://getcdn.hotstar.com/AVS/besc?action=GetCDN&asJson=Y&channel=%s&id=%s&type=%s'
-
-    def _download_json(self, url_or_request, video_id, note='Downloading JSON metadata', fatal=True):
-        json_data = super(HotStarIE, self)._download_json(url_or_request, video_id, note, fatal=fatal)
+    def _download_json(self, url_or_request, video_id, note='Downloading JSON metadata', fatal=True, query=None):
+        json_data = super(HotStarIE, self)._download_json(
+            url_or_request, video_id, note, fatal=fatal, query=query)
         if json_data['resultCode'] != 'OK':
             if fatal:
                 raise ExtractorError(json_data['errorDescription'])
@@ -48,20 +46,37 @@ class HotStarIE(InfoExtractor):
     def _real_extract(self, url):
         video_id = self._match_id(url)
         video_data = self._download_json(
-            self._GET_CONTENT_TEMPLATE % video_id,
-            video_id)['contentInfo'][0]
+            'http://account.hotstar.com/AVS/besc', video_id, query={
+                'action': 'GetAggregatedContentDetails',
+                'channel': 'PCTV',
+                'contentId': video_id,
+            })['contentInfo'][0]
+        title = video_data['episodeTitle']
+
+        if video_data.get('encrypted') == 'Y':
+            raise ExtractorError('This video is DRM protected.', expected=True)
 
         formats = []
-        # PCTV for extracting f4m manifest
-        for f in ('TABLET',):
+        for f in ('JIO',):
             format_data = self._download_json(
-                self._GET_CDN_TEMPLATE % (f, video_id, 'VOD'),
-                video_id, 'Downloading %s JSON metadata' % f, fatal=False)
+                'http://getcdn.hotstar.com/AVS/besc',
+                video_id, 'Downloading %s JSON metadata' % f,
+                fatal=False, query={
+                    'action': 'GetCDN',
+                    'asJson': 'Y',
+                    'channel': f,
+                    'id': video_id,
+                    'type': 'VOD',
+                })
             if format_data:
-                format_url = format_data['src']
+                format_url = format_data.get('src')
+                if not format_url:
+                    continue
                 ext = determine_ext(format_url)
                 if ext == 'm3u8':
-                    formats.extend(self._extract_m3u8_formats(format_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
+                    formats.extend(self._extract_m3u8_formats(
+                        format_url, video_id, 'mp4',
+                        m3u8_id='hls', fatal=False))
                 elif ext == 'f4m':
                     # produce broken files
                     continue
@@ -75,9 +90,12 @@ class HotStarIE(InfoExtractor):
 
         return {
             'id': video_id,
-            'title': video_data['episodeTitle'],
+            'title': title,
             'description': video_data.get('description'),
             'duration': int_or_none(video_data.get('duration')),
             'timestamp': int_or_none(video_data.get('broadcastDate')),
             'formats': formats,
+            'episode': title,
+            'episode_number': int_or_none(video_data.get('episodeNumber')),
+            'series': video_data.get('contentTitle'),
         }
index 98f408c18650cf8393869432a861a3486575b533..c1921cbcfa35c677c365ed9423c8f6b0e74fb8b3 100644 (file)
@@ -3,6 +3,7 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
+from ..compat import compat_str
 from ..utils import (
     get_element_by_attribute,
     int_or_none,
@@ -50,6 +51,33 @@ class InstagramIE(InfoExtractor):
         'params': {
             'skip_download': True,
         },
+    }, {
+        # multi video post
+        'url': 'https://www.instagram.com/p/BQ0eAlwhDrw/',
+        'playlist': [{
+            'info_dict': {
+                'id': 'BQ0dSaohpPW',
+                'ext': 'mp4',
+                'title': 'Video 1',
+            },
+        }, {
+            'info_dict': {
+                'id': 'BQ0dTpOhuHT',
+                'ext': 'mp4',
+                'title': 'Video 2',
+            },
+        }, {
+            'info_dict': {
+                'id': 'BQ0dT7RBFeF',
+                'ext': 'mp4',
+                'title': 'Video 3',
+            },
+        }],
+        'info_dict': {
+            'id': 'BQ0eAlwhDrw',
+            'title': 'Post by instagram',
+            'description': 'md5:0f9203fc6a2ce4d228da5754bcf54957',
+        },
     }, {
         'url': 'https://instagram.com/p/-Cmh1cukG2/',
         'only_matching': True,
@@ -113,6 +141,32 @@ class InstagramIE(InfoExtractor):
                     'timestamp': int_or_none(comment.get('created_at')),
                 } for comment in media.get(
                     'comments', {}).get('nodes', []) if comment.get('text')]
+                if not video_url:
+                    edges = try_get(
+                        media, lambda x: x['edge_sidecar_to_children']['edges'],
+                        list) or []
+                    if edges:
+                        entries = []
+                        for edge_num, edge in enumerate(edges, start=1):
+                            node = try_get(edge, lambda x: x['node'], dict)
+                            if not node:
+                                continue
+                            node_video_url = try_get(node, lambda x: x['video_url'], compat_str)
+                            if not node_video_url:
+                                continue
+                            entries.append({
+                                'id': node.get('shortcode') or node['id'],
+                                'title': 'Video %d' % edge_num,
+                                'url': node_video_url,
+                                'thumbnail': node.get('display_url'),
+                                'width': int_or_none(try_get(node, lambda x: x['dimensions']['width'])),
+                                'height': int_or_none(try_get(node, lambda x: x['dimensions']['height'])),
+                                'view_count': int_or_none(node.get('video_view_count')),
+                            })
+                        return self.playlist_result(
+                            entries, video_id,
+                            'Post by %s' % uploader_id if uploader_id else None,
+                            description)
 
         if not video_url:
             video_url = self._og_search_video_url(webpage, secure=False)
index 0fe5768834cef9faed9226ebc8418661306f2b54..a29e6a5badd2ef0403a2b8b4afe1c1ef2f926d4c 100644 (file)
@@ -8,12 +8,12 @@ from .common import InfoExtractor
 from ..utils import (
     determine_ext,
     js_to_json,
-    sanitized_Request,
 )
 
 
 class IPrimaIE(InfoExtractor):
     _VALID_URL = r'https?://play\.iprima\.cz/(?:.+/)?(?P<id>[^?#]+)'
+    _GEO_BYPASS = False
 
     _TESTS = [{
         'url': 'http://play.iprima.cz/gondici-s-r-o-33',
@@ -29,6 +29,10 @@ class IPrimaIE(InfoExtractor):
     }, {
         'url': 'http://play.iprima.cz/particka/particka-92',
         'only_matching': True,
+    }, {
+        # geo restricted
+        'url': 'http://play.iprima.cz/closer-nove-pripady/closer-nove-pripady-iv-1',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
@@ -38,11 +42,13 @@ class IPrimaIE(InfoExtractor):
 
         video_id = self._search_regex(r'data-product="([^"]+)">', webpage, 'real id')
 
-        req = sanitized_Request(
-            'http://play.iprima.cz/prehravac/init?_infuse=1'
-            '&_ts=%s&productId=%s' % (round(time.time()), video_id))
-        req.add_header('Referer', url)
-        playerpage = self._download_webpage(req, video_id, note='Downloading player')
+        playerpage = self._download_webpage(
+            'http://play.iprima.cz/prehravac/init',
+            video_id, note='Downloading player', query={
+                '_infuse': 1,
+                '_ts': round(time.time()),
+                'productId': video_id,
+            }, headers={'Referer': url})
 
         formats = []
 
@@ -82,7 +88,7 @@ class IPrimaIE(InfoExtractor):
                 extract_formats(src)
 
         if not formats and '>GEO_IP_NOT_ALLOWED<' in playerpage:
-            self.raise_geo_restricted()
+            self.raise_geo_restricted(countries=['CZ'])
 
         self._sort_formats(formats)
 
index 01c7b30428f8a750c9932b0ea734f795c09866d6..2af6a6db4da0dd5186436672ae95905c9612df6a 100644 (file)
@@ -173,11 +173,12 @@ class IqiyiIE(InfoExtractor):
         }
     }, {
         'url': 'http://www.iqiyi.com/v_19rrhnnclk.html',
-        'md5': '667171934041350c5de3f5015f7f1152',
+        'md5': 'b7dc800a4004b1b57749d9abae0472da',
         'info_dict': {
             'id': 'e3f585b550a280af23c98b6cb2be19fb',
             'ext': 'mp4',
-            'title': '名侦探柯南 国语版:第752集 迫近灰原秘密的黑影 下篇',
+            # This can be either Simplified Chinese or Traditional Chinese
+            'title': r're:^(?:名侦探柯南 国语版:第752集 迫近灰原秘密的黑影 下篇|名偵探柯南 國語版:第752集 迫近灰原秘密的黑影 下篇)$',
         },
         'skip': 'Geo-restricted to China',
     }, {
index b0d8604526abc29b4b0a09eb59068561f17312a8..021c6b2787d8748544a1e2eb2ccd6161ffcd8266 100644 (file)
@@ -24,6 +24,7 @@ from ..utils import (
 
 class ITVIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?itv\.com/hub/[^/]+/(?P<id>[0-9a-zA-Z]+)'
+    _GEO_COUNTRIES = ['GB']
     _TEST = {
         'url': 'http://www.itv.com/hub/mr-bean-animated-series/2a2936a0053',
         'info_dict': {
@@ -98,7 +99,11 @@ class ITVIE(InfoExtractor):
             headers=headers, data=etree.tostring(req_env))
         playlist = xpath_element(resp_env, './/Playlist')
         if playlist is None:
+            fault_code = xpath_text(resp_env, './/faultcode')
             fault_string = xpath_text(resp_env, './/faultstring')
+            if fault_code == 'InvalidGeoRegion':
+                self.raise_geo_restricted(
+                    msg=fault_string, countries=self._GEO_COUNTRIES)
             raise ExtractorError('%s said: %s' % (self.IE_NAME, fault_string))
         title = xpath_text(playlist, 'EpisodeTitle', fatal=True)
         video_element = xpath_element(playlist, 'VideoEntries/Video', fatal=True)
index 3d3c15024457e30d2002a3ee19e6eeab8a29ee4d..cb51cef2d45f9096a50a4716874219abaae63642 100644 (file)
@@ -16,6 +16,8 @@ class IviIE(InfoExtractor):
     IE_DESC = 'ivi.ru'
     IE_NAME = 'ivi'
     _VALID_URL = r'https?://(?:www\.)?ivi\.ru/(?:watch/(?:[^/]+/)?|video/player\?.*?videoId=)(?P<id>\d+)'
+    _GEO_BYPASS = False
+    _GEO_COUNTRIES = ['RU']
 
     _TESTS = [
         # Single movie
@@ -91,7 +93,11 @@ class IviIE(InfoExtractor):
 
         if 'error' in video_json:
             error = video_json['error']
-            if error['origin'] == 'NoRedisValidData':
+            origin = error['origin']
+            if origin == 'NotAllowedForLocation':
+                self.raise_geo_restricted(
+                    msg=error['message'], countries=self._GEO_COUNTRIES)
+            elif origin == 'NoRedisValidData':
                 raise ExtractorError('Video %s does not exist' % video_id, expected=True)
             raise ExtractorError(
                 'Unable to download video %s: %s' % (video_id, error['message']),
index aff7ab49a9500c8bdabe78fac393eb30ef827db5..33d55f7706d79e0b87a9830ae9abce3de6f33826 100644 (file)
@@ -4,139 +4,9 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from ..compat import compat_urlparse
-from ..utils import (
-    determine_ext,
-    float_or_none,
-    int_or_none,
-    js_to_json,
-    mimetype2ext,
-    urljoin,
-)
 
 
-class JWPlatformBaseIE(InfoExtractor):
-    @staticmethod
-    def _find_jwplayer_data(webpage):
-        # TODO: Merge this with JWPlayer-related codes in generic.py
-
-        mobj = re.search(
-            r'jwplayer\((?P<quote>[\'"])[^\'" ]+(?P=quote)\)\.setup\s*\((?P<options>[^)]+)\)',
-            webpage)
-        if mobj:
-            return mobj.group('options')
-
-    def _extract_jwplayer_data(self, webpage, video_id, *args, **kwargs):
-        jwplayer_data = self._parse_json(
-            self._find_jwplayer_data(webpage), video_id,
-            transform_source=js_to_json)
-        return self._parse_jwplayer_data(
-            jwplayer_data, video_id, *args, **kwargs)
-
-    def _parse_jwplayer_data(self, jwplayer_data, video_id=None, require_title=True,
-                             m3u8_id=None, mpd_id=None, rtmp_params=None, base_url=None):
-        # JWPlayer backward compatibility: flattened playlists
-        # https://github.com/jwplayer/jwplayer/blob/v7.4.3/src/js/api/config.js#L81-L96
-        if 'playlist' not in jwplayer_data:
-            jwplayer_data = {'playlist': [jwplayer_data]}
-
-        entries = []
-
-        # JWPlayer backward compatibility: single playlist item
-        # https://github.com/jwplayer/jwplayer/blob/v7.7.0/src/js/playlist/playlist.js#L10
-        if not isinstance(jwplayer_data['playlist'], list):
-            jwplayer_data['playlist'] = [jwplayer_data['playlist']]
-
-        for video_data in jwplayer_data['playlist']:
-            # JWPlayer backward compatibility: flattened sources
-            # https://github.com/jwplayer/jwplayer/blob/v7.4.3/src/js/playlist/item.js#L29-L35
-            if 'sources' not in video_data:
-                video_data['sources'] = [video_data]
-
-            this_video_id = video_id or video_data['mediaid']
-
-            formats = []
-            for source in video_data['sources']:
-                source_url = self._proto_relative_url(source['file'])
-                if base_url:
-                    source_url = compat_urlparse.urljoin(base_url, source_url)
-                source_type = source.get('type') or ''
-                ext = mimetype2ext(source_type) or determine_ext(source_url)
-                if source_type == 'hls' or ext == 'm3u8':
-                    formats.extend(self._extract_m3u8_formats(
-                        source_url, this_video_id, 'mp4', 'm3u8_native', m3u8_id=m3u8_id, fatal=False))
-                elif ext == 'mpd':
-                    formats.extend(self._extract_mpd_formats(
-                        source_url, this_video_id, mpd_id=mpd_id, fatal=False))
-                # https://github.com/jwplayer/jwplayer/blob/master/src/js/providers/default.js#L67
-                elif source_type.startswith('audio') or ext in ('oga', 'aac', 'mp3', 'mpeg', 'vorbis'):
-                    formats.append({
-                        'url': source_url,
-                        'vcodec': 'none',
-                        'ext': ext,
-                    })
-                else:
-                    height = int_or_none(source.get('height'))
-                    if height is None:
-                        # Often no height is provided but there is a label in
-                        # format like 1080p.
-                        height = int_or_none(self._search_regex(
-                            r'^(\d{3,})[pP]$', source.get('label') or '',
-                            'height', default=None))
-                    a_format = {
-                        'url': source_url,
-                        'width': int_or_none(source.get('width')),
-                        'height': height,
-                        'ext': ext,
-                    }
-                    if source_url.startswith('rtmp'):
-                        a_format['ext'] = 'flv'
-
-                        # See com/longtailvideo/jwplayer/media/RTMPMediaProvider.as
-                        # of jwplayer.flash.swf
-                        rtmp_url_parts = re.split(
-                            r'((?:mp4|mp3|flv):)', source_url, 1)
-                        if len(rtmp_url_parts) == 3:
-                            rtmp_url, prefix, play_path = rtmp_url_parts
-                            a_format.update({
-                                'url': rtmp_url,
-                                'play_path': prefix + play_path,
-                            })
-                        if rtmp_params:
-                            a_format.update(rtmp_params)
-                    formats.append(a_format)
-            self._sort_formats(formats)
-
-            subtitles = {}
-            tracks = video_data.get('tracks')
-            if tracks and isinstance(tracks, list):
-                for track in tracks:
-                    if track.get('kind') != 'captions':
-                        continue
-                    track_url = urljoin(base_url, track.get('file'))
-                    if not track_url:
-                        continue
-                    subtitles.setdefault(track.get('label') or 'en', []).append({
-                        'url': self._proto_relative_url(track_url)
-                    })
-
-            entries.append({
-                'id': this_video_id,
-                'title': video_data['title'] if require_title else video_data.get('title'),
-                'description': video_data.get('description'),
-                'thumbnail': self._proto_relative_url(video_data.get('image')),
-                'timestamp': int_or_none(video_data.get('pubdate')),
-                'duration': float_or_none(jwplayer_data.get('duration') or video_data.get('duration')),
-                'subtitles': subtitles,
-                'formats': formats,
-            })
-        if len(entries) == 1:
-            return entries[0]
-        else:
-            return self.playlist_result(entries)
-
-
-class JWPlatformIE(JWPlatformBaseIE):
+class JWPlatformIE(InfoExtractor):
     _VALID_URL = r'(?:https?://content\.jwplatform\.com/(?:feeds|players|jw6)/|jwplatform:)(?P<id>[a-zA-Z0-9]{8})'
     _TEST = {
         'url': 'http://content.jwplatform.com/players/nPripu9l-ALJ3XQCI.js',
index 5ef382f9f730091c079ab5083e0ab87f4677c407..54374ea7671396f5f2cf8f7fe1b2aab0b24ec78b 100644 (file)
@@ -23,11 +23,11 @@ class KalturaIE(InfoExtractor):
                 (?:
                     kaltura:(?P<partner_id>\d+):(?P<id>[0-9a-z_]+)|
                     https?://
-                        (:?(?:www|cdnapi(?:sec)?)\.)?kaltura\.com/
+                        (:?(?:www|cdnapi(?:sec)?)\.)?kaltura\.com(?::\d+)?/
                         (?:
                             (?:
                                 # flash player
-                                index\.php/kwidget|
+                                index\.php/(?:kwidget|extwidget/preview)|
                                 # html5 player
                                 html5/html5lib/[^/]+/mwEmbedFrame\.php
                             )
@@ -94,6 +94,14 @@ class KalturaIE(InfoExtractor):
             'params': {
                 'skip_download': True,
             },
+        },
+        {
+            'url': 'https://www.kaltura.com/index.php/extwidget/preview/partner_id/1770401/uiconf_id/37307382/entry_id/0_58u8kme7/embed/iframe?&flashvars[streamerType]=auto',
+            'only_matching': True,
+        },
+        {
+            'url': 'https://www.kaltura.com:443/index.php/extwidget/preview/partner_id/1770401/uiconf_id/37307382/entry_id/0_58u8kme7/embed/iframe?&flashvars[streamerType]=auto',
+            'only_matching': True,
         }
     ]
 
@@ -112,7 +120,7 @@ class KalturaIE(InfoExtractor):
             re.search(
                 r'''(?xs)
                     (?P<q1>["\'])
-                        (?:https?:)?//cdnapi(?:sec)?\.kaltura\.com/(?:(?!(?P=q1)).)*(?:p|partner_id)/(?P<partner_id>\d+)(?:(?!(?P=q1)).)*
+                        (?:https?:)?//cdnapi(?:sec)?\.kaltura\.com(?::\d+)?/(?:(?!(?P=q1)).)*\b(?:p|partner_id)/(?P<partner_id>\d+)(?:(?!(?P=q1)).)*
                     (?P=q1).*?
                     (?:
                         entry_?[Ii]d|
@@ -209,6 +217,8 @@ class KalturaIE(InfoExtractor):
                 partner_id = params['wid'][0][1:]
             elif 'p' in params:
                 partner_id = params['p'][0]
+            elif 'partner_id' in params:
+                partner_id = params['partner_id'][0]
             else:
                 raise ExtractorError('Invalid URL', expected=True)
             if 'entry_id' in params:
index 4321f90c87febbf44b4bedec97a0ba3d6a3e3b49..9eda956d25aa32dc6adf6186e2bde15f6cdcca8d 100644 (file)
@@ -30,7 +30,7 @@ from ..utils import (
 class LeIE(InfoExtractor):
     IE_DESC = '乐视网'
     _VALID_URL = r'https?://(?:www\.le\.com/ptv/vplay|(?:sports\.le|(?:www\.)?lesports)\.com/(?:match|video))/(?P<id>\d+)\.html'
-
+    _GEO_COUNTRIES = ['CN']
     _URL_TEMPLATE = 'http://www.le.com/ptv/vplay/%s.html'
 
     _TESTS = [{
@@ -126,10 +126,9 @@ class LeIE(InfoExtractor):
         if playstatus['status'] == 0:
             flag = playstatus['flag']
             if flag == 1:
-                msg = 'Country %s auth error' % playstatus['country']
+                self.raise_geo_restricted()
             else:
-                msg = 'Generic error. flag = %d' % flag
-            raise ExtractorError(msg, expected=True)
+                raise ExtractorError('Generic error. flag = %d' % flag, expected=True)
 
     def _real_extract(self, url):
         media_id = self._match_id(url)
index 42568f315ed1b6818907f1236705ebdeed2c02cb..3306892e8de1d9f83e7fc2ea51a3d248a39edfb6 100644 (file)
@@ -7,20 +7,40 @@ class LemondeIE(InfoExtractor):
     _VALID_URL = r'https?://(?:.+?\.)?lemonde\.fr/(?:[^/]+/)*(?P<id>[^/]+)\.html'
     _TESTS = [{
         'url': 'http://www.lemonde.fr/police-justice/video/2016/01/19/comprendre-l-affaire-bygmalion-en-cinq-minutes_4849702_1653578.html',
-        'md5': '01fb3c92de4c12c573343d63e163d302',
+        'md5': 'da120c8722d8632eec6ced937536cc98',
         'info_dict': {
             'id': 'lqm3kl',
             'ext': 'mp4',
             'title': "Comprendre l'affaire Bygmalion en 5 minutes",
             'thumbnail': r're:^https?://.*\.jpg',
-            'duration': 320,
+            'duration': 309,
             'upload_date': '20160119',
             'timestamp': 1453194778,
             'uploader_id': '3pmkp',
         },
+    }, {
+        # standard iframe embed
+        'url': 'http://www.lemonde.fr/les-decodeurs/article/2016/10/18/tout-comprendre-du-ceta-le-petit-cousin-du-traite-transatlantique_5015920_4355770.html',
+        'info_dict': {
+            'id': 'uzsxms',
+            'ext': 'mp4',
+            'title': "CETA : quelles suites pour l'accord commercial entre l'Europe et le Canada ?",
+            'thumbnail': r're:^https?://.*\.jpg',
+            'duration': 325,
+            'upload_date': '20161021',
+            'timestamp': 1477044540,
+            'uploader_id': '3pmkp',
+        },
+        'params': {
+            'skip_download': True,
+        },
     }, {
         'url': 'http://redaction.actu.lemonde.fr/societe/video/2016/01/18/calais-debut-des-travaux-de-defrichement-dans-la-jungle_4849233_3224.html',
         'only_matching': True,
+    }, {
+        # YouTube embeds
+        'url': 'http://www.lemonde.fr/pixels/article/2016/12/09/pourquoi-pewdiepie-superstar-de-youtube-a-menace-de-fermer-sa-chaine_5046649_4408996.html',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
@@ -30,5 +50,9 @@ class LemondeIE(InfoExtractor):
 
         digiteka_url = self._proto_relative_url(self._search_regex(
             r'url\s*:\s*(["\'])(?P<url>(?:https?://)?//(?:www\.)?(?:digiteka\.net|ultimedia\.com)/deliver/.+?)\1',
-            webpage, 'digiteka url', group='url'))
-        return self.url_result(digiteka_url, 'Digiteka')
+            webpage, 'digiteka url', group='url', default=None))
+
+        if digiteka_url:
+            return self.url_result(digiteka_url, 'Digiteka')
+
+        return self.url_result(url, 'Generic')
index e635f3c4dc46c6407a166dec9ab2ef06981b6221..422be25288f66a83c32e370e833d6690d83ab54b 100644 (file)
@@ -4,10 +4,13 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
+from ..compat import compat_HTTPError
 from ..utils import (
     determine_ext,
     float_or_none,
     int_or_none,
+    unsmuggle_url,
+    ExtractorError,
 )
 
 
@@ -15,20 +18,31 @@ class LimelightBaseIE(InfoExtractor):
     _PLAYLIST_SERVICE_URL = 'http://production-ps.lvp.llnw.net/r/PlaylistService/%s/%s/%s'
     _API_URL = 'http://api.video.limelight.com/rest/organizations/%s/%s/%s/%s.json'
 
-    def _call_playlist_service(self, item_id, method, fatal=True):
-        return self._download_json(
-            self._PLAYLIST_SERVICE_URL % (self._PLAYLIST_SERVICE_PATH, item_id, method),
-            item_id, 'Downloading PlaylistService %s JSON' % method, fatal=fatal)
+    def _call_playlist_service(self, item_id, method, fatal=True, referer=None):
+        headers = {}
+        if referer:
+            headers['Referer'] = referer
+        try:
+            return self._download_json(
+                self._PLAYLIST_SERVICE_URL % (self._PLAYLIST_SERVICE_PATH, item_id, method),
+                item_id, 'Downloading PlaylistService %s JSON' % method, fatal=fatal, headers=headers)
+        except ExtractorError as e:
+            if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
+                error = self._parse_json(e.cause.read().decode(), item_id)['detail']['contentAccessPermission']
+                if error == 'CountryDisabled':
+                    self.raise_geo_restricted()
+                raise ExtractorError(error, expected=True)
+            raise
 
     def _call_api(self, organization_id, item_id, method):
         return self._download_json(
             self._API_URL % (organization_id, self._API_PATH, item_id, method),
             item_id, 'Downloading API %s JSON' % method)
 
-    def _extract(self, item_id, pc_method, mobile_method, meta_method):
-        pc = self._call_playlist_service(item_id, pc_method)
+    def _extract(self, item_id, pc_method, mobile_method, meta_method, referer=None):
+        pc = self._call_playlist_service(item_id, pc_method, referer=referer)
         metadata = self._call_api(pc['orgId'], item_id, meta_method)
-        mobile = self._call_playlist_service(item_id, mobile_method, fatal=False)
+        mobile = self._call_playlist_service(item_id, mobile_method, fatal=False, referer=referer)
         return pc, mobile, metadata
 
     def _extract_info(self, streams, mobile_urls, properties):
@@ -207,10 +221,14 @@ class LimelightMediaIE(LimelightBaseIE):
     _API_PATH = 'media'
 
     def _real_extract(self, url):
+        url, smuggled_data = unsmuggle_url(url, {})
         video_id = self._match_id(url)
+        self._initialize_geo_bypass(smuggled_data.get('geo_countries'))
 
         pc, mobile, metadata = self._extract(
-            video_id, 'getPlaylistByMediaId', 'getMobilePlaylistByMediaId', 'properties')
+            video_id, 'getPlaylistByMediaId',
+            'getMobilePlaylistByMediaId', 'properties',
+            smuggled_data.get('source_url'))
 
         return self._extract_info(
             pc['playlistItems'][0].get('streams', []),
@@ -247,11 +265,13 @@ class LimelightChannelIE(LimelightBaseIE):
     _API_PATH = 'channels'
 
     def _real_extract(self, url):
+        url, smuggled_data = unsmuggle_url(url, {})
         channel_id = self._match_id(url)
 
         pc, mobile, medias = self._extract(
             channel_id, 'getPlaylistByChannelId',
-            'getMobilePlaylistWithNItemsByChannelId?begin=0&count=-1', 'media')
+            'getMobilePlaylistWithNItemsByChannelId?begin=0&count=-1',
+            'media', smuggled_data.get('source_url'))
 
         entries = [
             self._extract_info(
index da94eab561b91d6b70675911e432b5750d5d5b04..d2f75296a1b5b8f50b3db8bcae6b7cb724f15204 100644 (file)
@@ -260,9 +260,24 @@ class LyndaCourseIE(LyndaBaseIE):
         course_path = mobj.group('coursepath')
         course_id = mobj.group('courseid')
 
+        item_template = 'https://www.lynda.com/%s/%%s-4.html' % course_path
+
         course = self._download_json(
             'https://www.lynda.com/ajax/player?courseId=%s&type=course' % course_id,
-            course_id, 'Downloading course JSON')
+            course_id, 'Downloading course JSON', fatal=False)
+
+        if not course:
+            webpage = self._download_webpage(url, course_id)
+            entries = [
+                self.url_result(
+                    item_template % video_id, ie=LyndaIE.ie_key(),
+                    video_id=video_id)
+                for video_id in re.findall(
+                    r'data-video-id=["\'](\d+)', webpage)]
+            return self.playlist_result(
+                entries, course_id,
+                self._og_search_title(webpage, fatal=False),
+                self._og_search_description(webpage))
 
         if course.get('Status') == 'NotFound':
             raise ExtractorError(
@@ -283,7 +298,7 @@ class LyndaCourseIE(LyndaBaseIE):
                 if video_id:
                     entries.append({
                         '_type': 'url_transparent',
-                        'url': 'https://www.lynda.com/%s/%s-4.html' % (course_path, video_id),
+                        'url': item_template % video_id,
                         'ie_key': LyndaIE.ie_key(),
                         'chapter': chapter.get('Title'),
                         'chapter_number': int_or_none(chapter.get('ChapterIndex')),
index 9880924e692380fffde3d0c776da329225de4ef8..28f59f63c836a6f16f58c85969337e669e12dcb3 100644 (file)
@@ -6,12 +6,12 @@ from .common import InfoExtractor
 from ..compat import (
     compat_parse_qs,
     compat_urllib_parse_unquote,
+    compat_urllib_parse_urlencode,
 )
 from ..utils import (
     determine_ext,
     ExtractorError,
     int_or_none,
-    urlencode_postdata,
     get_element_by_attribute,
     mimetype2ext,
 )
@@ -50,6 +50,21 @@ class MetacafeIE(InfoExtractor):
             },
             'skip': 'Page is temporarily unavailable.',
         },
+        # metacafe video with family filter
+        {
+            'url': 'http://www.metacafe.com/watch/2155630/adult_art_by_david_hart_156/',
+            'md5': 'b06082c5079bbdcde677a6291fbdf376',
+            'info_dict': {
+                'id': '2155630',
+                'ext': 'mp4',
+                'title': 'Adult Art By David Hart 156',
+                'uploader': '63346',
+                'description': 'md5:9afac8fc885252201ad14563694040fc',
+            },
+            'params': {
+                'skip_download': True,
+            },
+        },
         # AnyClip video
         {
             'url': 'http://www.metacafe.com/watch/an-dVVXnuY7Jh77J/the_andromeda_strain_1971_stop_the_bomb_part_3/',
@@ -112,22 +127,6 @@ class MetacafeIE(InfoExtractor):
     def report_disclaimer(self):
         self.to_screen('Retrieving disclaimer')
 
-    def _confirm_age(self):
-        # Retrieve disclaimer
-        self.report_disclaimer()
-        self._download_webpage(self._DISCLAIMER, None, False, 'Unable to retrieve disclaimer')
-
-        # Confirm age
-        self.report_age_confirmation()
-        self._download_webpage(
-            self._FILTER_POST, None, False, 'Unable to confirm age',
-            data=urlencode_postdata({
-                'filters': '0',
-                'submit': "Continue - I'm over 18",
-            }), headers={
-                'Content-Type': 'application/x-www-form-urlencoded',
-            })
-
     def _real_extract(self, url):
         # Extract id and simplified title from URL
         video_id, display_id = re.match(self._VALID_URL, url).groups()
@@ -143,13 +142,15 @@ class MetacafeIE(InfoExtractor):
             if prefix == 'cb':
                 return self.url_result('theplatform:%s' % ext_id, 'ThePlatform')
 
-        # self._confirm_age()
+        headers = {
+            # Disable family filter
+            'Cookie': 'user=%s; ' % compat_urllib_parse_urlencode({'ffilter': False})
+        }
 
         # AnyClip videos require the flashversion cookie so that we get the link
         # to the mp4 file
-        headers = {}
         if video_id.startswith('an-'):
-            headers['Cookie'] = 'flashVersion=0;'
+            headers['Cookie'] += 'flashVersion=0; '
 
         # Retrieve video webpage to extract further information
         webpage = self._download_webpage(url, video_id, headers=headers)
index 659ede8c2254d6ce524953c298c8a69b0b13d745..d53d96aae1799e15d578c765545a929255009d31 100644 (file)
@@ -2,16 +2,17 @@
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
+from ..compat import compat_str
 from ..utils import int_or_none
 
 
 class MGTVIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?mgtv\.com/v/(?:[^/]+/)*(?P<id>\d+)\.html'
+    _VALID_URL = r'https?://(?:www\.)?mgtv\.com/(v|b)/(?:[^/]+/)*(?P<id>\d+)\.html'
     IE_DESC = '芒果TV'
 
     _TESTS = [{
         'url': 'http://www.mgtv.com/v/1/290525/f/3116640.html',
-        'md5': '1bdadcf760a0b90946ca68ee9a2db41a',
+        'md5': 'b1ffc0fc163152acf6beaa81832c9ee7',
         'info_dict': {
             'id': '3116640',
             'ext': 'mp4',
@@ -21,48 +22,45 @@ class MGTVIE(InfoExtractor):
             'thumbnail': r're:^https?://.*\.jpg$',
         },
     }, {
-        # no tbr extracted from stream_url
-        'url': 'http://www.mgtv.com/v/1/1/f/3324755.html',
+        'url': 'http://www.mgtv.com/b/301817/3826653.html',
         'only_matching': True,
     }]
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
         api_data = self._download_json(
-            'http://v.api.mgtv.com/player/video', video_id,
+            'http://pcweb.api.mgtv.com/player/video', video_id,
             query={'video_id': video_id},
             headers=self.geo_verification_headers())['data']
         info = api_data['info']
+        title = info['title'].strip()
+        stream_domain = api_data['stream_domain'][0]
 
         formats = []
         for idx, stream in enumerate(api_data['stream']):
-            stream_url = stream.get('url')
-            if not stream_url:
+            stream_path = stream.get('url')
+            if not stream_path:
+                continue
+            format_data = self._download_json(
+                stream_domain + stream_path, video_id,
+                note='Download video info for format #%d' % idx)
+            format_url = format_data.get('info')
+            if not format_url:
                 continue
             tbr = int_or_none(self._search_regex(
-                r'(\d+)\.mp4', stream_url, 'tbr', default=None))
-
-            def extract_format(stream_url, format_id, idx, query={}):
-                format_info = self._download_json(
-                    stream_url, video_id,
-                    note='Download video info for format %s' % (format_id or '#%d' % idx),
-                    query=query)
-                return {
-                    'format_id': format_id,
-                    'url': format_info['info'],
-                    'ext': 'mp4',
-                    'tbr': tbr,
-                }
-
-            formats.append(extract_format(
-                stream_url, 'hls-%d' % tbr if tbr else None, idx * 2))
-            formats.append(extract_format(stream_url.replace(
-                '/playlist.m3u8', ''), 'http-%d' % tbr if tbr else None, idx * 2 + 1, {'pno': 1031}))
+                r'_(\d+)_mp4/', format_url, 'tbr', default=None))
+            formats.append({
+                'format_id': compat_str(tbr or idx),
+                'url': format_url,
+                'ext': 'mp4',
+                'tbr': tbr,
+                'protocol': 'm3u8_native',
+            })
         self._sort_formats(formats)
 
         return {
             'id': video_id,
-            'title': info['title'].strip(),
+            'title': title,
             'formats': formats,
             'description': info.get('desc'),
             'duration': int_or_none(info.get('duration')),
index 434a94de49b9f1623385f20386feaa1b34da75fe..d2a44d05dffbf7d42298a260ba728f53e43d5efd 100644 (file)
@@ -4,23 +4,26 @@ import re
 
 from .common import InfoExtractor
 from .theplatform import ThePlatformIE
+from .adobepass import AdobePassIE
+from ..compat import compat_urllib_parse_urlparse
 from ..utils import (
     find_xpath_attr,
     lowercase_escape,
     smuggle_url,
     unescapeHTML,
     update_url_query,
+    int_or_none,
 )
 
 
-class NBCIE(InfoExtractor):
+class NBCIE(AdobePassIE):
     _VALID_URL = r'https?://(?:www\.)?nbc\.com/(?:[^/]+/)+(?P<id>n?\d+)'
 
     _TESTS = [
         {
-            'url': 'http://www.nbc.com/the-tonight-show/segments/112966',
+            'url': 'http://www.nbc.com/the-tonight-show/video/jimmy-fallon-surprises-fans-at-ben-jerrys/2848237',
             'info_dict': {
-                'id': '112966',
+                'id': '2848237',
                 'ext': 'mp4',
                 'title': 'Jimmy Fallon Surprises Fans at Ben & Jerry\'s',
                 'description': 'Jimmy gives out free scoops of his new "Tonight Dough" ice cream flavor by surprising customers at the Ben & Jerry\'s scoop shop.',
@@ -69,7 +72,7 @@ class NBCIE(InfoExtractor):
             # HLS streams requires the 'hdnea3' cookie
             'url': 'http://www.nbc.com/Kings/video/goliath/n1806',
             'info_dict': {
-                'id': 'n1806',
+                'id': '101528f5a9e8127b107e98c5e6ce4638',
                 'ext': 'mp4',
                 'title': 'Goliath',
                 'description': 'When an unknown soldier saves the life of the King\'s son in battle, he\'s thrust into the limelight and politics of the kingdom.',
@@ -87,21 +90,57 @@ class NBCIE(InfoExtractor):
     def _real_extract(self, url):
         video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
-        theplatform_url = unescapeHTML(lowercase_escape(self._html_search_regex(
-            [
-                r'(?:class="video-player video-player-full" data-mpx-url|class="player" src)="(.*?)"',
-                r'<iframe[^>]+src="((?:https?:)?//player\.theplatform\.com/[^"]+)"',
-                r'"embedURL"\s*:\s*"([^"]+)"'
-            ],
-            webpage, 'theplatform url').replace('_no_endcard', '').replace('\\/', '/')))
-        if theplatform_url.startswith('//'):
-            theplatform_url = 'http:' + theplatform_url
-        return {
+        info = {
             '_type': 'url_transparent',
             'ie_key': 'ThePlatform',
-            'url': smuggle_url(theplatform_url, {'source_url': url}),
             'id': video_id,
         }
+        video_data = None
+        preload = self._search_regex(
+            r'PRELOAD\s*=\s*({.+})', webpage, 'preload data', default=None)
+        if preload:
+            preload_data = self._parse_json(preload, video_id)
+            path = compat_urllib_parse_urlparse(url).path.rstrip('/')
+            entity_id = preload_data.get('xref', {}).get(path)
+            video_data = preload_data.get('entities', {}).get(entity_id)
+        if video_data:
+            query = {
+                'mbr': 'true',
+                'manifest': 'm3u',
+            }
+            video_id = video_data['guid']
+            title = video_data['title']
+            if video_data.get('entitlement') == 'auth':
+                resource = self._get_mvpd_resource(
+                    'nbcentertainment', title, video_id,
+                    video_data.get('vChipRating'))
+                query['auth'] = self._extract_mvpd_auth(
+                    url, video_id, 'nbcentertainment', resource)
+            theplatform_url = smuggle_url(update_url_query(
+                'http://link.theplatform.com/s/NnzsPC/media/guid/2410887629/' + video_id,
+                query), {'force_smil_url': True})
+            info.update({
+                'id': video_id,
+                'title': title,
+                'url': theplatform_url,
+                'description': video_data.get('description'),
+                'keywords': video_data.get('keywords'),
+                'season_number': int_or_none(video_data.get('seasonNumber')),
+                'episode_number': int_or_none(video_data.get('episodeNumber')),
+                'series': video_data.get('showName'),
+            })
+        else:
+            theplatform_url = unescapeHTML(lowercase_escape(self._html_search_regex(
+                [
+                    r'(?:class="video-player video-player-full" data-mpx-url|class="player" src)="(.*?)"',
+                    r'<iframe[^>]+src="((?:https?:)?//player\.theplatform\.com/[^"]+)"',
+                    r'"embedURL"\s*:\s*"([^"]+)"'
+                ],
+                webpage, 'theplatform url').replace('_no_endcard', '').replace('\\/', '/')))
+            if theplatform_url.startswith('//'):
+                theplatform_url = 'http:' + theplatform_url
+            info['url'] = smuggle_url(theplatform_url, {'source_url': url})
+        return info
 
 
 class NBCSportsVPlayerIE(InfoExtractor):
index ec4d675e277f842172dcbaad49e4cb666c6b10f4..d9943fc2c9eef739dc375c58c743bc5b8d829309 100644 (file)
@@ -19,6 +19,7 @@ class NineCNineMediaBaseIE(InfoExtractor):
 
 class NineCNineMediaStackIE(NineCNineMediaBaseIE):
     IE_NAME = '9c9media:stack'
+    _GEO_COUNTRIES = ['CA']
     _VALID_URL = r'9c9media:stack:(?P<destination_code>[^:]+):(?P<content_id>\d+):(?P<content_package>\d+):(?P<id>\d+)'
 
     def _real_extract(self, url):
index 70ff2ab3653525664b4f1ae590393ee680a2f6e5..8b83e1f760141bc1a6a0ff4c56761e09b6e3d920 100644 (file)
@@ -23,7 +23,7 @@ from ..utils import (
 
 class NocoIE(InfoExtractor):
     _VALID_URL = r'https?://(?:(?:www\.)?noco\.tv/emission/|player\.noco\.tv/\?idvideo=)(?P<id>\d+)'
-    _LOGIN_URL = 'http://noco.tv/do.php'
+    _LOGIN_URL = 'https://noco.tv/do.php'
     _API_URL_TEMPLATE = 'https://api.noco.tv/1.1/%s?ts=%s&tk=%s'
     _SUB_LANG_TEMPLATE = '&sub_lang=%s'
     _NETRC_MACHINE = 'noco'
@@ -69,16 +69,17 @@ class NocoIE(InfoExtractor):
         if username is None:
             return
 
-        login_form = {
-            'a': 'login',
-            'cookie': '1',
-            'username': username,
-            'password': password,
-        }
-        request = sanitized_Request(self._LOGIN_URL, urlencode_postdata(login_form))
-        request.add_header('Content-Type', 'application/x-www-form-urlencoded; charset=UTF-8')
-
-        login = self._download_json(request, None, 'Logging in as %s' % username)
+        login = self._download_json(
+            self._LOGIN_URL, None, 'Logging in as %s' % username,
+            data=urlencode_postdata({
+                'a': 'login',
+                'cookie': '1',
+                'username': username,
+                'password': password,
+            }),
+            headers={
+                'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
+            })
 
         if 'erreur' in login:
             raise ExtractorError('Unable to login: %s' % clean_html(login['erreur']), expected=True)
index fc3c0cd3ccb25ab8c41fdb1b8e9b424458c93209..7fe79cb539e2ce435d55891e16541fe8d2650f41 100644 (file)
@@ -1,7 +1,6 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
-import random
 import re
 
 from .common import InfoExtractor
@@ -15,24 +14,7 @@ from ..utils import (
 
 
 class NRKBaseIE(InfoExtractor):
-    _faked_ip = None
-
-    def _download_webpage_handle(self, *args, **kwargs):
-        # NRK checks X-Forwarded-For HTTP header in order to figure out the
-        # origin of the client behind proxy. This allows to bypass geo
-        # restriction by faking this header's value to some Norway IP.
-        # We will do so once we encounter any geo restriction error.
-        if self._faked_ip:
-            # NB: str is intentional
-            kwargs.setdefault(str('headers'), {})['X-Forwarded-For'] = self._faked_ip
-        return super(NRKBaseIE, self)._download_webpage_handle(*args, **kwargs)
-
-    def _fake_ip(self):
-        # Use fake IP from 37.191.128.0/17 in order to workaround geo
-        # restriction
-        def octet(lb=0, ub=255):
-            return random.randint(lb, ub)
-        self._faked_ip = '37.191.%d.%d' % (octet(128), octet())
+    _GEO_COUNTRIES = ['NO']
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
@@ -44,8 +26,6 @@ class NRKBaseIE(InfoExtractor):
         title = data.get('fullTitle') or data.get('mainTitle') or data['title']
         video_id = data.get('id') or video_id
 
-        http_headers = {'X-Forwarded-For': self._faked_ip} if self._faked_ip else {}
-
         entries = []
 
         conviva = data.get('convivaStatistics') or {}
@@ -90,7 +70,6 @@ class NRKBaseIE(InfoExtractor):
                     'duration': duration,
                     'subtitles': subtitles,
                     'formats': formats,
-                    'http_headers': http_headers,
                 })
 
         if not entries:
@@ -107,19 +86,17 @@ class NRKBaseIE(InfoExtractor):
                 }]
 
         if not entries:
-            message_type = data.get('messageType', '')
-            # Can be ProgramIsGeoBlocked or ChannelIsGeoBlocked*
-            if 'IsGeoBlocked' in message_type and not self._faked_ip:
-                self.report_warning(
-                    'Video is geo restricted, trying to fake IP')
-                self._fake_ip()
-                return self._real_extract(url)
-
             MESSAGES = {
                 'ProgramRightsAreNotReady': 'Du kan dessverre ikke se eller høre programmet',
                 'ProgramRightsHasExpired': 'Programmet har gått ut',
                 'ProgramIsGeoBlocked': 'NRK har ikke rettigheter til å vise dette programmet utenfor Norge',
             }
+            message_type = data.get('messageType', '')
+            # Can be ProgramIsGeoBlocked or ChannelIsGeoBlocked*
+            if 'IsGeoBlocked' in message_type:
+                self.raise_geo_restricted(
+                    msg=MESSAGES.get('ProgramIsGeoBlocked'),
+                    countries=self._GEO_COUNTRIES)
             raise ExtractorError(
                 '%s said: %s' % (self.IE_NAME, MESSAGES.get(
                     message_type, message_type)),
@@ -188,12 +165,12 @@ class NRKIE(NRKBaseIE):
                             https?://
                                 (?:
                                     (?:www\.)?nrk\.no/video/PS\*|
-                                    v8-psapi\.nrk\.no/mediaelement/
+                                    v8[-.]psapi\.nrk\.no/mediaelement/
                                 )
                             )
-                            (?P<id>[^/?#&]+)
+                            (?P<id>[^?#&]+)
                         '''
-    _API_HOST = 'v8.psapi.nrk.no'
+    _API_HOST = 'v8-psapi.nrk.no'
     _TESTS = [{
         # video
         'url': 'http://www.nrk.no/video/PS*150533',
@@ -219,6 +196,9 @@ class NRKIE(NRKBaseIE):
     }, {
         'url': 'nrk:ecc1b952-96dc-4a98-81b9-5296dc7a98d9',
         'only_matching': True,
+    }, {
+        'url': 'nrk:clip/7707d5a3-ebe7-434a-87d5-a3ebe7a34a70',
+        'only_matching': True,
     }, {
         'url': 'https://v8-psapi.nrk.no/mediaelement/ecc1b952-96dc-4a98-81b9-5296dc7a98d9',
         'only_matching': True,
index de1d6b08a409ef79272393ccd3f98607f436e4a3..df1ce3c1db1eaa22d03609ddb55748e404b1f4a9 100644 (file)
@@ -1,15 +1,16 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
-from .jwplatform import JWPlatformBaseIE
+from .common import InfoExtractor
 from ..utils import (
     ExtractorError,
     js_to_json,
 )
 
 
-class OnDemandKoreaIE(JWPlatformBaseIE):
+class OnDemandKoreaIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?ondemandkorea\.com/(?P<id>[^/]+)\.html'
+    _GEO_COUNTRIES = ['US', 'CA']
     _TEST = {
         'url': 'http://www.ondemandkorea.com/ask-us-anything-e43.html',
         'info_dict': {
@@ -35,7 +36,8 @@ class OnDemandKoreaIE(JWPlatformBaseIE):
 
         if 'msg_block_01.png' in webpage:
             self.raise_geo_restricted(
-                'This content is not available in your region')
+                msg='This content is not available in your region',
+                countries=self._GEO_COUNTRIES)
 
         if 'This video is only available to ODK PLUS members.' in webpage:
             raise ExtractorError(
index 0a501b3e5b9f0c0bd2c6789e4da302b7610ced28..94f57990b21664cc8922c088896dec54ab73a310 100644 (file)
@@ -23,7 +23,7 @@ class OnetBaseIE(InfoExtractor):
         return self._search_regex(
             r'id=(["\'])mvp:(?P<id>.+?)\1', webpage, 'mvp id', group='id')
 
-    def _extract_from_id(self, video_id, webpage):
+    def _extract_from_id(self, video_id, webpage=None):
         response = self._download_json(
             'http://qi.ckm.onetapi.pl/', video_id,
             query={
@@ -74,8 +74,10 @@ class OnetBaseIE(InfoExtractor):
 
         meta = video.get('meta', {})
 
-        title = self._og_search_title(webpage, default=None) or meta['title']
-        description = self._og_search_description(webpage, default=None) or meta.get('description')
+        title = (self._og_search_title(
+            webpage, default=None) if webpage else None) or meta['title']
+        description = (self._og_search_description(
+            webpage, default=None) if webpage else None) or meta.get('description')
         duration = meta.get('length') or meta.get('lenght')
         timestamp = parse_iso8601(meta.get('addDate'), ' ')
 
@@ -89,6 +91,18 @@ class OnetBaseIE(InfoExtractor):
         }
 
 
+class OnetMVPIE(OnetBaseIE):
+    _VALID_URL = r'onetmvp:(?P<id>\d+\.\d+)'
+
+    _TEST = {
+        'url': 'onetmvp:381027.1509591944',
+        'only_matching': True,
+    }
+
+    def _real_extract(self, url):
+        return self._extract_from_id(self._match_id(url))
+
+
 class OnetIE(OnetBaseIE):
     _VALID_URL = r'https?://(?:www\.)?onet\.tv/[a-z]/[a-z]+/(?P<display_id>[0-9a-z-]+)/(?P<id>[0-9a-z]+)'
     IE_NAME = 'onet.tv'
@@ -167,3 +181,44 @@ class OnetChannelIE(OnetBaseIE):
         channel_title = strip_or_none(get_element_by_class('o_channelName', webpage))
         channel_description = strip_or_none(get_element_by_class('o_channelDesc', webpage))
         return self.playlist_result(entries, channel_id, channel_title, channel_description)
+
+
+class OnetPlIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:[^/]+\.)?(?:onet|businessinsider\.com|plejada)\.pl/(?:[^/]+/)+(?P<id>[0-9a-z]+)'
+    IE_NAME = 'onet.pl'
+
+    _TESTS = [{
+        'url': 'http://eurosport.onet.pl/zimowe/skoki-narciarskie/ziobro-wygral-kwalifikacje-w-pjongczangu/9ckrly',
+        'md5': 'b94021eb56214c3969380388b6e73cb0',
+        'info_dict': {
+            'id': '1561707.1685479',
+            'ext': 'mp4',
+            'title': 'Ziobro wygrał kwalifikacje w Pjongczangu',
+            'description': 'md5:61fb0740084d2d702ea96512a03585b4',
+            'upload_date': '20170214',
+            'timestamp': 1487078046,
+        },
+    }, {
+        'url': 'http://film.onet.pl/zwiastuny/ghost-in-the-shell-drugi-zwiastun-pl/5q6yl3',
+        'only_matching': True,
+    }, {
+        'url': 'http://moto.onet.pl/jak-wybierane-sa-miejsca-na-fotoradary/6rs04e',
+        'only_matching': True,
+    }, {
+        'url': 'http://businessinsider.com.pl/wideo/scenariusz-na-koniec-swiata-wedlug-nasa/dwnqptk',
+        'only_matching': True,
+    }, {
+        'url': 'http://plejada.pl/weronika-rosati-o-swoim-domniemanym-slubie/n2bq89',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, video_id)
+
+        mvp_id = self._search_regex(
+            r'data-params-mvp=["\'](\d+\.\d+)', webpage, 'mvp id')
+
+        return self.url_result(
+            'onetmvp:%s' % mvp_id, OnetMVPIE.ie_key(), video_id=mvp_id)
index 32289d8976dcf602839546d179c06ef224a79b20..fc7ff43a62ba4a1a294d368d0b8fbaeb9abdb2ed 100644 (file)
@@ -72,20 +72,25 @@ class OpenloadIE(InfoExtractor):
             raise ExtractorError('File not found', expected=True)
 
         ol_id = self._search_regex(
-            '<span[^>]+id="[^"]+"[^>]*>([0-9]+)</span>',
+            '<span[^>]+id="[^"]+"[^>]*>([0-9A-Za-z]+)</span>',
             webpage, 'openload ID')
 
-        first_three_chars = int(float(ol_id[0:][:3]))
-        fifth_char = int(float(ol_id[3:5]))
-        urlcode = ''
-        num = 5
+        first_char = int(ol_id[0])
+        urlcode = []
+        num = 1
 
         while num < len(ol_id):
-            urlcode += compat_chr(int(float(ol_id[num:][:3])) +
-                                  first_three_chars - fifth_char * int(float(ol_id[num + 3:][:2])))
+            i = ord(ol_id[num])
+            key = 0
+            if i <= 90:
+                key = i - 65
+            elif i >= 97:
+                key = 25 + i - 97
+            urlcode.append((key, compat_chr(int(ol_id[num + 2:num + 5]) // int(ol_id[num + 1]) - first_char)))
             num += 5
 
-        video_url = 'https://openload.co/stream/' + urlcode
+        video_url = 'https://openload.co/stream/' + ''.join(
+            [value for _, value in sorted(urlcode, key=lambda x: x[0])])
 
         title = self._og_search_title(webpage, default=None) or self._search_regex(
             r'<span[^>]+class=["\']title["\'][^>]*>([^<]+)', webpage,
index 6baed773fc6bf741a69f1baf222148065ef169c4..3e51b4dd746d88ead54bc9ca469113f1f3cf14d2 100644 (file)
@@ -193,6 +193,8 @@ class PBSIE(InfoExtractor):
         )
     ''' % '|'.join(list(zip(*_STATIONS))[0])
 
+    _GEO_COUNTRIES = ['US']
+
     _TESTS = [
         {
             'url': 'http://www.pbs.org/tpt/constitution-usa-peter-sagal/watch/a-more-perfect-union/',
@@ -489,11 +491,13 @@ class PBSIE(InfoExtractor):
                 headers=self.geo_verification_headers())
 
             if redirect_info['status'] == 'error':
+                message = self._ERRORS.get(
+                    redirect_info['http_code'], redirect_info['message'])
+                if redirect_info['http_code'] == 403:
+                    self.raise_geo_restricted(
+                        msg=message, countries=self._GEO_COUNTRIES)
                 raise ExtractorError(
-                    '%s said: %s' % (
-                        self.IE_NAME,
-                        self._ERRORS.get(redirect_info['http_code'], redirect_info['message'])),
-                    expected=True)
+                    '%s said: %s' % (self.IE_NAME, message), expected=True)
 
             format_url = redirect_info.get('url')
             if not format_url:
index 6a4580d54c8733166316c80a2361499e85eb9baf..9f3501f7770fa93fb54fc19ff554e64c6d8da25f 100644 (file)
@@ -64,7 +64,8 @@ class PinkbikeIE(InfoExtractor):
             'video:duration', webpage, 'duration'))
 
         uploader = self._search_regex(
-            r'un:\s*"([^"]+)"', webpage, 'uploader', fatal=False)
+            r'<a[^>]+\brel=["\']author[^>]+>([^<]+)', webpage,
+            'uploader', fatal=False)
         upload_date = unified_strdate(self._search_regex(
             r'class="fullTime"[^>]+title="([^"]+)"',
             webpage, 'upload date', fatal=False))
index 5c798e874837ff1704650fa991d5b07cde8ab210..e0cbd045e4ad68517b425f9565590ab7a75f9019 100644 (file)
@@ -18,6 +18,7 @@ from ..utils import (
     parse_duration,
     qualities,
     srt_subtitles_timecode,
+    update_url_query,
     urlencode_postdata,
 )
 
@@ -92,6 +93,10 @@ class PluralsightIE(PluralsightBaseIE):
             raise ExtractorError('Unable to login: %s' % error, expected=True)
 
         if all(p not in response for p in ('__INITIAL_STATE__', '"currentUser"')):
+            BLOCKED = 'Your account has been blocked due to suspicious activity'
+            if BLOCKED in response:
+                raise ExtractorError(
+                    'Unable to login: %s' % BLOCKED, expected=True)
             raise ExtractorError('Unable to log in')
 
     def _get_subtitles(self, author, clip_id, lang, name, duration, video_id):
@@ -327,25 +332,44 @@ class PluralsightCourseIE(PluralsightBaseIE):
         # TODO: PSM cookie
 
         course = self._download_json(
-            '%s/data/course/%s' % (self._API_BASE, course_id),
-            course_id, 'Downloading course JSON')
+            '%s/player/functions/rpc' % self._API_BASE, course_id,
+            'Downloading course JSON',
+            data=json.dumps({
+                'fn': 'bootstrapPlayer',
+                'payload': {
+                    'courseId': course_id,
+                }
+            }).encode('utf-8'),
+            headers={
+                'Content-Type': 'application/json;charset=utf-8'
+            })['payload']['course']
 
         title = course['title']
+        course_name = course['name']
+        course_data = course['modules']
         description = course.get('description') or course.get('shortDescription')
 
-        course_data = self._download_json(
-            '%s/data/course/content/%s' % (self._API_BASE, course_id),
-            course_id, 'Downloading course data JSON')
-
         entries = []
         for num, module in enumerate(course_data, 1):
+            author = module.get('author')
+            module_name = module.get('name')
+            if not author or not module_name:
+                continue
             for clip in module.get('clips', []):
-                player_parameters = clip.get('playerParameters')
-                if not player_parameters:
+                clip_index = int_or_none(clip.get('index'))
+                if clip_index is None:
                     continue
+                clip_url = update_url_query(
+                    '%s/player' % self._API_BASE, query={
+                        'mode': 'live',
+                        'course': course_name,
+                        'author': author,
+                        'name': module_name,
+                        'clip': clip_index,
+                    })
                 entries.append({
                     '_type': 'url_transparent',
-                    'url': '%s/training/player?%s' % (self._API_BASE, player_parameters),
+                    'url': clip_url,
                     'ie_key': PluralsightIE.ie_key(),
                     'chapter': module.get('title'),
                     'chapter_number': num,
index 017f6c55219ff3db0cf9bf745f74031882f12c54..9b413590a4078b9e962edc63912a85fbc8312523 100644 (file)
@@ -2,27 +2,27 @@
 from __future__ import unicode_literals
 
 import itertools
-import os
+import os
 import re
 
 from .common import InfoExtractor
 from ..compat import (
     compat_HTTPError,
-    compat_urllib_parse_unquote,
-    compat_urllib_parse_unquote_plus,
-    compat_urllib_parse_urlparse,
+    compat_urllib_parse_unquote,
+    compat_urllib_parse_unquote_plus,
+    compat_urllib_parse_urlparse,
 )
 from ..utils import (
     ExtractorError,
     int_or_none,
     js_to_json,
     orderedSet,
-    sanitized_Request,
+    sanitized_Request,
     str_to_int,
 )
-from ..aes import (
-    aes_decrypt_text
-)
+from ..aes import (
+    aes_decrypt_text
+)
 
 
 class PornHubIE(InfoExtractor):
@@ -109,10 +109,14 @@ class PornHubIE(InfoExtractor):
     def _real_extract(self, url):
         video_id = self._match_id(url)
 
-        req = sanitized_Request(
-            'http://www.pornhub.com/view_video.php?viewkey=%s' % video_id)
-        req.add_header('Cookie', 'age_verified=1')
-        webpage = self._download_webpage(req, video_id)
+        def dl_webpage(platform):
+            return self._download_webpage(
+                'http://www.pornhub.com/view_video.php?viewkey=%s' % video_id,
+                video_id, headers={
+                    'Cookie': 'age_verified=1; platform=%s' % platform,
+                })
+
+        webpage = dl_webpage('pc')
 
         error_msg = self._html_search_regex(
             r'(?s)<div[^>]+class=(["\'])(?:(?!\1).)*\b(?:removed|userMessageSection)\b(?:(?!\1).)*\1[^>]*>(?P<error>.+?)</div>',
@@ -123,10 +127,19 @@ class PornHubIE(InfoExtractor):
                 'PornHub said: %s' % error_msg,
                 expected=True, video_id=video_id)
 
+        tv_webpage = dl_webpage('tv')
+
+        video_url = self._search_regex(
+            r'<video[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//.+?)\1', tv_webpage,
+            'video url', group='url')
+
+        title = self._search_regex(
+            r'<h1>([^>]+)</h1>', tv_webpage, 'title', default=None)
+
         # video_title from flashvars contains whitespace instead of non-ASCII (see
         # http://www.pornhub.com/view_video.php?viewkey=1331683002), not relying
         # on that anymore.
-        title = self._html_search_meta(
+        title = title or self._html_search_meta(
             'twitter:title', webpage, default=None) or self._search_regex(
             (r'<h1[^>]+class=["\']title["\'][^>]*>(?P<title>[^<]+)',
              r'<div[^>]+data-video-title=(["\'])(?P<title>.+?)\1',
@@ -156,42 +169,6 @@ class PornHubIE(InfoExtractor):
         comment_count = self._extract_count(
             r'All Comments\s*<span>\(([\d,.]+)\)', webpage, 'comment')
 
-        video_urls = []
-        for quote, video_url in re.findall(
-                r'player_quality_[0-9]{3,4}p\s*=\s*(["\'])(.+?)\1;', webpage):
-            video_urls.append(compat_urllib_parse_unquote(re.sub(
-                r'{0}\s*\+\s*{0}'.format(quote), '', video_url)))
-
-        if webpage.find('"encrypted":true') != -1:
-            password = compat_urllib_parse_unquote_plus(
-                self._search_regex(r'"video_title":"([^"]+)', webpage, 'password'))
-            video_urls = list(map(lambda s: aes_decrypt_text(s, password, 32).decode('utf-8'), video_urls))
-
-        formats = []
-        for video_url in video_urls:
-            path = compat_urllib_parse_urlparse(video_url).path
-            extension = os.path.splitext(path)[1][1:]
-            format = path.split('/')[5].split('_')[:2]
-            format = '-'.join(format)
-
-            m = re.match(r'^(?P<height>[0-9]+)[pP]-(?P<tbr>[0-9]+)[kK]$', format)
-            if m is None:
-                height = None
-                tbr = None
-            else:
-                height = int(m.group('height'))
-                tbr = int(m.group('tbr'))
-
-            formats.append({
-                'url': video_url,
-                'ext': extension,
-                'format': format,
-                'format_id': format,
-                'tbr': tbr,
-                'height': height,
-            })
-        self._sort_formats(formats)
-
         page_params = self._parse_json(self._search_regex(
             r'page_params\.zoneDetails\[([\'"])[^\'"]+\1\]\s*=\s*(?P<data>{[^}]+})',
             webpage, 'page parameters', group='data', default='{}'),
@@ -203,6 +180,7 @@ class PornHubIE(InfoExtractor):
 
         return {
             'id': video_id,
+            'url': video_url,
             'uploader': video_uploader,
             'title': title,
             'thumbnail': thumbnail,
@@ -211,7 +189,7 @@ class PornHubIE(InfoExtractor):
             'like_count': like_count,
             'dislike_count': dislike_count,
             'comment_count': comment_count,
-            'formats': formats,
+            # 'formats': formats,
             'age_limit': 18,
             'tags': tags,
             'categories': categories,
index 1a0cce7e0274bb4a06bf9b0604d9ebdf75cf3df5..2831368b6a85e1aa7926c8721560f1e1dc8be5f1 100644 (file)
@@ -2,13 +2,13 @@ from __future__ import unicode_literals
 
 import re
 
-from .jwplatform import JWPlatformBaseIE
+from .common import InfoExtractor
 from ..utils import (
     str_to_int,
 )
 
 
-class PornoXOIE(JWPlatformBaseIE):
+class PornoXOIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?pornoxo\.com/videos/(?P<id>\d+)/(?P<display_id>[^/]+)\.html'
     _TEST = {
         'url': 'http://www.pornoxo.com/videos/7564/striptease-from-sexy-secretary.html',
index 5091d8456faf3a4841ba770ea408aa75be806f83..1245309a7ebc1e5621ee4c566ef64f16676c6011 100644 (file)
@@ -424,3 +424,6 @@ class ProSiebenSat1IE(ProSiebenSat1BaseIE):
             return self._extract_clip(url, webpage)
         elif page_type == 'playlist':
             return self._extract_playlist(url, webpage)
+        else:
+            raise ExtractorError(
+                'Unsupported page type %s' % page_type, expected=True)
index 422c02cff37620cbc52f7056f92987682f394c30..d338b3a933cf10fb50621ea4d785d4e83324466c 100644 (file)
@@ -2,11 +2,10 @@
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
-from .jwplatform import JWPlatformBaseIE
 from ..compat import compat_str
 
 
-class RENTVIE(JWPlatformBaseIE):
+class RENTVIE(InfoExtractor):
     _VALID_URL = r'(?:rentv:|https?://(?:www\.)?ren\.tv/(?:player|video/epizod)/)(?P<id>\d+)'
     _TESTS = [{
         'url': 'http://ren.tv/video/epizod/118577',
index 3bfe934d82c9db7fe7ff8b1b8820d4c96ca0cb32..51644011e5d5587462ef56a0511767437d93b77e 100644 (file)
@@ -3,7 +3,7 @@ from __future__ import unicode_literals
 
 import re
 
-from .jwplatform import JWPlatformBaseIE
+from .common import InfoExtractor
 from ..utils import (
     js_to_json,
     get_element_by_class,
@@ -11,7 +11,7 @@ from ..utils import (
 )
 
 
-class RudoIE(JWPlatformBaseIE):
+class RudoIE(InfoExtractor):
     _VALID_URL = r'https?://rudo\.video/vod/(?P<id>[0-9a-zA-Z]+)'
 
     _TEST = {
index 94a2a37d20696fa3ffc65b6f1df04cab42c7785d..b5e76c9af04d0c98a1214ec3ce8255b7d6c7d4e6 100644 (file)
@@ -1,11 +1,11 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
-from .jwplatform import JWPlatformBaseIE
+from .common import InfoExtractor
 from ..utils import js_to_json
 
 
-class ScreencastOMaticIE(JWPlatformBaseIE):
+class ScreencastOMaticIE(InfoExtractor):
     _VALID_URL = r'https?://screencast-o-matic\.com/watch/(?P<id>[0-9a-zA-Z]+)'
     _TEST = {
         'url': 'http://screencast-o-matic.com/watch/c2lD3BeOPl',
diff --git a/youtube_dl/extractor/scrippsnetworks.py b/youtube_dl/extractor/scrippsnetworks.py
new file mode 100644 (file)
index 0000000..597d6f5
--- /dev/null
@@ -0,0 +1,60 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .adobepass import AdobePassIE
+from ..utils import (
+    int_or_none,
+    smuggle_url,
+    update_url_query,
+)
+
+
+class ScrippsNetworksWatchIE(AdobePassIE):
+    IE_NAME = 'scrippsnetworks:watch'
+    _VALID_URL = r'https?://watch\.(?:hgtv|foodnetwork|travelchannel|diynetwork|cookingchanneltv)\.com/player\.[A-Z0-9]+\.html#(?P<id>\d+)'
+    _TEST = {
+        'url': 'http://watch.hgtv.com/player.HNT.html#0256538',
+        'md5': '26545fd676d939954c6808274bdb905a',
+        'info_dict': {
+            'id': '0256538',
+            'ext': 'mp4',
+            'title': 'Seeking a Wow House',
+            'description': 'Buyers retiring in Palm Springs, California, want a modern house with major wow factor. They\'re also looking for a pool and a large, open floorplan with tall windows looking out at the views.',
+            'uploader': 'SCNI',
+            'upload_date': '20170207',
+            'timestamp': 1486450493,
+        },
+        'skip': 'requires TV provider authentication',
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+        channel = self._parse_json(self._search_regex(
+            r'"channels"\s*:\s*(\[.+\])',
+            webpage, 'channels'), video_id)[0]
+        video_data = next(v for v in channel['videos'] if v.get('nlvid') == video_id)
+        title = video_data['title']
+        release_url = video_data['releaseUrl']
+        if video_data.get('restricted'):
+            requestor_id = self._search_regex(
+                r'requestorId\s*=\s*"([^"]+)";', webpage, 'requestor id')
+            resource = self._get_mvpd_resource(
+                requestor_id, title, video_id,
+                video_data.get('ratings', [{}])[0].get('rating'))
+            auth = self._extract_mvpd_auth(
+                url, video_id, requestor_id, resource)
+            release_url = update_url_query(release_url, {'auth': auth})
+
+        return {
+            '_type': 'url_transparent',
+            'id': video_id,
+            'title': title,
+            'url': smuggle_url(release_url, {'force_smil_url': True}),
+            'description': video_data.get('description'),
+            'thumbnail': video_data.get('thumbnailUrl'),
+            'series': video_data.get('showTitle'),
+            'season_number': int_or_none(video_data.get('season')),
+            'episode_number': int_or_none(video_data.get('episodeNumber')),
+            'ie_key': 'ThePlatform',
+        }
index 9880a5a78c1f4b18d41c55e1899405dbdb98e7dc..9d9652949bb64ca2a15c02b3e0733dd9b7f42493 100644 (file)
@@ -3,7 +3,7 @@ from __future__ import unicode_literals
 
 import re
 
-from .jwplatform import JWPlatformBaseIE
+from .common import InfoExtractor
 from ..utils import (
     float_or_none,
     parse_iso8601,
@@ -14,7 +14,7 @@ from ..utils import (
 )
 
 
-class SendtoNewsIE(JWPlatformBaseIE):
+class SendtoNewsIE(InfoExtractor):
     _VALID_URL = r'https?://embed\.sendtonews\.com/player2/embedplayer\.php\?.*\bSC=(?P<id>[0-9A-Za-z-]+)'
 
     _TEST = {
index d3aba58a29a981829d0562ee56d992fc49f2f67c..547be8f9555c6691a1d57ea4e3c4555b943ee53a 100644 (file)
 # coding: utf-8
 from __future__ import unicode_literals
 
+import re
+
 from .common import InfoExtractor
+from ..compat import compat_str
 from ..utils import (
-    qualities,
-    int_or_none,
-    mimetype2ext,
     determine_ext,
+    int_or_none,
+    try_get,
+    qualities,
 )
 
 
 class SixPlayIE(InfoExtractor):
+    IE_NAME = '6play'
     _VALID_URL = r'(?:6play:|https?://(?:www\.)?6play\.fr/.+?-c_)(?P<id>[0-9]+)'
     _TEST = {
-        'url': 'http://www.6play.fr/jamel-et-ses-amis-au-marrakech-du-rire-p_1316/jamel-et-ses-amis-au-marrakech-du-rire-2015-c_11495320',
+        'url': 'http://www.6play.fr/le-meilleur-patissier-p_1807/le-meilleur-patissier-special-fetes-mercredi-a-21-00-sur-m6-c_11638450',
         'md5': '42310bffe4ba3982db112b9cd3467328',
         'info_dict': {
-            'id': '11495320',
+            'id': '11638450',
             'ext': 'mp4',
-            'title': 'Jamel et ses amis au Marrakech du rire 2015',
-            'description': 'md5:ba2149d5c321d5201b78070ee839d872',
+            'title': 'Le Meilleur Pâtissier, spécial fêtes mercredi à 21:00 sur M6',
+            'description': 'md5:308853f6a5f9e2d55a30fc0654de415f',
+            'duration': 39,
+            'series': 'Le meilleur pâtissier',
+        },
+        'params': {
+            'skip_download': True,
         },
     }
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
-        clip_data = self._download_json(
-            'https://player.m6web.fr/v2/video/config/6play-auth/FR/%s.json' % video_id,
-            video_id)
-        video_data = clip_data['videoInfo']
 
+        data = self._download_json(
+            'https://pc.middleware.6play.fr/6play/v2/platforms/m6group_web/services/6play/videos/clip_%s' % video_id,
+            video_id, query={
+                'csa': 5,
+                'with': 'clips',
+            })
+
+        clip_data = data['clips'][0]
+        title = clip_data['title']
+
+        urls = []
         quality_key = qualities(['lq', 'sd', 'hq', 'hd'])
         formats = []
-        for source in clip_data['sources']:
-            source_type, source_url = source.get('type'), source.get('src')
-            if not source_url or source_type == 'hls/primetime':
+        for asset in clip_data['assets']:
+            asset_url = asset.get('full_physical_path')
+            protocol = asset.get('protocol')
+            if not asset_url or protocol == 'primetime' or asset_url in urls:
                 continue
-            ext = mimetype2ext(source_type) or determine_ext(source_url)
-            if ext == 'm3u8':
-                formats.extend(self._extract_m3u8_formats(
-                    source_url, video_id, 'mp4', 'm3u8_native',
-                    m3u8_id='hls', fatal=False))
-                formats.extend(self._extract_f4m_formats(
-                    source_url.replace('.m3u8', '.f4m'),
-                    video_id, f4m_id='hds', fatal=False))
-            elif ext == 'mp4':
-                quality = source.get('quality')
+            urls.append(asset_url)
+            container = asset.get('video_container')
+            ext = determine_ext(asset_url)
+            if container == 'm3u8' or ext == 'm3u8':
+                if protocol == 'usp':
+                    asset_url = re.sub(r'/([^/]+)\.ism/[^/]*\.m3u8', r'/\1.ism/\1.m3u8', asset_url)
+                    formats.extend(self._extract_m3u8_formats(
+                        asset_url, video_id, 'mp4', 'm3u8_native',
+                        m3u8_id='hls', fatal=False))
+                    formats.extend(self._extract_f4m_formats(
+                        asset_url.replace('.m3u8', '.f4m'),
+                        video_id, f4m_id='hds', fatal=False))
+                    formats.extend(self._extract_mpd_formats(
+                        asset_url.replace('.m3u8', '.mpd'),
+                        video_id, mpd_id='dash', fatal=False))
+                    formats.extend(self._extract_ism_formats(
+                        re.sub(r'/[^/]+\.m3u8', '/Manifest', asset_url),
+                        video_id, ism_id='mss', fatal=False))
+                else:
+                    formats.extend(self._extract_m3u8_formats(
+                        asset_url, video_id, 'mp4', 'm3u8_native',
+                        m3u8_id='hls', fatal=False))
+            elif container == 'mp4' or ext == 'mp4':
+                quality = asset.get('video_quality')
                 formats.append({
-                    'url': source_url,
+                    'url': asset_url,
                     'format_id': quality,
                     'quality': quality_key(quality),
                     'ext': ext,
                 })
         self._sort_formats(formats)
 
+        def get(getter):
+            for src in (data, clip_data):
+                v = try_get(src, getter, compat_str)
+                if v:
+                    return v
+
         return {
             'id': video_id,
-            'title': video_data['title'].strip(),
-            'description': video_data.get('description'),
-            'duration': int_or_none(video_data.get('duration')),
-            'series': video_data.get('titlePgm'),
+            'title': title,
+            'description': get(lambda x: x['description']),
+            'duration': int_or_none(clip_data.get('duration')),
+            'series': get(lambda x: x['program']['title']),
             'formats': formats,
         }
diff --git a/youtube_dl/extractor/skylinewebcams.py b/youtube_dl/extractor/skylinewebcams.py
new file mode 100644 (file)
index 0000000..5b4aaac
--- /dev/null
@@ -0,0 +1,42 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+
+class SkylineWebcamsIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?skylinewebcams\.com/[^/]+/webcam/(?:[^/]+/)+(?P<id>[^/]+)\.html'
+    _TEST = {
+        'url': 'https://www.skylinewebcams.com/it/webcam/italia/lazio/roma/scalinata-piazza-di-spagna-barcaccia.html',
+        'info_dict': {
+            'id': 'scalinata-piazza-di-spagna-barcaccia',
+            'ext': 'mp4',
+            'title': 're:^Live Webcam Scalinata di Piazza di Spagna - La Barcaccia [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
+            'description': 'Roma, veduta sulla Scalinata di Piazza di Spagna e sulla Barcaccia',
+            'is_live': True,
+        },
+        'params': {
+            'skip_download': True,
+        }
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, video_id)
+
+        stream_url = self._search_regex(
+            r'url\s*:\s*(["\'])(?P<url>(?:https?:)?//.+?\.m3u8.*?)\1', webpage,
+            'stream url', group='url')
+
+        title = self._og_search_title(webpage)
+        description = self._og_search_description(webpage)
+
+        return {
+            'id': video_id,
+            'url': stream_url,
+            'ext': 'mp4',
+            'title': self._live_title(title),
+            'description': description,
+            'is_live': True,
+        }
index 30760ca06be4b3fc112f3fe0200c74b665d64855..7da12cef8fe1c06a064ee82da3e08ed39a39991d 100644 (file)
@@ -108,12 +108,11 @@ class SohuIE(InfoExtractor):
         if vid_data['play'] != 1:
             if vid_data.get('status') == 12:
                 raise ExtractorError(
-                    'Sohu said: There\'s something wrong in the video.',
+                    '%s said: There\'s something wrong in the video.' % self.IE_NAME,
                     expected=True)
             else:
-                raise ExtractorError(
-                    'Sohu said: The video is only licensed to users in Mainland China.',
-                    expected=True)
+                self.raise_geo_restricted(
+                    '%s said: The video is only licensed to users in Mainland China.' % self.IE_NAME)
 
         formats_json = {}
         for format_id in ('nor', 'high', 'super', 'ori', 'h2644k', 'h2654k'):
index 123c33ac36e275d8b624c8830153235c3a4ef338..3394c7e6ba4713ad0c63e2579d611a0319c45add 100644 (file)
@@ -23,6 +23,10 @@ class SpankBangIE(InfoExtractor):
         # 480p only
         'url': 'http://spankbang.com/1vt0/video/solvane+gangbang',
         'only_matching': True,
+    }, {
+        # no uploader
+        'url': 'http://spankbang.com/lklg/video/sex+with+anyone+wedding+edition+2',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
@@ -48,7 +52,7 @@ class SpankBangIE(InfoExtractor):
         thumbnail = self._og_search_thumbnail(webpage)
         uploader = self._search_regex(
             r'class="user"[^>]*><img[^>]+>([^<]+)',
-            webpage, 'uploader', fatal=False)
+            webpage, 'uploader', default=None)
 
         age_limit = self._rta_search(webpage)
 
diff --git a/youtube_dl/extractor/sprout.py b/youtube_dl/extractor/sprout.py
new file mode 100644 (file)
index 0000000..8467bf4
--- /dev/null
@@ -0,0 +1,52 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .adobepass import AdobePassIE
+from ..utils import (
+    extract_attributes,
+    update_url_query,
+    smuggle_url,
+)
+
+
+class SproutIE(AdobePassIE):
+    _VALID_URL = r'https?://(?:www\.)?sproutonline\.com/watch/(?P<id>[^/?#]+)'
+    _TEST = {
+        'url': 'http://www.sproutonline.com/watch/cowboy-adventure',
+        'md5': '74bf14128578d1e040c3ebc82088f45f',
+        'info_dict': {
+            'id': '9dexnwtmh8_X',
+            'ext': 'mp4',
+            'title': 'A Cowboy Adventure',
+            'description': 'Ruff-Ruff, Tweet and Dave get to be cowboys for the day at Six Cow Corral.',
+            'timestamp': 1437758640,
+            'upload_date': '20150724',
+            'uploader': 'NBCU-SPROUT-NEW',
+        }
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+        video_component = self._search_regex(
+            r'(?s)(<div[^>]+data-component="video"[^>]*?>)',
+            webpage, 'video component', default=None)
+        if video_component:
+            options = self._parse_json(extract_attributes(
+                video_component)['data-options'], video_id)
+            theplatform_url = options['video']
+            query = {
+                'mbr': 'true',
+                'manifest': 'm3u',
+            }
+            if options.get('protected'):
+                query['auth'] = self._extract_mvpd_auth(url, options['pid'], 'sprout', 'sprout')
+            theplatform_url = smuggle_url(update_url_query(
+                theplatform_url, query), {'force_smil_url': True})
+        else:
+            iframe = self._search_regex(
+                r'(<iframe[^>]+id="sproutVideoIframe"[^>]*?>)',
+                webpage, 'iframe')
+            theplatform_url = extract_attributes(iframe)['src']
+
+        return self.url_result(theplatform_url, 'ThePlatform')
index 319a48a7a543dfcfade0cb91726103a66d864711..bb73eb1d5c2adb848b1b8af13c7b70a0d10f919d 100644 (file)
@@ -14,6 +14,8 @@ from ..utils import (
 
 class SRGSSRIE(InfoExtractor):
     _VALID_URL = r'(?:https?://tp\.srgssr\.ch/p(?:/[^/]+)+\?urn=urn|srgssr):(?P<bu>srf|rts|rsi|rtr|swi):(?:[^:]+:)?(?P<type>video|audio):(?P<id>[0-9a-f\-]{36}|\d+)'
+    _GEO_BYPASS = False
+    _GEO_COUNTRIES = ['CH']
 
     _ERRORS = {
         'AGERATING12': 'To protect children under the age of 12, this video is only available between 8 p.m. and 6 a.m.',
@@ -40,8 +42,12 @@ class SRGSSRIE(InfoExtractor):
             media_id)[media_type.capitalize()]
 
         if media_data.get('block') and media_data['block'] in self._ERRORS:
-            raise ExtractorError('%s said: %s' % (
-                self.IE_NAME, self._ERRORS[media_data['block']]), expected=True)
+            message = self._ERRORS[media_data['block']]
+            if media_data['block'] == 'GEOBLOCK':
+                self.raise_geo_restricted(
+                    msg=message, countries=self._GEO_COUNTRIES)
+            raise ExtractorError(
+                '%s said: %s' % (self.IE_NAME, message), expected=True)
 
         return media_data
 
index 10cf808857e231cee482434010161a93eee85027..1b5afb73ee473b23e78c27caba6708ce9ceb348f 100644 (file)
@@ -13,6 +13,8 @@ from ..utils import (
 
 
 class SVTBaseIE(InfoExtractor):
+    _GEO_COUNTRIES = ['SE']
+
     def _extract_video(self, video_info, video_id):
         formats = []
         for vr in video_info['videoReferences']:
@@ -38,7 +40,9 @@ class SVTBaseIE(InfoExtractor):
                     'url': vurl,
                 })
         if not formats and video_info.get('rights', {}).get('geoBlockedSweden'):
-            self.raise_geo_restricted('This video is only available in Sweden')
+            self.raise_geo_restricted(
+                'This video is only available in Sweden',
+                countries=self._GEO_COUNTRIES)
         self._sort_formats(formats)
 
         subtitles = {}
index 4043fcb92457af1066735b3cb87e9550568efa4c..82d73c31d55c55dfb7cdf40f593265806f3fdb89 100644 (file)
@@ -2,7 +2,10 @@
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
-from ..utils import int_or_none
+from ..utils import (
+    int_or_none,
+    smuggle_url,
+)
 
 
 class TeleQuebecIE(InfoExtractor):
@@ -28,7 +31,7 @@ class TeleQuebecIE(InfoExtractor):
         return {
             '_type': 'url_transparent',
             'id': media_id,
-            'url': 'limelight:media:' + media_data['streamInfo']['sourceId'],
+            'url': smuggle_url('limelight:media:' + media_data['streamInfo']['sourceId'], {'geo_countries': ['CA']}),
             'title': media_data['title'],
             'description': media_data.get('descriptions', [{'text': None}])[0].get('text'),
             'duration': int_or_none(media_data.get('durationInMilliseconds'), 1000),
index 6f1eeac57bf67e5be4ab6fd45b88ca0d13c01f5c..0e2370cd828f78a2e1a708852a392a05d96e3039 100644 (file)
@@ -8,10 +8,12 @@ from ..utils import (
     HEADRequest,
     ExtractorError,
     int_or_none,
+    clean_html,
 )
 
 
 class TFOIE(InfoExtractor):
+    _GEO_COUNTRIES = ['CA']
     _VALID_URL = r'https?://(?:www\.)?tfo\.org/(?:en|fr)/(?:[^/]+/){2}(?P<id>\d+)'
     _TEST = {
         'url': 'http://www.tfo.org/en/universe/tfo-247/100463871/video-game-hackathon',
@@ -36,7 +38,9 @@ class TFOIE(InfoExtractor):
                 'X-tfo-session': self._get_cookies('http://www.tfo.org/')['tfo-session'].value,
             })
         if infos.get('success') == 0:
-            raise ExtractorError('%s said: %s' % (self.IE_NAME, infos['msg']), expected=True)
+            if infos.get('code') == 'ErrGeoBlocked':
+                self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
+            raise ExtractorError('%s said: %s' % (self.IE_NAME, clean_html(infos['msg'])), expected=True)
         video_data = infos['data']
 
         return {
index 192d8fa292e0a6f360929590274d06b4745fb8f6..9a424b1c6aeb089af8050d7eee6b29591968c3aa 100644 (file)
@@ -179,10 +179,12 @@ class ThePlatformIE(ThePlatformBaseIE, AdobePassIE):
         if m:
             return [m.group('url')]
 
+        # Are whitespaces ignored in URLs?
+        # https://github.com/rg3/youtube-dl/issues/12044
         matches = re.findall(
-            r'<(?:iframe|script)[^>]+src=(["\'])((?:https?:)?//player\.theplatform\.com/p/.+?)\1', webpage)
+            r'(?s)<(?:iframe|script)[^>]+src=(["\'])((?:https?:)?//player\.theplatform\.com/p/.+?)\1', webpage)
         if matches:
-            return list(zip(*matches))[1]
+            return [re.sub(r'\s', '', list(zip(*matches))[1][0])]
 
     @staticmethod
     def _sign_url(url, sig_key, sig_secret, life=600, include_qs=False):
@@ -306,9 +308,10 @@ class ThePlatformFeedIE(ThePlatformBaseIE):
         },
     }]
 
-    def _extract_feed_info(self, provider_id, feed_id, filter_query, video_id, custom_fields=None, asset_types_query={}):
+    def _extract_feed_info(self, provider_id, feed_id, filter_query, video_id, custom_fields=None, asset_types_query={}, account_id=None):
         real_url = self._URL_TEMPLATE % (self.http_scheme(), provider_id, feed_id, filter_query)
         entry = self._download_json(real_url, video_id)['entries'][0]
+        main_smil_url = 'http://link.theplatform.com/s/%s/media/guid/%d/%s' % (provider_id, account_id, entry['guid']) if account_id else None
 
         formats = []
         subtitles = {}
@@ -333,7 +336,7 @@ class ThePlatformFeedIE(ThePlatformBaseIE):
                 if asset_type in asset_types_query:
                     query.update(asset_types_query[asset_type])
                 cur_formats, cur_subtitles = self._extract_theplatform_smil(update_url_query(
-                    smil_url, query), video_id, 'Downloading SMIL data for %s' % asset_type)
+                    main_smil_url or smil_url, query), video_id, 'Downloading SMIL data for %s' % asset_type)
                 formats.extend(cur_formats)
                 subtitles = self._merge_subtitles(subtitles, cur_subtitles)
 
index ce1326c03643186b4e1eb58905ef8f9c868588f6..b8504f0ebdc04ade7d580102f0bcf506bebc4230 100644 (file)
@@ -3,7 +3,10 @@ from __future__ import unicode_literals
 from .common import InfoExtractor
 
 from ..compat import compat_urlparse
-from ..utils import qualities
+from ..utils import (
+    int_or_none,
+    qualities,
+)
 
 
 class TheSceneIE(InfoExtractor):
@@ -16,6 +19,11 @@ class TheSceneIE(InfoExtractor):
             'ext': 'mp4',
             'title': 'Narciso Rodriguez: Spring 2013 Ready-to-Wear',
             'display_id': 'narciso-rodriguez-spring-2013-ready-to-wear',
+            'duration': 127,
+            'series': 'Style.com Fashion Shows',
+            'season': 'Ready To Wear Spring 2013',
+            'tags': list,
+            'categories': list,
         },
     }
 
@@ -32,21 +40,29 @@ class TheSceneIE(InfoExtractor):
         player = self._download_webpage(player_url, display_id)
         info = self._parse_json(
             self._search_regex(
-                r'(?m)var\s+video\s+=\s+({.+?});$', player, 'info json'),
+                r'(?m)video\s*:\s*({.+?}),$', player, 'info json'),
             display_id)
 
+        video_id = info['id']
+        title = info['title']
+
         qualities_order = qualities(('low', 'high'))
         formats = [{
             'format_id': '{0}-{1}'.format(f['type'].split('/')[0], f['quality']),
             'url': f['src'],
             'quality': qualities_order(f['quality']),
-        } for f in info['sources'][0]]
+        } for f in info['sources']]
         self._sort_formats(formats)
 
         return {
-            'id': info['id'],
+            'id': video_id,
             'display_id': display_id,
-            'title': info['title'],
+            'title': title,
             'formats': formats,
             'thumbnail': info.get('poster_frame'),
+            'duration': int_or_none(info.get('duration')),
+            'series': info.get('series_title'),
+            'season': info.get('season_title'),
+            'tags': info.get('tags'),
+            'categories': info.get('categories'),
         }
index 4473a3c773c3d9c4c26361e907769e8bb1ac9fad..33683b139dee3cbf2513a30efb979701c5f93ee9 100644 (file)
@@ -3,13 +3,14 @@ from __future__ import unicode_literals
 
 import re
 
-from .jwplatform import JWPlatformBaseIE
+from .common import InfoExtractor
 from ..utils import remove_end
 
 
-class ThisAVIE(JWPlatformBaseIE):
+class ThisAVIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?thisav\.com/video/(?P<id>[0-9]+)/.*'
     _TESTS = [{
+        # jwplayer
         'url': 'http://www.thisav.com/video/47734/%98%26sup1%3B%83%9E%83%82---just-fit.html',
         'md5': '0480f1ef3932d901f0e0e719f188f19b',
         'info_dict': {
@@ -20,6 +21,7 @@ class ThisAVIE(JWPlatformBaseIE):
             'uploader_id': 'dj7970'
         }
     }, {
+        # html5 media
         'url': 'http://www.thisav.com/video/242352/nerdy-18yo-big-ass-tattoos-and-glasses.html',
         'md5': 'ba90c076bd0f80203679e5b60bf523ee',
         'info_dict': {
@@ -48,8 +50,12 @@ class ThisAVIE(JWPlatformBaseIE):
                 }],
             }
         else:
-            info_dict = self._extract_jwplayer_data(
-                webpage, video_id, require_title=False)
+            entries = self._parse_html5_media_entries(url, webpage, video_id)
+            if entries:
+                info_dict = entries[0]
+            else:
+                info_dict = self._extract_jwplayer_data(
+                    webpage, video_id, require_title=False)
         uploader = self._html_search_regex(
             r': <a href="http://www.thisav.com/user/[0-9]+/(?:[^"]+)">([^<]+)</a>',
             webpage, 'uploader name', fatal=False)
index 3a37df2e8eb710c68d448b6231850ddc846ec716..c44018aec39c56eb4b8dccc8b4d001f783e2a19d 100644 (file)
@@ -16,6 +16,7 @@ class TubiTvIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?tubitv\.com/video/(?P<id>[0-9]+)'
     _LOGIN_URL = 'http://tubitv.com/login'
     _NETRC_MACHINE = 'tubitv'
+    _GEO_COUNTRIES = ['US']
     _TEST = {
         'url': 'http://tubitv.com/video/283829/the_comedian_at_the_friday',
         'md5': '43ac06be9326f41912dc64ccf7a80320',
index ad79db92beb3825dc1293b047acf7c61ca99386a..7aeb2c6201513d49d2f2bf3b3f46e7c493fd6411 100644 (file)
@@ -24,6 +24,7 @@ class TV4IE(InfoExtractor):
                 sport/|
             )
         )(?P<id>[0-9]+)'''
+    _GEO_COUNTRIES = ['SE']
     _TESTS = [
         {
             'url': 'http://www.tv4.se/kalla-fakta/klipp/kalla-fakta-5-english-subtitles-2491650',
@@ -71,16 +72,12 @@ class TV4IE(InfoExtractor):
             'http://www.tv4play.se/player/assets/%s.json' % video_id,
             video_id, 'Downloading video info JSON')
 
-        # If is_geo_restricted is true, it doesn't necessarily mean we can't download it
-        if info.get('is_geo_restricted'):
-            self.report_warning('This content might not be available in your country due to licensing restrictions.')
-
         title = info['title']
 
         subtitles = {}
         formats = []
         # http formats are linked with unresolvable host
-        for kind in ('hls', ''):
+        for kind in ('hls3', ''):
             data = self._download_json(
                 'https://prima.tv4play.se/api/web/asset/%s/play.json' % video_id,
                 video_id, 'Downloading sources JSON', query={
@@ -113,6 +110,10 @@ class TV4IE(InfoExtractor):
                                 'url': manifest_url,
                                 'ext': 'vtt',
                             }]})
+
+        if not formats and info.get('is_geo_restricted'):
+            self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
+
         self._sort_formats(formats)
 
         return {
diff --git a/youtube_dl/extractor/tvn24.py b/youtube_dl/extractor/tvn24.py
new file mode 100644 (file)
index 0000000..12ed603
--- /dev/null
@@ -0,0 +1,76 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+    int_or_none,
+    unescapeHTML,
+)
+
+
+class TVN24IE(InfoExtractor):
+    _VALID_URL = r'https?://(?:(?:[^/]+)\.)?tvn24(?:bis)?\.pl/(?:[^/]+/)*(?P<id>[^/]+)\.html'
+    _TESTS = [{
+        'url': 'http://www.tvn24.pl/wiadomosci-z-kraju,3/oredzie-artura-andrusa,702428.html',
+        'md5': 'fbdec753d7bc29d96036808275f2130c',
+        'info_dict': {
+            'id': '1584444',
+            'ext': 'mp4',
+            'title': '"Święta mają być wesołe, dlatego, ludziska, wszyscy pod jemiołę"',
+            'description': 'Wyjątkowe orędzie Artura Andrusa, jednego z gości "Szkła kontaktowego".',
+            'thumbnail': 're:http://.*[.]jpeg',
+        }
+    }, {
+        'url': 'http://fakty.tvn24.pl/ogladaj-online,60/53-konferencja-bezpieczenstwa-w-monachium,716431.html',
+        'only_matching': True,
+    }, {
+        'url': 'http://sport.tvn24.pl/pilka-nozna,105/ligue-1-kamil-glik-rozcial-glowe-monaco-tylko-remisuje-z-bastia,716522.html',
+        'only_matching': True,
+    }, {
+        'url': 'http://tvn24bis.pl/poranek,146,m/gen-koziej-w-tvn24-bis-wracamy-do-czasow-zimnej-wojny,715660.html',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, video_id)
+
+        title = self._og_search_title(webpage)
+
+        def extract_json(attr, name, fatal=True):
+            return self._parse_json(
+                self._search_regex(
+                    r'\b%s=(["\'])(?P<json>(?!\1).+?)\1' % attr, webpage,
+                    name, group='json', fatal=fatal) or '{}',
+                video_id, transform_source=unescapeHTML, fatal=fatal)
+
+        quality_data = extract_json('data-quality', 'formats')
+
+        formats = []
+        for format_id, url in quality_data.items():
+            formats.append({
+                'url': url,
+                'format_id': format_id,
+                'height': int_or_none(format_id.rstrip('p')),
+            })
+        self._sort_formats(formats)
+
+        description = self._og_search_description(webpage)
+        thumbnail = self._og_search_thumbnail(
+            webpage, default=None) or self._html_search_regex(
+            r'\bdata-poster=(["\'])(?P<url>(?!\1).+?)\1', webpage,
+            'thumbnail', group='url')
+
+        share_params = extract_json(
+            'data-share-params', 'share params', fatal=False)
+        if isinstance(share_params, dict):
+            video_id = share_params.get('id') or video_id
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'formats': formats,
+        }
index 6d5c748264197ba99a2be8fefccf375761818610..1a5b76bf2ebd069bc36ec107c61adaa99c4c180c 100644 (file)
@@ -1,7 +1,7 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
-from .jwplatform import JWPlatformBaseIE
+from .common import InfoExtractor
 from ..utils import (
     clean_html,
     get_element_by_class,
@@ -9,7 +9,7 @@ from ..utils import (
 )
 
 
-class TVNoeIE(JWPlatformBaseIE):
+class TVNoeIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?tvnoe\.cz/video/(?P<id>[0-9]+)'
     _TEST = {
         'url': 'http://www.tvnoe.cz/video/10362',
diff --git a/youtube_dl/extractor/tvplayer.py b/youtube_dl/extractor/tvplayer.py
new file mode 100644 (file)
index 0000000..b653714
--- /dev/null
@@ -0,0 +1,75 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import compat_HTTPError
+from ..utils import (
+    extract_attributes,
+    urlencode_postdata,
+    ExtractorError,
+)
+
+
+class TVPlayerIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?tvplayer\.com/watch/(?P<id>[^/?#]+)'
+    _TEST = {
+        'url': 'http://tvplayer.com/watch/bbcone',
+        'info_dict': {
+            'id': '89',
+            'ext': 'mp4',
+            'title': r're:^BBC One [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
+        },
+        'params': {
+            # m3u8 download
+            'skip_download': True,
+        }
+    }
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+        webpage = self._download_webpage(url, display_id)
+
+        current_channel = extract_attributes(self._search_regex(
+            r'(<div[^>]+class="[^"]*current-channel[^"]*"[^>]*>)',
+            webpage, 'channel element'))
+        title = current_channel['data-name']
+
+        resource_id = self._search_regex(
+            r'resourceId\s*=\s*"(\d+)"', webpage, 'resource id')
+        platform = self._search_regex(
+            r'platform\s*=\s*"([^"]+)"', webpage, 'platform')
+        token = self._search_regex(
+            r'token\s*=\s*"([^"]+)"', webpage, 'token', default='null')
+        validate = self._search_regex(
+            r'validate\s*=\s*"([^"]+)"', webpage, 'validate', default='null')
+
+        try:
+            response = self._download_json(
+                'http://api.tvplayer.com/api/v2/stream/live',
+                resource_id, headers={
+                    'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
+                }, data=urlencode_postdata({
+                    'service': 1,
+                    'platform': platform,
+                    'id': resource_id,
+                    'token': token,
+                    'validate': validate,
+                }))['tvplayer']['response']
+        except ExtractorError as e:
+            if isinstance(e.cause, compat_HTTPError):
+                response = self._parse_json(
+                    e.cause.read().decode(), resource_id)['tvplayer']['response']
+                raise ExtractorError(
+                    '%s said: %s' % (self.IE_NAME, response['error']), expected=True)
+            raise
+
+        formats = self._extract_m3u8_formats(response['stream'], resource_id, 'mp4')
+        self._sort_formats(formats)
+
+        return {
+            'id': resource_id,
+            'display_id': display_id,
+            'title': self._live_title(title),
+            'formats': formats,
+            'is_live': True,
+        }
index a983ebf05ac512242415a3052fbd172668ff060e..f3541b6540c2b772afbc2d89d097f75ac531a2b3 100644 (file)
@@ -12,7 +12,7 @@ from ..utils import (
 
 class TwentyFourVideoIE(InfoExtractor):
     IE_NAME = '24video'
-    _VALID_URL = r'https?://(?:www\.)?24video\.(?:net|me|xxx|sex)/(?:video/(?:view|xml)/|player/new24_play\.swf\?id=)(?P<id>\d+)'
+    _VALID_URL = r'https?://(?:www\.)?24video\.(?:net|me|xxx|sex|tube)/(?:video/(?:view|xml)/|player/new24_play\.swf\?id=)(?P<id>\d+)'
 
     _TESTS = [{
         'url': 'http://www.24video.net/video/view/1044982',
@@ -37,6 +37,9 @@ class TwentyFourVideoIE(InfoExtractor):
     }, {
         'url': 'http://www.24video.me/video/view/1044982',
         'only_matching': True,
+    }, {
+        'url': 'http://www.24video.tube/video/view/2363750',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
index bef6394626d4eca25785d35199c1092f69f45b54..8152acefd099da43e18bca3e5be579d24dd29062 100644 (file)
@@ -20,6 +20,7 @@ class Vbox7IE(InfoExtractor):
                         )
                         (?P<id>[\da-fA-F]+)
                     '''
+    _GEO_COUNTRIES = ['BG']
     _TESTS = [{
         'url': 'http://vbox7.com/play:0946fff23c',
         'md5': 'a60f9ab3a3a2f013ef9a967d5f7be5bf',
@@ -78,7 +79,7 @@ class Vbox7IE(InfoExtractor):
         video_url = video['src']
 
         if '/na.mp4' in video_url:
-            self.raise_geo_restricted()
+            self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
 
         uploader = video.get('uploader')
 
index 8a574bc269789e14f3dcadd6167c5caaa46e49e3..0f8c156a79ff7d67958b88e488ff50548bf3c998 100644 (file)
@@ -14,6 +14,7 @@ from ..utils import (
 
 class VGTVIE(XstreamIE):
     IE_DESC = 'VGTV, BTTV, FTV, Aftenposten and Aftonbladet'
+    _GEO_BYPASS = False
 
     _HOST_TO_APPNAME = {
         'vgtv.no': 'vgtv',
@@ -217,7 +218,8 @@ class VGTVIE(XstreamIE):
             properties = try_get(
                 data, lambda x: x['streamConfiguration']['properties'], list)
             if properties and 'geoblocked' in properties:
-                raise self.raise_geo_restricted()
+                raise self.raise_geo_restricted(
+                    countries=[host.rpartition('.')[-1].partition('/')[0].upper()])
 
         self._sort_formats(info['formats'])
 
index 8a00c8fee17ee84ae0d8d0e1e7360ca67befc8b0..f0a7fd7397bd81670f993dbeb18553eb011bbd2a 100644 (file)
@@ -70,10 +70,10 @@ class ViceBaseIE(AdobePassIE):
             'url': uplynk_preplay_url,
             'id': video_id,
             'title': title,
-            'description': base.get('body'),
+            'description': base.get('body') or base.get('display_body'),
             'thumbnail': watch_hub_data.get('cover-image') or watch_hub_data.get('thumbnail'),
-            'duration': parse_duration(video_data.get('video_duration') or watch_hub_data.get('video-duration')),
-            'timestamp': int_or_none(video_data.get('created_at')),
+            'duration': int_or_none(video_data.get('video_duration')) or parse_duration(watch_hub_data.get('video-duration')),
+            'timestamp': int_or_none(video_data.get('created_at'), 1000),
             'age_limit': parse_age_limit(video_data.get('video_rating')),
             'series': video_data.get('show_title') or watch_hub_data.get('show-title'),
             'episode_number': int_or_none(episode.get('episode_number') or watch_hub_data.get('episode')),
index 0eff055a6e5ce2ced15a618c82cadbf2c6c47a4a..87f9216b5da6965cf5b9aa163040ac42ce7baae4 100644 (file)
@@ -7,16 +7,16 @@ from .vice import ViceBaseIE
 class VicelandIE(ViceBaseIE):
     _VALID_URL = r'https?://(?:www\.)?viceland\.com/[^/]+/video/[^/]+/(?P<id>[a-f0-9]+)'
     _TEST = {
-        'url': 'https://www.viceland.com/en_us/video/cyberwar-trailer/57608447973ee7705f6fbd4e',
+        'url': 'https://www.viceland.com/en_us/video/trapped/588a70d0dba8a16007de7316',
         'info_dict': {
-            'id': '57608447973ee7705f6fbd4e',
+            'id': '588a70d0dba8a16007de7316',
             'ext': 'mp4',
-            'title': 'CYBERWAR (Trailer)',
-            'description': 'Tapping into the geopolitics of hacking and surveillance, Ben Makuch travels the world to meet with hackers, government officials, and dissidents to investigate the ecosystem of cyberwarfare.',
+            'title': 'TRAPPED (Series Trailer)',
+            'description': 'md5:7a8e95c2b6cd86461502a2845e581ccf',
             'age_limit': 14,
-            'timestamp': 1466008539,
-            'upload_date': '20160615',
-            'uploader_id': '11',
+            'timestamp': 1485474122,
+            'upload_date': '20170126',
+            'uploader_id': '57a204098cb727dec794c6a3',
             'uploader': 'Viceland',
         },
         'params': {
index 9950c62ad636ee4f03389bef627da4318f019c22..d0556297e449dfffa277bcc0e339347e91f12aec 100644 (file)
@@ -3,7 +3,7 @@ from __future__ import unicode_literals
 
 import re
 
-from .jwplatform import JWPlatformBaseIE
+from .common import InfoExtractor
 from ..utils import (
     decode_packed_codes,
     js_to_json,
@@ -12,8 +12,8 @@ from ..utils import (
 )
 
 
-class VidziIE(JWPlatformBaseIE):
-    _VALID_URL = r'https?://(?:www\.)?vidzi\.tv/(?:embed-)?(?P<id>[0-9a-zA-Z]+)'
+class VidziIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?vidzi\.(?:tv|cc)/(?:embed-)?(?P<id>[0-9a-zA-Z]+)'
     _TESTS = [{
         'url': 'http://vidzi.tv/cghql9yq6emu.html',
         'md5': '4f16c71ca0c8c8635ab6932b5f3f1660',
@@ -29,6 +29,9 @@ class VidziIE(JWPlatformBaseIE):
     }, {
         'url': 'http://vidzi.tv/embed-4z2yb0rzphe9-600x338.html',
         'skip_download': True,
+    }, {
+        'url': 'http://vidzi.cc/cghql9yq6emu.html',
+        'skip_download': True,
     }]
 
     def _real_extract(self, url):
index 52dd95e2fe19041f01e49b1a4a3f9566c1f0e60b..fcf0cb100c5fb05bd59a51bce8eadc3207c0d6f7 100644 (file)
@@ -86,7 +86,9 @@ class ViewsterIE(InfoExtractor):
     def _real_extract(self, url):
         video_id = self._match_id(url)
         # Get 'api_token' cookie
-        self._request_webpage(HEADRequest('http://www.viewster.com/'), video_id)
+        self._request_webpage(
+            HEADRequest('http://www.viewster.com/'),
+            video_id, headers=self.geo_verification_headers())
         cookies = self._get_cookies('http://www.viewster.com/')
         self._AUTH_TOKEN = compat_urllib_parse_unquote(cookies['api_token'].value)
 
index 9c48701c1a568589e0a875f35fa800386c4a4058..e9c8bf824c099b1dbc5fadbf93de21043b88d8c5 100644 (file)
@@ -27,6 +27,7 @@ class VikiBaseIE(InfoExtractor):
     _APP_VERSION = '2.2.5.1428709186'
     _APP_SECRET = '-$iJ}@p7!G@SyU/je1bEyWg}upLu-6V6-Lg9VD(]siH,r.,m-r|ulZ,U4LC/SeR)'
 
+    _GEO_BYPASS = False
     _NETRC_MACHINE = 'viki'
 
     _token = None
@@ -77,8 +78,11 @@ class VikiBaseIE(InfoExtractor):
     def _check_errors(self, data):
         for reason, status in data.get('blocking', {}).items():
             if status and reason in self._ERRORS:
+                message = self._ERRORS[reason]
+                if reason == 'geo':
+                    self.raise_geo_restricted(msg=message)
                 raise ExtractorError('%s said: %s' % (
-                    self.IE_NAME, self._ERRORS[reason]), expected=True)
+                    self.IE_NAME, message), expected=True)
 
     def _real_initialize(self):
         self._login()
diff --git a/youtube_dl/extractor/vodpl.py b/youtube_dl/extractor/vodpl.py
new file mode 100644 (file)
index 0000000..9e91970
--- /dev/null
@@ -0,0 +1,32 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .onet import OnetBaseIE
+
+
+class VODPlIE(OnetBaseIE):
+    _VALID_URL = r'https?://vod\.pl/(?:[^/]+/)+(?P<id>[0-9a-zA-Z]+)'
+
+    _TESTS = [{
+        'url': 'https://vod.pl/filmy/chlopaki-nie-placza/3ep3jns',
+        'md5': 'a7dc3b2f7faa2421aefb0ecaabf7ec74',
+        'info_dict': {
+            'id': '3ep3jns',
+            'ext': 'mp4',
+            'title': 'Chłopaki nie płaczą',
+            'description': 'md5:f5f03b84712e55f5ac9f0a3f94445224',
+            'timestamp': 1463415154,
+            'duration': 5765,
+            'upload_date': '20160516',
+        },
+    }, {
+        'url': 'https://vod.pl/seriale/belfer-na-planie-praca-kamery-online/2c10heh',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+        info_dict = self._extract_from_id(self._search_mvp_id(webpage), webpage)
+        info_dict['id'] = video_id
+        return info_dict
index 54eb5142793827f8b733592d22b979d326593bee..c022fb33e94ef7f9e6f0e90d73300f866e8ffc76 100644 (file)
@@ -1,10 +1,10 @@
 from __future__ import unicode_literals
 
+from .common import InfoExtractor
 from .youtube import YoutubeIE
-from .jwplatform import JWPlatformBaseIE
 
 
-class WimpIE(JWPlatformBaseIE):
+class WimpIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?wimp\.com/(?P<id>[^/]+)'
     _TESTS = [{
         'url': 'http://www.wimp.com/maru-is-exhausted/',
index 83bc1fef2095b322a67199c60e27fcc6f8f1bcbc..5584674a061fc5a67bbb65bc0b58fc96e96eae3b 100644 (file)
@@ -44,6 +44,9 @@ class XTubeIE(InfoExtractor):
     }, {
         'url': 'xtube:625837',
         'only_matching': True,
+    }, {
+        'url': 'xtube:kVTUy_G222_',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
@@ -53,14 +56,20 @@ class XTubeIE(InfoExtractor):
 
         if not display_id:
             display_id = video_id
-            url = 'http://www.xtube.com/watch.php?v=%s' % video_id
 
-        req = sanitized_Request(url)
-        req.add_header('Cookie', 'age_verified=1; cookiesAccepted=1')
-        webpage = self._download_webpage(req, display_id)
+        if video_id.isdigit() and len(video_id) < 11:
+            url_pattern = 'http://www.xtube.com/video-watch/-%s'
+        else:
+            url_pattern = 'http://www.xtube.com/watch.php?v=%s'
+
+        webpage = self._download_webpage(
+            url_pattern % video_id, display_id, headers={
+                'Cookie': 'age_verified=1; cookiesAccepted=1',
+            })
 
         sources = self._parse_json(self._search_regex(
-            r'sources\s*:\s*({.+?}),', webpage, 'sources'), video_id)
+            r'(["\'])sources\1\s*:\s*(?P<sources>{.+?}),',
+            webpage, 'sources', group='sources'), video_id)
 
         formats = []
         for format_id, format_url in sources.items():
@@ -72,7 +81,7 @@ class XTubeIE(InfoExtractor):
         self._sort_formats(formats)
 
         title = self._search_regex(
-            (r'<h1>(?P<title>[^<]+)</h1>', r'videoTitle\s*:\s*(["\'])(?P<title>.+?)\1'),
+            (r'<h1>\s*(?P<title>[^<]+?)\s*</h1>', r'videoTitle\s*:\s*(["\'])(?P<title>.+?)\1'),
             webpage, 'title', group='title')
         description = self._search_regex(
             r'</h1>\s*<p>([^<]+)', webpage, 'description', fatal=False)
@@ -81,10 +90,10 @@ class XTubeIE(InfoExtractor):
              r'<span[^>]+class="nickname"[^>]*>([^<]+)'),
             webpage, 'uploader', fatal=False)
         duration = parse_duration(self._search_regex(
-            r'<dt>Runtime:</dt>\s*<dd>([^<]+)</dd>',
+            r'<dt>Runtime:?</dt>\s*<dd>([^<]+)</dd>',
             webpage, 'duration', fatal=False))
         view_count = str_to_int(self._search_regex(
-            r'<dt>Views:</dt>\s*<dd>([\d,\.]+)</dd>',
+            r'<dt>Views:?</dt>\s*<dd>([\d,\.]+)</dd>',
             webpage, 'view count', fatal=False))
         comment_count = str_to_int(self._html_search_regex(
             r'>Comments? \(([\d,\.]+)\)<',
index 76710931ae5e6a292af767f3f57685ad0be98cac..dec02804bf52770719f7f29b7fe893f22b26cc5f 100644 (file)
@@ -34,6 +34,7 @@ from ..utils import (
     int_or_none,
     mimetype2ext,
     orderedSet,
+    parse_codecs,
     parse_duration,
     remove_quotes,
     remove_start,
@@ -1696,15 +1697,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                                     codecs = mobj.group('val')
                                     break
                             if codecs:
-                                codecs = codecs.split(',')
-                                if len(codecs) == 2:
-                                    acodec, vcodec = codecs[1], codecs[0]
-                                else:
-                                    acodec, vcodec = (codecs[0], 'none') if kind == 'audio' else ('none', codecs[0])
-                                dct.update({
-                                    'acodec': acodec,
-                                    'vcodec': vcodec,
-                                })
+                                dct.update(parse_codecs(codecs))
                 formats.append(dct)
         elif video_info.get('hlsvp'):
             manifest_url = video_info['hlsvp'][0]
index a365923fbbadc093a484a972c85cf6070f1d2765..523bb5c95cad19ca6c201774623456d916b1886c 100644 (file)
@@ -20,9 +20,9 @@ from ..utils import (
 
 
 class ZDFBaseIE(InfoExtractor):
-    def _call_api(self, url, player, referrer, video_id):
+    def _call_api(self, url, player, referrer, video_id, item):
         return self._download_json(
-            url, video_id, 'Downloading JSON content',
+            url, video_id, 'Downloading JSON %s' % item,
             headers={
                 'Referer': referrer,
                 'Api-Auth': 'Bearer %s' % player['apiToken'],
@@ -104,7 +104,7 @@ class ZDFIE(ZDFBaseIE):
             })
             formats.append(f)
 
-    def _extract_entry(self, url, content, video_id):
+    def _extract_entry(self, url, player, content, video_id):
         title = content.get('title') or content['teaserHeadline']
 
         t = content['mainVideoContent']['http://zdf.de/rels/target']
@@ -116,7 +116,8 @@ class ZDFIE(ZDFBaseIE):
                 'http://zdf.de/rels/streams/ptmd-template'].replace(
                 '{playerId}', 'portal')
 
-        ptmd = self._download_json(urljoin(url, ptmd_path), video_id)
+        ptmd = self._call_api(
+            urljoin(url, ptmd_path), player, url, video_id, 'metadata')
 
         formats = []
         track_uris = set()
@@ -174,8 +175,9 @@ class ZDFIE(ZDFBaseIE):
         }
 
     def _extract_regular(self, url, player, video_id):
-        content = self._call_api(player['content'], player, url, video_id)
-        return self._extract_entry(player['content'], content, video_id)
+        content = self._call_api(
+            player['content'], player, url, video_id, 'content')
+        return self._extract_entry(player['content'], player, content, video_id)
 
     def _extract_mobile(self, video_id):
         document = self._download_json(
index 349f44778e799af0c58b249939f14958036080a1..8b51d3c6f4cd1ecc3bf0c57b6ed42e83a1b30613 100644 (file)
@@ -228,17 +228,29 @@ def parseOpts(overrideArguments=None):
         action='store_const', const='::', dest='source_address',
         help='Make all connections via IPv6',
     )
-    network.add_option(
+
+    geo = optparse.OptionGroup(parser, 'Geo Restriction')
+    geo.add_option(
         '--geo-verification-proxy',
         dest='geo_verification_proxy', default=None, metavar='URL',
         help='Use this proxy to verify the IP address for some geo-restricted sites. '
-        'The default proxy specified by --proxy (or none, if the options is not present) is used for the actual downloading.'
-    )
-    network.add_option(
+        'The default proxy specified by --proxy (or none, if the options is not present) is used for the actual downloading.')
+    geo.add_option(
         '--cn-verification-proxy',
         dest='cn_verification_proxy', default=None, metavar='URL',
-        help=optparse.SUPPRESS_HELP,
-    )
+        help=optparse.SUPPRESS_HELP)
+    geo.add_option(
+        '--geo-bypass',
+        action='store_true', dest='geo_bypass', default=True,
+        help='Bypass geographic restriction via faking X-Forwarded-For HTTP header (experimental)')
+    geo.add_option(
+        '--no-geo-bypass',
+        action='store_false', dest='geo_bypass', default=True,
+        help='Do not bypass geographic restriction via faking X-Forwarded-For HTTP header (experimental)')
+    geo.add_option(
+        '--geo-bypass-country', metavar='CODE',
+        dest='geo_bypass_country', default=None,
+        help='Force bypass geographic restriction with explicitly provided two-letter ISO 3166-2 country code (experimental)')
 
     selection = optparse.OptionGroup(parser, 'Video Selection')
     selection.add_option(
@@ -298,14 +310,16 @@ def parseOpts(overrideArguments=None):
         metavar='FILTER', dest='match_filter', default=None,
         help=(
             'Generic video filter. '
-            'Specify any key (see help for -o for a list of available keys) to'
-            ' match if the key is present, '
-            '!key to check if the key is not present,'
+            'Specify any key (see help for -o for a list of available keys) to '
+            'match if the key is present, '
+            '!key to check if the key is not present, '
             'key > NUMBER (like "comment_count > 12", also works with '
-            '>=, <, <=, !=, =) to compare against a number, and '
-            '& to require multiple matches. '
-            'Values which are not known are excluded unless you'
-            ' put a question mark (?) after the operator.'
+            '>=, <, <=, !=, =) to compare against a number, '
+            'key = \'LITERAL\' (like "uploader = \'Mike Smith\'", also works with !=) '
+            'to match against a string literal '
+            'and & to require multiple matches. '
+            'Values which are not known are excluded unless you '
+            'put a question mark (?) after the operator. '
             'For example, to only match videos that have been liked more than '
             '100 times and disliked less than 50 times (or the dislike '
             'functionality is not available at the given service), but who '
@@ -665,8 +679,8 @@ def parseOpts(overrideArguments=None):
         help=('Output filename template, see the "OUTPUT TEMPLATE" for all the info'))
     filesystem.add_option(
         '--autonumber-size',
-        dest='autonumber_size', metavar='NUMBER', default=5, type=int,
-        help='Specify the number of digits in %(autonumber)s when it is present in output filename template or --auto-number option is given (default is %default)')
+        dest='autonumber_size', metavar='NUMBER', type=int,
+        help=optparse.SUPPRESS_HELP)
     filesystem.add_option(
         '--autonumber-start',
         dest='autonumber_start', metavar='NUMBER', default=1, type=int,
@@ -678,15 +692,15 @@ def parseOpts(overrideArguments=None):
     filesystem.add_option(
         '-A', '--auto-number',
         action='store_true', dest='autonumber', default=False,
-        help='[deprecated; use -o "%(autonumber)s-%(title)s.%(ext)s" ] Number downloaded files starting from 00000')
+        help=optparse.SUPPRESS_HELP)
     filesystem.add_option(
         '-t', '--title',
         action='store_true', dest='usetitle', default=False,
-        help='[deprecated] Use title in file name (default)')
+        help=optparse.SUPPRESS_HELP)
     filesystem.add_option(
         '-l', '--literal', default=False,
         action='store_true', dest='usetitle',
-        help='[deprecated] Alias of --title')
+        help=optparse.SUPPRESS_HELP)
     filesystem.add_option(
         '-w', '--no-overwrites',
         action='store_true', dest='nooverwrites', default=False,
@@ -834,6 +848,7 @@ def parseOpts(overrideArguments=None):
 
     parser.add_option_group(general)
     parser.add_option_group(network)
+    parser.add_option_group(geo)
     parser.add_option_group(selection)
     parser.add_option_group(downloader)
     parser.add_option_group(filesystem)
index 1881f4849e23c749d51da2e45d655ed4e6a68314..96ddb3b36f2b9219af7edfe209b09ca3f34ae546 100644 (file)
@@ -536,8 +536,7 @@ class FFmpegSubtitlesConvertorPP(FFmpegPostProcessor):
             ext = sub['ext']
             if ext == new_ext:
                 self._downloader.to_screen(
-                    '[ffmpeg] Subtitle file for %s is already in the requested'
-                    'format' % new_ext)
+                    '[ffmpeg] Subtitle file for %s is already in the requested format' % new_ext)
                 continue
             old_file = subtitles_filename(filename, lang, ext)
             sub_filenames.append(old_file)
index 67a847ebad8238fc4f368f46b336b80e6caa3673..17b83794a2becf272005305442094223cd054638 100644 (file)
@@ -23,6 +23,7 @@ import operator
 import os
 import pipes
 import platform
+import random
 import re
 import socket
 import ssl
@@ -337,17 +338,30 @@ def get_element_by_id(id, html):
 
 
 def get_element_by_class(class_name, html):
-    return get_element_by_attribute(
+    """Return the content of the first tag with the specified class in the passed HTML document"""
+    retval = get_elements_by_class(class_name, html)
+    return retval[0] if retval else None
+
+
+def get_element_by_attribute(attribute, value, html, escape_value=True):
+    retval = get_elements_by_attribute(attribute, value, html, escape_value)
+    return retval[0] if retval else None
+
+
+def get_elements_by_class(class_name, html):
+    """Return the content of all tags with the specified class in the passed HTML document as a list"""
+    return get_elements_by_attribute(
         'class', r'[^\'"]*\b%s\b[^\'"]*' % re.escape(class_name),
         html, escape_value=False)
 
 
-def get_element_by_attribute(attribute, value, html, escape_value=True):
+def get_elements_by_attribute(attribute, value, html, escape_value=True):
     """Return the content of the tag with the specified attribute in the passed HTML document"""
 
     value = re.escape(value) if escape_value else value
 
-    m = re.search(r'''(?xs)
+    retlist = []
+    for m in re.finditer(r'''(?xs)
         <([a-zA-Z0-9:._-]+)
          (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'))*?
          \s+%s=['"]?%s['"]?
@@ -355,16 +369,15 @@ def get_element_by_attribute(attribute, value, html, escape_value=True):
         \s*>
         (?P<content>.*?)
         </\1>
-    ''' % (re.escape(attribute), value), html)
+    ''' % (re.escape(attribute), value), html):
+        res = m.group('content')
 
-    if not m:
-        return None
-    res = m.group('content')
+        if res.startswith('"') or res.startswith("'"):
+            res = res[1:-1]
 
-    if res.startswith('"') or res.startswith("'"):
-        res = res[1:-1]
+        retlist.append(unescapeHTML(res))
 
-    return unescapeHTML(res)
+    return retlist
 
 
 class HTMLAttributeParser(compat_HTMLParser):
@@ -689,7 +702,12 @@ def bug_reports_message():
     return msg
 
 
-class ExtractorError(Exception):
+class YoutubeDLError(Exception):
+    """Base exception for YoutubeDL errors."""
+    pass
+
+
+class ExtractorError(YoutubeDLError):
     """Error during info extraction."""
 
     def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None):
@@ -730,7 +748,19 @@ class RegexNotFoundError(ExtractorError):
     pass
 
 
-class DownloadError(Exception):
+class GeoRestrictedError(ExtractorError):
+    """Geographic restriction Error exception.
+
+    This exception may be thrown when a video is not available from your
+    geographic location due to geographic restrictions imposed by a website.
+    """
+    def __init__(self, msg, countries=None):
+        super(GeoRestrictedError, self).__init__(msg, expected=True)
+        self.msg = msg
+        self.countries = countries
+
+
+class DownloadError(YoutubeDLError):
     """Download Error exception.
 
     This exception may be thrown by FileDownloader objects if they are not
@@ -744,7 +774,7 @@ class DownloadError(Exception):
         self.exc_info = exc_info
 
 
-class SameFileError(Exception):
+class SameFileError(YoutubeDLError):
     """Same File exception.
 
     This exception will be thrown by FileDownloader objects if they detect
@@ -753,7 +783,7 @@ class SameFileError(Exception):
     pass
 
 
-class PostProcessingError(Exception):
+class PostProcessingError(YoutubeDLError):
     """Post Processing exception.
 
     This exception may be raised by PostProcessor's .run() method to
@@ -761,15 +791,16 @@ class PostProcessingError(Exception):
     """
 
     def __init__(self, msg):
+        super(PostProcessingError, self).__init__(msg)
         self.msg = msg
 
 
-class MaxDownloadsReached(Exception):
+class MaxDownloadsReached(YoutubeDLError):
     """ --max-downloads limit has been reached. """
     pass
 
 
-class UnavailableVideoError(Exception):
+class UnavailableVideoError(YoutubeDLError):
     """Unavailable Format exception.
 
     This exception will be thrown when a video is requested
@@ -778,7 +809,7 @@ class UnavailableVideoError(Exception):
     pass
 
 
-class ContentTooShortError(Exception):
+class ContentTooShortError(YoutubeDLError):
     """Content Too Short exception.
 
     This exception may be raised by FileDownloader objects when a file they
@@ -787,12 +818,15 @@ class ContentTooShortError(Exception):
     """
 
     def __init__(self, downloaded, expected):
+        super(ContentTooShortError, self).__init__(
+            'Downloaded {0} bytes, expected {1} bytes'.format(downloaded, expected)
+        )
         # Both in bytes
         self.downloaded = downloaded
         self.expected = expected
 
 
-class XAttrMetadataError(Exception):
+class XAttrMetadataError(YoutubeDLError):
     def __init__(self, code=None, msg='Unknown error'):
         super(XAttrMetadataError, self).__init__(msg)
         self.code = code
@@ -808,7 +842,7 @@ class XAttrMetadataError(Exception):
             self.reason = 'NOT_SUPPORTED'
 
 
-class XAttrUnavailableError(Exception):
+class XAttrUnavailableError(YoutubeDLError):
     pass
 
 
@@ -1672,6 +1706,11 @@ def setproctitle(title):
         libc = ctypes.cdll.LoadLibrary('libc.so.6')
     except OSError:
         return
+    except TypeError:
+        # LoadLibrary in Windows Python 2.7.13 only expects
+        # a bytestring, but since unicode_literals turns
+        # every string into a unicode string, it fails.
+        return
     title_bytes = title.encode('utf-8')
     buf = ctypes.create_string_buffer(len(title_bytes))
     buf.value = title_bytes
@@ -2366,6 +2405,7 @@ def _match_one(filter_part, dct):
         \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
         (?:
             (?P<intval>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)|
+            (?P<quote>["\'])(?P<quotedstrval>(?:\\.|(?!(?P=quote)|\\).)+?)(?P=quote)|
             (?P<strval>(?![0-9.])[a-z0-9A-Z]*)
         )
         \s*$
@@ -2374,7 +2414,8 @@ def _match_one(filter_part, dct):
     if m:
         op = COMPARISON_OPERATORS[m.group('op')]
         actual_value = dct.get(m.group('key'))
-        if (m.group('strval') is not None or
+        if (m.group('quotedstrval') is not None or
+            m.group('strval') is not None or
             # If the original field is a string and matching comparisonvalue is
             # a number we should respect the origin of the original field
             # and process comparison value as a string (see
@@ -2384,7 +2425,10 @@ def _match_one(filter_part, dct):
             if m.group('op') not in ('=', '!='):
                 raise ValueError(
                     'Operator %s does not support string values!' % m.group('op'))
-            comparison_value = m.group('strval') or m.group('intval')
+            comparison_value = m.group('quotedstrval') or m.group('strval') or m.group('intval')
+            quote = m.group('quote')
+            if quote is not None:
+                comparison_value = comparison_value.replace(r'\%s' % quote, quote)
         else:
             try:
                 comparison_value = int(m.group('intval'))
@@ -2996,6 +3040,260 @@ class ISO3166Utils(object):
         return cls._country_map.get(code.upper())
 
 
+class GeoUtils(object):
+    # Major IPv4 address blocks per country
+    _country_ip_map = {
+        'AD': '85.94.160.0/19',
+        'AE': '94.200.0.0/13',
+        'AF': '149.54.0.0/17',
+        'AG': '209.59.64.0/18',
+        'AI': '204.14.248.0/21',
+        'AL': '46.99.0.0/16',
+        'AM': '46.70.0.0/15',
+        'AO': '105.168.0.0/13',
+        'AP': '159.117.192.0/21',
+        'AR': '181.0.0.0/12',
+        'AS': '202.70.112.0/20',
+        'AT': '84.112.0.0/13',
+        'AU': '1.128.0.0/11',
+        'AW': '181.41.0.0/18',
+        'AZ': '5.191.0.0/16',
+        'BA': '31.176.128.0/17',
+        'BB': '65.48.128.0/17',
+        'BD': '114.130.0.0/16',
+        'BE': '57.0.0.0/8',
+        'BF': '129.45.128.0/17',
+        'BG': '95.42.0.0/15',
+        'BH': '37.131.0.0/17',
+        'BI': '154.117.192.0/18',
+        'BJ': '137.255.0.0/16',
+        'BL': '192.131.134.0/24',
+        'BM': '196.12.64.0/18',
+        'BN': '156.31.0.0/16',
+        'BO': '161.56.0.0/16',
+        'BQ': '161.0.80.0/20',
+        'BR': '152.240.0.0/12',
+        'BS': '24.51.64.0/18',
+        'BT': '119.2.96.0/19',
+        'BW': '168.167.0.0/16',
+        'BY': '178.120.0.0/13',
+        'BZ': '179.42.192.0/18',
+        'CA': '99.224.0.0/11',
+        'CD': '41.243.0.0/16',
+        'CF': '196.32.200.0/21',
+        'CG': '197.214.128.0/17',
+        'CH': '85.0.0.0/13',
+        'CI': '154.232.0.0/14',
+        'CK': '202.65.32.0/19',
+        'CL': '152.172.0.0/14',
+        'CM': '165.210.0.0/15',
+        'CN': '36.128.0.0/10',
+        'CO': '181.240.0.0/12',
+        'CR': '201.192.0.0/12',
+        'CU': '152.206.0.0/15',
+        'CV': '165.90.96.0/19',
+        'CW': '190.88.128.0/17',
+        'CY': '46.198.0.0/15',
+        'CZ': '88.100.0.0/14',
+        'DE': '53.0.0.0/8',
+        'DJ': '197.241.0.0/17',
+        'DK': '87.48.0.0/12',
+        'DM': '192.243.48.0/20',
+        'DO': '152.166.0.0/15',
+        'DZ': '41.96.0.0/12',
+        'EC': '186.68.0.0/15',
+        'EE': '90.190.0.0/15',
+        'EG': '156.160.0.0/11',
+        'ER': '196.200.96.0/20',
+        'ES': '88.0.0.0/11',
+        'ET': '196.188.0.0/14',
+        'EU': '2.16.0.0/13',
+        'FI': '91.152.0.0/13',
+        'FJ': '144.120.0.0/16',
+        'FM': '119.252.112.0/20',
+        'FO': '88.85.32.0/19',
+        'FR': '90.0.0.0/9',
+        'GA': '41.158.0.0/15',
+        'GB': '25.0.0.0/8',
+        'GD': '74.122.88.0/21',
+        'GE': '31.146.0.0/16',
+        'GF': '161.22.64.0/18',
+        'GG': '62.68.160.0/19',
+        'GH': '45.208.0.0/14',
+        'GI': '85.115.128.0/19',
+        'GL': '88.83.0.0/19',
+        'GM': '160.182.0.0/15',
+        'GN': '197.149.192.0/18',
+        'GP': '104.250.0.0/19',
+        'GQ': '105.235.224.0/20',
+        'GR': '94.64.0.0/13',
+        'GT': '168.234.0.0/16',
+        'GU': '168.123.0.0/16',
+        'GW': '197.214.80.0/20',
+        'GY': '181.41.64.0/18',
+        'HK': '113.252.0.0/14',
+        'HN': '181.210.0.0/16',
+        'HR': '93.136.0.0/13',
+        'HT': '148.102.128.0/17',
+        'HU': '84.0.0.0/14',
+        'ID': '39.192.0.0/10',
+        'IE': '87.32.0.0/12',
+        'IL': '79.176.0.0/13',
+        'IM': '5.62.80.0/20',
+        'IN': '117.192.0.0/10',
+        'IO': '203.83.48.0/21',
+        'IQ': '37.236.0.0/14',
+        'IR': '2.176.0.0/12',
+        'IS': '82.221.0.0/16',
+        'IT': '79.0.0.0/10',
+        'JE': '87.244.64.0/18',
+        'JM': '72.27.0.0/17',
+        'JO': '176.29.0.0/16',
+        'JP': '126.0.0.0/8',
+        'KE': '105.48.0.0/12',
+        'KG': '158.181.128.0/17',
+        'KH': '36.37.128.0/17',
+        'KI': '103.25.140.0/22',
+        'KM': '197.255.224.0/20',
+        'KN': '198.32.32.0/19',
+        'KP': '175.45.176.0/22',
+        'KR': '175.192.0.0/10',
+        'KW': '37.36.0.0/14',
+        'KY': '64.96.0.0/15',
+        'KZ': '2.72.0.0/13',
+        'LA': '115.84.64.0/18',
+        'LB': '178.135.0.0/16',
+        'LC': '192.147.231.0/24',
+        'LI': '82.117.0.0/19',
+        'LK': '112.134.0.0/15',
+        'LR': '41.86.0.0/19',
+        'LS': '129.232.0.0/17',
+        'LT': '78.56.0.0/13',
+        'LU': '188.42.0.0/16',
+        'LV': '46.109.0.0/16',
+        'LY': '41.252.0.0/14',
+        'MA': '105.128.0.0/11',
+        'MC': '88.209.64.0/18',
+        'MD': '37.246.0.0/16',
+        'ME': '178.175.0.0/17',
+        'MF': '74.112.232.0/21',
+        'MG': '154.126.0.0/17',
+        'MH': '117.103.88.0/21',
+        'MK': '77.28.0.0/15',
+        'ML': '154.118.128.0/18',
+        'MM': '37.111.0.0/17',
+        'MN': '49.0.128.0/17',
+        'MO': '60.246.0.0/16',
+        'MP': '202.88.64.0/20',
+        'MQ': '109.203.224.0/19',
+        'MR': '41.188.64.0/18',
+        'MS': '208.90.112.0/22',
+        'MT': '46.11.0.0/16',
+        'MU': '105.16.0.0/12',
+        'MV': '27.114.128.0/18',
+        'MW': '105.234.0.0/16',
+        'MX': '187.192.0.0/11',
+        'MY': '175.136.0.0/13',
+        'MZ': '197.218.0.0/15',
+        'NA': '41.182.0.0/16',
+        'NC': '101.101.0.0/18',
+        'NE': '197.214.0.0/18',
+        'NF': '203.17.240.0/22',
+        'NG': '105.112.0.0/12',
+        'NI': '186.76.0.0/15',
+        'NL': '145.96.0.0/11',
+        'NO': '84.208.0.0/13',
+        'NP': '36.252.0.0/15',
+        'NR': '203.98.224.0/19',
+        'NU': '49.156.48.0/22',
+        'NZ': '49.224.0.0/14',
+        'OM': '5.36.0.0/15',
+        'PA': '186.72.0.0/15',
+        'PE': '186.160.0.0/14',
+        'PF': '123.50.64.0/18',
+        'PG': '124.240.192.0/19',
+        'PH': '49.144.0.0/13',
+        'PK': '39.32.0.0/11',
+        'PL': '83.0.0.0/11',
+        'PM': '70.36.0.0/20',
+        'PR': '66.50.0.0/16',
+        'PS': '188.161.0.0/16',
+        'PT': '85.240.0.0/13',
+        'PW': '202.124.224.0/20',
+        'PY': '181.120.0.0/14',
+        'QA': '37.210.0.0/15',
+        'RE': '139.26.0.0/16',
+        'RO': '79.112.0.0/13',
+        'RS': '178.220.0.0/14',
+        'RU': '5.136.0.0/13',
+        'RW': '105.178.0.0/15',
+        'SA': '188.48.0.0/13',
+        'SB': '202.1.160.0/19',
+        'SC': '154.192.0.0/11',
+        'SD': '154.96.0.0/13',
+        'SE': '78.64.0.0/12',
+        'SG': '152.56.0.0/14',
+        'SI': '188.196.0.0/14',
+        'SK': '78.98.0.0/15',
+        'SL': '197.215.0.0/17',
+        'SM': '89.186.32.0/19',
+        'SN': '41.82.0.0/15',
+        'SO': '197.220.64.0/19',
+        'SR': '186.179.128.0/17',
+        'SS': '105.235.208.0/21',
+        'ST': '197.159.160.0/19',
+        'SV': '168.243.0.0/16',
+        'SX': '190.102.0.0/20',
+        'SY': '5.0.0.0/16',
+        'SZ': '41.84.224.0/19',
+        'TC': '65.255.48.0/20',
+        'TD': '154.68.128.0/19',
+        'TG': '196.168.0.0/14',
+        'TH': '171.96.0.0/13',
+        'TJ': '85.9.128.0/18',
+        'TK': '27.96.24.0/21',
+        'TL': '180.189.160.0/20',
+        'TM': '95.85.96.0/19',
+        'TN': '197.0.0.0/11',
+        'TO': '175.176.144.0/21',
+        'TR': '78.160.0.0/11',
+        'TT': '186.44.0.0/15',
+        'TV': '202.2.96.0/19',
+        'TW': '120.96.0.0/11',
+        'TZ': '156.156.0.0/14',
+        'UA': '93.72.0.0/13',
+        'UG': '154.224.0.0/13',
+        'US': '3.0.0.0/8',
+        'UY': '167.56.0.0/13',
+        'UZ': '82.215.64.0/18',
+        'VA': '212.77.0.0/19',
+        'VC': '24.92.144.0/20',
+        'VE': '186.88.0.0/13',
+        'VG': '172.103.64.0/18',
+        'VI': '146.226.0.0/16',
+        'VN': '14.160.0.0/11',
+        'VU': '202.80.32.0/20',
+        'WF': '117.20.32.0/21',
+        'WS': '202.4.32.0/19',
+        'YE': '134.35.0.0/16',
+        'YT': '41.242.116.0/22',
+        'ZA': '41.0.0.0/11',
+        'ZM': '165.56.0.0/13',
+        'ZW': '41.85.192.0/19',
+    }
+
+    @classmethod
+    def random_ipv4(cls, code):
+        block = cls._country_ip_map.get(code.upper())
+        if not block:
+            return None
+        addr, preflen = block.split('/')
+        addr_min = compat_struct_unpack('!L', socket.inet_aton(addr))[0]
+        addr_max = addr_min | (0xffffffff >> int(preflen))
+        return compat_str(socket.inet_ntoa(
+            compat_struct_pack('!L', random.randint(addr_min, addr_max))))
+
+
 class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
     def __init__(self, proxies=None):
         # Set default handlers
index a73e9d89c11d33c6999313fbeaea010aa432401f..fe7462eac2fe18daaf097f2f09d9e6c4dc0ad7ce 100644 (file)
@@ -1,3 +1,3 @@
 from __future__ import unicode_literals
 
-__version__ = '2017.02.07'
+__version__ = '2017.02.24.1'