all: youtube-dl README.md CONTRIBUTING.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish supportedsites
clean:
- rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish *.dump *.part *.info.json *.mp4 *.flv *.mp3 CONTRIBUTING.md.tmp youtube-dl youtube-dl.exe
+ rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish *.dump *.part *.info.json *.mp4 *.flv *.mp3 *.avi CONTRIBUTING.md.tmp youtube-dl youtube-dl.exe
+ find -name "*.pyc" -delete
PREFIX ?= /usr/local
BINDIR ?= $(PREFIX)/bin
ot: offlinetest
offlinetest: codetest
- nosetests --verbose test --exclude test_download --exclude test_age_restriction --exclude test_subtitles --exclude test_write_annotations --exclude test_youtube_lists
+ nosetests --verbose test --exclude test_download.py --exclude test_age_restriction.py --exclude test_subtitles.py --exclude test_write_annotations.py --exclude test_youtube_lists.py
tar: youtube-dl.tar.gz
on Windows)
--flat-playlist Do not extract the videos of a playlist,
only list them.
+ --no-color Do not emit color codes in output.
## Network Options:
--proxy URL Use the specified HTTP/HTTPS proxy. Pass in
COUNT views
--max-views COUNT Do not download any videos with more than
COUNT views
+ --match-filter FILTER (Experimental) Generic video filter.
+ Specify any key (see help for -o for a list
+ of available keys) to match if the key is
+ present, !key to check if the key is not
+ present, key > NUMBER (like "comment_count >
+ 12", also works with >=, <, <=, !=, =) to
+ compare against a number, and & to require
+ multiple matches. Values which are not
+ known are excluded unless you put a
+ question mark (?) after the operator. For
+ example, to only match videos that have
+ been liked more than 100 times and disliked
+ less than 50 times (or the dislike
+ functionality is not available at the given
+ service), but which also have a description,
+ use --match-filter "like_count > 100 &
+ dislike_count <? 50 & description" .
--no-playlist If the URL refers to a video and a
playlist, download only the video.
+ --yes-playlist If the URL refers to a video and a
+ playlist, download the playlist.
--age-limit YEARS download only videos suitable for the given
age
--download-archive FILE Download only videos not listed in the
--playlist-reverse Download playlist videos in reverse order
--xattr-set-filesize (experimental) set file xattribute
ytdl.filesize with expected filesize
+ --hls-prefer-native (experimental) Use the native HLS
+ downloader instead of ffmpeg.
--external-downloader COMMAND (experimental) Use the specified external
downloader. Currently supports
aria2c,curl,wget
video results by putting a condition in
brackets, as in -f "best[height=720]" (or
-f "[filesize>10M]"). This works for
- filesize, height, width, tbr, abr, vbr, and
- fps and the comparisons <, <=, >, >=, =, !=
- . Formats for which the value is not known
- are excluded unless you put a question mark
- (?) after the operator. You can combine
- format filters, so -f "[height <=?
- 720][tbr>500]" selects up to 720p videos
- (or videos where the height is not known)
- with a bitrate of at least 500 KBit/s. By
- default, youtube-dl will pick the best
- quality. Use commas to download multiple
- audio formats, such as -f
+ filesize, height, width, tbr, abr, vbr,
+ asr, and fps and the comparisons <, <=, >,
+ >=, =, != and for ext, acodec, vcodec,
+ container, and protocol and the comparisons
+ =, != . Formats for which the value is not
+ known are excluded unless you put a
+ question mark (?) after the operator. You
+ can combine format filters, so -f "[height
+ <=? 720][tbr>500]" selects up to 720p
+ videos (or videos where the height is not
+ known) with a bitrate of at least 500
+ KBit/s. By default, youtube-dl will pick
+ the best quality. Use commas to download
+ multiple audio formats, such as -f
136/137/mp4/bestvideo,140/m4a/bestaudio.
You can merge the video and audio of two
formats into a single file using -f <video-
--all-subs downloads all the available subtitles of
the video
--list-subs lists all available subtitles for the video
- --sub-format FORMAT subtitle format (default=srt) ([sbv/vtt]
- youtube only)
+ --sub-format FORMAT subtitle format, accepts formats
+ preference, for example: "ass/srt/best"
--sub-lang LANGS languages of the subtitles to download
(optional) separated by commas, use IETF
language tags like 'en,pt'
postprocessors (default)
--prefer-ffmpeg Prefer ffmpeg over avconv for running the
postprocessors
+ --ffmpeg-location PATH Location of the ffmpeg/avconv binary;
+ either the path to the binary or its
+ containing directory.
--exec CMD Execute a command on the file after
downloading, similar to find's -exec
syntax. Example: --exec 'adb push {}
/sdcard/Music/ && rm {}'
+ --convert-subtitles FORMAT Convert the subtitles to another format
+ (currently supported: srt|ass|vtt)
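
The new --match-filter option above is backed by match_str in youtube_dl.utils (the new tests in test/test_utils.py below pin down its exact semantics). A minimal sketch of evaluating a filter against a video's metadata dict, mirroring the example from the help text:

    from youtube_dl.utils import match_str

    # hypothetical metadata, as an extractor would report it
    info = {'like_count': 190, 'description': 'foo'}

    # dislike_count is unknown, but the '?' after the operator lets it pass
    print(match_str('like_count > 100 & dislike_count <? 50 & description', info))  # True
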
# CONFIGURATION
### ERROR: no fmt_url_map or conn information found in video info
-youtube has switched to a new video info format in July 2011 which is not supported by old versions of youtube-dl. You can update youtube-dl with `sudo youtube-dl --update`.
+YouTube has switched to a new video info format in July 2011 which is not supported by old versions of youtube-dl. See [above](#how-do-i-update-youtube-dl) for how to update youtube-dl.
### ERROR: unable to download video ###
-youtube requires an additional signature since September 2012 which is not supported by old versions of youtube-dl. You can update youtube-dl with `sudo youtube-dl --update`.
+YouTube requires an additional signature since September 2012 which is not supported by old versions of youtube-dl. See [above](#how-do-i-update-youtube-dl) for how to update youtube-dl.
+
+### ExtractorError: Could not find JS function u'OF'
+
+In February 2015, the new YouTube player contained a character sequence in a string that was misinterpreted by old versions of youtube-dl. See [above](#how-do-i-update-youtube-dl) for how to update youtube-dl.
### SyntaxError: Non-ASCII character ###
### How can I detect whether a given URL is supported by youtube-dl?
-For one, have a look at the [list of supported sites](docs/supportedsites.md). Note that it can sometimes happen that the site changes its URL scheme (say, from http://example.com/v/1234567 to http://example.com/v/1234567 ) and youtube-dl reports an URL of a service in that list as unsupported. In that case, simply report a bug.
+For one, have a look at the [list of supported sites](docs/supportedsites.md). Note that it can sometimes happen that the site changes its URL scheme (say, from http://example.com/video/1234567 to http://example.com/v/1234567 ) and youtube-dl reports a URL of a service in that list as unsupported. In that case, simply report a bug.
It is *not* possible to detect whether a URL is supported or not. That's because youtube-dl contains a generic extractor which matches **all** URLs. You may be tempted to disable, exclude, or remove the generic extractor, but the generic extractor not only allows users to extract videos from lots of websites that embed a video from another service, but may also be used to extract video from a service that it's hosting itself. Therefore, we neither recommend nor support disabling, excluding, or removing the generic extractor.
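
What can be checked is whether a dedicated (non-generic) extractor claims the URL. A short sketch relying only on the public gen_extractors helper (the function name here is ours):

    import youtube_dl.extractor

    def has_dedicated_extractor(url):
        # True if any extractor other than the generic one accepts the URL
        return any(ie.suitable(url) and ie.IE_NAME != 'generic'
                   for ie in youtube_dl.extractor.gen_extractors())

    print(has_dedicated_extractor('https://www.youtube.com/watch?v=BaW_jenozKc'))  # True
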
on Windows)
--flat-playlist Do not extract the videos of a playlist,
only list them.
+ --no-color Do not emit color codes in output.
Network Options:
----------------
COUNT views
--max-views COUNT Do not download any videos with more than
COUNT views
+ --match-filter FILTER (Experimental) Generic video filter.
+ Specify any key (see help for -o for a list
+ of available keys) to match if the key is
+ present, !key to check if the key is not
+ present, key > NUMBER (like "comment_count >
+ 12", also works with >=, <, <=, !=, =) to
+ compare against a number, and & to require
+ multiple matches. Values which are not
+ known are excluded unless you put a
+ question mark (?) after the operator. For
+ example, to only match videos that have
+ been liked more than 100 times and disliked
+ less than 50 times (or the dislike
+ functionality is not available at the given
+ service), but which also have a description,
+ use --match-filter "like_count > 100 &
+ dislike_count <? 50 & description" .
--no-playlist If the URL refers to a video and a
playlist, download only the video.
+ --yes-playlist If the URL refers to a video and a
+ playlist, download the playlist.
--age-limit YEARS download only videos suitable for the given
age
--download-archive FILE Download only videos not listed in the
--playlist-reverse Download playlist videos in reverse order
--xattr-set-filesize (experimental) set file xattribute
ytdl.filesize with expected filesize
+ --hls-prefer-native (experimental) Use the native HLS
+ downloader instead of ffmpeg.
--external-downloader COMMAND (experimental) Use the specified external
downloader. Currently supports
aria2c,curl,wget
video results by putting a condition in
brackets, as in -f "best[height=720]" (or
-f "[filesize>10M]"). This works for
- filesize, height, width, tbr, abr, vbr, and
- fps and the comparisons <, <=, >, >=, =, !=
- . Formats for which the value is not known
- are excluded unless you put a question mark
- (?) after the operator. You can combine
- format filters, so -f "[height <=?
- 720][tbr>500]" selects up to 720p videos
- (or videos where the height is not known)
- with a bitrate of at least 500 KBit/s. By
- default, youtube-dl will pick the best
- quality. Use commas to download multiple
- audio formats, such as -f
+ filesize, height, width, tbr, abr, vbr,
+ asr, and fps and the comparisons <, <=, >,
+ >=, =, != and for ext, acodec, vcodec,
+ container, and protocol and the comparisons
+ =, != . Formats for which the value is not
+ known are excluded unless you put a
+ question mark (?) after the operator. You
+ can combine format filters, so -f "[height
+ <=? 720][tbr>500]" selects up to 720p
+ videos (or videos where the height is not
+ known) with a bitrate of at least 500
+ KBit/s. By default, youtube-dl will pick
+ the best quality. Use commas to download
+ multiple audio formats, such as -f
136/137/mp4/bestvideo,140/m4a/bestaudio.
You can merge the video and audio of two
formats into a single file using -f <video-
--all-subs downloads all the available subtitles of
the video
--list-subs lists all available subtitles for the video
- --sub-format FORMAT subtitle format (default=srt) ([sbv/vtt]
- youtube only)
+ --sub-format FORMAT subtitle format, accepts formats
+ preference, for example: "ass/srt/best"
--sub-lang LANGS languages of the subtitles to download
(optional) separated by commas, use IETF
language tags like 'en,pt'
postprocessors (default)
--prefer-ffmpeg Prefer ffmpeg over avconv for running the
postprocessors
+ --ffmpeg-location PATH Location of the ffmpeg/avconv binary;
+ either the path to the binary or its
+ containing directory.
--exec CMD Execute a command on the file after
downloading, similar to find's -exec
syntax. Example: --exec 'adb push {}
/sdcard/Music/ && rm {}'
+ --convert-subtitles FORMAT Convert the subtitles to another format
+ (currently supported: srt|ass|vtt)
CONFIGURATION
=============
ERROR: no fmt_url_map or conn information found in video info
-youtube has switched to a new video info format in July 2011 which is
-not supported by old versions of youtube-dl. You can update youtube-dl
-with sudo youtube-dl --update.
+YouTube has switched to a new video info format in July 2011 which is
+not supported by old versions of youtube-dl. See above for how to update
+youtube-dl.
ERROR: unable to download video
-youtube requires an additional signature since September 2012 which is
-not supported by old versions of youtube-dl. You can update youtube-dl
-with sudo youtube-dl --update.
+YouTube requires an additional signature since September 2012 which is
+not supported by old versions of youtube-dl. See above for how to update
+youtube-dl.
+
+ExtractorError: Could not find JS function u'OF'
+
+In February 2015, the new YouTube player contained a character sequence
+in a string that was misinterpreted by old versions of youtube-dl. See
+above for how to update youtube-dl.
SyntaxError: Non-ASCII character
For one, have a look at the list of supported sites. Note that it can
sometimes happen that the site changes its URL scheme (say, from
-http://example.com/v/1234567 to http://example.com/v/1234567 ) and
+http://example.com/video/1234567 to http://example.com/v/1234567 ) and
youtube-dl reports an URL of a service in that list as unsupported. In
that case, simply report a bug.
RESULT = ('.' + domain + '\n' in LIST or '\n' + domain + '\n' in LIST)
- if RESULT and ('info_dict' not in test or 'age_limit' not in test['info_dict']
- or test['info_dict']['age_limit'] != 18):
+ if RESULT and ('info_dict' not in test or 'age_limit' not in test['info_dict'] or
+ test['info_dict']['age_limit'] != 18):
print('\nPotential missing age_limit check: {0}'.format(test['name']))
- elif not RESULT and ('info_dict' in test and 'age_limit' in test['info_dict']
- and test['info_dict']['age_limit'] == 18):
+ elif not RESULT and ('info_dict' in test and 'age_limit' in test['info_dict'] and
+ test['info_dict']['age_limit'] == 18):
print('\nPotential false negative: {0}'.format(test['name']))
else:
# Supported sites
+ - **1tv**: Первый канал
- **1up.com**
- **220.ro**
- **24video**
- **AddAnime**
- **AdobeTV**
- **AdultSwim**
+ - **Aftenposten**
- **Aftonbladet**
+ - **AirMozilla**
- **AlJazeera**
- **Allocine**
- **AlphaPorno**
- **Brightcove**
- **BuzzFeed**
- **BYUtv**
+ - **Camdemy**
+ - **CamdemyFolder**
- **Canal13cl**
- **canalc2.tv**
- **Canalplus**: canalplus.fr, piwiplus.fr and d8.tv
- **CBS**
- **CBSNews**: CBS News
+ - **CBSSports**
- **CeskaTelevize**
- **channel9**: Channel 9
- **Chilloutzone**
+ - **chirbit**
+ - **chirbit:profile**
- **Cinchcast**
- **Cinemassacre**
- **clipfish**
- **EllenTV**
- **EllenTV:clips**
- **ElPais**: El País
+ - **Embedly**
- **EMPFlix**
- **Engadget**
- **Eporner**
- **fernsehkritik.tv:postecke**
- **Firedrive**
- **Firstpost**
- - **firsttv**: Видеоархив - Первый канал
- **Flickr**
- **Folketinget**: Folketinget (ft.dk; Danish parliament)
- **Foxgay**
- **Helsinki**: helsinki.fi
- **HentaiStigma**
- **HistoricFilms**
+ - **History**
- **hitbox**
- **hitbox:live**
- **HornBunny**
- **ign.com**
- **imdb**: Internet Movie Database trailers
- **imdb:list**: Internet Movie Database lists
+ - **Imgur**
- **Ina**
- **InfoQ**
- **Instagram**
- **Jove**
- **jpopsuki.tv**
- **Jukebox**
+ - **Kaltura**
- **Kankan**
- **Karaoketv**
- **keek**
- **Ku6**
- **la7.tv**
- **Laola1Tv**
+ - **Letv**
+ - **LetvPlaylist**
+ - **LetvTv**
- **lifenews**: LIFE | NEWS
- **LiveLeak**
- **livestream**
- **mailru**: Видео@Mail.Ru
- **Malemotion**
- **MDR**
+ - **media.ccc.de**
- **metacafe**
- **Metacritic**
- **Mgoon**
- **myvideo**
- **MyVidster**
- **n-tv.de**
+ - **NationalGeographic**
- **Naver**
- **NBA**
- **NBC**
- **nowvideo**: NowVideo
- **npo.nl**
- **npo.nl:live**
+ - **npo.nl:radio**
+ - **npo.nl:radio:fragment**
- **NRK**
- **NRKTV**
- **ntv.ru**
- **Nuvid**
- **NYTimes**
- **ocw.mit.edu**
+ - **Odnoklassniki**
- **OktoberfestTV**
- **on.aol.com**
- **Ooyala**
- **podomatic**
- **PornHd**
- **PornHub**
+ - **PornHubPlaylist**
- **Pornotube**
- **PornoXO**
- **PromptFile**
- **prosiebensat1**: ProSiebenSat.1 Digital
+ - **Puls4**
- **Pyvideo**
- **QuickVid**
+ - **R7**
- **radio.de**
- **radiobremen**
- **radiofrance**
- **Roxwel**
- **RTBF**
- **Rte**
+ - **rtl.nl**: rtl.nl and rtlxl.nl
- **RTL2**
- **RTLnow**
- - **rtlxl.nl**
- **RTP**
- **RTS**: RTS.ch
- **rtve.es:alacarta**: RTVE a la carta
- **rutube:movie**: Rutube movies
- **rutube:person**: Rutube person videos
- **RUTV**: RUTV.RU
+ - **Sandia**: Sandia National Laboratories
- **Sapo**: SAPO Vídeos
- **savefrom.net**
- **SBS**: sbs.com.au
- **soundcloud:playlist**
- **soundcloud:set**
- **soundcloud:user**
- - **Soundgasm**
+ - **soundgasm**
+ - **soundgasm:profile**
- **southpark.cc.com**
- **southpark.de**
- **Space**
- **StreamCZ**
- **StreetVoice**
- **SunPorno**
+ - **SVTPlay**: SVT Play and Öppet arkiv
- **SWRMediathek**
- **Syfy**
- **SztvHu**
- **Turbo**
- **Tutv**
- **tv.dfb.de**
+ - **TV4**: tv4.se and tv4play.se
- **tvigle**: Интернет-телевидение Tvigle.ru
- **tvp.pl**
- **tvp.pl:Series**
- **XVideos**
- **XXXYMovies**
- **Yahoo**: Yahoo screen and movies
+ - **Yam**
- **YesJapan**
- **Ynet**
- **YouJizz**
- **youtube:subscriptions**: YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)
- **youtube:user**: YouTube.com user videos (URL or "ytuser" keyword)
- **youtube:watch_later**: Youtube watch later list, ":ytwatchlater" for short (requires authentication)
+ - **Zapiks**
- **ZDF**
- **ZDFChannel**
- **zingmp3:album**: mp3.zing.vn albums
self.assertTrue(
got.startswith(start_str),
'field %s (value: %r) should start with %r' % (info_field, got, start_str))
+ elif isinstance(expected, compat_str) and expected.startswith('contains:'):
+ got = got_dict.get(info_field)
+ contains_str = expected[len('contains:'):]
+ self.assertTrue(
+ isinstance(got, compat_str),
+ 'Expected a %s object, but got %s for field %s' % (
+ compat_str.__name__, type(got).__name__, info_field))
+ self.assertTrue(
+ contains_str in got,
+ 'field %s (value: %r) should contain %r' % (info_field, got, contains_str))
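+ # Like the 'startswith:' branch above, this is a prefix mini-DSL: an expected
+ # value of 'contains:some phrase' asserts that the extracted field merely
+ # includes the phrase instead of matching it exactly.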
elif isinstance(expected, type):
got = got_dict.get(info_field)
self.assertTrue(isinstance(got, expected),
info_dict_str += ''.join(
' %s: %s,\n' % (_repr(k), _repr(v))
for k, v in test_info_dict.items() if k not in missing_keys)
- info_dict_str += '\n'
+
+ if info_dict_str:
+ info_dict_str += '\n'
info_dict_str += ''.join(
' %s: %s,\n' % (_repr(k), _repr(test_info_dict[k]))
for k in missing_keys)
write_string(
- '\n\'info_dict\': {\n' + info_dict_str + '}\n', out=sys.stderr)
+ '\n\'info_dict\': {\n' + info_dict_str + '},\n', out=sys.stderr)
self.assertFalse(
missing_keys,
'Missing keys in test definition: %s' % (
"retries": 10,
"simulate": false,
"subtitleslang": null,
- "subtitlesformat": "srt",
+ "subtitlesformat": "best",
"test": true,
"updatetime": true,
"usenetrc": false,
"writesubtitles": false,
"allsubtitles": false,
"listssubtitles": false,
- "socket_timeout": 20
+ "socket_timeout": 20,
+ "fixup": "never"
}
from test.helper import FakeYDL, assertRegexpMatches
from youtube_dl import YoutubeDL
from youtube_dl.extractor import YoutubeIE
+from youtube_dl.postprocessor.common import PostProcessor
class YDL(FakeYDL):
downloaded = ydl.downloaded_info_dicts[0]
self.assertEqual(downloaded['format_id'], 'G')
+ def test_subtitles(self):
+ def s_formats(lang, autocaption=False):
+ return [{
+ 'ext': ext,
+ 'url': 'http://localhost/video.%s.%s' % (lang, ext),
+ '_auto': autocaption,
+ } for ext in ['vtt', 'srt', 'ass']]
+ subtitles = dict((l, s_formats(l)) for l in ['en', 'fr', 'es'])
+ auto_captions = dict((l, s_formats(l, True)) for l in ['it', 'pt', 'es'])
+ info_dict = {
+ 'id': 'test',
+ 'title': 'Test',
+ 'url': 'http://localhost/video.mp4',
+ 'subtitles': subtitles,
+ 'automatic_captions': auto_captions,
+ 'extractor': 'TEST',
+ }
+
+ def get_info(params={}):
+ params.setdefault('simulate', True)
+ ydl = YDL(params)
+ ydl.report_warning = lambda *args, **kargs: None
+ return ydl.process_video_result(info_dict, download=False)
+
+ result = get_info()
+ self.assertFalse(result.get('requested_subtitles'))
+ self.assertEqual(result['subtitles'], subtitles)
+ self.assertEqual(result['automatic_captions'], auto_captions)
+
+ result = get_info({'writesubtitles': True})
+ subs = result['requested_subtitles']
+ self.assertTrue(subs)
+ self.assertEqual(set(subs.keys()), set(['en']))
+ self.assertTrue(subs['en'].get('data') is None)
+ self.assertEqual(subs['en']['ext'], 'ass')
+
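+ # 'foo' matches no available subtitle ext, so the next preference, 'srt', wins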
+ result = get_info({'writesubtitles': True, 'subtitlesformat': 'foo/srt'})
+ subs = result['requested_subtitles']
+ self.assertEqual(subs['en']['ext'], 'srt')
+
+ result = get_info({'writesubtitles': True, 'subtitleslangs': ['es', 'fr', 'it']})
+ subs = result['requested_subtitles']
+ self.assertTrue(subs)
+ self.assertEqual(set(subs.keys()), set(['es', 'fr']))
+
+ result = get_info({'writesubtitles': True, 'writeautomaticsub': True, 'subtitleslangs': ['es', 'pt']})
+ subs = result['requested_subtitles']
+ self.assertTrue(subs)
+ self.assertEqual(set(subs.keys()), set(['es', 'pt']))
+ self.assertFalse(subs['es']['_auto'])
+ self.assertTrue(subs['pt']['_auto'])
+
+ result = get_info({'writeautomaticsub': True, 'subtitleslangs': ['es', 'pt']})
+ subs = result['requested_subtitles']
+ self.assertTrue(subs)
+ self.assertEqual(set(subs.keys()), set(['es', 'pt']))
+ self.assertTrue(subs['es']['_auto'])
+ self.assertTrue(subs['pt']['_auto'])
+
def test_add_extra_info(self):
test_dict = {
'extractor': 'Foo',
'vbr': 10,
}), '^\s*10k$')
+ def test_postprocessors(self):
+ filename = 'post-processor-testfile.mp4'
+ audiofile = filename + '.mp3'
+
+ class SimplePP(PostProcessor):
+ def run(self, info):
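+ # run() returns (keep_original, info); returning False defers the choice of
+ # whether to delete the input file to the 'keepvideo' option (exercised below)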
+ with open(audiofile, 'wt') as f:
+ f.write('EXAMPLE')
+ info['filepath']  # bare lookup asserts the PP received a 'filepath'
+ return False, info
+
+ def run_pp(params):
+ with open(filename, 'wt') as f:
+ f.write('EXAMPLE')
+ ydl = YoutubeDL(params)
+ ydl.add_post_processor(SimplePP())
+ ydl.post_process(filename, {'filepath': filename})
+
+ run_pp({'keepvideo': True})
+ self.assertTrue(os.path.exists(filename), '%s doesn\'t exist' % filename)
+ self.assertTrue(os.path.exists(audiofile), '%s doesn\'t exist' % audiofile)
+ os.unlink(filename)
+ os.unlink(audiofile)
+
+ run_pp({'keepvideo': False})
+ self.assertFalse(os.path.exists(filename), '%s exists' % filename)
+ self.assertTrue(os.path.exists(audiofile), '%s doesn\'t exist' % audiofile)
+ os.unlink(audiofile)
+
+
if __name__ == '__main__':
unittest.main()
self.assertEqual(jsi.call_function('f'), -11)
def test_comments(self):
+ 'Skipping: Not yet fully implemented'  # docstring doubles as the skip reason in nose's verbose output
+ return
jsi = JSInterpreter('''
function x() {
var x = /* 1 + */ 2;
''')
self.assertEqual(jsi.call_function('x'), 52)
+ jsi = JSInterpreter('''
+ function f() {
+ var x = "/*";
+ var y = 1 /* comment */ + 2;
+ return y;
+ }
+ ''')
+ self.assertEqual(jsi.call_function('f'), 3)
+
def test_precedence(self):
jsi = JSInterpreter('''
function x() {
VimeoIE,
WallaIE,
CeskaTelevizeIE,
+ LyndaIE,
+ NPOIE,
+ ComedyCentralIE,
+ NRKTVIE,
+ RaiIE,
+ VikiIE,
+ ThePlatformIE,
+ RTVEALaCartaIE,
)
def setUp(self):
self.DL = FakeYDL()
- self.ie = self.IE(self.DL)
+ self.ie = self.IE()
+ self.DL.add_info_extractor(self.ie)
def getInfoDict(self):
- info_dict = self.ie.extract(self.url)
+ info_dict = self.DL.extract_info(self.url, download=False)
return info_dict
def getSubtitles(self):
info_dict = self.getInfoDict()
- return info_dict['subtitles']
+ subtitles = info_dict['requested_subtitles']
+ if not subtitles:
+ return subtitles
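+ # fetch subtitle bodies that the extractor provided only as URLs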
+ for sub_info in subtitles.values():
+ if sub_info.get('data') is None:
+ uf = self.DL.urlopen(sub_info['url'])
+ sub_info['data'] = uf.read().decode('utf-8')
+ return dict((l, sub_info['data']) for l, sub_info in subtitles.items())
class TestYoutubeSubtitles(BaseTestSubtitles):
url = 'QRS8MkLhQmM'
IE = YoutubeIE
- def test_youtube_no_writesubtitles(self):
- self.DL.params['writesubtitles'] = False
- subtitles = self.getSubtitles()
- self.assertEqual(subtitles, None)
-
- def test_youtube_subtitles(self):
- self.DL.params['writesubtitles'] = True
- subtitles = self.getSubtitles()
- self.assertEqual(md5(subtitles['en']), '4cd9278a35ba2305f47354ee13472260')
-
- def test_youtube_subtitles_lang(self):
- self.DL.params['writesubtitles'] = True
- self.DL.params['subtitleslangs'] = ['it']
- subtitles = self.getSubtitles()
- self.assertEqual(md5(subtitles['it']), '164a51f16f260476a05b50fe4c2f161d')
-
def test_youtube_allsubtitles(self):
self.DL.params['writesubtitles'] = True
self.DL.params['allsubtitles'] = True
subtitles = self.getSubtitles()
self.assertEqual(len(subtitles.keys()), 13)
+ self.assertEqual(md5(subtitles['en']), '4cd9278a35ba2305f47354ee13472260')
+ self.assertEqual(md5(subtitles['it']), '164a51f16f260476a05b50fe4c2f161d')
+ for lang in ['it', 'fr', 'de']:
+ self.assertTrue(subtitles.get(lang) is not None, 'Subtitles for \'%s\' not extracted' % lang)
def test_youtube_subtitles_sbv_format(self):
self.DL.params['writesubtitles'] = True
subtitles = self.getSubtitles()
self.assertEqual(md5(subtitles['en']), '3cb210999d3e021bd6c7f0ea751eab06')
- def test_youtube_list_subtitles(self):
- self.DL.expect_warning('Video doesn\'t have automatic captions')
- self.DL.params['listsubtitles'] = True
- info_dict = self.getInfoDict()
- self.assertEqual(info_dict, None)
-
def test_youtube_automatic_captions(self):
self.url = '8YoUxe5ncPo'
self.DL.params['writeautomaticsub'] = True
self.DL.params['writesubtitles'] = True
self.DL.params['allsubtitles'] = True
subtitles = self.getSubtitles()
- self.assertEqual(len(subtitles), 0)
-
- def test_youtube_multiple_langs(self):
- self.url = 'QRS8MkLhQmM'
- self.DL.params['writesubtitles'] = True
- langs = ['it', 'fr', 'de']
- self.DL.params['subtitleslangs'] = langs
- subtitles = self.getSubtitles()
- for lang in langs:
- self.assertTrue(subtitles.get(lang) is not None, 'Subtitles for \'%s\' not extracted' % lang)
+ self.assertFalse(subtitles)
class TestDailymotionSubtitles(BaseTestSubtitles):
url = 'http://www.dailymotion.com/video/xczg00'
IE = DailymotionIE
- def test_no_writesubtitles(self):
- subtitles = self.getSubtitles()
- self.assertEqual(subtitles, None)
-
- def test_subtitles(self):
- self.DL.params['writesubtitles'] = True
- subtitles = self.getSubtitles()
- self.assertEqual(md5(subtitles['en']), '976553874490cba125086bbfea3ff76f')
-
- def test_subtitles_lang(self):
- self.DL.params['writesubtitles'] = True
- self.DL.params['subtitleslangs'] = ['fr']
- subtitles = self.getSubtitles()
- self.assertEqual(md5(subtitles['fr']), '594564ec7d588942e384e920e5341792')
-
def test_allsubtitles(self):
self.DL.params['writesubtitles'] = True
self.DL.params['allsubtitles'] = True
subtitles = self.getSubtitles()
- self.assertEqual(len(subtitles.keys()), 5)
-
- def test_list_subtitles(self):
- self.DL.expect_warning('Automatic Captions not supported by this server')
- self.DL.params['listsubtitles'] = True
- info_dict = self.getInfoDict()
- self.assertEqual(info_dict, None)
-
- def test_automatic_captions(self):
- self.DL.expect_warning('Automatic Captions not supported by this server')
- self.DL.params['writeautomaticsub'] = True
- self.DL.params['subtitleslang'] = ['en']
- subtitles = self.getSubtitles()
- self.assertTrue(len(subtitles.keys()) == 0)
+ self.assertTrue(len(subtitles.keys()) >= 6)
+ self.assertEqual(md5(subtitles['en']), '976553874490cba125086bbfea3ff76f')
+ self.assertEqual(md5(subtitles['fr']), '594564ec7d588942e384e920e5341792')
+ for lang in ['es', 'fr', 'de']:
+ self.assertTrue(subtitles.get(lang) is not None, 'Subtitles for \'%s\' not extracted' % lang)
def test_nosubtitles(self):
self.DL.expect_warning('video doesn\'t have subtitles')
self.DL.params['writesubtitles'] = True
self.DL.params['allsubtitles'] = True
subtitles = self.getSubtitles()
- self.assertEqual(len(subtitles), 0)
-
- def test_multiple_langs(self):
- self.DL.params['writesubtitles'] = True
- langs = ['es', 'fr', 'de']
- self.DL.params['subtitleslangs'] = langs
- subtitles = self.getSubtitles()
- for lang in langs:
- self.assertTrue(subtitles.get(lang) is not None, 'Subtitles for \'%s\' not extracted' % lang)
+ self.assertFalse(subtitles)
class TestTedSubtitles(BaseTestSubtitles):
url = 'http://www.ted.com/talks/dan_dennett_on_our_consciousness.html'
IE = TEDIE
- def test_no_writesubtitles(self):
- subtitles = self.getSubtitles()
- self.assertEqual(subtitles, None)
-
- def test_subtitles(self):
- self.DL.params['writesubtitles'] = True
- subtitles = self.getSubtitles()
- self.assertEqual(md5(subtitles['en']), '4262c1665ff928a2dada178f62cb8d14')
-
- def test_subtitles_lang(self):
- self.DL.params['writesubtitles'] = True
- self.DL.params['subtitleslangs'] = ['fr']
- subtitles = self.getSubtitles()
- self.assertEqual(md5(subtitles['fr']), '66a63f7f42c97a50f8c0e90bc7797bb5')
-
def test_allsubtitles(self):
self.DL.params['writesubtitles'] = True
self.DL.params['allsubtitles'] = True
subtitles = self.getSubtitles()
self.assertTrue(len(subtitles.keys()) >= 28)
-
- def test_list_subtitles(self):
- self.DL.expect_warning('Automatic Captions not supported by this server')
- self.DL.params['listsubtitles'] = True
- info_dict = self.getInfoDict()
- self.assertEqual(info_dict, None)
-
- def test_automatic_captions(self):
- self.DL.expect_warning('Automatic Captions not supported by this server')
- self.DL.params['writeautomaticsub'] = True
- self.DL.params['subtitleslang'] = ['en']
- subtitles = self.getSubtitles()
- self.assertTrue(len(subtitles.keys()) == 0)
-
- def test_multiple_langs(self):
- self.DL.params['writesubtitles'] = True
- langs = ['es', 'fr', 'de']
- self.DL.params['subtitleslangs'] = langs
- subtitles = self.getSubtitles()
- for lang in langs:
+ self.assertEqual(md5(subtitles['en']), '4262c1665ff928a2dada178f62cb8d14')
+ self.assertEqual(md5(subtitles['fr']), '66a63f7f42c97a50f8c0e90bc7797bb5')
+ for lang in ['es', 'fr', 'de']:
self.assertTrue(subtitles.get(lang) is not None, 'Subtitles for \'%s\' not extracted' % lang)
url = 'http://blip.tv/a/a-6603250'
IE = BlipTVIE
- def test_list_subtitles(self):
- self.DL.expect_warning('Automatic Captions not supported by this server')
- self.DL.params['listsubtitles'] = True
- info_dict = self.getInfoDict()
- self.assertEqual(info_dict, None)
-
def test_allsubtitles(self):
- self.DL.expect_warning('Automatic Captions not supported by this server')
self.DL.params['writesubtitles'] = True
self.DL.params['allsubtitles'] = True
subtitles = self.getSubtitles()
url = 'http://vimeo.com/76979871'
IE = VimeoIE
- def test_no_writesubtitles(self):
- subtitles = self.getSubtitles()
- self.assertEqual(subtitles, None)
-
- def test_subtitles(self):
- self.DL.params['writesubtitles'] = True
- subtitles = self.getSubtitles()
- self.assertEqual(md5(subtitles['en']), '26399116d23ae3cf2c087cea94bc43b4')
-
- def test_subtitles_lang(self):
- self.DL.params['writesubtitles'] = True
- self.DL.params['subtitleslangs'] = ['fr']
- subtitles = self.getSubtitles()
- self.assertEqual(md5(subtitles['fr']), 'b6191146a6c5d3a452244d853fde6dc8')
-
def test_allsubtitles(self):
self.DL.params['writesubtitles'] = True
self.DL.params['allsubtitles'] = True
subtitles = self.getSubtitles()
self.assertEqual(set(subtitles.keys()), set(['de', 'en', 'es', 'fr']))
-
- def test_list_subtitles(self):
- self.DL.expect_warning('Automatic Captions not supported by this server')
- self.DL.params['listsubtitles'] = True
- info_dict = self.getInfoDict()
- self.assertEqual(info_dict, None)
-
- def test_automatic_captions(self):
- self.DL.expect_warning('Automatic Captions not supported by this server')
- self.DL.params['writeautomaticsub'] = True
- self.DL.params['subtitleslang'] = ['en']
- subtitles = self.getSubtitles()
- self.assertTrue(len(subtitles.keys()) == 0)
+ self.assertEqual(md5(subtitles['en']), '8062383cf4dec168fc40a088aa6d5888')
+ self.assertEqual(md5(subtitles['fr']), 'b6191146a6c5d3a452244d853fde6dc8')
def test_nosubtitles(self):
self.DL.expect_warning('video doesn\'t have subtitles')
self.DL.params['writesubtitles'] = True
self.DL.params['allsubtitles'] = True
subtitles = self.getSubtitles()
- self.assertEqual(len(subtitles), 0)
-
- def test_multiple_langs(self):
- self.DL.params['writesubtitles'] = True
- langs = ['es', 'fr', 'de']
- self.DL.params['subtitleslangs'] = langs
- subtitles = self.getSubtitles()
- for lang in langs:
- self.assertTrue(subtitles.get(lang) is not None, 'Subtitles for \'%s\' not extracted' % lang)
+ self.assertFalse(subtitles)
class TestWallaSubtitles(BaseTestSubtitles):
url = 'http://vod.walla.co.il/movie/2705958/the-yes-men'
IE = WallaIE
- def test_list_subtitles(self):
- self.DL.expect_warning('Automatic Captions not supported by this server')
- self.DL.params['listsubtitles'] = True
- info_dict = self.getInfoDict()
- self.assertEqual(info_dict, None)
-
def test_allsubtitles(self):
self.DL.expect_warning('Automatic Captions not supported by this server')
self.DL.params['writesubtitles'] = True
self.DL.params['writesubtitles'] = True
self.DL.params['allsubtitles'] = True
subtitles = self.getSubtitles()
- self.assertEqual(len(subtitles), 0)
+ self.assertFalse(subtitles)
class TestCeskaTelevizeSubtitles(BaseTestSubtitles):
url = 'http://www.ceskatelevize.cz/ivysilani/10600540290-u6-uzasny-svet-techniky'
IE = CeskaTelevizeIE
- def test_list_subtitles(self):
- self.DL.expect_warning('Automatic Captions not supported by this server')
- self.DL.params['listsubtitles'] = True
- info_dict = self.getInfoDict()
- self.assertEqual(info_dict, None)
-
def test_allsubtitles(self):
self.DL.expect_warning('Automatic Captions not supported by this server')
self.DL.params['writesubtitles'] = True
self.DL.params['allsubtitles'] = True
subtitles = self.getSubtitles()
self.assertEqual(set(subtitles.keys()), set(['cs']))
- self.assertEqual(md5(subtitles['cs']), '9bf52d9549533c32c427e264bf0847d4')
+ self.assertTrue(len(subtitles['cs']) > 20000)
def test_nosubtitles(self):
self.DL.expect_warning('video doesn\'t have subtitles')
self.DL.params['writesubtitles'] = True
self.DL.params['allsubtitles'] = True
subtitles = self.getSubtitles()
- self.assertEqual(len(subtitles), 0)
+ self.assertFalse(subtitles)
+
+
+class TestLyndaSubtitles(BaseTestSubtitles):
+ url = 'http://www.lynda.com/Bootstrap-tutorials/Using-exercise-files/110885/114408-4.html'
+ IE = LyndaIE
+
+ def test_allsubtitles(self):
+ self.DL.params['writesubtitles'] = True
+ self.DL.params['allsubtitles'] = True
+ subtitles = self.getSubtitles()
+ self.assertEqual(set(subtitles.keys()), set(['en']))
+ self.assertEqual(md5(subtitles['en']), '09bbe67222259bed60deaa26997d73a7')
+
+
+class TestNPOSubtitles(BaseTestSubtitles):
+ url = 'http://www.npo.nl/nos-journaal/28-08-2014/POW_00722860'
+ IE = NPOIE
+
+ def test_allsubtitles(self):
+ self.DL.params['writesubtitles'] = True
+ self.DL.params['allsubtitles'] = True
+ subtitles = self.getSubtitles()
+ self.assertEqual(set(subtitles.keys()), set(['nl']))
+ self.assertEqual(md5(subtitles['nl']), 'fc6435027572b63fb4ab143abd5ad3f4')
+
+
+class TestMTVSubtitles(BaseTestSubtitles):
+ url = 'http://www.cc.com/video-clips/kllhuv/stand-up-greg-fitzsimmons--uncensored---too-good-of-a-mother'
+ IE = ComedyCentralIE
+
+ def getInfoDict(self):
+ return super(TestMTVSubtitles, self).getInfoDict()['entries'][0]
+
+ def test_allsubtitles(self):
+ self.DL.params['writesubtitles'] = True
+ self.DL.params['allsubtitles'] = True
+ subtitles = self.getSubtitles()
+ self.assertEqual(set(subtitles.keys()), set(['en']))
+ self.assertEqual(md5(subtitles['en']), 'b9f6ca22a6acf597ec76f61749765e65')
+
+
+class TestNRKSubtitles(BaseTestSubtitles):
+ url = 'http://tv.nrk.no/serie/ikke-gjoer-dette-hjemme/DMPV73000411/sesong-2/episode-1'
+ IE = NRKTVIE
+
+ def test_allsubtitles(self):
+ self.DL.params['writesubtitles'] = True
+ self.DL.params['allsubtitles'] = True
+ subtitles = self.getSubtitles()
+ self.assertEqual(set(subtitles.keys()), set(['no']))
+ self.assertEqual(md5(subtitles['no']), '1d221e6458c95c5494dcd38e6a1f129a')
+
+
+class TestRaiSubtitles(BaseTestSubtitles):
+ url = 'http://www.rai.tv/dl/RaiTV/programmi/media/ContentItem-cb27157f-9dd0-4aee-b788-b1f67643a391.html'
+ IE = RaiIE
+
+ def test_allsubtitles(self):
+ self.DL.params['writesubtitles'] = True
+ self.DL.params['allsubtitles'] = True
+ subtitles = self.getSubtitles()
+ self.assertEqual(set(subtitles.keys()), set(['it']))
+ self.assertEqual(md5(subtitles['it']), 'b1d90a98755126b61e667567a1f6680a')
+
+
+class TestVikiSubtitles(BaseTestSubtitles):
+ url = 'http://www.viki.com/videos/1060846v-punch-episode-18'
+ IE = VikiIE
+
+ def test_allsubtitles(self):
+ self.DL.params['writesubtitles'] = True
+ self.DL.params['allsubtitles'] = True
+ subtitles = self.getSubtitles()
+ self.assertEqual(set(subtitles.keys()), set(['en']))
+ self.assertEqual(md5(subtitles['en']), '53cb083a5914b2d84ef1ab67b880d18a')
+
+
+class TestThePlatformSubtitles(BaseTestSubtitles):
+ # from http://www.3playmedia.com/services-features/tools/integrations/theplatform/
+ # (see http://theplatform.com/about/partners/type/subtitles-closed-captioning/)
+ url = 'theplatform:JFUjUE1_ehvq'
+ IE = ThePlatformIE
+
+ def test_allsubtitles(self):
+ self.DL.params['writesubtitles'] = True
+ self.DL.params['allsubtitles'] = True
+ subtitles = self.getSubtitles()
+ self.assertEqual(set(subtitles.keys()), set(['en']))
+ self.assertEqual(md5(subtitles['en']), '97e7670cbae3c4d26ae8bcc7fdd78d4b')
+
+
+class TestRtveSubtitles(BaseTestSubtitles):
+ url = 'http://www.rtve.es/alacarta/videos/los-misterios-de-laura/misterios-laura-capitulo-32-misterio-del-numero-17-2-parte/2428621/'
+ IE = RTVEALaCartaIE
+
+ def test_allsubtitles(self):
+ print('Skipping, only available from Spain')
+ return
+ self.DL.params['writesubtitles'] = True
+ self.DL.params['allsubtitles'] = True
+ subtitles = self.getSubtitles()
+ self.assertEqual(set(subtitles.keys()), set(['es']))
+ self.assertEqual(md5(subtitles['es']), '69e70cae2d40574fb7316f31d6eb7fca')
if __name__ == '__main__':
def test_func(self):
as_file = os.path.join(TEST_DIR, testfile)
swf_file = os.path.join(TEST_DIR, test_id + '.swf')
- if ((not os.path.exists(swf_file))
- or os.path.getmtime(swf_file) < os.path.getmtime(as_file)):
+ if ((not os.path.exists(swf_file)) or
+ os.path.getmtime(swf_file) < os.path.getmtime(as_file)):
# Recompile
try:
subprocess.check_call([
version_tuple,
xpath_with_ns,
render_table,
+ match_str,
)
self.assertEqual(
sanitize_filename('New World record at 0:12:34'),
'New World record at 0_12_34')
+ self.assertEqual(sanitize_filename('--gasdgf'), '_-gasdgf')
+ self.assertEqual(sanitize_filename('--gasdgf', is_id=True), '--gasdgf')
forbidden = '"\0\\/'
for fc in forbidden:
self.assertEqual(parse_duration('2.5 hours'), 9000)
self.assertEqual(parse_duration('02:03:04'), 7384)
self.assertEqual(parse_duration('01:02:03:04'), 93784)
+ self.assertEqual(parse_duration('1 hour 3 minutes'), 3780)
def test_fix_xml_ampersands(self):
self.assertEqual(
"playlist":[{"controls":{"all":null}}]
}''')
+ inp = '"SAND Number: SAND 2013-7800P\\nPresenter: Tom Russo\\nHabanero Software Training - Xyce Software\\nXyce, Sandia\\u0027s"'
+ json_code = js_to_json(inp)
+ self.assertEqual(json.loads(json_code), json.loads(inp))
+
def test_js_to_json_edgecases(self):
on = js_to_json("{abc_def:'1\\'\\\\2\\\\\\'3\"4'}")
self.assertEqual(json.loads(on), {"abc_def": "1'\\2\\'3\"4"})
'123 4\n'
'9999 51')
+ def test_match_str(self):
+ self.assertRaises(ValueError, match_str, 'xy>foobar', {})
+ self.assertFalse(match_str('xy', {'x': 1200}))
+ self.assertTrue(match_str('!xy', {'x': 1200}))
+ self.assertTrue(match_str('x', {'x': 1200}))
+ self.assertFalse(match_str('!x', {'x': 1200}))
+ self.assertTrue(match_str('x', {'x': 0}))
+ self.assertFalse(match_str('x>0', {'x': 0}))
+ self.assertFalse(match_str('x>0', {}))
+ self.assertTrue(match_str('x>?0', {}))
+ self.assertTrue(match_str('x>1K', {'x': 1200}))
+ self.assertFalse(match_str('x>2K', {'x': 1200}))
+ self.assertTrue(match_str('x>=1200 & x < 1300', {'x': 1200}))
+ self.assertFalse(match_str('x>=1100 & x < 1200', {'x': 1200}))
+ self.assertFalse(match_str('y=a212', {'y': 'foobar42'}))
+ self.assertTrue(match_str('y=foobar42', {'y': 'foobar42'}))
+ self.assertFalse(match_str('y!=foobar42', {'y': 'foobar42'}))
+ self.assertTrue(match_str('y!=foobar2', {'y': 'foobar42'}))
+ self.assertFalse(match_str(
+ 'like_count > 100 & dislike_count <? 50 & description',
+ {'like_count': 90, 'description': 'foo'}))
+ self.assertTrue(match_str(
+ 'like_count > 100 & dislike_count <? 50 & description',
+ {'like_count': 190, 'description': 'foo'}))
+ self.assertFalse(match_str(
+ 'like_count > 100 & dislike_count <? 50 & description',
+ {'like_count': 190, 'dislike_count': 60, 'description': 'foo'}))
+ self.assertFalse(match_str(
+ 'like_count > 100 & dislike_count <? 50 & description',
+ {'like_count': 190, 'dislike_count': 10}))
+
if __name__ == '__main__':
unittest.main()
import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-
import io
import re
import string
+from test.helper import FakeYDL
from youtube_dl.extractor import YoutubeIE
from youtube_dl.compat import compat_str, compat_urlretrieve
'js',
'4646B5181C6C3020DF1D9C7FCFEA.AD80ABF70C39BD369CCCAE780AFBB98FA6B6CB42766249D9488C288',
'82C8849D94266724DC6B6AF89BBFA087EACCD963.B93C07FBA084ACAEFCF7C9D1FD0203C6C1815B6B'
+ ),
+ (
+ 'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflKjOTVq/html5player.js',
+ 'js',
+ '312AA52209E3623129A412D56A40F11CB0AF14AE.3EE09501CB14E3BCDC3B2AE808BF3F1D14E7FBF12',
+ '112AA5220913623229A412D56A40F11CB0AF14AE.3EE0950FCB14EEBCDC3B2AE808BF331D14E7FBF3',
)
]
if not os.path.exists(fn):
compat_urlretrieve(url, fn)
- ie = YoutubeIE()
+ ydl = FakeYDL()
+ ie = YoutubeIE(ydl)
if stype == 'js':
with io.open(fn, encoding='utf-8') as testf:
jscode = testf.read()
\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ on\ Windows)
\-\-flat\-playlist\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Do\ not\ extract\ the\ videos\ of\ a\ playlist,
\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ only\ list\ them.
+\-\-no\-color\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Do\ not\ emit\ color\ codes\ in\ output.
\f[]
.fi
.SS Network Options:
\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ COUNT\ views
\-\-max\-views\ COUNT\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Do\ not\ download\ any\ videos\ with\ more\ than
\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ COUNT\ views
+\-\-match\-filter\ FILTER\ \ \ \ \ \ \ \ \ \ \ \ (Experimental)\ Generic\ video\ filter.
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Specify\ any\ key\ (see\ help\ for\ \-o\ for\ a\ list
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ of\ available\ keys)\ to\ match\ if\ the\ key\ is
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ present,\ !key\ to\ check\ if\ the\ key\ is\ not
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ present,\ key\ >\ NUMBER\ (like\ "comment_count\ >
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ 12",\ also\ works\ with\ >=,\ <,\ <=,\ !=,\ =)\ to
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ compare\ against\ a\ number,\ and\ &\ to\ require
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ multiple\ matches.\ Values\ which\ are\ not
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ known\ are\ excluded\ unless\ you\ put\ a
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ question\ mark\ (?)\ after\ the\ operator.\ For
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ example,\ to\ only\ match\ videos\ that\ have
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ been\ liked\ more\ than\ 100\ times\ and\ disliked
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ less\ than\ 50\ times\ (or\ the\ dislike
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ functionality\ is\ not\ available\ at\ the\ given
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ service),\ but\ which\ also\ have\ a\ description,
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ use\ \ \-\-match\-filter\ "like_count\ >\ 100\ &
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ dislike_count\ <?\ 50\ &\ description"\ .
\-\-no\-playlist\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ If\ the\ URL\ refers\ to\ a\ video\ and\ a
\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ playlist,\ download\ only\ the\ video.
+\-\-yes\-playlist\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ If\ the\ URL\ refers\ to\ a\ video\ and\ a
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ playlist,\ download\ the\ playlist.
\-\-age\-limit\ YEARS\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ download\ only\ videos\ suitable\ for\ the\ given
\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ age
\-\-download\-archive\ FILE\ \ \ \ \ \ \ \ \ \ Download\ only\ videos\ not\ listed\ in\ the
\-\-playlist\-reverse\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Download\ playlist\ videos\ in\ reverse\ order
\-\-xattr\-set\-filesize\ \ \ \ \ \ \ \ \ \ \ \ \ (experimental)\ set\ file\ xattribute
\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ ytdl.filesize\ with\ expected\ filesize
+\-\-hls\-prefer\-native\ \ \ \ \ \ \ \ \ \ \ \ \ \ (experimental)\ Use\ the\ native\ HLS
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ downloader\ instead\ of\ ffmpeg.
\-\-external\-downloader\ COMMAND\ \ \ \ (experimental)\ Use\ the\ specified\ external
\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ downloader.\ Currently\ supports
\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ aria2c,curl,wget
\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ video\ results\ by\ putting\ a\ condition\ in
\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ brackets,\ as\ in\ \-f\ "best[height=720]"\ (or
\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \-f\ "[filesize>10M]").\ \ This\ works\ for
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ filesize,\ height,\ width,\ tbr,\ abr,\ vbr,\ and
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ fps\ and\ the\ comparisons\ <,\ <=,\ >,\ >=,\ =,\ !=
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ .\ Formats\ for\ which\ the\ value\ is\ not\ known
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ are\ excluded\ unless\ you\ put\ a\ question\ mark
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ (?)\ after\ the\ operator.\ You\ can\ combine
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ format\ filters,\ so\ \ \-f\ "[height\ <=?
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ 720][tbr>500]"\ selects\ up\ to\ 720p\ videos
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ (or\ videos\ where\ the\ height\ is\ not\ known)
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ with\ a\ bitrate\ of\ at\ least\ 500\ KBit/s.\ By
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ default,\ youtube\-dl\ will\ pick\ the\ best
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ quality.\ Use\ commas\ to\ download\ multiple
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ audio\ formats,\ such\ as\ \-f
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ filesize,\ height,\ width,\ tbr,\ abr,\ vbr,
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ asr,\ and\ fps\ and\ the\ comparisons\ <,\ <=,\ >,
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ >=,\ =,\ !=\ and\ for\ ext,\ acodec,\ vcodec,
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ container,\ and\ protocol\ and\ the\ comparisons
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ =,\ !=\ .\ Formats\ for\ which\ the\ value\ is\ not
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ known\ are\ excluded\ unless\ you\ put\ a
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ question\ mark\ (?)\ after\ the\ operator.\ You
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ can\ combine\ format\ filters,\ so\ \ \-f\ "[height
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ <=?\ 720][tbr>500]"\ selects\ up\ to\ 720p
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ videos\ (or\ videos\ where\ the\ height\ is\ not
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ known)\ with\ a\ bitrate\ of\ at\ least\ 500
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ KBit/s.\ By\ default,\ youtube\-dl\ will\ pick
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ the\ best\ quality.\ Use\ commas\ to\ download
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ multiple\ audio\ formats,\ such\ as\ \-f
\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ 136/137/mp4/bestvideo,140/m4a/bestaudio.
\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ You\ can\ merge\ the\ video\ and\ audio\ of\ two
\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ formats\ into\ a\ single\ file\ using\ \-f\ <video\-
\-\-all\-subs\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ downloads\ all\ the\ available\ subtitles\ of
\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ the\ video
\-\-list\-subs\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ lists\ all\ available\ subtitles\ for\ the\ video
-\-\-sub\-format\ FORMAT\ \ \ \ \ \ \ \ \ \ \ \ \ \ subtitle\ format\ (default=srt)\ ([sbv/vtt]
-\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ youtube\ only)
+\-\-sub\-format\ FORMAT\ \ \ \ \ \ \ \ \ \ \ \ \ \ subtitle\ format,\ accepts\ formats
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ preference,\ for\ example:\ "ass/srt/best"
\-\-sub\-lang\ LANGS\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ languages\ of\ the\ subtitles\ to\ download
\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ (optional)\ separated\ by\ commas,\ use\ IETF
\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ language\ tags\ like\ \[aq]en,pt\[aq]
\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ postprocessors\ (default)
\-\-prefer\-ffmpeg\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Prefer\ ffmpeg\ over\ avconv\ for\ running\ the
\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ postprocessors
+\-\-ffmpeg\-location\ PATH\ \ \ \ \ \ \ \ \ \ \ Location\ of\ the\ ffmpeg/avconv\ binary;
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ either\ the\ path\ to\ the\ binary\ or\ its
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ containing\ directory.
\-\-exec\ CMD\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Execute\ a\ command\ on\ the\ file\ after
\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ downloading,\ similar\ to\ find\[aq]s\ \-exec
\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ syntax.\ Example:\ \-\-exec\ \[aq]adb\ push\ {}
\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ /sdcard/Music/\ &&\ rm\ {}\[aq]
+\-\-convert\-subtitles\ FORMAT\ \ \ \ \ \ \ Convert\ the\ subtitles\ to\ another\ format
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ (currently\ supported:\ srt|ass|vtt)
\f[]
.fi
.SH CONFIGURATION
youtube\-dl in turn.
.SS ERROR: no fmt_url_map or conn information found in video info
.PP
-youtube has switched to a new video info format in July 2011 which is
+YouTube has switched to a new video info format in July 2011 which is
not supported by old versions of youtube\-dl.
-You can update youtube\-dl with \f[C]sudo\ youtube\-dl\ \-\-update\f[].
+See above (#how-do-i-update-youtube-dl) for how to update youtube\-dl.
.SS ERROR: unable to download video
.PP
-youtube requires an additional signature since September 2012 which is
+YouTube requires an additional signature since September 2012 which is
not supported by old versions of youtube\-dl.
-You can update youtube\-dl with \f[C]sudo\ youtube\-dl\ \-\-update\f[].
+See above (#how-do-i-update-youtube-dl) for how to update youtube\-dl.
+.SS ExtractorError: Could not find JS function u\[aq]OF\[aq]
+.PP
+In February 2015, the new YouTube player contained a character sequence
+in a string that was misinterpreted by old versions of youtube\-dl.
+See above (#how-do-i-update-youtube-dl) for how to update youtube\-dl.
.SS SyntaxError: Non\-ASCII character
.PP
The error
For one, have a look at the list of supported
sites (docs/supportedsites.md).
Note that it can sometimes happen that the site changes its URL scheme
-(say, from http://example.com/v/1234567 to http://example.com/v/1234567
-) and youtube\-dl reports an URL of a service in that list as
-unsupported.
+(say, from http://example.com/video/1234567 to
+http://example.com/v/1234567 ) and youtube\-dl reports a URL of a
+service in that list as unsupported.
In that case, simply report a bug.
.PP
It is \f[I]not\f[] possible to detect whether a URL is supported or not.
COMPREPLY=()
cur="${COMP_WORDS[COMP_CWORD]}"
prev="${COMP_WORDS[COMP_CWORD-1]}"
- opts="--help --version --update --ignore-errors --abort-on-error --dump-user-agent --list-extractors --extractor-descriptions --default-search --ignore-config --flat-playlist --proxy --socket-timeout --source-address --force-ipv4 --force-ipv6 --playlist-start --playlist-end --playlist-items --match-title --reject-title --max-downloads --min-filesize --max-filesize --date --datebefore --dateafter --min-views --max-views --no-playlist --age-limit --download-archive --include-ads --rate-limit --retries --buffer-size --no-resize-buffer --test --playlist-reverse --xattr-set-filesize --external-downloader --batch-file --id --output --autonumber-size --restrict-filenames --auto-number --title --literal --no-overwrites --continue --no-continue --no-part --no-mtime --write-description --write-info-json --write-annotations --load-info --cookies --cache-dir --no-cache-dir --rm-cache-dir --write-thumbnail --write-all-thumbnails --list-thumbnails --quiet --no-warnings --simulate --skip-download --get-url --get-title --get-id --get-thumbnail --get-description --get-duration --get-filename --get-format --dump-json --dump-single-json --print-json --newline --no-progress --console-title --verbose --dump-intermediate-pages --write-pages --youtube-print-sig-code --print-traffic --call-home --no-call-home --encoding --no-check-certificate --prefer-insecure --user-agent --referer --add-header --bidi-workaround --sleep-interval --format --all-formats --prefer-free-formats --max-quality --list-formats --youtube-include-dash-manifest --youtube-skip-dash-manifest --merge-output-format --write-sub --write-auto-sub --all-subs --list-subs --sub-format --sub-lang --username --password --twofactor --netrc --video-password --extract-audio --audio-format --audio-quality --recode-video --keep-video --no-post-overwrites --embed-subs --embed-thumbnail --add-metadata --xattrs --fixup --prefer-avconv --prefer-ffmpeg --exec"
+ opts="--help --version --update --ignore-errors --abort-on-error --dump-user-agent --list-extractors --extractor-descriptions --default-search --ignore-config --flat-playlist --no-color --proxy --socket-timeout --source-address --force-ipv4 --force-ipv6 --playlist-start --playlist-end --playlist-items --match-title --reject-title --max-downloads --min-filesize --max-filesize --date --datebefore --dateafter --min-views --max-views --match-filter --no-playlist --yes-playlist --age-limit --download-archive --include-ads --rate-limit --retries --buffer-size --no-resize-buffer --test --playlist-reverse --xattr-set-filesize --hls-prefer-native --external-downloader --batch-file --id --output --autonumber-size --restrict-filenames --auto-number --title --literal --no-overwrites --continue --no-continue --no-part --no-mtime --write-description --write-info-json --write-annotations --load-info --cookies --cache-dir --no-cache-dir --rm-cache-dir --write-thumbnail --write-all-thumbnails --list-thumbnails --quiet --no-warnings --simulate --skip-download --get-url --get-title --get-id --get-thumbnail --get-description --get-duration --get-filename --get-format --dump-json --dump-single-json --print-json --newline --no-progress --console-title --verbose --dump-intermediate-pages --write-pages --youtube-print-sig-code --print-traffic --call-home --no-call-home --encoding --no-check-certificate --prefer-insecure --user-agent --referer --add-header --bidi-workaround --sleep-interval --format --all-formats --prefer-free-formats --max-quality --list-formats --youtube-include-dash-manifest --youtube-skip-dash-manifest --merge-output-format --write-sub --write-auto-sub --all-subs --list-subs --sub-format --sub-lang --username --password --twofactor --netrc --video-password --extract-audio --audio-format --audio-quality --recode-video --keep-video --no-post-overwrites --embed-subs --embed-thumbnail --add-metadata --xattrs --fixup --prefer-avconv --prefer-ffmpeg --ffmpeg-location --exec --convert-subtitles"
keywords=":ytfavorites :ytrecommended :ytsubscriptions :ytwatchlater :ythistory"
fileopts="-a|--batch-file|--download-archive|--cookies|--load-info"
diropts="--cache-dir"
complete --command youtube-dl --long-option default-search --description 'Use this prefix for unqualified URLs. For example "gvsearch2:" downloads two videos from google videos for youtube-dl "large apple". Use the value "auto" to let youtube-dl guess ("auto_warning" to emit a warning when guessing). "error" just throws an error. The default value "fixup_error" repairs broken URLs, but emits an error if this is not possible instead of searching.'
complete --command youtube-dl --long-option ignore-config --description 'Do not read configuration files. When given in the global configuration file /etc/youtube-dl.conf: Do not read the user configuration in ~/.config/youtube-dl/config (%APPDATA%/youtube-dl/config.txt on Windows)'
complete --command youtube-dl --long-option flat-playlist --description 'Do not extract the videos of a playlist, only list them.'
+complete --command youtube-dl --long-option no-color --description 'Do not emit color codes in output.'
complete --command youtube-dl --long-option proxy --description 'Use the specified HTTP/HTTPS proxy. Pass in an empty string (--proxy "") for direct connection'
complete --command youtube-dl --long-option socket-timeout --description 'Time to wait before giving up, in seconds'
complete --command youtube-dl --long-option source-address --description 'Client-side IP address to bind to (experimental)'
complete --command youtube-dl --long-option dateafter --description 'download only videos uploaded on or after this date (i.e. inclusive)'
complete --command youtube-dl --long-option min-views --description 'Do not download any videos with less than COUNT views'
complete --command youtube-dl --long-option max-views --description 'Do not download any videos with more than COUNT views'
+complete --command youtube-dl --long-option match-filter --description '(Experimental) Generic video filter. Specify any key (see help for -o for a list of available keys) to match if the key is present, !key to check if the key is not present, key > NUMBER (like "comment_count > 12", also works with >=, <, <=, !=, =) to compare against a number, and & to require multiple matches. Values which are not known are excluded unless you put a question mark (?) after the operator. For example, to only match videos that have been liked more than 100 times and disliked less than 50 times (or the dislike functionality is not available at the given service), but which also have a description, use --match-filter "like_count > 100 & dislike_count <? 50 & description" .'
complete --command youtube-dl --long-option no-playlist --description 'If the URL refers to a video and a playlist, download only the video.'
+complete --command youtube-dl --long-option yes-playlist --description 'If the URL refers to a video and a playlist, download the playlist.'
complete --command youtube-dl --long-option age-limit --description 'download only videos suitable for the given age'
complete --command youtube-dl --long-option download-archive --description 'Download only videos not listed in the archive file. Record the IDs of all downloaded videos in it.' --require-parameter
complete --command youtube-dl --long-option include-ads --description 'Download advertisements as well (experimental)'
complete --command youtube-dl --long-option test
complete --command youtube-dl --long-option playlist-reverse --description 'Download playlist videos in reverse order'
complete --command youtube-dl --long-option xattr-set-filesize --description '(experimental) set file xattribute ytdl.filesize with expected filesize'
+complete --command youtube-dl --long-option hls-prefer-native --description '(experimental) Use the native HLS downloader instead of ffmpeg.'
complete --command youtube-dl --long-option external-downloader --description '(experimental) Use the specified external downloader. Currently supports aria2c, curl, wget'
complete --command youtube-dl --long-option batch-file --short-option a --description 'file containing URLs to download ('"'"'-'"'"' for stdin)' --require-parameter
complete --command youtube-dl --long-option id --description 'use only video ID in file name'
complete --command youtube-dl --long-option add-header --description 'specify a custom HTTP header and its value, separated by a colon '"'"':'"'"'. You can use this option multiple times'
complete --command youtube-dl --long-option bidi-workaround --description 'Work around terminals that lack bidirectional text support. Requires bidiv or fribidi executable in PATH'
complete --command youtube-dl --long-option sleep-interval --description 'Number of seconds to sleep before each download.'
-complete --command youtube-dl --long-option format --short-option f --description 'video format code, specify the order of preference using slashes, as in -f 22/17/18 . Instead of format codes, you can select by extension for the extensions aac, m4a, mp3, mp4, ogg, wav, webm. You can also use the special names "best", "bestvideo", "bestaudio", "worst". You can filter the video results by putting a condition in brackets, as in -f "best[height=720]" (or -f "[filesize>10M]"). This works for filesize, height, width, tbr, abr, vbr, and fps and the comparisons <, <=, >, >=, =, != . Formats for which the value is not known are excluded unless you put a question mark (?) after the operator. You can combine format filters, so -f "[height <=? 720][tbr>500]" selects up to 720p videos (or videos where the height is not known) with a bitrate of at least 500 KBit/s. By default, youtube-dl will pick the best quality. Use commas to download multiple audio formats, such as -f 136/137/mp4/bestvideo,140/m4a/bestaudio. You can merge the video and audio of two formats into a single file using -f <video-format>+<audio-format> (requires ffmpeg or avconv), for example -f bestvideo+bestaudio.'
+complete --command youtube-dl --long-option format --short-option f --description 'video format code, specify the order of preference using slashes, as in -f 22/17/18 . Instead of format codes, you can select by extension for the extensions aac, m4a, mp3, mp4, ogg, wav, webm. You can also use the special names "best", "bestvideo", "bestaudio", "worst". You can filter the video results by putting a condition in brackets, as in -f "best[height=720]" (or -f "[filesize>10M]"). This works for filesize, height, width, tbr, abr, vbr, asr, and fps and the comparisons <, <=, >, >=, =, != and for ext, acodec, vcodec, container, and protocol and the comparisons =, != . Formats for which the value is not known are excluded unless you put a question mark (?) after the operator. You can combine format filters, so -f "[height <=? 720][tbr>500]" selects up to 720p videos (or videos where the height is not known) with a bitrate of at least 500 KBit/s. By default, youtube-dl will pick the best quality. Use commas to download multiple audio formats, such as -f 136/137/mp4/bestvideo,140/m4a/bestaudio. You can merge the video and audio of two formats into a single file using -f <video-format>+<audio-format> (requires ffmpeg or avconv), for example -f bestvideo+bestaudio.'
complete --command youtube-dl --long-option all-formats --description 'download all available video formats'
complete --command youtube-dl --long-option prefer-free-formats --description 'prefer free video formats unless a specific one is requested'
complete --command youtube-dl --long-option max-quality --description 'highest quality format to download'
complete --command youtube-dl --long-option write-auto-sub --description 'write automatic subtitle file (youtube only)'
complete --command youtube-dl --long-option all-subs --description 'downloads all the available subtitles of the video'
complete --command youtube-dl --long-option list-subs --description 'lists all available subtitles for the video'
-complete --command youtube-dl --long-option sub-format --description 'subtitle format (default=srt) ([sbv/vtt] youtube only)'
+complete --command youtube-dl --long-option sub-format --description 'subtitle format; accepts a formats preference, for example: "ass/srt/best"'
complete --command youtube-dl --long-option sub-lang --description 'languages of the subtitles to download (optional) separated by commas, use IETF language tags like '"'"'en,pt'"'"''
complete --command youtube-dl --long-option username --short-option u --description 'login with this account ID'
complete --command youtube-dl --long-option password --short-option p --description 'account password. If this option is left out, youtube-dl will ask interactively.'
complete --command youtube-dl --long-option fixup --description 'Automatically correct known faults of the file. One of never (do nothing), warn (only emit a warning), detect_or_warn (the default; fix file if we can, warn otherwise)'
complete --command youtube-dl --long-option prefer-avconv --description 'Prefer avconv over ffmpeg for running the postprocessors (default)'
complete --command youtube-dl --long-option prefer-ffmpeg --description 'Prefer ffmpeg over avconv for running the postprocessors'
+complete --command youtube-dl --long-option ffmpeg-location --description 'Location of the ffmpeg/avconv binary; either the path to the binary or its containing directory.'
complete --command youtube-dl --long-option exec --description 'Execute a command on the file after downloading, similar to find'"'"'s -exec syntax. Example: --exec '"'"'adb push {} /sdcard/Music/ && rm {}'"'"''
+complete --command youtube-dl --long-option convert-subtitles --description 'Convert the subtitles to another format (currently supported: srt|ass|vtt)'
complete --command youtube-dl --arguments ":ytfavorites :ytrecommended :ytsubscriptions :ytwatchlater :ythistory"
elif [[ ${prev} == "--recode-video" ]]; then
_arguments '*: :(mp4 flv ogg webm mkv)'
else
- _arguments '*: :(--help --version --update --ignore-errors --abort-on-error --dump-user-agent --list-extractors --extractor-descriptions --default-search --ignore-config --flat-playlist --proxy --socket-timeout --source-address --force-ipv4 --force-ipv6 --playlist-start --playlist-end --playlist-items --match-title --reject-title --max-downloads --min-filesize --max-filesize --date --datebefore --dateafter --min-views --max-views --no-playlist --age-limit --download-archive --include-ads --rate-limit --retries --buffer-size --no-resize-buffer --test --playlist-reverse --xattr-set-filesize --external-downloader --batch-file --id --output --autonumber-size --restrict-filenames --auto-number --title --literal --no-overwrites --continue --no-continue --no-part --no-mtime --write-description --write-info-json --write-annotations --load-info --cookies --cache-dir --no-cache-dir --rm-cache-dir --write-thumbnail --write-all-thumbnails --list-thumbnails --quiet --no-warnings --simulate --skip-download --get-url --get-title --get-id --get-thumbnail --get-description --get-duration --get-filename --get-format --dump-json --dump-single-json --print-json --newline --no-progress --console-title --verbose --dump-intermediate-pages --write-pages --youtube-print-sig-code --print-traffic --call-home --no-call-home --encoding --no-check-certificate --prefer-insecure --user-agent --referer --add-header --bidi-workaround --sleep-interval --format --all-formats --prefer-free-formats --max-quality --list-formats --youtube-include-dash-manifest --youtube-skip-dash-manifest --merge-output-format --write-sub --write-auto-sub --all-subs --list-subs --sub-format --sub-lang --username --password --twofactor --netrc --video-password --extract-audio --audio-format --audio-quality --recode-video --keep-video --no-post-overwrites --embed-subs --embed-thumbnail --add-metadata --xattrs --fixup --prefer-avconv --prefer-ffmpeg --exec)'
+ _arguments '*: :(--help --version --update --ignore-errors --abort-on-error --dump-user-agent --list-extractors --extractor-descriptions --default-search --ignore-config --flat-playlist --no-color --proxy --socket-timeout --source-address --force-ipv4 --force-ipv6 --playlist-start --playlist-end --playlist-items --match-title --reject-title --max-downloads --min-filesize --max-filesize --date --datebefore --dateafter --min-views --max-views --match-filter --no-playlist --yes-playlist --age-limit --download-archive --include-ads --rate-limit --retries --buffer-size --no-resize-buffer --test --playlist-reverse --xattr-set-filesize --hls-prefer-native --external-downloader --batch-file --id --output --autonumber-size --restrict-filenames --auto-number --title --literal --no-overwrites --continue --no-continue --no-part --no-mtime --write-description --write-info-json --write-annotations --load-info --cookies --cache-dir --no-cache-dir --rm-cache-dir --write-thumbnail --write-all-thumbnails --list-thumbnails --quiet --no-warnings --simulate --skip-download --get-url --get-title --get-id --get-thumbnail --get-description --get-duration --get-filename --get-format --dump-json --dump-single-json --print-json --newline --no-progress --console-title --verbose --dump-intermediate-pages --write-pages --youtube-print-sig-code --print-traffic --call-home --no-call-home --encoding --no-check-certificate --prefer-insecure --user-agent --referer --add-header --bidi-workaround --sleep-interval --format --all-formats --prefer-free-formats --max-quality --list-formats --youtube-include-dash-manifest --youtube-skip-dash-manifest --merge-output-format --write-sub --write-auto-sub --all-subs --list-subs --sub-format --sub-lang --username --password --twofactor --netrc --video-password --extract-audio --audio-format --audio-quality --recode-video --keep-video --no-post-overwrites --embed-subs --embed-thumbnail --add-metadata --xattrs --fixup --prefer-avconv --prefer-ffmpeg --ffmpeg-location --exec --convert-subtitles)'
fi
;;
esac
allsubtitles: Downloads all the subtitles of the video
(requires writesubtitles or writeautomaticsub)
listsubtitles: Lists all available subtitles for the video
- subtitlesformat: Subtitle format [srt/sbv/vtt] (default=srt)
+ subtitlesformat: The format code for subtitles
subtitleslangs: List of languages of the subtitles to download
keepvideo: Keep the video file after post-processing
daterange: A DateRange object, download only if the upload_date is in the range.
postprocessor.
progress_hooks: A list of functions that get called on download
progress, with a dictionary with the entries
- * status: One of "downloading" and "finished".
+ * status: One of "downloading", "error", or "finished".
Check this first and ignore unknown values.
If status is one of "downloading" or "finished", the
following properties may also be present:
* filename: The final filename (always present)
+ * tmpfilename: The filename we're currently writing to
* downloaded_bytes: Bytes on disk
* total_bytes: Size of the whole file, None if unknown
- * tmpfilename: The filename we're currently writing to
+ * total_bytes_estimate: Guess of the eventual file size,
+ None if unavailable.
+ * elapsed: The number of seconds since download started.
* eta: The estimated time in seconds, None if unknown
* speed: The download speed in bytes/second, None if
unknown
+ * fragment_index: The counter of the currently
+ downloaded video fragment.
+ * fragment_count: The number of fragments (= individual
+ files that will be merged)
Progress hooks are guaranteed to be called at least once
(with status "finished") if the download is successful.
call_home: Boolean, true iff we are allowed to contact the
youtube-dl servers for debugging.
sleep_interval: Number of seconds to sleep before each download.
- external_downloader: Executable of the external downloader to call.
listformats: Print an overview of available video formats and exit.
list_thumbnails: Print a table of all thumbnails and exit.
-
+ match_filter: A function that gets called with the info_dict of
+ every video.
+ If it returns a message, the video is ignored.
+ If it returns None, the video is downloaded.
+ match_filter_func in utils.py is one example of such a function.
+ no_color: Do not emit color codes in output.
+
+ The following options determine which downloader is picked:
+ external_downloader: Executable of the external downloader to call.
+ None or unset for standard (built-in) downloader.
+ hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv.
The following parameters are not used by YoutubeDL itself, they are used by
the FileDownloader:
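Since progress_hooks entries and match_filter are plain callables, the two parameters documented above can be exercised directly from the embedding API. A minimal sketch, assuming only the contracts stated in this docstring (the URL and the duration cutoff are placeholders):

    from youtube_dl import YoutubeDL

    def my_hook(d):
        # d is the status dictionary described above; check 'status' first
        # and ignore values you do not recognize.
        if d['status'] == 'finished':
            print('Done downloading %s' % d['filename'])

    def my_filter(info_dict):
        # Return a message to skip the video, or None to download it.
        duration = info_dict.get('duration')
        if duration and duration > 3600:  # placeholder cutoff
            return 'Skipping %s: longer than an hour' % info_dict.get('id')
        return None

    ydl_opts = {
        'progress_hooks': [my_hook],
        'match_filter': my_filter,
        'no_color': True,
    }
    with YoutubeDL(ydl_opts) as ydl:
        ydl.download(['http://example.com/some/video'])  # placeholder URL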
raise
if (sys.version_info >= (3,) and sys.platform != 'win32' and
- sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
- and not params.get('restrictfilenames', False)):
+ sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968'] and
+ not params.get('restrictfilenames', False)):
# On Python 3, the Unicode filesystem API will throw errors (#1474)
self.report_warning(
'Assuming --restrict-filenames since file system encoding '
else:
if self.params.get('no_warnings'):
return
- if self._err_file.isatty() and os.name != 'nt':
+ if not self.params.get('no_color') and self._err_file.isatty() and os.name != 'nt':
_msg_header = '\033[0;33mWARNING:\033[0m'
else:
_msg_header = 'WARNING:'
Do the same as trouble, but prefixes the message with 'ERROR:', colored
in red if stderr is a tty file.
'''
- if self._err_file.isatty() and os.name != 'nt':
+ if not self.params.get('no_color') and self._err_file.isatty() and os.name != 'nt':
_msg_header = '\033[0;31mERROR:\033[0m'
else:
_msg_header = 'ERROR:'
self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
return None
- def _match_entry(self, info_dict):
+ def _match_entry(self, info_dict, incomplete):
""" Returns None iff the file should be downloaded """
video_title = info_dict.get('title', info_dict.get('id', 'video'))
if max_views is not None and view_count > max_views:
return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
- return 'Skipping "%s" because it is age restricted' % title
+ return 'Skipping "%s" because it is age restricted' % video_title
if self.in_download_archive(info_dict):
return '%s has already been recorded in archive' % video_title
+
+ if not incomplete:
+ match_filter = self.params.get('match_filter')
+ if match_filter is not None:
+ ret = match_filter(info_dict)
+ if ret is not None:
+ return ret
+
return None
@staticmethod
'extractor_key': ie_result['extractor_key'],
}
- reason = self._match_entry(entry)
+ reason = self._match_entry(entry, incomplete=True)
if reason is not None:
self.to_screen('[download] ' + reason)
continue
'!=': operator.ne,
}
operator_rex = re.compile(r'''(?x)\s*\[
- (?P<key>width|height|tbr|abr|vbr|filesize|fps)
+ (?P<key>width|height|tbr|abr|vbr|asr|filesize|fps)
\s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
(?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
\]$
''' % '|'.join(map(re.escape, OPERATORS.keys())))
m = operator_rex.search(format_spec)
+ if m:
+ try:
+ comparison_value = int(m.group('value'))
+ except ValueError:
+ comparison_value = parse_filesize(m.group('value'))
+ if comparison_value is None:
+ comparison_value = parse_filesize(m.group('value') + 'B')
+ if comparison_value is None:
+ raise ValueError(
+ 'Invalid value %r in format specification %r' % (
+ m.group('value'), format_spec))
+ op = OPERATORS[m.group('op')]
+
if not m:
- raise ValueError('Invalid format specification %r' % format_spec)
+ STR_OPERATORS = {
+ '=': operator.eq,
+ '!=': operator.ne,
+ }
+ str_operator_rex = re.compile(r'''(?x)\s*\[
+ \s*(?P<key>ext|acodec|vcodec|container|protocol)
+ \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?
+ \s*(?P<value>[a-zA-Z0-9_-]+)
+ \s*\]$
+ ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
+ m = str_operator_rex.search(format_spec)
+ if m:
+ comparison_value = m.group('value')
+ op = STR_OPERATORS[m.group('op')]
- try:
- comparison_value = int(m.group('value'))
- except ValueError:
- comparison_value = parse_filesize(m.group('value'))
- if comparison_value is None:
- comparison_value = parse_filesize(m.group('value') + 'B')
- if comparison_value is None:
- raise ValueError(
- 'Invalid value %r in format specification %r' % (
- m.group('value'), format_spec))
- op = OPERATORS[m.group('op')]
+ if not m:
+ raise ValueError('Invalid format specification %r' % format_spec)
def _filter(f):
actual_value = f.get(m.group('key'))
return res
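The string operators added above admit selections such as -f "[ext=mp4]" alongside the numeric filters. A reduced, self-contained sketch of the matching step, using the same regex and operator table as the code above:

    import operator
    import re

    STR_OPERATORS = {'=': operator.eq, '!=': operator.ne}
    str_operator_rex = re.compile(r'''(?x)\s*\[
        \s*(?P<key>ext|acodec|vcodec|container|protocol)
        \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?
        \s*(?P<value>[a-zA-Z0-9_-]+)
        \s*\]$
    ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))

    m = str_operator_rex.search('[ext=mp4]')
    op = STR_OPERATORS[m.group('op')]
    assert op('mp4', m.group('value'))       # an mp4 format passes
    assert not op('webm', m.group('value'))  # a webm format is filtered out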
def _calc_cookies(self, info_dict):
- class _PseudoRequest(object):
- def __init__(self, url):
- self.url = url
- self.headers = {}
- self.unverifiable = False
-
- def add_unredirected_header(self, k, v):
- self.headers[k] = v
-
- def get_full_url(self):
- return self.url
-
- def is_unverifiable(self):
- return self.unverifiable
-
- def has_header(self, h):
- return h in self.headers
-
- pr = _PseudoRequest(info_dict['url'])
+ pr = compat_urllib_request.Request(info_dict['url'])
self.cookiejar.add_cookie_header(pr)
- return pr.headers.get('Cookie')
+ return pr.get_header('Cookie')
def process_video_result(self, info_dict, download=True):
assert info_dict.get('_type', 'video') == 'video'
info_dict['timestamp'])
info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
+ if self.params.get('listsubtitles', False):
+ if 'automatic_captions' in info_dict:
+ self.list_subtitles(info_dict['id'], info_dict.get('automatic_captions'), 'automatic captions')
+ self.list_subtitles(info_dict['id'], info_dict.get('subtitles'), 'subtitles')
+ return
+ info_dict['requested_subtitles'] = self.process_subtitles(
+ info_dict['id'], info_dict.get('subtitles'),
+ info_dict.get('automatic_captions'))
+
# These extractors handle format selection themselves
if info_dict['extractor'] in ['Youku']:
if download:
info_dict.update(formats_to_download[-1])
return info_dict
+ def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
+ """Select the requested subtitles and their format"""
+ available_subs = {}
+ if normal_subtitles and self.params.get('writesubtitles'):
+ available_subs.update(normal_subtitles)
+ if automatic_captions and self.params.get('writeautomaticsub'):
+ for lang, cap_info in automatic_captions.items():
+ if lang not in available_subs:
+ available_subs[lang] = cap_info
+
+ if (not self.params.get('writesubtitles') and not
+ self.params.get('writeautomaticsub') or not
+ available_subs):
+ return None
+
+ if self.params.get('allsubtitles', False):
+ requested_langs = available_subs.keys()
+ else:
+ if self.params.get('subtitleslangs', False):
+ requested_langs = self.params.get('subtitleslangs')
+ elif 'en' in available_subs:
+ requested_langs = ['en']
+ else:
+ requested_langs = [list(available_subs.keys())[0]]
+
+ formats_query = self.params.get('subtitlesformat', 'best')
+ formats_preference = formats_query.split('/') if formats_query else []
+ subs = {}
+ for lang in requested_langs:
+ formats = available_subs.get(lang)
+ if formats is None:
+ self.report_warning('%s subtitles not available for %s' % (lang, video_id))
+ continue
+ for ext in formats_preference:
+ if ext == 'best':
+ f = formats[-1]
+ break
+ matches = list(filter(lambda f: f['ext'] == ext, formats))
+ if matches:
+ f = matches[-1]
+ break
+ else:
+ f = formats[-1]
+ self.report_warning(
+ 'No subtitle format found matching "%s" for language %s, '
+ 'using %s' % (formats_query, lang, f['ext']))
+ subs[lang] = f
+ return subs
+
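To illustrate how the options drive process_subtitles, here is a hedged sketch of the relevant params (option names as documented above; the available-subtitles dict is invented for the example):

    ydl_opts = {
        'writesubtitles': True,             # consider normal subtitles
        'writeautomaticsub': False,         # skip automatic captions
        'subtitleslangs': ['en', 'pt'],     # requested languages
        'subtitlesformat': 'ass/srt/best',  # first matching ext wins
    }
    # Given available subtitles such as
    #   {'en': [{'ext': 'ttml', 'url': '...'}, {'ext': 'vtt', 'url': '...'}]}
    # neither 'ass' nor 'srt' is present, so the formats_preference loop
    # above falls through to the last listed format ('vtt' here) and emits
    # a warning naming the unmatched query.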
def process_info(self, info_dict):
"""Process a single resolved IE result."""
if 'format' not in info_dict:
info_dict['format'] = info_dict['ext']
- reason = self._match_entry(info_dict)
+ reason = self._match_entry(info_dict, incomplete=False)
if reason is not None:
self.to_screen('[download] ' + reason)
return
subtitles_are_requested = any([self.params.get('writesubtitles', False),
self.params.get('writeautomaticsub')])
- if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
+ if subtitles_are_requested and info_dict.get('requested_subtitles'):
# subtitles download errors are already managed as troubles in relevant IE
# that way it will silently go on when used with unsupporting IE
- subtitles = info_dict['subtitles']
- sub_format = self.params.get('subtitlesformat', 'srt')
- for sub_lang in subtitles.keys():
- sub = subtitles[sub_lang]
- if sub is None:
- continue
+ subtitles = info_dict['requested_subtitles']
+ ie = self.get_info_extractor(info_dict['extractor_key'])
+ for sub_lang, sub_info in subtitles.items():
+ sub_format = sub_info['ext']
+ if sub_info.get('data') is not None:
+ sub_data = sub_info['data']
+ else:
+ try:
+ sub_data = ie._download_webpage(
+ sub_info['url'], info_dict['id'], note=False)
+ except ExtractorError as err:
+ self.report_warning('Unable to download subtitle for "%s": %s' %
+ (sub_lang, compat_str(err.cause)))
+ continue
try:
sub_filename = subtitles_filename(filename, sub_lang, sub_format)
if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
else:
self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
- subfile.write(sub)
+ subfile.write(sub_data)
except (OSError, IOError):
self.report_error('Cannot write subtitles file ' + sub_filename)
return
downloaded = []
success = True
merger = FFmpegMergerPP(self, not self.params.get('keepvideo'))
- if not merger._executable:
+ if not merger.available:
postprocessors = []
self.report_warning('You have requested multiple '
'formats but ffmpeg or avconv are not installed.'
"""Download a given list of URLs."""
outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
if (len(url_list) > 1 and
- '%' not in outtmpl
- and self.params.get('max_downloads') != 1):
+ '%' not in outtmpl and
+ self.params.get('max_downloads') != 1):
raise SameFileError(outtmpl)
for url in url_list:
return res
def list_formats(self, info_dict):
- def line(format, idlen=20):
- return (('%-' + compat_str(idlen + 1) + 's%-10s%-12s%s') % (
- format['format_id'],
- format['ext'],
- self.format_resolution(format),
- self._format_note(format),
- ))
-
formats = info_dict.get('formats', [info_dict])
- idlen = max(len('format code'),
- max(len(f['format_id']) for f in formats))
- formats_s = [
- line(f, idlen) for f in formats
+ table = [
+ [f['format_id'], f['ext'], self.format_resolution(f), self._format_note(f)]
+ for f in formats
if f.get('preference') is None or f['preference'] >= -1000]
if len(formats) > 1:
- formats_s[0] += (' ' if self._format_note(formats[0]) else '') + '(worst)'
- formats_s[-1] += (' ' if self._format_note(formats[-1]) else '') + '(best)'
+ table[-1][-1] += (' ' if table[-1][-1] else '') + '(best)'
- header_line = line({
- 'format_id': 'format code', 'ext': 'extension',
- 'resolution': 'resolution', 'format_note': 'note'}, idlen=idlen)
+ header_line = ['format code', 'extension', 'resolution', 'note']
self.to_screen(
- '[info] Available formats for %s:\n%s\n%s' %
- (info_dict['id'], header_line, '\n'.join(formats_s)))
+ '[info] Available formats for %s:\n%s' %
+ (info_dict['id'], render_table(header_line, table)))
def list_thumbnails(self, info_dict):
thumbnails = info_dict.get('thumbnails')
['ID', 'width', 'height', 'URL'],
[[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
+ def list_subtitles(self, video_id, subtitles, name='subtitles'):
+ if not subtitles:
+ self.to_screen('%s has no %s' % (video_id, name))
+ return
+ self.to_screen(
+ 'Available %s for %s:' % (name, video_id))
+ self.to_screen(render_table(
+ ['Language', 'formats'],
+ [[lang, ', '.join(f['ext'] for f in reversed(formats))]
+ for lang, formats in subtitles.items()]))
+
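list_formats, list_thumbnails and the new list_subtitles all render through render_table from utils, which pads each column to its widest cell. A rough sketch (exact spacing depends on render_table's implementation):

    from youtube_dl.utils import render_table

    print(render_table(
        ['Language', 'formats'],
        [['en', 'vtt, ttml'], ['pt', 'vtt']]))
    # Roughly:
    # Language formats
    # en       vtt, ttml
    # pt       vtt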
def urlopen(self, req):
""" Start an HTTP download """
self._write_string('[debug] Python version %s - %s\n' % (
platform.python_version(), platform_name()))
- exe_versions = FFmpegPostProcessor.get_versions()
+ exe_versions = FFmpegPostProcessor.get_versions(self)
exe_versions['rtmpdump'] = rtmpdump_version()
exe_str = ', '.join(
'%s %s' % (exe, v)
)
from .utils import (
DateRange,
- DEFAULT_OUTTMPL,
decodeOption,
+ DEFAULT_OUTTMPL,
DownloadError,
+ match_filter_func,
MaxDownloadsReached,
preferredencoding,
read_batch_urls,
if opts.recodevideo is not None:
if opts.recodevideo not in ['mp4', 'flv', 'webm', 'ogg', 'mkv']:
parser.error('invalid video recode format specified')
+ if opts.convertsubtitles is not None:
+ if opts.convertsubtitles not in ['srt', 'vtt', 'ass']:
+ parser.error('invalid subtitle format specified')
if opts.date is not None:
date = DateRange.day(opts.date)
# In Python 2, sys.argv is a bytestring (also note http://bugs.python.org/issue2128 for Windows systems)
if opts.outtmpl is not None:
opts.outtmpl = opts.outtmpl.decode(preferredencoding())
- outtmpl = ((opts.outtmpl is not None and opts.outtmpl)
- or (opts.format == '-1' and opts.usetitle and '%(title)s-%(id)s-%(format)s.%(ext)s')
- or (opts.format == '-1' and '%(id)s-%(format)s.%(ext)s')
- or (opts.usetitle and opts.autonumber and '%(autonumber)s-%(title)s-%(id)s.%(ext)s')
- or (opts.usetitle and '%(title)s-%(id)s.%(ext)s')
- or (opts.useid and '%(id)s.%(ext)s')
- or (opts.autonumber and '%(autonumber)s-%(id)s.%(ext)s')
- or DEFAULT_OUTTMPL)
+ outtmpl = ((opts.outtmpl is not None and opts.outtmpl) or
+ (opts.format == '-1' and opts.usetitle and '%(title)s-%(id)s-%(format)s.%(ext)s') or
+ (opts.format == '-1' and '%(id)s-%(format)s.%(ext)s') or
+ (opts.usetitle and opts.autonumber and '%(autonumber)s-%(title)s-%(id)s.%(ext)s') or
+ (opts.usetitle and '%(title)s-%(id)s.%(ext)s') or
+ (opts.useid and '%(id)s.%(ext)s') or
+ (opts.autonumber and '%(autonumber)s-%(id)s.%(ext)s') or
+ DEFAULT_OUTTMPL)
if not os.path.splitext(outtmpl)[1] and opts.extractaudio:
parser.error('Cannot download a video and extract audio into the same'
' file! Use "{0}.%(ext)s" instead of "{0}" as the output'
'key': 'FFmpegVideoConvertor',
'preferedformat': opts.recodevideo,
})
+ if opts.convertsubtitles:
+ postprocessors.append({
+ 'key': 'FFmpegSubtitlesConvertor',
+ 'format': opts.convertsubtitles,
+ })
if opts.embedsubtitles:
postprocessors.append({
'key': 'FFmpegEmbedSubtitle',
- 'subtitlesformat': opts.subtitlesformat,
})
if opts.xattrs:
postprocessors.append({'key': 'XAttrMetadata'})
xattr # Confuse flake8
except ImportError:
parser.error('setting filesize xattr requested but python-xattr is not available')
+ match_filter = (
+ None if opts.match_filter is None
+ else match_filter_func(opts.match_filter))
ydl_opts = {
'usenetrc': opts.usenetrc,
'list_thumbnails': opts.list_thumbnails,
'playlist_items': opts.playlist_items,
'xattr_set_filesize': opts.xattr_set_filesize,
+ 'match_filter': match_filter,
+ 'no_color': opts.no_color,
+ 'ffmpeg_location': opts.ffmpeg_location,
+ 'hls_prefer_native': opts.hls_prefer_native,
}
with YoutubeDL(ydl_opts) as ydl:
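On the command-line side, --match-filter strings are compiled by match_filter_func in utils into a callable with the same contract as the match_filter parameter, so the two entry points converge. A short sketch using the filter syntax documented for the option:

    from youtube_dl.utils import match_filter_func

    f = match_filter_func('like_count > 100 & dislike_count <? 50 & description')
    # Passes all three conditions -> None, i.e. download:
    print(f({'like_count': 150, 'dislike_count': 10, 'description': 'x'}))
    # Fails the like_count condition -> a skip message is returned:
    print(f({'like_count': 5, 'description': 'x'}))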
from __future__ import unicode_literals
-__all__ = ['aes_encrypt', 'key_expansion', 'aes_ctr_decrypt', 'aes_cbc_decrypt', 'aes_decrypt_text']
-
import base64
from math import ceil
data[i] = data[i] + 1
break
return data
+
+__all__ = ['aes_encrypt', 'key_expansion', 'aes_ctr_decrypt', 'aes_cbc_decrypt', 'aes_decrypt_text']
if ed.supports(info_dict):
return ed
+ if protocol == 'm3u8' and params.get('hls_prefer_native'):
+ return NativeHlsFD
+
return PROTOCOL_MAP.get(protocol, HttpFD)
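The effect of the new branch above, assuming get_suitable_downloader is called with the params dict as in this module (the URL is a placeholder):

    from youtube_dl.downloader import get_suitable_downloader

    info_dict = {'url': 'http://example.com/stream.m3u8', 'protocol': 'm3u8'}
    dl = get_suitable_downloader(info_dict, {'hls_prefer_native': True})
    # -> NativeHlsFD; without the flag, the 'm3u8' entry in PROTOCOL_MAP
    #    (the ffmpeg/avconv-based HLS downloader) is used instead.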
-from __future__ import unicode_literals
+from __future__ import division, unicode_literals
import os
import re
self.ydl = ydl
self._progress_hooks = []
self.params = params
+ self.add_progress_hook(self.report_progress)
@staticmethod
def format_seconds(seconds):
self.to_screen(clear_line + fullmsg, skip_eol=not is_last_line)
self.to_console_title('youtube-dl ' + msg)
- def report_progress(self, percent, data_len_str, speed, eta):
- """Report download progress."""
- if self.params.get('noprogress', False):
+ def report_progress(self, s):
+ if s['status'] == 'finished':
+ if self.params.get('noprogress', False):
+ self.to_screen('[download] Download completed')
+ else:
+ s['_total_bytes_str'] = format_bytes(s['total_bytes'])
+ if s.get('elapsed') is not None:
+ s['_elapsed_str'] = self.format_seconds(s['elapsed'])
+ msg_template = '100%% of %(_total_bytes_str)s in %(_elapsed_str)s'
+ else:
+ msg_template = '100%% of %(_total_bytes_str)s'
+ self._report_progress_status(
+ msg_template % s, is_last_line=True)
+
+ if self.params.get('noprogress'):
return
- if eta is not None:
- eta_str = self.format_eta(eta)
- else:
- eta_str = 'Unknown ETA'
- if percent is not None:
- percent_str = self.format_percent(percent)
+
+ if s['status'] != 'downloading':
+ return
+
+ if s.get('eta') is not None:
+ s['_eta_str'] = self.format_eta(s['eta'])
else:
- percent_str = 'Unknown %'
- speed_str = self.format_speed(speed)
+ s['_eta_str'] = 'Unknown ETA'
- msg = ('%s of %s at %s ETA %s' %
- (percent_str, data_len_str, speed_str, eta_str))
- self._report_progress_status(msg)
+ if s.get('total_bytes') and s.get('downloaded_bytes') is not None:
+ s['_percent_str'] = self.format_percent(100 * s['downloaded_bytes'] / s['total_bytes'])
+ elif s.get('total_bytes_estimate') and s.get('downloaded_bytes') is not None:
+ s['_percent_str'] = self.format_percent(100 * s['downloaded_bytes'] / s['total_bytes_estimate'])
+ else:
+ if s.get('downloaded_bytes') == 0:
+ s['_percent_str'] = self.format_percent(0)
+ else:
+ s['_percent_str'] = 'Unknown %'
- def report_progress_live_stream(self, downloaded_data_len, speed, elapsed):
- if self.params.get('noprogress', False):
- return
- downloaded_str = format_bytes(downloaded_data_len)
- speed_str = self.format_speed(speed)
- elapsed_str = FileDownloader.format_seconds(elapsed)
- msg = '%s at %s (%s)' % (downloaded_str, speed_str, elapsed_str)
- self._report_progress_status(msg)
-
- def report_finish(self, data_len_str, tot_time):
- """Report download finished."""
- if self.params.get('noprogress', False):
- self.to_screen('[download] Download completed')
+ if s.get('speed') is not None:
+ s['_speed_str'] = self.format_speed(s['speed'])
+ else:
+ s['_speed_str'] = 'Unknown speed'
+
+ if s.get('total_bytes') is not None:
+ s['_total_bytes_str'] = format_bytes(s['total_bytes'])
+ msg_template = '%(_percent_str)s of %(_total_bytes_str)s at %(_speed_str)s ETA %(_eta_str)s'
+ elif s.get('total_bytes_estimate') is not None:
+ s['_total_bytes_estimate_str'] = format_bytes(s['total_bytes_estimate'])
+ msg_template = '%(_percent_str)s of ~%(_total_bytes_estimate_str)s at %(_speed_str)s ETA %(_eta_str)s'
else:
- self._report_progress_status(
- ('100%% of %s in %s' %
- (data_len_str, self.format_seconds(tot_time))),
- is_last_line=True)
+ if s.get('downloaded_bytes') is not None:
+ s['_downloaded_bytes_str'] = format_bytes(s['downloaded_bytes'])
+ if s.get('elapsed'):
+ s['_elapsed_str'] = self.format_seconds(s['elapsed'])
+ msg_template = '%(_downloaded_bytes_str)s at %(_speed_str)s (%(_elapsed_str)s)'
+ else:
+ msg_template = '%(_downloaded_bytes_str)s at %(_speed_str)s'
+ else:
+ msg_template = '%(_percent_str)s at %(_speed_str)s ETA %(_eta_str)s'
+
+ self._report_progress_status(msg_template % s)
def report_resuming_byte(self, resume_len):
"""Report attempt to resume at given byte."""
"""
nooverwrites_and_exists = (
- self.params.get('nooverwrites', False)
- and os.path.exists(encodeFilename(filename))
+ self.params.get('nooverwrites', False) and
+ os.path.exists(encodeFilename(filename))
)
continuedl_and_exists = (
- self.params.get('continuedl', False)
- and os.path.isfile(encodeFilename(filename))
- and not self.params.get('nopart', False)
+ self.params.get('continuedl', False) and
+ os.path.isfile(encodeFilename(filename)) and
+ not self.params.get('nopart', False)
)
# Check file already present
class CurlFD(ExternalFD):
def _make_cmd(self, tmpfilename, info_dict):
- cmd = [self.exe, '-o', tmpfilename]
+ cmd = [self.exe, '--location', '-o', tmpfilename]
for key, val in info_dict['http_headers'].items():
cmd += ['--header', '%s: %s' % (key, val)]
cmd += self._source_address('--interface')
-from __future__ import unicode_literals
+from __future__ import division, unicode_literals
import base64
import io
from .http import HttpFD
from ..compat import (
compat_urlparse,
+ compat_urllib_error,
)
from ..utils import (
struct_pack,
struct_unpack,
- format_bytes,
encodeFilename,
sanitize_open,
xpath_text,
self.read_unsigned_int() # BootstrapinfoVersion
# Profile,Live,Update,Reserved
- self.read(1)
+ flags = self.read_unsigned_char()
+ live = flags & 0x20 != 0
# time scale
self.read_unsigned_int()
# CurrentMediaTime
return {
'segments': segments,
'fragments': fragments,
+ 'live': live,
}
def read_bootstrap_info(self):
for segment, fragments_count in segment_run_table['segment_run']:
for _ in range(fragments_count):
res.append((segment, next(fragments_counter)))
+
+ if boot_info['live']:
+ res = res[-2:]
+
return res
self.report_error('Unsupported DRM')
return media
+ def _get_bootstrap_from_url(self, bootstrap_url):
+ bootstrap = self.ydl.urlopen(bootstrap_url).read()
+ return read_bootstrap_info(bootstrap)
+
+ def _update_live_fragments(self, bootstrap_url, latest_fragment):
+ fragments_list = []
+ retries = 30
+ while (not fragments_list) and (retries > 0):
+ boot_info = self._get_bootstrap_from_url(bootstrap_url)
+ fragments_list = build_fragments_list(boot_info)
+ fragments_list = [f for f in fragments_list if f[1] > latest_fragment]
+ if not fragments_list:
+ # Retry after a while
+ time.sleep(5.0)
+ retries -= 1
+
+ if not fragments_list:
+ self.report_error('Failed to update fragments')
+
+ return fragments_list
+
+ def _parse_bootstrap_node(self, node, base_url):
+ if node.text is None:
+ bootstrap_url = compat_urlparse.urljoin(
+ base_url, node.attrib['url'])
+ boot_info = self._get_bootstrap_from_url(bootstrap_url)
+ else:
+ bootstrap_url = None
+ bootstrap = base64.b64decode(node.text)
+ boot_info = read_bootstrap_info(bootstrap)
+ return (boot_info, bootstrap_url)
+
def real_download(self, filename, info_dict):
man_url = info_dict['url']
requested_bitrate = info_dict.get('tbr')
self.to_screen('[download] Downloading f4m manifest')
manifest = self.ydl.urlopen(man_url).read()
- self.report_destination(filename)
- http_dl = HttpQuietDownloader(
- self.ydl,
- {
- 'continuedl': True,
- 'quiet': True,
- 'noprogress': True,
- 'ratelimit': self.params.get('ratelimit', None),
- 'test': self.params.get('test', False),
- }
- )
doc = etree.fromstring(manifest)
formats = [(int(f.attrib.get('bitrate', -1)), f)
base_url = compat_urlparse.urljoin(man_url, media.attrib['url'])
bootstrap_node = doc.find(_add_ns('bootstrapInfo'))
- if bootstrap_node.text is None:
- bootstrap_url = compat_urlparse.urljoin(
- base_url, bootstrap_node.attrib['url'])
- bootstrap = self.ydl.urlopen(bootstrap_url).read()
- else:
- bootstrap = base64.b64decode(bootstrap_node.text)
+ boot_info, bootstrap_url = self._parse_bootstrap_node(bootstrap_node, base_url)
+ live = boot_info['live']
metadata_node = media.find(_add_ns('metadata'))
if metadata_node is not None:
metadata = base64.b64decode(metadata_node.text)
else:
metadata = None
- boot_info = read_bootstrap_info(bootstrap)
fragments_list = build_fragments_list(boot_info)
if self.params.get('test', False):
# For some akamai manifests we'll need to add a query to the fragment url
akamai_pv = xpath_text(doc, _add_ns('pv-2.0'))
+ self.report_destination(filename)
+ http_dl = HttpQuietDownloader(
+ self.ydl,
+ {
+ 'continuedl': True,
+ 'quiet': True,
+ 'noprogress': True,
+ 'ratelimit': self.params.get('ratelimit', None),
+ 'test': self.params.get('test', False),
+ }
+ )
tmpfilename = self.temp_name(filename)
(dest_stream, tmpfilename) = sanitize_open(tmpfilename, 'wb')
+
write_flv_header(dest_stream)
- write_metadata_tag(dest_stream, metadata)
+ if not live:
+ write_metadata_tag(dest_stream, metadata)
# This dict stores the download progress, it's updated by the progress
# hook
state = {
+ 'status': 'downloading',
'downloaded_bytes': 0,
- 'frag_counter': 0,
+ 'frag_index': 0,
+ 'frag_count': total_frags,
+ 'filename': filename,
+ 'tmpfilename': tmpfilename,
}
start = time.time()
- def frag_progress_hook(status):
- frag_total_bytes = status.get('total_bytes', 0)
- estimated_size = (state['downloaded_bytes'] +
- (total_frags - state['frag_counter']) * frag_total_bytes)
- if status['status'] == 'finished':
+ def frag_progress_hook(s):
+ if s['status'] not in ('downloading', 'finished'):
+ return
+
+ frag_total_bytes = s.get('total_bytes', 0)
+ if s['status'] == 'finished':
state['downloaded_bytes'] += frag_total_bytes
- state['frag_counter'] += 1
- progress = self.calc_percent(state['frag_counter'], total_frags)
- byte_counter = state['downloaded_bytes']
+ state['frag_index'] += 1
+
+ estimated_size = (
+ (state['downloaded_bytes'] + frag_total_bytes) /
+ (state['frag_index'] + 1) * total_frags)
+ time_now = time.time()
+ state['total_bytes_estimate'] = estimated_size
+ state['elapsed'] = time_now - start
+
+ if s['status'] == 'finished':
+ progress = self.calc_percent(state['frag_index'], total_frags)
else:
- frag_downloaded_bytes = status['downloaded_bytes']
- byte_counter = state['downloaded_bytes'] + frag_downloaded_bytes
+ frag_downloaded_bytes = s['downloaded_bytes']
frag_progress = self.calc_percent(frag_downloaded_bytes,
frag_total_bytes)
- progress = self.calc_percent(state['frag_counter'], total_frags)
+ progress = self.calc_percent(state['frag_index'], total_frags)
progress += frag_progress / float(total_frags)
- eta = self.calc_eta(start, time.time(), estimated_size, byte_counter)
- self.report_progress(progress, format_bytes(estimated_size),
- status.get('speed'), eta)
+ state['eta'] = self.calc_eta(
+ start, time_now, estimated_size, state['downloaded_bytes'] + frag_downloaded_bytes)
+ state['speed'] = s.get('speed')
+ self._hook_progress(state)
+
http_dl.add_progress_hook(frag_progress_hook)
frags_filenames = []
- for (seg_i, frag_i) in fragments_list:
+ while fragments_list:
+ seg_i, frag_i = fragments_list.pop(0)
name = 'Seg%d-Frag%d' % (seg_i, frag_i)
url = base_url + name
if akamai_pv:
url += '?' + akamai_pv.strip(';')
frag_filename = '%s-%s' % (tmpfilename, name)
- success = http_dl.download(frag_filename, {'url': url})
- if not success:
- return False
- with open(frag_filename, 'rb') as down:
- down_data = down.read()
- reader = FlvReader(down_data)
- while True:
- _, box_type, box_data = reader.read_box_info()
- if box_type == b'mdat':
- dest_stream.write(box_data)
- break
- frags_filenames.append(frag_filename)
+ try:
+ success = http_dl.download(frag_filename, {'url': url})
+ if not success:
+ return False
+ with open(frag_filename, 'rb') as down:
+ down_data = down.read()
+ reader = FlvReader(down_data)
+ while True:
+ _, box_type, box_data = reader.read_box_info()
+ if box_type == b'mdat':
+ dest_stream.write(box_data)
+ break
+ if live:
+ os.remove(frag_filename)
+ else:
+ frags_filenames.append(frag_filename)
+ except (compat_urllib_error.HTTPError, ) as err:
+ if live and (err.code == 404 or err.code == 410):
+ # We didn't keep up with the live window. Continue
+ # with the next available fragment.
+ msg = 'Fragment %d unavailable' % frag_i
+ self.report_warning(msg)
+ fragments_list = []
+ else:
+ raise
+
+ if not fragments_list and live and bootstrap_url:
+ fragments_list = self._update_live_fragments(bootstrap_url, frag_i)
+ total_frags += len(fragments_list)
+ if fragments_list and (fragments_list[0][1] > frag_i + 1):
+ msg = 'Missed %d fragments' % (fragments_list[0][1] - (frag_i + 1))
+ self.report_warning(msg)
dest_stream.close()
- self.report_finish(format_bytes(state['downloaded_bytes']), time.time() - start)
+ elapsed = time.time() - start
self.try_rename(tmpfilename, filename)
for frag_file in frags_filenames:
os.remove(frag_file)
'total_bytes': fsize,
'filename': filename,
'status': 'finished',
+ 'elapsed': elapsed,
})
return True
tmpfilename = self.temp_name(filename)
ffpp = FFmpegPostProcessor(downloader=self)
- program = ffpp._executable
- if program is None:
+ if not ffpp.available:
self.report_error('m3u8 download detected but ffmpeg or avconv could not be found. Please install one.')
return False
ffpp.check_version()
args = [
encodeArgument(opt)
- for opt in (program, '-y', '-i', url, '-f', 'mp4', '-c', 'copy', '-bsf:a', 'aac_adtstoasc')]
+ for opt in (ffpp.executable, '-y', '-i', url, '-f', 'mp4', '-c', 'copy', '-bsf:a', 'aac_adtstoasc')]
args.append(encodeFilename(tmpfilename, True))
retval = subprocess.call(args)
return True
else:
self.to_stderr('\n')
- self.report_error('%s exited with code %d' % (program, retval))
+ self.report_error('%s exited with code %d' % (ffpp.basename, retval))
return False
from __future__ import unicode_literals
+import errno
import os
+import socket
import time
-from socket import error as SocketError
-import errno
-
from .common import FileDownloader
from ..compat import (
compat_urllib_request,
ContentTooShortError,
encodeFilename,
sanitize_open,
- format_bytes,
)
resume_len = 0
open_mode = 'wb'
break
- except SocketError as e:
+ except socket.error as e:
if e.errno != errno.ECONNRESET:
# Connection reset is no problem, just retry
raise
self.to_screen('\r[download] File is larger than max-filesize (%s bytes > %s bytes). Aborting.' % (data_len, max_data_len))
return False
- data_len_str = format_bytes(data_len)
byte_counter = 0 + resume_len
block_size = self.params.get('buffersize', 1024)
start = time.time()
# Progress message
speed = self.calc_speed(start, now, byte_counter - resume_len)
if data_len is None:
- eta = percent = None
+ eta = None
else:
- percent = self.calc_percent(byte_counter, data_len)
eta = self.calc_eta(start, time.time(), data_len - resume_len, byte_counter - resume_len)
- self.report_progress(percent, data_len_str, speed, eta)
self._hook_progress({
+ 'status': 'downloading',
'downloaded_bytes': byte_counter,
'total_bytes': data_len,
'tmpfilename': tmpfilename,
'filename': filename,
- 'status': 'downloading',
'eta': eta,
'speed': speed,
+ 'elapsed': now - start,
})
if is_test and byte_counter == data_len:
return False
if tmpfilename != '-':
stream.close()
- self.report_finish(data_len_str, (time.time() - start))
+
+ self._hook_progress({
+ 'downloaded_bytes': byte_counter,
+ 'total_bytes': data_len,
+ 'tmpfilename': tmpfilename,
+ 'status': 'error',
+ })
if data_len is not None and byte_counter != data_len:
raise ContentTooShortError(byte_counter, int(data_len))
self.try_rename(tmpfilename, filename)
'total_bytes': byte_counter,
'filename': filename,
'status': 'finished',
+ 'elapsed': time.time() - start,
})
return True
from ..utils import (
check_executable,
encodeFilename,
- format_bytes,
get_exe_version,
)
if not resume_percent:
resume_percent = percent
resume_downloaded_data_len = downloaded_data_len
- eta = self.calc_eta(start, time.time(), 100 - resume_percent, percent - resume_percent)
- speed = self.calc_speed(start, time.time(), downloaded_data_len - resume_downloaded_data_len)
+ time_now = time.time()
+ eta = self.calc_eta(start, time_now, 100 - resume_percent, percent - resume_percent)
+ speed = self.calc_speed(start, time_now, downloaded_data_len - resume_downloaded_data_len)
data_len = None
if percent > 0:
data_len = int(downloaded_data_len * 100 / percent)
- data_len_str = '~' + format_bytes(data_len)
- self.report_progress(percent, data_len_str, speed, eta)
- cursor_in_new_line = False
self._hook_progress({
+ 'status': 'downloading',
'downloaded_bytes': downloaded_data_len,
- 'total_bytes': data_len,
+ 'total_bytes_estimate': data_len,
'tmpfilename': tmpfilename,
'filename': filename,
- 'status': 'downloading',
'eta': eta,
+ 'elapsed': time_now - start,
'speed': speed,
})
+ cursor_in_new_line = False
else:
# no percent for live streams
mobj = re.search(r'([0-9]+\.[0-9]{3}) kB / [0-9]+\.[0-9]{2} sec', line)
downloaded_data_len = int(float(mobj.group(1)) * 1024)
time_now = time.time()
speed = self.calc_speed(start, time_now, downloaded_data_len)
- self.report_progress_live_stream(downloaded_data_len, speed, time_now - start)
- cursor_in_new_line = False
self._hook_progress({
'downloaded_bytes': downloaded_data_len,
'tmpfilename': tmpfilename,
'filename': filename,
'status': 'downloading',
+ 'elapsed': time_now - start,
'speed': speed,
})
+ cursor_in_new_line = False
elif self.params.get('verbose', False):
if not cursor_in_new_line:
self.to_screen('')
# Download using rtmpdump. rtmpdump returns exit code 2 when
# the connection was interrumpted and resuming appears to be
# possible. This is part of rtmpdump's normal usage, AFAIK.
- basic_args = ['rtmpdump', '--verbose', '-r', url, '-o', tmpfilename]
+ basic_args = [
+ 'rtmpdump', '--verbose', '-r', url,
+ '-o', encodeFilename(tmpfilename, True)]
if player_url is not None:
basic_args += ['--swfVfy', player_url]
if page_url is not None:
from .addanime import AddAnimeIE
from .adobetv import AdobeTVIE
from .adultswim import AdultSwimIE
+from .aftenposten import AftenpostenIE
from .aftonbladet import AftonbladetIE
+from .airmozilla import AirMozillaIE
from .aljazeera import AlJazeeraIE
from .alphaporno import AlphaPornoIE
from .anitube import AnitubeIE
from .buzzfeed import BuzzFeedIE
from .byutv import BYUtvIE
from .c56 import C56IE
+from .camdemy import (
+ CamdemyIE,
+ CamdemyFolderIE
+)
from .canal13cl import Canal13clIE
from .canalplus import CanalplusIE
from .canalc2 import Canalc2IE
from .cbs import CBSIE
from .cbsnews import CBSNewsIE
+from .cbssports import CBSSportsIE
+from .ccc import CCCIE
from .ceskatelevize import CeskaTelevizeIE
from .channel9 import Channel9IE
from .chilloutzone import ChilloutzoneIE
+from .chirbit import (
+ ChirbitIE,
+ ChirbitProfileIE,
+)
from .cinchcast import CinchcastIE
from .clipfish import ClipfishIE
from .cliphunter import CliphunterIE
from .collegerama import CollegeRamaIE
from .comedycentral import ComedyCentralIE, ComedyCentralShowsIE
from .comcarcoff import ComCarCoffIE
-from .commonmistakes import CommonMistakesIE
+from .commonmistakes import CommonMistakesIE, UnicodeBOMIE
from .condenast import CondeNastIE
from .cracked import CrackedIE
from .criterion import CriterionIE
EllenTVClipsIE,
)
from .elpais import ElPaisIE
+from .embedly import EmbedlyIE
from .empflix import EMPFlixIE
from .engadget import EngadgetIE
from .eporner import EpornerIE
from .helsinki import HelsinkiIE
from .hentaistigma import HentaiStigmaIE
from .historicfilms import HistoricFilmsIE
+from .history import HistoryIE
from .hitbox import HitboxIE, HitboxLiveIE
from .hornbunny import HornBunnyIE
from .hostingbulk import HostingBulkIE
ImdbIE,
ImdbListIE
)
+from .imgur import ImgurIE
from .ina import InaIE
from .infoq import InfoQIE
from .instagram import InstagramIE, InstagramUserIE
from .jove import JoveIE
from .jukebox import JukeboxIE
from .jpopsukitv import JpopsukiIE
+from .kaltura import KalturaIE
from .kankan import KankanIE
from .karaoketv import KaraoketvIE
from .keezmovies import KeezMoviesIE
from .ku6 import Ku6IE
from .la7 import LA7IE
from .laola1tv import Laola1TvIE
+from .letv import (
+ LetvIE,
+ LetvTvIE,
+ LetvPlaylistIE
+)
from .lifenews import LifeNewsIE
from .liveleak import LiveLeakIE
from .livestream import (
from .myspass import MySpassIE
from .myvideo import MyVideoIE
from .myvidster import MyVidsterIE
+from .nationalgeographic import NationalGeographicIE
from .naver import NaverIE
from .nba import NBAIE
from .nbc import (
from .npo import (
NPOIE,
NPOLiveIE,
+ NPORadioIE,
+ NPORadioFragmentIE,
TegenlichtVproIE,
)
from .nrk import (
from .ntvru import NTVRuIE
from .nytimes import NYTimesIE
from .nuvid import NuvidIE
+from .odnoklassniki import OdnoklassnikiIE
from .oktoberfesttv import OktoberfestTVIE
from .ooyala import OoyalaIE
from .openfilm import OpenFilmIE
from .playvid import PlayvidIE
from .podomatic import PodomaticIE
from .pornhd import PornHdIE
-from .pornhub import PornHubIE
+from .pornhub import (
+ PornHubIE,
+ PornHubPlaylistIE,
+)
from .pornotube import PornotubeIE
from .pornoxo import PornoXOIE
from .promptfile import PromptFileIE
from .prosiebensat1 import ProSiebenSat1IE
+from .puls4 import Puls4IE
from .pyvideo import PyvideoIE
from .quickvid import QuickVidIE
+from .r7 import R7IE
from .radiode import RadioDeIE
from .radiobremen import RadioBremenIE
from .radiofrance import RadioFranceIE
from .roxwel import RoxwelIE
from .rtbf import RTBFIE
from .rte import RteIE
-from .rtlnl import RtlXlIE
+from .rtlnl import RtlNlIE
from .rtlnow import RTLnowIE
from .rtl2 import RTL2IE
from .rtp import RTPIE
RutubePersonIE,
)
from .rutv import RUTVIE
+from .sandia import SandiaIE
from .sapo import SapoIE
from .savefrom import SaveFromIE
from .sbs import SBSIE
SoundcloudUserIE,
SoundcloudPlaylistIE
)
-from .soundgasm import SoundgasmIE
+from .soundgasm import (
+ SoundgasmIE,
+ SoundgasmProfileIE
+)
from .southpark import (
SouthParkIE,
SouthparkDeIE,
from .streamcz import StreamCZIE
from .streetvoice import StreetVoiceIE
from .sunporno import SunPornoIE
+from .svtplay import SVTPlayIE
from .swrmediathek import SWRMediathekIE
from .syfy import SyfyIE
from .sztvhu import SztvHuIE
from .tunein import TuneInIE
from .turbo import TurboIE
from .tutv import TutvIE
+from .tv4 import TV4IE
from .tvigle import TvigleIE
from .tvp import TvpIE, TvpSeriesIE
from .tvplay import TVPlayIE
YahooIE,
YahooSearchIE,
)
+from .yam import YamIE
from .yesjapan import YesJapanIE
from .ynet import YnetIE
from .youjizz import YouJizzIE
YoutubeUserIE,
YoutubeWatchLaterIE,
)
+from .zapiks import ZapiksIE
from .zdf import ZDFIE, ZDFChannelIE
from .zingmp3 import (
ZingMp3SongIE,
def _real_extract(self, url):
video_id = self._match_id(url)
-
webpage = self._download_webpage(url, video_id)
player = self._parse_json(
self._html_search_meta('datepublished', webpage, 'upload date'))
duration = parse_duration(
- self._html_search_meta('duration', webpage, 'duration')
- or self._search_regex(r'Runtime:\s*(\d{2}:\d{2}:\d{2})', webpage, 'duration'))
+ self._html_search_meta('duration', webpage, 'duration') or
+ self._search_regex(
+ r'Runtime:\s*(\d{2}:\d{2}:\d{2})',
+ webpage, 'duration', fatal=False))
view_count = str_to_int(self._search_regex(
r'<div class="views">\s*Views?:\s*([\d,.]+)\s*</div>',
},
],
'info_dict': {
+ 'id': 'rQxZvXQ4ROaSOqq-or2Mow',
'title': 'Rick and Morty - Pilot',
'description': "Rick moves in with his daughter's family and establishes himself as a bad influence on his grandson, Morty. "
}
}
],
'info_dict': {
+ 'id': '-t8CamQlQ2aYZ49ItZCFog',
'title': 'American Dad - Putting Francine Out of Business',
'description': 'Stan hatches a plan to get Francine out of the real estate business.Watch more American Dad on [adult swim].'
},
--- /dev/null
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ parse_iso8601,
+ xpath_with_ns,
+ xpath_text,
+ find_xpath_attr,
+)
+
+
+class AftenpostenIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?aftenposten\.no/webtv/([^/]+/)*(?P<id>[^/]+)-\d+\.html'
+
+ _TEST = {
+ 'url': 'http://www.aftenposten.no/webtv/serier-og-programmer/sweatshopenglish/TRAILER-SWEATSHOP---I-cant-take-any-more-7800835.html?paging=&section=webtv_serierogprogrammer_sweatshop_sweatshopenglish',
+ 'md5': 'fd828cd29774a729bf4d4425fe192972',
+ 'info_dict': {
+ 'id': '21039',
+ 'ext': 'mov',
+ 'title': 'TRAILER: "Sweatshop" - I can´t take any more',
+ 'description': 'md5:21891f2b0dd7ec2f78d84a50e54f8238',
+ 'timestamp': 1416927969,
+ 'upload_date': '20141125',
+ }
+ }
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, display_id)
+
+ video_id = self._html_search_regex(
+ r'data-xs-id="(\d+)"', webpage, 'video id')
+
+ data = self._download_xml(
+ 'http://frontend.xstream.dk/ap/feed/video/?platform=web&id=%s' % video_id, video_id)
+
+ NS_MAP = {
+ 'atom': 'http://www.w3.org/2005/Atom',
+ 'xt': 'http://xstream.dk/',
+ 'media': 'http://search.yahoo.com/mrss/',
+ }
+
+ entry = data.find(xpath_with_ns('./atom:entry', NS_MAP))
+
+ title = xpath_text(
+ entry, xpath_with_ns('./atom:title', NS_MAP), 'title')
+ description = xpath_text(
+ entry, xpath_with_ns('./atom:summary', NS_MAP), 'description')
+ timestamp = parse_iso8601(xpath_text(
+ entry, xpath_with_ns('./atom:published', NS_MAP), 'upload date'))
+
+ formats = []
+ media_group = entry.find(xpath_with_ns('./media:group', NS_MAP))
+ for media_content in media_group.findall(xpath_with_ns('./media:content', NS_MAP)):
+ media_url = media_content.get('url')
+ if not media_url:
+ continue
+ tbr = int_or_none(media_content.get('bitrate'))
+ mobj = re.search(r'^(?P<url>rtmp://[^/]+/(?P<app>[^/]+))/(?P<playpath>.+)$', media_url)
+ if mobj:
+ formats.append({
+ 'url': mobj.group('url'),
+ 'play_path': 'mp4:%s' % mobj.group('playpath'),
+ 'app': mobj.group('app'),
+ 'ext': 'flv',
+ 'tbr': tbr,
+ 'format_id': 'rtmp-%d' % tbr,
+ })
+ else:
+ formats.append({
+ 'url': media_url,
+ 'tbr': tbr,
+ })
+ self._sort_formats(formats)
+
+ link = find_xpath_attr(
+ entry, xpath_with_ns('./atom:link', NS_MAP), 'rel', 'original')
+ if link is not None:
+ formats.append({
+ 'url': link.get('href'),
+ 'format_id': link.get('rel'),
+ })
+
+ thumbnails = [{
+ 'url': splash.get('url'),
+ 'width': int_or_none(splash.get('width')),
+ 'height': int_or_none(splash.get('height')),
+ } for splash in media_group.findall(xpath_with_ns('./xt:splash', NS_MAP))]
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'timestamp': timestamp,
+ 'formats': formats,
+ 'thumbnails': thumbnails,
+ }
--- /dev/null
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ parse_duration,
+ parse_iso8601,
+)
+
+
+class AirMozillaIE(InfoExtractor):
+ _VALID_URL = r'https?://air\.mozilla\.org/(?P<id>[0-9a-z-]+)/?'
+ _TEST = {
+ 'url': 'https://air.mozilla.org/privacy-lab-a-meetup-for-privacy-minded-people-in-san-francisco/',
+ 'md5': '2e3e7486ba5d180e829d453875b9b8bf',
+ 'info_dict': {
+ 'id': '6x4q2w',
+ 'ext': 'mp4',
+ 'title': 'Privacy Lab - a meetup for privacy minded people in San Francisco',
+ 'thumbnail': 're:https://\w+\.cloudfront\.net/6x4q2w/poster\.jpg\?t=\d+',
+ 'description': 'Brings together privacy professionals and others interested in privacy at for-profits, non-profits, and NGOs in an effort to contribute to the state of the ecosystem...',
+ 'timestamp': 1422487800,
+ 'upload_date': '20150128',
+ 'location': 'SFO Commons',
+ 'duration': 3780,
+ 'view_count': int,
+ 'categories': ['Main'],
+ }
+ }
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+ video_id = self._html_search_regex(r'//vid.ly/(.*?)/embed', webpage, 'id')
+
+ embed_script = self._download_webpage('https://vid.ly/{0}/embed'.format(video_id), video_id)
+ jwconfig = self._search_regex(r'\svar jwconfig = (\{.*?\});\s', embed_script, 'metadata')
+ metadata = self._parse_json(jwconfig, video_id)
+
+ formats = [{
+ 'url': source['file'],
+ 'ext': source['type'],
+ 'format_id': self._search_regex(r'&format=(.*)$', source['file'], 'video format'),
+ 'format': source['label'],
+ 'height': int(source['label'].rstrip('p')),
+ } for source in metadata['playlist'][0]['sources']]
+ self._sort_formats(formats)
+
+ view_count = int_or_none(self._html_search_regex(
+ r'Views since archived: ([0-9]+)',
+ webpage, 'view count', fatal=False))
+ timestamp = parse_iso8601(self._html_search_regex(
+ r'<time datetime="(.*?)"', webpage, 'timestamp', fatal=False))
+ duration = parse_duration(self._search_regex(
+ r'Duration:\s*(\d+\s*hours?\s*\d+\s*minutes?)',
+ webpage, 'duration', fatal=False))
+
+ return {
+ 'id': video_id,
+ 'title': self._og_search_title(webpage),
+ 'formats': formats,
+ 'url': self._og_search_url(webpage),
+ 'display_id': display_id,
+ 'thumbnail': metadata['playlist'][0].get('image'),
+ 'description': self._og_search_description(webpage),
+ 'timestamp': timestamp,
+ 'location': self._html_search_regex(r'Location: (.*)', webpage, 'location', default=None),
+ 'duration': duration,
+ 'view_count': view_count,
+ 'categories': re.findall(r'<a href=".*?" class="channel">(.*?)</a>', webpage),
+ }
'id': 'wP8On',
'ext': 'mp4',
'title': 'تیم گلکسی 11 - زومیت',
+ 'age_limit': 0,
},
# 'skip': 'Extremely unreliable',
}
video_id + '/vt/frame')
webpage = self._download_webpage(embed_url, video_id)
- video_urls = re.findall(r'fileList\[[0-9]+\]\s*=\s*"([^"]+)"', webpage)
+ video_urls = [video_url.replace('\\/', '/') for video_url in re.findall(
+ r'(?:fileList\[[0-9]+\]\s*=|"file"\s*:)\s*"([^"]+)"', webpage)]
for i, video_url in enumerate(video_urls):
req = HEADRequest(video_url)
res = self._request_webpage(
title = self._search_regex(r'\s+title:\s*"([^"]+)"', webpage, 'title')
thumbnail = self._search_regex(
- r'\s+image:\s*"([^"]+)"', webpage, 'thumbnail', fatal=False)
+ r'image:\s*"([^"]+)"', webpage, 'thumbnail', fatal=False)
return {
'id': video_id,
'url': video_url,
'ext': 'mp4',
'thumbnail': thumbnail,
+ 'age_limit': self._family_friendly_search(webpage),
}
class AppleTrailersIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?trailers\.apple\.com/trailers/(?P<company>[^/]+)/(?P<movie>[^/]+)'
- _TEST = {
+ _VALID_URL = r'https?://(?:www\.)?trailers\.apple\.com/(?:trailers|ca)/(?P<company>[^/]+)/(?P<movie>[^/]+)'
+ _TESTS = [{
"url": "http://trailers.apple.com/trailers/wb/manofsteel/",
+ 'info_dict': {
+ 'id': 'manofsteel',
+ },
"playlist": [
{
"md5": "d97a8e575432dbcb81b7c3acb741f8a8",
},
},
]
- }
+ }, {
+ 'url': 'http://trailers.apple.com/ca/metropole/autrui/',
+ 'only_matching': True,
+ }]
_JSON_RE = r'iTunes.playURL\((.*?)\);'
import time
import hmac
-from .subtitles import SubtitlesInfoExtractor
+from .common import InfoExtractor
from ..compat import (
compat_str,
compat_urllib_parse,
)
-class AtresPlayerIE(SubtitlesInfoExtractor):
+class AtresPlayerIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?atresplayer\.com/television/[^/]+/[^/]+/[^/]+/(?P<id>.+?)_\d+\.html'
_TESTS = [
{
thumbnail = xpath_text(episode, './media/asset/files/background', 'thumbnail')
subtitles = {}
- subtitle = xpath_text(episode, './media/asset/files/subtitle', 'subtitle')
- if subtitle:
- subtitles['es'] = subtitle
-
- if self._downloader.params.get('listsubtitles', False):
- self._list_available_subtitles(video_id, subtitles)
- return
+ subtitle_url = xpath_text(episode, './media/asset/files/subtitle', 'subtitle')
+ if subtitle_url:
+ subtitles['es'] = [{
+ 'ext': 'srt',
+ 'url': subtitle_url,
+ }]
return {
'id': video_id,
'thumbnail': thumbnail,
'duration': duration,
'formats': formats,
- 'subtitles': self.extract_subtitles(video_id, subtitles),
+ 'subtitles': subtitles,
}
'duration': int(info['length']),
'view_count': int(info['views_total']),
'uploader': info['username'],
- 'uploader_id': info['uid'],
+ 'uploader_id': info['owner']['uid'],
}
download_link = m_download.group(1)
video_id = self._search_regex(
- r'var TralbumData = {.*?id: (?P<id>\d+),?$',
- webpage, 'video id', flags=re.MULTILINE | re.DOTALL)
+ r'(?ms)var TralbumData = {.*?id: (?P<id>\d+),?$',
+ webpage, 'video id')
download_webpage = self._download_webpage(download_link, video_id, 'Downloading free downloads page')
# We get the dictionary of the track from some javascript code
- info = re.search(r'items: (.*?),$', download_webpage, re.MULTILINE).group(1)
- info = json.loads(info)[0]
+ all_info = self._parse_json(self._search_regex(
+ r'(?sm)items: (.*?),$', download_webpage, 'items'), video_id)
+ info = all_info[0]
# We pick mp3-320 for now, until format selection can be easily implemented.
mp3_info = info['downloads']['mp3-320']
# If we try to use this url it says the link has expired
initial_url = mp3_info['url']
- re_url = r'(?P<server>http://(.*?)\.bandcamp\.com)/download/track\?enc=mp3-320&fsig=(?P<fsig>.*?)&id=(?P<id>.*?)&ts=(?P<ts>.*)$'
- m_url = re.match(re_url, initial_url)
+ m_url = re.match(
+ r'(?P<server>http://(.*?)\.bandcamp\.com)/download/track\?enc=mp3-320&fsig=(?P<fsig>.*?)&id=(?P<id>.*?)&ts=(?P<ts>.*)$',
+ initial_url)
# We build the url we will use to get the final track url
        # This url is built by Bandcamp in the script download_bunde_*.js
request_url = '%s/statdownload/track?enc=mp3-320&fsig=%s&id=%s&ts=%s&.rand=665028774616&.vrs=1' % (m_url.group('server'), m_url.group('fsig'), video_id, m_url.group('ts'))
final_url_webpage = self._download_webpage(request_url, video_id, 'Requesting download url')
# If we could correctly generate the .rand field the url would be
# in the "download_url" key
- final_url = re.search(r'"retry_url":"(.*?)"', final_url_webpage).group(1)
+ final_url = self._search_regex(
+ r'"retry_url":"(.*?)"', final_url_webpage, 'final video URL')
return {
'id': video_id,
class BandcampAlbumIE(InfoExtractor):
IE_NAME = 'Bandcamp:album'
- _VALID_URL = r'https?://(?:(?P<subdomain>[^.]+)\.)?bandcamp\.com(?:/album/(?P<title>[^?#]+)|/?(?:$|[?#]))'
+ _VALID_URL = r'https?://(?:(?P<subdomain>[^.]+)\.)?bandcamp\.com(?:/album/(?P<album_id>[^?#]+)|/?(?:$|[?#]))'
_TESTS = [{
'url': 'http://blazo.bandcamp.com/album/jazz-format-mixtape-vol-1',
],
'info_dict': {
'title': 'Jazz Format Mixtape vol.1',
+ 'id': 'jazz-format-mixtape-vol-1',
+ 'uploader_id': 'blazo',
},
'params': {
'playlistend': 2
},
- 'skip': 'Bandcamp imposes download limits. See test_playlists:test_bandcamp_album for the playlist test'
+ 'skip': 'Bandcamp imposes download limits.'
}, {
'url': 'http://nightbringer.bandcamp.com/album/hierophany-of-the-open-grave',
'info_dict': {
'title': 'Hierophany of the Open Grave',
+ 'uploader_id': 'nightbringer',
+ 'id': 'hierophany-of-the-open-grave',
},
'playlist_mincount': 9,
}, {
'url': 'http://dotscale.bandcamp.com',
'info_dict': {
'title': 'Loom',
+ 'id': 'dotscale',
+ 'uploader_id': 'dotscale',
},
'playlist_mincount': 7,
}]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
- playlist_id = mobj.group('subdomain')
- title = mobj.group('title')
- display_id = title or playlist_id
- webpage = self._download_webpage(url, display_id)
+ uploader_id = mobj.group('subdomain')
+ album_id = mobj.group('album_id')
+ playlist_id = album_id or uploader_id
+ webpage = self._download_webpage(url, playlist_id)
tracks_paths = re.findall(r'<a href="(.*?)" itemprop="url">', webpage)
if not tracks_paths:
raise ExtractorError('The page doesn\'t contain any tracks')
r'album_title\s*:\s*"(.*?)"', webpage, 'title', fatal=False)
return {
'_type': 'playlist',
+ 'uploader_id': uploader_id,
'id': playlist_id,
- 'display_id': display_id,
'title': title,
'entries': entries,
}
import xml.etree.ElementTree
-from .subtitles import SubtitlesInfoExtractor
+from .common import InfoExtractor
from ..utils import ExtractorError
from ..compat import compat_HTTPError
-class BBCCoUkIE(SubtitlesInfoExtractor):
+class BBCCoUkIE(InfoExtractor):
IE_NAME = 'bbc.co.uk'
IE_DESC = 'BBC iPlayer'
_VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/(?:(?:(?:programmes|iplayer(?:/[^/]+)?/(?:episode|playlist))/)|music/clips[/#])(?P<id>[\da-z]{8})'
formats.extend(conn_formats)
return formats
- def _extract_captions(self, media, programme_id):
+ def _get_subtitles(self, media, programme_id):
subtitles = {}
for connection in self._extract_connections(media):
captions = self._download_xml(connection.get('href'), programme_id, 'Downloading captions')
lang = captions.get('{http://www.w3.org/XML/1998/namespace}lang', 'en')
ps = captions.findall('./{0}body/{0}div/{0}p'.format('{http://www.w3.org/2006/10/ttaf1}'))
srt = ''
+
+ def _extract_text(p):
+ if p.text is not None:
+ stripped_text = p.text.strip()
+ if stripped_text:
+ return stripped_text
+ return ' '.join(span.text.strip() for span in p.findall('{http://www.w3.org/2006/10/ttaf1}span'))
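+            # Each <p> element becomes one SRT cue: index, "begin --> end" timing line, text, blank line.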
for pos, p in enumerate(ps):
- srt += '%s\r\n%s --> %s\r\n%s\r\n\r\n' % (str(pos), p.get('begin'), p.get('end'),
- p.text.strip() if p.text is not None else '')
- subtitles[lang] = srt
+ srt += '%s\r\n%s --> %s\r\n%s\r\n\r\n' % (str(pos), p.get('begin'), p.get('end'), _extract_text(p))
+ subtitles[lang] = [
+ {
+ 'url': connection.get('href'),
+ 'ext': 'ttml',
+ },
+ {
+ 'data': srt,
+ 'ext': 'srt',
+ },
+ ]
return subtitles
def _download_media_selector(self, programme_id):
elif kind == 'video':
formats.extend(self._extract_video(media, programme_id))
elif kind == 'captions':
- subtitles = self._extract_captions(media, programme_id)
+ subtitles = self.extract_subtitles(media, programme_id)
return formats, subtitles
formats, subtitles = self._download_media_selector(programme_id)
return programme_id, title, description, duration, formats, subtitles
except ExtractorError as ee:
- if not isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 404:
+ if not (isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 404):
raise
# fallback to legacy playlist
else:
programme_id, title, description, duration, formats, subtitles = self._download_playlist(group_id)
- if self._downloader.params.get('listsubtitles', False):
- self._list_available_subtitles(programme_id, subtitles)
- return
-
self._sort_formats(formats)
return {
_VALID_URL = r'https?://(?:www\.)?beeg\.com/(?P<id>\d+)'
_TEST = {
'url': 'http://beeg.com/5416503',
- 'md5': '634526ae978711f6b748fe0dd6c11f57',
+ 'md5': '1bff67111adb785c51d1b42959ec10e5',
'info_dict': {
'id': '5416503',
'ext': 'mp4',
from __future__ import unicode_literals
import json
-import re
from .common import InfoExtractor
-from ..utils import remove_start
+from ..utils import (
+ remove_start,
+ int_or_none,
+)
class BlinkxIE(InfoExtractor):
- _VALID_URL = r'^(?:https?://(?:www\.)blinkx\.com/#?ce/|blinkx:)(?P<id>[^?]+)'
+ _VALID_URL = r'(?:https?://(?:www\.)blinkx\.com/#?ce/|blinkx:)(?P<id>[^?]+)'
IE_NAME = 'blinkx'
_TEST = {
- 'url': 'http://www.blinkx.com/ce/8aQUy7GVFYgFzpKhT0oqsilwOGFRVXk3R1ZGWWdGenBLaFQwb3FzaWx3OGFRVXk3R1ZGWWdGenB',
- 'md5': '2e9a07364af40163a908edbf10bb2492',
+ 'url': 'http://www.blinkx.com/ce/Da0Gw3xc5ucpNduzLuDDlv4WC9PuI4fDi1-t6Y3LyfdY2SZS5Urbvn-UPJvrvbo8LTKTc67Wu2rPKSQDJyZeeORCR8bYkhs8lI7eqddznH2ofh5WEEdjYXnoRtj7ByQwt7atMErmXIeYKPsSDuMAAqJDlQZ-3Ff4HJVeH_s3Gh8oQ',
+ 'md5': '337cf7a344663ec79bf93a526a2e06c7',
'info_dict': {
- 'id': '8aQUy7GV',
+ 'id': 'Da0Gw3xc',
'ext': 'mp4',
- 'title': 'Police Car Rolls Away',
- 'uploader': 'stupidvideos.com',
- 'upload_date': '20131215',
- 'timestamp': 1387068000,
- 'description': 'A police car gently rolls away from a fight. Maybe it felt weird being around a confrontation and just had to get out of there!',
- 'duration': 14.886,
- 'thumbnails': [{
- 'width': 100,
- 'height': 76,
- 'resolution': '100x76',
- 'url': 'http://cdn.blinkx.com/stream/b/41/StupidVideos/20131215/1873969261/1873969261_tn_0.jpg',
- }],
+ 'title': 'No Daily Show for John Oliver; HBO Show Renewed - IGN News',
+ 'uploader': 'IGN News',
+ 'upload_date': '20150217',
+ 'timestamp': 1424215740,
+ 'description': 'HBO has renewed Last Week Tonight With John Oliver for two more seasons.',
+ 'duration': 47.743333,
},
}
- def _real_extract(self, rl):
- m = re.match(self._VALID_URL, rl)
- video_id = m.group('id')
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
display_id = video_id[:8]
api_url = ('https://apib4.blinkx.com/api.php?action=play_video&' +
elif m['type'] in ('flv', 'mp4'):
vcodec = remove_start(m['vcodec'], 'ff')
acodec = remove_start(m['acodec'], 'ff')
- tbr = (int(m['vbr']) + int(m['abr'])) // 1000
+ vbr = int_or_none(m.get('vbr') or m.get('vbitrate'), 1000)
+ abr = int_or_none(m.get('abr') or m.get('abitrate'), 1000)
+ tbr = vbr + abr if vbr and abr else None
format_id = '%s-%sk-%s' % (vcodec, tbr, m['w'])
formats.append({
'format_id': format_id,
'url': m['link'],
'vcodec': vcodec,
'acodec': acodec,
- 'abr': int(m['abr']) // 1000,
- 'vbr': int(m['vbr']) // 1000,
+ 'abr': abr,
+ 'vbr': vbr,
'tbr': tbr,
- 'width': int(m['w']),
- 'height': int(m['h']),
+ 'width': int_or_none(m.get('w')),
+ 'height': int_or_none(m.get('h')),
})
self._sort_formats(formats)
import re
from .common import InfoExtractor
-from .subtitles import SubtitlesInfoExtractor
from ..compat import (
compat_str,
)
-class BlipTVIE(SubtitlesInfoExtractor):
+class BlipTVIE(InfoExtractor):
_VALID_URL = r'https?://(?:\w+\.)?blip\.tv/(?:(?:.+-|rss/flash/)(?P<id>\d+)|((?:play/|api\.swf#)(?P<lookup_id>[\da-zA-Z+_]+)))'
_TESTS = [
categories = [category.text for category in item.findall('category')]
formats = []
- subtitles = {}
+ subtitles_urls = {}
media_group = item.find(media('group'))
for media_content in media_group.findall(media('content')):
}
lang = role.rpartition('-')[-1].strip().lower()
langcode = LANGS.get(lang, lang)
- subtitles[langcode] = url
+ subtitles_urls[langcode] = url
elif media_type.startswith('video/'):
formats.append({
'url': real_url,
})
self._sort_formats(formats)
- # subtitles
- video_subtitles = self.extract_subtitles(video_id, subtitles)
- if self._downloader.params.get('listsubtitles', False):
- self._list_available_subtitles(video_id, subtitles)
- return
+ subtitles = self.extract_subtitles(video_id, subtitles_urls)
return {
'id': video_id,
'thumbnail': thumbnail,
'categories': categories,
'formats': formats,
- 'subtitles': video_subtitles,
+ 'subtitles': subtitles,
}
- def _download_subtitle_url(self, sub_lang, url):
- # For some weird reason, blip.tv serves a video instead of subtitles
- # when we request with a common UA
- req = compat_urllib_request.Request(url)
- req.add_header('User-Agent', 'youtube-dl')
- return self._download_webpage(req, None, note=False)
+ def _get_subtitles(self, video_id, subtitles_urls):
+ subtitles = {}
+ for lang, url in subtitles_urls.items():
+ # For some weird reason, blip.tv serves a video instead of subtitles
+ # when we request with a common UA
+ req = compat_urllib_request.Request(url)
+ req.add_header('User-Agent', 'youtube-dl')
+ subtitles[lang] = [{
+ # The extension is 'srt' but it's actually an 'ass' file
+ 'ext': 'ass',
+ 'data': self._download_webpage(req, None, note=False),
+ }]
+ return subtitles
class BlipTVUserIE(InfoExtractor):
class BloombergIE(InfoExtractor):
- _VALID_URL = r'https?://www\.bloomberg\.com/video/(?P<name>.+?)\.html'
+ _VALID_URL = r'https?://www\.bloomberg\.com/video/(?P<id>.+?)\.html'
_TEST = {
'url': 'http://www.bloomberg.com/video/shah-s-presentation-on-foreign-exchange-strategies-qurhIVlJSB6hzkVi229d8g.html',
}
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- name = mobj.group('name')
+ name = self._match_id(url)
webpage = self._download_webpage(url, name)
+
f4m_url = self._search_regex(
r'<source src="(https?://[^"]+\.f4m.*?)"', webpage,
'f4m url')
'url': 'http://c.brightcove.com/services/viewer/htmlFederated?playerID=3550052898001&playerKey=AQ%7E%7E%2CAAABmA9XpXk%7E%2C-Kp7jNgisre1fG5OdqpAFUTcs0lP_ZoL',
'info_dict': {
'title': 'Sealife',
+ 'id': '3550319591001',
},
'playlist_mincount': 7,
},
playlist_info = json_data['videoList']
videos = [self._extract_video_info(video_info) for video_info in playlist_info['mediaCollectionDTO']['videoDTOs']]
- return self.playlist_result(videos, playlist_id=playlist_info['id'],
+ return self.playlist_result(videos, playlist_id='%s' % playlist_info['id'],
playlist_title=playlist_info['mediaCollectionDTO']['displayName'])
def _extract_video_info(self, video_info):
'skip_download': True, # Got enough YouTube download tests
},
'info_dict': {
+ 'id': 'look-at-this-cute-dog-omg',
'description': 're:Munchkin the Teddy Bear is back ?!',
'title': 'You Need To Stop What You\'re Doing And Watching This Dog Walk On A Treadmill',
},
'ext': 'mp4',
'upload_date': '20141124',
'uploader_id': 'CindysMunchkin',
- 'description': 're:© 2014 Munchkin the Shih Tzu',
- 'uploader': 'Munchkin the Shih Tzu',
+ 'description': 're:© 2014 Munchkin the',
+ 'uploader': 're:^Munchkin the',
'title': 're:Munchkin the Teddy Bear gets her exercise',
},
}]
--- /dev/null
+# coding: utf-8
+from __future__ import unicode_literals
+
+import datetime
+import re
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_urllib_parse,
+ compat_urlparse,
+)
+from ..utils import (
+ parse_iso8601,
+ str_to_int,
+)
+
+
+class CamdemyIE(InfoExtractor):
+ _VALID_URL = r'http://(?:www\.)?camdemy\.com/media/(?P<id>\d+)'
+ _TESTS = [{
+ # single file
+ 'url': 'http://www.camdemy.com/media/5181/',
+ 'md5': '5a5562b6a98b37873119102e052e311b',
+ 'info_dict': {
+ 'id': '5181',
+ 'ext': 'mp4',
+ 'title': 'Ch1-1 Introduction, Signals (02-23-2012)',
+ 'thumbnail': 're:^https?://.*\.jpg$',
+ 'description': '',
+ 'creator': 'ss11spring',
+ 'upload_date': '20130114',
+ 'timestamp': 1358154556,
+ 'view_count': int,
+ }
+ }, {
+ # With non-empty description
+ 'url': 'http://www.camdemy.com/media/13885',
+ 'md5': '4576a3bb2581f86c61044822adbd1249',
+ 'info_dict': {
+ 'id': '13885',
+ 'ext': 'mp4',
+ 'title': 'EverCam + Camdemy QuickStart',
+ 'thumbnail': 're:^https?://.*\.jpg$',
+ 'description': 'md5:050b62f71ed62928f8a35f1a41e186c9',
+ 'creator': 'evercam',
+ 'upload_date': '20140620',
+ 'timestamp': 1403271569,
+ }
+ }, {
+ # External source
+ 'url': 'http://www.camdemy.com/media/14842',
+ 'md5': '50e1c3c3aa233d3d7b7daa2fa10b1cf7',
+ 'info_dict': {
+ 'id': '2vsYQzNIsJo',
+ 'ext': 'mp4',
+ 'upload_date': '20130211',
+ 'uploader': 'Hun Kim',
+ 'description': 'Excel 2013 Tutorial for Beginners - How to add Password Protection',
+ 'uploader_id': 'hunkimtutorials',
+ 'title': 'Excel 2013 Tutorial - How to add Password Protection',
+ }
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ page = self._download_webpage(url, video_id)
+
+ src_from = self._html_search_regex(
+ r"<div class='srcFrom'>Source: <a title='([^']+)'", page,
+ 'external source', default=None)
+ if src_from:
+ return self.url_result(src_from)
+
+ oembed_obj = self._download_json(
+ 'http://www.camdemy.com/oembed/?format=json&url=' + url, video_id)
+
+ thumb_url = oembed_obj['thumbnail_url']
+ video_folder = compat_urlparse.urljoin(thumb_url, 'video/')
+ file_list_doc = self._download_xml(
+ compat_urlparse.urljoin(video_folder, 'fileList.xml'),
+ video_id, 'Filelist XML')
+ file_name = file_list_doc.find('./video/item/fileName').text
+ video_url = compat_urlparse.urljoin(video_folder, file_name)
+
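+        # The "Posted" value on the page carries no timezone marker; the
+        # fixed UTC+8 offset below reflects the site's apparent server time.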
+ timestamp = parse_iso8601(self._html_search_regex(
+ r"<div class='title'>Posted\s*:</div>\s*<div class='value'>([^<>]+)<",
+ page, 'creation time', fatal=False),
+ delimiter=' ', timezone=datetime.timedelta(hours=8))
+ view_count = str_to_int(self._html_search_regex(
+ r"<div class='title'>Views\s*:</div>\s*<div class='value'>([^<>]+)<",
+ page, 'view count', fatal=False))
+
+ return {
+ 'id': video_id,
+ 'url': video_url,
+ 'title': oembed_obj['title'],
+ 'thumbnail': thumb_url,
+ 'description': self._html_search_meta('description', page),
+ 'creator': oembed_obj['author_name'],
+ 'duration': oembed_obj['duration'],
+ 'timestamp': timestamp,
+ 'view_count': view_count,
+ }
+
+
+class CamdemyFolderIE(InfoExtractor):
+ _VALID_URL = r'http://www.camdemy.com/folder/(?P<id>\d+)'
+ _TESTS = [{
+ # links with trailing slash
+ 'url': 'http://www.camdemy.com/folder/450',
+ 'info_dict': {
+ 'id': '450',
+ 'title': '信號與系統 2012 & 2011 (Signals and Systems)',
+ },
+ 'playlist_mincount': 145
+ }, {
+ # links without trailing slash
+ # and multi-page
+ 'url': 'http://www.camdemy.com/folder/853',
+ 'info_dict': {
+ 'id': '853',
+ 'title': '科學計算 - 使用 Matlab'
+ },
+ 'playlist_mincount': 20
+ }, {
+        # with displayMode parameter, to test the code that adds query parameters
+ 'url': 'http://www.camdemy.com/folder/853/?displayMode=defaultOrderByOrg',
+ 'info_dict': {
+ 'id': '853',
+ 'title': '科學計算 - 使用 Matlab'
+ },
+ 'playlist_mincount': 20
+ }]
+
+ def _real_extract(self, url):
+ folder_id = self._match_id(url)
+
+ # Add displayMode=list so that all links are displayed in a single page
+ parsed_url = list(compat_urlparse.urlparse(url))
+ query = dict(compat_urlparse.parse_qsl(parsed_url[4]))
+ query.update({'displayMode': 'list'})
+ parsed_url[4] = compat_urllib_parse.urlencode(query)
+ final_url = compat_urlparse.urlunparse(parsed_url)
+
+ page = self._download_webpage(final_url, folder_id)
+ matches = re.findall(r"href='(/media/\d+/?)'", page)
+
+ entries = [self.url_result('http://www.camdemy.com' + media_path)
+ for media_path in matches]
+
+ folder_title = self._html_search_meta('keywords', page)
+
+ return self.playlist_result(entries, folder_id, folder_title)
class CanalplusIE(InfoExtractor):
IE_DESC = 'canalplus.fr, piwiplus.fr and d8.tv'
- _VALID_URL = r'https?://(?:www\.(?P<site>canalplus\.fr|piwiplus\.fr|d8\.tv)/.*?/(?P<path>.*)|player\.canalplus\.fr/#/(?P<id>[0-9]+))'
+ _VALID_URL = r'https?://(?:www\.(?P<site>canalplus\.fr|piwiplus\.fr|d8\.tv|itele\.fr)/.*?/(?P<path>.*)|player\.canalplus\.fr/#/(?P<id>[0-9]+))'
_VIDEO_INFO_TEMPLATE = 'http://service.canal-plus.com/video/rest/getVideosLiees/%s/%s'
_SITE_ID_MAP = {
'canalplus.fr': 'cplus',
'piwiplus.fr': 'teletoon',
'd8.tv': 'd8',
+ 'itele.fr': 'itele',
}
_TESTS = [{
'upload_date': '20131108',
},
'skip': 'videos get deleted after a while',
+ }, {
+ 'url': 'http://www.itele.fr/france/video/aubervilliers-un-lycee-en-colere-111559',
+ 'md5': '65aa83ad62fe107ce29e564bb8712580',
+ 'info_dict': {
+ 'id': '1213714',
+ 'ext': 'flv',
+ 'title': 'Aubervilliers : un lycée en colère - Le 11/02/2015 à 06h45',
+ 'description': 'md5:8216206ec53426ea6321321f3b3c16db',
+ 'upload_date': '20150211',
+ },
}]
def _real_extract(self, url):
from __future__ import unicode_literals
-import re
-
from .common import InfoExtractor
}]
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('id')
+ video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
real_id = self._search_regex(
r"video\.settings\.pid\s*=\s*'([^']+)';",
--- /dev/null
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+
+
+class CBSSportsIE(InfoExtractor):
+ _VALID_URL = r'http://www\.cbssports\.com/video/player/(?P<section>[^/]+)/(?P<id>[^/]+)'
+
+ _TEST = {
+ 'url': 'http://www.cbssports.com/video/player/tennis/318462531970/0/us-open-flashbacks-1990s',
+ 'info_dict': {
+ 'id': '_d5_GbO8p1sT',
+ 'ext': 'flv',
+ 'title': 'US Open flashbacks: 1990s',
+ 'description': 'Bill Macatee relives the best moments in US Open history from the 1990s.',
+ },
+ }
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ section = mobj.group('section')
+ video_id = mobj.group('id')
+ all_videos = self._download_json(
+ 'http://www.cbssports.com/data/video/player/getVideos/%s?as=json' % section,
+ video_id)
+        # The JSON file contains the info of all the videos in the section
+ video_info = next(v for v in all_videos if v['pcid'] == video_id)
+ return self.url_result('theplatform:%s' % video_info['pid'], 'ThePlatform')
--- /dev/null
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ qualities,
+ unified_strdate,
+)
+
+
+class CCCIE(InfoExtractor):
+ IE_NAME = 'media.ccc.de'
+ _VALID_URL = r'https?://(?:www\.)?media\.ccc\.de/[^?#]+/[^?#/]*?_(?P<id>[0-9]{8,})._[^?#/]*\.html'
+
+ _TEST = {
+ 'url': 'http://media.ccc.de/browse/congress/2013/30C3_-_5443_-_en_-_saal_g_-_201312281830_-_introduction_to_processor_design_-_byterazor.html#video',
+ 'md5': '205a365d0d57c0b1e43a12c9ffe8f9be',
+ 'info_dict': {
+ 'id': '20131228183',
+ 'ext': 'mp4',
+ 'title': 'Introduction to Processor Design',
+ 'description': 'md5:5ddbf8c734800267f2cee4eab187bc1b',
+ 'thumbnail': 're:^https?://.*\.jpg$',
+ 'view_count': int,
+ 'upload_date': '20131229',
+ }
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ if self._downloader.params.get('prefer_free_formats'):
+ preference = qualities(['mp3', 'opus', 'mp4-lq', 'webm-lq', 'h264-sd', 'mp4-sd', 'webm-sd', 'mp4', 'webm', 'mp4-hd', 'h264-hd', 'webm-hd'])
+ else:
+ preference = qualities(['opus', 'mp3', 'webm-lq', 'mp4-lq', 'webm-sd', 'h264-sd', 'mp4-sd', 'webm', 'mp4', 'webm-hd', 'mp4-hd', 'h264-hd'])
+
+ title = self._html_search_regex(
+ r'(?s)<h1>(.*?)</h1>', webpage, 'title')
+ description = self._html_search_regex(
+ r"(?s)<p class='description'>(.*?)</p>",
+ webpage, 'description', fatal=False)
+ upload_date = unified_strdate(self._html_search_regex(
+ r"(?s)<span class='[^']*fa-calendar-o'></span>(.*?)</li>",
+ webpage, 'upload date', fatal=False))
+ view_count = int_or_none(self._html_search_regex(
+ r"(?s)<span class='[^']*fa-eye'></span>(.*?)</li>",
+ webpage, 'view count', fatal=False))
+
+ matches = re.finditer(r'''(?xs)
+ <(?:span|div)\s+class='label\s+filetype'>(?P<format>.*?)</(?:span|div)>\s*
+ <a\s+href='(?P<http_url>[^']+)'>\s*
+ (?:
+ .*?
+ <a\s+href='(?P<torrent_url>[^']+\.torrent)'
+ )?''', webpage)
+ formats = []
+ for m in matches:
+ format = m.group('format')
+ format_id = self._search_regex(
+ r'.*/([a-z0-9_-]+)/[^/]*$',
+ m.group('http_url'), 'format id', default=None)
+            vcodec = 'h264' if format_id and 'h264' in format_id else (
+ 'none' if format_id in ('mp3', 'opus') else None
+ )
+ formats.append({
+ 'format_id': format_id,
+ 'format': format,
+ 'url': m.group('http_url'),
+ 'vcodec': vcodec,
+ 'preference': preference(format_id),
+ })
+
+ if m.group('torrent_url'):
+ formats.append({
+ 'format_id': 'torrent-%s' % (format if format_id is None else format_id),
+ 'format': '%s (torrent)' % format,
+ 'proto': 'torrent',
+ 'format_note': '(unsupported; will just download the .torrent file)',
+ 'vcodec': vcodec,
+ 'preference': -100 + preference(format_id),
+ 'url': m.group('torrent_url'),
+ })
+ self._sort_formats(formats)
+
+ thumbnail = self._html_search_regex(
+ r"<video.*?poster='([^']+)'", webpage, 'thumbnail', fatal=False)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'view_count': view_count,
+ 'upload_date': upload_date,
+ 'formats': formats,
+ }
import re
-from .subtitles import SubtitlesInfoExtractor
+from .common import InfoExtractor
from ..compat import (
compat_urllib_request,
compat_urllib_parse,
)
-class CeskaTelevizeIE(SubtitlesInfoExtractor):
+class CeskaTelevizeIE(InfoExtractor):
_VALID_URL = r'https?://www\.ceskatelevize\.cz/(porady|ivysilani)/(.+/)?(?P<id>[^?#]+)'
_TESTS = [
subtitles = {}
subs = item.get('subtitles')
if subs:
- subtitles['cs'] = subs[0]['url']
-
- if self._downloader.params.get('listsubtitles', False):
- self._list_available_subtitles(video_id, subtitles)
- return
-
- subtitles = self._fix_subtitles(self.extract_subtitles(video_id, subtitles))
+ subtitles = self.extract_subtitles(episode_id, subs)
return {
'id': episode_id,
'subtitles': subtitles,
}
+ def _get_subtitles(self, episode_id, subs):
+ original_subtitles = self._download_webpage(
+ subs[0]['url'], episode_id, 'Downloading subtitles')
+ srt_subs = self._fix_subtitles(original_subtitles)
+ return {
+ 'cs': [{
+ 'ext': 'srt',
+ 'data': srt_subs,
+ }]
+ }
+
@staticmethod
def _fix_subtitles(subtitles):
""" Convert millisecond-based subtitles to SRT """
- if subtitles is None:
- return subtitles # subtitles not requested
def _msectotimecode(msec):
""" Helper utility to convert milliseconds to timecode """
else:
yield line
- fixed_subtitles = {}
- for k, v in subtitles.items():
- fixed_subtitles[k] = "\r\n".join(_fix_subtitle(v))
- return fixed_subtitles
+ return "\r\n".join(_fix_subtitle(subtitles))
--- /dev/null
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ parse_duration,
+ int_or_none,
+)
+
+
+class ChirbitIE(InfoExtractor):
+ IE_NAME = 'chirbit'
+ _VALID_URL = r'https?://(?:www\.)?chirb\.it/(?:(?:wp|pl)/|fb_chirbit_player\.swf\?key=)?(?P<id>[\da-zA-Z]+)'
+ _TESTS = [{
+ 'url': 'http://chirb.it/PrIPv5',
+ 'md5': '9847b0dad6ac3e074568bf2cfb197de8',
+ 'info_dict': {
+ 'id': 'PrIPv5',
+ 'ext': 'mp3',
+ 'title': 'Фасадстрой',
+ 'duration': 52,
+ 'view_count': int,
+ 'comment_count': int,
+ }
+ }, {
+ 'url': 'https://chirb.it/fb_chirbit_player.swf?key=PrIPv5',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ audio_id = self._match_id(url)
+
+ webpage = self._download_webpage(
+ 'http://chirb.it/%s' % audio_id, audio_id)
+
+ audio_url = self._search_regex(
+ r'"setFile"\s*,\s*"([^"]+)"', webpage, 'audio url')
+
+ title = self._search_regex(
+ r'itemprop="name">([^<]+)', webpage, 'title')
+ duration = parse_duration(self._html_search_meta(
+ 'duration', webpage, 'duration', fatal=False))
+ view_count = int_or_none(self._search_regex(
+ r'itemprop="playCount"\s*>(\d+)', webpage,
+ 'listen count', fatal=False))
+ comment_count = int_or_none(self._search_regex(
+ r'>(\d+) Comments?:', webpage,
+ 'comment count', fatal=False))
+
+ return {
+ 'id': audio_id,
+ 'url': audio_url,
+ 'title': title,
+ 'duration': duration,
+ 'view_count': view_count,
+ 'comment_count': comment_count,
+ }
+
+
+class ChirbitProfileIE(InfoExtractor):
+ IE_NAME = 'chirbit:profile'
+ _VALID_URL = r'https?://(?:www\.)?chirbit.com/(?:rss/)?(?P<id>[^/]+)'
+ _TEST = {
+ 'url': 'http://chirbit.com/ScarletBeauty',
+ 'info_dict': {
+ 'id': 'ScarletBeauty',
+ 'title': 'Chirbits by ScarletBeauty',
+ },
+ 'playlist_mincount': 3,
+ }
+
+ def _real_extract(self, url):
+ profile_id = self._match_id(url)
+
+ rss = self._download_xml(
+ 'http://chirbit.com/rss/%s' % profile_id, profile_id)
+
+ entries = [
+ self.url_result(audio_url.text, 'Chirbit')
+ for audio_url in rss.findall('./channel/item/link')]
+
+ title = rss.find('./channel/title').text
+
+ return self.playlist_result(entries, profile_id, title)
})
self._sort_formats(formats)
+ subtitles = self._extract_subtitles(cdoc, guid)
+
virtual_id = show_name + ' ' + epTitle + ' part ' + compat_str(part_num + 1)
entries.append({
'id': guid,
'duration': duration,
'thumbnail': thumbnail,
'description': description,
+ 'subtitles': subtitles,
})
return {
compiled_regex_type,
ExtractorError,
float_or_none,
- HEADRequest,
int_or_none,
RegexNotFoundError,
sanitize_filename,
If not explicitly set, calculated from timestamp.
uploader_id: Nickname or id of the video uploader.
location: Physical location where the video was filmed.
- subtitles: The subtitle file contents as a dictionary in the format
- {language: subtitles}.
+ subtitles: The available subtitles as a dictionary in the format
+ {language: subformats}. "subformats" is a list sorted from
+                    lower to higher preference; each element is a dictionary
+                    with an "ext" entry and one of:
+ * "data": The subtitles file contents
+ * "url": A url pointing to the subtitles file
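+                    For example (all values purely illustrative), English
+                    subtitles offered both as a remote TTML file and as
+                    inline SRT data would be represented as:
+                        {'en': [{'ext': 'ttml', 'url': 'http://host/x.ttml'},
+                                {'ext': 'srt', 'data': '1\r\n00:00:00,000 ...'}]}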
+ automatic_captions: Like 'subtitles', used by the YoutubeIE for
+ automatically generated captions
duration: Length of the video in seconds, as an integer.
view_count: How many users have watched the video on the platform.
like_count: Number of positive ratings of the video
dislike_count: Number of negative ratings of the video
+    average_rating: Average rating given by users; the scale depends on the webpage
comment_count: Number of comments on the video
comments: A list of comments, each with one or more of the following
properties (all but one of text or html optional):
def extract(self, url):
"""Extracts URL information and returns it in list of dicts."""
- self.initialize()
- return self._real_extract(url)
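+        # Normalize low-level failures (truncated reads, missing dict keys)
+        # into ExtractorError so the core can report them uniformly.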
+ try:
+ self.initialize()
+ return self._real_extract(url)
+ except ExtractorError:
+ raise
+ except compat_http_client.IncompleteRead as e:
+            raise ExtractorError('A network error has occurred.', cause=e, expected=True)
+ except (KeyError, StopIteration) as e:
+            raise ExtractorError('An extractor error has occurred.', cause=e)
def set_downloader(self, downloader):
"""Sets the downloader for this IE."""
if blocked_iframe:
msg += ' Visit %s for more details' % blocked_iframe
raise ExtractorError(msg, expected=True)
+ if '<title>The URL you requested has been blocked</title>' in content[:512]:
+ msg = (
+ 'Access to this webpage has been blocked by Indian censorship. '
+ 'Use a VPN or proxy server (with --proxy) to route around it.')
+ block_msg = self._html_search_regex(
+ r'</h1><p>(.*?)</p>',
+ content, 'block message', default=None)
+ if block_msg:
+ msg += ' (Message: "%s")' % block_msg.replace('\n', ' ')
+ raise ExtractorError(msg, expected=True)
return content
if mobj:
break
- if os.name != 'nt' and sys.stderr.isatty():
+ if not self._downloader.params.get('no_color') and os.name != 'nt' and sys.stderr.isatty():
_name = '\033[0;34m%s\033[0m' % name
else:
_name = name
}
return RATING_TABLE.get(rating.lower(), None)
+ def _family_friendly_search(self, html):
+ # See http://schema.org/VideoObject
+ family_friendly = self._html_search_meta('isFamilyFriendly', html)
+
+ if not family_friendly:
+ return None
+
+ RATING_TABLE = {
+ '1': 0,
+ 'true': 0,
+ '0': 18,
+ 'false': 18,
+ }
+ return RATING_TABLE.get(family_friendly.lower(), None)
+
def _twitter_search_player(self, html):
return self._html_search_meta('twitter:player', html,
'twitter card player')
f.get('language_preference') if f.get('language_preference') is not None else -1,
f.get('quality') if f.get('quality') is not None else -1,
f.get('tbr') if f.get('tbr') is not None else -1,
+ f.get('filesize') if f.get('filesize') is not None else -1,
f.get('vbr') if f.get('vbr') is not None else -1,
- ext_preference,
f.get('height') if f.get('height') is not None else -1,
f.get('width') if f.get('width') is not None else -1,
+ ext_preference,
f.get('abr') if f.get('abr') is not None else -1,
audio_ext_preference,
f.get('fps') if f.get('fps') is not None else -1,
- f.get('filesize') if f.get('filesize') is not None else -1,
f.get('filesize_approx') if f.get('filesize_approx') is not None else -1,
f.get('source_preference') if f.get('source_preference') is not None else -1,
f.get('format_id'),
def _is_valid_url(self, url, video_id, item='video'):
try:
- self._request_webpage(
- HEADRequest(url), video_id,
- 'Checking %s URL' % item)
+ self._request_webpage(url, video_id, 'Checking %s URL' % item)
return True
except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError):
media_nodes = manifest.findall('{http://ns.adobe.com/f4m/2.0}media')
for i, media_el in enumerate(media_nodes):
if manifest_version == '2.0':
- manifest_url = ('/'.join(manifest_url.split('/')[:-1]) + '/'
- + (media_el.attrib.get('href') or media_el.attrib.get('url')))
+ manifest_url = ('/'.join(manifest_url.split('/')[:-1]) + '/' +
+ (media_el.attrib.get('href') or media_el.attrib.get('url')))
tbr = int_or_none(media_el.attrib.get('bitrate'))
formats.append({
'format_id': '-'.join(filter(None, [f4m_id, 'f4m-%d' % (i if tbr is None else tbr)])),
'url': m3u8_url,
'ext': ext,
'protocol': 'm3u8',
- 'preference': -1,
+ 'preference': preference - 1 if preference else -1,
'resolution': 'multiple',
'format_note': 'Quality selection URL',
}]
note='Downloading m3u8 information',
errnote='Failed to download m3u8 information')
last_info = None
+ last_media = None
kv_rex = re.compile(
r'(?P<key>[a-zA-Z_-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)')
for line in m3u8_doc.splitlines():
if v.startswith('"'):
v = v[1:-1]
last_info[m.group('key')] = v
+ elif line.startswith('#EXT-X-MEDIA:'):
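+                # e.g. #EXT-X-MEDIA:TYPE=AUDIO,GROUP-ID="audio",NAME="English",URI="eng.m3u8" (illustrative)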
+ last_media = {}
+ for m in kv_rex.finditer(line):
+ v = m.group('val')
+ if v.startswith('"'):
+ v = v[1:-1]
+ last_media[m.group('key')] = v
elif line.startswith('#') or not line.strip():
continue
else:
width_str, height_str = resolution.split('x')
f['width'] = int(width_str)
f['height'] = int(height_str)
+ if last_media is not None:
+ f['m3u8_media'] = last_media
+ last_media = None
formats.append(f)
last_info = {}
self._sort_formats(formats)
formats = []
rtmp_count = 0
- for video in smil.findall('./body/switch/video'):
- src = video.get('src')
- if not src:
- continue
- bitrate = int_or_none(video.get('system-bitrate') or video.get('systemBitrate'), 1000)
- width = int_or_none(video.get('width'))
- height = int_or_none(video.get('height'))
- proto = video.get('proto')
- if not proto:
- if base:
- if base.startswith('rtmp'):
- proto = 'rtmp'
- elif base.startswith('http'):
- proto = 'http'
- ext = video.get('ext')
- if proto == 'm3u8':
- formats.extend(self._extract_m3u8_formats(src, video_id, ext))
- elif proto == 'rtmp':
- rtmp_count += 1
- streamer = video.get('streamer') or base
- formats.append({
- 'url': streamer,
- 'play_path': src,
- 'ext': 'flv',
- 'format_id': 'rtmp-%d' % (rtmp_count if bitrate is None else bitrate),
- 'tbr': bitrate,
- 'width': width,
- 'height': height,
- })
+        seq_videos = smil.findall('./body/seq/video')
+        if seq_videos:
+            video = seq_videos[0]
+ fmts, rtmp_count = self._parse_smil_video(video, video_id, base, rtmp_count)
+ formats.extend(fmts)
+ else:
+ for video in smil.findall('./body/switch/video'):
+ fmts, rtmp_count = self._parse_smil_video(video, video_id, base, rtmp_count)
+ formats.extend(fmts)
+
self._sort_formats(formats)
return formats
+ def _parse_smil_video(self, video, video_id, base, rtmp_count):
+ src = video.get('src')
+ if not src:
+ return ([], rtmp_count)
+ bitrate = int_or_none(video.get('system-bitrate') or video.get('systemBitrate'), 1000)
+ width = int_or_none(video.get('width'))
+ height = int_or_none(video.get('height'))
+ proto = video.get('proto')
+ if not proto:
+ if base:
+ if base.startswith('rtmp'):
+ proto = 'rtmp'
+ elif base.startswith('http'):
+ proto = 'http'
+ ext = video.get('ext')
+ if proto == 'm3u8':
+ return (self._extract_m3u8_formats(src, video_id, ext), rtmp_count)
+ elif proto == 'rtmp':
+ rtmp_count += 1
+ streamer = video.get('streamer') or base
+ return ([{
+ 'url': streamer,
+ 'play_path': src,
+ 'ext': 'flv',
+ 'format_id': 'rtmp-%d' % (rtmp_count if bitrate is None else bitrate),
+ 'tbr': bitrate,
+ 'width': width,
+ 'height': height,
+ }], rtmp_count)
+ elif proto.startswith('http'):
+ return ([{
+ 'url': base + src,
+ 'ext': ext or 'flv',
+ 'tbr': bitrate,
+ 'width': width,
+ 'height': height,
+ }], rtmp_count)
+
def _live_title(self, name):
""" Generate the title for a live video """
now = datetime.datetime.now()
any_restricted = any_restricted or is_restricted
return not any_restricted
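+    # Subtitle extraction is opt-in: the wrappers below only invoke the
+    # subclass' _get_subtitles() / _get_automatic_captions() when the user
+    # asked for subtitles, so no extra network requests are made otherwise.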
+ def extract_subtitles(self, *args, **kwargs):
+ if (self._downloader.params.get('writesubtitles', False) or
+ self._downloader.params.get('listsubtitles')):
+ return self._get_subtitles(*args, **kwargs)
+ return {}
+
+ def _get_subtitles(self, *args, **kwargs):
+ raise NotImplementedError("This method must be implemented by subclasses")
+
+ def extract_automatic_captions(self, *args, **kwargs):
+ if (self._downloader.params.get('writeautomaticsub', False) or
+ self._downloader.params.get('listsubtitles')):
+ return self._get_automatic_captions(*args, **kwargs)
+ return {}
+
+ def _get_automatic_captions(self, *args, **kwargs):
+ raise NotImplementedError("This method must be implemented by subclasses")
+
class SearchInfoExtractor(InfoExtractor):
"""
'That doesn\'t make any sense. '
'Simply remove the parameter in your command or configuration.'
) % url
- if self._downloader.params.get('verbose'):
+ if not self._downloader.params.get('verbose'):
msg += ' Add -v to the command line to see what arguments and configuration youtube-dl got.'
raise ExtractorError(msg, expected=True)
+
+
+class UnicodeBOMIE(InfoExtractor):
+ IE_DESC = False
+ _VALID_URL = r'(?P<bom>\ufeff)(?P<id>.*)$'
+
+ _TESTS = [{
+ 'url': '\ufeffhttp://www.youtube.com/watch?v=BaW_jenozKc',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ real_url = self._match_id(url)
+ self.report_warning(
+ 'Your URL starts with a Byte Order Mark (BOM). '
+ 'Removing the BOM and looking for "%s" ...' % real_url)
+ return self.url_result(real_url)
from hashlib import sha1
from math import pow, sqrt, floor
-from .subtitles import SubtitlesInfoExtractor
+from .common import InfoExtractor
from ..compat import (
compat_urllib_parse,
compat_urllib_request,
aes_cbc_decrypt,
inc,
)
-from .common import InfoExtractor
-class CrunchyrollIE(SubtitlesInfoExtractor):
+class CrunchyrollIE(InfoExtractor):
_VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.(?:com|fr)/(?:[^/]*/[^/?&]*?|media/\?id=)(?P<video_id>[0-9]+))(?:[/?&]|$)'
_TESTS = [{
'url': 'http://www.crunchyroll.com/wanna-be-the-strongest-in-the-world/episode-1-an-idol-wrestler-is-born-645513',
return output
+ def _get_subtitles(self, video_id, webpage):
+ subtitles = {}
+ for sub_id, sub_name in re.findall(r'\?ssid=([0-9]+)" title="([^"]+)', webpage):
+ sub_page = self._download_webpage(
+ 'http://www.crunchyroll.com/xml/?req=RpcApiSubtitle_GetXml&subtitle_script_id=' + sub_id,
+ video_id, note='Downloading subtitles for ' + sub_name)
+ id = self._search_regex(r'id=\'([0-9]+)', sub_page, 'subtitle_id', fatal=False)
+ iv = self._search_regex(r'<iv>([^<]+)', sub_page, 'subtitle_iv', fatal=False)
+ data = self._search_regex(r'<data>([^<]+)', sub_page, 'subtitle_data', fatal=False)
+ if not id or not iv or not data:
+ continue
+ id = int(id)
+ iv = base64.b64decode(iv)
+ data = base64.b64decode(data)
+
+ subtitle = self._decrypt_subtitles(data, iv, id).decode('utf-8')
+ lang_code = self._search_regex(r'lang_code=["\']([^"\']+)', subtitle, 'subtitle_lang_code', fatal=False)
+ if not lang_code:
+ continue
+ sub_root = xml.etree.ElementTree.fromstring(subtitle)
+ subtitles[lang_code] = [
+ {
+ 'ext': 'srt',
+ 'data': self._convert_subtitles_to_srt(sub_root),
+ },
+ {
+ 'ext': 'ass',
+ 'data': self._convert_subtitles_to_ass(sub_root),
+ },
+ ]
+ return subtitles
+
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('video_id')
'format_id': video_format,
})
- subtitles = {}
- sub_format = self._downloader.params.get('subtitlesformat', 'srt')
- for sub_id, sub_name in re.findall(r'\?ssid=([0-9]+)" title="([^"]+)', webpage):
- sub_page = self._download_webpage(
- 'http://www.crunchyroll.com/xml/?req=RpcApiSubtitle_GetXml&subtitle_script_id=' + sub_id,
- video_id, note='Downloading subtitles for ' + sub_name)
- id = self._search_regex(r'id=\'([0-9]+)', sub_page, 'subtitle_id', fatal=False)
- iv = self._search_regex(r'<iv>([^<]+)', sub_page, 'subtitle_iv', fatal=False)
- data = self._search_regex(r'<data>([^<]+)', sub_page, 'subtitle_data', fatal=False)
- if not id or not iv or not data:
- continue
- id = int(id)
- iv = base64.b64decode(iv)
- data = base64.b64decode(data)
-
- subtitle = self._decrypt_subtitles(data, iv, id).decode('utf-8')
- lang_code = self._search_regex(r'lang_code=["\']([^"\']+)', subtitle, 'subtitle_lang_code', fatal=False)
- if not lang_code:
- continue
- sub_root = xml.etree.ElementTree.fromstring(subtitle)
- if sub_format == 'ass':
- subtitles[lang_code] = self._convert_subtitles_to_ass(sub_root)
- else:
- subtitles[lang_code] = self._convert_subtitles_to_srt(sub_root)
-
- if self._downloader.params.get('listsubtitles', False):
- self._list_available_subtitles(video_id, subtitles)
- return
+ subtitles = self.extract_subtitles(video_id, webpage)
return {
'id': video_id,
import itertools
from .common import InfoExtractor
-from .subtitles import SubtitlesInfoExtractor
from ..compat import (
compat_str,
return request
-class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
+class DailymotionIE(DailymotionBaseInfoExtractor):
"""Information Extractor for Dailymotion"""
_VALID_URL = r'(?i)(?:https?://)?(?:(www|touch)\.)?dailymotion\.[a-z]{2,3}/(?:(embed|#)/)?video/(?P<id>[^/?_]+)'
# subtitles
video_subtitles = self.extract_subtitles(video_id, webpage)
- if self._downloader.params.get('listsubtitles', False):
- self._list_available_subtitles(video_id, webpage)
- return
view_count = str_to_int(self._search_regex(
r'video_views_count[^>]+>\s+([\d\.,]+)',
'view_count': view_count,
}
- def _get_available_subtitles(self, video_id, webpage):
+ def _get_subtitles(self, video_id, webpage):
try:
sub_list = self._download_webpage(
'https://api.dailymotion.com/video/%s/subtitles?fields=id,language,url' % video_id,
return {}
info = json.loads(sub_list)
if (info['total'] > 0):
- sub_lang_list = dict((l['language'], l['url']) for l in info['list'])
+ sub_lang_list = dict((l['language'], [{'url': l['url'], 'ext': 'srt'}]) for l in info['list'])
return sub_lang_list
self._downloader.report_warning('video doesn\'t have subtitles')
return {}
'url': 'http://www.dailymotion.com/playlist/xv4bw_nqtv_sport/1#video=xl8v3q',
'info_dict': {
'title': 'SPORT',
+ 'id': 'xv4bw_nqtv_sport',
},
'playlist_mincount': 20,
}]
'display_id': 'videoinstallation-fuer-eine-kaufhausfassade',
'ext': 'flv',
'title': 'Videoinstallation für eine Kaufhausfassade'
+ },
+ 'params': {
+ # rtmp download
+ 'skip_download': True,
}
}
r"flashvars.pvg_id=\"(\d+)\";",
webpage, 'ID')
- json_url = ('http://static.videos.gouv.fr/brightcovehub/export/json/'
- + video_id)
+ json_url = (
+ 'http://static.videos.gouv.fr/brightcovehub/export/json/%s' %
+ video_id)
info = self._download_json(json_url, title, 'Downloading JSON config')
video_url = info['renditions'][0]['url']
from __future__ import unicode_literals
-import re
-import time
-
from .common import InfoExtractor
+from ..utils import (
+ float_or_none,
+ int_or_none,
+)
class DotsubIE(InfoExtractor):
- _VALID_URL = r'http://(?:www\.)?dotsub\.com/view/(?P<id>[^/]+)'
+ _VALID_URL = r'https?://(?:www\.)?dotsub\.com/view/(?P<id>[^/]+)'
_TEST = {
'url': 'http://dotsub.com/view/aed3b8b2-1889-4df5-ae63-ad85f5572f27',
'md5': '0914d4d69605090f623b7ac329fea66e',
'id': 'aed3b8b2-1889-4df5-ae63-ad85f5572f27',
'ext': 'flv',
'title': 'Pyramids of Waste (2010), AKA The Lightbulb Conspiracy - Planned obsolescence documentary',
+ 'description': 'md5:699a0f7f50aeec6042cb3b1db2d0d074',
+ 'thumbnail': 're:^https?://dotsub.com/media/aed3b8b2-1889-4df5-ae63-ad85f5572f27/p',
+ 'duration': 3169,
'uploader': '4v4l0n42',
- 'description': 'Pyramids of Waste (2010) also known as "The lightbulb conspiracy" is a documentary about how our economic system based on consumerism and planned obsolescence is breaking our planet down.\r\n\r\nSolutions to this can be found at:\r\nhttp://robotswillstealyourjob.com\r\nhttp://www.federicopistono.org\r\n\r\nhttp://opensourceecology.org\r\nhttp://thezeitgeistmovement.com',
- 'thumbnail': 'http://dotsub.com/media/aed3b8b2-1889-4df5-ae63-ad85f5572f27/p',
+ 'timestamp': 1292248482.625,
'upload_date': '20101213',
+ 'view_count': int,
}
}
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('id')
- info_url = "https://dotsub.com/api/media/%s/metadata" % video_id
- info = self._download_json(info_url, video_id)
- date = time.gmtime(info['dateCreated'] / 1000) # The timestamp is in miliseconds
+ video_id = self._match_id(url)
+
+ info = self._download_json(
+ 'https://dotsub.com/api/media/%s/metadata' % video_id, video_id)
+ video_url = info.get('mediaURI')
+
+ if not video_url:
+ webpage = self._download_webpage(url, video_id)
+ video_url = self._search_regex(
+ r'"file"\s*:\s*\'([^\']+)', webpage, 'video url')
return {
'id': video_id,
- 'url': info['mediaURI'],
+ 'url': video_url,
'ext': 'flv',
'title': info['title'],
- 'thumbnail': info['screenshotURI'],
- 'description': info['description'],
- 'uploader': info['user'],
- 'view_count': info['numberOfViews'],
- 'upload_date': '%04i%02i%02i' % (date.tm_year, date.tm_mon, date.tm_mday),
+ 'description': info.get('description'),
+ 'thumbnail': info.get('screenshotURI'),
+ 'duration': int_or_none(info.get('duration'), 1000),
+ 'uploader': info.get('user'),
+ 'timestamp': float_or_none(info.get('dateCreated'), 1000),
+ 'view_count': int_or_none(info.get('numberOfViews')),
}
'id': '1740434',
'display_id': 'hot-perky-blonde-naked-golf',
'ext': 'mp4',
- 'title': 'Hot Perky Blonde Naked Golf',
+ 'title': 'hot perky blonde naked golf',
'like_count': int,
'dislike_count': int,
'comment_count': int,
r'<source src="([^"]+)"', webpage, 'video URL')
title = self._html_search_regex(
- r'<title>([^<]+)\s*-\s*Free', webpage, 'title')
+ [r'class="hd_title" style="[^"]+">([^<]+)</h1>', r'<title>([^<]+) - \d+'],
+ webpage, 'title')
thumbnail = self._html_search_regex(
r'poster="([^"]+)"',
from __future__ import unicode_literals
-from .subtitles import SubtitlesInfoExtractor
-from .common import ExtractorError
+from .common import InfoExtractor, ExtractorError
from ..utils import parse_iso8601
-class DRTVIE(SubtitlesInfoExtractor):
+class DRTVIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?dr\.dk/tv/se/(?:[^/]+/)*(?P<id>[\da-z-]+)(?:[/#?]|$)'
_TEST = {
}
for subs in subtitles_list:
lang = subs['Language']
- subtitles[LANGS.get(lang, lang)] = subs['Uri']
+ subtitles[LANGS.get(lang, lang)] = [{'url': subs['Uri'], 'ext': 'vtt'}]
if not formats and restricted_to_denmark:
raise ExtractorError(
self._sort_formats(formats)
- if self._downloader.params.get('listsubtitles', False):
- self._list_available_subtitles(video_id, subtitles)
- return
-
return {
'id': video_id,
'title': title,
'timestamp': timestamp,
'duration': duration,
'formats': formats,
- 'subtitles': self.extract_subtitles(video_id, subtitles),
+ 'subtitles': subtitles,
}
--- /dev/null
+# encoding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import compat_urllib_parse_unquote
+
+
+class EmbedlyIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www|cdn\.)?embedly\.com/widgets/media\.html\?(?:[^#]*?&)?url=(?P<id>[^#&]+)'
+ _TESTS = [{
+ 'url': 'https://cdn.embedly.com/widgets/media.html?src=http%3A%2F%2Fwww.youtube.com%2Fembed%2Fvideoseries%3Flist%3DUUGLim4T2loE5rwCMdpCIPVg&url=https%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DSU4fj_aEMVw%26list%3DUUGLim4T2loE5rwCMdpCIPVg&image=http%3A%2F%2Fi.ytimg.com%2Fvi%2FSU4fj_aEMVw%2Fhqdefault.jpg&key=8ee8a2e6a8cc47aab1a5ee67f9a178e0&type=text%2Fhtml&schema=youtube&autoplay=1',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ return self.url_result(compat_urllib_parse_unquote(self._match_id(url)))
title = self._html_search_regex(
r'<title>(.*?) - EPORNER', webpage, 'title')
- redirect_code = self._html_search_regex(
- r'<script type="text/javascript" src="/config5/%s/([a-f\d]+)/">' % video_id,
- webpage, 'redirect_code')
- redirect_url = 'http://www.eporner.com/config5/%s/%s' % (video_id, redirect_code)
+ redirect_url = 'http://www.eporner.com/config5/%s' % video_id
player_code = self._download_webpage(
redirect_url, display_id, note='Downloading player config')
'duration': duration,
'view_count': view_count,
'formats': formats,
- 'age_limit': self._rta_search(webpage),
+ 'age_limit': 18,
}
from __future__ import unicode_literals
-import re
-
from .common import InfoExtractor
from ..compat import (
compat_urllib_parse,
+ compat_urllib_request,
)
from ..utils import (
ExtractorError,
+ js_to_json,
+ parse_duration,
)
class EscapistIE(InfoExtractor):
- _VALID_URL = r'^https?://?(www\.)?escapistmagazine\.com/videos/view/(?P<showname>[^/]+)/(?P<id>[0-9]+)-'
+ _VALID_URL = r'https?://?(www\.)?escapistmagazine\.com/videos/view/[^/?#]+/(?P<id>[0-9]+)-[^/?#]*(?:$|[?#])'
+ _USER_AGENT = 'Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko'
_TEST = {
'url': 'http://www.escapistmagazine.com/videos/view/the-escapist-presents/6618-Breaking-Down-Baldurs-Gate',
'md5': 'ab3a706c681efca53f0a35f1415cf0d1',
'id': '6618',
'ext': 'mp4',
'description': "Baldur's Gate: Original, Modded or Enhanced Edition? I'll break down what you can expect from the new Baldur's Gate: Enhanced Edition.",
- 'uploader': 'the-escapist-presents',
+ 'uploader_id': 'the-escapist-presents',
+ 'uploader': 'The Escapist Presents',
'title': "Breaking Down Baldur's Gate",
+ 'thumbnail': 're:^https?://.*\.jpg$',
+ 'duration': 264,
}
}
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- showName = mobj.group('showname')
- video_id = mobj.group('id')
-
- self.report_extraction(video_id)
- webpage = self._download_webpage(url, video_id)
-
- videoDesc = self._html_search_regex(
- r'<meta name="description" content="([^"]*)"',
- webpage, 'description', fatal=False)
-
- playerUrl = self._og_search_video_url(webpage, name='player URL')
-
- title = self._html_search_regex(
- r'<meta name="title" content="([^"]*)"',
- webpage, 'title').split(' : ')[-1]
-
- configUrl = self._search_regex('config=(.*)$', playerUrl, 'config URL')
- configUrl = compat_urllib_parse.unquote(configUrl)
+ video_id = self._match_id(url)
+ webpage_req = compat_urllib_request.Request(url)
+ webpage_req.add_header('User-Agent', self._USER_AGENT)
+ webpage = self._download_webpage(webpage_req, video_id)
+
+ uploader_id = self._html_search_regex(
+ r"<h1\s+class='headline'>\s*<a\s+href='/videos/view/(.*?)'",
+ webpage, 'uploader ID', fatal=False)
+ uploader = self._html_search_regex(
+ r"<h1\s+class='headline'>(.*?)</a>",
+ webpage, 'uploader', fatal=False)
+ description = self._html_search_meta('description', webpage)
+ duration = parse_duration(self._html_search_meta('duration', webpage))
+
+ raw_title = self._html_search_meta('title', webpage, fatal=True)
+ title = raw_title.partition(' : ')[2]
+
+ config_url = compat_urllib_parse.unquote(self._html_search_regex(
+ r'''(?x)
+ (?:
+ <param\s+name="flashvars".*?\s+value="config=|
+ flashvars="config=
+ )
+ (https?://[^"&]+)
+ ''',
+ webpage, 'config URL'))
formats = []
+ ad_formats = []
- def _add_format(name, cfgurl, quality):
+ def _add_format(name, cfg_url, quality):
+ cfg_req = compat_urllib_request.Request(cfg_url)
+ cfg_req.add_header('User-Agent', self._USER_AGENT)
config = self._download_json(
- cfgurl, video_id,
+ cfg_req, video_id,
'Downloading ' + name + ' configuration',
'Unable to download ' + name + ' configuration',
- transform_source=lambda s: s.replace("'", '"'))
+ transform_source=js_to_json)
playlist = config['playlist']
- formats.append({
- 'url': playlist[1]['url'],
- 'format_id': name,
- 'quality': quality,
- })
-
- _add_format('normal', configUrl, quality=0)
- hq_url = (configUrl +
- ('&hq=1' if '?' in configUrl else configUrl + '?hq=1'))
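+ # The config playlist mixes the main clip with postroll ads; split by eventCategory so the ad formats are only exposed when --include-ads is set (handled below).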
+ for p in playlist:
+ if p.get('eventCategory') == 'Video':
+ ar = formats
+ elif p.get('eventCategory') == 'Video Postroll':
+ ar = ad_formats
+ else:
+ continue
+
+ ar.append({
+ 'url': p['url'],
+ 'format_id': name,
+ 'quality': quality,
+ 'http_headers': {
+ 'User-Agent': self._USER_AGENT,
+ },
+ })
+
+ _add_format('normal', config_url, quality=0)
+ hq_url = (config_url +
+ ('&hq=1' if '?' in config_url else '?hq=1'))
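+ # Not every video has an HQ encode; requesting hq=1 then fails, so fall back silently to the normal config.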
try:
_add_format('hq', hq_url, quality=1)
except ExtractorError:
pass # That's fine, we'll just use normal quality
-
self._sort_formats(formats)
- return {
+ if '/escapist/sales-marketing/' in formats[-1]['url']:
+ raise ExtractorError('This IP address has been blocked by The Escapist', expected=True)
+
+ res = {
'id': video_id,
'formats': formats,
- 'uploader': showName,
+ 'uploader': uploader,
+ 'uploader_id': uploader_id,
'title': title,
'thumbnail': self._og_search_thumbnail(webpage),
- 'description': videoDesc,
- 'player_url': playerUrl,
+ 'description': description,
+ 'duration': duration,
}
+
+ if self._downloader.params.get('include_ads') and ad_formats:
+ self._sort_formats(ad_formats)
+ ad_res = {
+ 'id': '%s-ad' % video_id,
+ 'title': '%s (Postroll)' % title,
+ 'formats': ad_formats,
+ }
+ return {
+ '_type': 'playlist',
+ 'entries': [res, ad_res],
+ 'title': title,
+ 'id': video_id,
+ }
+
+ return res
params_raw = compat_urllib_parse.unquote(data['params'])
params = json.loads(params_raw)
video_data = params['video_data'][0]
- video_url = video_data.get('hd_src')
- if not video_url:
- video_url = video_data['sd_src']
- if not video_url:
- raise ExtractorError('Cannot find video URL')
+
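+ # Collect whichever of the sd/hd sources are present; hd comes last so it is preferred when the formats are left unsorted.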
+ formats = []
+ for quality in ['sd', 'hd']:
+ src = video_data.get('%s_src' % quality)
+ if src is not None:
+ formats.append({
+ 'format_id': quality,
+ 'url': src,
+ })
+ if not formats:
+ raise ExtractorError('Cannot find video formats')
video_title = self._html_search_regex(
r'<h2 class="uiHeaderTitle">([^<]*)</h2>', webpage, 'title',
return {
'id': video_id,
'title': video_title,
- 'url': video_url,
+ 'formats': formats,
'duration': int_or_none(video_data.get('video_duration')),
'thumbnail': video_data.get('thumbnail_src'),
}
from __future__ import unicode_literals
-import re
-
from .common import InfoExtractor
}
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('id')
-
+ video_id = self._match_id(url)
page = self._download_webpage(url, video_id)
- title = self._html_search_meta('twitter:title', page, 'title')
+
+ title = self._html_search_meta('twitter:title', page, 'title', fatal=True)
description = self._html_search_meta('twitter:description', page, 'description')
data = self._download_xml(
'height': int(details.find('./height').text.strip()),
} for details in item.findall('./source/file_details') if details.find('./file').text
]
+ self._sort_formats(formats)
return {
'id': video_id,
# encoding: utf-8
from __future__ import unicode_literals
-import re
-
from .common import InfoExtractor
from ..utils import int_or_none
class FirstTVIE(InfoExtractor):
- IE_NAME = 'firsttv'
- IE_DESC = 'Видеоархив - Первый канал'
- _VALID_URL = r'http://(?:www\.)?1tv\.ru/videoarchive/(?P<id>\d+)'
+ IE_NAME = '1tv'
+ IE_DESC = 'Первый канал'
+ _VALID_URL = r'http://(?:www\.)?1tv\.ru/(?:[^/]+/)+(?P<id>.+)'
- _TEST = {
+ _TESTS = [{
'url': 'http://www.1tv.ru/videoarchive/73390',
- 'md5': '3de6390cf0cca4a5eae1d1d83895e5ad',
+ 'md5': '777f525feeec4806130f4f764bc18a4f',
'info_dict': {
'id': '73390',
'ext': 'mp4',
'title': 'Олимпийские канатные дороги',
- 'description': 'md5:cc730d2bf4215463e37fff6a1e277b13',
- 'thumbnail': 'http://img1.1tv.ru/imgsize640x360/PR20140210114657.JPG',
+ 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
+ 'thumbnail': 're:^https?://.*\.(?:jpg|JPG)$',
'duration': 149,
+ 'like_count': int,
+ 'dislike_count': int,
+ },
+ 'skip': 'Only works from Russia',
+ }, {
+ 'url': 'http://www.1tv.ru/prj/inprivate/vypusk/35930',
+ 'md5': 'a1b6b60d530ebcf8daacf4565762bbaf',
+ 'info_dict': {
+ 'id': '35930',
+ 'ext': 'mp4',
+ 'title': 'Наедине со всеми. Людмила Сенчина',
+ 'description': 'md5:89553aed1d641416001fe8d450f06cb9',
+ 'thumbnail': 're:^https?://.*\.(?:jpg|JPG)$',
+ 'duration': 2694,
},
'skip': 'Only works from Russia',
- }
+ }]
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('id')
+ video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id, 'Downloading page')
video_url = self._html_search_regex(
- r'''(?s)jwplayer\('flashvideoportal_1'\)\.setup\({.*?'file': '([^']+)'.*?}\);''', webpage, 'video URL')
+ r'''(?s)(?:jwplayer\('flashvideoportal_1'\)\.setup\({|var\s+playlistObj\s*=).*?'file'\s*:\s*'([^']+)'.*?}\);''',
+ webpage, 'video URL')
title = self._html_search_regex(
- r'<div class="tv_translation">\s*<h1><a href="[^"]+">([^<]*)</a>', webpage, 'title')
+ [r'<div class="tv_translation">\s*<h1><a href="[^"]+">([^<]*)</a>',
+ r"'title'\s*:\s*'([^']+)'"], webpage, 'title')
description = self._html_search_regex(
- r'<div class="descr">\s*<div> </div>\s*<p>([^<]*)</p></div>', webpage, 'description', fatal=False)
+ r'<div class="descr">\s*<div> </div>\s*<p>([^<]*)</p></div>',
+ webpage, 'description', default=None) or self._html_search_meta(
+ 'description', webpage, 'description')
thumbnail = self._og_search_thumbnail(webpage)
- duration = self._og_search_property('video:duration', webpage, 'video duration', fatal=False)
+ duration = self._og_search_property(
+ 'video:duration', webpage,
+ 'video duration', fatal=False)
- like_count = self._html_search_regex(r'title="Понравилось".*?/></label> \[(\d+)\]',
- webpage, 'like count', fatal=False)
- dislike_count = self._html_search_regex(r'title="Не понравилось".*?/></label> \[(\d+)\]',
- webpage, 'dislike count', fatal=False)
+ like_count = self._html_search_regex(
+ r'title="Понравилось".*?/></label> \[(\d+)\]',
+ webpage, 'like count', default=None)
+ dislike_count = self._html_search_regex(
+ r'title="Не понравилось".*?/></label> \[(\d+)\]',
+ webpage, 'dislike count', default=None)
return {
'id': video_id,
IE_NAME = '5min'
_VALID_URL = r'''(?x)
(?:https?://[^/]*?5min\.com/Scripts/PlayerSeed\.js\?(?:.*?&)?playList=|
+ https?://(?:(?:massively|www)\.)?joystiq\.com/video/|
5min:)
(?P<id>\d+)
'''
+# coding: utf-8
from __future__ import unicode_literals
-import re
-
from .common import InfoExtractor
+from ..utils import (
+ xpath_text,
+ xpath_with_ns,
+)
class GamekingsIE(InfoExtractor):
- _VALID_URL = r'http://www\.gamekings\.tv/videos/(?P<name>[0-9a-z\-]+)'
- _TEST = {
+ _VALID_URL = r'http://www\.gamekings\.tv/(?:videos|nieuws)/(?P<id>[^/]+)'
+ _TESTS = [{
'url': 'http://www.gamekings.tv/videos/phoenix-wright-ace-attorney-dual-destinies-review/',
# MD5 is flaky, seems to change regularly
# 'md5': '2f32b1f7b80fdc5cb616efb4f387f8a3',
'info_dict': {
- 'id': '20130811',
+ 'id': 'phoenix-wright-ace-attorney-dual-destinies-review',
'ext': 'mp4',
'title': 'Phoenix Wright: Ace Attorney \u2013 Dual Destinies Review',
'description': 'md5:36fd701e57e8c15ac8682a2374c99731',
- }
- }
+ 'thumbnail': 're:^https?://.*\.jpg$',
+ },
+ }, {
+ # vimeo video
+ 'url': 'http://www.gamekings.tv/videos/the-legend-of-zelda-majoras-mask/',
+ 'md5': '12bf04dfd238e70058046937657ea68d',
+ 'info_dict': {
+ 'id': 'the-legend-of-zelda-majoras-mask',
+ 'ext': 'mp4',
+ 'title': 'The Legend of Zelda: Majora’s Mask',
+ 'description': 'md5:9917825fe0e9f4057601fe1e38860de3',
+ 'thumbnail': 're:^https?://.*\.jpg$',
+ },
+ }, {
+ 'url': 'http://www.gamekings.tv/nieuws/gamekings-extra-shelly-en-david-bereiden-zich-voor-op-de-livestream/',
+ 'only_matching': True,
+ }]
def _real_extract(self, url):
+ video_id = self._match_id(url)
- mobj = re.match(self._VALID_URL, url)
- name = mobj.group('name')
- webpage = self._download_webpage(url, name)
- video_url = self._og_search_video_url(webpage)
+ webpage = self._download_webpage(url, video_id)
- video = re.search(r'[0-9]+', video_url)
- video_id = video.group(0)
+ playlist_id = self._search_regex(
+ r'gogoVideo\(\s*\d+\s*,\s*"([^"]+)', webpage, 'playlist id')
- # Todo: add medium format
- video_url = video_url.replace(video_id, 'large/' + video_id)
+ playlist = self._download_xml(
+ 'http://www.gamekings.tv/wp-content/themes/gk2010/rss_playlist.php?id=%s' % playlist_id,
+ video_id)
+
+ NS_MAP = {
+ 'jwplayer': 'http://rss.jwpcdn.com/'
+ }
+
+ item = playlist.find('./channel/item')
+
+ thumbnail = xpath_text(item, xpath_with_ns('./jwplayer:image', NS_MAP), 'thumbnail')
+ video_url = item.find(xpath_with_ns('./jwplayer:source', NS_MAP)).get('file')
return {
'id': video_id,
- 'ext': 'mp4',
'url': video_url,
'title': self._og_search_title(webpage),
'description': self._og_search_description(webpage),
+ 'thumbnail': thumbnail,
}
# coding: utf-8
from __future__ import unicode_literals
+import re
+
from .common import InfoExtractor
from ..utils import (
int_or_none,
webpage = self._download_webpage(url, video_id)
og_title = self._og_search_title(webpage)
- title = og_title.replace(' - Video bei GameStar.de', '').strip()
+ title = re.sub(r'\s*- Video (bei|-) GameStar\.de$', '', og_title)
url = 'http://gamestar.de/_misc/videos/portal/getVideoUrl.cfm?premium=0&videoId=' + video_id
compat_urllib_parse,
compat_urllib_request,
)
+from ..utils import remove_end
class GDCVaultIE(InfoExtractor):
def _parse_flv(self, xml_description):
video_formats = []
- akami_url = xml_description.find('./metadata/akamaiHost').text
+ akamai_url = xml_description.find('./metadata/akamaiHost').text
slide_video_path = xml_description.find('./metadata/slideVideo').text
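+ # rtmpdump-style split: the connection URL keeps only the app ('ondemand?ovpfv=1.1') while the stream name goes into play_path, minus the .flv suffix.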
video_formats.append({
- 'url': 'rtmp://' + akami_url + '/' + slide_video_path,
+ 'url': 'rtmp://%s/ondemand?ovpfv=1.1' % akamai_url,
+ 'play_path': remove_end(slide_video_path, '.flv'),
+ 'ext': 'flv',
'format_note': 'slide deck video',
'quality': -2,
'preference': -2,
})
speaker_video_path = xml_description.find('./metadata/speakerVideo').text
video_formats.append({
- 'url': 'rtmp://' + akami_url + '/' + speaker_video_path,
+ 'url': 'rtmp://%s/ondemand?ovpfv=1.1' % akamai_url,
+ 'play_path': remove_end(speaker_video_path, '.flv'),
+ 'ext': 'flv',
'format_note': 'speaker video',
'quality': -1,
'preference': -1,
{
'url': 'http://discourse.ubuntu.com/t/unity-8-desktop-mode-windows-on-mir/1986',
'info_dict': {
+ 'id': '1986',
'title': 'Unity 8 desktop-mode windows on Mir! - Ubuntu Discourse',
},
'playlist_mincount': 2,
'upload_date': '20150126',
},
'add_ie': ['Viddler'],
- }
+ },
+ # jwplayer YouTube
+ {
+ 'url': 'http://media.nationalarchives.gov.uk/index.php/webinar-using-discovery-national-archives-online-catalogue/',
+ 'info_dict': {
+ 'id': 'Mrj4DVp2zeA',
+ 'ext': 'mp4',
+ 'upload_date': '20150212',
+ 'uploader': 'The National Archives UK',
+ 'description': 'md5:a236581cd2449dd2df4f93412f3f01c6',
+ 'uploader_id': 'NationalArchives08',
+ 'title': 'Webinar: Using Discovery, The National Archives’ online catalogue',
+ },
+ },
+ # rtl.nl embed
+ {
+ 'url': 'http://www.rtlnieuws.nl/nieuws/buitenland/aanslagen-kopenhagen',
+ 'playlist_mincount': 5,
+ 'info_dict': {
+ 'id': 'aanslagen-kopenhagen',
+ 'title': 'Aanslagen Kopenhagen | RTL Nieuws',
+ }
+ },
+ # Zapiks embed
+ {
+ 'url': 'http://www.skipass.com/news/116090-bon-appetit-s5ep3-baqueira-mi-cor.html',
+ 'info_dict': {
+ 'id': '118046',
+ 'ext': 'mp4',
+ 'title': 'EP3S5 - Bon Appétit - Baqueira Mi Corazon !',
+ }
+ },
+ # Kaltura embed
+ {
+ 'url': 'http://www.monumentalnetwork.com/videos/john-carlson-postgame-2-25-15',
+ 'info_dict': {
+ 'id': '1_eergr3h1',
+ 'ext': 'mp4',
+ 'upload_date': '20150226',
+ 'uploader_id': 'MonumentalSports-Kaltura@perfectsensedigital.com',
+ 'timestamp': int,
+ 'title': 'John Carlson Postgame 2/25/15',
+ },
+ },
]
def report_following_redirect(self, new_url):
'entries': entries,
}
+ # Look for embedded rtl.nl player
+ matches = re.findall(
+ r'<iframe\s+(?:[a-zA-Z-]+="[^"]+"\s+)*?src="((?:https?:)?//(?:www\.)?rtl\.nl/system/videoplayer/[^"]+video_embed[^"]+)"',
+ webpage)
+ if matches:
+ return _playlist_from_matches(matches, ie='RtlNl')
+
# Look for embedded (iframe) Vimeo player
mobj = re.search(
r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//player\.vimeo\.com/video/.+?)\1', webpage)
player_url = unescapeHTML(mobj.group('url'))
surl = smuggle_url(player_url, {'Referer': url})
return self.url_result(surl)
-
# Look for embedded (swf embed) Vimeo player
mobj = re.search(
r'<embed[^>]+?src="((?:https?:)?//(?:www\.)?vimeo\.com/moogaloop\.swf.+?)"', webpage)
# Look for embedded sbs.com.au player
mobj = re.search(
- r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:www\.)sbs\.com\.au/ondemand/video/single/.+?)\1',
+ r'''(?x)
+ (?:
+ <meta\s+property="og:video"\s+content=|
+ <iframe[^>]+?src=
+ )
+ (["\'])(?P<url>https?://(?:www\.)?sbs\.com\.au/ondemand/video/.+?)\1''',
webpage)
if mobj is not None:
return self.url_result(mobj.group('url'), 'SBS')
if mobj is not None:
return self.url_result(mobj.group('url'), 'Livestream')
+ # Look for Zapiks embed
+ mobj = re.search(
+ r'<iframe[^>]+src="(?P<url>https?://(?:www\.)?zapiks\.fr/index\.php\?.+?)"', webpage)
+ if mobj is not None:
+ return self.url_result(mobj.group('url'), 'Zapiks')
+
+ # Look for Kaltura embeds
+ mobj = re.search(
+ r"(?s)kWidget\.(?:thumb)?[Ee]mbed\(\{.*?'wid'\s*:\s*'_?(?P<partner_id>[^']+)',.*?'entry_id'\s*:\s*'(?P<id>[^']+)',", webpage)
+ if mobj is not None:
+ return self.url_result('kaltura:%(partner_id)s:%(id)s' % mobj.groupdict(), 'Kaltura')
+
def check_video(vurl):
+ if YoutubeIE.suitable(vurl):
+ return True
vpath = compat_urlparse.urlparse(vurl).path
vext = determine_ext(vpath)
return '.' in vpath and vext not in ('swf', 'png', 'jpg', 'srt', 'sbv', 'sub', 'vtt', 'ttml')
JWPlayerOptions|
jwplayer\s*\(\s*["'][^'"]+["']\s*\)\s*\.setup
)
- .*?file\s*:\s*["\'](.*?)["\']''', webpage))
+ .*?
+ ['"]?file['"]?\s*:\s*["\'](.*?)["\']''', webpage))
if not found:
# Broaden the search a little bit
found = filter_video(re.findall(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage))
return entries[0]
else:
for num, e in enumerate(entries, start=1):
- e['title'] = '%s (%d)' % (e['title'], num)
+ # 'url' results don't have a title
+ if e.get('title') is not None:
+ e['title'] = '%s (%d)' % (e['title'], num)
return {
'_type': 'playlist',
'entries': entries,
duration = parse_duration(self._html_search_regex(
r'<span class="duration">\s*-?\s*(.*?)</span>',
webpage, 'duration', fatal=False))
- family_friendly = self._html_search_meta(
- 'isFamilyFriendly', webpage, default='false')
flashvars = compat_parse_qs(self._html_search_regex(
r'<embed.+?id="flash-player-embed".+?flashvars="([^"]+)"',
'title': title,
'thumbnail': thumbnail,
'duration': duration,
- 'age_limit': 0 if family_friendly == 'true' else 18,
+ 'age_limit': self._family_friendly_search(webpage),
}
--- /dev/null
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import smuggle_url
+
+
+class HistoryIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?history\.com/(?:[^/]+/)+(?P<id>[^/]+?)(?:$|[?#])'
+
+ _TESTS = [{
+ 'url': 'http://www.history.com/topics/valentines-day/history-of-valentines-day/videos/bet-you-didnt-know-valentines-day?m=528e394da93ae&s=undefined&f=1&free=false',
+ 'md5': '6fe632d033c92aa10b8d4a9be047a7c5',
+ 'info_dict': {
+ 'id': 'bLx5Dv5Aka1G',
+ 'ext': 'mp4',
+ 'title': "Bet You Didn't Know: Valentine's Day",
+ 'description': 'md5:7b57ea4829b391995b405fa60bd7b5f7',
+ },
+ 'add_ie': ['ThePlatform'],
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
+ video_url = self._search_regex(
+ r'data-href="[^"]*/%s"[^>]+data-release-url="([^"]+)"' % video_id,
+ webpage, 'video url')
+
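+ # ThePlatform releases on history.com are signed; the site's player key pair is smuggled through so ThePlatformIE can compute the signature.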
+ return self.url_result(smuggle_url(video_url, {'sig': {'key': 'crazyjava', 'secret': 's3cr3t'}}))
},
{
'url': 'http://me.ign.com/en/feature/15775/100-little-things-in-gta-5-that-will-blow-your-mind',
+ 'info_dict': {
+ 'id': '100-little-things-in-gta-5-that-will-blow-your-mind',
+ },
'playlist': [
{
'info_dict': {
--- /dev/null
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ js_to_json,
+ mimetype2ext,
+ ExtractorError,
+)
+
+
+class ImgurIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:i\.)?imgur\.com/(?P<id>[a-zA-Z0-9]+)(?:\.mp4|\.gifv)?'
+
+ _TESTS = [{
+ 'url': 'https://i.imgur.com/A61SaA1.gifv',
+ 'info_dict': {
+ 'id': 'A61SaA1',
+ 'ext': 'mp4',
+ 'title': 're:Imgur GIF$|MRW gifv is up and running without any bugs$',
+ 'description': 're:The origin of the Internet\'s most viral images$|The Internet\'s visual storytelling community\. Explore, share, and discuss the best visual stories the Internet has to offer\.$',
+ },
+ }, {
+ 'url': 'https://imgur.com/A61SaA1',
+ 'info_dict': {
+ 'id': 'A61SaA1',
+ 'ext': 'mp4',
+ 'title': 're:Imgur GIF$|MRW gifv is up and running without any bugs$',
+ 'description': 're:The origin of the Internet\'s most viral images$|The Internet\'s visual storytelling community\. Explore, share, and discuss the best visual stories the Internet has to offer\.$',
+ },
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ width = int_or_none(self._search_regex(
+ r'<param name="width" value="([0-9]+)"',
+ webpage, 'width', fatal=False))
+ height = int_or_none(self._search_regex(
+ r'<param name="height" value="([0-9]+)"',
+ webpage, 'height', fatal=False))
+
+ video_elements = self._search_regex(
+ r'(?s)<div class="video-elements">(.*?)</div>',
+ webpage, 'video elements', default=None)
+ if not video_elements:
+ raise ExtractorError(
+ 'No sources found for video %s. Maybe an image?' % video_id,
+ expected=True)
+
+ formats = []
+ for m in re.finditer(r'<source\s+src="(?P<src>[^"]+)"\s+type="(?P<type>[^"]+)"', video_elements):
+ formats.append({
+ 'format_id': m.group('type').partition('/')[2],
+ 'url': self._proto_relative_url(m.group('src')),
+ 'ext': mimetype2ext(m.group('type')),
+ 'acodec': 'none',
+ 'width': width,
+ 'height': height,
+ 'http_headers': {
+ 'User-Agent': 'youtube-dl (like wget)',
+ },
+ })
+
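+ # Besides the <source> MP4/WebM variants, the page embeds a videoItem JS object describing the original GIF; expose it as a deprioritized fallback format.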
+ gif_json = self._search_regex(
+ r'(?s)var\s+videoItem\s*=\s*(\{.*?\})',
+ webpage, 'GIF code', fatal=False)
+ if gif_json:
+ gifd = self._parse_json(
+ gif_json, video_id, transform_source=js_to_json)
+ formats.append({
+ 'format_id': 'gif',
+ 'preference': -10,
+ 'width': width,
+ 'height': height,
+ 'ext': 'gif',
+ 'acodec': 'none',
+ 'vcodec': 'gif',
+ 'container': 'gif',
+ 'url': self._proto_relative_url(gifd['gifUrl']),
+ 'filesize': gifd.get('size'),
+ 'http_headers': {
+ 'User-Agent': 'youtube-dl (like wget)',
+ },
+ })
+
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'formats': formats,
+ 'description': self._og_search_description(webpage),
+ 'title': self._og_search_title(webpage),
+ }
r'comment_count\s*=\s*\'([^\']+)\';',
webpage, 'comment_count', fatal=False)
- family_friendly = self._html_search_meta(
- 'isFamilyFriendly', webpage, 'age limit', fatal=False)
-
content_url = self._html_search_meta(
'contentURL', webpage, 'content URL', fatal=False)
ext = determine_ext(content_url, 'mp4')
'duration': duration,
'view_count': int_or_none(view_count),
'comment_count': int_or_none(comment_count),
- 'age_limit': 18 if family_friendly == 'False' else 0,
+ 'age_limit': self._family_friendly_search(webpage),
'formats': formats,
}
--- /dev/null
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_urllib_parse
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+)
+
+
+class KalturaIE(InfoExtractor):
+ _VALID_URL = r'''(?x)
+ (?:kaltura:|
+ https?://(?:(?:www|cdnapisec)\.)?kaltura\.com/index\.php/kwidget/(?:[^/]+/)*?wid/_
+ )(?P<partner_id>\d+)
+ (?::|
+ /(?:[^/]+/)*?entry_id/
+ )(?P<id>[0-9a-z_]+)'''
+ _API_BASE = 'http://cdnapi.kaltura.com/api_v3/index.php?'
+ _TESTS = [
+ {
+ 'url': 'kaltura:269692:1_1jc2y3e4',
+ 'md5': '3adcbdb3dcc02d647539e53f284ba171',
+ 'info_dict': {
+ 'id': '1_1jc2y3e4',
+ 'ext': 'mp4',
+ 'title': 'Track 4',
+ 'upload_date': '20131219',
+ 'uploader_id': 'mlundberg@wolfgangsvault.com',
+ 'description': 'The Allman Brothers Band, 12/16/1981',
+ 'thumbnail': 're:^https?://.*/thumbnail/.*',
+ 'timestamp': int,
+ },
+ },
+ {
+ 'url': 'http://www.kaltura.com/index.php/kwidget/cache_st/1300318621/wid/_269692/uiconf_id/3873291/entry_id/1_1jc2y3e4',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'https://cdnapisec.kaltura.com/index.php/kwidget/wid/_557781/uiconf_id/22845202/entry_id/1_plr1syf3',
+ 'only_matching': True,
+ },
+ ]
+
+ def _kaltura_api_call(self, video_id, actions, *args, **kwargs):
+ params = actions[0]
+ if len(actions) > 1:
+ for i, a in enumerate(actions[1:], start=1):
+ for k, v in a.items():
+ params['%d:%s' % (i, k)] = v
+
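+ # Kaltura's multirequest API namespaces each call's parameters by index, e.g. the second action's 'action': 'get' is sent as '1:action=get'.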
+ query = compat_urllib_parse.urlencode(params)
+ url = self._API_BASE + query
+ data = self._download_json(url, video_id, *args, **kwargs)
+
+ status = data if len(actions) == 1 else data[0]
+ if status.get('objectType') == 'KalturaAPIException':
+ raise ExtractorError(
+ '%s said: %s' % (self.IE_NAME, status['message']))
+
+ return data
+
+ def _get_kaltura_signature(self, video_id, partner_id):
+ actions = [{
+ 'apiVersion': '3.1',
+ 'expiry': 86400,
+ 'format': 1,
+ 'service': 'session',
+ 'action': 'startWidgetSession',
+ 'widgetId': '_%s' % partner_id,
+ }]
+ return self._kaltura_api_call(
+ video_id, actions, note='Downloading Kaltura signature')['ks']
+
+ def _get_video_info(self, video_id, partner_id):
+ signature = self._get_kaltura_signature(video_id, partner_id)
+ actions = [
+ {
+ 'action': 'null',
+ 'apiVersion': '3.1.5',
+ 'clientTag': 'kdp:v3.8.5',
+ 'format': 1, # JSON, 2 = XML, 3 = PHP
+ 'service': 'multirequest',
+ 'ks': signature,
+ },
+ {
+ 'action': 'get',
+ 'entryId': video_id,
+ 'service': 'baseentry',
+ 'version': '-1',
+ },
+ {
+ 'action': 'getContextData',
+ 'contextDataParams:objectType': 'KalturaEntryContextDataParams',
+ 'contextDataParams:referrer': 'http://www.kaltura.com/',
+ 'contextDataParams:streamerType': 'http',
+ 'entryId': video_id,
+ 'service': 'baseentry',
+ },
+ ]
+ return self._kaltura_api_call(
+ video_id, actions, note='Downloading video info JSON')
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ mobj = re.match(self._VALID_URL, url)
+ partner_id, entry_id = mobj.group('partner_id'), mobj.group('id')
+
+ info, source_data = self._get_video_info(entry_id, partner_id)
+
+ formats = [{
+ 'format_id': '%(fileExt)s-%(bitrate)s' % f,
+ 'ext': f['fileExt'],
+ 'tbr': f['bitrate'],
+ 'fps': f.get('frameRate'),
+ 'filesize_approx': int_or_none(f.get('size'), invscale=1024),
+ 'container': f.get('containerFormat'),
+ 'vcodec': f.get('videoCodecId'),
+ 'height': f.get('height'),
+ 'width': f.get('width'),
+ 'url': '%s/flavorId/%s' % (info['dataUrl'], f['id']),
+ } for f in source_data['flavorAssets']]
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': info['name'],
+ 'formats': formats,
+ 'description': info.get('description'),
+ 'thumbnail': info.get('thumbnailUrl'),
+ 'duration': info.get('duration'),
+ 'timestamp': info.get('createdAt'),
+ 'uploader_id': info.get('userId'),
+ 'view_count': info.get('plays'),
+ }
+# -*- coding: utf-8 -*-
from __future__ import unicode_literals
import random
import re
from .common import InfoExtractor
-from ..utils import ExtractorError
+from ..utils import (
+ ExtractorError,
+ xpath_text,
+)
class Laola1TvIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?laola1\.tv/(?P<lang>[a-z]+)-(?P<portal>[a-z]+)/.*?/(?P<id>[0-9]+)\.html'
_TEST = {
- 'url': 'http://www.laola1.tv/de-de/live/bwf-bitburger-open-grand-prix-gold-court-1/250019.html',
+ 'url': 'http://www.laola1.tv/de-de/video/straubing-tigers-koelner-haie/227883.html',
'info_dict': {
- 'id': '250019',
+ 'id': '227883',
'ext': 'mp4',
- 'title': 'Bitburger Open Grand Prix Gold - Court 1',
- 'categories': ['Badminton'],
- 'uploader': 'BWF - Badminton World Federation',
- 'is_live': True,
+ 'title': 'Straubing Tigers - Kölner Haie',
+ 'categories': ['Eishockey'],
+ 'is_live': False,
},
'params': {
'skip_download': True,
}
}
- _BROKEN = True # Not really - extractor works fine, but f4m downloader does not support live streams yet.
-
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
r'flashvars\.([_a-zA-Z0-9]+)\s*=\s*"([^"]*)";', iframe)
flashvars = dict((m[0], m[1]) for m in flashvars_m)
+ partner_id = self._search_regex(
+ r'partnerid\s*:\s*"([^"]+)"', iframe, 'partner id')
+
xml_url = ('http://www.laola1.tv/server/hd_video.php?' +
- 'play=%s&partner=1&portal=%s&v5ident=&lang=%s' % (
- video_id, portal, lang))
+ 'play=%s&partner=%s&portal=%s&v5ident=&lang=%s' % (
+ video_id, partner_id, portal, lang))
hd_doc = self._download_xml(xml_url, video_id)
- title = hd_doc.find('.//video/title').text
- flash_url = hd_doc.find('.//video/url').text
- categories = hd_doc.find('.//video/meta_sports').text.split(',')
- uploader = hd_doc.find('.//video/meta_organistation').text
+ title = xpath_text(hd_doc, './/video/title', fatal=True)
+ flash_url = xpath_text(hd_doc, './/video/url', fatal=True)
+ uploader = xpath_text(hd_doc, './/video/meta_organistation')
+ is_live = xpath_text(hd_doc, './/video/islive') == 'true'
+
+ categories = xpath_text(hd_doc, './/video/meta_sports')
+ if categories:
+ categories = categories.split(',')
ident = random.randint(10000000, 99999999)
token_url = '%s&ident=%s&klub=0&unikey=0&timestamp=%s&auth=%s' % (
token_doc = self._download_xml(
token_url, video_id, note='Downloading token')
token_attrib = token_doc.find('.//token').attrib
- if token_attrib.get('auth') == 'blocked':
- raise ExtractorError('Token error: ' % token_attrib.get('comment'))
+ if token_attrib.get('auth') in ('blocked', 'restricted'):
+ raise ExtractorError(
+ 'Token error: %s' % token_attrib.get('comment'), expected=True)
video_url = '%s?hdnea=%s&hdcore=3.2.0' % (
token_attrib['url'], token_attrib['auth'])
return {
'id': video_id,
- 'is_live': True,
+ 'is_live': is_live,
'title': title,
'url': video_url,
'uploader': uploader,
--- /dev/null
+# coding: utf-8
+from __future__ import unicode_literals
+
+import datetime
+import re
+import time
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_urlparse,
+ compat_urllib_parse,
+)
+from ..utils import (
+ determine_ext,
+ ExtractorError,
+ parse_iso8601,
+)
+
+
+class LetvIE(InfoExtractor):
+ _VALID_URL = r'http://www\.letv\.com/ptv/vplay/(?P<id>\d+)\.html'
+
+ _TESTS = [{
+ 'url': 'http://www.letv.com/ptv/vplay/22005890.html',
+ 'md5': 'cab23bd68d5a8db9be31c9a222c1e8df',
+ 'info_dict': {
+ 'id': '22005890',
+ 'ext': 'mp4',
+ 'title': '第87届奥斯卡颁奖礼完美落幕 《鸟人》成最大赢家',
+ 'timestamp': 1424747397,
+ 'upload_date': '20150224',
+ 'description': 'md5:a9cb175fd753e2962176b7beca21a47c',
+ }
+ }, {
+ 'url': 'http://www.letv.com/ptv/vplay/1415246.html',
+ 'info_dict': {
+ 'id': '1415246',
+ 'ext': 'mp4',
+ 'title': '美人天下01',
+ 'description': 'md5:f88573d9d7225ada1359eaf0dbf8bcda',
+ },
+ 'expected_warnings': [
+ 'publish time'
+ ]
+ }]
+ # http://www.letv.com/ptv/vplay/1118082.html
+ # This video is available only in Mainland China
+
+ @staticmethod
+ def urshift(val, n):
+ return val >> n if val >= 0 else (val + 0x100000000) >> n
+
+ # ror() and calc_time_key() are reverse-engineered from the embedded swf player (KLetvPlayer.swf)
+ def ror(self, param1, param2):
+ _loc3_ = 0
+ while _loc3_ < param2:
+ param1 = self.urshift(param1, 1) + ((param1 & 1) << 31)
+ _loc3_ += 1
+ return param1
+
+ def calc_time_key(self, param1):
+ _loc2_ = 773625421
+ _loc3_ = self.ror(param1, _loc2_ % 13)
+ _loc3_ = _loc3_ ^ _loc2_
+ _loc3_ = self.ror(_loc3_, _loc2_ % 17)
+ return _loc3_
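+ # urshift() emulates a logical (unsigned) 32-bit right shift, so ror()
+ # is a 32-bit rotate-right, e.g. ror(1, 1) == 0x80000000. calc_time_key()
+ # merely obfuscates the request timestamp the same way the Flash player does.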
+
+ def _real_extract(self, url):
+ media_id = self._match_id(url)
+ page = self._download_webpage(url, media_id)
+ params = {
+ 'id': media_id,
+ 'platid': 1,
+ 'splatid': 101,
+ 'format': 1,
+ 'tkey': self.calc_time_key(int(time.time())),
+ 'domain': 'www.letv.com'
+ }
+ play_json = self._download_json(
+ 'http://api.letv.com/mms/out/video/playJson?' + compat_urllib_parse.urlencode(params),
+ media_id, 'playJson data')
+
+ # Check for errors
+ playstatus = play_json['playstatus']
+ if playstatus['status'] == 0:
+ flag = playstatus['flag']
+ if flag == 1:
+ msg = 'Country %s auth error' % playstatus['country']
+ else:
+ msg = 'Generic error. flag = %d' % flag
+ raise ExtractorError(msg, expected=True)
+
+ playurl = play_json['playurl']
+
+ formats = ['350', '1000', '1300', '720p', '1080p']
+ dispatch = playurl['dispatch']
+
+ urls = []
+ for format_id in formats:
+ if format_id in dispatch:
+ media_url = playurl['domain'][0] + dispatch[format_id][0]
+
+ # Mimic what flvxz.com does
+ url_parts = list(compat_urlparse.urlparse(media_url))
+ qs = dict(compat_urlparse.parse_qsl(url_parts[4]))
+ qs.update({
+ 'platid': '14',
+ 'splatid': '1401',
+ 'tss': 'no',
+ 'retry': 1
+ })
+ url_parts[4] = compat_urllib_parse.urlencode(qs)
+ media_url = compat_urlparse.urlunparse(url_parts)
+
+ url_info_dict = {
+ 'url': media_url,
+ 'ext': determine_ext(dispatch[format_id][1])
+ }
+
+ if format_id[-1:] == 'p':
+ url_info_dict['height'] = int(format_id[:-1])
+
+ urls.append(url_info_dict)
+
+ publish_time = parse_iso8601(self._html_search_regex(
+ r'发布时间 ([^<>]+) ', page, 'publish time', fatal=False),
+ delimiter=' ', timezone=datetime.timedelta(hours=8))
+ description = self._html_search_meta('description', page, fatal=False)
+
+ return {
+ 'id': media_id,
+ 'formats': urls,
+ 'title': playurl['title'],
+ 'thumbnail': playurl['pic'],
+ 'description': description,
+ 'timestamp': publish_time,
+ }
+
+
+class LetvTvIE(InfoExtractor):
+ _VALID_URL = r'http://www\.letv\.com/tv/(?P<id>\d+)\.html'
+ _TESTS = [{
+ 'url': 'http://www.letv.com/tv/46177.html',
+ 'info_dict': {
+ 'id': '46177',
+ 'title': '美人天下',
+ 'description': 'md5:395666ff41b44080396e59570dbac01c'
+ },
+ 'playlist_count': 35
+ }]
+
+ def _real_extract(self, url):
+ playlist_id = self._match_id(url)
+ page = self._download_webpage(url, playlist_id)
+
+ media_urls = list(set(re.findall(
+ r'http://www\.letv\.com/ptv/vplay/\d+\.html', page)))
+ entries = [self.url_result(media_url, ie='Letv')
+ for media_url in media_urls]
+
+ title = self._html_search_meta('keywords', page,
+ fatal=False).split(',')[0]
+ description = self._html_search_meta('description', page, fatal=False)
+
+ return self.playlist_result(entries, playlist_id, playlist_title=title,
+ playlist_description=description)
+
+
+class LetvPlaylistIE(LetvTvIE):
+ _VALID_URL = r'http://tv\.letv\.com/[a-z]+/(?P<id>[a-z]+)/index\.s?html'
+ _TESTS = [{
+ 'url': 'http://tv.letv.com/izt/wuzetian/index.html',
+ 'info_dict': {
+ 'id': 'wuzetian',
+ 'title': '武媚娘传奇',
+ 'description': 'md5:e12499475ab3d50219e5bba00b3cb248'
+ },
+ # This playlist contains some extra videos other than the drama itself
+ 'playlist_mincount': 96
+ }, {
+ 'url': 'http://tv.letv.com/pzt/lswjzzjc/index.shtml',
+ 'info_dict': {
+ 'id': 'lswjzzjc',
+ # The title should be "劲舞青春", but I can't find a simple way to
+ # determine the playlist title
+ 'title': '乐视午间自制剧场',
+ 'description': 'md5:b1eef244f45589a7b5b1af9ff25a4489'
+ },
+ 'playlist_mincount': 7
+ }]
'url': 'http://new.livestream.com/tedx/cityenglish',
'info_dict': {
'title': 'TEDCity2.0 (English)',
+ 'id': '2245590',
},
'playlist_mincount': 4,
}, {
if is_relevant(video_data, video_id)]
if video_id is None:
# This is an event page:
- return self.playlist_result(videos, info['id'], info['full_name'])
+ return self.playlist_result(
+ videos, '%s' % info['id'], info['full_name'])
else:
if not videos:
raise ExtractorError('Cannot find video %s' % video_id)
import re
import json
-from .subtitles import SubtitlesInfoExtractor
from .common import InfoExtractor
from ..compat import (
compat_str,
)
-class LyndaIE(SubtitlesInfoExtractor):
+class LyndaIE(InfoExtractor):
IE_NAME = 'lynda'
IE_DESC = 'lynda.com videos'
- _VALID_URL = r'https?://www\.lynda\.com/[^/]+/[^/]+/\d+/(\d+)-\d\.html'
+ _VALID_URL = r'https?://www\.lynda\.com/(?:[^/]+/[^/]+/\d+|player/embed)/(\d+)'
_LOGIN_URL = 'https://www.lynda.com/login/login.aspx'
_NETRC_MACHINE = 'lynda'
ACCOUNT_CREDENTIALS_HINT = 'Use --username and --password options to provide lynda.com account credentials.'
- _TEST = {
+ _TESTS = [{
'url': 'http://www.lynda.com/Bootstrap-tutorials/Using-exercise-files/110885/114408-4.html',
'md5': 'ecfc6862da89489161fb9cd5f5a6fac1',
'info_dict': {
'title': 'Using the exercise files',
'duration': 68
}
- }
+ }, {
+ 'url': 'https://www.lynda.com/player/embed/133770?tr=foo=1;bar=g;fizz=rt&fs=0',
+ 'only_matching': True,
+ }]
def _real_initialize(self):
self._login()
self._check_formats(formats, video_id)
self._sort_formats(formats)
- if self._downloader.params.get('listsubtitles', False):
- self._list_available_subtitles(video_id, page)
- return
-
- subtitles = self._fix_subtitles(self.extract_subtitles(video_id, page))
+ subtitles = self.extract_subtitles(video_id, page)
return {
'id': video_id,
if re.search(self._SUCCESSFUL_LOGIN_REGEX, login_page) is None:
raise ExtractorError('Unable to log in')
- def _fix_subtitles(self, subtitles):
- if subtitles is None:
- return subtitles # subtitles not requested
-
- fixed_subtitles = {}
- for k, v in subtitles.items():
- subs = json.loads(v)
- if len(subs) == 0:
+ def _fix_subtitles(self, subs):
+ srt = ''
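+ # Each caption stores only its start Timecode; the end time is borrowed from the next caption's start, which is why the loop stops at len(subs) - 1.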
+ for pos in range(0, len(subs) - 1):
+ seq_current = subs[pos]
+ m_current = re.match(self._TIMECODE_REGEX, seq_current['Timecode'])
+ if m_current is None:
continue
- srt = ''
- for pos in range(0, len(subs) - 1):
- seq_current = subs[pos]
- m_current = re.match(self._TIMECODE_REGEX, seq_current['Timecode'])
- if m_current is None:
- continue
- seq_next = subs[pos + 1]
- m_next = re.match(self._TIMECODE_REGEX, seq_next['Timecode'])
- if m_next is None:
- continue
- appear_time = m_current.group('timecode')
- disappear_time = m_next.group('timecode')
- text = seq_current['Caption']
- srt += '%s\r\n%s --> %s\r\n%s' % (str(pos), appear_time, disappear_time, text)
- if srt:
- fixed_subtitles[k] = srt
- return fixed_subtitles
-
- def _get_available_subtitles(self, video_id, webpage):
+ seq_next = subs[pos + 1]
+ m_next = re.match(self._TIMECODE_REGEX, seq_next['Timecode'])
+ if m_next is None:
+ continue
+ appear_time = m_current.group('timecode')
+ disappear_time = m_next.group('timecode')
+ text = seq_current['Caption'].lstrip()
+ srt += '%s\r\n%s --> %s\r\n%s\r\n\r\n' % (str(pos), appear_time, disappear_time, text)
+ if srt:
+ return srt
+
+ def _get_subtitles(self, video_id, webpage):
url = 'http://www.lynda.com/ajax/player?videoId=%s&type=transcript' % video_id
- sub = self._download_webpage(url, None, False)
- sub_json = json.loads(sub)
- return {'en': url} if len(sub_json) > 0 else {}
+ subs = self._download_json(url, None, False)
+ if subs:
+ return {'en': [{'ext': 'srt', 'data': self._fix_subtitles(subs)}]}
+ else:
+ return {}
class LyndaCourseIE(InfoExtractor):
from .common import InfoExtractor
from .youtube import YoutubeIE
-from ..compat import (
- compat_urlparse,
-)
from ..utils import (
clean_html,
ExtractorError,
'upload_date': '20121109',
'uploader_id': 'MIT',
'uploader': 'MIT OpenCourseWare',
- # 'subtitles': 'http://ocw.mit.edu/courses/electrical-engineering-and-computer-science/6-041-probabilistic-systems-analysis-and-applied-probability-fall-2010/video-lectures/lecture-7-multiple-variables-expectations-independence/MIT6_041F11_lec07_300k.mp4.srt'
}
},
{
'uploader_id': 'MIT',
'uploader': 'MIT OpenCourseWare',
'description': 'This section contains lecture video excerpts, lecture notes, an interactive mathlet with supporting documents, and problem solving videos.',
- # 'subtitles': 'http://ocw.mit.edu//courses/mathematics/18-01sc-single-variable-calculus-fall-2010/ocw-18.01-f07-lec01_300k.SRT'
}
}
]
metadata = re.sub(r'[\'"]', '', embed_chapter_media.group(1))
metadata = re.split(r', ?', metadata)
yt = metadata[1]
- subs = compat_urlparse.urljoin(self._BASE_URL, metadata[7])
else:
# search for call to ocw_embed_chapter_media(container_id, media_url, provider, page_url, image_url, captions_file)
embed_media = re.search(r'ocw_embed_media\((.+?)\)', webpage)
metadata = re.sub(r'[\'"]', '', embed_media.group(1))
metadata = re.split(r', ?', metadata)
yt = metadata[1]
- subs = compat_urlparse.urljoin(self._BASE_URL, metadata[5])
else:
raise ExtractorError('Unable to find embedded YouTube video.')
video_id = YoutubeIE.extract_id(yt)
'title': title,
'description': description,
'url': yt,
- 'url_transparent'
- 'subtitles': subs,
'ie_key': 'Youtube',
}
IE_NAME = 'mitele.es'
_VALID_URL = r'http://www\.mitele\.es/[^/]+/[^/]+/[^/]+/(?P<id>[^/]+)/'
- _TEST = {
+ _TESTS = [{
'url': 'http://www.mitele.es/programas-tv/diario-de/la-redaccion/programa-144/',
'md5': '6a75fe9d0d3275bead0cb683c616fddb',
'info_dict': {
'display_id': 'programa-144',
'duration': 2913,
},
- }
+ }]
def _real_extract(self, url):
episode = self._match_id(url)
class MporaIE(InfoExtractor):
- _VALID_URL = r'https?://(www\.)?mpora\.(?:com|de)/videos/(?P<id>[^?#/]+)'
+ _VALID_URL = r'https?://(?:www\.)?mpora\.(?:com|de)/videos/(?P<id>[^?#/]+)'
IE_NAME = 'MPORA'
_TEST = {
webpage = self._download_webpage(url, video_id)
data_json = self._search_regex(
- r"new FM\.Player\('[^']+',\s*(\{.*?)\).player;", webpage, 'json')
+ [r"new FM\.Player\('[^']+',\s*(\{.*?)\).player;",
+ r"new\s+FM\.Kaltura\.Player\('[^']+'\s*,\s*({.+?})\);"],
+ webpage, 'json')
data = self._parse_json(data_json, video_id)
uploader = data['info_overlay'].get('username')
import re
-from .subtitles import SubtitlesInfoExtractor
+from .common import InfoExtractor
from ..compat import (
compat_urllib_parse,
compat_urllib_request,
return '{http://search.yahoo.com/mrss/}%s' % tag
-class MTVServicesInfoExtractor(SubtitlesInfoExtractor):
+class MTVServicesInfoExtractor(InfoExtractor):
_MOBILE_TEMPLATE = None
@staticmethod
def _extract_subtitles(self, mdoc, mtvn_id):
subtitles = {}
- FORMATS = {
- 'scc': 'cea-608',
- 'eia-608': 'cea-608',
- 'xml': 'ttml',
- }
- subtitles_format = FORMATS.get(
- self._downloader.params.get('subtitlesformat'), 'ttml')
for transcript in mdoc.findall('.//transcript'):
if transcript.get('kind') != 'captions':
continue
lang = transcript.get('srclang')
- for typographic in transcript.findall('./typographic'):
- captions_format = typographic.get('format')
- if captions_format == subtitles_format:
- subtitles[lang] = compat_str(typographic.get('src'))
- break
- if self._downloader.params.get('listsubtitles', False):
- self._list_available_subtitles(mtvn_id, subtitles)
- return self.extract_subtitles(mtvn_id, subtitles)
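+ # Expose every typographic variant (scc, ttml, ...) per language; format selection is now handled generically instead of via the old subtitlesformat lookup.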
+ subtitles[lang] = [{
+ 'url': compat_str(typographic.get('src')),
+ 'ext': typographic.get('format')
+ } for typographic in transcript.findall('./typographic')]
+ return subtitles
def _get_video_info(self, itemdoc):
uri = itemdoc.find('guid').text
webpage, 'mgid')
videos_info = self._get_videos_info(mgid)
- if self._downloader.params.get('listsubtitles', False):
- return
return videos_info
import re
from .common import InfoExtractor
-from ..utils import (
- parse_duration,
- unified_strdate,
-)
class MusicVaultIE(InfoExtractor):
_VALID_URL = r'https?://www\.musicvault\.com/(?P<uploader_id>[^/?#]*)/video/(?P<display_id>[^/?#]*)_(?P<id>[0-9]+)\.html'
_TEST = {
'url': 'http://www.musicvault.com/the-allman-brothers-band/video/straight-from-the-heart_1010863.html',
- 'md5': '2cdbb3ae75f7fb3519821507d2fb3c15',
+ 'md5': '3adcbdb3dcc02d647539e53f284ba171',
'info_dict': {
'id': '1010863',
'ext': 'mp4',
'duration': 244,
'uploader': 'The Allman Brothers Band',
'thumbnail': 're:^https?://.*/thumbnail/.*',
- 'upload_date': '19811216',
+ 'upload_date': '20131219',
'location': 'Capitol Theatre (Passaic, NJ)',
'description': 'Listen to The Allman Brothers Band perform Straight from the Heart at Capitol Theatre (Passaic, NJ) on Dec 16, 1981',
+ 'timestamp': int,
}
}
r'<h1.*?>(.*?)</h1>', data_div, 'uploader', fatal=False)
title = self._html_search_regex(
r'<h2.*?>(.*?)</h2>', data_div, 'title')
- upload_date = unified_strdate(self._html_search_regex(
- r'<h3.*?>(.*?)</h3>', data_div, 'uploader', fatal=False))
location = self._html_search_regex(
r'<h4.*?>(.*?)</h4>', data_div, 'location', fatal=False)
- duration = parse_duration(self._html_search_meta('duration', webpage))
-
- VIDEO_URL_TEMPLATE = 'http://cdnapi.kaltura.com/p/%(uid)s/sp/%(wid)s/playManifest/entryId/%(entry_id)s/format/url/protocol/http'
kaltura_id = self._search_regex(
r'<div id="video-detail-player" data-kaltura-id="([^"]+)"',
webpage, 'kaltura ID')
- video_url = VIDEO_URL_TEMPLATE % {
- 'entry_id': kaltura_id,
- 'wid': self._search_regex(r'/wid/_([0-9]+)/', webpage, 'wid'),
- 'uid': self._search_regex(r'uiconf_id/([0-9]+)/', webpage, 'uid'),
- }
+ wid = self._search_regex(r'/wid/_([0-9]+)/', webpage, 'wid')
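+ # The page only exposes the Kaltura widget id and entry id, so delegate the actual extraction to KalturaIE through a transparent URL result.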
return {
'id': mobj.group('id'),
- 'url': video_url,
- 'ext': 'mp4',
+ '_type': 'url_transparent',
+ 'url': 'kaltura:%s:%s' % (wid, kaltura_id),
+ 'ie_key': 'Kaltura',
'display_id': display_id,
'uploader_id': mobj.group('uploader_id'),
'thumbnail': thumbnail,
'description': self._html_search_meta('description', webpage),
- 'upload_date': upload_date,
'location': location,
'title': title,
'uploader': uploader,
- 'duration': duration,
}
--- /dev/null
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ smuggle_url,
+ url_basename,
+)
+
+
+class NationalGeographicIE(InfoExtractor):
+ _VALID_URL = r'http://video\.nationalgeographic\.com/video/.*?'
+
+ _TEST = {
+ 'url': 'http://video.nationalgeographic.com/video/news/150210-news-crab-mating-vin?source=featuredvideo',
+ 'info_dict': {
+ 'id': '4DmDACA6Qtk_',
+ 'ext': 'flv',
+ 'title': 'Mating Crabs Busted by Sharks',
+ 'description': 'md5:16f25aeffdeba55aaa8ec37e093ad8b3',
+ },
+ 'add_ie': ['ThePlatform'],
+ }
+
+ def _real_extract(self, url):
+ name = url_basename(url)
+
+ webpage = self._download_webpage(url, name)
+ feed_url = self._search_regex(r'data-feed-url="([^"]+)"', webpage, 'feed url')
+ guid = self._search_regex(r'data-video-guid="([^"]+)"', webpage, 'guid')
+
+ feed = self._download_xml('%s?byGuid=%s' % (feed_url, guid), name)
+ content = feed.find('.//{http://search.yahoo.com/mrss/}content')
+ theplatform_id = url_basename(content.attrib.get('url'))
+
+ return self.url_result(smuggle_url(
+ 'http://link.theplatform.com/s/ngs/%s?format=SMIL&formats=MPEG4&manifest=f4m' % theplatform_id,
+ # For some reason, the normal links don't work and we must force the use of f4m
+ {'force_smil_url': True}))
from __future__ import unicode_literals
import re
-import json
from .common import InfoExtractor
from ..compat import (
_TESTS = [
{
- 'url': 'http://www.nbc.com/chicago-fire/video/i-am-a-firefighter/2734188',
+ 'url': 'http://www.nbc.com/the-tonight-show/segments/112966',
# md5 checksum is not stable
'info_dict': {
- 'id': 'bTmnLCvIbaaH',
+ 'id': 'c9xnCo0YPOPH',
'ext': 'flv',
- 'title': 'I Am a Firefighter',
- 'description': 'An emergency puts Dawson\'sf irefighter skills to the ultimate test in this four-part digital series.',
+ 'title': 'Jimmy Fallon Surprises Fans at Ben & Jerry\'s',
+ 'description': 'Jimmy gives out free scoops of his new "Tonight Dough" ice cream flavor by surprising customers at the Ben & Jerry\'s scoop shop.',
},
},
{
class NBCNewsIE(InfoExtractor):
- _VALID_URL = r'''(?x)https?://www\.nbcnews\.com/
- ((video/.+?/(?P<id>\d+))|
- (feature/[^/]+/(?P<title>.+)))
+ _VALID_URL = r'''(?x)https?://(?:www\.)?nbcnews\.com/
+ (?:video/.+?/(?P<id>\d+)|
+ (?:feature|nightly-news)/[^/]+/(?P<title>.+))
'''
_TESTS = [
'description': 'md5:757988edbaae9d7be1d585eb5d55cc04',
},
},
+ {
+ 'url': 'http://www.nbcnews.com/nightly-news/video/nightly-news-with-brian-williams-full-broadcast-february-4-394064451844',
+ 'md5': 'b5dda8cddd8650baa0dcb616dd2cf60d',
+ 'info_dict': {
+ 'id': 'sekXqyTVnmN3',
+ 'ext': 'mp4',
+ 'title': 'Nightly News with Brian Williams Full Broadcast (February 4)',
+ 'description': 'md5:1c10c1eccbe84a26e5debb4381e2d3c5',
+ },
+ },
]
def _real_extract(self, url):
'thumbnail': find_xpath_attr(info, 'media', 'type', 'thumbnail').text,
}
else:
- # "feature" pages use theplatform.com
+ # "feature" and "nightly-news" pages use theplatform.com
title = mobj.group('title')
webpage = self._download_webpage(url, title)
bootstrap_json = self._search_regex(
- r'var bootstrapJson = ({.+})\s*$', webpage, 'bootstrap json',
- flags=re.MULTILINE)
- bootstrap = json.loads(bootstrap_json)
+ r'var\s+(?:bootstrapJson|playlistData)\s*=\s*({.+});?\s*$',
+ webpage, 'bootstrap json', flags=re.MULTILINE)
+ bootstrap = self._parse_json(bootstrap_json, video_id)
info = bootstrap['results'][0]['video']
mpxid = info['mpxId']
'timestamp': 1344858571,
'age_limit': 12,
},
+ 'params': {
+ 'skip_download': 'Download only works from Germany',
+ }
}
def _real_extract(self, url):
from __future__ import unicode_literals
-from .subtitles import SubtitlesInfoExtractor
+from .common import InfoExtractor
from ..utils import (
fix_xml_ampersands,
parse_duration,
)
-class NPOBaseIE(SubtitlesInfoExtractor):
+class NPOBaseIE(InfoExtractor):
def _get_token(self, video_id):
token_page = self._download_webpage(
'http://ida.omroep.nl/npoplayer/i.js',
class NPOIE(NPOBaseIE):
IE_NAME = 'npo.nl'
- _VALID_URL = r'https?://www\.npo\.nl/[^/]+/[^/]+/(?P<id>[^/?]+)'
+ _VALID_URL = r'https?://(?:www\.)?npo\.nl/(?!live|radio)[^/]+/[^/]+/(?P<id>[^/?]+)'
_TESTS = [
{
subtitles = {}
if metadata.get('tt888') == 'ja':
- subtitles['nl'] = 'http://e.omroep.nl/tt888/%s' % video_id
-
- if self._downloader.params.get('listsubtitles', False):
- self._list_available_subtitles(video_id, subtitles)
- return
-
- subtitles = self.extract_subtitles(video_id, subtitles)
+ subtitles['nl'] = [{
+ 'ext': 'vtt',
+ 'url': 'http://e.omroep.nl/tt888/%s' % video_id,
+ }]
return {
'id': video_id,
class NPOLiveIE(NPOBaseIE):
IE_NAME = 'npo.nl:live'
- _VALID_URL = r'https?://www\.npo\.nl/live/(?P<id>.+)'
+ _VALID_URL = r'https?://(?:www\.)?npo\.nl/live/(?P<id>.+)'
_TEST = {
'url': 'http://www.npo.nl/live/npo-1',
}
+class NPORadioIE(InfoExtractor):
+ IE_NAME = 'npo.nl:radio'
+ _VALID_URL = r'https?://(?:www\.)?npo\.nl/radio/(?P<id>[^/]+)/?$'
+
+ _TEST = {
+ 'url': 'http://www.npo.nl/radio/radio-1',
+ 'info_dict': {
+ 'id': 'radio-1',
+ 'ext': 'mp3',
+ 'title': 're:^NPO Radio 1 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
+ 'is_live': True,
+ },
+ 'params': {
+ 'skip_download': True,
+ }
+ }
+
+ @staticmethod
+ def _html_get_attribute_regex(attribute):
+ return r'{0}\s*=\s*\'([^\']+)\''.format(attribute)
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
+ title = self._html_search_regex(
+ self._html_get_attribute_regex('data-channel'), webpage, 'title')
+
+ stream = self._parse_json(
+ self._html_search_regex(self._html_get_attribute_regex('data-streams'), webpage, 'data-streams'),
+ video_id)
+
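+ # The JSON in data-streams carries the stream URL plus its codec; for these bare audio streams the codec name doubles as the file extension.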
+ codec = stream.get('codec')
+
+ return {
+ 'id': video_id,
+ 'url': stream['url'],
+ 'title': self._live_title(title),
+ 'acodec': codec,
+ 'ext': codec,
+ 'is_live': True,
+ }
+
+
+class NPORadioFragmentIE(InfoExtractor):
+ IE_NAME = 'npo.nl:radio:fragment'
+ _VALID_URL = r'https?://(?:www\.)?npo\.nl/radio/[^/]+/fragment/(?P<id>\d+)'
+
+ _TEST = {
+ 'url': 'http://www.npo.nl/radio/radio-5/fragment/174356',
+ 'md5': 'dd8cc470dad764d0fdc70a9a1e2d18c2',
+ 'info_dict': {
+ 'id': '174356',
+ 'ext': 'mp3',
+ 'title': 'Jubileumconcert Willeke Alberti',
+ },
+ }
+
+ def _real_extract(self, url):
+ audio_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, audio_id)
+
+ title = self._html_search_regex(
+ r'href="/radio/[^/]+/fragment/%s" title="([^"]+)"' % audio_id,
+ webpage, 'title')
+
+ audio_url = self._search_regex(
+ r"data-streams='([^']+)'", webpage, 'audio url')
+
+ return {
+ 'id': audio_id,
+ 'url': audio_url,
+ 'title': title,
+ }
+
+
class TegenlichtVproIE(NPOIE):
IE_NAME = 'tegenlicht.vpro.nl'
_VALID_URL = r'https?://tegenlicht\.vpro\.nl/afleveringen/.*?'
import re
from .common import InfoExtractor
+from ..compat import compat_str
from ..utils import (
ExtractorError,
float_or_none,
parse_duration,
unified_strdate,
)
-from .subtitles import SubtitlesInfoExtractor
class NRKIE(InfoExtractor):
}
-class NRKTVIE(SubtitlesInfoExtractor):
+class NRKTVIE(InfoExtractor):
_VALID_URL = r'(?P<baseurl>http://tv\.nrk(?:super)?\.no/)(?:serie/[^/]+|program)/(?P<id>[a-zA-Z]{4}\d{8})(?:/\d{2}-\d{2}-\d{4})?(?:#del=(?P<part_id>\d+))?'
_TESTS = [
if self._downloader.params.get('verbose', False):
self.to_screen('[debug] %s' % txt)
- def _extract_captions(self, subtitlesurl, video_id, baseurl):
+ def _get_subtitles(self, subtitlesurl, video_id, baseurl):
url = "%s%s" % (baseurl, subtitlesurl)
self._debug_print('%s: Subtitle url: %s' % (video_id, url))
- captions = self._download_xml(url, video_id, 'Downloading subtitles')
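+ # Rewrite <br /> inside the TTML cues to CRLF before parsing, so the line breaks end up in p.text instead of being lost as child elements.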
+ captions = self._download_xml(
+ url, video_id, 'Downloading subtitles',
+ transform_source=lambda s: s.replace(r'<br />', '\r\n'))
lang = captions.get('lang', 'no')
ps = captions.findall('./{0}body/{0}div/{0}p'.format('{http://www.w3.org/ns/ttml}'))
srt = ''
duration = parse_duration(p.get('dur'))
starttime = self._seconds2str(begin)
endtime = self._seconds2str(begin + duration)
- text = '\n'.join(p.itertext())
- srt += '%s\r\n%s --> %s\r\n%s\r\n\r\n' % (str(pos), starttime, endtime, text)
- return {lang: srt}
+ srt += '%s\r\n%s --> %s\r\n%s\r\n\r\n' % (compat_str(pos), starttime, endtime, p.text)
+ return {lang: [
+ {'ext': 'ttml', 'url': url},
+ {'ext': 'srt', 'data': srt},
+ ]}
def _extract_f4m(self, manifest_url, video_id):
return self._extract_f4m_formats(manifest_url + '?hdcore=3.1.1&plugin=aasp-3.1.1.69.124', video_id)
webpage, 'subtitle URL', default=None)
subtitles = None
if subtitles_url:
- subtitles = self._extract_captions(subtitles_url, video_id, baseurl)
- if self._downloader.params.get('listsubtitles', False):
- self._list_available_subtitles(video_id, subtitles)
- return
+ subtitles = self.extract_subtitles(subtitles_url, video_id, baseurl)
return {
'id': video_id,
from .common import InfoExtractor
from ..utils import (
- unescapeHTML
+ clean_html,
+ xpath_text,
+ int_or_none,
)
_TESTS = [
{
'url': 'http://www.ntv.ru/novosti/863142/',
+ 'md5': 'ba7ea172a91cb83eb734cad18c10e723',
'info_dict': {
'id': '746000',
- 'ext': 'flv',
+ 'ext': 'mp4',
'title': 'Командующий Черноморским флотом провел переговоры в штабе ВМС Украины',
'description': 'Командующий Черноморским флотом провел переговоры в штабе ВМС Украины',
+ 'thumbnail': 're:^http://.*\.jpg',
'duration': 136,
},
- 'params': {
- # rtmp download
- 'skip_download': True,
- },
},
{
'url': 'http://www.ntv.ru/video/novosti/750370/',
+ 'md5': 'adecff79691b4d71e25220a191477124',
'info_dict': {
'id': '750370',
- 'ext': 'flv',
+ 'ext': 'mp4',
'title': 'Родные пассажиров пропавшего Boeing не верят в трагический исход',
'description': 'Родные пассажиров пропавшего Boeing не верят в трагический исход',
+ 'thumbnail': 're:^http://.*\.jpg',
'duration': 172,
},
- 'params': {
- # rtmp download
- 'skip_download': True,
- },
},
{
'url': 'http://www.ntv.ru/peredacha/segodnya/m23700/o232416',
+ 'md5': '82dbd49b38e3af1d00df16acbeab260c',
'info_dict': {
'id': '747480',
- 'ext': 'flv',
- 'title': '«Сегодня». 21 марта 2014 года. 16:00 ',
- 'description': '«Сегодня». 21 марта 2014 года. 16:00 ',
+ 'ext': 'mp4',
+ 'title': '«Сегодня». 21 марта 2014 года. 16:00',
+ 'description': '«Сегодня». 21 марта 2014 года. 16:00',
+ 'thumbnail': 're:^http://.*\.jpg',
'duration': 1496,
},
- 'params': {
- # rtmp download
- 'skip_download': True,
- },
},
{
'url': 'http://www.ntv.ru/kino/Koma_film',
+ 'md5': 'f825770930937aa7e5aca0dc0d29319a',
'info_dict': {
- 'id': '758100',
- 'ext': 'flv',
+ 'id': '1007609',
+ 'ext': 'mp4',
'title': 'Остросюжетный фильм «Кома»',
'description': 'Остросюжетный фильм «Кома»',
+ 'thumbnail': 're:^http://.*\.jpg',
'duration': 5592,
},
- 'params': {
- # rtmp download
- 'skip_download': True,
- },
},
{
'url': 'http://www.ntv.ru/serial/Delo_vrachey/m31760/o233916/',
+ 'md5': '9320cd0e23f3ea59c330dc744e06ff3b',
'info_dict': {
'id': '751482',
- 'ext': 'flv',
+ 'ext': 'mp4',
'title': '«Дело врачей»: «Деревце жизни»',
'description': '«Дело врачей»: «Деревце жизни»',
+ 'thumbnail': 're:^http://.*\.jpg',
'duration': 2590,
},
- 'params': {
- # rtmp download
- 'skip_download': True,
- },
},
]
def _real_extract(self, url):
video_id = self._match_id(url)
- page = self._download_webpage(url, video_id)
- video_id = self._html_search_regex(self._VIDEO_ID_REGEXES, page, 'video id')
+ webpage = self._download_webpage(url, video_id)
- player = self._download_xml('http://www.ntv.ru/vi%s/' % video_id, video_id, 'Downloading video XML')
- title = unescapeHTML(player.find('./data/title').text)
- description = unescapeHTML(player.find('./data/description').text)
+ video_id = self._html_search_regex(self._VIDEO_ID_REGEXES, webpage, 'video id')
- video = player.find('./data/video')
- video_id = video.find('./id').text
- thumbnail = video.find('./splash').text
- duration = int(video.find('./totaltime').text)
- view_count = int(video.find('./views').text)
- puid22 = video.find('./puid22').text
+ player = self._download_xml(
+ 'http://www.ntv.ru/vi%s/' % video_id,
+ video_id, 'Downloading video XML')
+ title = clean_html(xpath_text(player, './data/title', 'title', fatal=True))
+ description = clean_html(xpath_text(player, './data/description', 'description'))
- apps = {
- '4': 'video1',
- '7': 'video2',
- }
+ video = player.find('./data/video')
+ video_id = xpath_text(video, './id', 'video id')
+ thumbnail = xpath_text(video, './splash', 'thumbnail')
+ duration = int_or_none(xpath_text(video, './totaltime', 'duration'))
+ view_count = int_or_none(xpath_text(video, './views', 'view count'))
- app = apps.get(puid22, apps['4'])
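+ # rtmp is no longer needed: the files are served over HTTP, but each URL has to carry the access token fetched below (the tok parameter).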
+ token = self._download_webpage(
+ 'http://stat.ntv.ru/services/access/token',
+ video_id, 'Downloading access token')
formats = []
for format_id in ['', 'hi', 'webm']:
- file = video.find('./%sfile' % format_id)
- if file is None:
+ file_ = video.find('./%sfile' % format_id)
+ if file_ is None:
continue
size = video.find('./%ssize' % format_id)
formats.append({
- 'url': 'rtmp://media.ntv.ru/%s' % app,
- 'app': app,
- 'play_path': file.text,
- 'rtmp_conn': 'B:1',
- 'player_url': 'http://www.ntv.ru/swf/vps1.swf?update=20131128',
- 'page_url': 'http://www.ntv.ru',
- 'flash_version': 'LNX 11,2,202,341',
- 'rtmp_live': True,
- 'ext': 'flv',
- 'filesize': int(size.text),
+ 'url': 'http://media2.ntv.ru/vod/%s&tok=%s' % (file_.text, token),
+ 'filesize': int_or_none(size.text if size is not None else None),
})
self._sort_formats(formats)
--- /dev/null
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ unified_strdate,
+ int_or_none,
+ qualities,
+)
+
+
+class OdnoklassnikiIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:odnoklassniki|ok)\.ru/(?:video|web-api/video/moviePlayer)/(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'http://ok.ru/video/20079905452',
+ 'md5': '8e24ad2da6f387948e7a7d44eb8668fe',
+ 'info_dict': {
+ 'id': '20079905452',
+ 'ext': 'mp4',
+ 'title': 'Культура меняет нас (прекрасный ролик!))',
+ 'duration': 100,
+ 'upload_date': '20141207',
+ 'uploader_id': '330537914540',
+ 'uploader': 'Виталий Добровольский',
+ 'like_count': int,
+ 'age_limit': 0,
+ },
+ }, {
+ 'url': 'http://ok.ru/web-api/video/moviePlayer/20079905452',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
+ player = self._parse_json(
+ self._search_regex(
+ r"OKVideo\.start\(({.+?})\s*,\s*'VideoAutoplay_player'", webpage, 'player'),
+ video_id)
+
+ metadata = self._parse_json(player['flashvars']['metadata'], video_id)
+
+ movie = metadata['movie']
+ title = movie['title']
+ thumbnail = movie.get('poster')
+ duration = int_or_none(movie.get('duration'))
+
+ author = metadata.get('author', {})
+ uploader_id = author.get('id')
+ uploader = author.get('name')
+
+ upload_date = unified_strdate(self._html_search_meta(
+ 'ya:ovs:upload_date', webpage, 'upload date'))
+
+ age_limit = None
+ adult = self._html_search_meta(
+ 'ya:ovs:adult', webpage, 'age limit')
+ if adult:
+ age_limit = 18 if adult == 'true' else 0
+
+ like_count = int_or_none(metadata.get('likeCount'))
+
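+ # Rank the named flavours from worst to best so that format selection prefers 'hd' when available.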
+ quality = qualities(('mobile', 'lowest', 'low', 'sd', 'hd'))
+
+ formats = [{
+ 'url': f['url'],
+ 'ext': 'mp4',
+ 'format_id': f['name'],
+ 'quality': quality(f['name']),
+ } for f in metadata['videos']]
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'thumbnail': thumbnail,
+ 'duration': duration,
+ 'upload_date': upload_date,
+ 'uploader': uploader,
+ 'uploader_id': uploader_id,
+ 'like_count': like_count,
+ 'age_limit': age_limit,
+ 'formats': formats,
+ }
# encoding: utf-8
from __future__ import unicode_literals
-import json
-import re
-
from .common import InfoExtractor
from ..utils import (
js_to_json,
class PatreonIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?patreon\.com/creation\?hid=(.+)'
+ _VALID_URL = r'https?://(?:www\.)?patreon\.com/creation\?hid=(?P<id>[^&#]+)'
_TESTS = [
{
'url': 'http://www.patreon.com/creation?hid=743933',
'thumbnail': 're:^https?://.*$',
},
},
+ {
+ 'url': 'https://www.patreon.com/creation?hid=1682498',
+ 'info_dict': {
+ 'id': 'SU4fj_aEMVw',
+ 'ext': 'mp4',
+ 'title': 'I\'m on Patreon!',
+ 'uploader': 'TraciJHines',
+ 'thumbnail': 're:^https?://.*$',
+ 'upload_date': '20150211',
+ 'description': 'md5:c5a706b1f687817a3de09db1eb93acd4',
+ 'uploader_id': 'TraciJHines',
+ },
+ 'params': {
+ 'noplaylist': True,
+ 'skip_download': True,
+ }
+ }
]
# Currently Patreon exposes download URL via hidden CSS, so login is not
'''
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group(1)
-
+ video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
title = self._og_search_title(webpage).strip()
attach_fn = self._html_search_regex(
r'<div class="attach"><a target="_blank" href="([^"]+)">',
webpage, 'attachment URL', default=None)
+ embed = self._html_search_regex(
+ r'<div id="watchCreation">\s*<iframe class="embedly-embed" src="([^"]+)"',
+ webpage, 'embedded URL', default=None)
+
if attach_fn is not None:
video_url = 'http://www.patreon.com' + attach_fn
thumbnail = self._og_search_thumbnail(webpage)
uploader = self._html_search_regex(
r'<strong>(.*?)</strong> is creating', webpage, 'uploader')
+ elif embed is not None:
+ return self.url_result(embed)
else:
- playlist_js = self._search_regex(
+ playlist = self._parse_json(self._search_regex(
r'(?s)new\s+jPlayerPlaylist\(\s*\{\s*[^}]*},\s*(\[.*?,?\s*\])',
- webpage, 'playlist JSON')
- playlist_json = js_to_json(playlist_js)
- playlist = json.loads(playlist_json)
+ webpage, 'playlist JSON'),
+ video_id, transform_source=js_to_json)
data = playlist[0]
video_url = self._proto_relative_url(data['mp3'])
thumbnail = self._proto_relative_url(data.get('cover'))
quality = qualities(['sd', 'hd'])
sources = json.loads(js_to_json(self._search_regex(
- r"(?s)'sources'\s*:\s*(\{.+?\})\s*\}\);", webpage, 'sources')))
+ r"(?s)'sources'\s*:\s*(\{.+?\})\s*\}[;,)]",
+ webpage, 'sources')))
formats = []
- for container, s in sources.items():
- for qname, video_url in s.items():
- formats.append({
- 'url': video_url,
- 'container': container,
- 'format_id': '%s-%s' % (container, qname),
- 'quality': quality(qname),
- })
+ for qname, video_url in sources.items():
+ if not video_url:
+ continue
+ formats.append({
+ 'url': video_url,
+ 'format_id': qname,
+ 'quality': quality(qname),
+ })
self._sort_formats(formats)
return {
video_title = self._html_search_regex(r'<h1 [^>]+>([^<]+)', webpage, 'title')
video_uploader = self._html_search_regex(
- r'(?s)From: .+?<(?:a href="/users/|a href="/channels/|<span class="username)[^>]+>(.+?)<',
+ r'(?s)From: .+?<(?:a href="/users/|a href="/channels/|span class="username)[^>]+>(.+?)<',
webpage, 'uploader', fatal=False)
thumbnail = self._html_search_regex(r'"image_url":"([^"]+)', webpage, 'thumbnail', fatal=False)
if thumbnail:
'formats': formats,
'age_limit': 18,
}
+
+
+class PornHubPlaylistIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?pornhub\.com/playlist/(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'http://www.pornhub.com/playlist/6201671',
+ 'info_dict': {
+ 'id': '6201671',
+ 'title': 'P0p4',
+ },
+ 'playlist_mincount': 35,
+ }]
+
+ def _real_extract(self, url):
+ playlist_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, playlist_id)
+
+ entries = [
+ self.url_result('http://www.pornhub.com/%s' % video_url, 'PornHub')
+            for video_url in set(re.findall(r'href="/?(view_video\.php\?viewkey=\d+[^"]*)"', webpage))
+ ]
+
+ playlist = self._parse_json(
+ self._search_regex(
+ r'playlistObject\s*=\s*({.+?});', webpage, 'playlist'),
+ playlist_id)
+
+ return self.playlist_result(
+ entries, playlist_id, playlist.get('title'), playlist.get('description'))
--- /dev/null
+# -*- coding: utf-8 -*-
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ unified_strdate,
+ int_or_none,
+)
+
+
+class Puls4IE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?puls4\.com/video/[^/]+/play/(?P<id>[0-9]+)'
+ _TESTS = [{
+ 'url': 'http://www.puls4.com/video/pro-und-contra/play/2716816',
+ 'md5': '49f6a6629747eeec43cef6a46b5df81d',
+ 'info_dict': {
+ 'id': '2716816',
+ 'ext': 'mp4',
+ 'title': 'Pro und Contra vom 23.02.2015',
+ 'description': 'md5:293e44634d9477a67122489994675db6',
+ 'duration': 2989,
+ 'upload_date': '20150224',
+ 'uploader': 'PULS_4',
+ },
+ 'skip': 'Only works from Germany',
+ }, {
+ 'url': 'http://www.puls4.com/video/kult-spielfilme/play/1298106',
+ 'md5': '6a48316c8903ece8dab9b9a7bf7a59ec',
+ 'info_dict': {
+ 'id': '1298106',
+ 'ext': 'mp4',
+ 'title': 'Lucky Fritz',
+ },
+ 'skip': 'Only works from Germany',
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ error_message = self._html_search_regex(
+ r'<div class="message-error">(.+?)</div>',
+ webpage, 'error message', default=None)
+ if error_message:
+ raise ExtractorError(
+ '%s returned error: %s' % (self.IE_NAME, error_message), expected=True)
+
+ real_url = self._html_search_regex(
+ r'\"fsk-button\".+?href=\"([^"]+)',
+ webpage, 'fsk_button', default=None)
+ if real_url:
+ webpage = self._download_webpage(real_url, video_id)
+
+ player = self._search_regex(
+ r'p4_video_player(?:_iframe)?\("video_\d+_container"\s*,(.+?)\);\s*\}',
+ webpage, 'player')
+
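+        # The player call arguments parse as a JSON array once stray
+        # "undefined" entries are stripped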
+ player_json = self._parse_json(
+ '[%s]' % player, video_id,
+ transform_source=lambda s: s.replace('undefined,', ''))
+
+ formats = None
+ result = None
+
+ for v in player_json:
+ if isinstance(v, list) and not formats:
+ formats = [{
+ 'url': f['url'],
+ 'format': 'hd' if f.get('hd') else 'sd',
+ 'width': int_or_none(f.get('size_x')),
+ 'height': int_or_none(f.get('size_y')),
+ 'tbr': int_or_none(f.get('bitrate')),
+ } for f in v]
+ self._sort_formats(formats)
+ elif isinstance(v, dict) and not result:
+ result = {
+ 'id': video_id,
+ 'title': v['videopartname'].strip(),
+ 'description': v.get('videotitle'),
+ 'duration': int_or_none(v.get('videoduration') or v.get('episodeduration')),
+ 'upload_date': unified_strdate(v.get('clipreleasetime')),
+ 'uploader': v.get('channel'),
+ }
+
+ result['formats'] = formats
+
+ return result
--- /dev/null
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ js_to_json,
+ unescapeHTML,
+ int_or_none,
+)
+
+
+class R7IE(InfoExtractor):
+ _VALID_URL = r'''(?x)https?://
+ (?:
+ (?:[a-zA-Z]+)\.r7\.com(?:/[^/]+)+/idmedia/|
+ noticias\.r7\.com(?:/[^/]+)+/[^/]+-|
+ player\.r7\.com/video/i/
+ )
+ (?P<id>[\da-f]{24})
+ '''
+ _TESTS = [{
+ 'url': 'http://videos.r7.com/policiais-humilham-suspeito-a-beira-da-morte-morre-com-dignidade-/idmedia/54e7050b0cf2ff57e0279389.html',
+ 'md5': '403c4e393617e8e8ddc748978ee8efde',
+ 'info_dict': {
+ 'id': '54e7050b0cf2ff57e0279389',
+ 'ext': 'mp4',
+ 'title': 'Policiais humilham suspeito à beira da morte: "Morre com dignidade"',
+ 'thumbnail': 're:^https?://.*\.jpg$',
+ 'duration': 98,
+ 'like_count': int,
+ 'view_count': int,
+ },
+ }, {
+ 'url': 'http://esportes.r7.com/videos/cigano-manda-recado-aos-fas/idmedia/4e176727b51a048ee6646a1b.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://noticias.r7.com/record-news/video/representante-do-instituto-sou-da-paz-fala-sobre-fim-do-estatuto-do-desarmamento-5480fc580cf2285b117f438d/',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://player.r7.com/video/i/54e7050b0cf2ff57e0279389?play=true&video=http://vsh.r7.com/54e7050b0cf2ff57e0279389/ER7_RE_BG_MORTE_JOVENS_570kbps_2015-02-2009f17818-cc82-4c8f-86dc-89a66934e633-ATOS_copy.mp4&linkCallback=http://videos.r7.com/policiais-humilham-suspeito-a-beira-da-morte-morre-com-dignidade-/idmedia/54e7050b0cf2ff57e0279389.html&thumbnail=http://vtb.r7.com/ER7_RE_BG_MORTE_JOVENS_570kbps_2015-02-2009f17818-cc82-4c8f-86dc-89a66934e633-thumb.jpg&idCategory=192&share=true&layout=full&full=true',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(
+ 'http://player.r7.com/video/i/%s' % video_id, video_id)
+
+ item = self._parse_json(js_to_json(self._search_regex(
+ r'(?s)var\s+item\s*=\s*({.+?});', webpage, 'player')), video_id)
+
+ title = unescapeHTML(item['title'])
+ thumbnail = item.get('init', {}).get('thumbUri')
+ duration = None
+
+ statistics = item.get('statistics', {})
+ like_count = int_or_none(statistics.get('likes'))
+ view_count = int_or_none(statistics.get('views'))
+
+ formats = []
+ for format_key, format_dict in item['playlist'][0].items():
+ src = format_dict.get('src')
+ if not src:
+ continue
+ format_id = format_dict.get('format') or format_key
+ if duration is None:
+ duration = format_dict.get('duration')
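+            # De-prioritize segmented formats: HDS gets preference -1, HLS -2,
+            # while plain HTTP downloads keep the default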
+ if '.f4m' in src:
+ formats.extend(self._extract_f4m_formats(src, video_id, preference=-1))
+ elif src.endswith('.m3u8'):
+ formats.extend(self._extract_m3u8_formats(src, video_id, 'mp4', preference=-2))
+ else:
+ formats.append({
+ 'url': src,
+ 'format_id': format_id,
+ })
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'thumbnail': thumbnail,
+ 'duration': duration,
+ 'like_count': like_count,
+ 'view_count': view_count,
+ 'formats': formats,
+ }
from __future__ import unicode_literals
-import json
-
from .common import InfoExtractor
_VALID_URL = r'https?://(?P<id>.+?)\.(?:radio\.(?:de|at|fr|pt|es|pl|it)|rad\.io)'
_TEST = {
'url': 'http://ndr2.radio.de/',
- 'md5': '3b4cdd011bc59174596b6145cda474a4',
'info_dict': {
'id': 'ndr2',
'ext': 'mp3',
'title': 're:^NDR 2 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
'description': 'md5:591c49c702db1a33751625ebfb67f273',
'thumbnail': 're:^https?://.*\.png',
+ 'is_live': True,
},
'params': {
'skip_download': True,
def _real_extract(self, url):
radio_id = self._match_id(url)
-
webpage = self._download_webpage(url, radio_id)
-        broadcast = json.loads(self._search_regex(
-            r'_getBroadcast\s*=\s*function\(\s*\)\s*{\s*return\s+({.+?})\s*;\s*}',
-            webpage, 'broadcast'))
-
+        jscode = self._search_regex(
+            r"'components/station/stationService':\s*\{\s*'?station'?:\s*(\{.*?\s*\}),\n",
+            webpage, 'broadcast')
+        broadcast = self._parse_json(jscode, radio_id)
title = self._live_title(broadcast['name'])
description = broadcast.get('description') or broadcast.get('shortDescription')
- thumbnail = broadcast.get('picture4Url') or broadcast.get('picture4TransUrl')
+ thumbnail = broadcast.get('picture4Url') or broadcast.get('picture4TransUrl') or broadcast.get('logo100x100')
formats = [{
'url': stream['streamUrl'],
import re
-from .subtitles import SubtitlesInfoExtractor
+from .common import InfoExtractor
from ..compat import (
compat_urllib_parse,
)
)
-class RaiIE(SubtitlesInfoExtractor):
+class RaiIE(InfoExtractor):
_VALID_URL = r'(?P<url>http://(?:.+?\.)?(?:rai\.it|rai\.tv|rainews\.it)/dl/.+?-(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})(?:-.+?)?\.html)'
_TESTS = [
{
'ext': 'mp4',
})
- if self._downloader.params.get('listsubtitles', False):
- page = self._download_webpage(url, video_id)
- self._list_available_subtitles(video_id, page)
- return
-
- subtitles = {}
- if self._have_to_download_any_subtitles:
- page = self._download_webpage(url, video_id)
- subtitles = self.extract_subtitles(video_id, page)
+ subtitles = self.extract_subtitles(video_id, url)
return {
'id': video_id,
'subtitles': subtitles,
}
- def _get_available_subtitles(self, video_id, webpage):
+ def _get_subtitles(self, video_id, url):
+ webpage = self._download_webpage(url, video_id)
subtitles = {}
m = re.search(r'<meta name="closedcaption" content="(?P<captions>[^"]+)"', webpage)
if m:
SRT_EXT = '.srt'
if captions.endswith(STL_EXT):
captions = captions[:-len(STL_EXT)] + SRT_EXT
- subtitles['it'] = 'http://www.rai.tv%s' % compat_urllib_parse.quote(captions)
+ subtitles['it'] = [{
+ 'ext': 'srt',
+ 'url': 'http://www.rai.tv%s' % compat_urllib_parse.quote(captions),
+ }]
return subtitles
+# coding: utf-8
from __future__ import unicode_literals
-import re
-
from .common import InfoExtractor
-from ..utils import parse_duration
+from ..utils import (
+ int_or_none,
+ parse_duration,
+)
-class RtlXlIE(InfoExtractor):
- IE_NAME = 'rtlxl.nl'
- _VALID_URL = r'https?://(www\.)?rtlxl\.nl/#!/[^/]+/(?P<uuid>[^/?]+)'
+class RtlNlIE(InfoExtractor):
+ IE_NAME = 'rtl.nl'
+ IE_DESC = 'rtl.nl and rtlxl.nl'
+ _VALID_URL = r'''(?x)
+ https?://(www\.)?
+ (?:
+ rtlxl\.nl/\#!/[^/]+/|
+ rtl\.nl/system/videoplayer/[^?#]+?/video_embed\.html\#uuid=
+ )
+ (?P<id>[0-9a-f-]+)'''
- _TEST = {
+ _TESTS = [{
'url': 'http://www.rtlxl.nl/#!/rtl-nieuws-132237/6e4203a6-0a5e-3596-8424-c599a59e0677',
'md5': 'cc16baa36a6c169391f0764fa6b16654',
'info_dict': {
'upload_date': '20140814',
'duration': 576.880,
},
- }
+ }, {
+ 'url': 'http://www.rtl.nl/system/videoplayer/derden/rtlnieuws/video_embed.html#uuid=84ae5571-ac25-4225-ae0c-ef8d9efb2aed/autoplay=false',
+ 'md5': 'dea7474214af1271d91ef332fb8be7ea',
+ 'info_dict': {
+ 'id': '84ae5571-ac25-4225-ae0c-ef8d9efb2aed',
+ 'ext': 'mp4',
+ 'timestamp': 1424039400,
+ 'title': 'RTL Nieuws - Nieuwe beelden Kopenhagen: chaos direct na aanslag',
+ 'thumbnail': 're:^https?://screenshots\.rtl\.nl/system/thumb/sz=[0-9]+x[0-9]+/uuid=84ae5571-ac25-4225-ae0c-ef8d9efb2aed$',
+ 'upload_date': '20150215',
+ 'description': 'Er zijn nieuwe beelden vrijgegeven die vlak na de aanslag in Kopenhagen zijn gemaakt. Op de video is goed te zien hoe omstanders zich bekommeren om één van de slachtoffers, terwijl de eerste agenten ter plaatse komen.',
+ }
+ }]
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- uuid = mobj.group('uuid')
-
+ uuid = self._match_id(url)
info = self._download_json(
'http://www.rtl.nl/system/s4m/vfd/version=2/uuid=%s/fmt=flash/' % uuid,
uuid)
material = info['material'][0]
- episode_info = info['episodes'][0]
-
progname = info['abstracts'][0]['name']
subtitle = material['title'] or info['episodes'][0]['name']
+ description = material.get('synopsis') or info['episodes'][0]['synopsis']
# Use unencrypted m3u8 streams (See https://github.com/rg3/youtube-dl/issues/4118)
videopath = material['videopath'].replace('.f4m', '.m3u8')
'quality': 0,
}
])
-
self._sort_formats(formats)
+ thumbnails = []
+ meta = info.get('meta', {})
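+    # the quotes in the second key name are apparently literal in the API response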
+ for p in ('poster_base_url', '"thumb_base_url"'):
+ if not meta.get(p):
+ continue
+
+ thumbnails.append({
+ 'url': self._proto_relative_url(meta[p] + uuid),
+ 'width': int_or_none(self._search_regex(
+ r'/sz=([0-9]+)', meta[p], 'thumbnail width', fatal=False)),
+ 'height': int_or_none(self._search_regex(
+ r'/sz=[0-9]+x([0-9]+)',
+ meta[p], 'thumbnail height', fatal=False))
+ })
+
return {
'id': uuid,
'title': '%s - %s' % (progname, subtitle),
'formats': formats,
'timestamp': material['original_date'],
- 'description': episode_info['synopsis'],
+ 'description': description,
'duration': parse_duration(material.get('duration')),
+ 'thumbnails': thumbnails,
}
},
},
{
+ 'url': 'http://rtl-now.rtl.de/der-bachelor/folge-4.php?film_id=188729&player=1&season=5',
+ 'info_dict': {
+ 'id': '188729',
+ 'ext': 'flv',
+ 'upload_date': '20150204',
+ 'description': 'md5:5e1ce23095e61a79c166d134b683cecc',
+ 'title': 'Der Bachelor - Folge 4',
+ }
+ }, {
'url': 'http://www.n-tvnow.de/deluxe-alles-was-spass-macht/thema-ua-luxushotel-fuer-vierbeiner.php?container_id=153819&player=1&season=0',
'only_matching': True,
},
'player_url': video_page_url + 'includes/vodplayer.swf',
}
else:
- fmt = {
- 'url': filename.text,
- }
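+            # Derive an RTMPE stream from the hoster and play path embedded
+            # in the f4m URL when possible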
+ mobj = re.search(r'.*/(?P<hoster>[^/]+)/videos/(?P<play_path>.+)\.f4m', filename.text)
+ if mobj:
+ fmt = {
+ 'url': 'rtmpe://fms.rtl.de/' + mobj.group('hoster'),
+ 'play_path': 'mp4:' + mobj.group('play_path'),
+ 'page_url': url,
+ 'player_url': video_page_url + 'includes/vodplayer.swf',
+ }
+ else:
+ fmt = {
+ 'url': filename.text,
+ }
fmt.update({
'width': int_or_none(filename.get('width')),
'height': int_or_none(filename.get('height')),
# coding: utf-8
from __future__ import unicode_literals
-import json
+import re
from .common import InfoExtractor
-from ..utils import js_to_json
class RTPIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?rtp\.pt/play/p(?P<program_id>[0-9]+)/(?P<id>[^/?#]+)/?'
_TESTS = [{
'url': 'http://www.rtp.pt/play/p405/e174042/paixoes-cruzadas',
+ 'md5': 'e736ce0c665e459ddb818546220b4ef8',
'info_dict': {
'id': 'e174042',
'ext': 'mp3',
'description': 'As paixões musicais de António Cartaxo e António Macedo',
'thumbnail': 're:^https?://.*\.jpg',
},
- 'params': {
- 'skip_download': True, # RTMP download
- },
}, {
'url': 'http://www.rtp.pt/play/p831/a-quimica-das-coisas',
'only_matching': True,
player_config = self._search_regex(
r'(?s)RTPPLAY\.player\.newPlayer\(\s*(\{.*?\})\s*\)', webpage, 'player config')
- config = json.loads(js_to_json(player_config))
+ config = self._parse_json(player_config, video_id)
path, ext = config.get('file').rsplit('.', 1)
formats = [{
+ 'format_id': 'rtmp',
+ 'ext': ext,
+ 'vcodec': config.get('type') == 'audio' and 'none' or None,
+ 'preference': -2,
+ 'url': 'rtmp://{streamer:s}/{application:s}'.format(**config),
'app': config.get('application'),
'play_path': '{ext:s}:{path:s}'.format(ext=ext, path=path),
'page_url': url,
- 'url': 'rtmp://{streamer:s}/{application:s}'.format(**config),
'rtmp_live': config.get('live', False),
- 'ext': ext,
- 'vcodec': config.get('type') == 'audio' and 'none' or None,
'player_url': 'http://programas.rtp.pt/play/player.swf?v3',
'rtmp_real_time': True,
}]
+ # Construct regular HTTP download URLs
+ replacements = {
+ 'audio': {
+ 'format_id': 'mp3',
+ 'pattern': r'^nas2\.share/wavrss/',
+ 'repl': 'http://rsspod.rtp.pt/podcasts/',
+ 'vcodec': 'none',
+ },
+ 'video': {
+ 'format_id': 'mp4_h264',
+ 'pattern': r'^nas2\.share/h264/',
+ 'repl': 'http://rsspod.rtp.pt/videocasts/',
+ 'vcodec': 'h264',
+ },
+ }
+ r = replacements[config['type']]
+ if re.match(r['pattern'], config['file']) is not None:
+ formats.append({
+ 'format_id': r['format_id'],
+ 'url': re.sub(r['pattern'], r['repl'], config['file']),
+ 'vcodec': r['vcodec'],
+ })
+
+ self._sort_formats(formats)
+
return {
'id': video_id,
'title': title,
import time
from .common import InfoExtractor
+from ..compat import compat_urlparse
from ..utils import (
- struct_unpack,
+ float_or_none,
remove_end,
+ struct_unpack,
)
'id': '2491869',
'ext': 'mp4',
'title': 'Balonmano - Swiss Cup masculina. Final: España-Suecia',
+ 'duration': 5024.566,
},
}, {
'note': 'Live stream',
).replace('.net.rtve', '.multimedia.cdn.rtve')
video_path = self._download_webpage(
auth_url, video_id, 'Getting video url')
- # Use mvod.akcdn instead of flash.akamaihd.multimedia.cdn to get
+ # Use mvod1.akcdn instead of flash.akamaihd.multimedia.cdn to get
# the right Content-Length header and the mp4 format
- video_url = (
- 'http://mvod.akcdn.rtve.es/{0}&v=2.6.8'
- '&fp=MAC%2016,0,0,296&r=MRUGG&g=OEOJWFXNFGCP'.format(video_path)
- )
+ video_url = compat_urlparse.urljoin(
+ 'http://mvod1.akcdn.rtve.es/', video_path)
+
+ subtitles = None
+ if info.get('sbtFile') is not None:
+ subtitles = self.extract_subtitles(video_id, info['sbtFile'])
return {
'id': video_id,
'url': video_url,
'thumbnail': info.get('image'),
'page_url': url,
+ 'subtitles': subtitles,
+ 'duration': float_or_none(info.get('duration'), scale=1000),
}
+ def _get_subtitles(self, video_id, sub_file):
+ subs = self._download_json(
+ sub_file + '.json', video_id,
+ 'Downloading subtitles info')['page']['items']
+ return dict(
+ (s['lang'], [{'ext': 'vtt', 'url': s['src']}])
+ for s in subs)
+
class RTVELiveIE(InfoExtractor):
IE_NAME = 'rtve.es:live'
--- /dev/null
+# coding: utf-8
+from __future__ import unicode_literals
+
+import itertools
+import json
+import re
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_urllib_request,
+ compat_urlparse,
+)
+from ..utils import (
+ int_or_none,
+ js_to_json,
+ mimetype2ext,
+ unified_strdate,
+)
+
+
+class SandiaIE(InfoExtractor):
+ IE_DESC = 'Sandia National Laboratories'
+ _VALID_URL = r'https?://digitalops\.sandia\.gov/Mediasite/Play/(?P<id>[0-9a-f]+)'
+ _TEST = {
+ 'url': 'http://digitalops.sandia.gov/Mediasite/Play/24aace4429fc450fb5b38cdbf424a66e1d',
+ 'md5': '9422edc9b9a60151727e4b6d8bef393d',
+ 'info_dict': {
+ 'id': '24aace4429fc450fb5b38cdbf424a66e1d',
+ 'ext': 'mp4',
+ 'title': 'Xyce Software Training - Section 1',
+ 'description': 're:(?s)SAND Number: SAND 2013-7800.{200,}',
+ 'upload_date': '20120904',
+ 'duration': 7794,
+ }
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ req = compat_urllib_request.Request(url)
+ req.add_header('Cookie', 'MediasitePlayerCaps=ClientPlugins=4')
+ webpage = self._download_webpage(req, video_id)
+
+ js_path = self._search_regex(
+ r'<script type="text/javascript" src="(/Mediasite/FileServer/Presentation/[^"]+)"',
+ webpage, 'JS code URL')
+ js_url = compat_urlparse.urljoin(url, js_path)
+
+ js_code = self._download_webpage(
+ js_url, video_id, note='Downloading player')
+
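+        # Scrape "Mediasite.PlaybackManifest.<key> = <value>;" assignments from the player JS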
+ def extract_str(key, **args):
+ return self._search_regex(
+ r'Mediasite\.PlaybackManifest\.%s\s*=\s*(.+);\s*?\n' % re.escape(key),
+ js_code, key, **args)
+
+ def extract_data(key, **args):
+ data_json = extract_str(key, **args)
+ if data_json is None:
+ return data_json
+ return self._parse_json(
+ data_json, video_id, transform_source=js_to_json)
+
+ formats = []
+ for i in itertools.count():
+ fd = extract_data('VideoUrls[%d]' % i, default=None)
+ if fd is None:
+ break
+ formats.append({
+ 'format_id': '%s' % i,
+ 'format_note': fd['MimeType'].partition('/')[2],
+ 'ext': mimetype2ext(fd['MimeType']),
+ 'url': fd['Location'],
+ 'protocol': 'f4m' if fd['MimeType'] == 'video/x-mp4-fragmented' else None,
+ })
+ self._sort_formats(formats)
+
+ slide_baseurl = compat_urlparse.urljoin(
+ url, extract_data('SlideBaseUrl'))
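+        # Convert the .NET numeric placeholder in the template (e.g. {0:D4})
+        # to printf form (%04d)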
+ slide_template = slide_baseurl + re.sub(
+            r'\{0:D?([0-9]+)\}', r'%0\1d', extract_data('SlideImageFileNameTemplate'))
+ slides = []
+ last_slide_time = 0
+ for i in itertools.count(1):
+ sd = extract_str('Slides[%d]' % i, default=None)
+ if sd is None:
+ break
+ timestamp = int_or_none(self._search_regex(
+ r'^Mediasite\.PlaybackManifest\.CreateSlide\("[^"]*"\s*,\s*([0-9]+),',
+ sd, 'slide %s timestamp' % i, fatal=False))
+ slides.append({
+ 'url': slide_template % i,
+ 'duration': timestamp - last_slide_time,
+ })
+ last_slide_time = timestamp
+ formats.append({
+ 'format_id': 'slides',
+ 'protocol': 'slideshow',
+ 'url': json.dumps(slides),
+ 'preference': -10000, # Downloader not yet written
+ })
+ self._sort_formats(formats)
+
+ title = extract_data('Title')
+ description = extract_data('Description', fatal=False)
+ duration = int_or_none(extract_data(
+ 'Duration', fatal=False), scale=1000)
+ upload_date = unified_strdate(extract_data('AirDate', fatal=False))
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'formats': formats,
+ 'upload_date': upload_date,
+ 'duration': duration,
+ }
'id': '437BE28B89D799D7',
'title': 'big_buck_bunny_720p_surround.avi',
'ext': 'avi',
- 'thumbnail': 're:^http://.*\.jpg$',
}
}
''', webpage, 'hash')
fields = {
- "hash": confirm_hash,
+ "hash": confirm_hash.encode('utf-8'),
"confirm": "Continue as Free User"
}
webpage, 'title', default=None)
thumbnail = self._html_search_regex(
r'<img\s+src="([^"]*)".+?name="bg"',
- webpage, 'thumbnail')
+ webpage, 'thumbnail', default=None)
formats = [{
'format_id': 'sd',
+++ /dev/null
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-from ..utils import (
- HEADRequest,
- urlhandle_detect_ext,
-)
-
-
-class SoulAnimeWatchingIE(InfoExtractor):
- IE_NAME = "soulanime:watching"
- IE_DESC = "SoulAnime video"
- _TEST = {
- 'url': 'http://www.soul-anime.net/watching/seirei-tsukai-no-blade-dance-episode-9/',
- 'md5': '05fae04abf72298098b528e98abf4298',
- 'info_dict': {
- 'id': 'seirei-tsukai-no-blade-dance-episode-9',
- 'ext': 'mp4',
- 'title': 'seirei-tsukai-no-blade-dance-episode-9',
- 'description': 'seirei-tsukai-no-blade-dance-episode-9'
- }
- }
- _VALID_URL = r'http://[w.]*soul-anime\.(?P<domain>[^/]+)/watch[^/]*/(?P<id>[^/]+)'
-
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('id')
- domain = mobj.group('domain')
-
- page = self._download_webpage(url, video_id)
-
- video_url_encoded = self._html_search_regex(
- r'<div id="download">[^<]*<a href="(?P<url>[^"]+)"', page, 'url')
- video_url = "http://www.soul-anime." + domain + video_url_encoded
-
- ext_req = HEADRequest(video_url)
- ext_handle = self._request_webpage(
- ext_req, video_id, note='Determining extension')
- ext = urlhandle_detect_ext(ext_handle)
-
- return {
- 'id': video_id,
- 'url': video_url,
- 'ext': ext,
- 'title': video_id,
- 'description': video_id
- }
-
-
-class SoulAnimeSeriesIE(InfoExtractor):
- IE_NAME = "soulanime:series"
- IE_DESC = "SoulAnime Series"
-
- _VALID_URL = r'http://[w.]*soul-anime\.(?P<domain>[^/]+)/anime./(?P<id>[^/]+)'
-
- _EPISODE_REGEX = r'<option value="(/watch[^/]*/[^"]+)">[^<]*</option>'
-
- _TEST = {
- 'url': 'http://www.soul-anime.net/anime1/black-rock-shooter-tv/',
- 'info_dict': {
- 'id': 'black-rock-shooter-tv'
- },
- 'playlist_count': 8
- }
-
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- series_id = mobj.group('id')
- domain = mobj.group('domain')
-
- pattern = re.compile(self._EPISODE_REGEX)
-
- page = self._download_webpage(url, series_id, "Downloading series page")
- mobj = pattern.findall(page)
-
- entries = [self.url_result("http://www.soul-anime." + domain + obj) for obj in mobj]
-
- return self.playlist_result(entries, series_id)
class SoundgasmIE(InfoExtractor):
+ IE_NAME = 'soundgasm'
_VALID_URL = r'https?://(?:www\.)?soundgasm\.net/u/(?P<user>[0-9a-zA-Z_\-]+)/(?P<title>[0-9a-zA-Z_\-]+)'
_TEST = {
'url': 'http://soundgasm.net/u/ytdl/Piano-sample',
'title': audio_title,
'description': description
}
+
+
+class SoundgasmProfileIE(InfoExtractor):
+ IE_NAME = 'soundgasm:profile'
+ _VALID_URL = r'https?://(?:www\.)?soundgasm\.net/u/(?P<id>[^/]+)/?(?:\#.*)?$'
+ _TEST = {
+ 'url': 'http://soundgasm.net/u/ytdl',
+ 'info_dict': {
+ 'id': 'ytdl',
+ },
+ 'playlist_count': 1,
+ }
+
+ def _real_extract(self, url):
+ profile_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, profile_id)
+
+ entries = [
+ self.url_result(audio_url, 'Soundgasm')
+ for audio_url in re.findall(r'href="([^"]+/u/%s/[^"]+)' % profile_id, webpage)]
+
+ return self.playlist_result(entries, profile_id)
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
+import hashlib
+import time
+
from .common import InfoExtractor
+from ..compat import (
+ compat_urllib_request,
+)
from ..utils import (
int_or_none,
)
+def _get_api_key(api_path):
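+    # Api-Password is the MD5 of a static key, the request path and the
+    # current day number (time() / 86400), so the password rotates daily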
+ if api_path.endswith('?'):
+ api_path = api_path[:-1]
+
+ api_key = 'fb5f58a820353bd7095de526253c14fd'
+ a = '{0:}{1:}{2:}'.format(api_key, api_path, int(round(time.time() / 24 / 3600)))
+ return hashlib.md5(a.encode('ascii')).hexdigest()
+
+
class StreamCZIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?stream\.cz/.+/(?P<id>[0-9]+)'
+ _API_URL = 'http://www.stream.cz/API'
_TESTS = [{
'url': 'http://www.stream.cz/peklonataliri/765767-ecka-pro-deti',
def _real_extract(self, url):
video_id = self._match_id(url)
- data = self._download_json(
- 'http://www.stream.cz/API/episode/%s' % video_id, video_id)
+ api_path = '/episode/%s' % video_id
+
+ req = compat_urllib_request.Request(self._API_URL + api_path)
+ req.add_header('Api-Password', _get_api_key(api_path))
+ data = self._download_json(req, video_id)
formats = []
for quality, video in enumerate(data['video_qualities']):
+++ /dev/null
-from __future__ import unicode_literals
-from .common import InfoExtractor
-
-from ..compat import compat_str
-from ..utils import (
- ExtractorError,
-)
-
-
-class SubtitlesInfoExtractor(InfoExtractor):
- @property
- def _have_to_download_any_subtitles(self):
- return any([self._downloader.params.get('writesubtitles', False),
- self._downloader.params.get('writeautomaticsub')])
-
- def _list_available_subtitles(self, video_id, webpage):
- """ outputs the available subtitles for the video """
- sub_lang_list = self._get_available_subtitles(video_id, webpage)
- auto_captions_list = self._get_available_automatic_caption(video_id, webpage)
- sub_lang = ",".join(list(sub_lang_list.keys()))
- self.to_screen('%s: Available subtitles for video: %s' %
- (video_id, sub_lang))
- auto_lang = ",".join(auto_captions_list.keys())
- self.to_screen('%s: Available automatic captions for video: %s' %
- (video_id, auto_lang))
-
- def extract_subtitles(self, video_id, webpage):
- """
- returns {sub_lang: sub} ,{} if subtitles not found or None if the
- subtitles aren't requested.
- """
- if not self._have_to_download_any_subtitles:
- return None
- available_subs_list = {}
- if self._downloader.params.get('writeautomaticsub', False):
- available_subs_list.update(self._get_available_automatic_caption(video_id, webpage))
- if self._downloader.params.get('writesubtitles', False):
- available_subs_list.update(self._get_available_subtitles(video_id, webpage))
-
- if not available_subs_list: # error, it didn't get the available subtitles
- return {}
- if self._downloader.params.get('allsubtitles', False):
- sub_lang_list = available_subs_list
- else:
- if self._downloader.params.get('subtitleslangs', False):
- requested_langs = self._downloader.params.get('subtitleslangs')
- elif 'en' in available_subs_list:
- requested_langs = ['en']
- else:
- requested_langs = [list(available_subs_list.keys())[0]]
-
- sub_lang_list = {}
- for sub_lang in requested_langs:
- if sub_lang not in available_subs_list:
- self._downloader.report_warning('no closed captions found in the specified language "%s"' % sub_lang)
- continue
- sub_lang_list[sub_lang] = available_subs_list[sub_lang]
-
- subtitles = {}
- for sub_lang, url in sub_lang_list.items():
- subtitle = self._request_subtitle_url(sub_lang, url)
- if subtitle:
- subtitles[sub_lang] = subtitle
- return subtitles
-
- def _download_subtitle_url(self, sub_lang, url):
- return self._download_webpage(url, None, note=False)
-
- def _request_subtitle_url(self, sub_lang, url):
- """ makes the http request for the subtitle """
- try:
- sub = self._download_subtitle_url(sub_lang, url)
- except ExtractorError as err:
- self._downloader.report_warning('unable to download video subtitles for %s: %s' % (sub_lang, compat_str(err)))
- return
- if not sub:
- self._downloader.report_warning('Did not fetch video subtitles')
- return
- return sub
-
- def _get_available_subtitles(self, video_id, webpage):
- """
- returns {sub_lang: url} or {} if not available
- Must be redefined by the subclasses
- """
-
- # By default, allow implementations to simply pass in the result
- assert isinstance(webpage, dict), \
- '_get_available_subtitles not implemented'
- return webpage
-
- def _get_available_automatic_caption(self, video_id, webpage):
- """
- returns {sub_lang: url} or {} if not available
- Must be redefined by the subclasses that support automatic captions,
- otherwise it will return {}
- """
- self._downloader.report_warning('Automatic Captions not supported by this server')
- return {}
formats = []
quality = qualities(['mp4', 'flv'])
- for video_url in re.findall(r'<source src="([^"]+)"', webpage):
+ for video_url in re.findall(r'<(?:source|video) src="([^"]+)"', webpage):
video_ext = determine_ext(video_url)
formats.append({
'url': video_url,
--- /dev/null
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ determine_ext,
+)
+
+
+class SVTPlayIE(InfoExtractor):
+ IE_DESC = 'SVT Play and Öppet arkiv'
+ _VALID_URL = r'https?://(?:www\.)?(?P<host>svtplay|oppetarkiv)\.se/video/(?P<id>[0-9]+)'
+ _TESTS = [{
+ 'url': 'http://www.svtplay.se/video/2609989/sm-veckan/sm-veckan-rally-final-sasong-1-sm-veckan-rally-final',
+ 'md5': 'ade3def0643fa1c40587a422f98edfd9',
+ 'info_dict': {
+ 'id': '2609989',
+ 'ext': 'flv',
+ 'title': 'SM veckan vinter, Örebro - Rally, final',
+ 'duration': 4500,
+ 'thumbnail': 're:^https?://.*[\.-]jpg$',
+ 'age_limit': 0,
+ },
+ }, {
+ 'url': 'http://www.oppetarkiv.se/video/1058509/rederiet-sasong-1-avsnitt-1-av-318',
+ 'md5': 'c3101a17ce9634f4c1f9800f0746c187',
+ 'info_dict': {
+ 'id': '1058509',
+ 'ext': 'flv',
+ 'title': 'Farlig kryssning',
+ 'duration': 2566,
+ 'thumbnail': 're:^https?://.*[\.-]jpg$',
+ 'age_limit': 0,
+ },
+ 'skip': 'Only works from Sweden',
+ }]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id')
+ host = mobj.group('host')
+
+ info = self._download_json(
+ 'http://www.%s.se/video/%s?output=json' % (host, video_id), video_id)
+
+ title = info['context']['title']
+ thumbnail = info['context'].get('thumbnailImage')
+
+ video_info = info['video']
+ formats = []
+ for vr in video_info['videoReferences']:
+ vurl = vr['url']
+ ext = determine_ext(vurl)
+ if ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ vurl, video_id,
+ ext='mp4', entry_protocol='m3u8_native',
+ m3u8_id=vr.get('playerType')))
+ elif ext == 'f4m':
+ formats.extend(self._extract_f4m_formats(
+ vurl + '?hdcore=3.3.0', video_id,
+ f4m_id=vr.get('playerType')))
+ else:
+ formats.append({
+ 'format_id': vr.get('playerType'),
+ 'url': vurl,
+ })
+ self._sort_formats(formats)
+
+ duration = video_info.get('materialLength')
+ age_limit = 18 if video_info.get('inappropriateForChildren') else 0
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'formats': formats,
+ 'thumbnail': thumbnail,
+ 'duration': duration,
+ 'age_limit': age_limit,
+ }
from __future__ import unicode_literals
+import base64
import re
from .common import InfoExtractor
+from ..utils import qualities
class TeamcocoIE(InfoExtractor):
'id': '80187',
'ext': 'mp4',
'title': 'Conan Becomes A Mary Kay Beauty Consultant',
- 'description': 'Mary Kay is perhaps the most trusted name in female beauty, so of course Conan is a natural choice to sell their products.'
+ 'description': 'Mary Kay is perhaps the most trusted name in female beauty, so of course Conan is a natural choice to sell their products.',
+ 'age_limit': 0,
}
}, {
'url': 'http://teamcoco.com/video/louis-ck-interview-george-w-bush',
'info_dict': {
'id': '19705',
'ext': 'mp4',
- "description": "Louis C.K. got starstruck by George W. Bush, so what? Part one.",
- "title": "Louis C.K. Interview Pt. 1 11/3/11"
+ 'description': 'Louis C.K. got starstruck by George W. Bush, so what? Part one.',
+ 'title': 'Louis C.K. Interview Pt. 1 11/3/11',
+ 'age_limit': 0,
}
}
]
+ _VIDEO_ID_REGEXES = (
+ r'"eVar42"\s*:\s*(\d+)',
+ r'Ginger\.TeamCoco\.openInApp\("video",\s*"([^"]+)"',
+ r'"id_not"\s*:\s*(\d+)'
+ )
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
display_id = mobj.group('display_id')
webpage = self._download_webpage(url, display_id)
- video_id = mobj.group("video_id")
+ video_id = mobj.group('video_id')
if not video_id:
video_id = self._html_search_regex(
- r'<div\s+class="player".*?data-id="(\d+?)"',
- webpage, 'video id')
+ self._VIDEO_ID_REGEXES, webpage, 'video id')
- data_url = 'http://teamcoco.com/cvp/2.0/%s.xml' % video_id
- data = self._download_xml(
- data_url, display_id, 'Downloading data webpage')
+ embed_url = 'http://teamcoco.com/embed/v/%s' % video_id
+ embed = self._download_webpage(
+ embed_url, video_id, 'Downloading embed page')
+
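+        # The embed page carries the video metadata as base64-encoded JSON
+        # in its "preload" field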
+ encoded_data = self._search_regex(
+ r'"preload"\s*:\s*"([^"]+)"', embed, 'encoded data')
+ data = self._parse_json(
+ base64.b64decode(encoded_data.encode('ascii')).decode('utf-8'), video_id)
- qualities = ['500k', '480p', '1000k', '720p', '1080p']
formats = []
- for filed in data.findall('files/file'):
- if filed.attrib.get('playmode') == 'all':
- # it just duplicates one of the entries
- break
- file_url = filed.text
- m_format = re.search(r'(\d+(k|p))\.mp4', file_url)
+ get_quality = qualities(['500k', '480p', '1000k', '720p', '1080p'])
+ for filed in data['files']:
+ m_format = re.search(r'(\d+(k|p))\.mp4', filed['url'])
if m_format is not None:
format_id = m_format.group(1)
else:
- format_id = filed.attrib['bitrate']
+ format_id = filed['bitrate']
tbr = (
- int(filed.attrib['bitrate'])
- if filed.attrib['bitrate'].isdigit()
+ int(filed['bitrate'])
+ if filed['bitrate'].isdigit()
else None)
- try:
- quality = qualities.index(format_id)
- except ValueError:
- quality = -1
formats.append({
- 'url': file_url,
+ 'url': filed['url'],
'ext': 'mp4',
'tbr': tbr,
'format_id': format_id,
- 'quality': quality,
+ 'quality': get_quality(format_id),
})
self._sort_formats(formats)
'id': video_id,
'display_id': display_id,
'formats': formats,
- 'title': self._og_search_title(webpage),
- 'thumbnail': self._og_search_thumbnail(webpage),
- 'description': self._og_search_description(webpage),
+ 'title': data['title'],
+ 'thumbnail': data.get('thumb', {}).get('href'),
+ 'description': data.get('teaser'),
+ 'age_limit': self._family_friendly_search(webpage),
}
import json
import re
-from .subtitles import SubtitlesInfoExtractor
+from .common import InfoExtractor
from ..compat import (
compat_str,
)
-class TEDIE(SubtitlesInfoExtractor):
+class TEDIE(InfoExtractor):
_VALID_URL = r'''(?x)
(?P<proto>https?://)
(?P<type>www|embed(?:-ssl)?)(?P<urlmain>\.ted\.com/
'params': {
'skip_download': True,
},
+ }, {
+ # YouTube video
+ 'url': 'http://www.ted.com/talks/jeffrey_kluger_the_sibling_bond',
+ 'add_ie': ['Youtube'],
+ 'info_dict': {
+ 'id': 'aFBIPO-P7LM',
+ 'ext': 'mp4',
+ 'title': 'The hidden power of siblings: Jeff Kluger at TEDxAsheville',
+ 'description': 'md5:3d7a4f50d95ca5dd67104e2a20f43fe1',
+ 'uploader': 'TEDx Talks',
+ 'uploader_id': 'TEDxTalks',
+ 'upload_date': '20111216',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
}]
_NATIVE_FORMATS = {
talk_info = self._extract_info(webpage)['talks'][0]
- if talk_info.get('external') is not None:
- self.to_screen('Found video from %s' % talk_info['external']['service'])
+ external = talk_info.get('external')
+ if external:
+ service = external['service']
+ self.to_screen('Found video from %s' % service)
+ ext_url = None
+ if service.lower() == 'youtube':
+ ext_url = external.get('code')
return {
'_type': 'url',
- 'url': talk_info['external']['uri'],
+ 'url': ext_url or external['uri'],
}
formats = [{
self._sort_formats(formats)
video_id = compat_str(talk_info['id'])
- # subtitles
- video_subtitles = self.extract_subtitles(video_id, talk_info)
- if self._downloader.params.get('listsubtitles', False):
- self._list_available_subtitles(video_id, talk_info)
- return
thumbnail = talk_info['thumb']
if not thumbnail.startswith('http'):
'uploader': talk_info['speaker'],
'thumbnail': thumbnail,
'description': self._og_search_description(webpage),
- 'subtitles': video_subtitles,
+ 'subtitles': self._get_subtitles(video_id, talk_info),
'formats': formats,
'duration': talk_info.get('duration'),
}
- def _get_available_subtitles(self, video_id, talk_info):
+ def _get_subtitles(self, video_id, talk_info):
languages = [lang['languageCode'] for lang in talk_info.get('languages', [])]
if languages:
sub_lang_list = {}
for l in languages:
- url = 'http://www.ted.com/talks/subtitles/id/%s/lang/%s/format/srt' % (video_id, l)
- sub_lang_list[l] = url
+ sub_lang_list[l] = [
+ {
+ 'url': 'http://www.ted.com/talks/subtitles/id/%s/lang/%s/format/%s' % (video_id, l, ext),
+ 'ext': ext,
+ }
+ for ext in ['ted', 'srt']
+ ]
return sub_lang_list
else:
- self._downloader.report_warning('video doesn\'t have subtitles')
return {}
def _watch_info(self, url, name):
class TelecincoIE(MiTeleIE):
IE_NAME = 'telecinco.es'
- _VALID_URL = r'https?://www\.telecinco\.es/[^/]+/[^/]+/[^/]+/(?P<id>.*?)\.html'
+ _VALID_URL = r'https?://www\.telecinco\.es/[^/]+/[^/]+/(?:[^/]+/)?(?P<id>.*?)\.html'
- _TEST = {
+ _TESTS = [{
'url': 'http://www.telecinco.es/robinfood/temporada-01/t01xp14/Bacalao-cocochas-pil-pil_0_1876350223.html',
'info_dict': {
'id': 'MDSVID20141015_0058',
'title': 'Con Martín Berasategui, hacer un bacalao al ...',
'duration': 662,
},
- }
+ }, {
+ 'url': 'http://www.telecinco.es/informativos/nacional/Pablo_Iglesias-Informativos_Telecinco-entrevista-Pedro_Piqueras_2_1945155182.html',
+ 'only_matching': True,
+ }]
import re
from .common import InfoExtractor
-from ..utils import ExtractorError
class TheOnionIE(InfoExtractor):
- _VALID_URL = r'(?x)https?://(?:www\.)?theonion\.com/video/[^,]+,(?P<article_id>[0-9]+)/?'
+ _VALID_URL = r'https?://(?:www\.)?theonion\.com/video/[^,]+,(?P<id>[0-9]+)/?'
_TEST = {
'url': 'http://www.theonion.com/video/man-wearing-mm-jacket-gods-image,36918/',
'md5': '19eaa9a39cf9b9804d982e654dc791ee',
}
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- article_id = mobj.group('article_id')
-
- webpage = self._download_webpage(url, article_id)
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
video_id = self._search_regex(
r'"videoId":\s(\d+),', webpage, 'video ID')
thumbnail = self._og_search_thumbnail(webpage)
sources = re.findall(r'<source src="([^"]+)" type="([^"]+)"', webpage)
- if not sources:
- raise ExtractorError(
- 'No sources found for video %s' % video_id, expected=True)
-
formats = []
for src, type_ in sources:
if type_ == 'video/mp4':
})
elif type_ == 'application/x-mpegURL':
formats.extend(
- self._extract_m3u8_formats(src, video_id, preference=-1))
+ self._extract_m3u8_formats(src, display_id, preference=-1))
else:
self.report_warning(
'Encountered unexpected format: %s' % type_)
-
self._sort_formats(formats)
return {
'id': video_id,
+ 'display_id': display_id,
'title': title,
'formats': formats,
'thumbnail': thumbnail,
import re
import json
+import time
+import hmac
+import binascii
+import hashlib
-from .subtitles import SubtitlesInfoExtractor
+
+from .common import InfoExtractor
from ..compat import (
compat_str,
)
determine_ext,
ExtractorError,
xpath_with_ns,
+ unsmuggle_url,
)
_x = lambda p: xpath_with_ns(p, {'smil': 'http://www.w3.org/2005/SMIL21/Language'})
-class ThePlatformIE(SubtitlesInfoExtractor):
+class ThePlatformIE(InfoExtractor):
_VALID_URL = r'''(?x)
- (?:https?://(?:link|player)\.theplatform\.com/[sp]/[^/]+/
+ (?:https?://(?:link|player)\.theplatform\.com/[sp]/(?P<provider_id>[^/]+)/
(?P<config>(?:[^/\?]+/(?:swf|config)|onsite)/select/)?
|theplatform:)(?P<id>[^/\?&]+)'''
},
}
+ @staticmethod
+ def _sign_url(url, sig_key, sig_secret, life=600, include_qs=False):
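+        # Builds the "sig" query parameter: flags, a hex expiry timestamp, an
+        # HMAC-SHA1 checksum over flags/expiry/path, and the hex-encoded secret
+        # concatenated together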
+ flags = '10' if include_qs else '00'
+ expiration_date = '%x' % (int(time.time()) + life)
+
+        def str_to_hex(s):
+            return binascii.b2a_hex(s.encode('ascii')).decode('ascii')
+
+        def hex_to_str(h):
+            return binascii.a2b_hex(h)
+
+ relative_path = url.split('http://link.theplatform.com/s/')[1].split('?')[0]
+ clear_text = hex_to_str(flags + expiration_date + str_to_hex(relative_path))
+ checksum = hmac.new(sig_key.encode('ascii'), clear_text, hashlib.sha1).hexdigest()
+ sig = flags + expiration_date + checksum + str_to_hex(sig_secret)
+ return '%s&sig=%s' % (url, sig)
+
def _real_extract(self, url):
+ url, smuggled_data = unsmuggle_url(url, {})
+
mobj = re.match(self._VALID_URL, url)
+ provider_id = mobj.group('provider_id')
video_id = mobj.group('id')
- if mobj.group('config'):
+
+ if not provider_id:
+ provider_id = 'dJ5BDC'
+
+ if smuggled_data.get('force_smil_url', False):
+ smil_url = url
+ elif mobj.group('config'):
config_url = url + '&form=json'
config_url = config_url.replace('swf/', 'config/')
config_url = config_url.replace('onsite/', 'onsite/config/')
config = self._download_json(config_url, video_id, 'Downloading config')
smil_url = config['releaseUrl'] + '&format=SMIL&formats=MPEG4&manifest=f4m'
else:
- smil_url = ('http://link.theplatform.com/s/dJ5BDC/{0}/meta.smil?'
- 'format=smil&mbr=true'.format(video_id))
+ smil_url = ('http://link.theplatform.com/s/{0}/{1}/meta.smil?'
+ 'format=smil&mbr=true'.format(provider_id, video_id))
+
+ sig = smuggled_data.get('sig')
+ if sig:
+ smil_url = self._sign_url(smil_url, sig['key'], sig['secret'])
meta = self._download_xml(smil_url, video_id)
try:
else:
raise ExtractorError(error_msg, expected=True)
- info_url = 'http://link.theplatform.com/s/dJ5BDC/{0}?format=preview'.format(video_id)
+ info_url = 'http://link.theplatform.com/s/{0}/{1}?format=preview'.format(provider_id, video_id)
info_json = self._download_webpage(info_url, video_id)
info = json.loads(info_json)
captions = info.get('captions')
if isinstance(captions, list):
for caption in captions:
- lang, src = caption.get('lang'), caption.get('src')
- if lang and src:
- subtitles[lang] = src
-
- if self._downloader.params.get('listsubtitles', False):
- self._list_available_subtitles(video_id, subtitles)
- return
-
- subtitles = self.extract_subtitles(video_id, subtitles)
+ lang, src, mime = caption.get('lang', 'en'), caption.get('src'), caption.get('type')
+ subtitles[lang] = [{
+ 'ext': 'srt' if mime == 'text/srt' else 'ttml',
+ 'url': src,
+ }]
head = meta.find(_x('smil:head'))
body = meta.find(_x('smil:body'))
+# coding: utf-8
from __future__ import unicode_literals
-import json
+import re
from .common import InfoExtractor
+from ..utils import ExtractorError
class TriluliluIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?trilulilu\.ro/video-[^/]+/(?P<id>[^/]+)'
+ _VALID_URL = r'https?://(?:www\.)?trilulilu\.ro/(?:video-[^/]+/)?(?P<id>[^/#\?]+)'
_TEST = {
'url': 'http://www.trilulilu.ro/video-animatie/big-buck-bunny-1',
+ 'md5': 'c1450a00da251e2769b74b9005601cac',
'info_dict': {
- 'id': 'big-buck-bunny-1',
+ 'id': 'ae2899e124140b',
'ext': 'mp4',
'title': 'Big Buck Bunny',
'description': ':) pentru copilul din noi',
},
- # Server ignores Range headers (--test)
- 'params': {
- 'skip_download': True
- }
}
def _real_extract(self, url):
- video_id = self._match_id(url)
- webpage = self._download_webpage(url, video_id)
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+
+ if re.search(r'Fişierul nu este disponibil pentru vizionare în ţara dumneavoastră', webpage):
+ raise ExtractorError(
+ 'This video is not available in your country.', expected=True)
+ elif re.search('Fişierul poate fi accesat doar de către prietenii lui', webpage):
+ raise ExtractorError('This video is private.', expected=True)
+
+ flashvars_str = self._search_regex(
+ r'block_flash_vars\s*=\s*(\{[^\}]+\})', webpage, 'flashvars', fatal=False, default=None)
+ if flashvars_str:
+ flashvars = self._parse_json(flashvars_str, display_id)
+ else:
+ raise ExtractorError(
+ 'This page does not contain videos', expected=True)
+
+ if flashvars['isMP3'] == 'true':
+ raise ExtractorError(
+ 'Audio downloads are currently not supported', expected=True)
+
+ video_id = flashvars['hash']
title = self._og_search_title(webpage)
thumbnail = self._og_search_thumbnail(webpage)
- description = self._og_search_description(webpage)
-
- log_str = self._search_regex(
- r'block_flash_vars[ ]=[ ]({[^}]+})', webpage, 'log info')
- log = json.loads(log_str)
+ description = self._og_search_description(webpage, default=None)
format_url = ('http://fs%(server)s.trilulilu.ro/%(hash)s/'
- 'video-formats2' % log)
+ 'video-formats2' % flashvars)
format_doc = self._download_xml(
format_url, video_id,
note='Downloading formats',
'http://fs%(server)s.trilulilu.ro/stream.php?type=video'
'&source=site&hash=%(hash)s&username=%(userid)s&'
'key=ministhebest&format=%%s&sig=&exp=' %
- log)
+ flashvars)
formats = [
{
- 'format': fnode.text,
+ 'format_id': fnode.text.partition('-')[2],
'url': video_url_template % fnode.text,
'ext': fnode.text.partition('-')[0]
}
]
return {
- '_type': 'video',
'id': video_id,
+ 'display_id': display_id,
'formats': formats,
'title': title,
'description': description,
--- /dev/null
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ parse_iso8601,
+)
+
+
+class TV4IE(InfoExtractor):
+ IE_DESC = 'tv4.se and tv4play.se'
+ _VALID_URL = r'''(?x)https?://(?:www\.)?
+ (?:
+ tv4\.se/(?:[^/]+)/klipp/(?:.*)-|
+ tv4play\.se/
+ (?:
+ (?:program|barn)/(?:[^\?]+)\?video_id=|
+ iframe/video/|
+ film/|
+ sport/|
+ )
+ )(?P<id>[0-9]+)'''
+ _TESTS = [
+ {
+ 'url': 'http://www.tv4.se/kalla-fakta/klipp/kalla-fakta-5-english-subtitles-2491650',
+ 'md5': '909d6454b87b10a25aa04c4bdd416a9b',
+ 'info_dict': {
+ 'id': '2491650',
+ 'ext': 'mp4',
+ 'title': 'Kalla Fakta 5 (english subtitles)',
+ 'thumbnail': 're:^https?://.*\.jpg$',
+ 'timestamp': int,
+ 'upload_date': '20131125',
+ },
+ },
+ {
+ 'url': 'http://www.tv4play.se/iframe/video/3054113',
+ 'md5': '77f851c55139ffe0ebd41b6a5552489b',
+ 'info_dict': {
+ 'id': '3054113',
+ 'ext': 'mp4',
+ 'title': 'Så här jobbar ficktjuvarna - se avslöjande bilder',
+ 'thumbnail': 're:^https?://.*\.jpg$',
+ 'description': 'Unika bilder avslöjar hur turisternas fickor vittjas mitt på Stockholms central. Två experter på ficktjuvarna avslöjar knepen du ska se upp för.',
+ 'timestamp': int,
+ 'upload_date': '20150130',
+ },
+ },
+ {
+ 'url': 'http://www.tv4play.se/sport/3060959',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'http://www.tv4play.se/film/2378136',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'http://www.tv4play.se/barn/looney-tunes?video_id=3062412',
+ 'only_matching': True,
+ },
+ ]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ info = self._download_json(
+ 'http://www.tv4play.se/player/assets/%s.json' % video_id, video_id, 'Downloading video info JSON')
+
+        # If is_geo_restricted is true, it doesn't necessarily mean we can't download it
+ if info['is_geo_restricted']:
+ self.report_warning('This content might not be available in your country due to licensing restrictions.')
+ if info['requires_subscription']:
+ raise ExtractorError('This content requires subscription.', expected=True)
+
+ sources_data = self._download_json(
+ 'https://prima.tv4play.se/api/web/asset/%s/play.json?protocol=http&videoFormat=MP4' % video_id, video_id, 'Downloading sources JSON')
+ sources = sources_data['playback']
+
+ formats = []
+ for item in sources.get('items', {}).get('item', []):
+ ext, bitrate = item['mediaFormat'], item['bitrate']
+ formats.append({
+ 'format_id': '%s_%s' % (ext, bitrate),
+ 'tbr': bitrate,
+ 'ext': ext,
+ 'url': item['url'],
+ })
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': info['title'],
+ 'formats': formats,
+ 'description': info.get('description'),
+ 'timestamp': parse_iso8601(info.get('broadcast_date_time')),
+ 'duration': info.get('duration'),
+ 'thumbnail': info.get('image'),
+ 'is_live': sources.get('live'),
+ }
# encoding: utf-8
from __future__ import unicode_literals
+import re
+
from .common import InfoExtractor
from ..utils import (
float_or_none,
class TvigleIE(InfoExtractor):
IE_NAME = 'tvigle'
IE_DESC = 'Интернет-телевидение Tvigle.ru'
- _VALID_URL = r'http://(?:www\.)?tvigle\.ru/(?:[^/]+/)+(?P<id>[^/]+)/$'
+ _VALID_URL = r'https?://(?:www\.)?(?:tvigle\.ru/(?:[^/]+/)+(?P<display_id>[^/]+)/$|cloud\.tvigle\.ru/video/(?P<id>\d+))'
_TESTS = [
{
'duration': 186.080,
'age_limit': 0,
},
- },
+ }, {
+ 'url': 'https://cloud.tvigle.ru/video/5267604/',
+ 'only_matching': True,
+ }
]
def _real_extract(self, url):
- display_id = self._match_id(url)
-
- webpage = self._download_webpage(url, display_id)
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id')
+ display_id = mobj.group('display_id')
- video_id = self._html_search_regex(
- r'<li class="video-preview current_playing" id="(\d+)">', webpage, 'video id')
+ if not video_id:
+ webpage = self._download_webpage(url, display_id)
+ video_id = self._html_search_regex(
+ r'<li class="video-preview current_playing" id="(\d+)">',
+ webpage, 'video id')
video_data = self._download_json(
'http://cloud.tvigle.ru/api/play/video/%s/' % video_id, display_id)
expected=True)
def _download_json(self, url, video_id, note='Downloading JSON metadata'):
- response = super(TwitchBaseIE, self)._download_json(url, video_id, note)
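+        # Mimic the web player's AJAX requests and forward any stored
+        # api_token cookie, which the API seems to expect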
+ headers = {
+ 'Referer': 'http://api.twitch.tv/crossdomain/receiver.html?v=2',
+ 'X-Requested-With': 'XMLHttpRequest',
+ }
+ for cookie in self._downloader.cookiejar:
+ if cookie.name == 'api_token':
+ headers['Twitch-Api-Token'] = cookie.value
+ request = compat_urllib_request.Request(url, headers=headers)
+ response = super(TwitchBaseIE, self)._download_json(request, video_id, note)
self._handle_error(response)
return response
% (self._USHER_BASE, channel_id, compat_urllib_parse.urlencode(query).encode('utf-8')),
channel_id, 'mp4')
+ # prefer the 'source' stream, the others are limited to 30 fps
+ def _sort_source(f):
+ if f.get('m3u8_media') is not None and f['m3u8_media'].get('NAME') == 'Source':
+ return 1
+ return 0
+ formats = sorted(formats, key=_sort_source)
+
view_count = stream.get('viewers')
timestamp = parse_iso8601(stream.get('created_at'))
thumbnail = (
None if thumbnail_el is None else thumbnail_el.attrib.get('src'))
- formats = [{
- 'url': v.attrib['src'],
- 'width': int_or_none(v.attrib.get('width')),
- 'height': int_or_none(v.attrib.get('height')),
- 'filesize': int_or_none(v.attrib.get('size')),
- 'tbr': int_or_none(v.attrib.get('systemBitrate')) / 1000.0,
- 'ext': v.attrib.get('ext'),
- } for v in switch.findall('./video')
- if v.attrib.get('proto') == 'http']
+ formats = []
+ for v in switch.findall('./video'):
+ proto = v.attrib.get('proto')
+ if proto not in ['http', 'rtmp']:
+ continue
+ f = {
+ 'width': int_or_none(v.attrib.get('width')),
+ 'height': int_or_none(v.attrib.get('height')),
+ 'filesize': int_or_none(v.attrib.get('size')),
+                'tbr': int_or_none(v.attrib.get('systemBitrate'), scale=1000),
+ 'ext': v.attrib.get('ext'),
+ }
+ src = v.attrib['src']
+ if proto == 'http':
+ if self._is_valid_url(src, video_id):
+ f['url'] = src
+ formats.append(f)
+ elif proto == 'rtmp':
+ f.update({
+ 'url': v.attrib['streamer'],
+ 'play_path': src,
+ 'rtmp_real_time': True,
+ })
+ formats.append(f)
+ self._sort_formats(formats)
return {
'id': video_id,
import re
+from ..compat import compat_urlparse
from ..utils import (
ExtractorError,
unescapeHTML,
unified_strdate,
US_RATINGS,
)
-from .subtitles import SubtitlesInfoExtractor
+from .common import InfoExtractor
-class VikiIE(SubtitlesInfoExtractor):
+class VikiIE(InfoExtractor):
IE_NAME = 'viki'
_VALID_URL = r'^https?://(?:www\.)?viki\.com/videos/(?P<id>[0-9]+v)'
# subtitles
video_subtitles = self.extract_subtitles(video_id, info_webpage)
- if self._downloader.params.get('listsubtitles', False):
- self._list_available_subtitles(video_id, info_webpage)
- return
return {
'id': video_id,
'upload_date': upload_date,
}
- def _get_available_subtitles(self, video_id, info_webpage):
+ def _get_subtitles(self, video_id, info_webpage):
res = {}
- for sturl_html in re.findall(r'<track src="([^"]+)"/>', info_webpage):
+ for sturl_html in re.findall(r'<track src="([^"]+)"', info_webpage):
sturl = unescapeHTML(sturl_html)
m = re.search(r'/(?P<lang>[a-z]+)\.vtt', sturl)
if not m:
continue
- res[m.group('lang')] = sturl
+ res[m.group('lang')] = [{
+ 'url': compat_urlparse.urljoin('http://www.viki.com', sturl),
+ 'ext': 'vtt',
+ }]
return res
import json
import re
import itertools
+import hashlib
from .common import InfoExtractor
-from .subtitles import SubtitlesInfoExtractor
from ..compat import (
compat_HTTPError,
compat_urllib_parse,
InAdvancePagedList,
int_or_none,
RegexNotFoundError,
+ smuggle_url,
std_headers,
unsmuggle_url,
urlencode_postdata,
self._download_webpage(login_request, None, False, 'Wrong login info')
-class VimeoIE(VimeoBaseInfoExtractor, SubtitlesInfoExtractor):
+class VimeoIE(VimeoBaseInfoExtractor):
"""Information extractor for vimeo.com."""
# _VALID_URL matches Vimeo URLs
def _verify_video_password(self, url, video_id, webpage):
password = self._downloader.params.get('videopassword', None)
if password is None:
- raise ExtractorError('This video is protected by a password, use the --video-password option')
+ raise ExtractorError('This video is protected by a password, use the --video-password option', expected=True)
token = self._search_regex(r'xsrft: \'(.*?)\'', webpage, 'login token')
data = compat_urllib_parse.urlencode({
'password': password,
password_request = compat_urllib_request.Request(pass_url + '/password', data)
password_request.add_header('Content-Type', 'application/x-www-form-urlencoded')
password_request.add_header('Cookie', 'xsrft=%s' % token)
- self._download_webpage(password_request, video_id,
- 'Verifying the password',
- 'Wrong password')
+ return self._download_webpage(
+ password_request, video_id,
+ 'Verifying the password', 'Wrong password')
def _verify_player_video_password(self, url, video_id):
password = self._downloader.params.get('videopassword', None)
if mobj.group('pro') or mobj.group('player'):
url = 'http://player.vimeo.com/video/' + video_id
+ password = self._downloader.params.get('videopassword', None)
+ if password:
+ headers['Cookie'] = '%s_password=%s' % (
+ video_id, hashlib.md5(password.encode('utf-8')).hexdigest())
+
# Retrieve video webpage to extract further information
request = compat_urllib_request.Request(url, None, headers)
try:
if re.search('The creator of this video has not given you permission to embed it on this domain.', webpage):
raise ExtractorError('The author has restricted the access to this video, try with the "--referer" option')
- if re.search('<form[^>]+?id="pw_form"', webpage) is not None:
+ if re.search(r'<form[^>]+?id="pw_form"', webpage) is not None:
+ if data and '_video_password_verified' in data:
+ raise ExtractorError('video password verification failed!')
self._verify_video_password(url, video_id, webpage)
- return self._real_extract(url)
+ return self._real_extract(
+ smuggle_url(url, {'_video_password_verified': 'verified'}))
else:
raise ExtractorError('Unable to extract info section',
cause=e)
text_tracks = config['request'].get('text_tracks')
if text_tracks:
for tt in text_tracks:
- subtitles[tt['lang']] = 'http://vimeo.com' + tt['url']
-
- video_subtitles = self.extract_subtitles(video_id, subtitles)
- if self._downloader.params.get('listsubtitles', False):
- self._list_available_subtitles(video_id, subtitles)
- return
+ subtitles[tt['lang']] = [{
+ 'ext': 'vtt',
+ 'url': 'http://vimeo.com' + tt['url'],
+ }]
return {
'id': video_id,
'view_count': view_count,
'like_count': like_count,
'comment_count': comment_count,
- 'subtitles': video_subtitles,
+ 'subtitles': subtitles,
}
_TESTS = [{
'url': 'http://vimeo.com/channels/tributes',
'info_dict': {
+ 'id': 'tributes',
'title': 'Vimeo Tributes',
},
'playlist_mincount': 25,
def _extract_list_title(self, webpage):
return self._html_search_regex(self._TITLE_RE, webpage, 'list title')
+ def _login_list_password(self, page_url, list_id, webpage):
+ login_form = self._search_regex(
+ r'(?s)<form[^>]+?id="pw_form"(.*?)</form>',
+ webpage, 'login form', default=None)
+ if not login_form:
+ return webpage
+
+ password = self._downloader.params.get('videopassword', None)
+ if password is None:
+ raise ExtractorError('This album is protected by a password, use the --video-password option', expected=True)
+ fields = dict(re.findall(r'''(?x)<input\s+
+ type="hidden"\s+
+ name="([^"]+)"\s+
+ value="([^"]*)"
+ ''', login_form))
+ token = self._search_regex(r'xsrft: \'(.*?)\'', webpage, 'login token')
+ fields['token'] = token
+ fields['password'] = password
+ post = compat_urllib_parse.urlencode(fields)
+ password_path = self._search_regex(
+ r'action="([^"]+)"', login_form, 'password URL')
+ password_url = compat_urlparse.urljoin(page_url, password_path)
+ password_request = compat_urllib_request.Request(password_url, post)
+ password_request.add_header('Content-type', 'application/x-www-form-urlencoded')
+ self._set_cookie('vimeo.com', 'xsrft', token)
+
+ return self._download_webpage(
+ password_request, list_id,
+ 'Verifying the password', 'Wrong password')
+
def _extract_videos(self, list_id, base_url):
video_ids = []
for pagenum in itertools.count(1):
+ page_url = self._page_url(base_url, pagenum)
webpage = self._download_webpage(
- self._page_url(base_url, pagenum), list_id,
+ page_url, list_id,
'Downloading page %s' % pagenum)
+
+ if pagenum == 1:
+ webpage = self._login_list_password(page_url, list_id, webpage)
+
video_ids.extend(re.findall(r'id="clip_(\d+?)"', webpage))
if re.search(self._MORE_PAGES_INDICATOR, webpage, re.DOTALL) is None:
break
'url': 'http://vimeo.com/nkistudio/videos',
'info_dict': {
'title': 'Nki',
+ 'id': 'nkistudio',
},
'playlist_mincount': 66,
}]
_TESTS = [{
'url': 'http://vimeo.com/album/2632481',
'info_dict': {
+ 'id': '2632481',
'title': 'Staff Favorites: November 2013',
},
'playlist_mincount': 13,
+ }, {
+ 'note': 'Password-protected album',
+ 'url': 'https://vimeo.com/album/3253534',
+ 'info_dict': {
+ 'title': 'test',
+ 'id': '3253534',
+ },
+ 'playlist_count': 1,
+ 'params': {
+ 'videopassword': 'youtube-dl',
+ }
}]
def _page_url(self, base_url, pagenum):
return '%s/page:%d/' % (base_url, pagenum)
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- album_id = mobj.group('id')
+ album_id = self._match_id(url)
return self._extract_videos(album_id, 'http://vimeo.com/album/%s' % album_id)
_TESTS = [{
'url': 'http://vimeo.com/groups/rolexawards',
'info_dict': {
+ 'id': 'rolexawards',
'title': 'Rolex Awards for Enterprise',
},
'playlist_mincount': 73,
'url': 'https://vimeo.com/user755559/likes/',
'playlist_mincount': 293,
"info_dict": {
+ 'id': 'user755559_likes',
"description": "See all the videos urza likes",
"title": 'Videos urza likes',
},
_TEMPLATE_URL = 'https://vk.com/videos'
_TEST = {
'url': 'http://vk.com/videos205387401',
+ 'info_dict': {
+ 'id': '205387401',
+ },
'playlist_mincount': 4,
}
import re
-from .subtitles import SubtitlesInfoExtractor
+from .common import InfoExtractor
from ..utils import (
xpath_text,
int_or_none,
)
-class WallaIE(SubtitlesInfoExtractor):
+class WallaIE(InfoExtractor):
_VALID_URL = r'http://vod\.walla\.co\.il/[^/]+/(?P<id>\d+)/(?P<display_id>.+)'
_TEST = {
'url': 'http://vod.walla.co.il/movie/2642630/one-direction-all-for-one',
subtitles = {}
for subtitle in item.findall('./subtitles/subtitle'):
lang = xpath_text(subtitle, './title')
- subtitles[self._SUBTITLE_LANGS.get(lang, lang)] = xpath_text(subtitle, './src')
-
- if self._downloader.params.get('listsubtitles', False):
- self._list_available_subtitles(video_id, subtitles)
- return
-
- subtitles = self.extract_subtitles(video_id, subtitles)
+ subtitles[self._SUBTITLE_LANGS.get(lang, lang)] = [{
+ 'ext': 'srt',
+ 'url': xpath_text(subtitle, './src'),
+ }]
formats = []
for quality in item.findall('./qualities/quality'):
'title': 'Servicezeit',
'description': 'md5:c8f43e5e815eeb54d0b96df2fba906cb',
'upload_date': '20140310',
+ 'is_live': False,
},
'params': {
'skip_download': True,
'title': 'Marga Spiegel ist tot',
'description': 'md5:2309992a6716c347891c045be50992e4',
'upload_date': '20140311',
+ 'is_live': False,
},
'params': {
'skip_download': True,
'title': 'Erlebte Geschichten: Marga Spiegel (29.11.2009)',
'description': 'md5:2309992a6716c347891c045be50992e4',
'upload_date': '20091129',
+ 'is_live': False,
},
},
{
'title': 'Flavia Coelho: Amar é Amar',
'description': 'md5:7b29e97e10dfb6e265238b32fa35b23a',
'upload_date': '20140717',
+ 'is_live': False,
},
},
{
'info_dict': {
'id': 'mediathek/video/sendungen/quarks_und_co/filterseite-quarks-und-co100',
}
+ },
+ {
+ 'url': 'http://www1.wdr.de/mediathek/video/livestream/index.html',
+ 'info_dict': {
+ 'id': 'mdb-103364',
+ 'title': 're:^WDR Fernsehen [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
+ 'description': 'md5:ae2ff888510623bf8d4b115f95a9b7c9',
+ 'ext': 'flv',
+ 'upload_date': '20150212',
+ 'is_live': True,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
}
]
video_url = flashvars['dslSrc'][0]
title = flashvars['trackerClipTitle'][0]
thumbnail = flashvars['startPicture'][0] if 'startPicture' in flashvars else None
+ is_live = flashvars.get('isLive', ['0'])[0] == '1'
+
+ if is_live:
+ title = self._live_title(title)
if 'trackerClipAirTime' in flashvars:
upload_date = flashvars['trackerClipAirTime'][0]
if video_url.endswith('.f4m'):
video_url += '?hdcore=3.2.0&plugin=aasp-3.2.0.77.18'
ext = 'flv'
+ elif video_url.endswith('.smil'):
+ fmt = self._extract_smil_formats(video_url, page_id)[0]
+ video_url = fmt['url']
+ sep = '&' if '?' in video_url else '?'
+ video_url += sep
+ video_url += 'hdcore=3.3.0&plugin=aasp-3.3.0.99.43'
+ ext = fmt['ext']
else:
ext = determine_ext(video_url)
'description': description,
'thumbnail': thumbnail,
'upload_date': upload_date,
+ 'is_live': is_live,
}
description = self._html_search_meta('description', webpage)
thumbnail = self._og_search_thumbnail(webpage)
- story_filename = self._search_regex(
- r'\.storyFileName\("([^"]+)"\)', webpage, 'story filename')
- speaker_id = self._search_regex(
- r'\.speakerId\("([^"]+)"\)', webpage, 'speaker ID')
- story_id = self._search_regex(
- r'\.storyId\((\d+)\)', webpage, 'story ID')
- speaker_type = self._search_regex(
- r'\.speakerType\("([^"]+)"\)', webpage, 'speaker type')
- great_life = self._search_regex(
- r'isGreatLifeStory\s*=\s*(true|false)', webpage, 'great life story')
+ embed_params = [s.strip(" \r\n\t'") for s in self._search_regex(
+ r'(?s)\$\("#embedCode"\).html\(getEmbedCode\((.*?)\)',
+ webpage, 'embed params').split(',')]
+
+ (
+ _, speaker_id, story_id, story_duration,
+ speaker_type, great_life, _thumbnail, _has_subtitles,
+ story_filename, _story_order) = embed_params
+
is_great_life_series = great_life == 'true'
- duration = int_or_none(self._search_regex(
- r'\.duration\((\d+)\)', webpage, 'duration', fatal=False))
+ duration = int_or_none(story_duration)
# URL building, see: http://www.webofstories.com/scripts/player.js
ms_prefix = ''
'id': '1BD01A4C-BFE8-40A5-A42F-8A8AF9898B1A',
'ext': 'mp4',
'upload_date': '20150202',
- 'uploader_id': 'bbright',
- 'creator': 'bbright',
+ 'uploader_id': 'jdesai',
+ 'creator': 'jdesai',
'categories': list, # a long list
'duration': 90,
'title': 'Bills Coach Rex Ryan Updates His Old Jets Tattoo',
'id': 'kVTUy_G222_',
'ext': 'mp4',
'title': 'strange erotica',
- 'description': 'http://www.xtube.com an ET kind of thing',
+ 'description': 'contains:an ET kind of thing',
'uploader': 'greenshowers',
'duration': 450,
'age_limit': 18,
_TESTS = [
{
'url': 'http://screen.yahoo.com/julian-smith-travis-legg-watch-214727115.html',
- 'md5': '4962b075c08be8690a922ee026d05e69',
'info_dict': {
'id': '2d25e626-2378-391f-ada0-ddaf1417e588',
'ext': 'mp4',
--- /dev/null
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_urlparse
+from ..utils import (
+ float_or_none,
+ month_by_abbreviation,
+)
+
+
+class YamIE(InfoExtractor):
+ _VALID_URL = r'http://mymedia\.yam\.com/m/(?P<id>\d+)'
+
+ _TESTS = [{
+ # An audio hosted on Yam
+ 'url': 'http://mymedia.yam.com/m/2283921',
+ 'md5': 'c011b8e262a52d5473d9c2e3c9963b9c',
+ 'info_dict': {
+ 'id': '2283921',
+ 'ext': 'mp3',
+ 'title': '發現 - 趙薇 京華煙雲主題曲',
+ 'uploader_id': 'princekt',
+ 'upload_date': '20080807',
+ 'duration': 313.0,
+ }
+ }, {
+ # An external video hosted on YouTube
+ 'url': 'http://mymedia.yam.com/m/3598173',
+ 'md5': '0238ceec479c654e8c2f1223755bf3e9',
+ 'info_dict': {
+ 'id': 'pJ2Deys283c',
+ 'ext': 'mp4',
+ 'upload_date': '20150202',
+ 'uploader': '新莊社大瑜伽社',
+ 'description': 'md5:f5cc72f0baf259a70fb731654b0d2eff',
+ 'uploader_id': '2323agoy',
+ 'title': '外婆的澎湖灣KTV-潘安邦',
+ }
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ page = self._download_webpage(url, video_id)
+
+ # Is it hosted externally on YouTube?
+ youtube_url = self._html_search_regex(
+ r'<embed src="(http://www\.youtube\.com/[^"]+)"',
+ page, 'YouTube url', default=None)
+ if youtube_url:
+ return self.url_result(youtube_url, 'Youtube')
+
+ api_page = self._download_webpage(
+ 'http://mymedia.yam.com/api/a/?pID=' + video_id, video_id,
+ note='Downloading API page')
+ api_result_obj = compat_urlparse.parse_qs(api_page)
+
+ uploader_id = self._html_search_regex(
+ r'<!-- 發表作者 -->:[\n ]+<a href="/([a-z]+)"',
+ page, 'uploader id', fatal=False)
+ mobj = re.search(r'<!-- 發表於 -->(?P<mon>[A-Z][a-z]{2}) ' +
+ r'(?P<day>\d{1,2}), (?P<year>\d{4})', page)
+ if mobj:
+ upload_date = '%s%02d%02d' % (
+ mobj.group('year'),
+ month_by_abbreviation(mobj.group('mon')),
+ int(mobj.group('day')))
+ else:
+ upload_date = None
+ duration = float_or_none(api_result_obj['totaltime'][0], scale=1000)
+
+ return {
+ 'id': video_id,
+ 'url': api_result_obj['mp3file'][0],
+ 'title': self._html_search_meta('description', page),
+ 'duration': duration,
+ 'uploader_id': uploader_id,
+ 'upload_date': upload_date,
+ }
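# Sketch of the API exchange (an assumption inferred from the parsing above,
# not quoted from the patch): the endpoint returns a query-string-style body
# such as
#   mp3file=http%3A%2F%2F...%2Fsong.mp3&totaltime=313000
# which compat_urlparse.parse_qs decodes to
#   {'mp3file': ['http://.../song.mp3'], 'totaltime': ['313000']}
# 'totaltime' is in milliseconds, hence float_or_none(..., scale=1000).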
import traceback
from .common import InfoExtractor, SearchInfoExtractor
-from .subtitles import SubtitlesInfoExtractor
from ..jsinterp import JSInterpreter
from ..swfinterp import SWFInterpreter
from ..compat import (
from ..utils import (
clean_html,
ExtractorError,
+ float_or_none,
get_element_by_attribute,
get_element_by_id,
int_or_none,
return
-class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
+class YoutubeIE(YoutubeBaseInfoExtractor):
IE_DESC = 'YouTube.com'
_VALID_URL = r"""(?x)^
(
if cache_spec is not None:
return lambda s: ''.join(s[i] for i in cache_spec)
+ download_note = (
+ 'Downloading player %s' % player_url
+ if self._downloader.params.get('verbose') else
+ 'Downloading %s player %s' % (player_type, player_id)
+ )
if player_type == 'js':
code = self._download_webpage(
player_url, video_id,
- note='Downloading %s player %s' % (player_type, player_id),
+ note=download_note,
errnote='Download of %s failed' % player_url)
res = self._parse_sig_js(code)
elif player_type == 'swf':
urlh = self._request_webpage(
player_url, video_id,
- note='Downloading %s player %s' % (player_type, player_id),
+ note=download_note,
errnote='Download of %s failed' % player_url)
code = urlh.read()
res = self._parse_sig_swf(code)
else:
assert False, 'Invalid player type %r' % player_type
- if cache_spec is None:
- test_string = ''.join(map(compat_chr, range(len(example_sig))))
- cache_res = res(test_string)
- cache_spec = [ord(c) for c in cache_res]
+ test_string = ''.join(map(compat_chr, range(len(example_sig))))
+ cache_res = res(test_string)
+ cache_spec = [ord(c) for c in cache_res]
self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
return res
raise ExtractorError(
'Signature extraction failed: ' + tb, cause=e)
- def _get_available_subtitles(self, video_id, webpage):
+ def _get_subtitles(self, video_id, webpage):
try:
subs_doc = self._download_xml(
'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
lang = track.attrib['lang_code']
if lang in sub_lang_list:
continue
- params = compat_urllib_parse.urlencode({
- 'lang': lang,
- 'v': video_id,
- 'fmt': self._downloader.params.get('subtitlesformat', 'srt'),
- 'name': track.attrib['name'].encode('utf-8'),
- })
- url = 'https://www.youtube.com/api/timedtext?' + params
- sub_lang_list[lang] = url
+ sub_formats = []
+ for ext in ['sbv', 'vtt', 'srt']:
+ params = compat_urllib_parse.urlencode({
+ 'lang': lang,
+ 'v': video_id,
+ 'fmt': ext,
+ 'name': track.attrib['name'].encode('utf-8'),
+ })
+ sub_formats.append({
+ 'url': 'https://www.youtube.com/api/timedtext?' + params,
+ 'ext': ext,
+ })
+ sub_lang_list[lang] = sub_formats
if not sub_lang_list:
self._downloader.report_warning('video doesn\'t have subtitles')
return {}
return sub_lang_list
- def _get_available_automatic_caption(self, video_id, webpage):
+ def _get_automatic_captions(self, video_id, webpage):
"""We need the webpage for getting the captions url, pass it as an
argument to speed up the process."""
- sub_format = self._downloader.params.get('subtitlesformat', 'srt')
self.to_screen('%s: Looking for automatic captions' % video_id)
mobj = re.search(r';ytplayer.config = ({.*?});', webpage)
err_msg = 'Couldn\'t find automatic captions for %s' % video_id
sub_lang_list = {}
for lang_node in caption_list.findall('target'):
sub_lang = lang_node.attrib['lang_code']
- params = compat_urllib_parse.urlencode({
- 'lang': original_lang,
- 'tlang': sub_lang,
- 'fmt': sub_format,
- 'ts': timestamp,
- 'kind': caption_kind,
- })
- sub_lang_list[sub_lang] = caption_url + '&' + params
+ sub_formats = []
+ for ext in ['sbv', 'vtt', 'srt']:
+ params = compat_urllib_parse.urlencode({
+ 'lang': original_lang,
+ 'tlang': sub_lang,
+ 'fmt': ext,
+ 'ts': timestamp,
+ 'kind': caption_kind,
+ })
+ sub_formats.append({
+ 'url': caption_url + '&' + params,
+ 'ext': ext,
+ })
+ sub_lang_list[sub_lang] = sub_formats
return sub_lang_list
# An extractor error can be raise by the download process if there are
# no automatic captions but there are subtitles
fo for fo in formats
if fo['format_id'] == format_id)
except StopIteration:
- f.update(self._formats.get(format_id, {}).items())
- formats.append(f)
+ full_info = self._formats.get(format_id, {}).copy()
+ full_info.update(f)
+ formats.append(full_info)
else:
existing_format.update(f)
return formats
# subtitles
video_subtitles = self.extract_subtitles(video_id, video_webpage)
-
- if self._downloader.params.get('listsubtitles', False):
- self._list_available_subtitles(video_id, video_webpage)
- return
+ automatic_captions = self.extract_automatic_captions(video_id, video_webpage)
if 'length_seconds' not in video_info:
self._downloader.report_warning('unable to extract video duration')
'description': video_description,
'categories': video_categories,
'subtitles': video_subtitles,
+ 'automatic_captions': automatic_captions,
'duration': video_duration,
'age_limit': 18 if age_gate else 0,
'annotations': video_annotations,
'view_count': view_count,
'like_count': like_count,
'dislike_count': dislike_count,
+ 'average_rating': float_or_none(video_info.get('avg_rating', [None])[0]),
'formats': formats,
}
| p/
)
(
- (?:PL|LL|EC|UU|FL|RD)?[0-9A-Za-z-_]{10,}
+ (?:PL|LL|EC|UU|FL|RD|UL)?[0-9A-Za-z-_]{10,}
# Top tracks, they can also include dots
|(?:MC)[\w\.]*
)
.*
|
- ((?:PL|LL|EC|UU|FL|RD)[0-9A-Za-z-_]{10,})
+ ((?:PL|LL|EC|UU|FL|RD|UL)[0-9A-Za-z-_]{10,})
)"""
_TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s'
_VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&[^"]*?index=(?P<index>\d+)'
for vid_id in ids]
def _extract_mix(self, playlist_id):
- # The mixes are generated from a a single video
+ # The mixes are generated from a single video
# the id of the playlist is just 'RD' + video_id
url = 'https://youtube.com/watch?v=%s&list=%s' % (playlist_id[-11:], playlist_id)
webpage = self._download_webpage(
else:
self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
- if playlist_id.startswith('RD'):
+ if playlist_id.startswith('RD') or playlist_id.startswith('UL'):
# Mixes require a custom extraction process
return self._extract_mix(playlist_id)
--- /dev/null
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ parse_duration,
+ parse_iso8601,
+ xpath_with_ns,
+ xpath_text,
+ int_or_none,
+)
+
+
+class ZapiksIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?zapiks\.(?:fr|com)/(?:(?:[a-z]{2}/)?(?P<display_id>.+?)\.html|index\.php\?.*\bmedia_id=(?P<id>\d+))'
+ _TESTS = [
+ {
+ 'url': 'http://www.zapiks.fr/ep2s3-bon-appetit-eh-be-viva.html',
+ 'md5': 'aeb3c473b2d564b2d46d664d28d5f050',
+ 'info_dict': {
+ 'id': '80798',
+ 'ext': 'mp4',
+ 'title': 'EP2S3 - Bon Appétit - Eh bé viva les pyrénées con!',
+ 'description': 'md5:7054d6f6f620c6519be1fe710d4da847',
+ 'thumbnail': 're:^https?://.*\.jpg$',
+ 'duration': 528,
+ 'timestamp': 1359044972,
+ 'upload_date': '20130124',
+ 'view_count': int,
+ 'comment_count': int,
+ },
+ },
+ {
+ 'url': 'http://www.zapiks.com/ep3s5-bon-appetit-baqueira-m-1.html',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'http://www.zapiks.com/nl/ep3s5-bon-appetit-baqueira-m-1.html',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'http://www.zapiks.fr/index.php?action=playerIframe&media_id=118046&width=640&height=360&autoStart=false&language=fr',
+ 'only_matching': True,
+ },
+ ]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id')
+ display_id = mobj.group('display_id') or video_id
+
+ webpage = self._download_webpage(url, display_id)
+
+ if not video_id:
+ video_id = self._search_regex(
+ r'data-media-id="(\d+)"', webpage, 'video id')
+
+ playlist = self._download_xml(
+ 'http://www.zapiks.fr/view/index.php?action=playlist&media_id=%s&lang=en' % video_id,
+ display_id)
+
+ NS_MAP = {
+ 'jwplayer': 'http://rss.jwpcdn.com/'
+ }
+
+ def ns(path):
+ return xpath_with_ns(path, NS_MAP)
+
+ item = playlist.find('./channel/item')
+
+ title = xpath_text(item, 'title', 'title') or self._og_search_title(webpage)
+ description = self._og_search_description(webpage, default=None)
+ thumbnail = xpath_text(
+ item, ns('./jwplayer:image'), 'thumbnail') or self._og_search_thumbnail(webpage, default=None)
+ duration = parse_duration(self._html_search_meta(
+ 'duration', webpage, 'duration', default=None))
+ timestamp = parse_iso8601(self._html_search_meta(
+ 'uploadDate', webpage, 'upload date', default=None), ' ')
+
+ view_count = int_or_none(self._search_regex(
+ r'UserPlays:(\d+)', webpage, 'view count', default=None))
+ comment_count = int_or_none(self._search_regex(
+ r'UserComments:(\d+)', webpage, 'comment count', default=None))
+
+ formats = []
+ for source in item.findall(ns('./jwplayer:source')):
+ format_id = source.attrib['label']
+ f = {
+ 'url': source.attrib['file'],
+ 'format_id': format_id,
+ }
+ m = re.search(r'^(?P<height>\d+)[pP]', format_id)
+ if m:
+ f['height'] = int(m.group('height'))
+ formats.append(f)
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'duration': duration,
+ 'timestamp': timestamp,
+ 'view_count': view_count,
+ 'comment_count': comment_count,
+ 'formats': formats,
+ }
def __init__(self, code, objects=None):
if objects is None:
objects = {}
- self.code = self._remove_comments(code)
+ self.code = code
self._functions = {}
self._objects = objects
- def _remove_comments(self, code):
- return re.sub(r'(?s)/\*.*?\*/', '', code)
-
def interpret_statement(self, stmt, local_vars, allow_recursion=100):
if allow_recursion < 0:
raise ExtractorError('Recursion limit reached')
action='store_const', dest='extract_flat', const='in_playlist',
default=False,
help='Do not extract the videos of a playlist, only list them.')
+ general.add_option(
+ '--no-color', '--no-colors',
+ action='store_true', dest='no_color',
+ default=False,
+ help='Do not emit color codes in output.')
network = optparse.OptionGroup(parser, 'Network Options')
network.add_option(
'--max-views',
metavar='COUNT', dest='max_views', default=None, type=int,
help='Do not download any videos with more than COUNT views')
+ selection.add_option(
+ '--match-filter',
+ metavar='FILTER', dest='match_filter', default=None,
+ help=(
+ '(Experimental) Generic video filter. '
+ 'Specify any key (see help for -o for a list of available keys) to'
+ ' match if the key is present, '
+ '!key to check if the key is not present, '
+ 'key > NUMBER (like "comment_count > 12", also works with '
+ '>=, <, <=, !=, =) to compare against a number, and '
+ '& to require multiple matches. '
+ 'Values which are not known are excluded unless you'
+ ' put a question mark (?) after the operator. '
+ 'For example, to only match videos that have been liked more than '
+ '100 times and disliked less than 50 times (or the dislike '
+ 'functionality is not available at the given service), but which '
+ 'also have a description, use --match-filter '
+ '"like_count > 100 & dislike_count <? 50 & description" .'
+ ))
selection.add_option(
'--no-playlist',
action='store_true', dest='noplaylist', default=False,
help='If the URL refers to a video and a playlist, download only the video.')
+ selection.add_option(
+ '--yes-playlist',
+ action='store_false', dest='noplaylist', default=False,
+ help='If the URL refers to a video and a playlist, download the playlist.')
selection.add_option(
'--age-limit',
metavar='YEARS', dest='age_limit', default=None, type=int,
' You can filter the video results by putting a condition in'
' brackets, as in -f "best[height=720]"'
' (or -f "[filesize>10M]"). '
- ' This works for filesize, height, width, tbr, abr, vbr, and fps'
- ' and the comparisons <, <=, >, >=, =, != .'
+ ' This works for filesize, height, width, tbr, abr, vbr, asr, and fps'
+ ' and the comparisons <, <=, >, >=, =, !='
+ ' and for ext, acodec, vcodec, container, and protocol'
+ ' and the comparisons =, != .'
' Formats for which the value is not known are excluded unless you'
' put a question mark (?) after the operator.'
' You can combine format filters, so '
help='lists all available subtitles for the video')
subtitles.add_option(
'--sub-format',
- action='store', dest='subtitlesformat', metavar='FORMAT', default='srt',
- help='subtitle format (default=srt) ([sbv/vtt] youtube only)')
+ action='store', dest='subtitlesformat', metavar='FORMAT', default='best',
+ help='subtitle format; accepts a formats preference, for example: "ass/srt/best"')
subtitles.add_option(
'--sub-lang', '--sub-langs', '--srt-lang',
action='callback', dest='subtitleslangs', metavar='LANGS', type='str',
'--xattr-set-filesize',
dest='xattr_set_filesize', action='store_true',
help='(experimental) set file xattribute ytdl.filesize with expected filesize')
+ downloader.add_option(
+ '--hls-prefer-native',
+ dest='hls_prefer_native', action='store_true',
+ help='(experimental) Use the native HLS downloader instead of ffmpeg.')
downloader.add_option(
'--external-downloader',
dest='external_downloader', metavar='COMMAND',
action='store_true', dest='youtube_print_sig_code', default=False,
help=optparse.SUPPRESS_HELP)
verbosity.add_option(
- '--print-traffic',
+ '--print-traffic', '--dump-headers',
dest='debug_printtraffic', action='store_true', default=False,
help='Display sent and read HTTP traffic')
verbosity.add_option(
'--prefer-ffmpeg',
action='store_true', dest='prefer_ffmpeg',
help='Prefer ffmpeg over avconv for running the postprocessors')
+ postproc.add_option(
+ '--ffmpeg-location', '--avconv-location', metavar='PATH',
+ dest='ffmpeg_location',
+ help='Location of the ffmpeg/avconv binary; either the path to the binary or its containing directory.')
postproc.add_option(
'--exec',
metavar='CMD', dest='exec_cmd',
help='Execute a command on the file after downloading, similar to find\'s -exec syntax. Example: --exec \'adb push {} /sdcard/Music/ && rm {}\'')
+ postproc.add_option(
+ '--convert-subtitles', '--convert-subs',
+ metavar='FORMAT', dest='convertsubtitles', default=None,
+ help='Convert the subtitles to another format (currently supported: srt|ass|vtt)')
parser.add_option_group(general)
parser.add_option_group(network)
if opts.verbose:
write_string('[debug] Override config: ' + repr(overrideArguments) + '\n')
else:
- commandLineConf = sys.argv[1:]
- if '--ignore-config' in commandLineConf:
- systemConf = []
- userConf = []
+ command_line_conf = sys.argv[1:]
+ if '--ignore-config' in command_line_conf:
+ system_conf = []
+ user_conf = []
else:
- systemConf = _readOptions('/etc/youtube-dl.conf')
- if '--ignore-config' in systemConf:
- userConf = []
+ system_conf = _readOptions('/etc/youtube-dl.conf')
+ if '--ignore-config' in system_conf:
+ user_conf = []
else:
- userConf = _readUserConf()
- argv = systemConf + userConf + commandLineConf
+ user_conf = _readUserConf()
+ argv = system_conf + user_conf + command_line_conf
opts, args = parser.parse_args(argv)
if opts.verbose:
- write_string('[debug] System config: ' + repr(_hide_login_info(systemConf)) + '\n')
- write_string('[debug] User config: ' + repr(_hide_login_info(userConf)) + '\n')
- write_string('[debug] Command-line args: ' + repr(_hide_login_info(commandLineConf)) + '\n')
+ write_string('[debug] System config: ' + repr(_hide_login_info(system_conf)) + '\n')
+ write_string('[debug] User config: ' + repr(_hide_login_info(user_conf)) + '\n')
+ write_string('[debug] Command-line args: ' + repr(_hide_login_info(command_line_conf)) + '\n')
return parser, opts, args
FFmpegMergerPP,
FFmpegMetadataPP,
FFmpegVideoConvertorPP,
+ FFmpegSubtitlesConvertorPP,
)
from .xattrpp import XAttrMetadataPP
from .execafterdownload import ExecAfterDownloadPP
'FFmpegMergerPP',
'FFmpegMetadataPP',
'FFmpegPostProcessor',
+ 'FFmpegSubtitlesConvertorPP',
'FFmpegVideoConvertorPP',
'XAttrMetadataPP',
]
from __future__ import unicode_literals
+import io
import os
import subprocess
import sys
class FFmpegPostProcessor(PostProcessor):
def __init__(self, downloader=None, deletetempfiles=False):
PostProcessor.__init__(self, downloader)
- self._versions = self.get_versions()
self._deletetempfiles = deletetempfiles
+ self._determine_executables()
def check_version(self):
- if not self._executable:
+ if not self.available:
raise FFmpegPostProcessorError('ffmpeg or avconv not found. Please install one.')
- required_version = '10-0' if self._uses_avconv() else '1.0'
+ required_version = '10-0' if self.basename == 'avconv' else '1.0'
if is_outdated_version(
- self._versions[self._executable], required_version):
+ self._versions[self.basename], required_version):
warning = 'Your copy of %s is outdated, update %s to version %s or newer if you encounter any errors.' % (
- self._executable, self._executable, required_version)
+ self.basename, self.basename, required_version)
if self._downloader:
self._downloader.report_warning(warning)
@staticmethod
- def get_versions():
- programs = ['avprobe', 'avconv', 'ffmpeg', 'ffprobe']
- return dict((p, get_exe_version(p, args=['-version'])) for p in programs)
-
- @property
- def available(self):
- return self._executable is not None
+ def get_versions(downloader=None):
+ return FFmpegPostProcessor(downloader)._versions
- @property
- def _executable(self):
- if self._downloader.params.get('prefer_ffmpeg', False):
+ def _determine_executables(self):
+ programs = ['avprobe', 'avconv', 'ffmpeg', 'ffprobe']
+ prefer_ffmpeg = self._downloader.params.get('prefer_ffmpeg', False)
+
+ self.basename = None
+ self.probe_basename = None
+
+ self._paths = None
+ self._versions = None
+ if self._downloader:
+ location = self._downloader.params.get('ffmpeg_location')
+ if location is not None:
+ if not os.path.exists(location):
+ self._downloader.report_warning(
+ 'ffmpeg-location %s does not exist! '
+ 'Continuing without avconv/ffmpeg.' % (location))
+ self._versions = {}
+ return
+ elif not os.path.isdir(location):
+ basename = os.path.splitext(os.path.basename(location))[0]
+ if basename not in programs:
+ self._downloader.report_warning(
+ 'Cannot identify executable %s, its basename should be one of %s. '
+ 'Continuing without avconv/ffmpeg.' %
+ (location, ', '.join(programs)))
+ self._versions = {}
+ return
+ location = os.path.dirname(os.path.abspath(location))
+ if basename in ('ffmpeg', 'ffprobe'):
+ prefer_ffmpeg = True
+
+ self._paths = dict(
+ (p, os.path.join(location, p)) for p in programs)
+ self._versions = dict(
+ (p, get_exe_version(self._paths[p], args=['-version']))
+ for p in programs)
+ if self._versions is None:
+ self._versions = dict(
+ (p, get_exe_version(p, args=['-version'])) for p in programs)
+ self._paths = dict((p, p) for p in programs)
+
+ if prefer_ffmpeg:
prefs = ('ffmpeg', 'avconv')
else:
prefs = ('avconv', 'ffmpeg')
for p in prefs:
if self._versions[p]:
- return p
- return None
+ self.basename = p
+ break
- @property
- def _probe_executable(self):
- if self._downloader.params.get('prefer_ffmpeg', False):
+ if prefer_ffmpeg:
prefs = ('ffprobe', 'avprobe')
else:
prefs = ('avprobe', 'ffprobe')
for p in prefs:
if self._versions[p]:
- return p
- return None
+ self.probe_basename = p
+ break
+
+ @property
+ def available(self):
+ return self.basename is not None
- def _uses_avconv(self):
- return self._executable == 'avconv'
+ @property
+ def executable(self):
+ return self._paths[self.basename]
+
+ @property
+ def probe_executable(self):
+ return self._paths[self.probe_basename]
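# Illustrative (hypothetical path): with --ffmpeg-location /opt/ffmpeg/bin,
# _determine_executables fills _paths with '/opt/ffmpeg/bin/<program>' for
# every program name, and self.executable resolves to the full path of the
# preferred binary; without the option, _paths is the identity mapping and
# the usual PATH lookup applies.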
def run_ffmpeg_multiple_files(self, input_paths, out_path, opts):
self.check_version()
files_cmd = []
for path in input_paths:
files_cmd.extend([encodeArgument('-i'), encodeFilename(path, True)])
- cmd = ([encodeFilename(self._executable, True), encodeArgument('-y')] +
+ cmd = ([encodeFilename(self.executable, True), encodeArgument('-y')] +
files_cmd +
[encodeArgument(o) for o in opts] +
[encodeFilename(self._ffmpeg_filename_argument(out_path), True)])
if self._downloader.params.get('verbose', False):
self._downloader.to_screen('[debug] ffmpeg command line: %s' % shell_quote(cmd))
- p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+ p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
stdout, stderr = p.communicate()
if p.returncode != 0:
stderr = stderr.decode('utf-8', 'replace')
def get_audio_codec(self, path):
- if not self._probe_executable:
+ if not self.probe_executable:
raise PostProcessingError('ffprobe or avprobe not found. Please install one.')
try:
cmd = [
- encodeFilename(self._probe_executable, True),
+ encodeFilename(self.probe_executable, True),
encodeArgument('-show_streams'),
encodeFilename(self._ffmpeg_filename_argument(path), True)]
- handle = subprocess.Popen(cmd, stderr=compat_subprocess_get_DEVNULL(), stdout=subprocess.PIPE)
+ if self._downloader.params.get('verbose', False):
+ self._downloader.to_screen('[debug] %s command line: %s' % (self.basename, shell_quote(cmd)))
+ handle = subprocess.Popen(cmd, stderr=compat_subprocess_get_DEVNULL(), stdout=subprocess.PIPE, stdin=subprocess.PIPE)
output = handle.communicate()[0]
if handle.wait() != 0:
return None
if filecodec is None:
raise PostProcessingError('WARNING: unable to obtain file audio codec with ffprobe')
- uses_avconv = self._uses_avconv()
more_opts = []
if self._preferredcodec == 'best' or self._preferredcodec == filecodec or (self._preferredcodec == 'm4a' and filecodec == 'aac'):
if filecodec == 'aac' and self._preferredcodec in ['m4a', 'best']:
# Lossless, but in another container
acodec = 'copy'
extension = 'm4a'
- more_opts = ['-bsf:a' if uses_avconv else '-absf', 'aac_adtstoasc']
+ more_opts = ['-bsf:a', 'aac_adtstoasc']
elif filecodec in ['aac', 'mp3', 'vorbis', 'opus']:
# Lossless if possible
acodec = 'copy'
more_opts = []
if self._preferredquality is not None:
if int(self._preferredquality) < 10:
- more_opts += ['-q:a' if uses_avconv else '-aq', self._preferredquality]
+ more_opts += ['-q:a', self._preferredquality]
else:
- more_opts += ['-b:a' if uses_avconv else '-ab', self._preferredquality + 'k']
+ more_opts += ['-b:a', self._preferredquality + 'k']
else:
# We convert the audio (lossy)
acodec = {'mp3': 'libmp3lame', 'aac': 'aac', 'm4a': 'aac', 'opus': 'opus', 'vorbis': 'libvorbis', 'wav': None}[self._preferredcodec]
if self._preferredquality is not None:
# The opus codec doesn't support the -aq option
if int(self._preferredquality) < 10 and extension != 'opus':
- more_opts += ['-q:a' if uses_avconv else '-aq', self._preferredquality]
+ more_opts += ['-q:a', self._preferredquality]
else:
- more_opts += ['-b:a' if uses_avconv else '-ab', self._preferredquality + 'k']
+ more_opts += ['-b:a', self._preferredquality + 'k']
if self._preferredcodec == 'aac':
more_opts += ['-f', 'adts']
if self._preferredcodec == 'm4a':
- more_opts += ['-bsf:a' if uses_avconv else '-absf', 'aac_adtstoasc']
+ more_opts += ['-bsf:a', 'aac_adtstoasc']
if self._preferredcodec == 'vorbis':
extension = 'ogg'
if self._preferredcodec == 'wav':
if self._nopostoverwrites and os.path.exists(encodeFilename(new_path)):
self._downloader.to_screen('[youtube] Post-process file %s exists, skipping' % new_path)
else:
- self._downloader.to_screen('[' + self._executable + '] Destination: ' + new_path)
+ self._downloader.to_screen('[' + self.basename + '] Destination: ' + new_path)
self.run_ffmpeg(path, new_path, acodec, more_opts)
except:
etype, e, tb = sys.exc_info()
if isinstance(e, AudioConversionError):
msg = 'audio conversion failed: ' + e.msg
else:
- msg = 'error running ' + self._executable
+ msg = 'error running ' + self.basename
raise PostProcessingError(msg)
# Try to update the date time for extracted audio file.
'zu': 'zul',
}
- def __init__(self, downloader=None, subtitlesformat='srt'):
- super(FFmpegEmbedSubtitlePP, self).__init__(downloader)
- self._subformat = subtitlesformat
-
@classmethod
def _conver_lang_code(cls, code):
"""Convert language code from ISO 639-1 to ISO 639-2/T"""
if information['ext'] != 'mp4':
self._downloader.to_screen('[ffmpeg] Subtitles can only be embedded in mp4 files')
return True, information
- if not information.get('subtitles'):
+ subtitles = information.get('requested_subtitles')
+ if not subtitles:
self._downloader.to_screen('[ffmpeg] There aren\'t any subtitles to embed')
return True, information
- sub_langs = [key for key in information['subtitles']]
+ sub_langs = list(subtitles.keys())
filename = information['filepath']
- input_files = [filename] + [subtitles_filename(filename, lang, self._subformat) for lang in sub_langs]
+ input_files = [filename] + [subtitles_filename(filename, lang, sub_info['ext']) for lang, sub_info in subtitles.items()]
opts = [
'-map', '0',
os.rename(encodeFilename(temp_filename), encodeFilename(filename))
return True, info
+
+
+class FFmpegSubtitlesConvertorPP(FFmpegPostProcessor):
+ def __init__(self, downloader=None, format=None):
+ super(FFmpegSubtitlesConvertorPP, self).__init__(downloader)
+ self.format = format
+
+ def run(self, info):
+ subs = info.get('requested_subtitles')
+ filename = info['filepath']
+ new_ext = self.format
+ new_format = new_ext
+ if new_format == 'vtt':
+ new_format = 'webvtt'
+ if subs is None:
+ self._downloader.to_screen('[ffmpeg] There aren\'t any subtitles to convert')
+ return True, info
+ self._downloader.to_screen('[ffmpeg] Converting subtitles')
+ for lang, sub in subs.items():
+ ext = sub['ext']
+ if ext == new_ext:
+ self._downloader.to_screen(
+ '[ffmpeg] Subtitle file for %s is already in the requested '
+ 'format' % new_ext)
+ continue
+ new_file = subtitles_filename(filename, lang, new_ext)
+ self.run_ffmpeg(
+ subtitles_filename(filename, lang, ext),
+ new_file, ['-f', new_format])
+
+ with io.open(new_file, 'rt', encoding='utf-8') as f:
+ subs[lang] = {
+ 'ext': new_ext,
+ 'data': f.read(),
+ }
+
+ return True, info
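# Hedged usage sketch (not part of the patch): wiring the new convertor up
# by hand, assuming an already configured YoutubeDL instance `ydl`:
#   pp = FFmpegSubtitlesConvertorPP(ydl, format='srt')
#   ydl.add_post_processor(pp)
# Passing --convert-subtitles srt on the command line sets this up
# automatically.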
import json
import locale
import math
+import operator
import os
import pipes
import platform
compiled_regex_type = type(re.compile(''))
std_headers = {
- 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20100101 Firefox/10.0 (Chrome)',
+ 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20150101 Firefox/20.0 (Chrome)',
'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
'Accept-Encoding': 'gzip, deflate',
}
+ENGLISH_MONTH_NAMES = [
+ 'January', 'February', 'March', 'April', 'May', 'June',
+ 'July', 'August', 'September', 'October', 'November', 'December']
+
+
def preferredencoding():
"""Get preferred encoding.
# Common case of "Foreign band name - English song title"
if restricted and result.startswith('-_'):
result = result[2:]
+ if result.startswith('-'):
+ result = '_' + result[len('-'):]
if not result:
result = '_'
return result
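# e.g. (illustrative): sanitize_filename('-Movie Title') now yields
# '_Movie Title', so a generated filename can no longer be mistaken for a
# command-line flag by other tools.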
req, **kwargs)
-def parse_iso8601(date_str, delimiter='T'):
+def parse_iso8601(date_str, delimiter='T', timezone=None):
""" Return a UNIX timestamp from the given date """
if date_str is None:
return None
- m = re.search(
- r'(\.[0-9]+)?(?:Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$)',
- date_str)
- if not m:
- timezone = datetime.timedelta()
- else:
- date_str = date_str[:-len(m.group(0))]
- if not m.group('sign'):
+ if timezone is None:
+ m = re.search(
+ r'(\.[0-9]+)?(?:Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$)',
+ date_str)
+ if not m:
timezone = datetime.timedelta()
else:
- sign = 1 if m.group('sign') == '+' else -1
- timezone = datetime.timedelta(
- hours=sign * int(m.group('hours')),
- minutes=sign * int(m.group('minutes')))
+ date_str = date_str[:-len(m.group(0))]
+ if not m.group('sign'):
+ timezone = datetime.timedelta()
+ else:
+ sign = 1 if m.group('sign') == '+' else -1
+ timezone = datetime.timedelta(
+ hours=sign * int(m.group('hours')),
+ minutes=sign * int(m.group('minutes')))
date_format = '%Y-%m-%d{0}%H:%M:%S'.format(delimiter)
dt = datetime.datetime.strptime(date_str, date_format) - timezone
return calendar.timegm(dt.timetuple())
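# Illustrative use of the new parameter, following the code above: a
# pre-parsed offset can be supplied for date strings that carry none of
# their own, e.g.
#   parse_iso8601('2015-02-28 12:00:00', delimiter=' ',
#                 timezone=datetime.timedelta(hours=2))
# subtracts the two-hour offset and returns the UNIX timestamp for
# 2015-02-28 10:00:00 UTC.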
def not_a_console(handle):
if handle == INVALID_HANDLE_VALUE or handle is None:
return True
- return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR
- or GetConsoleMode(handle, ctypes.byref(ctypes.wintypes.DWORD())) == 0)
+ return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR or
+ GetConsoleMode(handle, ctypes.byref(ctypes.wintypes.DWORD())) == 0)
if not_a_console(h):
return False
def month_by_name(name):
""" Return the number of a month by (locale-independently) English name """
- ENGLISH_NAMES = [
- 'January', 'February', 'March', 'April', 'May', 'June',
- 'July', 'August', 'September', 'October', 'November', 'December']
try:
- return ENGLISH_NAMES.index(name) + 1
+ return ENGLISH_MONTH_NAMES.index(name) + 1
+ except ValueError:
+ return None
+
+
+def month_by_abbreviation(abbrev):
+ """ Return the number of a month by (locale-independently) English
+ abbreviations """
+
+ try:
+ return [s[:3] for s in ENGLISH_MONTH_NAMES].index(abbrev) + 1
except ValueError:
return None
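# Quick sanity check (illustrative):
#   month_by_abbreviation('Feb') == 2
#   month_by_abbreviation('feb') is None  # the lookup is case-sensitive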
(?P<only_mins>[0-9.]+)\s*(?:mins?|minutes?)\s*|
(?P<only_hours>[0-9.]+)\s*(?:hours?)|
+ \s*(?P<hours_reversed>[0-9]+)\s*(?:[:h]|hours?)\s*(?P<mins_reversed>[0-9]+)\s*(?:[:m]|mins?|minutes?)\s*|
(?:
(?:
(?:(?P<days>[0-9]+)\s*(?:[:d]|days?)\s*)?
return float_or_none(m.group('only_hours'), invscale=60 * 60)
if m.group('secs'):
res += int(m.group('secs'))
+ if m.group('mins_reversed'):
+ res += int(m.group('mins_reversed')) * 60
if m.group('mins'):
res += int(m.group('mins')) * 60
if m.group('hours'):
res += int(m.group('hours')) * 60 * 60
+ if m.group('hours_reversed'):
+ res += int(m.group('hours_reversed')) * 60 * 60
if m.group('days'):
res += int(m.group('days')) * 24 * 60 * 60
if m.group('ms'):
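# The new *_reversed groups accept "hours first" spellings (illustrative
# values derived from the regex above): parse_duration('2h 32m') == 9120,
# and parse_duration('2 hours 32 minutes') == 9120 as well.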
return '"%s"' % v
res = re.sub(r'''(?x)
- "(?:[^"\\]*(?:\\\\|\\")?)*"|
- '(?:[^'\\]*(?:\\\\|\\')?)*'|
+ "(?:[^"\\]*(?:\\\\|\\['"nu]))*[^"\\]*"|
+ '(?:[^'\\]*(?:\\\\|\\['"nu]))*[^'\\]*'|
[a-zA-Z_][.a-zA-Z_0-9]*
''', fix_kv, code)
res = re.sub(r',(\s*\])', lambda m: m.group(1), res)
return ' '.join(shlex_quote(a) for a in args)
+def mimetype2ext(mt):
+ _, _, res = mt.rpartition('/')
+
+ return {
+ 'x-ms-wmv': 'wmv',
+ 'x-mp4-fragmented': 'mp4',
+ }.get(res, res)
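# For example (illustrative): mimetype2ext('video/x-ms-wmv') == 'wmv',
# mimetype2ext('video/x-mp4-fragmented') == 'mp4', and unmapped subtypes
# pass through unchanged, e.g. mimetype2ext('video/webm') == 'webm'.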
+
+
def urlhandle_detect_ext(url_handle):
try:
url_handle.headers
if e:
return e
- return getheader('Content-Type').split("/")[1]
+ return mimetype2ext(getheader('Content-Type'))
def age_restricted(content_limit, age_limit):
max_lens = [max(len(compat_str(v)) for v in col) for col in zip(*table)]
format_str = ' '.join('%-' + compat_str(ml + 1) + 's' for ml in max_lens[:-1]) + '%s'
return '\n'.join(format_str % tuple(row) for row in table)
+
+
+def _match_one(filter_part, dct):
+ COMPARISON_OPERATORS = {
+ '<': operator.lt,
+ '<=': operator.le,
+ '>': operator.gt,
+ '>=': operator.ge,
+ '=': operator.eq,
+ '!=': operator.ne,
+ }
+ operator_rex = re.compile(r'''(?x)\s*
+ (?P<key>[a-z_]+)
+ \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
+ (?:
+ (?P<intval>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)|
+ (?P<strval>(?![0-9.])[a-z0-9A-Z]*)
+ )
+ \s*$
+ ''' % '|'.join(map(re.escape, COMPARISON_OPERATORS.keys())))
+ m = operator_rex.search(filter_part)
+ if m:
+ op = COMPARISON_OPERATORS[m.group('op')]
+ if m.group('strval') is not None:
+ if m.group('op') not in ('=', '!='):
+ raise ValueError(
+ 'Operator %s does not support string values!' % m.group('op'))
+ comparison_value = m.group('strval')
+ else:
+ try:
+ comparison_value = int(m.group('intval'))
+ except ValueError:
+ comparison_value = parse_filesize(m.group('intval'))
+ if comparison_value is None:
+ comparison_value = parse_filesize(m.group('intval') + 'B')
+ if comparison_value is None:
+ raise ValueError(
+ 'Invalid integer value %r in filter part %r' % (
+ m.group('intval'), filter_part))
+ actual_value = dct.get(m.group('key'))
+ if actual_value is None:
+ return m.group('none_inclusive')
+ return op(actual_value, comparison_value)
+
+ UNARY_OPERATORS = {
+ '': lambda v: v is not None,
+ '!': lambda v: v is None,
+ }
+ operator_rex = re.compile(r'''(?x)\s*
+ (?P<op>%s)\s*(?P<key>[a-z_]+)
+ \s*$
+ ''' % '|'.join(map(re.escape, UNARY_OPERATORS.keys())))
+ m = operator_rex.search(filter_part)
+ if m:
+ op = UNARY_OPERATORS[m.group('op')]
+ actual_value = dct.get(m.group('key'))
+ return op(actual_value)
+
+ raise ValueError('Invalid filter part %r' % filter_part)
+
+
+def match_str(filter_str, dct):
+ """ Filter a dictionary with a simple string syntax. Returns True (=passes filter) or false """
+
+ return all(
+ _match_one(filter_part, dct) for filter_part in filter_str.split('&'))
+
+
+def match_filter_func(filter_str):
+ def _match_func(info_dict):
+ if match_str(filter_str, info_dict):
+ return None
+ else:
+ video_title = info_dict.get('title', info_dict.get('id', 'video'))
+ return '%s does not pass filter %s, skipping ...' % (video_title, filter_str)
+ return _match_func
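# Worked example (mirrors the --match-filter help text; the values are
# hypothetical): a video with 190 likes, no dislike information and a
# description passes the documented filter:
#   match_str('like_count > 100 & dislike_count <? 50 & description',
#             {'like_count': 190, 'description': 'a description'})
# evaluates truthy, because the '?' makes the unknown dislike_count
# acceptable.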
from __future__ import unicode_literals
-__version__ = '2015.02.06'
+__version__ = '2015.02.28'