Raphaël G. Git Repositories - youtubedl/commitdiff
New upstream version 2019.01.16
authorRogério Brito <rbrito@ime.usp.br>
Wed, 16 Jan 2019 21:12:11 +0000 (19:12 -0200)
committerRogério Brito <rbrito@ime.usp.br>
Wed, 16 Jan 2019 21:12:11 +0000 (19:12 -0200)
105 files changed:
ChangeLog
README.md
README.txt
docs/supportedsites.md
setup.py
test/helper.py
test/test_YoutubeDLCookieJar.py [new file with mode: 0644]
test/test_compat.py
test/test_postprocessors.py
test/testdata/cookies/session_cookies.txt [new file with mode: 0644]
youtube-dl
youtube-dl.1
youtube_dl/YoutubeDL.py
youtube_dl/downloader/hls.py
youtube_dl/extractor/acast.py
youtube_dl/extractor/aenetworks.py
youtube_dl/extractor/americastestkitchen.py
youtube_dl/extractor/ard.py
youtube_dl/extractor/atvat.py
youtube_dl/extractor/audiomack.py
youtube_dl/extractor/azmedien.py
youtube_dl/extractor/bbc.py
youtube_dl/extractor/beeg.py
youtube_dl/extractor/bitchute.py
youtube_dl/extractor/cammodels.py
youtube_dl/extractor/camtube.py
youtube_dl/extractor/camwithher.py
youtube_dl/extractor/carambatv.py
youtube_dl/extractor/ciscolive.py [new file with mode: 0644]
youtube_dl/extractor/cnn.py
youtube_dl/extractor/common.py
youtube_dl/extractor/crackle.py
youtube_dl/extractor/curiositystream.py
youtube_dl/extractor/discovery.py
youtube_dl/extractor/dtube.py
youtube_dl/extractor/extractors.py
youtube_dl/extractor/fox.py
youtube_dl/extractor/foxsports.py
youtube_dl/extractor/freespeech.py
youtube_dl/extractor/funimation.py
youtube_dl/extractor/gaia.py [new file with mode: 0644]
youtube_dl/extractor/gamespot.py
youtube_dl/extractor/generic.py
youtube_dl/extractor/gfycat.py
youtube_dl/extractor/globo.py
youtube_dl/extractor/hotstar.py
youtube_dl/extractor/hungama.py [new file with mode: 0644]
youtube_dl/extractor/imgur.py
youtube_dl/extractor/iprima.py
youtube_dl/extractor/joj.py
youtube_dl/extractor/jwplatform.py
youtube_dl/extractor/kaltura.py
youtube_dl/extractor/lecturio.py [new file with mode: 0644]
youtube_dl/extractor/libraryofcongress.py
youtube_dl/extractor/liveleak.py
youtube_dl/extractor/livestream.py
youtube_dl/extractor/lynda.py
youtube_dl/extractor/manyvids.py
youtube_dl/extractor/mediasite.py
youtube_dl/extractor/mixcloud.py
youtube_dl/extractor/nationalgeographic.py
youtube_dl/extractor/nbc.py
youtube_dl/extractor/nova.py
youtube_dl/extractor/npo.py
youtube_dl/extractor/nrk.py
youtube_dl/extractor/nzz.py
youtube_dl/extractor/openload.py
youtube_dl/extractor/outsidetv.py [new file with mode: 0644]
youtube_dl/extractor/packtpub.py
youtube_dl/extractor/picarto.py
youtube_dl/extractor/playplustv.py [new file with mode: 0644]
youtube_dl/extractor/pornhub.py
youtube_dl/extractor/rmcdecouverte.py
youtube_dl/extractor/rte.py
youtube_dl/extractor/ruutu.py
youtube_dl/extractor/safari.py
youtube_dl/extractor/savefrom.py
youtube_dl/extractor/scrippsnetworks.py
youtube_dl/extractor/shared.py
youtube_dl/extractor/sixplay.py
youtube_dl/extractor/skylinewebcams.py
youtube_dl/extractor/tbs.py
youtube_dl/extractor/teachable.py [moved from youtube_dl/extractor/upskill.py with 53% similarity]
youtube_dl/extractor/ted.py
youtube_dl/extractor/testurl.py
youtube_dl/extractor/theplatform.py
youtube_dl/extractor/tiktok.py [new file with mode: 0644]
youtube_dl/extractor/tnaflix.py
youtube_dl/extractor/tvnow.py
youtube_dl/extractor/twitter.py
youtube_dl/extractor/uol.py
youtube_dl/extractor/vimeo.py
youtube_dl/extractor/vk.py
youtube_dl/extractor/vrv.py
youtube_dl/extractor/wimp.py
youtube_dl/extractor/wistia.py
youtube_dl/extractor/wwe.py [new file with mode: 0644]
youtube_dl/extractor/xvideos.py
youtube_dl/extractor/youporn.py
youtube_dl/extractor/yourporn.py
youtube_dl/extractor/youtube.py
youtube_dl/extractor/zype.py [new file with mode: 0644]
youtube_dl/postprocessor/ffmpeg.py
youtube_dl/utils.py
youtube_dl/version.py

index fa5de8b0416c9224454aa185c4a72b25f57a8573..13019bf2b03f87daf786e59ae30204dcfd7c459e 100644 (file)
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,178 @@
+version 2019.01.16
+
+Core
++ [test/helper] Add support for maxcount and count collection len checkers
+* [downloader/hls] Fix uplynk ad skipping (#18824)
+* [postprocessor/ffmpeg] Improve ffmpeg version parsing (#18813)
+
+Extractors
+* [youtube] Skip unsupported adaptive stream type (#18804)
++ [youtube] Extract DASH formats from player response (#18804)
+* [funimation] Fix extraction (#14089)
+* [skylinewebcams] Fix extraction (#18853)
++ [curiositystream] Add support for non app URLs
++ [bitchute] Check formats (#18833)
+* [wistia] Extend URL regular expression (#18823)
++ [playplustv] Add support for playplus.com (#18789)
+
+
+version 2019.01.10
+
+Core
+* [extractor/common] Use episode name as title in _json_ld
++ [extractor/common] Add support for movies in _json_ld
+* [postprocessor/ffmpeg] Embed subtitles with non-standard language codes
+  (#18765)
++ [utils] Add language codes replaced in 1989 revision of ISO 639
+  to ISO639Utils (#18765)
+
+Extractors
+* [youtube] Extract live HLS URL from player response (#18799)
++ [outsidetv] Add support for outsidetv.com (#18774)
+* [jwplatform] Use JW Platform Delivery API V2 and add support for more URLs
++ [fox] Add support for National Geographic (#17985, #15333, #14698)
++ [playplustv] Add support for playplus.tv (#18789)
+* [globo] Set GLBID cookie manually (#17346)
++ [gaia] Add support for gaia.com (#14605)
+* [youporn] Fix title and description extraction (#18748)
++ [hungama] Add support for hungama.com (#17402, #18771)
+* [dtube] Fix extraction (#18741)
+* [tvnow] Fix and rework extractors and prepare for a switch to the new API
+  (#17245, #18499)
+* [carambatv:page] Fix extraction (#18739)
+
+
+version 2019.01.02
+
+Extractors
+* [discovery] Use geo verification headers (#17838)
++ [packtpub] Add support for subscription.packtpub.com (#18718)
+* [yourporn] Fix extraction (#18583)
++ [acast:channel] Add support for play.acast.com (#18587)
++ [extractors] Add missing age limits (#18621)
++ [rmcdecouverte] Add support for live stream
+* [rmcdecouverte] Bypass geo restriction
+* [rmcdecouverte] Update URL regular expression (#18595, #18697)
+* [manyvids] Fix extraction (#18604, #18614)
+* [bitchute] Fix extraction (#18567)
+
+
+version 2018.12.31
+
+Extractors
++ [bbc] Add support for another embed pattern (#18643)
++ [npo:live] Add support for npostart.nl (#18644)
+* [beeg] Fix extraction (#18610, #18626)
+* [youtube] Unescape HTML for series (#18641)
++ [youtube] Extract more format metadata
+* [youtube] Detect DRM protected videos (#1774)
+* [youtube] Relax HTML5 player regular expressions (#18465, #18466)
+* [youtube] Extend HTML5 player regular expression (#17516)
++ [liveleak] Add support for another embed type and restore original
+  format extraction
++ [crackle] Extract ISM and HTTP formats
++ [twitter] Pass Referer with card request (#18579)
+* [mediasite] Extend URL regular expression (#18558)
++ [lecturio] Add support for lecturio.de (#18562)
++ [discovery] Add support for Scripps Networks watch domains (#17947)
+
+
+version 2018.12.17
+
+Extractors
+* [ard:beta] Improve geo restricted videos extraction
+* [ard:beta] Fix subtitles extraction
+* [ard:beta] Improve extraction robustness
+* [ard:beta] Relax URL regular expression (#18441)
+* [acast] Add support for embed.acast.com and play.acast.com (#18483)
+* [iprima] Relax URL regular expression (#18515, #18540)
+* [vrv] Fix initial state extraction (#18553)
+* [youtube] Fix mark watched (#18546)
++ [safari] Add support for learning.oreilly.com (#18510)
+* [youtube] Fix multifeed extraction (#18531)
+* [lecturio] Improve subtitles extraction (#18488)
+* [uol] Fix format URL extraction (#18480)
++ [ard:mediathek] Add support for classic.ardmediathek.de (#18473)
+
+
+version 2018.12.09
+
+Core
+* [YoutubeDL] Keep session cookies in cookie file between runs
+* [YoutubeDL] Recognize session cookies with expired set to 0 (#12929)
+
+Extractors
++ [teachable] Add support for teachable platform sites (#5451, #18150, #18272)
++ [aenetworks] Add support for historyvault.com (#18460)
+* [imgur] Improve gallery and album detection and extraction (#9133, #16577,
+  #17223, #18404)
+* [iprima] Relax URL regular expression (#18453)
+* [hotstar] Fix video data extraction (#18386)
+* [ard:mediathek] Fix title and description extraction (#18349, #18371)
+* [xvideos] Switch to HTTPS (#18422, #18427)
++ [lecturio] Add support for lecturio.com (#18405)
++ [nrktv:series] Add support for extra materials
+* [nrktv:season,series] Fix extraction (#17159, #17258)
+* [nrktv] Relax URL regular expression (#18304, #18387)
+* [yourporn] Fix extraction (#18424, #18425)
+* [tbs] Fix info extraction (#18403)
++ [gamespot] Add support for review URLs
+
+
+version 2018.12.03
+
+Core
+* [utils] Fix random_birthday to generate existing dates only (#18284)
+
+Extractors
++ [tiktok] Add support for tiktok.com (#18108, #18135)
+* [pornhub] Use actual URL host for requests (#18359)
+* [lynda] Fix authentication (#18158, #18217)
+* [gfycat] Update API endpoint (#18333, #18343)
++ [hotstar] Add support for alternative app state layout (#18320)
+* [azmedien] Fix extraction (#18334, #18336)
++ [vimeo] Add support for VHX (Vimeo OTT) (#14835)
+* [joj] Fix extraction (#18280, #18281)
++ [wistia] Add support for fast.wistia.com (#18287)
+
+
+version 2018.11.23
+
+Core
++ [setup.py] Add more relevant classifiers
+
+Extractors
+* [mixcloud] Fallback to hardcoded decryption key (#18016)
+* [nbc:news] Fix article extraction (#16194)
+* [foxsports] Fix extraction (#17543)
+* [loc] Relax regular expression and improve formats extraction
++ [ciscolive] Add support for ciscolive.cisco.com (#17984)
+* [nzz] Relax kaltura regex (#18228)
+* [sixplay] Fix formats extraction
+* [bitchute] Improve title extraction
+* [kaltura] Limit requested MediaEntry fields
++ [americastestkitchen] Add support for zype embeds (#18225)
++ [pornhub] Add pornhub.net alias
+* [nova:embed] Fix extraction (#18222)
+
+
+version 2018.11.18
+
+Extractors
++ [wwe] Extract subtitles
++ [wwe] Add support for playlists (#14781)
++ [wwe] Add support for wwe.com (#14781, #17450)
+* [vk] Detect geo restriction (#17767)
+* [openload] Use original host during extraction (#18211)
+* [atvat] Fix extraction (#18041)
++ [rte] Add support for new API endpoint (#18206)
+* [tnaflixnetwork:embed] Fix extraction (#18205)
+* [picarto] Use API and add token support (#16518)
++ [zype] Add support for player.zype.com (#18143)
+* [vivo] Fix extraction (#18139)
+* [ruutu] Update API endpoint (#18138)
+
+
 version 2018.11.07
 
 Extractors
index 35c3de5127455792bedc26d032d987806faaec0b..70bcfaccf9579bf25f6c0e95713aae75eefa5061 100644 (file)
--- a/README.md
+++ b/README.md
@@ -496,7 +496,7 @@ The `-o` option allows users to indicate a template for the output file names.
 
 **tl;dr:** [navigate me to examples](#output-template-examples).
 
-The basic usage is not to set any template arguments when downloading a single file, like in `youtube-dl -o funny_video.flv "https://some/video"`. However, it may contain special sequences that will be replaced when downloading each video. The special sequences may be formatted according to [python string formatting operations](https://docs.python.org/2/library/stdtypes.html#string-formatting). For example, `%(NAME)s` or `%(NAME)05d`. To clarify, that is a percent symbol followed by a name in parentheses, followed by formatting operations. Allowed names along with sequence type are:
+The basic usage is not to set any template arguments when downloading a single file, like in `youtube-dl -o funny_video.flv "https://some/video"`. However, it may contain special sequences that will be replaced when downloading each video. The special sequences may be formatted according to [python string formatting operations](https://docs.python.org/2/library/stdtypes.html#string-formatting). For example, `%(NAME)s` or `%(NAME)05d`. To clarify, that is a percent symbol followed by a name in parentheses, followed by formatting operations. Allowed names along with sequence type are:
 
  - `id` (string): Video identifier
  - `title` (string): Video title
@@ -1024,16 +1024,20 @@ After you have ensured this site is distributing its content legally, you can fo
     ```
 5. Add an import in [`youtube_dl/extractor/extractors.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/extractors.py).
 6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc. Note that tests with `only_matching` key in test's dict are not counted in.
-7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L74-L252). Add tests and code for as many as you want.
-8. Make sure your code follows [youtube-dl coding conventions](#youtube-dl-coding-conventions) and check the code with [flake8](https://pypi.python.org/pypi/flake8). Also make sure your code works under all [Python](https://www.python.org/) versions claimed supported by youtube-dl, namely 2.6, 2.7, and 3.2+.
-9. When the tests pass, [add](https://git-scm.com/docs/git-add) the new files and [commit](https://git-scm.com/docs/git-commit) them and [push](https://git-scm.com/docs/git-push) the result, like this:
+7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/rg3/youtube-dl/blob/7f41a598b3fba1bcab2817de64a08941200aa3c8/youtube_dl/extractor/common.py#L94-L303). Add tests and code for as many as you want.
+8. Make sure your code follows [youtube-dl coding conventions](#youtube-dl-coding-conventions) and check the code with [flake8](http://flake8.pycqa.org/en/latest/index.html#quickstart):
+
+        $ flake8 youtube_dl/extractor/yourextractor.py
+
+9. Make sure your code works under all [Python](https://www.python.org/) versions claimed supported by youtube-dl, namely 2.6, 2.7, and 3.2+.
+10. When the tests pass, [add](https://git-scm.com/docs/git-add) the new files and [commit](https://git-scm.com/docs/git-commit) them and [push](https://git-scm.com/docs/git-push) the result, like this:
 
         $ git add youtube_dl/extractor/extractors.py
         $ git add youtube_dl/extractor/yourextractor.py
         $ git commit -m '[yourextractor] Add new extractor'
         $ git push origin yourextractor
 
-10. Finally, [create a pull request](https://help.github.com/articles/creating-a-pull-request). We'll then review and merge it.
+11. Finally, [create a pull request](https://help.github.com/articles/creating-a-pull-request). We'll then review and merge it.
 
 In any case, thank you very much for your contributions!
 
@@ -1045,7 +1049,7 @@ Extractors are very fragile by nature since they depend on the layout of the sou
 
 ### Mandatory and optional metafields
 
-For extraction to work youtube-dl relies on metadata your extractor extracts and provides to youtube-dl expressed by an [information dictionary](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L75-L257) or simply *info dict*. Only the following meta fields in the *info dict* are considered mandatory for a successful extraction process by youtube-dl:
+For extraction to work youtube-dl relies on metadata your extractor extracts and provides to youtube-dl expressed by an [information dictionary](https://github.com/rg3/youtube-dl/blob/7f41a598b3fba1bcab2817de64a08941200aa3c8/youtube_dl/extractor/common.py#L94-L303) or simply *info dict*. Only the following meta fields in the *info dict* are considered mandatory for a successful extraction process by youtube-dl:
 
  - `id` (media identifier)
  - `title` (media title)
@@ -1053,7 +1057,7 @@ For extraction to work youtube-dl relies on metadata your extractor extracts and
 
 In fact only the last option is technically mandatory (i.e. if you can't figure out the download location of the media the extraction does not make any sense). But by convention youtube-dl also treats `id` and `title` as mandatory. Thus the aforementioned metafields are the critical data that the extraction does not make any sense without and if any of them fail to be extracted then the extractor is considered completely broken.
 
-[Any field](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L149-L257) apart from the aforementioned ones are considered **optional**. That means that extraction should be **tolerant** to situations when sources for these fields can potentially be unavailable (even if they are always available at the moment) and **future-proof** in order not to break the extraction of general purpose mandatory fields.
+[Any field](https://github.com/rg3/youtube-dl/blob/7f41a598b3fba1bcab2817de64a08941200aa3c8/youtube_dl/extractor/common.py#L188-L303) apart from the aforementioned ones are considered **optional**. That means that extraction should be **tolerant** to situations when sources for these fields can potentially be unavailable (even if they are always available at the moment) and **future-proof** in order not to break the extraction of general purpose mandatory fields.
 
 #### Example
 
@@ -1129,11 +1133,33 @@ title = meta.get('title') or self._og_search_title(webpage)
 
 This code will try to extract from `meta` first and if it fails it will try extracting `og:title` from a `webpage`.
 
-### Make regular expressions flexible
+### Regular expressions
+
+#### Don't capture groups you don't use
+
+Capturing group must be an indication that it's used somewhere in the code. Any group that is not used must be non capturing.
+
+##### Example
+
+Don't capture id attribute name here since you can't use it for anything anyway.
+
+Correct:
+
+```python
+r'(?:id|ID)=(?P<id>\d+)'
+```
+
+Incorrect:
+```python
+r'(id|ID)=(?P<id>\d+)'
+```
+
 
-When using regular expressions try to write them fuzzy and flexible.
+#### Make regular expressions relaxed and flexible
+
+When using regular expressions try to write them fuzzy, relaxed and flexible, skipping insignificant parts that are more likely to change, allowing both single and double quotes for quoted values and so on.
  
-#### Example
+##### Example
 
 Say you need to extract `title` from the following HTML code:
 
@@ -1166,6 +1192,25 @@ title = self._search_regex(
     webpage, 'title', group='title')
 ```
 
+### Long lines policy
+
+There is a soft limit to keep lines of code under 80 characters long. This means it should be respected if possible and if it does not make readability and code maintenance worse.
+
+For example, you should **never** split long string literals like URLs or some other often copied entities over multiple lines to fit this limit:
+
+Correct:
+
+```python
+'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4'
+```
+
+Incorrect:
+
+```python
+'https://www.youtube.com/watch?v=FqZTN594JQw&list='
+'PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4'
+```
+
 ### Use safe conversion functions
 
 Wrap all extracted numeric data into safe functions from [`youtube_dl/utils.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/utils.py): `int_or_none`, `float_or_none`. Use them for string to number conversions as well.
index 19988f992080121c96ce4ed6218bd3137403f374..6ba6d68caf747152f10a67810815e53ab85b3b2e 100644 (file)
@@ -576,8 +576,8 @@ However, it may contain special sequences that will be replaced when
 downloading each video. The special sequences may be formatted according
 to python string formatting operations. For example, %(NAME)s or
 %(NAME)05d. To clarify, that is a percent symbol followed by a name in
-parentheses, followed by a formatting operations. Allowed names along
-with sequence type are:
+parentheses, followed by formatting operations. Allowed names along with
+sequence type are:
 
 -   id (string): Video identifier
 -   title (string): Video title
@@ -1419,18 +1419,21 @@ yourextractor):
     methods and a detailed description of what your extractor should and
     may return. Add tests and code for as many as you want.
 8.  Make sure your code follows youtube-dl coding conventions and check
-    the code with flake8. Also make sure your code works under all
-    Python versions claimed supported by youtube-dl, namely 2.6, 2.7,
-    and 3.2+.
-9.  When the tests pass, add the new files and commit them and push the
+    the code with flake8:
+
+         $ flake8 youtube_dl/extractor/yourextractor.py
+
+9.  Make sure your code works under all Python versions claimed
+    supported by youtube-dl, namely 2.6, 2.7, and 3.2+.
+10. When the tests pass, add the new files and commit them and push the
     result, like this:
 
-         $ git add youtube_dl/extractor/extractors.py
-         $ git add youtube_dl/extractor/yourextractor.py
-         $ git commit -m '[yourextractor] Add new extractor'
-         $ git push origin yourextractor
+        $ git add youtube_dl/extractor/extractors.py
+        $ git add youtube_dl/extractor/yourextractor.py
+        $ git commit -m '[yourextractor] Add new extractor'
+        $ git push origin yourextractor
 
-10. Finally, create a pull request. We'll then review and merge it.
+11. Finally, create a pull request. We'll then review and merge it.
 
 In any case, thank you very much for your contributions!
 
@@ -1559,9 +1562,31 @@ fallback scenario:
 This code will try to extract from meta first and if it fails it will
 try extracting og:title from a webpage.
 
-Make regular expressions flexible
+Regular expressions
+
+Don't capture groups you don't use
 
-When using regular expressions try to write them fuzzy and flexible.
+Capturing group must be an indication that it's used somewhere in the
+code. Any group that is not used must be non capturing.
+
+Example
+
+Don't capture id attribute name here since you can't use it for anything
+anyway.
+
+Correct:
+
+    r'(?:id|ID)=(?P<id>\d+)'
+
+Incorrect:
+
+    r'(id|ID)=(?P<id>\d+)'
+
+Make regular expressions relaxed and flexible
+
+When using regular expressions try to write them fuzzy, relaxed and
+flexible, skipping insignificant parts that are more likely to change,
+allowing both single and double quotes for quoted values and so on.
 
 Example
 
@@ -1589,6 +1614,24 @@ The code definitely should not look like:
         r'<span style="position: absolute; left: 910px; width: 90px; float: right; z-index: 9999;" class="title">(.*?)</span>',
         webpage, 'title', group='title')
 
+Long lines policy
+
+There is a soft limit to keep lines of code under 80 characters long.
+This means it should be respected if possible and if it does not make
+readability and code maintenance worse.
+
+For example, you should NEVER split long string literals like URLs or
+some other often copied entities over multiple lines to fit this limit:
+
+Correct:
+
+    'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4'
+
+Incorrect:
+
+    'https://www.youtube.com/watch?v=FqZTN594JQw&list='
+    'PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4'
+
 Use safe conversion functions
 
 Wrap all extracted numeric data into safe functions from
index 24c3254c36ac37e756996cbdf6ca5d2d78a565d2..c014094199d78cd6a601e7ec4b6e865dcee5d750 100644 (file)
@@ -33,7 +33,7 @@
  - **AdobeTVShow**
  - **AdobeTVVideo**
  - **AdultSwim**
- - **aenetworks**: A+E Networks: A&E, Lifetime, History.com, FYI Network
+ - **aenetworks**: A+E Networks: A&E, Lifetime, History.com, FYI Network and History Vault
  - **afreecatv**: afreecatv.com
  - **AirMozilla**
  - **AliExpressLive**
  - **chirbit**
  - **chirbit:profile**
  - **Cinchcast**
+ - **CiscoLiveSearch**
+ - **CiscoLiveSession**
  - **CJSW**
  - **cliphunter**
  - **Clippit**
  - **Fusion**
  - **Fux**
  - **FXNetworks**
+ - **Gaia**
  - **GameInformer**
  - **GameOne**
  - **gameone:playlist**
  - **HRTiPlaylist**
  - **Huajiao**: 花椒直播
  - **HuffPost**: Huffington Post
+ - **Hungama**
+ - **HungamaSong**
  - **Hypem**
  - **Iconosquare**
  - **ign.com**
  - **imdb**: Internet Movie Database trailers
  - **imdb:list**: Internet Movie Database lists
  - **Imgur**
- - **ImgurAlbum**
+ - **imgur:album**
+ - **imgur:gallery**
  - **Ina**
  - **Inc**
  - **IndavideoEmbed**
  - **Le**: 乐视网
  - **Learnr**
  - **Lecture2Go**
+ - **Lecturio**
+ - **LecturioCourse**
+ - **LecturioDeCourse**
  - **LEGO**
  - **Lemonde**
  - **Lenta**
  - **MyviEmbed**
  - **MyVisionTV**
  - **n-tv.de**
- - **natgeo**
- - **natgeo:episodeguide**
  - **natgeo:video**
  - **Naver**
  - **NBA**
  - **orf:oe1**: Radio Österreich 1
  - **orf:tvthek**: ORF TVthek
  - **OsnatelTV**
+ - **OutsideTV**
  - **PacktPub**
  - **PacktPubCourse**
  - **PandaTV**: 熊猫TV
  - **Pinkbike**
  - **Pladform**
  - **play.fm**
+ - **PlayPlusTV**
  - **PlaysTV**
  - **Playtvak**: Playtvak.cz, iDNES.cz and Lidovky.cz
  - **Playvid**
  - **TastyTrade**
  - **TBS**
  - **TDSLifeway**
+ - **Teachable**
+ - **TeachableCourse**
  - **teachertube**: teachertube.com videos
  - **teachertube:user:collection**: teachertube.com user and collection videos
  - **TeachingChannel**
  - **ThisAmericanLife**
  - **ThisAV**
  - **ThisOldHouse**
+ - **TikTok**
+ - **TikTokUser**
  - **tinypic**: tinypic.com videos
  - **TMZ**
  - **TMZArticle**
  - **TVNet**
  - **TVNoe**
  - **TVNow**
- - **TVNowList**
+ - **TVNowAnnual**
+ - **TVNowNew**
+ - **TVNowSeason**
  - **TVNowShow**
  - **tvp**: Telewizja Polska
  - **tvp:embed**: Telewizja Polska
  - **uol.com.br**
  - **uplynk**
  - **uplynk:preplay**
- - **Upskill**
- - **UpskillCourse**
  - **Urort**: NRK P3 Urørt
  - **URPlay**
  - **USANetwork**
  - **VevoPlaylist**
  - **VGTV**: VGTV, BTTV, FTV, Aftenposten and Aftonbladet
  - **vh1.com**
+ - **vhx:embed**
  - **Viafree**
  - **vice**
  - **vice:article**
  - **wrzuta.pl:playlist**
  - **WSJ**: Wall Street Journal
  - **WSJArticle**
+ - **WWE**
  - **XBef**
  - **XboxClips**
  - **XFileShare**: XFileShare based sites: DaClips, FileHoot, GorillaVid, MovPod, PowerWatch, Rapidvideo.ws, TheVideoBee, Vidto, Streamin.To, XVIDSTAGE, Vid ABC, VidBom, vidlo, RapidVideo.TV, FastVideo.me
  - **ZDF**
  - **ZDFChannel**
  - **zingmp3**: mp3.zing.vn
+ - **Zype**
index 7dbb5805f8e124dd3f4634255d9090940b2880e4..dfb669ad235a7b2d7763d98246c1785e0edb4838 100644 (file)
--- a/setup.py
+++ b/setup.py
@@ -124,6 +124,8 @@ setup(
         'Development Status :: 5 - Production/Stable',
         'Environment :: Console',
         'License :: Public Domain',
+        'Programming Language :: Python',
+        'Programming Language :: Python :: 2',
         'Programming Language :: Python :: 2.6',
         'Programming Language :: Python :: 2.7',
         'Programming Language :: Python :: 3',
@@ -132,6 +134,13 @@ setup(
         'Programming Language :: Python :: 3.4',
         'Programming Language :: Python :: 3.5',
         'Programming Language :: Python :: 3.6',
+        'Programming Language :: Python :: 3.7',
+        'Programming Language :: Python :: 3.8',
+        'Programming Language :: Python :: Implementation',
+        'Programming Language :: Python :: Implementation :: CPython',
+        'Programming Language :: Python :: Implementation :: IronPython',
+        'Programming Language :: Python :: Implementation :: Jython',
+        'Programming Language :: Python :: Implementation :: PyPy',
     ],
 
     cmdclass={'build_lazy_extractors': build_lazy_extractors},
index aa9a1c9b2aadcd3a9eaeb1170c2e8d90afabb0b8..e62aab11e777cca955bb8a7a2149d7216430dbca 100644 (file)
@@ -153,15 +153,27 @@ def expect_value(self, got, expected, field):
                 isinstance(got, compat_str),
                 'Expected field %s to be a unicode object, but got value %r of type %r' % (field, got, type(got)))
             got = 'md5:' + md5(got)
-        elif isinstance(expected, compat_str) and expected.startswith('mincount:'):
+        elif isinstance(expected, compat_str) and re.match(r'^(?:min|max)?count:\d+', expected):
             self.assertTrue(
                 isinstance(got, (list, dict)),
                 'Expected field %s to be a list or a dict, but it is of type %s' % (
                     field, type(got).__name__))
-            expected_num = int(expected.partition(':')[2])
-            assertGreaterEqual(
+            op, _, expected_num = expected.partition(':')
+            expected_num = int(expected_num)
+            if op == 'mincount':
+                assert_func = assertGreaterEqual
+                msg_tmpl = 'Expected %d items in field %s, but only got %d'
+            elif op == 'maxcount':
+                assert_func = assertLessEqual
+                msg_tmpl = 'Expected maximum %d items in field %s, but got %d'
+            elif op == 'count':
+                assert_func = assertEqual
+                msg_tmpl = 'Expected exactly %d items in field %s, but got %d'
+            else:
+                assert False
+            assert_func(
                 self, len(got), expected_num,
-                'Expected %d items in field %s, but only got %d' % (expected_num, field, len(got)))
+                msg_tmpl % (expected_num, field, len(got)))
             return
         self.assertEqual(
             expected, got,
@@ -237,6 +249,20 @@ def assertGreaterEqual(self, got, expected, msg=None):
         self.assertTrue(got >= expected, msg)
 
 
+def assertLessEqual(self, got, expected, msg=None):
+    if not (got <= expected):
+        if msg is None:
+            msg = '%r not less than or equal to %r' % (got, expected)
+        self.assertTrue(got <= expected, msg)
+
+
+def assertEqual(self, got, expected, msg=None):
+    if not (got == expected):
+        if msg is None:
+            msg = '%r not equal to %r' % (got, expected)
+        self.assertTrue(got == expected, msg)
+
+
 def expect_warnings(ydl, warnings_re):
     real_warning = ydl.report_warning
 
diff --git a/test/test_YoutubeDLCookieJar.py b/test/test_YoutubeDLCookieJar.py
new file mode 100644 (file)
index 0000000..6a82435
--- /dev/null
@@ -0,0 +1,34 @@
+#!/usr/bin/env python
+# coding: utf-8
+
+from __future__ import unicode_literals
+
+import os
+import re
+import sys
+import tempfile
+import unittest
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from youtube_dl.utils import YoutubeDLCookieJar
+
+
+class TestYoutubeDLCookieJar(unittest.TestCase):
+    def test_keep_session_cookies(self):
+        cookiejar = YoutubeDLCookieJar('./test/testdata/cookies/session_cookies.txt')
+        cookiejar.load(ignore_discard=True, ignore_expires=True)
+        tf = tempfile.NamedTemporaryFile(delete=False)
+        try:
+            cookiejar.save(filename=tf.name, ignore_discard=True, ignore_expires=True)
+            temp = tf.read().decode('utf-8')
+            self.assertTrue(re.search(
+                r'www\.foobar\.foobar\s+FALSE\s+/\s+TRUE\s+0\s+YoutubeDLExpiresEmpty\s+YoutubeDLExpiresEmptyValue', temp))
+            self.assertTrue(re.search(
+                r'www\.foobar\.foobar\s+FALSE\s+/\s+TRUE\s+0\s+YoutubeDLExpires0\s+YoutubeDLExpires0Value', temp))
+        finally:
+            tf.close()
+            os.remove(tf.name)
+
+
+if __name__ == '__main__':
+    unittest.main()
index d6c54e135810f9d03a970e3d261d64c70bf19530..51fe6aa0b06d78fe0b2bbeedfe3074aea0301984 100644 (file)
@@ -39,7 +39,7 @@ class TestCompat(unittest.TestCase):
 
     def test_compat_expanduser(self):
         old_home = os.environ.get('HOME')
-        test_str = 'C:\Documents and Settings\тест\Application Data'
+        test_str = r'C:\Documents and Settings\тест\Application Data'
         compat_setenv('HOME', test_str)
         self.assertEqual(compat_expanduser('~'), test_str)
         compat_setenv('HOME', old_home or '')
index addb69d6fa205f00f50a9a733d40d4a2ea928155..4209d1d9a0cefa96fc5ea9d26229f6ac44116996 100644 (file)
@@ -14,4 +14,4 @@ from youtube_dl.postprocessor import MetadataFromTitlePP
 class TestMetadataFromTitle(unittest.TestCase):
     def test_format_to_regex(self):
         pp = MetadataFromTitlePP(None, '%(title)s - %(artist)s')
-        self.assertEqual(pp._titleregex, '(?P<title>.+)\ \-\ (?P<artist>.+)')
+        self.assertEqual(pp._titleregex, r'(?P<title>.+)\ \-\ (?P<artist>.+)')
diff --git a/test/testdata/cookies/session_cookies.txt b/test/testdata/cookies/session_cookies.txt
new file mode 100644 (file)
index 0000000..f6996f0
--- /dev/null
@@ -0,0 +1,6 @@
+# Netscape HTTP Cookie File
+# http://curl.haxx.se/rfc/cookie_spec.html
+# This is a generated file!  Do not edit.
+
+www.foobar.foobar      FALSE   /       TRUE            YoutubeDLExpiresEmpty   YoutubeDLExpiresEmptyValue
+www.foobar.foobar      FALSE   /       TRUE    0       YoutubeDLExpires0       YoutubeDLExpires0Value
index 0bab846a651b32c28ca28948fa6dcb812a6e01c8..542006d3f44f67b08f9e492aafc1454e570d971c 100755 (executable)
Binary files a/youtube-dl and b/youtube-dl differ
index 8ce66300d1bb2e15beb87c2bc173c7cd3fd78d93..cb8f218560b8cbb1e1f26e313c9318b7356d0b16 100644 (file)
@@ -1033,7 +1033,7 @@ formatting
 operations (https://docs.python.org/2/library/stdtypes.html#string-formatting).
 For example, \f[C]%(NAME)s\f[] or \f[C]%(NAME)05d\f[].
 To clarify, that is a percent symbol followed by a name in parentheses,
-followed by formatting operations.
+followed by formatting operations.
 Allowed names along with sequence type are:
 .IP \[bu] 2
 \f[C]id\f[] (string): Video identifier
@@ -2091,15 +2091,24 @@ Have a look at
 \f[C]youtube_dl/extractor/common.py\f[] (https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py)
 for possible helper methods and a detailed description of what your
 extractor should and may
-return (https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L74-L252).
+return (https://github.com/rg3/youtube-dl/blob/7f41a598b3fba1bcab2817de64a08941200aa3c8/youtube_dl/extractor/common.py#L94-L303).
 Add tests and code for as many as you want.
 .IP " 8." 4
 Make sure your code follows youtube\-dl coding conventions and check the
-code with flake8 (https://pypi.python.org/pypi/flake8).
-Also make sure your code works under all
-Python (https://www.python.org/) versions claimed supported by
-youtube\-dl, namely 2.6, 2.7, and 3.2+.
+code with
+flake8 (http://flake8.pycqa.org/en/latest/index.html#quickstart):
+.RS 4
+.IP
+.nf
+\f[C]
+\ $\ flake8\ youtube_dl/extractor/yourextractor.py
+\f[]
+.fi
+.RE
 .IP " 9." 4
+Make sure your code works under all Python (https://www.python.org/)
+versions claimed supported by youtube\-dl, namely 2.6, 2.7, and 3.2+.
+.IP "10." 4
 When the tests pass, add (https://git-scm.com/docs/git-add) the new
 files and commit (https://git-scm.com/docs/git-commit) them and
 push (https://git-scm.com/docs/git-push) the result, like this:
@@ -2107,14 +2116,14 @@ push (https://git-scm.com/docs/git-push) the result, like this:
 .IP
 .nf
 \f[C]
-$\ git\ add\ youtube_dl/extractor/extractors.py
-$\ git\ add\ youtube_dl/extractor/yourextractor.py
-$\ git\ commit\ \-m\ \[aq][yourextractor]\ Add\ new\ extractor\[aq]
-$\ git\ push\ origin\ yourextractor
+$\ git\ add\ youtube_dl/extractor/extractors.py
+$\ git\ add\ youtube_dl/extractor/yourextractor.py
+$\ git\ commit\ \-m\ \[aq][yourextractor]\ Add\ new\ extractor\[aq]
+$\ git\ push\ origin\ yourextractor
 \f[]
 .fi
 .RE
-.IP "10." 4
+.IP "11." 4
 Finally, create a pull
 request (https://help.github.com/articles/creating-a-pull-request).
 We\[aq]ll then review and merge it.
@@ -2144,7 +2153,7 @@ update at all.
 .PP
 For extraction to work youtube\-dl relies on metadata your extractor
 extracts and provides to youtube\-dl expressed by an information
-dictionary (https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L75-L257)
+dictionary (https://github.com/rg3/youtube-dl/blob/7f41a598b3fba1bcab2817de64a08941200aa3c8/youtube_dl/extractor/common.py#L94-L303)
 or simply \f[I]info dict\f[].
 Only the following meta fields in the \f[I]info dict\f[] are considered
 mandatory for a successful extraction process by youtube\-dl:
@@ -2165,7 +2174,7 @@ extraction does not make any sense without and if any of them fail to be
 extracted then the extractor is considered completely broken.
 .PP
 Any
-field (https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L149-L257)
+field (https://github.com/rg3/youtube-dl/blob/7f41a598b3fba1bcab2817de64a08941200aa3c8/youtube_dl/extractor/common.py#L188-L303)
 apart from the aforementioned ones are considered \f[B]optional\f[].
 That means that extraction should be \f[B]tolerant\f[] to situations
 when sources for these fields can potentially be unavailable (even if
@@ -2286,9 +2295,37 @@ title\ =\ meta.get(\[aq]title\[aq])\ or\ self._og_search_title(webpage)
 .PP
 This code will try to extract from \f[C]meta\f[] first and if it fails
 it will try extracting \f[C]og:title\f[] from a \f[C]webpage\f[].
-.SS Make regular expressions flexible
+.SS Regular expressions
+.SS Don\[aq]t capture groups you don\[aq]t use
+.PP
+A capturing group must be an indication that it\[aq]s used somewhere in
+the code.
+Any group that is not used must be made non-capturing.
+.SS Example
+.PP
+Don\[aq]t capture id attribute name here since you can\[aq]t use it for
+anything anyway.
+.PP
+Correct:
+.IP
+.nf
+\f[C]
+r\[aq](?:id|ID)=(?P<id>\\d+)\[aq]
+\f[]
+.fi
+.PP
+Incorrect:
+.IP
+.nf
+\f[C]
+r\[aq](id|ID)=(?P<id>\\d+)\[aq]
+\f[]
+.fi
+.SS Make regular expressions relaxed and flexible
 .PP
-When using regular expressions try to write them fuzzy and flexible.
+When using regular expressions try to write them fuzzy, relaxed and
+flexible, skipping insignificant parts that are more likely to change,
+allowing both single and double quotes for quoted values and so on.
 .SS Example
 .PP
 Say you need to extract \f[C]title\f[] from the following HTML code:
@@ -2331,6 +2368,32 @@ title\ =\ self._search_regex(
 \ \ \ \ webpage,\ \[aq]title\[aq],\ group=\[aq]title\[aq])
 \f[]
 .fi
+.SS Long lines policy
+.PP
+There is a soft limit to keep lines of code under 80 characters long.
+This means it should be respected if possible and if it does not make
+readability and code maintenance worse.
+.PP
+For example, you should \f[B]never\f[] split long string literals like
+URLs or some other often copied entities over multiple lines to fit this
+limit:
+.PP
+Correct:
+.IP
+.nf
+\f[C]
+\[aq]https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4\[aq]
+\f[]
+.fi
+.PP
+Incorrect:
+.IP
+.nf
+\f[C]
+\[aq]https://www.youtube.com/watch?v=FqZTN594JQw&list=\[aq]
+\[aq]PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4\[aq]
+\f[]
+.fi
 .SS Use safe conversion functions
 .PP
 Wrap all extracted numeric data into safe functions from
index 38ba43a977c1627f4d1f7a181c093477108ab959..4493fd0e1aabc0452041a68262772b9b5d223d79 100755 (executable)
@@ -88,6 +88,7 @@ from .utils import (
     version_tuple,
     write_json_file,
     write_string,
+    YoutubeDLCookieJar,
     YoutubeDLCookieProcessor,
     YoutubeDLHandler,
 )
@@ -558,7 +559,7 @@ class YoutubeDL(object):
         self.restore_console_title()
 
         if self.params.get('cookiefile') is not None:
-            self.cookiejar.save()
+            self.cookiejar.save(ignore_discard=True, ignore_expires=True)
 
     def trouble(self, message=None, tb=None):
         """Determine action to take when a download problem appears.
@@ -2297,10 +2298,9 @@ class YoutubeDL(object):
             self.cookiejar = compat_cookiejar.CookieJar()
         else:
             opts_cookiefile = expand_path(opts_cookiefile)
-            self.cookiejar = compat_cookiejar.MozillaCookieJar(
-                opts_cookiefile)
+            self.cookiejar = YoutubeDLCookieJar(opts_cookiefile)
             if os.access(opts_cookiefile, os.R_OK):
-                self.cookiejar.load()
+                self.cookiejar.load(ignore_discard=True, ignore_expires=True)
 
         cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
         if opts_proxy is not None:
index fd304527e854e768038880bf785d84f211f4471b..4def8e2d579c17956f4a95ebb3de914d457c59f4 100644 (file)
@@ -75,10 +75,14 @@ class HlsFD(FragmentFD):
                 fd.add_progress_hook(ph)
             return fd.real_download(filename, info_dict)
 
-        def is_ad_fragment(s):
+        def is_ad_fragment_start(s):
             return (s.startswith('#ANVATO-SEGMENT-INFO') and 'type=ad' in s or
                     s.startswith('#UPLYNK-SEGMENT') and s.endswith(',ad'))
 
+        def is_ad_fragment_end(s):
+            return (s.startswith('#ANVATO-SEGMENT-INFO') and 'type=master' in s or
+                    s.startswith('#UPLYNK-SEGMENT') and s.endswith(',segment'))
+
         media_frags = 0
         ad_frags = 0
         ad_frag_next = False
@@ -87,12 +91,13 @@ class HlsFD(FragmentFD):
             if not line:
                 continue
             if line.startswith('#'):
-                if is_ad_fragment(line):
-                    ad_frags += 1
+                if is_ad_fragment_start(line):
                     ad_frag_next = True
+                elif is_ad_fragment_end(line):
+                    ad_frag_next = False
                 continue
             if ad_frag_next:
-                ad_frag_next = False
+                ad_frags += 1
                 continue
             media_frags += 1
 
@@ -123,7 +128,6 @@ class HlsFD(FragmentFD):
             if line:
                 if not line.startswith('#'):
                     if ad_frag_next:
-                        ad_frag_next = False
                         continue
                     frag_index += 1
                     if frag_index <= ctx['fragment_index']:
@@ -196,8 +200,10 @@ class HlsFD(FragmentFD):
                         'start': sub_range_start,
                         'end': sub_range_start + int(splitted_byte_range[0]),
                     }
-                elif is_ad_fragment(line):
+                elif is_ad_fragment_start(line):
                     ad_frag_next = True
+                elif is_ad_fragment_end(line):
+                    ad_frag_next = False
 
         self._finish_frag_download(ctx)
 
index 6d846ea7a18829d9af03278b5da3996782bb26ab..c4362be88b44164bad47e13e0e3f71ce9923e1f0 100644 (file)
@@ -17,25 +17,15 @@ from ..utils import (
 
 class ACastIE(InfoExtractor):
     IE_NAME = 'acast'
-    _VALID_URL = r'https?://(?:www\.)?acast\.com/(?P<channel>[^/]+)/(?P<id>[^/#?]+)'
+    _VALID_URL = r'''(?x)
+                    https?://
+                        (?:
+                            (?:(?:embed|www)\.)?acast\.com/|
+                            play\.acast\.com/s/
+                        )
+                        (?P<channel>[^/]+)/(?P<id>[^/#?]+)
+                    '''
     _TESTS = [{
-        # test with one bling
-        'url': 'https://www.acast.com/condenasttraveler/-where-are-you-taipei-101-taiwan',
-        'md5': 'ada3de5a1e3a2a381327d749854788bb',
-        'info_dict': {
-            'id': '57de3baa-4bb0-487e-9418-2692c1277a34',
-            'ext': 'mp3',
-            'title': '"Where Are You?": Taipei 101, Taiwan',
-            'description': 'md5:a0b4ef3634e63866b542e5b1199a1a0e',
-            'timestamp': 1196172000,
-            'upload_date': '20071127',
-            'duration': 211,
-            'creator': 'Concierge',
-            'series': 'Condé Nast Traveler Podcast',
-            'episode': '"Where Are You?": Taipei 101, Taiwan',
-        }
-    }, {
-        # test with multiple blings
         'url': 'https://www.acast.com/sparpodcast/2.raggarmordet-rosterurdetforflutna',
         'md5': 'a02393c74f3bdb1801c3ec2695577ce0',
         'info_dict': {
@@ -50,6 +40,12 @@ class ACastIE(InfoExtractor):
             'series': 'Spår',
             'episode': '2. Raggarmordet - Röster ur det förflutna',
         }
+    }, {
+        'url': 'http://embed.acast.com/adambuxton/ep.12-adam-joeschristmaspodcast2015',
+        'only_matching': True,
+    }, {
+        'url': 'https://play.acast.com/s/rattegangspodden/s04e09-styckmordet-i-helenelund-del-22',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
@@ -83,17 +79,27 @@ class ACastIE(InfoExtractor):
 
 class ACastChannelIE(InfoExtractor):
     IE_NAME = 'acast:channel'
-    _VALID_URL = r'https?://(?:www\.)?acast\.com/(?P<id>[^/#?]+)'
-    _TEST = {
-        'url': 'https://www.acast.com/condenasttraveler',
+    _VALID_URL = r'''(?x)
+                    https?://
+                        (?:
+                            (?:www\.)?acast\.com/|
+                            play\.acast\.com/s/
+                        )
+                        (?P<id>[^/#?]+)
+                    '''
+    _TESTS = [{
+        'url': 'https://www.acast.com/todayinfocus',
         'info_dict': {
-            'id': '50544219-29bb-499e-a083-6087f4cb7797',
-            'title': 'Condé Nast Traveler Podcast',
-            'description': 'md5:98646dee22a5b386626ae31866638fbd',
+            'id': '4efc5294-5385-4847-98bd-519799ce5786',
+            'title': 'Today in Focus',
+            'description': 'md5:9ba5564de5ce897faeb12963f4537a64',
         },
-        'playlist_mincount': 20,
-    }
-    _API_BASE_URL = 'https://www.acast.com/api/'
+        'playlist_mincount': 35,
+    }, {
+        'url': 'http://play.acast.com/s/ft-banking-weekly',
+        'only_matching': True,
+    }]
+    _API_BASE_URL = 'https://play.acast.com/api/'
     _PAGE_SIZE = 10
 
     @classmethod
@@ -106,7 +112,7 @@ class ACastChannelIE(InfoExtractor):
             channel_slug, note='Download page %d of channel data' % page)
         for cast in casts:
             yield self.url_result(
-                'https://www.acast.com/%s/%s' % (channel_slug, cast['url']),
+                'https://play.acast.com/s/%s/%s' % (channel_slug, cast['url']),
                 'ACast', cast['id'])
 
     def _real_extract(self, url):
index 398e56ea301f1fb88eeb481c33c801371c09e430..85ec6392daf807b18253b815c0165afc07badbc0 100644 (file)
@@ -22,18 +22,19 @@ class AENetworksBaseIE(ThePlatformIE):
 
 class AENetworksIE(AENetworksBaseIE):
     IE_NAME = 'aenetworks'
-    IE_DESC = 'A+E Networks: A&E, Lifetime, History.com, FYI Network'
+    IE_DESC = 'A+E Networks: A&E, Lifetime, History.com, FYI Network and History Vault'
     _VALID_URL = r'''(?x)
                     https?://
                         (?:www\.)?
                         (?P<domain>
-                            (?:history|aetv|mylifetime|lifetimemovieclub)\.com|
+                            (?:history(?:vault)?|aetv|mylifetime|lifetimemovieclub)\.com|
                             fyi\.tv
                         )/
                         (?:
                             shows/(?P<show_path>[^/]+(?:/[^/]+){0,2})|
                             movies/(?P<movie_display_id>[^/]+)(?:/full-movie)?|
-                            specials/(?P<special_display_id>[^/]+)/full-special
+                            specials/(?P<special_display_id>[^/]+)/full-special|
+                            collections/[^/]+/(?P<collection_display_id>[^/]+)
                         )
                     '''
     _TESTS = [{
@@ -80,6 +81,9 @@ class AENetworksIE(AENetworksBaseIE):
     }, {
         'url': 'http://www.history.com/specials/sniper-into-the-kill-zone/full-special',
         'only_matching': True
+    }, {
+        'url': 'https://www.historyvault.com/collections/america-the-story-of-us/westward',
+        'only_matching': True
     }]
     _DOMAIN_TO_REQUESTOR_ID = {
         'history.com': 'HISTORY',
@@ -90,9 +94,9 @@ class AENetworksIE(AENetworksBaseIE):
     }
 
     def _real_extract(self, url):
-        domain, show_path, movie_display_id, special_display_id = re.match(self._VALID_URL, url).groups()
-        display_id = show_path or movie_display_id or special_display_id
-        webpage = self._download_webpage(url, display_id)
+        domain, show_path, movie_display_id, special_display_id, collection_display_id = re.match(self._VALID_URL, url).groups()
+        display_id = show_path or movie_display_id or special_display_id or collection_display_id
+        webpage = self._download_webpage(url, display_id, headers=self.geo_verification_headers())
         if show_path:
             url_parts = show_path.split('/')
             url_parts_len = len(url_parts)
index 01736872dc79709ea756a3d3af255662e19491bf..8b32aa886e9696e9334f73a777a70264f28c9433 100644 (file)
@@ -43,10 +43,6 @@ class AmericasTestKitchenIE(InfoExtractor):
 
         webpage = self._download_webpage(url, video_id)
 
-        partner_id = self._search_regex(
-            r'src=["\'](?:https?:)?//(?:[^/]+\.)kaltura\.com/(?:[^/]+/)*(?:p|partner_id)/(\d+)',
-            webpage, 'kaltura partner id')
-
         video_data = self._parse_json(
             self._search_regex(
                 r'window\.__INITIAL_STATE__\s*=\s*({.+?})\s*;\s*</script>',
@@ -58,7 +54,18 @@ class AmericasTestKitchenIE(InfoExtractor):
             (lambda x: x['episodeDetail']['content']['data'],
              lambda x: x['videoDetail']['content']['data']), dict)
         ep_meta = ep_data.get('full_video', {})
-        external_id = ep_data.get('external_id') or ep_meta['external_id']
+
+        zype_id = ep_meta.get('zype_id')
+        if zype_id:
+            embed_url = 'https://player.zype.com/embed/%s.js?api_key=jZ9GUhRmxcPvX7M3SlfejB6Hle9jyHTdk2jVxG7wOHPLODgncEKVdPYBhuz9iWXQ' % zype_id
+            ie_key = 'Zype'
+        else:
+            partner_id = self._search_regex(
+                r'src=["\'](?:https?:)?//(?:[^/]+\.)kaltura\.com/(?:[^/]+/)*(?:p|partner_id)/(\d+)',
+                webpage, 'kaltura partner id')
+            external_id = ep_data.get('external_id') or ep_meta['external_id']
+            embed_url = 'kaltura:%s:%s' % (partner_id, external_id)
+            ie_key = 'Kaltura'
 
         title = ep_data.get('title') or ep_meta.get('title')
         description = clean_html(ep_meta.get('episode_description') or ep_data.get(
@@ -72,8 +79,8 @@ class AmericasTestKitchenIE(InfoExtractor):
 
         return {
             '_type': 'url_transparent',
-            'url': 'kaltura:%s:%s' % (partner_id, external_id),
-            'ie_key': 'Kaltura',
+            'url': embed_url,
+            'ie_key': ie_key,
             'title': title,
             'description': description,
             'thumbnail': thumbnail,
index 6bf8f61eb03c0994c87651af51ecb58cd3065060..8adae46449232fe487ec6796b5b60345f144d310 100644 (file)
@@ -8,20 +8,23 @@ from .generic import GenericIE
 from ..utils import (
     determine_ext,
     ExtractorError,
-    qualities,
     int_or_none,
     parse_duration,
+    qualities,
+    str_or_none,
+    try_get,
     unified_strdate,
-    xpath_text,
+    unified_timestamp,
     update_url_query,
     url_or_none,
+    xpath_text,
 )
 from ..compat import compat_etree_fromstring
 
 
 class ARDMediathekIE(InfoExtractor):
     IE_NAME = 'ARD:mediathek'
-    _VALID_URL = r'^https?://(?:(?:www\.)?ardmediathek\.de|mediathek\.(?:daserste|rbb-online)\.de|one\.ard\.de)/(?:.*/)(?P<video_id>[0-9]+|[^0-9][^/\?]+)[^/\?]*(?:\?.*)?'
+    _VALID_URL = r'^https?://(?:(?:(?:www|classic)\.)?ardmediathek\.de|mediathek\.(?:daserste|rbb-online)\.de|one\.ard\.de)/(?:.*/)(?P<video_id>[0-9]+|[^0-9][^/\?]+)[^/\?]*(?:\?.*)?'
 
     _TESTS = [{
         # available till 26.07.2022
@@ -51,8 +54,15 @@ class ARDMediathekIE(InfoExtractor):
         # audio
         'url': 'http://mediathek.rbb-online.de/radio/Hörspiel/Vor-dem-Fest/kulturradio/Audio?documentId=30796318&topRessort=radio&bcastId=9839158',
         'only_matching': True,
+    }, {
+        'url': 'https://classic.ardmediathek.de/tv/Panda-Gorilla-Co/Panda-Gorilla-Co-Folge-274/Das-Erste/Video?bcastId=16355486&documentId=58234698',
+        'only_matching': True,
     }]
 
+    @classmethod
+    def suitable(cls, url):
+        return False if ARDBetaMediathekIE.suitable(url) else super(ARDMediathekIE, cls).suitable(url)
+
     def _extract_media_info(self, media_info_url, webpage, video_id):
         media_info = self._download_json(
             media_info_url, video_id, 'Downloading media JSON')
@@ -173,13 +183,18 @@ class ARDMediathekIE(InfoExtractor):
         title = self._html_search_regex(
             [r'<h1(?:\s+class="boxTopHeadline")?>(.*?)</h1>',
              r'<meta name="dcterms\.title" content="(.*?)"/>',
-             r'<h4 class="headline">(.*?)</h4>'],
+             r'<h4 class="headline">(.*?)</h4>',
+             r'<title[^>]*>(.*?)</title>'],
             webpage, 'title')
         description = self._html_search_meta(
             'dcterms.abstract', webpage, 'description', default=None)
         if description is None:
             description = self._html_search_meta(
-                'description', webpage, 'meta description')
+                'description', webpage, 'meta description', default=None)
+        if description is None:
+            description = self._html_search_regex(
+                r'<p\s+class="teasertext">(.+?)</p>',
+                webpage, 'teaser text', default=None)
 
         # Thumbnail is sometimes not present.
         # It is in the mobile version, but that seems to use a different URL
@@ -288,7 +303,7 @@ class ARDIE(InfoExtractor):
 
 
 class ARDBetaMediathekIE(InfoExtractor):
-    _VALID_URL = r'https://beta\.ardmediathek\.de/[a-z]+/player/(?P<video_id>[a-zA-Z0-9]+)/(?P<display_id>[^/?#]+)'
+    _VALID_URL = r'https://(?:beta|www)\.ardmediathek\.de/[^/]+/(?:player|live)/(?P<video_id>[a-zA-Z0-9]+)(?:/(?P<display_id>[^/?#]+))?'
     _TESTS = [{
         'url': 'https://beta.ardmediathek.de/ard/player/Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhdG9ydC9mYmM4NGM1NC0xNzU4LTRmZGYtYWFhZS0wYzcyZTIxNGEyMDE/die-robuste-roswita',
         'md5': '2d02d996156ea3c397cfc5036b5d7f8f',
@@ -302,12 +317,18 @@ class ARDBetaMediathekIE(InfoExtractor):
             'upload_date': '20180826',
             'ext': 'mp4',
         },
+    }, {
+        'url': 'https://www.ardmediathek.de/ard/player/Y3JpZDovL3N3ci5kZS9hZXgvbzEwNzE5MTU/',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.ardmediathek.de/swr/live/Y3JpZDovL3N3ci5kZS8xMzQ4MTA0Mg',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
         video_id = mobj.group('video_id')
-        display_id = mobj.group('display_id')
+        display_id = mobj.group('display_id') or video_id
 
         webpage = self._download_webpage(url, display_id)
         data_json = self._search_regex(r'window\.__APOLLO_STATE__\s*=\s*(\{.*);\n', webpage, 'json')
@@ -318,43 +339,62 @@ class ARDBetaMediathekIE(InfoExtractor):
             'display_id': display_id,
         }
         formats = []
+        subtitles = {}
+        geoblocked = False
         for widget in data.values():
-            if widget.get('_geoblocked'):
-                raise ExtractorError('This video is not available due to geoblocking', expected=True)
-
+            if widget.get('_geoblocked') is True:
+                geoblocked = True
             if '_duration' in widget:
-                res['duration'] = widget['_duration']
+                res['duration'] = int_or_none(widget['_duration'])
             if 'clipTitle' in widget:
                 res['title'] = widget['clipTitle']
             if '_previewImage' in widget:
                 res['thumbnail'] = widget['_previewImage']
             if 'broadcastedOn' in widget:
-                res['upload_date'] = unified_strdate(widget['broadcastedOn'])
+                res['timestamp'] = unified_timestamp(widget['broadcastedOn'])
             if 'synopsis' in widget:
                 res['description'] = widget['synopsis']
-            if '_subtitleUrl' in widget:
-                res['subtitles'] = {'de': [{
+            subtitle_url = url_or_none(widget.get('_subtitleUrl'))
+            if subtitle_url:
+                subtitles.setdefault('de', []).append({
                     'ext': 'ttml',
-                    'url': widget['_subtitleUrl'],
-                }]}
+                    'url': subtitle_url,
+                })
             if '_quality' in widget:
-                format_url = widget['_stream']['json'][0]
-
-                if format_url.endswith('.f4m'):
+                format_url = url_or_none(try_get(
+                    widget, lambda x: x['_stream']['json'][0]))
+                if not format_url:
+                    continue
+                ext = determine_ext(format_url)
+                if ext == 'f4m':
                     formats.extend(self._extract_f4m_formats(
                         format_url + '?hdcore=3.11.0',
                         video_id, f4m_id='hds', fatal=False))
-                elif format_url.endswith('m3u8'):
+                elif ext == 'm3u8':
                     formats.extend(self._extract_m3u8_formats(
-                        format_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
+                        format_url, video_id, 'mp4', m3u8_id='hls',
+                        fatal=False))
                 else:
+                    # HTTP formats are not available when geoblocked is True,
+                    # other formats are fine though
+                    if geoblocked:
+                        continue
+                    quality = str_or_none(widget.get('_quality'))
                     formats.append({
-                        'format_id': 'http-' + widget['_quality'],
+                        'format_id': ('http-' + quality) if quality else 'http',
                         'url': format_url,
                         'preference': 10,  # Plain HTTP, that's nice
                     })
 
+        if not formats and geoblocked:
+            self.raise_geo_restricted(
+                msg='This video is not available due to geoblocking',
+                countries=['DE'])
+
         self._sort_formats(formats)
-        res['formats'] = formats
+        res.update({
+            'subtitles': subtitles,
+            'formats': formats,
+        })
 
         return res
index 1584d53fc3784d3ffe1a89ca2856d30f4b94380c..95e572d70c2473e69ebce18df4ad8c9e51c50c1d 100644 (file)
@@ -28,8 +28,10 @@ class ATVAtIE(InfoExtractor):
         display_id = self._match_id(url)
         webpage = self._download_webpage(url, display_id)
         video_data = self._parse_json(unescapeHTML(self._search_regex(
-            r'class="[^"]*jsb_video/FlashPlayer[^"]*"[^>]+data-jsb="([^"]+)"',
-            webpage, 'player data')), display_id)['config']['initial_video']
+            [r'flashPlayerOptions\s*=\s*(["\'])(?P<json>(?:(?!\1).)+)\1',
+             r'class="[^"]*jsb_video/FlashPlayer[^"]*"[^>]+data-jsb="(?P<json>[^"]+)"'],
+            webpage, 'player data', group='json')),
+            display_id)['config']['initial_video']
 
         video_id = video_data['id']
         video_title = video_data['title']
index 62049b921089d4a10cc9f71b625540f82ea7e3b6..cc7771354c10b39778f0b2dcad55052db42066b3 100644 (file)
@@ -62,7 +62,7 @@ class AudiomackIE(InfoExtractor):
         # Audiomack wraps a lot of soundcloud tracks in their branded wrapper
         # if so, pass the work off to the soundcloud extractor
         if SoundcloudIE.suitable(api_response['url']):
-            return {'_type': 'url', 'url': api_response['url'], 'ie_key': 'Soundcloud'}
+            return self.url_result(api_response['url'], SoundcloudIE.ie_key())
 
         return {
             'id': compat_str(api_response.get('id', album_url_tag)),
index a57a5f114c825f80e3f5ec9dc9f3b6a7511c67e9..fcbdc71b98d98076852e0f88559f4a2ed428d7af 100644 (file)
@@ -36,7 +36,6 @@ class AZMedienIE(InfoExtractor):
             'id': '1_anruz3wy',
             'ext': 'mp4',
             'title': 'Bundesrats-Vakanzen / EU-Rahmenabkommen',
-            'description': 'md5:dd9f96751ec9c35e409a698a328402f3',
             'uploader_id': 'TVOnline',
             'upload_date': '20180930',
             'timestamp': 1538328802,
@@ -53,15 +52,12 @@ class AZMedienIE(InfoExtractor):
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
+        host = mobj.group('host')
         video_id = mobj.group('id')
         entry_id = mobj.group('kaltura_id')
 
         if not entry_id:
-            webpage = self._download_webpage(url, video_id)
-            api_path = self._search_regex(
-                r'["\']apiPath["\']\s*:\s*["\']([^"^\']+)["\']',
-                webpage, 'api path')
-            api_url = 'https://www.%s%s' % (mobj.group('host'), api_path)
+            api_url = 'https://www.%s/api/pub/gql/%s' % (host, host.split('.')[0])
             payload = {
                 'query': '''query VideoContext($articleId: ID!) {
                     article: node(id: $articleId) {
index abcfa301d9abf4e76a01d42cac71a01e06e2f1eb..eac9a5a4668961571c9162ca2be93aba3211438a 100644 (file)
@@ -795,6 +795,15 @@ class BBCIE(BBCCoUkIE):
             'uploader': 'Radio 3',
             'uploader_id': 'bbc_radio_three',
         },
+    }, {
+        'url': 'http://www.bbc.co.uk/learningenglish/chinese/features/lingohack/ep-181227',
+        'info_dict': {
+            'id': 'p06w9tws',
+            'ext': 'mp4',
+            'title': 'md5:2fabf12a726603193a2879a055f72514',
+            'description': 'Learn English words and phrases from this story',
+        },
+        'add_ie': [BBCCoUkIE.ie_key()],
     }]
 
     @classmethod
@@ -945,6 +954,15 @@ class BBCIE(BBCCoUkIE):
         if entries:
             return self.playlist_result(entries, playlist_id, playlist_title, playlist_description)
 
+        # http://www.bbc.co.uk/learningenglish/chinese/features/lingohack/ep-181227
+        group_id = self._search_regex(
+            r'<div[^>]+\bclass=["\']video["\'][^>]+\bdata-pid=["\'](%s)' % self._ID_REGEX,
+            webpage, 'group id', default=None)
+        if group_id:
+            return self.url_result(
+                'https://www.bbc.co.uk/programmes/%s' % group_id,
+                ie=BBCCoUkIE.ie_key())
+
         # single video story (e.g. http://www.bbc.com/travel/story/20150625-sri-lankas-spicy-secret)
         programme_id = self._search_regex(
             [r'data-(?:video-player|media)-vpid="(%s)"' % self._ID_REGEX,
index bf22a41b745db2eef277c6fc41cf716cef6d6366..1086d76324a1df1484adb042f74b35a614fea01b 100644 (file)
@@ -1,15 +1,10 @@
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
-from ..compat import (
-    compat_chr,
-    compat_ord,
-    compat_urllib_parse_unquote,
-)
+from ..compat import compat_str
 from ..utils import (
     int_or_none,
-    parse_iso8601,
-    urljoin,
+    unified_timestamp,
 )
 
 
@@ -36,29 +31,9 @@ class BeegIE(InfoExtractor):
 
         webpage = self._download_webpage(url, video_id)
 
-        cpl_url = self._search_regex(
-            r'<script[^>]+src=(["\'])(?P<url>(?:/static|(?:https?:)?//static\.beeg\.com)/cpl/\d+\.js.*?)\1',
-            webpage, 'cpl', default=None, group='url')
-
-        cpl_url = urljoin(url, cpl_url)
-
-        beeg_version, beeg_salt = [None] * 2
-
-        if cpl_url:
-            cpl = self._download_webpage(
-                self._proto_relative_url(cpl_url), video_id,
-                'Downloading cpl JS', fatal=False)
-            if cpl:
-                beeg_version = int_or_none(self._search_regex(
-                    r'beeg_version\s*=\s*([^\b]+)', cpl,
-                    'beeg version', default=None)) or self._search_regex(
-                    r'/(\d+)\.js', cpl_url, 'beeg version', default=None)
-                beeg_salt = self._search_regex(
-                    r'beeg_salt\s*=\s*(["\'])(?P<beeg_salt>.+?)\1', cpl, 'beeg salt',
-                    default=None, group='beeg_salt')
-
-        beeg_version = beeg_version or '2185'
-        beeg_salt = beeg_salt or 'pmweAkq8lAYKdfWcFCUj0yoVgoPlinamH5UE1CB3H'
+        beeg_version = self._search_regex(
+            r'beeg_version\s*=\s*([\da-zA-Z_-]+)', webpage, 'beeg version',
+            default='1546225636701')
 
         for api_path in ('', 'api.'):
             video = self._download_json(
@@ -68,37 +43,6 @@ class BeegIE(InfoExtractor):
             if video:
                 break
 
-        def split(o, e):
-            def cut(s, x):
-                n.append(s[:x])
-                return s[x:]
-            n = []
-            r = len(o) % e
-            if r > 0:
-                o = cut(o, r)
-            while len(o) > e:
-                o = cut(o, e)
-            n.append(o)
-            return n
-
-        def decrypt_key(key):
-            # Reverse engineered from http://static.beeg.com/cpl/1738.js
-            a = beeg_salt
-            e = compat_urllib_parse_unquote(key)
-            o = ''.join([
-                compat_chr(compat_ord(e[n]) - compat_ord(a[n % len(a)]) % 21)
-                for n in range(len(e))])
-            return ''.join(split(o, 3)[::-1])
-
-        def decrypt_url(encrypted_url):
-            encrypted_url = self._proto_relative_url(
-                encrypted_url.replace('{DATA_MARKERS}', ''), 'https:')
-            key = self._search_regex(
-                r'/key=(.*?)%2Cend=', encrypted_url, 'key', default=None)
-            if not key:
-                return encrypted_url
-            return encrypted_url.replace(key, decrypt_key(key))
-
         formats = []
         for format_id, video_url in video.items():
             if not video_url:
@@ -108,18 +52,20 @@ class BeegIE(InfoExtractor):
             if not height:
                 continue
             formats.append({
-                'url': decrypt_url(video_url),
+                'url': self._proto_relative_url(
+                    video_url.replace('{DATA_MARKERS}', 'data=pc_XX__%s_0' % beeg_version), 'https:'),
                 'format_id': format_id,
                 'height': int(height),
             })
         self._sort_formats(formats)
 
         title = video['title']
-        video_id = video.get('id') or video_id
+        video_id = compat_str(video.get('id') or video_id)
         display_id = video.get('code')
         description = video.get('desc')
+        series = video.get('ps_name')
 
-        timestamp = parse_iso8601(video.get('date'), ' ')
+        timestamp = unified_timestamp(video.get('date'))
         duration = int_or_none(video.get('duration'))
 
         tags = [tag.strip() for tag in video['tags'].split(',')] if video.get('tags') else None
@@ -129,6 +75,7 @@ class BeegIE(InfoExtractor):
             'display_id': display_id,
             'title': title,
             'description': description,
+            'series': series,
             'timestamp': timestamp,
             'duration': duration,
             'tags': tags,
index 446a1ab19f93955ab143e729628e4ecf1add9b16..4f39424f59ee12bb5adb916e707b3a33e9d35e58 100644 (file)
@@ -5,7 +5,10 @@ import itertools
 import re
 
 from .common import InfoExtractor
-from ..utils import urlencode_postdata
+from ..utils import (
+    orderedSet,
+    urlencode_postdata,
+)
 
 
 class BitChuteIE(InfoExtractor):
@@ -37,16 +40,22 @@ class BitChuteIE(InfoExtractor):
                 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.57 Safari/537.36',
             })
 
-        title = self._search_regex(
+        title = self._html_search_regex(
             (r'<[^>]+\bid=["\']video-title[^>]+>([^<]+)', r'<title>([^<]+)'),
             webpage, 'title', default=None) or self._html_search_meta(
             'description', webpage, 'title',
             default=None) or self._og_search_description(webpage)
 
+        format_urls = []
+        for mobj in re.finditer(
+                r'addWebSeed\s*\(\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage):
+            format_urls.append(mobj.group('url'))
+        format_urls.extend(re.findall(r'as=(https?://[^&"\']+)', webpage))
+
         formats = [
-            {'url': mobj.group('url')}
-            for mobj in re.finditer(
-                r'addWebSeed\s*\(\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage)]
+            {'url': format_url}
+            for format_url in orderedSet(format_urls)]
+        self._check_formats(formats, video_id)
         self._sort_formats(formats)
 
         description = self._html_search_regex(
index 79350817f5599fba1b560215076aea2878984835..1eb81b75e95044d460d7f53e1d59e9ec174c6480 100644 (file)
@@ -14,6 +14,7 @@ class CamModelsIE(InfoExtractor):
     _TESTS = [{
         'url': 'https://www.cammodels.com/cam/AutumnKnight/',
         'only_matching': True,
+        'age_limit': 18
     }]
 
     def _real_extract(self, url):
@@ -93,4 +94,5 @@ class CamModelsIE(InfoExtractor):
             'title': self._live_title(user_id),
             'is_live': True,
             'formats': formats,
+            'age_limit': 18
         }
index c7d40f849a206b4842af84119d78e2f75a4da696..b3be3bdcf7850c44ca85519e6c32356e5a5d9804 100644 (file)
@@ -20,6 +20,7 @@ class CamTubeIE(InfoExtractor):
             'duration': 1274,
             'timestamp': 1528018608,
             'upload_date': '20180603',
+            'age_limit': 18
         },
         'params': {
             'skip_download': True,
@@ -66,4 +67,5 @@ class CamTubeIE(InfoExtractor):
             'like_count': like_count,
             'creator': creator,
             'formats': formats,
+            'age_limit': 18
         }
index afbc5ea267400394dd7c477b9468ef434c96be6c..bbc5205fded6ee2ba99c159e5436d9fcb74ffd2b 100644 (file)
@@ -25,6 +25,7 @@ class CamWithHerIE(InfoExtractor):
             'comment_count': int,
             'uploader': 'MileenaK',
             'upload_date': '20160322',
+            'age_limit': 18,
         },
         'params': {
             'skip_download': True,
@@ -84,4 +85,5 @@ class CamWithHerIE(InfoExtractor):
             'comment_count': comment_count,
             'uploader': uploader,
             'upload_date': upload_date,
+            'age_limit': 18
         }
index 9ba909a918755b5a02f8d8fe684a6f73a438ada9..b57b86af7e6b05527e102974b6533b4538f5d366 100644 (file)
@@ -82,6 +82,12 @@ class CarambaTVPageIE(InfoExtractor):
         webpage = self._download_webpage(url, video_id)
 
         videomore_url = VideomoreIE._extract_url(webpage)
+        if not videomore_url:
+            videomore_id = self._search_regex(
+                r'getVMCode\s*\(\s*["\']?(\d+)', webpage, 'videomore id',
+                default=None)
+            if videomore_id:
+                videomore_url = 'videomore:%s' % videomore_id
         if videomore_url:
             title = self._og_search_title(webpage)
             return {
diff --git a/youtube_dl/extractor/ciscolive.py b/youtube_dl/extractor/ciscolive.py
new file mode 100644 (file)
index 0000000..c99b6ee
--- /dev/null
@@ -0,0 +1,142 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import itertools
+
+from .common import InfoExtractor
+from ..compat import (
+    compat_parse_qs,
+    compat_urllib_parse_urlparse,
+)
+from ..utils import (
+    clean_html,
+    float_or_none,
+    int_or_none,
+    try_get,
+    urlencode_postdata,
+)
+
+
+class CiscoLiveBaseIE(InfoExtractor):
+    # These appear to be constant across all Cisco Live presentations
+    # and are not tied to any user session or event
+    RAINFOCUS_API_URL = 'https://events.rainfocus.com/api/%s'
+    RAINFOCUS_API_PROFILE_ID = 'Na3vqYdAlJFSxhYTYQGuMbpafMqftalz'
+    RAINFOCUS_WIDGET_ID = 'n6l4Lo05R8fiy3RpUBm447dZN8uNWoye'
+    BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/5647924234001/SyK2FdqjM_default/index.html?videoId=%s'
+
+    HEADERS = {
+        'Origin': 'https://ciscolive.cisco.com',
+        'rfApiProfileId': RAINFOCUS_API_PROFILE_ID,
+        'rfWidgetId': RAINFOCUS_WIDGET_ID,
+    }
+
+    def _call_api(self, ep, rf_id, query, referrer, note=None):
+        headers = self.HEADERS.copy()
+        headers['Referer'] = referrer
+        return self._download_json(
+            self.RAINFOCUS_API_URL % ep, rf_id, note=note,
+            data=urlencode_postdata(query), headers=headers)
+
+    def _parse_rf_item(self, rf_item):
+        event_name = rf_item.get('eventName')
+        title = rf_item['title']
+        description = clean_html(rf_item.get('abstract'))
+        presenter_name = try_get(rf_item, lambda x: x['participants'][0]['fullName'])
+        bc_id = rf_item['videos'][0]['url']
+        bc_url = self.BRIGHTCOVE_URL_TEMPLATE % bc_id
+        duration = float_or_none(try_get(rf_item, lambda x: x['times'][0]['length']))
+        location = try_get(rf_item, lambda x: x['times'][0]['room'])
+
+        if duration:
+            duration = duration * 60
+
+        return {
+            '_type': 'url_transparent',
+            'url': bc_url,
+            'ie_key': 'BrightcoveNew',
+            'title': title,
+            'description': description,
+            'duration': duration,
+            'creator': presenter_name,
+            'location': location,
+            'series': event_name,
+        }
+
+
+class CiscoLiveSessionIE(CiscoLiveBaseIE):
+    _VALID_URL = r'https?://ciscolive\.cisco\.com/on-demand-library/\??[^#]*#/session/(?P<id>[^/?&]+)'
+    _TEST = {
+        'url': 'https://ciscolive.cisco.com/on-demand-library/?#/session/1423353499155001FoSs',
+        'md5': 'c98acf395ed9c9f766941c70f5352e22',
+        'info_dict': {
+            'id': '5803694304001',
+            'ext': 'mp4',
+            'title': '13 Smart Automations to Monitor Your Cisco IOS Network',
+            'description': 'md5:ec4a436019e09a918dec17714803f7cc',
+            'timestamp': 1530305395,
+            'upload_date': '20180629',
+            'uploader_id': '5647924234001',
+            'location': '16B Mezz.',
+        },
+    }
+
+    def _real_extract(self, url):
+        rf_id = self._match_id(url)
+        rf_result = self._call_api('session', rf_id, {'id': rf_id}, url)
+        return self._parse_rf_item(rf_result['items'][0])
+
+
+class CiscoLiveSearchIE(CiscoLiveBaseIE):
+    _VALID_URL = r'https?://ciscolive\.cisco\.com/on-demand-library/'
+    _TESTS = [{
+        'url': 'https://ciscolive.cisco.com/on-demand-library/?search.event=ciscoliveus2018&search.technicallevel=scpsSkillLevel_aintroductory&search.focus=scpsSessionFocus_designAndDeployment#/',
+        'info_dict': {
+            'title': 'Search query',
+        },
+        'playlist_count': 5,
+    }, {
+        'url': 'https://ciscolive.cisco.com/on-demand-library/?search.technology=scpsTechnology_applicationDevelopment&search.technology=scpsTechnology_ipv6&search.focus=scpsSessionFocus_troubleshootingTroubleshooting#/',
+        'only_matching': True,
+    }]
+
+    @classmethod
+    def suitable(cls, url):
+        return False if CiscoLiveSessionIE.suitable(url) else super(CiscoLiveSearchIE, cls).suitable(url)
+
+    @staticmethod
+    def _check_bc_id_exists(rf_item):
+        return int_or_none(try_get(rf_item, lambda x: x['videos'][0]['url'])) is not None
+
+    def _entries(self, query, url):
+        query['size'] = 50
+        query['from'] = 0
+        for page_num in itertools.count(1):
+            results = self._call_api(
+                'search', None, query, url,
+                'Downloading search JSON page %d' % page_num)
+            sl = try_get(results, lambda x: x['sectionList'][0], dict)
+            if sl:
+                results = sl
+            items = results.get('items')
+            if not items or not isinstance(items, list):
+                break
+            for item in items:
+                if not isinstance(item, dict):
+                    continue
+                if not self._check_bc_id_exists(item):
+                    continue
+                yield self._parse_rf_item(item)
+            size = int_or_none(results.get('size'))
+            if size is not None:
+                query['size'] = size
+            total = int_or_none(results.get('total'))
+            if total is not None and query['from'] + query['size'] > total:
+                break
+            query['from'] += query['size']
+
+    def _real_extract(self, url):
+        query = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
+        query['type'] = 'session'
+        return self.playlist_result(
+            self._entries(query, url), playlist_title='Search query')
index 5fc311f538eb23b0b16e99f6c5623f0db4290b40..774b7105580f69dedac4318c82aee2ab62c27381 100644 (file)
@@ -119,11 +119,7 @@ class CNNBlogsIE(InfoExtractor):
     def _real_extract(self, url):
         webpage = self._download_webpage(url, url_basename(url))
         cnn_url = self._html_search_regex(r'data-url="(.+?)"', webpage, 'cnn url')
-        return {
-            '_type': 'url',
-            'url': cnn_url,
-            'ie_key': CNNIE.ie_key(),
-        }
+        return self.url_result(cnn_url, CNNIE.ie_key())
 
 
 class CNNArticleIE(InfoExtractor):
@@ -145,8 +141,4 @@ class CNNArticleIE(InfoExtractor):
     def _real_extract(self, url):
         webpage = self._download_webpage(url, url_basename(url))
         cnn_url = self._html_search_regex(r"video:\s*'([^']+)'", webpage, 'cnn url')
-        return {
-            '_type': 'url',
-            'url': 'http://cnn.com/video/?/video/' + cnn_url,
-            'ie_key': CNNIE.ie_key(),
-        }
+        return self.url_result('http://cnn.com/video/?/video/' + cnn_url, CNNIE.ie_key())
index e5f8136fc1511d573978298393f5aba5d94d7af8..9e7febcadf12c9d2450f35783e45b4b6d04dbe3f 100644 (file)
@@ -1239,17 +1239,27 @@ class InfoExtractor(object):
                 if expected_type is not None and expected_type != item_type:
                     return info
                 if item_type in ('TVEpisode', 'Episode'):
+                    episode_name = unescapeHTML(e.get('name'))
                     info.update({
-                        'episode': unescapeHTML(e.get('name')),
+                        'episode': episode_name,
                         'episode_number': int_or_none(e.get('episodeNumber')),
                         'description': unescapeHTML(e.get('description')),
                     })
+                    if not info.get('title') and episode_name:
+                        info['title'] = episode_name
                     part_of_season = e.get('partOfSeason')
                     if isinstance(part_of_season, dict) and part_of_season.get('@type') in ('TVSeason', 'Season', 'CreativeWorkSeason'):
                         info['season_number'] = int_or_none(part_of_season.get('seasonNumber'))
                     part_of_series = e.get('partOfSeries') or e.get('partOfTVSeries')
                     if isinstance(part_of_series, dict) and part_of_series.get('@type') in ('TVSeries', 'Series', 'CreativeWorkSeries'):
                         info['series'] = unescapeHTML(part_of_series.get('name'))
+                elif item_type == 'Movie':
+                    info.update({
+                        'title': unescapeHTML(e.get('name')),
+                        'description': unescapeHTML(e.get('description')),
+                        'duration': parse_duration(e.get('duration')),
+                        'timestamp': unified_timestamp(e.get('dateCreated')),
+                    })
                 elif item_type in ('Article', 'NewsArticle'):
                     info.update({
                         'timestamp': parse_iso8601(e.get('datePublished')),
index 8dd9d66872eab57fca0abf82d50db004d7df425a..f73ef6b63c8bf7a138e8dcaf8a35fc2798322aea 100644 (file)
@@ -48,6 +48,21 @@ class CrackleIE(InfoExtractor):
         'only_matching': True,
     }]
 
+    _MEDIA_FILE_SLOTS = {
+        '360p.mp4': {
+            'width': 640,
+            'height': 360,
+        },
+        '480p.mp4': {
+            'width': 768,
+            'height': 432,
+        },
+        '480p_1mbps.mp4': {
+            'width': 852,
+            'height': 480,
+        },
+    }
+
     def _real_extract(self, url):
         video_id = self._match_id(url)
 
@@ -95,6 +110,20 @@ class CrackleIE(InfoExtractor):
                 elif ext == 'mpd':
                     formats.extend(self._extract_mpd_formats(
                         format_url, video_id, mpd_id='dash', fatal=False))
+                elif format_url.endswith('.ism/Manifest'):
+                    formats.extend(self._extract_ism_formats(
+                        format_url, video_id, ism_id='mss', fatal=False))
+                else:
+                    mfs_path = e.get('Type')
+                    mfs_info = self._MEDIA_FILE_SLOTS.get(mfs_path)
+                    if not mfs_info:
+                        continue
+                    formats.append({
+                        'url': format_url,
+                        'format_id': 'http-' + mfs_path.split('.')[0],
+                        'width': mfs_info['width'],
+                        'height': mfs_info['height'],
+                    })
             self._sort_formats(formats)
 
             description = media.get('Description')
index 35b1e7a34e21b634bc1251e122741d53aade77c5..e4a7fca6ce1e7352a8ac2785c1fa3e73445c9cca 100644 (file)
@@ -46,8 +46,24 @@ class CuriosityStreamBaseIE(InfoExtractor):
         self._handle_errors(result)
         self._auth_token = result['message']['auth_token']
 
-    def _extract_media_info(self, media):
-        video_id = compat_str(media['id'])
+
+class CuriosityStreamIE(CuriosityStreamBaseIE):
+    IE_NAME = 'curiositystream'
+    _VALID_URL = r'https?://(?:app\.)?curiositystream\.com/video/(?P<id>\d+)'
+    _TEST = {
+        'url': 'https://app.curiositystream.com/video/2',
+        'md5': '262bb2f257ff301115f1973540de8983',
+        'info_dict': {
+            'id': '2',
+            'ext': 'mp4',
+            'title': 'How Did You Develop The Internet?',
+            'description': 'Vint Cerf, Google\'s Chief Internet Evangelist, describes how he and Bob Kahn created the internet.',
+        }
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        media = self._call_api('media/' + video_id, video_id)
         title = media['title']
 
         formats = []
@@ -114,38 +130,21 @@ class CuriosityStreamBaseIE(InfoExtractor):
         }
 
 
-class CuriosityStreamIE(CuriosityStreamBaseIE):
-    IE_NAME = 'curiositystream'
-    _VALID_URL = r'https?://app\.curiositystream\.com/video/(?P<id>\d+)'
-    _TEST = {
-        'url': 'https://app.curiositystream.com/video/2',
-        'md5': '262bb2f257ff301115f1973540de8983',
-        'info_dict': {
-            'id': '2',
-            'ext': 'mp4',
-            'title': 'How Did You Develop The Internet?',
-            'description': 'Vint Cerf, Google\'s Chief Internet Evangelist, describes how he and Bob Kahn created the internet.',
-        }
-    }
-
-    def _real_extract(self, url):
-        video_id = self._match_id(url)
-        media = self._call_api('media/' + video_id, video_id)
-        return self._extract_media_info(media)
-
-
 class CuriosityStreamCollectionIE(CuriosityStreamBaseIE):
     IE_NAME = 'curiositystream:collection'
-    _VALID_URL = r'https?://app\.curiositystream\.com/collection/(?P<id>\d+)'
-    _TEST = {
+    _VALID_URL = r'https?://(?:app\.)?curiositystream\.com/(?:collection|series)/(?P<id>\d+)'
+    _TESTS = [{
         'url': 'https://app.curiositystream.com/collection/2',
         'info_dict': {
             'id': '2',
             'title': 'Curious Minds: The Internet',
             'description': 'How is the internet shaping our lives in the 21st Century?',
         },
-        'playlist_mincount': 12,
-    }
+        'playlist_mincount': 17,
+    }, {
+        'url': 'https://curiositystream.com/series/2',
+        'only_matching': True,
+    }]
 
     def _real_extract(self, url):
         collection_id = self._match_id(url)
@@ -153,7 +152,10 @@ class CuriosityStreamCollectionIE(CuriosityStreamBaseIE):
             'collections/' + collection_id, collection_id)
         entries = []
         for media in collection.get('media', []):
-            entries.append(self._extract_media_info(media))
+            media_id = compat_str(media.get('id'))
+            entries.append(self.url_result(
+                'https://curiositystream.com/video/' + media_id,
+                CuriosityStreamIE.ie_key(), media_id))
         return self.playlist_result(
             entries, collection_id,
             collection.get('title'), collection.get('description'))
index 3589bd42831515d5d4b16bfd64dbb861830b5173..b70c307a75b520aa63e44fea69762cacc6bbc319 100644 (file)
@@ -17,16 +17,29 @@ from ..compat import compat_HTTPError
 
 
 class DiscoveryIE(DiscoveryGoBaseIE):
-    _VALID_URL = r'''(?x)https?://(?:www\.)?(?P<site>
-            discovery|
-            investigationdiscovery|
-            discoverylife|
-            animalplanet|
-            ahctv|
-            destinationamerica|
-            sciencechannel|
-            tlc|
-            velocity
+    _VALID_URL = r'''(?x)https?://
+        (?P<site>
+            (?:www\.)?
+                (?:
+                    discovery|
+                    investigationdiscovery|
+                    discoverylife|
+                    animalplanet|
+                    ahctv|
+                    destinationamerica|
+                    sciencechannel|
+                    tlc|
+                    velocity
+                )|
+            watch\.
+                (?:
+                    hgtv|
+                    foodnetwork|
+                    travelchannel|
+                    diynetwork|
+                    cookingchanneltv|
+                    motortrend
+                )
         )\.com(?P<path>/tv-shows/[^/]+/(?:video|full-episode)s/(?P<id>[^./?#]+))'''
     _TESTS = [{
         'url': 'https://www.discovery.com/tv-shows/cash-cab/videos/dave-foley',
@@ -71,7 +84,7 @@ class DiscoveryIE(DiscoveryGoBaseIE):
 
         if not access_token:
             access_token = self._download_json(
-                'https://www.%s.com/anonymous' % site, display_id, query={
+                'https://%s.com/anonymous' % site, display_id, query={
                     'authRel': 'authorization',
                     'client_id': try_get(
                         react_data, lambda x: x['application']['apiClientId'],
@@ -81,11 +94,12 @@ class DiscoveryIE(DiscoveryGoBaseIE):
                 })['access_token']
 
         try:
+            headers = self.geo_verification_headers()
+            headers['Authorization'] = 'Bearer ' + access_token
+
             stream = self._download_json(
                 'https://api.discovery.com/v1/streaming/video/' + video_id,
-                display_id, headers={
-                    'Authorization': 'Bearer ' + access_token,
-                })
+                display_id, headers=headers)
         except ExtractorError as e:
             if isinstance(e.cause, compat_HTTPError) and e.cause.code in (401, 403):
                 e_description = self._parse_json(
index 5887887e15ef9065515abb1472e06511e55e32c9..114d2dbe34e94c3d517cd8ea8230379d3d13f908 100644 (file)
@@ -15,16 +15,16 @@ from ..utils import (
 class DTubeIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?d\.tube/(?:#!/)?v/(?P<uploader_id>[0-9a-z.-]+)/(?P<id>[0-9a-z]{8})'
     _TEST = {
-        'url': 'https://d.tube/#!/v/benswann/zqd630em',
-        'md5': 'a03eaa186618ffa7a3145945543a251e',
+        'url': 'https://d.tube/#!/v/broncnutz/x380jtr1',
+        'md5': '9f29088fa08d699a7565ee983f56a06e',
         'info_dict': {
-            'id': 'zqd630em',
+            'id': 'x380jtr1',
             'ext': 'mp4',
-            'title': 'Reality Check: FDA\'s Disinformation Campaign on Kratom',
-            'description': 'md5:700d164e066b87f9eac057949e4227c2',
-            'uploader_id': 'benswann',
-            'upload_date': '20180222',
-            'timestamp': 1519328958,
+            'title': 'Lefty 3-Rings is Back Baby!! NCAA Picks',
+            'description': 'md5:60be222088183be3a42f196f34235776',
+            'uploader_id': 'broncnutz',
+            'upload_date': '20190107',
+            'timestamp': 1546854054,
         },
         'params': {
             'format': '480p',
@@ -48,7 +48,7 @@ class DTubeIE(InfoExtractor):
         def canonical_url(h):
             if not h:
                 return None
-            return 'https://ipfs.io/ipfs/' + h
+            return 'https://video.dtube.top/ipfs/' + h
 
         formats = []
         for q in ('240', '480', '720', '1080', ''):
index e5488cce45e9180d46daddc072ea08e7269c0d9e..de38c6641acb6730c73d3d3975ed2faca1e70914 100644 (file)
@@ -194,6 +194,10 @@ from .chirbit import (
     ChirbitProfileIE,
 )
 from .cinchcast import CinchcastIE
+from .ciscolive import (
+    CiscoLiveSessionIE,
+    CiscoLiveSearchIE,
+)
 from .cjsw import CJSWIE
 from .cliphunter import CliphunterIE
 from .clippit import ClippitIE
@@ -407,6 +411,7 @@ from .funk import (
 from .funnyordie import FunnyOrDieIE
 from .fusion import FusionIE
 from .fxnetworks import FXNetworksIE
+from .gaia import GaiaIE
 from .gameinformer import GameInformerIE
 from .gameone import (
     GameOneIE,
@@ -465,6 +470,10 @@ from .hrti import (
 )
 from .huajiao import HuajiaoIE
 from .huffpost import HuffPostIE
+from .hungama import (
+    HungamaIE,
+    HungamaSongIE,
+)
 from .hypem import HypemIE
 from .iconosquare import IconosquareIE
 from .ign import (
@@ -479,6 +488,7 @@ from .imdb import (
 from .imgur import (
     ImgurIE,
     ImgurAlbumIE,
+    ImgurGalleryIE,
 )
 from .ina import InaIE
 from .inc import IncIE
@@ -549,6 +559,11 @@ from .lcp import (
 )
 from .learnr import LearnrIE
 from .lecture2go import Lecture2GoIE
+from .lecturio import (
+    LecturioIE,
+    LecturioCourseIE,
+    LecturioDeCourseIE,
+)
 from .leeco import (
     LeIE,
     LePlaylistIE,
@@ -672,11 +687,7 @@ from .myvi import (
     MyviEmbedIE,
 )
 from .myvidster import MyVidsterIE
-from .nationalgeographic import (
-    NationalGeographicVideoIE,
-    NationalGeographicIE,
-    NationalGeographicEpisodeGuideIE,
-)
+from .nationalgeographic import NationalGeographicVideoIE
 from .naver import NaverIE
 from .nba import NBAIE
 from .nbc import (
@@ -818,6 +829,7 @@ from .orf import (
     ORFOE1IE,
     ORFIPTVIE,
 )
+from .outsidetv import OutsideTVIE
 from .packtpub import (
     PacktPubIE,
     PacktPubCourseIE,
@@ -846,6 +858,7 @@ from .piksel import PikselIE
 from .pinkbike import PinkbikeIE
 from .pladform import PladformIE
 from .playfm import PlayFMIE
+from .playplustv import PlayPlusTVIE
 from .plays import PlaysTVIE
 from .playtvak import PlaytvakIE
 from .playvid import PlayvidIE
@@ -1082,6 +1095,10 @@ from .tass import TassIE
 from .tastytrade import TastyTradeIE
 from .tbs import TBSIE
 from .tdslifeway import TDSLifewayIE
+from .teachable import (
+    TeachableIE,
+    TeachableCourseIE,
+)
 from .teachertube import (
     TeacherTubeIE,
     TeacherTubeUserIE,
@@ -1120,6 +1137,10 @@ from .thisamericanlife import ThisAmericanLifeIE
 from .thisav import ThisAVIE
 from .thisoldhouse import ThisOldHouseIE
 from .threeqsdn import ThreeQSDNIE
+from .tiktok import (
+    TikTokIE,
+    TikTokUserIE,
+)
 from .tinypic import TinyPicIE
 from .tmz import (
     TMZIE,
@@ -1175,7 +1196,9 @@ from .tvnet import TVNetIE
 from .tvnoe import TVNoeIE
 from .tvnow import (
     TVNowIE,
-    TVNowListIE,
+    TVNowNewIE,
+    TVNowSeasonIE,
+    TVNowAnnualIE,
     TVNowShowIE,
 )
 from .tvp import (
@@ -1227,10 +1250,6 @@ from .uplynk import (
     UplynkIE,
     UplynkPreplayIE,
 )
-from .upskill import (
-    UpskillIE,
-    UpskillCourseIE,
-)
 from .urort import UrortIE
 from .urplay import URPlayIE
 from .usanetwork import USANetworkIE
@@ -1299,6 +1318,7 @@ from .vimeo import (
     VimeoReviewIE,
     VimeoUserIE,
     VimeoWatchLaterIE,
+    VHXEmbedIE,
 )
 from .vimple import VimpleIE
 from .vine import (
@@ -1386,6 +1406,7 @@ from .wsj import (
     WSJIE,
     WSJArticleIE,
 )
+from .wwe import WWEIE
 from .xbef import XBefIE
 from .xboxclips import XboxClipsIE
 from .xfileshare import XFileShareIE
@@ -1478,3 +1499,4 @@ from .zattoo import (
 )
 from .zdf import ZDFIE, ZDFChannelIE
 from .zingmp3 import ZingMp3IE
+from .zype import ZypeIE
index 11d6c9c3251ff86c6ea26a27a83dec064963ac2c..b1c91f0950986df6f7d25c674e3b05da3dfe3975 100644 (file)
@@ -1,11 +1,11 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
+# import json
+# import uuid
+
 from .adobepass import AdobePassIE
-from .uplynk import UplynkPreplayIE
-from ..compat import compat_str
 from ..utils import (
-    HEADRequest,
     int_or_none,
     parse_age_limit,
     parse_duration,
@@ -16,7 +16,7 @@ from ..utils import (
 
 
 class FOXIE(AdobePassIE):
-    _VALID_URL = r'https?://(?:www\.)?fox\.com/watch/(?P<id>[\da-fA-F]+)'
+    _VALID_URL = r'https?://(?:www\.)?(?:fox\.com|nationalgeographic\.com/tv)/watch/(?P<id>[\da-fA-F]+)'
     _TESTS = [{
         # clip
         'url': 'https://www.fox.com/watch/4b765a60490325103ea69888fb2bd4e8/',
@@ -43,41 +43,47 @@ class FOXIE(AdobePassIE):
         # episode, geo-restricted, tv provided required
         'url': 'https://www.fox.com/watch/30056b295fb57f7452aeeb4920bc3024/',
         'only_matching': True,
+    }, {
+        'url': 'https://www.nationalgeographic.com/tv/watch/f690e05ebbe23ab79747becd0cc223d1/',
+        'only_matching': True,
     }]
+    # _access_token = None
+
+    # def _call_api(self, path, video_id, data=None):
+    #     headers = {
+    #         'X-Api-Key': '238bb0a0c2aba67922c48709ce0c06fd',
+    #     }
+    #     if self._access_token:
+    #         headers['Authorization'] = 'Bearer ' + self._access_token
+    #     return self._download_json(
+    #         'https://api2.fox.com/v2.0/' + path, video_id, data=data, headers=headers)
+
+    # def _real_initialize(self):
+    #     self._access_token = self._call_api(
+    #         'login', None, json.dumps({
+    #             'deviceId': compat_str(uuid.uuid4()),
+    #         }).encode())['accessToken']
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
 
         video = self._download_json(
-            'https://api.fox.com/fbc-content/v1_4/video/%s' % video_id,
+            'https://api.fox.com/fbc-content/v1_5/video/%s' % video_id,
             video_id, headers={
                 'apikey': 'abdcbed02c124d393b39e818a4312055',
                 'Content-Type': 'application/json',
                 'Referer': url,
             })
+        # video = self._call_api('vodplayer/' + video_id, video_id)
 
         title = video['name']
         release_url = video['videoRelease']['url']
-
-        description = video.get('description')
-        duration = int_or_none(video.get('durationInSeconds')) or int_or_none(
-            video.get('duration')) or parse_duration(video.get('duration'))
-        timestamp = unified_timestamp(video.get('datePublished'))
-        rating = video.get('contentRating')
-        age_limit = parse_age_limit(rating)
+        # release_url = video['url']
 
         data = try_get(
             video, lambda x: x['trackingData']['properties'], dict) or {}
 
-        creator = data.get('brand') or data.get('network') or video.get('network')
-
-        series = video.get('seriesName') or data.get(
-            'seriesName') or data.get('show')
-        season_number = int_or_none(video.get('seasonNumber'))
-        episode = video.get('name')
-        episode_number = int_or_none(video.get('episodeNumber'))
-        release_year = int_or_none(video.get('releaseYear'))
-
+        rating = video.get('contentRating')
         if data.get('authRequired'):
             resource = self._get_mvpd_resource(
                 'fbc-fox', title, video.get('guid'), rating)
@@ -86,6 +92,18 @@ class FOXIE(AdobePassIE):
                     'auth': self._extract_mvpd_auth(
                         url, video_id, 'fbc-fox', resource)
                 })
+        m3u8_url = self._download_json(release_url, video_id)['playURL']
+        formats = self._extract_m3u8_formats(
+            m3u8_url, video_id, 'mp4',
+            entry_protocol='m3u8_native', m3u8_id='hls')
+        self._sort_formats(formats)
+
+        duration = int_or_none(video.get('durationInSeconds')) or int_or_none(
+            video.get('duration')) or parse_duration(video.get('duration'))
+        timestamp = unified_timestamp(video.get('datePublished'))
+        creator = data.get('brand') or data.get('network') or video.get('network')
+        series = video.get('seriesName') or data.get(
+            'seriesName') or data.get('show')
 
         subtitles = {}
         for doc_rel in video.get('documentReleases', []):
@@ -98,36 +116,19 @@ class FOXIE(AdobePassIE):
             }]
             break
 
-        info = {
+        return {
             'id': video_id,
             'title': title,
-            'description': description,
+            'formats': formats,
+            'description': video.get('description'),
             'duration': duration,
             'timestamp': timestamp,
-            'age_limit': age_limit,
+            'age_limit': parse_age_limit(rating),
             'creator': creator,
             'series': series,
-            'season_number': season_number,
-            'episode': episode,
-            'episode_number': episode_number,
-            'release_year': release_year,
+            'season_number': int_or_none(video.get('seasonNumber')),
+            'episode': video.get('name'),
+            'episode_number': int_or_none(video.get('episodeNumber')),
+            'release_year': int_or_none(video.get('releaseYear')),
             'subtitles': subtitles,
         }
-
-        urlh = self._request_webpage(HEADRequest(release_url), video_id)
-        video_url = compat_str(urlh.geturl())
-
-        if UplynkPreplayIE.suitable(video_url):
-            info.update({
-                '_type': 'url_transparent',
-                'url': video_url,
-                'ie_key': UplynkPreplayIE.ie_key(),
-            })
-        else:
-            m3u8_url = self._download_json(release_url, video_id)['playURL']
-            formats = self._extract_m3u8_formats(
-                m3u8_url, video_id, 'mp4',
-                entry_protocol='m3u8_native', m3u8_id='hls')
-            self._sort_formats(formats)
-            info['formats'] = formats
-        return info
index 985542727e4273dc1ee379dc2e82e12a580517c2..2b2cb6c6f682681a2e42cd2cc07266b583f1316a 100644 (file)
@@ -1,43 +1,33 @@
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
-from ..utils import (
-    smuggle_url,
-    update_url_query,
-)
 
 
 class FoxSportsIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?foxsports\.com/(?:[^/]+/)*(?P<id>[^/]+)'
+    _VALID_URL = r'https?://(?:www\.)?foxsports\.com/(?:[^/]+/)*video/(?P<id>\d+)'
 
     _TEST = {
         'url': 'http://www.foxsports.com/tennessee/video/432609859715',
         'md5': 'b49050e955bebe32c301972e4012ac17',
         'info_dict': {
-            'id': 'bwduI3X_TgUB',
+            'id': '432609859715',
             'ext': 'mp4',
             'title': 'Courtney Lee on going up 2-0 in series vs. Blazers',
             'description': 'Courtney Lee talks about Memphis being focused.',
-            'upload_date': '20150423',
-            'timestamp': 1429761109,
+            # TODO: fix timestamp
+            'upload_date': '19700101',  # '20150423',
+            # 'timestamp': 1429761109,
             'uploader': 'NEWA-FNG-FOXSPORTS',
         },
+        'params': {
+            # m3u8 download
+            'skip_download': True,
+        },
         'add_ie': ['ThePlatform'],
     }
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
 
-        webpage = self._download_webpage(url, video_id)
-
-        config = self._parse_json(
-            self._html_search_regex(
-                r"""class="[^"]*(?:fs-player|platformPlayer-wrapper)[^"]*".+?data-player-config='([^']+)'""",
-                webpage, 'data player config'),
-            video_id)
-
-        return self.url_result(smuggle_url(update_url_query(
-            config['releaseURL'], {
-                'mbr': 'true',
-                'switch': 'http',
-            }), {'force_smil_url': True}))
+        return self.url_result(
+            'https://feed.theplatform.com/f/BKQ29B/foxsports-all?byId=' + video_id, 'ThePlatformFeed')
index 486a49c05271c37280b059fb3a6c6bb4fd0de3ed..ea9c3e317ce5c162bfb7d46691f65ba3cd38ceb8 100644 (file)
@@ -1,6 +1,7 @@
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
+from .youtube import YoutubeIE
 
 
 class FreespeechIE(InfoExtractor):
@@ -27,8 +28,4 @@ class FreespeechIE(InfoExtractor):
             r'data-video-url="([^"]+)"',
             webpage, 'youtube url')
 
-        return {
-            '_type': 'url',
-            'url': youtube_url,
-            'ie_key': 'Youtube',
-        }
+        return self.url_result(youtube_url, YoutubeIE.ie_key())
index 07d01caecfe6a1cc9bde8e23eb8d3955cdeda62c..8bbedca269233b2ba4bdd02febf7d8e63007feb4 100644 (file)
@@ -1,6 +1,9 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
+import random
+import string
+
 from .common import InfoExtractor
 from ..compat import compat_HTTPError
 from ..utils import (
@@ -87,7 +90,7 @@ class FunimationIE(InfoExtractor):
 
         video_id = title_data.get('id') or self._search_regex([
             r"KANE_customdimensions.videoID\s*=\s*'(\d+)';",
-            r'<iframe[^>]+src="/player/(\d+)"',
+            r'<iframe[^>]+src="/player/(\d+)',
         ], webpage, 'video_id', default=None)
         if not video_id:
             player_url = self._html_search_meta([
@@ -108,8 +111,10 @@ class FunimationIE(InfoExtractor):
             if self._TOKEN:
                 headers['Authorization'] = 'Token %s' % self._TOKEN
             sources = self._download_json(
-                'https://prod-api-funimationnow.dadcdigital.com/api/source/catalog/video/%s/signed/' % video_id,
-                video_id, headers=headers)['items']
+                'https://www.funimation.com/api/showexperience/%s/' % video_id,
+                video_id, headers=headers, query={
+                    'pinst_id': ''.join([random.choice(string.digits + string.ascii_letters) for _ in range(8)]),
+                })['items']
         except ExtractorError as e:
             if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
                 error = self._parse_json(e.cause.read(), video_id)['errors'][0]
diff --git a/youtube_dl/extractor/gaia.py b/youtube_dl/extractor/gaia.py
new file mode 100644 (file)
index 0000000..f2eef3f
--- /dev/null
@@ -0,0 +1,98 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+    int_or_none,
+    str_or_none,
+    strip_or_none,
+    try_get,
+)
+
+
+class GaiaIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?gaia\.com/video/(?P<id>[^/?]+).*?\bfullplayer=(?P<type>feature|preview)'
+    _TESTS = [{
+        'url': 'https://www.gaia.com/video/connecting-universal-consciousness?fullplayer=feature',
+        'info_dict': {
+            'id': '89356',
+            'ext': 'mp4',
+            'title': 'Connecting with Universal Consciousness',
+            'description': 'md5:844e209ad31b7d31345f5ed689e3df6f',
+            'upload_date': '20151116',
+            'timestamp': 1447707266,
+            'duration': 936,
+        },
+        'params': {
+            # m3u8 download
+            'skip_download': True,
+        },
+    }, {
+        'url': 'https://www.gaia.com/video/connecting-universal-consciousness?fullplayer=preview',
+        'info_dict': {
+            'id': '89351',
+            'ext': 'mp4',
+            'title': 'Connecting with Universal Consciousness',
+            'description': 'md5:844e209ad31b7d31345f5ed689e3df6f',
+            'upload_date': '20151116',
+            'timestamp': 1447707266,
+            'duration': 53,
+        },
+        'params': {
+            # m3u8 download
+            'skip_download': True,
+        },
+    }]
+
+    def _real_extract(self, url):
+        display_id, vtype = re.search(self._VALID_URL, url).groups()
+        node_id = self._download_json(
+            'https://brooklyn.gaia.com/pathinfo', display_id, query={
+                'path': 'video/' + display_id,
+            })['id']
+        node = self._download_json(
+            'https://brooklyn.gaia.com/node/%d' % node_id, node_id)
+        vdata = node[vtype]
+        media_id = compat_str(vdata['nid'])
+        title = node['title']
+
+        media = self._download_json(
+            'https://brooklyn.gaia.com/media/' + media_id, media_id)
+        formats = self._extract_m3u8_formats(
+            media['mediaUrls']['bcHLS'], media_id, 'mp4')
+        self._sort_formats(formats)
+
+        subtitles = {}
+        text_tracks = media.get('textTracks', {})
+        for key in ('captions', 'subtitles'):
+            for lang, sub_url in text_tracks.get(key, {}).items():
+                subtitles.setdefault(lang, []).append({
+                    'url': sub_url,
+                })
+
+        fivestar = node.get('fivestar', {})
+        fields = node.get('fields', {})
+
+        def get_field_value(key, value_key='value'):
+            return try_get(fields, lambda x: x[key][0][value_key])
+
+        return {
+            'id': media_id,
+            'display_id': display_id,
+            'title': title,
+            'formats': formats,
+            'description': strip_or_none(get_field_value('body') or get_field_value('teaser')),
+            'timestamp': int_or_none(node.get('created')),
+            'subtitles': subtitles,
+            'duration': int_or_none(vdata.get('duration')),
+            'like_count': int_or_none(try_get(fivestar, lambda x: x['up_count']['value'])),
+            'dislike_count': int_or_none(try_get(fivestar, lambda x: x['down_count']['value'])),
+            'comment_count': int_or_none(node.get('comment_count')),
+            'series': try_get(node, lambda x: x['series']['title'], compat_str),
+            'season_number': int_or_none(get_field_value('season')),
+            'season_id': str_or_none(get_field_value('series_nid', 'nid')),
+            'episode_number': int_or_none(get_field_value('episode')),
+        }
index ab647dd4154cdc996c455ca45a5d6fa9d1e20f70..4236a5ed8a9bd638c31cfe33f040c555268c937f 100644 (file)
@@ -14,7 +14,7 @@ from ..utils import (
 
 
 class GameSpotIE(OnceIE):
-    _VALID_URL = r'https?://(?:www\.)?gamespot\.com/(?:video|article)s/(?:[^/]+/\d+-|embed/)(?P<id>\d+)'
+    _VALID_URL = r'https?://(?:www\.)?gamespot\.com/(?:video|article|review)s/(?:[^/]+/\d+-|embed/)(?P<id>\d+)'
     _TESTS = [{
         'url': 'http://www.gamespot.com/videos/arma-3-community-guide-sitrep-i/2300-6410818/',
         'md5': 'b2a30deaa8654fcccd43713a6b6a4825',
@@ -41,6 +41,9 @@ class GameSpotIE(OnceIE):
     }, {
         'url': 'https://www.gamespot.com/articles/the-last-of-us-2-receives-new-ps4-trailer/1100-6454469/',
         'only_matching': True,
+    }, {
+        'url': 'https://www.gamespot.com/reviews/gears-of-war-review/1900-6161188/',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
index 545e033711995d04544aa1dafea11110084fa58f..067de28cd16b83f20717aca02f77e25051ac222d 100644 (file)
@@ -109,11 +109,13 @@ from .vice import ViceIE
 from .xfileshare import XFileShareIE
 from .cloudflarestream import CloudflareStreamIE
 from .peertube import PeerTubeIE
+from .teachable import TeachableIE
 from .indavideo import IndavideoEmbedIE
 from .apa import APAIE
 from .foxnews import FoxNewsIE
 from .viqeo import ViqeoIE
 from .expressen import ExpressenIE
+from .zype import ZypeIE
 
 
 class GenericIE(InfoExtractor):
@@ -2070,6 +2072,20 @@ class GenericIE(InfoExtractor):
             },
             'playlist_count': 6,
         },
+        {
+            # Zype embed
+            'url': 'https://www.cookscountry.com/episode/554-smoky-barbecue-favorites',
+            'info_dict': {
+                'id': '5b400b834b32992a310622b9',
+                'ext': 'mp4',
+                'title': 'Smoky Barbecue Favorites',
+                'thumbnail': r're:^https?://.*\.jpe?g',
+            },
+            'add_ie': [ZypeIE.ie_key()],
+            'params': {
+                'skip_download': True,
+            },
+        },
         {
             # videojs embed
             'url': 'https://video.sibnet.ru/shell.php?videoid=3422904',
@@ -2181,10 +2197,7 @@ class GenericIE(InfoExtractor):
 
     def _real_extract(self, url):
         if url.startswith('//'):
-            return {
-                '_type': 'url',
-                'url': self.http_scheme() + url,
-            }
+            return self.url_result(self.http_scheme() + url)
 
         parsed_url = compat_urlparse.urlparse(url)
         if not parsed_url.scheme:
@@ -3097,6 +3110,10 @@ class GenericIE(InfoExtractor):
             return self.playlist_from_matches(
                 peertube_urls, video_id, video_title, ie=PeerTubeIE.ie_key())
 
+        teachable_url = TeachableIE._extract_url(webpage, url)
+        if teachable_url:
+            return self.url_result(teachable_url)
+
         indavideo_urls = IndavideoEmbedIE._extract_urls(webpage)
         if indavideo_urls:
             return self.playlist_from_matches(
@@ -3129,6 +3146,11 @@ class GenericIE(InfoExtractor):
             return self.playlist_from_matches(
                 expressen_urls, video_id, video_title, ie=ExpressenIE.ie_key())
 
+        zype_urls = ZypeIE._extract_urls(webpage)
+        if zype_urls:
+            return self.playlist_from_matches(
+                zype_urls, video_id, video_title, ie=ZypeIE.ie_key())
+
         # Look for HTML5 media
         entries = self._parse_html5_media_entries(url, webpage, video_id, m3u8_id='hls')
         if entries:
index a0670b6456adf7e092dadbfc14d193f920aa0262..c1b36a59b51e9885b99535f19eef508c5e39bcaa 100644 (file)
@@ -53,7 +53,7 @@ class GfycatIE(InfoExtractor):
         video_id = self._match_id(url)
 
         gfy = self._download_json(
-            'http://gfycat.com/cajax/get/%s' % video_id,
+            'https://api.gfycat.com/v1/gfycats/%s' % video_id,
             video_id, 'Downloading video info')
         if 'error' in gfy:
             raise ExtractorError('Gfycat said: ' + gfy['error'], expected=True)
index c2140c36274b0bd0b82a59a81eb35aa6aca9ab9e..fb8f7679b0366ad7accdf802af34a644b03aea01 100644 (file)
@@ -72,7 +72,7 @@ class GloboIE(InfoExtractor):
             return
 
         try:
-            self._download_json(
+            glb_id = (self._download_json(
                 'https://login.globo.com/api/authentication', None, data=json.dumps({
                     'payload': {
                         'email': email,
@@ -81,7 +81,9 @@ class GloboIE(InfoExtractor):
                     },
                 }).encode(), headers={
                     'Content-Type': 'application/json; charset=utf-8',
-                })
+                }) or {}).get('glbId')
+            if glb_id:
+                self._set_cookie('.globo.com', 'GLBID', glb_id)
         except ExtractorError as e:
             if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
                 resp = self._parse_json(e.cause.read(), None)
index bf5717f1bf1f8366e45222e716951fb11069e8f6..8de9c4fafb852456ba772090f50ccdef3c5065d2 100644 (file)
@@ -43,6 +43,7 @@ class HotStarIE(HotStarBaseIE):
     IE_NAME = 'hotstar'
     _VALID_URL = r'https?://(?:www\.)?hotstar\.com/(?:.+?[/-])?(?P<id>\d{10})'
     _TESTS = [{
+        # contentData
         'url': 'https://www.hotstar.com/can-you-not-spread-rumours/1000076273',
         'info_dict': {
             'id': '1000076273',
@@ -57,6 +58,10 @@ class HotStarIE(HotStarBaseIE):
             # m3u8 download
             'skip_download': True,
         }
+    }, {
+        # contentDetail
+        'url': 'https://www.hotstar.com/movies/radha-gopalam/1000057157',
+        'only_matching': True,
     }, {
         'url': 'http://www.hotstar.com/sports/cricket/rajitha-sizzles-on-debut-with-329/2001477583',
         'only_matching': True,
@@ -74,10 +79,15 @@ class HotStarIE(HotStarBaseIE):
             r'<script>window\.APP_STATE\s*=\s*({.+?})</script>',
             webpage, 'app state'), video_id)
         video_data = {}
+        getters = list(
+            lambda x, k=k: x['initialState']['content%s' % k]['content']
+            for k in ('Data', 'Detail')
+        )
         for v in app_state.values():
-            content = try_get(v, lambda x: x['initialState']['contentData']['content'], dict)
+            content = try_get(v, getters, dict)
             if content and content.get('contentId') == video_id:
                 video_data = content
+                break
 
         title = video_data['title']
 
diff --git a/youtube_dl/extractor/hungama.py b/youtube_dl/extractor/hungama.py
new file mode 100644 (file)
index 0000000..3fdaac5
--- /dev/null
@@ -0,0 +1,117 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+    int_or_none,
+    urlencode_postdata,
+)
+
+
+class HungamaIE(InfoExtractor):
+    _VALID_URL = r'''(?x)
+                    https?://
+                        (?:www\.)?hungama\.com/
+                        (?:
+                            (?:video|movie)/[^/]+/|
+                            tv-show/(?:[^/]+/){2}\d+/episode/[^/]+/
+                        )
+                        (?P<id>\d+)
+                    '''
+    _TESTS = [{
+        'url': 'http://www.hungama.com/video/krishna-chants/39349649/',
+        'md5': 'a845a6d1ebd08d80c1035126d49bd6a0',
+        'info_dict': {
+            'id': '2931166',
+            'ext': 'mp4',
+            'title': 'Lucky Ali - Kitni Haseen Zindagi',
+            'track': 'Kitni Haseen Zindagi',
+            'artist': 'Lucky Ali',
+            'album': 'Aks',
+            'release_year': 2000,
+        }
+    }, {
+        'url': 'https://www.hungama.com/movie/kahaani-2/44129919/',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.hungama.com/tv-show/padded-ki-pushup/season-1/44139461/episode/ep-02-training-sasu-pathlaag-karing/44139503/',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, video_id)
+
+        info = self._search_json_ld(webpage, video_id)
+
+        m3u8_url = self._download_json(
+            'https://www.hungama.com/index.php', video_id,
+            data=urlencode_postdata({'content_id': video_id}), headers={
+                'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
+                'X-Requested-With': 'XMLHttpRequest',
+            }, query={
+                'c': 'common',
+                'm': 'get_video_mdn_url',
+            })['stream_url']
+
+        formats = self._extract_m3u8_formats(
+            m3u8_url, video_id, ext='mp4', entry_protocol='m3u8_native',
+            m3u8_id='hls')
+        self._sort_formats(formats)
+
+        info.update({
+            'id': video_id,
+            'formats': formats,
+        })
+        return info
+
+
+class HungamaSongIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?hungama\.com/song/[^/]+/(?P<id>\d+)'
+    _TEST = {
+        'url': 'https://www.hungama.com/song/kitni-haseen-zindagi/2931166/',
+        'md5': 'a845a6d1ebd08d80c1035126d49bd6a0',
+        'info_dict': {
+            'id': '2931166',
+            'ext': 'mp4',
+            'title': 'Lucky Ali - Kitni Haseen Zindagi',
+            'track': 'Kitni Haseen Zindagi',
+            'artist': 'Lucky Ali',
+            'album': 'Aks',
+            'release_year': 2000,
+        }
+    }
+
+    def _real_extract(self, url):
+        audio_id = self._match_id(url)
+
+        data = self._download_json(
+            'https://www.hungama.com/audio-player-data/track/%s' % audio_id,
+            audio_id, query={'_country': 'IN'})[0]
+
+        track = data['song_name']
+        artist = data.get('singer_name')
+
+        m3u8_url = self._download_json(
+            data.get('file') or data['preview_link'],
+            audio_id)['response']['media_url']
+
+        formats = self._extract_m3u8_formats(
+            m3u8_url, audio_id, ext='mp4', entry_protocol='m3u8_native',
+            m3u8_id='hls')
+        self._sort_formats(formats)
+
+        title = '%s - %s' % (artist, track) if artist else track
+        thumbnail = data.get('img_src') or data.get('album_image')
+
+        return {
+            'id': audio_id,
+            'title': title,
+            'thumbnail': thumbnail,
+            'track': track,
+            'artist': artist,
+            'album': data.get('album_name'),
+            'release_year': int_or_none(data.get('date')),
+            'formats': formats,
+        }
index ecc958a1717d35ad37cc68c3b44147f3d0db6161..0eb54db3f0ea34cfda6cd2be34efa2c9e1040cbf 100644 (file)
@@ -12,7 +12,7 @@ from ..utils import (
 
 
 class ImgurIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:i\.)?imgur\.com/(?:(?:gallery|(?:topic|r)/[^/]+)/)?(?P<id>[a-zA-Z0-9]{6,})(?:[/?#&]+|\.[a-z0-9]+)?$'
+    _VALID_URL = r'https?://(?:i\.)?imgur\.com/(?!(?:a|gallery|(?:t(?:opic)?|r)/[^/]+)/)(?P<id>[a-zA-Z0-9]+)'
 
     _TESTS = [{
         'url': 'https://i.imgur.com/A61SaA1.gifv',
@@ -20,28 +20,9 @@ class ImgurIE(InfoExtractor):
             'id': 'A61SaA1',
             'ext': 'mp4',
             'title': 're:Imgur GIF$|MRW gifv is up and running without any bugs$',
-            'description': 'Imgur: The magic of the Internet',
         },
     }, {
         'url': 'https://imgur.com/A61SaA1',
-        'info_dict': {
-            'id': 'A61SaA1',
-            'ext': 'mp4',
-            'title': 're:Imgur GIF$|MRW gifv is up and running without any bugs$',
-            'description': 'Imgur: The magic of the Internet',
-        },
-    }, {
-        'url': 'https://imgur.com/gallery/YcAQlkx',
-        'info_dict': {
-            'id': 'YcAQlkx',
-            'ext': 'mp4',
-            'title': 'Classic Steve Carell gif...cracks me up everytime....damn the repost downvotes....',
-        }
-    }, {
-        'url': 'http://imgur.com/topic/Funny/N8rOudd',
-        'only_matching': True,
-    }, {
-        'url': 'http://imgur.com/r/aww/VQcQPhM',
         'only_matching': True,
     }, {
         'url': 'https://i.imgur.com/crGpqCV.mp4',
@@ -50,8 +31,8 @@ class ImgurIE(InfoExtractor):
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
-        gifv_url = 'https://i.imgur.com/{id}.gifv'.format(id=video_id)
-        webpage = self._download_webpage(gifv_url, video_id)
+        webpage = self._download_webpage(
+            'https://i.imgur.com/{id}.gifv'.format(id=video_id), video_id)
 
         width = int_or_none(self._og_search_property(
             'video:width', webpage, default=None))
@@ -72,7 +53,6 @@ class ImgurIE(InfoExtractor):
                 'format_id': m.group('type').partition('/')[2],
                 'url': self._proto_relative_url(m.group('src')),
                 'ext': mimetype2ext(m.group('type')),
-                'acodec': 'none',
                 'width': width,
                 'height': height,
                 'http_headers': {
@@ -107,44 +87,64 @@ class ImgurIE(InfoExtractor):
         return {
             'id': video_id,
             'formats': formats,
-            'description': self._og_search_description(webpage, default=None),
             'title': self._og_search_title(webpage),
         }
 
 
-class ImgurAlbumIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:i\.)?imgur\.com/(?:(?:a|gallery|topic/[^/]+)/)?(?P<id>[a-zA-Z0-9]{5})(?:[/?#&]+)?$'
+class ImgurGalleryIE(InfoExtractor):
+    IE_NAME = 'imgur:gallery'
+    _VALID_URL = r'https?://(?:i\.)?imgur\.com/(?:gallery|(?:t(?:opic)?|r)/[^/]+)/(?P<id>[a-zA-Z0-9]+)'
 
     _TESTS = [{
         'url': 'http://imgur.com/gallery/Q95ko',
         'info_dict': {
             'id': 'Q95ko',
+            'title': 'Adding faces make every GIF better',
         },
         'playlist_count': 25,
     }, {
-        'url': 'http://imgur.com/a/j6Orj',
+        'url': 'http://imgur.com/topic/Aww/ll5Vk',
         'only_matching': True,
     }, {
-        'url': 'http://imgur.com/topic/Aww/ll5Vk',
+        'url': 'https://imgur.com/gallery/YcAQlkx',
+        'info_dict': {
+            'id': 'YcAQlkx',
+            'ext': 'mp4',
+            'title': 'Classic Steve Carell gif...cracks me up everytime....damn the repost downvotes....',
+        }
+    }, {
+        'url': 'http://imgur.com/topic/Funny/N8rOudd',
+        'only_matching': True,
+    }, {
+        'url': 'http://imgur.com/r/aww/VQcQPhM',
         'only_matching': True,
     }]
 
     def _real_extract(self, url):
-        album_id = self._match_id(url)
-
-        album_images = self._download_json(
-            'http://imgur.com/gallery/%s/album_images/hit.json?all=true' % album_id,
-            album_id, fatal=False)
-
-        if album_images:
-            data = album_images.get('data')
-            if data and isinstance(data, dict):
-                images = data.get('images')
-                if images and isinstance(images, list):
-                    entries = [
-                        self.url_result('http://imgur.com/%s' % image['hash'])
-                        for image in images if image.get('hash')]
-                    return self.playlist_result(entries, album_id)
-
-        # Fallback to single video
-        return self.url_result('http://imgur.com/%s' % album_id, ImgurIE.ie_key())
+        gallery_id = self._match_id(url)
+
+        data = self._download_json(
+            'https://imgur.com/gallery/%s.json' % gallery_id,
+            gallery_id)['data']['image']
+
+        if data.get('is_album'):
+            entries = [
+                self.url_result('http://imgur.com/%s' % image['hash'], ImgurIE.ie_key(), image['hash'])
+                for image in data['album_images']['images'] if image.get('hash')]
+            return self.playlist_result(entries, gallery_id, data.get('title'), data.get('description'))
+
+        return self.url_result('http://imgur.com/%s' % gallery_id, ImgurIE.ie_key(), gallery_id)
+
+
+class ImgurAlbumIE(ImgurGalleryIE):
+    IE_NAME = 'imgur:album'
+    _VALID_URL = r'https?://(?:i\.)?imgur\.com/a/(?P<id>[a-zA-Z0-9]+)'
+
+    _TESTS = [{
+        'url': 'http://imgur.com/a/j6Orj',
+        'info_dict': {
+            'id': 'j6Orj',
+            'title': 'A Literary Analysis of "Star Wars: The Force Awakens"',
+        },
+        'playlist_count': 12,
+    }]
index 1d58d6e850724f226d66f5822777010f8a8b8d38..11bbeb5922a9d85e05977196c822a076c8b45ec3 100644 (file)
@@ -12,7 +12,7 @@ from ..utils import (
 
 
 class IPrimaIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:play|prima)\.iprima\.cz/(?:.+/)?(?P<id>[^?#]+)'
+    _VALID_URL = r'https?://(?:[^/]+)\.iprima\.cz/(?:[^/]+/)*(?P<id>[^/?#&]+)'
     _GEO_BYPASS = False
 
     _TESTS = [{
@@ -41,6 +41,24 @@ class IPrimaIE(InfoExtractor):
         # iframe prima.iprima.cz
         'url': 'https://prima.iprima.cz/porady/jak-se-stavi-sen/rodina-rathousova-praha',
         'only_matching': True,
+    }, {
+        'url': 'http://www.iprima.cz/filmy/desne-rande',
+        'only_matching': True,
+    }, {
+        'url': 'https://zoom.iprima.cz/10-nejvetsich-tajemstvi-zahad/posvatna-mista-a-stavby',
+        'only_matching': True,
+    }, {
+        'url': 'https://krimi.iprima.cz/mraz-0/sebevrazdy',
+        'only_matching': True,
+    }, {
+        'url': 'https://cool.iprima.cz/derava-silnice-nevadi',
+        'only_matching': True,
+    }, {
+        'url': 'https://love.iprima.cz/laska-az-za-hrob/slib-dany-bratrovi',
+        'only_matching': True,
+    }, {
+        'url': 'https://autosalon.iprima.cz/motorsport/7-epizoda-1',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
index d9f8dbfd240db90fac3ddcfd23dd76d095cef52a..62b28e9809856abaca23c4690c4670cacc96965a 100644 (file)
@@ -61,7 +61,7 @@ class JojIE(InfoExtractor):
 \r
         bitrates = self._parse_json(\r
             self._search_regex(\r
-                r'(?s)bitrates\s*=\s*({.+?});', webpage, 'bitrates',\r
+                r'(?s)(?:src|bitrates)\s*=\s*({.+?});', webpage, 'bitrates',\r
                 default='{}'),\r
             video_id, transform_source=js_to_json, fatal=False)\r
 \r
index 63d0dc998cf1cf281dda3c27f3afaae84f4906c9..d19a6a774d2ef198bdc6e42ba4bae8289e7980f5 100644 (file)
@@ -7,8 +7,8 @@ from .common import InfoExtractor
 
 
 class JWPlatformIE(InfoExtractor):
-    _VALID_URL = r'(?:https?://content\.jwplatform\.com/(?:feeds|players|jw6)/|jwplatform:)(?P<id>[a-zA-Z0-9]{8})'
-    _TEST = {
+    _VALID_URL = r'(?:https?://(?:content\.jwplatform|cdn\.jwplayer)\.com/(?:(?:feed|player|thumb|preview|video|manifest)s|jw6|v2/media)/|jwplatform:)(?P<id>[a-zA-Z0-9]{8})'
+    _TESTS = [{
         'url': 'http://content.jwplatform.com/players/nPripu9l-ALJ3XQCI.js',
         'md5': 'fa8899fa601eb7c83a64e9d568bdf325',
         'info_dict': {
@@ -19,7 +19,10 @@ class JWPlatformIE(InfoExtractor):
             'upload_date': '20081127',
             'timestamp': 1227796140,
         }
-    }
+    }, {
+        'url': 'https://cdn.jwplayer.com/players/nPripu9l-ALJ3XQCI.js',
+        'only_matching': True,
+    }]
 
     @staticmethod
     def _extract_url(webpage):
@@ -34,5 +37,5 @@ class JWPlatformIE(InfoExtractor):
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
-        json_data = self._download_json('http://content.jwplatform.com/feeds/%s.json' % video_id, video_id)
+        json_data = self._download_json('https://cdn.jwplayer.com/v2/media/' + video_id, video_id)
         return self._parse_jwplayer_data(json_data, video_id)
index 04f68fce41fc129f7cf65e0e1cddb0e85636fb9e..fdf7f5bbc4aa314ac8d703376ec3a82014d8a6bf 100644 (file)
@@ -192,6 +192,8 @@ class KalturaIE(InfoExtractor):
                 'entryId': video_id,
                 'service': 'baseentry',
                 'ks': '{1:result:ks}',
+                'responseProfile:fields': 'createdAt,dataUrl,duration,name,plays,thumbnailUrl,userId',
+                'responseProfile:type': 1,
             },
             {
                 'action': 'getbyentryid',
diff --git a/youtube_dl/extractor/lecturio.py b/youtube_dl/extractor/lecturio.py
new file mode 100644 (file)
index 0000000..24f78d9
--- /dev/null
@@ -0,0 +1,229 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+    determine_ext,
+    extract_attributes,
+    ExtractorError,
+    float_or_none,
+    int_or_none,
+    str_or_none,
+    url_or_none,
+    urlencode_postdata,
+    urljoin,
+)
+
+
+class LecturioBaseIE(InfoExtractor):
+    _LOGIN_URL = 'https://app.lecturio.com/en/login'
+    _NETRC_MACHINE = 'lecturio'
+
+    def _real_initialize(self):
+        self._login()
+
+    def _login(self):
+        username, password = self._get_login_info()
+        if username is None:
+            return
+
+        # Sets some cookies
+        _, urlh = self._download_webpage_handle(
+            self._LOGIN_URL, None, 'Downloading login popup')
+
+        def is_logged(url_handle):
+            return self._LOGIN_URL not in compat_str(url_handle.geturl())
+
+        # Already logged in
+        if is_logged(urlh):
+            return
+
+        login_form = {
+            'signin[email]': username,
+            'signin[password]': password,
+            'signin[remember]': 'on',
+        }
+
+        response, urlh = self._download_webpage_handle(
+            self._LOGIN_URL, None, 'Logging in',
+            data=urlencode_postdata(login_form))
+
+        # Logged in successfully
+        if is_logged(urlh):
+            return
+
+        errors = self._html_search_regex(
+            r'(?s)<ul[^>]+class=["\']error_list[^>]+>(.+?)</ul>', response,
+            'errors', default=None)
+        if errors:
+            raise ExtractorError('Unable to login: %s' % errors, expected=True)
+        raise ExtractorError('Unable to log in')
+
+
+class LecturioIE(LecturioBaseIE):
+    _VALID_URL = r'''(?x)
+                    https://
+                        (?:
+                            app\.lecturio\.com/[^/]+/(?P<id>[^/?#&]+)\.lecture|
+                            (?:www\.)?lecturio\.de/[^/]+/(?P<id_de>[^/?#&]+)\.vortrag
+                        )
+                    '''
+    _TESTS = [{
+        'url': 'https://app.lecturio.com/medical-courses/important-concepts-and-terms-introduction-to-microbiology.lecture#tab/videos',
+        'md5': 'f576a797a5b7a5e4e4bbdfc25a6a6870',
+        'info_dict': {
+            'id': '39634',
+            'ext': 'mp4',
+            'title': 'Important Concepts and Terms – Introduction to Microbiology',
+        },
+        'skip': 'Requires lecturio account credentials',
+    }, {
+        'url': 'https://www.lecturio.de/jura/oeffentliches-recht-staatsexamen.vortrag',
+        'only_matching': True,
+    }]
+
+    _CC_LANGS = {
+        'German': 'de',
+        'English': 'en',
+        'Spanish': 'es',
+        'French': 'fr',
+        'Polish': 'pl',
+        'Russian': 'ru',
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        display_id = mobj.group('id') or mobj.group('id_de')
+
+        webpage = self._download_webpage(
+            'https://app.lecturio.com/en/lecture/%s/player.html' % display_id,
+            display_id)
+
+        lecture_id = self._search_regex(
+            r'lecture_id\s*=\s*(?:L_)?(\d+)', webpage, 'lecture id')
+
+        api_url = self._search_regex(
+            r'lectureDataLink\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
+            'api url', group='url')
+
+        video = self._download_json(api_url, display_id)
+
+        title = video['title'].strip()
+
+        formats = []
+        for format_ in video['content']['media']:
+            if not isinstance(format_, dict):
+                continue
+            file_ = format_.get('file')
+            if not file_:
+                continue
+            ext = determine_ext(file_)
+            if ext == 'smil':
+                # smil contains only broken RTMP formats anyway
+                continue
+            file_url = url_or_none(file_)
+            if not file_url:
+                continue
+            label = str_or_none(format_.get('label'))
+            filesize = int_or_none(format_.get('fileSize'))
+            formats.append({
+                'url': file_url,
+                'format_id': label,
+                'filesize': float_or_none(filesize, invscale=1000)
+            })
+        self._sort_formats(formats)
+
+        subtitles = {}
+        automatic_captions = {}
+        cc = self._parse_json(
+            self._search_regex(
+                r'subtitleUrls\s*:\s*({.+?})\s*,', webpage, 'subtitles',
+                default='{}'), display_id, fatal=False)
+        for cc_label, cc_url in cc.items():
+            cc_url = url_or_none(cc_url)
+            if not cc_url:
+                continue
+            lang = self._search_regex(
+                r'/([a-z]{2})_', cc_url, 'lang',
+                default=cc_label.split()[0] if cc_label else 'en')
+            original_lang = self._search_regex(
+                r'/[a-z]{2}_([a-z]{2})_', cc_url, 'original lang',
+                default=None)
+            sub_dict = (automatic_captions
+                        if 'auto-translated' in cc_label or original_lang
+                        else subtitles)
+            sub_dict.setdefault(self._CC_LANGS.get(lang, lang), []).append({
+                'url': cc_url,
+            })
+
+        return {
+            'id': lecture_id,
+            'title': title,
+            'formats': formats,
+            'subtitles': subtitles,
+            'automatic_captions': automatic_captions,
+        }
+
+
+class LecturioCourseIE(LecturioBaseIE):
+    _VALID_URL = r'https://app\.lecturio\.com/[^/]+/(?P<id>[^/?#&]+)\.course'
+    _TEST = {
+        'url': 'https://app.lecturio.com/medical-courses/microbiology-introduction.course#/',
+        'info_dict': {
+            'id': 'microbiology-introduction',
+            'title': 'Microbiology: Introduction',
+        },
+        'playlist_count': 45,
+        'skip': 'Requires lecturio account credentials',
+    }
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, display_id)
+
+        entries = []
+        for mobj in re.finditer(
+                r'(?s)<[^>]+\bdata-url=(["\'])(?:(?!\1).)+\.lecture\b[^>]+>',
+                webpage):
+            params = extract_attributes(mobj.group(0))
+            lecture_url = urljoin(url, params.get('data-url'))
+            lecture_id = params.get('data-id')
+            entries.append(self.url_result(
+                lecture_url, ie=LecturioIE.ie_key(), video_id=lecture_id))
+
+        title = self._search_regex(
+            r'<span[^>]+class=["\']content-title[^>]+>([^<]+)', webpage,
+            'title', default=None)
+
+        return self.playlist_result(entries, display_id, title)
+
+
+class LecturioDeCourseIE(LecturioBaseIE):
+    _VALID_URL = r'https://(?:www\.)?lecturio\.de/[^/]+/(?P<id>[^/?#&]+)\.kurs'
+    _TEST = {
+        'url': 'https://www.lecturio.de/jura/grundrechte.kurs',
+        'only_matching': True,
+    }
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, display_id)
+
+        entries = []
+        for mobj in re.finditer(
+                r'(?s)<td[^>]+\bdata-lecture-id=["\'](?P<id>\d+).+?\bhref=(["\'])(?P<url>(?:(?!\2).)+\.vortrag)\b[^>]+>',
+                webpage):
+            lecture_url = urljoin(url, mobj.group('url'))
+            lecture_id = mobj.group('id')
+            entries.append(self.url_result(
+                lecture_url, ie=LecturioIE.ie_key(), video_id=lecture_id))
+
+        title = self._search_regex(
+            r'<h1[^>]*>([^<]+)', webpage, 'title', default=None)
+
+        return self.playlist_result(entries, display_id, title)
index 40295a30b51f733b637c651cc8a434ede14f517a..03f2051444d63bb046b6b5943cc5400085c6d8d3 100644 (file)
@@ -16,16 +16,15 @@ from ..utils import (
 class LibraryOfCongressIE(InfoExtractor):
     IE_NAME = 'loc'
     IE_DESC = 'Library of Congress'
-    _VALID_URL = r'https?://(?:www\.)?loc\.gov/(?:item/|today/cyberlc/feature_wdesc\.php\?.*\brec=)(?P<id>[0-9]+)'
+    _VALID_URL = r'https?://(?:www\.)?loc\.gov/(?:item/|today/cyberlc/feature_wdesc\.php\?.*\brec=)(?P<id>[0-9a-z_.]+)'
     _TESTS = [{
         # embedded via <div class="media-player"
         'url': 'http://loc.gov/item/90716351/',
-        'md5': '353917ff7f0255aa6d4b80a034833de8',
+        'md5': '6ec0ae8f07f86731b1b2ff70f046210a',
         'info_dict': {
             'id': '90716351',
             'ext': 'mp4',
             'title': "Pa's trip to Mars",
-            'thumbnail': r're:^https?://.*\.jpg$',
             'duration': 0,
             'view_count': int,
         },
@@ -57,6 +56,12 @@ class LibraryOfCongressIE(InfoExtractor):
         'params': {
             'skip_download': True,
         },
+    }, {
+        'url': 'https://www.loc.gov/item/ihas.200197114/',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.loc.gov/item/afc1981005_afs20503/',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
@@ -67,12 +72,13 @@ class LibraryOfCongressIE(InfoExtractor):
             (r'id=(["\'])media-player-(?P<id>.+?)\1',
              r'<video[^>]+id=(["\'])uuid-(?P<id>.+?)\1',
              r'<video[^>]+data-uuid=(["\'])(?P<id>.+?)\1',
-             r'mediaObjectId\s*:\s*(["\'])(?P<id>.+?)\1'),
+             r'mediaObjectId\s*:\s*(["\'])(?P<id>.+?)\1',
+             r'data-tab="share-media-(?P<id>[0-9A-F]{32})"'),
             webpage, 'media id', group='id')
 
         data = self._download_json(
             'https://media.loc.gov/services/v1/media?id=%s&context=json' % media_id,
-            video_id)['mediaObject']
+            media_id)['mediaObject']
 
         derivative = data['derivatives'][0]
         media_url = derivative['derivativeUrl']
@@ -89,25 +95,29 @@ class LibraryOfCongressIE(InfoExtractor):
         if ext not in ('mp4', 'mp3'):
             media_url += '.mp4' if is_video else '.mp3'
 
-        if 'vod/mp4:' in media_url:
-            formats = [{
-                'url': media_url.replace('vod/mp4:', 'hls-vod/media/') + '.m3u8',
+        formats = []
+        if '/vod/mp4:' in media_url:
+            formats.append({
+                'url': media_url.replace('/vod/mp4:', '/hls-vod/media/') + '.m3u8',
                 'format_id': 'hls',
                 'ext': 'mp4',
                 'protocol': 'm3u8_native',
                 'quality': 1,
-            }]
-        elif 'vod/mp3:' in media_url:
-            formats = [{
-                'url': media_url.replace('vod/mp3:', ''),
-                'vcodec': 'none',
-            }]
+            })
+        http_format = {
+            'url': re.sub(r'(://[^/]+/)(?:[^/]+/)*(?:mp4|mp3):', r'\1', media_url),
+            'format_id': 'http',
+            'quality': 1,
+        }
+        if not is_video:
+            http_format['vcodec'] = 'none'
+        formats.append(http_format)
 
         download_urls = set()
         for m in re.finditer(
                 r'<option[^>]+value=(["\'])(?P<url>.+?)\1[^>]+data-file-download=[^>]+>\s*(?P<id>.+?)(?:(?:&nbsp;|\s+)\((?P<size>.+?)\))?\s*<', webpage):
             format_id = m.group('id').lower()
-            if format_id == 'gif':
+            if format_id in ('gif', 'jpeg'):
                 continue
             download_url = m.group('url')
             if download_url in download_urls:
index 26671753c429401fc9085b69e5a0aae10d495e6f..22a067e40ecf3dc06b10b3e40e5fc03d8d597f83 100644 (file)
@@ -87,7 +87,7 @@ class LiveLeakIE(InfoExtractor):
     @staticmethod
     def _extract_urls(webpage):
         return re.findall(
-            r'<iframe[^>]+src="(https?://(?:\w+\.)?liveleak\.com/ll_embed\?[^"]*[if]=[\w_]+[^"]+)"',
+            r'<iframe[^>]+src="(https?://(?:\w+\.)?liveleak\.com/ll_embed\?[^"]*[ift]=[\w_]+[^"]+)"',
             webpage)
 
     def _real_extract(self, url):
@@ -120,13 +120,27 @@ class LiveLeakIE(InfoExtractor):
             }
 
         for idx, info_dict in enumerate(entries):
+            formats = []
             for a_format in info_dict['formats']:
                 if not a_format.get('height'):
                     a_format['height'] = int_or_none(self._search_regex(
                         r'([0-9]+)p\.mp4', a_format['url'], 'height label',
                         default=None))
-
-            self._sort_formats(info_dict['formats'])
+                formats.append(a_format)
+
+                # Removing '.*.mp4' gives the raw video, which is essentially
+                # the same video without the LiveLeak logo at the top (see
+                # https://github.com/rg3/youtube-dl/pull/4768)
+                orig_url = re.sub(r'\.mp4\.[^.]+', '', a_format['url'])
+                if a_format['url'] != orig_url:
+                    format_id = a_format.get('format_id')
+                    formats.append({
+                        'format_id': 'original' + ('-' + format_id if format_id else ''),
+                        'url': orig_url,
+                        'preference': 1,
+                    })
+            self._sort_formats(formats)
+            info_dict['formats'] = formats
 
             # Don't append entry ID for one-video pages to keep backward compatibility
             if len(entries) > 1:
@@ -146,7 +160,7 @@ class LiveLeakIE(InfoExtractor):
 
 
 class LiveLeakEmbedIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?liveleak\.com/ll_embed\?.*?\b(?P<kind>[if])=(?P<id>[\w_]+)'
+    _VALID_URL = r'https?://(?:www\.)?liveleak\.com/ll_embed\?.*?\b(?P<kind>[ift])=(?P<id>[\w_]+)'
 
     # See generic.py for actual test cases
     _TESTS = [{
@@ -158,15 +172,14 @@ class LiveLeakEmbedIE(InfoExtractor):
     }]
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        kind, video_id = mobj.group('kind', 'id')
+        kind, video_id = re.match(self._VALID_URL, url).groups()
 
         if kind == 'f':
             webpage = self._download_webpage(url, video_id)
             liveleak_url = self._search_regex(
-                r'logourl\s*:\s*(?P<q1>[\'"])(?P<url>%s)(?P=q1)' % LiveLeakIE._VALID_URL,
+                r'(?:logourl\s*:\s*|window\.open\()(?P<q1>[\'"])(?P<url>%s)(?P=q1)' % LiveLeakIE._VALID_URL,
                 webpage, 'LiveLeak URL', group='url')
-        elif kind == 'i':
-            liveleak_url = 'http://www.liveleak.com/view?i=%s' % video_id
+        else:
+            liveleak_url = 'http://www.liveleak.com/view?%s=%s' % (kind, video_id)
 
         return self.url_result(liveleak_url, ie=LiveLeakIE.ie_key())
index c4776bbf3b5aa590e2b014b4d23e55f0de569cc6..e55b1a202426026763375880b549180efbd52a42 100644 (file)
@@ -363,7 +363,4 @@ class LivestreamShortenerIE(InfoExtractor):
         id = mobj.group('id')
         webpage = self._download_webpage(url, id)
 
-        return {
-            '_type': 'url',
-            'url': self._og_search_url(webpage),
-        }
+        return self.url_result(self._og_search_url(webpage))
index 4ba61cd8a1ccc187eaa539114796a35d083737a2..3084c6dffc9cd3b32ede52906669299807a9ad20 100644 (file)
@@ -15,7 +15,7 @@ from ..utils import (
 
 
 class LyndaBaseIE(InfoExtractor):
-    _SIGNIN_URL = 'https://www.lynda.com/signin'
+    _SIGNIN_URL = 'https://www.lynda.com/signin/lynda'
     _PASSWORD_URL = 'https://www.lynda.com/signin/password'
     _USER_URL = 'https://www.lynda.com/signin/user'
     _ACCOUNT_CREDENTIALS_HINT = 'Use --username and --password options to provide lynda.com account credentials.'
index b94b3c2abe333a2ef194abe2186775b182145b9d..e8d7163e4abeaf31c1666eb9b2e03765602fa82f 100644 (file)
@@ -2,12 +2,18 @@
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
-from ..utils import int_or_none
+from ..utils import (
+    determine_ext,
+    int_or_none,
+    str_to_int,
+    urlencode_postdata,
+)
 
 
 class ManyVidsIE(InfoExtractor):
     _VALID_URL = r'(?i)https?://(?:www\.)?manyvids\.com/video/(?P<id>\d+)'
-    _TEST = {
+    _TESTS = [{
+        # preview video
         'url': 'https://www.manyvids.com/Video/133957/everthing-about-me/',
         'md5': '03f11bb21c52dd12a05be21a5c7dcc97',
         'info_dict': {
@@ -17,7 +23,18 @@ class ManyVidsIE(InfoExtractor):
             'view_count': int,
             'like_count': int,
         },
-    }
+    }, {
+        # full video
+        'url': 'https://www.manyvids.com/Video/935718/MY-FACE-REVEAL/',
+        'md5': 'f3e8f7086409e9b470e2643edb96bdcc',
+        'info_dict': {
+            'id': '935718',
+            'ext': 'mp4',
+            'title': 'MY FACE REVEAL',
+            'view_count': int,
+            'like_count': int,
+        },
+    }]
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
@@ -28,12 +45,41 @@ class ManyVidsIE(InfoExtractor):
             r'data-(?:video-filepath|meta-video)\s*=s*(["\'])(?P<url>(?:(?!\1).)+)\1',
             webpage, 'video URL', group='url')
 
-        title = '%s (Preview)' % self._html_search_regex(
-            r'<h2[^>]+class="m-a-0"[^>]*>([^<]+)', webpage, 'title')
+        title = self._html_search_regex(
+            (r'<span[^>]+class=["\']item-title[^>]+>([^<]+)',
+             r'<h2[^>]+class=["\']h2 m-0["\'][^>]*>([^<]+)'),
+            webpage, 'title', default=None) or self._html_search_meta(
+            'twitter:title', webpage, 'title', fatal=True)
+
+        if any(p in webpage for p in ('preview_videos', '_preview.mp4')):
+            title += ' (Preview)'
+
+        mv_token = self._search_regex(
+            r'data-mvtoken=(["\'])(?P<value>(?:(?!\1).)+)\1', webpage,
+            'mv token', default=None, group='value')
+
+        if mv_token:
+            # Sets some cookies
+            self._download_webpage(
+                'https://www.manyvids.com/includes/ajax_repository/you_had_me_at_hello.php',
+                video_id, fatal=False, data=urlencode_postdata({
+                    'mvtoken': mv_token,
+                    'vid': video_id,
+                }), headers={
+                    'Referer': url,
+                    'X-Requested-With': 'XMLHttpRequest'
+                })
+
+        if determine_ext(video_url) == 'm3u8':
+            formats = self._extract_m3u8_formats(
+                video_url, video_id, 'mp4', entry_protocol='m3u8_native',
+                m3u8_id='hls')
+        else:
+            formats = [{'url': video_url}]
 
         like_count = int_or_none(self._search_regex(
             r'data-likes=["\'](\d+)', webpage, 'like count', default=None))
-        view_count = int_or_none(self._html_search_regex(
+        view_count = str_to_int(self._html_search_regex(
             r'(?s)<span[^>]+class="views-wrapper"[^>]*>(.+?)</span', webpage,
             'view count', default=None))
 
@@ -42,7 +88,5 @@ class ManyVidsIE(InfoExtractor):
             'title': title,
             'view_count': view_count,
             'like_count': like_count,
-            'formats': [{
-                'url': video_url,
-            }],
+            'formats': formats,
         }
index 84876b883811cc8ad2607de7caed0f621f35f660..ef9628e651aaa7afeb2d181f55c01959b712a38f 100644 (file)
@@ -21,7 +21,7 @@ from ..utils import (
 
 
 class MediasiteIE(InfoExtractor):
-    _VALID_URL = r'(?xi)https?://[^/]+/Mediasite/Play/(?P<id>[0-9a-f]{32,34})(?P<query>\?[^#]+|)'
+    _VALID_URL = r'(?xi)https?://[^/]+/Mediasite/(?:Play|Showcase/(?:default|livebroadcast)/Presentation)/(?P<id>[0-9a-f]{32,34})(?P<query>\?[^#]+|)'
     _TESTS = [
         {
             'url': 'https://hitsmediaweb.h-its.org/mediasite/Play/2db6c271681e4f199af3c60d1f82869b1d',
@@ -84,7 +84,15 @@ class MediasiteIE(InfoExtractor):
                 'timestamp': 1333983600,
                 'duration': 7794,
             }
-        }
+        },
+        {
+            'url': 'https://collegerama.tudelft.nl/Mediasite/Showcase/livebroadcast/Presentation/ada7020854f743c49fbb45c9ec7dbb351d',
+            'only_matching': True,
+        },
+        {
+            'url': 'https://mediasite.ntnu.no/Mediasite/Showcase/default/Presentation/7d8b913259334b688986e970fae6fcb31d',
+            'only_matching': True,
+        },
     ]
 
     # look in Mediasite.Core.js (Mediasite.ContentStreamType[*])
index b7bccb504529d7ca1e7f70ced45d86b9259d454b..bcac13ec5edfdeb83216e732b46ae34a36656205 100644 (file)
@@ -161,11 +161,17 @@ class MixcloudIE(InfoExtractor):
             stream_info = info_json['streamInfo']
             formats = []
 
+            def decrypt_url(f_url):
+                for k in (key, 'IFYOUWANTTHEARTISTSTOGETPAIDDONOTDOWNLOADFROMMIXCLOUD'):
+                    decrypted_url = self._decrypt_xor_cipher(k, f_url)
+                    if re.search(r'^https?://[0-9a-z.]+/[0-9A-Za-z/.?=&_-]+$', decrypted_url):
+                        return decrypted_url
+
             for url_key in ('url', 'hlsUrl', 'dashUrl'):
                 format_url = stream_info.get(url_key)
                 if not format_url:
                     continue
-                decrypted = self._decrypt_xor_cipher(key, compat_b64decode(format_url))
+                decrypted = decrypt_url(compat_b64decode(format_url))
                 if not decrypted:
                     continue
                 if url_key == 'hlsUrl':
index 4d2ee64080710a0a531aaf391eeb116da730cc89..165964ca085185e9a2da2fd8b276308570ddd5aa 100644 (file)
@@ -1,15 +1,9 @@
 from __future__ import unicode_literals
 
-import re
-
 from .common import InfoExtractor
-from .adobepass import AdobePassIE
-from .theplatform import ThePlatformIE
 from ..utils import (
     smuggle_url,
     url_basename,
-    update_url_query,
-    get_element_by_class,
 )
 
 
@@ -64,132 +58,3 @@ class NationalGeographicVideoIE(InfoExtractor):
                 {'force_smil_url': True}),
             'id': guid,
         }
-
-
-class NationalGeographicIE(ThePlatformIE, AdobePassIE):
-    IE_NAME = 'natgeo'
-    _VALID_URL = r'https?://channel\.nationalgeographic\.com/(?:(?:(?:wild/)?[^/]+/)?(?:videos|episodes)|u)/(?P<id>[^/?]+)'
-
-    _TESTS = [
-        {
-            'url': 'http://channel.nationalgeographic.com/u/kdi9Ld0PN2molUUIMSBGxoeDhD729KRjQcnxtetilWPMevo8ZwUBIDuPR0Q3D2LVaTsk0MPRkRWDB8ZhqWVeyoxfsZZm36yRp1j-zPfsHEyI_EgAeFY/',
-            'md5': '518c9aa655686cf81493af5cc21e2a04',
-            'info_dict': {
-                'id': 'vKInpacll2pC',
-                'ext': 'mp4',
-                'title': 'Uncovering a Universal Knowledge',
-                'description': 'md5:1a89148475bf931b3661fcd6ddb2ae3a',
-                'timestamp': 1458680907,
-                'upload_date': '20160322',
-                'uploader': 'NEWA-FNG-NGTV',
-            },
-            'add_ie': ['ThePlatform'],
-        },
-        {
-            'url': 'http://channel.nationalgeographic.com/u/kdvOstqYaBY-vSBPyYgAZRUL4sWUJ5XUUPEhc7ISyBHqoIO4_dzfY3K6EjHIC0hmFXoQ7Cpzm6RkET7S3oMlm6CFnrQwSUwo/',
-            'md5': 'c4912f656b4cbe58f3e000c489360989',
-            'info_dict': {
-                'id': 'Pok5lWCkiEFA',
-                'ext': 'mp4',
-                'title': 'The Stunning Red Bird of Paradise',
-                'description': 'md5:7bc8cd1da29686be4d17ad1230f0140c',
-                'timestamp': 1459362152,
-                'upload_date': '20160330',
-                'uploader': 'NEWA-FNG-NGTV',
-            },
-            'add_ie': ['ThePlatform'],
-        },
-        {
-            'url': 'http://channel.nationalgeographic.com/the-story-of-god-with-morgan-freeman/episodes/the-power-of-miracles/',
-            'only_matching': True,
-        },
-        {
-            'url': 'http://channel.nationalgeographic.com/videos/treasures-rediscovered/',
-            'only_matching': True,
-        },
-        {
-            'url': 'http://channel.nationalgeographic.com/the-story-of-god-with-morgan-freeman/videos/uncovering-a-universal-knowledge/',
-            'only_matching': True,
-        },
-        {
-            'url': 'http://channel.nationalgeographic.com/wild/destination-wild/videos/the-stunning-red-bird-of-paradise/',
-            'only_matching': True,
-        }
-    ]
-
-    def _real_extract(self, url):
-        display_id = self._match_id(url)
-        webpage = self._download_webpage(url, display_id)
-        release_url = self._search_regex(
-            r'video_auth_playlist_url\s*=\s*"([^"]+)"',
-            webpage, 'release url')
-        theplatform_path = self._search_regex(r'https?://link\.theplatform\.com/s/([^?]+)', release_url, 'theplatform path')
-        video_id = theplatform_path.split('/')[-1]
-        query = {
-            'mbr': 'true',
-        }
-        is_auth = self._search_regex(r'video_is_auth\s*=\s*"([^"]+)"', webpage, 'is auth', fatal=False)
-        if is_auth == 'auth':
-            auth_resource_id = self._search_regex(
-                r"video_auth_resourceId\s*=\s*'([^']+)'",
-                webpage, 'auth resource id')
-            query['auth'] = self._extract_mvpd_auth(url, video_id, 'natgeo', auth_resource_id)
-
-        formats = []
-        subtitles = {}
-        for key, value in (('switch', 'http'), ('manifest', 'm3u')):
-            tp_query = query.copy()
-            tp_query.update({
-                key: value,
-            })
-            tp_formats, tp_subtitles = self._extract_theplatform_smil(
-                update_url_query(release_url, tp_query), video_id, 'Downloading %s SMIL data' % value)
-            formats.extend(tp_formats)
-            subtitles = self._merge_subtitles(subtitles, tp_subtitles)
-        self._sort_formats(formats)
-
-        info = self._extract_theplatform_metadata(theplatform_path, display_id)
-        info.update({
-            'id': video_id,
-            'formats': formats,
-            'subtitles': subtitles,
-            'display_id': display_id,
-        })
-        return info
-
-
-class NationalGeographicEpisodeGuideIE(InfoExtractor):
-    IE_NAME = 'natgeo:episodeguide'
-    _VALID_URL = r'https?://channel\.nationalgeographic\.com/(?:wild/)?(?P<id>[^/]+)/episode-guide'
-    _TESTS = [
-        {
-            'url': 'http://channel.nationalgeographic.com/the-story-of-god-with-morgan-freeman/episode-guide/',
-            'info_dict': {
-                'id': 'the-story-of-god-with-morgan-freeman-season-1',
-                'title': 'The Story of God with Morgan Freeman - Season 1',
-            },
-            'playlist_mincount': 6,
-        },
-        {
-            'url': 'http://channel.nationalgeographic.com/underworld-inc/episode-guide/?s=2',
-            'info_dict': {
-                'id': 'underworld-inc-season-2',
-                'title': 'Underworld, Inc. - Season 2',
-            },
-            'playlist_mincount': 7,
-        },
-    ]
-
-    def _real_extract(self, url):
-        display_id = self._match_id(url)
-        webpage = self._download_webpage(url, display_id)
-        show = get_element_by_class('show', webpage)
-        selected_season = self._search_regex(
-            r'<div[^>]+class="select-seasons[^"]*".*?<a[^>]*>(.*?)</a>',
-            webpage, 'selected season')
-        entries = [
-            self.url_result(self._proto_relative_url(entry_url), 'NationalGeographic')
-            for entry_url in re.findall('(?s)<div[^>]+class="col-inner"[^>]*?>.*?<a[^>]+href="([^"]+)"', webpage)]
-        return self.playlist_result(
-            entries, '%s-%s' % (display_id, selected_season.lower().replace(' ', '-')),
-            '%s - %s' % (show, selected_season))
index 765c46fd228f1b90e37902e94e8a5e45a3bacb98..3282f84ee4ef637eb7d7ece2b9b859ba0059e42c 100644 (file)
@@ -9,10 +9,8 @@ from .theplatform import ThePlatformIE
 from .adobepass import AdobePassIE
 from ..compat import compat_urllib_parse_unquote
 from ..utils import (
-    find_xpath_attr,
     smuggle_url,
     try_get,
-    unescapeHTML,
     update_url_query,
     int_or_none,
 )
@@ -269,27 +267,14 @@ class CSNNEIE(InfoExtractor):
 
 
 class NBCNewsIE(ThePlatformIE):
-    _VALID_URL = r'''(?x)https?://(?:www\.)?(?:nbcnews|today|msnbc)\.com/
-        (?:video/.+?/(?P<id>\d+)|
-        ([^/]+/)*(?:.*-)?(?P<mpx_id>[^/?]+))
-        '''
+    _VALID_URL = r'(?x)https?://(?:www\.)?(?:nbcnews|today|msnbc)\.com/([^/]+/)*(?:.*-)?(?P<id>[^/?]+)'
 
     _TESTS = [
-        {
-            'url': 'http://www.nbcnews.com/video/nbc-news/52753292',
-            'md5': '47abaac93c6eaf9ad37ee6c4463a5179',
-            'info_dict': {
-                'id': '52753292',
-                'ext': 'flv',
-                'title': 'Crew emerges after four-month Mars food study',
-                'description': 'md5:24e632ffac72b35f8b67a12d1b6ddfc1',
-            },
-        },
         {
             'url': 'http://www.nbcnews.com/watch/nbcnews-com/how-twitter-reacted-to-the-snowden-interview-269389891880',
             'md5': 'af1adfa51312291a017720403826bb64',
             'info_dict': {
-                'id': 'p_tweet_snow_140529',
+                'id': '269389891880',
                 'ext': 'mp4',
                 'title': 'How Twitter Reacted To The Snowden Interview',
                 'description': 'md5:65a0bd5d76fe114f3c2727aa3a81fe64',
@@ -313,7 +298,7 @@ class NBCNewsIE(ThePlatformIE):
             'url': 'http://www.nbcnews.com/nightly-news/video/nightly-news-with-brian-williams-full-broadcast-february-4-394064451844',
             'md5': '73135a2e0ef819107bbb55a5a9b2a802',
             'info_dict': {
-                'id': 'nn_netcast_150204',
+                'id': '394064451844',
                 'ext': 'mp4',
                 'title': 'Nightly News with Brian Williams Full Broadcast (February 4)',
                 'description': 'md5:1c10c1eccbe84a26e5debb4381e2d3c5',
@@ -326,7 +311,7 @@ class NBCNewsIE(ThePlatformIE):
             'url': 'http://www.nbcnews.com/business/autos/volkswagen-11-million-vehicles-could-have-suspect-software-emissions-scandal-n431456',
             'md5': 'a49e173825e5fcd15c13fc297fced39d',
             'info_dict': {
-                'id': 'x_lon_vwhorn_150922',
+                'id': '529953347624',
                 'ext': 'mp4',
                 'title': 'Volkswagen U.S. Chief:\xa0 We Have Totally Screwed Up',
                 'description': 'md5:c8be487b2d80ff0594c005add88d8351',
@@ -339,7 +324,7 @@ class NBCNewsIE(ThePlatformIE):
             'url': 'http://www.today.com/video/see-the-aurora-borealis-from-space-in-stunning-new-nasa-video-669831235788',
             'md5': '118d7ca3f0bea6534f119c68ef539f71',
             'info_dict': {
-                'id': 'tdy_al_space_160420',
+                'id': '669831235788',
                 'ext': 'mp4',
                 'title': 'See the aurora borealis from space in stunning new NASA video',
                 'description': 'md5:74752b7358afb99939c5f8bb2d1d04b1',
@@ -352,7 +337,7 @@ class NBCNewsIE(ThePlatformIE):
             'url': 'http://www.msnbc.com/all-in-with-chris-hayes/watch/the-chaotic-gop-immigration-vote-314487875924',
             'md5': '6d236bf4f3dddc226633ce6e2c3f814d',
             'info_dict': {
-                'id': 'n_hayes_Aimm_140801_272214',
+                'id': '314487875924',
                 'ext': 'mp4',
                 'title': 'The chaotic GOP immigration vote',
                 'description': 'The Republican House votes on a border bill that has no chance of getting through the Senate or signed by the President and is drawing criticism from all sides.',
@@ -374,60 +359,22 @@ class NBCNewsIE(ThePlatformIE):
     ]
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
-        if video_id is not None:
-            all_info = self._download_xml('http://www.nbcnews.com/id/%s/displaymode/1219' % video_id, video_id)
-            info = all_info.find('video')
-
-            return {
-                'id': video_id,
-                'title': info.find('headline').text,
-                'ext': 'flv',
-                'url': find_xpath_attr(info, 'media', 'type', 'flashVideo').text,
-                'description': info.find('caption').text,
-                'thumbnail': find_xpath_attr(info, 'media', 'type', 'thumbnail').text,
-            }
-        else:
-            # "feature" and "nightly-news" pages use theplatform.com
-            video_id = mobj.group('mpx_id')
+        video_id = self._match_id(url)
+        if not video_id.isdigit():
             webpage = self._download_webpage(url, video_id)
 
-            filter_param = 'byId'
-            bootstrap_json = self._search_regex(
-                [r'(?m)(?:var\s+(?:bootstrapJson|playlistData)|NEWS\.videoObj)\s*=\s*({.+});?\s*$',
-                 r'videoObj\s*:\s*({.+})', r'data-video="([^"]+)"',
-                 r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);'],
-                webpage, 'bootstrap json', default=None)
-            if bootstrap_json:
-                bootstrap = self._parse_json(
-                    bootstrap_json, video_id, transform_source=unescapeHTML)
-
-                info = None
-                if 'results' in bootstrap:
-                    info = bootstrap['results'][0]['video']
-                elif 'video' in bootstrap:
-                    info = bootstrap['video']
-                elif 'msnbcVideoInfo' in bootstrap:
-                    info = bootstrap['msnbcVideoInfo']['meta']
-                elif 'msnbcThePlatform' in bootstrap:
-                    info = bootstrap['msnbcThePlatform']['videoPlayer']['video']
-                else:
-                    info = bootstrap
-
-                if 'guid' in info:
-                    video_id = info['guid']
-                    filter_param = 'byGuid'
-                elif 'mpxId' in info:
-                    video_id = info['mpxId']
-
-            return {
-                '_type': 'url_transparent',
-                'id': video_id,
-                # http://feed.theplatform.com/f/2E2eJC/nbcnews also works
-                'url': update_url_query('http://feed.theplatform.com/f/2E2eJC/nnd_NBCNews', {filter_param: video_id}),
-                'ie_key': 'ThePlatformFeed',
-            }
+            data = self._parse_json(self._search_regex(
+                r'window\.__data\s*=\s*({.+});', webpage,
+                'bootstrap json'), video_id)
+            video_id = data['article']['content'][0]['primaryMedia']['video']['mpxMetadata']['id']
+
+        return {
+            '_type': 'url_transparent',
+            'id': video_id,
+            # http://feed.theplatform.com/f/2E2eJC/nbcnews also works
+            'url': update_url_query('http://feed.theplatform.com/f/2E2eJC/nnd_NBCNews', {'byId': video_id}),
+            'ie_key': 'ThePlatformFeed',
+        }
 
 
 class NBCOlympicsIE(InfoExtractor):
index 80186ec50cef0e580e1e07104006f8e3f89fa799..901f44b54f40c2c02e120c636a80b0b5bfb4ea2e 100644 (file)
@@ -35,7 +35,7 @@ class NovaEmbedIE(InfoExtractor):
 
         bitrates = self._parse_json(
             self._search_regex(
-                r'(?s)bitrates\s*=\s*({.+?})\s*;', webpage, 'formats'),
+                r'(?s)(?:src|bitrates)\s*=\s*({.+?})\s*;', webpage, 'formats'),
             video_id, transform_source=js_to_json)
 
         QUALITIES = ('lq', 'mq', 'hq', 'hd')
index c2cb85a734f256330b9cbb9bfd77910a6f962c45..5a427c39693dace826df03c805f5f5452b700dc2 100644 (file)
@@ -363,7 +363,7 @@ class NPOIE(NPOBaseIE):
 
 class NPOLiveIE(NPOBaseIE):
     IE_NAME = 'npo.nl:live'
-    _VALID_URL = r'https?://(?:www\.)?npo\.nl/live(?:/(?P<id>[^/?#&]+))?'
+    _VALID_URL = r'https?://(?:www\.)?npo(?:start)?\.nl/live(?:/(?P<id>[^/?#&]+))?'
 
     _TESTS = [{
         'url': 'http://www.npo.nl/live/npo-1',
@@ -380,6 +380,9 @@ class NPOLiveIE(NPOBaseIE):
     }, {
         'url': 'http://www.npo.nl/live',
         'only_matching': True,
+    }, {
+        'url': 'https://www.npostart.nl/live/npo-1',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
index a231735fb79aa18140e3f4e5bdd80fb416b8020c..072f920a973a39faae363c9109196c438edc9f30 100644 (file)
@@ -211,13 +211,13 @@ class NRKIE(NRKBaseIE):
     _TESTS = [{
         # video
         'url': 'http://www.nrk.no/video/PS*150533',
-        'md5': '2f7f6eeb2aacdd99885f355428715cfa',
+        'md5': '706f34cdf1322577589e369e522b50ef',
         'info_dict': {
             'id': '150533',
             'ext': 'mp4',
             'title': 'Dompap og andre fugler i Piip-Show',
             'description': 'md5:d9261ba34c43b61c812cb6b0269a5c8f',
-            'duration': 263,
+            'duration': 262,
         }
     }, {
         # audio
@@ -248,7 +248,7 @@ class NRKTVIE(NRKBaseIE):
     _VALID_URL = r'''(?x)
                         https?://
                             (?:tv|radio)\.nrk(?:super)?\.no/
-                            (?:serie/[^/]+|program)/
+                            (?:serie(?:/[^/]+){1,2}|program)/
                             (?![Ee]pisodes)%s
                             (?:/\d{2}-\d{2}-\d{4})?
                             (?:\#del=(?P<part_id>\d+))?
@@ -256,14 +256,14 @@ class NRKTVIE(NRKBaseIE):
     _API_HOSTS = ('psapi-ne.nrk.no', 'psapi-we.nrk.no')
     _TESTS = [{
         'url': 'https://tv.nrk.no/serie/20-spoersmaal-tv/MUHH48000314/23-05-2014',
-        'md5': '4e9ca6629f09e588ed240fb11619922a',
+        'md5': '9a167e54d04671eb6317a37b7bc8a280',
         'info_dict': {
             'id': 'MUHH48000314AA',
             'ext': 'mp4',
             'title': '20 spørsmål 23.05.2014',
             'description': 'md5:bdea103bc35494c143c6a9acdd84887a',
             'duration': 1741,
-            'series': '20 spørsmål - TV',
+            'series': '20 spørsmål',
             'episode': '23.05.2014',
         },
     }, {
@@ -301,7 +301,7 @@ class NRKTVIE(NRKBaseIE):
                 'id': 'MSPO40010515AH',
                 'ext': 'mp4',
                 'title': 'Sprint fri teknikk, kvinner og menn 06.01.2015 (Part 1)',
-                'description': 'md5:c03aba1e917561eface5214020551b7a',
+                'description': 'md5:1f97a41f05a9486ee00c56f35f82993d',
                 'duration': 772,
                 'series': 'Tour de Ski',
                 'episode': '06.01.2015',
@@ -314,7 +314,7 @@ class NRKTVIE(NRKBaseIE):
                 'id': 'MSPO40010515BH',
                 'ext': 'mp4',
                 'title': 'Sprint fri teknikk, kvinner og menn 06.01.2015 (Part 2)',
-                'description': 'md5:c03aba1e917561eface5214020551b7a',
+                'description': 'md5:1f97a41f05a9486ee00c56f35f82993d',
                 'duration': 6175,
                 'series': 'Tour de Ski',
                 'episode': '06.01.2015',
@@ -326,7 +326,7 @@ class NRKTVIE(NRKBaseIE):
         'info_dict': {
             'id': 'MSPO40010515',
             'title': 'Sprint fri teknikk, kvinner og menn 06.01.2015',
-            'description': 'md5:c03aba1e917561eface5214020551b7a',
+            'description': 'md5:1f97a41f05a9486ee00c56f35f82993d',
         },
         'expected_warnings': ['Video is geo restricted'],
     }, {
@@ -362,6 +362,9 @@ class NRKTVIE(NRKBaseIE):
     }, {
         'url': 'https://radio.nrk.no/serie/dagsnytt/NPUB21019315/12-07-2015#',
         'only_matching': True,
+    }, {
+        'url': 'https://tv.nrk.no/serie/lindmo/2018/MUHU11006318/avspiller',
+        'only_matching': True,
     }]
 
 
@@ -403,21 +406,35 @@ class NRKTVSerieBaseIE(InfoExtractor):
     def _extract_series(self, webpage, display_id, fatal=True):
         config = self._parse_json(
             self._search_regex(
-                r'({.+?})\s*,\s*"[^"]+"\s*\)\s*</script>', webpage, 'config',
-                default='{}' if not fatal else NO_DEFAULT),
+                (r'INITIAL_DATA_*\s*=\s*({.+?})\s*;',
+                 r'({.+?})\s*,\s*"[^"]+"\s*\)\s*</script>'),
+                webpage, 'config', default='{}' if not fatal else NO_DEFAULT),
             display_id, fatal=False)
         if not config:
             return
-        return try_get(config, lambda x: x['series'], dict)
+        return try_get(
+            config,
+            (lambda x: x['initialState']['series'], lambda x: x['series']),
+            dict)
+
+    def _extract_seasons(self, seasons):
+        if not isinstance(seasons, list):
+            return []
+        entries = []
+        for season in seasons:
+            entries.extend(self._extract_episodes(season))
+        return entries
 
     def _extract_episodes(self, season):
-        entries = []
         if not isinstance(season, dict):
-            return entries
-        episodes = season.get('episodes')
-        if not isinstance(episodes, list):
-            return entries
-        for episode in episodes:
+            return []
+        return self._extract_entries(season.get('episodes'))
+
+    def _extract_entries(self, entry_list):
+        if not isinstance(entry_list, list):
+            return []
+        entries = []
+        for episode in entry_list:
             nrk_id = episode.get('prfId')
             if not nrk_id or not isinstance(nrk_id, compat_str):
                 continue
@@ -462,7 +479,7 @@ class NRKTVSeriesIE(NRKTVSerieBaseIE):
     _VALID_URL = r'https?://(?:tv|radio)\.nrk(?:super)?\.no/serie/(?P<id>[^/]+)'
     _ITEM_RE = r'(?:data-season=["\']|id=["\']season-)(?P<id>\d+)'
     _TESTS = [{
-        # new layout
+        # new layout, seasons
         'url': 'https://tv.nrk.no/serie/backstage',
         'info_dict': {
             'id': 'backstage',
@@ -471,20 +488,21 @@ class NRKTVSeriesIE(NRKTVSerieBaseIE):
         },
         'playlist_mincount': 60,
     }, {
-        # old layout
+        # new layout, instalments
         'url': 'https://tv.nrk.no/serie/groenn-glede',
         'info_dict': {
             'id': 'groenn-glede',
             'title': 'Grønn glede',
             'description': 'md5:7576e92ae7f65da6993cf90ee29e4608',
         },
-        'playlist_mincount': 9,
+        'playlist_mincount': 10,
     }, {
-        'url': 'http://tv.nrksuper.no/serie/labyrint',
+        # old layout
+        'url': 'https://tv.nrksuper.no/serie/labyrint',
         'info_dict': {
             'id': 'labyrint',
             'title': 'Labyrint',
-            'description': 'md5:58afd450974c89e27d5a19212eee7115',
+            'description': 'md5:318b597330fdac5959247c9b69fdb1ec',
         },
         'playlist_mincount': 3,
     }, {
@@ -517,11 +535,12 @@ class NRKTVSeriesIE(NRKTVSerieBaseIE):
             description = try_get(
                 series, lambda x: x['titles']['subtitle'], compat_str)
             entries = []
-            for season in series['seasons']:
-                entries.extend(self._extract_episodes(season))
+            entries.extend(self._extract_seasons(series.get('seasons')))
+            entries.extend(self._extract_entries(series.get('instalments')))
+            entries.extend(self._extract_episodes(series.get('extraMaterial')))
             return self.playlist_result(entries, series_id, title, description)
 
-        # Old layout (e.g. https://tv.nrk.no/serie/groenn-glede)
+        # Old layout (e.g. https://tv.nrksuper.no/serie/labyrint)
         entries = [
             self.url_result(
                 'https://tv.nrk.no/program/Episodes/{series}/{season}'.format(
@@ -533,6 +552,9 @@ class NRKTVSeriesIE(NRKTVSerieBaseIE):
             'seriestitle', webpage,
             'title', default=None) or self._og_search_title(
             webpage, fatal=False)
+        if title:
+            title = self._search_regex(
+                r'NRK (?:Super )?TV\s*[-–]\s*(.+)', title, 'title', default=title)
 
         description = self._html_search_meta(
             'series_description', webpage,
@@ -593,7 +615,7 @@ class NRKPlaylistIE(NRKPlaylistBaseIE):
             'title': 'Rivertonprisen til Karin Fossum',
             'description': 'Første kvinne på 15 år til å vinne krimlitteraturprisen.',
         },
-        'playlist_count': 5,
+        'playlist_count': 2,
     }]
 
     def _extract_title(self, webpage):
index 2d352f53f2908e0732d62e42cc09d457bfd5c77e..61ee77adbd3f47123d844c3731b8e8132afd1813 100644 (file)
@@ -11,20 +11,27 @@ from ..utils import (
 
 class NZZIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?nzz\.ch/(?:[^/]+/)*[^/?#]+-ld\.(?P<id>\d+)'
-    _TEST = {
+    _TESTS = [{
         'url': 'http://www.nzz.ch/zuerich/gymizyte/gymizyte-schreiben-schueler-heute-noch-diktate-ld.9153',
         'info_dict': {
             'id': '9153',
         },
         'playlist_mincount': 6,
-    }
+    }, {
+        'url': 'https://www.nzz.ch/video/nzz-standpunkte/cvp-auf-der-suche-nach-dem-mass-der-mitte-ld.1368112',
+        'info_dict': {
+            'id': '1368112',
+        },
+        'playlist_count': 1,
+    }]
 
     def _real_extract(self, url):
         page_id = self._match_id(url)
         webpage = self._download_webpage(url, page_id)
 
         entries = []
-        for player_element in re.findall(r'(<[^>]+class="kalturaPlayer"[^>]*>)', webpage):
+        for player_element in re.findall(
+                r'(<[^>]+class="kalturaPlayer[^"]*"[^>]*>)', webpage):
             player_params = extract_attributes(player_element)
             if player_params.get('data-type') not in ('kaltura_singleArticle',):
                 self.report_warning('Unsupported player type')
index 2473536fd8ac03a09afce39a3de4dfe771356dcb..cf51e4770db6d4e67f97ceeedb3eab966584ef3d 100644 (file)
@@ -243,7 +243,18 @@ class PhantomJSwrapper(object):
 
 
 class OpenloadIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?(?:openload\.(?:co|io|link)|oload\.(?:tv|stream|site|xyz|win|download|cloud|cc|icu|fun))/(?:f|embed)/(?P<id>[a-zA-Z0-9-_]+)'
+    _VALID_URL = r'''(?x)
+                    https?://
+                        (?P<host>
+                            (?:www\.)?
+                            (?:
+                                openload\.(?:co|io|link)|
+                                oload\.(?:tv|stream|site|xyz|win|download|cloud|cc|icu|fun)
+                            )
+                        )/
+                        (?:f|embed)/
+                        (?P<id>[a-zA-Z0-9-_]+)
+                    '''
 
     _TESTS = [{
         'url': 'https://openload.co/f/kUEfGclsU9o',
@@ -334,8 +345,11 @@ class OpenloadIE(InfoExtractor):
             webpage)
 
     def _real_extract(self, url):
-        video_id = self._match_id(url)
-        url_pattern = 'https://openload.co/%%s/%s/' % video_id
+        mobj = re.match(self._VALID_URL, url)
+        host = mobj.group('host')
+        video_id = mobj.group('id')
+
+        url_pattern = 'https://%s/%%s/%s/' % (host, video_id)
         headers = {
             'User-Agent': self._USER_AGENT,
         }
@@ -368,7 +382,7 @@ class OpenloadIE(InfoExtractor):
                            r'>\s*([\w~-]+~[a-f0-9:]+~[\w~-]+)'), webpage,
                           'stream URL'))
 
-        video_url = 'https://openload.co/stream/%s?mime=true' % decoded_id
+        video_url = 'https://%s/stream/%s?mime=true' % (host, decoded_id)
 
         title = self._og_search_title(webpage, default=None) or self._search_regex(
             r'<span[^>]+class=["\']title["\'][^>]*>([^<]+)', webpage,
@@ -379,7 +393,7 @@ class OpenloadIE(InfoExtractor):
         entry = entries[0] if entries else {}
         subtitles = entry.get('subtitles')
 
-        info_dict = {
+        return {
             'id': video_id,
             'title': title,
             'thumbnail': entry.get('thumbnail') or self._og_search_thumbnail(webpage, default=None),
@@ -388,4 +402,3 @@ class OpenloadIE(InfoExtractor):
             'subtitles': subtitles,
             'http_headers': headers,
         }
-        return info_dict
diff --git a/youtube_dl/extractor/outsidetv.py b/youtube_dl/extractor/outsidetv.py
new file mode 100644 (file)
index 0000000..c5333b0
--- /dev/null
@@ -0,0 +1,28 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+
+class OutsideTVIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?outsidetv\.com/(?:[^/]+/)*?play/[a-zA-Z0-9]{8}/\d+/\d+/(?P<id>[a-zA-Z0-9]{8})'
+    _TESTS = [{
+        'url': 'http://www.outsidetv.com/category/snow/play/ZjQYboH6/1/10/Hdg0jukV/4',
+        'md5': '192d968fedc10b2f70ec31865ffba0da',
+        'info_dict': {
+            'id': 'Hdg0jukV',
+            'ext': 'mp4',
+            'title': 'Home - Jackson Ep 1 | Arbor Snowboards',
+            'description': 'md5:41a12e94f3db3ca253b04bb1e8d8f4cd',
+            'upload_date': '20181225',
+            'timestamp': 1545742800,
+        }
+    }, {
+        'url': 'http://www.outsidetv.com/home/play/ZjQYboH6/1/10/Hdg0jukV/4',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        jw_media_id = self._match_id(url)
+        return self.url_result(
+            'jwplatform:' + jw_media_id, 'JWPlatform', jw_media_id)
index 56a2a1083a11275a02031b4233b44c76f2f6d3f8..1324137dfe44bef6bf09ca3899803beba88ddbe9 100644 (file)
@@ -24,9 +24,9 @@ class PacktPubBaseIE(InfoExtractor):
 
 
 class PacktPubIE(PacktPubBaseIE):
-    _VALID_URL = r'https?://(?:www\.)?packtpub\.com/mapt/video/[^/]+/(?P<course_id>\d+)/(?P<chapter_id>\d+)/(?P<id>\d+)'
+    _VALID_URL = r'https?://(?:(?:www\.)?packtpub\.com/mapt|subscription\.packtpub\.com)/video/[^/]+/(?P<course_id>\d+)/(?P<chapter_id>\d+)/(?P<id>\d+)'
 
-    _TEST = {
+    _TESTS = [{
         'url': 'https://www.packtpub.com/mapt/video/web-development/9781787122215/20528/20530/Project+Intro',
         'md5': '1e74bd6cfd45d7d07666f4684ef58f70',
         'info_dict': {
@@ -37,7 +37,10 @@ class PacktPubIE(PacktPubBaseIE):
             'timestamp': 1490918400,
             'upload_date': '20170331',
         },
-    }
+    }, {
+        'url': 'https://subscription.packtpub.com/video/web_development/9781787122215/20528/20530/project-intro',
+        'only_matching': True,
+    }]
     _NETRC_MACHINE = 'packtpub'
     _TOKEN = None
 
@@ -110,15 +113,18 @@ class PacktPubIE(PacktPubBaseIE):
 
 
 class PacktPubCourseIE(PacktPubBaseIE):
-    _VALID_URL = r'(?P<url>https?://(?:www\.)?packtpub\.com/mapt/video/[^/]+/(?P<id>\d+))'
-    _TEST = {
+    _VALID_URL = r'(?P<url>https?://(?:(?:www\.)?packtpub\.com/mapt|subscription\.packtpub\.com)/video/[^/]+/(?P<id>\d+))'
+    _TESTS = [{
         'url': 'https://www.packtpub.com/mapt/video/web-development/9781787122215',
         'info_dict': {
             'id': '9781787122215',
             'title': 'Learn Nodejs by building 12 projects [Video]',
         },
         'playlist_count': 90,
-    }
+    }, {
+        'url': 'https://subscription.packtpub.com/video/web_development/9781787122215',
+        'only_matching': True,
+    }]
 
     @classmethod
     def suitable(cls, url):
index 2366dfb34748c31e0046901fe75ca558c7643346..8099ef1d6bd609a81f0c10e7a33f26311c63fcc8 100644 (file)
@@ -1,6 +1,7 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
+import re
 import time
 
 from .common import InfoExtractor
@@ -15,7 +16,7 @@ from ..utils import (
 
 
 class PicartoIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www.)?picarto\.tv/(?P<id>[a-zA-Z0-9]+)'
+    _VALID_URL = r'https?://(?:www.)?picarto\.tv/(?P<id>[a-zA-Z0-9]+)(?:/(?P<token>[a-zA-Z0-9]+))?'
     _TEST = {
         'url': 'https://picarto.tv/Setz',
         'info_dict': {
@@ -33,20 +34,14 @@ class PicartoIE(InfoExtractor):
         return False if PicartoVodIE.suitable(url) else super(PicartoIE, cls).suitable(url)
 
     def _real_extract(self, url):
-        channel_id = self._match_id(url)
-        stream_page = self._download_webpage(url, channel_id)
+        mobj = re.match(self._VALID_URL, url)
+        channel_id = mobj.group('id')
 
-        if '>This channel does not exist' in stream_page:
-            raise ExtractorError(
-                'Channel %s does not exist' % channel_id, expected=True)
+        metadata = self._download_json(
+            'https://api.picarto.tv/v1/channel/name/' + channel_id,
+            channel_id)
 
-        player = self._parse_json(
-            self._search_regex(
-                r'(?s)playerSettings\[\d+\]\s*=\s*(\{.+?\}\s*\n)', stream_page,
-                'player settings'),
-            channel_id, transform_source=js_to_json)
-
-        if player.get('online') is False:
+        if metadata.get('online') is False:
             raise ExtractorError('Stream is offline', expected=True)
 
         cdn_data = self._download_json(
@@ -54,20 +49,13 @@ class PicartoIE(InfoExtractor):
             data=urlencode_postdata({'loadbalancinginfo': channel_id}),
             note='Downloading load balancing info')
 
-        def get_event(key):
-            return try_get(player, lambda x: x['event'][key], compat_str) or ''
-
+        token = mobj.group('token') or 'public'
         params = {
-            'token': player.get('token') or '',
-            'ticket': get_event('ticket'),
             'con': int(time.time() * 1000),
-            'type': get_event('ticket'),
-            'scope': get_event('scope'),
+            'token': token,
         }
 
         prefered_edge = cdn_data.get('preferedEdge')
-        default_tech = player.get('defaultTech')
-
         formats = []
 
         for edge in cdn_data['edges']:
@@ -81,8 +69,6 @@ class PicartoIE(InfoExtractor):
                 preference = 0
                 if edge_id == prefered_edge:
                     preference += 1
-                if tech_type == default_tech:
-                    preference += 1
                 format_id = []
                 if edge_id:
                     format_id.append(edge_id)
@@ -109,7 +95,7 @@ class PicartoIE(InfoExtractor):
                     continue
         self._sort_formats(formats)
 
-        mature = player.get('mature')
+        mature = metadata.get('adult')
         if mature is None:
             age_limit = None
         else:
@@ -117,9 +103,11 @@ class PicartoIE(InfoExtractor):
 
         return {
             'id': channel_id,
-            'title': self._live_title(channel_id),
+            'title': self._live_title(metadata.get('title') or channel_id),
             'is_live': True,
-            'thumbnail': player.get('vodThumb'),
+            'thumbnail': try_get(metadata, lambda x: x['thumbnails']['web']),
+            'channel': channel_id,
+            'channel_url': 'https://picarto.tv/%s' % channel_id,
             'age_limit': age_limit,
             'formats': formats,
         }
diff --git a/youtube_dl/extractor/playplustv.py b/youtube_dl/extractor/playplustv.py
new file mode 100644 (file)
index 0000000..1e30ab2
--- /dev/null
@@ -0,0 +1,109 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import json
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_HTTPError
+from ..utils import (
+    clean_html,
+    ExtractorError,
+    int_or_none,
+    PUTRequest,
+)
+
+
+class PlayPlusTVIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?playplus\.(?:com|tv)/VOD/(?P<project_id>[0-9]+)/(?P<id>[0-9a-f]{32})'
+    _TEST = {
+        'url': 'https://www.playplus.tv/VOD/7572/db8d274a5163424e967f35a30ddafb8e',
+        'md5': 'd078cb89d7ab6b9df37ce23c647aef72',
+        'info_dict': {
+            'id': 'db8d274a5163424e967f35a30ddafb8e',
+            'ext': 'mp4',
+            'title': 'Capítulo 179 - Final',
+            'description': 'md5:01085d62d8033a1e34121d3c3cabc838',
+            'timestamp': 1529992740,
+            'upload_date': '20180626',
+        },
+        'skip': 'Requires account credential',
+    }
+    _NETRC_MACHINE = 'playplustv'
+    _GEO_COUNTRIES = ['BR']
+    _token = None
+    _profile_id = None
+
+    def _call_api(self, resource, video_id=None, query=None):
+        return self._download_json('https://api.playplus.tv/api/media/v2/get' + resource, video_id, headers={
+            'Authorization': 'Bearer ' + self._token,
+        }, query=query)
+
+    def _real_initialize(self):
+        email, password = self._get_login_info()
+        if email is None:
+            self.raise_login_required()
+
+        req = PUTRequest(
+            'https://api.playplus.tv/api/web/login', json.dumps({
+                'email': email,
+                'password': password,
+            }).encode(), {
+                'Content-Type': 'application/json; charset=utf-8',
+            })
+
+        try:
+            self._token = self._download_json(req, None)['token']
+        except ExtractorError as e:
+            if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
+                raise ExtractorError(self._parse_json(
+                    e.cause.read(), None)['errorMessage'], expected=True)
+            raise
+
+        self._profile = self._call_api('Profiles')['list'][0]['_id']
+
+    def _real_extract(self, url):
+        project_id, media_id = re.match(self._VALID_URL, url).groups()
+        media = self._call_api(
+            'Media', media_id, {
+                'profileId': self._profile,
+                'projectId': project_id,
+                'mediaId': media_id,
+            })['obj']
+        title = media['title']
+
+        formats = []
+        for f in media.get('files', []):
+            f_url = f.get('url')
+            if not f_url:
+                continue
+            file_info = f.get('fileInfo') or {}
+            formats.append({
+                'url': f_url,
+                'width': int_or_none(file_info.get('width')),
+                'height': int_or_none(file_info.get('height')),
+            })
+        self._sort_formats(formats)
+
+        thumbnails = []
+        for thumb in media.get('thumbs', []):
+            thumb_url = thumb.get('url')
+            if not thumb_url:
+                continue
+            thumbnails.append({
+                'url': thumb_url,
+                'width': int_or_none(thumb.get('width')),
+                'height': int_or_none(thumb.get('height')),
+            })
+
+        return {
+            'id': media_id,
+            'title': title,
+            'formats': formats,
+            'thumbnails': thumbnails,
+            'description': clean_html(media.get('description')) or media.get('shortDescription'),
+            'timestamp': int_or_none(media.get('publishDate'), 1000),
+            'view_count': int_or_none(media.get('numberOfViews')),
+            'comment_count': int_or_none(media.get('numberOfComments')),
+            'tags': media.get('tags'),
+        }
index 19eaf389f829c2c9b0956e89615119dbab6f0593..e377de19664524844699e4046d3ebe48fcbc0167 100644 (file)
@@ -27,7 +27,7 @@ class PornHubIE(InfoExtractor):
     _VALID_URL = r'''(?x)
                     https?://
                         (?:
-                            (?:[^/]+\.)?pornhub\.com/(?:(?:view_video\.php|video/show)\?viewkey=|embed/)|
+                            (?:[^/]+\.)?(?P<host>pornhub\.(?:com|net))/(?:(?:view_video\.php|video/show)\?viewkey=|embed/)|
                             (?:www\.)?thumbzilla\.com/video/
                         )
                         (?P<id>[\da-z]+)
@@ -121,12 +121,15 @@ class PornHubIE(InfoExtractor):
     }, {
         'url': 'http://www.pornhub.com/video/show?viewkey=648719015',
         'only_matching': True,
+    }, {
+        'url': 'https://www.pornhub.net/view_video.php?viewkey=203640933',
+        'only_matching': True,
     }]
 
     @staticmethod
     def _extract_urls(webpage):
         return re.findall(
-            r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//(?:www\.)?pornhub\.com/embed/[\da-z]+)',
+            r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//(?:www\.)?pornhub\.(?:com|net)/embed/[\da-z]+)',
             webpage)
 
     def _extract_count(self, pattern, webpage, name):
@@ -134,14 +137,16 @@ class PornHubIE(InfoExtractor):
             pattern, webpage, '%s count' % name, fatal=False))
 
     def _real_extract(self, url):
-        video_id = self._match_id(url)
+        mobj = re.match(self._VALID_URL, url)
+        host = mobj.group('host') or 'pornhub.com'
+        video_id = mobj.group('id')
 
-        self._set_cookie('pornhub.com', 'age_verified', '1')
+        self._set_cookie(host, 'age_verified', '1')
 
         def dl_webpage(platform):
-            self._set_cookie('pornhub.com', 'platform', platform)
+            self._set_cookie(host, 'platform', platform)
             return self._download_webpage(
-                'http://www.pornhub.com/view_video.php?viewkey=%s' % video_id,
+                'http://www.%s/view_video.php?viewkey=%s' % (host, video_id),
                 video_id, 'Downloading %s webpage' % platform)
 
         webpage = dl_webpage('pc')
@@ -303,7 +308,7 @@ class PornHubIE(InfoExtractor):
 
 
 class PornHubPlaylistBaseIE(InfoExtractor):
-    def _extract_entries(self, webpage):
+    def _extract_entries(self, webpage, host):
         # Only process container div with main playlist content skipping
         # drop-down menu that uses similar pattern for videos (see
         # https://github.com/rg3/youtube-dl/issues/11594).
@@ -313,7 +318,7 @@ class PornHubPlaylistBaseIE(InfoExtractor):
 
         return [
             self.url_result(
-                'http://www.pornhub.com/%s' % video_url,
+                'http://www.%s/%s' % (host, video_url),
                 PornHubIE.ie_key(), video_title=title)
             for video_url, title in orderedSet(re.findall(
                 r'href="/?(view_video\.php\?.*\bviewkey=[\da-z]+[^"]*)"[^>]*\s+title="([^"]+)"',
@@ -321,11 +326,13 @@ class PornHubPlaylistBaseIE(InfoExtractor):
         ]
 
     def _real_extract(self, url):
-        playlist_id = self._match_id(url)
+        mobj = re.match(self._VALID_URL, url)
+        host = mobj.group('host')
+        playlist_id = mobj.group('id')
 
         webpage = self._download_webpage(url, playlist_id)
 
-        entries = self._extract_entries(webpage)
+        entries = self._extract_entries(webpage, host)
 
         playlist = self._parse_json(
             self._search_regex(
@@ -340,7 +347,7 @@ class PornHubPlaylistBaseIE(InfoExtractor):
 
 
 class PornHubPlaylistIE(PornHubPlaylistBaseIE):
-    _VALID_URL = r'https?://(?:[^/]+\.)?pornhub\.com/playlist/(?P<id>\d+)'
+    _VALID_URL = r'https?://(?:[^/]+\.)?(?P<host>pornhub\.(?:com|net))/playlist/(?P<id>\d+)'
     _TESTS = [{
         'url': 'http://www.pornhub.com/playlist/4667351',
         'info_dict': {
@@ -355,7 +362,7 @@ class PornHubPlaylistIE(PornHubPlaylistBaseIE):
 
 
 class PornHubUserVideosIE(PornHubPlaylistBaseIE):
-    _VALID_URL = r'https?://(?:[^/]+\.)?pornhub\.com/(?:(?:user|channel)s|model|pornstar)/(?P<id>[^/]+)/videos'
+    _VALID_URL = r'https?://(?:[^/]+\.)?(?P<host>pornhub\.(?:com|net))/(?:(?:user|channel)s|model|pornstar)/(?P<id>[^/]+)/videos'
     _TESTS = [{
         'url': 'http://www.pornhub.com/users/zoe_ph/videos/public',
         'info_dict': {
@@ -396,7 +403,9 @@ class PornHubUserVideosIE(PornHubPlaylistBaseIE):
     }]
 
     def _real_extract(self, url):
-        user_id = self._match_id(url)
+        mobj = re.match(self._VALID_URL, url)
+        host = mobj.group('host')
+        user_id = mobj.group('id')
 
         entries = []
         for page_num in itertools.count(1):
@@ -408,7 +417,7 @@ class PornHubUserVideosIE(PornHubPlaylistBaseIE):
                 if isinstance(e.cause, compat_HTTPError) and e.cause.code == 404:
                     break
                 raise
-            page_entries = self._extract_entries(webpage)
+            page_entries = self._extract_entries(webpage, host)
             if not page_entries:
                 break
             entries.extend(page_entries)
index e921ca3e6204a3f4dcd892c3e18b00e6148d4cc8..c3623edcc7adc3fad26d769d79c7088fc98c2593 100644 (file)
@@ -1,38 +1,46 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
+import re
+
 from .common import InfoExtractor
 from .brightcove import BrightcoveLegacyIE
 from ..compat import (
     compat_parse_qs,
     compat_urlparse,
 )
+from ..utils import smuggle_url
 
 
 class RMCDecouverteIE(InfoExtractor):
-    _VALID_URL = r'https?://rmcdecouverte\.bfmtv\.com/mediaplayer-replay.*?\bid=(?P<id>\d+)'
+    _VALID_URL = r'https?://rmcdecouverte\.bfmtv\.com/(?:(?:[^/]+/)*program_(?P<id>\d+)|(?P<live_id>mediaplayer-direct))'
 
-    _TEST = {
-        'url': 'http://rmcdecouverte.bfmtv.com/mediaplayer-replay/?id=13502&title=AQUAMEN:LES%20ROIS%20DES%20AQUARIUMS%20:UN%20DELICIEUX%20PROJET',
+    _TESTS = [{
+        'url': 'https://rmcdecouverte.bfmtv.com/wheeler-dealers-occasions-a-saisir/program_2566/',
         'info_dict': {
-            'id': '5419055995001',
+            'id': '5983675500001',
             'ext': 'mp4',
-            'title': 'UN DELICIEUX PROJET',
-            'description': 'md5:63610df7c8b1fc1698acd4d0d90ba8b5',
+            'title': 'CORVETTE',
+            'description': 'md5:c1e8295521e45ffebf635d6a7658f506',
             'uploader_id': '1969646226001',
-            'upload_date': '20170502',
-            'timestamp': 1493745308,
+            'upload_date': '20181226',
+            'timestamp': 1545861635,
         },
         'params': {
             'skip_download': True,
         },
         'skip': 'only available for a week',
-    }
+    }, {
+        # live, geo restricted, bypassable
+        'url': 'https://rmcdecouverte.bfmtv.com/mediaplayer-direct/',
+        'only_matching': True,
+    }]
     BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/1969646226001/default_default/index.html?videoId=%s'
 
     def _real_extract(self, url):
-        video_id = self._match_id(url)
-        webpage = self._download_webpage(url, video_id)
+        mobj = re.match(self._VALID_URL, url)
+        display_id = mobj.group('id') or mobj.group('live_id')
+        webpage = self._download_webpage(url, display_id)
         brightcove_legacy_url = BrightcoveLegacyIE._extract_brightcove_url(webpage)
         if brightcove_legacy_url:
             brightcove_id = compat_parse_qs(compat_urlparse.urlparse(
@@ -41,5 +49,7 @@ class RMCDecouverteIE(InfoExtractor):
             brightcove_id = self._search_regex(
                 r'data-video-id=["\'](\d+)', webpage, 'brightcove id')
         return self.url_result(
-            self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, 'BrightcoveNew',
-            brightcove_id)
+            smuggle_url(
+                self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id,
+                {'geo_countries': ['FR']}),
+            'BrightcoveNew', brightcove_id)
index a6fac6c35d00327c2858f9aead301845c4af572a..1fbc72915ea9b45cfcfe4f17e418a5958d44093c 100644 (file)
@@ -8,7 +8,10 @@ from ..compat import compat_HTTPError
 from ..utils import (
     float_or_none,
     parse_iso8601,
+    str_or_none,
+    try_get,
     unescapeHTML,
+    url_or_none,
     ExtractorError,
 )
 
@@ -17,65 +20,87 @@ class RteBaseIE(InfoExtractor):
     def _real_extract(self, url):
         item_id = self._match_id(url)
 
-        try:
-            json_string = self._download_json(
-                'http://www.rte.ie/rteavgen/getplaylist/?type=web&format=json&id=' + item_id,
-                item_id)
-        except ExtractorError as ee:
-            if isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 404:
-                error_info = self._parse_json(ee.cause.read().decode(), item_id, fatal=False)
-                if error_info:
-                    raise ExtractorError(
-                        '%s said: %s' % (self.IE_NAME, error_info['message']),
-                        expected=True)
-            raise
-
-        # NB the string values in the JSON are stored using XML escaping(!)
-        show = json_string['shows'][0]
-        title = unescapeHTML(show['title'])
-        description = unescapeHTML(show.get('description'))
-        thumbnail = show.get('thumbnail')
-        duration = float_or_none(show.get('duration'), 1000)
-        timestamp = parse_iso8601(show.get('published'))
-
-        mg = show['media:group'][0]
-
+        info_dict = {}
         formats = []
 
-        if mg.get('url'):
-            m = re.match(r'(?P<url>rtmpe?://[^/]+)/(?P<app>.+)/(?P<playpath>mp4:.*)', mg['url'])
-            if m:
-                m = m.groupdict()
-                formats.append({
-                    'url': m['url'] + '/' + m['app'],
-                    'app': m['app'],
-                    'play_path': m['playpath'],
-                    'player_url': url,
-                    'ext': 'flv',
-                    'format_id': 'rtmp',
-                })
-
-        if mg.get('hls_server') and mg.get('hls_url'):
-            formats.extend(self._extract_m3u8_formats(
-                mg['hls_server'] + mg['hls_url'], item_id, 'mp4',
-                entry_protocol='m3u8_native', m3u8_id='hls', fatal=False))
-
-        if mg.get('hds_server') and mg.get('hds_url'):
-            formats.extend(self._extract_f4m_formats(
-                mg['hds_server'] + mg['hds_url'], item_id,
-                f4m_id='hds', fatal=False))
+        ENDPOINTS = (
+            'https://feeds.rasset.ie/rteavgen/player/playlist?type=iptv&format=json&showId=',
+            'http://www.rte.ie/rteavgen/getplaylist/?type=web&format=json&id=',
+        )
+
+        for num, ep_url in enumerate(ENDPOINTS, start=1):
+            try:
+                data = self._download_json(ep_url + item_id, item_id)
+            except ExtractorError as ee:
+                if num < len(ENDPOINTS) or formats:
+                    continue
+                if isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 404:
+                    error_info = self._parse_json(ee.cause.read().decode(), item_id, fatal=False)
+                    if error_info:
+                        raise ExtractorError(
+                            '%s said: %s' % (self.IE_NAME, error_info['message']),
+                            expected=True)
+                raise
+
+            # NB the string values in the JSON are stored using XML escaping(!)
+            show = try_get(data, lambda x: x['shows'][0], dict)
+            if not show:
+                continue
+
+            if not info_dict:
+                title = unescapeHTML(show['title'])
+                description = unescapeHTML(show.get('description'))
+                thumbnail = show.get('thumbnail')
+                duration = float_or_none(show.get('duration'), 1000)
+                timestamp = parse_iso8601(show.get('published'))
+                info_dict = {
+                    'id': item_id,
+                    'title': title,
+                    'description': description,
+                    'thumbnail': thumbnail,
+                    'timestamp': timestamp,
+                    'duration': duration,
+                }
+
+            mg = try_get(show, lambda x: x['media:group'][0], dict)
+            if not mg:
+                continue
+
+            if mg.get('url'):
+                m = re.match(r'(?P<url>rtmpe?://[^/]+)/(?P<app>.+)/(?P<playpath>mp4:.*)', mg['url'])
+                if m:
+                    m = m.groupdict()
+                    formats.append({
+                        'url': m['url'] + '/' + m['app'],
+                        'app': m['app'],
+                        'play_path': m['playpath'],
+                        'player_url': url,
+                        'ext': 'flv',
+                        'format_id': 'rtmp',
+                    })
+
+            if mg.get('hls_server') and mg.get('hls_url'):
+                formats.extend(self._extract_m3u8_formats(
+                    mg['hls_server'] + mg['hls_url'], item_id, 'mp4',
+                    entry_protocol='m3u8_native', m3u8_id='hls', fatal=False))
+
+            if mg.get('hds_server') and mg.get('hds_url'):
+                formats.extend(self._extract_f4m_formats(
+                    mg['hds_server'] + mg['hds_url'], item_id,
+                    f4m_id='hds', fatal=False))
+
+            mg_rte_server = str_or_none(mg.get('rte:server'))
+            mg_url = str_or_none(mg.get('url'))
+            if mg_rte_server and mg_url:
+                hds_url = url_or_none(mg_rte_server + mg_url)
+                if hds_url:
+                    formats.extend(self._extract_f4m_formats(
+                        hds_url, item_id, f4m_id='hds', fatal=False))
 
         self._sort_formats(formats)
 
-        return {
-            'id': item_id,
-            'title': title,
-            'description': description,
-            'thumbnail': thumbnail,
-            'timestamp': timestamp,
-            'duration': duration,
-            'formats': formats,
-        }
+        info_dict['formats'] = formats
+        return info_dict
 
 
 class RteIE(RteBaseIE):
index 9fa8688f838c902de682ed69e1caedc93ed5f4bc..f530f0083faf5ccc60c1f24a9a0b731157271199 100644 (file)
@@ -65,7 +65,8 @@ class RuutuIE(InfoExtractor):
         video_id = self._match_id(url)
 
         video_xml = self._download_xml(
-            'http://gatling.ruutu.fi/media-xml-cache?id=%s' % video_id, video_id)
+            'https://gatling.nelonenmedia.fi/media-xml-cache', video_id,
+            query={'id': video_id})
 
         formats = []
         processed_urls = []
index 30e2a38b45f0c559b14e18c5e317e438a8d831f4..c0d32a1b9b5a403efdde67bc38c8c29d8e43ebe7 100644 (file)
@@ -15,10 +15,10 @@ from ..utils import (
 
 
 class SafariBaseIE(InfoExtractor):
-    _LOGIN_URL = 'https://www.safaribooksonline.com/accounts/login/'
+    _LOGIN_URL = 'https://learning.oreilly.com/accounts/login/'
     _NETRC_MACHINE = 'safari'
 
-    _API_BASE = 'https://www.safaribooksonline.com/api/v1'
+    _API_BASE = 'https://learning.oreilly.com/api/v1'
     _API_FORMAT = 'json'
 
     LOGGED_IN = False
@@ -76,7 +76,7 @@ class SafariIE(SafariBaseIE):
     IE_DESC = 'safaribooksonline.com online video'
     _VALID_URL = r'''(?x)
                         https?://
-                            (?:www\.)?safaribooksonline\.com/
+                            (?:www\.)?(?:safaribooksonline|learning\.oreilly)\.com/
                             (?:
                                 library/view/[^/]+/(?P<course_id>[^/]+)/(?P<part>[^/?\#&]+)\.html|
                                 videos/[^/]+/[^/]+/(?P<reference_id>[^-]+-[^/?\#&]+)
@@ -104,6 +104,9 @@ class SafariIE(SafariBaseIE):
     }, {
         'url': 'https://www.safaribooksonline.com/videos/python-programming-language/9780134217314/9780134217314-PYMC_13_00',
         'only_matching': True,
+    }, {
+        'url': 'https://learning.oreilly.com/videos/hadoop-fundamentals-livelessons/9780133392838/9780133392838-00_SeriesIntro',
+        'only_matching': True,
     }]
 
     _PARTNER_ID = '1926081'
@@ -160,7 +163,7 @@ class SafariIE(SafariBaseIE):
 
 class SafariApiIE(SafariBaseIE):
     IE_NAME = 'safari:api'
-    _VALID_URL = r'https?://(?:www\.)?safaribooksonline\.com/api/v1/book/(?P<course_id>[^/]+)/chapter(?:-content)?/(?P<part>[^/?#&]+)\.html'
+    _VALID_URL = r'https?://(?:www\.)?(?:safaribooksonline|learning\.oreilly)\.com/api/v1/book/(?P<course_id>[^/]+)/chapter(?:-content)?/(?P<part>[^/?#&]+)\.html'
 
     _TESTS = [{
         'url': 'https://www.safaribooksonline.com/api/v1/book/9780133392838/chapter/part00.html',
@@ -185,7 +188,7 @@ class SafariCourseIE(SafariBaseIE):
     _VALID_URL = r'''(?x)
                     https?://
                         (?:
-                            (?:www\.)?safaribooksonline\.com/
+                            (?:www\.)?(?:safaribooksonline|learning\.oreilly)\.com/
                             (?:
                                 library/view/[^/]+|
                                 api/v1/book|
@@ -213,6 +216,9 @@ class SafariCourseIE(SafariBaseIE):
     }, {
         'url': 'https://www.safaribooksonline.com/videos/python-programming-language/9780134217314',
         'only_matching': True,
+    }, {
+        'url': 'https://learning.oreilly.com/videos/hadoop-fundamentals-livelessons/9780133392838',
+        'only_matching': True,
     }]
 
     @classmethod
index 30f9cf8245856398239e88e5e8454c1df4bd8c3f..21e44b69abaf555abbb1faa9195779e80c44fda9 100644 (file)
@@ -30,8 +30,5 @@ class SaveFromIE(InfoExtractor):
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
         video_id = os.path.splitext(url.split('/')[-1])[0]
-        return {
-            '_type': 'url',
-            'id': video_id,
-            'url': mobj.group('url'),
-        }
+
+        return self.url_result(mobj.group('url'), video_id=video_id)
index 4023aeef81e4b4094744b89842c3aab5576a61ab..8b3275735b1638b98c9c64f8360a6498c525a8f7 100644 (file)
@@ -19,7 +19,7 @@ class ScrippsNetworksWatchIE(AWSIE):
     _VALID_URL = r'''(?x)
                     https?://
                         watch\.
-                        (?P<site>hgtv|foodnetwork|travelchannel|diynetwork|cookingchanneltv|geniuskitchen)\.com/
+                        (?P<site>geniuskitchen)\.com/
                         (?:
                             player\.[A-Z0-9]+\.html\#|
                             show/(?:[^/]+/){2}|
@@ -28,38 +28,23 @@ class ScrippsNetworksWatchIE(AWSIE):
                         (?P<id>\d+)
                     '''
     _TESTS = [{
-        'url': 'http://watch.hgtv.com/show/HGTVE/Best-Ever-Treehouses/2241515/Best-Ever-Treehouses/',
-        'md5': '26545fd676d939954c6808274bdb905a',
+        'url': 'http://watch.geniuskitchen.com/player/3787617/Ample-Hills-Ice-Cream-Bike/',
         'info_dict': {
-            'id': '4173834',
+            'id': '4194875',
             'ext': 'mp4',
-            'title': 'Best Ever Treehouses',
-            'description': "We're searching for the most over the top treehouses.",
+            'title': 'Ample Hills Ice Cream Bike',
+            'description': 'Courtney Rada churns up a signature GK Now ice cream with The Scoopmaster.',
             'uploader': 'ANV',
-            'upload_date': '20170922',
-            'timestamp': 1506056400,
+            'upload_date': '20171011',
+            'timestamp': 1507698000,
         },
         'params': {
             'skip_download': True,
         },
         'add_ie': [AnvatoIE.ie_key()],
-    }, {
-        'url': 'http://watch.diynetwork.com/show/DSAL/Salvage-Dawgs/2656646/Covington-Church/',
-        'only_matching': True,
-    }, {
-        'url': 'http://watch.diynetwork.com/player.HNT.html#2656646',
-        'only_matching': True,
-    }, {
-        'url': 'http://watch.geniuskitchen.com/player/3787617/Ample-Hills-Ice-Cream-Bike/',
-        'only_matching': True,
     }]
 
     _SNI_TABLE = {
-        'hgtv': 'hgtv',
-        'diynetwork': 'diy',
-        'foodnetwork': 'food',
-        'cookingchanneltv': 'cook',
-        'travelchannel': 'trav',
         'geniuskitchen': 'genius',
     }
 
index b2250afddd43ee01d9552ddd41a9fb5067c5335e..931a0f70e0e0924689dfb6b71c374e6fa2c14457 100644 (file)
@@ -5,6 +5,7 @@ from ..compat import compat_b64decode
 from ..utils import (
     ExtractorError,
     int_or_none,
+    url_or_none,
     urlencode_postdata,
 )
 
@@ -86,9 +87,16 @@ class VivoIE(SharedBaseIE):
     }
 
     def _extract_video_url(self, webpage, video_id, *args):
+        def decode_url(encoded_url):
+            return compat_b64decode(encoded_url).decode('utf-8')
+
+        stream_url = url_or_none(decode_url(self._search_regex(
+            r'data-stream\s*=\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
+            'stream url', default=None, group='url')))
+        if stream_url:
+            return stream_url
         return self._parse_json(
             self._search_regex(
                 r'InitializeStream\s*\(\s*(["\'])(?P<url>(?:(?!\1).)+)\1',
                 webpage, 'stream', group='url'),
-            video_id,
-            transform_source=lambda x: compat_b64decode(x).decode('utf-8'))[0]
+            video_id, transform_source=decode_url)[0]
index 207ab44771aa76be0ee8581e85cecac3993f86a6..0c4f865ef8af5c6a4c4141e854340a85c254ac50 100644 (file)
@@ -64,7 +64,7 @@ class SixPlayIE(InfoExtractor):
         for asset in clip_data['assets']:
             asset_url = asset.get('full_physical_path')
             protocol = asset.get('protocol')
-            if not asset_url or protocol == 'primetime' or asset_url in urls:
+            if not asset_url or protocol == 'primetime' or asset.get('type') == 'usp_hlsfp_h264' or asset_url in urls:
                 continue
             urls.append(asset_url)
             container = asset.get('video_container')
@@ -81,19 +81,17 @@ class SixPlayIE(InfoExtractor):
                         if not urlh:
                             continue
                         asset_url = urlh.geturl()
-                    asset_url = re.sub(r'/([^/]+)\.ism/[^/]*\.m3u8', r'/\1.ism/\1.m3u8', asset_url)
-                    formats.extend(self._extract_m3u8_formats(
-                        asset_url, video_id, 'mp4', 'm3u8_native',
-                        m3u8_id='hls', fatal=False))
-                    formats.extend(self._extract_f4m_formats(
-                        asset_url.replace('.m3u8', '.f4m'),
-                        video_id, f4m_id='hds', fatal=False))
-                    formats.extend(self._extract_mpd_formats(
-                        asset_url.replace('.m3u8', '.mpd'),
-                        video_id, mpd_id='dash', fatal=False))
-                    formats.extend(self._extract_ism_formats(
-                        re.sub(r'/[^/]+\.m3u8', '/Manifest', asset_url),
-                        video_id, ism_id='mss', fatal=False))
+                    for i in range(3, 0, -1):
+                        asset_url = asset_url = asset_url.replace('_sd1/', '_sd%d/' % i)
+                        m3u8_formats = self._extract_m3u8_formats(
+                            asset_url, video_id, 'mp4', 'm3u8_native',
+                            m3u8_id='hls', fatal=False)
+                        formats.extend(m3u8_formats)
+                        formats.extend(self._extract_mpd_formats(
+                            asset_url.replace('.m3u8', '.mpd'),
+                            video_id, mpd_id='dash', fatal=False))
+                        if m3u8_formats:
+                            break
                 else:
                     formats.extend(self._extract_m3u8_formats(
                         asset_url, video_id, 'mp4', 'm3u8_native',
index 5b4aaac6febdcb82c19eb82777a1f2fb72001eeb..b7f8ac7368724b64d48632ed4e8fedeca0e901f5 100644 (file)
@@ -26,7 +26,7 @@ class SkylineWebcamsIE(InfoExtractor):
         webpage = self._download_webpage(url, video_id)
 
         stream_url = self._search_regex(
-            r'url\s*:\s*(["\'])(?P<url>(?:https?:)?//.+?\.m3u8.*?)\1', webpage,
+            r'(?:url|source)\s*:\s*(["\'])(?P<url>(?:https?:)?//.+?\.m3u8.*?)\1', webpage,
             'stream url', group='url')
 
         title = self._og_search_title(webpage)
index 784f8ed6639d9ef2eb4e73e4c5645d309e6bb500..e8a7c65e0552b2904a48d141c6b950dd36cd44a4 100644 (file)
@@ -16,7 +16,7 @@ from ..utils import (
 
 
 class TBSIE(TurnerBaseIE):
-    _VALID_URL = r'https?://(?:www\.)?(?P<site>tbs|tntdrama)\.com/(?:movies|shows/[^/]+/(?:clips|season-\d+/episode-\d+))/(?P<id>[^/?#]+)'
+    _VALID_URL = r'https?://(?:www\.)?(?P<site>tbs|tntdrama)\.com(?P<path>/(?:movies|shows/[^/]+/(?:clips|season-\d+/episode-\d+))/(?P<id>[^/?#]+))'
     _TESTS = [{
         'url': 'http://www.tntdrama.com/shows/the-alienist/clips/monster',
         'info_dict': {
@@ -40,12 +40,12 @@ class TBSIE(TurnerBaseIE):
     }]
 
     def _real_extract(self, url):
-        site, display_id = re.match(self._VALID_URL, url).groups()
+        site, path, display_id = re.match(self._VALID_URL, url).groups()
         webpage = self._download_webpage(url, display_id)
         drupal_settings = self._parse_json(self._search_regex(
             r'<script[^>]+?data-drupal-selector="drupal-settings-json"[^>]*?>({.+?})</script>',
             webpage, 'drupal setting'), display_id)
-        video_data = drupal_settings['turner_playlist'][0]
+        video_data = next(v for v in drupal_settings['turner_playlist'] if v.get('url') == path)
 
         media_id = video_data['mediaID']
         title = video_data['title']
similarity index 53%
rename from youtube_dl/extractor/upskill.py
rename to youtube_dl/extractor/teachable.py
index 30297b4dd813585e63cb1c9f4d98097ec71e2f1c..47ac95ee8789ff0ba697cc56625e3a93f2cdded8 100644 (file)
@@ -14,20 +14,38 @@ from ..utils import (
 )
 
 
-class UpskillBaseIE(InfoExtractor):
-    _LOGIN_URL = 'http://upskillcourses.com/sign_in'
-    _NETRC_MACHINE = 'upskill'
+class TeachableBaseIE(InfoExtractor):
+    _NETRC_MACHINE = 'teachable'
+    _URL_PREFIX = 'teachable:'
+
+    _SITES = {
+        # Only notable ones here
+        'upskillcourses.com': 'upskill',
+        'academy.gns3.com': 'gns3',
+        'academyhacker.com': 'academyhacker',
+        'stackskills.com': 'stackskills',
+        'market.saleshacker.com': 'saleshacker',
+        'learnability.org': 'learnability',
+        'edurila.com': 'edurila',
+    }
+
+    _VALID_URL_SUB_TUPLE = (_URL_PREFIX, '|'.join(re.escape(site) for site in _SITES.keys()))
 
     def _real_initialize(self):
-        self._login()
+        self._logged_in = False
 
-    def _login(self):
-        username, password = self._get_login_info()
+    def _login(self, site):
+        if self._logged_in:
+            return
+
+        username, password = self._get_login_info(
+            netrc_machine=self._SITES.get(site, site))
         if username is None:
             return
 
         login_page, urlh = self._download_webpage_handle(
-            self._LOGIN_URL, None, 'Downloading login page')
+            'https://%s/sign_in' % site, None,
+            'Downloading %s login page' % site)
 
         login_url = compat_str(urlh.geturl())
 
@@ -46,18 +64,24 @@ class UpskillBaseIE(InfoExtractor):
             post_url = urljoin(login_url, post_url)
 
         response = self._download_webpage(
-            post_url, None, 'Logging in',
+            post_url, None, 'Logging in to %s' % site,
             data=urlencode_postdata(login_form),
             headers={
                 'Content-Type': 'application/x-www-form-urlencoded',
                 'Referer': login_url,
             })
 
+        if '>I accept the new Privacy Policy<' in response:
+            raise ExtractorError(
+                'Unable to login: %s asks you to accept new Privacy Policy. '
+                'Go to https://%s/ and accept.' % (site, site), expected=True)
+
         # Successful login
         if any(re.search(p, response) for p in (
                 r'class=["\']user-signout',
                 r'<a[^>]+\bhref=["\']/sign_out',
                 r'>\s*Log out\s*<')):
+            self._logged_in = True
             return
 
         message = get_element_by_class('alert', response)
@@ -68,8 +92,14 @@ class UpskillBaseIE(InfoExtractor):
         raise ExtractorError('Unable to log in')
 
 
-class UpskillIE(UpskillBaseIE):
-    _VALID_URL = r'https?://(?:www\.)?upskillcourses\.com/courses/[^/]+/lectures/(?P<id>\d+)'
+class TeachableIE(TeachableBaseIE):
+    _VALID_URL = r'''(?x)
+                    (?:
+                        %shttps?://(?P<site_t>[^/]+)|
+                        https?://(?:www\.)?(?P<site>%s)
+                    )
+                    /courses/[^/]+/lectures/(?P<id>\d+)
+                    ''' % TeachableBaseIE._VALID_URL_SUB_TUPLE
 
     _TESTS = [{
         'url': 'http://upskillcourses.com/courses/essential-web-developer-course/lectures/1747100',
@@ -77,7 +107,7 @@ class UpskillIE(UpskillBaseIE):
             'id': 'uzw6zw58or',
             'ext': 'mp4',
             'title': 'Welcome to the Course!',
-            'description': 'md5:8d66c13403783370af62ca97a7357bdd',
+            'description': 'md5:65edb0affa582974de4625b9cdea1107',
             'duration': 138.763,
             'timestamp': 1479846621,
             'upload_date': '20161122',
@@ -88,10 +118,37 @@ class UpskillIE(UpskillBaseIE):
     }, {
         'url': 'http://upskillcourses.com/courses/119763/lectures/1747100',
         'only_matching': True,
+    }, {
+        'url': 'https://academy.gns3.com/courses/423415/lectures/6885939',
+        'only_matching': True,
+    }, {
+        'url': 'teachable:https://upskillcourses.com/courses/essential-web-developer-course/lectures/1747100',
+        'only_matching': True,
     }]
 
+    @staticmethod
+    def _is_teachable(webpage):
+        return 'teachableTracker.linker:autoLink' in webpage and re.search(
+            r'<link[^>]+href=["\']https?://process\.fs\.teachablecdn\.com',
+            webpage)
+
+    @staticmethod
+    def _extract_url(webpage, source_url):
+        if not TeachableIE._is_teachable(webpage):
+            return
+        if re.match(r'https?://[^/]+/(?:courses|p)', source_url):
+            return '%s%s' % (TeachableBaseIE._URL_PREFIX, source_url)
+
     def _real_extract(self, url):
-        video_id = self._match_id(url)
+        mobj = re.match(self._VALID_URL, url)
+        site = mobj.group('site') or mobj.group('site_t')
+        video_id = mobj.group('id')
+
+        self._login(site)
+
+        prefixed = url.startswith(self._URL_PREFIX)
+        if prefixed:
+            url = url[len(self._URL_PREFIX):]
 
         webpage = self._download_webpage(url, video_id)
 
@@ -113,12 +170,18 @@ class UpskillIE(UpskillBaseIE):
         }
 
 
-class UpskillCourseIE(UpskillBaseIE):
-    _VALID_URL = r'https?://(?:www\.)?upskillcourses\.com/courses/(?:enrolled/)?(?P<id>[^/?#&]+)'
+class TeachableCourseIE(TeachableBaseIE):
+    _VALID_URL = r'''(?x)
+                        (?:
+                            %shttps?://(?P<site_t>[^/]+)|
+                            https?://(?:www\.)?(?P<site>%s)
+                        )
+                        /(?:courses|p)/(?:enrolled/)?(?P<id>[^/?#&]+)
+                    ''' % TeachableBaseIE._VALID_URL_SUB_TUPLE
     _TESTS = [{
         'url': 'http://upskillcourses.com/courses/essential-web-developer-course/',
         'info_dict': {
-            'id': '119763',
+            'id': 'essential-web-developer-course',
             'title': 'The Essential Web Developer Course (Free)',
         },
         'playlist_count': 192,
@@ -128,21 +191,37 @@ class UpskillCourseIE(UpskillBaseIE):
     }, {
         'url': 'http://upskillcourses.com/courses/enrolled/119763',
         'only_matching': True,
+    }, {
+        'url': 'https://academy.gns3.com/courses/enrolled/423415',
+        'only_matching': True,
+    }, {
+        'url': 'teachable:https://learn.vrdev.school/p/gear-vr-developer-mini',
+        'only_matching': True,
+    }, {
+        'url': 'teachable:https://filmsimplified.com/p/davinci-resolve-15-crash-course',
+        'only_matching': True,
     }]
 
     @classmethod
     def suitable(cls, url):
-        return False if UpskillIE.suitable(url) else super(
-            UpskillCourseIE, cls).suitable(url)
+        return False if TeachableIE.suitable(url) else super(
+            TeachableCourseIE, cls).suitable(url)
 
     def _real_extract(self, url):
-        course_id = self._match_id(url)
+        mobj = re.match(self._VALID_URL, url)
+        site = mobj.group('site') or mobj.group('site_t')
+        course_id = mobj.group('id')
+
+        self._login(site)
+
+        prefixed = url.startswith(self._URL_PREFIX)
+        if prefixed:
+            prefix = self._URL_PREFIX
+            url = url[len(prefix):]
 
         webpage = self._download_webpage(url, course_id)
 
-        course_id = self._search_regex(
-            r'data-course-id=["\'](\d+)', webpage, 'course id',
-            default=course_id)
+        url_base = 'https://%s/' % site
 
         entries = []
 
@@ -162,10 +241,13 @@ class UpskillCourseIE(UpskillBaseIE):
             title = self._html_search_regex(
                 r'<span[^>]+class=["\']lecture-name[^>]+>([^<]+)', li,
                 'title', default=None)
+            entry_url = urljoin(url_base, lecture_url)
+            if prefixed:
+                entry_url = self._URL_PREFIX + entry_url
             entries.append(
                 self.url_result(
-                    urljoin('http://upskillcourses.com/', lecture_url),
-                    ie=UpskillIE.ie_key(), video_id=lecture_id,
+                    entry_url,
+                    ie=TeachableIE.ie_key(), video_id=lecture_id,
                     video_title=clean_html(title)))
 
         course_title = self._html_search_regex(
index f9b6aa48f03d3b3a8cf49f80573eb9d24d115384..d3e4205f50472d8ea0a485f25a146e97be558aff 100644 (file)
@@ -203,10 +203,8 @@ class TEDIE(InfoExtractor):
                 ext_url = None
                 if service.lower() == 'youtube':
                     ext_url = external.get('code')
-                return {
-                    '_type': 'url',
-                    'url': ext_url or external['uri'],
-                }
+
+                return self.url_result(ext_url or external['uri'])
 
         resources_ = player_talk.get('resources') or talk_info.get('resources')
 
index 46918adb05fc77d45b480b138575afff9d69a086..84a14a0bdead5b96a47784da9c205231292c826b 100644 (file)
@@ -61,8 +61,4 @@ class TestURLIE(InfoExtractor):
 
         self.to_screen('Test URL: %s' % tc['url'])
 
-        return {
-            '_type': 'url',
-            'url': tc['url'],
-            'id': video_id,
-        }
+        return self.url_result(tc['url'], video_id=video_id)
index 18162061578f93d10b0c66c1cc11cae5b76fac6b..90b351cbbdc06ada72ab0ff74f2586804eb3c466 100644 (file)
@@ -343,7 +343,7 @@ class ThePlatformFeedIE(ThePlatformBaseIE):
     def _extract_feed_info(self, provider_id, feed_id, filter_query, video_id, custom_fields=None, asset_types_query={}, account_id=None):
         real_url = self._URL_TEMPLATE % (self.http_scheme(), provider_id, feed_id, filter_query)
         entry = self._download_json(real_url, video_id)['entries'][0]
-        main_smil_url = 'http://link.theplatform.com/s/%s/media/guid/%d/%s' % (provider_id, account_id, entry['guid']) if account_id else None
+        main_smil_url = 'http://link.theplatform.com/s/%s/media/guid/%d/%s' % (provider_id, account_id, entry['guid']) if account_id else entry.get('plmedia$publicUrl')
 
         formats = []
         subtitles = {}
@@ -356,7 +356,8 @@ class ThePlatformFeedIE(ThePlatformBaseIE):
             if first_video_id is None:
                 first_video_id = cur_video_id
                 duration = float_or_none(item.get('plfile$duration'))
-            for asset_type in item['plfile$assetTypes']:
+            file_asset_types = item.get('plfile$assetTypes') or compat_parse_qs(compat_urllib_parse_urlparse(smil_url).query)['assetTypes']
+            for asset_type in file_asset_types:
                 if asset_type in asset_types:
                     continue
                 asset_types.append(asset_type)
diff --git a/youtube_dl/extractor/tiktok.py b/youtube_dl/extractor/tiktok.py
new file mode 100644 (file)
index 0000000..083e9f3
--- /dev/null
@@ -0,0 +1,117 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+    compat_str,
+    ExtractorError,
+    int_or_none,
+    str_or_none,
+    try_get,
+    url_or_none,
+)
+
+
+class TikTokBaseIE(InfoExtractor):
+    def _extract_aweme(self, data):
+        video = data['video']
+        description = str_or_none(try_get(data, lambda x: x['desc']))
+        width = int_or_none(try_get(data, lambda x: video['width']))
+        height = int_or_none(try_get(data, lambda x: video['height']))
+
+        format_urls = set()
+        formats = []
+        for format_id in (
+                'play_addr_lowbr', 'play_addr', 'play_addr_h264',
+                'download_addr'):
+            for format in try_get(
+                    video, lambda x: x[format_id]['url_list'], list) or []:
+                format_url = url_or_none(format)
+                if not format_url:
+                    continue
+                if format_url in format_urls:
+                    continue
+                format_urls.add(format_url)
+                formats.append({
+                    'url': format_url,
+                    'ext': 'mp4',
+                    'height': height,
+                    'width': width,
+                })
+        self._sort_formats(formats)
+
+        thumbnail = url_or_none(try_get(
+            video, lambda x: x['cover']['url_list'][0], compat_str))
+        uploader = try_get(data, lambda x: x['author']['nickname'], compat_str)
+        timestamp = int_or_none(data.get('create_time'))
+        comment_count = int_or_none(data.get('comment_count')) or int_or_none(
+            try_get(data, lambda x: x['statistics']['comment_count']))
+        repost_count = int_or_none(try_get(
+            data, lambda x: x['statistics']['share_count']))
+
+        aweme_id = data['aweme_id']
+
+        return {
+            'id': aweme_id,
+            'title': uploader or aweme_id,
+            'description': description,
+            'thumbnail': thumbnail,
+            'uploader': uploader,
+            'timestamp': timestamp,
+            'comment_count': comment_count,
+            'repost_count': repost_count,
+            'formats': formats,
+        }
+
+
+class TikTokIE(TikTokBaseIE):
+    _VALID_URL = r'https?://(?:m\.)?tiktok\.com/v/(?P<id>\d+)'
+    _TEST = {
+        'url': 'https://m.tiktok.com/v/6606727368545406213.html',
+        'md5': 'd584b572e92fcd48888051f238022420',
+        'info_dict': {
+            'id': '6606727368545406213',
+            'ext': 'mp4',
+            'title': 'Zureeal',
+            'description': '#bowsette#mario#cosplay#uk#lgbt#gaming#asian#bowsettecosplay',
+            'thumbnail': r're:^https?://.*~noop.image',
+            'uploader': 'Zureeal',
+            'timestamp': 1538248586,
+            'upload_date': '20180929',
+            'comment_count': int,
+            'repost_count': int,
+        }
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+        data = self._parse_json(self._search_regex(
+            r'\bdata\s*=\s*({.+?})\s*;', webpage, 'data'), video_id)
+        return self._extract_aweme(data)
+
+
+class TikTokUserIE(TikTokBaseIE):
+    _VALID_URL = r'https?://(?:m\.)?tiktok\.com/h5/share/usr/(?P<id>\d+)'
+    _TEST = {
+        'url': 'https://m.tiktok.com/h5/share/usr/188294915489964032.html',
+        'info_dict': {
+            'id': '188294915489964032',
+        },
+        'playlist_mincount': 24,
+    }
+
+    def _real_extract(self, url):
+        user_id = self._match_id(url)
+        data = self._download_json(
+            'https://m.tiktok.com/h5/share/usr/list/%s/' % user_id, user_id,
+            query={'_signature': '_'})
+        entries = []
+        for aweme in data['aweme_list']:
+            try:
+                entry = self._extract_aweme(aweme)
+            except ExtractorError:
+                continue
+            entry['extractor_key'] = TikTokIE.ie_key()
+            entries.append(entry)
+        return self.playlist_result(entries, user_id)
index 0c2f8f119fbc3d3535d571f430865a071f98e10f..6798ef4c3f32da387e17b2381bf2b1815bfdd119 100644 (file)
@@ -18,8 +18,9 @@ from ..utils import (
 class TNAFlixNetworkBaseIE(InfoExtractor):
     # May be overridden in descendants if necessary
     _CONFIG_REGEX = [
-        r'flashvars\.config\s*=\s*escape\("([^"]+)"',
-        r'<input[^>]+name="config\d?" value="([^"]+)"',
+        r'flashvars\.config\s*=\s*escape\("(?P<url>[^"]+)"',
+        r'<input[^>]+name="config\d?" value="(?P<url>[^"]+)"',
+        r'config\s*=\s*(["\'])(?P<url>(?:https?:)?//(?:(?!\1).)+)\1',
     ]
     _HOST = 'tna'
     _VKEY_SUFFIX = ''
@@ -85,7 +86,8 @@ class TNAFlixNetworkBaseIE(InfoExtractor):
         webpage = self._download_webpage(url, display_id)
 
         cfg_url = self._proto_relative_url(self._html_search_regex(
-            self._CONFIG_REGEX, webpage, 'flashvars.config', default=None), 'http:')
+            self._CONFIG_REGEX, webpage, 'flashvars.config', default=None,
+            group='url'), 'http:')
 
         if not cfg_url:
             inputs = self._hidden_inputs(webpage)
index 60937616f2c57e55cf093cbfe5f1d55987c75b55..3c6a60c39a49f90bdeb083d8fdc36646486c6291 100644 (file)
@@ -10,8 +10,9 @@ from ..utils import (
     int_or_none,
     parse_iso8601,
     parse_duration,
-    try_get,
+    str_or_none,
     update_url_query,
+    urljoin,
 )
 
 
@@ -24,8 +25,7 @@ class TVNowBaseIE(InfoExtractor):
 
     def _call_api(self, path, video_id, query):
         return self._download_json(
-            'https://api.tvnow.de/v3/' + path,
-            video_id, query=query)
+            'https://api.tvnow.de/v3/' + path, video_id, query=query)
 
     def _extract_video(self, info, display_id):
         video_id = compat_str(info['id'])
@@ -108,6 +108,11 @@ class TVNowIE(TVNowBaseIE):
                         (?!(?:list|jahr)(?:/|$))(?P<id>[^/?\#&]+)
                     '''
 
+    @classmethod
+    def suitable(cls, url):
+        return (False if TVNowNewIE.suitable(url) or TVNowSeasonIE.suitable(url) or TVNowAnnualIE.suitable(url) or TVNowShowIE.suitable(url)
+                else super(TVNowIE, cls).suitable(url))
+
     _TESTS = [{
         'url': 'https://www.tvnow.de/rtl2/grip-das-motormagazin/der-neue-porsche-911-gt-3/player',
         'info_dict': {
@@ -116,7 +121,6 @@ class TVNowIE(TVNowBaseIE):
             'ext': 'mp4',
             'title': 'Der neue Porsche 911 GT 3',
             'description': 'md5:6143220c661f9b0aae73b245e5d898bb',
-            'thumbnail': r're:^https?://.*\.jpg$',
             'timestamp': 1495994400,
             'upload_date': '20170528',
             'duration': 5283,
@@ -161,136 +165,314 @@ class TVNowIE(TVNowBaseIE):
         info = self._call_api(
             'movies/' + display_id, display_id, query={
                 'fields': ','.join(self._VIDEO_FIELDS),
-                'station': mobj.group(1),
             })
 
         return self._extract_video(info, display_id)
 
 
-class TVNowListBaseIE(TVNowBaseIE):
-    _SHOW_VALID_URL = r'''(?x)
-                    (?P<base_url>
-                        https?://
-                            (?:www\.)?tvnow\.(?:de|at|ch)/[^/]+/
-                            (?P<show_id>[^/]+)
-                    )
+class TVNowNewIE(InfoExtractor):
+    _VALID_URL = r'''(?x)
+                    (?P<base_url>https?://
+                        (?:www\.)?tvnow\.(?:de|at|ch)/
+                        (?:shows|serien))/
+                        (?P<show>[^/]+)-\d+/
+                        [^/]+/
+                        episode-\d+-(?P<episode>[^/?$&]+)-(?P<id>\d+)
                     '''
 
-    def _extract_list_info(self, display_id, show_id):
-        fields = list(self._SHOW_FIELDS)
-        fields.extend('formatTabs.%s' % field for field in self._SEASON_FIELDS)
-        fields.extend(
-            'formatTabs.formatTabPages.container.movies.%s' % field
-            for field in self._VIDEO_FIELDS)
-        return self._call_api(
-            'formats/seo', display_id, query={
-                'fields': ','.join(fields),
-                'name': show_id + '.php'
-            })
-
-
-class TVNowListIE(TVNowListBaseIE):
-    _VALID_URL = r'%s/(?:list|jahr)/(?P<id>[^?\#&]+)' % TVNowListBaseIE._SHOW_VALID_URL
+    _TESTS = [{
+        'url': 'https://www.tvnow.de/shows/grip-das-motormagazin-1669/2017-05/episode-405-der-neue-porsche-911-gt-3-331082',
+        'only_matching': True,
+    }]
 
-    _SHOW_FIELDS = ('title', )
-    _SEASON_FIELDS = ('id', 'headline', 'seoheadline', )
-    _VIDEO_FIELDS = ('id', 'headline', 'seoUrl', )
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        base_url = re.sub(r'(?:shows|serien)', '_', mobj.group('base_url'))
+        show, episode = mobj.group('show', 'episode')
+        return self.url_result(
+            # Rewrite new URLs to the old format and use extraction via old API
+            # at api.tvnow.de as a loophole for bypassing premium content checks
+            '%s/%s/%s' % (base_url, show, episode),
+            ie=TVNowIE.ie_key(), video_id=mobj.group('id'))
+
+
+class TVNowNewBaseIE(InfoExtractor):
+    def _call_api(self, path, video_id, query={}):
+        result = self._download_json(
+            'https://apigw.tvnow.de/module/' + path, video_id, query=query)
+        error = result.get('error')
+        if error:
+            raise ExtractorError(
+                '%s said: %s' % (self.IE_NAME, error), expected=True)
+        return result
+
+
+"""
+TODO: new apigw.tvnow.de based version of TVNowIE. Replace old TVNowIE with it
+when api.tvnow.de is shut down. This version can't bypass premium checks though.
+class TVNowIE(TVNowNewBaseIE):
+    _VALID_URL = r'''(?x)
+                    https?://
+                        (?:www\.)?tvnow\.(?:de|at|ch)/
+                        (?:shows|serien)/[^/]+/
+                        (?:[^/]+/)+
+                        (?P<display_id>[^/?$&]+)-(?P<id>\d+)
+                    '''
 
     _TESTS = [{
-        'url': 'https://www.tvnow.de/rtl/30-minuten-deutschland/list/aktuell',
+        # episode with annual navigation
+        'url': 'https://www.tvnow.de/shows/grip-das-motormagazin-1669/2017-05/episode-405-der-neue-porsche-911-gt-3-331082',
         'info_dict': {
-            'id': '28296',
-            'title': '30 Minuten Deutschland - Aktuell',
+            'id': '331082',
+            'display_id': 'grip-das-motormagazin/der-neue-porsche-911-gt-3',
+            'ext': 'mp4',
+            'title': 'Der neue Porsche 911 GT 3',
+            'description': 'md5:6143220c661f9b0aae73b245e5d898bb',
+            'thumbnail': r're:^https?://.*\.jpg$',
+            'timestamp': 1495994400,
+            'upload_date': '20170528',
+            'duration': 5283,
+            'series': 'GRIP - Das Motormagazin',
+            'season_number': 14,
+            'episode_number': 405,
+            'episode': 'Der neue Porsche 911 GT 3',
         },
-        'playlist_mincount': 1,
     }, {
-        'url': 'https://www.tvnow.de/vox/ab-ins-beet/list/staffel-14',
+        # rtl2, episode with season navigation
+        'url': 'https://www.tvnow.de/shows/armes-deutschland-11471/staffel-3/episode-14-bernd-steht-seit-der-trennung-von-seiner-frau-allein-da-526124',
         'only_matching': True,
     }, {
-        'url': 'https://www.tvnow.de/rtl2/grip-das-motormagazin/jahr/2018/3',
+        # rtlnitro
+        'url': 'https://www.tvnow.de/serien/alarm-fuer-cobra-11-die-autobahnpolizei-1815/staffel-13/episode-5-auf-eigene-faust-pilot-366822',
+        'only_matching': True,
+    }, {
+        # superrtl
+        'url': 'https://www.tvnow.de/shows/die-lustigsten-schlamassel-der-welt-1221/staffel-2/episode-14-u-a-ketchup-effekt-364120',
+        'only_matching': True,
+    }, {
+        # ntv
+        'url': 'https://www.tvnow.de/shows/startup-news-10674/staffel-2/episode-39-goetter-in-weiss-387630',
+        'only_matching': True,
+    }, {
+        # vox
+        'url': 'https://www.tvnow.de/shows/auto-mobil-174/2017-11/episode-46-neues-vom-automobilmarkt-2017-11-19-17-00-00-380072',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.tvnow.de/shows/grip-das-motormagazin-1669/2017-05/episode-405-der-neue-porsche-911-gt-3-331082',
         'only_matching': True,
     }]
 
-    @classmethod
-    def suitable(cls, url):
-        return (False if TVNowIE.suitable(url)
-                else super(TVNowListIE, cls).suitable(url))
+    def _extract_video(self, info, url, display_id):
+        config = info['config']
+        source = config['source']
 
-    def _real_extract(self, url):
-        base_url, show_id, season_id = re.match(self._VALID_URL, url).groups()
+        video_id = compat_str(info.get('id') or source['videoId'])
+        title = source['title'].strip()
 
-        list_info = self._extract_list_info(season_id, show_id)
+        paths = []
+        for manifest_url in (info.get('manifest') or {}).values():
+            if not manifest_url:
+                continue
+            manifest_url = update_url_query(manifest_url, {'filter': ''})
+            path = self._search_regex(r'https?://[^/]+/(.+?)\.ism/', manifest_url, 'path')
+            if path in paths:
+                continue
+            paths.append(path)
 
-        season = next(
-            season for season in list_info['formatTabs']['items']
-            if season.get('seoheadline') == season_id)
+            def url_repl(proto, suffix):
+                return re.sub(
+                    r'(?:hls|dash|hss)([.-])', proto + r'\1', re.sub(
+                        r'\.ism/(?:[^.]*\.(?:m3u8|mpd)|[Mm]anifest)',
+                        '.ism/' + suffix, manifest_url))
 
-        title = list_info.get('title')
-        headline = season.get('headline')
-        if title and headline:
-            title = '%s - %s' % (title, headline)
+            formats = self._extract_mpd_formats(
+                url_repl('dash', '.mpd'), video_id,
+                mpd_id='dash', fatal=False)
+            formats.extend(self._extract_ism_formats(
+                url_repl('hss', 'Manifest'),
+                video_id, ism_id='mss', fatal=False))
+            formats.extend(self._extract_m3u8_formats(
+                url_repl('hls', '.m3u8'), video_id, 'mp4',
+                'm3u8_native', m3u8_id='hls', fatal=False))
+            if formats:
+                break
         else:
-            title = headline or title
+            if try_get(info, lambda x: x['rights']['isDrm']):
+                raise ExtractorError(
+                    'Video %s is DRM protected' % video_id, expected=True)
+            if try_get(config, lambda x: x['boards']['geoBlocking']['block']):
+                raise self.raise_geo_restricted()
+            if not info.get('free', True):
+                raise ExtractorError(
+                    'Video %s is not available for free' % video_id, expected=True)
+        self._sort_formats(formats)
+
+        description = source.get('description')
+        thumbnail = url_or_none(source.get('poster'))
+        timestamp = unified_timestamp(source.get('previewStart'))
+        duration = parse_duration(source.get('length'))
+
+        series = source.get('format')
+        season_number = int_or_none(self._search_regex(
+            r'staffel-(\d+)', url, 'season number', default=None))
+        episode_number = int_or_none(self._search_regex(
+            r'episode-(\d+)', url, 'episode number', default=None))
+
+        return {
+            'id': video_id,
+            'display_id': display_id,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'timestamp': timestamp,
+            'duration': duration,
+            'series': series,
+            'season_number': season_number,
+            'episode_number': episode_number,
+            'episode': title,
+            'formats': formats,
+        }
+
+    def _real_extract(self, url):
+        display_id, video_id = re.match(self._VALID_URL, url).groups()
+        info = self._call_api('player/' + video_id, video_id)
+        return self._extract_video(info, video_id, display_id)
+"""
+
+
+class TVNowListBaseIE(TVNowNewBaseIE):
+    _SHOW_VALID_URL = r'''(?x)
+                    (?P<base_url>
+                        https?://
+                            (?:www\.)?tvnow\.(?:de|at|ch)/(?:shows|serien)/
+                            [^/?#&]+-(?P<show_id>\d+)
+                    )
+                    '''
+
+    @classmethod
+    def suitable(cls, url):
+        return (False if TVNowNewIE.suitable(url)
+                else super(TVNowListBaseIE, cls).suitable(url))
+
+    def _extract_items(self, url, show_id, list_id, query):
+        items = self._call_api(
+            'teaserrow/format/episode/' + show_id, list_id,
+            query=query)['items']
 
         entries = []
-        for container in season['formatTabPages']['items']:
-            items = try_get(
-                container, lambda x: x['container']['movies']['items'],
-                list) or []
-            for info in items:
-                seo_url = info.get('seoUrl')
-                if not seo_url:
-                    continue
-                video_id = info.get('id')
-                entries.append(self.url_result(
-                    '%s/%s/player' % (base_url, seo_url), TVNowIE.ie_key(),
-                    compat_str(video_id) if video_id else None))
+        for item in items:
+            if not isinstance(item, dict):
+                continue
+            item_url = urljoin(url, item.get('url'))
+            if not item_url:
+                continue
+            video_id = str_or_none(item.get('id') or item.get('videoId'))
+            item_title = item.get('subheadline') or item.get('text')
+            entries.append(self.url_result(
+                item_url, ie=TVNowNewIE.ie_key(), video_id=video_id,
+                video_title=item_title))
 
-        return self.playlist_result(
-            entries, compat_str(season.get('id') or season_id), title)
+        return self.playlist_result(entries, '%s/%s' % (show_id, list_id))
 
 
-class TVNowShowIE(TVNowListBaseIE):
-    _VALID_URL = TVNowListBaseIE._SHOW_VALID_URL
+class TVNowSeasonIE(TVNowListBaseIE):
+    _VALID_URL = r'%s/staffel-(?P<id>\d+)' % TVNowListBaseIE._SHOW_VALID_URL
+    _TESTS = [{
+        'url': 'https://www.tvnow.de/serien/alarm-fuer-cobra-11-die-autobahnpolizei-1815/staffel-13',
+        'info_dict': {
+            'id': '1815/13',
+        },
+        'playlist_mincount': 22,
+    }]
+
+    def _real_extract(self, url):
+        _, show_id, season_id = re.match(self._VALID_URL, url).groups()
+        return self._extract_items(
+            url, show_id, season_id, {'season': season_id})
 
-    _SHOW_FIELDS = ('id', 'title', )
-    _SEASON_FIELDS = ('id', 'headline', 'seoheadline', )
-    _VIDEO_FIELDS = ()
 
+class TVNowAnnualIE(TVNowListBaseIE):
+    _VALID_URL = r'%s/(?P<year>\d{4})-(?P<month>\d{2})' % TVNowListBaseIE._SHOW_VALID_URL
     _TESTS = [{
-        'url': 'https://www.tvnow.at/vox/ab-ins-beet',
+        'url': 'https://www.tvnow.de/shows/grip-das-motormagazin-1669/2017-05',
         'info_dict': {
-            'id': 'ab-ins-beet',
-            'title': 'Ab ins Beet!',
+            'id': '1669/2017-05',
         },
-        'playlist_mincount': 7,
-    }, {
-        'url': 'https://www.tvnow.at/vox/ab-ins-beet/list',
-        'only_matching': True,
+        'playlist_mincount': 2,
+    }]
+
+    def _real_extract(self, url):
+        _, show_id, year, month = re.match(self._VALID_URL, url).groups()
+        return self._extract_items(
+            url, show_id, '%s-%s' % (year, month), {
+                'year': int(year),
+                'month': int(month),
+            })
+
+
+class TVNowShowIE(TVNowListBaseIE):
+    _VALID_URL = TVNowListBaseIE._SHOW_VALID_URL
+    _TESTS = [{
+        # annual navigationType
+        'url': 'https://www.tvnow.de/shows/grip-das-motormagazin-1669',
+        'info_dict': {
+            'id': '1669',
+        },
+        'playlist_mincount': 73,
     }, {
-        'url': 'https://www.tvnow.de/rtl2/grip-das-motormagazin/jahr/',
-        'only_matching': True,
+        # season navigationType
+        'url': 'https://www.tvnow.de/shows/armes-deutschland-11471',
+        'info_dict': {
+            'id': '11471',
+        },
+        'playlist_mincount': 3,
     }]
 
     @classmethod
     def suitable(cls, url):
-        return (False if TVNowIE.suitable(url) or TVNowListIE.suitable(url)
+        return (False if TVNowNewIE.suitable(url) or TVNowSeasonIE.suitable(url) or TVNowAnnualIE.suitable(url)
                 else super(TVNowShowIE, cls).suitable(url))
 
     def _real_extract(self, url):
         base_url, show_id = re.match(self._VALID_URL, url).groups()
 
-        list_info = self._extract_list_info(show_id, show_id)
+        result = self._call_api(
+            'teaserrow/format/navigation/' + show_id, show_id)
+
+        items = result['items']
 
         entries = []
-        for season_info in list_info['formatTabs']['items']:
-            season_url = season_info.get('seoheadline')
-            if not season_url:
-                continue
-            season_id = season_info.get('id')
-            entries.append(self.url_result(
-                '%s/list/%s' % (base_url, season_url), TVNowListIE.ie_key(),
-                compat_str(season_id) if season_id else None,
-                season_info.get('headline')))
+        navigation = result.get('navigationType')
+        if navigation == 'annual':
+            for item in items:
+                if not isinstance(item, dict):
+                    continue
+                year = int_or_none(item.get('year'))
+                if year is None:
+                    continue
+                months = item.get('months')
+                if not isinstance(months, list):
+                    continue
+                for month_dict in months:
+                    if not isinstance(month_dict, dict) or not month_dict:
+                        continue
+                    month_number = int_or_none(list(month_dict.keys())[0])
+                    if month_number is None:
+                        continue
+                    entries.append(self.url_result(
+                        '%s/%04d-%02d' % (base_url, year, month_number),
+                        ie=TVNowAnnualIE.ie_key()))
+        elif navigation == 'season':
+            for item in items:
+                if not isinstance(item, dict):
+                    continue
+                season_number = int_or_none(item.get('season'))
+                if season_number is None:
+                    continue
+                entries.append(self.url_result(
+                    '%s/staffel-%d' % (base_url, season_number),
+                    ie=TVNowSeasonIE.ie_key()))
+        else:
+            raise ExtractorError('Unknown navigationType')
 
-        return self.playlist_result(entries, show_id, list_info.get('title'))
+        return self.playlist_result(entries, show_id)
index de41065d64921af6e861775f85dc5f3011caa524..41d0b6be8c8654bba8db336e1032f95abdc72e05 100644 (file)
@@ -171,7 +171,8 @@ class TwitterCardIE(TwitterBaseIE):
             urls.append('https://twitter.com/i/videos/' + video_id)
 
         for u in urls:
-            webpage = self._download_webpage(u, video_id)
+            webpage = self._download_webpage(
+                u, video_id, headers={'Referer': 'https://twitter.com/'})
 
             iframe_url = self._html_search_regex(
                 r'<iframe[^>]+src="((?:https?:)?//(?:www\.youtube\.com/embed/[^"]+|(?:www\.)?vine\.co/v/\w+/card))"',
index e67083004789f250faf842ee31fc2b343ad54754..08f0c072e28b09dfbbde2f662b52f5de4cf46f3d 100644 (file)
@@ -61,7 +61,7 @@ class UOLIE(InfoExtractor):
             'height': 360,
         },
         '5': {
-            'width': 1080,
+            'width': 1280,
             'height': 720,
         },
         '6': {
@@ -80,6 +80,10 @@ class UOLIE(InfoExtractor):
             'width': 568,
             'height': 320,
         },
+        '11': {
+            'width': 640,
+            'height': 360,
+        }
     }
 
     def _real_extract(self, url):
@@ -111,19 +115,31 @@ class UOLIE(InfoExtractor):
             'ver': video_data.get('numRevision', 2),
             'r': 'http://mais.uol.com.br',
         }
+        for k in ('token', 'sign'):
+            v = video_data.get(k)
+            if v:
+                query[k] = v
+
         formats = []
         for f in video_data.get('formats', []):
             f_url = f.get('url') or f.get('secureUrl')
             if not f_url:
                 continue
+            f_url = update_url_query(f_url, query)
             format_id = str_or_none(f.get('id'))
+            if format_id == '10':
+                formats.extend(self._extract_m3u8_formats(
+                    f_url, video_id, 'mp4', 'm3u8_native',
+                    m3u8_id='hls', fatal=False))
+                continue
             fmt = {
                 'format_id': format_id,
-                'url': update_url_query(f_url, query),
+                'url': f_url,
+                'source_preference': 1,
             }
             fmt.update(self._FORMATS.get(format_id, {}))
             formats.append(fmt)
-        self._sort_formats(formats)
+        self._sort_formats(formats, ('height', 'width', 'source_preference', 'tbr', 'ext'))
 
         tags = []
         for tag in video_data.get('tags', []):
index 88f4d99794e5df73c6383d06b4b5f143a9f76739..5e15f060bc8ce12562f678e7d97a2ff5be480e81 100644 (file)
@@ -14,10 +14,13 @@ from ..compat import (
 from ..utils import (
     determine_ext,
     ExtractorError,
+    js_to_json,
     InAdvancePagedList,
     int_or_none,
     merge_dicts,
     NO_DEFAULT,
+    parse_filesize,
+    qualities,
     RegexNotFoundError,
     sanitized_Request,
     smuggle_url,
@@ -27,7 +30,6 @@ from ..utils import (
     unsmuggle_url,
     urlencode_postdata,
     unescapeHTML,
-    parse_filesize,
 )
 
 
@@ -1063,3 +1065,96 @@ class VimeoLikesIE(InfoExtractor):
             'description': description,
             'entries': pl,
         }
+
+
+class VHXEmbedIE(InfoExtractor):
+    IE_NAME = 'vhx:embed'
+    _VALID_URL = r'https?://embed\.vhx\.tv/videos/(?P<id>\d+)'
+
+    def _call_api(self, video_id, access_token, path='', query=None):
+        return self._download_json(
+            'https://api.vhx.tv/videos/' + video_id + path, video_id, headers={
+                'Authorization': 'Bearer ' + access_token,
+            }, query=query)
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+        credentials = self._parse_json(self._search_regex(
+            r'(?s)credentials\s*:\s*({.+?}),', webpage,
+            'config'), video_id, js_to_json)
+        access_token = credentials['access_token']
+
+        query = {}
+        for k, v in credentials.items():
+            if k in ('authorization', 'authUserToken', 'ticket') and v and v != 'undefined':
+                if k == 'authUserToken':
+                    query['auth_user_token'] = v
+                else:
+                    query[k] = v
+        files = self._call_api(video_id, access_token, '/files', query)
+
+        formats = []
+        for f in files:
+            href = try_get(f, lambda x: x['_links']['source']['href'])
+            if not href:
+                continue
+            method = f.get('method')
+            if method == 'hls':
+                formats.extend(self._extract_m3u8_formats(
+                    href, video_id, 'mp4', 'm3u8_native',
+                    m3u8_id='hls', fatal=False))
+            elif method == 'dash':
+                formats.extend(self._extract_mpd_formats(
+                    href, video_id, mpd_id='dash', fatal=False))
+            else:
+                fmt = {
+                    'filesize': int_or_none(try_get(f, lambda x: x['size']['bytes'])),
+                    'format_id': 'http',
+                    'preference': 1,
+                    'url': href,
+                    'vcodec': f.get('codec'),
+                }
+                quality = f.get('quality')
+                if quality:
+                    fmt.update({
+                        'format_id': 'http-' + quality,
+                        'height': int_or_none(self._search_regex(r'(\d+)p', quality, 'height', default=None)),
+                    })
+                formats.append(fmt)
+        self._sort_formats(formats)
+
+        video_data = self._call_api(video_id, access_token)
+        title = video_data.get('title') or video_data['name']
+
+        subtitles = {}
+        for subtitle in try_get(video_data, lambda x: x['tracks']['subtitles'], list) or []:
+            lang = subtitle.get('srclang') or subtitle.get('label')
+            for _link in subtitle.get('_links', {}).values():
+                href = _link.get('href')
+                if not href:
+                    continue
+                subtitles.setdefault(lang, []).append({
+                    'url': href,
+                })
+
+        q = qualities(['small', 'medium', 'large', 'source'])
+        thumbnails = []
+        for thumbnail_id, thumbnail_url in video_data.get('thumbnail', {}).items():
+            thumbnails.append({
+                'id': thumbnail_id,
+                'url': thumbnail_url,
+                'preference': q(thumbnail_id),
+            })
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': video_data.get('description'),
+            'duration': int_or_none(try_get(video_data, lambda x: x['duration']['seconds'])),
+            'formats': formats,
+            'subtitles': subtitles,
+            'thumbnails': thumbnails,
+            'timestamp': unified_timestamp(video_data.get('created_at')),
+            'view_count': int_or_none(video_data.get('plays_count')),
+        }
index ef8b9bcb7b610c25925ed2725a8bb43d8f3a6d1c..b52d15ac6f0709dce8a60c2f42d690b49a56d391 100644 (file)
@@ -293,8 +293,12 @@ class VKIE(VKBaseIE):
             # This video is no longer available, because its author has been blocked.
             'url': 'https://vk.com/video-10639516_456240611',
             'only_matching': True,
-        }
-    ]
+        },
+        {
+            # The video is not available in your region.
+            'url': 'https://vk.com/video-51812607_171445436',
+            'only_matching': True,
+        }]
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
@@ -354,6 +358,9 @@ class VKIE(VKBaseIE):
 
             r'<!>This video is no longer available, because it has been deleted.':
             'Video %s is no longer available, because it has been deleted.',
+
+            r'<!>The video .+? is not available in your region.':
+            'Video %s is not available in your region.',
         }
 
         for error_re, error_msg in ERRORS.items():
index ac0819c7c10a9bc0db09b76993a4c79c08a02c4b..483a3be3a8c1f1b0c557c308a86bd7d55de82bdd 100644 (file)
@@ -120,8 +120,10 @@ class VRVIE(VRVBaseIE):
             url, video_id,
             headers=self.geo_verification_headers())
         media_resource = self._parse_json(self._search_regex(
-            r'window\.__INITIAL_STATE__\s*=\s*({.+?})</script>',
-            webpage, 'inital state'), video_id).get('watch', {}).get('mediaResource') or {}
+            [
+                r'window\.__INITIAL_STATE__\s*=\s*({.+?})(?:</script>|;)',
+                r'window\.__INITIAL_STATE__\s*=\s*({.+})'
+            ], webpage, 'inital state'), video_id).get('watch', {}).get('mediaResource') or {}
 
         video_data = media_resource.get('json')
         if not video_data:
index 3dab9145ba9c57bfd1d78a90a847761c23f0d8a8..ea234e3c5088f7c7c99e049ab95f1e69542dd38f 100644 (file)
@@ -40,11 +40,7 @@ class WimpIE(InfoExtractor):
              r'data-id=["\']([0-9A-Za-z_-]{11})'),
             webpage, 'video URL', default=None)
         if youtube_id:
-            return {
-                '_type': 'url',
-                'url': youtube_id,
-                'ie_key': YoutubeIE.ie_key(),
-            }
+            return self.url_result(youtube_id, YoutubeIE.ie_key())
 
         info_dict = self._extract_jwplayer_data(
             webpage, video_id, require_title=False)
index 2182d6fd485bf4f1ed6ead7d78451b7c357ddc37..fa142b974ae873a7883ede4894421f4f8a010549 100644 (file)
@@ -12,7 +12,7 @@ from ..utils import (
 
 
 class WistiaIE(InfoExtractor):
-    _VALID_URL = r'(?:wistia:|https?://(?:fast\.)?wistia\.net/embed/iframe/)(?P<id>[a-z0-9]+)'
+    _VALID_URL = r'(?:wistia:|https?://(?:fast\.)?wistia\.(?:net|com)/embed/(?:iframe|medias)/)(?P<id>[a-z0-9]+)'
     _API_URL = 'http://fast.wistia.com/embed/medias/%s.json'
     _IFRAME_URL = 'http://fast.wistia.net/embed/iframe/%s'
 
@@ -35,12 +35,18 @@ class WistiaIE(InfoExtractor):
         # with hls video
         'url': 'wistia:807fafadvk',
         'only_matching': True,
+    }, {
+        'url': 'http://fast.wistia.com/embed/iframe/sh7fpupwlt',
+        'only_matching': True,
+    }, {
+        'url': 'http://fast.wistia.net/embed/medias/sh7fpupwlt.json',
+        'only_matching': True,
     }]
 
     @staticmethod
     def _extract_url(webpage):
         match = re.search(
-            r'<(?:meta[^>]+?content|iframe[^>]+?src)=(["\'])(?P<url>(?:https?:)?//(?:fast\.)?wistia\.net/embed/iframe/.+?)\1', webpage)
+            r'<(?:meta[^>]+?content|iframe[^>]+?src)=(["\'])(?P<url>(?:https?:)?//(?:fast\.)?wistia\.(?:net|com)/embed/iframe/.+?)\1', webpage)
         if match:
             return unescapeHTML(match.group('url'))
 
diff --git a/youtube_dl/extractor/wwe.py b/youtube_dl/extractor/wwe.py
new file mode 100644 (file)
index 0000000..bebc77b
--- /dev/null
@@ -0,0 +1,140 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+    try_get,
+    unescapeHTML,
+    url_or_none,
+    urljoin,
+)
+
+
+class WWEBaseIE(InfoExtractor):
+    _SUBTITLE_LANGS = {
+        'English': 'en',
+        'Deutsch': 'de',
+    }
+
+    def _extract_entry(self, data, url, video_id=None):
+        video_id = compat_str(video_id or data['nid'])
+        title = data['title']
+
+        formats = self._extract_m3u8_formats(
+            data['file'], video_id, 'mp4', entry_protocol='m3u8_native',
+            m3u8_id='hls')
+
+        description = data.get('description')
+        thumbnail = urljoin(url, data.get('image'))
+        series = data.get('show_name')
+        episode = data.get('episode_name')
+
+        subtitles = {}
+        tracks = data.get('tracks')
+        if isinstance(tracks, list):
+            for track in tracks:
+                if not isinstance(track, dict):
+                    continue
+                if track.get('kind') != 'captions':
+                    continue
+                track_file = url_or_none(track.get('file'))
+                if not track_file:
+                    continue
+                label = track.get('label')
+                lang = self._SUBTITLE_LANGS.get(label, label) or 'en'
+                subtitles.setdefault(lang, []).append({
+                    'url': track_file,
+                })
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'series': series,
+            'episode': episode,
+            'formats': formats,
+            'subtitles': subtitles,
+        }
+
+
+class WWEIE(WWEBaseIE):
+    _VALID_URL = r'https?://(?:[^/]+\.)?wwe\.com/(?:[^/]+/)*videos/(?P<id>[^/?#&]+)'
+    _TESTS = [{
+        'url': 'https://www.wwe.com/videos/daniel-bryan-vs-andrade-cien-almas-smackdown-live-sept-4-2018',
+        'md5': '92811c6a14bfc206f7a6a9c5d9140184',
+        'info_dict': {
+            'id': '40048199',
+            'ext': 'mp4',
+            'title': 'Daniel Bryan vs. Andrade "Cien" Almas: SmackDown LIVE, Sept. 4, 2018',
+            'description': 'md5:2d7424dbc6755c61a0e649d2a8677f67',
+            'thumbnail': r're:^https?://.*\.jpg$',
+        }
+    }, {
+        'url': 'https://de.wwe.com/videos/gran-metalik-vs-tony-nese-wwe-205-live-sept-4-2018',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+        webpage = self._download_webpage(url, display_id)
+
+        landing = self._parse_json(
+            self._html_search_regex(
+                r'(?s)Drupal\.settings\s*,\s*({.+?})\s*\)\s*;',
+                webpage, 'drupal settings'),
+            display_id)['WWEVideoLanding']
+
+        data = landing['initialVideo']['playlist'][0]
+        video_id = landing.get('initialVideoId')
+
+        info = self._extract_entry(data, url, video_id)
+        info['display_id'] = display_id
+        return info
+
+
+class WWEPlaylistIE(WWEBaseIE):
+    _VALID_URL = r'https?://(?:[^/]+\.)?wwe\.com/(?:[^/]+/)*(?P<id>[^/?#&]+)'
+    _TESTS = [{
+        'url': 'https://www.wwe.com/shows/raw/2018-11-12',
+        'info_dict': {
+            'id': '2018-11-12',
+        },
+        'playlist_mincount': 11,
+    }, {
+        'url': 'http://www.wwe.com/article/walk-the-prank-wwe-edition',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.wwe.com/shows/wwenxt/article/matt-riddle-interview',
+        'only_matching': True,
+    }]
+
+    @classmethod
+    def suitable(cls, url):
+        return False if WWEIE.suitable(url) else super(WWEPlaylistIE, cls).suitable(url)
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+        webpage = self._download_webpage(url, display_id)
+
+        entries = []
+        for mobj in re.finditer(
+                r'data-video\s*=\s*(["\'])(?P<data>{.+?})\1', webpage):
+            video = self._parse_json(
+                mobj.group('data'), display_id, transform_source=unescapeHTML,
+                fatal=False)
+            if not video:
+                continue
+            data = try_get(video, lambda x: x['playlist'][0], dict)
+            if not data:
+                continue
+            try:
+                entry = self._extract_entry(data, url)
+            except Exception:
+                continue
+            entry['extractor_key'] = WWEIE.ie_key()
+            entries.append(entry)
+
+        return self.playlist_result(entries, display_id)
index efee95651df0c21d57bca7b0ef5625dcc53d5cac..ec2d913fcadd5cb8e594460a5bf53b2ab5fce562 100644 (file)
@@ -45,7 +45,7 @@ class XVideosIE(InfoExtractor):
         video_id = self._match_id(url)
 
         webpage = self._download_webpage(
-            'http://www.xvideos.com/video%s/' % video_id, video_id)
+            'https://www.xvideos.com/video%s/' % video_id, video_id)
 
         mobj = re.search(r'<h1 class="inlineError">(.+?)</h1>', webpage)
         if mobj:
index ea0bce784c5fbe91d904428670377d3e92414453..d4eccb4b2a48efafec0232a451b3ee617e6bc859 100644 (file)
@@ -68,11 +68,9 @@ class YouPornIE(InfoExtractor):
         request.add_header('Cookie', 'age_verified=1')
         webpage = self._download_webpage(request, display_id)
 
-        title = self._search_regex(
-            [r'(?:video_titles|videoTitle)\s*[:=]\s*(["\'])(?P<title>(?:(?!\1).)+)\1',
-             r'<h1[^>]+class=["\']heading\d?["\'][^>]*>(?P<title>[^<]+)<'],
-            webpage, 'title', group='title',
-            default=None) or self._og_search_title(
+        title = self._html_search_regex(
+            r'(?s)<div[^>]+class=["\']watchVideoTitle[^>]+>(.+?)</div>',
+            webpage, 'title', default=None) or self._og_search_title(
             webpage, default=None) or self._html_search_meta(
             'title', webpage, fatal=True)
 
@@ -134,7 +132,11 @@ class YouPornIE(InfoExtractor):
             formats.append(f)
         self._sort_formats(formats)
 
-        description = self._og_search_description(webpage, default=None)
+        description = self._html_search_regex(
+            r'(?s)<div[^>]+\bid=["\']description["\'][^>]*>(.+?)</div>',
+            webpage, 'description',
+            default=None) or self._og_search_description(
+            webpage, default=None)
         thumbnail = self._search_regex(
             r'(?:imageurl\s*=|poster\s*:)\s*(["\'])(?P<thumbnail>.+?)\1',
             webpage, 'thumbnail', fatal=False, group='thumbnail')
index 6602f7c03a9074f5f81164cc120283e78976241d..c8dc29bd8a32cd7f50de601b94fe4ac77ff608bc 100644 (file)
@@ -14,6 +14,7 @@ class YourPornIE(InfoExtractor):
             'ext': 'mp4',
             'title': 'md5:c9f43630bd968267672651ba905a7d35',
             'thumbnail': r're:^https?://.*\.jpg$',
+            'age_limit': 18
         },
     }
 
@@ -26,7 +27,7 @@ class YourPornIE(InfoExtractor):
             self._search_regex(
                 r'data-vnfo=(["\'])(?P<data>{.+?})\1', webpage, 'data info',
                 group='data'),
-            video_id)[video_id])
+            video_id)[video_id]).replace('/cdn/', '/cdn3/')
 
         title = (self._search_regex(
             r'<[^>]+\bclass=["\']PostEditTA[^>]+>([^<]+)', webpage, 'title',
@@ -38,4 +39,5 @@ class YourPornIE(InfoExtractor):
             'url': video_url,
             'title': title,
             'thumbnail': thumbnail,
+            'age_limit': 18
         }
index 3f49f3889e6bcde1bd0f641f9f421caf8d560b1c..730935657981ca23ffe981b4e4afce2bbca22078 100644 (file)
@@ -48,6 +48,7 @@ from ..utils import (
     unified_strdate,
     unsmuggle_url,
     uppercase_escape,
+    url_or_none,
     urlencode_postdata,
 )
 
@@ -497,7 +498,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
                 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
                 'upload_date': '20121002',
-                'license': 'Standard YouTube License',
                 'description': 'test chars:  "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
                 'categories': ['Science & Technology'],
                 'tags': ['youtube-dl'],
@@ -526,7 +526,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'uploader': 'Icona Pop',
                 'uploader_id': 'IconaPop',
                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IconaPop',
-                'license': 'Standard YouTube License',
                 'creator': 'Icona Pop',
                 'track': 'I Love It (feat. Charli XCX)',
                 'artist': 'Icona Pop',
@@ -539,14 +538,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'id': '07FYdnEawAQ',
                 'ext': 'mp4',
                 'upload_date': '20130703',
-                'title': 'Justin Timberlake - Tunnel Vision (Explicit)',
+                'title': 'Justin Timberlake - Tunnel Vision (Official Music Video) (Explicit)',
                 'alt_title': 'Tunnel Vision',
-                'description': 'md5:64249768eec3bc4276236606ea996373',
+                'description': 'md5:07dab3356cde4199048e4c7cd93471e1',
                 'duration': 419,
                 'uploader': 'justintimberlakeVEVO',
                 'uploader_id': 'justintimberlakeVEVO',
                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/justintimberlakeVEVO',
-                'license': 'Standard YouTube License',
                 'creator': 'Justin Timberlake',
                 'track': 'Tunnel Vision',
                 'artist': 'Justin Timberlake',
@@ -565,7 +563,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'uploader': 'SET India',
                 'uploader_id': 'setindia',
                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
-                'license': 'Standard YouTube License',
                 'age_limit': 18,
             }
         },
@@ -580,7 +577,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'uploader_id': 'phihag',
                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
                 'upload_date': '20121002',
-                'license': 'Standard YouTube License',
                 'description': 'test chars:  "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
                 'categories': ['Science & Technology'],
                 'tags': ['youtube-dl'],
@@ -604,7 +600,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
                 'description': '',
                 'uploader': '8KVIDEO',
-                'license': 'Standard YouTube License',
                 'title': 'UHDTV TEST 8K VIDEO.mp4'
             },
             'params': {
@@ -619,13 +614,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             'info_dict': {
                 'id': 'IB3lcPjvWLA',
                 'ext': 'm4a',
-                'title': 'Afrojack, Spree Wilson - The Spark ft. Spree Wilson',
-                'description': 'md5:1900ed86ee514927b9e00fbead6969a5',
+                'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
+                'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
                 'duration': 244,
                 'uploader': 'AfrojackVEVO',
                 'uploader_id': 'AfrojackVEVO',
                 'upload_date': '20131011',
-                'license': 'Standard YouTube License',
             },
             'params': {
                 'youtube_include_dash_manifest': True,
@@ -639,13 +633,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'id': 'nfWlot6h_JM',
                 'ext': 'm4a',
                 'title': 'Taylor Swift - Shake It Off',
-                'alt_title': 'Shake It Off',
-                'description': 'md5:95f66187cd7c8b2c13eb78e1223b63c3',
+                'description': 'md5:bec2185232c05479482cb5a9b82719bf',
                 'duration': 242,
                 'uploader': 'TaylorSwiftVEVO',
                 'uploader_id': 'TaylorSwiftVEVO',
                 'upload_date': '20140818',
-                'license': 'Standard YouTube License',
                 'creator': 'Taylor Swift',
             },
             'params': {
@@ -661,10 +653,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'ext': 'mp4',
                 'duration': 219,
                 'upload_date': '20100909',
-                'uploader': 'TJ Kirk',
+                'uploader': 'Amazing Atheist',
                 'uploader_id': 'TheAmazingAtheist',
                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist',
-                'license': 'Standard YouTube License',
                 'title': 'Burning Everyone\'s Koran',
                 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms\n\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
             }
@@ -682,7 +673,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'uploader_id': 'WitcherGame',
                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
                 'upload_date': '20140605',
-                'license': 'Standard YouTube License',
                 'age_limit': 18,
             },
         },
@@ -691,7 +681,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             'url': 'https://www.youtube.com/watch?v=6kLq3WMV1nU',
             'info_dict': {
                 'id': '6kLq3WMV1nU',
-                'ext': 'webm',
+                'ext': 'mp4',
                 'title': 'Dedication To My Ex (Miss That) (Lyric Video)',
                 'description': 'md5:33765bb339e1b47e7e72b5490139bb41',
                 'duration': 246,
@@ -699,7 +689,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'uploader_id': 'LloydVEVO',
                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/LloydVEVO',
                 'upload_date': '20110629',
-                'license': 'Standard YouTube License',
                 'age_limit': 18,
             },
         },
@@ -717,7 +706,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'creator': 'deadmau5',
                 'description': 'md5:12c56784b8032162bb936a5f76d55360',
                 'uploader': 'deadmau5',
-                'license': 'Standard YouTube License',
                 'title': 'Deadmau5 - Some Chords (HD)',
                 'alt_title': 'Some Chords',
             },
@@ -735,7 +723,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'upload_date': '20150827',
                 'uploader_id': 'olympic',
                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
-                'license': 'Standard YouTube License',
                 'description': 'HO09  - Women -  GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
                 'uploader': 'Olympic',
                 'title': 'Hockey - Women -  GER-AUS - London 2012 Olympic Games',
@@ -757,7 +744,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
                 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
                 'uploader': '孫ᄋᄅ',
-                'license': 'Standard YouTube License',
                 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
             },
         },
@@ -791,7 +777,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'uploader_id': 'dorappi2000',
                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
                 'uploader': 'dorappi2000',
-                'license': 'Standard YouTube License',
                 'formats': 'mincount:31',
             },
             'skip': 'not actual anymore',
@@ -807,7 +792,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'uploader': 'Airtek',
                 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
                 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
-                'license': 'Standard YouTube License',
                 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
             },
             'params': {
@@ -880,6 +864,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             'params': {
                 'skip_download': True,
             },
+            'skip': 'This video is not available.',
         },
         {
             # Multifeed video with comma in title (see https://github.com/rg3/youtube-dl/issues/8536)
@@ -916,7 +901,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'uploader_id': 'IronSoulElf',
                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
                 'uploader': 'IronSoulElf',
-                'license': 'Standard YouTube License',
                 'creator': 'Todd Haberman,  Daniel Law Heath and Aaron Kaplan',
                 'track': 'Dark Walk - Position Music',
                 'artist': 'Todd Haberman,  Daniel Law Heath and Aaron Kaplan',
@@ -1020,13 +1004,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'id': 'iqKdEhx-dD4',
                 'ext': 'mp4',
                 'title': 'Isolation - Mind Field (Ep 1)',
-                'description': 'md5:25b78d2f64ae81719f5c96319889b736',
+                'description': 'md5:46a29be4ceffa65b92d277b93f463c0f',
                 'duration': 2085,
                 'upload_date': '20170118',
                 'uploader': 'Vsauce',
                 'uploader_id': 'Vsauce',
                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
-                'license': 'Standard YouTube License',
                 'series': 'Mind Field',
                 'season_number': 1,
                 'episode_number': 1,
@@ -1052,7 +1035,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'uploader': 'New Century Foundation',
                 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
-                'license': 'Standard YouTube License',
             },
             'params': {
                 'skip_download': True,
@@ -1076,6 +1058,31 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             'url': 'https://invidio.us/watch?v=BaW_jenozKc',
             'only_matching': True,
         },
+        {
+            # DRM protected
+            'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
+            'only_matching': True,
+        },
+        {
+            # Video with unsupported adaptive stream type formats
+            'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
+            'info_dict': {
+                'id': 'Z4Vy8R84T1U',
+                'ext': 'mp4',
+                'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
+                'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
+                'duration': 433,
+                'upload_date': '20130923',
+                'uploader': 'Amelia Putri Harwita',
+                'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
+                'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
+                'formats': 'maxcount:10',
+            },
+            'params': {
+                'skip_download': True,
+                'youtube_include_dash_manifest': False,
+            },
+        }
     ]
 
     def __init__(self, *args, **kwargs):
@@ -1104,7 +1111,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
 
     def _extract_signature_function(self, video_id, player_url, example_sig):
         id_m = re.match(
-            r'.*?-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player(?:-new)?|(?:/[a-z]{2}_[A-Z]{2})?/base)?\.(?P<ext>[a-z]+)$',
+            r'.*?-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player(?:-new)?|(?:/[a-z]{2,3}_[A-Z]{2})?/base)?\.(?P<ext>[a-z]+)$',
             player_url)
         if not id_m:
             raise ExtractorError('Cannot identify player %r' % player_url)
@@ -1386,8 +1393,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             self._downloader.report_warning(err_msg)
             return {}
 
-    def _mark_watched(self, video_id, video_info):
-        playback_url = video_info.get('videostats_playback_base_url', [None])[0]
+    def _mark_watched(self, video_id, video_info, player_response):
+        playback_url = url_or_none(try_get(
+            player_response,
+            lambda x: x['playbackTracking']['videostatsPlaybackUrl']['baseUrl']) or try_get(
+            video_info, lambda x: x['videostats_playback_base_url'][0]))
         if not playback_url:
             return
         parsed_playback_url = compat_urlparse.urlparse(playback_url)
@@ -1536,6 +1546,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             if dash_mpd and dash_mpd[0] not in dash_mpds:
                 dash_mpds.append(dash_mpd[0])
 
+        def add_dash_mpd_pr(pl_response):
+            dash_mpd = url_or_none(try_get(
+                pl_response, lambda x: x['streamingData']['dashManifestUrl'],
+                compat_str))
+            if dash_mpd and dash_mpd not in dash_mpds:
+                dash_mpds.append(dash_mpd)
+
         is_live = None
         view_count = None
 
@@ -1593,6 +1610,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                         if isinstance(pl_response, dict):
                             player_response = pl_response
             if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
+                add_dash_mpd_pr(player_response)
                 # We also try looking in get_video_info since it may contain different dashmpd
                 # URL that points to a DASH manifest with possibly different itag set (some itags
                 # are missing from DASH manifest pointed by webpage's dashmpd, some - from DASH
@@ -1624,6 +1642,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                         pl_response = get_video_info.get('player_response', [None])[0]
                         if isinstance(pl_response, dict):
                             player_response = pl_response
+                            add_dash_mpd_pr(player_response)
                     add_dash_mpd(get_video_info)
                     if view_count is None:
                         view_count = extract_view_count(get_video_info)
@@ -1669,6 +1688,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                     '"token" parameter not in video info for unknown reason',
                     video_id=video_id)
 
+        if video_info.get('license_info'):
+            raise ExtractorError('This video is DRM protected.', expected=True)
+
         video_details = try_get(
             player_response, lambda x: x['videoDetails'], dict) or {}
 
@@ -1712,30 +1734,36 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             else:
                 video_description = ''
 
-        if 'multifeed_metadata_list' in video_info and not smuggled_data.get('force_singlefeed', False):
+        if not smuggled_data.get('force_singlefeed', False):
             if not self._downloader.params.get('noplaylist'):
-                entries = []
-                feed_ids = []
-                multifeed_metadata_list = video_info['multifeed_metadata_list'][0]
-                for feed in multifeed_metadata_list.split(','):
-                    # Unquote should take place before split on comma (,) since textual
-                    # fields may contain comma as well (see
-                    # https://github.com/rg3/youtube-dl/issues/8536)
-                    feed_data = compat_parse_qs(compat_urllib_parse_unquote_plus(feed))
-                    entries.append({
-                        '_type': 'url_transparent',
-                        'ie_key': 'Youtube',
-                        'url': smuggle_url(
-                            '%s://www.youtube.com/watch?v=%s' % (proto, feed_data['id'][0]),
-                            {'force_singlefeed': True}),
-                        'title': '%s (%s)' % (video_title, feed_data['title'][0]),
-                    })
-                    feed_ids.append(feed_data['id'][0])
-                self.to_screen(
-                    'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
-                    % (', '.join(feed_ids), video_id))
-                return self.playlist_result(entries, video_id, video_title, video_description)
-            self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
+                multifeed_metadata_list = try_get(
+                    player_response,
+                    lambda x: x['multicamera']['playerLegacyMulticameraRenderer']['metadataList'],
+                    compat_str) or try_get(
+                    video_info, lambda x: x['multifeed_metadata_list'][0], compat_str)
+                if multifeed_metadata_list:
+                    entries = []
+                    feed_ids = []
+                    for feed in multifeed_metadata_list.split(','):
+                        # Unquote should take place before split on comma (,) since textual
+                        # fields may contain comma as well (see
+                        # https://github.com/rg3/youtube-dl/issues/8536)
+                        feed_data = compat_parse_qs(compat_urllib_parse_unquote_plus(feed))
+                        entries.append({
+                            '_type': 'url_transparent',
+                            'ie_key': 'Youtube',
+                            'url': smuggle_url(
+                                '%s://www.youtube.com/watch?v=%s' % (proto, feed_data['id'][0]),
+                                {'force_singlefeed': True}),
+                            'title': '%s (%s)' % (video_title, feed_data['title'][0]),
+                        })
+                        feed_ids.append(feed_data['id'][0])
+                    self.to_screen(
+                        'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
+                        % (', '.join(feed_ids), video_id))
+                    return self.playlist_result(entries, video_id, video_title, video_description)
+            else:
+                self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
 
         if view_count is None:
             view_count = extract_view_count(video_info)
@@ -1776,11 +1804,34 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                                 'height': int_or_none(width_height[1]),
                             }
             q = qualities(['small', 'medium', 'hd720'])
+            streaming_formats = try_get(player_response, lambda x: x['streamingData']['formats'], list)
+            if streaming_formats:
+                for fmt in streaming_formats:
+                    itag = str_or_none(fmt.get('itag'))
+                    if not itag:
+                        continue
+                    quality = fmt.get('quality')
+                    quality_label = fmt.get('qualityLabel') or quality
+                    formats_spec[itag] = {
+                        'asr': int_or_none(fmt.get('audioSampleRate')),
+                        'filesize': int_or_none(fmt.get('contentLength')),
+                        'format_note': quality_label,
+                        'fps': int_or_none(fmt.get('fps')),
+                        'height': int_or_none(fmt.get('height')),
+                        'quality': q(quality),
+                        # bitrate for itag 43 is always 2147483647
+                        'tbr': float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000) if itag != '43' else None,
+                        'width': int_or_none(fmt.get('width')),
+                    }
             formats = []
             for url_data_str in encoded_url_map.split(','):
                 url_data = compat_parse_qs(url_data_str)
                 if 'itag' not in url_data or 'url' not in url_data:
                     continue
+                stream_type = int_or_none(try_get(url_data, lambda x: x['stream_type'][0]))
+                # Unsupported FORMAT_STREAM_TYPE_OTF
+                if stream_type == 3:
+                    continue
                 format_id = url_data['itag'][0]
                 url = url_data['url'][0]
 
@@ -1824,7 +1875,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                             else:
                                 player_version = self._search_regex(
                                     [r'html5player-([^/]+?)(?:/html5player(?:-new)?)?\.js',
-                                     r'(?:www|player)-([^/]+)(?:/[a-z]{2}_[A-Z]{2})?/base\.js'],
+                                     r'(?:www|player(?:_ias)?)-([^/]+)(?:/[a-z]{2,3}_[A-Z]{2})?/base\.js'],
                                     player_url,
                                     'html5 player', fatal=False)
                                 player_desc = 'html5 player %s' % player_version
@@ -1858,7 +1909,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 filesize = int_or_none(url_data.get(
                     'clen', [None])[0]) or _extract_filesize(url)
 
-                quality = url_data.get('quality_label', [None])[0] or url_data.get('quality', [None])[0]
+                quality = url_data.get('quality', [None])[0]
 
                 more_fields = {
                     'filesize': filesize,
@@ -1866,7 +1917,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                     'width': width,
                     'height': height,
                     'fps': int_or_none(url_data.get('fps', [None])[0]),
-                    'format_note': quality,
+                    'format_note': url_data.get('quality_label', [None])[0] or quality,
                     'quality': q(quality),
                 }
                 for key, value in more_fields.items():
@@ -1894,31 +1945,38 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                         'http_chunk_size': 10485760,
                     }
                 formats.append(dct)
-        elif video_info.get('hlsvp'):
-            manifest_url = video_info['hlsvp'][0]
-            formats = []
-            m3u8_formats = self._extract_m3u8_formats(
-                manifest_url, video_id, 'mp4', fatal=False)
-            for a_format in m3u8_formats:
-                itag = self._search_regex(
-                    r'/itag/(\d+)/', a_format['url'], 'itag', default=None)
-                if itag:
-                    a_format['format_id'] = itag
-                    if itag in self._formats:
-                        dct = self._formats[itag].copy()
-                        dct.update(a_format)
-                        a_format = dct
-                a_format['player_url'] = player_url
-                # Accept-Encoding header causes failures in live streams on Youtube and Youtube Gaming
-                a_format.setdefault('http_headers', {})['Youtubedl-no-compression'] = 'True'
-                formats.append(a_format)
         else:
-            error_message = clean_html(video_info.get('reason', [None])[0])
-            if not error_message:
-                error_message = extract_unavailable_message()
-            if error_message:
-                raise ExtractorError(error_message, expected=True)
-            raise ExtractorError('no conn, hlsvp or url_encoded_fmt_stream_map information found in video info')
+            manifest_url = (
+                url_or_none(try_get(
+                    player_response,
+                    lambda x: x['streamingData']['hlsManifestUrl'],
+                    compat_str)) or
+                url_or_none(try_get(
+                    video_info, lambda x: x['hlsvp'][0], compat_str)))
+            if manifest_url:
+                formats = []
+                m3u8_formats = self._extract_m3u8_formats(
+                    manifest_url, video_id, 'mp4', fatal=False)
+                for a_format in m3u8_formats:
+                    itag = self._search_regex(
+                        r'/itag/(\d+)/', a_format['url'], 'itag', default=None)
+                    if itag:
+                        a_format['format_id'] = itag
+                        if itag in self._formats:
+                            dct = self._formats[itag].copy()
+                            dct.update(a_format)
+                            a_format = dct
+                    a_format['player_url'] = player_url
+                    # Accept-Encoding header causes failures in live streams on Youtube and Youtube Gaming
+                    a_format.setdefault('http_headers', {})['Youtubedl-no-compression'] = 'True'
+                    formats.append(a_format)
+            else:
+                error_message = clean_html(video_info.get('reason', [None])[0])
+                if not error_message:
+                    error_message = extract_unavailable_message()
+                if error_message:
+                    raise ExtractorError(error_message, expected=True)
+                raise ExtractorError('no conn, hlsvp, hlsManifestUrl or url_encoded_fmt_stream_map information found in video info')
 
         # uploader
         video_uploader = try_get(
@@ -2006,7 +2064,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             r'<div[^>]+id="watch7-headline"[^>]*>\s*<span[^>]*>.*?>(?P<series>[^<]+)</a></b>\s*S(?P<season>\d+)\s*•\s*E(?P<episode>\d+)</span>',
             video_webpage)
         if m_episode:
-            series = m_episode.group('series')
+            series = unescapeHTML(m_episode.group('series'))
             season_number = int(m_episode.group('season'))
             episode_number = int(m_episode.group('episode'))
         else:
@@ -2116,7 +2174,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
 
         self._sort_formats(formats)
 
-        self.mark_watched(video_id, video_info)
+        self.mark_watched(video_id, video_info, player_response)
 
         return {
             'id': video_id,
diff --git a/youtube_dl/extractor/zype.py b/youtube_dl/extractor/zype.py
new file mode 100644 (file)
index 0000000..3b16e70
--- /dev/null
@@ -0,0 +1,57 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+
+
+class ZypeIE(InfoExtractor):
+    _VALID_URL = r'https?://player\.zype\.com/embed/(?P<id>[\da-fA-F]+)\.js\?.*?api_key=[^&]+'
+    _TEST = {
+        'url': 'https://player.zype.com/embed/5b400b834b32992a310622b9.js?api_key=jZ9GUhRmxcPvX7M3SlfejB6Hle9jyHTdk2jVxG7wOHPLODgncEKVdPYBhuz9iWXQ&autoplay=false&controls=true&da=false',
+        'md5': 'eaee31d474c76a955bdaba02a505c595',
+        'info_dict': {
+            'id': '5b400b834b32992a310622b9',
+            'ext': 'mp4',
+            'title': 'Smoky Barbecue Favorites',
+            'thumbnail': r're:^https?://.*\.jpe?g',
+        },
+    }
+
+    @staticmethod
+    def _extract_urls(webpage):
+        return [
+            mobj.group('url')
+            for mobj in re.finditer(
+                r'<script[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//player\.zype\.com/embed/[\da-fA-F]+\.js\?.*?api_key=.+?)\1',
+                webpage)]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, video_id)
+
+        title = self._search_regex(
+            r'video_title\s*[:=]\s*(["\'])(?P<value>(?:(?!\1).)+)\1', webpage,
+            'title', group='value')
+
+        m3u8_url = self._search_regex(
+            r'(["\'])(?P<url>(?:(?!\1).)+\.m3u8(?:(?!\1).)*)\1', webpage,
+            'm3u8 url', group='url')
+
+        formats = self._extract_m3u8_formats(
+            m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native',
+            m3u8_id='hls')
+        self._sort_formats(formats)
+
+        thumbnail = self._search_regex(
+            r'poster\s*[:=]\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage, 'thumbnail',
+            default=False, group='url')
+
+        return {
+            'id': video_id,
+            'title': title,
+            'thumbnail': thumbnail,
+            'formats': formats,
+        }
index 757b496a1c723176c04e60fb43962b0270b6cf8e..39a905380c2634509c9ca2e88558e41caf0dd899 100644 (file)
@@ -79,6 +79,20 @@ class FFmpegPostProcessor(PostProcessor):
         programs = ['avprobe', 'avconv', 'ffmpeg', 'ffprobe']
         prefer_ffmpeg = True
 
+        def get_ffmpeg_version(path):
+            ver = get_exe_version(path, args=['-version'])
+            if ver:
+                regexs = [
+                    r'(?:\d+:)?([0-9.]+)-[0-9]+ubuntu[0-9.]+$',  # Ubuntu, see [1]
+                    r'n([0-9.]+)$',  # Arch Linux
+                    # 1. http://www.ducea.com/2006/06/17/ubuntu-package-version-naming-explanation/
+                ]
+                for regex in regexs:
+                    mobj = re.match(regex, ver)
+                    if mobj:
+                        ver = mobj.group(1)
+            return ver
+
         self.basename = None
         self.probe_basename = None
 
@@ -110,11 +124,10 @@ class FFmpegPostProcessor(PostProcessor):
                 self._paths = dict(
                     (p, os.path.join(location, p)) for p in programs)
                 self._versions = dict(
-                    (p, get_exe_version(self._paths[p], args=['-version']))
-                    for p in programs)
+                    (p, get_ffmpeg_version(self._paths[p])) for p in programs)
         if self._versions is None:
             self._versions = dict(
-                (p, get_exe_version(p, args=['-version'])) for p in programs)
+                (p, get_ffmpeg_version(p)) for p in programs)
             self._paths = dict((p, p) for p in programs)
 
         if prefer_ffmpeg is False:
@@ -384,9 +397,8 @@ class FFmpegEmbedSubtitlePP(FFmpegPostProcessor):
             opts += ['-c:s', 'mov_text']
         for (i, lang) in enumerate(sub_langs):
             opts.extend(['-map', '%d:0' % (i + 1)])
-            lang_code = ISO639Utils.short2long(lang)
-            if lang_code is not None:
-                opts.extend(['-metadata:s:s:%d' % i, 'language=%s' % lang_code])
+            lang_code = ISO639Utils.short2long(lang) or lang
+            opts.extend(['-metadata:s:s:%d' % i, 'language=%s' % lang_code])
 
         temp_filename = prepend_extension(filename, 'temp')
         self._downloader.to_screen('[ffmpeg] Embedding subtitles in \'%s\'' % filename)
index e84d35d4dee2077faf89bbecb0083ca01e6273c4..d2d3c1a9fde82510f47b8fcd43c726b51bedf9d6 100644 (file)
@@ -39,6 +39,7 @@ from .compat import (
     compat_HTMLParser,
     compat_basestring,
     compat_chr,
+    compat_cookiejar,
     compat_ctypes_WINFUNCTYPE,
     compat_etree_fromstring,
     compat_expanduser,
@@ -1139,6 +1140,33 @@ class YoutubeDLHTTPSHandler(compat_urllib_request.HTTPSHandler):
             req, **kwargs)
 
 
+class YoutubeDLCookieJar(compat_cookiejar.MozillaCookieJar):
+    def save(self, filename=None, ignore_discard=False, ignore_expires=False):
+        # Store session cookies with `expires` set to 0 instead of an empty
+        # string
+        for cookie in self:
+            if cookie.expires is None:
+                cookie.expires = 0
+        compat_cookiejar.MozillaCookieJar.save(self, filename, ignore_discard, ignore_expires)
+
+    def load(self, filename=None, ignore_discard=False, ignore_expires=False):
+        compat_cookiejar.MozillaCookieJar.load(self, filename, ignore_discard, ignore_expires)
+        # Session cookies are denoted by either `expires` field set to
+        # an empty string or 0. MozillaCookieJar only recognizes the former
+        # (see [1]). So we need force the latter to be recognized as session
+        # cookies on our own.
+        # Session cookies may be important for cookies-based authentication,
+        # e.g. usually, when user does not check 'Remember me' check box while
+        # logging in on a site, some important cookies are stored as session
+        # cookies so that not recognizing them will result in failed login.
+        # 1. https://bugs.python.org/issue17164
+        for cookie in self:
+            # Treat `expires=0` cookies as session cookies
+            if cookie.expires == 0:
+                cookie.expires = None
+                cookie.discard = True
+
+
 class YoutubeDLCookieProcessor(compat_urllib_request.HTTPCookieProcessor):
     def __init__(self, cookiejar=None):
         compat_urllib_request.HTTPCookieProcessor.__init__(self, cookiejar)
@@ -2940,6 +2968,7 @@ class ISO639Utils(object):
         'gv': 'glv',
         'ha': 'hau',
         'he': 'heb',
+        'iw': 'heb',  # Replaced by he in 1989 revision
         'hi': 'hin',
         'ho': 'hmo',
         'hr': 'hrv',
@@ -2949,6 +2978,7 @@ class ISO639Utils(object):
         'hz': 'her',
         'ia': 'ina',
         'id': 'ind',
+        'in': 'ind',  # Replaced by id in 1989 revision
         'ie': 'ile',
         'ig': 'ibo',
         'ii': 'iii',
@@ -3063,6 +3093,7 @@ class ISO639Utils(object):
         'wo': 'wol',
         'xh': 'xho',
         'yi': 'yid',
+        'ji': 'yid',  # Replaced by yi in 1989 revision
         'yo': 'yor',
         'za': 'zha',
         'zh': 'zho',
@@ -3948,8 +3979,12 @@ def write_xattr(path, key, value):
 
 
 def random_birthday(year_field, month_field, day_field):
+    start_date = datetime.date(1950, 1, 1)
+    end_date = datetime.date(1995, 12, 31)
+    offset = random.randint(0, (end_date - start_date).days)
+    random_date = start_date + datetime.timedelta(offset)
     return {
-        year_field: str(random.randint(1950, 1995)),
-        month_field: str(random.randint(1, 12)),
-        day_field: str(random.randint(1, 31)),
+        year_field: str(random_date.year),
+        month_field: str(random_date.month),
+        day_field: str(random_date.day),
     }
index 7f32ad36c4350bd1e137cb464c9b0dfb32f85303..c13f3a38ad01e8f7355e886e696b2fba361e2a69 100644 (file)
@@ -1,3 +1,3 @@
 from __future__ import unicode_literals
 
-__version__ = '2018.11.07'
+__version__ = '2019.01.16'