Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/curiositystream.py

   1 # coding: utf-8
   2 from __future__ import unicode_literals
   3
   4 import re
   5
   6 from .common import InfoExtractor
   7 from ..utils import (
   8     int_or_none,
   9     urlencode_postdata,
  10     compat_str,
  11     ExtractorError,
  12 )
  13
  14
  15 class CuriosityStreamBaseIE(InfoExtractor):
  16     _NETRC_MACHINE = 'curiositystream'
  17     _auth_token = None
  18     _API_BASE_URL = 'https://api.curiositystream.com/v1/'
  19
  20     def _handle_errors(self, result):
  21         error = result.get('error', {}).get('message')
  22         if error:
  23             if isinstance(error, dict):
  24                 error = ', '.join(error.values())
  25             raise ExtractorError(
  26                 '%s said: %s' % (self.IE_NAME, error), expected=True)
  27
  28     def _call_api(self, path, video_id):
  29         headers = {}
  30         if self._auth_token:
  31             headers['X-Auth-Token'] = self._auth_token
  32         result = self._download_json(
  33             self._API_BASE_URL + path, video_id, headers=headers)
  34         self._handle_errors(result)
  35         return result['data']
  36
  37     def _real_initialize(self):
  38         email, password = self._get_login_info()
  39         if email is None:
  40             return
  41         result = self._download_json(
  42             self._API_BASE_URL + 'login', None, data=urlencode_postdata({
  43                 'email': email,
  44                 'password': password,
  45             }))
  46         self._handle_errors(result)
  47         self._auth_token = result['message']['auth_token']
  48
  49
  50 class CuriosityStreamIE(CuriosityStreamBaseIE):
  51     IE_NAME = 'curiositystream'
  52     _VALID_URL = r'https?://(?:app\.)?curiositystream\.com/video/(?P<id>\d+)'
  53     _TEST = {
  54         'url': 'https://app.curiositystream.com/video/2',
  55         'md5': '262bb2f257ff301115f1973540de8983',
  56         'info_dict': {
  57             'id': '2',
  58             'ext': 'mp4',
  59             'title': 'How Did You Develop The Internet?',
  60             'description': 'Vint Cerf, Google\'s Chief Internet Evangelist, describes how he and Bob Kahn created the internet.',
  61         }
  62     }
  63
  64     def _real_extract(self, url):
  65         video_id = self._match_id(url)
  66         media = self._call_api('media/' + video_id, video_id)
  67         title = media['title']
  68
  69         formats = []
  70         for encoding in media.get('encodings', []):
  71             m3u8_url = encoding.get('master_playlist_url')
  72             if m3u8_url:
  73                 formats.extend(self._extract_m3u8_formats(
  74                     m3u8_url, video_id, 'mp4', 'm3u8_native',
  75                     m3u8_id='hls', fatal=False))
  76             encoding_url = encoding.get('url')
  77             file_url = encoding.get('file_url')
  78             if not encoding_url and not file_url:
  79                 continue
  80             f = {
  81                 'width': int_or_none(encoding.get('width')),
  82                 'height': int_or_none(encoding.get('height')),
  83                 'vbr': int_or_none(encoding.get('video_bitrate')),
  84                 'abr': int_or_none(encoding.get('audio_bitrate')),
  85                 'filesize': int_or_none(encoding.get('size_in_bytes')),
  86                 'vcodec': encoding.get('video_codec'),
  87                 'acodec': encoding.get('audio_codec'),
  88                 'container': encoding.get('container_type'),
  89             }
  90             for f_url in (encoding_url, file_url):
  91                 if not f_url:
  92                     continue
  93                 fmt = f.copy()
  94                 rtmp = re.search(r'^(?P<url>rtmpe?://(?P<host>[^/]+)/(?P<app>.+))/(?P<playpath>mp[34]:.+)$', f_url)
  95                 if rtmp:
  96                     fmt.update({
  97                         'url': rtmp.group('url'),
  98                         'play_path': rtmp.group('playpath'),
  99                         'app': rtmp.group('app'),
 100                         'ext': 'flv',
 101                         'format_id': 'rtmp',
 102                     })
 103                 else:
 104                     fmt.update({
 105                         'url': f_url,
 106                         'format_id': 'http',
 107                     })
 108                 formats.append(fmt)
 109         self._sort_formats(formats)
 110
 111         subtitles = {}
 112         for closed_caption in media.get('closed_captions', []):
 113             sub_url = closed_caption.get('file')
 114             if not sub_url:
 115                 continue
 116             lang = closed_caption.get('code') or closed_caption.get('language') or 'en'
 117             subtitles.setdefault(lang, []).append({
 118                 'url': sub_url,
 119             })
 120
 121         return {
 122             'id': video_id,
 123             'formats': formats,
 124             'title': title,
 125             'description': media.get('description'),
 126             'thumbnail': media.get('image_large') or media.get('image_medium') or media.get('image_small'),
 127             'duration': int_or_none(media.get('duration')),
 128             'tags': media.get('tags'),
 129             'subtitles': subtitles,
 130         }
 131
 132
 133 class CuriosityStreamCollectionIE(CuriosityStreamBaseIE):
 134     IE_NAME = 'curiositystream:collection'
 135     _VALID_URL = r'https?://(?:app\.)?curiositystream\.com/(?:collection|series)/(?P<id>\d+)'
 136     _TESTS = [{
 137         'url': 'https://app.curiositystream.com/collection/2',
 138         'info_dict': {
 139             'id': '2',
 140             'title': 'Curious Minds: The Internet',
 141             'description': 'How is the internet shaping our lives in the 21st Century?',
 142         },
 143         'playlist_mincount': 17,
 144     }, {
 145         'url': 'https://curiositystream.com/series/2',
 146         'only_matching': True,
 147     }]
 148
 149     def _real_extract(self, url):
 150         collection_id = self._match_id(url)
 151         collection = self._call_api(
 152             'collections/' + collection_id, collection_id)
 153         entries = []
 154         for media in collection.get('media', []):
 155             media_id = compat_str(media.get('id'))
 156             entries.append(self.url_result(
 157                 'https://curiositystream.com/video/' + media_id,
 158                 CuriosityStreamIE.ie_key(), media_id))
 159         return self.playlist_result(
 160             entries, collection_id,
 161             collection.get('title'), collection.get('description'))