Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/khanacademy.py

   1 from __future__ import unicode_literals
   2
   3 import re
   4
   5 from .common import InfoExtractor
   6 from ..utils import (
   7     unified_strdate,
   8 )
   9
  10
  11 class KhanAcademyIE(InfoExtractor):
  12     _VALID_URL = r'^https?://(?:(?:www|api)\.)?khanacademy\.org/(?P<key>[^/]+)/(?:[^/]+/){,2}(?P<id>[^?#/]+)(?:$|[?#])'
  13     IE_NAME = 'KhanAcademy'
  14
  15     _TESTS = [{
  16         'url': 'http://www.khanacademy.org/video/one-time-pad',
  17         'md5': '7021db7f2d47d4fff89b13177cb1e8f4',
  18         'info_dict': {
  19             'id': 'one-time-pad',
  20             'ext': 'mp4',
  21             'title': 'The one-time pad',
  22             'description': 'The perfect cipher',
  23             'duration': 176,
  24             'uploader': 'Brit Cruise',
  25             'upload_date': '20120411',
  26         }
  27     }, {
  28         'url': 'https://www.khanacademy.org/math/applied-math/cryptography',
  29         'info_dict': {
  30             'id': 'cryptography',
  31             'title': 'Journey into cryptography',
  32             'description': 'How have humans protected their secret messages through history? What has changed today?',
  33         },
  34         'playlist_mincount': 3,
  35     }]
  36
  37     def _real_extract(self, url):
  38         m = re.match(self._VALID_URL, url)
  39         video_id = m.group('id')
  40
  41         if m.group('key') == 'video':
  42             data = self._download_json(
  43                 'http://api.khanacademy.org/api/v1/videos/' + video_id,
  44                 video_id, 'Downloading video info')
  45
  46             upload_date = unified_strdate(data['date_added'])
  47             uploader = ', '.join(data['author_names'])
  48             return {
  49                 '_type': 'url_transparent',
  50                 'url': data['url'],
  51                 'id': video_id,
  52                 'title': data['title'],
  53                 'thumbnail': data['image_url'],
  54                 'duration': data['duration'],
  55                 'description': data['description'],
  56                 'uploader': uploader,
  57                 'upload_date': upload_date,
  58             }
  59         else:
  60             # topic
  61             data = self._download_json(
  62                 'http://api.khanacademy.org/api/v1/topic/' + video_id,
  63                 video_id, 'Downloading topic info')
  64
  65             entries = [
  66                 {
  67                     '_type': 'url',
  68                     'url': c['url'],
  69                     'id': c['id'],
  70                     'title': c['title'],
  71                 }
  72                 for c in data['children'] if c['kind'] in ('Video', 'Topic')]
  73
  74             return {
  75                 '_type': 'playlist',
  76                 'id': video_id,
  77                 'title': data['title'],
  78                 'description': data['description'],
  79                 'entries': entries,
  80             }