Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/noco.py

   1 # encoding: utf-8
   2 from __future__ import unicode_literals
   3
   4 import re
   5 import time
   6 import hashlib
   7
   8 from .common import InfoExtractor
   9 from ..utils import (
  10     compat_urllib_request,
  11     compat_urllib_parse,
  12     ExtractorError,
  13     clean_html,
  14     unified_strdate,
  15     compat_str,
  16 )
  17
  18
  19 class NocoIE(InfoExtractor):
  20     _VALID_URL = r'http://(?:(?:www\.)?noco\.tv/emission/|player\.noco\.tv/\?idvideo=)(?P<id>\d+)'
  21     _LOGIN_URL = 'http://noco.tv/do.php'
  22     _API_URL_TEMPLATE = 'https://api.noco.tv/1.1/%s?ts=%s&tk=%s'
  23     _SUB_LANG_TEMPLATE = '&sub_lang=%s'
  24     _NETRC_MACHINE = 'noco'
  25
  26     _TEST = {
  27         'url': 'http://noco.tv/emission/11538/nolife/ami-ami-idol-hello-france/',
  28         'md5': '0a993f0058ddbcd902630b2047ef710e',
  29         'info_dict': {
  30             'id': '11538',
  31             'ext': 'mp4',
  32             'title': 'Ami Ami Idol - Hello! France',
  33             'description': 'md5:4eaab46ab68fa4197a317a88a53d3b86',
  34             'upload_date': '20140412',
  35             'uploader': 'Nolife',
  36             'uploader_id': 'NOL',
  37             'duration': 2851.2,
  38         },
  39         'skip': 'Requires noco account',
  40     }
  41
  42     def _real_initialize(self):
  43         self._login()
  44
  45     def _login(self):
  46         (username, password) = self._get_login_info()
  47         if username is None:
  48             return
  49
  50         login_form = {
  51             'a': 'login',
  52             'cookie': '1',
  53             'username': username,
  54             'password': password,
  55         }
  56         request = compat_urllib_request.Request(self._LOGIN_URL, compat_urllib_parse.urlencode(login_form))
  57         request.add_header('Content-Type', 'application/x-www-form-urlencoded; charset=UTF-8')
  58
  59         login = self._download_json(request, None, 'Logging in as %s' % username)
  60
  61         if 'erreur' in login:
  62             raise ExtractorError('Unable to login: %s' % clean_html(login['erreur']), expected=True)
  63
  64     def _call_api(self, path, video_id, note, sub_lang=None):
  65         ts = compat_str(int(time.time() * 1000))
  66         tk = hashlib.md5((hashlib.md5(ts.encode('ascii')).hexdigest() + '#8S?uCraTedap6a').encode('ascii')).hexdigest()
  67         url = self._API_URL_TEMPLATE % (path, ts, tk)
  68         if sub_lang:
  69             url += self._SUB_LANG_TEMPLATE % sub_lang
  70
  71         resp = self._download_json(url, video_id, note)
  72
  73         if isinstance(resp, dict) and resp.get('error'):
  74             self._raise_error(resp['error'], resp['description'])
  75
  76         return resp
  77
  78     def _raise_error(self, error, description):
  79         raise ExtractorError(
  80             '%s returned error: %s - %s' % (self.IE_NAME, error, description),
  81             expected=True)
  82
  83     def _real_extract(self, url):
  84         mobj = re.match(self._VALID_URL, url)
  85         video_id = mobj.group('id')
  86
  87         medias = self._call_api(
  88             'shows/%s/medias' % video_id,
  89             video_id, 'Downloading video JSON')
  90
  91         qualities = self._call_api(
  92             'qualities',
  93             video_id, 'Downloading qualities JSON')
  94
  95         formats = []
  96
  97         for lang, lang_dict in medias['fr']['video_list'].items():
  98             for format_id, fmt in lang_dict['quality_list'].items():
  99                 format_id_extended = '%s-%s' % (lang, format_id) if lang != 'none' else format_id
 100
 101                 video = self._call_api(
 102                     'shows/%s/video/%s/fr' % (video_id, format_id.lower()),
 103                     video_id, 'Downloading %s video JSON' % format_id_extended,
 104                     lang if lang != 'none' else None)
 105
 106                 file_url = video['file']
 107                 if not file_url:
 108                     continue
 109
 110                 if file_url in ['forbidden', 'not found']:
 111                     popmessage = video['popmessage']
 112                     self._raise_error(popmessage['title'], popmessage['message'])
 113
 114                 formats.append({
 115                     'url': file_url,
 116                     'format_id': format_id_extended,
 117                     'width': fmt['res_width'],
 118                     'height': fmt['res_lines'],
 119                     'abr': fmt['audiobitrate'],
 120                     'vbr': fmt['videobitrate'],
 121                     'filesize': fmt['filesize'],
 122                     'format_note': qualities[format_id]['quality_name'],
 123                     'preference': qualities[format_id]['priority'],
 124                 })
 125
 126         self._sort_formats(formats)
 127
 128         show = self._call_api(
 129             'shows/by_id/%s' % video_id,
 130             video_id, 'Downloading show JSON')[0]
 131
 132         upload_date = unified_strdate(show['online_date_start_utc'])
 133         uploader = show['partner_name']
 134         uploader_id = show['partner_key']
 135         duration = show['duration_ms'] / 1000.0
 136
 137         thumbnails = []
 138         for thumbnail_key, thumbnail_url in show.items():
 139             m = re.search(r'^screenshot_(?P<width>\d+)x(?P<height>\d+)$', thumbnail_key)
 140             if not m:
 141                 continue
 142             thumbnails.append({
 143                 'url': thumbnail_url,
 144                 'width': int(m.group('width')),
 145                 'height': int(m.group('height')),
 146             })
 147
 148         episode = show.get('show_TT') or show.get('show_OT')
 149         family = show.get('family_TT') or show.get('family_OT')
 150         episode_number = show.get('episode_number')
 151
 152         title = ''
 153         if family:
 154             title += family
 155         if episode_number:
 156             title += ' #' + compat_str(episode_number)
 157         if episode:
 158             title += ' - ' + episode
 159
 160         description = show.get('show_resume') or show.get('family_resume')
 161
 162         return {
 163             'id': video_id,
 164             'title': title,
 165             'description': description,
 166             'thumbnails': thumbnails,
 167             'upload_date': upload_date,
 168             'uploader': uploader,
 169             'uploader_id': uploader_id,
 170             'duration': duration,
 171             'formats': formats,
 172         }