Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/noco.py

   1 # encoding: utf-8
   2 from __future__ import unicode_literals
   3
   4 import re
   5 import time
   6 import hashlib
   7
   8 from .common import InfoExtractor
   9 from ..compat import (
  10     compat_str,
  11     compat_urllib_parse,
  12     compat_urllib_request,
  13 )
  14 from ..utils import (
  15     clean_html,
  16     ExtractorError,
  17     unified_strdate,
  18 )
  19
  20
  21 class NocoIE(InfoExtractor):
  22     _VALID_URL = r'http://(?:(?:www\.)?noco\.tv/emission/|player\.noco\.tv/\?idvideo=)(?P<id>\d+)'
  23     _LOGIN_URL = 'http://noco.tv/do.php'
  24     _API_URL_TEMPLATE = 'https://api.noco.tv/1.1/%s?ts=%s&tk=%s'
  25     _SUB_LANG_TEMPLATE = '&sub_lang=%s'
  26     _NETRC_MACHINE = 'noco'
  27
  28     _TEST = {
  29         'url': 'http://noco.tv/emission/11538/nolife/ami-ami-idol-hello-france/',
  30         'md5': '0a993f0058ddbcd902630b2047ef710e',
  31         'info_dict': {
  32             'id': '11538',
  33             'ext': 'mp4',
  34             'title': 'Ami Ami Idol - Hello! France',
  35             'description': 'md5:4eaab46ab68fa4197a317a88a53d3b86',
  36             'upload_date': '20140412',
  37             'uploader': 'Nolife',
  38             'uploader_id': 'NOL',
  39             'duration': 2851.2,
  40         },
  41         'skip': 'Requires noco account',
  42     }
  43
  44     def _real_initialize(self):
  45         self._login()
  46
  47     def _login(self):
  48         (username, password) = self._get_login_info()
  49         if username is None:
  50             return
  51
  52         login_form = {
  53             'a': 'login',
  54             'cookie': '1',
  55             'username': username,
  56             'password': password,
  57         }
  58         request = compat_urllib_request.Request(self._LOGIN_URL, compat_urllib_parse.urlencode(login_form))
  59         request.add_header('Content-Type', 'application/x-www-form-urlencoded; charset=UTF-8')
  60
  61         login = self._download_json(request, None, 'Logging in as %s' % username)
  62
  63         if 'erreur' in login:
  64             raise ExtractorError('Unable to login: %s' % clean_html(login['erreur']), expected=True)
  65
  66     def _call_api(self, path, video_id, note, sub_lang=None):
  67         ts = compat_str(int(time.time() * 1000))
  68         tk = hashlib.md5((hashlib.md5(ts.encode('ascii')).hexdigest() + '#8S?uCraTedap6a').encode('ascii')).hexdigest()
  69         url = self._API_URL_TEMPLATE % (path, ts, tk)
  70         if sub_lang:
  71             url += self._SUB_LANG_TEMPLATE % sub_lang
  72
  73         resp = self._download_json(url, video_id, note)
  74
  75         if isinstance(resp, dict) and resp.get('error'):
  76             self._raise_error(resp['error'], resp['description'])
  77
  78         return resp
  79
  80     def _raise_error(self, error, description):
  81         raise ExtractorError(
  82             '%s returned error: %s - %s' % (self.IE_NAME, error, description),
  83             expected=True)
  84
  85     def _real_extract(self, url):
  86         mobj = re.match(self._VALID_URL, url)
  87         video_id = mobj.group('id')
  88
  89         medias = self._call_api(
  90             'shows/%s/medias' % video_id,
  91             video_id, 'Downloading video JSON')
  92
  93         qualities = self._call_api(
  94             'qualities',
  95             video_id, 'Downloading qualities JSON')
  96
  97         formats = []
  98
  99         for lang, lang_dict in medias['fr']['video_list'].items():
 100             for format_id, fmt in lang_dict['quality_list'].items():
 101                 format_id_extended = '%s-%s' % (lang, format_id) if lang != 'none' else format_id
 102
 103                 video = self._call_api(
 104                     'shows/%s/video/%s/fr' % (video_id, format_id.lower()),
 105                     video_id, 'Downloading %s video JSON' % format_id_extended,
 106                     lang if lang != 'none' else None)
 107
 108                 file_url = video['file']
 109                 if not file_url:
 110                     continue
 111
 112                 if file_url in ['forbidden', 'not found']:
 113                     popmessage = video['popmessage']
 114                     self._raise_error(popmessage['title'], popmessage['message'])
 115
 116                 formats.append({
 117                     'url': file_url,
 118                     'format_id': format_id_extended,
 119                     'width': fmt['res_width'],
 120                     'height': fmt['res_lines'],
 121                     'abr': fmt['audiobitrate'],
 122                     'vbr': fmt['videobitrate'],
 123                     'filesize': fmt['filesize'],
 124                     'format_note': qualities[format_id]['quality_name'],
 125                     'preference': qualities[format_id]['priority'],
 126                 })
 127
 128         self._sort_formats(formats)
 129
 130         show = self._call_api(
 131             'shows/by_id/%s' % video_id,
 132             video_id, 'Downloading show JSON')[0]
 133
 134         upload_date = unified_strdate(show['online_date_start_utc'])
 135         uploader = show['partner_name']
 136         uploader_id = show['partner_key']
 137         duration = show['duration_ms'] / 1000.0
 138
 139         thumbnails = []
 140         for thumbnail_key, thumbnail_url in show.items():
 141             m = re.search(r'^screenshot_(?P<width>\d+)x(?P<height>\d+)$', thumbnail_key)
 142             if not m:
 143                 continue
 144             thumbnails.append({
 145                 'url': thumbnail_url,
 146                 'width': int(m.group('width')),
 147                 'height': int(m.group('height')),
 148             })
 149
 150         episode = show.get('show_TT') or show.get('show_OT')
 151         family = show.get('family_TT') or show.get('family_OT')
 152         episode_number = show.get('episode_number')
 153
 154         title = ''
 155         if family:
 156             title += family
 157         if episode_number:
 158             title += ' #' + compat_str(episode_number)
 159         if episode:
 160             title += ' - ' + episode
 161
 162         description = show.get('show_resume') or show.get('family_resume')
 163
 164         return {
 165             'id': video_id,
 166             'title': title,
 167             'description': description,
 168             'thumbnails': thumbnails,
 169             'upload_date': upload_date,
 170             'uploader': uploader,
 171             'uploader_id': uploader_id,
 172             'duration': duration,
 173             'formats': formats,
 174         }