Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/hungama.py

   1 # coding: utf-8
   2 from __future__ import unicode_literals
   3
   4 from .common import InfoExtractor
   5 from ..utils import (
   6     int_or_none,
   7     urlencode_postdata,
   8 )
   9
  10
  11 class HungamaIE(InfoExtractor):
  12     _VALID_URL = r'''(?x)
  13                     https?://
  14                         (?:www\.)?hungama\.com/
  15                         (?:
  16                             (?:video|movie)/[^/]+/|
  17                             tv-show/(?:[^/]+/){2}\d+/episode/[^/]+/
  18                         )
  19                         (?P<id>\d+)
  20                     '''
  21     _TESTS = [{
  22         'url': 'http://www.hungama.com/video/krishna-chants/39349649/',
  23         'md5': 'a845a6d1ebd08d80c1035126d49bd6a0',
  24         'info_dict': {
  25             'id': '2931166',
  26             'ext': 'mp4',
  27             'title': 'Lucky Ali - Kitni Haseen Zindagi',
  28             'track': 'Kitni Haseen Zindagi',
  29             'artist': 'Lucky Ali',
  30             'album': 'Aks',
  31             'release_year': 2000,
  32         }
  33     }, {
  34         'url': 'https://www.hungama.com/movie/kahaani-2/44129919/',
  35         'only_matching': True,
  36     }, {
  37         'url': 'https://www.hungama.com/tv-show/padded-ki-pushup/season-1/44139461/episode/ep-02-training-sasu-pathlaag-karing/44139503/',
  38         'only_matching': True,
  39     }]
  40
  41     def _real_extract(self, url):
  42         video_id = self._match_id(url)
  43
  44         webpage = self._download_webpage(url, video_id)
  45
  46         info = self._search_json_ld(webpage, video_id)
  47
  48         m3u8_url = self._download_json(
  49             'https://www.hungama.com/index.php', video_id,
  50             data=urlencode_postdata({'content_id': video_id}), headers={
  51                 'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
  52                 'X-Requested-With': 'XMLHttpRequest',
  53             }, query={
  54                 'c': 'common',
  55                 'm': 'get_video_mdn_url',
  56             })['stream_url']
  57
  58         formats = self._extract_m3u8_formats(
  59             m3u8_url, video_id, ext='mp4', entry_protocol='m3u8_native',
  60             m3u8_id='hls')
  61         self._sort_formats(formats)
  62
  63         info.update({
  64             'id': video_id,
  65             'formats': formats,
  66         })
  67         return info
  68
  69
  70 class HungamaSongIE(InfoExtractor):
  71     _VALID_URL = r'https?://(?:www\.)?hungama\.com/song/[^/]+/(?P<id>\d+)'
  72     _TEST = {
  73         'url': 'https://www.hungama.com/song/kitni-haseen-zindagi/2931166/',
  74         'md5': 'a845a6d1ebd08d80c1035126d49bd6a0',
  75         'info_dict': {
  76             'id': '2931166',
  77             'ext': 'mp4',
  78             'title': 'Lucky Ali - Kitni Haseen Zindagi',
  79             'track': 'Kitni Haseen Zindagi',
  80             'artist': 'Lucky Ali',
  81             'album': 'Aks',
  82             'release_year': 2000,
  83         }
  84     }
  85
  86     def _real_extract(self, url):
  87         audio_id = self._match_id(url)
  88
  89         data = self._download_json(
  90             'https://www.hungama.com/audio-player-data/track/%s' % audio_id,
  91             audio_id, query={'_country': 'IN'})[0]
  92
  93         track = data['song_name']
  94         artist = data.get('singer_name')
  95
  96         m3u8_url = self._download_json(
  97             data.get('file') or data['preview_link'],
  98             audio_id)['response']['media_url']
  99
 100         formats = self._extract_m3u8_formats(
 101             m3u8_url, audio_id, ext='mp4', entry_protocol='m3u8_native',
 102             m3u8_id='hls')
 103         self._sort_formats(formats)
 104
 105         title = '%s - %s' % (artist, track) if artist else track
 106         thumbnail = data.get('img_src') or data.get('album_image')
 107
 108         return {
 109             'id': audio_id,
 110             'title': title,
 111             'thumbnail': thumbnail,
 112             'track': track,
 113             'artist': artist,
 114             'album': data.get('album_name'),
 115             'release_year': int_or_none(data.get('date')),
 116             'formats': formats,
 117         }