Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/meipai.py

   1 # coding: utf-8
   2 from __future__ import unicode_literals
   3
   4 from .common import InfoExtractor
   5 from ..utils import (
   6     int_or_none,
   7     parse_duration,
   8     unified_timestamp,
   9 )
  10
  11
  12 class MeipaiIE(InfoExtractor):
  13     IE_DESC = '美拍'
  14     _VALID_URL = r'https?://(?:www\.)?meipai.com/media/(?P<id>[0-9]+)'
  15     _TESTS = [{
  16         # regular uploaded video
  17         'url': 'http://www.meipai.com/media/531697625',
  18         'md5': 'e3e9600f9e55a302daecc90825854b4f',
  19         'info_dict': {
  20             'id': '531697625',
  21             'ext': 'mp4',
  22             'title': '#葉子##阿桑##余姿昀##超級女聲#',
  23             'description': '#葉子##阿桑##余姿昀##超級女聲#',
  24             'thumbnail': r're:^https?://.*\.jpg$',
  25             'duration': 152,
  26             'timestamp': 1465492420,
  27             'upload_date': '20160609',
  28             'view_count': 35511,
  29             'creator': '她她-TATA',
  30             'tags': ['葉子', '阿桑', '余姿昀', '超級女聲'],
  31         }
  32     }, {
  33         # record of live streaming
  34         'url': 'http://www.meipai.com/media/585526361',
  35         'md5': 'ff7d6afdbc6143342408223d4f5fb99a',
  36         'info_dict': {
  37             'id': '585526361',
  38             'ext': 'mp4',
  39             'title': '姿昀和善願 練歌練琴啦😁😁😁',
  40             'description': '姿昀和善願 練歌練琴啦😁😁😁',
  41             'thumbnail': r're:^https?://.*\.jpg$',
  42             'duration': 5975,
  43             'timestamp': 1474311799,
  44             'upload_date': '20160919',
  45             'view_count': 1215,
  46             'creator': '她她-TATA',
  47         }
  48     }]
  49
  50     def _real_extract(self, url):
  51         video_id = self._match_id(url)
  52         webpage = self._download_webpage(url, video_id)
  53
  54         title = self._og_search_title(
  55             webpage, default=None) or self._html_search_regex(
  56             r'<title[^>]*>([^<]+)</title>', webpage, 'title')
  57
  58         formats = []
  59
  60         # recorded playback of live streaming
  61         m3u8_url = self._html_search_regex(
  62             r'file:\s*encodeURIComponent\((["\'])(?P<url>(?:(?!\1).)+)\1\)',
  63             webpage, 'm3u8 url', group='url', default=None)
  64         if m3u8_url:
  65             formats.extend(self._extract_m3u8_formats(
  66                 m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native',
  67                 m3u8_id='hls', fatal=False))
  68
  69         if not formats:
  70             # regular uploaded video
  71             video_url = self._search_regex(
  72                 r'data-video=(["\'])(?P<url>(?:(?!\1).)+)\1', webpage, 'video url',
  73                 group='url', default=None)
  74             if video_url:
  75                 formats.append({
  76                     'url': video_url,
  77                     'format_id': 'http',
  78                 })
  79
  80         timestamp = unified_timestamp(self._og_search_property(
  81             'video:release_date', webpage, 'release date', fatal=False))
  82
  83         tags = self._og_search_property(
  84             'video:tag', webpage, 'tags', default='').split(',')
  85
  86         view_count = int_or_none(self._html_search_meta(
  87             'interactionCount', webpage, 'view count'))
  88         duration = parse_duration(self._html_search_meta(
  89             'duration', webpage, 'duration'))
  90         creator = self._og_search_property(
  91             'video:director', webpage, 'creator', fatal=False)
  92
  93         return {
  94             'id': video_id,
  95             'title': title,
  96             'description': self._og_search_description(webpage),
  97             'thumbnail': self._og_search_thumbnail(webpage),
  98             'duration': duration,
  99             'timestamp': timestamp,
 100             'view_count': view_count,
 101             'creator': creator,
 102             'tags': tags,
 103             'formats': formats,
 104         }