]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/aparat.py
   2 from __future__ 
import unicode_literals
 
   4 from .common 
import InfoExtractor
 
  12 class AparatIE(InfoExtractor
): 
  13     _VALID_URL 
= r
'https?://(?:www\.)?aparat\.com/(?:v/|video/video/embed/videohash/)(?P<id>[a-zA-Z0-9]+)' 
  16         'url': 'http://www.aparat.com/v/wP8On', 
  17         'md5': '131aca2e14fe7c4dcb3c4877ba300c89', 
  21             'title': 'تیم گلکسی 11 - زومیت', 
  24         # 'skip': 'Extremely unreliable', 
  27     def _real_extract(self
, url
): 
  28         video_id 
= self
._match
_id
(url
) 
  30         # Note: There is an easier-to-parse configuration at 
  31         # http://www.aparat.com/video/video/config/videohash/%video_id 
  32         # but the URL in there does not work 
  33         webpage 
= self
._download
_webpage
( 
  34             'http://www.aparat.com/video/video/embed/vt/frame/showvideo/yes/videohash/' + video_id
, 
  37         title 
= self
._search
_regex
(r
'\s+title:\s*"([^"]+)"', webpage
, 'title') 
  39         file_list 
= self
._parse
_json
( 
  41                 r
'fileList\s*=\s*JSON\.parse\(\'([^
\']+)\'\
)', webpage, 
  46         for item in file_list[0]: 
  47             file_url = url_or_none(item.get('file')) 
  50             ext = mimetype2ext(item.get('type')) 
  51             label = item.get('label
') 
  55                 'format_id
': label or ext, 
  56                 'height
': int_or_none(self._search_regex( 
  57                     r'(\d
+)[pP
]', label or '', 'height
', default=None)), 
  59         self._sort_formats(formats) 
  61         thumbnail = self._search_regex( 
  62             r'image
:\s
*"([^"]+)"', webpage, 'thumbnail', fatal=False) 
  67             'thumbnail': thumbnail, 
  68             'age_limit': self._family_friendly_search(webpage),