]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/aparat.py
   2 from __future__ 
import unicode_literals
 
   4 from .common 
import InfoExtractor
 
  11 class AparatIE(InfoExtractor
): 
  12     _VALID_URL 
= r
'https?://(?:www\.)?aparat\.com/(?:v/|video/video/embed/videohash/)(?P<id>[a-zA-Z0-9]+)' 
  15         'url': 'http://www.aparat.com/v/wP8On', 
  16         'md5': '131aca2e14fe7c4dcb3c4877ba300c89', 
  20             'title': 'تیم گلکسی 11 - زومیت', 
  23         # 'skip': 'Extremely unreliable', 
  26     def _real_extract(self
, url
): 
  27         video_id 
= self
._match
_id
(url
) 
  29         # Note: There is an easier-to-parse configuration at 
  30         # http://www.aparat.com/video/video/config/videohash/%video_id 
  31         # but the URL in there does not work 
  32         webpage 
= self
._download
_webpage
( 
  33             'http://www.aparat.com/video/video/embed/vt/frame/showvideo/yes/videohash/' + video_id
, 
  36         title 
= self
._search
_regex
(r
'\s+title:\s*"([^"]+)"', webpage
, 'title') 
  38         file_list 
= self
._parse
_json
( 
  40                 r
'fileList\s*=\s*JSON\.parse\(\'([^
\']+)\'\
)', webpage, 
  45         for item in file_list[0]: 
  46             file_url = item.get('file') 
  49             ext = mimetype2ext(item.get('type')) 
  50             label = item.get('label
') 
  54                 'format_id
': label or ext, 
  55                 'height
': int_or_none(self._search_regex( 
  56                     r'(\d
+)[pP
]', label or '', 'height
', default=None)), 
  58         self._sort_formats(formats) 
  60         thumbnail = self._search_regex( 
  61             r'image
:\s
*"([^"]+)"', webpage, 'thumbnail', fatal=False) 
  66             'thumbnail': thumbnail, 
  67             'age_limit': self._family_friendly_search(webpage),