]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/twitter.py
   1 from __future__ 
import unicode_literals
 
   5 from .common 
import InfoExtractor
 
   6 from ..compat 
import compat_urllib_request
 
  13 class TwitterCardIE(InfoExtractor
): 
  14     _VALID_URL 
= r
'https?://(?:www\.)?twitter\.com/i/cards/tfw/v1/(?P<id>\d+)' 
  16         'url': 'https://twitter.com/i/cards/tfw/v1/560070183650213889', 
  17         'md5': 'a74f50b310c83170319ba16de6955192', 
  19             'id': '560070183650213889', 
  21             'title': 'TwitterCard', 
  22             'thumbnail': 're:^https?://.*\.jpg$', 
  27     def _real_extract(self
, url
): 
  28         video_id 
= self
._match
_id
(url
) 
  30         # Different formats served for different User-Agents 
  32             'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20150101 Firefox/20.0 (Chrome)',  # mp4 
  33             'Mozilla/5.0 (Windows NT 5.2; WOW64; rv:38.0) Gecko/20100101 Firefox/38.0',  # webm 
  38         for user_agent 
in USER_AGENTS
: 
  39             request 
= compat_urllib_request
.Request(url
) 
  40             request
.add_header('User-Agent', user_agent
) 
  41             webpage 
= self
._download
_webpage
(request
, video_id
) 
  43             config 
= self
._parse
_json
( 
  44                 unescapeHTML(self
._search
_regex
( 
  45                     r
'data-player-config="([^"]+)"', webpage
, 'data player config')), 
  48             video_url 
= config
['playlist'][0]['source'] 
  54             m 
= re
.search(r
'/(?P<width>\d+)x(?P<height>\d+)/', video_url
) 
  57                     'width': int(m
.group('width')), 
  58                     'height': int(m
.group('height')), 
  61         self
._sort
_formats
(formats
) 
  63         thumbnail 
= config
.get('posterImageUrl') 
  64         duration 
= float_or_none(config
.get('duration')) 
  68             'title': 'TwitterCard', 
  69             'thumbnail': thumbnail
,