]>
 
 
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/twitter.py 
 
 
 
 
 
 
 
 
   2  from  __future__ 
import  unicode_literals
 
   6  from  . common 
import  InfoExtractor
 
  17  class  TwitterBaseIE ( InfoExtractor
):  
  18      def  _get_vmap_video_url ( self
,  vmap_url
,  video_id
):  
  19          vmap_data 
=  self
._ download
_ xml
( vmap_url
,  video_id
)  
  20          return  xpath_text ( vmap_data
,  './/MediaFile' ). strip ()  
  23  class  TwitterCardIE ( TwitterBaseIE
):  
  24      IE_NAME 
=  'twitter:card'  
  25      _VALID_URL 
=  r
'https?://(?:www\.)?twitter\.com/i/cards/tfw/v1/(?P<id>\d+)'  
  28              'url' :  'https://twitter.com/i/cards/tfw/v1/560070183650213889' ,  
  29              # MD5 checksums are different in different places  
  31                  'id' :  '560070183650213889' ,  
  33                  'title' :  'TwitterCard' ,  
  34                  'thumbnail' :  're:^https?://.*\.jpg$' ,  
  39              'url' :  'https://twitter.com/i/cards/tfw/v1/623160978427936768' ,  
  40              'md5' :  '7ee2a553b63d1bccba97fbed97d9e1c8' ,  
  42                  'id' :  '623160978427936768' ,  
  44                  'title' :  'TwitterCard' ,  
  45                  'thumbnail' :  're:^https?://.*\.jpg' ,  
  50              'url' :  'https://twitter.com/i/cards/tfw/v1/654001591733886977' ,  
  51              'md5' :  'd4724ffe6d2437886d004fa5de1043b3' ,  
  55                  'title' :  'Ubuntu 11.10 Overview' ,  
  56                  'description' :  'Take a quick peek at what \' s new and improved in Ubuntu 11.10. \n\n Once installed take a look at 10 Things to Do After Installing: http://www.omgubuntu.co.uk/2011/10/10-things-to-do-after-installing-ubuntu-11-10/' ,  
  57                  'upload_date' :  '20111013' ,  
  58                  'uploader' :  'OMG! Ubuntu!' ,  
  59                  'uploader_id' :  'omgubuntu' ,  
  61              'add_ie' : [ 'Youtube' ],  
  64              'url' :  'https://twitter.com/i/cards/tfw/v1/665289828897005568' ,  
  65              'md5' :  'ab2745d0b0ce53319a534fccaa986439' ,  
  69                  'upload_date' :  '20151113' ,  
  70                  'uploader_id' :  '1189339351084113920' ,  
  71                  'uploader' :  'ArsenalTerje' ,  
  72                  'title' :  'Vine by ArsenalTerje' ,  
  78      def  _real_extract ( self
,  url
):  
  79          video_id 
=  self
._ match
_ id
( url
)  
  81          # Different formats served for different User-Agents  
  83              'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20150101 Firefox/20.0 (Chrome)' ,   # mp4  
  84              'Mozilla/5.0 (Windows NT 5.2; WOW64; rv:38.0) Gecko/20100101 Firefox/38.0' ,   # webm  
  89          for  user_agent 
in  USER_AGENTS
:  
  90              request 
=  sanitized_Request ( url
)  
  91              request
. add_header ( 'User-Agent' ,  user_agent
)  
  92              webpage 
=  self
._ download
_ webpage
( request
,  video_id
)  
  94              iframe_url 
=  self
._ html
_ search
_ regex
(  
  95                  r
'<iframe[^>]+src="((?:https?:)?//(?:www.youtube.com/embed/[^"]+|(?:www\.)?vine\.co/v/\w+/card))"' ,  
  96                  webpage
,  'video iframe' ,  default
= None )  
  98                  return  self
. url_result ( iframe_url
)  
 100              config 
=  self
._ parse
_ json
( self
._ html
_ search
_ regex
(  
 101                  r
'data-player-config="([^"]+)"' ,  webpage
,  'data player config' ),  
 103              if  'playlist'  not in  config
:  
 104                  if  'vmapUrl'  in  config
:  
 106                          'url' :  self
._ get
_ vmap
_ video
_u rl
( config
[ 'vmapUrl' ],  video_id
),  
 108                      break    # same video regardless of UA  
 111              video_url 
=  config
[ 'playlist' ][ 0 ][ 'source' ]  
 117              m 
=  re
. search ( r
'/(?P<width>\d+)x(?P<height>\d+)/' ,  video_url
)  
 120                      'width' :  int ( m
. group ( 'width' )),  
 121                      'height' :  int ( m
. group ( 'height' )),  
 124          self
._ sort
_ formats
( formats
)  
 126          thumbnail 
=  config
. get ( 'posterImageUrl' )  
 127          duration 
=  float_or_none ( config
. get ( 'duration' ))  
 131              'title' :  'TwitterCard' ,  
 132              'thumbnail' :  thumbnail
,  
 133              'duration' :  duration
,  
 138  class  TwitterIE ( InfoExtractor
):  
 140      _VALID_URL 
=  r
'https?://(?:www\.|m\.|mobile\.)?twitter\.com/(?P<user_id>[^/]+)/status/(?P<id>\d+)'  
 141      _TEMPLATE_URL 
=  'https://twitter.com/ %s /status/ %s '  
 144          'url' :  'https://twitter.com/freethenipple/status/643211948184596480' ,  
 145          # MD5 checksums are different in different places  
 147              'id' :  '643211948184596480' ,  
 149              'title' :  'FREE THE NIPPLE - FTN supporters on Hollywood Blvd today!' ,  
 150              'thumbnail' :  're:^https?://.*\.jpg' ,  
 152              'description' :  'FREE THE NIPPLE on Twitter: "FTN supporters on Hollywood Blvd today! http://t.co/c7jHH749xJ"' ,  
 153              'uploader' :  'FREE THE NIPPLE' ,  
 154              'uploader_id' :  'freethenipple' ,  
 157          'url' :  'https://twitter.com/giphz/status/657991469417025536/photo/1' ,  
 158          'md5' :  'f36dcd5fb92bf7057f155e7d927eeb42' ,  
 160              'id' :  '657991469417025536' ,  
 162              'title' :  'Gifs - tu vai cai tu vai cai tu nao eh capaz disso tu vai cai' ,  
 163              'description' :  'Gifs on Twitter: "tu vai cai tu vai cai tu nao eh capaz disso tu vai cai https://t.co/tM46VHFlO5"' ,  
 164              'thumbnail' :  're:^https?://.*\.png' ,  
 166              'uploader_id' :  'giphz' ,  
 168          'expected_warnings' : [ 'height' ,  'width' ],  
 170          'url' :  'https://twitter.com/starwars/status/665052190608723968' ,  
 171          'md5' :  '39b7199856dee6cd4432e72c74bc69d4' ,  
 173              'id' :  '665052190608723968' ,  
 175              'title' :  'Star Wars - A new beginning is coming December 18. Watch the official 60 second #TV spot for #StarWars: #TheForceAwakens.' ,  
 176              'description' :  'Star Wars on Twitter: "A new beginning is coming December 18. Watch the official 60 second #TV spot for #StarWars: #TheForceAwakens."' ,  
 177              'uploader_id' :  'starwars' ,  
 178              'uploader' :  'Star Wars' ,  
 182      def  _real_extract ( self
,  url
):  
 183          mobj 
=  re
. match ( self
._ VALID
_U RL
,  url
)  
 184          user_id 
=  mobj
. group ( 'user_id' )  
 185          twid 
=  mobj
. group ( 'id' )  
 187          webpage 
=  self
._ download
_ webpage
( self
._ TEMPLATE
_U RL 
% ( user_id
,  twid
),  twid
)  
 189          username 
=  remove_end ( self
._ og
_ search
_ title
( webpage
),  ' on Twitter' )  
 191          title 
=  description 
=  self
._ og
_ search
_ description
( webpage
). strip ( '' ). replace ( ' \n ' ,  ' ' ). strip ( '“”' )  
 193          # strip  'https -_t.co_BJYgOjSeGA' junk from filenames  
 194          title 
=  re
. sub ( r
'\s+(https?://[^ ]+)' ,  '' ,  title
)  
 197              'uploader_id' :  user_id
,  
 198              'uploader' :  username
,  
 200              'description' :  ' %s  on Twitter: " %s "'  % ( username
,  description
),  
 201              'title' :  username 
+  ' - '  +  title
,  
 204          card_id 
=  self
._ search
_ regex
(  
 205              r
'["\' ]/ i
/ cards
/ tfw
/ v1
/( \d
+) ', webpage, ' twitter card url
', default=None)  
 207              card_url = ' https
:// twitter
. com
/ i
/ cards
/ tfw
/ v1
/ ' + card_id  
 209                  ' _type
': ' url_transparent
',  
 210                  ' ie_key
': ' TwitterCard
',  
 215          mobj = re.search(r'''(?x)  
 216              <video[^>]+class="animated-gif"(?P<more_info>[^>]+)>\s*  
 217                  <source[^>]+video-src="(?P<url>[^"]+)"  
 221              more_info = mobj.group(' more_info
')  
 222              height = int_or_none(self._search_regex(  
 223                  r' data
- height
= "(\d+)" ', more_info, ' height
', fatal=False))  
 224              width = int_or_none(self._search_regex(  
 225                  r' data
- width
= "(\d+)" ', more_info, ' width
', fatal=False))  
 226              thumbnail = self._search_regex(  
 227                  r' poster
= "([^" ]+) "', more_info, 'poster', fatal=False)  
 230                  'url': mobj.group('url'),  
 233                  'thumbnail': thumbnail,  
 237          raise ExtractorError('There \' s no video in this tweet.')  
 240  class TwitterAmplifyIE(TwitterBaseIE):  
 241      IE_NAME = 'twitter:amplify'  
 242      _VALID_URL = 'https?://amp\.twimg\.com/v/(?P<id>[0-9a-f\-] {36} )'  
 245          'url': 'https://amp.twimg.com/v/0ba0c3c7-0af3-4c0a-bed5-7efd1ffa2951',  
 246          'md5': '7df102d0b9fd7066b86f3159f8e81bf6',  
 248              'id': '0ba0c3c7-0af3-4c0a-bed5-7efd1ffa2951',  
 250              'title': 'Twitter Video',  
 251              'thumbnail': 're:^https?://.*',  
 255      def _real_extract(self, url):  
 256          video_id = self._match_id(url)  
 257          webpage = self._download_webpage(url, video_id)  
 259          vmap_url = self._html_search_meta(  
 260              'twitter:amplify:vmap', webpage, 'vmap url')  
 261          video_url = self._get_vmap_video_url(vmap_url, video_id)  
 264          thumbnail = self._html_search_meta(  
 265              'twitter:image:src', webpage, 'thumbnail', fatal=False)  
 267          def _find_dimension(target):  
 268              w = int_or_none(self._html_search_meta(  
 269                  'twitter: %s :width' % target, webpage, fatal=False))  
 270              h = int_or_none(self._html_search_meta(  
 271                  'twitter: %s :height' % target, webpage, fatal=False))  
 275              thumbnail_w, thumbnail_h = _find_dimension('image')  
 278                  'width': thumbnail_w,  
 279                  'height': thumbnail_h,  
 282          video_w, video_h = _find_dimension('player')  
 291              'title': 'Twitter Video',  
 293              'thumbnails': thumbnails,