]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/twitter.py 
   2  from  __future__ 
import  unicode_literals
   6  from  . common 
import  InfoExtractor
  17  class  TwitterBaseIE ( InfoExtractor
):   18      def  _get_vmap_video_url ( self
,  vmap_url
,  video_id
):   19          vmap_data 
=  self
._ download
_ xml
( vmap_url
,  video_id
)   20          return  xpath_text ( vmap_data
,  './/MediaFile' ). strip ()   23  class  TwitterCardIE ( TwitterBaseIE
):   24      IE_NAME 
=  'twitter:card'   25      _VALID_URL 
=  r
'https?://(?:www\.)?twitter\.com/i/cards/tfw/v1/(?P<id>\d+)'   28              'url' :  'https://twitter.com/i/cards/tfw/v1/560070183650213889' ,   29              # MD5 checksums are different in different places   31                  'id' :  '560070183650213889' ,   33                  'title' :  'TwitterCard' ,   34                  'thumbnail' :  're:^https?://.*\.jpg$' ,   39              'url' :  'https://twitter.com/i/cards/tfw/v1/623160978427936768' ,   40              'md5' :  '7ee2a553b63d1bccba97fbed97d9e1c8' ,   42                  'id' :  '623160978427936768' ,   44                  'title' :  'TwitterCard' ,   45                  'thumbnail' :  're:^https?://.*\.jpg' ,   50              'url' :  'https://twitter.com/i/cards/tfw/v1/654001591733886977' ,   51              'md5' :  'd4724ffe6d2437886d004fa5de1043b3' ,   55                  'title' :  'Ubuntu 11.10 Overview' ,   56                  'description' :  'Take a quick peek at what \' s new and improved in Ubuntu 11.10. \n\n Once installed take a look at 10 Things to Do After Installing: http://www.omgubuntu.co.uk/2011/10/10-things-to-do-after-installing-ubuntu-11-10/' ,   57                  'upload_date' :  '20111013' ,   58                  'uploader' :  'OMG! Ubuntu!' ,   59                  'uploader_id' :  'omgubuntu' ,   61              'add_ie' : [ 'Youtube' ],   64              'url' :  'https://twitter.com/i/cards/tfw/v1/665289828897005568' ,   65              'md5' :  'ab2745d0b0ce53319a534fccaa986439' ,   69                  'upload_date' :  '20151113' ,   70                  'uploader_id' :  '1189339351084113920' ,   71                  'uploader' :  'ArsenalTerje' ,   72                  'title' :  'Vine by ArsenalTerje' ,   78      def  _real_extract ( self
,  url
):   79          video_id 
=  self
._ match
_ id
( url
)   81          # Different formats served for different User-Agents   83              'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20150101 Firefox/20.0 (Chrome)' ,   # mp4   84              'Mozilla/5.0 (Windows NT 5.2; WOW64; rv:38.0) Gecko/20100101 Firefox/38.0' ,   # webm   89          for  user_agent 
in  USER_AGENTS
:   90              request 
=  sanitized_Request ( url
)   91              request
. add_header ( 'User-Agent' ,  user_agent
)   92              webpage 
=  self
._ download
_ webpage
( request
,  video_id
)   94              iframe_url 
=  self
._ html
_ search
_ regex
(   95                  r
'<iframe[^>]+src="((?:https?:)?//(?:www.youtube.com/embed/[^"]+|(?:www\.)?vine\.co/v/\w+/card))"' ,   96                  webpage
,  'video iframe' ,  default
= None )   98                  return  self
. url_result ( iframe_url
)  100              config 
=  self
._ parse
_ json
( self
._ html
_ search
_ regex
(  101                  r
'data-player-config="([^"]+)"' ,  webpage
,  'data player config' ),  103              if  'playlist'  not in  config
:  104                  if  'vmapUrl'  in  config
:  106                          'url' :  self
._ get
_ vmap
_ video
_u rl
( config
[ 'vmapUrl' ],  video_id
),  108                      break    # same video regardless of UA  111              video_url 
=  config
[ 'playlist' ][ 0 ][ 'source' ]  117              m 
=  re
. search ( r
'/(?P<width>\d+)x(?P<height>\d+)/' ,  video_url
)  120                      'width' :  int ( m
. group ( 'width' )),  121                      'height' :  int ( m
. group ( 'height' )),  124          self
._ sort
_ formats
( formats
)  126          thumbnail 
=  config
. get ( 'posterImageUrl' )  127          duration 
=  float_or_none ( config
. get ( 'duration' ))  131              'title' :  'TwitterCard' ,  132              'thumbnail' :  thumbnail
,  133              'duration' :  duration
,  138  class  TwitterIE ( InfoExtractor
):  140      _VALID_URL 
=  r
'https?://(?:www\.|m\.|mobile\.)?twitter\.com/(?P<user_id>[^/]+)/status/(?P<id>\d+)'  141      _TEMPLATE_URL 
=  'https://twitter.com/ %s /status/ %s '  144          'url' :  'https://twitter.com/freethenipple/status/643211948184596480' ,  145          # MD5 checksums are different in different places  147              'id' :  '643211948184596480' ,  149              'title' :  'FREE THE NIPPLE - FTN supporters on Hollywood Blvd today!' ,  150              'thumbnail' :  're:^https?://.*\.jpg' ,  152              'description' :  'FREE THE NIPPLE on Twitter: "FTN supporters on Hollywood Blvd today! http://t.co/c7jHH749xJ"' ,  153              'uploader' :  'FREE THE NIPPLE' ,  154              'uploader_id' :  'freethenipple' ,  157          'url' :  'https://twitter.com/giphz/status/657991469417025536/photo/1' ,  158          'md5' :  'f36dcd5fb92bf7057f155e7d927eeb42' ,  160              'id' :  '657991469417025536' ,  162              'title' :  'Gifs - tu vai cai tu vai cai tu nao eh capaz disso tu vai cai' ,  163              'description' :  'Gifs on Twitter: "tu vai cai tu vai cai tu nao eh capaz disso tu vai cai https://t.co/tM46VHFlO5"' ,  164              'thumbnail' :  're:^https?://.*\.png' ,  166              'uploader_id' :  'giphz' ,  168          'expected_warnings' : [ 'height' ,  'width' ],  170          'url' :  'https://twitter.com/starwars/status/665052190608723968' ,  171          'md5' :  '39b7199856dee6cd4432e72c74bc69d4' ,  173              'id' :  '665052190608723968' ,  175              'title' :  'Star Wars - A new beginning is coming December 18. Watch the official 60 second #TV spot for #StarWars: #TheForceAwakens.' ,  176              'description' :  'Star Wars on Twitter: "A new beginning is coming December 18. Watch the official 60 second #TV spot for #StarWars: #TheForceAwakens."' ,  177              'uploader_id' :  'starwars' ,  178              'uploader' :  'Star Wars' ,  182      def  _real_extract ( self
,  url
):  183          mobj 
=  re
. match ( self
._ VALID
_U RL
,  url
)  184          user_id 
=  mobj
. group ( 'user_id' )  185          twid 
=  mobj
. group ( 'id' )  187          webpage 
=  self
._ download
_ webpage
( self
._ TEMPLATE
_U RL 
% ( user_id
,  twid
),  twid
)  189          username 
=  remove_end ( self
._ og
_ search
_ title
( webpage
),  ' on Twitter' )  191          title 
=  description 
=  self
._ og
_ search
_ description
( webpage
). strip ( '' ). replace ( ' \n ' ,  ' ' ). strip ( '“”' )  193          # strip  'https -_t.co_BJYgOjSeGA' junk from filenames  194          title 
=  re
. sub ( r
'\s+(https?://[^ ]+)' ,  '' ,  title
)  197              'uploader_id' :  user_id
,  198              'uploader' :  username
,  200              'description' :  ' %s  on Twitter: " %s "'  % ( username
,  description
),  201              'title' :  username 
+  ' - '  +  title
,  204          card_id 
=  self
._ search
_ regex
(  205              r
'["\' ]/ i
/ cards
/ tfw
/ v1
/( \d
+) ', webpage, ' twitter card url
', default=None)  207              card_url = ' https
:// twitter
. com
/ i
/ cards
/ tfw
/ v1
/ ' + card_id  209                  ' _type
': ' url_transparent
',  210                  ' ie_key
': ' TwitterCard
',  215          mobj = re.search(r'''(?x)  216              <video[^>]+class="animated-gif"(?P<more_info>[^>]+)>\s*  217                  <source[^>]+video-src="(?P<url>[^"]+)"  221              more_info = mobj.group(' more_info
')  222              height = int_or_none(self._search_regex(  223                  r' data
- height
= "(\d+)" ', more_info, ' height
', fatal=False))  224              width = int_or_none(self._search_regex(  225                  r' data
- width
= "(\d+)" ', more_info, ' width
', fatal=False))  226              thumbnail = self._search_regex(  227                  r' poster
= "([^" ]+) "', more_info, 'poster', fatal=False)  230                  'url': mobj.group('url'),  233                  'thumbnail': thumbnail,  237          raise ExtractorError('There \' s no video in this tweet.')  240  class TwitterAmplifyIE(TwitterBaseIE):  241      IE_NAME = 'twitter:amplify'  242      _VALID_URL = 'https?://amp\.twimg\.com/v/(?P<id>[0-9a-f\-] {36} )'  245          'url': 'https://amp.twimg.com/v/0ba0c3c7-0af3-4c0a-bed5-7efd1ffa2951',  246          'md5': '7df102d0b9fd7066b86f3159f8e81bf6',  248              'id': '0ba0c3c7-0af3-4c0a-bed5-7efd1ffa2951',  250              'title': 'Twitter Video',  251              'thumbnail': 're:^https?://.*',  255      def _real_extract(self, url):  256          video_id = self._match_id(url)  257          webpage = self._download_webpage(url, video_id)  259          vmap_url = self._html_search_meta(  260              'twitter:amplify:vmap', webpage, 'vmap url')  261          video_url = self._get_vmap_video_url(vmap_url, video_id)  264          thumbnail = self._html_search_meta(  265              'twitter:image:src', webpage, 'thumbnail', fatal=False)  267          def _find_dimension(target):  268              w = int_or_none(self._html_search_meta(  269                  'twitter: %s :width' % target, webpage, fatal=False))  270              h = int_or_none(self._html_search_meta(  271                  'twitter: %s :height' % target, webpage, fatal=False))  275              thumbnail_w, thumbnail_h = _find_dimension('image')  278                  'width': thumbnail_w,  279                  'height': thumbnail_h,  282          video_w, video_h = _find_dimension('player')  291              'title': 'Twitter Video',  293              'thumbnails': thumbnails,