]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/twitter.py 
   2  from  __future__ 
import  unicode_literals
   6  from  . common 
import  InfoExtractor
  17  class  TwitterBaseIE ( InfoExtractor
):   18      def  _get_vmap_video_url ( self
,  vmap_url
,  video_id
):   19          vmap_data 
=  self
._ download
_ xml
( vmap_url
,  video_id
)   20          return  xpath_text ( vmap_data
,  './/MediaFile' ). strip ()   23  class  TwitterCardIE ( TwitterBaseIE
):   24      IE_NAME 
=  'twitter:card'   25      _VALID_URL 
=  r
'https?://(?:www\.)?twitter\.com/i/(?:cards/tfw/v1|videos/tweet)/(?P<id>\d+)'   28              'url' :  'https://twitter.com/i/cards/tfw/v1/560070183650213889' ,   29              # MD5 checksums are different in different places   31                  'id' :  '560070183650213889' ,   33                  'title' :  'Twitter Card' ,   34                  'thumbnail' :  're:^https?://.*\.jpg$' ,   39              'url' :  'https://twitter.com/i/cards/tfw/v1/623160978427936768' ,   40              'md5' :  '7ee2a553b63d1bccba97fbed97d9e1c8' ,   42                  'id' :  '623160978427936768' ,   44                  'title' :  'Twitter Card' ,   45                  'thumbnail' :  're:^https?://.*\.jpg' ,   50              'url' :  'https://twitter.com/i/cards/tfw/v1/654001591733886977' ,   51              'md5' :  'd4724ffe6d2437886d004fa5de1043b3' ,   55                  'title' :  'Ubuntu 11.10 Overview' ,   56                  'description' :  'Take a quick peek at what \' s new and improved in Ubuntu 11.10. \n\n Once installed take a look at 10 Things to Do After Installing: http://www.omgubuntu.co.uk/2011/10/10...' ,   57                  'upload_date' :  '20111013' ,   58                  'uploader' :  'OMG! Ubuntu!' ,   59                  'uploader_id' :  'omgubuntu' ,   61              'add_ie' : [ 'Youtube' ],   64              'url' :  'https://twitter.com/i/cards/tfw/v1/665289828897005568' ,   65              'md5' :  'ab2745d0b0ce53319a534fccaa986439' ,   69                  'upload_date' :  '20151113' ,   70                  'uploader_id' :  '1189339351084113920' ,   71                  'uploader' :  'ArsenalTerje' ,   72                  'title' :  'Vine by ArsenalTerje' ,   76              'url' :  'https://twitter.com/i/videos/tweet/705235433198714880' ,   77              'md5' :  '3846d0a07109b5ab622425449b59049d' ,   79                  'id' :  '705235433198714880' ,   81                  'title' :  'Twitter web player' ,   82                  'thumbnail' :  're:^https?://.*\.jpg' ,   87      def  _real_extract ( self
,  url
):   88          video_id 
=  self
._ match
_ id
( url
)   94          webpage 
=  self
._ download
_ webpage
( url
,  video_id
)   96          iframe_url 
=  self
._ html
_ search
_ regex
(   97              r
'<iframe[^>]+src="((?:https?:)?//(?:www.youtube.com/embed/[^"]+|(?:www\.)?vine\.co/v/\w+/card))"' ,   98              webpage
,  'video iframe' ,  default
= None )  100              return  self
. url_result ( iframe_url
)  102          config 
=  self
._ parse
_ json
( self
._ html
_ search
_ regex
(  103              r
'data-(?:player-)?config="([^"]+)"' ,  webpage
,  'data player config' ),  106          if  config
. get ( 'source_type' ) ==  'vine' :  107              return  self
. url_result ( config
[ 'player_url' ],  'Vine' )  109          def  _search_dimensions_in_video_url ( a_format
,  video_url
):  110              m 
=  re
. search ( r
'/(?P<width>\d+)x(?P<height>\d+)/' ,  video_url
)  113                      'width' :  int ( m
. group ( 'width' )),  114                      'height' :  int ( m
. group ( 'height' )),  117          video_url 
=  config
. get ( 'video_url' )  or  config
. get ( 'playlist' , [{}])[ 0 ]. get ( 'source' )  120              if  determine_ext ( video_url
) ==  'm3u8' :  121                  formats
. extend ( self
._ extract
_ m
3u8_ formats
( video_url
,  video_id
,  ext
= 'mp4' ,  m3u8_id
= 'hls' ))  127                  _search_dimensions_in_video_url ( f
,  video_url
)  131          vmap_url 
=  config
. get ( 'vmapUrl' )  or  config
. get ( 'vmap_url' )  134                  'url' :  self
._ get
_ vmap
_ video
_u rl
( vmap_url
,  video_id
),  139          for  entity 
in  config
. get ( 'status' , {}). get ( 'entities' , []):  140              if  'mediaInfo'  in  entity
:  141                  media_info 
=  entity
[ 'mediaInfo' ]  144              for  media_variant 
in  media_info
[ 'variants' ]:  145                  media_url 
=  media_variant
[ 'url' ]  146                  if  media_url
. endswith ( '.m3u8' ):  147                      formats
. extend ( self
._ extract
_ m
3u8_ formats
( media_url
,  video_id
,  ext
= 'mp4' ,  m3u8_id
= 'hls' ))  148                  elif  media_url
. endswith ( '.mpd' ):  149                      formats
. extend ( self
._ extract
_ mpd
_ formats
( media_url
,  video_id
,  mpd_id
= 'dash' ))  151                      vbr 
=  int_or_none ( media_variant
. get ( 'bitRate' ),  scale
= 1000 )  154                          'format_id' :  'http- %d '  %  vbr 
if  vbr 
else  'http' ,  157                      # Reported bitRate may be zero  158                      if not  a_format
[ 'vbr' ]:  161                      _search_dimensions_in_video_url ( a_format
,  media_url
)  163                      formats
. append ( a_format
)  165              duration 
=  float_or_none ( media_info
. get ( 'duration' , {}). get ( 'nanos' ),  scale
= 1 e9
)  167          self
._ sort
_ formats
( formats
)  169          title 
=  self
._ search
_ regex
( r
'<title>([^<]+)</title>' ,  webpage
,  'title' )  170          thumbnail 
=  config
. get ( 'posterImageUrl' )  or  config
. get ( 'image_src' )  171          duration 
=  float_or_none ( config
. get ( 'duration' ))  or  duration
 176              'thumbnail' :  thumbnail
,  177              'duration' :  duration
,  182  class  TwitterIE ( InfoExtractor
):  184      _VALID_URL 
=  r
'https?://(?:www\.|m\.|mobile\.)?twitter\.com/(?P<user_id>[^/]+)/status/(?P<id>\d+)'  185      _TEMPLATE_URL 
=  'https://twitter.com/ %s /status/ %s '  188          'url' :  'https://twitter.com/freethenipple/status/643211948184596480' ,  190              'id' :  '643211948184596480' ,  192              'title' :  'FREE THE NIPPLE - FTN supporters on Hollywood Blvd today!' ,  193              'thumbnail' :  're:^https?://.*\.jpg' ,  194              'description' :  'FREE THE NIPPLE on Twitter: "FTN supporters on Hollywood Blvd today! http://t.co/c7jHH749xJ"' ,  195              'uploader' :  'FREE THE NIPPLE' ,  196              'uploader_id' :  'freethenipple' ,  199              'skip_download' :  True ,   # requires ffmpeg  202          'url' :  'https://twitter.com/giphz/status/657991469417025536/photo/1' ,  203          'md5' :  'f36dcd5fb92bf7057f155e7d927eeb42' ,  205              'id' :  '657991469417025536' ,  207              'title' :  'Gifs - tu vai cai tu vai cai tu nao eh capaz disso tu vai cai' ,  208              'description' :  'Gifs on Twitter: "tu vai cai tu vai cai tu nao eh capaz disso tu vai cai https://t.co/tM46VHFlO5"' ,  209              'thumbnail' :  're:^https?://.*\.png' ,  211              'uploader_id' :  'giphz' ,  213          'expected_warnings' : [ 'height' ,  'width' ],  214          'skip' :  'Account suspended' ,  216          'url' :  'https://twitter.com/starwars/status/665052190608723968' ,  217          'md5' :  '39b7199856dee6cd4432e72c74bc69d4' ,  219              'id' :  '665052190608723968' ,  221              'title' :  'Star Wars - A new beginning is coming December 18. Watch the official 60 second #TV spot for #StarWars: #TheForceAwakens.' ,  222              'description' :  'Star Wars on Twitter: "A new beginning is coming December 18. Watch the official 60 second #TV spot for #StarWars: #TheForceAwakens."' ,  223              'uploader_id' :  'starwars' ,  224              'uploader' :  'Star Wars' ,  227          'url' :  'https://twitter.com/BTNBrentYarina/status/705235433198714880' ,  229              'id' :  '705235433198714880' ,  231              'title' :  'Brent Yarina - Khalil Iverson \' s missed highlight dunk. And made highlight dunk. In one highlight.' ,  232              'description' :  'Brent Yarina on Twitter: "Khalil Iverson \' s missed highlight dunk. And made highlight dunk. In one highlight."' ,  233              'uploader_id' :  'BTNBrentYarina' ,  234              'uploader' :  'Brent Yarina' ,  237              # The same video as https://twitter.com/i/videos/tweet/705235433198714880  238              # Test case of TwitterCardIE  239              'skip_download' :  True ,  242          'url' :  'https://twitter.com/jaydingeer/status/700207533655363584' ,  245              'id' :  '700207533655363584' ,  247              'title' :  'Donte The Dumbass - BEAT PROD: @suhmeduh #Damndaniel' ,  248              'description' :  'Donte The Dumbass on Twitter: "BEAT PROD: @suhmeduh  https://t.co/HBrQ4AfpvZ #Damndaniel https://t.co/byBooq2ejZ"' ,  249              'thumbnail' :  're:^https?://.*\.jpg' ,  250              'uploader' :  'Donte The Dumbass' ,  251              'uploader_id' :  'jaydingeer' ,  254              'skip_download' :  True ,   # requires ffmpeg  257          'url' :  'https://twitter.com/Filmdrunk/status/713801302971588609' ,  258          'md5' :  '89a15ed345d13b86e9a5a5e051fa308a' ,  262              'title' :  'Dr.Pepperの飲み方 #japanese #バカ #ドクペ #電動ガン' ,  263              'uploader' :  'TAKUMA' ,  264              'uploader_id' :  '1004126642786242560' ,  265              'upload_date' :  '20140615' ,  269          'url' :  'https://twitter.com/captainamerica/status/719944021058060289' ,  271              'id' :  '719944021058060289' ,  273              'title' :  'Captain America - @King0fNerd Are you sure you made the right choice? Find out in theaters.' ,  274              'description' :  'Captain America on Twitter: "@King0fNerd Are you sure you made the right choice? Find out in theaters. https://t.co/GpgYi9xMJI"' ,  275              'uploader_id' :  'captainamerica' ,  276              'uploader' :  'Captain America' ,  279              'skip_download' :  True ,   # requires ffmpeg  283      def  _real_extract ( self
,  url
):  284          mobj 
=  re
. match ( self
._ VALID
_U RL
,  url
)  285          user_id 
=  mobj
. group ( 'user_id' )  286          twid 
=  mobj
. group ( 'id' )  288          webpage
,  urlh 
=  self
._ download
_ webpage
_ handle
(  289              self
._ TEMPLATE
_U RL 
% ( user_id
,  twid
),  twid
)  291          if  'twitter.com/account/suspended'  in  urlh
. geturl ():  292              raise  ExtractorError ( 'Account suspended by Twitter.' ,  expected
= True )  294          username 
=  remove_end ( self
._ og
_ search
_ title
( webpage
),  ' on Twitter' )  296          title 
=  description 
=  self
._ og
_ search
_ description
( webpage
). strip ( '' ). replace ( ' \n ' ,  ' ' ). strip ( '“”' )  298          # strip  'https -_t.co_BJYgOjSeGA' junk from filenames  299          title 
=  re
. sub ( r
'\s+(https?://[^ ]+)' ,  '' ,  title
)  302              'uploader_id' :  user_id
,  303              'uploader' :  username
,  305              'description' :  ' %s  on Twitter: " %s "'  % ( username
,  description
),  306              'title' :  username 
+  ' - '  +  title
,  309          mobj 
=  re
. search ( r
'''(?x)  310              <video[^>]+class="animated-gif"(?P<more_info>[^>]+)>\s*  311                  <source[^>]+video-src="(?P<url>[^"]+)"  315              more_info 
=  mobj
. group ( 'more_info' )  316              height 
=  int_or_none ( self
._ search
_ regex
(  317                  r
'data-height="(\d+)"' ,  more_info
,  'height' ,  fatal
= False ))  318              width 
=  int_or_none ( self
._ search
_ regex
(  319                  r
'data-width="(\d+)"' ,  more_info
,  'width' ,  fatal
= False ))  320              thumbnail 
=  self
._ search
_ regex
(  321                  r
'poster="([^"]+)"' ,  more_info
,  'poster' ,  fatal
= False )  324                  'url' :  mobj
. group ( 'url' ),  327                  'thumbnail' :  thumbnail
,  331          if  'class="PlayableMedia'  in  webpage
:  333                  '_type' :  'url_transparent' ,  334                  'ie_key' :  'TwitterCard' ,  335                  'url' :  ' %s //twitter.com/i/videos/tweet/ %s '  % ( self
. http_scheme (),  twid
),  340          raise  ExtractorError ( 'There \' s no video in this tweet.' )  343  class  TwitterAmplifyIE ( TwitterBaseIE
):  344      IE_NAME 
=  'twitter:amplify'  345      _VALID_URL 
=  'https?://amp\.twimg\.com/v/(?P<id>[0-9a-f\-] {36} )'  348          'url' :  'https://amp.twimg.com/v/0ba0c3c7-0af3-4c0a-bed5-7efd1ffa2951' ,  349          'md5' :  '7df102d0b9fd7066b86f3159f8e81bf6' ,  351              'id' :  '0ba0c3c7-0af3-4c0a-bed5-7efd1ffa2951' ,  353              'title' :  'Twitter Video' ,  354              'thumbnail' :  're:^https?://.*' ,  358      def  _real_extract ( self
,  url
):  359          video_id 
=  self
._ match
_ id
( url
)  360          webpage 
=  self
._ download
_ webpage
( url
,  video_id
)  362          vmap_url 
=  self
._ html
_ search
_ meta
(  363              'twitter:amplify:vmap' ,  webpage
,  'vmap url' )  364          video_url 
=  self
._ get
_ vmap
_ video
_u rl
( vmap_url
,  video_id
)  367          thumbnail 
=  self
._ html
_ search
_ meta
(  368              'twitter:image:src' ,  webpage
,  'thumbnail' ,  fatal
= False )  370          def  _find_dimension ( target
):  371              w 
=  int_or_none ( self
._ html
_ search
_ meta
(  372                  'twitter: %s :width'  %  target
,  webpage
,  fatal
= False ))  373              h 
=  int_or_none ( self
._ html
_ search
_ meta
(  374                  'twitter: %s :height'  %  target
,  webpage
,  fatal
= False ))  378              thumbnail_w
,  thumbnail_h 
=  _find_dimension ( 'image' )  381                  'width' :  thumbnail_w
,  382                  'height' :  thumbnail_h
,  385          video_w
,  video_h 
=  _find_dimension ( 'player' )  394              'title' :  'Twitter Video' ,  396              'thumbnails' :  thumbnails
,