]>
 
 
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/twitter.py 
 
 
 
 
 
 
 
 
   2  from  __future__ 
import  unicode_literals
 
   6  from  . common 
import  InfoExtractor
 
   7  from  .. compat 
import  compat_urllib_request
 
  15  class  TwitterCardIE ( InfoExtractor
):  
  16      IE_NAME 
=  'twitter:card'  
  17      _VALID_URL 
=  r
'https?://(?:www\.)?twitter\.com/i/cards/tfw/v1/(?P<id>\d+)'  
  20              'url' :  'https://twitter.com/i/cards/tfw/v1/560070183650213889' ,  
  21              'md5' :  '7d2f6b4d2eb841a7ccc893d479bfceb4' ,  
  23                  'id' :  '560070183650213889' ,  
  25                  'title' :  'TwitterCard' ,  
  26                  'thumbnail' :  're:^https?://.*\.jpg$' ,  
  31              'url' :  'https://twitter.com/i/cards/tfw/v1/623160978427936768' ,  
  32              'md5' :  '7ee2a553b63d1bccba97fbed97d9e1c8' ,  
  34                  'id' :  '623160978427936768' ,  
  36                  'title' :  'TwitterCard' ,  
  37                  'thumbnail' :  're:^https?://.*\.jpg' ,  
  42              'url' :  'https://twitter.com/i/cards/tfw/v1/654001591733886977' ,  
  43              'md5' :  'b6f35e8b08a0bec6c8af77a2f4b3a814' ,  
  47                  'title' :  'Ubuntu 11.10 Overview' ,  
  48                  'description' :  'Take a quick peek at what \' s new and improved in Ubuntu 11.10. \n\n Once installed take a look at 10 Things to Do After Installing: http://www.omgubuntu.co.uk/2011/10/10-things-to-do-after-installing-ubuntu-11-10/' ,  
  49                  'upload_date' :  '20111013' ,  
  50                  'uploader' :  'OMG! Ubuntu!' ,  
  51                  'uploader_id' :  'omgubuntu' ,  
  56      def  _real_extract ( self
,  url
):  
  57          video_id 
=  self
._ match
_ id
( url
)  
  59          # Different formats served for different User-Agents  
  61              'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20150101 Firefox/20.0 (Chrome)' ,   # mp4  
  62              'Mozilla/5.0 (Windows NT 5.2; WOW64; rv:38.0) Gecko/20100101 Firefox/38.0' ,   # webm  
  67          for  user_agent 
in  USER_AGENTS
:  
  68              request 
=  compat_urllib_request
. Request ( url
)  
  69              request
. add_header ( 'User-Agent' ,  user_agent
)  
  70              webpage 
=  self
._ download
_ webpage
( request
,  video_id
)  
  72              youtube_url 
=  self
._ html
_ search
_ regex
(  
  73                  r
'<iframe[^>]+src="((?:https?:)?//www.youtube.com/embed/[^"]+)"' ,  
  74                  webpage
,  'youtube iframe' ,  default
= None )  
  76                  return  self
. url_result ( youtube_url
,  'Youtube' )  
  78              config 
=  self
._ parse
_ json
( self
._ html
_ search
_ regex
(  
  79                  r
'data-player-config="([^"]+)"' ,  webpage
,  'data player config' ),  
  81              if  'playlist'  not in  config
:  
  82                  if  'vmapUrl'  in  config
:  
  83                      vmap_data 
=  self
._ download
_ xml
( config
[ 'vmapUrl' ],  video_id
)  
  84                      video_url 
=  xpath_text ( vmap_data
,  './/MediaFile' ). strip ()  
  88                      break    # same video regardless of UA  
  91              video_url 
=  config
[ 'playlist' ][ 0 ][ 'source' ]  
  97              m 
=  re
. search ( r
'/(?P<width>\d+)x(?P<height>\d+)/' ,  video_url
)  
 100                      'width' :  int ( m
. group ( 'width' )),  
 101                      'height' :  int ( m
. group ( 'height' )),  
 104          self
._ sort
_ formats
( formats
)  
 106          thumbnail 
=  config
. get ( 'posterImageUrl' )  
 107          duration 
=  float_or_none ( config
. get ( 'duration' ))  
 111              'title' :  'TwitterCard' ,  
 112              'thumbnail' :  thumbnail
,  
 113              'duration' :  duration
,  
 118  class  TwitterIE ( InfoExtractor
):  
 120      _VALID_URL 
=  r
'https?://(?:www\.|m\.|mobile\.)?twitter\.com/(?P<user_id>[^/]+)/status/(?P<id>\d+)'  
 121      _TEMPLATE_URL 
=  'https://twitter.com/ %s /status/ %s '  
 124          'url' :  'https://twitter.com/freethenipple/status/643211948184596480' ,  
 125          'md5' :  '31cd83a116fc41f99ae3d909d4caf6a0' ,  
 127              'id' :  '643211948184596480' ,  
 129              'title' :  'FREE THE NIPPLE - FTN supporters on Hollywood Blvd today!' ,  
 130              'thumbnail' :  're:^https?://.*\.jpg' ,  
 132              'description' :  'FREE THE NIPPLE on Twitter: "FTN supporters on Hollywood Blvd today! http://t.co/c7jHH749xJ"' ,  
 133              'uploader' :  'FREE THE NIPPLE' ,  
 134              'uploader_id' :  'freethenipple' ,  
 138      def  _real_extract ( self
,  url
):  
 139          mobj 
=  re
. match ( self
._ VALID
_U RL
,  url
)  
 140          user_id 
=  mobj
. group ( 'user_id' )  
 141          twid 
=  mobj
. group ( 'id' )  
 143          webpage 
=  self
._ download
_ webpage
( self
._ TEMPLATE
_U RL 
% ( user_id
,  twid
),  twid
)  
 145          username 
=  remove_end ( self
._ og
_ search
_ title
( webpage
),  ' on Twitter' )  
 147          title 
=  self
._ og
_ search
_ description
( webpage
). strip ( '' ). replace ( ' \n ' ,  ' ' )  
 149          # strip  'https -_t.co_BJYgOjSeGA' junk from filenames  
 150          mobj 
=  re
. match ( r
'“(.*)\s+(https?://[^ ]+)”' ,  title
)  
 151          title
,  short_url 
=  mobj
. groups ()  
 153          card_id 
=  self
._ search
_ regex
(  
 154              r
'["\' ]/ i
/ cards
/ tfw
/ v1
/( \d
+) ', webpage, ' twitter card url
')  
 155          card_url = ' https
:// twitter
. com
/ i
/ cards
/ tfw
/ v1
/ ' + card_id  
 158              ' _type
': ' url_transparent
',  
 159              ' ie_key
': ' TwitterCard
',  
 160              ' uploader_id
': user_id,  
 161              ' uploader
': username,  
 164              ' description
': ' %s  on Twitter
:  " %s %s " ' % (username, title, short_url),  
 165              ' title
': username + '  -  ' + title,