]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/twitter.py
b7384298619608ab879337326b1e6719962932e3
2 from __future__
import unicode_literals
6 from . common
import InfoExtractor
class TwitterBaseIE(InfoExtractor):
    """Base class holding the VMAP helper shared by the Twitter extractors."""

    def _get_vmap_video_url(self, vmap_url, video_id):
        """Return the media URL referenced by the XML document at ``vmap_url``.

        The document is fetched with ``self._download_xml`` and the text
        selected by the ``.//MediaFile`` XPath is returned with surrounding
        whitespace removed.

        Raises:
            ExtractorError: if no ``MediaFile`` text could be read from the
                document.
        """
        vmap_data = self._download_xml(vmap_url, video_id)
        media_url = xpath_text(vmap_data, './/MediaFile')
        # NOTE(review): xpath_text presumably yields None when the element is
        # missing; guard here so the failure is reported as an extractor
        # error rather than an AttributeError from calling .strip() on None.
        if media_url is None:
            raise ExtractorError('Unable to find MediaFile URL in VMAP document')
        return media_url.strip()
23 class TwitterCardIE ( TwitterBaseIE
):
24 IE_NAME
= 'twitter:card'
25 _VALID_URL
= r
'https?://(?:www\.)?twitter\.com/i/(?:cards/tfw/v1|videos/tweet)/(?P<id>\d+)'
28 'url' : 'https://twitter.com/i/cards/tfw/v1/560070183650213889' ,
29 # MD5 checksums are different in different places
31 'id' : '560070183650213889' ,
33 'title' : 'Twitter Card' ,
34 'thumbnail' : 're:^https?://.*\.jpg$' ,
39 'url' : 'https://twitter.com/i/cards/tfw/v1/623160978427936768' ,
40 'md5' : '7ee2a553b63d1bccba97fbed97d9e1c8' ,
42 'id' : '623160978427936768' ,
44 'title' : 'Twitter Card' ,
45 'thumbnail' : 're:^https?://.*\.jpg' ,
50 'url' : 'https://twitter.com/i/cards/tfw/v1/654001591733886977' ,
51 'md5' : 'd4724ffe6d2437886d004fa5de1043b3' ,
55 'title' : 'Ubuntu 11.10 Overview' ,
56 'description' : 'Take a quick peek at what \' s new and improved in Ubuntu 11.10. \n\n Once installed take a look at 10 Things to Do After Installing: http://www.omgubuntu.co.uk/2011/10/10...' ,
57 'upload_date' : '20111013' ,
58 'uploader' : 'OMG! Ubuntu!' ,
59 'uploader_id' : 'omgubuntu' ,
61 'add_ie' : [ 'Youtube' ],
64 'url' : 'https://twitter.com/i/cards/tfw/v1/665289828897005568' ,
65 'md5' : 'ab2745d0b0ce53319a534fccaa986439' ,
69 'upload_date' : '20151113' ,
70 'uploader_id' : '1189339351084113920' ,
71 'uploader' : 'ArsenalTerje' ,
72 'title' : 'Vine by ArsenalTerje' ,
76 'url' : 'https://twitter.com/i/videos/tweet/705235433198714880' ,
77 'md5' : '3846d0a07109b5ab622425449b59049d' ,
79 'id' : '705235433198714880' ,
81 'title' : 'Twitter web player' ,
82 'thumbnail' : 're:^https?://.*\.jpg' ,
def _real_extract(self, url):
    """Extract formats and metadata for a Twitter card / tweet video page.

    The page at ``url`` is downloaded and handled in this order:

    1. If it embeds a YouTube or Vine ``<iframe>``, extraction is delegated
       to that URL through ``self.url_result``.
    2. Otherwise the JSON held in the ``data-config`` or
       ``data-player-config`` attribute is parsed; a config whose
       ``source_type`` is ``'vine'`` is delegated to the Vine extractor.
    3. Otherwise formats are gathered from the config's direct video URL,
       from its VMAP URL, and from the variants of a ``mediaInfo`` entity.

    Returns:
        Either a ``url_result`` for delegated pages, or an info dict with
        ``id``, ``title``, ``thumbnail``, ``duration`` and ``formats``.
    """
    video_id = self._match_id(url)

    formats = []
    duration = None

    webpage = self._download_webpage(url, video_id)

    # Cards that only wrap a YouTube or Vine player are handed over to the
    # matching extractor. Dots in the host name are escaped so that they
    # match literally.
    iframe_url = self._html_search_regex(
        r'<iframe[^>]+src="((?:https?:)?//(?:www\.youtube\.com/embed/[^"]+|(?:www\.)?vine\.co/v/\w+/card))"',
        webpage, 'video iframe', default=None)
    if iframe_url:
        return self.url_result(iframe_url)

    config = self._parse_json(self._html_search_regex(
        r'data-(?:player-)?config="([^"]+)"', webpage, 'data player config'),
        video_id)

    if config.get('source_type') == 'vine':
        return self.url_result(config['player_url'], 'Vine')

    def _search_dimensions_in_video_url(a_format, video_url):
        """Add width/height to ``a_format`` when the URL has a /WxH/ part."""
        m = re.search(r'/(?P<width>\d+)x(?P<height>\d+)/', video_url)
        if m:
            a_format.update({
                'width': int(m.group('width')),
                'height': int(m.group('height')),
            })

    # An absent *or empty* playlist falls back to a single empty entry so
    # that indexing [0] cannot raise IndexError.
    playlist = config.get('playlist') or [{}]
    video_url = config.get('video_url') or playlist[0].get('source')

    if video_url:
        if determine_ext(video_url) == 'm3u8':
            formats.extend(self._extract_m3u8_formats(
                video_url, video_id, ext='mp4', m3u8_id='hls'))
        else:
            f = {
                'url': video_url,
            }

            _search_dimensions_in_video_url(f, video_url)

            formats.append(f)

    vmap_url = config.get('vmapUrl') or config.get('vmap_url')
    if vmap_url:
        formats.append({
            'url': self._get_vmap_video_url(vmap_url, video_id),
        })

    media_info = None

    # When several entities carry 'mediaInfo', the last one wins.
    for entity in config.get('status', {}).get('entities', []):
        if 'mediaInfo' in entity:
            media_info = entity['mediaInfo']

    if media_info:
        for media_variant in media_info['variants']:
            media_url = media_variant['url']
            if media_url.endswith('.m3u8'):
                formats.extend(self._extract_m3u8_formats(
                    media_url, video_id, ext='mp4', m3u8_id='hls'))
            elif media_url.endswith('.mpd'):
                formats.extend(self._extract_mpd_formats(
                    media_url, video_id, mpd_id='dash'))
            else:
                vbr = int_or_none(media_variant.get('bitRate'), scale=1000)
                a_format = {
                    'url': media_url,
                    'format_id': 'http-%d' % vbr if vbr else 'http',
                    'vbr': vbr,
                }
                # Reported bitRate may be zero
                if not a_format['vbr']:
                    del a_format['vbr']

                _search_dimensions_in_video_url(a_format, media_url)

                formats.append(a_format)

        duration = float_or_none(
            media_info.get('duration', {}).get('nanos'), scale=1e9)

    self._sort_formats(formats)

    title = self._search_regex(r'<title>([^<]+)</title>', webpage, 'title')
    thumbnail = config.get('posterImageUrl') or config.get('image_src')
    # A duration given directly in the config takes precedence over the one
    # derived from mediaInfo.
    duration = float_or_none(config.get('duration')) or duration

    return {
        'id': video_id,
        'title': title,
        'thumbnail': thumbnail,
        'duration': duration,
        'formats': formats,
    }
182 class TwitterIE ( InfoExtractor
):
184 _VALID_URL
= r
'https?://(?:www\.|m\.|mobile\.)?twitter\.com/(?P<user_id>[^/]+)/status/(?P<id>\d+)'
185 _TEMPLATE_URL
= 'https://twitter.com/ %s /status/ %s '
188 'url' : 'https://twitter.com/freethenipple/status/643211948184596480' ,
190 'id' : '643211948184596480' ,
192 'title' : 'FREE THE NIPPLE - FTN supporters on Hollywood Blvd today!' ,
193 'thumbnail' : 're:^https?://.*\.jpg' ,
194 'description' : 'FREE THE NIPPLE on Twitter: "FTN supporters on Hollywood Blvd today! http://t.co/c7jHH749xJ"' ,
195 'uploader' : 'FREE THE NIPPLE' ,
196 'uploader_id' : 'freethenipple' ,
199 'skip_download' : True , # requires ffmpeg
202 'url' : 'https://twitter.com/giphz/status/657991469417025536/photo/1' ,
203 'md5' : 'f36dcd5fb92bf7057f155e7d927eeb42' ,
205 'id' : '657991469417025536' ,
207 'title' : 'Gifs - tu vai cai tu vai cai tu nao eh capaz disso tu vai cai' ,
208 'description' : 'Gifs on Twitter: "tu vai cai tu vai cai tu nao eh capaz disso tu vai cai https://t.co/tM46VHFlO5"' ,
209 'thumbnail' : 're:^https?://.*\.png' ,
211 'uploader_id' : 'giphz' ,
213 'expected_warnings' : [ 'height' , 'width' ],
214 'skip' : 'Account suspended' ,
216 'url' : 'https://twitter.com/starwars/status/665052190608723968' ,
217 'md5' : '39b7199856dee6cd4432e72c74bc69d4' ,
219 'id' : '665052190608723968' ,
221 'title' : 'Star Wars - A new beginning is coming December 18. Watch the official 60 second #TV spot for #StarWars: #TheForceAwakens.' ,
222 'description' : 'Star Wars on Twitter: "A new beginning is coming December 18. Watch the official 60 second #TV spot for #StarWars: #TheForceAwakens."' ,
223 'uploader_id' : 'starwars' ,
224 'uploader' : 'Star Wars' ,
227 'url' : 'https://twitter.com/BTNBrentYarina/status/705235433198714880' ,
229 'id' : '705235433198714880' ,
231 'title' : 'Brent Yarina - Khalil Iverson \' s missed highlight dunk. And made highlight dunk. In one highlight.' ,
232 'description' : 'Brent Yarina on Twitter: "Khalil Iverson \' s missed highlight dunk. And made highlight dunk. In one highlight."' ,
233 'uploader_id' : 'BTNBrentYarina' ,
234 'uploader' : 'Brent Yarina' ,
237 # The same video as https://twitter.com/i/videos/tweet/705235433198714880
238 # Test case of TwitterCardIE
239 'skip_download' : True ,
242 'url' : 'https://twitter.com/jaydingeer/status/700207533655363584' ,
245 'id' : '700207533655363584' ,
247 'title' : 'Donte The Dumbass - BEAT PROD: @suhmeduh #Damndaniel' ,
248 'description' : 'Donte The Dumbass on Twitter: "BEAT PROD: @suhmeduh https://t.co/HBrQ4AfpvZ #Damndaniel https://t.co/byBooq2ejZ"' ,
249 'thumbnail' : 're:^https?://.*\.jpg' ,
250 'uploader' : 'Donte The Dumbass' ,
251 'uploader_id' : 'jaydingeer' ,
254 'skip_download' : True , # requires ffmpeg
257 'url' : 'https://twitter.com/Filmdrunk/status/713801302971588609' ,
258 'md5' : '89a15ed345d13b86e9a5a5e051fa308a' ,
262 'title' : 'Dr.Pepperの飲み方 #japanese #バカ #ドクペ #電動ガン' ,
263 'uploader' : 'TAKUMA' ,
264 'uploader_id' : '1004126642786242560' ,
265 'upload_date' : '20140615' ,
269 'url' : 'https://twitter.com/captainamerica/status/719944021058060289' ,
271 'id' : '719944021058060289' ,
273 'title' : 'Captain America - @King0fNerd Are you sure you made the right choice? Find out in theaters.' ,
274 'description' : 'Captain America on Twitter: "@King0fNerd Are you sure you made the right choice? Find out in theaters. https://t.co/GpgYi9xMJI"' ,
275 'uploader_id' : 'captainamerica' ,
276 'uploader' : 'Captain America' ,
279 'skip_download' : True , # requires ffmpeg
def _real_extract(self, url):
    """Extract the video attached to a single tweet.

    The tweet page is re-requested through ``_TEMPLATE_URL`` using the user
    id and tweet id captured by ``_VALID_URL``. Two kinds of media are
    recognised:

    * an inline ``animated-gif`` ``<video>`` element, whose source URL,
      dimensions and poster are read straight from the markup;
    * a ``PlayableMedia`` container, which is delegated (as
      ``url_transparent``) to the ``TwitterCard`` extractor.

    Returns:
        An info dict describing the media.

    Raises:
        ExtractorError: if the request was redirected to the suspended
            account page, or if the tweet contains neither kind of media.
    """
    mobj = re.match(self._VALID_URL, url)
    user_id = mobj.group('user_id')
    twid = mobj.group('id')

    webpage, urlh = self._download_webpage_handle(
        self._TEMPLATE_URL % (user_id, twid), twid)

    if 'twitter.com/account/suspended' in urlh.geturl():
        raise ExtractorError('Account suspended by Twitter.', expected=True)

    username = remove_end(self._og_search_title(webpage), ' on Twitter')

    # NOTE(review): strip('') removes nothing as written; kept unchanged in
    # case the argument originally held a character that is not visible
    # here -- confirm against the upstream file.
    title = description = self._og_search_description(webpage).strip('').replace('\n', ' ').strip('“”')

    # strip 'https -_t.co_BJYgOjSeGA' junk from filenames
    title = re.sub(r'\s+(https?://[^ ]+)', '', title)

    info = {
        'uploader_id': user_id,
        'uploader': username,
        'webpage_url': url,
        'description': '%s on Twitter: "%s"' % (username, description),
        'title': username + ' - ' + title,
    }

    mobj = re.search(r'''(?x)
        <video[^>]+class="animated-gif"(?P<more_info>[^>]+)>\s*
            <source[^>]+video-src="(?P<url>[^"]+)"
    ''', webpage)

    if mobj:
        # Remaining attributes of the <video> tag; each lookup below is
        # non-fatal, so a missing attribute leaves the field as None.
        more_info = mobj.group('more_info')
        height = int_or_none(self._search_regex(
            r'data-height="(\d+)"', more_info, 'height', fatal=False))
        width = int_or_none(self._search_regex(
            r'data-width="(\d+)"', more_info, 'width', fatal=False))
        thumbnail = self._search_regex(
            r'poster="([^"]+)"', more_info, 'poster', fatal=False)
        info.update({
            'id': twid,
            'url': mobj.group('url'),
            'height': height,
            'width': width,
            'thumbnail': thumbnail,
        })
        return info

    if 'class="PlayableMedia' in webpage:
        info.update({
            '_type': 'url_transparent',
            'ie_key': 'TwitterCard',
            'url': '%s//twitter.com/i/videos/tweet/%s' % (self.http_scheme(), twid),
        })

        return info

    raise ExtractorError('There\'s no video in this tweet.')
343 class TwitterAmplifyIE ( TwitterBaseIE
):
344 IE_NAME
= 'twitter:amplify'
345 _VALID_URL
= 'https?://amp\.twimg\.com/v/(?P<id>[0-9a-f\-] {36} )'
348 'url' : 'https://amp.twimg.com/v/0ba0c3c7-0af3-4c0a-bed5-7efd1ffa2951' ,
349 'md5' : '7df102d0b9fd7066b86f3159f8e81bf6' ,
351 'id' : '0ba0c3c7-0af3-4c0a-bed5-7efd1ffa2951' ,
353 'title' : 'Twitter Video' ,
354 'thumbnail' : 're:^https?://.*' ,
358 def _real_extract ( self
, url
):
359 video_id
= self
._ match
_ id
( url
)
360 webpage
= self
._ download
_ webpage
( url
, video_id
)
362 vmap_url
= self
._ html
_ search
_ meta
(
363 'twitter:amplify:vmap' , webpage
, 'vmap url' )
364 video_url
= self
._ get
_ vmap
_ video
_u rl
( vmap_url
, video_id
)
367 thumbnail
= self
._ html
_ search
_ meta
(
368 'twitter:image:src' , webpage
, 'thumbnail' , fatal
= False )
370 def _find_dimension ( target
):
371 w
= int_or_none ( self
._ html
_ search
_ meta
(
372 'twitter: %s :width' % target
, webpage
, fatal
= False ))
373 h
= int_or_none ( self
._ html
_ search
_ meta
(
374 'twitter: %s :height' % target
, webpage
, fatal
= False ))
378 thumbnail_w
, thumbnail_h
= _find_dimension ( 'image' )
381 'width' : thumbnail_w
,
382 'height' : thumbnail_h
,
385 video_w
, video_h
= _find_dimension ( 'player' )
394 'title' : 'Twitter Video' ,
396 'thumbnails' : thumbnails
,