]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/twitter.py
a161f046b2532805d864a26e083de06f68cf7a1f
2 from __future__
import unicode_literals
6 from . common
import InfoExtractor
17 class TwitterCardIE ( InfoExtractor
):
18 IE_NAME
= 'twitter:card'
19 _VALID_URL
= r
'https?://(?:www\.)?twitter\.com/i/cards/tfw/v1/(?P<id>\d+)'
22 'url' : 'https://twitter.com/i/cards/tfw/v1/560070183650213889' ,
23 'md5' : '4fa26a35f9d1bf4b646590ba8e84be19' ,
25 'id' : '560070183650213889' ,
27 'title' : 'TwitterCard' ,
28 'thumbnail' : 're:^https?://.*\.jpg$' ,
33 'url' : 'https://twitter.com/i/cards/tfw/v1/623160978427936768' ,
34 'md5' : '7ee2a553b63d1bccba97fbed97d9e1c8' ,
36 'id' : '623160978427936768' ,
38 'title' : 'TwitterCard' ,
39 'thumbnail' : 're:^https?://.*\.jpg' ,
44 'url' : 'https://twitter.com/i/cards/tfw/v1/654001591733886977' ,
45 'md5' : 'b6f35e8b08a0bec6c8af77a2f4b3a814' ,
49 'title' : 'Ubuntu 11.10 Overview' ,
50 'description' : 'Take a quick peek at what \' s new and improved in Ubuntu 11.10. \n\n Once installed take a look at 10 Things to Do After Installing: http://www.omgubuntu.co.uk/2011/10/10-things-to-do-after-installing-ubuntu-11-10/' ,
51 'upload_date' : '20111013' ,
52 'uploader' : 'OMG! Ubuntu!' ,
53 'uploader_id' : 'omgubuntu' ,
55 'add_ie' : [ 'Youtube' ],
58 'url' : 'https://twitter.com/i/cards/tfw/v1/665289828897005568' ,
59 'md5' : 'ab2745d0b0ce53319a534fccaa986439' ,
63 'upload_date' : '20151113' ,
64 'uploader_id' : '1189339351084113920' ,
65 'uploader' : '@ArsenalTerje' ,
66 'title' : 'Vine by @ArsenalTerje' ,
72 def _real_extract ( self
, url
):
73 video_id
= self
._ match
_ id
( url
)
75 # Different formats served for different User-Agents
77 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20150101 Firefox/20.0 (Chrome)' , # mp4
78 'Mozilla/5.0 (Windows NT 5.2; WOW64; rv:38.0) Gecko/20100101 Firefox/38.0' , # webm
83 for user_agent
in USER_AGENTS
:
84 request
= sanitized_Request ( url
)
85 request
. add_header ( 'User-Agent' , user_agent
)
86 webpage
= self
._ download
_ webpage
( request
, video_id
)
88 iframe_url
= self
._ html
_ search
_ regex
(
89 r
'<iframe[^>]+src="((?:https?:)?//(?:www.youtube.com/embed/[^"]+|(?:www\.)?vine\.co/v/\w+/card))"' ,
90 webpage
, 'video iframe' , default
= None )
92 return self
. url_result ( iframe_url
)
94 config
= self
._ parse
_ json
( self
._ html
_ search
_ regex
(
95 r
'data-player-config="([^"]+)"' , webpage
, 'data player config' ),
97 if 'playlist' not in config
:
98 if 'vmapUrl' in config
:
99 vmap_data
= self
._ download
_ xml
( config
[ 'vmapUrl' ], video_id
)
100 video_url
= xpath_text ( vmap_data
, './/MediaFile' ). strip ()
104 break # same video regardless of UA
107 video_url
= config
[ 'playlist' ][ 0 ][ 'source' ]
113 m
= re
. search ( r
'/(?P<width>\d+)x(?P<height>\d+)/' , video_url
)
116 'width' : int ( m
. group ( 'width' )),
117 'height' : int ( m
. group ( 'height' )),
120 self
._ sort
_ formats
( formats
)
122 thumbnail
= config
. get ( 'posterImageUrl' )
123 duration
= float_or_none ( config
. get ( 'duration' ))
127 'title' : 'TwitterCard' ,
128 'thumbnail' : thumbnail
,
129 'duration' : duration
,
134 class TwitterIE ( InfoExtractor
):
136 _VALID_URL
= r
'https?://(?:www\.|m\.|mobile\.)?twitter\.com/(?P<user_id>[^/]+)/status/(?P<id>\d+)'
137 _TEMPLATE_URL
= 'https://twitter.com/ %s /status/ %s '
140 'url' : 'https://twitter.com/freethenipple/status/643211948184596480' ,
141 'md5' : 'db6612ec5d03355953c3ca9250c97e5e' ,
143 'id' : '643211948184596480' ,
145 'title' : 'FREE THE NIPPLE - FTN supporters on Hollywood Blvd today!' ,
146 'thumbnail' : 're:^https?://.*\.jpg' ,
148 'description' : 'FREE THE NIPPLE on Twitter: "FTN supporters on Hollywood Blvd today! http://t.co/c7jHH749xJ"' ,
149 'uploader' : 'FREE THE NIPPLE' ,
150 'uploader_id' : 'freethenipple' ,
153 'url' : 'https://twitter.com/giphz/status/657991469417025536/photo/1' ,
154 'md5' : 'f36dcd5fb92bf7057f155e7d927eeb42' ,
156 'id' : '657991469417025536' ,
158 'title' : 'Gifs - tu vai cai tu vai cai tu nao eh capaz disso tu vai cai' ,
159 'description' : 'Gifs on Twitter: "tu vai cai tu vai cai tu nao eh capaz disso tu vai cai https://t.co/tM46VHFlO5"' ,
160 'thumbnail' : 're:^https?://.*\.png' ,
162 'uploader_id' : 'giphz' ,
165 'url' : 'https://twitter.com/starwars/status/665052190608723968' ,
166 'md5' : '39b7199856dee6cd4432e72c74bc69d4' ,
168 'id' : '665052190608723968' ,
170 'title' : 'Star Wars - A new beginning is coming December 18. Watch the official 60 second #TV spot for #StarWars: #TheForceAwakens.' ,
171 'description' : 'Star Wars on Twitter: "A new beginning is coming December 18. Watch the official 60 second #TV spot for #StarWars: #TheForceAwakens."' ,
172 'uploader_id' : 'starwars' ,
173 'uploader' : 'Star Wars' ,
177 def _real_extract ( self
, url
):
178 mobj
= re
. match ( self
._ VALID
_U RL
, url
)
179 user_id
= mobj
. group ( 'user_id' )
180 twid
= mobj
. group ( 'id' )
182 webpage
= self
._ download
_ webpage
( self
._ TEMPLATE
_U RL
% ( user_id
, twid
), twid
)
184 username
= remove_end ( self
._ og
_ search
_ title
( webpage
), ' on Twitter' )
186 title
= description
= self
._ og
_ search
_ description
( webpage
). strip ( '' ). replace ( ' \n ' , ' ' ). strip ( '“”' )
188 # strip 'https -_t.co_BJYgOjSeGA' junk from filenames
189 title
= re
. sub ( r
'\s+(https?://[^ ]+)' , '' , title
)
192 'uploader_id' : user_id
,
193 'uploader' : username
,
195 'description' : ' %s on Twitter: " %s "' % ( username
, description
),
196 'title' : username
+ ' - ' + title
,
199 card_id
= self
._ search
_ regex
(
200 r
'["\' ]/ i
/ cards
/ tfw
/ v1
/( \d
+) ', webpage, ' twitter card url
', default=None)
202 card_url = ' https
:// twitter
. com
/ i
/ cards
/ tfw
/ v1
/ ' + card_id
204 ' _type
': ' url_transparent
',
205 ' ie_key
': ' TwitterCard
',
210 mobj = re.search(r'''(?x)
211 <video[^>]+class="animated-gif"[^>]+
212 (?:data-height="(?P<height>\d+)")?[^>]+
213 (?:data-width="(?P<width>\d+)")?[^>]+
214 (?:poster="(?P<poster>[^"]+)")?[^>]*>\s*
215 <source[^>]+video-src="(?P<url>[^"]+)"
221 ' url
': mobj.group(' url
'),
222 ' height
': int_or_none(mobj.group(' height
')),
223 ' width
': int_or_none(mobj.group(' width
')),
224 ' thumbnail
': mobj.group(' poster
'),
228 raise ExtractorError(' There
\' s
not video
in this tweet
. ')