]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/twitter.py
2 from __future__
import unicode_literals
6 from . common
import InfoExtractor
7 from .. compat
import compat_urllib_request
15 class TwitterCardIE ( InfoExtractor
):
16 IE_NAME
= 'twitter:card'
17 _VALID_URL
= r
'https?://(?:www\.)?twitter\.com/i/cards/tfw/v1/(?P<id>\d+)'
20 'url' : 'https://twitter.com/i/cards/tfw/v1/560070183650213889' ,
21 'md5' : '7d2f6b4d2eb841a7ccc893d479bfceb4' ,
23 'id' : '560070183650213889' ,
25 'title' : 'TwitterCard' ,
26 'thumbnail' : 're:^https?://.*\.jpg$' ,
31 'url' : 'https://twitter.com/i/cards/tfw/v1/623160978427936768' ,
32 'md5' : '7ee2a553b63d1bccba97fbed97d9e1c8' ,
34 'id' : '623160978427936768' ,
36 'title' : 'TwitterCard' ,
37 'thumbnail' : 're:^https?://.*\.jpg' ,
42 'url' : 'https://twitter.com/i/cards/tfw/v1/654001591733886977' ,
43 'md5' : 'b6f35e8b08a0bec6c8af77a2f4b3a814' ,
47 'title' : 'Ubuntu 11.10 Overview' ,
48 'description' : 'Take a quick peek at what \' s new and improved in Ubuntu 11.10. \n\n Once installed take a look at 10 Things to Do After Installing: http://www.omgubuntu.co.uk/2011/10/10-things-to-do-after-installing-ubuntu-11-10/' ,
49 'upload_date' : '20111013' ,
50 'uploader' : 'OMG! Ubuntu!' ,
51 'uploader_id' : 'omgubuntu' ,
56 def _real_extract ( self
, url
):
57 video_id
= self
._ match
_ id
( url
)
59 # Different formats served for different User-Agents
61 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20150101 Firefox/20.0 (Chrome)' , # mp4
62 'Mozilla/5.0 (Windows NT 5.2; WOW64; rv:38.0) Gecko/20100101 Firefox/38.0' , # webm
67 for user_agent
in USER_AGENTS
:
68 request
= compat_urllib_request
. Request ( url
)
69 request
. add_header ( 'User-Agent' , user_agent
)
70 webpage
= self
._ download
_ webpage
( request
, video_id
)
72 youtube_url
= self
._ html
_ search
_ regex
(
73 r
'<iframe[^>]+src="((?:https?:)?//www.youtube.com/embed/[^"]+)"' ,
74 webpage
, 'youtube iframe' , default
= None )
76 return self
. url_result ( youtube_url
, 'Youtube' )
78 config
= self
._ parse
_ json
( self
._ html
_ search
_ regex
(
79 r
'data-player-config="([^"]+)"' , webpage
, 'data player config' ),
81 if 'playlist' not in config
:
82 if 'vmapUrl' in config
:
83 vmap_data
= self
._ download
_ xml
( config
[ 'vmapUrl' ], video_id
)
84 video_url
= xpath_text ( vmap_data
, './/MediaFile' ). strip ()
88 break # same video regardless of UA
91 video_url
= config
[ 'playlist' ][ 0 ][ 'source' ]
97 m
= re
. search ( r
'/(?P<width>\d+)x(?P<height>\d+)/' , video_url
)
100 'width' : int ( m
. group ( 'width' )),
101 'height' : int ( m
. group ( 'height' )),
104 self
._ sort
_ formats
( formats
)
106 thumbnail
= config
. get ( 'posterImageUrl' )
107 duration
= float_or_none ( config
. get ( 'duration' ))
111 'title' : 'TwitterCard' ,
112 'thumbnail' : thumbnail
,
113 'duration' : duration
,
118 class TwitterIE ( InfoExtractor
):
120 _VALID_URL
= r
'https?://(?:www\.|m\.|mobile\.)?twitter\.com/(?P<user_id>[^/]+)/status/(?P<id>\d+)'
121 _TEMPLATE_URL
= 'https://twitter.com/ %s /status/ %s '
124 'url' : 'https://twitter.com/freethenipple/status/643211948184596480' ,
125 'md5' : '31cd83a116fc41f99ae3d909d4caf6a0' ,
127 'id' : '643211948184596480' ,
129 'title' : 'FREE THE NIPPLE - FTN supporters on Hollywood Blvd today!' ,
130 'thumbnail' : 're:^https?://.*\.jpg' ,
132 'description' : 'FREE THE NIPPLE on Twitter: "FTN supporters on Hollywood Blvd today! http://t.co/c7jHH749xJ"' ,
133 'uploader' : 'FREE THE NIPPLE' ,
134 'uploader_id' : 'freethenipple' ,
138 def _real_extract ( self
, url
):
139 mobj
= re
. match ( self
._ VALID
_U RL
, url
)
140 user_id
= mobj
. group ( 'user_id' )
141 twid
= mobj
. group ( 'id' )
143 webpage
= self
._ download
_ webpage
( self
._ TEMPLATE
_U RL
% ( user_id
, twid
), twid
)
145 username
= remove_end ( self
._ og
_ search
_ title
( webpage
), ' on Twitter' )
147 title
= self
._ og
_ search
_ description
( webpage
). strip ( '' ). replace ( ' \n ' , ' ' )
149 # strip 'https -_t.co_BJYgOjSeGA' junk from filenames
150 mobj
= re
. match ( r
'“(.*)\s+(https?://[^ ]+)”' , title
)
151 title
, short_url
= mobj
. groups ()
153 card_id
= self
._ search
_ regex
(
154 r
'["\' ]/ i
/ cards
/ tfw
/ v1
/( \d
+) ', webpage, ' twitter card url
')
155 card_url = ' https
:// twitter
. com
/ i
/ cards
/ tfw
/ v1
/ ' + card_id
158 ' _type
': ' url_transparent
',
159 ' ie_key
': ' TwitterCard
',
160 ' uploader_id
': user_id,
161 ' uploader
': username,
164 ' description
': ' %s on Twitter
: " %s %s " ' % (username, title, short_url),
165 ' title
': username + ' - ' + title,