]>
Raphaƫl G. Git Repositories - youtubedl/blob - youtube_dl/extractor/ustudio.py
   1 from __future__ 
import unicode_literals
 
   5 from .common 
import InfoExtractor
 
  13 class UstudioIE(InfoExtractor
): 
  15     _VALID_URL 
= r
'https?://(?:(?:www|v1)\.)?ustudio\.com/video/(?P<id>[^/]+)/(?P<display_id>[^/?#&]+)' 
  17         'url': 'http://ustudio.com/video/Uxu2my9bgSph/san_francisco_golden_gate_bridge', 
  18         'md5': '58bbfca62125378742df01fc2abbdef6', 
  21             'display_id': 'san_francisco_golden_gate_bridge', 
  23             'title': 'San Francisco: Golden Gate Bridge', 
  24             'description': 'md5:23925500697f2c6d4830e387ba51a9be', 
  25             'thumbnail': 're:^https?://.*\.jpg$', 
  26             'upload_date': '20111107', 
  27             'uploader': 'Tony Farley', 
  31     def _real_extract(self
, url
): 
  32         video_id
, display_id 
= re
.match(self
._VALID
_URL
, url
).groups() 
  34         config 
= self
._download
_xml
( 
  35             'http://v1.ustudio.com/embed/%s/ustudio/config.xml' % video_id
, 
  40                 'url': unescapeHTML(item
.attrib
['url']), 
  41                 'width': int_or_none(item
.get('width')), 
  42                 'height': int_or_none(item
.get('height')), 
  43             } for item 
in config
.findall('./qualities/quality/%s' % kind
) if item
.get('url')] 
  45         formats 
= extract('video') 
  46         self
._sort
_formats
(formats
) 
  48         webpage 
= self
._download
_webpage
(url
, display_id
) 
  50         title 
= self
._og
_search
_title
(webpage
) 
  51         upload_date 
= unified_strdate(self
._search
_regex
( 
  52             r
'(?s)Uploaded by\s*.+?\s*on\s*<span>([^<]+)</span>', 
  53             webpage
, 'upload date', fatal
=False)) 
  54         uploader 
= self
._search
_regex
( 
  55             r
'Uploaded by\s*<a[^>]*>([^<]+)<', 
  56             webpage
, 'uploader', fatal
=False) 
  60             'display_id': display_id
, 
  62             'description': self
._og
_search
_description
(webpage
), 
  63             'thumbnails': extract('image'), 
  64             'upload_date': upload_date
, 
  70 class UstudioEmbedIE(InfoExtractor
): 
  71     IE_NAME 
= 'ustudio:embed' 
  72     _VALID_URL 
= r
'https?://(?:(?:app|embed)\.)?ustudio\.com/embed/(?P<uid>[^/]+)/(?P<id>[^/]+)' 
  74         'url': 'http://app.ustudio.com/embed/DeN7VdYRDKhP/Uw7G1kMCe65T', 
  75         'md5': '47c0be52a09b23a7f40de9469cec58f4', 
  79             'title': '5 Things IT Should Know About Video', 
  80             'description': 'md5:93d32650884b500115e158c5677d25ad', 
  81             'uploader_id': 'DeN7VdYRDKhP', 
  85     def _real_extract(self
, url
): 
  86         uploader_id
, video_id 
= re
.match(self
._VALID
_URL
, url
).groups() 
  87         video_data 
= self
._download
_json
( 
  88             'http://app.ustudio.com/embed/%s/%s/config.json' % (uploader_id
, video_id
), 
  89             video_id
)['videos'][0] 
  90         title 
= video_data
['name'] 
  93         for ext
, qualities 
in video_data
.get('transcodes', {}).items(): 
  94             for quality 
in qualities
: 
  95                 quality_url 
= quality
.get('url') 
  98                 height 
= int_or_none(quality
.get('height')) 
 100                     'format_id': '%s-%dp' % (ext
, height
) if height 
else ext
, 
 102                     'width': int_or_none(quality
.get('width')), 
 105         self
._sort
_formats
(formats
) 
 108         for image 
in video_data
.get('images', []): 
 109             image_url 
= image
.get('url') 
 119             'description': video_data
.get('description'), 
 120             'duration': int_or_none(video_data
.get('duration')), 
 121             'uploader_id': uploader_id
, 
 122             'tags': video_data
.get('keywords'), 
 123             'thumbnails': thumbnails
,