]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/daum.py
   3 from __future__ 
import unicode_literals
 
   7 from .common 
import InfoExtractor
 
  13 class DaumIE(InfoExtractor
): 
  14     _VALID_URL 
= r
'https?://(?:m\.)?tvpot\.daum\.net/.*?clipid=(?P<id>\d+)' 
  18         'url': 'http://tvpot.daum.net/clip/ClipView.do?clipid=52554690', 
  22             'title': 'DOTA 2GETHER 시즌2 6회 - 2부', 
  23             'description': 'DOTA 2GETHER 시즌2 6회 - 2부', 
  24             'upload_date': '20130831', 
  29     def _real_extract(self
, url
): 
  30         mobj 
= re
.match(self
._VALID
_URL
, url
) 
  31         video_id 
= mobj
.group(1) 
  32         canonical_url 
= 'http://tvpot.daum.net/v/%s' % video_id
 
  33         webpage 
= self
._download
_webpage
(canonical_url
, video_id
) 
  34         full_id 
= self
._search
_regex
( 
  35             r
'<iframe src="http://videofarm.daum.net/controller/video/viewer/Video.html\?.*?vid=(.+?)[&"]', 
  37         query 
= compat_urllib_parse
.urlencode({'vid': full_id
}) 
  38         info 
= self
._download
_xml
( 
  39             'http://tvpot.daum.net/clip/ClipInfoXml.do?' + query
, video_id
, 
  40             'Downloading video info') 
  41         urls 
= self
._download
_xml
( 
  42             'http://videofarm.daum.net/controller/api/open/v1_2/MovieData.apixml?' + query
, 
  43             video_id
, 'Downloading video formats info') 
  45         self
.to_screen(u
'%s: Getting video urls' % video_id
) 
  47         for format_el 
in urls
.findall('result/output_list/output_list'): 
  48             profile 
= format_el
.attrib
['profile'] 
  49             format_query 
= compat_urllib_parse
.urlencode({ 
  53             url_doc 
= self
._download
_xml
( 
  54                 'http://videofarm.daum.net/controller/api/open/v1_2/MovieLocation.apixml?' + format_query
, 
  56             format_url 
= url_doc
.find('result/url').text
 
  64             'title': info
.find('TITLE').text
, 
  66             'thumbnail': self
._og
_search
_thumbnail
(webpage
), 
  67             'description': info
.find('CONTENTS').text
, 
  68             'duration': int(info
.find('DURATION').text
), 
  69             'upload_date': info
.find('REGDTTM').text
[:8],