]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/daum.py
   3 import xml
.etree
.ElementTree
 
   5 from .common 
import InfoExtractor
 
  12 class DaumIE(InfoExtractor
): 
  13     _VALID_URL 
= r
'https?://tvpot\.daum\.net/.*?clipid=(?P<id>\d+)' 
  17         u
'url': u
'http://tvpot.daum.net/clip/ClipView.do?clipid=52554690', 
  18         u
'file': u
'52554690.mp4', 
  20             u
'title': u
'DOTA 2GETHER 시즌2 6회 - 2부', 
  21             u
'description': u
'DOTA 2GETHER 시즌2 6회 - 2부', 
  22             u
'upload_date': u
'20130831', 
  27     def _real_extract(self
, url
): 
  28         mobj 
= re
.match(self
._VALID
_URL
, url
) 
  29         video_id 
= mobj
.group(1) 
  30         canonical_url 
= 'http://tvpot.daum.net/v/%s' % video_id
 
  31         webpage 
= self
._download
_webpage
(canonical_url
, video_id
) 
  32         full_id 
= self
._search
_regex
(r
'<link rel="video_src" href=".+?vid=(.+?)"', 
  34         query 
= compat_urllib_parse
.urlencode({'vid': full_id
}) 
  35         info_xml 
= self
._download
_webpage
( 
  36             'http://tvpot.daum.net/clip/ClipInfoXml.do?' + query
, video_id
, 
  37             u
'Downloading video info') 
  38         urls_xml 
= self
._download
_webpage
( 
  39             'http://videofarm.daum.net/controller/api/open/v1_2/MovieData.apixml?' + query
, 
  40             video_id
, u
'Downloading video formats info') 
  41         info 
= xml
.etree
.ElementTree
.fromstring(info_xml
.encode('utf-8')) 
  42         urls 
= xml
.etree
.ElementTree
.fromstring(urls_xml
.encode('utf-8')) 
  44         self
.to_screen(u
'%s: Getting video urls' % video_id
) 
  46         for format_el 
in urls
.findall('result/output_list/output_list'): 
  47             profile 
= format_el
.attrib
['profile'] 
  48             format_query 
= compat_urllib_parse
.urlencode({ 
  52             url_xml 
= self
._download
_webpage
( 
  53                 'http://videofarm.daum.net/controller/api/open/v1_2/MovieLocation.apixml?' + format_query
, 
  55             url_doc 
= xml
.etree
.ElementTree
.fromstring(url_xml
.encode('utf-8')) 
  56             format_url 
= url_doc
.find('result/url').text
 
  59                 'ext': determine_ext(format_url
), 
  65             'title': info
.find('TITLE').text
, 
  67             'thumbnail': self
._og
_search
_thumbnail
(webpage
), 
  68             'description': info
.find('CONTENTS').text
, 
  69             'duration': int(info
.find('DURATION').text
), 
  70             'upload_date': info
.find('REGDTTM').text
[:8], 
  72         # TODO: Remove when #980 has been merged 
  73         info
.update(formats
[-1])