import re
import xml.etree.ElementTree

from .common import InfoExtractor
# NOTE(review): the module path of this import was not visible in the damaged
# listing; ``..utils`` is assumed -- confirm against the package layout.
from ..utils import (
    compat_urllib_parse_urlparse,
    determine_ext,
    ExtractorError,
)


  13 class CollegeHumorIE(InfoExtractor
): 
  14     _VALID_URL 
= r
'^(?:https?://)?(?:www\.)?collegehumor\.com/(video|embed|e)/(?P<videoid>[0-9]+)/?(?P<shorttitle>.*)$' 
  17         u
'url': u
'http://www.collegehumor.com/video/6902724/comic-con-cosplay-catastrophe', 
  18         u
'file': u
'6902724.mp4', 
  19         u
'md5': u
'1264c12ad95dca142a9f0bf7968105a0', 
  21             u
'title': u
'Comic-Con Cosplay Catastrophe', 
  22             u
'description': u
'Fans get creative this year at San Diego.  Too creative.  And yes, that\'s really Joss Whedon.', 
  26         u
'url': u
'http://www.collegehumor.com/video/3505939/font-conference', 
  27         u
'file': u
'3505939.mp4', 
  28         u
'md5': u
'c51ca16b82bb456a4397987791a835f5', 
  30             u
'title': u
'Font Conference', 
  31             u
'description': u
'This video wasn\'t long enough, so we made it double-spaced.', 
  35     def _real_extract(self
, url
): 
  36         mobj 
= re
.match(self
._VALID
_URL
, url
) 
  38             raise ExtractorError(u
'Invalid URL: %s' % url
) 
  39         video_id 
= mobj
.group('videoid') 
  47         self
.report_extraction(video_id
) 
  48         xmlUrl 
= 'http://www.collegehumor.com/moogaloop/video/' + video_id
 
  49         metaXml 
= self
._download
_webpage
(xmlUrl
, video_id
, 
  50                                          u
'Downloading info XML', 
  51                                          u
'Unable to download video info XML') 
  53         mdoc 
= xml
.etree
.ElementTree
.fromstring(metaXml
) 
  55             videoNode 
= mdoc
.findall('./video')[0] 
  56             youtubeIdNode 
= videoNode
.find('./youtubeID') 
  57             if youtubeIdNode 
is not None: 
  58                 return self
.url_result(youtubeIdNode
.text
, 'Youtube') 
  59             info
['description'] = videoNode
.findall('./description')[0].text
 
  60             info
['title'] = videoNode
.findall('./caption')[0].text
 
  61             info
['thumbnail'] = videoNode
.findall('./thumbnail')[0].text
 
  62             next_url 
= videoNode
.findall('./file')[0].text
 
  64             raise ExtractorError(u
'Invalid metadata XML file') 
  66         if next_url
.endswith(u
'manifest.f4m'): 
  67             manifest_url 
= next_url 
+ '?hdcore=2.10.3' 
  68             manifestXml 
= self
._download
_webpage
(manifest_url
, video_id
, 
  69                                          u
'Downloading XML manifest', 
  70                                          u
'Unable to download video info XML') 
  72             adoc 
= xml
.etree
.ElementTree
.fromstring(manifestXml
) 
  74                 media_node 
= adoc
.findall('./{http://ns.adobe.com/f4m/1.0}media')[0] 
  75                 node_id 
= media_node
.attrib
['url'] 
  76                 video_id 
= adoc
.findall('./{http://ns.adobe.com/f4m/1.0}id')[0].text
 
  77             except IndexError as err
: 
  78                 raise ExtractorError(u
'Invalid manifest file') 
  79             url_pr 
= compat_urllib_parse_urlparse(info
['thumbnail']) 
  80             info
['url'] = url_pr
.scheme 
+ '://' + url_pr
.netloc 
+ video_id
[:-2].replace('.csmil','').replace(',','') 
  83             # Old-style direct links 
  84             info
['url'] = next_url
 
  85             info
['ext'] = determine_ext(info
['url'])