]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/sohu.py
   6 from .common 
import InfoExtractor
 
   7 from ..utils 
import ExtractorError
 
  10 class SohuIE(InfoExtractor
): 
  11     _VALID_URL 
= r
'https?://tv\.sohu\.com/\d+?/n(?P<id>\d+)\.shtml.*?' 
  14         u
'url': u
'http://tv.sohu.com/20130724/n382479172.shtml#super', 
  15         u
'file': u
'382479172.mp4', 
  16         u
'md5': u
'bde8d9a6ffd82c63a1eefaef4eeefec7', 
  18             u
'title': u
'MV:Far East Movement《The Illest》', 
  22     def _real_extract(self
, url
): 
  24         def _fetch_data(vid_id
): 
  25             base_data_url 
= u
'http://hot.vrs.sohu.com/vrs_flash.action?vid=' 
  26             data_url 
= base_data_url 
+ str(vid_id
) 
  27             data_json 
= self
._download
_webpage
( 
  29                 note
=u
'Downloading JSON data for ' + str(vid_id
)) 
  30             return json
.loads(data_json
) 
  32         mobj 
= re
.match(self
._VALID
_URL
, url
) 
  33         video_id 
= mobj
.group('id') 
  35         webpage 
= self
._download
_webpage
(url
, video_id
) 
  36         raw_title 
= self
._html
_search
_regex
(r
'(?s)<title>(.+?)</title>', 
  37                                             webpage
, u
'video title') 
  38         title 
= raw_title
.partition('-')[0].strip() 
  40         vid 
= self
._html
_search
_regex
(r
'var vid="(\d+)"', webpage
, 
  42         data 
= _fetch_data(vid
) 
  44         QUALITIES 
= ('ori', 'super', 'high', 'nor') 
  45         vid_ids 
= [data
['data'][q 
+ 'Vid'] 
  47                    if data
['data'][q 
+ 'Vid'] != 0] 
  49             raise ExtractorError(u
'No formats available for this video') 
  51         # For now, we just pick the highest available quality 
  54         format_data 
= data 
if vid 
== vid_id 
else _fetch_data(vid_id
) 
  55         part_count 
= format_data
['data']['totalBlocks'] 
  56         allot 
= format_data
['allot'] 
  57         prot 
= format_data
['prot'] 
  58         clipsURL 
= format_data
['data']['clipsURL'] 
  59         su 
= format_data
['data']['su'] 
  62         for i 
in range(part_count
): 
  63             part_url 
= ('http://%s/?prot=%s&file=%s&new=%s' % 
  64                         (allot
, prot
, clipsURL
[i
], su
[i
])) 
  65             part_str 
= self
._download
_webpage
( 
  67                 note
=u
'Downloading part %d of %d' % (i
+1, part_count
)) 
  69             part_info 
= part_str
.split('|') 
  70             video_url 
= '%s%s?key=%s' % (part_info
[0], su
[i
], part_info
[3]) 
  73                 'id': '%s_part%02d' % (video_id
, i 
+ 1), 
  78             playlist
.append(video_info
) 
  80         if len(playlist
) == 1: