]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/thvideo.py
   2 from __future__ 
import unicode_literals
 
   6 from .common 
import InfoExtractor
 
  12 class THVideoIE(InfoExtractor
): 
  13     _VALID_URL 
= r
'https?://(?:www\.)?thvideo\.tv/(?:v/th|mobile\.php\?cid=)(?P<id>[0-9]+)' 
  15         'url': 'http://thvideo.tv/v/th1987/', 
  16         'md5': 'fa107b1f73817e325e9433505a70db50', 
  20             'title': '【动画】秘封活动记录 ~ The Sealed Esoteric History.分镜稿预览', 
  21             'display_id': 'th1987', 
  22             'thumbnail': 'http://thvideo.tv/uploadfile/2014/0722/20140722013459856.jpg', 
  23             'description': '社团京都幻想剧团的第一个东方二次同人动画作品「秘封活动记录 ~ The Sealed Esoteric History.」 本视频是该动画第一期的分镜草稿...', 
  24             'upload_date': '20140722' 
  28     def _real_extract(self
, url
): 
  29         video_id 
= self
._match
_id
(url
) 
  31         # extract download link from mobile player page 
  32         webpage_player 
= self
._download
_webpage
( 
  33             'http://thvideo.tv/mobile.php?cid=%s-0' % (video_id
), 
  34             video_id
, note
='Downloading video source page') 
  35         video_url 
= self
._html
_search
_regex
( 
  36             r
'<source src="(.*?)" type', webpage_player
, 'video url') 
  38         # extract video info from main page 
  39         webpage 
= self
._download
_webpage
( 
  40             'http://thvideo.tv/v/th%s' % (video_id
), video_id
) 
  41         title 
= self
._og
_search
_title
(webpage
) 
  42         display_id 
= 'th%s' % video_id
 
  43         thumbnail 
= self
._og
_search
_thumbnail
(webpage
) 
  44         description 
= self
._og
_search
_description
(webpage
) 
  45         upload_date 
= unified_strdate(self
._html
_search
_regex
( 
  46             r
'span itemprop="datePublished" content="(.*?)">', webpage
, 
  47             'upload date', fatal
=False)) 
  54             'display_id': display_id
, 
  55             'thumbnail': thumbnail
, 
  56             'description': description
, 
  57             'upload_date': upload_date
 
  61 class THVideoPlaylistIE(InfoExtractor
): 
  62     _VALID_URL 
= r
'http?://(?:www\.)?thvideo\.tv/mylist(?P<id>[0-9]+)' 
  64         'url': 'http://thvideo.tv/mylist2', 
  69         'playlist_mincount': 23, 
  72     def _real_extract(self
, url
): 
  73         playlist_id 
= self
._match
_id
(url
) 
  75         webpage 
= self
._download
_webpage
(url
, playlist_id
) 
  76         list_title 
= self
._html
_search
_regex
( 
  77             r
'<h1 class="show_title">(.*?)<b id', webpage
, 'playlist title', 
  81             self
.url_result('http://thvideo.tv/v/th' + id, 'THVideo') 
  82             for id in re
.findall(r
'<dd><a href="http://thvideo.tv/v/th(\d+)/" target=', webpage
)] 
  84         return self
.playlist_result(entries
, playlist_id
, list_title
)