]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/viidea.py
   1 from __future__ 
import unicode_literals
 
   5 from .common 
import InfoExtractor
 
  17 class ViideaIE(InfoExtractor
): 
  18     _VALID_URL 
= r
'''(?x)http://(?:www\.)?(?: 
  20             flexilearn\.viidea\.net| 
  21             presentations\.ocwconsortium\.org| 
  22             video\.travel-zoom\.si| 
  23             video\.pomp-forum\.si| 
  29             video\.kiberpipa\.org| 
  32             edemokracija\.viidea\.com 
  33         )(?:/lecture)?/(?P<id>[^/]+)(?:/video/(?P<part>\d+))?/*(?:[#?].*)?$''' 
  36         'url': 'http://videolectures.net/promogram_igor_mekjavic_eng/', 
  39             'display_id': 'promogram_igor_mekjavic_eng', 
  41             'title': 'Automatics, robotics and biocybernetics', 
  42             'description': 'md5:815fc1deb6b3a2bff99de2d5325be482', 
  43             'thumbnail': 're:http://.*\.jpg', 
  44             'timestamp': 1372349289, 
  45             'upload_date': '20130627', 
  50             'skip_download': True, 
  53         # video with invalid direct format links (HTTP 403) 
  54         'url': 'http://videolectures.net/russir2010_filippova_nlp/', 
  57             'display_id': 'russir2010_filippova_nlp', 
  59             'title': 'NLP at Google', 
  60             'description': 'md5:fc7a6d9bf0302d7cc0e53f7ca23747b3', 
  61             'thumbnail': 're:http://.*\.jpg', 
  62             'timestamp': 1284375600, 
  63             'upload_date': '20100913', 
  68             'skip_download': True, 
  72         'url': 'http://videolectures.net/deeplearning2015_montreal/', 
  75             'title': 'Deep Learning Summer School, Montreal 2015', 
  76             'description': 'md5:0533a85e4bd918df52a01f0e1ebe87b7', 
  77             'thumbnail': 're:http://.*\.jpg', 
  78             'timestamp': 1438560000, 
  83         'url': 'http://videolectures.net/mlss09uk_bishop_ibi/', 
  86             'display_id': 'mlss09uk_bishop_ibi', 
  87             'title': 'Introduction To Bayesian Inference', 
  88             'thumbnail': 're:http://.*\.jpg', 
  89             'timestamp': 1251622800, 
  94                 'display_id': 'mlss09uk_bishop_ibi_part1', 
  96                 'title': 'Introduction To Bayesian Inference (Part 1)', 
  97                 'thumbnail': 're:http://.*\.jpg', 
  99                 'timestamp': 1251622800, 
 100                 'upload_date': '20090830', 
 105                 'display_id': 'mlss09uk_bishop_ibi_part2', 
 107                 'title': 'Introduction To Bayesian Inference (Part 2)', 
 108                 'thumbnail': 're:http://.*\.jpg', 
 110                 'timestamp': 1251622800, 
 111                 'upload_date': '20090830', 
 117     def _real_extract(self
, url
): 
 118         lecture_slug
, explicit_part_id 
= re
.match(self
._VALID
_URL
, url
).groups() 
 120         webpage 
= self
._download
_webpage
(url
, lecture_slug
) 
 122         cfg 
= self
._parse
_json
(self
._search
_regex
( 
 123             [r
'cfg\s*:\s*({.+?})\s*,\s*[\da-zA-Z_]+\s*:\s*\(?\s*function', 
 124              r
'cfg\s*:\s*({[^}]+})'], 
 125             webpage
, 'cfg'), lecture_slug
, js_to_json
) 
 127         lecture_id 
= compat_str(cfg
['obj_id']) 
 129         base_url 
= self
._proto
_relative
_url
(cfg
['livepipe'], 'http:') 
 131         lecture_data 
= self
._download
_json
( 
 132             '%s/site/api/lecture/%s?format=json' % (base_url
, lecture_id
), 
 133             lecture_id
)['lecture'][0] 
 137             'display_id': lecture_slug
, 
 138             'title': lecture_data
['title'], 
 139             'timestamp': parse_iso8601(lecture_data
.get('time')), 
 140             'description': lecture_data
.get('description_wiki'), 
 141             'thumbnail': lecture_data
.get('thumb'), 
 144         playlist_entries 
= [] 
 145         lecture_type 
= lecture_data
.get('type') 
 146         parts 
= [compat_str(video
) for video 
in cfg
.get('videos', [])] 
 148             multipart 
= len(parts
) > 1 
 150             def extract_part(part_id
): 
 151                 smil_url 
= '%s/%s/video/%s/smil.xml' % (base_url
, lecture_slug
, part_id
) 
 152                 smil 
= self
._download
_smil
(smil_url
, lecture_id
) 
 153                 info 
= self
._parse
_smil
(smil
, smil_url
, lecture_id
) 
 154                 info
['id'] = lecture_id 
if not multipart 
else '%s_part%s' % (lecture_id
, part_id
) 
 155                 info
['display_id'] = lecture_slug 
if not multipart 
else '%s_part%s' % (lecture_slug
, part_id
) 
 157                     info
['title'] += ' (Part %s)' % part_id
 
 158                 switch 
= smil
.find('.//switch') 
 159                 if switch 
is not None: 
 160                     info
['duration'] = parse_duration(switch
.attrib
.get('dur')) 
 161                 item_info 
= lecture_info
.copy() 
 162                 item_info
.update(info
) 
 165             if explicit_part_id 
or not multipart
: 
 166                 result 
= extract_part(explicit_part_id 
or parts
[0]) 
 169                     '_type': 'multi_video', 
 170                     'entries': [extract_part(part
) for part 
in parts
], 
 172                 result
.update(lecture_info
) 
 174             # Immediately return explicitly requested part or non event item 
 175             if explicit_part_id 
or lecture_type 
!= 'evt': 
 178             playlist_entries
.append(result
) 
 180         # It's probably a playlist 
 181         if not parts 
or lecture_type 
== 'evt': 
 182             playlist_webpage 
= self
._download
_webpage
( 
 183                 '%s/site/ajax/drilldown/?id=%s' % (base_url
, lecture_id
), lecture_id
) 
 185                 self
.url_result(compat_urlparse
.urljoin(url
, video_url
), 'Viidea') 
 186                 for _
, video_url 
in re
.findall( 
 187                     r
'<a[^>]+href=(["\'])(.+?
)\
1[^
>]+id=["\']lec=\d+', playlist_webpage)] 
 188             playlist_entries.extend(entries) 
 190         playlist = self.playlist_result(playlist_entries, lecture_id) 
 191         playlist.update(lecture_info)