]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/pbs.py 
fec5d65ad94892ca0f40a9e49703c857d98b47a4
   2  from  __future__ 
import  unicode_literals
   6  from  . common 
import  InfoExtractor
  16  class  PBSIE ( InfoExtractor
):   17      _VALID_URL 
=  r
'''(?x)https?://   20             video\.pbs\.org/(?:viralplayer|video)/(?P<id>[0-9]+)/? |   21             # Article with embedded player (or direct video)   22             (?:www\.)?pbs\.org/(?:[^/]+/){2,5}(?P<presumptive_id>[^/]+?)(?:\.html)?/?(?:$|[?\#]) |   24             video\.pbs\.org/(?:widget/)?partnerplayer/(?P<player_id>[^/]+)/   30              'url' :  'http://www.pbs.org/tpt/constitution-usa-peter-sagal/watch/a-more-perfect-union/' ,   31              'md5' :  'ce1888486f0908d555a8093cac9a7362' ,   35                  'title' :  'A More Perfect Union' ,   36                  'description' :  'md5:ba0c207295339c8d6eced00b7c363c6a' ,   40                  'skip_download' :  True ,   # requires ffmpeg   44              'url' :  'http://www.pbs.org/wgbh/pages/frontline/losing-iraq/' ,   45              'md5' :  '143c98aa54a346738a3d78f54c925321' ,   49                  'title' :  'Losing Iraq' ,   50                  'description' :  'md5:f5bfbefadf421e8bb8647602011caf8e' ,   54                  'skip_download' :  True ,   # requires ffmpeg   58              'url' :  'http://www.pbs.org/newshour/bb/education-jan-june12-cyberschools_02-23/' ,   59              'md5' :  'b19856d7f5351b17a5ab1dc6a64be633' ,   63                  'title' :  'Cyber Schools Gain Popularity, but Quality Questions Persist' ,   64                  'description' :  'md5:5871c15cba347c1b3d28ac47a73c7c28' ,   69              'url' :  'http://www.pbs.org/wnet/gperf/dudamel-conducts-verdi-requiem-hollywood-bowl-full-episode/3374/' ,   70              'md5' :  'c62859342be2a0358d6c9eb306595978' ,   74                  'description' :  'md5:68d87ef760660eb564455eb30ca464fe' ,   75                  'title' :  'Dudamel Conducts Verdi Requiem at the Hollywood Bowl - Full' ,   77                  'thumbnail' :  're:^https?://.*\.jpg$' ,   80                  'skip_download' :  True ,   # requires ffmpeg   84              'url' :  'http://www.pbs.org/wgbh/nova/earth/killer-typhoon.html' ,   85              'md5' :  '908f3e5473a693b266b84e25e1cf9703' ,   88                  'display_id' :  'killer-typhoon' ,   90                  'description' :  'md5:c741d14e979fc53228c575894094f157' ,   91                  'title' :  'Killer Typhoon' ,   93                  'thumbnail' :  're:^https?://.*\.jpg$' ,   94                  'upload_date' :  '20140122' ,   97                  'skip_download' :  True ,   # requires ffmpeg  101              'url' :  'http://www.pbs.org/wgbh/pages/frontline/united-states-of-secrets/' ,  103                  'id' :  'united-states-of-secrets' ,  108              'url' :  'http://www.pbs.org/wgbh/americanexperience/films/death/player/' ,  111                  'display_id' :  'player' ,  113                  'title' :  'Death and the Civil War' ,  114                  'description' :  'American Experience, TV’s most-watched history series, brings to life the compelling stories from our past that inform our understanding of the world today.' ,  116                  'thumbnail' :  're:^https?://.*\.jpg$' ,  119                  'skip_download' :  True ,   # requires ffmpeg  124      def  _extract_webpage ( self
,  url
):  125          mobj 
=  re
. match ( self
._ VALID
_U RL
,  url
)  127          presumptive_id 
=  mobj
. group ( 'presumptive_id' )  128          display_id 
=  presumptive_id
 130              webpage 
=  self
._ download
_ webpage
( url
,  display_id
)  132              upload_date 
=  unified_strdate ( self
._ search
_ regex
(  133                  r
'<input type="hidden" id="air_date_[0-9]+" value="([^"]+)"' ,  134                  webpage
,  'upload date' ,  default
= None ))  136              # tabbed frontline videos  137              tabbed_videos 
=  re
. findall (  138                  r
'<div[^>]+class="videotab[^"]*"[^>]+vid="(\d+)"' ,  webpage
)  140                  return  tabbed_videos
,  presumptive_id
,  upload_date
 143                  r
"div\s*:\s*'videoembed'\s*,\s*mediaid\s*:\s*'(\d+)'" ,   # frontline video embed  144                  r
'class="coveplayerid">([^<]+)<' ,                        # coveplayer  145                  r
'<input type="hidden" id="pbs_video_id_[0-9]+" value="([0-9]+)"/>' ,   # jwplayer  148              media_id 
=  self
._ search
_ regex
(  149                  MEDIA_ID_REGEXES
,  webpage
,  'media ID' ,  fatal
= False ,  default
= None )  151                  return  media_id
,  presumptive_id
,  upload_date
 153              url 
=  self
._ search
_ regex
(  154                  r
'<iframe\s+[^>]*\s+src=["\' ]([ ^
\' "]+partnerplayer[^ \' " ]+)[ " \' ]',  155                  webpage, 'player URL')  156              mobj = re.match(self._VALID_URL, url)  158          player_id = mobj.group('player_id')  160              display_id = player_id  162              player_page = self._download_webpage(  163                  url, display_id, note='Downloading player page',  164                  errnote='Could not download player page')  165              video_id = self._search_regex(  166                  r'<div\s+id=" video_ ([ 0 - 9 ]+) "', player_page, 'video ID')  168              video_id = mobj.group('id')  169              display_id = video_id  171          return video_id, display_id, None  173      def _real_extract(self, url):  174          video_id, display_id, upload_date = self._extract_webpage(url)  176          if isinstance(video_id, list):  177              entries = [self.url_result(  178                  'http://video.pbs.org/video/ %s ' % vid_id, 'PBS', vid_id)  179                  for vid_id in video_id]  180              return self.playlist_result(entries, display_id)  182          info = self._download_json(  183              'http://video.pbs.org/videoInfo/ %s ?format=json&type=partner' % video_id,  187          for encoding_name in ('recommended_encoding', 'alternate_encoding'):  188              redirect = info.get(encoding_name)  191              redirect_url = redirect.get('url')  195              redirect_info = self._download_json(  196                  redirect_url + '?format=json', display_id,  197                  'Downloading  %s  video url info' % encoding_name)  199              if redirect_info['status'] == 'error':  200                  if redirect_info['http_code'] == 403:  202                          'The video is not available in your region due to '  203                          'right restrictions')  205                      message = redirect_info['message']  206                  raise ExtractorError(message, expected=True)  208              format_url = redirect_info.get('url')  212              if determine_ext(format_url) == 'm3u8':  213                  formats.extend(self._extract_m3u8_formats(  214                      format_url, display_id, 'mp4', preference=1, m3u8_id='hls'))  218                      'format_id': redirect.get('eeid'),  220          self._sort_formats(formats)  222          rating_str = info.get('rating')  223          if rating_str is not None:  224              rating_str = rating_str.rpartition('-')[2]  225          age_limit = US_RATINGS.get(rating_str)  228          closed_captions_url = info.get('closed_captions_url')  229          if closed_captions_url:  232                  'url': closed_captions_url,  237              'display_id': display_id,  238              'title': info['title'],  239              'description': info['program'].get('description'),  240              'thumbnail': info.get('image_url'),  241              'duration': int_or_none(info.get('duration')),  242              'age_limit': age_limit,  243              'upload_date': upload_date,  245              'subtitles': subtitles,