# Source: Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/bostonglobe.py
from __future__ import unicode_literals

import re

from .common import InfoExtractor
# NOTE(review): the original import line for extract_attributes is missing
# from this view; `..utils` is assumed -- confirm.
from ..utils import extract_attributes

  13  class  BostonGlobeIE ( InfoExtractor
):   14      _VALID_URL 
=  r
'(?i)https?://(?:www\.)?bostonglobe\.com/.*/(?P<id>[^/]+)/\w+(?:\.html)?'   17              'url' :  'http://www.bostonglobe.com/metro/2017/02/11/tree-finally-succumbs-disease-leaving-hole-neighborhood/h1b4lviqzMTIn9sVy8F3gP/story.html' ,   18              'md5' :  '0a62181079c85c2d2b618c9a738aedaf' ,   20                  'title' :  'A tree finally succumbs to disease, leaving a hole in a neighborhood' ,   21                  'id' :  '5320421710001' ,   23                  'description' :  'It arrived as a sapling when the Back Bay was in its infancy, a spindly American elm tamped down into a square of dirt cut into the brick sidewalk of 1880s Marlborough Street, no higher than the first bay window of the new brownstone behind it.' ,   24                  'timestamp' :  1486877593 ,   25                  'upload_date' :  '20170212' ,   26                  'uploader_id' :  '245991542' ,   30              # Embedded youtube video; we hand it off to the Generic extractor.   31              'url' :  'https://www.bostonglobe.com/lifestyle/names/2017/02/17/does-ben-affleck-play-matt-damon-favorite-version-batman/ruqkc9VxKBYmh5txn1XhSI/story.html' ,   32              'md5' :  '582b40327089d5c0c949b3c54b13c24b' ,   34                  'title' :  "Who Is Matt Damon's Favorite Batman?" ,   37                  'upload_date' :  '20170217' ,   38                  'description' :  'md5:3b3dccb9375867e0b4d527ed87d307cb' ,   39                  'uploader' :  'The Late Late Show with James Corden' ,   40                  'uploader_id' :  'TheLateLateShow' ,   42              'expected_warnings' : [ '404' ],   46      def  _real_extract ( self
,  url
):   47          page_id 
=  self
._ match
_ id
( url
)   48          webpage 
=  self
._ download
_ webpage
( url
,  page_id
)   50          page_title 
=  self
._ og
_ search
_ title
( webpage
,  default
= None )   52          # <video data-brightcove-video-id="5320421710001" data-account="245991542" data-player="SJWAiyYWg" data-embed="default" class="video-js" controls itemscope itemtype="http://schema.org/VideoObject">   54          for  video 
in  re
. findall ( r
'(?i)(<video[^>]+>)' ,  webpage
):   55              attrs 
=  extract_attributes ( video
)   57              video_id 
=  attrs
. get ( 'data-brightcove-video-id' )   58              account_id 
=  attrs
. get ( 'data-account' )   59              player_id 
=  attrs
. get ( 'data-player' )   60              embed 
=  attrs
. get ( 'data-embed' )   62              if  video_id 
and  account_id 
and  player_id 
and  embed
:   64                      'http://players.brightcove.net/ %s / %s _ %s /index.html?videoId= %s '   65                      % ( account_id
,  player_id
,  embed
,  video_id
))   68              return  self
. url_result ( url
,  'Generic' )   69          elif  len ( entries
) ==  1 :   70              return  self
. url_result ( entries
[ 0 ],  'BrightcoveNew' )   72              return  self
. playlist_from_matches ( entries
,  page_id
,  page_title
,  ie
= 'BrightcoveNew' )