]>
 
 
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/comedycentral.py 
 
 
 
 
 
 
 
 
   1  from  __future__ 
import  unicode_literals
 
   5  from  . mtv 
import  MTVServicesInfoExtractor
 
  17  class  ComedyCentralIE ( MTVServicesInfoExtractor
):  
  18      _VALID_URL 
=  r
'''(?x)https?://(?:www\.)?cc\.com/  
  19          (video-clips|episodes|cc-studios|video-collections|full-episodes)  
  21      _FEED_URL 
=  'http://comedycentral.com/feeds/mrss/'  
  24          'url' :  'http://www.cc.com/video-clips/kllhuv/stand-up-greg-fitzsimmons--uncensored---too-good-of-a-mother' ,  
  25          'md5' :  'c4f48e9eda1b16dd10add0744344b6d8' ,  
  27              'id' :  'cef0cbb3-e776-4bc9-b62e-8016deccb354' ,  
  29              'title' :  'CC:Stand-Up|Greg Fitzsimmons: Life on Stage|Uncensored - Too Good of a Mother' ,  
  30              'description' :  'After a certain point, breastfeeding becomes c**kblocking.' ,  
  35  class  ComedyCentralShowsIE ( MTVServicesInfoExtractor
):  
  36      IE_DESC 
=  'The Daily Show / The Colbert Report'  
  37      # urls can be abbreviations like :thedailyshow  
  38      # urls for episodes like:  
  39      # or urls for clips like: http://www.thedailyshow.com/watch/mon-december-10-2012/any-given-gun-day  
  40      #                     or: http://www.colbertnation.com/the-colbert-report-videos/421667/november-29-2012/moon-shattering-news  
  41      #                     or: http://www.colbertnation.com/the-colbert-report-collections/422008/festival-of-lights/79524  
  42      _VALID_URL 
=  r
'''(?x)^(:(?P<shortname>tds|thedailyshow)  
  44                            (?P<showname>thedailyshow|thecolbertreport)\.(?:cc\.)?com/  
  45                           ((?:full-)?episodes/(?:[0-9a-z] {6} /)?(?P<episode>.*)|  
  47                                (?:(?:guests/[^/]+|videos|video-playlists|special-editions|news-team/[^/]+)/[^/]+/(?P<videotitle>[^/?#]+))  
  48                                |(the-colbert-report-(videos|collections)/(?P<clipID>[0-9]+)/[^/]*/(?P<cntitle>.*?))  
  49                                |(watch/(?P<date>[^/]*)/(?P<tdstitle>.*))  
  52                                extended-interviews/(?P<interID>[0-9a-z]+)/  
  53                                (?:playlist_tds_extended_)?(?P<interview_title>[^/?#]*?)  
  54                                (?:/[^/?#]?|[?#]|$))))  
  57          'url' :  'http://thedailyshow.cc.com/watch/thu-december-13-2012/kristen-stewart' ,  
  58          'md5' :  '4e2f5cb088a83cd8cdb7756132f9739d' ,  
  60              'id' :  'ab9ab3e7-5a98-4dbe-8b21-551dc0523d55' ,  
  62              'upload_date' :  '20121213' ,  
  63              'description' :  'Kristen Stewart learns to let loose in "On the Road."' ,  
  64              'uploader' :  'thedailyshow' ,  
  65              'title' :  'thedailyshow kristen-stewart part 1' ,  
  68          'url' :  'http://thedailyshow.cc.com/extended-interviews/b6364d/sarah-chayes-extended-interview' ,  
  70              'id' :  'sarah-chayes-extended-interview' ,  
  71              'description' :  'Carnegie Endowment Senior Associate Sarah Chayes discusses how corrupt institutions function throughout the world in her book "Thieves of State: Why Corruption Threatens Global Security."' ,  
  72              'title' :  'thedailyshow Sarah Chayes Extended Interview' ,  
  77                      'id' :  '0baad492-cbec-4ec1-9e50-ad91c291127f' ,  
  79                      'upload_date' :  '20150129' ,  
  80                      'description' :  'Carnegie Endowment Senior Associate Sarah Chayes discusses how corrupt institutions function throughout the world in her book "Thieves of State: Why Corruption Threatens Global Security."' ,  
  81                      'uploader' :  'thedailyshow' ,  
  82                      'title' :  'thedailyshow sarah-chayes-extended-interview part 1' ,  
  87                      'id' :  '1e4fb91b-8ce7-4277-bd7c-98c9f1bbd283' ,  
  89                      'upload_date' :  '20150129' ,  
  90                      'description' :  'Carnegie Endowment Senior Associate Sarah Chayes discusses how corrupt institutions function throughout the world in her book "Thieves of State: Why Corruption Threatens Global Security."' ,  
  91                      'uploader' :  'thedailyshow' ,  
  92                      'title' :  'thedailyshow sarah-chayes-extended-interview part 2' ,  
  97              'skip_download' :  True ,  
 100          'url' :  'http://thedailyshow.cc.com/extended-interviews/xm3fnq/andrew-napolitano-extended-interview' ,  
 101          'only_matching' :  True ,  
 103          'url' :  'http://thecolbertreport.cc.com/videos/29w6fx/-realhumanpraise-for-fox-news' ,  
 104          'only_matching' :  True ,  
 106          'url' :  'http://thecolbertreport.cc.com/videos/gh6urb/neil-degrasse-tyson-pt--1?xrs=eml_col_031114' ,  
 107          'only_matching' :  True ,  
 109          'url' :  'http://thedailyshow.cc.com/guests/michael-lewis/3efna8/exclusive---michael-lewis-extended-interview-pt--3' ,  
 110          'only_matching' :  True ,  
 112          'url' :  'http://thedailyshow.cc.com/episodes/sy7yv0/april-8--2014---denis-leary' ,  
 113          'only_matching' :  True ,  
 115          'url' :  'http://thecolbertreport.cc.com/episodes/8ase07/april-8--2014---jane-goodall' ,  
 116          'only_matching' :  True ,  
 118          'url' :  'http://thedailyshow.cc.com/video-playlists/npde3s/the-daily-show-19088-highlights' ,  
 119          'only_matching' :  True ,  
 121          'url' :  'http://thedailyshow.cc.com/video-playlists/t6d9sg/the-daily-show-20038-highlights/be3cwo' ,  
 122          'only_matching' :  True ,  
 124          'url' :  'http://thedailyshow.cc.com/special-editions/2l8fdb/special-edition---a-look-back-at-food' ,  
 125          'only_matching' :  True ,  
 127          'url' :  'http://thedailyshow.cc.com/news-team/michael-che/7wnfel/we-need-to-talk-about-israel' ,  
 128          'only_matching' :  True ,  
 131      _available_formats 
= [ '3500' ,  '2200' ,  '1700' ,  '1200' ,  '750' ,  '400' ]  
 133      _video_extensions 
= {  
 141      _video_dimensions 
= {  
 150      def  _real_extract ( self
,  url
):  
 151          mobj 
=  re
. match ( self
._ VALID
_U RL
,  url
)  
 153          if  mobj
. group ( 'shortname' ):  
 154              if  mobj
. group ( 'shortname' )  in  ( 'tds' ,  'thedailyshow' ):  
 155                  url 
=  'http://thedailyshow.cc.com/full-episodes/'  
 157                  url 
=  'http://thecolbertreport.cc.com/full-episodes/'  
 158              mobj 
=  re
. match ( self
._ VALID
_U RL
,  url
,  re
. VERBOSE
)  
 159              assert  mobj 
is not None  
 161          if  mobj
. group ( 'clip' ):  
 162              if  mobj
. group ( 'videotitle' ):  
 163                  epTitle 
=  mobj
. group ( 'videotitle' )  
 164              elif  mobj
. group ( 'showname' ) ==  'thedailyshow' :  
 165                  epTitle 
=  mobj
. group ( 'tdstitle' )  
 167                  epTitle 
=  mobj
. group ( 'cntitle' )  
 169          elif  mobj
. group ( 'interview' ):  
 170              epTitle 
=  mobj
. group ( 'interview_title' )  
 173              dlNewest 
=  not  mobj
. group ( 'episode' )  
 175                  epTitle 
=  mobj
. group ( 'showname' )  
 177                  epTitle 
=  mobj
. group ( 'episode' )  
 178          show_name 
=  mobj
. group ( 'showname' )  
 180          webpage
,  htmlHandle 
=  self
._ download
_ webpage
_ handle
( url
,  epTitle
)  
 182              url 
=  htmlHandle
. geturl ()  
 183              mobj 
=  re
. match ( self
._ VALID
_U RL
,  url
,  re
. VERBOSE
)  
 185                  raise  ExtractorError ( 'Invalid redirected URL: '  +  url
)  
 186              if  mobj
. group ( 'episode' ) ==  '' :  
 187                  raise  ExtractorError ( 'Redirected URL is still not specific: '  +  url
)  
 188              epTitle 
= ( mobj
. group ( 'episode' )  or  mobj
. group ( 'videotitle' )). rpartition ( '/' )[- 1 ]  
 190          mMovieParams 
=  re
. findall ( '(?:<param name="movie" value="|var url = ")(http://media.mtvnservices.com/([^"]*(?:episode|video).*?:.*?))"' ,  webpage
)  
 191          if  len ( mMovieParams
) ==  0 :  
 192              # The Colbert Report embeds the information in a data-mgid attribute without  
 193              # a URL prefix; so extract the alternate reference  
 194              # and then add the URL prefix manually.  
 196              altMovieParams 
=  re
. findall ( 'data-mgid="([^"]*(?:episode|video|playlist).*?:.*?)"' ,  webpage
)  
 197              if  len ( altMovieParams
) ==  0 :  
 198                  raise  ExtractorError ( 'unable to find Flash URL in webpage '  +  url
)  
 200                  mMovieParams 
= [( "http://media.mtvnservices.com/"  +  altMovieParams
[ 0 ],  altMovieParams
[ 0 ])]  
 202          uri 
=  mMovieParams
[ 0 ][ 1 ]  
 203          # Correct cc.com in uri  
 204          uri 
=  re
. sub ( r
'(episode:[^.]+)(\.cc)?\.com' ,  r
'\1.cc.com' ,  uri
)  
 206          index_url 
=  'http:// %s .cc.com/feeds/mrss? %s '  % ( show_name
,  compat_urllib_parse
. urlencode ({ 'uri' :  uri
}))  
 207          idoc 
=  self
._ download
_ xml
(  
 209              'Downloading show index' ,  'Unable to download episode index' )  
 211          title 
=  idoc
. find ( './channel/title' ). text
 
 212          description 
=  idoc
. find ( './channel/description' ). text
 
 215          item_els 
=  idoc
. findall ( './/item' )  
 216          for  part_num
,  itemEl 
in  enumerate ( item_els
):  
 217              upload_date 
=  unified_strdate ( itemEl
. findall ( './pubDate' )[ 0 ]. text
)  
 218              thumbnail 
=  itemEl
. find ( './/{http://search.yahoo.com/mrss/}thumbnail' ). attrib
. get ( 'url' )  
 220              content 
=  itemEl
. find ( './/{http://search.yahoo.com/mrss/}content' )  
 221              duration 
=  float_or_none ( content
. attrib
. get ( 'duration' ))  
 222              mediagen_url 
=  content
. attrib
[ 'url' ]  
 223              guid 
=  itemEl
. find ( './guid' ). text
. rpartition ( ':' )[- 1 ]  
 225              cdoc 
=  self
._ download
_ xml
(  
 226                  mediagen_url
,  epTitle
,  
 227                  'Downloading configuration for segment  %d  /  %d '  % ( part_num 
+  1 ,  len ( item_els
)))  
 230              for  rendition 
in  cdoc
. findall ( './/rendition' ):  
 231                  finfo 
= ( rendition
. attrib
[ 'bitrate' ],  rendition
. findall ( './src' )[ 0 ]. text
)  
 235              for  format
,  rtmp_video_url 
in  turls
:  
 236                  w
,  h 
=  self
._ video
_ dimensions
. get ( format
, ( None ,  None ))  
 238                      'format_id' :  'vhttp- %s '  %  format
,  
 239                      'url' :  self
._ transform
_ rtmp
_u rl
( rtmp_video_url
),  
 240                      'ext' :  self
._ video
_ extensions
. get ( format
,  'mp4' ),  
 245                      'format_id' :  'rtmp- %s '  %  format
,  
 246                      'url' :  rtmp_video_url
. replace ( 'viacomccstrm' ,  'viacommtvstrm' ),  
 247                      'ext' :  self
._ video
_ extensions
. get ( format
,  'mp4' ),  
 251                  self
._ sort
_ formats
( formats
)  
 253              subtitles 
=  self
._ extract
_ subtitles
( cdoc
,  guid
)  
 255              virtual_id 
=  show_name 
+  ' '  +  epTitle 
+  ' part '  +  compat_str ( part_num 
+  1 )  
 260                  'uploader' :  show_name
,  
 261                  'upload_date' :  upload_date
,  
 262                  'duration' :  duration
,  
 263                  'thumbnail' :  thumbnail
,  
 264                  'description' :  description
,  
 265                  'subtitles' :  subtitles
,  
 272              'title' :  show_name 
+  ' '  +  title
,  
 273              'description' :  description
,