]>
Raphaƫl G. Git Repositories - youtubedl/blob - youtube_dl/extractor/comedycentral.py 
   1  from  __future__ 
import  unicode_literals
   5  from  . mtv 
import  MTVServicesInfoExtractor
  17  class  ComedyCentralIE ( MTVServicesInfoExtractor
):   18      _VALID_URL 
=  r
'''(?x)https?://(?:www\.)?cc\.com/   19          (video-clips|episodes|cc-studios|video-collections|full-episodes)   21      _FEED_URL 
=  'http://comedycentral.com/feeds/mrss/'   24          'url' :  'http://www.cc.com/video-clips/kllhuv/stand-up-greg-fitzsimmons--uncensored---too-good-of-a-mother' ,   25          'md5' :  'c4f48e9eda1b16dd10add0744344b6d8' ,   27              'id' :  'cef0cbb3-e776-4bc9-b62e-8016deccb354' ,   29              'title' :  'CC:Stand-Up|Greg Fitzsimmons: Life on Stage|Uncensored - Too Good of a Mother' ,   30              'description' :  'After a certain point, breastfeeding becomes c**kblocking.' ,   35  class  ComedyCentralShowsIE ( MTVServicesInfoExtractor
):   36      IE_DESC 
=  'The Daily Show / The Colbert Report'   37      # urls can be abbreviations like :thedailyshow   38      # urls for episodes like:   39      # or urls for clips like: http://www.thedailyshow.com/watch/mon-december-10-2012/any-given-gun-day   40      #                     or: http://www.colbertnation.com/the-colbert-report-videos/421667/november-29-2012/moon-shattering-news   41      #                     or: http://www.colbertnation.com/the-colbert-report-collections/422008/festival-of-lights/79524   42      _VALID_URL 
=  r
'''(?x)^(:(?P<shortname>tds|thedailyshow)   44                            (?P<showname>thedailyshow|thecolbertreport)\.(?:cc\.)?com/   45                           ((?:full-)?episodes/(?:[0-9a-z] {6} /)?(?P<episode>.*)|   47                                (?:(?:guests/[^/]+|videos|video-playlists|special-editions|news-team/[^/]+)/[^/]+/(?P<videotitle>[^/?#]+))   48                                |(the-colbert-report-(videos|collections)/(?P<clipID>[0-9]+)/[^/]*/(?P<cntitle>.*?))   49                                |(watch/(?P<date>[^/]*)/(?P<tdstitle>.*))   52                                extended-interviews/(?P<interID>[0-9a-z]+)/   53                                (?:playlist_tds_extended_)?(?P<interview_title>[^/?#]*?)   54                                (?:/[^/?#]?|[?#]|$))))   57          'url' :  'http://thedailyshow.cc.com/watch/thu-december-13-2012/kristen-stewart' ,   58          'md5' :  '4e2f5cb088a83cd8cdb7756132f9739d' ,   60              'id' :  'ab9ab3e7-5a98-4dbe-8b21-551dc0523d55' ,   62              'upload_date' :  '20121213' ,   63              'description' :  'Kristen Stewart learns to let loose in "On the Road."' ,   64              'uploader' :  'thedailyshow' ,   65              'title' :  'thedailyshow kristen-stewart part 1' ,   68          'url' :  'http://thedailyshow.cc.com/extended-interviews/b6364d/sarah-chayes-extended-interview' ,   70              'id' :  'sarah-chayes-extended-interview' ,   71              'description' :  'Carnegie Endowment Senior Associate Sarah Chayes discusses how corrupt institutions function throughout the world in her book "Thieves of State: Why Corruption Threatens Global Security."' ,   72              'title' :  'thedailyshow Sarah Chayes Extended Interview' ,   77                      'id' :  '0baad492-cbec-4ec1-9e50-ad91c291127f' ,   79                      'upload_date' :  '20150129' ,   80                      'description' :  'Carnegie Endowment Senior Associate Sarah Chayes discusses how corrupt institutions function throughout the world in her book "Thieves of State: Why Corruption Threatens Global Security."' ,   81                      'uploader' :  'thedailyshow' ,   82                      'title' :  'thedailyshow sarah-chayes-extended-interview part 1' ,   87                      'id' :  '1e4fb91b-8ce7-4277-bd7c-98c9f1bbd283' ,   89                      'upload_date' :  '20150129' ,   90                      'description' :  'Carnegie Endowment Senior Associate Sarah Chayes discusses how corrupt institutions function throughout the world in her book "Thieves of State: Why Corruption Threatens Global Security."' ,   91                      'uploader' :  'thedailyshow' ,   92                      'title' :  'thedailyshow sarah-chayes-extended-interview part 2' ,   97              'skip_download' :  True ,  100          'url' :  'http://thedailyshow.cc.com/extended-interviews/xm3fnq/andrew-napolitano-extended-interview' ,  101          'only_matching' :  True ,  103          'url' :  'http://thecolbertreport.cc.com/videos/29w6fx/-realhumanpraise-for-fox-news' ,  104          'only_matching' :  True ,  106          'url' :  'http://thecolbertreport.cc.com/videos/gh6urb/neil-degrasse-tyson-pt--1?xrs=eml_col_031114' ,  107          'only_matching' :  True ,  109          'url' :  'http://thedailyshow.cc.com/guests/michael-lewis/3efna8/exclusive---michael-lewis-extended-interview-pt--3' ,  110          'only_matching' :  True ,  112          'url' :  'http://thedailyshow.cc.com/episodes/sy7yv0/april-8--2014---denis-leary' ,  113          'only_matching' :  True ,  115          'url' :  'http://thecolbertreport.cc.com/episodes/8ase07/april-8--2014---jane-goodall' ,  116          'only_matching' :  True ,  118          'url' :  'http://thedailyshow.cc.com/video-playlists/npde3s/the-daily-show-19088-highlights' ,  119          'only_matching' :  True ,  121          'url' :  'http://thedailyshow.cc.com/video-playlists/t6d9sg/the-daily-show-20038-highlights/be3cwo' ,  122          'only_matching' :  True ,  124          'url' :  'http://thedailyshow.cc.com/special-editions/2l8fdb/special-edition---a-look-back-at-food' ,  125          'only_matching' :  True ,  127          'url' :  'http://thedailyshow.cc.com/news-team/michael-che/7wnfel/we-need-to-talk-about-israel' ,  128          'only_matching' :  True ,  131      _available_formats 
= [ '3500' ,  '2200' ,  '1700' ,  '1200' ,  '750' ,  '400' ]  133      _video_extensions 
= {  141      _video_dimensions 
= {  150      def  _real_extract ( self
,  url
):  151          mobj 
=  re
. match ( self
._ VALID
_U RL
,  url
)  153          if  mobj
. group ( 'shortname' ):  154              if  mobj
. group ( 'shortname' )  in  ( 'tds' ,  'thedailyshow' ):  155                  url 
=  'http://thedailyshow.cc.com/full-episodes/'  157                  url 
=  'http://thecolbertreport.cc.com/full-episodes/'  158              mobj 
=  re
. match ( self
._ VALID
_U RL
,  url
,  re
. VERBOSE
)  159              assert  mobj 
is not None  161          if  mobj
. group ( 'clip' ):  162              if  mobj
. group ( 'videotitle' ):  163                  epTitle 
=  mobj
. group ( 'videotitle' )  164              elif  mobj
. group ( 'showname' ) ==  'thedailyshow' :  165                  epTitle 
=  mobj
. group ( 'tdstitle' )  167                  epTitle 
=  mobj
. group ( 'cntitle' )  169          elif  mobj
. group ( 'interview' ):  170              epTitle 
=  mobj
. group ( 'interview_title' )  173              dlNewest 
=  not  mobj
. group ( 'episode' )  175                  epTitle 
=  mobj
. group ( 'showname' )  177                  epTitle 
=  mobj
. group ( 'episode' )  178          show_name 
=  mobj
. group ( 'showname' )  180          webpage
,  htmlHandle 
=  self
._ download
_ webpage
_ handle
( url
,  epTitle
)  182              url 
=  htmlHandle
. geturl ()  183              mobj 
=  re
. match ( self
._ VALID
_U RL
,  url
,  re
. VERBOSE
)  185                  raise  ExtractorError ( 'Invalid redirected URL: '  +  url
)  186              if  mobj
. group ( 'episode' ) ==  '' :  187                  raise  ExtractorError ( 'Redirected URL is still not specific: '  +  url
)  188              epTitle 
= ( mobj
. group ( 'episode' )  or  mobj
. group ( 'videotitle' )). rpartition ( '/' )[- 1 ]  190          mMovieParams 
=  re
. findall ( '(?:<param name="movie" value="|var url = ")(http://media.mtvnservices.com/([^"]*(?:episode|video).*?:.*?))"' ,  webpage
)  191          if  len ( mMovieParams
) ==  0 :  192              # The Colbert Report embeds the information in a without  193              # a URL prefix; so extract the alternate reference  194              # and then add the URL prefix manually.  196              altMovieParams 
=  re
. findall ( 'data-mgid="([^"]*(?:episode|video|playlist).*?:.*?)"' ,  webpage
)  197              if  len ( altMovieParams
) ==  0 :  198                  raise  ExtractorError ( 'unable to find Flash URL in webpage '  +  url
)  200                  mMovieParams 
= [( "http://media.mtvnservices.com/"  +  altMovieParams
[ 0 ],  altMovieParams
[ 0 ])]  202          uri 
=  mMovieParams
[ 0 ][ 1 ]  203          # Correct cc.com in uri  204          uri 
=  re
. sub ( r
'(episode:[^.]+)(\.cc)?\.com' ,  r
'\1.cc.com' ,  uri
)  206          index_url 
=  'http:// %s .cc.com/feeds/mrss? %s '  % ( show_name
,  compat_urllib_parse
. urlencode ({ 'uri' :  uri
}))  207          idoc 
=  self
._ download
_ xml
(  209              'Downloading show index' ,  'Unable to download episode index' )  211          title 
=  idoc
. find ( './channel/title' ). text
 212          description 
=  idoc
. find ( './channel/description' ). text
 215          item_els 
=  idoc
. findall ( './/item' )  216          for  part_num
,  itemEl 
in  enumerate ( item_els
):  217              upload_date 
=  unified_strdate ( itemEl
. findall ( './pubDate' )[ 0 ]. text
)  218              thumbnail 
=  itemEl
. find ( './/{http://search.yahoo.com/mrss/}thumbnail' ). attrib
. get ( 'url' )  220              content 
=  itemEl
. find ( './/{http://search.yahoo.com/mrss/}content' )  221              duration 
=  float_or_none ( content
. attrib
. get ( 'duration' ))  222              mediagen_url 
=  content
. attrib
[ 'url' ]  223              guid 
=  itemEl
. find ( './guid' ). text
. rpartition ( ':' )[- 1 ]  225              cdoc 
=  self
._ download
_ xml
(  226                  mediagen_url
,  epTitle
,  227                  'Downloading configuration for segment  %d  /  %d '  % ( part_num 
+  1 ,  len ( item_els
)))  230              for  rendition 
in  cdoc
. findall ( './/rendition' ):  231                  finfo 
= ( rendition
. attrib
[ 'bitrate' ],  rendition
. findall ( './src' )[ 0 ]. text
)  235              for  format
,  rtmp_video_url 
in  turls
:  236                  w
,  h 
=  self
._ video
_ dimensions
. get ( format
, ( None ,  None ))  238                      'format_id' :  'vhttp- %s '  %  format
,  239                      'url' :  self
._ transform
_ rtmp
_u rl
( rtmp_video_url
),  240                      'ext' :  self
._ video
_ extensions
. get ( format
,  'mp4' ),  245                      'format_id' :  'rtmp- %s '  %  format
,  246                      'url' :  rtmp_video_url
. replace ( 'viacomccstrm' ,  'viacommtvstrm' ),  247                      'ext' :  self
._ video
_ extensions
. get ( format
,  'mp4' ),  251                  self
._ sort
_ formats
( formats
)  253              virtual_id 
=  show_name 
+  ' '  +  epTitle 
+  ' part '  +  compat_str ( part_num 
+  1 )  258                  'uploader' :  show_name
,  259                  'upload_date' :  upload_date
,  260                  'duration' :  duration
,  261                  'thumbnail' :  thumbnail
,  262                  'description' :  description
,  269              'title' :  show_name 
+  ' '  +  title
,  270              'description' :  description
,