]>
Raphaƫl G. Git Repositories - youtubedl/blob - youtube_dl/extractor/comedycentral.py 
5b1b99675c760a7249bcdb23ff3072af86710114
   1  from  __future__ 
import  unicode_literals
   5  from  . mtv 
import  MTVServicesInfoExtractor
  17  class  ComedyCentralIE ( MTVServicesInfoExtractor
):   18      _VALID_URL 
=  r
'''(?x)https?://(?:www\.)?cc\.com/   19          (video-clips|episodes|cc-studios|video-collections|full-episodes|shows)   21      _FEED_URL 
=  'http://comedycentral.com/feeds/mrss/'   24          'url' :  'http://www.cc.com/video-clips/kllhuv/stand-up-greg-fitzsimmons--uncensored---too-good-of-a-mother' ,   25          'md5' :  'c4f48e9eda1b16dd10add0744344b6d8' ,   27              'id' :  'cef0cbb3-e776-4bc9-b62e-8016deccb354' ,   29              'title' :  'CC:Stand-Up|Greg Fitzsimmons: Life on Stage|Uncensored - Too Good of a Mother' ,   30              'description' :  'After a certain point, breastfeeding becomes c**kblocking.' ,   33          'url' :  'http://www.cc.com/shows/the-daily-show-with-trevor-noah/interviews/6yx39d/exclusive-rand-paul-extended-interview' ,   34          'only_matching' :  True ,   38  class  ComedyCentralShowsIE ( MTVServicesInfoExtractor
):   39      IE_DESC 
=  'The Daily Show / The Colbert Report'   40      # urls can be abbreviations like :thedailyshow   41      # urls for episodes like:   42      # or urls for clips like: http://www.thedailyshow.com/watch/mon-december-10-2012/any-given-gun-day   43      #                     or: http://www.colbertnation.com/the-colbert-report-videos/421667/november-29-2012/moon-shattering-news   44      #                     or: http://www.colbertnation.com/the-colbert-report-collections/422008/festival-of-lights/79524   45      _VALID_URL 
=  r
'''(?x)^(:(?P<shortname>tds|thedailyshow)   47                            (?P<showname>thedailyshow|thecolbertreport)\.(?:cc\.)?com/   48                           ((?:full-)?episodes/(?:[0-9a-z] {6} /)?(?P<episode>.*)|   50                                (?:(?:guests/[^/]+|videos|video-playlists|special-editions|news-team/[^/]+)/[^/]+/(?P<videotitle>[^/?#]+))   51                                |(the-colbert-report-(videos|collections)/(?P<clipID>[0-9]+)/[^/]*/(?P<cntitle>.*?))   52                                |(watch/(?P<date>[^/]*)/(?P<tdstitle>.*))   55                                extended-interviews/(?P<interID>[0-9a-z]+)/   56                                (?:playlist_tds_extended_)?(?P<interview_title>[^/?#]*?)   57                                (?:/[^/?#]?|[?#]|$))))   60          'url' :  'http://thedailyshow.cc.com/watch/thu-december-13-2012/kristen-stewart' ,   61          'md5' :  '4e2f5cb088a83cd8cdb7756132f9739d' ,   63              'id' :  'ab9ab3e7-5a98-4dbe-8b21-551dc0523d55' ,   65              'upload_date' :  '20121213' ,   66              'description' :  'Kristen Stewart learns to let loose in "On the Road."' ,   67              'uploader' :  'thedailyshow' ,   68              'title' :  'thedailyshow kristen-stewart part 1' ,   71          'url' :  'http://thedailyshow.cc.com/extended-interviews/b6364d/sarah-chayes-extended-interview' ,   73              'id' :  'sarah-chayes-extended-interview' ,   74              'description' :  'Carnegie Endowment Senior Associate Sarah Chayes discusses how corrupt institutions function throughout the world in her book "Thieves of State: Why Corruption Threatens Global Security."' ,   75              'title' :  'thedailyshow Sarah Chayes Extended Interview' ,   80                      'id' :  '0baad492-cbec-4ec1-9e50-ad91c291127f' ,   82                      'upload_date' :  '20150129' ,   83                      'description' :  'Carnegie Endowment Senior Associate Sarah Chayes discusses how corrupt institutions function throughout the world in her book "Thieves of State: Why Corruption Threatens Global Security."' ,   84                      'uploader' :  'thedailyshow' ,   85                      'title' :  'thedailyshow sarah-chayes-extended-interview part 1' ,   90                      'id' :  '1e4fb91b-8ce7-4277-bd7c-98c9f1bbd283' ,   92                      'upload_date' :  '20150129' ,   93                      'description' :  'Carnegie Endowment Senior Associate Sarah Chayes discusses how corrupt institutions function throughout the world in her book "Thieves of State: Why Corruption Threatens Global Security."' ,   94                      'uploader' :  'thedailyshow' ,   95                      'title' :  'thedailyshow sarah-chayes-extended-interview part 2' ,  100              'skip_download' :  True ,  103          'url' :  'http://thedailyshow.cc.com/extended-interviews/xm3fnq/andrew-napolitano-extended-interview' ,  104          'only_matching' :  True ,  106          'url' :  'http://thecolbertreport.cc.com/videos/29w6fx/-realhumanpraise-for-fox-news' ,  107          'only_matching' :  True ,  109          'url' :  'http://thecolbertreport.cc.com/videos/gh6urb/neil-degrasse-tyson-pt--1?xrs=eml_col_031114' ,  110          'only_matching' :  True ,  112          'url' :  'http://thedailyshow.cc.com/guests/michael-lewis/3efna8/exclusive---michael-lewis-extended-interview-pt--3' ,  113          'only_matching' :  True ,  115          'url' :  'http://thedailyshow.cc.com/episodes/sy7yv0/april-8--2014---denis-leary' ,  116          'only_matching' :  True ,  118          'url' :  'http://thecolbertreport.cc.com/episodes/8ase07/april-8--2014---jane-goodall' ,  119          'only_matching' :  True ,  121          'url' :  'http://thedailyshow.cc.com/video-playlists/npde3s/the-daily-show-19088-highlights' ,  122          'only_matching' :  True ,  124          'url' :  'http://thedailyshow.cc.com/video-playlists/t6d9sg/the-daily-show-20038-highlights/be3cwo' ,  125          'only_matching' :  True ,  127          'url' :  'http://thedailyshow.cc.com/special-editions/2l8fdb/special-edition---a-look-back-at-food' ,  128          'only_matching' :  True ,  130          'url' :  'http://thedailyshow.cc.com/news-team/michael-che/7wnfel/we-need-to-talk-about-israel' ,  131          'only_matching' :  True ,  134      _available_formats 
= [ '3500' ,  '2200' ,  '1700' ,  '1200' ,  '750' ,  '400' ]  136      _video_extensions 
= {  144      _video_dimensions 
= {  153      def  _real_extract ( self
,  url
):  154          mobj 
=  re
. match ( self
._ VALID
_U RL
,  url
)  156          if  mobj
. group ( 'shortname' ):  157              return  self
. url_result ( 'http://www.cc.com/shows/the-daily-show-with-trevor-noah/full-episodes' )  159          if  mobj
. group ( 'clip' ):  160              if  mobj
. group ( 'videotitle' ):  161                  epTitle 
=  mobj
. group ( 'videotitle' )  162              elif  mobj
. group ( 'showname' ) ==  'thedailyshow' :  163                  epTitle 
=  mobj
. group ( 'tdstitle' )  165                  epTitle 
=  mobj
. group ( 'cntitle' )  167          elif  mobj
. group ( 'interview' ):  168              epTitle 
=  mobj
. group ( 'interview_title' )  171              dlNewest 
=  not  mobj
. group ( 'episode' )  173                  epTitle 
=  mobj
. group ( 'showname' )  175                  epTitle 
=  mobj
. group ( 'episode' )  176          show_name 
=  mobj
. group ( 'showname' )  178          webpage
,  htmlHandle 
=  self
._ download
_ webpage
_ handle
( url
,  epTitle
)  180              url 
=  htmlHandle
. geturl ()  181              mobj 
=  re
. match ( self
._ VALID
_U RL
,  url
,  re
. VERBOSE
)  183                  raise  ExtractorError ( 'Invalid redirected URL: '  +  url
)  184              if  mobj
. group ( 'episode' ) ==  '' :  185                  raise  ExtractorError ( 'Redirected URL is still not specific: '  +  url
)  186              epTitle 
= ( mobj
. group ( 'episode' )  or  mobj
. group ( 'videotitle' )). rpartition ( '/' )[- 1 ]  188          mMovieParams 
=  re
. findall ( '(?:<param name="movie" value="|var url = ")(http://media.mtvnservices.com/([^"]*(?:episode|video).*?:.*?))"' ,  webpage
)  189          if  len ( mMovieParams
) ==  0 :  190              # The Colbert Report embeds the information in a without  191              # a URL prefix; so extract the alternate reference  192              # and then add the URL prefix manually.  194              altMovieParams 
=  re
. findall ( 'data-mgid="([^"]*(?:episode|video|playlist).*?:.*?)"' ,  webpage
)  195              if  len ( altMovieParams
) ==  0 :  196                  raise  ExtractorError ( 'unable to find Flash URL in webpage '  +  url
)  198                  mMovieParams 
= [( 'http://media.mtvnservices.com/'  +  altMovieParams
[ 0 ],  altMovieParams
[ 0 ])]  200          uri 
=  mMovieParams
[ 0 ][ 1 ]  201          # Correct cc.com in uri  202          uri 
=  re
. sub ( r
'(episode:[^.]+)(\.cc)?\.com' ,  r
'\1.com' ,  uri
)  204          index_url 
=  'http:// %s .cc.com/feeds/mrss? %s '  % ( show_name
,  compat_urllib_parse
. urlencode ({ 'uri' :  uri
}))  205          idoc 
=  self
._ download
_ xml
(  207              'Downloading show index' ,  'Unable to download episode index' )  209          title 
=  idoc
. find ( './channel/title' ). text
 210          description 
=  idoc
. find ( './channel/description' ). text
 213          item_els 
=  idoc
. findall ( './/item' )  214          for  part_num
,  itemEl 
in  enumerate ( item_els
):  215              upload_date 
=  unified_strdate ( itemEl
. findall ( './pubDate' )[ 0 ]. text
)  216              thumbnail 
=  itemEl
. find ( './/{http://search.yahoo.com/mrss/}thumbnail' ). attrib
. get ( 'url' )  218              content 
=  itemEl
. find ( './/{http://search.yahoo.com/mrss/}content' )  219              duration 
=  float_or_none ( content
. attrib
. get ( 'duration' ))  220              mediagen_url 
=  content
. attrib
[ 'url' ]  221              guid 
=  itemEl
. find ( './guid' ). text
. rpartition ( ':' )[- 1 ]  223              cdoc 
=  self
._ download
_ xml
(  224                  mediagen_url
,  epTitle
,  225                  'Downloading configuration for segment  %d  /  %d '  % ( part_num 
+  1 ,  len ( item_els
)))  228              for  rendition 
in  cdoc
. findall ( './/rendition' ):  229                  finfo 
= ( rendition
. attrib
[ 'bitrate' ],  rendition
. findall ( './src' )[ 0 ]. text
)  233              for  format
,  rtmp_video_url 
in  turls
:  234                  w
,  h 
=  self
._ video
_ dimensions
. get ( format
, ( None ,  None ))  236                      'format_id' :  'vhttp- %s '  %  format
,  237                      'url' :  self
._ transform
_ rtmp
_u rl
( rtmp_video_url
),  238                      'ext' :  self
._ video
_ extensions
. get ( format
,  'mp4' ),  243                      'format_id' :  'rtmp- %s '  %  format
,  244                      'url' :  rtmp_video_url
. replace ( 'viacomccstrm' ,  'viacommtvstrm' ),  245                      'ext' :  self
._ video
_ extensions
. get ( format
,  'mp4' ),  249                  self
._ sort
_ formats
( formats
)  251              subtitles 
=  self
._ extract
_ subtitles
( cdoc
,  guid
)  253              virtual_id 
=  show_name 
+  ' '  +  epTitle 
+  ' part '  +  compat_str ( part_num 
+  1 )  258                  'uploader' :  show_name
,  259                  'upload_date' :  upload_date
,  260                  'duration' :  duration
,  261                  'thumbnail' :  thumbnail
,  262                  'description' :  description
,  263                  'subtitles' :  subtitles
,  270              'title' :  show_name 
+  ' '  +  title
,  271              'description' :  description
,