]>
Raphaƫl G. Git Repositories - youtubedl/blob - youtube_dl/extractor/comcarcoff.py
2 from __future__
import unicode_literals
4 from . common
import InfoExtractor
5 from .. compat
import compat_str
13 class ComCarCoffIE ( InfoExtractor
):
14 _VALID_URL
= r
'https?://(?:www\.)?comediansincarsgettingcoffee\.com/(?P<id>[a-z0-9\-]*)'
16 'url' : 'http://comediansincarsgettingcoffee.com/miranda-sings-happy-thanksgiving-miranda/' ,
20 'upload_date' : '20141127' ,
21 'timestamp' : 1417107600 ,
23 'title' : 'Happy Thanksgiving Miranda' ,
24 'description' : 'Jerry Seinfeld and his special guest Miranda Sings cruise around town in search of coffee, complaining and apologizing along the way.' ,
27 'skip_download' : 'requires ffmpeg' ,
31 def _real_extract ( self
, url
):
32 display_id
= self
._ match
_ id
( url
)
34 display_id
= 'comediansincarsgettingcoffee.com'
35 webpage
= self
._ download
_ webpage
( url
, display_id
)
37 full_data
= self
._ parse
_ json
(
39 r
'window\.app\s*=\s*({.+?});\n' , webpage
, 'full data json' ),
40 display_id
)[ 'videoData' ]
42 display_id
= full_data
[ 'activeVideo' ][ 'video' ]
43 video_data
= full_data
. get ( 'videos' , {}). get ( display_id
) or full_data
[ 'singleshots' ][ display_id
]
45 video_id
= compat_str ( video_data
[ 'mediaId' ])
46 title
= video_data
[ 'title' ]
47 formats
= self
._ extract
_ m
3u8_ formats
(
48 video_data
[ 'mediaUrl' ], video_id
, 'mp4' )
49 self
._ sort
_ formats
( formats
)
52 'url' : video_data
[ 'images' ][ 'thumb' ],
54 'url' : video_data
[ 'images' ][ 'poster' ],
57 timestamp
= int_or_none ( video_data
. get ( 'pubDateTime' )) or parse_iso8601 (
58 video_data
. get ( 'pubDate' ))
59 duration
= int_or_none ( video_data
. get ( 'durationSeconds' )) or parse_duration (
60 video_data
. get ( 'duration' ))
64 'display_id' : display_id
,
66 'description' : video_data
. get ( 'description' ),
67 'timestamp' : timestamp
,
69 'thumbnails' : thumbnails
,
71 'season_number' : int_or_none ( video_data
. get ( 'season' )),
72 'episode_number' : int_or_none ( video_data
. get ( 'episode' )),
73 'webpage_url' : 'http://comediansincarsgettingcoffee.com/ %s ' % ( video_data
. get ( 'urlSlug' , video_data
. get ( 'slug' ))),