3 from __future__
import unicode_literals
8 from .common
import InfoExtractor
9 from .youtube
import YoutubeIE
10 from ..compat
import (
13 compat_xml_parse_error
,
31 from .brightcove
import BrightcoveIE
32 from .nbc
import NBCSportsVPlayerIE
33 from .ooyala
import OoyalaIE
34 from .rutv
import RUTVIE
35 from .smotri
import SmotriIE
36 from .condenast
import CondeNastIE
37 from .udn
import UDNEmbedIE
38 from .senateisvp
import SenateISVPIE
39 from .bliptv
import BlipTVIE
40 from .svt
import SVTIE
43 class GenericIE(InfoExtractor
):
44 IE_DESC
= 'Generic downloader that works on some sites'
49 'url': 'http://www.hodiho.fr/2013/02/regis-plante-sa-jeep.html',
50 'md5': '85b90ccc9d73b4acd9138d3af4c27f89',
52 'id': '13601338388002',
54 'uploader': 'www.hodiho.fr',
55 'title': 'R\u00e9gis plante sa Jeep',
58 # bandcamp page with custom domain
60 'add_ie': ['Bandcamp'],
61 'url': 'http://bronyrock.com/track/the-pony-mash',
65 'title': 'The Pony Mash',
66 'uploader': 'M_Pallante',
68 'skip': 'There is a limit of 200 free downloads / month for the test song',
70 # embedded brightcove video
71 # it also tests brightcove videos that need to set the 'Referer' in the
74 'add_ie': ['Brightcove'],
75 'url': 'http://www.bfmtv.com/video/bfmbusiness/cours-bourse/cours-bourse-l-analyse-technique-154522/',
77 'id': '2765128793001',
79 'title': 'Le cours de bourse : l’analyse technique',
80 'description': 'md5:7e9ad046e968cb2d1114004aba466fd9',
81 'uploader': 'BFM BUSINESS',
84 'skip_download': True,
88 # https://github.com/rg3/youtube-dl/issues/2253
89 'url': 'http://bcove.me/i6nfkrc3',
90 'md5': '0ba9446db037002366bab3b3eb30c88c',
92 'id': '3101154703001',
94 'title': 'Still no power',
95 'uploader': 'thestar.com',
96 'description': 'Mississauga resident David Farmer is still out of power as a result of the ice storm a month ago. To keep the house warm, Farmer cuts wood from his property for a wood burning stove downstairs.',
98 'add_ie': ['Brightcove'],
101 'url': 'http://www.championat.com/video/football/v/87/87499.html',
102 'md5': 'fb973ecf6e4a78a67453647444222983',
104 'id': '3414141473001',
106 'title': 'Видео. Удаление Дзагоева (ЦСКА)',
107 'description': 'Онлайн-трансляция матча ЦСКА - "Волга"',
108 'uploader': 'Championat',
112 # https://github.com/rg3/youtube-dl/issues/3541
113 'add_ie': ['Brightcove'],
114 'url': 'http://www.kijk.nl/sbs6/leermijvrouwenkennen/videos/jqMiXKAYan2S/aflevering-1',
116 'id': '3866516442001',
118 'title': 'Leer mij vrouwen kennen: Aflevering 1',
119 'description': 'Leer mij vrouwen kennen: Aflevering 1',
120 'uploader': 'SBS Broadcasting',
122 'skip': 'Restricted to Netherlands',
124 'skip_download': True, # m3u8 download
127 # Direct link to a video
129 'url': 'http://media.w3.org/2010/05/sintel/trailer.mp4',
130 'md5': '67d406c2bcb6af27fa886f31aa934bbe',
135 'upload_date': '20100513',
140 'url': 'http://www.rollingstone.com/music/videos/norwegian-dj-cashmere-cat-goes-spartan-on-with-me-premiere-20131219',
141 'md5': '166dd577b433b4d4ebfee10b0824d8ff',
143 'id': 'BwY2RxaTrTkslxOfcan0UCf0YqyvWysJ',
145 'title': '2cc213299525360.mov', # that's what we get
147 'add_ie': ['Ooyala'],
149 # multiple ooyala embeds on SBN network websites
151 'url': 'http://www.sbnation.com/college-football-recruiting/2015/2/3/7970291/national-signing-day-rationalizations-itll-be-ok-itll-be-ok',
153 'id': 'national-signing-day-rationalizations-itll-be-ok-itll-be-ok',
154 'title': '25 lies you will tell yourself on National Signing Day - SBNation.com',
156 'playlist_mincount': 3,
158 'skip_download': True,
160 'add_ie': ['Ooyala'],
164 'url': 'http://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&ved=0CCUQtwIwAA&url=http%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DcmQHVoWB5FY&ei=F-sNU-LLCaXk4QT52ICQBQ&usg=AFQjCNEw4hL29zgOohLXvpJ-Bdh2bils1Q&bvm=bv.61965928,d.bGE',
168 'upload_date': '20130224',
169 'uploader_id': 'TheVerge',
170 'description': 're:^Chris Ziegler takes a look at the\.*',
171 'uploader': 'The Verge',
172 'title': 'First Firefox OS phones side-by-side',
175 'skip_download': False,
180 'url': 'http://www.tested.com/science/weird/460206-tested-grinding-coffee-2000-frames-second/',
184 'title': 'Tested: Grinding Coffee at 2000 Frames Per Second',
185 'upload_date': '20140225',
186 'description': 'md5:06a40fbf30b220468f1e0957c0f558ff',
187 'uploader': 'Tested',
188 'uploader_id': 'testedcom',
190 # No need to test YoutubeIE here
192 'skip_download': True,
197 'url': 'http://www.theguardian.com/world/2014/mar/11/obama-zach-galifianakis-between-two-ferns',
201 'title': 'Between Two Ferns with Zach Galifianakis: President Barack Obama',
202 'description': 'Episode 18: President Barack Obama sits down with Zach Galifianakis for his most memorable interview yet.',
207 'url': 'http://www.bbc.co.uk/blogs/adamcurtis/posts/BUGGER',
209 'title': 'BBC - Blogs - Adam Curtis - BUGGER',
211 'playlist_mincount': 18,
215 'url': 'http://www.rg.ru/2014/03/15/reg-dfo/anklav-anons.html',
219 'title': 'Охотское море стало целиком российским',
220 'description': 'md5:5ed62483b14663e2a95ebbe115eb8f43',
224 'skip_download': True,
229 'url': 'http://en.support.wordpress.com/videos/ted-talks/',
230 'md5': '65fdff94098e4a607385a60c5177c638',
234 'title': 'Hidden miracles of the natural world',
235 'uploader': 'Louie Schwartzberg',
236 'description': 'md5:8145d19d320ff3e52f28401f4c4283b9',
239 # Embedded Ustream video
241 'url': 'http://www.american.edu/spa/pti/nsa-privacy-janus-2014.cfm',
242 'md5': '27b99cdb639c9b12a79bca876a073417',
246 'uploader': 'AU SPA: The NSA and Privacy',
247 'title': 'NSA and Privacy Forum Debate featuring General Hayden and Barton Gellman'
250 # nowvideo embed hidden behind percent encoding
252 'url': 'http://www.waoanime.tv/the-super-dimension-fortress-macross-episode-1/',
253 'md5': '2baf4ddd70f697d94b1c18cf796d5107',
255 'id': '06e53103ca9aa',
257 'title': 'Macross Episode 001 Watch Macross Episode 001 onl',
258 'description': 'No description',
263 'url': 'http://www.tv-replay.fr/redirection/20-03-14/x-enius-arte-10753389.html',
264 'md5': '7653032cbb25bf6c80d80f217055fa43',
266 'id': '048195-004_PLUS7-F',
269 'description': 'md5:d5fdf32ef6613cdbfd516ae658abf168',
270 'upload_date': '20140320',
273 'skip_download': 'Requires rtmpdump'
278 'url': 'http://www.wired.com/2014/04/honda-asimo/',
279 'md5': 'ba0dfe966fa007657bd1443ee672db0f',
281 'id': '53501be369702d3275860000',
283 'title': 'Honda’s New Asimo Robot Is More Human Than Ever',
288 'url': 'http://www.spi0n.com/zap-spi0n-com-n216/',
289 'md5': '441aeeb82eb72c422c7f14ec533999cd',
291 'id': 'k2mm4bCdJ6CQ2i7c8o2',
293 'title': 'Le Zap de Spi0n n°216 - Zapping du Web',
296 'add_ie': ['Dailymotion'],
300 'url': 'http://www.badzine.de/ansicht/datum/2014/06/09/so-funktioniert-die-neue-englische-badminton-liga.html',
304 'title': 'The NBL Auction 2014',
305 'uploader': 'BADMINTON England',
306 'uploader_id': 'BADMINTONEvents',
307 'upload_date': '20140603',
308 'description': 'md5:9ef128a69f1e262a700ed83edb163a73',
310 'add_ie': ['Youtube'],
312 'skip_download': True,
317 'url': 'http://www.gametrailers.com/news-post/76093/north-america-europe-is-getting-that-mario-kart-8-mercedes-dlc-too',
318 'md5': '35727f82f58c76d996fc188f9755b0d5',
320 'id': '0306a69b-8adf-4fb5-aace-75f8e8cbfca9',
323 'description': 'Mario\'s life in the fast lane has never looked so good.',
326 # YouTube embed via <data-embed-url="">
328 'url': 'https://play.google.com/store/apps/details?id=com.gameloft.android.ANMP.GloftA8HM',
332 'title': 'Asphalt 8: Airborne - Update - Welcome to Dubai!',
333 'uploader': 'Gameloft',
334 'uploader_id': 'gameloft',
335 'upload_date': '20140828',
336 'description': 'md5:c80da9ed3d83ae6d1876c834de03e1c4',
339 'skip_download': True,
344 'url': 'http://www.ll.mit.edu/workshops/education/videocourses/antennas/lecture1/video/',
346 'md5': '0c5e352edabf715d762b0ad4e6d9ee67',
348 'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
349 'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - video1',
354 'md5': '10e4bb3aaca9fd630e273ff92d9f3c63',
356 'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final_PIP',
357 'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - pip',
363 'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
368 'url': 'http://www.handjobhub.com/video/busty-blonde-siri-tit-fuck-while-wank-6313.html',
369 'md5': '9d65602bf31c6e20014319c7d07fba27',
371 'id': '5123ea6d5e5a7',
374 'uploader': 'www.handjobhub.com',
375 'title': 'Busty Blonde Siri Tit Fuck While Wank at HandjobHub.com',
380 'url': 'http://phihag.de/2014/youtube-dl/rss2.xml',
382 'id': 'http://phihag.de/2014/youtube-dl/rss2.xml',
383 'title': 'Zero Punctuation',
384 'description': 're:.*groundbreaking video review series.*'
386 'playlist_mincount': 11,
388 # Multiple brightcove videos
389 # https://github.com/rg3/youtube-dl/issues/2283
391 'url': 'http://www.newyorker.com/online/blogs/newsdesk/2014/01/always-never-nuclear-command-and-control.html',
393 'id': 'always-never',
394 'title': 'Always / Never - The New Yorker',
398 'extract_flat': False,
399 'skip_download': True,
404 'url': 'http://umpire-empire.com/index.php/topic/58125-laz-decides-no-thats-low/',
405 'md5': '96f09a37e44da40dd083e12d9a683327',
409 'title': 'Ump changes call to ball',
410 'description': 'md5:71c11215384298a172a6dcb4c2e20685',
412 'timestamp': 1401537900,
413 'upload_date': '20140531',
414 'thumbnail': 're:^https?://.*\.jpg$',
419 'url': 'http://education-portal.com/academy/lesson/north-american-exploration-failed-colonies-of-spain-france-england.html#lesson',
420 'md5': '8788b683c777a5cf25621eaf286d0c23',
424 'title': 'md5:51364a8d3d009997ba99656004b5e20d',
426 'filesize': 182808282,
427 'uploader': 'education-portal.com',
431 'url': 'http://thoughtworks.wistia.com/medias/uxjb0lwrcz',
432 'md5': 'baf49c2baa8a7de5f3fc145a8506dcd4',
436 'title': 'Conversation about Hexagonal Rails Part 1 - ThoughtWorks',
438 'uploader': 'thoughtworks.wistia.com',
441 # Direct download with broken HEAD
443 'url': 'http://ai-radio.org:8000/radio.opus',
450 'skip_download': True, # infinite live stream
452 'expected_warnings': [
453 r
'501.*Not Implemented'
458 'url': 'http://nakedsecurity.sophos.com/2014/10/29/sscc-171-are-you-sure-that-1234-is-a-bad-password-podcast/',
462 'description': 'md5:ff867d6b555488ad3c52572bb33d432c',
463 'uploader': 'Sophos Security',
464 'title': 'Chet Chat 171 - Oct 29, 2014',
465 'upload_date': '20141029',
470 'url': 'http://www.esa.int/Our_Activities/Space_Science/Rosetta/Philae_comet_touch-down_webcast',
474 'upload_date': '20141112',
475 'title': 'Rosetta #CometLanding webcast HL 10',
480 'url': 'http://discourse.ubuntu.com/t/unity-8-desktop-mode-windows-on-mir/1986',
483 'title': 'Unity 8 desktop-mode windows on Mir! - Ubuntu Discourse',
485 'playlist_mincount': 2,
487 # Direct link with incorrect MIME type
489 'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
490 'md5': '4ccbebe5f36706d85221f204d7eb5913',
492 'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
493 'id': '5_Lennart_Poettering_-_Systemd',
495 'title': '5_Lennart_Poettering_-_Systemd',
496 'upload_date': '20141120',
498 'expected_warnings': [
499 'URL could be a direct video link, returning it as such.'
504 'url': 'http://undergroundwellness.com/podcasts/306-5-steps-to-permanent-gut-healing/',
508 'upload_date': '20141126',
509 'title': 'Jack Tips: 5 Steps to Permanent Gut Healing',
514 'url': 'http://www.abc.net.au/7.30/content/2015/s4164797.htm',
516 'id': '730m_DandD_1901_512k',
518 'uploader': 'www.abc.net.au',
519 'title': 'Game of Thrones with dice - Dungeons and Dragons fantasy role-playing game gets new life - 19/01/2015',
522 # embedded viddler video
524 'url': 'http://deadspin.com/i-cant-stop-watching-john-wall-chop-the-nuggets-with-th-1681801597',
528 'uploader': 'deadspin',
529 'title': 'WALL-TO-GORTAT',
530 'timestamp': 1422285291,
531 'upload_date': '20150126',
533 'add_ie': ['Viddler'],
537 'url': 'http://thedailyshow.cc.com/podcast/episodetwelve',
541 'title': "The Daily Show Podcast without Jon Stewart - Episode 12: Bassem Youssef: Egypt's Jon Stewart",
542 'description': 'md5:601cb790edd05908957dae8aaa866465',
543 'upload_date': '20150220',
548 'url': 'http://media.nationalarchives.gov.uk/index.php/webinar-using-discovery-national-archives-online-catalogue/',
552 'upload_date': '20150212',
553 'uploader': 'The National Archives UK',
554 'description': 'md5:a236581cd2449dd2df4f93412f3f01c6',
555 'uploader_id': 'NationalArchives08',
556 'title': 'Webinar: Using Discovery, The National Archives’ online catalogue',
561 'url': 'http://www.rtlnieuws.nl/nieuws/buitenland/aanslagen-kopenhagen',
562 'playlist_mincount': 5,
564 'id': 'aanslagen-kopenhagen',
565 'title': 'Aanslagen Kopenhagen | RTL Nieuws',
570 'url': 'http://www.skipass.com/news/116090-bon-appetit-s5ep3-baqueira-mi-cor.html',
574 'title': 'EP3S5 - Bon Appétit - Baqueira Mi Corazon !',
579 'url': 'http://www.monumentalnetwork.com/videos/john-carlson-postgame-2-25-15',
583 'upload_date': '20150226',
584 'uploader_id': 'MonumentalSports-Kaltura@perfectsensedigital.com',
586 'title': 'John Carlson Postgame 2/25/15',
589 # Eagle.Platform embed (generic URL)
591 'url': 'http://lenta.ru/news/2015/03/06/navalny/',
595 'title': 'Навальный вышел на свободу',
596 'description': 'md5:d97861ac9ae77377f3f20eaf9d04b4f5',
597 'thumbnail': 're:^https?://.*\.jpg$',
603 # ClipYou (Eagle.Platform) embed (custom URL)
605 'url': 'http://muz-tv.ru/play/7129/',
609 'title': "'O Sole Mio",
610 'thumbnail': 're:^https?://.*\.jpg$',
617 'url': 'http://muz-tv.ru/kinozal/view/7400/',
621 'title': 'Тайны перевала Дятлова • 1 серия 2 часть',
622 'description': 'Документальный сериал-расследование одной из самых жутких тайн ХХ века',
623 'thumbnail': 're:^https?://.*\.jpg$',
630 'url': 'http://www.cinemablend.com/new/First-Joe-Dirt-2-Trailer-Teaser-Stupid-Greatness-70874.html',
634 'title': 'Joe Dirt 2 Beautiful Loser Teaser Trailer',
635 'thumbnail': 're:^https?://.*\.png$',
641 'url': 'http://techcrunch.com/video/facebook-creates-on-this-day-crunch-report/518726732/',
642 'md5': '4c6f127a30736b59b3e2c19234ee2bf7',
646 'title': 'Facebook Creates "On This Day" | Crunch Report',
651 'url': 'http://www.svt.se/sport/ishockey/jagr-tacklar-giroux-under-intervjun',
655 'title': 'Här trycker Jagr till Giroux (under SVT-intervjun)',
660 # RSS feed with enclosure
662 'url': 'http://podcastfeeds.nbcnews.com/audio/podcast/MSNBC-MADDOW-NETCAST-M4V.xml',
664 'id': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
666 'upload_date': '20150228',
667 'title': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
670 # Crooks and Liars embed
672 'url': 'http://crooksandliars.com/2015/04/fox-friends-says-protecting-atheists',
676 'title': "Fox & Friends Says Protecting Atheists From Discrimination Is Anti-Christian!",
677 'description': 'md5:e1a46ad1650e3a5ec7196d432799127f',
678 'timestamp': 1428207000,
679 'upload_date': '20150405',
680 'uploader': 'Heather',
683 # Crooks and Liars external embed
685 'url': 'http://theothermccain.com/2010/02/02/video-proves-that-bill-kristol-has-been-watching-glenn-beck/comment-page-1/',
687 'id': 'MTE3MjUtMzQ2MzA',
689 'title': 'md5:5e3662a81a4014d24c250d76d41a08d5',
690 'description': 'md5:9b8e9542d6c3c5de42d6451b7d780cec',
691 'timestamp': 1265032391,
692 'upload_date': '20100201',
693 'uploader': 'Heather',
696 # NBC Sports vplayer embed
698 'url': 'http://www.riderfans.com/forum/showthread.php?121827-Freeman&s=e98fa1ea6dc08e886b1678d35212494a',
700 'id': 'ln7x1qSThw4k',
702 'title': "PFT Live: New leader in the 'new-look' defense",
703 'description': 'md5:65a19b4bbfb3b0c0c5768bed1dfad74e',
708 'url': 'http://www.udn.com/news/story/7314/822787',
709 'md5': 'fd2060e988c326991037b9aff9df21a6',
713 'title': '中一中男師變性 全校師生力挺',
714 'thumbnail': 're:^https?://.*\.jpg$',
719 'url': 'http://www.businessinsider.com/excel-index-match-vlookup-video-how-to-2015-2?IR=T',
721 'id': '50YnY4czr4ms1vJ7yz3xzq0excz_pUMs',
723 'description': 'VIDEO: Index/Match versus VLOOKUP.',
724 'title': 'This is what separates the Excel masters from the wannabes',
728 'skip_download': True,
731 # Contains a SMIL manifest
733 'url': 'http://www.telewebion.com/fa/1263668/%D9%82%D8%B1%D8%B9%D9%87%E2%80%8C%DA%A9%D8%B4%DB%8C-%D9%84%DB%8C%DA%AF-%D9%82%D9%87%D8%B1%D9%85%D8%A7%D9%86%D8%A7%D9%86-%D8%A7%D8%B1%D9%88%D9%BE%D8%A7/%2B-%D9%81%D9%88%D8%AA%D8%A8%D8%A7%D9%84.html',
737 'title': '+ Football: Lottery Champions League Europe',
738 'uploader': 'www.telewebion.com',
742 'skip_download': True,
def report_following_redirect(self, new_url):
    """Print a '[redirect]' notice saying that new_url is being followed."""
    # Resolve the output callable first, then format the message, so the
    # evaluation order matches a direct self._downloader.to_screen(...) call.
    emit = self._downloader.to_screen
    emit('[redirect] Following redirect to %s' % new_url)
# Build a playlist from an already-parsed RSS document (`doc`).
#
# What the visible statements do:
#   * playlist title: text of ./channel/title. The result of doc.find() is
#     dereferenced directly, so presumably a feed without that element
#     fails here -- TODO confirm how callers handle that.
#   * playlist description: text of ./channel/description, or None when the
#     element is absent.
#   * every ./channel/item is visited; a candidate URL is read from the
#     item's 'link' text (xpath_text with fatal=False) and from the 'url'
#     attribute of the item's ./enclosure children.
#   * the item's 'title' text and the playlist title/description are placed
#     into dict literals under the 'title' / 'description' keys.
#
# NOTE(review): the embedded line numbers skip 755-756, 759, 763-771,
# 773-777 and 780-782, so the conditions guarding the enclosure lookup, how
# entries are accumulated, and the exact returned structure are not visible
# in this view -- verify against the complete file before changing logic.
751 def _extract_rss(self
, url
, video_id
, doc
):
752 playlist_title
= doc
.find('./channel/title').text
753 playlist_desc_el
= doc
.find('./channel/description')
754 playlist_desc
= None if playlist_desc_el
is None else playlist_desc_el
.text
757 for it
in doc
.findall('./channel/item'):
758 next_url
= xpath_text(it
, 'link', fatal
=False)
760 enclosure_nodes
= it
.findall('./enclosure')
761 for e
in enclosure_nodes
:
762 next_url
= e
.attrib
.get('url')
772 'title': it
.find('title').text
,
778 'title': playlist_title
,
779 'description': playlist_desc
,
# Try to extract a Camtasia project referenced by `webpage`.
#
# What the visible statements do:
#   1. search the page for the fo.addVariable("csConfigFile", "...") call;
#      the search uses default=None, and the result is then checked
#      against None (the docstring says None is returned when no camtasia
#      video can be found).
#   2. read the title from the 'DC.title' meta tag (fatal=True).
#   3. resolve the config path against `url` with urljoin and download it
#      as XML; note that `camtasia_cfg` is rebound from the path string to
#      the downloaded document at that point.
#   4. walk the children of ./playlist/array/fileset and, for each child,
#      build: 'id' = last path component of its ./uri text without the
#      extension, 'title' = '<page title> - <child tag>', 'url' = the uri
#      resolved against `url`, 'duration' = float_or_none of ./duration.
#
# NOTE(review): assuming fileset_node is an xml.etree.ElementTree Element,
# Element.getchildren() was removed in Python 3.9; iterating the element
# directly is the documented replacement -- fix when the full method is in
# view.
# NOTE(review): the embedded line numbers skip 785, 790-791, 793, 800-801,
# 804-807 and everything after 811, so the early-return body, any guards on
# missing nodes, and the returned structure are not visible here.
783 def _extract_camtasia(self
, url
, video_id
, webpage
):
784 """ Returns None if no camtasia video can be found. """
786 camtasia_cfg
= self
._search
_regex
(
787 r
'fo\.addVariable\(\s*"csConfigFile",\s*"([^"]+)"\s*\);',
788 webpage
, 'camtasia configuration file', default
=None)
789 if camtasia_cfg
is None:
792 title
= self
._html
_search
_meta
('DC.title', webpage
, fatal
=True)
794 camtasia_url
= compat_urlparse
.urljoin(url
, camtasia_cfg
)
795 camtasia_cfg
= self
._download
_xml
(
796 camtasia_url
, video_id
,
797 note
='Downloading camtasia configuration',
798 errnote
='Failed to download camtasia configuration')
799 fileset_node
= camtasia_cfg
.find('./playlist/array/fileset')
802 for n
in fileset_node
.getchildren():
803 url_n
= n
.find('./uri')
808 'id': os
.path
.splitext(url_n
.text
.rpartition('/')[2])[0],
809 'title': '%s - %s' % (title
, n
.tag
),
810 'url': compat_urlparse
.urljoin(url
, url_n
.text
),
811 'duration': float_or_none(n
.find('./duration').text
),
820 def _real_extract(self
, url
):
821 if url
.startswith('//'):
824 'url': self
.http_scheme() + url
,
827 parsed_url
= compat_urlparse
.urlparse(url
)
828 if not parsed_url
.scheme
:
829 default_search
= self
._downloader
.params
.get('default_search')
830 if default_search
is None:
831 default_search
= 'fixup_error'
833 if default_search
in ('auto', 'auto_warning', 'fixup_error'):
835 self
._downloader
.report_warning('The url doesn\'t specify the protocol, trying with http')
836 return self
.url_result('http://' + url
)
837 elif default_search
!= 'fixup_error':
838 if default_search
== 'auto_warning':
839 if re
.match(r
'^(?:url|URL)$', url
):
840 raise ExtractorError(
841 'Invalid URL: %r . Call youtube-dl like this: youtube-dl -v "https://www.youtube.com/watch?v=BaW_jenozKc" ' % url
,
844 self
._downloader
.report_warning(
845 'Falling back to youtube search for %s . Set --default-search "auto" to suppress this warning.' % url
)
846 return self
.url_result('ytsearch:' + url
)
848 if default_search
in ('error', 'fixup_error'):
849 raise ExtractorError(
850 '%r is not a valid URL. '
851 'Set --default-search "ytsearch" (or run youtube-dl "ytsearch:%s" ) to search YouTube'
852 % (url
, url
), expected
=True)
854 if ':' not in default_search
:
855 default_search
+= ':'
856 return self
.url_result(default_search
+ url
)
858 url
, smuggled_data
= unsmuggle_url(url
)
860 is_intentional
= smuggled_data
and smuggled_data
.get('to_generic')
861 if smuggled_data
and 'force_videoid' in smuggled_data
:
862 force_videoid
= smuggled_data
['force_videoid']
863 video_id
= force_videoid
865 video_id
= os
.path
.splitext(url
.rstrip('/').split('/')[-1])[0]
867 self
.to_screen('%s: Requesting header' % video_id
)
869 head_req
= HEADRequest(url
)
870 head_response
= self
._request
_webpage
(
872 note
=False, errnote
='Could not send HEAD request to %s' % url
,
875 if head_response
is not False:
877 new_url
= head_response
.geturl()
879 self
.report_following_redirect(new_url
)
881 new_url
= smuggle_url(
882 new_url
, {'force_videoid': force_videoid
})
883 return self
.url_result(new_url
)
886 if head_response
is False:
887 full_response
= self
._request
_webpage
(url
, video_id
)
888 head_response
= full_response
890 # Check for direct link to a video
891 content_type
= head_response
.headers
.get('Content-Type', '')
892 m
= re
.match(r
'^(?P<type>audio|video|application(?=/ogg$))/(?P<format_id>.+)$', content_type
)
894 upload_date
= unified_strdate(
895 head_response
.headers
.get('Last-Modified'))
898 'title': os
.path
.splitext(url_basename(url
))[0],
901 'format_id': m
.group('format_id'),
903 'vcodec': 'none' if m
.group('type') == 'audio' else None
905 'upload_date': upload_date
,
908 if not self
._downloader
.params
.get('test', False) and not is_intentional
:
909 self
._downloader
.report_warning('Falling back on generic information extractor.')
911 if not full_response
:
912 full_response
= self
._request
_webpage
(url
, video_id
)
914 # Maybe it's a direct link to a video?
915 # Be careful not to download the whole thing!
916 first_bytes
= full_response
.read(512)
917 if not is_html(first_bytes
):
918 self
._downloader
.report_warning(
919 'URL could be a direct video link, returning it as such.')
920 upload_date
= unified_strdate(
921 head_response
.headers
.get('Last-Modified'))
924 'title': os
.path
.splitext(url_basename(url
))[0],
927 'upload_date': upload_date
,
930 webpage
= self
._webpage
_read
_content
(
931 full_response
, url
, video_id
, prefix
=first_bytes
)
933 self
.report_extraction(video_id
)
937 doc
= parse_xml(webpage
)
939 return self
._extract
_rss
(url
, video_id
, doc
)
940 except compat_xml_parse_error
:
943 # Is it a Camtasia project?
944 camtasia_res
= self
._extract
_camtasia
(url
, video_id
, webpage
)
945 if camtasia_res
is not None:
948 # Sometimes embedded video player is hidden behind percent encoding
949 # (e.g. https://github.com/rg3/youtube-dl/issues/2448)
950 # Unescaping the whole page allows us to handle those cases in a generic way
951 webpage
= compat_urllib_parse
.unquote(webpage
)
953 # it's tempting to parse this further, but you would
954 # have to take into account all the variations like
955 # Video Title - Site Name
956 # Site Name | Video Title
957 # Video Title - Tagline | Site Name
958 # and so on and so forth; it's just not practical
959 video_title
= self
._html
_search
_regex
(
960 r
'(?s)<title>(.*?)</title>', webpage
, 'video title',
963 # Try to detect age limit automatically
964 age_limit
= self
._rta
_search
(webpage
)
965 # And then there are the jokers who advertise that they use RTA,
966 # but actually don't.
967 AGE_LIMIT_MARKERS
= [
968 r
'Proudly Labeled <a href="http://www.rtalabel.org/" title="Restricted to Adults">RTA</a>',
970 if any(re
.search(marker
, webpage
) for marker
in AGE_LIMIT_MARKERS
):
973 # video uploader is domain name
974 video_uploader
= self
._search
_regex
(
975 r
'^(?:https?://)?([^/]*)/.*', url
, 'video uploader')
978 def _playlist_from_matches(matches
, getter
=None, ie
=None):
980 self
.url_result(self
._proto
_relative
_url
(getter(m
) if getter
else m
), ie
)
982 return self
.playlist_result(
983 urlrs
, playlist_id
=video_id
, playlist_title
=video_title
)
985 # Look for BrightCove:
986 bc_urls
= BrightcoveIE
._extract
_brightcove
_urls
(webpage
)
988 self
.to_screen('Brightcove video detected.')
991 'url': smuggle_url(bc_url
, {'Referer': url
}),
992 'ie_key': 'Brightcove'
993 } for bc_url
in bc_urls
]
997 'title': video_title
,
1002 # Look for embedded rtl.nl player
1003 matches
= re
.findall(
1004 r
'<iframe\s+(?:[a-zA-Z-]+="[^"]+"\s+)*?src="((?:https?:)?//(?:www\.)?rtl\.nl/system/videoplayer/[^"]+video_embed[^"]+)"',
1007 return _playlist_from_matches(matches
, ie
='RtlNl')
1009 # Look for embedded (iframe) Vimeo player
1011 r
'<iframe[^>]+?src=(["\'])(?P
<url
>(?
:https?
:)?
//player\
.vimeo\
.com
/video
/.+?
)\
1', webpage)
1013 player_url = unescapeHTML(mobj.group('url
'))
1014 surl = smuggle_url(player_url, {'Referer
': url})
1015 return self.url_result(surl)
1016 # Look for embedded (swf embed) Vimeo player
1018 r'<embed
[^
>]+?src
="((?:https?:)?//(?:www\.)?vimeo\.com/moogaloop\.swf.+?)"', webpage)
1020 return self.url_result(mobj.group(1))
1022 # Look for embedded YouTube player
1023 matches = re.findall(r'''(?x)
1032 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
1036 return _playlist_from_matches(
1037 matches, lambda m: unescapeHTML(m[1]))
1039 # Look for lazyYT YouTube embed
1040 matches = re.findall(
1041 r'class="lazyYT" data
-youtube
-id="([^"]+)"', webpage)
1043 return _playlist_from_matches(matches, lambda m: unescapeHTML(m))
1045 # Look for embedded Dailymotion player
1046 matches = re.findall(
1047 r'<iframe[^>]+?src=(["\'])(?P
<url
>(?
:https?
:)?
//(?
:www\
.)?dailymotion\
.com
/embed
/video
/.+?
)\
1', webpage)
1049 return _playlist_from_matches(
1050 matches, lambda m: unescapeHTML(m[1]))
1052 # Look for embedded Dailymotion playlist player (#3822)
1054 r'<iframe
[^
>]+?src
=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.[a-z]{2,3}/widget/jukebox\?.+?)\1', webpage)
1056 playlists = re.findall(
1057 r'list\[\]=/playlist/([^/]+)/', unescapeHTML(m.group('url')))
1059 return _playlist_from_matches(
1060 playlists, lambda p: '//dailymotion.com/playlist/%s' % p)
1062 # Look for embedded Wistia player
1064 r'<(?:meta[^>]+?content|iframe[^>]+?src)=(["\'])(?P
<url
>(?
:https?
:)?
//(?
:fast\
.)?wistia\
.net
/embed
/iframe
/.+?
)\
1', webpage)
1066 embed_url = self._proto_relative_url(
1067 unescapeHTML(match.group('url
')))
1069 '_type
': 'url_transparent
',
1072 'uploader
': video_uploader,
1073 'title
': video_title,
1077 match = re.search(r'(?
:id=["\']wistia_|data-wistia-?id=["\']|Wistia\
.embed\
(["\'])(?P<id>[^"\']+)', webpage)
1080 '_type
': 'url_transparent
',
1081 'url
': 'http
://fast
.wistia
.net
/embed
/iframe
/{0:}
'.format(match.group('id')),
1083 'uploader
': video_uploader,
1084 'title
': video_title,
1085 'id': match.group('id')
1088 # Look for embedded blip.tv player
1089 bliptv_url = BlipTVIE._extract_url(webpage)
1091 return self.url_result(bliptv_url, 'BlipTV
')
1093 # Look for SVT player
1094 svt_url = SVTIE._extract_url(webpage)
1096 return self.url_result(svt_url, 'SVT
')
1098 # Look for embedded condenast player
1099 matches = re.findall(
1100 r'<iframe\s
+(?
:[a
-zA
-Z
-]+="[^"]+"\s+)*?src="(https?
://player\
.cnevids\
.com
/embed
/[^
"]+")',
1104 '_type
': 'playlist
',
1107 'ie_key
': 'CondeNast
',
1109 } for ma in matches],
1110 'title
': video_title,
1114 # Look for Bandcamp pages with custom domain
1115 mobj = re.search(r'<meta
property="og:url"[^
>]*?content
="(.*?bandcamp\.com.*?)"', webpage)
1116 if mobj is not None:
1117 burl = unescapeHTML(mobj.group(1))
1118 # Don't
set the extractor because it can be a track url
or an album
1119 return self
.url_result(burl
)
1121 # Look for embedded Vevo player
1123 r
'<iframe[^>]+?src=(["\'])(?P
<url
>(?
:https?
:)?
//(?
:cache\
.)?vevo\
.com
/.+?
)\
1', webpage)
1124 if mobj is not None:
1125 return self.url_result(mobj.group('url
'))
1127 # Look for embedded Viddler player
1129 r'<(?
:iframe
[^
>]+?src|param
[^
>]+?value
)=(["\'])(?P<url>(?:https?:)?//(?:www\.)?viddler\.com/(?:embed|player)/.+?)\1',
1131 if mobj is not None:
1132 return self.url_result(mobj.group('url'))
1134 # Look for NYTimes player
1136 r'<iframe[^>]+src=(["\'])(?P
<url
>(?
:https?
:)?
//graphics8\
.nytimes\
.com
/bcvideo
/[^
/]+/iframe
/embed\
.html
.+?
)\
1>',
1138 if mobj is not None:
1139 return self.url_result(mobj.group('url
'))
1141 # Look for Libsyn player
1143 r'<iframe
[^
>]+src
=(["\'])(?P<url>(?:https?:)?//html5-player\.libsyn\.com/embed/.+?)\1', webpage)
1144 if mobj is not None:
1145 return self.url_result(mobj.group('url'))
1147 # Look for Ooyala videos
1148 mobj = (re.search(r'player\.ooyala\.com/[^"?
]+\?[^
"]*?(?:embedCode|ec)=(?P<ec>[^"&]+)', webpage) or
1149 re.search(r'OO\
.Player\
.create\
([\'"].*?[\'"],\s
*[\'"](?P<ec>.{32})[\'"]', webpage) or
1150 re.search(r'SBN\
.VideoLinkset\
.ooyala\
([\'"](?P<ec>.{32})[\'"]\
)', webpage) or
1151 re.search(r'data
-ooyala
-video
-id\s
*=\s
*[\'"](?P<ec>.{32})[\'"]', webpage))
1152 if mobj is not None:
1153 return OoyalaIE._build_url_result(mobj.group('ec
'))
1155 # Look for multiple Ooyala embeds on SBN network websites
1156 mobj = re.search(r'SBN\
.VideoLinkset\
.entryGroup\
((\
[.*?\
])', webpage)
1157 if mobj is not None:
1158 embeds = self._parse_json(mobj.group(1), video_id, fatal=False)
1160 return _playlist_from_matches(
1161 embeds, getter=lambda v: OoyalaIE._url_for_embed_code(v['provider_video_id
']), ie='Ooyala
')
1163 # Look for Aparat videos
1164 mobj = re.search(r'<iframe
.*?src
="(http://www\.aparat\.com/video/[^"]+)"', webpage)
1165 if mobj is not None:
1166 return self.url_result(mobj.group(1), 'Aparat')
1168 # Look for MPORA videos
1169 mobj = re.search(r'<iframe .*?src="(http
://mpora\
.(?
:com|de
)/videos
/[^
"]+)"', webpage)
1170 if mobj is not None:
1171 return self.url_result(mobj.group(1), 'Mpora
')
1173 # Look for embedded NovaMov-based player
1175 r'''(?x)<(?:pagespeed_)?iframe[^>]+?src=(["\'])
1176 (?P<url>http://(?:(?:embed|www)\.)?
1178 nowvideo\.(?:ch|sx|eu|at|ag|co)|
1179 videoweed\.(?:es|com)|
1180 movshare\.(?:net|sx|ag)|
1181 divxstage\.(?:eu|net|ch|co|at|ag))
1182 /embed\.php.+?)\1''', webpage)
1183 if mobj is not None:
1184 return self.url_result(mobj.group('url
'))
1186 # Look for embedded Facebook player
1188 r'<iframe
[^
>]+?src
=(["\'])(?P<url>https://www\.facebook\.com/video/embed.+?)\1', webpage)
1189 if mobj is not None:
1190 return self.url_result(mobj.group('url'), 'Facebook')
1192 # Look for embedded VK player
1193 mobj = re.search(r'<iframe[^>]+?src=(["\'])(?P
<url
>https?
://vk\
.com
/video_ext\
.php
.+?
)\
1', webpage)
1194 if mobj is not None:
1195 return self.url_result(mobj.group('url
'), 'VK
')
1197 # Look for embedded ivi player
1198 mobj = re.search(r'<embed
[^
>]+?src
=(["\'])(?P<url>https?://(?:www\.)?ivi\.ru/video/player.+?)\1', webpage)
1199 if mobj is not None:
1200 return self.url_result(mobj.group('url'), 'Ivi')
1202 # Look for embedded Huffington Post player
1204 r'<iframe[^>]+?src=(["\'])(?P
<url
>https?
://embed\
.live\
.huffingtonpost\
.com
/.+?
)\
1', webpage)
1205 if mobj is not None:
1206 return self.url_result(mobj.group('url
'), 'HuffPost
')
1209 mobj = re.search(r'class=["\']embedly-card["\'][^
>]href
=["\'](?P<url>[^"\']+)', webpage)
1210 if mobj is not None:
1211 return self.url_result(mobj.group('url
'))
1212 mobj = re.search(r'class=["\']embedly-embed["\'][^
>]src
=["\'][^"\']*url
=(?P
<url
>[^
&]+)', webpage)
1213 if mobj is not None:
1214 return self.url_result(compat_urllib_parse.unquote(mobj.group('url
')))
1216 # Look for funnyordie embed
1217 matches = re.findall(r'<iframe
[^
>]+?src
="(https?://(?:www\.)?funnyordie\.com/embed/[^"]+)"', webpage)
1219 return _playlist_from_matches(
1220 matches, getter=unescapeHTML, ie='FunnyOrDie')
1222 # Look for BBC iPlayer embed
1223 matches = re.findall(r'setPlaylist\("(https?
://www\
.bbc\
.co\
.uk
/iplayer
/[^
/]+/[\da
-z
]{8}
)"\)', webpage)
1225 return _playlist_from_matches(matches, ie='BBCCoUk')
1227 # Look for embedded RUTV player
1228 rutv_url = RUTVIE._extract_url(webpage)
1230 return self.url_result(rutv_url, 'RUTV')
1232 # Look for embedded TED player
1234 r'<iframe[^>]+?src=(["\'])(?P
<url
>https?
://embed(?
:-ssl
)?\
.ted\
.com
/.+?
)\
1', webpage)
1235 if mobj is not None:
1236 return self.url_result(mobj.group('url
'), 'TED
')
1238 # Look for embedded Ustream videos
1240 r'<iframe
[^
>]+?src
=(["\'])(?P<url>http://www\.ustream\.tv/embed/.+?)\1', webpage)
1241 if mobj is not None:
1242 return self.url_result(mobj.group('url'), 'Ustream')
1244 # Look for embedded arte.tv player
1246 r'<script [^>]*?src="(?P
<url
>http
://www\
.arte\
.tv
/playerv2
/embed
[^
"]+)"',
1248 if mobj is not None:
1249 return self.url_result(mobj.group('url
'), 'ArteTVEmbed
')
1251 # Look for embedded smotri.com player
1252 smotri_url = SmotriIE._extract_url(webpage)
1254 return self.url_result(smotri_url, 'Smotri
')
1256 # Look for embedded soundcloud player
1258 r'<iframe\s
+(?
:[a
-zA
-Z0
-9_-]+="[^"]+"\s+)*src="(?P
<url
>https?
://(?
:w\
.)?soundcloud\
.com
/player
[^
"]+)"',
1260 if mobj is not None:
1261 url = unescapeHTML(mobj.group('url
'))
1262 return self.url_result(url)
1264 # Look for embedded vulture.com player
1266 r'<iframe src
="(?P<url>https?://video\.vulture\.com/[^"]+)"',
1268 if mobj is not None:
1269 url = unescapeHTML(mobj.group('url'))
1270 return self.url_result(url, ie='Vulture')
1272 # Look for embedded mtvservices player
1274 r'<iframe src="(?P
<url
>https?
://media\
.mtvnservices\
.com
/embed
/[^
"]+)"',
1276 if mobj is not None:
1277 url = unescapeHTML(mobj.group('url
'))
1278 return self.url_result(url, ie='MTVServicesEmbedded
')
1280 # Look for embedded yahoo player
1282 r'<iframe
[^
>]+?src
=(["\'])(?P<url>https?://(?:screen|movies)\.yahoo\.com/.+?\.html\?format=embed)\1',
1284 if mobj is not None:
1285 return self.url_result(mobj.group('url'), 'Yahoo')
1287 # Look for embedded sbs.com.au player
1291 <meta\s+property="og
:video
"\s+content=|
1294 (["\'])(?P
<url
>https?
://(?
:www\
.)?sbs\
.com\
.au
/ondemand
/video
/.+?
)\
1''',
1296 if mobj is not None:
1297 return self.url_result(mobj.group('url'), 'SBS')
1299 # Look for embedded Cinchcast player
1301 r'<iframe[^>]+?src=(["\'])(?P<url>https?://player\.cinchcast\.com/.+?)\1',
1303 if mobj is not None:
1304 return self.url_result(mobj.group('url'), 'Cinchcast')
1307 r'<iframe[^>]+?src=(["\'])(?P<url>https?://m(?:lb)?\.mlb\.com/shared/video/embed/embed\.html\?.+?)\1',
1311 r'data-video-link=["\'](?P<url>http://m.mlb.com/video/[^"\']+)',
1313 if mobj is not None:
1314 return self.url_result(mobj.group('url'), 'MLB')
1317 r'<iframe[^>]+?src=(["\'])(?P<url>%s)\1' % CondeNastIE.EMBED_URL,
1319 if mobj is not None:
1320 return self.url_result(self._proto_relative_url(mobj.group('url'), scheme='http:'), 'CondeNast')
1323 r'<iframe[^>]+src="(?P<url>https?://new\.livestream\.com/[^"]+/player[^"]+)"',
1325 if mobj is not None:
1326 return self.url_result(mobj.group('url'), 'Livestream')
1328 # Look for Zapiks embed
1330 r'<iframe[^>]+src="(?P<url>https?://(?:www\.)?zapiks\.fr/index\.php\?.+?)"', webpage)
1331 if mobj is not None:
1332 return self.url_result(mobj.group('url'), 'Zapiks')
1334 # Look for Kaltura embeds
1336 r"(?s)kWidget\.(?:thumb)?[Ee]mbed\(\{.*?'wid'\s*:\s*'_?(?P<partner_id>[^']+)',.*?'entry_id'\s*:\s*'(?P<id>[^']+)',", webpage)
1337 if mobj is not None:
1338 return self.url_result('kaltura:%(partner_id)s:%(id)s' % mobj.groupdict(), 'Kaltura')
1340 # Look for Eagle.Platform embeds
1342 r'<iframe[^>]+src="(?P<url>https?://.+?\.media\.eagleplatform\.com/index/player\?.+?)"', webpage)
1343 if mobj is not None:
1344 return self.url_result(mobj.group('url'), 'EaglePlatform')
1346 # Look for ClipYou (uses Eagle.Platform) embeds
1348 r'<iframe[^>]+src="https?://(?P<host>media\.clipyou\.ru)/index/player\?.*\brecord_id=(?P<id>\d+).*"', webpage)
1349 if mobj is not None:
1350 return self.url_result('eagleplatform:%(host)s:%(id)s' % mobj.groupdict(), 'EaglePlatform')
1352 # Look for Pladform embeds
1354 r'<iframe[^>]+src="(?P<url>https?://out\.pladform\.ru/player\?.+?)"', webpage)
1355 if mobj is not None:
1356 return self.url_result(mobj.group('url'), 'Pladform')
1358 # Look for Playwire embeds
1360 r'<script[^>]+data-config=(["\'])(?P<url>(?:https?:)?//config\.playwire\.com/.+?)\1', webpage)
1361 if mobj is not None:
1362 return self.url_result(mobj.group('url'))
1364 # Look for 5min embeds
1366 r'<meta[^>]+property="og:video"[^>]+content="https?://embed\.5min\.com/(?P<id>[0-9]+)/?', webpage)
1367 if mobj is not None:
1368 return self.url_result('5min:%s' % mobj.group('id'), 'FiveMin')
1370 # Look for Crooks and Liars embeds
1372 r'<(?:iframe[^>]+src|param[^>]+value)=(["\'])(?P<url>(?:https?:)?//embed\.crooksandliars\.com/(?:embed|v)/.+?)\1', webpage)
1373 if mobj is not None:
1374 return self.url_result(mobj.group('url'))
1376 # Look for NBC Sports VPlayer embeds
1377 nbc_sports_url = NBCSportsVPlayerIE._extract_url(webpage)
1379 return self.url_result(nbc_sports_url, 'NBCSportsVPlayer')
1381 # Look for UDN embeds
1383 r'<iframe[^>]+src="(?P<url>%s)"' % UDNEmbedIE._VALID_URL, webpage)
1384 if mobj is not None:
1385 return self.url_result(
1386 compat_urlparse.urljoin(url, mobj.group('url')), 'UDNEmbed')
1388 # Look for Senate ISVP iframe
1389 senate_isvp_url = SenateISVPIE._search_iframe_url(webpage)
1391 return self.url_result(surl, 'SenateISVP')
1393 def check_video(vurl):
1394 if YoutubeIE.suitable(vurl):
1396 vpath = compat_urlparse.urlparse(vurl).path
1397 vext = determine_ext(vpath)
1398 return '.' in vpath and vext not in ('swf', 'png', 'jpg', 'srt', 'sbv', 'sub', 'vtt', 'ttml')
def filter_video(urls):
    """Return a list of the entries in *urls* that check_video() accepts."""
    # Order of the surviving URLs is the same as in the input.
    return [candidate for candidate in urls if check_video(candidate)]
1403 # Start with something easy: JW Player in SWFObject
1404 found = filter_video(re.findall(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage))
1406 # Look for gorilla-vid style embedding
1407 found = filter_video(re.findall(r'''(?sx
)
1411 jwplayer\s
*\
(\s
*["'][^'"]+["']\s*\)\s*\.setup
1414 ['"]?
file['"]?\s*:\s*["\'](.*?)["\']''', webpage))
1416 # Broaden the search a little bit
1417 found = filter_video(re.findall(r'[^A
-Za
-z0
-9]?
(?
:file|source
)=(http
[^
\'"&]*)', webpage))
1419 # Broaden the findall a little bit: JWPlayer JS loader
1420 found = filter_video(re.findall(
1421 r'[^A-Za-z0-9]?file["\']?
:\s
*["\'](http(?![^\'"]+\
.[0-9]+[\'"])[^\'"]+)["\']', webpage))
1424 found = filter_video(re.findall(r'''(?xs)
1425 flowplayer\("[^
"]+",\s
*
1427 \s
*\
{[^
}]+?
["']?clip["']?\s*:\s*\{\s*
1428 ["']?url
["']?\s*:\s*["']([^"']+)["']
1433 r"cinerama\
.embedPlayer\
(\s
*\'[^
']+\',\s*'([^
']+)'", webpage)
1435 # Try to find twitter cards info
1436 found = filter_video(re.findall(
1437 r'<meta (?:property|name)="twitter
:player
:stream
" (?:content|value)="(.+?
)"', webpage))
1439 # We look for Open Graph info:
1440 # We have to match any number of spaces between elements; some sites try to align them (e.g. statigr.am)
1441 m_video_type = re.findall(r'<meta.*?property="og
:video
:type".*?content="video
/(.*?
)"', webpage)
1442 # We only look in og:video if the MIME type is a video, don't try if it's a Flash player:
1443 if m_video_type is not None:
1444 found = filter_video(re.findall(r'<meta.*?property="og
:video
".*?content="(.*?
)"', webpage))
1447 found = re.findall(r'(?s)<video[^<]*(?:>.*?<source[^>]*)?\s+src=["\'](.*?
)["\']', webpage)
1449 REDIRECT_REGEX = r'[0-9]{,2};\s*(?:URL|url)=\'?([^\'"]+)'
1451 r'(?i
)<meta\s
+(?
=(?
:[a
-z
-]+="[^"]+"\s+)*http-equiv="refresh
")'
1452 r'(?:[a-z-]+="[^
"]+"\s
+)*?content
="%s' % REDIRECT_REGEX,
1455 # Look also in Refresh HTTP header
1456 refresh_header = head_response.headers.get('Refresh')
1458 found = re.search(REDIRECT_REGEX, refresh_header)
1460 new_url = compat_urlparse.urljoin(url, found.group(1))
1461 self.report_following_redirect(new_url)
1467 raise UnsupportedError(url)
1470 for video_url in found:
1471 video_url = compat_urlparse.urljoin(url, video_url)
1472 video_id = compat_urllib_parse.unquote(os.path.basename(video_url))
1474 # Sometimes, jwplayer extraction will result in a YouTube URL
1475 if YoutubeIE.suitable(video_url):
1476 entries.append(self.url_result(video_url, 'Youtube'))
1479 # here's a fun little line of code for you:
1480 video_id = os.path.splitext(video_id)[0]
1482 if determine_ext(video_url) == 'smil':
1485 'formats': self._extract_smil_formats(video_url, video_id),
1486 'uploader': video_uploader,
1487 'title': video_title,
1488 'age_limit': age_limit,
1494 'uploader': video_uploader,
1495 'title': video_title,
1496 'age_limit': age_limit,
1499 if len(entries) == 1:
1502 for num, e in enumerate(entries, start=1):
1503 # 'url' results don't have a title
1504 if e.get('title') is not None:
1505 e['title'] = '%s (%d)' % (e['title'], num)
1507 '_type': 'playlist',