3 from __future__
import unicode_literals
8 from .common
import InfoExtractor
9 from .youtube
import YoutubeIE
10 from ..compat
import (
12 compat_urllib_parse_unquote
,
13 compat_urllib_request
,
15 compat_xml_parse_error
,
33 from .brightcove
import BrightcoveIE
34 from .nbc
import NBCSportsVPlayerIE
35 from .ooyala
import OoyalaIE
36 from .rutv
import RUTVIE
37 from .sportbox
import SportBoxEmbedIE
38 from .smotri
import SmotriIE
39 from .condenast
import CondeNastIE
40 from .udn
import UDNEmbedIE
41 from .senateisvp
import SenateISVPIE
42 from .bliptv
import BlipTVIE
43 from .svt
import SVTIE
46 class GenericIE(InfoExtractor
):
47 IE_DESC
= 'Generic downloader that works on some sites'
51 # Direct link to a video
53 'url': 'http://media.w3.org/2010/05/sintel/trailer.mp4',
54 'md5': '67d406c2bcb6af27fa886f31aa934bbe',
59 'upload_date': '20100513',
62 # Direct link to media delivered compressed (until Accept-Encoding is *)
64 'url': 'http://calimero.tk/muzik/FictionJunction-Parallel_Hearts.flac',
65 'md5': '128c42e68b13950268b648275386fc74',
67 'id': 'FictionJunction-Parallel_Hearts',
69 'title': 'FictionJunction-Parallel_Hearts',
70 'upload_date': '20140522',
72 'expected_warnings': [
73 'URL could be a direct video link, returning it as such.'
76 # Direct download with broken HEAD
78 'url': 'http://ai-radio.org:8000/radio.opus',
85 'skip_download': True, # infinite live stream
87 'expected_warnings': [
88 r
'501.*Not Implemented'
91 # Direct link with incorrect MIME type
93 'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
94 'md5': '4ccbebe5f36706d85221f204d7eb5913',
96 'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
97 'id': '5_Lennart_Poettering_-_Systemd',
99 'title': '5_Lennart_Poettering_-_Systemd',
100 'upload_date': '20141120',
102 'expected_warnings': [
103 'URL could be a direct video link, returning it as such.'
108 'url': 'http://phihag.de/2014/youtube-dl/rss2.xml',
110 'id': 'http://phihag.de/2014/youtube-dl/rss2.xml',
111 'title': 'Zero Punctuation',
112 'description': 're:.*groundbreaking video review series.*'
114 'playlist_mincount': 11,
116 # RSS feed with enclosure
118 'url': 'http://podcastfeeds.nbcnews.com/audio/podcast/MSNBC-MADDOW-NETCAST-M4V.xml',
120 'id': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
122 'upload_date': '20150228',
123 'title': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
128 'url': 'http://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&ved=0CCUQtwIwAA&url=http%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DcmQHVoWB5FY&ei=F-sNU-LLCaXk4QT52ICQBQ&usg=AFQjCNEw4hL29zgOohLXvpJ-Bdh2bils1Q&bvm=bv.61965928,d.bGE',
132 'upload_date': '20130224',
133 'uploader_id': 'TheVerge',
134 'description': 're:^Chris Ziegler takes a look at the\.*',
135 'uploader': 'The Verge',
136 'title': 'First Firefox OS phones side-by-side',
139 'skip_download': False,
143 'url': 'http://www.hodiho.fr/2013/02/regis-plante-sa-jeep.html',
144 'md5': '85b90ccc9d73b4acd9138d3af4c27f89',
146 'id': '13601338388002',
148 'uploader': 'www.hodiho.fr',
149 'title': 'R\u00e9gis plante sa Jeep',
152 # bandcamp page with custom domain
154 'add_ie': ['Bandcamp'],
155 'url': 'http://bronyrock.com/track/the-pony-mash',
159 'title': 'The Pony Mash',
160 'uploader': 'M_Pallante',
162 'skip': 'There is a limit of 200 free downloads / month for the test song',
164 # embedded brightcove video
165 # it also tests brightcove videos that need to set the 'Referer' in the
168 'add_ie': ['Brightcove'],
169 'url': 'http://www.bfmtv.com/video/bfmbusiness/cours-bourse/cours-bourse-l-analyse-technique-154522/',
171 'id': '2765128793001',
173 'title': 'Le cours de bourse : l’analyse technique',
174 'description': 'md5:7e9ad046e968cb2d1114004aba466fd9',
175 'uploader': 'BFM BUSINESS',
178 'skip_download': True,
182 # https://github.com/rg3/youtube-dl/issues/2253
183 'url': 'http://bcove.me/i6nfkrc3',
184 'md5': '0ba9446db037002366bab3b3eb30c88c',
186 'id': '3101154703001',
188 'title': 'Still no power',
189 'uploader': 'thestar.com',
190 'description': 'Mississauga resident David Farmer is still out of power as a result of the ice storm a month ago. To keep the house warm, Farmer cuts wood from his property for a wood burning stove downstairs.',
192 'add_ie': ['Brightcove'],
195 'url': 'http://www.championat.com/video/football/v/87/87499.html',
196 'md5': 'fb973ecf6e4a78a67453647444222983',
198 'id': '3414141473001',
200 'title': 'Видео. Удаление Дзагоева (ЦСКА)',
201 'description': 'Онлайн-трансляция матча ЦСКА - "Волга"',
202 'uploader': 'Championat',
206 # https://github.com/rg3/youtube-dl/issues/3541
207 'add_ie': ['Brightcove'],
208 'url': 'http://www.kijk.nl/sbs6/leermijvrouwenkennen/videos/jqMiXKAYan2S/aflevering-1',
210 'id': '3866516442001',
212 'title': 'Leer mij vrouwen kennen: Aflevering 1',
213 'description': 'Leer mij vrouwen kennen: Aflevering 1',
214 'uploader': 'SBS Broadcasting',
216 'skip': 'Restricted to Netherlands',
218 'skip_download': True, # m3u8 download
223 'url': 'http://www.rollingstone.com/music/videos/norwegian-dj-cashmere-cat-goes-spartan-on-with-me-premiere-20131219',
224 'md5': '166dd577b433b4d4ebfee10b0824d8ff',
226 'id': 'BwY2RxaTrTkslxOfcan0UCf0YqyvWysJ',
228 'title': '2cc213299525360.mov', # that's what we get
230 'add_ie': ['Ooyala'],
232 # multiple ooyala embeds on SBN network websites
234 'url': 'http://www.sbnation.com/college-football-recruiting/2015/2/3/7970291/national-signing-day-rationalizations-itll-be-ok-itll-be-ok',
236 'id': 'national-signing-day-rationalizations-itll-be-ok-itll-be-ok',
237 'title': '25 lies you will tell yourself on National Signing Day - SBNation.com',
239 'playlist_mincount': 3,
241 'skip_download': True,
243 'add_ie': ['Ooyala'],
247 'url': 'http://www.tested.com/science/weird/460206-tested-grinding-coffee-2000-frames-second/',
251 'title': 'Tested: Grinding Coffee at 2000 Frames Per Second',
252 'upload_date': '20140225',
253 'description': 'md5:06a40fbf30b220468f1e0957c0f558ff',
254 'uploader': 'Tested',
255 'uploader_id': 'testedcom',
257 # No need to test YoutubeIE here
259 'skip_download': True,
264 'url': 'http://www.theguardian.com/world/2014/mar/11/obama-zach-galifianakis-between-two-ferns',
268 'title': 'Between Two Ferns with Zach Galifianakis: President Barack Obama',
269 'description': 'Episode 18: President Barack Obama sits down with Zach Galifianakis for his most memorable interview yet.',
274 'url': 'http://www.bbc.co.uk/blogs/adamcurtis/posts/BUGGER',
276 'title': 'BBC - Blogs - Adam Curtis - BUGGER',
278 'playlist_mincount': 18,
282 'url': 'http://www.rg.ru/2014/03/15/reg-dfo/anklav-anons.html',
286 'title': 'Охотское море стало целиком российским',
287 'description': 'md5:5ed62483b14663e2a95ebbe115eb8f43',
291 'skip_download': True,
296 'url': 'http://www.vestifinance.ru/articles/25753',
299 'title': 'Вести Экономика ― Прямые трансляции с Форума-выставки "Госзаказ-2013"',
304 'title': 'Госзаказ. День 3',
310 'title': 'Госзаказ. День 2',
316 'title': 'Госзаказ. День 1',
322 'skip_download': True,
327 'url': 'http://en.support.wordpress.com/videos/ted-talks/',
328 'md5': '65fdff94098e4a607385a60c5177c638',
332 'title': 'Hidden miracles of the natural world',
333 'uploader': 'Louie Schwartzberg',
334 'description': 'md5:8145d19d320ff3e52f28401f4c4283b9',
337 # Embedded Ustream video
339 'url': 'http://www.american.edu/spa/pti/nsa-privacy-janus-2014.cfm',
340 'md5': '27b99cdb639c9b12a79bca876a073417',
344 'uploader': 'AU SPA: The NSA and Privacy',
345 'title': 'NSA and Privacy Forum Debate featuring General Hayden and Barton Gellman'
348 # nowvideo embed hidden behind percent encoding
350 'url': 'http://www.waoanime.tv/the-super-dimension-fortress-macross-episode-1/',
351 'md5': '2baf4ddd70f697d94b1c18cf796d5107',
353 'id': '06e53103ca9aa',
355 'title': 'Macross Episode 001 Watch Macross Episode 001 onl',
356 'description': 'No description',
361 'url': 'http://www.tv-replay.fr/redirection/20-03-14/x-enius-arte-10753389.html',
362 'md5': '7653032cbb25bf6c80d80f217055fa43',
364 'id': '048195-004_PLUS7-F',
367 'description': 'md5:d5fdf32ef6613cdbfd516ae658abf168',
368 'upload_date': '20140320',
371 'skip_download': 'Requires rtmpdump'
376 'url': 'http://www.wired.com/2014/04/honda-asimo/',
377 'md5': 'ba0dfe966fa007657bd1443ee672db0f',
379 'id': '53501be369702d3275860000',
381 'title': 'Honda’s New Asimo Robot Is More Human Than Ever',
386 'url': 'http://www.spi0n.com/zap-spi0n-com-n216/',
387 'md5': '441aeeb82eb72c422c7f14ec533999cd',
389 'id': 'k2mm4bCdJ6CQ2i7c8o2',
391 'title': 'Le Zap de Spi0n n°216 - Zapping du Web',
394 'add_ie': ['Dailymotion'],
398 'url': 'http://www.badzine.de/ansicht/datum/2014/06/09/so-funktioniert-die-neue-englische-badminton-liga.html',
402 'title': 'The NBL Auction 2014',
403 'uploader': 'BADMINTON England',
404 'uploader_id': 'BADMINTONEvents',
405 'upload_date': '20140603',
406 'description': 'md5:9ef128a69f1e262a700ed83edb163a73',
408 'add_ie': ['Youtube'],
410 'skip_download': True,
415 'url': 'http://www.gametrailers.com/news-post/76093/north-america-europe-is-getting-that-mario-kart-8-mercedes-dlc-too',
416 'md5': '35727f82f58c76d996fc188f9755b0d5',
418 'id': '0306a69b-8adf-4fb5-aace-75f8e8cbfca9',
421 'description': 'Mario\'s life in the fast lane has never looked so good.',
424 # YouTube embed via <data-embed-url="">
426 'url': 'https://play.google.com/store/apps/details?id=com.gameloft.android.ANMP.GloftA8HM',
430 'title': 'Asphalt 8: Airborne - Update - Welcome to Dubai!',
431 'uploader': 'Gameloft',
432 'uploader_id': 'gameloft',
433 'upload_date': '20140828',
434 'description': 'md5:c80da9ed3d83ae6d1876c834de03e1c4',
437 'skip_download': True,
442 'url': 'http://www.ll.mit.edu/workshops/education/videocourses/antennas/lecture1/video/',
444 'md5': '0c5e352edabf715d762b0ad4e6d9ee67',
446 'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
447 'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - video1',
452 'md5': '10e4bb3aaca9fd630e273ff92d9f3c63',
454 'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final_PIP',
455 'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - pip',
461 'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
466 'url': 'http://www.handjobhub.com/video/busty-blonde-siri-tit-fuck-while-wank-6313.html',
467 'md5': '9d65602bf31c6e20014319c7d07fba27',
469 'id': '5123ea6d5e5a7',
472 'uploader': 'www.handjobhub.com',
473 'title': 'Busty Blonde Siri Tit Fuck While Wank at HandjobHub.com',
476 # Multiple brightcove videos
477 # https://github.com/rg3/youtube-dl/issues/2283
479 'url': 'http://www.newyorker.com/online/blogs/newsdesk/2014/01/always-never-nuclear-command-and-control.html',
481 'id': 'always-never',
482 'title': 'Always / Never - The New Yorker',
486 'extract_flat': False,
487 'skip_download': True,
492 'url': 'http://umpire-empire.com/index.php/topic/58125-laz-decides-no-thats-low/',
493 'md5': '96f09a37e44da40dd083e12d9a683327',
497 'title': 'Ump changes call to ball',
498 'description': 'md5:71c11215384298a172a6dcb4c2e20685',
500 'timestamp': 1401537900,
501 'upload_date': '20140531',
502 'thumbnail': 're:^https?://.*\.jpg$',
507 'url': 'http://education-portal.com/academy/lesson/north-american-exploration-failed-colonies-of-spain-france-england.html#lesson',
508 'md5': '8788b683c777a5cf25621eaf286d0c23',
512 'title': 'md5:51364a8d3d009997ba99656004b5e20d',
514 'filesize': 182808282,
515 'uploader': 'education-portal.com',
519 'url': 'http://thoughtworks.wistia.com/medias/uxjb0lwrcz',
520 'md5': 'baf49c2baa8a7de5f3fc145a8506dcd4',
524 'title': 'Conversation about Hexagonal Rails Part 1 - ThoughtWorks',
526 'uploader': 'thoughtworks.wistia.com',
531 'url': 'http://nakedsecurity.sophos.com/2014/10/29/sscc-171-are-you-sure-that-1234-is-a-bad-password-podcast/',
535 'description': 'md5:ff867d6b555488ad3c52572bb33d432c',
536 'uploader': 'Sophos Security',
537 'title': 'Chet Chat 171 - Oct 29, 2014',
538 'upload_date': '20141029',
543 'url': 'http://www.esa.int/Our_Activities/Space_Science/Rosetta/Philae_comet_touch-down_webcast',
547 'upload_date': '20141112',
548 'title': 'Rosetta #CometLanding webcast HL 10',
553 'url': 'http://discourse.ubuntu.com/t/unity-8-desktop-mode-windows-on-mir/1986',
556 'title': 'Unity 8 desktop-mode windows on Mir! - Ubuntu Discourse',
558 'playlist_mincount': 2,
562 'url': 'http://undergroundwellness.com/podcasts/306-5-steps-to-permanent-gut-healing/',
566 'upload_date': '20141126',
567 'title': 'Jack Tips: 5 Steps to Permanent Gut Healing',
572 'url': 'http://www.abc.net.au/7.30/content/2015/s4164797.htm',
574 'id': '730m_DandD_1901_512k',
576 'uploader': 'www.abc.net.au',
577 'title': 'Game of Thrones with dice - Dungeons and Dragons fantasy role-playing game gets new life - 19/01/2015',
580 # embedded viddler video
582 'url': 'http://deadspin.com/i-cant-stop-watching-john-wall-chop-the-nuggets-with-th-1681801597',
586 'uploader': 'deadspin',
587 'title': 'WALL-TO-GORTAT',
588 'timestamp': 1422285291,
589 'upload_date': '20150126',
591 'add_ie': ['Viddler'],
595 'url': 'http://thedailyshow.cc.com/podcast/episodetwelve',
599 'title': "The Daily Show Podcast without Jon Stewart - Episode 12: Bassem Youssef: Egypt's Jon Stewart",
600 'description': 'md5:601cb790edd05908957dae8aaa866465',
601 'upload_date': '20150220',
606 'url': 'http://media.nationalarchives.gov.uk/index.php/webinar-using-discovery-national-archives-online-catalogue/',
610 'upload_date': '20150212',
611 'uploader': 'The National Archives UK',
612 'description': 'md5:a236581cd2449dd2df4f93412f3f01c6',
613 'uploader_id': 'NationalArchives08',
614 'title': 'Webinar: Using Discovery, The National Archives’ online catalogue',
619 'url': 'http://www.rtlnieuws.nl/nieuws/buitenland/aanslagen-kopenhagen',
620 'playlist_mincount': 5,
622 'id': 'aanslagen-kopenhagen',
623 'title': 'Aanslagen Kopenhagen | RTL Nieuws',
628 'url': 'http://www.skipass.com/news/116090-bon-appetit-s5ep3-baqueira-mi-cor.html',
632 'title': 'EP3S5 - Bon Appétit - Baqueira Mi Corazon !',
637 'url': 'http://www.monumentalnetwork.com/videos/john-carlson-postgame-2-25-15',
641 'upload_date': '20150226',
642 'uploader_id': 'MonumentalSports-Kaltura@perfectsensedigital.com',
644 'title': 'John Carlson Postgame 2/25/15',
647 # Eagle.Platform embed (generic URL)
649 'url': 'http://lenta.ru/news/2015/03/06/navalny/',
653 'title': 'Навальный вышел на свободу',
654 'description': 'md5:d97861ac9ae77377f3f20eaf9d04b4f5',
655 'thumbnail': 're:^https?://.*\.jpg$',
661 # ClipYou (Eagle.Platform) embed (custom URL)
663 'url': 'http://muz-tv.ru/play/7129/',
667 'title': "'O Sole Mio",
668 'thumbnail': 're:^https?://.*\.jpg$',
675 'url': 'http://muz-tv.ru/kinozal/view/7400/',
679 'title': 'Тайны перевала Дятлова • 1 серия 2 часть',
680 'description': 'Документальный сериал-расследование одной из самых жутких тайн ХХ века',
681 'thumbnail': 're:^https?://.*\.jpg$',
688 'url': 'http://www.cinemablend.com/new/First-Joe-Dirt-2-Trailer-Teaser-Stupid-Greatness-70874.html',
692 'title': 'Joe Dirt 2 Beautiful Loser Teaser Trailer',
693 'thumbnail': 're:^https?://.*\.png$',
699 'url': 'http://techcrunch.com/video/facebook-creates-on-this-day-crunch-report/518726732/',
700 'md5': '4c6f127a30736b59b3e2c19234ee2bf7',
704 'title': 'Facebook Creates "On This Day" | Crunch Report',
709 'url': 'http://www.svt.se/sport/ishockey/jagr-tacklar-giroux-under-intervjun',
713 'title': 'Här trycker Jagr till Giroux (under SVT-intervjun)',
718 # Crooks and Liars embed
720 'url': 'http://crooksandliars.com/2015/04/fox-friends-says-protecting-atheists',
724 'title': "Fox & Friends Says Protecting Atheists From Discrimination Is Anti-Christian!",
725 'description': 'md5:e1a46ad1650e3a5ec7196d432799127f',
726 'timestamp': 1428207000,
727 'upload_date': '20150405',
728 'uploader': 'Heather',
731 # Crooks and Liars external embed
733 'url': 'http://theothermccain.com/2010/02/02/video-proves-that-bill-kristol-has-been-watching-glenn-beck/comment-page-1/',
735 'id': 'MTE3MjUtMzQ2MzA',
737 'title': 'md5:5e3662a81a4014d24c250d76d41a08d5',
738 'description': 'md5:9b8e9542d6c3c5de42d6451b7d780cec',
739 'timestamp': 1265032391,
740 'upload_date': '20100201',
741 'uploader': 'Heather',
744 # NBC Sports vplayer embed
746 'url': 'http://www.riderfans.com/forum/showthread.php?121827-Freeman&s=e98fa1ea6dc08e886b1678d35212494a',
748 'id': 'ln7x1qSThw4k',
750 'title': "PFT Live: New leader in the 'new-look' defense",
751 'description': 'md5:65a19b4bbfb3b0c0c5768bed1dfad74e',
756 'url': 'http://www.udn.com/news/story/7314/822787',
757 'md5': 'fd2060e988c326991037b9aff9df21a6',
761 'title': '中一中男師變性 全校師生力挺',
762 'thumbnail': 're:^https?://.*\.jpg$',
767 'url': 'http://www.businessinsider.com/excel-index-match-vlookup-video-how-to-2015-2?IR=T',
769 'id': '50YnY4czr4ms1vJ7yz3xzq0excz_pUMs',
771 'description': 'VIDEO: Index/Match versus VLOOKUP.',
772 'title': 'This is what separates the Excel masters from the wannabes',
776 'skip_download': True,
779 # Contains a SMIL manifest
781 'url': 'http://www.telewebion.com/fa/1263668/%D9%82%D8%B1%D8%B9%D9%87%E2%80%8C%DA%A9%D8%B4%DB%8C-%D9%84%DB%8C%DA%AF-%D9%82%D9%87%D8%B1%D9%85%D8%A7%D9%86%D8%A7%D9%86-%D8%A7%D8%B1%D9%88%D9%BE%D8%A7/%2B-%D9%81%D9%88%D8%AA%D8%A8%D8%A7%D9%84.html',
785 'title': '+ Football: Lottery Champions League Europe',
786 'uploader': 'www.telewebion.com',
790 'skip_download': True,
def report_following_redirect(self, new_url):
    """Tell the user that a redirect to *new_url* is being followed.

    The notice is emitted through the downloader's ``to_screen``.
    """
    notice = '[redirect] Following redirect to %s' % new_url
    self._downloader.to_screen(notice)
# Turn an already-parsed RSS document (doc) into playlist-style metadata:
# the channel supplies the playlist title and description, and every
# ./channel/item is inspected for a URL and a title.
799 def _extract_rss(self
, url
, video_id
, doc
):
# The channel title is read unconditionally via .text.
# NOTE(review): assuming doc is an ElementTree-style element, a feed without
# ./channel/title would make find() return None and fail here - confirm.
800 playlist_title
= doc
.find('./channel/title').text
# The channel description is optional: playlist_desc is None when absent.
801 playlist_desc_el
= doc
.find('./channel/description')
802 playlist_desc
= None if playlist_desc_el
is None else playlist_desc_el
.text
# One pass per <item> of the channel.
805 for it
in doc
.findall('./channel/item'):
# First candidate URL: the item's <link>. fatal=False is passed, so a
# missing link is presumably tolerated rather than raised - verify
# against xpath_text.
806 next_url
= xpath_text(it
, 'link', fatal
=False)
# <enclosure> children of the item can carry the media URL in their
# 'url' attribute (attrib.get returns None when it is not set).
808 enclosure_nodes
= it
.findall('./enclosure')
809 for e
in enclosure_nodes
:
810 next_url
= e
.attrib
.get('url')
# The entry title comes from the item's <title>; .text is read
# unconditionally.
820 'title': it
.find('title').text
,
# Playlist-level metadata taken from the channel (computed above).
826 'title': playlist_title
,
827 'description': playlist_desc
,
# Detect a Camtasia project page: find the configuration file referenced by
# the page, download it as XML and describe each child of its fileset node.
831 def _extract_camtasia(self
, url
, video_id
, webpage
):
832 """ Returns None if no camtasia video can be found. """
# The page references its configuration through
# fo.addVariable("csConfigFile", "<path>"); the captured group is the path.
# default=None lets the "not a Camtasia page" case be detected by the
# `is None` check below.
834 camtasia_cfg
= self
._search
_regex
(
835 r
'fo\.addVariable\(\s*"csConfigFile",\s*"([^"]+)"\s*\);',
836 webpage
, 'camtasia configuration file', default
=None)
837 if camtasia_cfg
is None:
# Base title for all entries, read from the DC.title meta tag with fatal=True.
# NOTE(review): presumably this raises when the tag is missing - confirm
# against _html_search_meta.
840 title
= self
._html
_search
_meta
('DC.title', webpage
, fatal
=True)
# Resolve the configuration path against the page URL.
842 camtasia_url
= compat_urlparse
.urljoin(url
, camtasia_cfg
)
# camtasia_cfg is rebound here: from the matched path string to the result
# of downloading that path as XML.
843 camtasia_cfg
= self
._download
_xml
(
844 camtasia_url
, video_id
,
845 note
='Downloading camtasia configuration',
846 errnote
='Failed to download camtasia configuration')
847 fileset_node
= camtasia_cfg
.find('./playlist/array/fileset')
# NOTE(review): xml.etree.ElementTree.Element.getchildren() was removed in
# Python 3.9; iterating the element directly (or list(fileset_node)) is the
# supported equivalent. Also, find() above returns None when the path is
# absent, which this loop does not guard against as far as is visible here.
850 for n
in fileset_node
.getchildren():
851 url_n
= n
.find('./uri')
# id: last path component of the uri text, with its extension stripped.
856 'id': os
.path
.splitext(url_n
.text
.rpartition('/')[2])[0],
# The child's tag name is appended to the page title to tell entries apart.
857 'title': '%s - %s' % (title
, n
.tag
),
# The uri may be relative, so it is resolved against the page URL.
858 'url': compat_urlparse
.urljoin(url
, url_n
.text
),
# ./duration is read unconditionally (.text) and passed to float_or_none.
859 'duration': float_or_none(n
.find('./duration').text
),
868 def _real_extract(self
, url
):
869 if url
.startswith('//'):
872 'url': self
.http_scheme() + url
,
875 parsed_url
= compat_urlparse
.urlparse(url
)
876 if not parsed_url
.scheme
:
877 default_search
= self
._downloader
.params
.get('default_search')
878 if default_search
is None:
879 default_search
= 'fixup_error'
881 if default_search
in ('auto', 'auto_warning', 'fixup_error'):
883 self
._downloader
.report_warning('The url doesn\'t specify the protocol, trying with http')
884 return self
.url_result('http://' + url
)
885 elif default_search
!= 'fixup_error':
886 if default_search
== 'auto_warning':
887 if re
.match(r
'^(?:url|URL)$', url
):
888 raise ExtractorError(
889 'Invalid URL: %r . Call youtube-dl like this: youtube-dl -v "https://www.youtube.com/watch?v=BaW_jenozKc" ' % url
,
892 self
._downloader
.report_warning(
893 'Falling back to youtube search for %s . Set --default-search "auto" to suppress this warning.' % url
)
894 return self
.url_result('ytsearch:' + url
)
896 if default_search
in ('error', 'fixup_error'):
897 raise ExtractorError(
898 '%r is not a valid URL. '
899 'Set --default-search "ytsearch" (or run youtube-dl "ytsearch:%s" ) to search YouTube'
900 % (url
, url
), expected
=True)
902 if ':' not in default_search
:
903 default_search
+= ':'
904 return self
.url_result(default_search
+ url
)
906 url
, smuggled_data
= unsmuggle_url(url
)
908 is_intentional
= smuggled_data
and smuggled_data
.get('to_generic')
909 if smuggled_data
and 'force_videoid' in smuggled_data
:
910 force_videoid
= smuggled_data
['force_videoid']
911 video_id
= force_videoid
913 video_id
= compat_urllib_parse_unquote(os
.path
.splitext(url
.rstrip('/').split('/')[-1])[0])
915 self
.to_screen('%s: Requesting header' % video_id
)
917 head_req
= HEADRequest(url
)
918 head_response
= self
._request
_webpage
(
920 note
=False, errnote
='Could not send HEAD request to %s' % url
,
923 if head_response
is not False:
925 new_url
= head_response
.geturl()
927 self
.report_following_redirect(new_url
)
929 new_url
= smuggle_url(
930 new_url
, {'force_videoid': force_videoid
})
931 return self
.url_result(new_url
)
934 if head_response
is False:
935 request
= compat_urllib_request
.Request(url
)
936 request
.add_header('Accept-Encoding', '*')
937 full_response
= self
._request
_webpage
(request
, video_id
)
938 head_response
= full_response
940 # Check for direct link to a video
941 content_type
= head_response
.headers
.get('Content-Type', '')
942 m
= re
.match(r
'^(?P<type>audio|video|application(?=/ogg$))/(?P<format_id>.+)$', content_type
)
944 upload_date
= unified_strdate(
945 head_response
.headers
.get('Last-Modified'))
948 'title': compat_urllib_parse_unquote(os
.path
.splitext(url_basename(url
))[0]),
951 'format_id': m
.group('format_id'),
953 'vcodec': 'none' if m
.group('type') == 'audio' else None
955 'upload_date': upload_date
,
958 if not self
._downloader
.params
.get('test', False) and not is_intentional
:
959 self
._downloader
.report_warning('Falling back on generic information extractor.')
961 if not full_response
:
962 request
= compat_urllib_request
.Request(url
)
963 # Some webservers may serve compressed content of rather big size (e.g. gzipped flac),
964 # making it impossible to download only a chunk of the file (yet we need only 512 bytes to
965 # test whether it's HTML or not). With youtube-dl's default Accept-Encoding
966 # this would always result in downloading the whole file, which is not desirable.
967 # Therefore, for the extraction pass we have to override Accept-Encoding to any ('*') in order
968 # to accept raw bytes and be able to download only a chunk.
969 # It would probably be better to solve this by checking Content-Type for application/octet-stream
970 # after the HEAD request finishes, but it is not certain that we can rely on this.
971 request
.add_header('Accept-Encoding', '*')
972 full_response
= self
._request
_webpage
(request
, video_id
)
974 # Maybe it's a direct link to a video?
975 # Be careful not to download the whole thing!
976 first_bytes
= full_response
.read(512)
977 if not is_html(first_bytes
):
978 self
._downloader
.report_warning(
979 'URL could be a direct video link, returning it as such.')
980 upload_date
= unified_strdate(
981 head_response
.headers
.get('Last-Modified'))
984 'title': compat_urllib_parse_unquote(os
.path
.splitext(url_basename(url
))[0]),
987 'upload_date': upload_date
,
990 webpage
= self
._webpage
_read
_content
(
991 full_response
, url
, video_id
, prefix
=first_bytes
)
993 self
.report_extraction(video_id
)
997 doc
= parse_xml(webpage
)
999 return self
._extract
_rss
(url
, video_id
, doc
)
1000 except compat_xml_parse_error
:
1003 # Is it a Camtasia project?
1004 camtasia_res
= self
._extract
_camtasia
(url
, video_id
, webpage
)
1005 if camtasia_res
is not None:
1008 # Sometimes embedded video player is hidden behind percent encoding
1009 # (e.g. https://github.com/rg3/youtube-dl/issues/2448)
1010 # Unescaping the whole page allows to handle those cases in a generic way
1011 webpage
= compat_urllib_parse
.unquote(webpage
)
1013 # it's tempting to parse this further, but you would
1014 # have to take into account all the variations like
1015 # Video Title - Site Name
1016 # Site Name | Video Title
1017 # Video Title - Tagline | Site Name
1018 # and so on and so forth; it's just not practical
1019 video_title
= self
._html
_search
_regex
(
1020 r
'(?s)<title>(.*?)</title>', webpage
, 'video title',
1023 # Try to detect age limit automatically
1024 age_limit
= self
._rta
_search
(webpage
)
1025 # And then there are the jokers who advertise that they use RTA,
1026 # but actually don't.
1027 AGE_LIMIT_MARKERS
= [
1028 r
'Proudly Labeled <a href="http://www.rtalabel.org/" title="Restricted to Adults">RTA</a>',
1030 if any(re
.search(marker
, webpage
) for marker
in AGE_LIMIT_MARKERS
):
1033 # video uploader is domain name
1034 video_uploader
= self
._search
_regex
(
1035 r
'^(?:https?://)?([^/]*)/.*', url
, 'video uploader')
1038 def _playlist_from_matches(matches
, getter
=None, ie
=None):
1040 self
.url_result(self
._proto
_relative
_url
(getter(m
) if getter
else m
), ie
)
1042 return self
.playlist_result(
1043 urlrs
, playlist_id
=video_id
, playlist_title
=video_title
)
1045 # Look for BrightCove:
1046 bc_urls
= BrightcoveIE
._extract
_brightcove
_urls
(webpage
)
1048 self
.to_screen('Brightcove video detected.')
1051 'url': smuggle_url(bc_url
, {'Referer': url
}),
1052 'ie_key': 'Brightcove'
1053 } for bc_url
in bc_urls
]
1056 '_type': 'playlist',
1057 'title': video_title
,
1062 # Look for embedded rtl.nl player
1063 matches
= re
.findall(
1064 r
'<iframe\s+(?:[a-zA-Z-]+="[^"]+"\s+)*?src="((?:https?:)?//(?:www\.)?rtl\.nl/system/videoplayer/[^"]+video_embed[^"]+)"',
1067 return _playlist_from_matches(matches
, ie
='RtlNl')
1069 # Look for embedded (iframe) Vimeo player
1071 r
'<iframe[^>]+?src=(["\'])(?P
<url
>(?
:https?
:)?
//player\
.vimeo\
.com
/video
/.+?
)\
1', webpage)
1073 player_url = unescapeHTML(mobj.group('url
'))
1074 surl = smuggle_url(player_url, {'Referer
': url})
1075 return self.url_result(surl)
1076 # Look for embedded (swf embed) Vimeo player
1078 r'<embed
[^
>]+?src
="((?:https?:)?//(?:www\.)?vimeo\.com/moogaloop\.swf.+?)"', webpage)
1080 return self.url_result(mobj.group(1))
1082 # Look for embedded YouTube player
1083 matches = re.findall(r'''(?x)
1092 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
1096 return _playlist_from_matches(
1097 matches, lambda m: unescapeHTML(m[1]))
1099 # Look for lazyYT YouTube embed
1100 matches = re.findall(
1101 r'class="lazyYT" data
-youtube
-id="([^"]+)"', webpage)
1103 return _playlist_from_matches(matches, lambda m: unescapeHTML(m))
1105 # Look for embedded Dailymotion player
1106 matches = re.findall(
1107 r'<iframe[^>]+?src=(["\'])(?P
<url
>(?
:https?
:)?
//(?
:www\
.)?dailymotion\
.com
/embed
/video
/.+?
)\
1', webpage)
1109 return _playlist_from_matches(
1110 matches, lambda m: unescapeHTML(m[1]))
1112 # Look for embedded Dailymotion playlist player (#3822)
1114 r'<iframe
[^
>]+?src
=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.[a-z]{2,3}/widget/jukebox\?.+?)\1', webpage)
1116 playlists = re.findall(
1117 r'list\[\]=/playlist/([^/]+)/', unescapeHTML(m.group('url')))
1119 return _playlist_from_matches(
1120 playlists, lambda p: '//dailymotion.com/playlist/%s' % p)
1122 # Look for embedded Wistia player
1124 r'<(?:meta[^>]+?content|iframe[^>]+?src)=(["\'])(?P
<url
>(?
:https?
:)?
//(?
:fast\
.)?wistia\
.net
/embed
/iframe
/.+?
)\
1', webpage)
1126 embed_url = self._proto_relative_url(
1127 unescapeHTML(match.group('url
')))
1129 '_type
': 'url_transparent
',
1132 'uploader
': video_uploader,
1133 'title
': video_title,
1137 match = re.search(r'(?
:id=["\']wistia_|data-wistia-?id=["\']|Wistia\
.embed\
(["\'])(?P<id>[^"\']+)', webpage)
1140 '_type
': 'url_transparent
',
1141 'url
': 'http
://fast
.wistia
.net
/embed
/iframe
/{0:}
'.format(match.group('id')),
1143 'uploader
': video_uploader,
1144 'title
': video_title,
1145 'id': match.group('id')
1148 # Look for embedded blip.tv player
1149 bliptv_url = BlipTVIE._extract_url(webpage)
1151 return self.url_result(bliptv_url, 'BlipTV
')
1153 # Look for SVT player
1154 svt_url = SVTIE._extract_url(webpage)
1156 return self.url_result(svt_url, 'SVT
')
1158 # Look for embedded condenast player
1159 matches = re.findall(
1160 r'<iframe\s
+(?
:[a
-zA
-Z
-]+="[^"]+"\s+)*?src="(https?
://player\
.cnevids\
.com
/embed
/[^
"]+")',
1164 '_type
': 'playlist
',
1167 'ie_key
': 'CondeNast
',
1169 } for ma in matches],
1170 'title
': video_title,
1174 # Look for Bandcamp pages with custom domain
1175 mobj = re.search(r'<meta
property="og:url"[^
>]*?content
="(.*?bandcamp\.com.*?)"', webpage)
1176 if mobj is not None:
1177 burl = unescapeHTML(mobj.group(1))
1178 # Don't
set the extractor because it can be a track url
or an album
1179 return self
.url_result(burl
)
1181 # Look for embedded Vevo player
1183 r
'<iframe[^>]+?src=(["\'])(?P
<url
>(?
:https?
:)?
//(?
:cache\
.)?vevo\
.com
/.+?
)\
1', webpage)
1184 if mobj is not None:
1185 return self.url_result(mobj.group('url
'))
1187 # Look for embedded Viddler player
1189 r'<(?
:iframe
[^
>]+?src|param
[^
>]+?value
)=(["\'])(?P<url>(?:https?:)?//(?:www\.)?viddler\.com/(?:embed|player)/.+?)\1',
1191 if mobj is not None:
1192 return self.url_result(mobj.group('url'))
1194 # Look for NYTimes player
1196 r'<iframe[^>]+src=(["\'])(?P
<url
>(?
:https?
:)?
//graphics8\
.nytimes\
.com
/bcvideo
/[^
/]+/iframe
/embed\
.html
.+?
)\
1>',
1198 if mobj is not None:
1199 return self.url_result(mobj.group('url
'))
1201 # Look for Libsyn player
1203 r'<iframe
[^
>]+src
=(["\'])(?P<url>(?:https?:)?//html5-player\.libsyn\.com/embed/.+?)\1', webpage)
1204 if mobj is not None:
1205 return self.url_result(mobj.group('url'))
1207 # Look for Ooyala videos
1208 mobj = (re.search(r'player\.ooyala\.com/[^"?
]+\?[^
"]*?(?:embedCode|ec)=(?P<ec>[^"&]+)', webpage) or
1209 re.search(r'OO\
.Player\
.create\
([\'"].*?[\'"],\s
*[\'"](?P<ec>.{32})[\'"]', webpage) or
1210 re.search(r'SBN\
.VideoLinkset\
.ooyala\
([\'"](?P<ec>.{32})[\'"]\
)', webpage) or
1211 re.search(r'data
-ooyala
-video
-id\s
*=\s
*[\'"](?P<ec>.{32})[\'"]', webpage))
1212 if mobj is not None:
1213 return OoyalaIE._build_url_result(mobj.group('ec
'))
1215 # Look for multiple Ooyala embeds on SBN network websites
1216 mobj = re.search(r'SBN\
.VideoLinkset\
.entryGroup\
((\
[.*?\
])', webpage)
1217 if mobj is not None:
1218 embeds = self._parse_json(mobj.group(1), video_id, fatal=False)
1220 return _playlist_from_matches(
1221 embeds, getter=lambda v: OoyalaIE._url_for_embed_code(v['provider_video_id
']), ie='Ooyala
')
1223 # Look for Aparat videos
1224 mobj = re.search(r'<iframe
.*?src
="(http://www\.aparat\.com/video/[^"]+)"', webpage)
1225 if mobj is not None:
1226 return self.url_result(mobj.group(1), 'Aparat')
1228 # Look for MPORA videos
1229 mobj = re.search(r'<iframe .*?src="(http
://mpora\
.(?
:com|de
)/videos
/[^
"]+)"', webpage)
1230 if mobj is not None:
1231 return self.url_result(mobj.group(1), 'Mpora
')
1233 # Look for embedded NovaMov-based player
1235 r'''(?x)<(?:pagespeed_)?iframe[^>]+?src=(["\'])
1236 (?P<url>http://(?:(?:embed|www)\.)?
1238 nowvideo\.(?:ch|sx|eu|at|ag|co)|
1239 videoweed\.(?:es|com)|
1240 movshare\.(?:net|sx|ag)|
1241 divxstage\.(?:eu|net|ch|co|at|ag))
1242 /embed\.php.+?)\1''', webpage)
1243 if mobj is not None:
1244 return self.url_result(mobj.group('url
'))
1246 # Look for embedded Facebook player
1248 r'<iframe
[^
>]+?src
=(["\'])(?P<url>https://www\.facebook\.com/video/embed.+?)\1', webpage)
1249 if mobj is not None:
1250 return self.url_result(mobj.group('url'), 'Facebook')
1252 # Look for embedded VK player
1253 mobj = re.search(r'<iframe[^>]+?src=(["\'])(?P
<url
>https?
://vk\
.com
/video_ext\
.php
.+?
)\
1', webpage)
1254 if mobj is not None:
1255 return self.url_result(mobj.group('url
'), 'VK
')
1257 # Look for embedded ivi player
1258 mobj = re.search(r'<embed
[^
>]+?src
=(["\'])(?P<url>https?://(?:www\.)?ivi\.ru/video/player.+?)\1', webpage)
1259 if mobj is not None:
1260 return self.url_result(mobj.group('url'), 'Ivi')
1262 # Look for embedded Huffington Post player
1264 r'<iframe[^>]+?src=(["\'])(?P
<url
>https?
://embed\
.live\
.huffingtonpost\
.com
/.+?
)\
1', webpage)
1265 if mobj is not None:
1266 return self.url_result(mobj.group('url
'), 'HuffPost
')
1269 mobj = re.search(r'class=["\']embedly-card["\'][^
>]href
=["\'](?P<url>[^"\']+)', webpage)
1270 if mobj is not None:
1271 return self.url_result(mobj.group('url
'))
1272 mobj = re.search(r'class=["\']embedly-embed["\'][^
>]src
=["\'][^"\']*url
=(?P
<url
>[^
&]+)', webpage)
1273 if mobj is not None:
1274 return self.url_result(compat_urllib_parse.unquote(mobj.group('url
')))
1276 # Look for funnyordie embed
1277 matches = re.findall(r'<iframe
[^
>]+?src
="(https?://(?:www\.)?funnyordie\.com/embed/[^"]+)"', webpage)
1279 return _playlist_from_matches(
1280 matches, getter=unescapeHTML, ie='FunnyOrDie')
1282 # Look for BBC iPlayer embed
1283 matches = re.findall(r'setPlaylist\("(https?
://www\
.bbc\
.co\
.uk
/iplayer
/[^
/]+/[\da
-z
]{8}
)"\)', webpage)
1285 return _playlist_from_matches(matches, ie='BBCCoUk')
1287 # Look for embedded RUTV player
1288 rutv_url = RUTVIE._extract_url(webpage)
1290 return self.url_result(rutv_url, 'RUTV')
1292 # Look for embedded SportBox player
1293 sportbox_urls = SportBoxEmbedIE._extract_urls(webpage)
1295 return _playlist_from_matches(sportbox_urls, ie='SportBoxEmbed')
1297 # Look for embedded TED player
1299 r'<iframe[^>]+?src=(["\'])(?P
<url
>https?
://embed(?
:-ssl
)?\
.ted\
.com
/.+?
)\
1', webpage)
1300 if mobj is not None:
1301 return self.url_result(mobj.group('url
'), 'TED
')
1303 # Look for embedded Ustream videos
1305 r'<iframe
[^
>]+?src
=(["\'])(?P<url>http://www\.ustream\.tv/embed/.+?)\1', webpage)
1306 if mobj is not None:
1307 return self.url_result(mobj.group('url'), 'Ustream')
1309 # Look for embedded arte.tv player
1311 r'<script [^>]*?src="(?P
<url
>http
://www\
.arte\
.tv
/playerv2
/embed
[^
"]+)"',
1313 if mobj is not None:
1314 return self.url_result(mobj.group('url
'), 'ArteTVEmbed
')
1316 # Look for embedded smotri.com player
1317 smotri_url = SmotriIE._extract_url(webpage)
1319 return self.url_result(smotri_url, 'Smotri
')
1321 # Look for embedded soundcloud player
1323 r'<iframe\s
+(?
:[a
-zA
-Z0
-9_-]+="[^"]+"\s+)*src="(?P
<url
>https?
://(?
:w\
.)?soundcloud\
.com
/player
[^
"]+)"',
1325 if mobj is not None:
1326 url = unescapeHTML(mobj.group('url
'))
1327 return self.url_result(url)
1329 # Look for embedded vulture.com player
1331 r'<iframe src
="(?P<url>https?://video\.vulture\.com/[^"]+)"',
1333 if mobj is not None:
1334 url = unescapeHTML(mobj.group('url'))
1335 return self.url_result(url, ie='Vulture')
1337 # Look for embedded mtvservices player
1339 r'<iframe src="(?P
<url
>https?
://media\
.mtvnservices\
.com
/embed
/[^
"]+)"',
1341 if mobj is not None:
1342 url = unescapeHTML(mobj.group('url
'))
1343 return self.url_result(url, ie='MTVServicesEmbedded
')
1345 # Look for embedded yahoo player
1347 r'<iframe
[^
>]+?src
=(["\'])(?P<url>https?://(?:screen|movies)\.yahoo\.com/.+?\.html\?format=embed)\1',
1349 if mobj is not None:
1350 return self.url_result(mobj.group('url'), 'Yahoo')
1352 # Look for embedded sbs.com.au player
1356 <meta\s+property="og
:video
"\s+content=|
1359 (["\'])(?P
<url
>https?
://(?
:www\
.)?sbs\
.com\
.au
/ondemand
/video
/.+?
)\
1''',
1361 if mobj is not None:
1362 return self.url_result(mobj.group('url'), 'SBS')
1364 # Look for embedded Cinchcast player
1366 r'<iframe[^>]+?src=(["\'])(?P<url>https?://player\.cinchcast\.com/.+?)\1',
1368 if mobj is not None:
1369 return self.url_result(mobj.group('url'), 'Cinchcast')
1372 r'<iframe[^>]+?src=(["\'])(?P<url>https?://m(?:lb)?\.mlb\.com/shared/video/embed/embed\.html\?.+?)\1',
1376 r'data-video-link=["\'](?P<url>http://m.mlb.com/video/[^"\']+)',
1378 if mobj is not None:
1379 return self.url_result(mobj.group('url'), 'MLB')
1382 r'<iframe[^>]+?src=(["\'])(?P<url>%s)\1' % CondeNastIE.EMBED_URL,
1384 if mobj is not None:
1385 return self.url_result(self._proto_relative_url(mobj.group('url'), scheme='http:'), 'CondeNast')
1388 r'<iframe[^>]+src="(?P<url>https?://new\.livestream\.com/[^"]+/player[^"]+)"',
1390 if mobj is not None:
1391 return self.url_result(mobj.group('url'), 'Livestream')
1393 # Look for Zapiks embed
1395 r'<iframe[^>]+src="(?P<url>https?://(?:www\.)?zapiks\.fr/index\.php\?.+?)"', webpage)
1396 if mobj is not None:
1397 return self.url_result(mobj.group('url'), 'Zapiks')
1399 # Look for Kaltura embeds
1401 r"(?s)kWidget\.(?:thumb)?[Ee]mbed\(\{.*?'wid'\s*:\s*'_?(?P<partner_id>[^']+)',.*?'entry_id'\s*:\s*'(?P<id>[^']+)',", webpage)
1402 if mobj is not None:
1403 return self.url_result('kaltura:%(partner_id)s:%(id)s' % mobj.groupdict(), 'Kaltura')
1405 # Look for Eagle.Platform embeds
1407 r'<iframe[^>]+src="(?P<url>https?://.+?\.media\.eagleplatform\.com/index/player\?.+?)"', webpage)
1408 if mobj is not None:
1409 return self.url_result(mobj.group('url'), 'EaglePlatform')
1411 # Look for ClipYou (uses Eagle.Platform) embeds
1413 r'<iframe[^>]+src="https?://(?P<host>media\.clipyou\.ru)/index/player\?.*\brecord_id=(?P<id>\d+).*"', webpage)
1414 if mobj is not None:
1415 return self.url_result('eagleplatform:%(host)s:%(id)s' % mobj.groupdict(), 'EaglePlatform')
1417 # Look for Pladform embeds
1419 r'<iframe[^>]+src="(?P<url>https?://out\.pladform\.ru/player\?.+?)"', webpage)
1420 if mobj is not None:
1421 return self.url_result(mobj.group('url'), 'Pladform')
1423 # Look for Playwire embeds
1425 r'<script[^>]+data-config=(["\'])(?P<url>(?:https?:)?//config\.playwire\.com/.+?)\1', webpage)
1426 if mobj is not None:
1427 return self.url_result(mobj.group('url'))
1429 # Look for 5min embeds
1431 r'<meta[^>]+property="og:video"[^>]+content="https?://embed\.5min\.com/(?P<id>[0-9]+)/?', webpage)
1432 if mobj is not None:
1433 return self.url_result('5min:%s' % mobj.group('id'), 'FiveMin')
1435 # Look for Crooks and Liars embeds
1437 r'<(?:iframe[^>]+src|param[^>]+value)=(["\'])(?P<url>(?:https?:)?//embed\.crooksandliars\.com/(?:embed|v)/.+?)\1', webpage)
1438 if mobj is not None:
1439 return self.url_result(mobj.group('url'))
1441 # Look for NBC Sports VPlayer embeds
1442 nbc_sports_url = NBCSportsVPlayerIE._extract_url(webpage)
1444 return self.url_result(nbc_sports_url, 'NBCSportsVPlayer')
1446 # Look for UDN embeds
1448 r'<iframe[^>]+src="(?P<url>%s)"' % UDNEmbedIE._VALID_URL, webpage)
1449 if mobj is not None:
1450 return self.url_result(
1451 compat_urlparse.urljoin(url, mobj.group('url')), 'UDNEmbed')
1453 # Look for Senate ISVP iframe
1454 senate_isvp_url = SenateISVPIE._search_iframe_url(webpage)
1456 return self.url_result(senate_isvp_url, 'SenateISVP')
1458 def check_video(vurl):
1459 if YoutubeIE.suitable(vurl):
1461 vpath = compat_urlparse.urlparse(vurl).path
1462 vext = determine_ext(vpath)
1463 return '.' in vpath and vext not in ('swf', 'png', 'jpg', 'srt', 'sbv', 'sub', 'vtt', 'ttml')
1465 def filter_video(urls):
1466 return list(filter(check_video, urls))
1468 # Start with something easy: JW Player in SWFObject
1469 found = filter_video(re.findall(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage))
1471 # Look for gorilla-vid style embedding
1472 found = filter_video(re.findall(r'''(?sx
)
1476 jwplayer\s
*\
(\s
*["'][^'"]+["']\s*\)\s*\.setup
1479 ['"]?
file['"]?\s*:\s*["\'](.*?)["\']''', webpage))
1481 # Broaden the search a little bit
1482 found = filter_video(re.findall(r'[^A
-Za
-z0
-9]?
(?
:file|source
)=(http
[^
\'"&]*)', webpage))
1484 # Broaden the findall a little bit: JWPlayer JS loader
1485 found = filter_video(re.findall(
1486 r'[^A-Za-z0-9]?file["\']?
:\s
*["\'](http(?![^\'"]+\
.[0-9]+[\'"])[^\'"]+)["\']', webpage))
1489 found = filter_video(re.findall(r'''(?xs)
1490 flowplayer\("[^
"]+",\s
*
1492 \s
*\
{[^
}]+?
["']?clip["']?\s*:\s*\{\s*
1493 ["']?url
["']?\s*:\s*["']([^"']+)["']
1498 r"cinerama\
.embedPlayer\
(\s
*\'[^
']+\',\s*'([^
']+)'", webpage)
1500 # Try to find twitter cards info
1501 found = filter_video(re.findall(
1502 r'<meta (?:property|name)="twitter
:player
:stream
" (?:content|value)="(.+?
)"', webpage))
1504 # We look for Open Graph info:
1505 # We have to match any number of spaces between elements, since some sites try to align them (e.g. statigr.am)
1506 m_video_type = re.findall(r'<meta.*?property="og
:video
:type".*?content="video
/(.*?
)"', webpage)
1507 # We only look in og:video if the MIME type is a video, don't try if it's a Flash player:
1508 if m_video_type is not None:
1509 found = filter_video(re.findall(r'<meta.*?property="og
:video
".*?content="(.*?
)"', webpage))
1512 found = re.findall(r'(?s)<video[^<]*(?:>.*?<source[^>]*)?\s+src=["\'](.*?
)["\']', webpage)
1514 REDIRECT_REGEX = r'[0-9]{,2};\s*(?:URL|url)=\'?([^\'"]+)'
1516 r'(?i
)<meta\s
+(?
=(?
:[a
-z
-]+="[^"]+"\s+)*http-equiv="refresh
")'
1517 r'(?:[a-z-]+="[^
"]+"\s
+)*?content
="%s' % REDIRECT_REGEX,
1520 # Look also in Refresh HTTP header
1521 refresh_header = head_response.headers.get('Refresh')
1523 found = re.search(REDIRECT_REGEX, refresh_header)
1525 new_url = compat_urlparse.urljoin(url, found.group(1))
1526 self.report_following_redirect(new_url)
1532 raise UnsupportedError(url)
1535 for video_url in found:
1536 video_url = compat_urlparse.urljoin(url, video_url)
1537 video_id = compat_urllib_parse.unquote(os.path.basename(video_url))
1539 # Sometimes, jwplayer extraction will result in a YouTube URL
1540 if YoutubeIE.suitable(video_url):
1541 entries.append(self.url_result(video_url, 'Youtube'))
1544 # here's a fun little line of code for you:
1545 video_id = os.path.splitext(video_id)[0]
1547 if determine_ext(video_url) == 'smil':
1550 'formats': self._extract_smil_formats(video_url, video_id),
1551 'uploader': video_uploader,
1552 'title': video_title,
1553 'age_limit': age_limit,
1559 'uploader': video_uploader,
1560 'title': video_title,
1561 'age_limit': age_limit,
1564 if len(entries) == 1:
1567 for num, e in enumerate(entries, start=1):
1568 # 'url' results don't have a title
1569 if e.get('title') is not None:
1570 e['title'] = '%s (%d)' % (e['title'], num)
1572 '_type': 'playlist',