Merge tag 'upstream/2015.02.28'

[youtubedl] / youtube_dl / extractor / orf.py
diff --git a/youtube_dl/extractor/orf.py b/youtube_dl/extractor/orf.py

index 5f5694393765104b45b573c53155d447a45b1e50..4e293392b3d39b46ad1612d884068a2dbfaeef23 100644 (file)
--- a/youtube_dl/extractor/orf.py
+++ b/youtube_dl/extractor/orf.py
@@ -3,39 +3,69 @@ from __future__ import unicode_literals
  
  import json
  import re
+import calendar
+import datetime
  
  from .common import InfoExtractor
  from ..utils import (
      HEADRequest,
      unified_strdate,
+    ExtractorError,
  )
  
  
-class ORFIE(InfoExtractor):
-    _VALID_URL = r'https?://tvthek\.orf\.at/(?:programs/.+?/episodes|topics/.+?|program/[^/]+)/(?P<id>\d+)'
+class ORFTVthekIE(InfoExtractor):
+    IE_NAME = 'orf:tvthek'
+    IE_DESC = 'ORF TVthek'
+    _VALID_URL = r'https?://tvthek\.orf\.at/(?:programs/.+?/episodes|topics?/.+?|program/[^/]+)/(?P<id>\d+)'
  
-    _TEST = {
-        'url': 'http://tvthek.orf.at/program/matinee-Was-Sie-schon-immer-ueber-Klassik-wissen-wollten/7317210/Was-Sie-schon-immer-ueber-Klassik-wissen-wollten/7319746/Was-Sie-schon-immer-ueber-Klassik-wissen-wollten/7319747',
-        'file': '7319747.mp4',
-        'md5': 'bd803c5d8c32d3c64a0ea4b4eeddf375',
-        'info_dict': {
-            'title': 'Was Sie schon immer über Klassik wissen wollten',
-            'description': 'md5:0ddf0d5f0060bd53f744edaa5c2e04a4',
-            'duration': 3508,
-            'upload_date': '20140105',
-        },
-        'skip': 'Blocked outside of Austria',
-    }
+    _TESTS = [{
+        'url': 'http://tvthek.orf.at/program/Aufgetischt/2745173/Aufgetischt-Mit-der-Steirischen-Tafelrunde/8891389',
+        'playlist': [{
+            'md5': '2942210346ed779588f428a92db88712',
+            'info_dict': {
+                'id': '8896777',
+                'ext': 'mp4',
+                'title': 'Aufgetischt: Mit der Steirischen Tafelrunde',
+                'description': 'md5:c1272f0245537812d4e36419c207b67d',
+                'duration': 2668,
+                'upload_date': '20141208',
+            },
+        }],
+        'skip': 'Blocked outside of Austria / Germany',
+    }, {
+        'url': 'http://tvthek.orf.at/topic/Im-Wandel-der-Zeit/8002126/Best-of-Ingrid-Thurnher/7982256',
+        'playlist': [{
+            'md5': '68f543909aea49d621dfc7703a11cfaf',
+            'info_dict': {
+                'id': '7982259',
+                'ext': 'mp4',
+                'title': 'Best of Ingrid Thurnher',
+                'upload_date': '20140527',
+                'description': 'Viele Jahre war Ingrid Thurnher das "Gesicht" der ZIB 2. Vor ihrem Wechsel zur ZIB 2 im jahr 1995 moderierte sie unter anderem "Land und Leute", "Österreich-Bild" und "Niederösterreich heute".',
+            }
+        }],
+        '_skip': 'Blocked outside of Austria / Germany',
+    }]
  
      def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        playlist_id = mobj.group('id')
+        playlist_id = self._match_id(url)
          webpage = self._download_webpage(url, playlist_id)
  
          data_json = self._search_regex(
              r'initializeAdworx\((.+?)\);\n', webpage, 'video info')
          all_data = json.loads(data_json)
-        sdata = all_data[0]['values']['segments']
+
+        def get_segments(all_data):  # Return the 'segments' list of the first episode-detail tracker entry in all_data.
+            for data in all_data:
+                if data['name'] in (
+                        'Tracker::EPISODE_DETAIL_PAGE_OVER_PROGRAM',
+                        'Tracker::EPISODE_DETAIL_PAGE_OVER_TOPIC'):
+                    return data['values']['segments']  # no match falls through to an implicit None
+
+        sdata = get_segments(all_data)
+        if not sdata:
+            raise ExtractorError('Unable to extract segments')
  
          def quality_to_int(s):
              m = re.search('([0-9]+)', s)
@@ -96,3 +126,74 @@ class ORFIE(InfoExtractor):
              'entries': entries,
              'id': playlist_id,
          }
+
+
+class ORFOE1IE(InfoExtractor):  # Extractor for oe1.orf.at programme pages and console track links (see _VALID_URL).
+    IE_NAME = 'orf:oe1'
+    IE_DESC = 'Radio Österreich 1'
+    _VALID_URL = r'http://oe1\.orf\.at/(?:programm/|konsole.*?#\?track_id=)(?P<id>[0-9]+)'
+
+    # Audio on ORF radio is only available for 7 days, so only a URL-matching test is provided.
+    _TEST = {
+        'url': 'http://oe1.orf.at/konsole?show=on_demand#?track_id=394211',
+        'only_matching': True,
+    }
+
+    def _real_extract(self, url):  # Download the programme's console JSON and return an info dict for its stream.
+        show_id = self._match_id(url)
+        data = self._download_json(
+            'http://oe1.orf.at/programm/%s/konsole' % show_id,
+            show_id
+        )
+
+        timestamp = datetime.datetime.strptime('%s %s' % (
+            data['item']['day_label'],
+            data['item']['time']
+        ), '%d.%m.%Y %H:%M')  # naive datetime built from the item's day label and time strings
+        unix_timestamp = calendar.timegm(timestamp.utctimetuple())  # NOTE(review): the naive value is interpreted as UTC; the feed presumably reports Austrian local time - confirm
+
+        return {
+            'id': show_id,
+            'title': data['item']['title'],
+            'url': data['item']['url_stream'],
+            'ext': 'mp3',
+            'description': data['item'].get('info'),  # optional: None when the item has no 'info' key
+            'timestamp': unix_timestamp
+        }
+
+
+class ORFFM4IE(InfoExtractor):  # Extractor for fm4.orf.at "7tage" URLs; yields a playlist with one entry per stream.
+    IE_NAME = 'orf:fm4'
+    IE_DESC = 'radio FM4'
+    _VALID_URL = r'http://fm4\.orf\.at/7tage/?#(?P<date>[0-9]+)/(?P<show>\w+)'
+
+    def _real_extract(self, url):  # Fetch the broadcast JSON for the URL's date/show and return it as a playlist.
+        mobj = re.match(self._VALID_URL, url)
+        show_date = mobj.group('date')
+        show_id = mobj.group('show')
+
+        data = self._download_json(
+            'http://audioapi.orf.at/fm4/json/2.0/broadcasts/%s/4%s' % (show_date, show_id),  # a literal '4' precedes the show key
+            show_id
+        )
+
+        def extract_entry_dict(info, title, subtitle):  # Build the info dict for a single stream of the broadcast.
+            return {
+                'id': info['loopStreamId'].replace('.mp3', ''),  # stream id with every '.mp3' substring removed
+                'url': 'http://loopstream01.apa.at/?channel=fm4&id=%s' % info['loopStreamId'],
+                'title': title,
+                'description': subtitle,
+                'duration': (info['end'] - info['start']) / 1000,  # presumably 'start'/'end' are in milliseconds - confirm
+                'timestamp': info['start'] / 1000,  # NOTE(review): '/' gives a float on Python 3 - confirm that is acceptable
+                'ext': 'mp3'
+            }
+
+        entries = [extract_entry_dict(t, data['title'], data['subtitle']) for t in data['streams']]  # every entry shares the broadcast title and subtitle
+
+        return {
+            '_type': 'playlist',
+            'id': show_id,
+            'title': data['title'],
+            'description': data['subtitle'],
+            'entries': entries
+        }