Update changelog.

[youtubedl] / youtube_dl / extractor / ard.py
diff --git a/youtube_dl/extractor/ard.py b/youtube_dl/extractor/ard.py

index 07e67dd3393962eedccd2460954711d01f08efdb..915f8862e3769c3f186209435dcc34029dead932 100644 (file)
--- a/youtube_dl/extractor/ard.py
+++ b/youtube_dl/extractor/ard.py
@@ -93,6 +93,7 @@ class ARDMediathekIE(InfoExtractor):
  
          duration = int_or_none(media_info.get('_duration'))
          thumbnail = media_info.get('_previewImage')
+        is_live = media_info.get('_isLive') is True
  
          subtitles = {}
          subtitle_url = media_info.get('_subtitleUrl')
@@ -106,6 +107,7 @@ class ARDMediathekIE(InfoExtractor):
              'id': video_id,
              'duration': duration,
              'thumbnail': thumbnail,
+            'is_live': is_live,
              'formats': formats,
              'subtitles': subtitles,
          }
@@ -166,19 +168,25 @@ class ARDMediathekIE(InfoExtractor):
          # determine video id from url
          m = re.match(self._VALID_URL, url)
  
+        document_id = None
+
          numid = re.search(r'documentId=([0-9]+)', url)
          if numid:
-            video_id = numid.group(1)
+            document_id = video_id = numid.group(1)
          else:
              video_id = m.group('video_id')
  
          webpage = self._download_webpage(url, video_id)
  
-        if '>Der gewünschte Beitrag ist nicht mehr verfügbar.<' in webpage:
-            raise ExtractorError('Video %s is no longer available' % video_id, expected=True)
+        ERRORS = (
+            ('>Leider liegt eine Störung vor.', 'Video %s is unavailable'),
+            ('>Der gewünschte Beitrag ist nicht mehr verfügbar.<',
+             'Video %s is no longer available'),
+        )
  
-        if 'Diese Sendung ist für Jugendliche unter 12 Jahren nicht geeignet. Der Clip ist deshalb nur von 20 bis 6 Uhr verfügbar.' in webpage:
-            raise ExtractorError('This program is only suitable for those aged 12 and older. Video %s is therefore only available between 20 pm and 6 am.' % video_id, expected=True)
+        for pattern, message in ERRORS:
+            if pattern in webpage:
+                raise ExtractorError(message % video_id, expected=True)
  
          if re.search(r'[\?&]rss($|[=&])', url):
              doc = compat_etree_fromstring(webpage.encode('utf-8'))
@@ -187,7 +195,7 @@ class ARDMediathekIE(InfoExtractor):
  
          title = self._html_search_regex(
              [r'<h1(?:\s+class="boxTopHeadline")?>(.*?)</h1>',
-             r'<meta name="dcterms.title" content="(.*?)"/>',
+             r'<meta name="dcterms\.title" content="(.*?)"/>',
               r'<h4 class="headline">(.*?)</h4>'],
              webpage, 'title')
          description = self._html_search_meta(
@@ -224,12 +232,16 @@ class ARDMediathekIE(InfoExtractor):
                  'formats': formats,
              }
          else:  # request JSON file
+            if not document_id:
+                video_id = self._search_regex(
+                    r'/play/(?:config|media)/(\d+)', webpage, 'media id')
              info = self._extract_media_info(
-                'http://www.ardmediathek.de/play/media/%s' % video_id, webpage, video_id)
+                'http://www.ardmediathek.de/play/media/%s' % video_id,
+                webpage, video_id)
  
          info.update({
              'id': video_id,
-            'title': title,
+            'title': self._live_title(title) if info.get('is_live') else title,
              'description': description,
              'thumbnail': thumbnail,
          })
@@ -238,7 +250,7 @@ class ARDMediathekIE(InfoExtractor):
  
  
  class ARDIE(InfoExtractor):
-    _VALID_URL = '(?P<mainurl>https?://(www\.)?daserste\.de/[^?#]+/videos/(?P<display_id>[^/?#]+)-(?P<id>[0-9]+))\.html'
+    _VALID_URL = r'(?P<mainurl>https?://(www\.)?daserste\.de/[^?#]+/videos/(?P<display_id>[^/?#]+)-(?P<id>[0-9]+))\.html'
      _TEST = {
          'url': 'http://www.daserste.de/information/reportage-dokumentation/dokus/videos/die-story-im-ersten-mission-unter-falscher-flagge-100.html',
          'md5': 'd216c3a86493f9322545e045ddc3eb35',
@@ -249,7 +261,7 @@ class ARDIE(InfoExtractor):
              'duration': 2600,
              'title': 'Die Story im Ersten: Mission unter falscher Flagge',
              'upload_date': '20140804',
-            'thumbnail': 're:^https?://.*\.jpg$',
+            'thumbnail': r're:^https?://.*\.jpg$',
          },
          'skip': 'HTTP Error 404: Not Found',
      }