make dash parser ignore segmented streams

2018-08-21 17:23:56 +02:00 · 2018-08-21 17:23:56 +02:00 · e662c97433
parent c4e16c7337
commit e662c97433
5 changed files with 128 additions and 20 deletions
--- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java
+++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java
@ -433,7 +433,7 @@ public class YoutubeStreamExtractor extends StreamExtractor {
    }

    @Override
-    public List<VideoStream> getVideoOnlyStreams() throws IOException, ExtractionException {
+    public List<VideoStream> getVideoOnlyStreams() throws ExtractionException {
        assertPageFetched();
        List<VideoStream> videoOnlyStreams = new ArrayList<>();
        try {
--- a/extractor/src/main/java/org/schabi/newpipe/extractor/stream/StreamInfo.java
+++ b/extractor/src/main/java/org/schabi/newpipe/extractor/stream/StreamInfo.java
@ -147,7 +147,10 @@ public class StreamInfo extends Info {
        Exception dashMpdError = null;
        if (streamInfo.getDashMpdUrl() != null && !streamInfo.getDashMpdUrl().isEmpty()) {
            try {
-                DashMpdParser.getStreams(streamInfo);
+                DashMpdParser.ParserResult result = DashMpdParser.getStreams(streamInfo);
+                streamInfo.getVideoOnlyStreams().addAll(result.getVideoOnlyStreams());
+                streamInfo.getAudioStreams().addAll(result.getAudioStreams());
+                streamInfo.getVideoStreams().addAll(result.getVideoStreams());
            } catch (Exception e) {
                // Sometimes we receive 403 (forbidden) error when trying to download the manifest (similar to what happens with youtube-dl),
                // just skip the exception (but store it somewhere), as we later check if we have streams anyway.
--- a/extractor/src/main/java/org/schabi/newpipe/extractor/utils/DashMpdParser.java
+++ b/extractor/src/main/java/org/schabi/newpipe/extractor/utils/DashMpdParser.java
@ -12,6 +12,7 @@ import org.schabi.newpipe.extractor.stream.StreamInfo;
 import org.schabi.newpipe.extractor.stream.VideoStream;
 import org.w3c.dom.Document;
 import org.w3c.dom.Element;
+import org.w3c.dom.Node;
 import org.w3c.dom.NodeList;

 import javax.xml.parsers.DocumentBuilder;
@ -19,6 +20,8 @@ import javax.xml.parsers.DocumentBuilderFactory;
 import java.io.ByteArrayInputStream;
 import java.io.IOException;
 import java.io.InputStream;
+import java.util.ArrayList;
+import java.util.List;

 /*
 * Created by Christian Schabesberger on 02.02.16.
@ -51,6 +54,30 @@ public class DashMpdParser {
        }
    }

+    public static class ParserResult {
+        private final List<VideoStream> videoStreams;
+        private final List<AudioStream> audioStreams;
+        private final List<VideoStream> videoOnlyStreams;
+
+        public ParserResult(List<VideoStream> videoStreams, List<AudioStream> audioStreams, List<VideoStream> videoOnlyStreams) {
+            this.videoStreams = videoStreams;
+            this.audioStreams = audioStreams;
+            this.videoOnlyStreams = videoOnlyStreams;
+        }
+
+        public List<VideoStream> getVideoStreams() {
+            return videoStreams;
+        }
+
+        public List<AudioStream> getAudioStreams() {
+            return audioStreams;
+        }
+
+        public List<VideoStream> getVideoOnlyStreams() {
+            return videoOnlyStreams;
+        }
+    }
+
    /**
     * Will try to download (using {@link StreamInfo#dashMpdUrl}) and parse the dash manifest,
     * then it will search for any stream that the ItagItem has (by the id).
@ -58,9 +85,12 @@ public class DashMpdParser {
     * It has video, video only and audio streams and will only add to the list if it don't
     * find a similar stream in the respective lists (calling {@link Stream#equalStats}).
     *
+     * Info about dash MPD can be found here
+     * @see <a href="https://www.brendanlong.com/the-structure-of-an-mpeg-dash-mpd.html">www.brendanlong.com</a>
+     *
     * @param streamInfo where the parsed streams will be added
     */
-    public static void getStreams(StreamInfo streamInfo) throws DashMpdParsingException, ReCaptchaException {
+    public static ParserResult getStreams(final StreamInfo streamInfo) throws DashMpdParsingException, ReCaptchaException {
        String dashDoc;
        Downloader downloader = NewPipe.getDownloader();
        try {
@ -72,45 +102,58 @@ public class DashMpdParser {
        }

        try {
-            DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
-            DocumentBuilder builder = factory.newDocumentBuilder();
-            InputStream stream = new ByteArrayInputStream(dashDoc.getBytes());
+            final DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
+            final DocumentBuilder builder = factory.newDocumentBuilder();
+            final InputStream stream = new ByteArrayInputStream(dashDoc.getBytes());

-            Document doc = builder.parse(stream);
-            NodeList representationList = doc.getElementsByTagName("Representation");
+            final Document doc = builder.parse(stream);
+            final NodeList representationList = doc.getElementsByTagName("Representation");
+
+            final List<VideoStream> videoStreams = new ArrayList<>();
+            final List<AudioStream> audioStreams = new ArrayList<>();
+            final List<VideoStream> videoOnlyStreams = new ArrayList<>();

            for (int i = 0; i < representationList.getLength(); i++) {
-                Element representation = ((Element) representationList.item(i));
+                final Element representation = (Element) representationList.item(i);
                try {
-                    String mimeType = ((Element) representation.getParentNode()).getAttribute("mimeType");
-                    String id = representation.getAttribute("id");
-                    String url = representation.getElementsByTagName("BaseURL").item(0).getTextContent();
-                    ItagItem itag = ItagItem.getItag(Integer.parseInt(id));
-                    if (itag != null) {
-                        MediaFormat mediaFormat = MediaFormat.getFromMimeType(mimeType);
+                    final String mimeType = ((Element) representation.getParentNode()).getAttribute("mimeType");
+                    final String id = representation.getAttribute("id");
+                    final String url = representation.getElementsByTagName("BaseURL").item(0).getTextContent();
+                    final ItagItem itag = ItagItem.getItag(Integer.parseInt(id));
+                    final Node segmentationList = representation.getElementsByTagName("SegmentList").item(0);
+
+                    // If SegmentList is not null, BaseURL does not represent the url to the stream.
+                    // Instead we would need to add the "media=" value from the <SegmentURL/> tags inside the <SegmentList/>
+                    // tag in order to get a full working url. However, each of these just points to a part of the
+                    // video, so we can not return a URL with a working stream here.
+                    // We decided to ignore such streams for the moment.
+                    if (itag != null && segmentationList == null) {
+                        final MediaFormat mediaFormat = MediaFormat.getFromMimeType(mimeType);

                        if (itag.itagType.equals(ItagItem.ItagType.AUDIO)) {
-                            AudioStream audioStream = new AudioStream(url, mediaFormat, itag.avgBitrate);
+                            final AudioStream audioStream = new AudioStream(url, mediaFormat, itag.avgBitrate);

                            if (!Stream.containSimilarStream(audioStream, streamInfo.getAudioStreams())) {
-                                streamInfo.getAudioStreams().add(audioStream);
+                                audioStreams.add(audioStream);
                            }
                        } else {
                            boolean isVideoOnly = itag.itagType.equals(ItagItem.ItagType.VIDEO_ONLY);
-                            VideoStream videoStream = new VideoStream(url, mediaFormat, itag.resolutionString, isVideoOnly);
+                            final VideoStream videoStream = new VideoStream(url, mediaFormat, itag.resolutionString, isVideoOnly);

                            if (isVideoOnly) {
                                if (!Stream.containSimilarStream(videoStream, streamInfo.getVideoOnlyStreams())) {
                                    streamInfo.getVideoOnlyStreams().add(videoStream);
+                                    videoOnlyStreams.add(videoStream);
                                }
                            } else if (!Stream.containSimilarStream(videoStream, streamInfo.getVideoStreams())) {
-                                streamInfo.getVideoStreams().add(videoStream);
+                                videoStreams.add(videoStream);
                            }
                        }
                    }
                } catch (Exception ignored) {
                }
            }
+            return new ParserResult(videoStreams, audioStreams, videoOnlyStreams);
        } catch (Exception e) {
            throw new DashMpdParsingException("Could not parse Dash mpd", e);
        }
--- a/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeStreamExtractorDASHText.java
+++ b/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeStreamExtractorDASHText.java
@ -0,0 +1,60 @@
+package org.schabi.newpipe.extractor.services.youtube;
+
+
+/*
+ * Created by Christian Schabesberger on 30.12.15.
+ *
+ * Copyright (C) Christian Schabesberger 2015 <chris.schabesberger@mailbox.org>
+ * YoutubeStreamExtractorDASHText.java is part of NewPipe.
+ *
+ * NewPipe is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * NewPipe is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with NewPipe.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import org.junit.BeforeClass;
+import org.junit.Test;
+import org.schabi.newpipe.Downloader;
+import org.schabi.newpipe.extractor.NewPipe;
+import org.schabi.newpipe.extractor.stream.StreamExtractor;
+import org.schabi.newpipe.extractor.stream.StreamInfo;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+import static org.schabi.newpipe.extractor.ServiceList.YouTube;
+
+/**
+ * Test for {@link StreamExtractor}
+ */
+public class YoutubeStreamExtractorDASHText {
+    private static StreamInfo info;
+
+    @BeforeClass
+    public static void setUp() throws Exception {
+        NewPipe.init(Downloader.getInstance());
+        info = StreamInfo.getInfo(YouTube, "https://www.youtube.com/watch?v=00Q4SUnVQK4");
+    }
+
+    @Test
+    public void testGetDashMpd() {
+        System.out.println(info.getDashMpdUrl());
+        assertTrue(info.getDashMpdUrl(),
+                info.getDashMpdUrl() != null && !info.getDashMpdUrl().isEmpty());
+    }
+
+    @Test
+    public void testDashMpdParser() {
+        assertEquals(0, info.getAudioStreams().size());
+        assertEquals(0, info.getVideoOnlyStreams().size());
+        assertEquals(4, info.getVideoStreams().size());
+    }
+}
--- a/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeStreamExtractorDefaultTest.java
+++ b/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeStreamExtractorDefaultTest.java
@ -8,6 +8,7 @@ import org.schabi.newpipe.extractor.exceptions.ExtractionException;
 import org.schabi.newpipe.extractor.exceptions.ParsingException;
 import org.schabi.newpipe.extractor.services.youtube.extractors.YoutubeStreamExtractor;
 import org.schabi.newpipe.extractor.stream.*;
+import org.schabi.newpipe.extractor.utils.DashMpdParser;
 import org.schabi.newpipe.extractor.utils.Utils;

 import java.io.IOException;
@ -140,8 +141,9 @@ public class YoutubeStreamExtractorDefaultTest {

    @Test
    public void testGetDashMpd() throws ParsingException {
+        // We don't expect this particular video to have a DASH file. For this purpose we use a different test class.
        assertTrue(extractor.getDashMpdUrl(),
-                extractor.getDashMpdUrl() != null || !extractor.getDashMpdUrl().isEmpty());
+                extractor.getDashMpdUrl() != null && extractor.getDashMpdUrl().isEmpty());
    }

    @Test