make dash parser ignore segmented streams

This commit is contained in:
Christian Schabesberger 2018-08-21 17:23:56 +02:00
parent c4e16c7337
commit e662c97433
5 changed files with 128 additions and 20 deletions

View File

@ -433,7 +433,7 @@ public class YoutubeStreamExtractor extends StreamExtractor {
} }
@Override @Override
public List<VideoStream> getVideoOnlyStreams() throws IOException, ExtractionException { public List<VideoStream> getVideoOnlyStreams() throws ExtractionException {
assertPageFetched(); assertPageFetched();
List<VideoStream> videoOnlyStreams = new ArrayList<>(); List<VideoStream> videoOnlyStreams = new ArrayList<>();
try { try {

View File

@ -147,7 +147,10 @@ public class StreamInfo extends Info {
Exception dashMpdError = null; Exception dashMpdError = null;
if (streamInfo.getDashMpdUrl() != null && !streamInfo.getDashMpdUrl().isEmpty()) { if (streamInfo.getDashMpdUrl() != null && !streamInfo.getDashMpdUrl().isEmpty()) {
try { try {
DashMpdParser.getStreams(streamInfo); DashMpdParser.ParserResult result = DashMpdParser.getStreams(streamInfo);
streamInfo.getVideoOnlyStreams().addAll(result.getVideoOnlyStreams());
streamInfo.getAudioStreams().addAll(result.getAudioStreams());
streamInfo.getVideoStreams().addAll(result.getVideoStreams());
} catch (Exception e) { } catch (Exception e) {
// Sometimes we receive 403 (forbidden) error when trying to download the manifest (similar to what happens with youtube-dl), // Sometimes we receive 403 (forbidden) error when trying to download the manifest (similar to what happens with youtube-dl),
// just skip the exception (but store it somewhere), as we later check if we have streams anyway. // just skip the exception (but store it somewhere), as we later check if we have streams anyway.

View File

@ -12,6 +12,7 @@ import org.schabi.newpipe.extractor.stream.StreamInfo;
import org.schabi.newpipe.extractor.stream.VideoStream; import org.schabi.newpipe.extractor.stream.VideoStream;
import org.w3c.dom.Document; import org.w3c.dom.Document;
import org.w3c.dom.Element; import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList; import org.w3c.dom.NodeList;
import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.DocumentBuilder;
@ -19,6 +20,8 @@ import javax.xml.parsers.DocumentBuilderFactory;
import java.io.ByteArrayInputStream; import java.io.ByteArrayInputStream;
import java.io.IOException; import java.io.IOException;
import java.io.InputStream; import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;
/* /*
* Created by Christian Schabesberger on 02.02.16. * Created by Christian Schabesberger on 02.02.16.
@ -51,6 +54,30 @@ public class DashMpdParser {
} }
} }
/**
 * Holder for the three stream lists produced by one run of the DASH manifest parser.
 *
 * The lists handed to the constructor are stored as-is (no copy is made) and the
 * getters hand back those very same list instances.
 */
public static class ParserResult {

    /** Streams that are neither audio nor video-only. */
    private final List<VideoStream> videoStreams;

    /** Audio streams. */
    private final List<AudioStream> audioStreams;

    /** Video-only streams. */
    private final List<VideoStream> videoOnlyStreams;

    /**
     * @param videoStreams     list returned by {@link #getVideoStreams()}
     * @param audioStreams     list returned by {@link #getAudioStreams()}
     * @param videoOnlyStreams list returned by {@link #getVideoOnlyStreams()}
     */
    public ParserResult(List<VideoStream> videoStreams,
                        List<AudioStream> audioStreams,
                        List<VideoStream> videoOnlyStreams) {
        this.videoStreams = videoStreams;
        this.audioStreams = audioStreams;
        this.videoOnlyStreams = videoOnlyStreams;
    }

    /** @return the video stream list given at construction time */
    public List<VideoStream> getVideoStreams() {
        return this.videoStreams;
    }

    /** @return the audio stream list given at construction time */
    public List<AudioStream> getAudioStreams() {
        return this.audioStreams;
    }

    /** @return the video-only stream list given at construction time */
    public List<VideoStream> getVideoOnlyStreams() {
        return this.videoOnlyStreams;
    }
}
/** /**
* Will try to download (using {@link StreamInfo#dashMpdUrl}) and parse the dash manifest, * Will try to download (using {@link StreamInfo#dashMpdUrl}) and parse the dash manifest,
* then it will search for any stream that the ItagItem has (by the id). * then it will search for any stream that the ItagItem has (by the id).
@ -58,9 +85,12 @@ public class DashMpdParser {
* It has video, video only and audio streams and will only add to the list if it don't * It has video, video only and audio streams and will only add to the list if it don't
* find a similar stream in the respective lists (calling {@link Stream#equalStats}). * find a similar stream in the respective lists (calling {@link Stream#equalStats}).
* *
* Info about dash MPD can be found here:
* @see <a href="https://www.brendanlong.com/the-structure-of-an-mpeg-dash-mpd.html">www.brendanlong.com</a>
*
* @param streamInfo where the parsed streams will be added * @param streamInfo where the parsed streams will be added
*/ */
public static void getStreams(StreamInfo streamInfo) throws DashMpdParsingException, ReCaptchaException { public static ParserResult getStreams(final StreamInfo streamInfo) throws DashMpdParsingException, ReCaptchaException {
String dashDoc; String dashDoc;
Downloader downloader = NewPipe.getDownloader(); Downloader downloader = NewPipe.getDownloader();
try { try {
@ -72,45 +102,58 @@ public class DashMpdParser {
} }
try { try {
DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); final DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
DocumentBuilder builder = factory.newDocumentBuilder(); final DocumentBuilder builder = factory.newDocumentBuilder();
InputStream stream = new ByteArrayInputStream(dashDoc.getBytes()); final InputStream stream = new ByteArrayInputStream(dashDoc.getBytes());
Document doc = builder.parse(stream); final Document doc = builder.parse(stream);
NodeList representationList = doc.getElementsByTagName("Representation"); final NodeList representationList = doc.getElementsByTagName("Representation");
final List<VideoStream> videoStreams = new ArrayList<>();
final List<AudioStream> audioStreams = new ArrayList<>();
final List<VideoStream> videoOnlyStreams = new ArrayList<>();
for (int i = 0; i < representationList.getLength(); i++) { for (int i = 0; i < representationList.getLength(); i++) {
Element representation = ((Element) representationList.item(i)); final Element representation = (Element) representationList.item(i);
try { try {
String mimeType = ((Element) representation.getParentNode()).getAttribute("mimeType"); final String mimeType = ((Element) representation.getParentNode()).getAttribute("mimeType");
String id = representation.getAttribute("id"); final String id = representation.getAttribute("id");
String url = representation.getElementsByTagName("BaseURL").item(0).getTextContent(); final String url = representation.getElementsByTagName("BaseURL").item(0).getTextContent();
ItagItem itag = ItagItem.getItag(Integer.parseInt(id)); final ItagItem itag = ItagItem.getItag(Integer.parseInt(id));
if (itag != null) { final Node segmentationList = representation.getElementsByTagName("SegmentList").item(0);
MediaFormat mediaFormat = MediaFormat.getFromMimeType(mimeType);
// If SegmentList is not null, this means that BaseURL does not represent the URL of the stream.
// Instead, we would need to add the "media=" value from the <SegmentURL/> tags inside the <SegmentList/>
// tag in order to get a fully working URL. However, each of these just points to a part of the
// video, so we cannot return a URL to a working stream here.
// We decided to ignore such streams for the moment.
if (itag != null && segmentationList == null) {
final MediaFormat mediaFormat = MediaFormat.getFromMimeType(mimeType);
if (itag.itagType.equals(ItagItem.ItagType.AUDIO)) { if (itag.itagType.equals(ItagItem.ItagType.AUDIO)) {
AudioStream audioStream = new AudioStream(url, mediaFormat, itag.avgBitrate); final AudioStream audioStream = new AudioStream(url, mediaFormat, itag.avgBitrate);
if (!Stream.containSimilarStream(audioStream, streamInfo.getAudioStreams())) { if (!Stream.containSimilarStream(audioStream, streamInfo.getAudioStreams())) {
streamInfo.getAudioStreams().add(audioStream); audioStreams.add(audioStream);
} }
} else { } else {
boolean isVideoOnly = itag.itagType.equals(ItagItem.ItagType.VIDEO_ONLY); boolean isVideoOnly = itag.itagType.equals(ItagItem.ItagType.VIDEO_ONLY);
VideoStream videoStream = new VideoStream(url, mediaFormat, itag.resolutionString, isVideoOnly); final VideoStream videoStream = new VideoStream(url, mediaFormat, itag.resolutionString, isVideoOnly);
if (isVideoOnly) { if (isVideoOnly) {
if (!Stream.containSimilarStream(videoStream, streamInfo.getVideoOnlyStreams())) { if (!Stream.containSimilarStream(videoStream, streamInfo.getVideoOnlyStreams())) {
streamInfo.getVideoOnlyStreams().add(videoStream); streamInfo.getVideoOnlyStreams().add(videoStream);
videoOnlyStreams.add(videoStream);
} }
} else if (!Stream.containSimilarStream(videoStream, streamInfo.getVideoStreams())) { } else if (!Stream.containSimilarStream(videoStream, streamInfo.getVideoStreams())) {
streamInfo.getVideoStreams().add(videoStream); videoStreams.add(videoStream);
} }
} }
} }
} catch (Exception ignored) { } catch (Exception ignored) {
} }
} }
return new ParserResult(videoStreams, audioStreams, videoOnlyStreams);
} catch (Exception e) { } catch (Exception e) {
throw new DashMpdParsingException("Could not parse Dash mpd", e); throw new DashMpdParsingException("Could not parse Dash mpd", e);
} }

View File

@ -0,0 +1,60 @@
package org.schabi.newpipe.extractor.services.youtube;
/*
* Created by Christian Schabesberger on 30.12.15.
*
* Copyright (C) Christian Schabesberger 2015 <chris.schabesberger@mailbox.org>
* YoutubeVideoExtractorDefault.java is part of NewPipe.
*
* NewPipe is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* NewPipe is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with NewPipe. If not, see <http://www.gnu.org/licenses/>.
*/
import org.junit.BeforeClass;
import org.junit.Test;
import org.schabi.newpipe.Downloader;
import org.schabi.newpipe.extractor.NewPipe;
import org.schabi.newpipe.extractor.stream.StreamExtractor;
import org.schabi.newpipe.extractor.stream.StreamInfo;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import static org.schabi.newpipe.extractor.ServiceList.YouTube;
/**
 * Test for {@link StreamExtractor}, focused on the DASH manifest of one fixed YouTube video.
 *
 * The {@link StreamInfo} is fetched once in {@link #setUp()} and shared by all test methods.
 */
public class YoutubeStreamExtractorDASHText {

    /** Stream info of the video under test, populated once before any test runs. */
    private static StreamInfo info;

    /**
     * Initialises NewPipe with the test downloader and loads the stream info of the test video.
     */
    @BeforeClass
    public static void setUp() throws Exception {
        NewPipe.init(Downloader.getInstance());
        info = StreamInfo.getInfo(YouTube, "https://www.youtube.com/watch?v=00Q4SUnVQK4");
    }

    /**
     * The DASH MPD url must be present and non-empty; it is also printed for manual inspection.
     */
    @Test
    public void testGetDashMpd() {
        final String dashMpdUrl = info.getDashMpdUrl();
        System.out.println(dashMpdUrl);

        final boolean missing = dashMpdUrl == null || dashMpdUrl.isEmpty();
        assertTrue(dashMpdUrl, !missing);
    }

    /**
     * Pins the expected number of streams per category for the test video.
     */
    @Test
    public void testDashMpdParser() {
        // NOTE(review): presumably the audio and video-only counts are zero because the
        // DASH parser skips segmented representations for this video - confirm.
        final int audioCount = info.getAudioStreams().size();
        assertEquals(0, audioCount);

        final int videoOnlyCount = info.getVideoOnlyStreams().size();
        assertEquals(0, videoOnlyCount);

        final int videoCount = info.getVideoStreams().size();
        assertEquals(4, videoCount);
    }
}

View File

@ -8,6 +8,7 @@ import org.schabi.newpipe.extractor.exceptions.ExtractionException;
import org.schabi.newpipe.extractor.exceptions.ParsingException; import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.services.youtube.extractors.YoutubeStreamExtractor; import org.schabi.newpipe.extractor.services.youtube.extractors.YoutubeStreamExtractor;
import org.schabi.newpipe.extractor.stream.*; import org.schabi.newpipe.extractor.stream.*;
import org.schabi.newpipe.extractor.utils.DashMpdParser;
import org.schabi.newpipe.extractor.utils.Utils; import org.schabi.newpipe.extractor.utils.Utils;
import java.io.IOException; import java.io.IOException;
@ -140,8 +141,9 @@ public class YoutubeStreamExtractorDefaultTest {
@Test @Test
public void testGetDashMpd() throws ParsingException { public void testGetDashMpd() throws ParsingException {
// We don't expect this particular video to have a DASH file. For this purpose we use a different test class.
assertTrue(extractor.getDashMpdUrl(), assertTrue(extractor.getDashMpdUrl(),
extractor.getDashMpdUrl() != null || !extractor.getDashMpdUrl().isEmpty()); extractor.getDashMpdUrl() != null && extractor.getDashMpdUrl().isEmpty());
} }
@Test @Test