make dash parser ignore segmented streams
This commit is contained in:
parent
c4e16c7337
commit
e662c97433
|
@ -433,7 +433,7 @@ public class YoutubeStreamExtractor extends StreamExtractor {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public List<VideoStream> getVideoOnlyStreams() throws IOException, ExtractionException {
|
public List<VideoStream> getVideoOnlyStreams() throws ExtractionException {
|
||||||
assertPageFetched();
|
assertPageFetched();
|
||||||
List<VideoStream> videoOnlyStreams = new ArrayList<>();
|
List<VideoStream> videoOnlyStreams = new ArrayList<>();
|
||||||
try {
|
try {
|
||||||
|
|
|
@ -147,7 +147,10 @@ public class StreamInfo extends Info {
|
||||||
Exception dashMpdError = null;
|
Exception dashMpdError = null;
|
||||||
if (streamInfo.getDashMpdUrl() != null && !streamInfo.getDashMpdUrl().isEmpty()) {
|
if (streamInfo.getDashMpdUrl() != null && !streamInfo.getDashMpdUrl().isEmpty()) {
|
||||||
try {
|
try {
|
||||||
DashMpdParser.getStreams(streamInfo);
|
DashMpdParser.ParserResult result = DashMpdParser.getStreams(streamInfo);
|
||||||
|
streamInfo.getVideoOnlyStreams().addAll(result.getVideoOnlyStreams());
|
||||||
|
streamInfo.getAudioStreams().addAll(result.getAudioStreams());
|
||||||
|
streamInfo.getVideoStreams().addAll(result.getVideoStreams());
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
// Sometimes we receive 403 (forbidden) error when trying to download the manifest (similar to what happens with youtube-dl),
|
// Sometimes we receive 403 (forbidden) error when trying to download the manifest (similar to what happens with youtube-dl),
|
||||||
// just skip the exception (but store it somewhere), as we later check if we have streams anyway.
|
// just skip the exception (but store it somewhere), as we later check if we have streams anyway.
|
||||||
|
|
|
@ -12,6 +12,7 @@ import org.schabi.newpipe.extractor.stream.StreamInfo;
|
||||||
import org.schabi.newpipe.extractor.stream.VideoStream;
|
import org.schabi.newpipe.extractor.stream.VideoStream;
|
||||||
import org.w3c.dom.Document;
|
import org.w3c.dom.Document;
|
||||||
import org.w3c.dom.Element;
|
import org.w3c.dom.Element;
|
||||||
|
import org.w3c.dom.Node;
|
||||||
import org.w3c.dom.NodeList;
|
import org.w3c.dom.NodeList;
|
||||||
|
|
||||||
import javax.xml.parsers.DocumentBuilder;
|
import javax.xml.parsers.DocumentBuilder;
|
||||||
|
@ -19,6 +20,8 @@ import javax.xml.parsers.DocumentBuilderFactory;
|
||||||
import java.io.ByteArrayInputStream;
|
import java.io.ByteArrayInputStream;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.InputStream;
|
import java.io.InputStream;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Created by Christian Schabesberger on 02.02.16.
|
* Created by Christian Schabesberger on 02.02.16.
|
||||||
|
@ -51,6 +54,30 @@ public class DashMpdParser {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static class ParserResult {
|
||||||
|
private final List<VideoStream> videoStreams;
|
||||||
|
private final List<AudioStream> audioStreams;
|
||||||
|
private final List<VideoStream> videoOnlyStreams;
|
||||||
|
|
||||||
|
public ParserResult(List<VideoStream> videoStreams, List<AudioStream> audioStreams, List<VideoStream> videoOnlyStreams) {
|
||||||
|
this.videoStreams = videoStreams;
|
||||||
|
this.audioStreams = audioStreams;
|
||||||
|
this.videoOnlyStreams = videoOnlyStreams;
|
||||||
|
}
|
||||||
|
|
||||||
|
public List<VideoStream> getVideoStreams() {
|
||||||
|
return videoStreams;
|
||||||
|
}
|
||||||
|
|
||||||
|
public List<AudioStream> getAudioStreams() {
|
||||||
|
return audioStreams;
|
||||||
|
}
|
||||||
|
|
||||||
|
public List<VideoStream> getVideoOnlyStreams() {
|
||||||
|
return videoOnlyStreams;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Will try to download (using {@link StreamInfo#dashMpdUrl}) and parse the dash manifest,
|
* Will try to download (using {@link StreamInfo#dashMpdUrl}) and parse the dash manifest,
|
||||||
* then it will search for any stream that the ItagItem has (by the id).
|
* then it will search for any stream that the ItagItem has (by the id).
|
||||||
|
@ -58,9 +85,12 @@ public class DashMpdParser {
|
||||||
* It has video, video only and audio streams and will only add to the list if it doesn't
|
* It has video, video only and audio streams and will only add to the list if it doesn't
|
||||||
* find a similar stream in the respective lists (calling {@link Stream#equalStats}).
|
* find a similar stream in the respective lists (calling {@link Stream#equalStats}).
|
||||||
*
|
*
|
||||||
|
* Info about dash MPD can be found here
|
||||||
|
* @see <a href="https://www.brendanlong.com/the-structure-of-an-mpeg-dash-mpd.html">www.brendanlong.com</a>
|
||||||
|
*
|
||||||
* @param streamInfo where the parsed streams will be added
|
* @param streamInfo where the parsed streams will be added
|
||||||
*/
|
*/
|
||||||
public static void getStreams(StreamInfo streamInfo) throws DashMpdParsingException, ReCaptchaException {
|
public static ParserResult getStreams(final StreamInfo streamInfo) throws DashMpdParsingException, ReCaptchaException {
|
||||||
String dashDoc;
|
String dashDoc;
|
||||||
Downloader downloader = NewPipe.getDownloader();
|
Downloader downloader = NewPipe.getDownloader();
|
||||||
try {
|
try {
|
||||||
|
@ -72,45 +102,58 @@ public class DashMpdParser {
|
||||||
}
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
|
final DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
|
||||||
DocumentBuilder builder = factory.newDocumentBuilder();
|
final DocumentBuilder builder = factory.newDocumentBuilder();
|
||||||
InputStream stream = new ByteArrayInputStream(dashDoc.getBytes());
|
final InputStream stream = new ByteArrayInputStream(dashDoc.getBytes());
|
||||||
|
|
||||||
Document doc = builder.parse(stream);
|
final Document doc = builder.parse(stream);
|
||||||
NodeList representationList = doc.getElementsByTagName("Representation");
|
final NodeList representationList = doc.getElementsByTagName("Representation");
|
||||||
|
|
||||||
|
final List<VideoStream> videoStreams = new ArrayList<>();
|
||||||
|
final List<AudioStream> audioStreams = new ArrayList<>();
|
||||||
|
final List<VideoStream> videoOnlyStreams = new ArrayList<>();
|
||||||
|
|
||||||
for (int i = 0; i < representationList.getLength(); i++) {
|
for (int i = 0; i < representationList.getLength(); i++) {
|
||||||
Element representation = ((Element) representationList.item(i));
|
final Element representation = (Element) representationList.item(i);
|
||||||
try {
|
try {
|
||||||
String mimeType = ((Element) representation.getParentNode()).getAttribute("mimeType");
|
final String mimeType = ((Element) representation.getParentNode()).getAttribute("mimeType");
|
||||||
String id = representation.getAttribute("id");
|
final String id = representation.getAttribute("id");
|
||||||
String url = representation.getElementsByTagName("BaseURL").item(0).getTextContent();
|
final String url = representation.getElementsByTagName("BaseURL").item(0).getTextContent();
|
||||||
ItagItem itag = ItagItem.getItag(Integer.parseInt(id));
|
final ItagItem itag = ItagItem.getItag(Integer.parseInt(id));
|
||||||
if (itag != null) {
|
final Node segmentationList = representation.getElementsByTagName("SegmentList").item(0);
|
||||||
MediaFormat mediaFormat = MediaFormat.getFromMimeType(mimeType);
|
|
||||||
|
// If a SegmentList is present, BaseURL does not represent the URL of the stream.
|
||||||
|
// Instead we need to add the "media=" value from the <SegmentURL/> tags inside the <SegmentList/>
|
||||||
|
// tag in order to get a full working url. However each of these is just pointing to a part of the
|
||||||
|
// video, so we can not return a URL with a working stream here.
|
||||||
|
// We decided to ignore such streams for the moment.
|
||||||
|
if (itag != null && segmentationList == null) {
|
||||||
|
final MediaFormat mediaFormat = MediaFormat.getFromMimeType(mimeType);
|
||||||
|
|
||||||
if (itag.itagType.equals(ItagItem.ItagType.AUDIO)) {
|
if (itag.itagType.equals(ItagItem.ItagType.AUDIO)) {
|
||||||
AudioStream audioStream = new AudioStream(url, mediaFormat, itag.avgBitrate);
|
final AudioStream audioStream = new AudioStream(url, mediaFormat, itag.avgBitrate);
|
||||||
|
|
||||||
if (!Stream.containSimilarStream(audioStream, streamInfo.getAudioStreams())) {
|
if (!Stream.containSimilarStream(audioStream, streamInfo.getAudioStreams())) {
|
||||||
streamInfo.getAudioStreams().add(audioStream);
|
audioStreams.add(audioStream);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
boolean isVideoOnly = itag.itagType.equals(ItagItem.ItagType.VIDEO_ONLY);
|
boolean isVideoOnly = itag.itagType.equals(ItagItem.ItagType.VIDEO_ONLY);
|
||||||
VideoStream videoStream = new VideoStream(url, mediaFormat, itag.resolutionString, isVideoOnly);
|
final VideoStream videoStream = new VideoStream(url, mediaFormat, itag.resolutionString, isVideoOnly);
|
||||||
|
|
||||||
if (isVideoOnly) {
|
if (isVideoOnly) {
|
||||||
if (!Stream.containSimilarStream(videoStream, streamInfo.getVideoOnlyStreams())) {
|
if (!Stream.containSimilarStream(videoStream, streamInfo.getVideoOnlyStreams())) {
|
||||||
streamInfo.getVideoOnlyStreams().add(videoStream);
|
streamInfo.getVideoOnlyStreams().add(videoStream);
|
||||||
|
videoOnlyStreams.add(videoStream);
|
||||||
}
|
}
|
||||||
} else if (!Stream.containSimilarStream(videoStream, streamInfo.getVideoStreams())) {
|
} else if (!Stream.containSimilarStream(videoStream, streamInfo.getVideoStreams())) {
|
||||||
streamInfo.getVideoStreams().add(videoStream);
|
videoStreams.add(videoStream);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} catch (Exception ignored) {
|
} catch (Exception ignored) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
return new ParserResult(videoStreams, audioStreams, videoOnlyStreams);
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
throw new DashMpdParsingException("Could not parse Dash mpd", e);
|
throw new DashMpdParsingException("Could not parse Dash mpd", e);
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,60 @@
|
||||||
|
package org.schabi.newpipe.extractor.services.youtube;
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Created by Christian Schabesberger on 30.12.15.
|
||||||
|
*
|
||||||
|
* Copyright (C) Christian Schabesberger 2015 <chris.schabesberger@mailbox.org>
|
||||||
|
* YoutubeStreamExtractorDASHText.java is part of NewPipe.
|
||||||
|
*
|
||||||
|
* NewPipe is free software: you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU General Public License as published by
|
||||||
|
* the Free Software Foundation, either version 3 of the License, or
|
||||||
|
* (at your option) any later version.
|
||||||
|
*
|
||||||
|
* NewPipe is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License
|
||||||
|
* along with NewPipe. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import org.junit.BeforeClass;
|
||||||
|
import org.junit.Test;
|
||||||
|
import org.schabi.newpipe.Downloader;
|
||||||
|
import org.schabi.newpipe.extractor.NewPipe;
|
||||||
|
import org.schabi.newpipe.extractor.stream.StreamExtractor;
|
||||||
|
import org.schabi.newpipe.extractor.stream.StreamInfo;
|
||||||
|
|
||||||
|
import static org.junit.Assert.assertEquals;
|
||||||
|
import static org.junit.Assert.assertTrue;
|
||||||
|
import static org.schabi.newpipe.extractor.ServiceList.YouTube;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test for {@link StreamExtractor}
|
||||||
|
*/
|
||||||
|
public class YoutubeStreamExtractorDASHText {
|
||||||
|
private static StreamInfo info;
|
||||||
|
|
||||||
|
@BeforeClass
|
||||||
|
public static void setUp() throws Exception {
|
||||||
|
NewPipe.init(Downloader.getInstance());
|
||||||
|
info = StreamInfo.getInfo(YouTube, "https://www.youtube.com/watch?v=00Q4SUnVQK4");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testGetDashMpd() {
|
||||||
|
System.out.println(info.getDashMpdUrl());
|
||||||
|
assertTrue(info.getDashMpdUrl(),
|
||||||
|
info.getDashMpdUrl() != null && !info.getDashMpdUrl().isEmpty());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testDashMpdParser() {
|
||||||
|
assertEquals(0, info.getAudioStreams().size());
|
||||||
|
assertEquals(0, info.getVideoOnlyStreams().size());
|
||||||
|
assertEquals(4, info.getVideoStreams().size());
|
||||||
|
}
|
||||||
|
}
|
|
@ -8,6 +8,7 @@ import org.schabi.newpipe.extractor.exceptions.ExtractionException;
|
||||||
import org.schabi.newpipe.extractor.exceptions.ParsingException;
|
import org.schabi.newpipe.extractor.exceptions.ParsingException;
|
||||||
import org.schabi.newpipe.extractor.services.youtube.extractors.YoutubeStreamExtractor;
|
import org.schabi.newpipe.extractor.services.youtube.extractors.YoutubeStreamExtractor;
|
||||||
import org.schabi.newpipe.extractor.stream.*;
|
import org.schabi.newpipe.extractor.stream.*;
|
||||||
|
import org.schabi.newpipe.extractor.utils.DashMpdParser;
|
||||||
import org.schabi.newpipe.extractor.utils.Utils;
|
import org.schabi.newpipe.extractor.utils.Utils;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
@ -140,8 +141,9 @@ public class YoutubeStreamExtractorDefaultTest {
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testGetDashMpd() throws ParsingException {
|
public void testGetDashMpd() throws ParsingException {
|
||||||
|
// We don't expect this particular video to have a DASH file. For this purpose we use a different test class.
|
||||||
assertTrue(extractor.getDashMpdUrl(),
|
assertTrue(extractor.getDashMpdUrl(),
|
||||||
extractor.getDashMpdUrl() != null || !extractor.getDashMpdUrl().isEmpty());
|
extractor.getDashMpdUrl() != null && extractor.getDashMpdUrl().isEmpty());
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
|
Loading…
Reference in New Issue