From 6e3651fdf52cb5053ad5367ebcce6be9040b91ca Mon Sep 17 00:00:00 2001 From: tonakriz Date: Thu, 23 Nov 2017 00:10:12 +0100 Subject: [PATCH] Added method to download subtitles as VTT --- .../soundcloud/SoundcloudStreamExtractor.java | 8 ++++- .../youtube/YoutubeStreamExtractor.java | 36 +++++-------------- .../extractor/stream/StreamExtractor.java | 4 ++- .../newpipe/extractor/utils/Parser.java | 10 ++---- .../SoundcloudStreamExtractorDefaultTest.java | 2 +- .../YoutubeStreamExtractorDefaultTest.java | 15 ++++++-- .../YoutubeStreamExtractorRestrictedTest.java | 9 +++-- 7 files changed, 43 insertions(+), 41 deletions(-) diff --git a/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/SoundcloudStreamExtractor.java b/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/SoundcloudStreamExtractor.java index aa6e732e2..28bc4bdb4 100644 --- a/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/SoundcloudStreamExtractor.java +++ b/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/SoundcloudStreamExtractor.java @@ -10,6 +10,7 @@ import org.schabi.newpipe.extractor.StreamingService; import org.schabi.newpipe.extractor.exceptions.ContentNotAvailableException; import org.schabi.newpipe.extractor.exceptions.ExtractionException; import org.schabi.newpipe.extractor.exceptions.ParsingException; +import org.schabi.newpipe.extractor.exceptions.ReCaptchaException; import org.schabi.newpipe.extractor.stream.*; import org.schabi.newpipe.extractor.utils.Parser; @@ -194,10 +195,15 @@ public class SoundcloudStreamExtractor extends StreamExtractor { } @Override - public HashMap getSubtitles() throws IOException, ExtractionException, JsonParserException { + public HashMap getSubtitlesList() throws IOException, ExtractionException, JsonParserException { return new HashMap<>(); } + @Override + public String downloadSubtitles(String URL) throws IOException, ReCaptchaException { + return ""; + } + @Override public StreamType getStreamType() { return StreamType.AUDIO_STREAM; diff --git a/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeStreamExtractor.java b/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeStreamExtractor.java index 2a030f593..4fa3b7afc 100644 --- a/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeStreamExtractor.java +++ b/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeStreamExtractor.java @@ -420,34 +420,8 @@ public class YoutubeStreamExtractor extends StreamExtractor { return videoOnlyStreams; } - /** - * Example output: - * { - * #language code#: [ - * [0]"captions URL", - * [1]"language Name" - * ], - * "a.en": { // a.#language code# == auto generated - * [0]"https://youtube.com/api/timedtext..." - * [1]"English (Auto-generated)" - * }, - * ".en": { // .#language code# == normal (not auto generated) - * [0]"https://youtube.com/api/timedtext..." - * [1]"English" - * } - * } - * - * Example usage: - * 1) Get list of keys in the Map if there are any - * 2) Get - * - * @return Map(String, StringArray[2]) - * @throws IOException - Thrown when parsing HTML page - * @throws ExtractionException - Thrown when parsing HTML - * @throws JsonParserException - Thrown when parsing JSON from the web page - */ @Override - public HashMap getSubtitles() throws IOException, ExtractionException, JsonParserException { + public HashMap getSubtitlesList() throws IOException, ExtractionException, JsonParserException { HashMap result = new HashMap<>(); JsonObject playerConfig = getPlayerConfig(getPageHtml()); @@ -473,6 +447,14 @@ public class YoutubeStreamExtractor extends StreamExtractor { return result; } + @Override + public String downloadSubtitles(String URL) throws IOException, ReCaptchaException { + Downloader dl = NewPipe.getDownloader(); + // Instead of the WebVTT 'vtt' we can use also Timed Text Markup Language 'ttml' + String URLasVTT = URL.replaceAll("&fmt=[^&]*", "&fmt=vtt"); + return dl.download(URLasVTT); + } + @Override public StreamType getStreamType() throws ParsingException { //todo: if implementing livestream support this value should be generated dynamically diff --git a/src/main/java/org/schabi/newpipe/extractor/stream/StreamExtractor.java b/src/main/java/org/schabi/newpipe/extractor/stream/StreamExtractor.java index fa7a3d6a4..fc49e653a 100644 --- a/src/main/java/org/schabi/newpipe/extractor/stream/StreamExtractor.java +++ b/src/main/java/org/schabi/newpipe/extractor/stream/StreamExtractor.java @@ -26,6 +26,7 @@ import org.schabi.newpipe.extractor.StreamingService; import org.schabi.newpipe.extractor.UrlIdHandler; import org.schabi.newpipe.extractor.exceptions.ExtractionException; import org.schabi.newpipe.extractor.exceptions.ParsingException; +import org.schabi.newpipe.extractor.exceptions.ReCaptchaException; import java.io.IOException; import java.util.HashMap; @@ -66,7 +67,8 @@ public abstract class StreamExtractor extends Extractor { public abstract List getAudioStreams() throws IOException, ExtractionException; public abstract List getVideoStreams() throws IOException, ExtractionException; public abstract List getVideoOnlyStreams() throws IOException, ExtractionException; - public abstract HashMap getSubtitles() throws IOException, ExtractionException, JsonParserException; + public abstract HashMap getSubtitlesList() throws IOException, ExtractionException, JsonParserException; + public abstract String downloadSubtitles(String URL) throws IOException, ReCaptchaException; public abstract StreamType getStreamType() throws ParsingException; public abstract StreamInfoItem getNextVideo() throws IOException, ExtractionException; diff --git a/src/main/java/org/schabi/newpipe/extractor/utils/Parser.java b/src/main/java/org/schabi/newpipe/extractor/utils/Parser.java index 2db762462..9920c7fb4 100644 --- a/src/main/java/org/schabi/newpipe/extractor/utils/Parser.java +++ b/src/main/java/org/schabi/newpipe/extractor/utils/Parser.java @@ -54,7 +54,6 @@ public class Parser { if (foundMatch) { return mat.group(group); } else { - //Log.e(TAG, "failed to find pattern \""+pattern+"\" inside of \""+input+"\""); if (input.length() > 1024) { throw new RegexException("failed to find pattern \"" + pattern); } else { @@ -64,12 +63,9 @@ public class Parser { } public static boolean isMatch(String pattern, String input) { - try { - matchGroup1(pattern, input); - return true; - } catch (RegexException e) { - return false; - } + Pattern pat = Pattern.compile(pattern); + Matcher mat = pat.matcher(input); + return mat.find(); } public static Map compatParseMap(final String input) throws UnsupportedEncodingException { diff --git a/src/test/java/org/schabi/newpipe/extractor/services/soundcloud/SoundcloudStreamExtractorDefaultTest.java b/src/test/java/org/schabi/newpipe/extractor/services/soundcloud/SoundcloudStreamExtractorDefaultTest.java index 6df8d4303..882e0da36 100644 --- a/src/test/java/org/schabi/newpipe/extractor/services/soundcloud/SoundcloudStreamExtractorDefaultTest.java +++ b/src/test/java/org/schabi/newpipe/extractor/services/soundcloud/SoundcloudStreamExtractorDefaultTest.java @@ -105,6 +105,6 @@ public class SoundcloudStreamExtractorDefaultTest { @Test public void testGetSubtitles() throws IOException, ExtractionException, JsonParserException { - assertTrue(extractor.getSubtitles() != null); + assertTrue(extractor.getSubtitlesList() != null); } } diff --git a/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeStreamExtractorDefaultTest.java b/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeStreamExtractorDefaultTest.java index d05695f7a..77d484a27 100644 --- a/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeStreamExtractorDefaultTest.java +++ b/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeStreamExtractorDefaultTest.java @@ -152,7 +152,18 @@ public class YoutubeStreamExtractorDefaultTest { } @Test - public void testGetSubtitles() throws IOException, ExtractionException, JsonParserException { - assertTrue(extractor.getSubtitles() != null); + public void testGetSubtitlesList() throws IOException, ExtractionException, JsonParserException { + assertTrue(extractor.getSubtitlesList() != null); + } + + @Test + public void testDownloadSubtitles() throws Exception { + try { + extractor.downloadSubtitles(extractor.getSubtitlesList().get("en")[0]); + // Video has no subtitles! + assert false; + } catch (Exception e) { + assert true; + } } } diff --git a/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeStreamExtractorRestrictedTest.java b/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeStreamExtractorRestrictedTest.java index f9bc39995..7b604f435 100644 --- a/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeStreamExtractorRestrictedTest.java +++ b/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeStreamExtractorRestrictedTest.java @@ -106,7 +106,12 @@ public class YoutubeStreamExtractorRestrictedTest { } @Test - public void testGetSubtitles() throws IOException, ExtractionException, JsonParserException { - assertTrue(extractor.getSubtitles() != null); + public void testGetSubtitlesList() throws IOException, ExtractionException, JsonParserException { + assertTrue(extractor.getSubtitlesList() != null); + } + + @Test + public void testDownloadSubtitles() throws Exception { + assertTrue(extractor.downloadSubtitles("https://youtu.be/FmG385_uUys?t=174") != null); } }