Merge branch 'feature-YTsubtitles' of https://github.com/tonakriz/NewPipeExtractor into refactor

This commit is contained in:
Christian Schabesberger 2017-11-24 21:02:13 +01:00
commit 98358cb0f9
10 changed files with 188 additions and 44 deletions

View File

@ -1,30 +1,13 @@
<?xml version="1.0" encoding="UTF-8"?> <?xml version="1.0" encoding="UTF-8"?>
<module external.linked.project.id=":NewPipeExtractor" external.linked.project.path="$MODULE_DIR$" external.root.project.path="$MODULE_DIR$/.." external.system.id="GRADLE" type="JAVA_MODULE" version="4"> <module external.linked.project.id="NewPipeExtractor" external.linked.project.path="$MODULE_DIR$" external.root.project.path="$MODULE_DIR$" external.system.id="GRADLE" external.system.module.group="" external.system.module.version="unspecified" type="JAVA_MODULE" version="4">
<component name="FacetManager"> <component name="NewModuleRootManager" inherit-compiler-output="true">
<facet type="java-gradle" name="Java-Gradle">
<configuration>
<option name="BUILD_FOLDER_PATH" value="$MODULE_DIR$/build" />
<option name="BUILDABLE" value="true" />
</configuration>
</facet>
</component>
<component name="NewModuleRootManager" LANGUAGE_LEVEL="JDK_1_7" inherit-compiler-output="false">
<output url="file://$MODULE_DIR$/build/classes/java/main" />
<output-test url="file://$MODULE_DIR$/build/classes/java/test" />
<exclude-output /> <exclude-output />
<content url="file://$MODULE_DIR$"> <content url="file://$MODULE_DIR$">
<sourceFolder url="file://$MODULE_DIR$/src/main/java" isTestSource="false" />
<sourceFolder url="file://$MODULE_DIR$/src/main/resources" type="java-resource" />
<sourceFolder url="file://$MODULE_DIR$/src/test/java" isTestSource="true" />
<sourceFolder url="file://$MODULE_DIR$/src/test/resources" type="java-test-resource" />
<excludeFolder url="file://$MODULE_DIR$/.gradle" /> <excludeFolder url="file://$MODULE_DIR$/.gradle" />
<excludeFolder url="file://$MODULE_DIR$/build" />
<excludeFolder url="file://$MODULE_DIR$/out" />
</content> </content>
<orderEntry type="inheritedJdk" /> <orderEntry type="inheritedJdk" />
<orderEntry type="sourceFolder" forTests="false" /> <orderEntry type="sourceFolder" forTests="false" />
<orderEntry type="library" exported="" scope="PROVIDED" name="nanojson-1.1" level="project" />
<orderEntry type="library" exported="" scope="PROVIDED" name="jsoup-1.9.2" level="project" />
<orderEntry type="library" exported="" scope="PROVIDED" name="rhino-1.7.7.1" level="project" />
<orderEntry type="library" exported="" scope="TEST" name="junit-4.12" level="project" />
<orderEntry type="library" exported="" scope="TEST" name="hamcrest-core-1.3" level="project" />
</component> </component>
</module> </module>

View File

@ -0,0 +1,30 @@
package org.schabi.newpipe.extractor;
import org.schabi.newpipe.extractor.stream.SubtitlesFormat;
/**
 * Immutable description of a single subtitle (caption) track of a stream.
 *
 * <p>An instance only carries metadata about the track (format, language, location and
 * whether it is flagged as auto-generated); this class does not download anything.</p>
 */
public class Subtitles {
    private final SubtitlesFormat format;
    private final String languageCode;
    private final String URL;
    private final boolean autoGenerated;

    /**
     * @param format        format the subtitle file is offered in
     * @param languageCode  language code of the track
     * @param URL           location of the subtitle file
     * @param autoGenerated {@code true} if the track is flagged as auto-generated
     */
    public Subtitles(SubtitlesFormat format, String languageCode, String URL, boolean autoGenerated) {
        this.format = format;
        this.languageCode = languageCode;
        this.URL = URL;
        this.autoGenerated = autoGenerated;
    }

    /**
     * @return the format passed to the constructor
     */
    public SubtitlesFormat getFileType() {
        return format;
    }

    /**
     * @return the language code passed to the constructor
     */
    public String getLanguageCode() {
        return languageCode;
    }

    /**
     * @return the subtitle URL passed to the constructor
     */
    public String getURL() {
        return URL;
    }

    /**
     * @return {@code true} if the track was flagged as auto-generated on construction
     */
    public boolean isAutoGenerated() {
        return autoGenerated;
    }
}

View File

@ -3,19 +3,16 @@ package org.schabi.newpipe.extractor.services.soundcloud;
import com.grack.nanojson.JsonObject; import com.grack.nanojson.JsonObject;
import com.grack.nanojson.JsonParser; import com.grack.nanojson.JsonParser;
import com.grack.nanojson.JsonParserException; import com.grack.nanojson.JsonParserException;
import org.schabi.newpipe.extractor.Downloader; import org.schabi.newpipe.extractor.*;
import org.schabi.newpipe.extractor.MediaFormat;
import org.schabi.newpipe.extractor.NewPipe;
import org.schabi.newpipe.extractor.StreamingService;
import org.schabi.newpipe.extractor.exceptions.ContentNotAvailableException; import org.schabi.newpipe.extractor.exceptions.ContentNotAvailableException;
import org.schabi.newpipe.extractor.exceptions.ExtractionException; import org.schabi.newpipe.extractor.exceptions.ExtractionException;
import org.schabi.newpipe.extractor.exceptions.ParsingException; import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.exceptions.ReCaptchaException;
import org.schabi.newpipe.extractor.stream.*; import org.schabi.newpipe.extractor.stream.*;
import org.schabi.newpipe.extractor.utils.Parser; import org.schabi.newpipe.extractor.utils.Parser;
import java.io.IOException; import java.io.IOException;
import java.util.ArrayList; import java.util.*;
import java.util.List;
public class SoundcloudStreamExtractor extends StreamExtractor { public class SoundcloudStreamExtractor extends StreamExtractor {
private JsonObject track; private JsonObject track;
@ -150,6 +147,16 @@ public class SoundcloudStreamExtractor extends StreamExtractor {
return null; return null;
} }
/**
 * Returns the subtitles of this stream in the default format.
 *
 * @return always {@code null}; this extractor does not provide any subtitles
 */
@Override
public List<Subtitles> getSubtitlesDefault() throws IOException, ExtractionException {
return null;
}
/**
 * Returns the subtitles of this stream in the requested format.
 *
 * @param format the requested subtitle format; not used by this implementation
 * @return always {@code null}; this extractor does not provide any subtitles
 */
@Override
public List<Subtitles> getSubtitles(SubtitlesFormat format) throws IOException, ExtractionException {
return null;
}
@Override @Override
public StreamType getStreamType() { public StreamType getStreamType() {
return StreamType.AUDIO_STREAM; return StreamType.AUDIO_STREAM;

View File

@ -1,7 +1,9 @@
package org.schabi.newpipe.extractor.services.youtube; package org.schabi.newpipe.extractor.services.youtube;
import com.grack.nanojson.JsonArray;
import com.grack.nanojson.JsonObject; import com.grack.nanojson.JsonObject;
import com.grack.nanojson.JsonParser; import com.grack.nanojson.JsonParser;
import com.grack.nanojson.JsonParserException;
import org.jsoup.Jsoup; import org.jsoup.Jsoup;
import org.jsoup.nodes.Document; import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element; import org.jsoup.nodes.Element;
@ -11,6 +13,7 @@ import org.mozilla.javascript.ScriptableObject;
import org.schabi.newpipe.extractor.Downloader; import org.schabi.newpipe.extractor.Downloader;
import org.schabi.newpipe.extractor.NewPipe; import org.schabi.newpipe.extractor.NewPipe;
import org.schabi.newpipe.extractor.StreamingService; import org.schabi.newpipe.extractor.StreamingService;
import org.schabi.newpipe.extractor.Subtitles;
import org.schabi.newpipe.extractor.exceptions.ContentNotAvailableException; import org.schabi.newpipe.extractor.exceptions.ContentNotAvailableException;
import org.schabi.newpipe.extractor.exceptions.ExtractionException; import org.schabi.newpipe.extractor.exceptions.ExtractionException;
import org.schabi.newpipe.extractor.exceptions.ParsingException; import org.schabi.newpipe.extractor.exceptions.ParsingException;
@ -20,10 +23,7 @@ import org.schabi.newpipe.extractor.utils.Parser;
import org.schabi.newpipe.extractor.utils.Utils; import org.schabi.newpipe.extractor.utils.Utils;
import java.io.IOException; import java.io.IOException;
import java.util.ArrayList; import java.util.*;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher; import java.util.regex.Matcher;
import java.util.regex.Pattern; import java.util.regex.Pattern;
@ -379,6 +379,49 @@ public class YoutubeStreamExtractor extends StreamExtractor {
return videoOnlyStreams; return videoOnlyStreams;
} }
/**
 * Returns the subtitle tracks of this stream in the default format, which is
 * {@link SubtitlesFormat#TTML} for this extractor.
 *
 * @return whatever {@link #getSubtitles(SubtitlesFormat)} returns for TTML
 */
@Override
public List<Subtitles> getSubtitlesDefault() throws IOException, ExtractionException {
    final SubtitlesFormat defaultFormat = SubtitlesFormat.TTML;
    return getSubtitles(defaultFormat);
}
/**
 * Collects the caption tracks listed in the {@code player_response} of the player config
 * and returns them with their URLs rewritten to request the given format.
 *
 * @param format subtitle format to request for every returned track
 * @return one {@link Subtitles} entry per usable caption track, or {@code null} when the
 *         player response is missing, cannot be parsed, or lists no usable caption track
 * @throws IOException         propagated from fetching the page
 * @throws ExtractionException propagated from fetching the page or reading the player config
 */
@Override
public List<Subtitles> getSubtitles(SubtitlesFormat format) throws IOException, ExtractionException {
    final JsonObject playerConfig = getPlayerConfig(getPageHtml());
    final JsonObject args = playerConfig.getObject("args");
    final String playerResponse = (args == null) ? null : args.getString("player_response");
    if (playerResponse == null) {
        return null;
    }

    // Parse the player response once and reuse the result
    final JsonObject response;
    try {
        response = JsonParser.object().from(playerResponse);
    } catch (JsonParserException e) {
        // Best effort: an unparsable player response is treated like "no subtitles"
        return null;
    }

    // No "captions" object means the stream has no subtitles
    if (!response.has("captions")) {
        return null;
    }
    final JsonObject captions = response.getObject("captions");
    final JsonObject renderer = (captions == null)
            ? null : captions.getObject("playerCaptionsTracklistRenderer");
    final JsonArray captionTracks = (renderer == null) ? null : renderer.getArray("captionTracks");
    if (captionTracks == null || captionTracks.size() == 0) {
        return null;
    }

    final String extension = format.getExtension();
    final int trackCount = captionTracks.size();
    final List<Subtitles> result = new ArrayList<>(trackCount);
    for (int i = 0; i < trackCount; i++) {
        final JsonObject track = captionTracks.getObject(i);
        if (track == null) {
            continue;
        }
        final String baseUrl = track.getString("baseUrl");
        final String vssId = track.getString("vssId");
        if (baseUrl == null || vssId == null) {
            // Skip malformed entries instead of failing the whole extraction
            continue;
        }

        // vssId is expected to look like "a.<lang>" for auto-generated tracks
        // and ".<lang>" otherwise; strip the prefix literally (no regex wildcards)
        final boolean isAutoGenerated = vssId.startsWith("a.");
        final String languageCode;
        if (isAutoGenerated) {
            languageCode = vssId.substring(2);
        } else if (vssId.startsWith(".")) {
            languageCode = vssId.substring(1);
        } else {
            languageCode = vssId;
        }

        final String url = applySubtitlesFormat(baseUrl, extension);
        result.add(new Subtitles(format, languageCode, url, isAutoGenerated));
    }

    return result.isEmpty() ? null : result;
}

/** Matches an existing {@code fmt} query parameter, capturing its leading separator. */
private static final Pattern SUBTITLES_FMT_PARAM = Pattern.compile("([?&])fmt=[^&]*");

/**
 * Returns {@code baseUrl} with its {@code fmt} query parameter set to {@code extension}:
 * an existing parameter is replaced in place, otherwise the parameter is appended.
 */
private static String applySubtitlesFormat(String baseUrl, String extension) {
    final Matcher fmtParam = SUBTITLES_FMT_PARAM.matcher(baseUrl);
    if (fmtParam.find()) {
        return fmtParam.replaceAll("$1fmt=" + Matcher.quoteReplacement(extension));
    }
    final String separator = baseUrl.contains("?") ? "&" : "?";
    return baseUrl + separator + "fmt=" + extension;
}
@Override @Override
public StreamType getStreamType() throws ParsingException { public StreamType getStreamType() throws ParsingException {
//todo: if implementing livestream support this value should be generated dynamically //todo: if implementing livestream support this value should be generated dynamically
@ -456,13 +499,24 @@ public class YoutubeStreamExtractor extends StreamExtractor {
private static volatile String decryptionCode = ""; private static volatile String decryptionCode = "";
/**
 * HTML of the page at {@code getCleanUrl()}, filled lazily by {@link #getPageHtml()}.
 * Kept per instance: a static cache would hand the first downloaded page to every
 * extractor created afterwards, regardless of its URL.
 */
private String pageHtml = null;

/**
 * Returns the HTML of the page at {@code getCleanUrl()}, downloading it on first use
 * and returning the cached copy on later calls.
 *
 * @return the page HTML as returned by the downloader
 * @throws IOException         propagated from the download
 * @throws ExtractionException propagated from the download or URL lookup
 */
private String getPageHtml() throws IOException, ExtractionException {
    if (pageHtml == null) {
        pageHtml = NewPipe.getDownloader().download(getCleanUrl());
    }
    return pageHtml;
}
@Override @Override
public void fetchPage() throws IOException, ExtractionException { public void fetchPage() throws IOException, ExtractionException {
Downloader dl = NewPipe.getDownloader(); Downloader dl = NewPipe.getDownloader();
String pageContent = dl.download(getCleanUrl()); String pageContent = getPageHtml();
doc = Jsoup.parse(pageContent, getCleanUrl()); doc = Jsoup.parse(pageContent, getCleanUrl());
String playerUrl; String playerUrl;
// Check if the video is age restricted // Check if the video is age restricted
if (pageContent.contains("<meta property=\"og:restrictions:age")) { if (pageContent.contains("<meta property=\"og:restrictions:age")) {
@ -616,7 +670,7 @@ public class YoutubeStreamExtractor extends StreamExtractor {
private String decryptSignature(String encryptedSig, String decryptionCode) throws DecryptException { private String decryptSignature(String encryptedSig, String decryptionCode) throws DecryptException {
Context context = Context.enter(); Context context = Context.enter();
context.setOptimizationLevel(-1); context.setOptimizationLevel(-1);
Object result = null; Object result;
try { try {
ScriptableObject scope = context.initStandardObjects(); ScriptableObject scope = context.initStandardObjects();
context.evaluateString(scope, decryptionCode, "decryptionCode", 1, null); context.evaluateString(scope, decryptionCode, "decryptionCode", 1, null);

View File

@ -20,11 +20,14 @@ package org.schabi.newpipe.extractor.stream;
* along with NewPipe. If not, see <http://www.gnu.org/licenses/>. * along with NewPipe. If not, see <http://www.gnu.org/licenses/>.
*/ */
import com.grack.nanojson.JsonParserException;
import org.schabi.newpipe.extractor.Extractor; import org.schabi.newpipe.extractor.Extractor;
import org.schabi.newpipe.extractor.StreamingService; import org.schabi.newpipe.extractor.StreamingService;
import org.schabi.newpipe.extractor.Subtitles;
import org.schabi.newpipe.extractor.UrlIdHandler; import org.schabi.newpipe.extractor.UrlIdHandler;
import org.schabi.newpipe.extractor.exceptions.ExtractionException; import org.schabi.newpipe.extractor.exceptions.ExtractionException;
import org.schabi.newpipe.extractor.exceptions.ParsingException; import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.exceptions.ReCaptchaException;
import org.schabi.newpipe.extractor.utils.Parser; import org.schabi.newpipe.extractor.utils.Parser;
import java.io.IOException; import java.io.IOException;
@ -109,6 +112,8 @@ public abstract class StreamExtractor extends Extractor {
public abstract List<AudioStream> getAudioStreams() throws IOException, ExtractionException; public abstract List<AudioStream> getAudioStreams() throws IOException, ExtractionException;
public abstract List<VideoStream> getVideoStreams() throws IOException, ExtractionException; public abstract List<VideoStream> getVideoStreams() throws IOException, ExtractionException;
public abstract List<VideoStream> getVideoOnlyStreams() throws IOException, ExtractionException; public abstract List<VideoStream> getVideoOnlyStreams() throws IOException, ExtractionException;
/**
 * Returns the subtitle tracks of this stream in the extractor's default format.
 * NOTE(review): the implementations added alongside this declaration return {@code null}
 * (not an empty list) when there are no subtitles - callers should null-check.
 */
public abstract List<Subtitles> getSubtitlesDefault() throws IOException, ExtractionException;
/**
 * Returns the subtitle tracks of this stream in the given format.
 * NOTE(review): the implementations added alongside this declaration return {@code null}
 * (not an empty list) when there are no subtitles - callers should null-check.
 *
 * @param format the subtitle format to request
 */
public abstract List<Subtitles> getSubtitles(SubtitlesFormat format) throws IOException, ExtractionException;
public abstract StreamType getStreamType() throws ParsingException; public abstract StreamType getStreamType() throws ParsingException;
public abstract StreamInfoItem getNextVideo() throws IOException, ExtractionException; public abstract StreamInfoItem getNextVideo() throws IOException, ExtractionException;

View File

@ -0,0 +1,27 @@
package org.schabi.newpipe.extractor.stream;
import org.schabi.newpipe.extractor.Subtitles;
/**
 * Subtitle formats that can be requested from YouTube.
 *
 * <p>{@link #VTT} and {@link #TTML} are W3C standards and the only formats supported by
 * ExoPlayer. The {@code TRANSCRIPT*} formats are YouTube's own formats based on TTML
 * ({@link #TRANSCRIPT3} being YouTube's default); they are not W3C standards and are
 * not supported by ExoPlayer.</p>
 */
public enum SubtitlesFormat {
    VTT(0x0, "vtt"),
    TTML(0x1, "ttml"),
    TRANSCRIPT1(0x2, "srv1"),
    TRANSCRIPT2(0x3, "srv2"),
    TRANSCRIPT3(0x4, "srv3");

    // Enum constants are shared singletons, so their state is kept immutable.
    // The id is stored but not exposed by any accessor of this enum.
    private final int id;
    private final String extension;

    SubtitlesFormat(int id, String extension) {
        this.id = id;
        this.extension = extension;
    }

    /**
     * @return the extension string of this format (e.g. {@code "vtt"}, {@code "srv1"})
     */
    public String getExtension() {
        return extension;
    }
}

View File

@ -54,7 +54,6 @@ public class Parser {
if (foundMatch) { if (foundMatch) {
return mat.group(group); return mat.group(group);
} else { } else {
//Log.e(TAG, "failed to find pattern \""+pattern+"\" inside of \""+input+"\"");
if (input.length() > 1024) { if (input.length() > 1024) {
throw new RegexException("failed to find pattern \"" + pattern); throw new RegexException("failed to find pattern \"" + pattern);
} else { } else {
@ -64,12 +63,9 @@ public class Parser {
} }
public static boolean isMatch(String pattern, String input) { public static boolean isMatch(String pattern, String input) {
try { Pattern pat = Pattern.compile(pattern);
matchGroup1(pattern, input); Matcher mat = pat.matcher(input);
return true; return mat.find();
} catch (RegexException e) {
return false;
}
} }
public static Map<String, String> compatParseMap(final String input) throws UnsupportedEncodingException { public static Map<String, String> compatParseMap(final String input) throws UnsupportedEncodingException {

View File

@ -1,16 +1,20 @@
package org.schabi.newpipe.extractor.services.soundcloud; package org.schabi.newpipe.extractor.services.soundcloud;
import com.grack.nanojson.JsonParserException;
import org.junit.Before; import org.junit.Before;
import org.junit.Test; import org.junit.Test;
import org.schabi.newpipe.Downloader; import org.schabi.newpipe.Downloader;
import org.schabi.newpipe.extractor.NewPipe; import org.schabi.newpipe.extractor.NewPipe;
import org.schabi.newpipe.extractor.Subtitles;
import org.schabi.newpipe.extractor.exceptions.ExtractionException; import org.schabi.newpipe.extractor.exceptions.ExtractionException;
import org.schabi.newpipe.extractor.exceptions.ParsingException; import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.stream.StreamExtractor; import org.schabi.newpipe.extractor.stream.StreamExtractor;
import org.schabi.newpipe.extractor.stream.StreamInfoItemCollector; import org.schabi.newpipe.extractor.stream.StreamInfoItemCollector;
import org.schabi.newpipe.extractor.stream.StreamType; import org.schabi.newpipe.extractor.stream.StreamType;
import org.schabi.newpipe.extractor.stream.SubtitlesFormat;
import java.io.IOException; import java.io.IOException;
import java.util.List;
import static org.junit.Assert.*; import static org.junit.Assert.*;
import static org.schabi.newpipe.extractor.ServiceList.SoundCloud; import static org.schabi.newpipe.extractor.ServiceList.SoundCloud;
@ -101,4 +105,16 @@ public class SoundcloudStreamExtractorDefaultTest {
assertFalse(relatedVideos.getItemList().isEmpty()); assertFalse(relatedVideos.getItemList().isEmpty());
assertTrue(relatedVideos.getErrors().isEmpty()); assertTrue(relatedVideos.getErrors().isEmpty());
} }
@Test
public void testGetSubtitlesListDefault() throws IOException, ExtractionException {
    // The SoundCloud stream extractor does not provide subtitles => null
    assertNull(extractor.getSubtitlesDefault());
}
@Test
public void testGetSubtitlesList() throws IOException, ExtractionException {
    // The SoundCloud stream extractor does not provide subtitles in any format => null
    assertNull(extractor.getSubtitles(SubtitlesFormat.VTT));
}
} }

View File

@ -1,17 +1,16 @@
package org.schabi.newpipe.extractor.services.youtube; package org.schabi.newpipe.extractor.services.youtube;
import com.grack.nanojson.JsonParserException;
import org.junit.Before; import org.junit.Before;
import org.junit.Test; import org.junit.Test;
import org.schabi.newpipe.Downloader; import org.schabi.newpipe.Downloader;
import org.schabi.newpipe.extractor.NewPipe; import org.schabi.newpipe.extractor.NewPipe;
import org.schabi.newpipe.extractor.exceptions.ExtractionException; import org.schabi.newpipe.extractor.exceptions.ExtractionException;
import org.schabi.newpipe.extractor.exceptions.ParsingException; import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.stream.StreamExtractor; import org.schabi.newpipe.extractor.stream.*;
import org.schabi.newpipe.extractor.stream.StreamInfoItemCollector;
import org.schabi.newpipe.extractor.stream.StreamType;
import org.schabi.newpipe.extractor.stream.VideoStream;
import java.io.IOException; import java.io.IOException;
import java.util.HashMap;
import static org.junit.Assert.*; import static org.junit.Assert.*;
import static org.schabi.newpipe.extractor.ServiceList.YouTube; import static org.schabi.newpipe.extractor.ServiceList.YouTube;
@ -148,4 +147,16 @@ public class YoutubeStreamExtractorDefaultTest {
} }
assertTrue(relatedVideos.getErrors().isEmpty()); assertTrue(relatedVideos.getErrors().isEmpty());
} }
@Test
public void testGetSubtitlesListDefault() throws IOException, ExtractionException {
    // Video (/watch?v=YQHsXMglC9A) set in the setUp() method has no captions => null
    assertNull(extractor.getSubtitlesDefault());
}
@Test
public void testGetSubtitlesList() throws IOException, ExtractionException {
    // Video (/watch?v=YQHsXMglC9A) set in the setUp() method has no captions => null
    assertNull(extractor.getSubtitles(SubtitlesFormat.VTT));
}
} }

View File

@ -1,5 +1,6 @@
package org.schabi.newpipe.extractor.services.youtube; package org.schabi.newpipe.extractor.services.youtube;
import com.grack.nanojson.JsonParserException;
import org.junit.Before; import org.junit.Before;
import org.junit.Test; import org.junit.Test;
import org.schabi.newpipe.Downloader; import org.schabi.newpipe.Downloader;
@ -7,6 +8,7 @@ import org.schabi.newpipe.extractor.NewPipe;
import org.schabi.newpipe.extractor.exceptions.ExtractionException; import org.schabi.newpipe.extractor.exceptions.ExtractionException;
import org.schabi.newpipe.extractor.exceptions.ParsingException; import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.stream.StreamExtractor; import org.schabi.newpipe.extractor.stream.StreamExtractor;
import org.schabi.newpipe.extractor.stream.SubtitlesFormat;
import org.schabi.newpipe.extractor.stream.VideoStream; import org.schabi.newpipe.extractor.stream.VideoStream;
import java.io.IOException; import java.io.IOException;
@ -103,4 +105,17 @@ public class YoutubeStreamExtractorRestrictedTest {
0 <= s.format && s.format <= 4); 0 <= s.format && s.format <= 4);
} }
} }
@Test
public void testGetSubtitlesListDefault() throws IOException, ExtractionException {
    // The video configured in setUp() is expected to have no captions,
    // in which case the extractor reports null
    final Object defaultSubtitles = extractor.getSubtitlesDefault();
    assertTrue(defaultSubtitles == null);
}
@Test
public void testGetSubtitlesList() throws IOException, ExtractionException {
    // The video configured in setUp() is expected to have no captions,
    // in which case the extractor reports null for an explicit format as well
    final Object vttSubtitles = extractor.getSubtitles(SubtitlesFormat.VTT);
    assertTrue(vttSubtitles == null);
}
} }