-Fixed subtitles extraction to use the method from youtube-dl.

-Expose subtitles during extraction.
-Make subtitle lists return non-null empty collections instead of null.
This commit is contained in:
John Zhen Mo 2018-02-01 13:27:14 -08:00
parent 0061131d39
commit 1f1bbaad57
7 changed files with 82 additions and 57 deletions

View File

@ -2,7 +2,9 @@ package org.schabi.newpipe.extractor;
import org.schabi.newpipe.extractor.stream.SubtitlesFormat; import org.schabi.newpipe.extractor.stream.SubtitlesFormat;
public class Subtitles { import java.io.Serializable;
public class Subtitles implements Serializable {
private final SubtitlesFormat format; private final SubtitlesFormat format;
private final String languageCode; private final String languageCode;
private final String URL; private final String URL;

View File

@ -167,15 +167,15 @@ public class SoundcloudStreamExtractor extends StreamExtractor {
} }
@Override @Override
@Nullable @Nonnull
public List<Subtitles> getSubtitlesDefault() throws IOException, ExtractionException { public List<Subtitles> getSubtitlesDefault() throws IOException, ExtractionException {
return null; return Collections.emptyList();
} }
@Override @Override
@Nullable @Nonnull
public List<Subtitles> getSubtitles(SubtitlesFormat format) throws IOException, ExtractionException { public List<Subtitles> getSubtitles(SubtitlesFormat format) throws IOException, ExtractionException {
return null; return Collections.emptyList();
} }
@Override @Override

View File

@ -7,6 +7,7 @@ import com.grack.nanojson.JsonParserException;
import org.jsoup.Jsoup; import org.jsoup.Jsoup;
import org.jsoup.nodes.Document; import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element; import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.mozilla.javascript.Context; import org.mozilla.javascript.Context;
import org.mozilla.javascript.Function; import org.mozilla.javascript.Function;
import org.mozilla.javascript.ScriptableObject; import org.mozilla.javascript.ScriptableObject;
@ -26,8 +27,6 @@ import javax.annotation.Nonnull;
import javax.annotation.Nullable; import javax.annotation.Nullable;
import java.io.IOException; import java.io.IOException;
import java.util.*; import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/* /*
* Created by Christian Schabesberger on 06.08.15. * Created by Christian Schabesberger on 06.08.15.
@ -74,6 +73,12 @@ public class YoutubeStreamExtractor extends StreamExtractor {
} }
} }
public class SubtitlesException extends ContentNotAvailableException {
SubtitlesException(String message, Throwable cause) {
super(message, cause);
}
}
/*//////////////////////////////////////////////////////////////////////////*/ /*//////////////////////////////////////////////////////////////////////////*/
private Document doc; private Document doc;
@ -81,6 +86,8 @@ public class YoutubeStreamExtractor extends StreamExtractor {
private JsonObject playerArgs; private JsonObject playerArgs;
@Nonnull @Nonnull
private final Map<String, String> videoInfoPage = new HashMap<>(); private final Map<String, String> videoInfoPage = new HashMap<>();
@Nonnull
private List<Subtitles> availableSubtitles = new ArrayList<>();
private boolean isAgeRestricted; private boolean isAgeRestricted;
@ -419,54 +426,20 @@ public class YoutubeStreamExtractor extends StreamExtractor {
} }
@Override @Override
@Nullable @Nonnull
public List<Subtitles> getSubtitlesDefault() throws IOException, ExtractionException { public List<Subtitles> getSubtitlesDefault() throws IOException, ExtractionException {
return getSubtitles(SubtitlesFormat.TTML); return getSubtitles(SubtitlesFormat.VTT);
} }
@Override @Override
@Nullable @Nonnull
public List<Subtitles> getSubtitles(SubtitlesFormat format) throws IOException, ExtractionException { public List<Subtitles> getSubtitles(SubtitlesFormat format) throws IOException, ExtractionException {
assertPageFetched(); assertPageFetched();
if(isAgeRestricted) { List<Subtitles> subtitles = new ArrayList<>();
// If the video is age restricted getPlayerConfig will fail for (final Subtitles subtitle : availableSubtitles) {
return null; if (subtitle.getFileType() == format) subtitles.add(subtitle);
} }
// TODO: This should be done in onFetchPage() return subtitles;
JsonObject playerConfig = getPlayerConfig(getPageHtml(NewPipe.getDownloader()));
String playerResponse = playerConfig.getObject("args").getString("player_response");
JsonObject captions;
try {
// Captions does not exist, return null
if (!JsonParser.object().from(playerResponse).has("captions")) return null;
captions = JsonParser.object().from(playerResponse).getObject("captions");
} catch (JsonParserException e) {
// Failed to parse subtitles
return null;
}
JsonArray captionsArray = captions.getObject("playerCaptionsTracklistRenderer").getArray("captionTracks");
int captionsSize = captionsArray.size();
// Should not happen, if there is the "captions" object, it should always has some captions in it
if(captionsSize == 0) return null;
List<Subtitles> result = new ArrayList<>();
for (int x = 0; x < captionsSize; x++) {
String baseUrl = captionsArray.getObject(x).getString("baseUrl");
String extension = format.getExtension();
String URL = baseUrl.replaceAll("&fmt=[^&]*", "&fmt=" + extension);
String captionsLangCode = captionsArray.getObject(x).getString("vssId");
boolean isAutoGenerated = captionsLangCode.startsWith("a.");
String languageCode = captionsLangCode.replaceFirst((isAutoGenerated) ? "a." : ".", "");
result.add(new Subtitles(format, languageCode, URL, isAutoGenerated));
}
return result;
} }
@Override @Override
@ -580,6 +553,10 @@ public class YoutubeStreamExtractor extends StreamExtractor {
if (decryptionCode.isEmpty()) { if (decryptionCode.isEmpty()) {
decryptionCode = loadDecryptionCode(playerUrl); decryptionCode = loadDecryptionCode(playerUrl);
} }
if (availableSubtitles.isEmpty()) {
availableSubtitles.addAll(getAvailableSubtitles(getId()));
}
} }
private JsonObject getPlayerConfig(String pageContent) throws ParsingException { private JsonObject getPlayerConfig(String pageContent) throws ParsingException {
@ -732,6 +709,25 @@ public class YoutubeStreamExtractor extends StreamExtractor {
return result == null ? "" : result.toString(); return result == null ? "" : result.toString();
} }
/**
 * Downloads the subtitles listing for a video and builds the subtitle entries
 * for every listed track, in both TTML and VTT format.
 *
 * @param id the id of the video whose subtitles listing is requested
 * @return a list with two entries (TTML, VTT) per listed track that declares a
 *         language code; empty when the listing contains no such track
 * @throws SubtitlesException if the listing cannot be downloaded
 */
private List<Subtitles> getAvailableSubtitles(final String id) throws SubtitlesException {
    try {
        final String listingUrl = getVideoSubtitlesListingUrl(id);
        final String pageContent = NewPipe.getDownloader().download(listingUrl);
        final Document listing = Jsoup.parse(pageContent, listingUrl);
        final Elements tracks = listing.select("track");

        // Exactly two entries (TTML and VTT) are added per track.
        final List<Subtitles> subtitles = new ArrayList<>(tracks.size() * 2);
        for (final Element track : tracks) {
            final String languageCode = track.attr("lang_code");
            // attr() yields an empty string when the attribute is absent; such a
            // track cannot be addressed by language, so it is skipped.
            if (languageCode.isEmpty()) continue;

            subtitles.add(getVideoSubtitlesUrl(id, languageCode, SubtitlesFormat.TTML));
            subtitles.add(getVideoSubtitlesUrl(id, languageCode, SubtitlesFormat.VTT));
            // todo: add transcripts, they are currently omitted since they are incompatible with ExoPlayer
        }
        return subtitles;
    } catch (IOException | ReCaptchaException e) {
        throw new SubtitlesException("Unable to download subtitles listing", e);
    }
}
/*////////////////////////////////////////////////////////////////////////// /*//////////////////////////////////////////////////////////////////////////
// Data Class // Data Class
//////////////////////////////////////////////////////////////////////////*/ //////////////////////////////////////////////////////////////////////////*/
@ -751,12 +747,25 @@ public class YoutubeStreamExtractor extends StreamExtractor {
//////////////////////////////////////////////////////////////////////////*/ //////////////////////////////////////////////////////////////////////////*/
@Nonnull @Nonnull
private String getVideoInfoUrl(final String id, final String sts) { private static String getVideoInfoUrl(final String id, final String sts) {
return "https://www.youtube.com/get_video_info?" + "video_id=" + id + return "https://www.youtube.com/get_video_info?" + "video_id=" + id +
"&eurl=https://youtube.googleapis.com/v/" + id + "&eurl=https://youtube.googleapis.com/v/" + id +
"&sts=" + sts + "&ps=default&gl=US&hl=en"; "&sts=" + sts + "&ps=default&gl=US&hl=en";
} }
/**
 * Builds the URL of the subtitles listing for a video.
 *
 * @param id the video id appended as the {@code v} query parameter
 * @return the listing URL for the given video id
 */
@Nonnull
private static String getVideoSubtitlesListingUrl(final String id) {
    final String listingEndpoint = "https://video.google.com/timedtext?type=list&v=";
    return listingEndpoint + id;
}
/**
 * Creates the subtitles entry for one video, language and format.
 *
 * @param id     the video id, used as the {@code v} query parameter
 * @param locale the language code, used as the {@code lang} query parameter
 * @param format the subtitle format; its extension is used as the {@code fmt} query parameter
 * @return a subtitles entry for the built URL, marked as not auto-generated
 */
@Nonnull
private static Subtitles getVideoSubtitlesUrl(final String id, final String locale, final SubtitlesFormat format) {
    final String extension = format.getExtension();
    final String trackUrl = "https://www.youtube.com/api/timedtext?lang=" + locale
            + "&fmt=" + extension
            + "&name=&v=" + id;
    // Every entry built here is flagged as non-generated.
    return new Subtitles(format, locale, trackUrl, false);
}
private Map<String, ItagItem> getItags(String encodedUrlMapKey, ItagItem.ItagType itagTypeWanted) throws ParsingException { private Map<String, ItagItem> getItags(String encodedUrlMapKey, ItagItem.ItagType itagTypeWanted) throws ParsingException {
Map<String, ItagItem> urlAndItags = new LinkedHashMap<>(); Map<String, ItagItem> urlAndItags = new LinkedHashMap<>();

View File

@ -132,10 +132,9 @@ public abstract class StreamExtractor extends Extractor {
public abstract List<VideoStream> getVideoStreams() throws IOException, ExtractionException; public abstract List<VideoStream> getVideoStreams() throws IOException, ExtractionException;
public abstract List<VideoStream> getVideoOnlyStreams() throws IOException, ExtractionException; public abstract List<VideoStream> getVideoOnlyStreams() throws IOException, ExtractionException;
@Nullable @Nonnull
public abstract List<Subtitles> getSubtitlesDefault() throws IOException, ExtractionException; public abstract List<Subtitles> getSubtitlesDefault() throws IOException, ExtractionException;
@Nonnull
@Nullable
public abstract List<Subtitles> getSubtitles(SubtitlesFormat format) throws IOException, ExtractionException; public abstract List<Subtitles> getSubtitles(SubtitlesFormat format) throws IOException, ExtractionException;
public abstract StreamType getStreamType() throws ParsingException; public abstract StreamType getStreamType() throws ParsingException;

View File

@ -138,6 +138,10 @@ public class StreamInfo extends Info {
return start_position; return start_position;
} }
/**
 * Returns the subtitles stored in this stream info.
 *
 * @return the stored subtitles list, or an empty list when none has been set;
 *         never {@code null}
 */
public List<Subtitles> getSubtitles() {
    // The backing field has no initializer and stays unset when subtitle
    // extraction fails, so guard against handing out null.
    return subtitles == null ? Collections.<Subtitles>emptyList() : subtitles;
}
public void setStreamType(StreamType stream_type) { public void setStreamType(StreamType stream_type) {
this.stream_type = stream_type; this.stream_type = stream_type;
} }
@ -214,6 +218,10 @@ public class StreamInfo extends Info {
this.start_position = start_position; this.start_position = start_position;
} }
/**
 * Stores the given subtitles list in this stream info.
 *
 * @param subtitles the list to store; it is kept by reference, not copied
 */
public void setSubtitles(List<Subtitles> subtitles) {
this.subtitles = subtitles;
}
public static class StreamExtractException extends ExtractionException { public static class StreamExtractException extends ExtractionException {
StreamExtractException(String message) { StreamExtractException(String message) {
super(message); super(message);
@ -313,6 +321,12 @@ public class StreamInfo extends Info {
streamInfo.addError(new ExtractionException("Couldn't get video only streams", e)); streamInfo.addError(new ExtractionException("Couldn't get video only streams", e));
} }
try {
streamInfo.setSubtitles(extractor.getSubtitlesDefault());
} catch (Exception e) {
streamInfo.addError(new ExtractionException("Couldn't get subtitles", e));
}
// Lists can be null if a exception was thrown during extraction // Lists can be null if a exception was thrown during extraction
if (streamInfo.getVideoStreams() == null) streamInfo.setVideoStreams(Collections.<VideoStream>emptyList()); if (streamInfo.getVideoStreams() == null) streamInfo.setVideoStreams(Collections.<VideoStream>emptyList());
if (streamInfo.getVideoOnlyStreams()== null) streamInfo.setVideoOnlyStreams(Collections.<VideoStream>emptyList()); if (streamInfo.getVideoOnlyStreams()== null) streamInfo.setVideoOnlyStreams(Collections.<VideoStream>emptyList());
@ -444,4 +458,6 @@ public class StreamInfo extends Info {
public List<InfoItem> related_streams; public List<InfoItem> related_streams;
//in seconds. some metadata is not passed using a StreamInfo object! //in seconds. some metadata is not passed using a StreamInfo object!
public long start_position = 0; public long start_position = 0;
public List<Subtitles> subtitles;
} }

View File

@ -3,7 +3,6 @@ package org.schabi.newpipe.extractor.services.youtube;
import org.junit.BeforeClass; import org.junit.BeforeClass;
import org.junit.Test; import org.junit.Test;
import org.schabi.newpipe.Downloader; import org.schabi.newpipe.Downloader;
import org.schabi.newpipe.extractor.ExtractorAsserts;
import org.schabi.newpipe.extractor.NewPipe; import org.schabi.newpipe.extractor.NewPipe;
import org.schabi.newpipe.extractor.exceptions.ExtractionException; import org.schabi.newpipe.extractor.exceptions.ExtractionException;
import org.schabi.newpipe.extractor.exceptions.ParsingException; import org.schabi.newpipe.extractor.exceptions.ParsingException;
@ -148,12 +147,12 @@ public class YoutubeStreamExtractorDefaultTest {
@Test @Test
public void testGetSubtitlesListDefault() throws IOException, ExtractionException { public void testGetSubtitlesListDefault() throws IOException, ExtractionException {
// Video (/view?v=YQHsXMglC9A) set in the setUp() method has no captions => null // Video (/view?v=YQHsXMglC9A) set in the setUp() method has no captions => null
assertTrue(extractor.getSubtitlesDefault() == null); assertTrue(extractor.getSubtitlesDefault().isEmpty());
} }
@Test @Test
public void testGetSubtitlesList() throws IOException, ExtractionException { public void testGetSubtitlesList() throws IOException, ExtractionException {
// Video (/view?v=YQHsXMglC9A) set in the setUp() method has no captions => null // Video (/view?v=YQHsXMglC9A) set in the setUp() method has no captions => null
assertTrue(extractor.getSubtitles(SubtitlesFormat.VTT) == null); assertTrue(extractor.getSubtitles(SubtitlesFormat.VTT).isEmpty());
} }
} }

View File

@ -122,12 +122,12 @@ public class YoutubeStreamExtractorRestrictedTest {
@Test @Test
public void testGetSubtitlesListDefault() throws IOException, ExtractionException { public void testGetSubtitlesListDefault() throws IOException, ExtractionException {
// Video (/view?v=YQHsXMglC9A) set in the setUp() method has no captions => null // Video (/view?v=YQHsXMglC9A) set in the setUp() method has no captions => null
assertNull(extractor.getSubtitlesDefault()); assertTrue(extractor.getSubtitlesDefault().isEmpty());
} }
@Test @Test
public void testGetSubtitlesList() throws IOException, ExtractionException { public void testGetSubtitlesList() throws IOException, ExtractionException {
// Video (/view?v=YQHsXMglC9A) set in the setUp() method has no captions => null // Video (/view?v=YQHsXMglC9A) set in the setUp() method has no captions => null
assertNull(extractor.getSubtitles(SubtitlesFormat.VTT)); assertTrue(extractor.getSubtitles(SubtitlesFormat.VTT).isEmpty());
} }
} }