[YouTube] Optimize extracting auto-translated captions
Faster and ordered: captions provided by the user are at the beginning of the list, auto-translated captions are at the end
This commit is contained in:
parent
ff030ad297
commit
30a4b3617d
|
@ -670,52 +670,72 @@ public class YoutubeStreamExtractor extends StreamExtractor {
|
||||||
assertPageFetched();
|
assertPageFetched();
|
||||||
|
|
||||||
// We cannot store the subtitles list because the media format may change
|
// We cannot store the subtitles list because the media format may change
|
||||||
final List<SubtitlesStream> subtitlesToReturn = new ArrayList<>();
|
final List<SubtitlesStream> subtitles = new ArrayList<>();
|
||||||
|
final List<SubtitlesStream> autoTranslatedSubtitles = new ArrayList<>();
|
||||||
final JsonObject renderer = playerResponse.getObject("captions")
|
final JsonObject renderer = playerResponse.getObject("captions")
|
||||||
.getObject("playerCaptionsTracklistRenderer");
|
.getObject("playerCaptionsTracklistRenderer");
|
||||||
final JsonArray captionsArray = renderer.getArray("captionTracks");
|
final JsonArray captionsArray = renderer.getArray("captionTracks");
|
||||||
|
|
||||||
|
// Generate list of languages available for auto-translations
|
||||||
|
final List<String> translationLanguages;
|
||||||
|
if (renderer.has("translationLanguages")) {
|
||||||
|
translationLanguages = renderer.getArray("translationLanguages")
|
||||||
|
.stream()
|
||||||
|
.map(JsonObject.class::cast)
|
||||||
|
.map(lang -> lang.getString("languageCode"))
|
||||||
|
.collect(Collectors.toList());
|
||||||
|
} else {
|
||||||
|
translationLanguages = Collections.emptyList();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Add subtitles
|
||||||
for (int i = 0; i < captionsArray.size(); i++) {
|
for (int i = 0; i < captionsArray.size(); i++) {
|
||||||
final JsonObject caption = captionsArray.getObject(i);
|
final JsonObject caption = captionsArray.getObject(i);
|
||||||
final String languageCode = caption.getString("languageCode");
|
final String languageCode = caption.getString("languageCode");
|
||||||
final String baseUrl = caption.getString("baseUrl");
|
final String baseUrl = caption.getString("baseUrl");
|
||||||
final String vssId = caption.getString("vssId");
|
final String vssId = caption.getString("vssId");
|
||||||
|
|
||||||
if (languageCode != null && baseUrl != null && vssId != null) {
|
if (languageCode == null || baseUrl == null || vssId == null) {
|
||||||
final boolean isAutoGenerated = vssId.startsWith("a.");
|
continue;
|
||||||
final String cleanUrl = baseUrl
|
}
|
||||||
// Remove preexisting format if exists
|
|
||||||
.replaceAll("&fmt=[^&]*", "")
|
|
||||||
// Remove translation language
|
|
||||||
.replaceAll("&tlang=[^&]*", "");
|
|
||||||
|
|
||||||
subtitlesToReturn.add(new SubtitlesStream.Builder()
|
final boolean isAutoGenerated = vssId.startsWith("a.");
|
||||||
.setContent(cleanUrl + "&fmt=" + format.getSuffix(), true)
|
final String cleanUrl = baseUrl
|
||||||
.setMediaFormat(format)
|
// Remove preexisting format if exists
|
||||||
.setLanguageCode(languageCode)
|
.replaceAll("&fmt=[^&]*", "")
|
||||||
.setAutoGenerated(isAutoGenerated)
|
// Remove translation language
|
||||||
.setAutoTranslated(false)
|
.replaceAll("&tlang=[^&]*", "");
|
||||||
.build());
|
|
||||||
if (i == 0 && caption.getBoolean("isTranslatable")
|
// add base subtitles
|
||||||
&& renderer.has("translationLanguages")) {
|
subtitles.add(new SubtitlesStream.Builder()
|
||||||
final JsonArray languages = renderer.getArray("translationLanguages");
|
.setContent(cleanUrl + "&fmt=" + format.getSuffix(), true)
|
||||||
for (int j = 0; j < languages.size(); j++) {
|
.setMediaFormat(format)
|
||||||
final JsonObject lang = languages.getObject(j);
|
.setLanguageCode(languageCode)
|
||||||
final String tLanguageCode = lang.getString("languageCode");
|
.setAutoGenerated(isAutoGenerated)
|
||||||
subtitlesToReturn.add(new SubtitlesStream.Builder()
|
.setAutoTranslated(false)
|
||||||
.setContent(cleanUrl + "&fmt=" + format.getSuffix()
|
.build());
|
||||||
+ "&tlang=" + tLanguageCode, true)
|
|
||||||
.setMediaFormat(format)
|
// add auto-translations of this subtitle if available
|
||||||
.setLanguageCode(tLanguageCode)
|
if (caption.getBoolean("isTranslatable")) {
|
||||||
.setAutoGenerated(isAutoGenerated)
|
for (final String tLanguageCode : translationLanguages) {
|
||||||
.setAutoTranslated(true)
|
autoTranslatedSubtitles.add(new SubtitlesStream.Builder()
|
||||||
.build());
|
.setContent(cleanUrl + "&fmt=" + format.getSuffix()
|
||||||
}
|
+ "&tlang=" + tLanguageCode, true)
|
||||||
|
.setMediaFormat(format)
|
||||||
|
.setLanguageCode(tLanguageCode)
|
||||||
|
.setAutoGenerated(true)
|
||||||
|
.setAutoTranslated(true)
|
||||||
|
.setBaseLanguageCode(languageCode)
|
||||||
|
.build());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return subtitlesToReturn;
|
// add auto-translations at the end for better sorting
|
||||||
|
subtitles.addAll(autoTranslatedSubtitles);
|
||||||
|
|
||||||
|
return subtitles;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -12,6 +12,8 @@ import javax.annotation.Nullable;
|
||||||
|
|
||||||
public final class SubtitlesStream extends Stream {
|
public final class SubtitlesStream extends Stream {
|
||||||
private final MediaFormat format;
|
private final MediaFormat format;
|
||||||
|
@Nullable
|
||||||
|
private final Locale baseLocale;
|
||||||
private final Locale locale;
|
private final Locale locale;
|
||||||
private final boolean autoGenerated;
|
private final boolean autoGenerated;
|
||||||
private final boolean autoTranslated;
|
private final boolean autoTranslated;
|
||||||
|
@ -31,6 +33,8 @@ public final class SubtitlesStream extends Stream {
|
||||||
@Nullable
|
@Nullable
|
||||||
private String manifestUrl;
|
private String manifestUrl;
|
||||||
private String languageCode;
|
private String languageCode;
|
||||||
|
@Nullable
|
||||||
|
private String baseLanguageCode;
|
||||||
// Use of the Boolean class instead of the primitive type needed for setter call check
|
// Use of the Boolean class instead of the primitive type needed for setter call check
|
||||||
private Boolean autoGenerated;
|
private Boolean autoGenerated;
|
||||||
private Boolean autoTranslated;
|
private Boolean autoTranslated;
|
||||||
|
@ -142,6 +146,18 @@ public final class SubtitlesStream extends Stream {
|
||||||
return this;
|
return this;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Set the language code of the base language used to auto-translate
|
||||||
|
* the {@link SubtitlesStream} to the current language code.
|
||||||
|
*
|
||||||
|
* @param baseLanguageCode the language code of the base language the subtitles were auto-translated from, or {@code null} if they are not auto-translated
|
||||||
|
* @return this {@link Builder} instance
|
||||||
|
*/
|
||||||
|
public Builder setBaseLanguageCode(@Nullable final String baseLanguageCode) {
|
||||||
|
this.baseLanguageCode = baseLanguageCode;
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Set whether the subtitles have been auto-generated by the streaming service.
|
* Set whether the subtitles have been auto-generated by the streaming service.
|
||||||
*
|
*
|
||||||
|
@ -222,26 +238,29 @@ public final class SubtitlesStream extends Stream {
|
||||||
}
|
}
|
||||||
|
|
||||||
return new SubtitlesStream(id, content, isUrl, mediaFormat, deliveryMethod,
|
return new SubtitlesStream(id, content, isUrl, mediaFormat, deliveryMethod,
|
||||||
languageCode, autoGenerated, autoTranslated, manifestUrl);
|
languageCode, autoGenerated, autoTranslated, baseLanguageCode, manifestUrl);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Create a new subtitles stream.
|
* Create a new subtitles stream.
|
||||||
*
|
*
|
||||||
* @param id the identifier which uniquely identifies the stream, e.g. for YouTube
|
* @param id the identifier which uniquely identifies the stream, e.g. for YouTube
|
||||||
* this would be the itag
|
* this would be the itag
|
||||||
* @param content the content or the URL of the stream, depending on whether isUrl is
|
* @param content the content or the URL of the stream, depending on whether isUrl is
|
||||||
* true
|
* true
|
||||||
* @param isUrl whether content is the URL or the actual content of e.g. a DASH
|
* @param isUrl whether content is the URL or the actual content of e.g. a DASH
|
||||||
* manifest
|
* manifest
|
||||||
* @param mediaFormat the {@link MediaFormat} used by the stream
|
* @param mediaFormat the {@link MediaFormat} used by the stream
|
||||||
* @param deliveryMethod the {@link DeliveryMethod} of the stream
|
* @param deliveryMethod the {@link DeliveryMethod} of the stream
|
||||||
* @param languageCode the language code of the stream
|
* @param languageCode the language code of the stream
|
||||||
* @param autoGenerated whether the subtitles are auto-generated by the streaming service
|
* @param autoGenerated whether the subtitles are auto-generated by the streaming service
|
||||||
* @param autoTranslated whether the subtitles are auto-translated by the streaming service
|
* @param autoTranslated whether the subtitles are auto-translated by the streaming service
|
||||||
* @param manifestUrl the URL of the manifest this stream comes from (if applicable,
|
* @param baseLanguageCode the language code of the base language used to translate
|
||||||
* otherwise null)
|
* the subtitles to the current language
|
||||||
|
* or null if the subtitles are not auto-translated
|
||||||
|
* @param manifestUrl the URL of the manifest this stream comes from (if applicable,
|
||||||
|
* otherwise null)
|
||||||
*/
|
*/
|
||||||
@SuppressWarnings("checkstyle:ParameterNumber")
|
@SuppressWarnings("checkstyle:ParameterNumber")
|
||||||
private SubtitlesStream(@Nonnull final String id,
|
private SubtitlesStream(@Nonnull final String id,
|
||||||
|
@ -252,6 +271,7 @@ public final class SubtitlesStream extends Stream {
|
||||||
@Nonnull final String languageCode,
|
@Nonnull final String languageCode,
|
||||||
final boolean autoGenerated,
|
final boolean autoGenerated,
|
||||||
final boolean autoTranslated,
|
final boolean autoTranslated,
|
||||||
|
@Nullable final String baseLanguageCode,
|
||||||
@Nullable final String manifestUrl) throws ParsingException {
|
@Nullable final String manifestUrl) throws ParsingException {
|
||||||
super(id, content, isUrl, mediaFormat, deliveryMethod, manifestUrl);
|
super(id, content, isUrl, mediaFormat, deliveryMethod, manifestUrl);
|
||||||
this.locale = LocaleCompat.forLanguageTag(languageCode).orElseThrow(
|
this.locale = LocaleCompat.forLanguageTag(languageCode).orElseThrow(
|
||||||
|
@ -261,6 +281,13 @@ public final class SubtitlesStream extends Stream {
|
||||||
this.format = mediaFormat;
|
this.format = mediaFormat;
|
||||||
this.autoGenerated = autoGenerated;
|
this.autoGenerated = autoGenerated;
|
||||||
this.autoTranslated = autoTranslated;
|
this.autoTranslated = autoTranslated;
|
||||||
|
if (baseLanguageCode == null) {
|
||||||
|
this.baseLocale = null;
|
||||||
|
} else {
|
||||||
|
this.baseLocale = LocaleCompat.forLanguageTag(baseLanguageCode).orElseThrow(
|
||||||
|
() -> new ParsingException(
|
||||||
|
"not a valid locale language code: " + baseLanguageCode));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -337,6 +364,37 @@ public final class SubtitlesStream extends Stream {
|
||||||
return locale;
|
return locale;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get the {@link Locale baseLocale} which was used to automatically translate the subtitles
|
||||||
|
* into the current {@link #locale}.
|
||||||
|
*
|
||||||
|
* @return the {@link Locale baseLocale} for the subtitle translation
|
||||||
|
* or {@code null} if the subtitle is not auto-translated
|
||||||
|
*/
|
||||||
|
@Nullable
|
||||||
|
public Locale getBaseLocale() {
|
||||||
|
return baseLocale;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get the display base language name of the subtitles.
|
||||||
|
*
|
||||||
|
* @return the display language name of the subtitles
|
||||||
|
*/
|
||||||
|
public String getDisplayBaseLanguageName() {
|
||||||
|
return locale.getDisplayName(locale);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get the language tag of the subtitles.
|
||||||
|
*
|
||||||
|
* @return the language tag of the subtitles
|
||||||
|
*/
|
||||||
|
public String getBaseLanguageTag() {
|
||||||
|
return code;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* No subtitles which are currently extracted use an {@link ItagItem}, so {@code null} is
|
* No subtitles which are currently extracted use an {@link ItagItem}, so {@code null} is
|
||||||
* returned by this method.
|
* returned by this method.
|
||||||
|
@ -348,4 +406,16 @@ public final class SubtitlesStream extends Stream {
|
||||||
public ItagItem getItagItem() {
|
public ItagItem getItagItem() {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String toString() {
|
||||||
|
return "SubtitlesStream{"
|
||||||
|
+ "format=" + format
|
||||||
|
+ ", baseLocale=" + baseLocale
|
||||||
|
+ ", locale=" + locale
|
||||||
|
+ ", autoGenerated=" + autoGenerated
|
||||||
|
+ ", autoTranslated=" + autoTranslated
|
||||||
|
+ ", code='" + code + '\''
|
||||||
|
+ '}';
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
package org.schabi.newpipe.extractor.utils;
|
package org.schabi.newpipe.extractor.utils;
|
||||||
|
|
||||||
|
import javax.annotation.Nonnull;
|
||||||
import java.util.Locale;
|
import java.util.Locale;
|
||||||
import java.util.Optional;
|
import java.util.Optional;
|
||||||
|
|
||||||
|
@ -16,7 +17,7 @@ public final class LocaleCompat {
|
||||||
|
|
||||||
// Source: The AndroidX LocaleListCompat class's private forLanguageTagCompat() method.
|
// Source: The AndroidX LocaleListCompat class's private forLanguageTagCompat() method.
|
||||||
// Use Locale.forLanguageTag() on Android API level >= 21 / Java instead.
|
// Use Locale.forLanguageTag() on Android API level >= 21 / Java instead.
|
||||||
public static Optional<Locale> forLanguageTag(final String str) {
|
public static Optional<Locale> forLanguageTag(@Nonnull final String str) {
|
||||||
if (str.contains("-")) {
|
if (str.contains("-")) {
|
||||||
final String[] args = str.split("-", -1);
|
final String[] args = str.split("-", -1);
|
||||||
if (args.length > 2) {
|
if (args.length > 2) {
|
||||||
|
|
Loading…
Reference in New Issue