Merge pull request #1000 from AudricV/yt-streaminfoitemextractor-improvements

[YouTube] Improve YoutubeStreamInfoItemExtractor
This commit is contained in:
Tobi 2022-12-11 17:02:29 +01:00 committed by GitHub
commit 88e07e555d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 112 additions and 44 deletions

View File

@ -403,12 +403,7 @@ public class YoutubePlaylistExtractor extends PlaylistExtractor {
.map(JsonObject.class::cast) .map(JsonObject.class::cast)
.filter(video -> video.has(PLAYLIST_VIDEO_RENDERER)) .filter(video -> video.has(PLAYLIST_VIDEO_RENDERER))
.map(video -> new YoutubeStreamInfoItemExtractor( .map(video -> new YoutubeStreamInfoItemExtractor(
video.getObject(PLAYLIST_VIDEO_RENDERER), timeAgoParser) { video.getObject(PLAYLIST_VIDEO_RENDERER), timeAgoParser))
@Override
public long getViewCount() {
return -1;
}
})
.forEachOrdered(collector::commit); .forEachOrdered(collector::commit);
} }

View File

@ -10,6 +10,7 @@ import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeStreamLi
import org.schabi.newpipe.extractor.stream.StreamInfoItemExtractor; import org.schabi.newpipe.extractor.stream.StreamInfoItemExtractor;
import org.schabi.newpipe.extractor.stream.StreamType; import org.schabi.newpipe.extractor.stream.StreamType;
import org.schabi.newpipe.extractor.utils.JsonUtils; import org.schabi.newpipe.extractor.utils.JsonUtils;
import org.schabi.newpipe.extractor.utils.Parser;
import org.schabi.newpipe.extractor.utils.Utils; import org.schabi.newpipe.extractor.utils.Utils;
import javax.annotation.Nullable; import javax.annotation.Nullable;
@ -45,6 +46,7 @@ public class YoutubeStreamInfoItemExtractor implements StreamInfoItemExtractor {
private final JsonObject videoInfo; private final JsonObject videoInfo;
private final TimeAgoParser timeAgoParser; private final TimeAgoParser timeAgoParser;
private StreamType cachedStreamType; private StreamType cachedStreamType;
private Boolean isPremiere;
/** /**
* Creates an extractor of StreamInfoItems from a YouTube page. * Creates an extractor of StreamInfoItems from a YouTube page.
@ -66,6 +68,10 @@ public class YoutubeStreamInfoItemExtractor implements StreamInfoItemExtractor {
final JsonArray badges = videoInfo.getArray("badges"); final JsonArray badges = videoInfo.getArray("badges");
for (final Object badge : badges) { for (final Object badge : badges) {
if (!(badge instanceof JsonObject)) {
continue;
}
final JsonObject badgeRenderer final JsonObject badgeRenderer
= ((JsonObject) badge).getObject("metadataBadgeRenderer"); = ((JsonObject) badge).getObject("metadataBadgeRenderer");
if (badgeRenderer.getString("style", "").equals("BADGE_STYLE_TYPE_LIVE_NOW") if (badgeRenderer.getString("style", "").equals("BADGE_STYLE_TYPE_LIVE_NOW")
@ -76,6 +82,10 @@ public class YoutubeStreamInfoItemExtractor implements StreamInfoItemExtractor {
} }
for (final Object overlay : videoInfo.getArray("thumbnailOverlays")) { for (final Object overlay : videoInfo.getArray("thumbnailOverlays")) {
if (!(overlay instanceof JsonObject)) {
continue;
}
final String style = ((JsonObject) overlay) final String style = ((JsonObject) overlay)
.getObject("thumbnailOverlayTimeStatusRenderer") .getObject("thumbnailOverlayTimeStatusRenderer")
.getString("style", ""); .getString("style", "");
@ -116,30 +126,44 @@ public class YoutubeStreamInfoItemExtractor implements StreamInfoItemExtractor {
@Override @Override
public long getDuration() throws ParsingException { public long getDuration() throws ParsingException {
if (getStreamType() == StreamType.LIVE_STREAM || isPremiere()) { if (getStreamType() == StreamType.LIVE_STREAM) {
return -1; return -1;
} }
String duration = getTextFromObject(videoInfo.getObject("lengthText")); String duration = getTextFromObject(videoInfo.getObject("lengthText"));
if (isNullOrEmpty(duration)) { if (isNullOrEmpty(duration)) {
for (final Object thumbnailOverlay : videoInfo.getArray("thumbnailOverlays")) { // Available in playlists for videos
if (((JsonObject) thumbnailOverlay).has("thumbnailOverlayTimeStatusRenderer")) { duration = videoInfo.getString("lengthSeconds");
duration = getTextFromObject(((JsonObject) thumbnailOverlay)
.getObject("thumbnailOverlayTimeStatusRenderer").getObject("text")); if (isNullOrEmpty(duration)) {
final JsonObject timeOverlay = videoInfo.getArray("thumbnailOverlays")
.stream()
.filter(JsonObject.class::isInstance)
.map(JsonObject.class::cast)
.filter(thumbnailOverlay ->
thumbnailOverlay.has("thumbnailOverlayTimeStatusRenderer"))
.findFirst()
.orElse(null);
if (timeOverlay != null) {
duration = getTextFromObject(
timeOverlay.getObject("thumbnailOverlayTimeStatusRenderer")
.getObject("text"));
} }
} }
if (isNullOrEmpty(duration)) { if (isNullOrEmpty(duration)) {
if (isPremiere()) {
// Premieres can be livestreams, so the duration is not available in this
// case
return -1;
}
throw new ParsingException("Could not get duration"); throw new ParsingException("Could not get duration");
} }
} }
// NewPipe#8034 - YT returns not a correct duration for "YT shorts" videos
if ("SHORTS".equalsIgnoreCase(duration)) {
return 0;
}
return YoutubeParsingHelper.parseDurationString(duration); return YoutubeParsingHelper.parseDurationString(duration);
} }
@ -187,7 +211,6 @@ public class YoutubeStreamInfoItemExtractor implements StreamInfoItemExtractor {
@Nullable @Nullable
@Override @Override
public String getUploaderAvatarUrl() throws ParsingException { public String getUploaderAvatarUrl() throws ParsingException {
if (videoInfo.has("channelThumbnailSupportedRenderers")) { if (videoInfo.has("channelThumbnailSupportedRenderers")) {
return JsonUtils.getArray(videoInfo, "channelThumbnailSupportedRenderers" return JsonUtils.getArray(videoInfo, "channelThumbnailSupportedRenderers"
+ ".channelThumbnailWithLinkRenderer.thumbnail.thumbnails") + ".channelThumbnailWithLinkRenderer.thumbnail.thumbnails")
@ -218,13 +241,19 @@ public class YoutubeStreamInfoItemExtractor implements StreamInfoItemExtractor {
return DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm").format(getDateFromPremiere()); return DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm").format(getDateFromPremiere());
} }
final String publishedTimeText String publishedTimeText = getTextFromObject(videoInfo.getObject("publishedTimeText"));
= getTextFromObject(videoInfo.getObject("publishedTimeText"));
if (publishedTimeText != null && !publishedTimeText.isEmpty()) { if (isNullOrEmpty(publishedTimeText) && videoInfo.has("videoInfo")) {
return publishedTimeText; /*
Returned in playlists, in the form: view count separator upload date
*/
publishedTimeText = videoInfo.getObject("videoInfo")
.getArray("runs")
.getObject(2)
.getString("text");
} }
return null; return isNullOrEmpty(publishedTimeText) ? null : publishedTimeText;
} }
@Nullable @Nullable
@ -251,28 +280,69 @@ public class YoutubeStreamInfoItemExtractor implements StreamInfoItemExtractor {
@Override @Override
public long getViewCount() throws ParsingException { public long getViewCount() throws ParsingException {
try { if (videoInfo.has("topStandaloneBadge") || isPremium() || isPremiere()) {
if (videoInfo.has("topStandaloneBadge") || isPremium()) { return -1;
return -1;
}
if (!videoInfo.has("viewCountText")) {
// This object is null when a video has its views hidden.
return -1;
}
final String viewCount = getTextFromObject(videoInfo.getObject("viewCountText"));
if (viewCount.toLowerCase().contains("no views")) {
return 0;
} else if (viewCount.toLowerCase().contains("recommended")) {
return -1;
}
return Long.parseLong(Utils.removeNonDigitCharacters(viewCount));
} catch (final Exception e) {
throw new ParsingException("Could not get view count", e);
} }
final String viewCount = getTextFromObject(videoInfo.getObject("viewCountText"));
if (!isNullOrEmpty(viewCount)) {
try {
// These approaches are language dependent
if (viewCount.toLowerCase().contains("no views")) {
return 0;
} else if (viewCount.toLowerCase().contains("recommended")) {
return -1;
}
return Long.parseLong(Utils.removeNonDigitCharacters(viewCount));
} catch (final Exception ignored) {
// Ignore all exceptions, as we can fall back to accessibility data
}
}
// Try parsing the real view count from accessibility data, unless this is a running
// livestream (for a running livestream, the view count is returned there, not the number
// of people currently watching it)
if (getStreamType() != StreamType.LIVE_STREAM) {
try {
return Long.parseLong(Utils.removeNonDigitCharacters(
// This approach is language dependent
Parser.matchGroup1("([\\d,]+) views$",
videoInfo.getObject("title")
.getObject("accessibility")
.getObject("accessibilityData")
.getString("label", ""))));
} catch (final Exception ignored) {
// Ignore all exceptions, as the view count can be hidden by creators, and so
// cannot be found in this case
}
}
// Fall back to a short view count, always used for livestreams (see why above)
try {
// Returned in playlists, in the form: view count separator upload date
if (videoInfo.has("videoInfo")) {
return Utils.mixedNumberWordToLong(videoInfo.getObject("videoInfo")
.getArray("runs")
.getObject(0)
.getString("text"));
}
// Returned everywhere but in playlists, used by the website to show view counts
if (videoInfo.has("shortViewCountText")) {
return Utils.mixedNumberWordToLong(videoInfo.getObject("shortViewCountText")
.getArray("runs")
.getObject(0)
.getString("text"));
}
} catch (final Exception ignored) {
// Ignore all exceptions, as the view count can be hidden by creators, and so cannot be
// found in this case
}
// No view count extracted: return -1, as the view count can be hidden by creators on videos
return -1;
} }
@Override @Override
@ -292,7 +362,10 @@ public class YoutubeStreamInfoItemExtractor implements StreamInfoItemExtractor {
} }
private boolean isPremiere() { private boolean isPremiere() {
return videoInfo.has("upcomingEventData"); if (isPremiere == null) {
isPremiere = videoInfo.has("upcomingEventData");
}
return isPremiere;
} }
private OffsetDateTime getDateFromPremiere() throws ParsingException { private OffsetDateTime getDateFromPremiere() throws ParsingException {