Merge pull request #1000 from AudricV/yt-streaminfoitemextractor-improvements
[YouTube] Improve YoutubeStreamInfoItemExtractor
This commit is contained in:
commit
88e07e555d
|
@ -403,12 +403,7 @@ public class YoutubePlaylistExtractor extends PlaylistExtractor {
|
||||||
.map(JsonObject.class::cast)
|
.map(JsonObject.class::cast)
|
||||||
.filter(video -> video.has(PLAYLIST_VIDEO_RENDERER))
|
.filter(video -> video.has(PLAYLIST_VIDEO_RENDERER))
|
||||||
.map(video -> new YoutubeStreamInfoItemExtractor(
|
.map(video -> new YoutubeStreamInfoItemExtractor(
|
||||||
video.getObject(PLAYLIST_VIDEO_RENDERER), timeAgoParser) {
|
video.getObject(PLAYLIST_VIDEO_RENDERER), timeAgoParser))
|
||||||
@Override
|
|
||||||
public long getViewCount() {
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
})
|
|
||||||
.forEachOrdered(collector::commit);
|
.forEachOrdered(collector::commit);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -10,6 +10,7 @@ import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeStreamLi
|
||||||
import org.schabi.newpipe.extractor.stream.StreamInfoItemExtractor;
|
import org.schabi.newpipe.extractor.stream.StreamInfoItemExtractor;
|
||||||
import org.schabi.newpipe.extractor.stream.StreamType;
|
import org.schabi.newpipe.extractor.stream.StreamType;
|
||||||
import org.schabi.newpipe.extractor.utils.JsonUtils;
|
import org.schabi.newpipe.extractor.utils.JsonUtils;
|
||||||
|
import org.schabi.newpipe.extractor.utils.Parser;
|
||||||
import org.schabi.newpipe.extractor.utils.Utils;
|
import org.schabi.newpipe.extractor.utils.Utils;
|
||||||
|
|
||||||
import javax.annotation.Nullable;
|
import javax.annotation.Nullable;
|
||||||
|
@ -45,6 +46,7 @@ public class YoutubeStreamInfoItemExtractor implements StreamInfoItemExtractor {
|
||||||
private final JsonObject videoInfo;
|
private final JsonObject videoInfo;
|
||||||
private final TimeAgoParser timeAgoParser;
|
private final TimeAgoParser timeAgoParser;
|
||||||
private StreamType cachedStreamType;
|
private StreamType cachedStreamType;
|
||||||
|
private Boolean isPremiere;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Creates an extractor of StreamInfoItems from a YouTube page.
|
* Creates an extractor of StreamInfoItems from a YouTube page.
|
||||||
|
@ -66,6 +68,10 @@ public class YoutubeStreamInfoItemExtractor implements StreamInfoItemExtractor {
|
||||||
|
|
||||||
final JsonArray badges = videoInfo.getArray("badges");
|
final JsonArray badges = videoInfo.getArray("badges");
|
||||||
for (final Object badge : badges) {
|
for (final Object badge : badges) {
|
||||||
|
if (!(badge instanceof JsonObject)) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
final JsonObject badgeRenderer
|
final JsonObject badgeRenderer
|
||||||
= ((JsonObject) badge).getObject("metadataBadgeRenderer");
|
= ((JsonObject) badge).getObject("metadataBadgeRenderer");
|
||||||
if (badgeRenderer.getString("style", "").equals("BADGE_STYLE_TYPE_LIVE_NOW")
|
if (badgeRenderer.getString("style", "").equals("BADGE_STYLE_TYPE_LIVE_NOW")
|
||||||
|
@ -76,6 +82,10 @@ public class YoutubeStreamInfoItemExtractor implements StreamInfoItemExtractor {
|
||||||
}
|
}
|
||||||
|
|
||||||
for (final Object overlay : videoInfo.getArray("thumbnailOverlays")) {
|
for (final Object overlay : videoInfo.getArray("thumbnailOverlays")) {
|
||||||
|
if (!(overlay instanceof JsonObject)) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
final String style = ((JsonObject) overlay)
|
final String style = ((JsonObject) overlay)
|
||||||
.getObject("thumbnailOverlayTimeStatusRenderer")
|
.getObject("thumbnailOverlayTimeStatusRenderer")
|
||||||
.getString("style", "");
|
.getString("style", "");
|
||||||
|
@ -116,30 +126,44 @@ public class YoutubeStreamInfoItemExtractor implements StreamInfoItemExtractor {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public long getDuration() throws ParsingException {
|
public long getDuration() throws ParsingException {
|
||||||
if (getStreamType() == StreamType.LIVE_STREAM || isPremiere()) {
|
if (getStreamType() == StreamType.LIVE_STREAM) {
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
String duration = getTextFromObject(videoInfo.getObject("lengthText"));
|
String duration = getTextFromObject(videoInfo.getObject("lengthText"));
|
||||||
|
|
||||||
if (isNullOrEmpty(duration)) {
|
if (isNullOrEmpty(duration)) {
|
||||||
for (final Object thumbnailOverlay : videoInfo.getArray("thumbnailOverlays")) {
|
// Available in playlists for videos
|
||||||
if (((JsonObject) thumbnailOverlay).has("thumbnailOverlayTimeStatusRenderer")) {
|
duration = videoInfo.getString("lengthSeconds");
|
||||||
duration = getTextFromObject(((JsonObject) thumbnailOverlay)
|
|
||||||
.getObject("thumbnailOverlayTimeStatusRenderer").getObject("text"));
|
if (isNullOrEmpty(duration)) {
|
||||||
|
final JsonObject timeOverlay = videoInfo.getArray("thumbnailOverlays")
|
||||||
|
.stream()
|
||||||
|
.filter(JsonObject.class::isInstance)
|
||||||
|
.map(JsonObject.class::cast)
|
||||||
|
.filter(thumbnailOverlay ->
|
||||||
|
thumbnailOverlay.has("thumbnailOverlayTimeStatusRenderer"))
|
||||||
|
.findFirst()
|
||||||
|
.orElse(null);
|
||||||
|
|
||||||
|
if (timeOverlay != null) {
|
||||||
|
duration = getTextFromObject(
|
||||||
|
timeOverlay.getObject("thumbnailOverlayTimeStatusRenderer")
|
||||||
|
.getObject("text"));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (isNullOrEmpty(duration)) {
|
if (isNullOrEmpty(duration)) {
|
||||||
|
if (isPremiere()) {
|
||||||
|
// Premieres can be livestreams, so the duration is not available in this
|
||||||
|
// case
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
throw new ParsingException("Could not get duration");
|
throw new ParsingException("Could not get duration");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// NewPipe#8034 - YT does not return a correct duration for "YT shorts" videos
|
|
||||||
if ("SHORTS".equalsIgnoreCase(duration)) {
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
return YoutubeParsingHelper.parseDurationString(duration);
|
return YoutubeParsingHelper.parseDurationString(duration);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -187,7 +211,6 @@ public class YoutubeStreamInfoItemExtractor implements StreamInfoItemExtractor {
|
||||||
@Nullable
|
@Nullable
|
||||||
@Override
|
@Override
|
||||||
public String getUploaderAvatarUrl() throws ParsingException {
|
public String getUploaderAvatarUrl() throws ParsingException {
|
||||||
|
|
||||||
if (videoInfo.has("channelThumbnailSupportedRenderers")) {
|
if (videoInfo.has("channelThumbnailSupportedRenderers")) {
|
||||||
return JsonUtils.getArray(videoInfo, "channelThumbnailSupportedRenderers"
|
return JsonUtils.getArray(videoInfo, "channelThumbnailSupportedRenderers"
|
||||||
+ ".channelThumbnailWithLinkRenderer.thumbnail.thumbnails")
|
+ ".channelThumbnailWithLinkRenderer.thumbnail.thumbnails")
|
||||||
|
@ -218,13 +241,19 @@ public class YoutubeStreamInfoItemExtractor implements StreamInfoItemExtractor {
|
||||||
return DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm").format(getDateFromPremiere());
|
return DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm").format(getDateFromPremiere());
|
||||||
}
|
}
|
||||||
|
|
||||||
final String publishedTimeText
|
String publishedTimeText = getTextFromObject(videoInfo.getObject("publishedTimeText"));
|
||||||
= getTextFromObject(videoInfo.getObject("publishedTimeText"));
|
|
||||||
if (publishedTimeText != null && !publishedTimeText.isEmpty()) {
|
if (isNullOrEmpty(publishedTimeText) && videoInfo.has("videoInfo")) {
|
||||||
return publishedTimeText;
|
/*
|
||||||
|
Returned in playlists, in the form: view count separator upload date
|
||||||
|
*/
|
||||||
|
publishedTimeText = videoInfo.getObject("videoInfo")
|
||||||
|
.getArray("runs")
|
||||||
|
.getObject(2)
|
||||||
|
.getString("text");
|
||||||
}
|
}
|
||||||
|
|
||||||
return null;
|
return isNullOrEmpty(publishedTimeText) ? null : publishedTimeText;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Nullable
|
@Nullable
|
||||||
|
@ -251,28 +280,69 @@ public class YoutubeStreamInfoItemExtractor implements StreamInfoItemExtractor {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public long getViewCount() throws ParsingException {
|
public long getViewCount() throws ParsingException {
|
||||||
try {
|
if (videoInfo.has("topStandaloneBadge") || isPremium() || isPremiere()) {
|
||||||
if (videoInfo.has("topStandaloneBadge") || isPremium()) {
|
return -1;
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!videoInfo.has("viewCountText")) {
|
|
||||||
// This object is null when a video has its views hidden.
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
final String viewCount = getTextFromObject(videoInfo.getObject("viewCountText"));
|
|
||||||
|
|
||||||
if (viewCount.toLowerCase().contains("no views")) {
|
|
||||||
return 0;
|
|
||||||
} else if (viewCount.toLowerCase().contains("recommended")) {
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
return Long.parseLong(Utils.removeNonDigitCharacters(viewCount));
|
|
||||||
} catch (final Exception e) {
|
|
||||||
throw new ParsingException("Could not get view count", e);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
final String viewCount = getTextFromObject(videoInfo.getObject("viewCountText"));
|
||||||
|
|
||||||
|
if (!isNullOrEmpty(viewCount)) {
|
||||||
|
try {
|
||||||
|
// These approaches are language dependent
|
||||||
|
if (viewCount.toLowerCase().contains("no views")) {
|
||||||
|
return 0;
|
||||||
|
} else if (viewCount.toLowerCase().contains("recommended")) {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
return Long.parseLong(Utils.removeNonDigitCharacters(viewCount));
|
||||||
|
} catch (final Exception ignored) {
|
||||||
|
// Ignore all exceptions, as we can fallback to accessibility data
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Try parsing the real view count from accessibility data, if that's not a running
|
||||||
|
// livestream (the view count is returned and not the count of people currently watching
|
||||||
|
// the livestream)
|
||||||
|
if (getStreamType() != StreamType.LIVE_STREAM) {
|
||||||
|
try {
|
||||||
|
return Long.parseLong(Utils.removeNonDigitCharacters(
|
||||||
|
// This approach is language dependent
|
||||||
|
Parser.matchGroup1("([\\d,]+) views$",
|
||||||
|
videoInfo.getObject("title")
|
||||||
|
.getObject("accessibility")
|
||||||
|
.getObject("accessibilityData")
|
||||||
|
.getString("label", ""))));
|
||||||
|
} catch (final Exception ignored) {
|
||||||
|
// Ignore all exceptions, as the view count can be hidden by creators, and so
|
||||||
|
// cannot be found in this case
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Fallback to a short view count, always used for livestreams (see why above)
|
||||||
|
try {
|
||||||
|
// Returned in playlists, in the form: view count separator upload date
|
||||||
|
if (videoInfo.has("videoInfo")) {
|
||||||
|
return Utils.mixedNumberWordToLong(videoInfo.getObject("videoInfo")
|
||||||
|
.getArray("runs")
|
||||||
|
.getObject(0)
|
||||||
|
.getString("text"));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Returned everywhere but in playlists, used by the website to show view counts
|
||||||
|
if (videoInfo.has("shortViewCountText")) {
|
||||||
|
return Utils.mixedNumberWordToLong(videoInfo.getObject("shortViewCountText")
|
||||||
|
.getArray("runs")
|
||||||
|
.getObject(0)
|
||||||
|
.getString("text"));
|
||||||
|
}
|
||||||
|
} catch (final Exception ignored) {
|
||||||
|
// Ignore all exceptions, as the view count can be hidden by creators, and so cannot be
|
||||||
|
// found in this case
|
||||||
|
}
|
||||||
|
|
||||||
|
// No view count extracted: return -1, as the view count can be hidden by creators on videos
|
||||||
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -292,7 +362,10 @@ public class YoutubeStreamInfoItemExtractor implements StreamInfoItemExtractor {
|
||||||
}
|
}
|
||||||
|
|
||||||
private boolean isPremiere() {
|
private boolean isPremiere() {
|
||||||
return videoInfo.has("upcomingEventData");
|
if (isPremiere == null) {
|
||||||
|
isPremiere = videoInfo.has("upcomingEventData");
|
||||||
|
}
|
||||||
|
return isPremiere;
|
||||||
}
|
}
|
||||||
|
|
||||||
private OffsetDateTime getDateFromPremiere() throws ParsingException {
|
private OffsetDateTime getDateFromPremiere() throws ParsingException {
|
||||||
|
|
Loading…
Reference in New Issue