[YouTube] Support pageHeaderRenderer and interactiveTabbedHeaderRenderer channel headers

Adding this support required turning the isCarouselHeader boolean into an enum
named HeaderType, which contains all supported channel header types.

Also assert that the page has been fetched where needed, to avoid
NullPointerExceptions when the channel page has not been fetched, and remove the
getChannelHeaderJson method from YoutubeChannelExtractor: its code has been
inlined into its sole remaining usage following the new header support changes.
This commit is contained in:
AudricV 2023-08-08 18:57:16 +02:00
parent 7936987955
commit 1852031a0b
No known key found for this signature in database
GPG Key ID: DA92EC7905614198
2 changed files with 209 additions and 59 deletions

View File

@ -219,6 +219,50 @@ public final class YoutubeChannelHelper {
*/ */
public static final class ChannelHeader { public static final class ChannelHeader {
/**
* Types of supported YouTube channel headers.
*
* <p>
* Each constant identifies which renderer object of the channel response's header the header
* JSON was taken from, so that callers know which JSON layout to expect when reading it.
* </p>
*/
public enum HeaderType {
/**
* A {@code c4TabbedHeaderRenderer} channel header type.
*
* <p>
* This header is returned on the majority of channels and contains the channel's name,
* its banner, its avatar and, in most cases, its subscriber count.
* </p>
*/
C4_TABBED,
/**
* An {@code interactiveTabbedHeaderRenderer} channel header type.
*
* <p>
* This header is returned for gaming topic channels, and only contains the channel's
* name, its banner and a poster as its "avatar".
* </p>
*/
INTERACTIVE_TABBED,
/**
* A {@code carouselHeaderRenderer} channel header type.
*
* <p>
* This header returns only the channel's name, its avatar and its subscriber count.
* The header JSON used for this type is the {@code topicChannelDetailsRenderer} object
* found in the {@code contents} array of the {@code carouselHeaderRenderer}.
* </p>
*/
CAROUSEL,
/**
* A {@code pageHeaderRenderer} channel header type.
*
* <p>
* This header returns only the channel's name and its avatar: it provides no banner,
* no subscriber count and no description.
* </p>
*/
PAGE
}
/** /**
* The channel header JSON response. * The channel header JSON response.
*/ */
@ -226,17 +270,17 @@ public final class YoutubeChannelHelper {
public final JsonObject json; public final JsonObject json;
/** /**
* Whether the header is a {@code carouselHeaderRenderer}. * The type of the channel header.
* *
* <p> * <p>
* See the class documentation for more details. * See the documentation of the {@link HeaderType} class for more details.
* </p> * </p>
*/ */
public final boolean isCarouselHeader; public final HeaderType headerType;
private ChannelHeader(@Nonnull final JsonObject json, final boolean isCarouselHeader) { private ChannelHeader(@Nonnull final JsonObject json, final HeaderType headerType) {
this.json = json; this.json = json;
this.isCarouselHeader = isCarouselHeader; this.headerType = headerType;
} }
} }
@ -254,7 +298,7 @@ public final class YoutubeChannelHelper {
if (header.has("c4TabbedHeaderRenderer")) { if (header.has("c4TabbedHeaderRenderer")) {
return Optional.of(header.getObject("c4TabbedHeaderRenderer")) return Optional.of(header.getObject("c4TabbedHeaderRenderer"))
.map(json -> new ChannelHeader(json, false)); .map(json -> new ChannelHeader(json, ChannelHeader.HeaderType.C4_TABBED));
} else if (header.has("carouselHeaderRenderer")) { } else if (header.has("carouselHeaderRenderer")) {
return header.getObject("carouselHeaderRenderer") return header.getObject("carouselHeaderRenderer")
.getArray("contents") .getArray("contents")
@ -264,7 +308,14 @@ public final class YoutubeChannelHelper {
.filter(item -> item.has("topicChannelDetailsRenderer")) .filter(item -> item.has("topicChannelDetailsRenderer"))
.findFirst() .findFirst()
.map(item -> item.getObject("topicChannelDetailsRenderer")) .map(item -> item.getObject("topicChannelDetailsRenderer"))
.map(json -> new ChannelHeader(json, true)); .map(json -> new ChannelHeader(json, ChannelHeader.HeaderType.CAROUSEL));
} else if (header.has("pageHeaderRenderer")) {
return Optional.of(header.getObject("pageHeaderRenderer"))
.map(json -> new ChannelHeader(json, ChannelHeader.HeaderType.PAGE));
} else if (header.has("interactiveTabbedHeaderRenderer")) {
return Optional.of(header.getObject("interactiveTabbedHeaderRenderer"))
.map(json -> new ChannelHeader(json,
ChannelHeader.HeaderType.INTERACTIVE_TABBED));
} else { } else {
return Optional.empty(); return Optional.empty();
} }

View File

@ -17,6 +17,8 @@ import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.linkhandler.ListLinkHandler; import org.schabi.newpipe.extractor.linkhandler.ListLinkHandler;
import org.schabi.newpipe.extractor.linkhandler.ReadyChannelTabListLinkHandler; import org.schabi.newpipe.extractor.linkhandler.ReadyChannelTabListLinkHandler;
import org.schabi.newpipe.extractor.services.youtube.YoutubeChannelHelper; import org.schabi.newpipe.extractor.services.youtube.YoutubeChannelHelper;
import org.schabi.newpipe.extractor.services.youtube.YoutubeChannelHelper.ChannelHeader;
import org.schabi.newpipe.extractor.services.youtube.YoutubeChannelHelper.ChannelHeader.HeaderType;
import org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper; import org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper;
import org.schabi.newpipe.extractor.services.youtube.extractors.YoutubeChannelTabExtractor.VideosTabExtractor; import org.schabi.newpipe.extractor.services.youtube.extractors.YoutubeChannelTabExtractor.VideosTabExtractor;
import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeChannelLinkHandlerFactory; import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeChannelLinkHandlerFactory;
@ -59,7 +61,7 @@ public class YoutubeChannelExtractor extends ChannelExtractor {
private JsonObject jsonResponse; private JsonObject jsonResponse;
@SuppressWarnings("OptionalUsedAsFieldOrParameterType") @SuppressWarnings("OptionalUsedAsFieldOrParameterType")
private Optional<YoutubeChannelHelper.ChannelHeader> channelHeader; private Optional<ChannelHeader> channelHeader;
private String channelId; private String channelId;
@ -116,11 +118,6 @@ public class YoutubeChannelExtractor extends ChannelExtractor {
.orElse(null); .orElse(null);
} }
@Nonnull
private Optional<JsonObject> getChannelHeaderJson() {
return channelHeader.map(it -> it.json);
}
@Nonnull @Nonnull
@Override @Override
public String getUrl() throws ParsingException { public String getUrl() throws ParsingException {
@ -134,7 +131,8 @@ public class YoutubeChannelExtractor extends ChannelExtractor {
@Nonnull @Nonnull
@Override @Override
public String getId() throws ParsingException { public String getId() throws ParsingException {
return getChannelHeaderJson() assertPageFetched();
return channelHeader.map(header -> header.json)
.flatMap(header -> Optional.ofNullable(header.getString("channelId")) .flatMap(header -> Optional.ofNullable(header.getString("channelId"))
.or(() -> Optional.ofNullable(header.getObject("navigationEndpoint") .or(() -> Optional.ofNullable(header.getObject("navigationEndpoint")
.getObject("browseEndpoint") .getObject("browseEndpoint")
@ -147,8 +145,13 @@ public class YoutubeChannelExtractor extends ChannelExtractor {
@Nonnull @Nonnull
@Override @Override
public String getName() throws ParsingException { public String getName() throws ParsingException {
assertPageFetched();
if (channelAgeGateRenderer != null) { if (channelAgeGateRenderer != null) {
return channelAgeGateRenderer.getString("channelTitle"); final String title = channelAgeGateRenderer.getString("channelTitle");
if (isNullOrEmpty(title)) {
throw new ParsingException("Could not get channel name");
}
return title;
} }
final String metadataRendererTitle = jsonResponse.getObject("metadata") final String metadataRendererTitle = jsonResponse.getObject("metadata")
@ -158,55 +161,106 @@ public class YoutubeChannelExtractor extends ChannelExtractor {
return metadataRendererTitle; return metadataRendererTitle;
} }
return getChannelHeaderJson().flatMap(header -> { return channelHeader.flatMap(header -> {
final Object title = header.get("title"); final JsonObject channelJson = header.json;
if (title instanceof String) { switch (header.headerType) {
return Optional.of((String) title); case PAGE:
} else if (title instanceof JsonObject) { return Optional.ofNullable(channelJson.getObject("content")
final String headerName = getTextFromObject((JsonObject) title); .getObject("pageHeaderViewModel")
if (!isNullOrEmpty(headerName)) { .getObject("title")
return Optional.of(headerName); .getObject("dynamicTextViewModel")
.getObject("text")
.getString("content", channelJson.getString("pageTitle")));
case CAROUSEL:
case INTERACTIVE_TABBED:
return Optional.ofNullable(getTextFromObject(channelJson.getObject("title")));
default:
return Optional.ofNullable(channelJson.getString("title"));
} }
} })
return Optional.empty(); // The channel name from a microformatDataRenderer may be different from the one displayed,
}).orElseThrow(() -> new ParsingException("Could not get channel name")); // especially for auto-generated channels, depending on the language requested for the
// interface (hl parameter of InnerTube requests' payload)
.or(() -> Optional.ofNullable(jsonResponse.getObject("microformat")
.getObject("microformatDataRenderer")
.getString("title")))
.orElseThrow(() -> new ParsingException("Could not get channel name"));
} }
@Override @Override
public String getAvatarUrl() throws ParsingException { public String getAvatarUrl() throws ParsingException {
final JsonObject avatarJsonObjectContainer; assertPageFetched();
if (channelAgeGateRenderer != null) { if (channelAgeGateRenderer != null) {
avatarJsonObjectContainer = channelAgeGateRenderer; return Optional.ofNullable(channelAgeGateRenderer.getObject("avatar")
} else { .getArray("thumbnails")
avatarJsonObjectContainer = getChannelHeaderJson() .getObject(0)
.getString("url"))
.map(YoutubeParsingHelper::fixThumbnailUrl)
.orElseThrow(() -> new ParsingException("Could not get avatar URL")); .orElseThrow(() -> new ParsingException("Could not get avatar URL"));
} }
return YoutubeParsingHelper.fixThumbnailUrl(avatarJsonObjectContainer.getObject("avatar") return channelHeader.map(header -> {
final HeaderType headerType = header.headerType;
if (headerType == HeaderType.PAGE) {
return Optional.ofNullable(header.json.getObject("content")
.getObject("pageHeaderViewModel")
.getObject("image")
.getObject("contentPreviewImageViewModel")
.getObject("image")
.getArray("sources")
.getObject(0)
.getString("url"))
.map(YoutubeParsingHelper::fixThumbnailUrl)
.orElse(null);
}
if (headerType == HeaderType.INTERACTIVE_TABBED) {
return Optional.ofNullable(header.json.getObject("boxArt")
.getArray("thumbnails") .getArray("thumbnails")
.getObject(0) .getObject(0)
.getString("url")); .getString("url"))
.map(YoutubeParsingHelper::fixThumbnailUrl)
.orElse(null);
}
return Optional.ofNullable(header.json.getObject("avatar")
.getArray("thumbnails")
.getObject(0)
.getString("url"))
.map(YoutubeParsingHelper::fixThumbnailUrl)
.orElse(null);
}).orElseThrow(() -> new ParsingException("Could not get avatar URL"));
} }
@Override @Override
public String getBannerUrl() throws ParsingException { public String getBannerUrl() throws ParsingException {
assertPageFetched();
if (channelAgeGateRenderer != null) { if (channelAgeGateRenderer != null) {
return ""; return null;
} }
return getChannelHeaderJson().flatMap(header -> Optional.ofNullable( if (channelHeader.isPresent()) {
header.getObject("banner") final ChannelHeader header = channelHeader.get();
if (header.headerType == HeaderType.PAGE) {
// No banner is available on pageHeaderRenderer headers
return null;
}
return Optional.ofNullable(header.json.getObject("banner")
.getArray("thumbnails") .getArray("thumbnails")
.getObject(0) .getObject(0)
.getString("url"))) .getString("url"))
.filter(url -> !url.contains("s.ytimg.com") && !url.contains("default_banner")) .filter(url -> !url.contains("s.ytimg.com") && !url.contains("default_banner"))
.map(YoutubeParsingHelper::fixThumbnailUrl) .map(YoutubeParsingHelper::fixThumbnailUrl)
// Channels may not have a banner, so no exception should be thrown if no banner is // Channels may not have a banner, so no exception should be thrown if no
// found // banner is found
// Return null in this case // Return null in this case
.orElse(null); .orElse(null);
} }
return null;
}
@Override @Override
public String getFeedUrl() throws ParsingException { public String getFeedUrl() throws ParsingException {
// RSS feeds are accessible for age-restricted channels, no need to check whether a channel // RSS feeds are accessible for age-restricted channels, no need to check whether a channel
@ -214,25 +268,34 @@ public class YoutubeChannelExtractor extends ChannelExtractor {
try { try {
return YoutubeParsingHelper.getFeedUrlFrom(getId()); return YoutubeParsingHelper.getFeedUrlFrom(getId());
} catch (final Exception e) { } catch (final Exception e) {
throw new ParsingException("Could not get feed url", e); throw new ParsingException("Could not get feed URL", e);
} }
} }
@Override @Override
public long getSubscriberCount() throws ParsingException { public long getSubscriberCount() throws ParsingException {
assertPageFetched();
if (channelAgeGateRenderer != null) { if (channelAgeGateRenderer != null) {
return UNKNOWN_SUBSCRIBER_COUNT; return UNKNOWN_SUBSCRIBER_COUNT;
} }
final Optional<JsonObject> headerOpt = getChannelHeaderJson(); if (channelHeader.isPresent()) {
if (headerOpt.isPresent()) { final ChannelHeader header = channelHeader.get();
final JsonObject header = headerOpt.get();
if (header.headerType == HeaderType.INTERACTIVE_TABBED
|| header.headerType == HeaderType.PAGE) {
// No subscriber count is available on interactiveTabbedHeaderRenderer and
// pageHeaderRenderer headers
return UNKNOWN_SUBSCRIBER_COUNT;
}
final JsonObject headerJson = header.json;
JsonObject textObject = null; JsonObject textObject = null;
if (header.has("subscriberCountText")) { if (headerJson.has("subscriberCountText")) {
textObject = header.getObject("subscriberCountText"); textObject = headerJson.getObject("subscriberCountText");
} else if (header.has("subtitle")) { } else if (headerJson.has("subtitle")) {
textObject = header.getObject("subtitle"); textObject = headerJson.getObject("subtitle");
} }
if (textObject != null) { if (textObject != null) {
@ -249,11 +312,34 @@ public class YoutubeChannelExtractor extends ChannelExtractor {
@Override @Override
public String getDescription() throws ParsingException { public String getDescription() throws ParsingException {
assertPageFetched();
if (channelAgeGateRenderer != null) { if (channelAgeGateRenderer != null) {
return null; return null;
} }
try { try {
if (channelHeader.isPresent()) {
final ChannelHeader header = channelHeader.get();
if (header.headerType == HeaderType.PAGE) {
// A pageHeaderRenderer doesn't contain a description
return null;
}
if (header.headerType == HeaderType.INTERACTIVE_TABBED) {
/*
In an interactiveTabbedHeaderRenderer, the real description is only available
in its header.
The other one, returned in non-About tabs and accessible in the
microformatDataRenderer object of the response, may be completely different.
The description extracted is incomplete and the original one can only be
accessed from the About tab.
*/
return getTextFromObject(header.json.getObject("description"));
}
}
// The description is cut and the original one can only be accessed from the About tab
return jsonResponse.getObject("metadata") return jsonResponse.getObject("metadata")
.getObject("channelMetadataRenderer") .getObject("channelMetadataRenderer")
.getString("description"); .getString("description");
@ -279,27 +365,39 @@ public class YoutubeChannelExtractor extends ChannelExtractor {
@Override @Override
public boolean isVerified() throws ParsingException { public boolean isVerified() throws ParsingException {
assertPageFetched();
if (channelAgeGateRenderer != null) { if (channelAgeGateRenderer != null) {
return false; return false;
} }
if (channelHeader.isPresent()) { if (channelHeader.isPresent()) {
final YoutubeChannelHelper.ChannelHeader header = channelHeader.get(); final ChannelHeader header = channelHeader.get();
// The CarouselHeaderRenderer does not contain any verification badges. // carouselHeaderRenderer and pageHeaderRenderer do not contain any verification
// Since it is only shown on YT-internal channels or on channels of large organizations // badges
// broadcasting live events, we can assume the channel to be verified. // Since they are only shown on YouTube internal channels or on channels of large
if (header.isCarouselHeader) { // organizations broadcasting live events, we can assume the channel to be verified
if (header.headerType == HeaderType.CAROUSEL || header.headerType == HeaderType.PAGE) {
return true; return true;
} }
if (header.headerType == HeaderType.INTERACTIVE_TABBED) {
// If the header has an autoGenerated property, it should mean that the channel has
// been auto generated by YouTube: we can assume the channel to be verified in this
// case
return header.json.has("autoGenerated");
}
return YoutubeParsingHelper.isVerified(header.json.getArray("badges")); return YoutubeParsingHelper.isVerified(header.json.getArray("badges"));
} }
return false; return false;
} }
@Nonnull @Nonnull
@Override @Override
public List<ListLinkHandler> getTabs() throws ParsingException { public List<ListLinkHandler> getTabs() throws ParsingException {
assertPageFetched();
if (channelAgeGateRenderer == null) { if (channelAgeGateRenderer == null) {
return getTabsForNonAgeRestrictedChannels(); return getTabsForNonAgeRestrictedChannels();
} }
@ -401,6 +499,7 @@ public class YoutubeChannelExtractor extends ChannelExtractor {
@Nonnull @Nonnull
@Override @Override
public List<String> getTags() throws ParsingException { public List<String> getTags() throws ParsingException {
assertPageFetched();
if (channelAgeGateRenderer != null) { if (channelAgeGateRenderer != null) {
return List.of(); return List.of();
} }