fix: add support for CarouselHeaderRenderer

This commit is contained in:
ThetaDev 2023-04-16 17:35:02 +02:00
parent 7dba6e3891
commit 20370395c5
6 changed files with 1274 additions and 53 deletions

View File

@ -2,7 +2,6 @@ package org.schabi.newpipe.extractor.services.youtube.extractors;
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.DISABLE_PRETTY_PRINT_PARAMETER; import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.DISABLE_PRETTY_PRINT_PARAMETER;
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.YOUTUBEI_V1_URL; import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.YOUTUBEI_V1_URL;
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.fixThumbnailUrl;
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getJsonPostResponse; import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getJsonPostResponse;
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getKey; import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getKey;
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getTextFromObject; import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getTextFromObject;
@ -34,6 +33,7 @@ import java.nio.charset.StandardCharsets;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.List; import java.util.List;
import java.util.Objects; import java.util.Objects;
import java.util.Optional;
import javax.annotation.Nonnull; import javax.annotation.Nonnull;
import javax.annotation.Nullable; import javax.annotation.Nullable;
@ -60,6 +60,8 @@ import javax.annotation.Nullable;
public class YoutubeChannelExtractor extends ChannelExtractor { public class YoutubeChannelExtractor extends ChannelExtractor {
private JsonObject initialData; private JsonObject initialData;
private Optional<JsonObject> channelHeader;
private boolean isCarouselHeader = false;
private JsonObject videoTab; private JsonObject videoTab;
/** /**
@ -189,6 +191,30 @@ public class YoutubeChannelExtractor extends ChannelExtractor {
} }
} }
/**
 * Resolves the JSON object describing the channel header and caches it in
 * {@code channelHeader}, so the lookup runs only once.
 *
 * <p>If {@code initialData.header} contains a {@code c4TabbedHeaderRenderer}, that object is
 * used. Otherwise, if it contains a {@code carouselHeaderRenderer}, {@code isCarouselHeader}
 * is set and the first entry of its {@code contents} array that holds a
 * {@code topicChannelDetailsRenderer} supplies the header.</p>
 *
 * @return the header object, or an empty {@link Optional} if neither renderer (or no
 *         {@code topicChannelDetailsRenderer} entry) is present
 */
@Nonnull
private Optional<JsonObject> getChannelHeader() {
    // Already resolved by an earlier call
    if (channelHeader != null) {
        return channelHeader;
    }

    final JsonObject headerContainer = initialData.getObject("header");

    if (headerContainer.has("c4TabbedHeaderRenderer")) {
        channelHeader = Optional.of(headerContainer.getObject("c4TabbedHeaderRenderer"));
        return channelHeader;
    }

    if (!headerContainer.has("carouselHeaderRenderer")) {
        channelHeader = Optional.empty();
        return channelHeader;
    }

    isCarouselHeader = true;
    Optional<JsonObject> topicDetails = Optional.empty();
    for (final Object entry : headerContainer.getObject("carouselHeaderRenderer")
            .getArray("contents")) {
        if (!(entry instanceof JsonObject)) {
            continue;
        }
        final JsonObject carouselItem = (JsonObject) entry;
        if (carouselItem.has("topicChannelDetailsRenderer")) {
            // Only the first matching entry is considered
            topicDetails = Optional.ofNullable(
                    carouselItem.getObject("topicChannelDetailsRenderer"));
            break;
        }
    }
    channelHeader = topicDetails;
    return channelHeader;
}
@Nonnull @Nonnull
@Override @Override
public String getUrl() throws ParsingException { public String getUrl() throws ParsingException {
@ -202,58 +228,61 @@ public class YoutubeChannelExtractor extends ChannelExtractor {
@Nonnull @Nonnull
@Override @Override
public String getId() throws ParsingException { public String getId() throws ParsingException {
final String channelId = initialData.getObject("header") return getChannelHeader()
.getObject("c4TabbedHeaderRenderer") .flatMap(header -> Optional.ofNullable(header.getString("channelId")).or(
.getString("channelId", ""); () -> Optional.ofNullable(header.getObject("navigationEndpoint")
.getObject("browseEndpoint")
if (!channelId.isEmpty()) { .getString("browseId"))
return channelId; ))
} else if (!isNullOrEmpty(redirectedChannelId)) { .or(() -> Optional.ofNullable(redirectedChannelId))
return redirectedChannelId; .orElseThrow(() -> new ParsingException("Could not get channel id"));
} else {
throw new ParsingException("Could not get channel id");
}
} }
@Nonnull @Nonnull
@Override @Override
public String getName() throws ParsingException { public String getName() throws ParsingException {
try { final String mdName = initialData.getObject("metadata")
return initialData.getObject("header").getObject("c4TabbedHeaderRenderer") .getObject("channelMetadataRenderer")
.getString("title"); .getString("title");
} catch (final Exception e) { if (!isNullOrEmpty(mdName)) {
throw new ParsingException("Could not get channel name", e); return mdName;
} }
final Optional<JsonObject> header = getChannelHeader();
if (header.isPresent()) {
final Object title = header.get().get("title");
if (title instanceof String) {
return (String) title;
} else if (title instanceof JsonObject) {
final String headerName = getTextFromObject((JsonObject) title);
if (!isNullOrEmpty(headerName)) {
return headerName;
}
}
}
throw new ParsingException("Could not get channel name");
} }
@Override @Override
public String getAvatarUrl() throws ParsingException { public String getAvatarUrl() throws ParsingException {
try { return getChannelHeader().flatMap(header -> Optional.ofNullable(
final String url = initialData.getObject("header") header.getObject("avatar").getArray("thumbnails")
.getObject("c4TabbedHeaderRenderer").getObject("avatar").getArray("thumbnails") .getObject(0).getString("url")
.getObject(0).getString("url"); ))
.map(YoutubeParsingHelper::fixThumbnailUrl)
return fixThumbnailUrl(url); .orElseThrow(() -> new ParsingException("Could not get avatar"));
} catch (final Exception e) {
throw new ParsingException("Could not get avatar", e);
}
} }
@Override @Override
public String getBannerUrl() throws ParsingException { public String getBannerUrl() throws ParsingException {
try { return getChannelHeader().flatMap(header -> Optional.ofNullable(
final String url = initialData.getObject("header") header.getObject("banner").getArray("thumbnails")
.getObject("c4TabbedHeaderRenderer").getObject("banner").getArray("thumbnails") .getObject(0).getString("url")
.getObject(0).getString("url"); ))
.filter(url -> !url.contains("s.ytimg.com") && !url.contains("default_banner"))
if (url == null || url.contains("s.ytimg.com") || url.contains("default_banner")) { .map(YoutubeParsingHelper::fixThumbnailUrl)
return null; .orElseThrow(() -> new ParsingException("Could not get banner"));
}
return fixThumbnailUrl(url);
} catch (final Exception e) {
throw new ParsingException("Could not get banner", e);
}
} }
@Override @Override
@ -267,17 +296,25 @@ public class YoutubeChannelExtractor extends ChannelExtractor {
@Override @Override
public long getSubscriberCount() throws ParsingException { public long getSubscriberCount() throws ParsingException {
final JsonObject c4TabbedHeaderRenderer = initialData.getObject("header") final Optional<JsonObject> header = getChannelHeader();
.getObject("c4TabbedHeaderRenderer"); if (header.isPresent()) {
if (!c4TabbedHeaderRenderer.has("subscriberCountText")) { JsonObject textObject = null;
return UNKNOWN_SUBSCRIBER_COUNT;
} if (header.get().has("subscriberCountText")) {
try { textObject = header.get().getObject("subscriberCountText");
return Utils.mixedNumberWordToLong(getTextFromObject(c4TabbedHeaderRenderer } else if (header.get().has("subtitle")) {
.getObject("subscriberCountText"))); textObject = header.get().getObject("subtitle");
} catch (final NumberFormatException e) { }
throw new ParsingException("Could not get subscriber count", e);
if (textObject != null) {
try {
return Utils.mixedNumberWordToLong(getTextFromObject(textObject));
} catch (final NumberFormatException e) {
throw new ParsingException("Could not get subscriber count", e);
}
}
} }
return UNKNOWN_SUBSCRIBER_COUNT;
} }
@Override @Override
@ -307,11 +344,17 @@ public class YoutubeChannelExtractor extends ChannelExtractor {
@Override @Override
public boolean isVerified() throws ParsingException { public boolean isVerified() throws ParsingException {
final JsonArray badges = initialData.getObject("header") // The CarouselHeaderRenderer does not contain any verification badges.
.getObject("c4TabbedHeaderRenderer") // Since it is only shown on YT-internal channels or on channels of large organizations
.getArray("badges"); // broadcasting live events, we can assume the channel to be verified.
if (isCarouselHeader) {
return true;
}
return YoutubeParsingHelper.isVerified(badges); return getChannelHeader()
.map(header -> header.getArray("badges"))
.map(YoutubeParsingHelper::isVerified)
.orElse(false);
} }
@Nonnull @Nonnull

View File

@ -648,4 +648,94 @@ public class YoutubeChannelExtractorTest {
assertFalse(extractor.isVerified()); assertFalse(extractor.isVerified());
} }
} }
/**
 * Tests the channel extractor against the Coachella channel
 * ({@code UCHF66aWLOxBW4l6VkSrS3cQ}), using the recorded responses stored under
 * {@code RESOURCE_PATH + "coachella"}.
 */
public static class Coachella implements BaseChannelExtractorTest {
    private static YoutubeChannelExtractor extractor;

    @BeforeAll
    public static void setUp() throws Exception {
        YoutubeTestsUtils.ensureStateless();
        NewPipe.init(DownloaderFactory.getDownloader(RESOURCE_PATH + "coachella"));
        extractor = (YoutubeChannelExtractor) YouTube
                .getChannelExtractor("https://www.youtube.com/channel/UCHF66aWLOxBW4l6VkSrS3cQ");
        extractor.fetchPage();
    }

    /*//////////////////////////////////////////////////////////////////////////
    // Extractor
    //////////////////////////////////////////////////////////////////////////*/

    @Test
    public void testServiceId() {
        assertEquals(YouTube.getServiceId(), extractor.getServiceId());
    }

    @Test
    public void testName() throws Exception {
        // Expected value first, actual value second, like the other assertions in this class
        assertEquals("Coachella", extractor.getName());
    }

    @Test
    public void testId() throws Exception {
        assertEquals("UCHF66aWLOxBW4l6VkSrS3cQ", extractor.getId());
    }

    @Test
    public void testUrl() throws ParsingException {
        assertEquals("https://www.youtube.com/channel/UCHF66aWLOxBW4l6VkSrS3cQ", extractor.getUrl());
    }

    @Test
    public void testOriginalUrl() throws ParsingException {
        assertEquals("https://www.youtube.com/channel/UCHF66aWLOxBW4l6VkSrS3cQ", extractor.getOriginalUrl());
    }

    /*//////////////////////////////////////////////////////////////////////////
    // ListExtractor
    //////////////////////////////////////////////////////////////////////////*/

    @Test
    public void testRelatedItems() throws Exception {
        defaultTestRelatedItems(extractor);
    }

    @Test
    public void testMoreRelatedItems() throws Exception {
        defaultTestMoreItems(extractor);
    }

    /*//////////////////////////////////////////////////////////////////////////
    // ChannelExtractor
    //////////////////////////////////////////////////////////////////////////*/

    @Override
    public void testDescription() {
        // Intentionally empty: the description is not checked for this channel
    }

    @Test
    public void testAvatarUrl() throws Exception {
        String avatarUrl = extractor.getAvatarUrl();
        assertIsSecureUrl(avatarUrl);
        ExtractorAsserts.assertContains("yt3", avatarUrl);
    }

    @Test
    public void testBannerUrl() throws Exception {
        // CarouselHeaderRenderer does not contain a banner
    }

    @Test
    public void testFeedUrl() throws Exception {
        assertEquals("https://www.youtube.com/feeds/videos.xml?channel_id=UCHF66aWLOxBW4l6VkSrS3cQ", extractor.getFeedUrl());
    }

    @Test
    public void testSubscriberCount() throws Exception {
        ExtractorAsserts.assertGreaterOrEqual(2_900_000, extractor.getSubscriberCount());
    }

    @Test
    public void testVerified() throws Exception {
        assertTrue(extractor.isVerified());
    }
}
} }

View File

@ -0,0 +1,85 @@
{
"request": {
"httpMethod": "GET",
"url": "https://www.youtube.com/sw.js",
"headers": {
"Origin": [
"https://www.youtube.com"
],
"Referer": [
"https://www.youtube.com"
],
"Accept-Language": [
"en-GB, en;q\u003d0.9"
]
},
"localization": {
"languageCode": "en",
"countryCode": "GB"
}
},
"response": {
"responseCode": 200,
"responseMessage": "",
"responseHeaders": {
"access-control-allow-credentials": [
"true"
],
"access-control-allow-origin": [
"https://www.youtube.com"
],
"alt-svc": [
"h3\u003d\":443\"; ma\u003d2592000,h3-29\u003d\":443\"; ma\u003d2592000"
],
"cache-control": [
"private, max-age\u003d0"
],
"content-type": [
"text/javascript; charset\u003dutf-8"
],
"cross-origin-opener-policy": [
"same-origin; report-to\u003d\"youtube_main\""
],
"date": [
"Sun, 16 Apr 2023 15:33:19 GMT"
],
"expires": [
"Sun, 16 Apr 2023 15:33:19 GMT"
],
"origin-trial": [
"AvC9UlR6RDk2crliDsFl66RWLnTbHrDbp+DiY6AYz/PNQ4G4tdUTjrHYr2sghbkhGQAVxb7jaPTHpEVBz0uzQwkAAAB4eyJvcmlnaW4iOiJodHRwczovL3lvdXR1YmUuY29tOjQ0MyIsImZlYXR1cmUiOiJXZWJWaWV3WFJlcXVlc3RlZFdpdGhEZXByZWNhdGlvbiIsImV4cGlyeSI6MTcxOTUzMjc5OSwiaXNTdWJkb21haW4iOnRydWV9"
],
"p3p": [
"CP\u003d\"This is not a P3P policy! See http://support.google.com/accounts/answer/151657?hl\u003den-GB for more info.\""
],
"permissions-policy": [
"ch-ua-arch\u003d*, ch-ua-bitness\u003d*, ch-ua-full-version\u003d*, ch-ua-full-version-list\u003d*, ch-ua-model\u003d*, ch-ua-wow64\u003d*, ch-ua-platform\u003d*, ch-ua-platform-version\u003d*"
],
"report-to": [
"{\"group\":\"youtube_main\",\"max_age\":2592000,\"endpoints\":[{\"url\":\"https://csp.withgoogle.com/csp/report-to/youtube_main\"}]}"
],
"server": [
"ESF"
],
"set-cookie": [
"YSC\u003dOCGx8FJdx2E; Domain\u003d.youtube.com; Path\u003d/; Secure; HttpOnly; SameSite\u003dnone",
"VISITOR_INFO1_LIVE\u003d; Domain\u003d.youtube.com; Expires\u003dMon, 20-Jul-2020 15:33:19 GMT; Path\u003d/; Secure; HttpOnly; SameSite\u003dnone",
"CONSENT\u003dPENDING+955; expires\u003dTue, 15-Apr-2025 15:33:19 GMT; path\u003d/; domain\u003d.youtube.com; Secure"
],
"strict-transport-security": [
"max-age\u003d31536000"
],
"x-content-type-options": [
"nosniff"
],
"x-frame-options": [
"SAMEORIGIN"
],
"x-xss-protection": [
"0"
]
},
"responseBody": "\n self.addEventListener(\u0027install\u0027, event \u003d\u003e {\n event.waitUntil(self.skipWaiting());\n });\n self.addEventListener(\u0027activate\u0027, event \u003d\u003e {\n event.waitUntil(\n self.clients.claim().then(() \u003d\u003e self.registration.unregister()));\n });\n ",
"latestUrl": "https://www.youtube.com/sw.js"
}
}