Merge pull request #1050 from Theta-Dev/fix/channel-carousel-header
[YouTube] Add support for CarouselHeaderRenderer
This commit is contained in:
commit
2deb023da4
|
@ -2,7 +2,6 @@ package org.schabi.newpipe.extractor.services.youtube.extractors;
|
|||
|
||||
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.DISABLE_PRETTY_PRINT_PARAMETER;
|
||||
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.YOUTUBEI_V1_URL;
|
||||
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.fixThumbnailUrl;
|
||||
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getJsonPostResponse;
|
||||
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getKey;
|
||||
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getTextFromObject;
|
||||
|
@ -34,6 +33,7 @@ import java.nio.charset.StandardCharsets;
|
|||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Objects;
|
||||
import java.util.Optional;
|
||||
|
||||
import javax.annotation.Nonnull;
|
||||
import javax.annotation.Nullable;
|
||||
|
@ -60,6 +60,8 @@ import javax.annotation.Nullable;
|
|||
|
||||
public class YoutubeChannelExtractor extends ChannelExtractor {
|
||||
private JsonObject initialData;
|
||||
private Optional<JsonObject> channelHeader;
|
||||
private boolean isCarouselHeader = false;
|
||||
private JsonObject videoTab;
|
||||
|
||||
/**
|
||||
|
@ -189,6 +191,30 @@ public class YoutubeChannelExtractor extends ChannelExtractor {
|
|||
}
|
||||
}
|
||||
|
||||
@Nonnull
|
||||
private Optional<JsonObject> getChannelHeader() {
|
||||
if (channelHeader == null) {
|
||||
final JsonObject h = initialData.getObject("header");
|
||||
|
||||
if (h.has("c4TabbedHeaderRenderer")) {
|
||||
channelHeader = Optional.of(h.getObject("c4TabbedHeaderRenderer"));
|
||||
} else if (h.has("carouselHeaderRenderer")) {
|
||||
isCarouselHeader = true;
|
||||
channelHeader = h.getObject("carouselHeaderRenderer")
|
||||
.getArray("contents")
|
||||
.stream()
|
||||
.filter(JsonObject.class::isInstance)
|
||||
.map(JsonObject.class::cast)
|
||||
.filter(itm -> itm.has("topicChannelDetailsRenderer"))
|
||||
.findFirst()
|
||||
.map(itm -> itm.getObject("topicChannelDetailsRenderer"));
|
||||
} else {
|
||||
channelHeader = Optional.empty();
|
||||
}
|
||||
}
|
||||
return channelHeader;
|
||||
}
|
||||
|
||||
@Nonnull
|
||||
@Override
|
||||
public String getUrl() throws ParsingException {
|
||||
|
@ -202,58 +228,61 @@ public class YoutubeChannelExtractor extends ChannelExtractor {
|
|||
@Nonnull
|
||||
@Override
|
||||
public String getId() throws ParsingException {
|
||||
final String channelId = initialData.getObject("header")
|
||||
.getObject("c4TabbedHeaderRenderer")
|
||||
.getString("channelId", "");
|
||||
|
||||
if (!channelId.isEmpty()) {
|
||||
return channelId;
|
||||
} else if (!isNullOrEmpty(redirectedChannelId)) {
|
||||
return redirectedChannelId;
|
||||
} else {
|
||||
throw new ParsingException("Could not get channel id");
|
||||
}
|
||||
return getChannelHeader()
|
||||
.flatMap(header -> Optional.ofNullable(header.getString("channelId")).or(
|
||||
() -> Optional.ofNullable(header.getObject("navigationEndpoint")
|
||||
.getObject("browseEndpoint")
|
||||
.getString("browseId"))
|
||||
))
|
||||
.or(() -> Optional.ofNullable(redirectedChannelId))
|
||||
.orElseThrow(() -> new ParsingException("Could not get channel id"));
|
||||
}
|
||||
|
||||
@Nonnull
|
||||
@Override
|
||||
public String getName() throws ParsingException {
|
||||
try {
|
||||
return initialData.getObject("header").getObject("c4TabbedHeaderRenderer")
|
||||
.getString("title");
|
||||
} catch (final Exception e) {
|
||||
throw new ParsingException("Could not get channel name", e);
|
||||
final String mdName = initialData.getObject("metadata")
|
||||
.getObject("channelMetadataRenderer")
|
||||
.getString("title");
|
||||
if (!isNullOrEmpty(mdName)) {
|
||||
return mdName;
|
||||
}
|
||||
|
||||
final Optional<JsonObject> header = getChannelHeader();
|
||||
if (header.isPresent()) {
|
||||
final Object title = header.get().get("title");
|
||||
if (title instanceof String) {
|
||||
return (String) title;
|
||||
} else if (title instanceof JsonObject) {
|
||||
final String headerName = getTextFromObject((JsonObject) title);
|
||||
if (!isNullOrEmpty(headerName)) {
|
||||
return headerName;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
throw new ParsingException("Could not get channel name");
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getAvatarUrl() throws ParsingException {
|
||||
try {
|
||||
final String url = initialData.getObject("header")
|
||||
.getObject("c4TabbedHeaderRenderer").getObject("avatar").getArray("thumbnails")
|
||||
.getObject(0).getString("url");
|
||||
|
||||
return fixThumbnailUrl(url);
|
||||
} catch (final Exception e) {
|
||||
throw new ParsingException("Could not get avatar", e);
|
||||
}
|
||||
return getChannelHeader().flatMap(header -> Optional.ofNullable(
|
||||
header.getObject("avatar").getArray("thumbnails")
|
||||
.getObject(0).getString("url")
|
||||
))
|
||||
.map(YoutubeParsingHelper::fixThumbnailUrl)
|
||||
.orElseThrow(() -> new ParsingException("Could not get avatar"));
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getBannerUrl() throws ParsingException {
|
||||
try {
|
||||
final String url = initialData.getObject("header")
|
||||
.getObject("c4TabbedHeaderRenderer").getObject("banner").getArray("thumbnails")
|
||||
.getObject(0).getString("url");
|
||||
|
||||
if (url == null || url.contains("s.ytimg.com") || url.contains("default_banner")) {
|
||||
return null;
|
||||
}
|
||||
|
||||
return fixThumbnailUrl(url);
|
||||
} catch (final Exception e) {
|
||||
throw new ParsingException("Could not get banner", e);
|
||||
}
|
||||
return getChannelHeader().flatMap(header -> Optional.ofNullable(
|
||||
header.getObject("banner").getArray("thumbnails")
|
||||
.getObject(0).getString("url")
|
||||
))
|
||||
.filter(url -> !url.contains("s.ytimg.com") && !url.contains("default_banner"))
|
||||
.map(YoutubeParsingHelper::fixThumbnailUrl)
|
||||
.orElseThrow(() -> new ParsingException("Could not get banner"));
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -267,17 +296,25 @@ public class YoutubeChannelExtractor extends ChannelExtractor {
|
|||
|
||||
@Override
|
||||
public long getSubscriberCount() throws ParsingException {
|
||||
final JsonObject c4TabbedHeaderRenderer = initialData.getObject("header")
|
||||
.getObject("c4TabbedHeaderRenderer");
|
||||
if (!c4TabbedHeaderRenderer.has("subscriberCountText")) {
|
||||
return UNKNOWN_SUBSCRIBER_COUNT;
|
||||
}
|
||||
try {
|
||||
return Utils.mixedNumberWordToLong(getTextFromObject(c4TabbedHeaderRenderer
|
||||
.getObject("subscriberCountText")));
|
||||
} catch (final NumberFormatException e) {
|
||||
throw new ParsingException("Could not get subscriber count", e);
|
||||
final Optional<JsonObject> header = getChannelHeader();
|
||||
if (header.isPresent()) {
|
||||
JsonObject textObject = null;
|
||||
|
||||
if (header.get().has("subscriberCountText")) {
|
||||
textObject = header.get().getObject("subscriberCountText");
|
||||
} else if (header.get().has("subtitle")) {
|
||||
textObject = header.get().getObject("subtitle");
|
||||
}
|
||||
|
||||
if (textObject != null) {
|
||||
try {
|
||||
return Utils.mixedNumberWordToLong(getTextFromObject(textObject));
|
||||
} catch (final NumberFormatException e) {
|
||||
throw new ParsingException("Could not get subscriber count", e);
|
||||
}
|
||||
}
|
||||
}
|
||||
return UNKNOWN_SUBSCRIBER_COUNT;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -307,11 +344,17 @@ public class YoutubeChannelExtractor extends ChannelExtractor {
|
|||
|
||||
@Override
|
||||
public boolean isVerified() throws ParsingException {
|
||||
final JsonArray badges = initialData.getObject("header")
|
||||
.getObject("c4TabbedHeaderRenderer")
|
||||
.getArray("badges");
|
||||
// The CarouselHeaderRenderer does not contain any verification badges.
|
||||
// Since it is only shown on YT-internal channels or on channels of large organizations
|
||||
// broadcasting live events, we can assume the channel to be verified.
|
||||
if (isCarouselHeader) {
|
||||
return true;
|
||||
}
|
||||
|
||||
return YoutubeParsingHelper.isVerified(badges);
|
||||
return getChannelHeader()
|
||||
.map(header -> header.getArray("badges"))
|
||||
.map(YoutubeParsingHelper::isVerified)
|
||||
.orElse(false);
|
||||
}
|
||||
|
||||
@Nonnull
|
||||
|
|
|
@ -648,4 +648,94 @@ public class YoutubeChannelExtractorTest {
|
|||
assertFalse(extractor.isVerified());
|
||||
}
|
||||
}
|
||||
|
||||
public static class CarouselHeader implements BaseChannelExtractorTest {
|
||||
private static YoutubeChannelExtractor extractor;
|
||||
|
||||
@BeforeAll
|
||||
public static void setUp() throws Exception {
|
||||
YoutubeTestsUtils.ensureStateless();
|
||||
NewPipe.init(DownloaderFactory.getDownloader(RESOURCE_PATH + "carouselHeader"));
|
||||
extractor = (YoutubeChannelExtractor) YouTube
|
||||
.getChannelExtractor("https://www.youtube.com/channel/UCHF66aWLOxBW4l6VkSrS3cQ");
|
||||
extractor.fetchPage();
|
||||
}
|
||||
|
||||
/*//////////////////////////////////////////////////////////////////////////
|
||||
// Extractor
|
||||
//////////////////////////////////////////////////////////////////////////*/
|
||||
|
||||
@Test
|
||||
public void testServiceId() {
|
||||
assertEquals(YouTube.getServiceId(), extractor.getServiceId());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testName() throws Exception {
|
||||
assertEquals(extractor.getName(), "Coachella");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testId() throws Exception {
|
||||
assertEquals("UCHF66aWLOxBW4l6VkSrS3cQ", extractor.getId());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testUrl() throws ParsingException {
|
||||
assertEquals("https://www.youtube.com/channel/UCHF66aWLOxBW4l6VkSrS3cQ", extractor.getUrl());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testOriginalUrl() throws ParsingException {
|
||||
assertEquals("https://www.youtube.com/channel/UCHF66aWLOxBW4l6VkSrS3cQ", extractor.getOriginalUrl());
|
||||
}
|
||||
|
||||
/*//////////////////////////////////////////////////////////////////////////
|
||||
// ListExtractor
|
||||
//////////////////////////////////////////////////////////////////////////*/
|
||||
|
||||
@Test
|
||||
public void testRelatedItems() throws Exception {
|
||||
defaultTestRelatedItems(extractor);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testMoreRelatedItems() throws Exception {
|
||||
defaultTestMoreItems(extractor);
|
||||
}
|
||||
|
||||
/*//////////////////////////////////////////////////////////////////////////
|
||||
// ChannelExtractor
|
||||
//////////////////////////////////////////////////////////////////////////*/
|
||||
@Override
|
||||
public void testDescription() {
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testAvatarUrl() throws Exception {
|
||||
String avatarUrl = extractor.getAvatarUrl();
|
||||
assertIsSecureUrl(avatarUrl);
|
||||
ExtractorAsserts.assertContains("yt3", avatarUrl);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testBannerUrl() throws Exception {
|
||||
// CarouselHeaderRender does not contain a banner
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testFeedUrl() throws Exception {
|
||||
assertEquals("https://www.youtube.com/feeds/videos.xml?channel_id=UCHF66aWLOxBW4l6VkSrS3cQ", extractor.getFeedUrl());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSubscriberCount() throws Exception {
|
||||
ExtractorAsserts.assertGreaterOrEqual(2_900_000, extractor.getSubscriberCount());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testVerified() throws Exception {
|
||||
assertTrue(extractor.isVerified());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,85 @@
|
|||
{
|
||||
"request": {
|
||||
"httpMethod": "GET",
|
||||
"url": "https://www.youtube.com/sw.js",
|
||||
"headers": {
|
||||
"Origin": [
|
||||
"https://www.youtube.com"
|
||||
],
|
||||
"Referer": [
|
||||
"https://www.youtube.com"
|
||||
],
|
||||
"Accept-Language": [
|
||||
"en-GB, en;q\u003d0.9"
|
||||
]
|
||||
},
|
||||
"localization": {
|
||||
"languageCode": "en",
|
||||
"countryCode": "GB"
|
||||
}
|
||||
},
|
||||
"response": {
|
||||
"responseCode": 200,
|
||||
"responseMessage": "",
|
||||
"responseHeaders": {
|
||||
"access-control-allow-credentials": [
|
||||
"true"
|
||||
],
|
||||
"access-control-allow-origin": [
|
||||
"https://www.youtube.com"
|
||||
],
|
||||
"alt-svc": [
|
||||
"h3\u003d\":443\"; ma\u003d2592000,h3-29\u003d\":443\"; ma\u003d2592000"
|
||||
],
|
||||
"cache-control": [
|
||||
"private, max-age\u003d0"
|
||||
],
|
||||
"content-type": [
|
||||
"text/javascript; charset\u003dutf-8"
|
||||
],
|
||||
"cross-origin-opener-policy": [
|
||||
"same-origin; report-to\u003d\"youtube_main\""
|
||||
],
|
||||
"date": [
|
||||
"Sun, 16 Apr 2023 15:33:19 GMT"
|
||||
],
|
||||
"expires": [
|
||||
"Sun, 16 Apr 2023 15:33:19 GMT"
|
||||
],
|
||||
"origin-trial": [
|
||||
"AvC9UlR6RDk2crliDsFl66RWLnTbHrDbp+DiY6AYz/PNQ4G4tdUTjrHYr2sghbkhGQAVxb7jaPTHpEVBz0uzQwkAAAB4eyJvcmlnaW4iOiJodHRwczovL3lvdXR1YmUuY29tOjQ0MyIsImZlYXR1cmUiOiJXZWJWaWV3WFJlcXVlc3RlZFdpdGhEZXByZWNhdGlvbiIsImV4cGlyeSI6MTcxOTUzMjc5OSwiaXNTdWJkb21haW4iOnRydWV9"
|
||||
],
|
||||
"p3p": [
|
||||
"CP\u003d\"This is not a P3P policy! See http://support.google.com/accounts/answer/151657?hl\u003den-GB for more info.\""
|
||||
],
|
||||
"permissions-policy": [
|
||||
"ch-ua-arch\u003d*, ch-ua-bitness\u003d*, ch-ua-full-version\u003d*, ch-ua-full-version-list\u003d*, ch-ua-model\u003d*, ch-ua-wow64\u003d*, ch-ua-platform\u003d*, ch-ua-platform-version\u003d*"
|
||||
],
|
||||
"report-to": [
|
||||
"{\"group\":\"youtube_main\",\"max_age\":2592000,\"endpoints\":[{\"url\":\"https://csp.withgoogle.com/csp/report-to/youtube_main\"}]}"
|
||||
],
|
||||
"server": [
|
||||
"ESF"
|
||||
],
|
||||
"set-cookie": [
|
||||
"YSC\u003dOCGx8FJdx2E; Domain\u003d.youtube.com; Path\u003d/; Secure; HttpOnly; SameSite\u003dnone",
|
||||
"VISITOR_INFO1_LIVE\u003d; Domain\u003d.youtube.com; Expires\u003dMon, 20-Jul-2020 15:33:19 GMT; Path\u003d/; Secure; HttpOnly; SameSite\u003dnone",
|
||||
"CONSENT\u003dPENDING+955; expires\u003dTue, 15-Apr-2025 15:33:19 GMT; path\u003d/; domain\u003d.youtube.com; Secure"
|
||||
],
|
||||
"strict-transport-security": [
|
||||
"max-age\u003d31536000"
|
||||
],
|
||||
"x-content-type-options": [
|
||||
"nosniff"
|
||||
],
|
||||
"x-frame-options": [
|
||||
"SAMEORIGIN"
|
||||
],
|
||||
"x-xss-protection": [
|
||||
"0"
|
||||
]
|
||||
},
|
||||
"responseBody": "\n self.addEventListener(\u0027install\u0027, event \u003d\u003e {\n event.waitUntil(self.skipWaiting());\n });\n self.addEventListener(\u0027activate\u0027, event \u003d\u003e {\n event.waitUntil(\n self.clients.claim().then(() \u003d\u003e self.registration.unregister()));\n });\n ",
|
||||
"latestUrl": "https://www.youtube.com/sw.js"
|
||||
}
|
||||
}
|
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
Loading…
Reference in New Issue