Merge pull request #1221 from AudricV/yt_support-new-shorts-ui-data

[YouTube] Fix extraction of Shorts in channels and remove visitor data usage
This commit is contained in:
Audric V. 2024-09-29 14:54:07 +02:00 committed by GitHub
commit 596bce294d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 1830 additions and 1688 deletions

View File

@ -42,30 +42,14 @@ import static org.schabi.newpipe.extractor.utils.Utils.isNullOrEmpty;
*/
public class YoutubeChannelTabExtractor extends ChannelTabExtractor {
/**
* Whether the visitor data extracted from the initial channel response is required to be used
* for continuations.
*
* <p>
* A valid {@code visitorData} is required to get continuations of shorts in channels.
* </p>
*
* <p>
* It should be not used when it is not needed, in order to reduce YouTube's tracking.
* </p>
*/
private final boolean useVisitorData;
private JsonObject jsonResponse;
private String channelId;
@Nullable
private String visitorData;
@SuppressWarnings("OptionalUsedAsFieldOrParameterType")
protected Optional<YoutubeChannelHelper.ChannelHeader> channelHeader;
public YoutubeChannelTabExtractor(final StreamingService service,
final ListLinkHandler linkHandler) {
super(service, linkHandler);
useVisitorData = getName().equals(ChannelTabs.SHORTS);
}
@Nonnull
@ -100,9 +84,6 @@ public class YoutubeChannelTabExtractor extends ChannelTabExtractor {
jsonResponse = data.jsonResponse;
channelHeader = YoutubeChannelHelper.getChannelHeader(jsonResponse);
channelId = data.channelId;
if (useVisitorData) {
visitorData = jsonResponse.getObject("responseContext").getString("visitorData");
}
}
@Nonnull
@ -176,10 +157,8 @@ public class YoutubeChannelTabExtractor extends ChannelTabExtractor {
channelName, channelUrl)
.orElse(null);
final Page nextPage = getNextPageFrom(continuation,
useVisitorData && !isNullOrEmpty(visitorData)
? List.of(channelName, channelUrl, verifiedStatus.toString(), visitorData)
: List.of(channelName, channelUrl, verifiedStatus.toString()));
final Page nextPage = getNextPageFrom(
continuation, List.of(channelName, channelUrl, verifiedStatus.toString()));
return new InfoItemsPage<>(collector, nextPage);
}
@ -299,6 +278,9 @@ public class YoutubeChannelTabExtractor extends ChannelTabExtractor {
} else if (richItem.has("reelItemRenderer")) {
commitReel(collector, richItem.getObject("reelItemRenderer"),
channelVerifiedStatus, channelName, channelUrl);
} else if (richItem.has("shortsLockupViewModel")) {
commitShortsLockup(collector, richItem.getObject("shortsLockupViewModel"),
channelVerifiedStatus, channelName, channelUrl);
} else if (richItem.has("playlistRenderer")) {
commitPlaylist(collector, richItem.getObject("playlistRenderer"),
channelVerifiedStatus, channelName, channelUrl);
@ -356,6 +338,30 @@ public class YoutubeChannelTabExtractor extends ChannelTabExtractor {
});
}
/**
 * Commits a {@code shortsLockupViewModel} item to the given collector, overriding the uploader
 * information of the item with the one of the channel when it is available.
 *
 * @param collector             the collector to which the item is committed
 * @param shortsLockupViewModel the {@code shortsLockupViewModel} JSON object of the item
 * @param channelVerifiedStatus the verified status of the channel; the uploader is reported as
 *                              verified only when it is {@link VerifiedStatus#VERIFIED}
 * @param channelName           the channel name to use as the uploader name; when null or
 *                              empty, the value of the base extractor is used instead
 * @param channelUrl            the channel URL to use as the uploader URL; when null or
 *                              empty, the value of the base extractor is used instead
 */
private static void commitShortsLockup(@Nonnull final MultiInfoItemsCollector collector,
                                       @Nonnull final JsonObject shortsLockupViewModel,
                                       @Nonnull final VerifiedStatus channelVerifiedStatus,
                                       @Nullable final String channelName,
                                       @Nullable final String channelUrl) {
    collector.commit(
            new YoutubeShortsLockupInfoItemExtractor(shortsLockupViewModel) {
                @Override
                public String getUploaderName() throws ParsingException {
                    return isNullOrEmpty(channelName) ? super.getUploaderName() : channelName;
                }

                @Override
                public String getUploaderUrl() throws ParsingException {
                    // Fall back to the base extractor's URL, not its name
                    return isNullOrEmpty(channelUrl) ? super.getUploaderUrl() : channelUrl;
                }

                @Override
                public boolean isUploaderVerified() {
                    return channelVerifiedStatus == VerifiedStatus.VERIFIED;
                }
            });
}
private void commitVideo(@Nonnull final MultiInfoItemsCollector collector,
@Nonnull final TimeAgoParser timeAgoParser,
@Nonnull final JsonObject jsonObject,
@ -434,8 +440,7 @@ public class YoutubeChannelTabExtractor extends ChannelTabExtractor {
.getString("token");
final byte[] body = JsonWriter.string(prepareDesktopJsonBuilder(getExtractorLocalization(),
getExtractorContentCountry(),
useVisitorData && channelIds.size() >= 3 ? channelIds.get(2) : null)
getExtractorContentCountry())
.value("continuation", continuation)
.done())
.getBytes(StandardCharsets.UTF_8);

View File

@ -20,13 +20,19 @@ import javax.annotation.Nonnull;
import javax.annotation.Nullable;
/**
* A {@link StreamInfoItemExtractor} for YouTube's {@code reelItemRenderers}.
* A {@link StreamInfoItemExtractor} for YouTube's {@code reelItemRenderer}s.
*
* <p>
* {@code reelItemRenderers} are returned on YouTube for their short-form contents on almost every
* {@code reelItemRenderer}s were returned on YouTube for their short-form contents on almost every
* place and every major client. They provide a limited amount of information and do not provide
* the exact view count, any uploader info (name, URL, avatar, verified status) and the upload date.
* </p>
*
* <p>
* At the time this documentation has been updated, they are being replaced by
* {@code shortsLockupViewModel}s. See {@link YoutubeShortsLockupInfoItemExtractor} for an
* extractor for this new UI data type.
* </p>
*/
public class YoutubeReelInfoItemExtractor implements StreamInfoItemExtractor {

View File

@ -0,0 +1,150 @@
package org.schabi.newpipe.extractor.services.youtube.extractors;
import com.grack.nanojson.JsonObject;
import org.schabi.newpipe.extractor.Image;
import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.localization.DateWrapper;
import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeStreamLinkHandlerFactory;
import org.schabi.newpipe.extractor.stream.StreamInfoItemExtractor;
import org.schabi.newpipe.extractor.stream.StreamType;
import org.schabi.newpipe.extractor.utils.Utils;

import javax.annotation.Nonnull;
import javax.annotation.Nullable;

import java.util.List;
import java.util.Locale;

import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getThumbnailsFromInfoItem;
import static org.schabi.newpipe.extractor.utils.Utils.isNullOrEmpty;
/**
* A {@link StreamInfoItemExtractor} for YouTube's {@code shortsLockupViewModel}s.
*
* <p>
* {@code shortsLockupViewModel}s are returned on YouTube for their short-form contents on almost
* every place and every major client. They provide a limited amount of information and do not
* provide the exact view count, any uploader info (name, URL, avatar, verified status) and the
* upload date.
* </p>
*
* <p>
* At the time this documentation has been written, this UI data type is not fully rolled out,
* so {@code reelItemRenderer}s are also returned. See {@link YoutubeReelInfoItemExtractor}
* for an extractor for that UI data type.
* </p>
*/
public class YoutubeShortsLockupInfoItemExtractor implements StreamInfoItemExtractor {

    /**
     * The {@code shortsLockupViewModel} JSON object from which all properties are read.
     */
    @Nonnull
    private final JsonObject shortsLockupViewModel;

    /**
     * Creates an extractor reading its data from the given {@code shortsLockupViewModel}.
     *
     * @param shortsLockupViewModel the {@code shortsLockupViewModel} JSON object of the short
     */
    public YoutubeShortsLockupInfoItemExtractor(@Nonnull final JsonObject shortsLockupViewModel) {
        this.shortsLockupViewModel = shortsLockupViewModel;
    }

    /**
     * Gets the name of the short, read from {@code overlayMetadata.primaryText.content}.
     *
     * @return the content of the primary text of the overlay metadata
     */
    @Override
    public String getName() throws ParsingException {
        return shortsLockupViewModel.getObject("overlayMetadata")
                .getObject("primaryText")
                .getString("content");
    }

    /**
     * Gets the URL of the short, built from its video ID.
     *
     * <p>
     * The video ID is read from the {@code reelWatchEndpoint} of the {@code onTap} command and,
     * when it is null or empty there, from the {@code watchEndpoint} of the
     * {@code inlinePlayerData.onVisible} command.
     * </p>
     *
     * @return the stream URL built by {@link YoutubeStreamLinkHandlerFactory} from the video ID
     * @throws ParsingException if no video ID could be found or if the URL could not be built
     */
    @Override
    public String getUrl() throws ParsingException {
        String videoId = shortsLockupViewModel.getObject("onTap")
                .getObject("innertubeCommand")
                .getObject("reelWatchEndpoint")
                .getString("videoId");

        if (isNullOrEmpty(videoId)) {
            // Second location in which the video ID may be found
            videoId = shortsLockupViewModel.getObject("inlinePlayerData")
                    .getObject("onVisible")
                    .getObject("innertubeCommand")
                    .getObject("watchEndpoint")
                    .getString("videoId");
        }

        if (isNullOrEmpty(videoId)) {
            throw new ParsingException("Could not get video ID");
        }

        try {
            return YoutubeStreamLinkHandlerFactory.getInstance().getUrl(videoId);
        } catch (final Exception e) {
            throw new ParsingException("Could not get URL", e);
        }
    }

    /**
     * Gets the thumbnails of the short, read from {@code thumbnail.sources}.
     *
     * @return the images returned by {@code getThumbnailsFromInfoItem} for this object
     */
    @Nonnull
    @Override
    public List<Image> getThumbnails() throws ParsingException {
        // NOTE(review): "sources" is read as an object here; confirm that this is the shape
        // expected by getThumbnailsFromInfoItem
        return getThumbnailsFromInfoItem(shortsLockupViewModel.getObject("thumbnail")
                .getObject("sources"));
    }

    @Override
    public StreamType getStreamType() throws ParsingException {
        return StreamType.VIDEO_STREAM;
    }

    /**
     * Gets the view count of the short, parsed from {@code overlayMetadata.secondaryText.content}.
     *
     * @return {@code 0} if the text contains "no views" (case-insensitively), otherwise the
     * value parsed from the text by {@link Utils#mixedNumberWordToLong(String)}
     * @throws ParsingException if the view count text is null or empty
     */
    @Override
    public long getViewCount() throws ParsingException {
        final String viewCountText = shortsLockupViewModel.getObject("overlayMetadata")
                .getObject("secondaryText")
                .getString("content");

        if (!isNullOrEmpty(viewCountText)) {
            // This approach is language dependent
            // Locale.ROOT makes the lowercasing independent of the default locale of the
            // device (with a Turkish locale, "I" would be lowercased to a dotless "ı")
            if (viewCountText.toLowerCase(Locale.ROOT).contains("no views")) {
                return 0;
            }

            return Utils.mixedNumberWordToLong(viewCountText);
        }

        throw new ParsingException("Could not get short view count");
    }

    @Override
    public boolean isShortFormContent() {
        return true;
    }

    // All the following properties cannot be obtained from shortsLockupViewModels

    @Override
    public boolean isAd() throws ParsingException {
        return false;
    }

    @Override
    public long getDuration() throws ParsingException {
        return -1;
    }

    @Override
    public String getUploaderName() throws ParsingException {
        return null;
    }

    @Override
    public String getUploaderUrl() throws ParsingException {
        return null;
    }

    @Override
    public boolean isUploaderVerified() throws ParsingException {
        return false;
    }

    @Nullable
    @Override
    public String getTextualUploadDate() throws ParsingException {
        return null;
    }

    @Nullable
    @Override
    public DateWrapper getUploadDate() throws ParsingException {
        return null;
    }
}

View File

@ -3,10 +3,10 @@
"httpMethod": "GET",
"url": "https://www.youtube.com/sw.js",
"headers": {
"Origin": [
"Referer": [
"https://www.youtube.com"
],
"Referer": [
"Origin": [
"https://www.youtube.com"
],
"Accept-Language": [
@ -34,6 +34,9 @@
"cache-control": [
"private, max-age\u003d0"
],
"content-security-policy": [
"require-trusted-types-for \u0027script\u0027"
],
"content-type": [
"text/javascript; charset\u003dutf-8"
],
@ -41,10 +44,10 @@
"same-origin; report-to\u003d\"youtube_main\""
],
"date": [
"Wed, 24 Jul 2024 17:37:25 GMT"
"Sun, 08 Sep 2024 15:45:44 GMT"
],
"expires": [
"Wed, 24 Jul 2024 17:37:25 GMT"
"Sun, 08 Sep 2024 15:45:44 GMT"
],
"origin-trial": [
"AmhMBR6zCLzDDxpW+HfpP67BqwIknWnyMOXOQGfzYswFmJe+fgaI6XZgAzcxOrzNtP7hEDsOo1jdjFnVr2IdxQ4AAAB4eyJvcmlnaW4iOiJodHRwczovL3lvdXR1YmUuY29tOjQ0MyIsImZlYXR1cmUiOiJXZWJWaWV3WFJlcXVlc3RlZFdpdGhEZXByZWNhdGlvbiIsImV4cGlyeSI6MTc1ODA2NzE5OSwiaXNTdWJkb21haW4iOnRydWV9"
@ -62,8 +65,8 @@
"ESF"
],
"set-cookie": [
"YSC\u003dQqImeZ_ECz4; Domain\u003d.youtube.com; Path\u003d/; Secure; HttpOnly; SameSite\u003dnone",
"VISITOR_INFO1_LIVE\u003d; Domain\u003d.youtube.com; Expires\u003dThu, 28-Oct-2021 17:37:25 GMT; Path\u003d/; Secure; HttpOnly; SameSite\u003dnone"
"YSC\u003dbsHskp20CKw; Domain\u003d.youtube.com; Path\u003d/; Secure; HttpOnly; SameSite\u003dnone",
"VISITOR_INFO1_LIVE\u003d; Domain\u003d.youtube.com; Expires\u003dMon, 13-Dec-2021 15:45:44 GMT; Path\u003d/; Secure; HttpOnly; SameSite\u003dnone"
],
"strict-transport-security": [
"max-age\u003d31536000"