Merge pull request #1221 from AudricV/yt_support-new-shorts-ui-data
[YouTube] Fix extraction of Shorts in channels and remove visitor data usage
This commit is contained in:
commit
596bce294d
|
@ -42,30 +42,14 @@ import static org.schabi.newpipe.extractor.utils.Utils.isNullOrEmpty;
|
|||
*/
|
||||
public class YoutubeChannelTabExtractor extends ChannelTabExtractor {
|
||||
|
||||
/**
|
||||
* Whether the visitor data extracted from the initial channel response is required to be used
|
||||
* for continuations.
|
||||
*
|
||||
* <p>
|
||||
* A valid {@code visitorData} is required to get continuations of shorts in channels.
|
||||
* </p>
|
||||
*
|
||||
* <p>
|
||||
* It should not be used when it is not needed, in order to reduce YouTube's tracking.
|
||||
* </p>
|
||||
*/
|
||||
private final boolean useVisitorData;
|
||||
private JsonObject jsonResponse;
|
||||
private String channelId;
|
||||
@Nullable
|
||||
private String visitorData;
|
||||
@SuppressWarnings("OptionalUsedAsFieldOrParameterType")
|
||||
protected Optional<YoutubeChannelHelper.ChannelHeader> channelHeader;
|
||||
|
||||
/**
 * Creates a channel tab extractor for the given service and link handler.
 *
 * <p>
 * {@code useVisitorData} is enabled only when the tab name returned by {@code getName()}
 * equals {@code ChannelTabs.SHORTS}.
 * </p>
 *
 * @param service     the streaming service, forwarded to the superclass constructor
 * @param linkHandler the list link handler, forwarded to the superclass constructor
 */
public YoutubeChannelTabExtractor(final StreamingService service,
|
||||
final ListLinkHandler linkHandler) {
|
||||
super(service, linkHandler);
|
||||
// Visitor data is only flagged as needed for the shorts tab.
// NOTE(review): getName() is invoked from the constructor; confirm it is safe to call here.
useVisitorData = getName().equals(ChannelTabs.SHORTS);
|
||||
}
|
||||
|
||||
@Nonnull
|
||||
|
@ -100,9 +84,6 @@ public class YoutubeChannelTabExtractor extends ChannelTabExtractor {
|
|||
jsonResponse = data.jsonResponse;
|
||||
channelHeader = YoutubeChannelHelper.getChannelHeader(jsonResponse);
|
||||
channelId = data.channelId;
|
||||
if (useVisitorData) {
|
||||
visitorData = jsonResponse.getObject("responseContext").getString("visitorData");
|
||||
}
|
||||
}
|
||||
|
||||
@Nonnull
|
||||
|
@ -176,10 +157,8 @@ public class YoutubeChannelTabExtractor extends ChannelTabExtractor {
|
|||
channelName, channelUrl)
|
||||
.orElse(null);
|
||||
|
||||
final Page nextPage = getNextPageFrom(continuation,
|
||||
useVisitorData && !isNullOrEmpty(visitorData)
|
||||
? List.of(channelName, channelUrl, verifiedStatus.toString(), visitorData)
|
||||
: List.of(channelName, channelUrl, verifiedStatus.toString()));
|
||||
final Page nextPage = getNextPageFrom(
|
||||
continuation, List.of(channelName, channelUrl, verifiedStatus.toString()));
|
||||
|
||||
return new InfoItemsPage<>(collector, nextPage);
|
||||
}
|
||||
|
@ -299,6 +278,9 @@ public class YoutubeChannelTabExtractor extends ChannelTabExtractor {
|
|||
} else if (richItem.has("reelItemRenderer")) {
|
||||
commitReel(collector, richItem.getObject("reelItemRenderer"),
|
||||
channelVerifiedStatus, channelName, channelUrl);
|
||||
} else if (richItem.has("shortsLockupViewModel")) {
|
||||
commitShortsLockup(collector, richItem.getObject("shortsLockupViewModel"),
|
||||
channelVerifiedStatus, channelName, channelUrl);
|
||||
} else if (richItem.has("playlistRenderer")) {
|
||||
commitPlaylist(collector, richItem.getObject("playlistRenderer"),
|
||||
channelVerifiedStatus, channelName, channelUrl);
|
||||
|
@ -356,6 +338,30 @@ public class YoutubeChannelTabExtractor extends ChannelTabExtractor {
|
|||
});
|
||||
}
|
||||
|
||||
private static void commitShortsLockup(@Nonnull final MultiInfoItemsCollector collector,
|
||||
@Nonnull final JsonObject shortsLockupViewModel,
|
||||
@Nonnull final VerifiedStatus channelVerifiedStatus,
|
||||
@Nullable final String channelName,
|
||||
@Nullable final String channelUrl) {
|
||||
collector.commit(
|
||||
new YoutubeShortsLockupInfoItemExtractor(shortsLockupViewModel) {
|
||||
@Override
|
||||
public String getUploaderName() throws ParsingException {
|
||||
return isNullOrEmpty(channelName) ? super.getUploaderName() : channelName;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getUploaderUrl() throws ParsingException {
|
||||
return isNullOrEmpty(channelUrl) ? super.getUploaderName() : channelUrl;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isUploaderVerified() {
|
||||
return channelVerifiedStatus == VerifiedStatus.VERIFIED;
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
private void commitVideo(@Nonnull final MultiInfoItemsCollector collector,
|
||||
@Nonnull final TimeAgoParser timeAgoParser,
|
||||
@Nonnull final JsonObject jsonObject,
|
||||
|
@ -434,8 +440,7 @@ public class YoutubeChannelTabExtractor extends ChannelTabExtractor {
|
|||
.getString("token");
|
||||
|
||||
final byte[] body = JsonWriter.string(prepareDesktopJsonBuilder(getExtractorLocalization(),
|
||||
getExtractorContentCountry(),
|
||||
useVisitorData && channelIds.size() >= 3 ? channelIds.get(2) : null)
|
||||
getExtractorContentCountry())
|
||||
.value("continuation", continuation)
|
||||
.done())
|
||||
.getBytes(StandardCharsets.UTF_8);
|
||||
|
|
|
@ -20,13 +20,19 @@ import javax.annotation.Nonnull;
|
|||
import javax.annotation.Nullable;
|
||||
|
||||
/**
|
||||
* A {@link StreamInfoItemExtractor} for YouTube's {@code reelItemRenderers}.
|
||||
* A {@link StreamInfoItemExtractor} for YouTube's {@code reelItemRenderer}s.
|
||||
*
|
||||
* <p>
|
||||
* {@code reelItemRenderers} are returned on YouTube for their short-form contents on almost every
|
||||
* {@code reelItemRenderer}s were returned on YouTube for their short-form contents on almost every
|
||||
* place and every major client. They provide a limited amount of information and do not provide
|
||||
* the exact view count, any uploader info (name, URL, avatar, verified status) and the upload date.
|
||||
* </p>
|
||||
*
|
||||
* <p>
|
||||
* At the time this documentation has been updated, they are being replaced by
|
||||
* {@code shortsLockupViewModel}s. See {@link YoutubeShortsLockupInfoItemExtractor} for an
|
||||
* extractor for this new UI data type.
|
||||
* </p>
|
||||
*/
|
||||
public class YoutubeReelInfoItemExtractor implements StreamInfoItemExtractor {
|
||||
|
||||
|
|
|
@ -0,0 +1,150 @@
|
|||
package org.schabi.newpipe.extractor.services.youtube.extractors;
|
||||
|
||||
import com.grack.nanojson.JsonObject;
|
||||
import org.schabi.newpipe.extractor.Image;
|
||||
import org.schabi.newpipe.extractor.exceptions.ParsingException;
|
||||
import org.schabi.newpipe.extractor.localization.DateWrapper;
|
||||
import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeStreamLinkHandlerFactory;
|
||||
import org.schabi.newpipe.extractor.stream.StreamInfoItemExtractor;
|
||||
import org.schabi.newpipe.extractor.stream.StreamType;
|
||||
import org.schabi.newpipe.extractor.utils.Utils;
|
||||
|
||||
import javax.annotation.Nonnull;
|
||||
import javax.annotation.Nullable;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getThumbnailsFromInfoItem;
|
||||
import static org.schabi.newpipe.extractor.utils.Utils.isNullOrEmpty;
|
||||
|
||||
/**
|
||||
* A {@link StreamInfoItemExtractor} for YouTube's {@code shortsLockupViewModel}s.
|
||||
*
|
||||
* <p>
|
||||
* {@code shortsLockupViewModel}s are returned on YouTube for their short-form contents on almost
|
||||
* every place and every major client. They provide a limited amount of information and do not
|
||||
* provide the exact view count, any uploader info (name, URL, avatar, verified status) and the
|
||||
* upload date.
|
||||
* </p>
|
||||
*
|
||||
* <p>
|
||||
* At the time this documentation has been written, this data UI type is not fully used (rolled
|
||||
* out), so {@code reelItemRenderer}s are also returned. See {@link YoutubeReelInfoItemExtractor}
|
||||
* for an extractor for this UI data type.
|
||||
* </p>
|
||||
*/
|
||||
public class YoutubeShortsLockupInfoItemExtractor implements StreamInfoItemExtractor {
|
||||
|
||||
@Nonnull
|
||||
private final JsonObject shortsLockupViewModel;
|
||||
|
||||
public YoutubeShortsLockupInfoItemExtractor(@Nonnull final JsonObject shortsLockupViewModel) {
|
||||
this.shortsLockupViewModel = shortsLockupViewModel;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getName() throws ParsingException {
|
||||
return shortsLockupViewModel.getObject("overlayMetadata")
|
||||
.getObject("primaryText")
|
||||
.getString("content");
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getUrl() throws ParsingException {
|
||||
String videoId = shortsLockupViewModel.getObject("onTap")
|
||||
.getObject("innertubeCommand")
|
||||
.getObject("reelWatchEndpoint")
|
||||
.getString("videoId");
|
||||
|
||||
if (isNullOrEmpty(videoId)) {
|
||||
videoId = shortsLockupViewModel.getObject("inlinePlayerData")
|
||||
.getObject("onVisible")
|
||||
.getObject("innertubeCommand")
|
||||
.getObject("watchEndpoint")
|
||||
.getString("videoId");
|
||||
}
|
||||
|
||||
if (isNullOrEmpty(videoId)) {
|
||||
throw new ParsingException("Could not get video ID");
|
||||
}
|
||||
|
||||
try {
|
||||
return YoutubeStreamLinkHandlerFactory.getInstance().getUrl(videoId);
|
||||
} catch (final Exception e) {
|
||||
throw new ParsingException("Could not get URL", e);
|
||||
}
|
||||
}
|
||||
|
||||
@Nonnull
|
||||
@Override
|
||||
public List<Image> getThumbnails() throws ParsingException {
|
||||
return getThumbnailsFromInfoItem(shortsLockupViewModel.getObject("thumbnail")
|
||||
.getObject("sources"));
|
||||
}
|
||||
|
||||
@Override
|
||||
public StreamType getStreamType() throws ParsingException {
|
||||
return StreamType.VIDEO_STREAM;
|
||||
}
|
||||
|
||||
@Override
|
||||
public long getViewCount() throws ParsingException {
|
||||
final String viewCountText = shortsLockupViewModel.getObject("overlayMetadata")
|
||||
.getObject("secondaryText")
|
||||
.getString("content");
|
||||
if (!isNullOrEmpty(viewCountText)) {
|
||||
// This approach is language dependent
|
||||
if (viewCountText.toLowerCase().contains("no views")) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
return Utils.mixedNumberWordToLong(viewCountText);
|
||||
}
|
||||
|
||||
throw new ParsingException("Could not get short view count");
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isShortFormContent() {
|
||||
return true;
|
||||
}
|
||||
|
||||
// All the following properties cannot be obtained from shortsLockupViewModels
|
||||
|
||||
@Override
|
||||
public boolean isAd() throws ParsingException {
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public long getDuration() throws ParsingException {
|
||||
return -1;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getUploaderName() throws ParsingException {
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getUploaderUrl() throws ParsingException {
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isUploaderVerified() throws ParsingException {
|
||||
return false;
|
||||
}
|
||||
|
||||
@Nullable
|
||||
@Override
|
||||
public String getTextualUploadDate() throws ParsingException {
|
||||
return null;
|
||||
}
|
||||
|
||||
@Nullable
|
||||
@Override
|
||||
public DateWrapper getUploadDate() throws ParsingException {
|
||||
return null;
|
||||
}
|
||||
}
|
|
@ -3,10 +3,10 @@
|
|||
"httpMethod": "GET",
|
||||
"url": "https://www.youtube.com/sw.js",
|
||||
"headers": {
|
||||
"Origin": [
|
||||
"Referer": [
|
||||
"https://www.youtube.com"
|
||||
],
|
||||
"Referer": [
|
||||
"Origin": [
|
||||
"https://www.youtube.com"
|
||||
],
|
||||
"Accept-Language": [
|
||||
|
@ -34,6 +34,9 @@
|
|||
"cache-control": [
|
||||
"private, max-age\u003d0"
|
||||
],
|
||||
"content-security-policy": [
|
||||
"require-trusted-types-for \u0027script\u0027"
|
||||
],
|
||||
"content-type": [
|
||||
"text/javascript; charset\u003dutf-8"
|
||||
],
|
||||
|
@ -41,10 +44,10 @@
|
|||
"same-origin; report-to\u003d\"youtube_main\""
|
||||
],
|
||||
"date": [
|
||||
"Wed, 24 Jul 2024 17:37:25 GMT"
|
||||
"Sun, 08 Sep 2024 15:45:44 GMT"
|
||||
],
|
||||
"expires": [
|
||||
"Wed, 24 Jul 2024 17:37:25 GMT"
|
||||
"Sun, 08 Sep 2024 15:45:44 GMT"
|
||||
],
|
||||
"origin-trial": [
|
||||
"AmhMBR6zCLzDDxpW+HfpP67BqwIknWnyMOXOQGfzYswFmJe+fgaI6XZgAzcxOrzNtP7hEDsOo1jdjFnVr2IdxQ4AAAB4eyJvcmlnaW4iOiJodHRwczovL3lvdXR1YmUuY29tOjQ0MyIsImZlYXR1cmUiOiJXZWJWaWV3WFJlcXVlc3RlZFdpdGhEZXByZWNhdGlvbiIsImV4cGlyeSI6MTc1ODA2NzE5OSwiaXNTdWJkb21haW4iOnRydWV9"
|
||||
|
@ -62,8 +65,8 @@
|
|||
"ESF"
|
||||
],
|
||||
"set-cookie": [
|
||||
"YSC\u003dQqImeZ_ECz4; Domain\u003d.youtube.com; Path\u003d/; Secure; HttpOnly; SameSite\u003dnone",
|
||||
"VISITOR_INFO1_LIVE\u003d; Domain\u003d.youtube.com; Expires\u003dThu, 28-Oct-2021 17:37:25 GMT; Path\u003d/; Secure; HttpOnly; SameSite\u003dnone"
|
||||
"YSC\u003dbsHskp20CKw; Domain\u003d.youtube.com; Path\u003d/; Secure; HttpOnly; SameSite\u003dnone",
|
||||
"VISITOR_INFO1_LIVE\u003d; Domain\u003d.youtube.com; Expires\u003dMon, 13-Dec-2021 15:45:44 GMT; Path\u003d/; Secure; HttpOnly; SameSite\u003dnone"
|
||||
],
|
||||
"strict-transport-security": [
|
||||
"max-age\u003d31536000"
|
||||
|
|
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
Loading…
Reference in New Issue