diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeCommentsEUVMInfoItemExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeCommentsEUVMInfoItemExtractor.java new file mode 100644 index 000000000..857e6096f --- /dev/null +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeCommentsEUVMInfoItemExtractor.java @@ -0,0 +1,235 @@ +package org.schabi.newpipe.extractor.services.youtube.extractors; + +import com.grack.nanojson.JsonObject; +import org.schabi.newpipe.extractor.Image; +import org.schabi.newpipe.extractor.Page; +import org.schabi.newpipe.extractor.comments.CommentsInfoItemExtractor; +import org.schabi.newpipe.extractor.exceptions.ParsingException; +import org.schabi.newpipe.extractor.localization.DateWrapper; +import org.schabi.newpipe.extractor.localization.TimeAgoParser; +import org.schabi.newpipe.extractor.stream.Description; +import org.schabi.newpipe.extractor.utils.Utils; + +import javax.annotation.Nonnull; +import javax.annotation.Nullable; +import java.util.List; +import java.util.Objects; + +import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getAttributedDescription; +import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getImagesFromThumbnailsArray; +import static org.schabi.newpipe.extractor.utils.Utils.isNullOrEmpty; + +/** + * A {@link CommentsInfoItemExtractor} for YouTube comment data returned in a view model and entity + * updates. + */ +class YoutubeCommentsEUVMInfoItemExtractor implements CommentsInfoItemExtractor { + + private static final String AUTHOR = "author"; + private static final String PROPERTIES = "properties"; + + @Nonnull + private final JsonObject commentViewModel; + @Nullable + private final JsonObject commentRepliesRenderer; + @Nonnull + private final JsonObject commentEntityPayload; + @Nonnull + private final JsonObject engagementToolbarStateEntityPayload; + @Nonnull + private final String videoUrl; + @Nonnull + private final TimeAgoParser timeAgoParser; + + YoutubeCommentsEUVMInfoItemExtractor( + @Nonnull final JsonObject commentViewModel, + @Nullable final JsonObject commentRepliesRenderer, + @Nonnull final JsonObject commentEntityPayload, + @Nonnull final JsonObject engagementToolbarStateEntityPayload, + @Nonnull final String videoUrl, + @Nonnull final TimeAgoParser timeAgoParser) { + this.commentViewModel = commentViewModel; + this.commentRepliesRenderer = commentRepliesRenderer; + this.commentEntityPayload = commentEntityPayload; + this.engagementToolbarStateEntityPayload = engagementToolbarStateEntityPayload; + this.videoUrl = videoUrl; + this.timeAgoParser = timeAgoParser; + } + + @Override + public String getName() throws ParsingException { + return getUploaderName(); + } + + @Override + public String getUrl() throws ParsingException { + return videoUrl; + } + + @Nonnull + @Override + public List getThumbnails() throws ParsingException { + return getUploaderAvatars(); + } + + @Override + public int getLikeCount() throws ParsingException { + final String textualLikeCount = getTextualLikeCount(); + try { + if (Utils.isBlank(textualLikeCount)) { + return 0; + } + + return (int) Utils.mixedNumberWordToLong(textualLikeCount); + } catch (final Exception e) { + throw new ParsingException( + "Unexpected error while converting textual like count to like count", e); + } + } + + @Override + public String getTextualLikeCount() { + return commentEntityPayload.getObject("toolbar") + .getString("likeCountNotliked"); + } + + @Override + public Description getCommentText() throws ParsingException { + // Comments' text work in the same way as an attributed video description + return new Description( + getAttributedDescription(commentEntityPayload.getObject(PROPERTIES) + .getObject("content")), Description.HTML); + } + + @Override + public String getTextualUploadDate() throws ParsingException { + return commentEntityPayload.getObject(PROPERTIES) + .getString("publishedTime"); + } + + @Nullable + @Override + public DateWrapper getUploadDate() throws ParsingException { + final String textualPublishedTime = getTextualUploadDate(); + if (isNullOrEmpty(textualPublishedTime)) { + return null; + } + + return timeAgoParser.parse(textualPublishedTime); + } + + @Override + public String getCommentId() throws ParsingException { + String commentId = commentEntityPayload.getObject(PROPERTIES) + .getString("commentId"); + if (isNullOrEmpty(commentId)) { + commentId = commentViewModel.getString("commentId"); + if (isNullOrEmpty(commentId)) { + throw new ParsingException("Could not get comment ID"); + } + } + return commentId; + } + + @Override + public String getUploaderUrl() throws ParsingException { + final JsonObject author = commentEntityPayload.getObject(AUTHOR); + String channelId = author.getString("channelId"); + if (isNullOrEmpty(channelId)) { + channelId = author.getObject("channelCommand") + .getObject("innertubeCommand") + .getObject("browseEndpoint") + .getString("browseId"); + if (isNullOrEmpty(channelId)) { + channelId = author.getObject("avatar") + .getObject("endpoint") + .getObject("innertubeCommand") + .getObject("browseEndpoint") + .getString("browseId"); + if (isNullOrEmpty(channelId)) { + throw new ParsingException("Could not get channel ID"); + } + } + } + return "https://www.youtube.com/channel/" + channelId; + } + + @Override + public String getUploaderName() throws ParsingException { + return commentEntityPayload.getObject(AUTHOR) + .getString("displayName"); + } + + @Nonnull + @Override + public List getUploaderAvatars() throws ParsingException { + return getImagesFromThumbnailsArray(commentEntityPayload.getObject("avatar") + .getObject("image") + .getArray("sources")); + } + + @Override + public boolean isHeartedByUploader() { + return "TOOLBAR_HEART_STATE_HEARTED".equals( + engagementToolbarStateEntityPayload.getString("heartState")); + } + + @Override + public boolean isPinned() { + return commentViewModel.has("pinnedText"); + } + + @Override + public boolean isUploaderVerified() throws ParsingException { + final JsonObject author = commentEntityPayload.getObject(AUTHOR); + return author.getBoolean("isVerified") || author.getBoolean("isArtist"); + } + + @Override + public int getReplyCount() throws ParsingException { + // As YouTube allows replies up to 750 comments, we cannot check if the count returned is a + // mixed number or a real number + // Assume it is a mixed one, as it matches how numbers of most properties are returned + final String replyCountString = commentEntityPayload.getObject("toolbar") + .getString("replyCount"); + if (isNullOrEmpty(replyCountString)) { + return 0; + } + return (int) Utils.mixedNumberWordToLong(replyCountString); + } + + @Nullable + @Override + public Page getReplies() throws ParsingException { + if (isNullOrEmpty(commentRepliesRenderer)) { + return null; + } + + final String continuation = commentRepliesRenderer.getArray("contents") + .stream() + .filter(JsonObject.class::isInstance) + .map(JsonObject.class::cast) + .map(content -> content.getObject("continuationItemRenderer", null)) + .filter(Objects::nonNull) + .findFirst() + .map(continuationItemRenderer -> + continuationItemRenderer.getObject("continuationEndpoint") + .getObject("continuationCommand") + .getString("token")) + .orElseThrow(() -> + new ParsingException("Could not get comment replies continuation")); + return new Page(videoUrl, continuation); + } + + @Override + public boolean isChannelOwner() { + return commentEntityPayload.getObject(AUTHOR) + .getBoolean("isCreator"); + } + + @Override + public boolean hasCreatorReply() { + return commentRepliesRenderer != null + && commentRepliesRenderer.has("viewRepliesCreatorThumbnail"); + } +} diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeCommentsExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeCommentsExtractor.java index 84e6c3e1e..8667768a4 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeCommentsExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeCommentsExtractor.java @@ -13,6 +13,7 @@ import org.schabi.newpipe.extractor.exceptions.ExtractionException; import org.schabi.newpipe.extractor.exceptions.ParsingException; import org.schabi.newpipe.extractor.linkhandler.ListLinkHandler; import org.schabi.newpipe.extractor.localization.Localization; +import org.schabi.newpipe.extractor.localization.TimeAgoParser; import org.schabi.newpipe.extractor.utils.JsonUtils; import org.schabi.newpipe.extractor.utils.Utils; @@ -21,7 +22,6 @@ import javax.annotation.Nullable; import java.io.IOException; import java.nio.charset.StandardCharsets; import java.util.Collections; -import java.util.List; import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getJsonPostResponse; import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getTextFromObject; @@ -30,6 +30,9 @@ import static org.schabi.newpipe.extractor.utils.Utils.isNullOrEmpty; public class YoutubeCommentsExtractor extends CommentsExtractor { + private static final String COMMENT_VIEW_MODEL_KEY = "commentViewModel"; + private static final String COMMENT_RENDERER_KEY = "commentRenderer"; + /** * Whether comments are disabled on video. */ @@ -74,8 +77,7 @@ public class YoutubeCommentsExtractor extends CommentsExtractor { return null; } - final String token = contents - .stream() + final String token = contents.stream() // Only use JsonObjects .filter(JsonObject.class::isInstance) .map(JsonObject.class::cast) @@ -120,6 +122,21 @@ public class YoutubeCommentsExtractor extends CommentsExtractor { } } + @Nonnull + private JsonObject getMutationPayloadFromEntityKey(@Nonnull final JsonArray mutations, + @Nonnull final String commentKey) + throws ParsingException { + return mutations.stream() + .filter(JsonObject.class::isInstance) + .map(JsonObject.class::cast) + .filter(mutation -> commentKey.equals( + mutation.getString("entityKey"))) + .findFirst() + .orElseThrow(() -> new ParsingException( + "Could not get comment entity payload mutation")) + .getObject("payload"); + } + @Nonnull private InfoItemsPage getInfoItemsPageForDisabledComments() { return new InfoItemsPage<>(Collections.emptyList(), null, Collections.emptyList()); @@ -207,8 +224,8 @@ public class YoutubeCommentsExtractor extends CommentsExtractor { return new InfoItemsPage<>(collector, getNextPage(jsonObject)); } - private void collectCommentsFrom(final CommentsInfoItemsCollector collector, - final JsonObject jsonObject) + private void collectCommentsFrom(@Nonnull final CommentsInfoItemsCollector collector, + @Nonnull final JsonObject jsonObject) throws ParsingException { final JsonArray onResponseReceivedEndpoints = @@ -233,6 +250,8 @@ public class YoutubeCommentsExtractor extends CommentsExtractor { final JsonArray contents; try { + // A copy of the array is needed, otherwise the continuation item is removed from the + // original object which is used to get the continuation contents = new JsonArray(JsonUtils.getArray(commentsEndpoint, path)); } catch (final Exception e) { // No comments @@ -244,23 +263,80 @@ public class YoutubeCommentsExtractor extends CommentsExtractor { contents.remove(index); } - final String jsonKey = contents.getObject(0).has("commentThreadRenderer") - ? "commentThreadRenderer" - : "commentRenderer"; + // The mutations object, which is returned in the comments' continuation + // It contains parts of comment data when comments are returned with a view model + final JsonArray mutations = jsonObject.getObject("frameworkUpdates") + .getObject("entityBatchUpdate") + .getArray("mutations"); + final String videoUrl = getUrl(); + final TimeAgoParser timeAgoParser = getTimeAgoParser(); - final List comments; - try { - comments = JsonUtils.getValues(contents, jsonKey); - } catch (final Exception e) { - throw new ParsingException("Unable to get parse youtube comments", e); + for (final Object o : contents) { + if (!(o instanceof JsonObject)) { + continue; + } + + collectCommentItem(mutations, (JsonObject) o, collector, videoUrl, timeAgoParser); } + } - final String url = getUrl(); - comments.stream() - .filter(JsonObject.class::isInstance) - .map(JsonObject.class::cast) - .map(jObj -> new YoutubeCommentsInfoItemExtractor(jObj, url, getTimeAgoParser())) - .forEach(collector::commit); + private void collectCommentItem(@Nonnull final JsonArray mutations, + @Nonnull final JsonObject content, + @Nonnull final CommentsInfoItemsCollector collector, + @Nonnull final String videoUrl, + @Nonnull final TimeAgoParser timeAgoParser) + throws ParsingException { + if (content.has("commentThreadRenderer")) { + final JsonObject commentThreadRenderer = + content.getObject("commentThreadRenderer"); + if (commentThreadRenderer.has(COMMENT_VIEW_MODEL_KEY)) { + final JsonObject commentViewModel = + commentThreadRenderer.getObject(COMMENT_VIEW_MODEL_KEY) + .getObject(COMMENT_VIEW_MODEL_KEY); + collector.commit(new YoutubeCommentsEUVMInfoItemExtractor( + commentViewModel, + commentThreadRenderer.getObject("replies") + .getObject("commentRepliesRenderer"), + getMutationPayloadFromEntityKey(mutations, + commentViewModel.getString("commentKey", "")) + .getObject("commentEntityPayload"), + getMutationPayloadFromEntityKey(mutations, + commentViewModel.getString("toolbarStateKey", "")) + .getObject("engagementToolbarStateEntityPayload"), + videoUrl, + timeAgoParser)); + } else if (commentThreadRenderer.has("comment")) { + collector.commit(new YoutubeCommentsInfoItemExtractor( + commentThreadRenderer.getObject("comment") + .getObject(COMMENT_RENDERER_KEY), + commentThreadRenderer.getObject("replies") + .getObject("commentRepliesRenderer"), + videoUrl, + timeAgoParser)); + } + } else if (content.has(COMMENT_VIEW_MODEL_KEY)) { + final JsonObject commentViewModel = content.getObject(COMMENT_VIEW_MODEL_KEY); + collector.commit(new YoutubeCommentsEUVMInfoItemExtractor( + commentViewModel, + null, + getMutationPayloadFromEntityKey(mutations, + commentViewModel.getString("commentKey", "")) + .getObject("commentEntityPayload"), + getMutationPayloadFromEntityKey(mutations, + commentViewModel.getString("toolbarStateKey", "")) + .getObject("engagementToolbarStateEntityPayload"), + videoUrl, + timeAgoParser)); + } else if (content.has(COMMENT_RENDERER_KEY)) { + // commentRenderers are directly returned for comment replies, so there is no + // commentRepliesRenderer to provide + // Also, YouTube has only one comment reply level + collector.commit(new YoutubeCommentsInfoItemExtractor( + content.getObject(COMMENT_RENDERER_KEY), + null, + videoUrl, + timeAgoParser)); + } } @Override @@ -307,10 +383,11 @@ public class YoutubeCommentsExtractor extends CommentsExtractor { return -1; } - final JsonObject countText = ajaxJson - .getArray("onResponseReceivedEndpoints").getObject(0) + final JsonObject countText = ajaxJson.getArray("onResponseReceivedEndpoints") + .getObject(0) .getObject("reloadContinuationItemsCommand") - .getArray("continuationItems").getObject(0) + .getArray("continuationItems") + .getObject(0) .getObject("commentsHeaderRenderer") .getObject("countText"); diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeCommentsInfoItemExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeCommentsInfoItemExtractor.java index 06b68fe5e..ddc7b7bcc 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeCommentsInfoItemExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeCommentsInfoItemExtractor.java @@ -22,40 +22,36 @@ import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper public class YoutubeCommentsInfoItemExtractor implements CommentsInfoItemExtractor { - private final JsonObject json; - private JsonObject commentRenderer; + @Nonnull + private final JsonObject commentRenderer; + @Nullable + private final JsonObject commentRepliesRenderer; + @Nonnull private final String url; + @Nonnull private final TimeAgoParser timeAgoParser; - public YoutubeCommentsInfoItemExtractor(final JsonObject json, - final String url, - final TimeAgoParser timeAgoParser) { - this.json = json; + public YoutubeCommentsInfoItemExtractor(@Nonnull final JsonObject commentRenderer, + @Nullable final JsonObject commentRepliesRenderer, + @Nonnull final String url, + @Nonnull final TimeAgoParser timeAgoParser) { + this.commentRenderer = commentRenderer; + this.commentRepliesRenderer = commentRepliesRenderer; this.url = url; this.timeAgoParser = timeAgoParser; } - private JsonObject getCommentRenderer() throws ParsingException { - if (commentRenderer == null) { - if (json.has("comment")) { - commentRenderer = JsonUtils.getObject(json, "comment.commentRenderer"); - } else { - commentRenderer = json; - } - } - return commentRenderer; - } - @Nonnull private List getAuthorThumbnails() throws ParsingException { try { - return getImagesFromThumbnailsArray(JsonUtils.getArray(getCommentRenderer(), + return getImagesFromThumbnailsArray(JsonUtils.getArray(commentRenderer, "authorThumbnail.thumbnails")); } catch (final Exception e) { throw new ParsingException("Could not get author thumbnails", e); } } + @Nonnull @Override public String getUrl() throws ParsingException { return url; @@ -70,7 +66,7 @@ public class YoutubeCommentsInfoItemExtractor implements CommentsInfoItemExtract @Override public String getName() throws ParsingException { try { - return getTextFromObject(JsonUtils.getObject(getCommentRenderer(), "authorText")); + return getTextFromObject(JsonUtils.getObject(commentRenderer, "authorText")); } catch (final Exception e) { return ""; } @@ -79,7 +75,7 @@ public class YoutubeCommentsInfoItemExtractor implements CommentsInfoItemExtract @Override public String getTextualUploadDate() throws ParsingException { try { - return getTextFromObject(JsonUtils.getObject(getCommentRenderer(), + return getTextFromObject(JsonUtils.getObject(commentRenderer, "publishedTimeText")); } catch (final Exception e) { throw new ParsingException("Could not get publishedTimeText", e); @@ -90,8 +86,7 @@ public class YoutubeCommentsInfoItemExtractor implements CommentsInfoItemExtract @Override public DateWrapper getUploadDate() throws ParsingException { final String textualPublishedTime = getTextualUploadDate(); - if (timeAgoParser != null && textualPublishedTime != null - && !textualPublishedTime.isEmpty()) { + if (textualPublishedTime != null && !textualPublishedTime.isEmpty()) { return timeAgoParser.parse(textualPublishedTime); } else { return null; @@ -118,7 +113,7 @@ public class YoutubeCommentsInfoItemExtractor implements CommentsInfoItemExtract // Try first to get the exact like count by using the accessibility data final String likeCount; try { - likeCount = Utils.removeNonDigitCharacters(JsonUtils.getString(getCommentRenderer(), + likeCount = Utils.removeNonDigitCharacters(JsonUtils.getString(commentRenderer, "actionButtons.commentActionButtonsRenderer.likeButton.toggleButtonRenderer" + ".accessibilityData.accessibilityData.label")); } catch (final Exception e) { @@ -170,11 +165,11 @@ public class YoutubeCommentsInfoItemExtractor implements CommentsInfoItemExtract */ try { // If a comment has no likes voteCount is not set - if (!getCommentRenderer().has("voteCount")) { + if (!commentRenderer.has("voteCount")) { return ""; } - final JsonObject voteCountObj = JsonUtils.getObject(getCommentRenderer(), "voteCount"); + final JsonObject voteCountObj = JsonUtils.getObject(commentRenderer, "voteCount"); if (voteCountObj.isEmpty()) { return ""; } @@ -188,7 +183,7 @@ public class YoutubeCommentsInfoItemExtractor implements CommentsInfoItemExtract @Override public Description getCommentText() throws ParsingException { try { - final JsonObject contentText = JsonUtils.getObject(getCommentRenderer(), "contentText"); + final JsonObject contentText = JsonUtils.getObject(commentRenderer, "contentText"); if (contentText.isEmpty()) { // completely empty comments as described in // https://github.com/TeamNewPipe/NewPipeExtractor/issues/380#issuecomment-668808584 @@ -208,7 +203,7 @@ public class YoutubeCommentsInfoItemExtractor implements CommentsInfoItemExtract @Override public String getCommentId() throws ParsingException { try { - return JsonUtils.getString(getCommentRenderer(), "commentId"); + return JsonUtils.getString(commentRenderer, "commentId"); } catch (final Exception e) { throw new ParsingException("Could not get comment id", e); } @@ -221,27 +216,26 @@ public class YoutubeCommentsInfoItemExtractor implements CommentsInfoItemExtract } @Override - public boolean isHeartedByUploader() throws ParsingException { - final JsonObject commentActionButtonsRenderer = getCommentRenderer() - .getObject("actionButtons") + public boolean isHeartedByUploader() { + final JsonObject commentActionButtonsRenderer = commentRenderer.getObject("actionButtons") .getObject("commentActionButtonsRenderer"); return commentActionButtonsRenderer.has("creatorHeart"); } @Override - public boolean isPinned() throws ParsingException { - return getCommentRenderer().has("pinnedCommentBadge"); + public boolean isPinned() { + return commentRenderer.has("pinnedCommentBadge"); } @Override public boolean isUploaderVerified() throws ParsingException { - return getCommentRenderer().has("authorCommentBadge"); + return commentRenderer.has("authorCommentBadge"); } @Override public String getUploaderName() throws ParsingException { try { - return getTextFromObject(JsonUtils.getObject(getCommentRenderer(), "authorText")); + return getTextFromObject(JsonUtils.getObject(commentRenderer, "authorText")); } catch (final Exception e) { return ""; } @@ -250,7 +244,7 @@ public class YoutubeCommentsInfoItemExtractor implements CommentsInfoItemExtract @Override public String getUploaderUrl() throws ParsingException { try { - return "https://www.youtube.com/channel/" + JsonUtils.getString(getCommentRenderer(), + return "https://www.youtube.com/channel/" + JsonUtils.getString(commentRenderer, "authorEndpoint.browseEndpoint.browseId"); } catch (final Exception e) { return ""; @@ -258,19 +252,22 @@ public class YoutubeCommentsInfoItemExtractor implements CommentsInfoItemExtract } @Override - public int getReplyCount() throws ParsingException { - final JsonObject commentRendererJsonObject = getCommentRenderer(); - if (commentRendererJsonObject.has("replyCount")) { - return commentRendererJsonObject.getInt("replyCount"); + public int getReplyCount() { + if (commentRenderer.has("replyCount")) { + return commentRenderer.getInt("replyCount"); } return UNKNOWN_REPLY_COUNT; } @Override public Page getReplies() { + if (commentRepliesRenderer == null) { + return null; + } + try { final String id = JsonUtils.getString( - JsonUtils.getArray(json, "replies.commentRepliesRenderer.contents") + JsonUtils.getArray(commentRepliesRenderer, "contents") .getObject(0), "continuationItemRenderer.continuationEndpoint.continuationCommand.token"); return new Page(url, id); @@ -280,20 +277,17 @@ public class YoutubeCommentsInfoItemExtractor implements CommentsInfoItemExtract } @Override - public boolean isChannelOwner() throws ParsingException { - return getCommentRenderer().getBoolean("authorIsChannelOwner"); + public boolean isChannelOwner() { + return commentRenderer.getBoolean("authorIsChannelOwner"); } - @Override - public boolean hasCreatorReply() throws ParsingException { - try { - final JsonObject commentRepliesRenderer = JsonUtils.getObject(json, - "replies.commentRepliesRenderer"); - return commentRepliesRenderer.has("viewRepliesCreatorThumbnail"); - } catch (final Exception e) { + public boolean hasCreatorReply() { + if (commentRepliesRenderer == null) { return false; } + + return commentRepliesRenderer.has("viewRepliesCreatorThumbnail"); } }