Merge pull request #703 from FireMasterK/comment-replies

Add support for extracting comment replies continuation
2021-09-14 23:58:14 +02:00 · 2021-09-14 23:58:14 +02:00 · a9d214478d
parent ce8cabb9f8 6aabdc6d16
commit a9d214478d
10 changed files with 1275 additions and 30 deletions
--- a/extractor/src/main/java/org/schabi/newpipe/extractor/comments/CommentsInfoItem.java
+++ b/extractor/src/main/java/org/schabi/newpipe/extractor/comments/CommentsInfoItem.java
@ -1,6 +1,7 @@
 package org.schabi.newpipe.extractor.comments;

 import org.schabi.newpipe.extractor.InfoItem;
+import org.schabi.newpipe.extractor.Page;
 import org.schabi.newpipe.extractor.localization.DateWrapper;

 import javax.annotation.Nullable;
@ -21,6 +22,8 @@ public class CommentsInfoItem extends InfoItem {
    private boolean heartedByUploader;
    private boolean pinned;
    private int streamPosition;
+    @Nullable
+    private Page replies;

    public static final int NO_LIKE_COUNT = -1;
    public static final int NO_STREAM_POSITION = -1;
@ -142,4 +145,8 @@ public class CommentsInfoItem extends InfoItem {
    public int getStreamPosition() {
        return streamPosition;
    }
+
+    public void setReplies(@Nullable Page replies) { this.replies = replies; } // stores the page used to fetch this comment's replies; may be null
+
+    @Nullable public Page getReplies() { return this.replies; } // null if no replies page was set
 }
--- a/extractor/src/main/java/org/schabi/newpipe/extractor/comments/CommentsInfoItemExtractor.java
+++ b/extractor/src/main/java/org/schabi/newpipe/extractor/comments/CommentsInfoItemExtractor.java
@ -1,6 +1,7 @@
 package org.schabi.newpipe.extractor.comments;

 import org.schabi.newpipe.extractor.InfoItemExtractor;
+import org.schabi.newpipe.extractor.Page;
 import org.schabi.newpipe.extractor.exceptions.ParsingException;
 import org.schabi.newpipe.extractor.localization.DateWrapper;
 import org.schabi.newpipe.extractor.services.youtube.extractors.YoutubeCommentsInfoItemExtractor;
@ -107,4 +108,13 @@ public interface CommentsInfoItemExtractor extends InfoItemExtractor {
    default int getStreamPosition() throws ParsingException {
        return CommentsInfoItem.NO_STREAM_POSITION;
    }
+
+    /**
+     * Gets the continuation page from which the replies to this comment can be fetched.
+     * @return the continuation {@link Page} for the replies, or {@code null} if replies are not supported
+     */
+    @Nullable
+    default Page getReplies() throws ParsingException {
+        return null;
+    }
 }
--- a/extractor/src/main/java/org/schabi/newpipe/extractor/comments/CommentsInfoItemsCollector.java
+++ b/extractor/src/main/java/org/schabi/newpipe/extractor/comments/CommentsInfoItemsCollector.java
@ -93,6 +93,12 @@ public class CommentsInfoItemsCollector extends InfoItemsCollector<CommentsInfoI
            addError(e);
        }

+        try {
+            resultItem.setReplies(extractor.getReplies());
+        } catch (Exception e) {
+            addError(e);
+        }
+
        return resultItem;
    }

@ -106,12 +112,6 @@ public class CommentsInfoItemsCollector extends InfoItemsCollector<CommentsInfoI
    }

    public List<CommentsInfoItem> getCommentsInfoItemList() {
-        List<CommentsInfoItem> siiList = new ArrayList<>();
-        for (InfoItem ii : super.getItems()) {
-            if (ii instanceof CommentsInfoItem) {
-                siiList.add((CommentsInfoItem) ii);
-            }
-        }
-        return siiList;
+        return new ArrayList<>(super.getItems());
    }
 }
--- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeCommentsExtractor.java
+++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeCommentsExtractor.java
@ -101,7 +101,7 @@ public class YoutubeCommentsExtractor extends CommentsExtractor {

        if (itemSectionRenderer.isPresent()) {
            token = JsonUtils.getString(((JsonObject) itemSectionRenderer.get())
-                    .getObject("itemSectionRenderer").getArray("contents").getObject(0),
+                            .getObject("itemSectionRenderer").getArray("contents").getObject(0),
                    "continuationItemRenderer.continuationEndpoint.continuationCommand.token");
        } else {
            token = null;
@ -140,10 +140,13 @@ public class YoutubeCommentsExtractor extends CommentsExtractor {
            return null;
        }

+        final JsonObject continuationItemRenderer = jsonArray.getObject(jsonArray.size() - 1).getObject("continuationItemRenderer");
+
+        final String jsonPath = continuationItemRenderer.has("button") ? "button.buttonRenderer.command.continuationCommand.token" : "continuationEndpoint.continuationCommand.token";
+
        final String continuation;
        try {
-            continuation = JsonUtils.getString(jsonArray.getObject(jsonArray.size() - 1),
-                    "continuationItemRenderer.continuationEndpoint.continuationCommand.token");
+            continuation = JsonUtils.getString(continuationItemRenderer, jsonPath);
        } catch (final Exception e) {
            return null;
        }
@ -167,7 +170,7 @@ public class YoutubeCommentsExtractor extends CommentsExtractor {

        final Localization localization = getExtractorLocalization();
        final byte[] body = JsonWriter.string(prepareDesktopJsonBuilder(localization,
-                        getExtractorContentCountry())
+                getExtractorContentCountry())
                .value("continuation", page.getId())
                .done())
                .getBytes(UTF_8);
@ -212,10 +215,11 @@ public class YoutubeCommentsExtractor extends CommentsExtractor {
            contents.remove(index);
        }

+        final String jsonKey = contents.getObject(0).has("commentThreadRenderer") ? "commentThreadRenderer" : "commentRenderer";
+
        final List<Object> comments;
        try {
-            comments = JsonUtils.getValues(contents,
-                    "commentThreadRenderer.comment.commentRenderer");
+            comments = JsonUtils.getValues(contents, jsonKey);
        } catch (final Exception e) {
            throw new ParsingException("Unable to get parse youtube comments", e);
        }
@ -234,7 +238,7 @@ public class YoutubeCommentsExtractor extends CommentsExtractor {
            throws IOException, ExtractionException {
        final Localization localization = getExtractorLocalization();
        final byte[] body = JsonWriter.string(prepareDesktopJsonBuilder(localization,
-                        getExtractorContentCountry())
+                getExtractorContentCountry())
                .value("videoId", getId())
                .done())
                .getBytes(UTF_8);
--- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeCommentsInfoItemExtractor.java
+++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeCommentsInfoItemExtractor.java
@ -3,6 +3,8 @@ package org.schabi.newpipe.extractor.services.youtube.extractors;
 import com.grack.nanojson.JsonArray;
 import com.grack.nanojson.JsonObject;

+import com.grack.nanojson.JsonWriter;
+import org.schabi.newpipe.extractor.Page;
 import org.schabi.newpipe.extractor.comments.CommentsInfoItemExtractor;
 import org.schabi.newpipe.extractor.exceptions.ParsingException;
 import org.schabi.newpipe.extractor.localization.DateWrapper;
@ -18,6 +20,7 @@ import static org.schabi.newpipe.extractor.utils.Utils.EMPTY_STRING;
 public class YoutubeCommentsInfoItemExtractor implements CommentsInfoItemExtractor {

    private final JsonObject json;
+    private JsonObject commentRenderer;
    private final String url;
    private final TimeAgoParser timeAgoParser;

@ -29,6 +32,16 @@ public class YoutubeCommentsInfoItemExtractor implements CommentsInfoItemExtract
        this.timeAgoParser = timeAgoParser;
    }

+    private JsonObject getCommentRenderer() throws ParsingException {
+        // Resolved once and cached: the renderer is nested under "comment.commentRenderer"
+        // when json has a "comment" key; otherwise json itself serves as the renderer.
+        if (commentRenderer == null) {
+            commentRenderer = json.has("comment")
+                    ? JsonUtils.getObject(json, "comment.commentRenderer") : json;
+        }
+        return commentRenderer;
+    }
+
    @Override
    public String getUrl() throws ParsingException {
        return url;
@ -37,7 +50,7 @@ public class YoutubeCommentsInfoItemExtractor implements CommentsInfoItemExtract
    @Override
    public String getThumbnailUrl() throws ParsingException {
        try {
-            final JsonArray arr = JsonUtils.getArray(json, "authorThumbnail.thumbnails");
+            final JsonArray arr = JsonUtils.getArray(getCommentRenderer(), "authorThumbnail.thumbnails");
            return JsonUtils.getString(arr.getObject(2), "url");
        } catch (final Exception e) {
            throw new ParsingException("Could not get thumbnail url", e);
@ -47,7 +60,7 @@ public class YoutubeCommentsInfoItemExtractor implements CommentsInfoItemExtract
    @Override
    public String getName() throws ParsingException {
        try {
-            return getTextFromObject(JsonUtils.getObject(json, "authorText"));
+            return getTextFromObject(JsonUtils.getObject(getCommentRenderer(), "authorText"));
        } catch (final Exception e) {
            return EMPTY_STRING;
        }
@ -56,7 +69,7 @@ public class YoutubeCommentsInfoItemExtractor implements CommentsInfoItemExtract
    @Override
    public String getTextualUploadDate() throws ParsingException {
        try {
-            return getTextFromObject(JsonUtils.getObject(json, "publishedTimeText"));
+            return getTextFromObject(JsonUtils.getObject(getCommentRenderer(), "publishedTimeText"));
        } catch (final Exception e) {
            throw new ParsingException("Could not get publishedTimeText", e);
        }
@ -94,7 +107,7 @@ public class YoutubeCommentsInfoItemExtractor implements CommentsInfoItemExtract
        // Try first to get the exact like count by using the accessibility data
        final String likeCount;
        try {
-            likeCount = Utils.removeNonDigitCharacters(JsonUtils.getString(json,
+            likeCount = Utils.removeNonDigitCharacters(JsonUtils.getString(getCommentRenderer(),
                    "actionButtons.commentActionButtonsRenderer.likeButton.toggleButtonRenderer.accessibilityData.accessibilityData.label"));
        } catch (final Exception e) {
            // Use the approximate like count returned into the voteCount object
@ -145,11 +158,11 @@ public class YoutubeCommentsInfoItemExtractor implements CommentsInfoItemExtract
         */
        try {
            // If a comment has no likes voteCount is not set
-            if (!json.has("voteCount")) {
+            if (!getCommentRenderer().has("voteCount")) {
                return EMPTY_STRING;
            }

-            final JsonObject voteCountObj = JsonUtils.getObject(json, "voteCount");
+            final JsonObject voteCountObj = JsonUtils.getObject(getCommentRenderer(), "voteCount");
            if (voteCountObj.isEmpty()) {
                return EMPTY_STRING;
            }
@ -162,7 +175,7 @@ public class YoutubeCommentsInfoItemExtractor implements CommentsInfoItemExtract
    @Override
    public String getCommentText() throws ParsingException {
        try {
-            final JsonObject contentText = JsonUtils.getObject(json, "contentText");
+            final JsonObject contentText = JsonUtils.getObject(getCommentRenderer(), "contentText");
            if (contentText.isEmpty()) {
                // completely empty comments as described in
                // https://github.com/TeamNewPipe/NewPipeExtractor/issues/380#issuecomment-668808584
@ -180,7 +193,7 @@ public class YoutubeCommentsInfoItemExtractor implements CommentsInfoItemExtract
    @Override
    public String getCommentId() throws ParsingException {
        try {
-            return JsonUtils.getString(json, "commentId");
+            return JsonUtils.getString(getCommentRenderer(), "commentId");
        } catch (final Exception e) {
            throw new ParsingException("Could not get comment id", e);
        }
@ -189,7 +202,7 @@ public class YoutubeCommentsInfoItemExtractor implements CommentsInfoItemExtract
    @Override
    public String getUploaderAvatarUrl() throws ParsingException {
        try {
-            JsonArray arr = JsonUtils.getArray(json, "authorThumbnail.thumbnails");
+            JsonArray arr = JsonUtils.getArray(getCommentRenderer(), "authorThumbnail.thumbnails");
            return JsonUtils.getString(arr.getObject(2), "url");
        } catch (final Exception e) {
            throw new ParsingException("Could not get author thumbnail", e);
@ -198,24 +211,24 @@ public class YoutubeCommentsInfoItemExtractor implements CommentsInfoItemExtract

    @Override
    public boolean isHeartedByUploader() throws ParsingException {
-        final JsonObject commentActionButtonsRenderer = json.getObject("actionButtons")
+        final JsonObject commentActionButtonsRenderer = getCommentRenderer().getObject("actionButtons")
                .getObject("commentActionButtonsRenderer");
        return commentActionButtonsRenderer.has("creatorHeart");
    }

    @Override
-    public boolean isPinned() {
-        return json.has("pinnedCommentBadge");
+    public boolean isPinned() throws ParsingException {
+        return getCommentRenderer().has("pinnedCommentBadge");
    }

-    public boolean isUploaderVerified() {
-        return json.has("authorCommentBadge");
+    public boolean isUploaderVerified() throws ParsingException {
+        return getCommentRenderer().has("authorCommentBadge");
    }

    @Override
    public String getUploaderName() throws ParsingException {
        try {
-            return getTextFromObject(JsonUtils.getObject(json, "authorText"));
+            return getTextFromObject(JsonUtils.getObject(getCommentRenderer(), "authorText"));
        } catch (final Exception e) {
            return EMPTY_STRING;
        }
@ -224,10 +237,20 @@ public class YoutubeCommentsInfoItemExtractor implements CommentsInfoItemExtract
    @Override
    public String getUploaderUrl() throws ParsingException {
        try {
-            return "https://www.youtube.com/channel/" + JsonUtils.getString(json,
+            return "https://www.youtube.com/channel/" + JsonUtils.getString(getCommentRenderer(),
                    "authorEndpoint.browseEndpoint.browseId");
        } catch (final Exception e) {
            return EMPTY_STRING;
        }
    }
+
+    @Override
+    public Page getReplies() throws ParsingException {
+        try {
+            final JsonObject firstReplyItem = JsonUtils.getArray(json, "replies.commentRepliesRenderer.contents").getObject(0);
+            return new Page(url, JsonUtils.getString(firstReplyItem, "continuationItemRenderer.continuationEndpoint.continuationCommand.token"));
+        } catch (final Exception e) {
+            return null; // No continuation token found, e.g. for a reply itself: YouTube has no nested replies.
+        }
+    }
 }
--- a/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeCommentsExtractorTest.java
+++ b/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeCommentsExtractorTest.java
@ -306,4 +306,32 @@ public class YoutubeCommentsExtractorTest {
            assertTrue("The first pinned comment has no vote count", !Utils.isBlank(pinnedComment.getTextualLikeCount()));
        }
    }
+
+    public static class RepliesTest {
+        private static final String url = "https://www.youtube.com/watch?v=--yeOvJGZQk";
+        private static YoutubeCommentsExtractor extractor;
+
+        @BeforeClass
+        public static void setUp() throws Exception {
+            YoutubeParsingHelper.resetClientVersionAndKey();
+            YoutubeParsingHelper.setNumberGenerator(new Random(1));
+            NewPipe.init(new DownloaderFactory().getDownloader(RESOURCE_PATH + "replies"));
+            extractor = (YoutubeCommentsExtractor) YouTube.getCommentsExtractor(url);
+            extractor.fetchPage();
+        }
+
+        /** Fetches the replies page of the first comment and checks the first reply's text. */
+        @Test
+        public void testGetCommentsFirstReplies() throws IOException, ExtractionException {
+            final InfoItemsPage<CommentsInfoItem> comments = extractor.getInitialPage();
+            DefaultTests.defaultTestListOfItems(YouTube, comments.getItems(), comments.getErrors());
+
+            final CommentsInfoItem firstComment = comments.getItems().get(0);
+            // Fail with a clear message instead of handing a null page to getPage().
+            assertTrue("First comment has no replies page", firstComment.getReplies() != null);
+
+            final InfoItemsPage<CommentsInfoItem> replies = extractor.getPage(firstComment.getReplies());
+            assertEquals("First reply comment did not match", "Lol", replies.getItems().get(0).getCommentText());
+        }
+    }
 }
--- a/extractor/src/test/resources/org/schabi/newpipe/extractor/services/youtube/extractor/comments/replies/generated_mock_0.json
+++ b/extractor/src/test/resources/org/schabi/newpipe/extractor/services/youtube/extractor/comments/replies/generated_mock_0.json
--- a/extractor/src/test/resources/org/schabi/newpipe/extractor/services/youtube/extractor/comments/replies/generated_mock_1.json
+++ b/extractor/src/test/resources/org/schabi/newpipe/extractor/services/youtube/extractor/comments/replies/generated_mock_1.json
--- a/extractor/src/test/resources/org/schabi/newpipe/extractor/services/youtube/extractor/comments/replies/generated_mock_2.json
+++ b/extractor/src/test/resources/org/schabi/newpipe/extractor/services/youtube/extractor/comments/replies/generated_mock_2.json
--- a/extractor/src/test/resources/org/schabi/newpipe/extractor/services/youtube/extractor/comments/replies/generated_mock_3.json
+++ b/extractor/src/test/resources/org/schabi/newpipe/extractor/services/youtube/extractor/comments/replies/generated_mock_3.json