Use a Description object for comment text.

2022-11-28 00:22:10 +00:00 · 2022-11-28 00:22:10 +00:00 · b566084cac
parent 40f1ec4a54
commit b566084cac
9 changed files with 41 additions and 30 deletions
--- a/extractor/src/main/java/org/schabi/newpipe/extractor/comments/CommentsInfoItem.java
+++ b/extractor/src/main/java/org/schabi/newpipe/extractor/comments/CommentsInfoItem.java
@@ -3,13 +3,14 @@ package org.schabi.newpipe.extractor.comments;
 import org.schabi.newpipe.extractor.InfoItem;
 import org.schabi.newpipe.extractor.Page;
 import org.schabi.newpipe.extractor.localization.DateWrapper;
+import org.schabi.newpipe.extractor.stream.Description;

 import javax.annotation.Nullable;

 public class CommentsInfoItem extends InfoItem {

    private String commentId;
-    private String commentText;
+    private Description commentText;
    private String uploaderName;
    private String uploaderAvatarUrl;
    private String uploaderUrl;
@@ -43,11 +44,11 @@ public class CommentsInfoItem extends InfoItem {
        this.commentId = commentId;
    }

-    public String getCommentText() {
+    public Description getCommentText() {
        return commentText;
    }

-    public void setCommentText(final String commentText) {
+    public void setCommentText(final Description commentText) {
        this.commentText = commentText;
    }

--- a/extractor/src/main/java/org/schabi/newpipe/extractor/comments/CommentsInfoItemExtractor.java
+++ b/extractor/src/main/java/org/schabi/newpipe/extractor/comments/CommentsInfoItemExtractor.java
@@ -5,6 +5,7 @@ import org.schabi.newpipe.extractor.Page;
 import org.schabi.newpipe.extractor.exceptions.ParsingException;
 import org.schabi.newpipe.extractor.localization.DateWrapper;
 import org.schabi.newpipe.extractor.services.youtube.extractors.YoutubeCommentsInfoItemExtractor;
+import org.schabi.newpipe.extractor.stream.Description;
 import org.schabi.newpipe.extractor.stream.StreamExtractor;

 import javax.annotation.Nullable;
@@ -41,8 +42,8 @@ public interface CommentsInfoItemExtractor extends InfoItemExtractor {
    /**
     * The text of the comment
     */
-    default String getCommentText() throws ParsingException {
-        return "";
+    default Description getCommentText() throws ParsingException {
+        return Description.EMPTY_DESCRIPTION;
    }

    /**
--- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/bandcamp/extractors/BandcampCommentsInfoItemExtractor.java
+++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/bandcamp/extractors/BandcampCommentsInfoItemExtractor.java
@@ -3,6 +3,7 @@ package org.schabi.newpipe.extractor.services.bandcamp.extractors;
 import org.jsoup.nodes.Element;
 import org.schabi.newpipe.extractor.comments.CommentsInfoItemExtractor;
 import org.schabi.newpipe.extractor.exceptions.ParsingException;
+import org.schabi.newpipe.extractor.stream.Description;

 import java.util.Objects;

@@ -18,7 +19,7 @@ public class BandcampCommentsInfoItemExtractor implements CommentsInfoItemExtrac

    @Override
    public String getName() throws ParsingException {
-        return getCommentText();
+        return getCommentText().getContent();
    }

    @Override
@@ -32,12 +33,14 @@ public class BandcampCommentsInfoItemExtractor implements CommentsInfoItemExtrac
    }

    @Override
-    public String getCommentText() throws ParsingException {
-        return writing.getElementsByClass("text").stream()
+    public Description getCommentText() throws ParsingException {
+        final var text = writing.getElementsByClass("text").stream()
                .filter(Objects::nonNull)
                .map(Element::ownText)
                .findFirst()
                .orElseThrow(() -> new ParsingException("Could not get comment text"));
+
+        return new Description(text, Description.PLAIN_TEXT);
    }

    @Override
--- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/peertube/extractors/PeertubeCommentsInfoItemExtractor.java
+++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/peertube/extractors/PeertubeCommentsInfoItemExtractor.java
@@ -9,6 +9,7 @@ import org.schabi.newpipe.extractor.comments.CommentsInfoItemExtractor;
 import org.schabi.newpipe.extractor.exceptions.ParsingException;
 import org.schabi.newpipe.extractor.localization.DateWrapper;
 import org.schabi.newpipe.extractor.services.peertube.PeertubeParsingHelper;
+import org.schabi.newpipe.extractor.stream.Description;
 import org.schabi.newpipe.extractor.utils.JsonUtils;

 import java.util.Objects;
@@ -59,13 +60,15 @@ public class PeertubeCommentsInfoItemExtractor implements CommentsInfoItemExtrac
    }

    @Override
-    public String getCommentText() throws ParsingException {
+    public Description getCommentText() throws ParsingException {
        final String htmlText = JsonUtils.getString(item, "text");
        try {
            final Document doc = Jsoup.parse(htmlText);
-            return doc.body().text();
+            final var text = doc.body().text();
+            return new Description(text, Description.PLAIN_TEXT);
        } catch (final Exception e) {
-            return htmlText.replaceAll("(?s)<[^>]*>(\\s*<[^>]*>)*", "");
+            final var text = htmlText.replaceAll("(?s)<[^>]*>(\\s*<[^>]*>)*", "");
+            return new Description(text, Description.PLAIN_TEXT);
        }
    }

--- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/extractors/SoundcloudCommentsInfoItemExtractor.java
+++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/extractors/SoundcloudCommentsInfoItemExtractor.java
@@ -5,6 +5,7 @@ import org.schabi.newpipe.extractor.comments.CommentsInfoItemExtractor;
 import org.schabi.newpipe.extractor.exceptions.ParsingException;
 import org.schabi.newpipe.extractor.localization.DateWrapper;
 import org.schabi.newpipe.extractor.services.soundcloud.SoundcloudParsingHelper;
+import org.schabi.newpipe.extractor.stream.Description;

 import javax.annotation.Nullable;
 import java.util.Objects;
@@ -24,8 +25,8 @@ public class SoundcloudCommentsInfoItemExtractor implements CommentsInfoItemExtr
    }

    @Override
-    public String getCommentText() {
-        return json.getString("body");
+    public Description getCommentText() {
+        return new Description(json.getString("body"), Description.PLAIN_TEXT);
    }

    @Override
--- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeCommentsInfoItemExtractor.java
+++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeCommentsInfoItemExtractor.java
@@ -1,21 +1,21 @@
 package org.schabi.newpipe.extractor.services.youtube.extractors;

-import static org.schabi.newpipe.extractor.comments.CommentsInfoItem.UNKNOWN_REPLY_COUNT;
-import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getTextFromObject;
-
 import com.grack.nanojson.JsonArray;
 import com.grack.nanojson.JsonObject;
-
 import org.schabi.newpipe.extractor.Page;
 import org.schabi.newpipe.extractor.comments.CommentsInfoItemExtractor;
 import org.schabi.newpipe.extractor.exceptions.ParsingException;
 import org.schabi.newpipe.extractor.localization.DateWrapper;
 import org.schabi.newpipe.extractor.localization.TimeAgoParser;
+import org.schabi.newpipe.extractor.stream.Description;
 import org.schabi.newpipe.extractor.utils.JsonUtils;
 import org.schabi.newpipe.extractor.utils.Utils;

 import javax.annotation.Nullable;

+import static org.schabi.newpipe.extractor.comments.CommentsInfoItem.UNKNOWN_REPLY_COUNT;
+import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getTextFromObject;
+
 public class YoutubeCommentsInfoItemExtractor implements CommentsInfoItemExtractor {

    private final JsonObject json;
@@ -176,18 +176,20 @@ public class YoutubeCommentsInfoItemExtractor implements CommentsInfoItemExtract
    }

    @Override
-    public String getCommentText() throws ParsingException {
+    public Description getCommentText() throws ParsingException {
        try {
            final JsonObject contentText = JsonUtils.getObject(getCommentRenderer(), "contentText");
            if (contentText.isEmpty()) {
                // completely empty comments as described in
                // https://github.com/TeamNewPipe/NewPipeExtractor/issues/380#issuecomment-668808584
-                return "";
+                return Description.EMPTY_DESCRIPTION;
            }
            final String commentText = getTextFromObject(contentText, true);
            // YouTube adds U+FEFF in some comments.
            // eg. https://www.youtube.com/watch?v=Nj4F63E59io<feff>
-            return Utils.removeUTF8BOM(commentText);
+            final String commentTextBomRemoved = Utils.removeUTF8BOM(commentText);
+
+            return new Description(commentTextBomRemoved, Description.HTML);
        } catch (final Exception e) {
            throw new ParsingException("Could not get comment text", e);
        }
--- a/extractor/src/test/java/org/schabi/newpipe/extractor/services/bandcamp/BandcampCommentsExtractorTest.java
+++ b/extractor/src/test/java/org/schabi/newpipe/extractor/services/bandcamp/BandcampCommentsExtractorTest.java
@@ -42,7 +42,7 @@ public class BandcampCommentsExtractorTest {
        for (CommentsInfoItem c : comments.getItems()) {
            assertFalse(Utils.isBlank(c.getUploaderName()));
            assertFalse(Utils.isBlank(c.getUploaderAvatarUrl()));
-            assertFalse(Utils.isBlank(c.getCommentText()));
+            assertFalse(Utils.isBlank(c.getCommentText().getContent()));
            assertFalse(Utils.isBlank(c.getName()));
            assertFalse(Utils.isBlank(c.getThumbnailUrl()));
            assertFalse(Utils.isBlank(c.getUrl()));
--- a/extractor/src/test/java/org/schabi/newpipe/extractor/services/peertube/PeertubeCommentsExtractorTest.java
+++ b/extractor/src/test/java/org/schabi/newpipe/extractor/services/peertube/PeertubeCommentsExtractorTest.java
@@ -75,7 +75,7 @@ public class PeertubeCommentsExtractorTest {
                assertFalse(Utils.isBlank(c.getUploaderName()));
                assertFalse(Utils.isBlank(c.getUploaderAvatarUrl()));
                assertFalse(Utils.isBlank(c.getCommentId()));
-                assertFalse(Utils.isBlank(c.getCommentText()));
+                assertFalse(Utils.isBlank(c.getCommentText().getContent()));
                assertFalse(Utils.isBlank(c.getName()));
                assertFalse(Utils.isBlank(c.getTextualUploadDate()));
                assertFalse(Utils.isBlank(c.getThumbnailUrl()));
@@ -91,7 +91,7 @@ public class PeertubeCommentsExtractorTest {

        private boolean findInComments(List<CommentsInfoItem> comments, String comment) {
            for (CommentsInfoItem c : comments) {
-                if (c.getCommentText().contains(comment)) {
+                if (c.getCommentText().getContent().contains(comment)) {
                    return true;
                }
            }
--- a/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeCommentsExtractorTest.java
+++ b/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeCommentsExtractorTest.java
@@ -95,7 +95,7 @@ public class YoutubeCommentsExtractorTest {
                assertFalse(Utils.isBlank(c.getUploaderName()));
                assertFalse(Utils.isBlank(c.getUploaderAvatarUrl()));
                assertFalse(Utils.isBlank(c.getCommentId()));
-                assertFalse(Utils.isBlank(c.getCommentText()));
+                assertFalse(Utils.isBlank(c.getCommentText().getContent()));
                assertFalse(Utils.isBlank(c.getName()));
                assertFalse(Utils.isBlank(c.getTextualUploadDate()));
                assertNotNull(c.getUploadDate());
@@ -111,7 +111,7 @@ public class YoutubeCommentsExtractorTest {

        private boolean findInComments(List<CommentsInfoItem> comments, String comment) {
            for (CommentsInfoItem c : comments) {
-                if (c.getCommentText().contains(comment)) {
+                if (c.getCommentText().getContent().contains(comment)) {
                    return true;
                }
            }
@@ -152,9 +152,9 @@ public class YoutubeCommentsExtractorTest {
                assertFalse(Utils.isBlank(c.getUrl()));
                assertTrue(c.getLikeCount() >= 0);
                if (c.getCommentId().equals("Ugga_h1-EXdHB3gCoAEC")) { // comment without text
-                    assertTrue(Utils.isBlank(c.getCommentText()));
+                    assertTrue(Utils.isBlank(c.getCommentText().getContent()));
                } else {
-                    assertFalse(Utils.isBlank(c.getCommentText()));
+                    assertFalse(Utils.isBlank(c.getCommentText().getContent()));
                }
            }
        }
@@ -193,7 +193,7 @@ public class YoutubeCommentsExtractorTest {
                assertFalse(Utils.isBlank(c.getThumbnailUrl()));
                assertFalse(Utils.isBlank(c.getUrl()));
                assertTrue(c.getLikeCount() >= 0);
-                assertFalse(Utils.isBlank(c.getCommentText()));
+                assertFalse(Utils.isBlank(c.getCommentText().getContent()));
                if (c.isHeartedByUploader()) {
                    heartedByUploader = true;
                }
@@ -233,7 +233,7 @@ public class YoutubeCommentsExtractorTest {
                assertFalse(Utils.isBlank(c.getThumbnailUrl()));
                assertFalse(Utils.isBlank(c.getUrl()));
                assertTrue(c.getLikeCount() >= 0);
-                assertFalse(Utils.isBlank(c.getCommentText()));
+                assertFalse(Utils.isBlank(c.getCommentText().getContent()));
            }

            assertTrue(comments.getItems().get(0).isPinned(), "First comment isn't pinned");
@@ -328,7 +328,7 @@ public class YoutubeCommentsExtractorTest {

            InfoItemsPage<CommentsInfoItem> replies = extractor.getPage(firstComment.getReplies());

-            assertEquals("First", replies.getItems().get(0).getCommentText(),
+            assertEquals("First", replies.getItems().get(0).getCommentText().getContent(),
                    "First reply comment did not match");
        }