Merge pull request #987 from FireMasterK/comments-text-description

Use Description object for comments text.
This commit is contained in:
Tobi 2022-11-29 16:54:56 +01:00 committed by GitHub
commit 72d5ed3318
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 41 additions and 30 deletions

View File

@@ -3,13 +3,14 @@ package org.schabi.newpipe.extractor.comments;
import org.schabi.newpipe.extractor.InfoItem; import org.schabi.newpipe.extractor.InfoItem;
import org.schabi.newpipe.extractor.Page; import org.schabi.newpipe.extractor.Page;
import org.schabi.newpipe.extractor.localization.DateWrapper; import org.schabi.newpipe.extractor.localization.DateWrapper;
import org.schabi.newpipe.extractor.stream.Description;
import javax.annotation.Nullable; import javax.annotation.Nullable;
public class CommentsInfoItem extends InfoItem { public class CommentsInfoItem extends InfoItem {
private String commentId; private String commentId;
private String commentText; private Description commentText;
private String uploaderName; private String uploaderName;
private String uploaderAvatarUrl; private String uploaderAvatarUrl;
private String uploaderUrl; private String uploaderUrl;
@@ -43,11 +44,11 @@ public class CommentsInfoItem extends InfoItem {
this.commentId = commentId; this.commentId = commentId;
} }
public String getCommentText() { public Description getCommentText() {
return commentText; return commentText;
} }
public void setCommentText(final String commentText) { public void setCommentText(final Description commentText) {
this.commentText = commentText; this.commentText = commentText;
} }

View File

@@ -5,6 +5,7 @@ import org.schabi.newpipe.extractor.Page;
import org.schabi.newpipe.extractor.exceptions.ParsingException; import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.localization.DateWrapper; import org.schabi.newpipe.extractor.localization.DateWrapper;
import org.schabi.newpipe.extractor.services.youtube.extractors.YoutubeCommentsInfoItemExtractor; import org.schabi.newpipe.extractor.services.youtube.extractors.YoutubeCommentsInfoItemExtractor;
import org.schabi.newpipe.extractor.stream.Description;
import org.schabi.newpipe.extractor.stream.StreamExtractor; import org.schabi.newpipe.extractor.stream.StreamExtractor;
import javax.annotation.Nullable; import javax.annotation.Nullable;
@@ -41,8 +42,8 @@ public interface CommentsInfoItemExtractor extends InfoItemExtractor {
/** /**
* The text of the comment * The text of the comment
*/ */
default String getCommentText() throws ParsingException { default Description getCommentText() throws ParsingException {
return ""; return Description.EMPTY_DESCRIPTION;
} }
/** /**

View File

@@ -3,6 +3,7 @@ package org.schabi.newpipe.extractor.services.bandcamp.extractors;
import org.jsoup.nodes.Element; import org.jsoup.nodes.Element;
import org.schabi.newpipe.extractor.comments.CommentsInfoItemExtractor; import org.schabi.newpipe.extractor.comments.CommentsInfoItemExtractor;
import org.schabi.newpipe.extractor.exceptions.ParsingException; import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.stream.Description;
import java.util.Objects; import java.util.Objects;
@@ -18,7 +19,7 @@ public class BandcampCommentsInfoItemExtractor implements CommentsInfoItemExtrac
@Override @Override
public String getName() throws ParsingException { public String getName() throws ParsingException {
return getCommentText(); return getCommentText().getContent();
} }
@Override @Override
@@ -32,12 +33,14 @@ public class BandcampCommentsInfoItemExtractor implements CommentsInfoItemExtrac
} }
@Override @Override
public String getCommentText() throws ParsingException { public Description getCommentText() throws ParsingException {
return writing.getElementsByClass("text").stream() final var text = writing.getElementsByClass("text").stream()
.filter(Objects::nonNull) .filter(Objects::nonNull)
.map(Element::ownText) .map(Element::ownText)
.findFirst() .findFirst()
.orElseThrow(() -> new ParsingException("Could not get comment text")); .orElseThrow(() -> new ParsingException("Could not get comment text"));
return new Description(text, Description.PLAIN_TEXT);
} }
@Override @Override

View File

@@ -9,6 +9,7 @@ import org.schabi.newpipe.extractor.comments.CommentsInfoItemExtractor;
import org.schabi.newpipe.extractor.exceptions.ParsingException; import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.localization.DateWrapper; import org.schabi.newpipe.extractor.localization.DateWrapper;
import org.schabi.newpipe.extractor.services.peertube.PeertubeParsingHelper; import org.schabi.newpipe.extractor.services.peertube.PeertubeParsingHelper;
import org.schabi.newpipe.extractor.stream.Description;
import org.schabi.newpipe.extractor.utils.JsonUtils; import org.schabi.newpipe.extractor.utils.JsonUtils;
import java.util.Objects; import java.util.Objects;
@@ -59,13 +60,15 @@ public class PeertubeCommentsInfoItemExtractor implements CommentsInfoItemExtrac
} }
@Override @Override
public String getCommentText() throws ParsingException { public Description getCommentText() throws ParsingException {
final String htmlText = JsonUtils.getString(item, "text"); final String htmlText = JsonUtils.getString(item, "text");
try { try {
final Document doc = Jsoup.parse(htmlText); final Document doc = Jsoup.parse(htmlText);
return doc.body().text(); final var text = doc.body().text();
return new Description(text, Description.PLAIN_TEXT);
} catch (final Exception e) { } catch (final Exception e) {
return htmlText.replaceAll("(?s)<[^>]*>(\\s*<[^>]*>)*", ""); final var text = htmlText.replaceAll("(?s)<[^>]*>(\\s*<[^>]*>)*", "");
return new Description(text, Description.PLAIN_TEXT);
} }
} }

View File

@@ -5,6 +5,7 @@ import org.schabi.newpipe.extractor.comments.CommentsInfoItemExtractor;
import org.schabi.newpipe.extractor.exceptions.ParsingException; import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.localization.DateWrapper; import org.schabi.newpipe.extractor.localization.DateWrapper;
import org.schabi.newpipe.extractor.services.soundcloud.SoundcloudParsingHelper; import org.schabi.newpipe.extractor.services.soundcloud.SoundcloudParsingHelper;
import org.schabi.newpipe.extractor.stream.Description;
import javax.annotation.Nullable; import javax.annotation.Nullable;
import java.util.Objects; import java.util.Objects;
@@ -24,8 +25,8 @@ public class SoundcloudCommentsInfoItemExtractor implements CommentsInfoItemExtr
} }
@Override @Override
public String getCommentText() { public Description getCommentText() {
return json.getString("body"); return new Description(json.getString("body"), Description.PLAIN_TEXT);
} }
@Override @Override

View File

@@ -1,21 +1,21 @@
package org.schabi.newpipe.extractor.services.youtube.extractors; package org.schabi.newpipe.extractor.services.youtube.extractors;
import static org.schabi.newpipe.extractor.comments.CommentsInfoItem.UNKNOWN_REPLY_COUNT;
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getTextFromObject;
import com.grack.nanojson.JsonArray; import com.grack.nanojson.JsonArray;
import com.grack.nanojson.JsonObject; import com.grack.nanojson.JsonObject;
import org.schabi.newpipe.extractor.Page; import org.schabi.newpipe.extractor.Page;
import org.schabi.newpipe.extractor.comments.CommentsInfoItemExtractor; import org.schabi.newpipe.extractor.comments.CommentsInfoItemExtractor;
import org.schabi.newpipe.extractor.exceptions.ParsingException; import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.localization.DateWrapper; import org.schabi.newpipe.extractor.localization.DateWrapper;
import org.schabi.newpipe.extractor.localization.TimeAgoParser; import org.schabi.newpipe.extractor.localization.TimeAgoParser;
import org.schabi.newpipe.extractor.stream.Description;
import org.schabi.newpipe.extractor.utils.JsonUtils; import org.schabi.newpipe.extractor.utils.JsonUtils;
import org.schabi.newpipe.extractor.utils.Utils; import org.schabi.newpipe.extractor.utils.Utils;
import javax.annotation.Nullable; import javax.annotation.Nullable;
import static org.schabi.newpipe.extractor.comments.CommentsInfoItem.UNKNOWN_REPLY_COUNT;
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getTextFromObject;
public class YoutubeCommentsInfoItemExtractor implements CommentsInfoItemExtractor { public class YoutubeCommentsInfoItemExtractor implements CommentsInfoItemExtractor {
private final JsonObject json; private final JsonObject json;
@@ -176,18 +176,20 @@ public class YoutubeCommentsInfoItemExtractor implements CommentsInfoItemExtract
} }
@Override @Override
public String getCommentText() throws ParsingException { public Description getCommentText() throws ParsingException {
try { try {
final JsonObject contentText = JsonUtils.getObject(getCommentRenderer(), "contentText"); final JsonObject contentText = JsonUtils.getObject(getCommentRenderer(), "contentText");
if (contentText.isEmpty()) { if (contentText.isEmpty()) {
// completely empty comments as described in // completely empty comments as described in
// https://github.com/TeamNewPipe/NewPipeExtractor/issues/380#issuecomment-668808584 // https://github.com/TeamNewPipe/NewPipeExtractor/issues/380#issuecomment-668808584
return ""; return Description.EMPTY_DESCRIPTION;
} }
final String commentText = getTextFromObject(contentText, true); final String commentText = getTextFromObject(contentText, true);
// YouTube adds U+FEFF in some comments. // YouTube adds U+FEFF in some comments.
// eg. https://www.youtube.com/watch?v=Nj4F63E59io<feff> // eg. https://www.youtube.com/watch?v=Nj4F63E59io<feff>
return Utils.removeUTF8BOM(commentText); final String commentTextBomRemoved = Utils.removeUTF8BOM(commentText);
return new Description(commentTextBomRemoved, Description.HTML);
} catch (final Exception e) { } catch (final Exception e) {
throw new ParsingException("Could not get comment text", e); throw new ParsingException("Could not get comment text", e);
} }

View File

@@ -42,7 +42,7 @@ public class BandcampCommentsExtractorTest {
for (CommentsInfoItem c : comments.getItems()) { for (CommentsInfoItem c : comments.getItems()) {
assertFalse(Utils.isBlank(c.getUploaderName())); assertFalse(Utils.isBlank(c.getUploaderName()));
assertFalse(Utils.isBlank(c.getUploaderAvatarUrl())); assertFalse(Utils.isBlank(c.getUploaderAvatarUrl()));
assertFalse(Utils.isBlank(c.getCommentText())); assertFalse(Utils.isBlank(c.getCommentText().getContent()));
assertFalse(Utils.isBlank(c.getName())); assertFalse(Utils.isBlank(c.getName()));
assertFalse(Utils.isBlank(c.getThumbnailUrl())); assertFalse(Utils.isBlank(c.getThumbnailUrl()));
assertFalse(Utils.isBlank(c.getUrl())); assertFalse(Utils.isBlank(c.getUrl()));

View File

@@ -75,7 +75,7 @@ public class PeertubeCommentsExtractorTest {
assertFalse(Utils.isBlank(c.getUploaderName())); assertFalse(Utils.isBlank(c.getUploaderName()));
assertFalse(Utils.isBlank(c.getUploaderAvatarUrl())); assertFalse(Utils.isBlank(c.getUploaderAvatarUrl()));
assertFalse(Utils.isBlank(c.getCommentId())); assertFalse(Utils.isBlank(c.getCommentId()));
assertFalse(Utils.isBlank(c.getCommentText())); assertFalse(Utils.isBlank(c.getCommentText().getContent()));
assertFalse(Utils.isBlank(c.getName())); assertFalse(Utils.isBlank(c.getName()));
assertFalse(Utils.isBlank(c.getTextualUploadDate())); assertFalse(Utils.isBlank(c.getTextualUploadDate()));
assertFalse(Utils.isBlank(c.getThumbnailUrl())); assertFalse(Utils.isBlank(c.getThumbnailUrl()));
@@ -91,7 +91,7 @@ public class PeertubeCommentsExtractorTest {
private boolean findInComments(List<CommentsInfoItem> comments, String comment) { private boolean findInComments(List<CommentsInfoItem> comments, String comment) {
for (CommentsInfoItem c : comments) { for (CommentsInfoItem c : comments) {
if (c.getCommentText().contains(comment)) { if (c.getCommentText().getContent().contains(comment)) {
return true; return true;
} }
} }

View File

@@ -95,7 +95,7 @@ public class YoutubeCommentsExtractorTest {
assertFalse(Utils.isBlank(c.getUploaderName())); assertFalse(Utils.isBlank(c.getUploaderName()));
assertFalse(Utils.isBlank(c.getUploaderAvatarUrl())); assertFalse(Utils.isBlank(c.getUploaderAvatarUrl()));
assertFalse(Utils.isBlank(c.getCommentId())); assertFalse(Utils.isBlank(c.getCommentId()));
assertFalse(Utils.isBlank(c.getCommentText())); assertFalse(Utils.isBlank(c.getCommentText().getContent()));
assertFalse(Utils.isBlank(c.getName())); assertFalse(Utils.isBlank(c.getName()));
assertFalse(Utils.isBlank(c.getTextualUploadDate())); assertFalse(Utils.isBlank(c.getTextualUploadDate()));
assertNotNull(c.getUploadDate()); assertNotNull(c.getUploadDate());
@@ -111,7 +111,7 @@ public class YoutubeCommentsExtractorTest {
private boolean findInComments(List<CommentsInfoItem> comments, String comment) { private boolean findInComments(List<CommentsInfoItem> comments, String comment) {
for (CommentsInfoItem c : comments) { for (CommentsInfoItem c : comments) {
if (c.getCommentText().contains(comment)) { if (c.getCommentText().getContent().contains(comment)) {
return true; return true;
} }
} }
@@ -152,9 +152,9 @@ public class YoutubeCommentsExtractorTest {
assertFalse(Utils.isBlank(c.getUrl())); assertFalse(Utils.isBlank(c.getUrl()));
assertTrue(c.getLikeCount() >= 0); assertTrue(c.getLikeCount() >= 0);
if (c.getCommentId().equals("Ugga_h1-EXdHB3gCoAEC")) { // comment without text if (c.getCommentId().equals("Ugga_h1-EXdHB3gCoAEC")) { // comment without text
assertTrue(Utils.isBlank(c.getCommentText())); assertTrue(Utils.isBlank(c.getCommentText().getContent()));
} else { } else {
assertFalse(Utils.isBlank(c.getCommentText())); assertFalse(Utils.isBlank(c.getCommentText().getContent()));
} }
} }
} }
@@ -193,7 +193,7 @@ public class YoutubeCommentsExtractorTest {
assertFalse(Utils.isBlank(c.getThumbnailUrl())); assertFalse(Utils.isBlank(c.getThumbnailUrl()));
assertFalse(Utils.isBlank(c.getUrl())); assertFalse(Utils.isBlank(c.getUrl()));
assertTrue(c.getLikeCount() >= 0); assertTrue(c.getLikeCount() >= 0);
assertFalse(Utils.isBlank(c.getCommentText())); assertFalse(Utils.isBlank(c.getCommentText().getContent()));
if (c.isHeartedByUploader()) { if (c.isHeartedByUploader()) {
heartedByUploader = true; heartedByUploader = true;
} }
@@ -233,7 +233,7 @@ public class YoutubeCommentsExtractorTest {
assertFalse(Utils.isBlank(c.getThumbnailUrl())); assertFalse(Utils.isBlank(c.getThumbnailUrl()));
assertFalse(Utils.isBlank(c.getUrl())); assertFalse(Utils.isBlank(c.getUrl()));
assertTrue(c.getLikeCount() >= 0); assertTrue(c.getLikeCount() >= 0);
assertFalse(Utils.isBlank(c.getCommentText())); assertFalse(Utils.isBlank(c.getCommentText().getContent()));
} }
assertTrue(comments.getItems().get(0).isPinned(), "First comment isn't pinned"); assertTrue(comments.getItems().get(0).isPinned(), "First comment isn't pinned");
@@ -328,7 +328,7 @@ public class YoutubeCommentsExtractorTest {
InfoItemsPage<CommentsInfoItem> replies = extractor.getPage(firstComment.getReplies()); InfoItemsPage<CommentsInfoItem> replies = extractor.getPage(firstComment.getReplies());
assertEquals("First", replies.getItems().get(0).getCommentText(), assertEquals("First", replies.getItems().get(0).getCommentText().getContent(),
"First reply comment did not match"); "First reply comment did not match");
} }