Merge pull request #1052 from TeamNewPipe/peertube/fix/nested-comment-replies

[PeerTube] Fix multi-level comment replies
This commit is contained in:
Audric V 2023-05-18 18:49:06 +02:00 committed by GitHub
commit 92a0024424
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 152 additions and 45 deletions

View File

@ -37,6 +37,10 @@ public class Page implements Serializable {
this(url, id, null, null, null); this(url, id, null, null, null);
} }
/**
 * Creates a page from a URL, an id and a body.
 * <p>
 * Delegates to the five-argument constructor, passing {@code null} for its third and fourth
 * arguments.
 *
 * @param url  the URL of the page
 * @param id   the id of the page
 * @param body the raw body bytes attached to the page
 */
public Page(final String url, final String id, final byte[] body) {
this(url, id, null, null, body);
}
public Page(final String url, final byte[] body) { public Page(final String url, final byte[] body) {
this(url, null, null, null, body); this(url, null, null, null, body);
} }

View File

@ -3,6 +3,7 @@ package org.schabi.newpipe.extractor.services.peertube.extractors;
import com.grack.nanojson.JsonArray; import com.grack.nanojson.JsonArray;
import com.grack.nanojson.JsonObject; import com.grack.nanojson.JsonObject;
import com.grack.nanojson.JsonParser; import com.grack.nanojson.JsonParser;
import com.grack.nanojson.JsonParserException;
import org.schabi.newpipe.extractor.Page; import org.schabi.newpipe.extractor.Page;
import org.schabi.newpipe.extractor.StreamingService; import org.schabi.newpipe.extractor.StreamingService;
import org.schabi.newpipe.extractor.comments.CommentsExtractor; import org.schabi.newpipe.extractor.comments.CommentsExtractor;
@ -17,6 +18,7 @@ import org.schabi.newpipe.extractor.services.peertube.PeertubeParsingHelper;
import org.schabi.newpipe.extractor.utils.Utils; import org.schabi.newpipe.extractor.utils.Utils;
import java.io.IOException; import java.io.IOException;
import java.nio.charset.StandardCharsets;
import static org.schabi.newpipe.extractor.services.peertube.PeertubeParsingHelper.COUNT_KEY; import static org.schabi.newpipe.extractor.services.peertube.PeertubeParsingHelper.COUNT_KEY;
import static org.schabi.newpipe.extractor.services.peertube.PeertubeParsingHelper.ITEMS_PER_PAGE; import static org.schabi.newpipe.extractor.services.peertube.PeertubeParsingHelper.ITEMS_PER_PAGE;
@ -26,6 +28,9 @@ import static org.schabi.newpipe.extractor.utils.Utils.isNullOrEmpty;
import javax.annotation.Nonnull; import javax.annotation.Nonnull;
public class PeertubeCommentsExtractor extends CommentsExtractor { public class PeertubeCommentsExtractor extends CommentsExtractor {
static final String CHILDREN = "children";
private static final String IS_DELETED = "isDeleted";
private static final String TOTAL = "total";
/** /**
* Use {@link #isReply()} to access this variable. * Use {@link #isReply()} to access this variable.
@ -67,8 +72,9 @@ public class PeertubeCommentsExtractor extends CommentsExtractor {
for (final Object c : contents) { for (final Object c : contents) {
if (c instanceof JsonObject) { if (c instanceof JsonObject) {
final JsonObject item = (JsonObject) c; final JsonObject item = (JsonObject) c;
if (!item.getBoolean("isDeleted")) { if (!item.getBoolean(IS_DELETED)) {
collector.commit(new PeertubeCommentsInfoItemExtractor(item, this)); collector.commit(new PeertubeCommentsInfoItemExtractor(
item, null, getUrl(), getBaseUrl(), isReply()));
} }
} }
} }
@ -76,13 +82,16 @@ public class PeertubeCommentsExtractor extends CommentsExtractor {
private void collectRepliesFrom(@Nonnull final CommentsInfoItemsCollector collector, private void collectRepliesFrom(@Nonnull final CommentsInfoItemsCollector collector,
@Nonnull final JsonObject json) throws ParsingException { @Nonnull final JsonObject json) throws ParsingException {
final JsonArray contents = json.getArray("children"); final JsonArray contents = json.getArray(CHILDREN);
for (final Object c : contents) { for (final Object c : contents) {
if (c instanceof JsonObject) { if (c instanceof JsonObject) {
final JsonObject item = ((JsonObject) c).getObject("comment"); final JsonObject content = (JsonObject) c;
if (!item.getBoolean("isDeleted")) { final JsonObject item = content.getObject("comment");
collector.commit(new PeertubeCommentsInfoItemExtractor(item, this)); final JsonArray children = content.getArray(CHILDREN);
if (!item.getBoolean(IS_DELETED)) {
collector.commit(new PeertubeCommentsInfoItemExtractor(
item, children, getUrl(), getBaseUrl(), isReply()));
} }
} }
} }
@ -95,36 +104,46 @@ public class PeertubeCommentsExtractor extends CommentsExtractor {
throw new IllegalArgumentException("Page doesn't contain an URL"); throw new IllegalArgumentException("Page doesn't contain an URL");
} }
final Response response = getDownloader().get(page.getUrl());
JsonObject json = null; JsonObject json = null;
if (response != null && !Utils.isBlank(response.responseBody())) { final CommentsInfoItemsCollector collector = new CommentsInfoItemsCollector(getServiceId());
try { final long total;
json = JsonParser.object().from(response.responseBody()); if (page.getBody() == null) {
} catch (final Exception e) { final Response response = getDownloader().get(page.getUrl());
throw new ParsingException("Could not parse json data for comments info", e); if (response != null && !Utils.isBlank(response.responseBody())) {
try {
json = JsonParser.object().from(response.responseBody());
} catch (final Exception e) {
throw new ParsingException("Could not parse json data for comments info", e);
}
} }
} if (json != null) {
PeertubeParsingHelper.validate(json);
if (json != null) { if (isReply() || json.has(CHILDREN)) {
PeertubeParsingHelper.validate(json); total = json.getArray(CHILDREN).size();
final long total; collectRepliesFrom(collector, json);
final CommentsInfoItemsCollector collector } else {
= new CommentsInfoItemsCollector(getServiceId()); total = json.getLong(TOTAL);
collectCommentsFrom(collector, json);
if (isReply() || json.has("children")) { }
total = json.getArray("children").size();
collectRepliesFrom(collector, json);
} else { } else {
total = json.getLong("total"); throw new ExtractionException("Unable to get PeerTube kiosk info");
collectCommentsFrom(collector, json);
} }
return new InfoItemsPage<>(collector,
PeertubeParsingHelper.getNextPage(page.getUrl(), total));
} else { } else {
throw new ExtractionException("Unable to get PeerTube kiosk info"); try {
json = JsonParser.object().from(new String(page.getBody(), StandardCharsets.UTF_8));
isReply = true;
total = json.getArray(CHILDREN).size();
collectRepliesFrom(collector, json);
} catch (final JsonParserException e) {
throw new ParsingException(
"Could not parse json data for nested comments info", e);
}
} }
return new InfoItemsPage<>(collector,
PeertubeParsingHelper.getNextPage(page.getUrl(), total));
} }
@Override @Override

View File

@ -1,7 +1,9 @@
package org.schabi.newpipe.extractor.services.peertube.extractors; package org.schabi.newpipe.extractor.services.peertube.extractors;
import com.grack.nanojson.JsonArray;
import com.grack.nanojson.JsonObject; import com.grack.nanojson.JsonObject;
import com.grack.nanojson.JsonWriter;
import org.jsoup.Jsoup; import org.jsoup.Jsoup;
import org.jsoup.nodes.Document; import org.jsoup.nodes.Document;
import org.schabi.newpipe.extractor.Page; import org.schabi.newpipe.extractor.Page;
@ -13,20 +15,36 @@ import org.schabi.newpipe.extractor.services.peertube.PeertubeParsingHelper;
import org.schabi.newpipe.extractor.stream.Description; import org.schabi.newpipe.extractor.stream.Description;
import org.schabi.newpipe.extractor.utils.JsonUtils; import org.schabi.newpipe.extractor.utils.JsonUtils;
import javax.annotation.Nonnull;
import javax.annotation.Nullable; import javax.annotation.Nullable;
import java.nio.charset.StandardCharsets;
import java.util.Objects; import java.util.Objects;
public class PeertubeCommentsInfoItemExtractor implements CommentsInfoItemExtractor { import static org.schabi.newpipe.extractor.services.peertube.extractors.PeertubeCommentsExtractor.CHILDREN;
private final JsonObject item;
private final String url;
private final String baseUrl;
public PeertubeCommentsInfoItemExtractor(final JsonObject item, public class PeertubeCommentsInfoItemExtractor implements CommentsInfoItemExtractor {
final PeertubeCommentsExtractor extractor) @Nonnull
throws ParsingException { private final JsonObject item;
@Nullable
private final JsonArray children;
@Nonnull
private final String url;
@Nonnull
private final String baseUrl;
private final boolean isReply;
private Integer replyCount;
public PeertubeCommentsInfoItemExtractor(@Nonnull final JsonObject item,
@Nullable final JsonArray children,
@Nonnull final String url,
@Nonnull final String baseUrl,
final boolean isReply) {
this.item = item; this.item = item;
this.url = extractor.getUrl(); this.children = children;
this.baseUrl = extractor.getBaseUrl(); this.url = url;
this.baseUrl = baseUrl;
this.isReply = isReply;
} }
@Override @Override
@ -107,15 +125,34 @@ public class PeertubeCommentsInfoItemExtractor implements CommentsInfoItemExtrac
@Override @Override
@Nullable @Nullable
public Page getReplies() throws ParsingException { public Page getReplies() throws ParsingException {
if (JsonUtils.getNumber(item, "totalReplies").intValue() == 0) { if (getReplyCount() == 0) {
return null; return null;
} }
final String threadId = JsonUtils.getNumber(item, "threadId").toString(); final String threadId = JsonUtils.getNumber(item, "threadId").toString();
return new Page(url + "/" + threadId, threadId); final String repliesUrl = url + "/" + threadId;
if (isReply && children != null && !children.isEmpty()) {
// Nested replies are already included in the original thread's request.
// Wrap the replies into a JsonObject, because the original thread's request body
// is also structured like a JsonObject.
final JsonObject pageContent = new JsonObject();
pageContent.put(CHILDREN, children);
return new Page(repliesUrl, threadId,
JsonWriter.string(pageContent).getBytes(StandardCharsets.UTF_8));
}
return new Page(repliesUrl, threadId);
} }
@Override @Override
public int getReplyCount() throws ParsingException { public int getReplyCount() throws ParsingException {
return JsonUtils.getNumber(item, "totalReplies").intValue(); if (replyCount == null) {
if (children != null && !children.isEmpty()) {
// The totalReplies field is inaccurate for nested replies and sometimes returns 0
// although there are replies to that reply stored in children.
replyCount = children.size();
} else {
replyCount = JsonUtils.getNumber(item, "totalReplies").intValue();
}
}
return replyCount;
} }
} }

View File

@ -14,10 +14,9 @@ import org.schabi.newpipe.extractor.utils.Utils;
import java.io.IOException; import java.io.IOException;
import java.util.List; import java.util.List;
import java.util.Optional;
import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.*;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.schabi.newpipe.extractor.ServiceList.PeerTube; import static org.schabi.newpipe.extractor.ServiceList.PeerTube;
public class PeertubeCommentsExtractorTest { public class PeertubeCommentsExtractorTest {
@ -121,4 +120,52 @@ public class PeertubeCommentsExtractorTest {
assertTrue(commentsInfo.getErrors().isEmpty()); assertTrue(commentsInfo.getErrors().isEmpty());
} }
} }
/**
 * Checks comment extraction for a video whose comment threads contain replies to replies.
 */
public static class NestedComments {
    private static PeertubeCommentsExtractor extractor;

    @BeforeAll
    public static void setUp() throws Exception {
        NewPipe.init(DownloaderTestImpl.getInstance());
        extractor = (PeertubeCommentsExtractor) PeerTube
                .getCommentsExtractor("https://share.tube/w/vxu4uTstUBAUromWwXGHrq");
    }

    @Test
    void testGetComments() throws IOException, ExtractionException {
        final InfoItemsPage<CommentsInfoItem> initialPage = extractor.getInitialPage();
        final List<CommentsInfoItem> topLevelComments = initialPage.getItems();
        assertFalse(topLevelComments.isEmpty());

        // Locate the top-level comment that is expected to head the nested thread.
        final Optional<CommentsInfoItem> threadHead = topLevelComments.stream()
                .filter(c -> c.getCommentId().equals("9770"))
                .findFirst();
        assertTrue(threadHead.isPresent());

        // Walk the replies below that comment until the nested reply is found.
        final boolean nestedReplyFound = findNestedCommentWithId("9773", threadHead.get());
        assertTrue(nestedReplyFound, "The nested comment replies were not found");
    }
}
/**
 * Searches a comment and, recursively, its replies for a comment with the given id.
 * <p>
 * Only the first page of replies returned for each comment is inspected. The search stops as
 * soon as a match is found, so no further reply pages are requested after that.
 *
 * @param id      the comment id to look for
 * @param comment the comment at which the search starts
 * @return {@code true} if {@code comment} itself or any comment reachable through its replies
 *         has the id {@code id}, {@code false} otherwise
 * @throws IOException         if fetching a replies page fails
 * @throws ExtractionException if a replies page cannot be extracted
 */
private static boolean findNestedCommentWithId(final String id, final CommentsInfoItem comment)
        throws IOException, ExtractionException {
    if (comment.getCommentId().equals(id)) {
        return true;
    }

    // A comment without replies has no replies page. Check for that explicitly instead of
    // passing null to getPage() and relying on the resulting exception being swallowed.
    if (comment.getReplies() == null) {
        return false;
    }

    final List<CommentsInfoItem> replies = PeerTube
            .getCommentsExtractor(comment.getUrl())
            .getPage(comment.getReplies())
            .getItems();

    // A plain loop short-circuits on the first match and lets real network or parsing
    // failures propagate, so the test fails with the actual cause rather than a bare "false".
    for (final CommentsInfoItem reply : replies) {
        if (findNestedCommentWithId(id, reply)) {
            return true;
        }
    }
    return false;
}
} }