Merge pull request #1052 from TeamNewPipe/peertube/fix/nested-comment-replies

[PeerTube] Fix multi level comment replies
This commit is contained in:
Audric V 2023-05-18 18:49:06 +02:00 committed by GitHub
commit 92a0024424
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 152 additions and 45 deletions

View File

@ -37,6 +37,10 @@ public class Page implements Serializable {
this(url, id, null, null, null);
}
/**
 * Creates a page identified by a URL and an id that also carries a raw body.
 * <p>
 * Delegates to the five-argument constructor, passing {@code null} for its third and
 * fourth arguments.
 *
 * @param url  the page URL, forwarded as the first argument
 * @param id   the page id, forwarded as the second argument
 * @param body the raw body bytes, forwarded as the last argument
 */
public Page(final String url, final String id, final byte[] body) {
this(url, id, null, null, body);
}
/**
 * Creates a page identified only by a URL that also carries a raw body.
 * <p>
 * Delegates to the five-argument constructor, passing {@code null} for the id and for
 * its third and fourth arguments.
 *
 * @param url  the page URL, forwarded as the first argument
 * @param body the raw body bytes, forwarded as the last argument
 */
public Page(final String url, final byte[] body) {
this(url, null, null, null, body);
}

View File

@ -3,6 +3,7 @@ package org.schabi.newpipe.extractor.services.peertube.extractors;
import com.grack.nanojson.JsonArray;
import com.grack.nanojson.JsonObject;
import com.grack.nanojson.JsonParser;
import com.grack.nanojson.JsonParserException;
import org.schabi.newpipe.extractor.Page;
import org.schabi.newpipe.extractor.StreamingService;
import org.schabi.newpipe.extractor.comments.CommentsExtractor;
@ -17,6 +18,7 @@ import org.schabi.newpipe.extractor.services.peertube.PeertubeParsingHelper;
import org.schabi.newpipe.extractor.utils.Utils;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import static org.schabi.newpipe.extractor.services.peertube.PeertubeParsingHelper.COUNT_KEY;
import static org.schabi.newpipe.extractor.services.peertube.PeertubeParsingHelper.ITEMS_PER_PAGE;
@ -26,6 +28,9 @@ import static org.schabi.newpipe.extractor.utils.Utils.isNullOrEmpty;
import javax.annotation.Nonnull;
public class PeertubeCommentsExtractor extends CommentsExtractor {
// JSON key of the array holding the replies of a comment. Package-private rather than
// private because PeertubeCommentsInfoItemExtractor statically imports it.
static final String CHILDREN = "children";
// JSON key of the boolean flag marking a comment as deleted; flagged comments are not
// committed to the collector.
private static final String IS_DELETED = "isDeleted";
// JSON key of the comment count that is used as the total when computing the next page.
private static final String TOTAL = "total";
/**
* Use {@link #isReply()} to access this variable.
@ -67,8 +72,9 @@ public class PeertubeCommentsExtractor extends CommentsExtractor {
for (final Object c : contents) {
if (c instanceof JsonObject) {
final JsonObject item = (JsonObject) c;
if (!item.getBoolean("isDeleted")) {
collector.commit(new PeertubeCommentsInfoItemExtractor(item, this));
if (!item.getBoolean(IS_DELETED)) {
collector.commit(new PeertubeCommentsInfoItemExtractor(
item, null, getUrl(), getBaseUrl(), isReply()));
}
}
}
@ -76,13 +82,16 @@ public class PeertubeCommentsExtractor extends CommentsExtractor {
private void collectRepliesFrom(@Nonnull final CommentsInfoItemsCollector collector,
@Nonnull final JsonObject json) throws ParsingException {
final JsonArray contents = json.getArray("children");
final JsonArray contents = json.getArray(CHILDREN);
for (final Object c : contents) {
if (c instanceof JsonObject) {
final JsonObject item = ((JsonObject) c).getObject("comment");
if (!item.getBoolean("isDeleted")) {
collector.commit(new PeertubeCommentsInfoItemExtractor(item, this));
final JsonObject content = (JsonObject) c;
final JsonObject item = content.getObject("comment");
final JsonArray children = content.getArray(CHILDREN);
if (!item.getBoolean(IS_DELETED)) {
collector.commit(new PeertubeCommentsInfoItemExtractor(
item, children, getUrl(), getBaseUrl(), isReply()));
}
}
}
@ -95,36 +104,46 @@ public class PeertubeCommentsExtractor extends CommentsExtractor {
throw new IllegalArgumentException("Page doesn't contain an URL");
}
final Response response = getDownloader().get(page.getUrl());
JsonObject json = null;
if (response != null && !Utils.isBlank(response.responseBody())) {
try {
json = JsonParser.object().from(response.responseBody());
} catch (final Exception e) {
throw new ParsingException("Could not parse json data for comments info", e);
final CommentsInfoItemsCollector collector = new CommentsInfoItemsCollector(getServiceId());
final long total;
if (page.getBody() == null) {
final Response response = getDownloader().get(page.getUrl());
if (response != null && !Utils.isBlank(response.responseBody())) {
try {
json = JsonParser.object().from(response.responseBody());
} catch (final Exception e) {
throw new ParsingException("Could not parse json data for comments info", e);
}
}
}
if (json != null) {
PeertubeParsingHelper.validate(json);
final long total;
final CommentsInfoItemsCollector collector
= new CommentsInfoItemsCollector(getServiceId());
if (isReply() || json.has("children")) {
total = json.getArray("children").size();
collectRepliesFrom(collector, json);
if (json != null) {
PeertubeParsingHelper.validate(json);
if (isReply() || json.has(CHILDREN)) {
total = json.getArray(CHILDREN).size();
collectRepliesFrom(collector, json);
} else {
total = json.getLong(TOTAL);
collectCommentsFrom(collector, json);
}
} else {
total = json.getLong("total");
collectCommentsFrom(collector, json);
throw new ExtractionException("Unable to get PeerTube kiosk info");
}
return new InfoItemsPage<>(collector,
PeertubeParsingHelper.getNextPage(page.getUrl(), total));
} else {
throw new ExtractionException("Unable to get PeerTube kiosk info");
try {
json = JsonParser.object().from(new String(page.getBody(), StandardCharsets.UTF_8));
isReply = true;
total = json.getArray(CHILDREN).size();
collectRepliesFrom(collector, json);
} catch (final JsonParserException e) {
throw new ParsingException(
"Could not parse json data for nested comments info", e);
}
}
return new InfoItemsPage<>(collector,
PeertubeParsingHelper.getNextPage(page.getUrl(), total));
}
@Override

View File

@ -1,7 +1,9 @@
package org.schabi.newpipe.extractor.services.peertube.extractors;
import com.grack.nanojson.JsonArray;
import com.grack.nanojson.JsonObject;
import com.grack.nanojson.JsonWriter;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.schabi.newpipe.extractor.Page;
@ -13,20 +15,36 @@ import org.schabi.newpipe.extractor.services.peertube.PeertubeParsingHelper;
import org.schabi.newpipe.extractor.stream.Description;
import org.schabi.newpipe.extractor.utils.JsonUtils;
import javax.annotation.Nonnull;
import javax.annotation.Nullable;
import java.nio.charset.StandardCharsets;
import java.util.Objects;
public class PeertubeCommentsInfoItemExtractor implements CommentsInfoItemExtractor {
private final JsonObject item;
private final String url;
private final String baseUrl;
import static org.schabi.newpipe.extractor.services.peertube.extractors.PeertubeCommentsExtractor.CHILDREN;
public PeertubeCommentsInfoItemExtractor(final JsonObject item,
final PeertubeCommentsExtractor extractor)
throws ParsingException {
public class PeertubeCommentsInfoItemExtractor implements CommentsInfoItemExtractor {
@Nonnull
private final JsonObject item;
@Nullable
private final JsonArray children;
@Nonnull
private final String url;
@Nonnull
private final String baseUrl;
private final boolean isReply;
private Integer replyCount;
public PeertubeCommentsInfoItemExtractor(@Nonnull final JsonObject item,
@Nullable final JsonArray children,
@Nonnull final String url,
@Nonnull final String baseUrl,
final boolean isReply) {
this.item = item;
this.url = extractor.getUrl();
this.baseUrl = extractor.getBaseUrl();
this.children = children;
this.url = url;
this.baseUrl = baseUrl;
this.isReply = isReply;
}
@Override
@ -107,15 +125,34 @@ public class PeertubeCommentsInfoItemExtractor implements CommentsInfoItemExtrac
@Override
@Nullable
public Page getReplies() throws ParsingException {
if (JsonUtils.getNumber(item, "totalReplies").intValue() == 0) {
if (getReplyCount() == 0) {
return null;
}
final String threadId = JsonUtils.getNumber(item, "threadId").toString();
return new Page(url + "/" + threadId, threadId);
final String repliesUrl = url + "/" + threadId;
if (isReply && children != null && !children.isEmpty()) {
// Nested replies are already included in the original thread's request.
// Wrap the replies into a JsonObject, because the original thread's request body
// is also structured like a JsonObject.
final JsonObject pageContent = new JsonObject();
pageContent.put(CHILDREN, children);
return new Page(repliesUrl, threadId,
JsonWriter.string(pageContent).getBytes(StandardCharsets.UTF_8));
}
return new Page(repliesUrl, threadId);
}
@Override
public int getReplyCount() throws ParsingException {
return JsonUtils.getNumber(item, "totalReplies").intValue();
if (replyCount == null) {
if (children != null && !children.isEmpty()) {
// The totalReplies field is inaccurate for nested replies and sometimes returns 0
// although there are replies to that reply stored in children.
replyCount = children.size();
} else {
replyCount = JsonUtils.getNumber(item, "totalReplies").intValue();
}
}
return replyCount;
}
}

View File

@ -14,10 +14,9 @@ import org.schabi.newpipe.extractor.utils.Utils;
import java.io.IOException;
import java.util.List;
import java.util.Optional;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.junit.jupiter.api.Assertions.*;
import static org.schabi.newpipe.extractor.ServiceList.PeerTube;
public class PeertubeCommentsExtractorTest {
@ -121,4 +120,52 @@ public class PeertubeCommentsExtractorTest {
assertTrue(commentsInfo.getErrors().isEmpty());
}
}
/**
 * Verifies that replies nested below a top-level comment of a video can be reached
 * by walking the reply pages.
 */
public static class NestedComments {
    private static PeertubeCommentsExtractor extractor;

    @BeforeAll
    public static void setUp() throws Exception {
        NewPipe.init(DownloaderTestImpl.getInstance());
        extractor = (PeertubeCommentsExtractor) PeerTube
                .getCommentsExtractor("https://share.tube/w/vxu4uTstUBAUromWwXGHrq");
    }

    @Test
    void testGetComments() throws IOException, ExtractionException {
        // Id of the top-level comment heading the nested thread, and of a reply below it.
        final String threadHeadId = "9770";
        final String nestedReplyId = "9773";

        final InfoItemsPage<CommentsInfoItem> initialPage = extractor.getInitialPage();
        assertFalse(initialPage.getItems().isEmpty());

        final Optional<CommentsInfoItem> threadHead = initialPage.getItems()
                .stream()
                .filter(comment -> comment.getCommentId().equals(threadHeadId))
                .findFirst();
        assertTrue(threadHead.isPresent());

        final boolean nestedReplyFound =
                findNestedCommentWithId(nestedReplyId, threadHead.get());
        assertTrue(nestedReplyFound, "The nested comment replies were not found");
    }
}
/**
 * Recursively searches a comment and all of its (nested) replies for a comment id.
 * <p>
 * The search is depth-first and stops at the first match, so no further reply pages
 * are requested once the id has been found.
 *
 * @param id      the comment id to look for
 * @param comment the comment at which the search starts
 * @return {@code true} if {@code comment} itself or any comment reachable through its
 *         reply pages has the given id, {@code false} otherwise
 * @throws IOException         if the reply page of {@code comment} could not be fetched
 * @throws ExtractionException if the reply page of {@code comment} could not be extracted
 */
private static boolean findNestedCommentWithId(final String id, final CommentsInfoItem comment)
        throws IOException, ExtractionException {
    if (comment.getCommentId().equals(id)) {
        return true;
    }

    // A comment without replies presumably exposes no replies page (the PeerTube item
    // extractor returns null when the reply count is 0). Treat it as a leaf instead of
    // handing null to getPage() and relying on the resulting exception to end the recursion.
    if (comment.getReplies() == null) {
        return false;
    }

    return PeerTube
            .getCommentsExtractor(comment.getUrl())
            .getPage(comment.getReplies())
            .getItems()
            .stream()
            // anyMatch short-circuits, unlike map + reduce which visited every subtree.
            .anyMatch(reply -> {
                try {
                    return findNestedCommentWithId(id, reply);
                } catch (final Exception ignored) {
                    // Best effort: a sub-thread that cannot be loaded counts as "not found"
                    // so that the remaining sibling threads are still searched.
                    return false;
                }
            });
}
}