Merge pull request #1052 from TeamNewPipe/peertube/fix/nested-comment-replies
[PeerTube] Fix multi level comment replies
This commit is contained in:
commit
92a0024424
|
@ -37,6 +37,10 @@ public class Page implements Serializable {
|
||||||
this(url, id, null, null, null);
|
this(url, id, null, null, null);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public Page(final String url, final String id, final byte[] body) {
|
||||||
|
this(url, id, null, null, body);
|
||||||
|
}
|
||||||
|
|
||||||
public Page(final String url, final byte[] body) {
|
public Page(final String url, final byte[] body) {
|
||||||
this(url, null, null, null, body);
|
this(url, null, null, null, body);
|
||||||
}
|
}
|
||||||
|
|
|
@ -3,6 +3,7 @@ package org.schabi.newpipe.extractor.services.peertube.extractors;
|
||||||
import com.grack.nanojson.JsonArray;
|
import com.grack.nanojson.JsonArray;
|
||||||
import com.grack.nanojson.JsonObject;
|
import com.grack.nanojson.JsonObject;
|
||||||
import com.grack.nanojson.JsonParser;
|
import com.grack.nanojson.JsonParser;
|
||||||
|
import com.grack.nanojson.JsonParserException;
|
||||||
import org.schabi.newpipe.extractor.Page;
|
import org.schabi.newpipe.extractor.Page;
|
||||||
import org.schabi.newpipe.extractor.StreamingService;
|
import org.schabi.newpipe.extractor.StreamingService;
|
||||||
import org.schabi.newpipe.extractor.comments.CommentsExtractor;
|
import org.schabi.newpipe.extractor.comments.CommentsExtractor;
|
||||||
|
@ -17,6 +18,7 @@ import org.schabi.newpipe.extractor.services.peertube.PeertubeParsingHelper;
|
||||||
import org.schabi.newpipe.extractor.utils.Utils;
|
import org.schabi.newpipe.extractor.utils.Utils;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import java.nio.charset.StandardCharsets;
|
||||||
|
|
||||||
import static org.schabi.newpipe.extractor.services.peertube.PeertubeParsingHelper.COUNT_KEY;
|
import static org.schabi.newpipe.extractor.services.peertube.PeertubeParsingHelper.COUNT_KEY;
|
||||||
import static org.schabi.newpipe.extractor.services.peertube.PeertubeParsingHelper.ITEMS_PER_PAGE;
|
import static org.schabi.newpipe.extractor.services.peertube.PeertubeParsingHelper.ITEMS_PER_PAGE;
|
||||||
|
@ -26,6 +28,9 @@ import static org.schabi.newpipe.extractor.utils.Utils.isNullOrEmpty;
|
||||||
import javax.annotation.Nonnull;
|
import javax.annotation.Nonnull;
|
||||||
|
|
||||||
public class PeertubeCommentsExtractor extends CommentsExtractor {
|
public class PeertubeCommentsExtractor extends CommentsExtractor {
|
||||||
|
static final String CHILDREN = "children";
|
||||||
|
private static final String IS_DELETED = "isDeleted";
|
||||||
|
private static final String TOTAL = "total";
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Use {@link #isReply()} to access this variable.
|
* Use {@link #isReply()} to access this variable.
|
||||||
|
@ -67,8 +72,9 @@ public class PeertubeCommentsExtractor extends CommentsExtractor {
|
||||||
for (final Object c : contents) {
|
for (final Object c : contents) {
|
||||||
if (c instanceof JsonObject) {
|
if (c instanceof JsonObject) {
|
||||||
final JsonObject item = (JsonObject) c;
|
final JsonObject item = (JsonObject) c;
|
||||||
if (!item.getBoolean("isDeleted")) {
|
if (!item.getBoolean(IS_DELETED)) {
|
||||||
collector.commit(new PeertubeCommentsInfoItemExtractor(item, this));
|
collector.commit(new PeertubeCommentsInfoItemExtractor(
|
||||||
|
item, null, getUrl(), getBaseUrl(), isReply()));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -76,13 +82,16 @@ public class PeertubeCommentsExtractor extends CommentsExtractor {
|
||||||
|
|
||||||
private void collectRepliesFrom(@Nonnull final CommentsInfoItemsCollector collector,
|
private void collectRepliesFrom(@Nonnull final CommentsInfoItemsCollector collector,
|
||||||
@Nonnull final JsonObject json) throws ParsingException {
|
@Nonnull final JsonObject json) throws ParsingException {
|
||||||
final JsonArray contents = json.getArray("children");
|
final JsonArray contents = json.getArray(CHILDREN);
|
||||||
|
|
||||||
for (final Object c : contents) {
|
for (final Object c : contents) {
|
||||||
if (c instanceof JsonObject) {
|
if (c instanceof JsonObject) {
|
||||||
final JsonObject item = ((JsonObject) c).getObject("comment");
|
final JsonObject content = (JsonObject) c;
|
||||||
if (!item.getBoolean("isDeleted")) {
|
final JsonObject item = content.getObject("comment");
|
||||||
collector.commit(new PeertubeCommentsInfoItemExtractor(item, this));
|
final JsonArray children = content.getArray(CHILDREN);
|
||||||
|
if (!item.getBoolean(IS_DELETED)) {
|
||||||
|
collector.commit(new PeertubeCommentsInfoItemExtractor(
|
||||||
|
item, children, getUrl(), getBaseUrl(), isReply()));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -95,9 +104,11 @@ public class PeertubeCommentsExtractor extends CommentsExtractor {
|
||||||
throw new IllegalArgumentException("Page doesn't contain an URL");
|
throw new IllegalArgumentException("Page doesn't contain an URL");
|
||||||
}
|
}
|
||||||
|
|
||||||
final Response response = getDownloader().get(page.getUrl());
|
|
||||||
|
|
||||||
JsonObject json = null;
|
JsonObject json = null;
|
||||||
|
final CommentsInfoItemsCollector collector = new CommentsInfoItemsCollector(getServiceId());
|
||||||
|
final long total;
|
||||||
|
if (page.getBody() == null) {
|
||||||
|
final Response response = getDownloader().get(page.getUrl());
|
||||||
if (response != null && !Utils.isBlank(response.responseBody())) {
|
if (response != null && !Utils.isBlank(response.responseBody())) {
|
||||||
try {
|
try {
|
||||||
json = JsonParser.object().from(response.responseBody());
|
json = JsonParser.object().from(response.responseBody());
|
||||||
|
@ -105,26 +116,34 @@ public class PeertubeCommentsExtractor extends CommentsExtractor {
|
||||||
throw new ParsingException("Could not parse json data for comments info", e);
|
throw new ParsingException("Could not parse json data for comments info", e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (json != null) {
|
if (json != null) {
|
||||||
PeertubeParsingHelper.validate(json);
|
PeertubeParsingHelper.validate(json);
|
||||||
final long total;
|
if (isReply() || json.has(CHILDREN)) {
|
||||||
final CommentsInfoItemsCollector collector
|
total = json.getArray(CHILDREN).size();
|
||||||
= new CommentsInfoItemsCollector(getServiceId());
|
|
||||||
|
|
||||||
if (isReply() || json.has("children")) {
|
|
||||||
total = json.getArray("children").size();
|
|
||||||
collectRepliesFrom(collector, json);
|
collectRepliesFrom(collector, json);
|
||||||
} else {
|
} else {
|
||||||
total = json.getLong("total");
|
total = json.getLong(TOTAL);
|
||||||
collectCommentsFrom(collector, json);
|
collectCommentsFrom(collector, json);
|
||||||
}
|
}
|
||||||
|
} else {
|
||||||
|
throw new ExtractionException("Unable to get PeerTube kiosk info");
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
try {
|
||||||
|
json = JsonParser.object().from(new String(page.getBody(), StandardCharsets.UTF_8));
|
||||||
|
isReply = true;
|
||||||
|
total = json.getArray(CHILDREN).size();
|
||||||
|
collectRepliesFrom(collector, json);
|
||||||
|
} catch (final JsonParserException e) {
|
||||||
|
throw new ParsingException(
|
||||||
|
"Could not parse json data for nested comments info", e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return new InfoItemsPage<>(collector,
|
return new InfoItemsPage<>(collector,
|
||||||
PeertubeParsingHelper.getNextPage(page.getUrl(), total));
|
PeertubeParsingHelper.getNextPage(page.getUrl(), total));
|
||||||
} else {
|
|
||||||
throw new ExtractionException("Unable to get PeerTube kiosk info");
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -1,7 +1,9 @@
|
||||||
package org.schabi.newpipe.extractor.services.peertube.extractors;
|
package org.schabi.newpipe.extractor.services.peertube.extractors;
|
||||||
|
|
||||||
|
import com.grack.nanojson.JsonArray;
|
||||||
import com.grack.nanojson.JsonObject;
|
import com.grack.nanojson.JsonObject;
|
||||||
|
|
||||||
|
import com.grack.nanojson.JsonWriter;
|
||||||
import org.jsoup.Jsoup;
|
import org.jsoup.Jsoup;
|
||||||
import org.jsoup.nodes.Document;
|
import org.jsoup.nodes.Document;
|
||||||
import org.schabi.newpipe.extractor.Page;
|
import org.schabi.newpipe.extractor.Page;
|
||||||
|
@ -13,20 +15,36 @@ import org.schabi.newpipe.extractor.services.peertube.PeertubeParsingHelper;
|
||||||
import org.schabi.newpipe.extractor.stream.Description;
|
import org.schabi.newpipe.extractor.stream.Description;
|
||||||
import org.schabi.newpipe.extractor.utils.JsonUtils;
|
import org.schabi.newpipe.extractor.utils.JsonUtils;
|
||||||
|
|
||||||
|
import javax.annotation.Nonnull;
|
||||||
import javax.annotation.Nullable;
|
import javax.annotation.Nullable;
|
||||||
|
import java.nio.charset.StandardCharsets;
|
||||||
import java.util.Objects;
|
import java.util.Objects;
|
||||||
|
|
||||||
public class PeertubeCommentsInfoItemExtractor implements CommentsInfoItemExtractor {
|
import static org.schabi.newpipe.extractor.services.peertube.extractors.PeertubeCommentsExtractor.CHILDREN;
|
||||||
private final JsonObject item;
|
|
||||||
private final String url;
|
|
||||||
private final String baseUrl;
|
|
||||||
|
|
||||||
public PeertubeCommentsInfoItemExtractor(final JsonObject item,
|
public class PeertubeCommentsInfoItemExtractor implements CommentsInfoItemExtractor {
|
||||||
final PeertubeCommentsExtractor extractor)
|
@Nonnull
|
||||||
throws ParsingException {
|
private final JsonObject item;
|
||||||
|
@Nullable
|
||||||
|
private final JsonArray children;
|
||||||
|
@Nonnull
|
||||||
|
private final String url;
|
||||||
|
@Nonnull
|
||||||
|
private final String baseUrl;
|
||||||
|
private final boolean isReply;
|
||||||
|
|
||||||
|
private Integer replyCount;
|
||||||
|
|
||||||
|
public PeertubeCommentsInfoItemExtractor(@Nonnull final JsonObject item,
|
||||||
|
@Nullable final JsonArray children,
|
||||||
|
@Nonnull final String url,
|
||||||
|
@Nonnull final String baseUrl,
|
||||||
|
final boolean isReply) {
|
||||||
this.item = item;
|
this.item = item;
|
||||||
this.url = extractor.getUrl();
|
this.children = children;
|
||||||
this.baseUrl = extractor.getBaseUrl();
|
this.url = url;
|
||||||
|
this.baseUrl = baseUrl;
|
||||||
|
this.isReply = isReply;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -107,15 +125,34 @@ public class PeertubeCommentsInfoItemExtractor implements CommentsInfoItemExtrac
|
||||||
@Override
|
@Override
|
||||||
@Nullable
|
@Nullable
|
||||||
public Page getReplies() throws ParsingException {
|
public Page getReplies() throws ParsingException {
|
||||||
if (JsonUtils.getNumber(item, "totalReplies").intValue() == 0) {
|
if (getReplyCount() == 0) {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
final String threadId = JsonUtils.getNumber(item, "threadId").toString();
|
final String threadId = JsonUtils.getNumber(item, "threadId").toString();
|
||||||
return new Page(url + "/" + threadId, threadId);
|
final String repliesUrl = url + "/" + threadId;
|
||||||
|
if (isReply && children != null && !children.isEmpty()) {
|
||||||
|
// Nested replies are already included in the original thread's request.
|
||||||
|
// Wrap the replies into a JsonObject, because the original thread's request body
|
||||||
|
// is also structured like a JsonObject.
|
||||||
|
final JsonObject pageContent = new JsonObject();
|
||||||
|
pageContent.put(CHILDREN, children);
|
||||||
|
return new Page(repliesUrl, threadId,
|
||||||
|
JsonWriter.string(pageContent).getBytes(StandardCharsets.UTF_8));
|
||||||
|
}
|
||||||
|
return new Page(repliesUrl, threadId);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public int getReplyCount() throws ParsingException {
|
public int getReplyCount() throws ParsingException {
|
||||||
return JsonUtils.getNumber(item, "totalReplies").intValue();
|
if (replyCount == null) {
|
||||||
|
if (children != null && !children.isEmpty()) {
|
||||||
|
// The totalReplies field is inaccurate for nested replies and sometimes returns 0
|
||||||
|
// although there are replies to that reply stored in children.
|
||||||
|
replyCount = children.size();
|
||||||
|
} else {
|
||||||
|
replyCount = JsonUtils.getNumber(item, "totalReplies").intValue();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return replyCount;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -14,10 +14,9 @@ import org.schabi.newpipe.extractor.utils.Utils;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
import java.util.Optional;
|
||||||
|
|
||||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
import static org.junit.jupiter.api.Assertions.*;
|
||||||
import static org.junit.jupiter.api.Assertions.assertFalse;
|
|
||||||
import static org.junit.jupiter.api.Assertions.assertTrue;
|
|
||||||
import static org.schabi.newpipe.extractor.ServiceList.PeerTube;
|
import static org.schabi.newpipe.extractor.ServiceList.PeerTube;
|
||||||
|
|
||||||
public class PeertubeCommentsExtractorTest {
|
public class PeertubeCommentsExtractorTest {
|
||||||
|
@ -121,4 +120,52 @@ public class PeertubeCommentsExtractorTest {
|
||||||
assertTrue(commentsInfo.getErrors().isEmpty());
|
assertTrue(commentsInfo.getErrors().isEmpty());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test a video that has comments with nested replies.
|
||||||
|
*/
|
||||||
|
public static class NestedComments {
|
||||||
|
private static PeertubeCommentsExtractor extractor;
|
||||||
|
|
||||||
|
@BeforeAll
|
||||||
|
public static void setUp() throws Exception {
|
||||||
|
NewPipe.init(DownloaderTestImpl.getInstance());
|
||||||
|
extractor = (PeertubeCommentsExtractor) PeerTube
|
||||||
|
.getCommentsExtractor("https://share.tube/w/vxu4uTstUBAUromWwXGHrq");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void testGetComments() throws IOException, ExtractionException {
|
||||||
|
final InfoItemsPage<CommentsInfoItem> comments = extractor.getInitialPage();
|
||||||
|
assertFalse(comments.getItems().isEmpty());
|
||||||
|
final Optional<CommentsInfoItem> nestedCommentHeadOpt =
|
||||||
|
comments.getItems()
|
||||||
|
.stream()
|
||||||
|
.filter(c -> c.getCommentId().equals("9770"))
|
||||||
|
.findFirst();
|
||||||
|
assertTrue(nestedCommentHeadOpt.isPresent());
|
||||||
|
assertTrue(findNestedCommentWithId("9773", nestedCommentHeadOpt.get()), "The nested comment replies were not found");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static boolean findNestedCommentWithId(final String id, final CommentsInfoItem comment)
|
||||||
|
throws IOException, ExtractionException {
|
||||||
|
if (comment.getCommentId().equals(id)) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
return PeerTube
|
||||||
|
.getCommentsExtractor(comment.getUrl())
|
||||||
|
.getPage(comment.getReplies())
|
||||||
|
.getItems()
|
||||||
|
.stream()
|
||||||
|
.map(c -> {
|
||||||
|
try {
|
||||||
|
return findNestedCommentWithId(id, c);
|
||||||
|
} catch (final Exception ignored) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
})
|
||||||
|
.reduce((a, b) -> a || b)
|
||||||
|
.orElse(false);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue