Try to fix detecting replies to a comment on the previous page

When getting a page which is not the initial page, it is possible that the first comments are replies to a comment from a previous page.
This commit is contained in:
TobiGr 2023-01-02 18:59:03 +01:00
parent b6e3015ee2
commit e5be686b06
3 changed files with 117 additions and 46 deletions

View File

@ -13,7 +13,7 @@ import static org.schabi.newpipe.extractor.utils.Utils.isNullOrEmpty;
* <br>
* A page has an {@link #id}, an {@link #url}, as well as information on possible {@link #cookies}.
* In case the data behind the URL has already been retrieved,
* it can be accessed by using @link #getBody()} and {@link #getContent()}.
* it can be accessed by using {@link #getBody()} or {@link #getContent()}.
*/
public class Page implements Serializable {
private final String url;

View File

@ -21,13 +21,24 @@ import org.schabi.newpipe.extractor.linkhandler.ListLinkHandler;
import org.schabi.newpipe.extractor.services.soundcloud.SoundcloudParsingHelper;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import javax.annotation.Nonnull;
import javax.annotation.Nullable;
public class SoundcloudCommentsExtractor extends CommentsExtractor {
public static final String COLLECTION = "collection";
public static final String NEXT_HREF = "next_href";
/**
* The last comment which was a top level comment.
* Next pages might start with replies to the last top level comment
* and therefore the {@link SoundcloudCommentsInfoItemExtractor#replyCount}
* of the last top level comment cannot be determined with certainty.
*/
@Nullable private JsonObject lastTopLevelComment;
public SoundcloudCommentsExtractor(final StreamingService service,
final ListLinkHandler uiHandler) {
super(service, uiHandler);
@ -50,14 +61,15 @@ public class SoundcloudCommentsExtractor extends CommentsExtractor {
final CommentsInfoItemsCollector collector = new CommentsInfoItemsCollector(
getServiceId());
collectCommentsFrom(collector, json);
collectCommentsFrom(collector, json, null);
return new InfoItemsPage<>(collector, new Page(json.getString(NEXT_HREF)));
}
@Override
public InfoItemsPage<CommentsInfoItem> getPage(final Page page) throws ExtractionException,
IOException {
public InfoItemsPage<CommentsInfoItem> getPage(final Page page)
throws ExtractionException, IOException {
if (page == null || isNullOrEmpty(page.getUrl())) {
throw new IllegalArgumentException("Page doesn't contain an URL");
}
@ -88,7 +100,7 @@ public class SoundcloudCommentsExtractor extends CommentsExtractor {
} catch (final JsonParserException e) {
throw new ParsingException("Could not parse json", e);
}
collectCommentsFrom(collector, json);
collectCommentsFrom(collector, json, lastTopLevelComment);
}
if (hasNextPage) {
@ -101,27 +113,86 @@ public class SoundcloudCommentsExtractor extends CommentsExtractor {
@Override
public void onFetchPage(@Nonnull final Downloader downloader) { }
private void collectCommentsFrom(final CommentsInfoItemsCollector collector,
final JsonObject json) throws ParsingException {
/**
* Collect top level comments from a SoundCloud API response.
* @param collector the collector which collects the top level comments
* @param json the JsonObject of the API response
* @param lastTopLevelComment the last top level comment from the previous page or {@code null}
* if this method is run for the initial page.
* @throws ParsingException
*/
private void collectCommentsFrom(@Nonnull final CommentsInfoItemsCollector collector,
@Nonnull final JsonObject json,
@Nullable final JsonObject lastTopLevelComment)
throws ParsingException {
final List<SoundcloudCommentsInfoItemExtractor> extractors = new ArrayList<>();
final String url = getUrl();
final JsonArray entries = json.getArray(COLLECTION);
JsonObject lastTopComment = null;
for (int i = 0; i < entries.size(); i++) {
final JsonObject entry = entries.getObject(i);
if (i == 0
|| (!SoundcloudParsingHelper.isReplyTo(entries.getObject(i - 1), entry)
&& !SoundcloudParsingHelper.isReplyTo(lastTopComment, entry))) {
lastTopComment = entry;
collector.commit(new SoundcloudCommentsInfoItemExtractor(
json, i, entry, url));
}
/**
* The current top level comment.
*/
JsonObject currentTopLevelComment = null;
boolean isLastCommentReply = true;
// Check whether the first comment in the list is a reply to the last top level comment
// from the previous page if there was a previous page.
if (lastTopLevelComment != null) {
final JsonObject firstComment = entries.getObject(0);
if (SoundcloudParsingHelper.isReplyTo(lastTopLevelComment, firstComment)) {
currentTopLevelComment = lastTopLevelComment;
} else {
extractors.add(new SoundcloudCommentsInfoItemExtractor(
json, SoundcloudCommentsInfoItemExtractor.PREVIOUS_PAGE_INDEX,
firstComment, url, null));
}
}
private boolean collectRepliesFrom(final CommentsInfoItemsCollector collector,
final JsonObject json,
for (int i = 0; i < entries.size(); i++) {
final JsonObject entry = entries.getObject(i);
// extract all top level comments
// The first comment is a top level comment
// if it is not a reply to the last top level comment
// from the previous page.
if (i == 0 && currentTopLevelComment == null
|| (!SoundcloudParsingHelper.isReplyTo(entries.getObject(i - 1), entry)
&& !SoundcloudParsingHelper.isReplyTo(currentTopLevelComment, entry))) {
currentTopLevelComment = entry;
if (i == entries.size() - 1) {
isLastCommentReply = false;
this.lastTopLevelComment = currentTopLevelComment;
// Do not collect the last comment if it is a top level comment
// because it might have replies.
// That is information we cannot get from the comment itself
// (thanks SoundCloud...) but needs to be obtained from the next comment.
// The comment will therefore be collected
// when collecting the items from the next page.
break;
}
extractors.add(new SoundcloudCommentsInfoItemExtractor(
json, i, entry, url, lastTopLevelComment));
}
}
if (isLastCommentReply) {
// Do not collect the last top level comment if it has replies and the retrieved
// comment list ends with a reply. We do not know whether the next page starts
// with more replies to the last top level comment.
this.lastTopLevelComment = extractors.remove(extractors.size() - 1).item;
}
extractors.stream().forEach(collector::commit);
}
/**
* Collect replies to a top level comment from a SoundCloud API response.
* @param collector the collector which collects the replies
* @param json the SoundCloud API response
* @param id the comment's id for which the replies are collected
* @param url the corresponding page's URL
* @return
*/
private boolean collectRepliesFrom(@Nonnull final CommentsInfoItemsCollector collector,
@Nonnull final JsonObject json,
final int id,
final String url) {
@Nonnull final String url) {
JsonObject originalComment = null;
final JsonArray entries = json.getArray(COLLECTION);
boolean moreReplies = false;

View File

@ -6,10 +6,8 @@ import com.grack.nanojson.JsonArray;
import com.grack.nanojson.JsonObject;
import org.schabi.newpipe.extractor.Page;
import org.schabi.newpipe.extractor.ServiceList;
import org.schabi.newpipe.extractor.comments.CommentsInfoItem;
import org.schabi.newpipe.extractor.comments.CommentsInfoItemExtractor;
import org.schabi.newpipe.extractor.comments.CommentsInfoItemsCollector;
import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.localization.DateWrapper;
import org.schabi.newpipe.extractor.services.soundcloud.SoundcloudParsingHelper;
@ -17,32 +15,42 @@ import org.schabi.newpipe.extractor.stream.Description;
import java.util.Objects;
import javax.annotation.Nonnull;
import javax.annotation.Nullable;
public class SoundcloudCommentsInfoItemExtractor implements CommentsInfoItemExtractor {
public static final int PREVIOUS_PAGE_INDEX = -1;
public static final String BODY = "body";
public static final String USER_PERMALINK = "permalink";
public static final String USER_FULL_NAME = "full_name";
public static final String USER_USERNAME = "username";
private final JsonObject json;
@Nonnull private final JsonObject json;
private final int index;
private final JsonObject item;
@Nonnull public final JsonObject item;
private final String url;
private final JsonObject user;
private final JsonObject superComment;
@Nonnull private final JsonObject user;
/**
* A comment to which this comment is a reply.
* Is {@code null} if this comment is itself a top level comment.
*/
@Nullable private final JsonObject topLevelComment;
/**
* The reply count is not given by the SoundCloud API, but needs to be obtained
* by counting the comments which come directly after this item and have the same timestamp.
*/
private int replyCount = CommentsInfoItem.UNKNOWN_REPLY_COUNT;
private Page repliesPage = null;
public SoundcloudCommentsInfoItemExtractor(final JsonObject json, final int index,
final JsonObject item, final String url,
@Nullable final JsonObject superComment) {
public SoundcloudCommentsInfoItemExtractor(@Nonnull final JsonObject json, final int index,
@Nonnull final JsonObject item, final String url,
@Nullable final JsonObject topLevelComment) {
this.json = json;
this.index = index;
this.item = item;
this.url = url;
this.superComment = superComment;
this.topLevelComment = topLevelComment;
this.user = item.getObject("user");
}
@ -58,7 +66,7 @@ public class SoundcloudCommentsInfoItemExtractor implements CommentsInfoItemExtr
@Override
public Description getCommentText() {
String commentContent = item.getString(BODY);
if (superComment == null) {
if (topLevelComment == null) {
return new Description(commentContent, Description.PLAIN_TEXT);
}
// This comment is a reply to another comment.
@ -78,7 +86,7 @@ public class SoundcloudCommentsInfoItemExtractor implements CommentsInfoItemExtr
}
}
if (author == null) {
author = superComment.getObject("user");
author = topLevelComment.getObject("user");
}
final String name = isNullOrEmpty(author.getString(USER_FULL_NAME))
? author.getString(USER_USERNAME) : author.getString(USER_FULL_NAME);
@ -149,24 +157,17 @@ public class SoundcloudCommentsInfoItemExtractor implements CommentsInfoItemExtr
@Override
public Page getReplies() {
if (replyCount == CommentsInfoItem.UNKNOWN_REPLY_COUNT) {
final JsonArray replies = new JsonArray();
final CommentsInfoItemsCollector collector = new CommentsInfoItemsCollector(
ServiceList.SoundCloud.getServiceId());
replyCount = 0;
// SoundCloud has only comments and top level replies, but not nested replies.
// Therefore, replies cannot have further replies.
if (superComment == null) {
if (topLevelComment == null) {
// Loop through all comments which come after the original comment
// to find its replies.
final JsonArray allItems = json.getArray(SoundcloudCommentsExtractor.COLLECTION);
boolean foundReply = false;
for (int i = index + 1; i < allItems.size(); i++) {
final JsonObject comment = allItems.getObject(i);
if (SoundcloudParsingHelper.isReplyTo(item, comment)) {
replies.add(comment);
collector.commit(new SoundcloudCommentsInfoItemExtractor(
json, i, comment, url, item));
foundReply = true;
} else if (foundReply) {
if (SoundcloudParsingHelper.isReplyTo(item, allItems.getObject(i))) {
replyCount++;
} else {
// Only the comments directly after the original comment
// having the same timestamp are replies to the original comment.
// The first comment not having the same timestamp
@ -175,8 +176,7 @@ public class SoundcloudCommentsInfoItemExtractor implements CommentsInfoItemExtr
}
}
}
replyCount = replies.size();
if (collector.getItems().isEmpty()) {
if (replyCount == 0) {
return null;
}
repliesPage = new Page(getUrl(), getCommentId());