diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/Page.java b/extractor/src/main/java/org/schabi/newpipe/extractor/Page.java
index e13a92287..091c5e767 100644
--- a/extractor/src/main/java/org/schabi/newpipe/extractor/Page.java
+++ b/extractor/src/main/java/org/schabi/newpipe/extractor/Page.java
@@ -13,7 +13,7 @@ import static org.schabi.newpipe.extractor.utils.Utils.isNullOrEmpty;
*
* A page has an {@link #id}, an {@link #url}, as well as information on possible {@link #cookies}.
* In case the data behind the URL has already been retrieved,
- * it can be accessed by using @link #getBody()} and {@link #getContent()}.
+ * it can be accessed by using {@link #getBody()} or {@link #getContent()}.
*/
public class Page implements Serializable {
private final String url;
diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/extractors/SoundcloudCommentsExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/extractors/SoundcloudCommentsExtractor.java
index f253cb695..56af6b43e 100644
--- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/extractors/SoundcloudCommentsExtractor.java
+++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/extractors/SoundcloudCommentsExtractor.java
@@ -21,13 +21,24 @@ import org.schabi.newpipe.extractor.linkhandler.ListLinkHandler;
import org.schabi.newpipe.extractor.services.soundcloud.SoundcloudParsingHelper;
import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
import javax.annotation.Nonnull;
+import javax.annotation.Nullable;
public class SoundcloudCommentsExtractor extends CommentsExtractor {
public static final String COLLECTION = "collection";
public static final String NEXT_HREF = "next_href";
+ /**
+ * The last comment which was a top level comment.
+ * Next pages might start with replies to the last top level comment
+ * and therefore the {@link SoundcloudCommentsInfoItemExtractor#replyCount}
+ * of the last top level comment cannot be determined with certainty.
+ */
+ @Nullable private JsonObject lastTopLevelComment;
+
public SoundcloudCommentsExtractor(final StreamingService service,
final ListLinkHandler uiHandler) {
super(service, uiHandler);
@@ -50,14 +61,15 @@ public class SoundcloudCommentsExtractor extends CommentsExtractor {
final CommentsInfoItemsCollector collector = new CommentsInfoItemsCollector(
getServiceId());
- collectCommentsFrom(collector, json);
+ collectCommentsFrom(collector, json, null);
return new InfoItemsPage<>(collector, new Page(json.getString(NEXT_HREF)));
}
@Override
- public InfoItemsPage getPage(final Page page) throws ExtractionException,
- IOException {
+ public InfoItemsPage getPage(final Page page)
+ throws ExtractionException, IOException {
+
if (page == null || isNullOrEmpty(page.getUrl())) {
throw new IllegalArgumentException("Page doesn't contain an URL");
}
@@ -88,7 +100,7 @@ public class SoundcloudCommentsExtractor extends CommentsExtractor {
} catch (final JsonParserException e) {
throw new ParsingException("Could not parse json", e);
}
- collectCommentsFrom(collector, json);
+ collectCommentsFrom(collector, json, lastTopLevelComment);
}
if (hasNextPage) {
@@ -101,27 +113,86 @@ public class SoundcloudCommentsExtractor extends CommentsExtractor {
@Override
public void onFetchPage(@Nonnull final Downloader downloader) { }
- private void collectCommentsFrom(final CommentsInfoItemsCollector collector,
- final JsonObject json) throws ParsingException {
+ /**
+ * Collect the top level comments of a SoundCloud API response.
+ *
+ * <p>The number of replies to a top level comment can only be derived from the comments
+ * which follow it. The last top level comment of a page is therefore not committed
+ * right away: it is stored in {@link #lastTopLevelComment} and committed when the
+ * next page is collected.</p>
+ *
+ * @param collector the collector which collects the top level comments
+ * @param json the JsonObject of the API response
+ * @param lastTopLevelComment the last top level comment from the previous page or {@code null}
+ * if this method is run for the initial page.
+ * @throws ParsingException if the URL of this extractor could not be obtained
+ */
+ private void collectCommentsFrom(@Nonnull final CommentsInfoItemsCollector collector,
+ @Nonnull final JsonObject json,
+ @Nullable final JsonObject lastTopLevelComment)
+ throws ParsingException {
+ final List<SoundcloudCommentsInfoItemExtractor> extractors = new ArrayList<>();
final String url = getUrl();
final JsonArray entries = json.getArray(COLLECTION);
- JsonObject lastTopComment = null;
- for (int i = 0; i < entries.size(); i++) {
- final JsonObject entry = entries.getObject(i);
- if (i == 0
- || (!SoundcloudParsingHelper.isReplyTo(entries.getObject(i - 1), entry)
- && !SoundcloudParsingHelper.isReplyTo(lastTopComment, entry))) {
- lastTopComment = entry;
- collector.commit(new SoundcloudCommentsInfoItemExtractor(
- json, i, entry, url));
+ // The top level comment to which the entries currently iterated over may be replies.
+ JsonObject currentTopLevelComment = null;
+ // Stays true unless the very last entry of this page is itself a top level comment.
+ boolean isLastCommentReply = true;
+ if (lastTopLevelComment != null) {
+ // The last top level comment of the previous page has not been committed yet.
+ // Its remaining replies, if any, are the first entries of this page. Registering it
+ // with PREVIOUS_PAGE_INDEX makes the reply counting start at the first entry.
+ extractors.add(new SoundcloudCommentsInfoItemExtractor(
+ json, SoundcloudCommentsInfoItemExtractor.PREVIOUS_PAGE_INDEX,
+ lastTopLevelComment, url, null));
+ if (!entries.isEmpty()
+ && SoundcloudParsingHelper.isReplyTo(lastTopLevelComment, entries.getObject(0))) {
+ currentTopLevelComment = lastTopLevelComment;
}
}
+
+ for (int i = 0; i < entries.size(); i++) {
+ final JsonObject entry = entries.getObject(i);
+ final boolean isTopLevelComment;
+ if (i == 0) {
+ // There is no previous entry on this page to compare with (index -1 is invalid).
+ // The first entry is a top level comment unless it was found above to be
+ // a reply to the last top level comment of the previous page.
+ isTopLevelComment = currentTopLevelComment == null;
+ } else {
+ isTopLevelComment =
+ !SoundcloudParsingHelper.isReplyTo(entries.getObject(i - 1), entry)
+ && !SoundcloudParsingHelper.isReplyTo(currentTopLevelComment, entry);
+ }
+ if (!isTopLevelComment) {
+ continue;
+ }
+ currentTopLevelComment = entry;
+ if (i == entries.size() - 1) {
+ isLastCommentReply = false;
+ this.lastTopLevelComment = currentTopLevelComment;
+ // Do not collect the last comment if it is a top level comment
+ // because it might have replies.
+ // That is information we cannot get from the comment itself
+ // (thanks SoundCloud...) but needs to be obtained from the next comment.
+ // The comment will therefore be collected
+ // when collecting the items from the next page.
+ break;
+ }
+ // A top level comment is not a reply, so no parent comment is passed.
+ extractors.add(new SoundcloudCommentsInfoItemExtractor(json, i, entry, url, null));
+ }
+ if (entries.isEmpty()) {
+ // An empty page cannot contain replies to a pending comment,
+ // so everything gathered so far is committed and nothing is deferred.
+ this.lastTopLevelComment = null;
+ } else if (isLastCommentReply && !extractors.isEmpty()) {
+ // Do not collect the last top level comment if it has replies and the retrieved
+ // comment list ends with a reply. We do not know whether the next page starts
+ // with more replies to the last top level comment.
+ // NOTE(review): a comment deferred here is only committed when a further page is
+ // collected -- confirm the caller handles the final page.
+ this.lastTopLevelComment = extractors.remove(extractors.size() - 1).item;
+ }
+ extractors.forEach(collector::commit);
}
- private boolean collectRepliesFrom(final CommentsInfoItemsCollector collector,
- final JsonObject json,
- final int id,
- final String url) {
+ /**
+ * Collect replies to a top level comment from a SoundCloud API response.
+ * @param collector the collector which collects the replies
+ * @param json the SoundCloud API response
+ * @param id the comment's id for which the replies are collected
+ * @param url the corresponding page's URL
+ * @return
+ */
+ private boolean collectRepliesFrom(@Nonnull final CommentsInfoItemsCollector collector,
+ @Nonnull final JsonObject json,
+ final int id,
+ @Nonnull final String url) {
JsonObject originalComment = null;
final JsonArray entries = json.getArray(COLLECTION);
boolean moreReplies = false;
diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/extractors/SoundcloudCommentsInfoItemExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/extractors/SoundcloudCommentsInfoItemExtractor.java
index db9ef549d..0dfa3edee 100644
--- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/extractors/SoundcloudCommentsInfoItemExtractor.java
+++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/extractors/SoundcloudCommentsInfoItemExtractor.java
@@ -6,10 +6,8 @@ import com.grack.nanojson.JsonArray;
import com.grack.nanojson.JsonObject;
import org.schabi.newpipe.extractor.Page;
-import org.schabi.newpipe.extractor.ServiceList;
import org.schabi.newpipe.extractor.comments.CommentsInfoItem;
import org.schabi.newpipe.extractor.comments.CommentsInfoItemExtractor;
-import org.schabi.newpipe.extractor.comments.CommentsInfoItemsCollector;
import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.localization.DateWrapper;
import org.schabi.newpipe.extractor.services.soundcloud.SoundcloudParsingHelper;
@@ -17,32 +15,42 @@ import org.schabi.newpipe.extractor.stream.Description;
import java.util.Objects;
+import javax.annotation.Nonnull;
import javax.annotation.Nullable;
public class SoundcloudCommentsInfoItemExtractor implements CommentsInfoItemExtractor {
+ public static final int PREVIOUS_PAGE_INDEX = -1;
public static final String BODY = "body";
public static final String USER_PERMALINK = "permalink";
public static final String USER_FULL_NAME = "full_name";
public static final String USER_USERNAME = "username";
- private final JsonObject json;
+ @Nonnull private final JsonObject json;
private final int index;
- private final JsonObject item;
+ @Nonnull public final JsonObject item;
private final String url;
- private final JsonObject user;
- private final JsonObject superComment;
+ @Nonnull private final JsonObject user;
+ /**
+ * A comment to which this comment is a reply.
+ * Is {@code null} if this comment is itself a top level comment.
+ */
+ @Nullable private final JsonObject topLevelComment;
+ /**
+ * The reply count is not given by the SoundCloud API, but needs to be obtained
+ * by counting the comments which come directly after this item and have the same timestamp.
+ */
private int replyCount = CommentsInfoItem.UNKNOWN_REPLY_COUNT;
private Page repliesPage = null;
- public SoundcloudCommentsInfoItemExtractor(final JsonObject json, final int index,
- final JsonObject item, final String url,
- @Nullable final JsonObject superComment) {
+ public SoundcloudCommentsInfoItemExtractor(@Nonnull final JsonObject json, final int index,
+ @Nonnull final JsonObject item, final String url,
+ @Nullable final JsonObject topLevelComment) {
this.json = json;
this.index = index;
this.item = item;
this.url = url;
- this.superComment = superComment;
+ this.topLevelComment = topLevelComment;
this.user = item.getObject("user");
}
@@ -58,7 +66,7 @@ public class SoundcloudCommentsInfoItemExtractor implements CommentsInfoItemExtr
@Override
public Description getCommentText() {
String commentContent = item.getString(BODY);
- if (superComment == null) {
+ if (topLevelComment == null) {
return new Description(commentContent, Description.PLAIN_TEXT);
}
// This comment is a reply to another comment.
@@ -78,7 +86,7 @@ public class SoundcloudCommentsInfoItemExtractor implements CommentsInfoItemExtr
}
}
if (author == null) {
- author = superComment.getObject("user");
+ author = topLevelComment.getObject("user");
}
final String name = isNullOrEmpty(author.getString(USER_FULL_NAME))
? author.getString(USER_USERNAME) : author.getString(USER_FULL_NAME);
@@ -149,24 +157,17 @@ public class SoundcloudCommentsInfoItemExtractor implements CommentsInfoItemExtr
@Override
public Page getReplies() {
if (replyCount == CommentsInfoItem.UNKNOWN_REPLY_COUNT) {
- final JsonArray replies = new JsonArray();
- final CommentsInfoItemsCollector collector = new CommentsInfoItemsCollector(
- ServiceList.SoundCloud.getServiceId());
+ replyCount = 0;
// SoundCloud has only comments and top level replies, but not nested replies.
// Therefore, replies cannot have further replies.
- if (superComment == null) {
+ if (topLevelComment == null) {
// Loop through all comments which come after the original comment
// to find its replies.
final JsonArray allItems = json.getArray(SoundcloudCommentsExtractor.COLLECTION);
- boolean foundReply = false;
for (int i = index + 1; i < allItems.size(); i++) {
- final JsonObject comment = allItems.getObject(i);
- if (SoundcloudParsingHelper.isReplyTo(item, comment)) {
- replies.add(comment);
- collector.commit(new SoundcloudCommentsInfoItemExtractor(
- json, i, comment, url, item));
- foundReply = true;
- } else if (foundReply) {
+ if (SoundcloudParsingHelper.isReplyTo(item, allItems.getObject(i))) {
+ replyCount++;
+ } else {
// Only the comments directly after the original comment
// having the same timestamp are replies to the original comment.
// The first comment not having the same timestamp
@@ -175,8 +176,7 @@ public class SoundcloudCommentsInfoItemExtractor implements CommentsInfoItemExtr
}
}
}
- replyCount = replies.size();
- if (collector.getItems().isEmpty()) {
+ if (replyCount == 0) {
return null;
}
repliesPage = new Page(getUrl(), getCommentId());