[SoundCloud] Add support for comment replies

2022-12-04 13:19:24 +01:00 · 2022-12-04 13:19:24 +01:00 · e33fa926dd
parent 259de3cba6
commit e33fa926dd
6 changed files with 201 additions and 30 deletions
--- a/extractor/src/main/java/org/schabi/newpipe/extractor/Page.java
+++ b/extractor/src/main/java/org/schabi/newpipe/extractor/Page.java
@ -1,18 +1,26 @@
 package org.schabi.newpipe.extractor;

+import javax.annotation.Nullable;
 import java.io.Serializable;
 import java.util.List;
 import java.util.Map;

-import javax.annotation.Nullable;
-
 import static org.schabi.newpipe.extractor.utils.Utils.isNullOrEmpty;

+/**
+ * The {@link Page} class is used for storing information on future requests
+ * for retrieving content.
+ * <br>
+ * A page has an {@link #id}, a {@link #url}, as well as information on possible {@link #cookies}.
+ * In case the data behind the URL has already been retrieved,
+ * it can be accessed by using {@link #getBody()} and {@link #getContent()}.
+ */
 public class Page implements Serializable {
    private final String url;
    private final String id;
    private final List<String> ids;
    private final Map<String, String> cookies;
+    private Serializable content;

    @Nullable
    private final byte[] body;
@ -78,4 +86,28 @@ public class Page implements Serializable {
    public byte[] getBody() {
        return body;
    }
+
+    /**
+     * Check whether content has been set on this page.
+     * @return {@code true} if the page's content is not {@code null}
+     */
+    public boolean hasContent() {
+        return content != null;
+    }
+
+    /**
+     * Get the page's content if it has been set, returns {@code null} otherwise.
+     * @return the page's content
+     */
+    @Nullable
+    public Serializable getContent() {
+        return content;
+    }
+
+    /**
+     * Set the page's content.
+     * The page's content can either be retrieved manually by requesting the resource
+     * behind the page's URL (see {@link #url} and {@link #getUrl()})
+     * or be stored in the {@link Page} instance in case the content has already been downloaded.
+     * @param content the page's content, may be {@code null}
+     */
+    public void setContent(@Nullable final Serializable content) {
+        this.content = content;
+    }
 }
--- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/SoundcloudParsingHelper.java
+++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/SoundcloudParsingHelper.java
@ -323,4 +323,17 @@ public final class SoundcloudParsingHelper {
    public static String getUploaderName(final JsonObject object) {
        return object.getObject("user").getString("username", "");
    }
+
+    /**
+     * Check whether a comment is a reply to another comment.
+     * <br>
+     * A comment is treated as a reply if its body starts with a mention
+     * ({@code @permalink}) of the original comment's author
+     * and both comments have the same timestamp.
+     *
+     * @param originalComment the comment which might have been replied to
+     * @param otherComment    the comment which might be the reply
+     * @return {@code true} if {@code otherComment} looks like a reply
+     *         to {@code originalComment}
+     */
+    public static boolean isReplyTo(@Nonnull final JsonObject originalComment,
+                                    @Nonnull final JsonObject otherComment) {
+        final String permalink = originalComment.getObject("user").getString("permalink");
+        if (permalink == null) {
+            // Without the author's permalink there is no mention to look for.
+            return false;
+        }
+        // A missing body is treated as empty to avoid a NullPointerException.
+        return otherComment.getString("body", "").startsWith("@" + permalink)
+                && originalComment.getInt("timestamp") == otherComment.getInt("timestamp");
+    }
+
+    /**
+     * Check whether a comment is a reply to any other comment,
+     * i.e. whether its body starts with a mention ({@code @}).
+     *
+     * @param comment the comment to check
+     * @return {@code true} if the comment's body starts with {@code @}
+     */
+    public static boolean isReply(@Nonnull final JsonObject comment) {
+        // A missing body is treated as empty to avoid a NullPointerException.
+        return comment.getString("body", "").startsWith("@");
+    }
+
 }
--- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/extractors/SoundcloudCommentsExtractor.java
+++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/extractors/SoundcloudCommentsExtractor.java
@ -16,6 +16,7 @@ import org.schabi.newpipe.extractor.downloader.Response;
 import org.schabi.newpipe.extractor.exceptions.ExtractionException;
 import org.schabi.newpipe.extractor.exceptions.ParsingException;
 import org.schabi.newpipe.extractor.linkhandler.ListLinkHandler;
+import org.schabi.newpipe.extractor.services.soundcloud.SoundcloudParsingHelper;

 import java.io.IOException;

@ -24,6 +25,8 @@ import javax.annotation.Nonnull;
 import static org.schabi.newpipe.extractor.utils.Utils.isNullOrEmpty;

 public class SoundcloudCommentsExtractor extends CommentsExtractor {
+    public static final String COLLECTION = "collection";
+
    public SoundcloudCommentsExtractor(final StreamingService service,
                                       final ListLinkHandler uiHandler) {
        super(service, uiHandler);
@ -46,7 +49,7 @@ public class SoundcloudCommentsExtractor extends CommentsExtractor {
        final CommentsInfoItemsCollector collector = new CommentsInfoItemsCollector(
                getServiceId());

-        collectStreamsFrom(collector, json.getArray("collection"));
+        collectStreamsFrom(collector, json);

        return new InfoItemsPage<>(collector, new Page(json.getString("next_href")));
    }
@ -57,21 +60,32 @@ public class SoundcloudCommentsExtractor extends CommentsExtractor {
        if (page == null || isNullOrEmpty(page.getUrl())) {
            throw new IllegalArgumentException("Page doesn't contain an URL");
        }
-
-        final Downloader downloader = NewPipe.getDownloader();
-        final Response response = downloader.get(page.getUrl());
-
        final JsonObject json;
-        try {
-            json = JsonParser.object().from(response.responseBody());
-        } catch (final JsonParserException e) {
-            throw new ParsingException("Could not parse json", e);
-        }
-
        final CommentsInfoItemsCollector collector = new CommentsInfoItemsCollector(
                getServiceId());

-        collectStreamsFrom(collector, json.getArray("collection"));
+        if (page.hasContent()) {
+            // This page contains the whole previously fetched comments.
+            // We need to get the comments which are replies to the comment with the page's id.
+            json = (JsonObject) page.getContent();
+            try {
+                final int commentId = Integer.parseInt(page.getId());
+                collectRepliesFrom(collector, json, commentId, page.getUrl());
+            } catch (final NumberFormatException e) {
+                throw new ParsingException("Got invalid comment id", e);
+            }
+        } else {
+
+            final Downloader downloader = NewPipe.getDownloader();
+            final Response response = downloader.get(page.getUrl());
+
+            try {
+                json = JsonParser.object().from(response.responseBody());
+            } catch (final JsonParserException e) {
+                throw new ParsingException("Could not parse json", e);
+            }
+            collectStreamsFrom(collector, json);
+        }

        return new InfoItemsPage<>(collector, new Page(json.getString("next_href")));
    }
@ -80,10 +94,39 @@ public class SoundcloudCommentsExtractor extends CommentsExtractor {
    public void onFetchPage(@Nonnull final Downloader downloader) { }

    private void collectStreamsFrom(final CommentsInfoItemsCollector collector,
-                                    final JsonArray entries) throws ParsingException {
+                                    final JsonObject json) throws ParsingException {
        final String url = getUrl();
-        for (final Object comment : entries) {
-            collector.commit(new SoundcloudCommentsInfoItemExtractor((JsonObject) comment, url));
+        final JsonArray entries = json.getArray(COLLECTION);
+        for (int i = 0; i < entries.size(); i++) {
+            final JsonObject entry = entries.getObject(i);
+            if (i == 0
+                    || (!SoundcloudParsingHelper.isReply(entry)
+                    && !SoundcloudParsingHelper.isReplyTo(entries.getObject(i - 1), entry))) {
+                collector.commit(new SoundcloudCommentsInfoItemExtractor(
+                        json, i, entries.getObject(i), url));
+            }
        }
    }
+
+    /**
+     * Collect the replies to the comment with the given id.
+     * <br>
+     * The comments are scanned in order. Once the comment with the given id has been found,
+     * every following comment which {@link SoundcloudParsingHelper#isReplyTo} recognises
+     * as a reply to it is committed to the collector.
+     *
+     * @param collector the collector the replies are committed to
+     * @param json      the JSON object holding all comments in its {@link #COLLECTION} array
+     * @param id        the id of the comment whose replies are collected
+     * @param url       the URL passed on to the created item extractors
+     */
+    private void collectRepliesFrom(final CommentsInfoItemsCollector collector,
+                                    final JsonObject json,
+                                    final long id,
+                                    final String url) throws ParsingException {
+        JsonObject originalComment = null;
+        final JsonArray entries = json.getArray(COLLECTION);
+        for (int i = 0; i < entries.size(); i++) {
+            final JsonObject comment = entries.getObject(i);
+            // Compare ids as long so that large ids are not truncated.
+            if (comment.getLong("id") == id) {
+                originalComment = comment;
+                continue;
+            }
+            if (originalComment != null
+                    && SoundcloudParsingHelper.isReplyTo(originalComment, comment)) {
+                collector.commit(new SoundcloudCommentsInfoItemExtractor(
+                        json, i, comment, url));
+            }
+        }
+    }
+
 }
--- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/extractors/SoundcloudCommentsInfoItemExtractor.java
+++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/extractors/SoundcloudCommentsInfoItemExtractor.java
@ -1,62 +1,79 @@
 package org.schabi.newpipe.extractor.services.soundcloud.extractors;

+import com.grack.nanojson.JsonArray;
 import com.grack.nanojson.JsonObject;
+import org.schabi.newpipe.extractor.Page;
+import org.schabi.newpipe.extractor.ServiceList;
+import org.schabi.newpipe.extractor.comments.CommentsInfoItem;
 import org.schabi.newpipe.extractor.comments.CommentsInfoItemExtractor;
+import org.schabi.newpipe.extractor.comments.CommentsInfoItemsCollector;
 import org.schabi.newpipe.extractor.exceptions.ParsingException;
 import org.schabi.newpipe.extractor.localization.DateWrapper;
 import org.schabi.newpipe.extractor.services.soundcloud.SoundcloudParsingHelper;
 import org.schabi.newpipe.extractor.stream.Description;

 import javax.annotation.Nullable;
+import java.util.ArrayList;
+import java.util.List;
 import java.util.Objects;

 public class SoundcloudCommentsInfoItemExtractor implements CommentsInfoItemExtractor {
+    public static final String USER = "user";
+    public static final String BODY = "body";
+
    private final JsonObject json;
+    private final int index;
+    private final JsonObject item;
    private final String url;

-    public SoundcloudCommentsInfoItemExtractor(final JsonObject json, final String url) {
+    private int replyCount = CommentsInfoItem.UNKNOWN_REPLY_COUNT;
+    private Page repliesPage = null;
+
+    public SoundcloudCommentsInfoItemExtractor(final JsonObject json, final int index, final JsonObject item, final String url) {
        this.json = json;
+        this.index = index;
+        this.item = item;
        this.url = url;
    }

    @Override
    public String getCommentId() {
-        return Objects.toString(json.getLong("id"), null);
+        return Objects.toString(item.getLong("id"), null);
    }

    @Override
    public Description getCommentText() {
-        return new Description(json.getString("body"), Description.PLAIN_TEXT);
+        return new Description(item.getString(BODY), Description.PLAIN_TEXT);
    }

    @Override
    public String getUploaderName() {
-        return json.getObject("user").getString("username");
+        return item.getObject(USER).getString("username");
    }

    @Override
    public String getUploaderAvatarUrl() {
-        return json.getObject("user").getString("avatar_url");
+        return item.getObject(USER).getString("avatar_url");
    }

    @Override
    public boolean isUploaderVerified() throws ParsingException {
-        return json.getObject("user").getBoolean("verified");
+        return item.getObject(USER).getBoolean("verified");
    }

    @Override
    public int getStreamPosition() throws ParsingException {
-        return json.getInt("timestamp") / 1000; // convert milliseconds to seconds
+        return item.getInt("timestamp") / 1000; // convert milliseconds to seconds
    }

    @Override
    public String getUploaderUrl() {
-        return json.getObject("user").getString("permalink_url");
+        return item.getObject(USER).getString("permalink_url");
    }

    @Override
    public String getTextualUploadDate() {
-        return json.getString("created_at");
+        return item.getString("created_at");
    }

    @Nullable
@ -67,7 +84,7 @@ public class SoundcloudCommentsInfoItemExtractor implements CommentsInfoItemExtr

    @Override
    public String getName() throws ParsingException {
-        return json.getObject("user").getString("permalink");
+        return item.getObject(USER).getString("permalink");
    }

    @Override
@ -77,6 +94,52 @@ public class SoundcloudCommentsInfoItemExtractor implements CommentsInfoItemExtr

    @Override
    public String getThumbnailUrl() {
-        return json.getObject("user").getString("avatar_url");
+        return item.getObject(USER).getString("avatar_url");
+    }
+
+    /**
+     * Get the page which gives access to the replies to this comment.
+     * <br>
+     * The replies are searched among the comments which directly follow this comment
+     * in the already fetched JSON. The result is computed on the first call only;
+     * the reply count is stored as a side effect.
+     *
+     * @return a page with this comment's id and the whole comments JSON as content,
+     *         or {@code null} if no replies were found
+     */
+    @Override
+    public Page getReplies() {
+        if (replyCount == CommentsInfoItem.UNKNOWN_REPLY_COUNT) {
+            final List<JsonObject> replies = new ArrayList<>();
+            final CommentsInfoItemsCollector collector = new CommentsInfoItemsCollector(
+                    ServiceList.SoundCloud.getServiceId());
+            // Replies start with the mention of the user who created the original comment.
+            final String mention = "@" + item.getObject(USER).getString("permalink");
+            // Loop through all comments which come after the original comment to find its replies.
+            final JsonArray allItems = json.getArray(SoundcloudCommentsExtractor.COLLECTION);
+            for (int i = index + 1; i < allItems.size(); i++) {
+                final JsonObject comment = allItems.getObject(i);
+                // A missing body is treated as empty to avoid a NullPointerException.
+                final String commentContent = comment.getString(BODY, "");
+                if (commentContent.startsWith(mention)) {
+                    replies.add(comment);
+                    collector.commit(
+                            new SoundcloudCommentsInfoItemExtractor(json, i, comment, url));
+                } else if (!commentContent.startsWith("@") || replies.isEmpty()) {
+                    // Only the comments directly after the original comment
+                    // starting with the mention of the comment's creator
+                    // are replies to the original comment.
+                    // The first comment not starting with these letters
+                    // is the next top-level comment.
+                    break;
+                }
+            }
+            replyCount = replies.size();
+            if (collector.getItems().isEmpty()) {
+                return null;
+            }
+            repliesPage = new Page(getUrl(), getCommentId());
+            repliesPage.setContent(json);
+        }
+
+        return repliesPage;
+    }
+
+    /**
+     * Get the number of replies to this comment.
+     * If the count is still unknown, {@link #getReplies()} is called first to compute it.
+     *
+     * @return the stored reply count
+     */
+    @Override
+    public int getReplyCount() throws ParsingException {
+        if (replyCount == CommentsInfoItem.UNKNOWN_REPLY_COUNT) {
+            getReplies();
+        }
+        return replyCount;
    }
 }
--- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/linkHandler/SoundcloudCommentsLinkHandlerFactory.java
+++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/linkHandler/SoundcloudCommentsLinkHandlerFactory.java
@ -3,6 +3,7 @@ package org.schabi.newpipe.extractor.services.soundcloud.linkHandler;
 import org.schabi.newpipe.extractor.exceptions.ExtractionException;
 import org.schabi.newpipe.extractor.exceptions.ParsingException;
 import org.schabi.newpipe.extractor.linkhandler.ListLinkHandlerFactory;
+import org.schabi.newpipe.extractor.utils.Parser;

 import java.io.IOException;
 import java.util.List;
@ -14,6 +15,8 @@ public final class SoundcloudCommentsLinkHandlerFactory extends ListLinkHandlerF
    private static final SoundcloudCommentsLinkHandlerFactory INSTANCE =
            new SoundcloudCommentsLinkHandlerFactory();

+    // Matches comments API URLs which contain an offset parameter.
+    // Group 1: track id, group 2: query parameters preceding the offset (optional),
+    // group 3: the complete offset value.
+    private static final String OFFSET_PATTERN = "https://api-v2\\.soundcloud\\.com/tracks/"
+            + "([0-9a-z]+)/comments\\?(.*&)?offset=([0-9]+)";
+
    private SoundcloudCommentsLinkHandlerFactory() {
    }

@ -27,7 +30,7 @@ public final class SoundcloudCommentsLinkHandlerFactory extends ListLinkHandlerF
                         final String sortFilter) throws ParsingException {
        try {
            return "https://api-v2.soundcloud.com/tracks/" + id + "/comments" + "?client_id="
-                    + clientId() + "&threaded=0" + "&filter_replies=1";
+                    + clientId() + "&threaded=1" + "&filter_replies=1";
            // Anything but 1 = sort by new
            // + "&limit=NUMBER_OF_ITEMS_PER_REQUEST". We let the API control (default = 10)
            // + "&offset=OFFSET". We let the API control (default = 0, then we use nextPageUrl)
@ -36,12 +39,29 @@ public final class SoundcloudCommentsLinkHandlerFactory extends ListLinkHandlerF
        }
    }

+    /**
+     * Build the comments URL like {@link #getUrl(String, List, String)}
+     * and append an {@code offset} parameter to it.
+     *
+     * @param id            passed on to {@link #getUrl(String, List, String)}
+     * @param contentFilter passed on to {@link #getUrl(String, List, String)}
+     * @param sortFilter    passed on to {@link #getUrl(String, List, String)}
+     * @param offset        the value of the appended {@code offset} parameter
+     * @return the comments URL ending with {@code &offset=<offset>}
+     * @throws ParsingException if the base URL could not be built
+     */
+    public String getUrl(final String id,
+                         final List<String> contentFilter,
+                         final String sortFilter,
+                         final int offset) throws ParsingException {
+        return getUrl(id, contentFilter, sortFilter) + "&offset=" + offset;
+    }
+
    @Override
    public String getId(final String url) throws ParsingException {
        // Delegation to avoid duplicate code, as we need the same id
        return SoundcloudStreamLinkHandlerFactory.getInstance().getId(url);
    }

+    /**
+     * Get the offset from a URL by parsing the third group
+     * of {@code OFFSET_PATTERN} as an integer.
+     *
+     * @param url the URL to match against the offset pattern
+     * @return the parsed offset
+     * @throws ParsingException if the pattern could not be matched
+     *                          or the matched group is not a valid integer
+     */
+    public int getReplyOffset(final String url) throws ParsingException {
+        try {
+            return Integer.parseInt(Parser.matchGroup(OFFSET_PATTERN, url, 3));
+        } catch (Parser.RegexException | NumberFormatException e) {
+            throw new ParsingException("Could not get offset from URL: " + url, e);
+        }
+    }
+
+
+
    @Override
    public boolean onAcceptUrl(final String url) {
        try {
--- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/linkHandler/SoundcloudStreamLinkHandlerFactory.java
+++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/linkHandler/SoundcloudStreamLinkHandlerFactory.java
@ -33,7 +33,7 @@ public final class SoundcloudStreamLinkHandlerFactory extends LinkHandlerFactory
    @Override
    public String getId(final String url) throws ParsingException {
        if (Parser.isMatch(API_URL_PATTERN, url)) {
-            return Parser.matchGroup1(API_URL_PATTERN, url);
+            return Parser.matchGroup(API_URL_PATTERN, url, 2);
        }
        Utils.checkUrl(URL_PATTERN, url);