From 8ae7fcfa1e9276478f1170796ff751894a22fe39 Mon Sep 17 00:00:00 2001 From: TobiGr Date: Tue, 3 Jan 2023 00:19:41 +0100 Subject: [PATCH] Almost fixed Implemented a cache. TODO: Do not store in cache when viewing replies.... --- .../newpipe/extractor/InfoItemsCollector.java | 2 +- .../SoundcloudCommentsExtractor.java | 108 +++++++++++++----- .../SoundcloudCommentsInfoItemExtractor.java | 12 +- .../YoutubeDashManifestCreatorsUtils.java | 2 +- .../YoutubeOtfDashManifestCreator.java | 2 +- ...ePostLiveStreamDvrDashManifestCreator.java | 2 +- ...YoutubeProgressiveDashManifestCreator.java | 2 +- .../newpipe/extractor/utils/cache/Cache.java | 9 ++ .../{ => cache}/ManifestCreatorCache.java | 4 +- .../utils/cache/SoundCloudCommentsCache.java | 74 ++++++++++++ .../{ => cache}/ManifestCreatorCacheTest.java | 3 +- .../cache/SoundCloudCommentsCacheTest.java | 83 ++++++++++++++ 12 files changed, 264 insertions(+), 39 deletions(-) create mode 100644 extractor/src/main/java/org/schabi/newpipe/extractor/utils/cache/Cache.java rename extractor/src/main/java/org/schabi/newpipe/extractor/utils/{ => cache}/ManifestCreatorCache.java (98%) create mode 100644 extractor/src/main/java/org/schabi/newpipe/extractor/utils/cache/SoundCloudCommentsCache.java rename extractor/src/test/java/org/schabi/newpipe/extractor/utils/{ => cache}/ManifestCreatorCacheTest.java (96%) create mode 100644 extractor/src/test/java/org/schabi/newpipe/extractor/utils/cache/SoundCloudCommentsCacheTest.java diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/InfoItemsCollector.java b/extractor/src/main/java/org/schabi/newpipe/extractor/InfoItemsCollector.java index b0ac2e14f..9e04238bf 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/InfoItemsCollector.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/InfoItemsCollector.java @@ -78,7 +78,7 @@ public abstract class InfoItemsCollector extractors = new ArrayList<>(); final String url = getUrl(); - final JsonArray entries = 
json.getArray(COLLECTION); - /** - * The current top level comment. - */ + JsonObject currentTopLevelComment = null; + int currentTopLevelCommentIndex = 0; boolean isLastCommentReply = true; + boolean isFirstCommentReply = false; + boolean addedLastTopLevelComment = lastTopLevelComment == null; // Check whether the first comment in the list is a reply to the last top level comment // from the previous page if there was a previous page. if (lastTopLevelComment != null) { - final JsonObject firstComment = entries.getObject(0); - if (SoundcloudParsingHelper.isReplyTo(lastTopLevelComment, firstComment)) { - currentTopLevelComment = lastTopLevelComment; + final JsonObject firstComment = json.getArray(COLLECTION).getObject(0); + if (SoundcloudParsingHelper.isReplyTo(lastTopLevelComment.comment, firstComment)) { + currentTopLevelComment = lastTopLevelComment.comment; + isFirstCommentReply = true; + merge(json, lastTopLevelComment.json, lastTopLevelComment.index); } else { extractors.add(new SoundcloudCommentsInfoItemExtractor( - json, SoundcloudCommentsInfoItemExtractor.PREVIOUS_PAGE_INDEX, - firstComment, url, null)); + lastTopLevelComment.json, + lastTopLevelComment.index, + lastTopLevelComment.comment, url, null)); + addedLastTopLevelComment = true; } } + final JsonArray entries = json.getArray(COLLECTION); for (int i = 0; i < entries.size(); i++) { final JsonObject entry = entries.getObject(i); - // extract all top level comments - // The first comment is either a top level comment + // Extract all top level comments + // The first comment is a top level comment // if it is not a reply to the last top level comment // - if (i == 0 && currentTopLevelComment == null - || (!SoundcloudParsingHelper.isReplyTo(entries.getObject(i - 1), entry) - && !SoundcloudParsingHelper.isReplyTo(currentTopLevelComment, entry))) { + if ((i == 0 && !isFirstCommentReply) + || ( + i != 0 && !SoundcloudParsingHelper.isReplyTo(entries.getObject(i - 1), entry) + && 
!SoundcloudParsingHelper.isReplyTo(currentTopLevelComment, entry))) { currentTopLevelComment = entry; + currentTopLevelCommentIndex = i; + if (!addedLastTopLevelComment) { + // There is a new top level comment. This also means that we can now determine + // the reply count and get all replies for the top level comment. + extractors.add(new SoundcloudCommentsInfoItemExtractor( + json, 0, lastTopLevelComment.comment, url, null)); + addedLastTopLevelComment = true; + } if (i == entries.size() - 1) { isLastCommentReply = false; - this.lastTopLevelComment = currentTopLevelComment; + LAST_TOP_LEVEL_COMMENTS.put(getUrl(), currentTopLevelComment, json, i); + // Do not collect the last comment if it is a top level comment // because it might have replies. // That is information we cannot get from the comment itself @@ -168,14 +198,17 @@ public class SoundcloudCommentsExtractor extends CommentsExtractor { break; } extractors.add(new SoundcloudCommentsInfoItemExtractor( - json, i, entry, url, lastTopLevelComment)); + json, i, entry, url, null)); } } if (isLastCommentReply) { // Do not collect the last top level comment if it has replies and the retrieved // comment list ends with a reply. We do not know whether the next page starts // with more replies to the last top level comment. - this.lastTopLevelComment = extractors.remove(extractors.size() - 1).item; + LAST_TOP_LEVEL_COMMENTS.put( + getUrl(), + extractors.remove(extractors.size() - 1).item, + json, currentTopLevelCommentIndex); } extractors.stream().forEach(collector::commit); @@ -183,11 +216,13 @@ public class SoundcloudCommentsExtractor extends CommentsExtractor { /** * Collect replies to a top level comment from a SoundCloud API response. 
+ * * @param collector the collector which collects the the replies - * @param json the SoundCloud API response - * @param id the comment's id for which the replies are collected - * @param url the corresponding page's URL - * @return + * @param json the SoundCloud API response + * @param id the comment's id for which the replies are collected + * @param url the corresponding page's URL + * @return {@code true} if there might be more replies to the comment; + * {@code false} if there are definitely no more replies */ private boolean collectRepliesFrom(@Nonnull final CommentsInfoItemsCollector collector, @Nonnull final JsonObject json, @@ -206,8 +241,8 @@ public class SoundcloudCommentsExtractor extends CommentsExtractor { && SoundcloudParsingHelper.isReplyTo(originalComment, comment)) { collector.commit(new SoundcloudCommentsInfoItemExtractor( json, i, entries.getObject(i), url, originalComment)); - // There might be more replies to the originalComment, - // especially if the original comment is at the end of the list. + // There might be more replies to the originalComment + // if the original comment is at the end of the list. 
if (i == entries.size() - 1 && json.has(NEXT_HREF)) { moreReplies = true; } @@ -216,4 +251,17 @@ public class SoundcloudCommentsExtractor extends CommentsExtractor { return moreReplies; } + private void merge(@Nonnull final JsonObject target, @Nonnull final JsonObject subject, + final int index) { + final JsonArray targetArray = target.getArray(COLLECTION); + final JsonArray subjectArray = subject.getArray(COLLECTION); + final JsonArray newArray = new JsonArray( + targetArray.size() + subjectArray.size() - index - 1); + for (int i = index; i < subjectArray.size(); i++) { + newArray.add(subjectArray.getObject(i)); + } + newArray.addAll(targetArray); + target.put(COLLECTION, newArray); + } + } diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/extractors/SoundcloudCommentsInfoItemExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/extractors/SoundcloudCommentsInfoItemExtractor.java index 0dfa3edee..78afff6fd 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/extractors/SoundcloudCommentsInfoItemExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/extractors/SoundcloudCommentsInfoItemExtractor.java @@ -1,5 +1,6 @@ package org.schabi.newpipe.extractor.services.soundcloud.extractors; +import static org.schabi.newpipe.extractor.services.soundcloud.extractors.SoundcloudCommentsExtractor.COLLECTION; import static org.schabi.newpipe.extractor.utils.Utils.isNullOrEmpty; import com.grack.nanojson.JsonArray; @@ -59,6 +60,13 @@ public class SoundcloudCommentsInfoItemExtractor implements CommentsInfoItemExtr this(json, index, item, url, null); } + public void addInfoFromNextPage(@Nonnull final JsonArray newItems, final int itemCount) { + final JsonArray currentItems = this.json.getArray(COLLECTION); + for (int i = 0; i < itemCount; i++) { + currentItems.add(newItems.getObject(i)); + } + } + @Override public String getCommentId() { 
return Objects.toString(item.getLong("id"), null); @@ -75,7 +83,7 @@ public class SoundcloudCommentsInfoItemExtractor implements CommentsInfoItemExtr // We need to do this manually. if (commentContent.startsWith("@")) { final String authorName = commentContent.split(" ", 2)[0].replace("@", ""); - final JsonArray comments = json.getArray(SoundcloudCommentsExtractor.COLLECTION); + final JsonArray comments = json.getArray(COLLECTION); JsonObject author = null; for (int i = index - 1; i >= 0 && author == null; i--) { final JsonObject commentsAuthor = comments.getObject(i).getObject("user"); @@ -163,7 +171,7 @@ public class SoundcloudCommentsInfoItemExtractor implements CommentsInfoItemExtr if (topLevelComment == null) { // Loop through all comments which come after the original comment // to find its replies. - final JsonArray allItems = json.getArray(SoundcloudCommentsExtractor.COLLECTION); + final JsonArray allItems = json.getArray(COLLECTION); for (int i = index + 1; i < allItems.size(); i++) { if (SoundcloudParsingHelper.isReplyTo(item, allItems.getObject(i))) { replyCount++; diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/dashmanifestcreators/YoutubeDashManifestCreatorsUtils.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/dashmanifestcreators/YoutubeDashManifestCreatorsUtils.java index 46bd32420..fcf07d093 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/dashmanifestcreators/YoutubeDashManifestCreatorsUtils.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/dashmanifestcreators/YoutubeDashManifestCreatorsUtils.java @@ -7,7 +7,7 @@ import org.schabi.newpipe.extractor.downloader.Response; import org.schabi.newpipe.extractor.exceptions.ExtractionException; import org.schabi.newpipe.extractor.services.youtube.DeliveryType; import org.schabi.newpipe.extractor.services.youtube.ItagItem; -import 
org.schabi.newpipe.extractor.utils.ManifestCreatorCache; +import org.schabi.newpipe.extractor.utils.cache.ManifestCreatorCache; import org.w3c.dom.Attr; import org.w3c.dom.DOMException; import org.w3c.dom.Document; diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/dashmanifestcreators/YoutubeOtfDashManifestCreator.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/dashmanifestcreators/YoutubeOtfDashManifestCreator.java index 46e84df1d..9226d8d2f 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/dashmanifestcreators/YoutubeOtfDashManifestCreator.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/dashmanifestcreators/YoutubeOtfDashManifestCreator.java @@ -15,7 +15,7 @@ import static org.schabi.newpipe.extractor.utils.Utils.isBlank; import org.schabi.newpipe.extractor.downloader.Response; import org.schabi.newpipe.extractor.services.youtube.DeliveryType; import org.schabi.newpipe.extractor.services.youtube.ItagItem; -import org.schabi.newpipe.extractor.utils.ManifestCreatorCache; +import org.schabi.newpipe.extractor.utils.cache.ManifestCreatorCache; import org.schabi.newpipe.extractor.utils.Utils; import org.w3c.dom.DOMException; import org.w3c.dom.Document; diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/dashmanifestcreators/YoutubePostLiveStreamDvrDashManifestCreator.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/dashmanifestcreators/YoutubePostLiveStreamDvrDashManifestCreator.java index 3a5a7dd23..5c23138f0 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/dashmanifestcreators/YoutubePostLiveStreamDvrDashManifestCreator.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/dashmanifestcreators/YoutubePostLiveStreamDvrDashManifestCreator.java @@ -15,7 +15,7 @@ import static 
org.schabi.newpipe.extractor.utils.Utils.isNullOrEmpty; import org.schabi.newpipe.extractor.downloader.Response; import org.schabi.newpipe.extractor.services.youtube.DeliveryType; import org.schabi.newpipe.extractor.services.youtube.ItagItem; -import org.schabi.newpipe.extractor.utils.ManifestCreatorCache; +import org.schabi.newpipe.extractor.utils.cache.ManifestCreatorCache; import org.w3c.dom.DOMException; import org.w3c.dom.Document; import org.w3c.dom.Element; diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/dashmanifestcreators/YoutubeProgressiveDashManifestCreator.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/dashmanifestcreators/YoutubeProgressiveDashManifestCreator.java index 0f69895bb..1c1e04c37 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/dashmanifestcreators/YoutubeProgressiveDashManifestCreator.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/dashmanifestcreators/YoutubeProgressiveDashManifestCreator.java @@ -2,7 +2,7 @@ package org.schabi.newpipe.extractor.services.youtube.dashmanifestcreators; import org.schabi.newpipe.extractor.services.youtube.DeliveryType; import org.schabi.newpipe.extractor.services.youtube.ItagItem; -import org.schabi.newpipe.extractor.utils.ManifestCreatorCache; +import org.schabi.newpipe.extractor.utils.cache.ManifestCreatorCache; import org.w3c.dom.DOMException; import org.w3c.dom.Document; import org.w3c.dom.Element; diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/utils/cache/Cache.java b/extractor/src/main/java/org/schabi/newpipe/extractor/utils/cache/Cache.java new file mode 100644 index 000000000..6e8180e93 --- /dev/null +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/utils/cache/Cache.java @@ -0,0 +1,9 @@ +package org.schabi.newpipe.extractor.utils.cache; + +public interface Cache { + void put(K key, V value); + V get(K key); + int size(); + boolean isEmpty(); 
+ void clear(); +} diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/utils/ManifestCreatorCache.java b/extractor/src/main/java/org/schabi/newpipe/extractor/utils/cache/ManifestCreatorCache.java similarity index 98% rename from extractor/src/main/java/org/schabi/newpipe/extractor/utils/ManifestCreatorCache.java rename to extractor/src/main/java/org/schabi/newpipe/extractor/utils/cache/ManifestCreatorCache.java index ac12f83f9..149369c53 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/utils/ManifestCreatorCache.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/utils/cache/ManifestCreatorCache.java @@ -1,4 +1,6 @@ -package org.schabi.newpipe.extractor.utils; +package org.schabi.newpipe.extractor.utils.cache; + +import org.schabi.newpipe.extractor.utils.Pair; import javax.annotation.Nonnull; import javax.annotation.Nullable; diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/utils/cache/SoundCloudCommentsCache.java b/extractor/src/main/java/org/schabi/newpipe/extractor/utils/cache/SoundCloudCommentsCache.java new file mode 100644 index 000000000..5c367ce49 --- /dev/null +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/utils/cache/SoundCloudCommentsCache.java @@ -0,0 +1,74 @@ +package org.schabi.newpipe.extractor.utils.cache; + +import com.grack.nanojson.JsonObject; + +import java.util.HashMap; +import java.util.Map; + +import javax.annotation.Nonnull; +import javax.annotation.Nullable; + +/** + * LRU cache which can contain a few items. 
+ */ +public class SoundCloudCommentsCache { + + private final int maxSize; + private final Map<String, CachedCommentInfo> store; + public SoundCloudCommentsCache(final int size) { + if (size < 1) { + throw new IllegalArgumentException("Size must be at least 1"); + } + store = new HashMap<>(size); + maxSize = size; + } + + public void put(@Nonnull final String key, @Nonnull final JsonObject comment, + @Nonnull final JsonObject json, final int index) { + if (store.size() == maxSize && !store.containsKey(key)) { + store.remove( + store.entrySet().stream() + .reduce((a, b) -> a.getValue().lastHit < b.getValue().lastHit ? a : b) + .get().getKey()); + } + store.put(key, new CachedCommentInfo(comment, json, index)); + } + + @Nullable + public CachedCommentInfo get(final String key) { + final CachedCommentInfo result = store.get(key); + if (result == null) { + return null; + } + result.lastHit = System.nanoTime(); + return result; + } + + public int size() { + return store.size(); + } + + public boolean isEmpty() { + return store.isEmpty(); + } + + public void clear() { + store.clear(); + } + + public static final class CachedCommentInfo { + @Nonnull public final JsonObject comment; + @Nonnull public final JsonObject json; + public final int index; + private long lastHit = System.nanoTime(); + + private CachedCommentInfo(@Nonnull final JsonObject comment, + @Nonnull final JsonObject json, + final int index) { + this.comment = comment; + this.json = json; + this.index = index; + } + } + +} diff --git a/extractor/src/test/java/org/schabi/newpipe/extractor/utils/ManifestCreatorCacheTest.java b/extractor/src/test/java/org/schabi/newpipe/extractor/utils/cache/ManifestCreatorCacheTest.java similarity index 96% rename from extractor/src/test/java/org/schabi/newpipe/extractor/utils/ManifestCreatorCacheTest.java rename to extractor/src/test/java/org/schabi/newpipe/extractor/utils/cache/ManifestCreatorCacheTest.java index 83c5c1dfb..a28d745eb 100644 --- 
a/extractor/src/test/java/org/schabi/newpipe/extractor/utils/ManifestCreatorCacheTest.java +++ b/extractor/src/test/java/org/schabi/newpipe/extractor/utils/cache/ManifestCreatorCacheTest.java @@ -1,6 +1,7 @@ -package org.schabi.newpipe.extractor.utils; +package org.schabi.newpipe.extractor.utils.cache; import org.junit.jupiter.api.Test; +import org.schabi.newpipe.extractor.utils.cache.ManifestCreatorCache; import static org.junit.jupiter.api.Assertions.assertEquals; diff --git a/extractor/src/test/java/org/schabi/newpipe/extractor/utils/cache/SoundCloudCommentsCacheTest.java b/extractor/src/test/java/org/schabi/newpipe/extractor/utils/cache/SoundCloudCommentsCacheTest.java new file mode 100644 index 000000000..bd985905a --- /dev/null +++ b/extractor/src/test/java/org/schabi/newpipe/extractor/utils/cache/SoundCloudCommentsCacheTest.java @@ -0,0 +1,83 @@ +package org.schabi.newpipe.extractor.utils.cache; + +import com.grack.nanojson.JsonObject; +import org.junit.jupiter.api.Test; + +import static org.junit.jupiter.api.Assertions.*; + +class SoundCloudCommentsCacheTest { + @Test + void testInstantiation() { + assertThrows(RuntimeException.class, () -> new SoundCloudCommentsCache(-15)); + assertThrows(RuntimeException.class, () -> new SoundCloudCommentsCache(0)); + assertDoesNotThrow(() -> new SoundCloudCommentsCache(1)); + assertDoesNotThrow(() -> new SoundCloudCommentsCache(10)); + } + + @Test + void testSize() { + SoundCloudCommentsCache cache = new SoundCloudCommentsCache(10); + assertEquals(0, cache.size()); + assertTrue(cache.isEmpty()); + cache.put("a", new JsonObject(), new JsonObject(), 1); + assertEquals(1, cache.size()); + cache.put("b", new JsonObject(), new JsonObject(), 1); + assertEquals(2, cache.size()); + cache.put("c", new JsonObject(), new JsonObject(), 1); + assertEquals(3, cache.size()); + cache.put("a", new JsonObject(), new JsonObject(), 1); + assertEquals(3, cache.size()); + cache.put("b", new JsonObject(), new JsonObject(), 1); + 
assertEquals(3, cache.size()); + cache.clear(); + assertEquals(0, cache.size()); + } + + @Test + void testLRUStrategy() { + final SoundCloudCommentsCache cache = new SoundCloudCommentsCache(4); + cache.put("1", new JsonObject(), new JsonObject(), 1); + cache.put("2", new JsonObject(), new JsonObject(), 2); + cache.put("3", new JsonObject(), new JsonObject(), 3); + cache.put("4", new JsonObject(), new JsonObject(), 4); + cache.put("5", new JsonObject(), new JsonObject(), 5); + assertNull(cache.get("1")); + final SoundCloudCommentsCache.CachedCommentInfo cci = cache.get("2"); + assertNotNull(cci); + cache.put("6", new JsonObject(), new JsonObject(), 6); + assertNotNull(cache.get("2")); + assertNull(cache.get("3")); + cache.put("7", new JsonObject(), new JsonObject(), 7); + cache.put("8", new JsonObject(), new JsonObject(), 8); + cache.put("9", new JsonObject(), new JsonObject(), 9); + assertNull(cache.get("1")); + assertNull(cache.get("3")); + assertNull(cache.get("4")); + assertNull(cache.get("5")); + assertNotNull(cache.get("2")); + } + + @Test + void testStorage() { + final SoundCloudCommentsCache cache = new SoundCloudCommentsCache(10); + cache.put("1", new JsonObject(), new JsonObject(), 1); + cache.put("1", new JsonObject(), new JsonObject(), 2); + assertEquals(2, cache.get("1").index); + cache.put("1", new JsonObject(), new JsonObject(), 3); + assertEquals(3, cache.get("1").index); + } + + @Test + void testClear() { + final SoundCloudCommentsCache cache = new SoundCloudCommentsCache(10); + cache.put("1", new JsonObject(), new JsonObject(), 1); + cache.put("2", new JsonObject(), new JsonObject(), 2); + cache.put("3", new JsonObject(), new JsonObject(), 3); + cache.put("4", new JsonObject(), new JsonObject(), 4); + cache.put("5", new JsonObject(), new JsonObject(), 5); + cache.clear(); + assertTrue(cache.isEmpty()); + assertEquals(0, cache.size()); + } + +}