Almost fixed

Implemented a cache.
TODO: Do not store in cache when viewing replies....
This commit is contained in:
TobiGr 2023-01-03 00:19:41 +01:00
parent e5be686b06
commit 8ae7fcfa1e
12 changed files with 264 additions and 39 deletions

View File

@ -78,7 +78,7 @@ public abstract class InfoItemsCollector<I extends InfoItem, E extends InfoItemE
* Add an error * Add an error
* @param error the error * @param error the error
*/ */
protected void addError(final Exception error) { public void addError(final Exception error) {
errors.add(error); errors.add(error);
} }

View File

@ -19,7 +19,8 @@ import org.schabi.newpipe.extractor.exceptions.ExtractionException;
import org.schabi.newpipe.extractor.exceptions.ParsingException; import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.linkhandler.ListLinkHandler; import org.schabi.newpipe.extractor.linkhandler.ListLinkHandler;
import org.schabi.newpipe.extractor.services.soundcloud.SoundcloudParsingHelper; import org.schabi.newpipe.extractor.services.soundcloud.SoundcloudParsingHelper;
import org.schabi.newpipe.extractor.utils.cache.SoundCloudCommentsCache;
import org.schabi.newpipe.extractor.utils.cache.SoundCloudCommentsCache.CachedCommentInfo;
import java.io.IOException; import java.io.IOException;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.List; import java.util.List;
@ -37,7 +38,8 @@ public class SoundcloudCommentsExtractor extends CommentsExtractor {
* and therefore the {@link SoundcloudCommentsInfoItemExtractor#replyCount} * and therefore the {@link SoundcloudCommentsInfoItemExtractor#replyCount}
* of the last top level comment cannot be determined certainly. * of the last top level comment cannot be determined certainly.
*/ */
@Nullable private JsonObject lastTopLevelComment; private static final SoundCloudCommentsCache LAST_TOP_LEVEL_COMMENTS =
new SoundCloudCommentsCache(10);
public SoundcloudCommentsExtractor(final StreamingService service, public SoundcloudCommentsExtractor(final StreamingService service,
final ListLinkHandler uiHandler) { final ListLinkHandler uiHandler) {
@ -100,7 +102,18 @@ public class SoundcloudCommentsExtractor extends CommentsExtractor {
} catch (final JsonParserException e) { } catch (final JsonParserException e) {
throw new ParsingException("Could not parse json", e); throw new ParsingException("Could not parse json", e);
} }
collectCommentsFrom(collector, json, lastTopLevelComment);
final CachedCommentInfo topLevelCommentElement = LAST_TOP_LEVEL_COMMENTS.get(getUrl());
if (topLevelCommentElement == null) {
if (LAST_TOP_LEVEL_COMMENTS.isEmpty()) {
collector.addError(new RuntimeException(
"Could not get last top level comment. It has been removed from cache."
+ " Increase the cache size to not loose any comments"));
}
collectCommentsFrom(collector, json, null);
} else {
collectCommentsFrom(collector, json, topLevelCommentElement);
}
} }
if (hasNextPage) { if (hasNextPage) {
@ -111,54 +124,71 @@ public class SoundcloudCommentsExtractor extends CommentsExtractor {
} }
@Override @Override
public void onFetchPage(@Nonnull final Downloader downloader) { } public void onFetchPage(@Nonnull final Downloader downloader) {
}
/** /**
* Collect top level comments from a SoundCloud API response. * Collect top level comments from a SoundCloud API response.
* @param collector the collector which collects the the top level comments *
* @param json the JsonObject of the API response * @param collector the collector which collects the the top level comments
* @param json the JsonObject of the API response
* @param lastTopLevelComment the last top level comment from the previous page or {@code null} * @param lastTopLevelComment the last top level comment from the previous page or {@code null}
* if this method is run for the initial page. * if this method is run for the initial page.
* @throws ParsingException * @throws ParsingException
*/ */
private void collectCommentsFrom(@Nonnull final CommentsInfoItemsCollector collector, private void collectCommentsFrom(@Nonnull final CommentsInfoItemsCollector collector,
@Nonnull final JsonObject json, @Nonnull final JsonObject json,
@Nullable final JsonObject lastTopLevelComment) @Nullable final CachedCommentInfo lastTopLevelComment)
throws ParsingException { throws ParsingException {
final List<SoundcloudCommentsInfoItemExtractor> extractors = new ArrayList<>(); final List<SoundcloudCommentsInfoItemExtractor> extractors = new ArrayList<>();
final String url = getUrl(); final String url = getUrl();
final JsonArray entries = json.getArray(COLLECTION);
/**
* The current top level comment.
*/
JsonObject currentTopLevelComment = null; JsonObject currentTopLevelComment = null;
int currentTopLevelCommentIndex = 0;
boolean isLastCommentReply = true; boolean isLastCommentReply = true;
boolean isFirstCommentReply = false;
boolean addedLastTopLevelComment = lastTopLevelComment == null;
// Check whether the first comment in the list is a reply to the last top level comment // Check whether the first comment in the list is a reply to the last top level comment
// from the previous page if there was a previous page. // from the previous page if there was a previous page.
if (lastTopLevelComment != null) { if (lastTopLevelComment != null) {
final JsonObject firstComment = entries.getObject(0); final JsonObject firstComment = json.getArray(COLLECTION).getObject(0);
if (SoundcloudParsingHelper.isReplyTo(lastTopLevelComment, firstComment)) { if (SoundcloudParsingHelper.isReplyTo(lastTopLevelComment.comment, firstComment)) {
currentTopLevelComment = lastTopLevelComment; currentTopLevelComment = lastTopLevelComment.comment;
isFirstCommentReply = true;
merge(json, lastTopLevelComment.json, lastTopLevelComment.index);
} else { } else {
extractors.add(new SoundcloudCommentsInfoItemExtractor( extractors.add(new SoundcloudCommentsInfoItemExtractor(
json, SoundcloudCommentsInfoItemExtractor.PREVIOUS_PAGE_INDEX, lastTopLevelComment.json,
firstComment, url, null)); lastTopLevelComment.index,
lastTopLevelComment.comment, url, null));
addedLastTopLevelComment = true;
} }
} }
final JsonArray entries = json.getArray(COLLECTION);
for (int i = 0; i < entries.size(); i++) { for (int i = 0; i < entries.size(); i++) {
final JsonObject entry = entries.getObject(i); final JsonObject entry = entries.getObject(i);
// extract all top level comments // Extract all top level comments
// The first comment is either a top level comment // The first comment is a top level comment
// if it is not a reply to the last top level comment // if it is not a reply to the last top level comment
// //
if (i == 0 && currentTopLevelComment == null if ((i == 0 && !isFirstCommentReply)
|| (!SoundcloudParsingHelper.isReplyTo(entries.getObject(i - 1), entry) || (
&& !SoundcloudParsingHelper.isReplyTo(currentTopLevelComment, entry))) { i != 0 && !SoundcloudParsingHelper.isReplyTo(entries.getObject(i - 1), entry)
&& !SoundcloudParsingHelper.isReplyTo(currentTopLevelComment, entry))) {
currentTopLevelComment = entry; currentTopLevelComment = entry;
currentTopLevelCommentIndex = i;
if (!addedLastTopLevelComment) {
// There is a new top level comment. This also means that we can now determine
// the reply count and get all replies for the top level comment.
extractors.add(new SoundcloudCommentsInfoItemExtractor(
json, 0, lastTopLevelComment.comment, url, null));
addedLastTopLevelComment = true;
}
if (i == entries.size() - 1) { if (i == entries.size() - 1) {
isLastCommentReply = false; isLastCommentReply = false;
this.lastTopLevelComment = currentTopLevelComment; LAST_TOP_LEVEL_COMMENTS.put(getUrl(), currentTopLevelComment, json, i);
// Do not collect the last comment if it is a top level comment // Do not collect the last comment if it is a top level comment
// because it might have replies. // because it might have replies.
// That is information we cannot get from the comment itself // That is information we cannot get from the comment itself
@ -168,14 +198,17 @@ public class SoundcloudCommentsExtractor extends CommentsExtractor {
break; break;
} }
extractors.add(new SoundcloudCommentsInfoItemExtractor( extractors.add(new SoundcloudCommentsInfoItemExtractor(
json, i, entry, url, lastTopLevelComment)); json, i, entry, url, null));
} }
} }
if (isLastCommentReply) { if (isLastCommentReply) {
// Do not collect the last top level comment if it has replies and the retrieved // Do not collect the last top level comment if it has replies and the retrieved
// comment list ends with a reply. We do not know whether the next page starts // comment list ends with a reply. We do not know whether the next page starts
// with more replies to the last top level comment. // with more replies to the last top level comment.
this.lastTopLevelComment = extractors.remove(extractors.size() - 1).item; LAST_TOP_LEVEL_COMMENTS.put(
getUrl(),
extractors.remove(extractors.size() - 1).item,
json, currentTopLevelCommentIndex);
} }
extractors.stream().forEach(collector::commit); extractors.stream().forEach(collector::commit);
@ -183,11 +216,13 @@ public class SoundcloudCommentsExtractor extends CommentsExtractor {
/** /**
* Collect replies to a top level comment from a SoundCloud API response. * Collect replies to a top level comment from a SoundCloud API response.
*
* @param collector the collector which collects the the replies * @param collector the collector which collects the the replies
* @param json the SoundCloud API response * @param json the SoundCloud API response
* @param id the comment's id for which the replies are collected * @param id the comment's id for which the replies are collected
* @param url the corresponding page's URL * @param url the corresponding page's URL
* @return * @return {@code true} if there might be more replies to the comment;
* {@code false} if there are definitely no more replies
*/ */
private boolean collectRepliesFrom(@Nonnull final CommentsInfoItemsCollector collector, private boolean collectRepliesFrom(@Nonnull final CommentsInfoItemsCollector collector,
@Nonnull final JsonObject json, @Nonnull final JsonObject json,
@ -206,8 +241,8 @@ public class SoundcloudCommentsExtractor extends CommentsExtractor {
&& SoundcloudParsingHelper.isReplyTo(originalComment, comment)) { && SoundcloudParsingHelper.isReplyTo(originalComment, comment)) {
collector.commit(new SoundcloudCommentsInfoItemExtractor( collector.commit(new SoundcloudCommentsInfoItemExtractor(
json, i, entries.getObject(i), url, originalComment)); json, i, entries.getObject(i), url, originalComment));
// There might be more replies to the originalComment, // There might be more replies to the originalComment
// especially if the original comment is at the end of the list. // if the original comment is at the end of the list.
if (i == entries.size() - 1 && json.has(NEXT_HREF)) { if (i == entries.size() - 1 && json.has(NEXT_HREF)) {
moreReplies = true; moreReplies = true;
} }
@ -216,4 +251,17 @@ public class SoundcloudCommentsExtractor extends CommentsExtractor {
return moreReplies; return moreReplies;
} }
/**
 * Prepend the tail of {@code subject}'s collection to {@code target}'s collection.
 *
 * <p>The entries of {@code subject} from position {@code index} up to its end are copied
 * into a new array, followed by all entries of {@code target}. The new array then replaces
 * the collection stored in {@code target}; {@code subject} is not modified.</p>
 *
 * @param target  the JSON object whose collection is replaced by the merged collection
 * @param subject the JSON object whose trailing collection entries are carried over
 * @param index   the position in {@code subject}'s collection of the first entry to carry
 *                over
 */
private void merge(@Nonnull final JsonObject target, @Nonnull final JsonObject subject,
                   final int index) {
    final JsonArray targetArray = target.getArray(COLLECTION);
    final JsonArray subjectArray = subject.getArray(COLLECTION);
    // Exactly (subjectArray.size() - index) entries are copied from the subject. Clamp at
    // zero so that an index past the end can never produce a negative capacity.
    final int carriedOver = Math.max(0, subjectArray.size() - index);
    final JsonArray newArray = new JsonArray(targetArray.size() + carriedOver);
    for (int i = index; i < subjectArray.size(); i++) {
        newArray.add(subjectArray.getObject(i));
    }
    newArray.addAll(targetArray);
    target.put(COLLECTION, newArray);
}
} }

View File

@ -1,5 +1,6 @@
package org.schabi.newpipe.extractor.services.soundcloud.extractors; package org.schabi.newpipe.extractor.services.soundcloud.extractors;
import static org.schabi.newpipe.extractor.services.soundcloud.extractors.SoundcloudCommentsExtractor.COLLECTION;
import static org.schabi.newpipe.extractor.utils.Utils.isNullOrEmpty; import static org.schabi.newpipe.extractor.utils.Utils.isNullOrEmpty;
import com.grack.nanojson.JsonArray; import com.grack.nanojson.JsonArray;
@ -59,6 +60,13 @@ public class SoundcloudCommentsInfoItemExtractor implements CommentsInfoItemExtr
this(json, index, item, url, null); this(json, index, item, url, null);
} }
/**
 * Append the first {@code itemCount} entries of {@code newItems} to the collection of the
 * JSON object held by this extractor.
 *
 * @param newItems  the array to take the additional entries from
 * @param itemCount how many entries, counted from the start of {@code newItems}, to append
 */
public void addInfoFromNextPage(@Nonnull final JsonArray newItems, final int itemCount) {
    final JsonArray ownItems = json.getArray(COLLECTION);
    int position = 0;
    while (position < itemCount) {
        ownItems.add(newItems.getObject(position));
        position++;
    }
}
@Override @Override
public String getCommentId() { public String getCommentId() {
return Objects.toString(item.getLong("id"), null); return Objects.toString(item.getLong("id"), null);
@ -75,7 +83,7 @@ public class SoundcloudCommentsInfoItemExtractor implements CommentsInfoItemExtr
// We need to do this manually. // We need to do this manually.
if (commentContent.startsWith("@")) { if (commentContent.startsWith("@")) {
final String authorName = commentContent.split(" ", 2)[0].replace("@", ""); final String authorName = commentContent.split(" ", 2)[0].replace("@", "");
final JsonArray comments = json.getArray(SoundcloudCommentsExtractor.COLLECTION); final JsonArray comments = json.getArray(COLLECTION);
JsonObject author = null; JsonObject author = null;
for (int i = index - 1; i >= 0 && author == null; i--) { for (int i = index - 1; i >= 0 && author == null; i--) {
final JsonObject commentsAuthor = comments.getObject(i).getObject("user"); final JsonObject commentsAuthor = comments.getObject(i).getObject("user");
@ -163,7 +171,7 @@ public class SoundcloudCommentsInfoItemExtractor implements CommentsInfoItemExtr
if (topLevelComment == null) { if (topLevelComment == null) {
// Loop through all comments which come after the original comment // Loop through all comments which come after the original comment
// to find its replies. // to find its replies.
final JsonArray allItems = json.getArray(SoundcloudCommentsExtractor.COLLECTION); final JsonArray allItems = json.getArray(COLLECTION);
for (int i = index + 1; i < allItems.size(); i++) { for (int i = index + 1; i < allItems.size(); i++) {
if (SoundcloudParsingHelper.isReplyTo(item, allItems.getObject(i))) { if (SoundcloudParsingHelper.isReplyTo(item, allItems.getObject(i))) {
replyCount++; replyCount++;

View File

@ -7,7 +7,7 @@ import org.schabi.newpipe.extractor.downloader.Response;
import org.schabi.newpipe.extractor.exceptions.ExtractionException; import org.schabi.newpipe.extractor.exceptions.ExtractionException;
import org.schabi.newpipe.extractor.services.youtube.DeliveryType; import org.schabi.newpipe.extractor.services.youtube.DeliveryType;
import org.schabi.newpipe.extractor.services.youtube.ItagItem; import org.schabi.newpipe.extractor.services.youtube.ItagItem;
import org.schabi.newpipe.extractor.utils.ManifestCreatorCache; import org.schabi.newpipe.extractor.utils.cache.ManifestCreatorCache;
import org.w3c.dom.Attr; import org.w3c.dom.Attr;
import org.w3c.dom.DOMException; import org.w3c.dom.DOMException;
import org.w3c.dom.Document; import org.w3c.dom.Document;

View File

@ -15,7 +15,7 @@ import static org.schabi.newpipe.extractor.utils.Utils.isBlank;
import org.schabi.newpipe.extractor.downloader.Response; import org.schabi.newpipe.extractor.downloader.Response;
import org.schabi.newpipe.extractor.services.youtube.DeliveryType; import org.schabi.newpipe.extractor.services.youtube.DeliveryType;
import org.schabi.newpipe.extractor.services.youtube.ItagItem; import org.schabi.newpipe.extractor.services.youtube.ItagItem;
import org.schabi.newpipe.extractor.utils.ManifestCreatorCache; import org.schabi.newpipe.extractor.utils.cache.ManifestCreatorCache;
import org.schabi.newpipe.extractor.utils.Utils; import org.schabi.newpipe.extractor.utils.Utils;
import org.w3c.dom.DOMException; import org.w3c.dom.DOMException;
import org.w3c.dom.Document; import org.w3c.dom.Document;

View File

@ -15,7 +15,7 @@ import static org.schabi.newpipe.extractor.utils.Utils.isNullOrEmpty;
import org.schabi.newpipe.extractor.downloader.Response; import org.schabi.newpipe.extractor.downloader.Response;
import org.schabi.newpipe.extractor.services.youtube.DeliveryType; import org.schabi.newpipe.extractor.services.youtube.DeliveryType;
import org.schabi.newpipe.extractor.services.youtube.ItagItem; import org.schabi.newpipe.extractor.services.youtube.ItagItem;
import org.schabi.newpipe.extractor.utils.ManifestCreatorCache; import org.schabi.newpipe.extractor.utils.cache.ManifestCreatorCache;
import org.w3c.dom.DOMException; import org.w3c.dom.DOMException;
import org.w3c.dom.Document; import org.w3c.dom.Document;
import org.w3c.dom.Element; import org.w3c.dom.Element;

View File

@ -2,7 +2,7 @@ package org.schabi.newpipe.extractor.services.youtube.dashmanifestcreators;
import org.schabi.newpipe.extractor.services.youtube.DeliveryType; import org.schabi.newpipe.extractor.services.youtube.DeliveryType;
import org.schabi.newpipe.extractor.services.youtube.ItagItem; import org.schabi.newpipe.extractor.services.youtube.ItagItem;
import org.schabi.newpipe.extractor.utils.ManifestCreatorCache; import org.schabi.newpipe.extractor.utils.cache.ManifestCreatorCache;
import org.w3c.dom.DOMException; import org.w3c.dom.DOMException;
import org.w3c.dom.Document; import org.w3c.dom.Document;
import org.w3c.dom.Element; import org.w3c.dom.Element;

View File

@ -0,0 +1,9 @@
package org.schabi.newpipe.extractor.utils.cache;
/**
 * Minimal contract for a key/value cache.
 *
 * @param <K> the type of the keys
 * @param <V> the type of the cached values
 */
public interface Cache<K, V> {

    /**
     * Look up the value stored under the given key.
     *
     * @param key the key to look up
     * @return the value for {@code key}
     */
    V get(K key);

    /**
     * Store a value under the given key.
     *
     * @param key   the key to store the value under
     * @param value the value to store
     */
    void put(K key, V value);

    /**
     * @return {@code true} if the cache holds no entries
     */
    boolean isEmpty();

    /**
     * @return the number of entries in the cache
     */
    int size();

    /**
     * Remove all entries from the cache.
     */
    void clear();
}

View File

@ -1,4 +1,6 @@
package org.schabi.newpipe.extractor.utils; package org.schabi.newpipe.extractor.utils.cache;
import org.schabi.newpipe.extractor.utils.Pair;
import javax.annotation.Nonnull; import javax.annotation.Nonnull;
import javax.annotation.Nullable; import javax.annotation.Nullable;

View File

@ -0,0 +1,74 @@
package org.schabi.newpipe.extractor.utils.cache;
import com.grack.nanojson.JsonObject;
import java.util.HashMap;
import java.util.Map;
import javax.annotation.Nonnull;
import javax.annotation.Nullable;
/**
 * Small least-recently-used (LRU) cache which maps a key to a comment, the JSON object
 * it was found in and its index.
 *
 * <p>When a new key is added to a full cache, the entry that was put or retrieved least
 * recently is evicted. Replacing the value of a key that is already cached never evicts
 * another entry.</p>
 *
 * <p>This class performs no synchronization of its own.</p>
 */
public class SoundCloudCommentsCache {

    private final int maxSize;
    private final Map<String, CachedCommentInfo> store;
    /**
     * Logical clock used to order accesses. Unlike a wall-clock or nano-time stamp it is
     * strictly increasing, so two entries can never tie for "least recently used".
     */
    private long accessCounter;

    /**
     * @param size the maximum number of entries the cache may hold
     * @throws IllegalArgumentException if {@code size} is smaller than 1
     */
    public SoundCloudCommentsCache(final int size) {
        if (size < 1) {
            throw new IllegalArgumentException("Size must be at least 1");
        }
        store = new HashMap<>(size);
        maxSize = size;
    }

    /**
     * Store the given comment information under {@code key}, replacing any previous entry
     * for that key.
     *
     * @param key     the key to store the information under
     * @param comment the comment to cache
     * @param json    the JSON object to cache alongside the comment
     * @param index   the index to cache alongside the comment
     */
    public void put(@Nonnull final String key, @Nonnull final JsonObject comment,
                    @Nonnull final JsonObject json, final int index) {
        // Only make room when a new key is added; overwriting an existing key does not
        // grow the store and must not push out an unrelated entry.
        if (!store.containsKey(key) && store.size() >= maxSize) {
            evictLeastRecentlyUsed();
        }
        store.put(key, new CachedCommentInfo(comment, json, index, nextTick()));
    }

    /**
     * Retrieve the entry stored under {@code key} and mark it as most recently used.
     *
     * @param key the key to look up
     * @return the cached entry or {@code null} if there is none for {@code key}
     */
    @Nullable
    public CachedCommentInfo get(final String key) {
        final CachedCommentInfo result = store.get(key);
        if (result != null) {
            result.lastHit = nextTick();
        }
        return result;
    }

    /**
     * @return the number of entries currently cached
     */
    public int size() {
        return store.size();
    }

    /**
     * @return {@code true} if no entries are cached
     */
    public boolean isEmpty() {
        return store.isEmpty();
    }

    /**
     * Remove all entries from the cache.
     */
    public void clear() {
        store.clear();
    }

    /**
     * Remove the entry with the smallest access tick, if there is any entry at all.
     */
    private void evictLeastRecentlyUsed() {
        Map.Entry<String, CachedCommentInfo> eldest = null;
        for (final Map.Entry<String, CachedCommentInfo> candidate : store.entrySet()) {
            if (eldest == null || candidate.getValue().lastHit < eldest.getValue().lastHit) {
                eldest = candidate;
            }
        }
        if (eldest != null) {
            store.remove(eldest.getKey());
        }
    }

    /**
     * @return the next value of the logical access clock
     */
    private long nextTick() {
        return ++accessCounter;
    }

    /**
     * Value stored in the cache: a comment together with the JSON object and index that
     * were passed to {@link #put(String, JsonObject, JsonObject, int)}.
     */
    public static final class CachedCommentInfo {
        @Nonnull public final JsonObject comment;
        @Nonnull public final JsonObject json;
        public final int index;
        /** Tick of the logical access clock at which this entry was last put or retrieved. */
        private long lastHit;

        private CachedCommentInfo(@Nonnull final JsonObject comment,
                                  @Nonnull final JsonObject json,
                                  final int index,
                                  final long lastHit) {
            this.comment = comment;
            this.json = json;
            this.index = index;
            this.lastHit = lastHit;
        }
    }
}

View File

@ -1,6 +1,7 @@
package org.schabi.newpipe.extractor.utils; package org.schabi.newpipe.extractor.utils.cache;
import org.junit.jupiter.api.Test; import org.junit.jupiter.api.Test;
import org.schabi.newpipe.extractor.utils.cache.ManifestCreatorCache;
import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertEquals;

View File

@ -0,0 +1,83 @@
package org.schabi.newpipe.extractor.utils.cache;
import com.grack.nanojson.JsonObject;
import org.junit.jupiter.api.Test;
import static org.junit.jupiter.api.Assertions.*;
class SoundCloudCommentsCacheTest {
@Test
void testInstantiation() {
assertThrows(RuntimeException.class, () -> new SoundCloudCommentsCache(-15));
assertThrows(RuntimeException.class, () -> new SoundCloudCommentsCache(0));
assertDoesNotThrow(() -> new SoundCloudCommentsCache(1));
assertDoesNotThrow(() -> new SoundCloudCommentsCache(10));
}
@Test
void testSize() {
SoundCloudCommentsCache cache = new SoundCloudCommentsCache(10);
assertEquals(0, cache.size());
assertTrue(cache.isEmpty());
cache.put("a", new JsonObject(), new JsonObject(), 1);
assertEquals(1, cache.size());
cache.put("b", new JsonObject(), new JsonObject(), 1);
assertEquals(2, cache.size());
cache.put("c", new JsonObject(), new JsonObject(), 1);
assertEquals(3, cache.size());
cache.put("a", new JsonObject(), new JsonObject(), 1);
assertEquals(3, cache.size());
cache.put("b", new JsonObject(), new JsonObject(), 1);
assertEquals(3, cache.size());
cache.clear();
assertEquals(0, cache.size());
}
@Test
void testLRUStrategy() {
final SoundCloudCommentsCache cache = new SoundCloudCommentsCache(4);
cache.put("1", new JsonObject(), new JsonObject(), 1);
cache.put("2", new JsonObject(), new JsonObject(), 2);
cache.put("3", new JsonObject(), new JsonObject(), 3);
cache.put("4", new JsonObject(), new JsonObject(), 4);
cache.put("5", new JsonObject(), new JsonObject(), 5);
assertNull(cache.get("1"));
final SoundCloudCommentsCache.CachedCommentInfo cci = cache.get("2");
assertNotNull(cci);
cache.put("6", new JsonObject(), new JsonObject(), 6);
assertNotNull(cache.get("2"));
assertNull(cache.get("3"));
cache.put("7", new JsonObject(), new JsonObject(), 7);
cache.put("8", new JsonObject(), new JsonObject(), 8);
cache.put("9", new JsonObject(), new JsonObject(), 9);
assertNull(cache.get("1"));
assertNull(cache.get("3"));
assertNull(cache.get("4"));
assertNull(cache.get("5"));
assertNotNull(cache.get("2"));
}
@Test
void testStorage() {
final SoundCloudCommentsCache cache = new SoundCloudCommentsCache(10);
cache.put("1", new JsonObject(), new JsonObject(), 1);
cache.put("1", new JsonObject(), new JsonObject(), 2);
assertEquals(2, cache.get("1").index);
cache.put("1", new JsonObject(), new JsonObject(), 3);
assertEquals(3, cache.get("1").index);
}
@Test
void testClear() {
final SoundCloudCommentsCache cache = new SoundCloudCommentsCache(10);
cache.put("1", new JsonObject(), new JsonObject(), 1);
cache.put("2", new JsonObject(), new JsonObject(), 2);
cache.put("3", new JsonObject(), new JsonObject(), 3);
cache.put("4", new JsonObject(), new JsonObject(), 4);
cache.put("5", new JsonObject(), new JsonObject(), 5);
cache.clear();
assertTrue(cache.isEmpty());
assertEquals(0, cache.size());
}
}