[Bandcamp] Support loading additional comments (#1030)

This commit is contained in:
petlyh 2023-03-04 14:01:06 +01:00 committed by GitHub
parent 6bdd698c25
commit 5a9b6ed2e3
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 95 additions and 28 deletions

View File

@ -1,9 +1,13 @@
package org.schabi.newpipe.extractor.services.bandcamp.extractors; package org.schabi.newpipe.extractor.services.bandcamp.extractors;
import static org.schabi.newpipe.extractor.services.bandcamp.extractors.BandcampExtractorHelper.BASE_API_URL;
import com.grack.nanojson.JsonArray;
import com.grack.nanojson.JsonObject;
import com.grack.nanojson.JsonWriter;
import org.jsoup.Jsoup; import org.jsoup.Jsoup;
import org.jsoup.nodes.Document; import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.schabi.newpipe.extractor.Page; import org.schabi.newpipe.extractor.Page;
import org.schabi.newpipe.extractor.StreamingService; import org.schabi.newpipe.extractor.StreamingService;
import org.schabi.newpipe.extractor.comments.CommentsExtractor; import org.schabi.newpipe.extractor.comments.CommentsExtractor;
@ -11,13 +15,22 @@ import org.schabi.newpipe.extractor.comments.CommentsInfoItem;
import org.schabi.newpipe.extractor.comments.CommentsInfoItemsCollector; import org.schabi.newpipe.extractor.comments.CommentsInfoItemsCollector;
import org.schabi.newpipe.extractor.downloader.Downloader; import org.schabi.newpipe.extractor.downloader.Downloader;
import org.schabi.newpipe.extractor.exceptions.ExtractionException; import org.schabi.newpipe.extractor.exceptions.ExtractionException;
import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.exceptions.ReCaptchaException;
import org.schabi.newpipe.extractor.linkhandler.ListLinkHandler; import org.schabi.newpipe.extractor.linkhandler.ListLinkHandler;
import org.schabi.newpipe.extractor.utils.JsonUtils;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.Collections;
import java.util.List;
import javax.annotation.Nonnull; import javax.annotation.Nonnull;
import java.io.IOException;
public class BandcampCommentsExtractor extends CommentsExtractor { public class BandcampCommentsExtractor extends CommentsExtractor {
private static final String REVIEWS_API_URL = BASE_API_URL + "/tralbumcollectors/2/reviews";
private Document document; private Document document;
@ -39,19 +52,81 @@ public class BandcampCommentsExtractor extends CommentsExtractor {
final CommentsInfoItemsCollector collector = new CommentsInfoItemsCollector(getServiceId()); final CommentsInfoItemsCollector collector = new CommentsInfoItemsCollector(getServiceId());
final Elements writings = document.getElementsByClass("writing"); final JsonObject collectorsData = JsonUtils.toJsonObject(
document.getElementById("collectors-data").attr("data-blob"));
final JsonArray reviews = collectorsData.getArray("reviews");
for (final Element writing : writings) { for (final Object review : reviews) {
collector.commit(new BandcampCommentsInfoItemExtractor(writing, getUrl())); collector.commit(
new BandcampCommentsInfoItemExtractor((JsonObject) review, getUrl()));
} }
if (!collectorsData.getBoolean("more_reviews_available")) {
return new InfoItemsPage<>(collector, null); return new InfoItemsPage<>(collector, null);
} }
final String trackId = getTrackId();
final String token = getNextPageToken(reviews);
return new InfoItemsPage<>(collector, new Page(List.of(trackId, token)));
}
/**
 * Loads a follow-up page of reviews.
 *
 * The ids of {@code page} are read as {@code [trackId, token]}; both are handed to
 * {@code fetchReviewsData}, and every entry of the response's {@code "results"} array is
 * committed to the collector. A next page (same track id, token from
 * {@code getNextPageToken}) is returned only when the response's {@code "more_available"}
 * flag is true; otherwise the next page is {@code null}.
 *
 * NOTE(review): this text is a side-by-side diff rendering, so several lines below carry
 * the removed and the added column together - confirm against the actual file.
 */
@Override @Override
public InfoItemsPage<CommentsInfoItem> getPage(final Page page) public InfoItemsPage<CommentsInfoItem> getPage(final Page page)
throws IOException, ExtractionException { throws IOException, ExtractionException {
// NOTE(review): presumably the removed pre-change stub body shown by the diff - confirm.
return null;
final CommentsInfoItemsCollector collector = new CommentsInfoItemsCollector(getServiceId());
// Index 0 is the track id, index 1 the pagination token (see the Page built at the end).
final List<String> pageIds = page.getIds();
final String trackId = pageIds.get(0);
final String token = pageIds.get(1);
final JsonObject reviewsData = fetchReviewsData(trackId, token);
final JsonArray reviews = reviewsData.getArray("results");
for (final Object review : reviews) {
collector.commit(
new BandcampCommentsInfoItemExtractor((JsonObject) review, getUrl()));
}
// No further reviews reported: end pagination with a null next page.
if (!reviewsData.getBoolean("more_available")) {
return new InfoItemsPage<>(collector, null);
}
return new InfoItemsPage<>(collector,
new Page(List.of(trackId, getNextPageToken(reviews))));
}
/**
 * Requests a batch of reviews from {@code REVIEWS_API_URL} and parses the response body.
 *
 * The request is a JSON POST carrying the track id, the pagination token, a fixed
 * {@code count} of 7 and an empty {@code exclude_fan_ids} array.
 *
 * @param trackId value sent as {@code tralbum_id}
 * @param token   value sent as {@code token}
 * @return the response body parsed as a JSON object
 * @throws ParsingException if the request fails with an {@link IOException} or a
 *                          {@code ReCaptchaException}, or if the body cannot be parsed
 */
private JsonObject fetchReviewsData(final String trackId, final String token)
        throws ParsingException {
    try {
        // Resolve the downloader before building the payload, as the original call did.
        final Downloader downloader = getDownloader();

        final String requestJson = JsonWriter.string()
                .object()
                .value("tralbum_type", "t")
                .value("tralbum_id", trackId)
                .value("token", token)
                .value("count", 7)
                .array("exclude_fan_ids").end()
                .end()
                .done();
        final byte[] requestBody = requestJson.getBytes(StandardCharsets.UTF_8);

        final String responseBody = downloader
                .postWithContentTypeJson(REVIEWS_API_URL, Collections.emptyMap(), requestBody)
                .responseBody();
        return JsonUtils.toJsonObject(responseBody);
    } catch (final IOException | ReCaptchaException e) {
        throw new ParsingException("Could not fetch reviews", e);
    }
}
/**
 * Returns the pagination token of the last review in {@code reviews}.
 *
 * Only entries that are {@link JsonObject}s are considered. The array is scanned from the
 * end, so the first object found is the last review.
 *
 * @param reviews the reviews of the page that was just loaded
 * @return the {@code "token"} string of the last review object
 * @throws ParsingException if the array contains no review object, or if the last review
 *                          object has no {@code "token"} string
 */
private String getNextPageToken(final JsonArray reviews) throws ParsingException {
    for (int i = reviews.size() - 1; i >= 0; i--) {
        final Object review = reviews.get(i);
        if (!(review instanceof JsonObject)) {
            continue;
        }

        // A missing token used to surface as a NullPointerException from the stream
        // reduction; report it as the declared ParsingException instead.
        final String token = ((JsonObject) review).getString("token");
        if (token == null) {
            throw new ParsingException("Could not get token");
        }
        return token;
    }
    throw new ParsingException("Could not get token");
}
private String getTrackId() throws ParsingException {
final JsonObject pageProperties = JsonUtils.toJsonObject(
document.selectFirst("meta[name=bc-page-properties]")
.attr("content"));
return Long.toString(pageProperties.getLong("item_id"));
} }
@Override @Override

View File

@ -1,19 +1,20 @@
package org.schabi.newpipe.extractor.services.bandcamp.extractors; package org.schabi.newpipe.extractor.services.bandcamp.extractors;
import org.jsoup.nodes.Element; import static org.schabi.newpipe.extractor.services.bandcamp.extractors.BandcampExtractorHelper.getImageUrl;
import com.grack.nanojson.JsonObject;
import org.schabi.newpipe.extractor.comments.CommentsInfoItemExtractor; import org.schabi.newpipe.extractor.comments.CommentsInfoItemExtractor;
import org.schabi.newpipe.extractor.exceptions.ParsingException; import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.stream.Description; import org.schabi.newpipe.extractor.stream.Description;
import java.util.Objects;
public class BandcampCommentsInfoItemExtractor implements CommentsInfoItemExtractor { public class BandcampCommentsInfoItemExtractor implements CommentsInfoItemExtractor {
private final Element writing; private final JsonObject review;
private final String url; private final String url;
/**
 * Stores the data source of one comment together with the URL passed by the caller.
 *
 * NOTE(review): side-by-side diff rendering - the left column takes a jsoup
 * {@code Element writing}, the right column a {@code JsonObject review}; confirm which
 * revision the actual file contains.
 */
public BandcampCommentsInfoItemExtractor(final Element writing, final String url) { public BandcampCommentsInfoItemExtractor(final JsonObject review, final String url) {
this.writing = writing; this.review = review;
this.url = url; this.url = url;
} }
@ -29,31 +30,21 @@ public class BandcampCommentsInfoItemExtractor implements CommentsInfoItemExtrac
/**
 * Returns the thumbnail URL of the comment.
 *
 * NOTE(review): side-by-side diff rendering - the left column reads the {@code src}
 * attribute of the elements with class {@code thumb}, the right column delegates to
 * {@code getUploaderAvatarUrl()}; confirm against the actual file.
 */
@Override @Override
public String getThumbnailUrl() throws ParsingException { public String getThumbnailUrl() throws ParsingException {
return writing.getElementsByClass("thumb").attr("src"); return getUploaderAvatarUrl();
} }
/**
 * Returns the comment text as a plain-text {@code Description}.
 *
 * NOTE(review): side-by-side diff rendering - the right column builds the description
 * from the review's {@code "why"} string; the stream pipeline and second return below
 * belong to the left column, which took the own text of the first element with class
 * {@code text}. Confirm against the actual file.
 */
@Override @Override
public Description getCommentText() throws ParsingException { public Description getCommentText() throws ParsingException {
final var text = writing.getElementsByClass("text").stream() return new Description(review.getString("why"), Description.PLAIN_TEXT);
.filter(Objects::nonNull)
.map(Element::ownText)
.findFirst()
.orElseThrow(() -> new ParsingException("Could not get comment text"));
return new Description(text, Description.PLAIN_TEXT);
} }
/**
 * Returns the name of the comment's author.
 *
 * NOTE(review): side-by-side diff rendering - the right column returns the review's
 * {@code "name"} string; the stream pipeline below belongs to the left column, which took
 * the text of the first element with class {@code name}. Confirm against the actual file.
 */
@Override @Override
public String getUploaderName() throws ParsingException { public String getUploaderName() throws ParsingException {
return writing.getElementsByClass("name").stream() return review.getString("name");
.filter(Objects::nonNull)
.map(Element::text)
.findFirst()
.orElseThrow(() -> new ParsingException("Could not get uploader name"));
} }
/**
 * Returns the avatar URL of the comment's author.
 *
 * NOTE(review): side-by-side diff rendering - the left column reads the {@code src}
 * attribute of the elements with class {@code thumb}, the right column passes the
 * review's {@code "image_id"} number and {@code false} to {@code getImageUrl}; confirm
 * against the actual file.
 */
@Override @Override
public String getUploaderAvatarUrl() { public String getUploaderAvatarUrl() {
return writing.getElementsByClass("thumb").attr("src"); return getImageUrl(review.getLong("image_id"), false);
} }
} }

View File

@ -37,6 +37,7 @@ public class BandcampCommentsExtractorTest {
@Test @Test
public void testGetCommentsAllData() throws IOException, ExtractionException { public void testGetCommentsAllData() throws IOException, ExtractionException {
ListExtractor.InfoItemsPage<CommentsInfoItem> comments = extractor.getInitialPage(); ListExtractor.InfoItemsPage<CommentsInfoItem> comments = extractor.getInitialPage();
assertTrue(comments.hasNextPage());
DefaultTests.defaultTestListOfItems(Bandcamp, comments.getItems(), comments.getErrors()); DefaultTests.defaultTestListOfItems(Bandcamp, comments.getItems(), comments.getErrors());
for (CommentsInfoItem c : comments.getItems()) { for (CommentsInfoItem c : comments.getItems()) {