[Bandcamp] Support loading additional comments (#1030)
This commit is contained in:
parent
6bdd698c25
commit
5a9b6ed2e3
|
@ -1,9 +1,13 @@
|
||||||
package org.schabi.newpipe.extractor.services.bandcamp.extractors;
|
package org.schabi.newpipe.extractor.services.bandcamp.extractors;
|
||||||
|
|
||||||
|
import static org.schabi.newpipe.extractor.services.bandcamp.extractors.BandcampExtractorHelper.BASE_API_URL;
|
||||||
|
|
||||||
|
import com.grack.nanojson.JsonArray;
|
||||||
|
import com.grack.nanojson.JsonObject;
|
||||||
|
import com.grack.nanojson.JsonWriter;
|
||||||
|
|
||||||
import org.jsoup.Jsoup;
|
import org.jsoup.Jsoup;
|
||||||
import org.jsoup.nodes.Document;
|
import org.jsoup.nodes.Document;
|
||||||
import org.jsoup.nodes.Element;
|
|
||||||
import org.jsoup.select.Elements;
|
|
||||||
import org.schabi.newpipe.extractor.Page;
|
import org.schabi.newpipe.extractor.Page;
|
||||||
import org.schabi.newpipe.extractor.StreamingService;
|
import org.schabi.newpipe.extractor.StreamingService;
|
||||||
import org.schabi.newpipe.extractor.comments.CommentsExtractor;
|
import org.schabi.newpipe.extractor.comments.CommentsExtractor;
|
||||||
|
@ -11,13 +15,22 @@ import org.schabi.newpipe.extractor.comments.CommentsInfoItem;
|
||||||
import org.schabi.newpipe.extractor.comments.CommentsInfoItemsCollector;
|
import org.schabi.newpipe.extractor.comments.CommentsInfoItemsCollector;
|
||||||
import org.schabi.newpipe.extractor.downloader.Downloader;
|
import org.schabi.newpipe.extractor.downloader.Downloader;
|
||||||
import org.schabi.newpipe.extractor.exceptions.ExtractionException;
|
import org.schabi.newpipe.extractor.exceptions.ExtractionException;
|
||||||
|
import org.schabi.newpipe.extractor.exceptions.ParsingException;
|
||||||
|
import org.schabi.newpipe.extractor.exceptions.ReCaptchaException;
|
||||||
import org.schabi.newpipe.extractor.linkhandler.ListLinkHandler;
|
import org.schabi.newpipe.extractor.linkhandler.ListLinkHandler;
|
||||||
|
import org.schabi.newpipe.extractor.utils.JsonUtils;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.nio.charset.StandardCharsets;
|
||||||
|
import java.util.Collections;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
import javax.annotation.Nonnull;
|
import javax.annotation.Nonnull;
|
||||||
import java.io.IOException;
|
|
||||||
|
|
||||||
public class BandcampCommentsExtractor extends CommentsExtractor {
|
public class BandcampCommentsExtractor extends CommentsExtractor {
|
||||||
|
|
||||||
|
private static final String REVIEWS_API_URL = BASE_API_URL + "/tralbumcollectors/2/reviews";
|
||||||
|
|
||||||
private Document document;
|
private Document document;
|
||||||
|
|
||||||
|
|
||||||
|
@ -39,19 +52,81 @@ public class BandcampCommentsExtractor extends CommentsExtractor {
|
||||||
|
|
||||||
final CommentsInfoItemsCollector collector = new CommentsInfoItemsCollector(getServiceId());
|
final CommentsInfoItemsCollector collector = new CommentsInfoItemsCollector(getServiceId());
|
||||||
|
|
||||||
final Elements writings = document.getElementsByClass("writing");
|
final JsonObject collectorsData = JsonUtils.toJsonObject(
|
||||||
|
document.getElementById("collectors-data").attr("data-blob"));
|
||||||
|
final JsonArray reviews = collectorsData.getArray("reviews");
|
||||||
|
|
||||||
for (final Element writing : writings) {
|
for (final Object review : reviews) {
|
||||||
collector.commit(new BandcampCommentsInfoItemExtractor(writing, getUrl()));
|
collector.commit(
|
||||||
|
new BandcampCommentsInfoItemExtractor((JsonObject) review, getUrl()));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (!collectorsData.getBoolean("more_reviews_available")) {
|
||||||
return new InfoItemsPage<>(collector, null);
|
return new InfoItemsPage<>(collector, null);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
final String trackId = getTrackId();
|
||||||
|
final String token = getNextPageToken(reviews);
|
||||||
|
return new InfoItemsPage<>(collector, new Page(List.of(trackId, token)));
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public InfoItemsPage<CommentsInfoItem> getPage(final Page page)
|
public InfoItemsPage<CommentsInfoItem> getPage(final Page page)
|
||||||
throws IOException, ExtractionException {
|
throws IOException, ExtractionException {
|
||||||
return null;
|
|
||||||
|
final CommentsInfoItemsCollector collector = new CommentsInfoItemsCollector(getServiceId());
|
||||||
|
|
||||||
|
final List<String> pageIds = page.getIds();
|
||||||
|
final String trackId = pageIds.get(0);
|
||||||
|
final String token = pageIds.get(1);
|
||||||
|
final JsonObject reviewsData = fetchReviewsData(trackId, token);
|
||||||
|
final JsonArray reviews = reviewsData.getArray("results");
|
||||||
|
|
||||||
|
for (final Object review : reviews) {
|
||||||
|
collector.commit(
|
||||||
|
new BandcampCommentsInfoItemExtractor((JsonObject) review, getUrl()));
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!reviewsData.getBoolean("more_available")) {
|
||||||
|
return new InfoItemsPage<>(collector, null);
|
||||||
|
}
|
||||||
|
|
||||||
|
return new InfoItemsPage<>(collector,
|
||||||
|
new Page(List.of(trackId, getNextPageToken(reviews))));
|
||||||
|
}
|
||||||
|
|
||||||
|
private JsonObject fetchReviewsData(final String trackId, final String token)
|
||||||
|
throws ParsingException {
|
||||||
|
try {
|
||||||
|
return JsonUtils.toJsonObject(getDownloader().postWithContentTypeJson(
|
||||||
|
REVIEWS_API_URL,
|
||||||
|
Collections.emptyMap(),
|
||||||
|
JsonWriter.string().object()
|
||||||
|
.value("tralbum_type", "t")
|
||||||
|
.value("tralbum_id", trackId)
|
||||||
|
.value("token", token)
|
||||||
|
.value("count", 7)
|
||||||
|
.array("exclude_fan_ids").end()
|
||||||
|
.end().done().getBytes(StandardCharsets.UTF_8)).responseBody());
|
||||||
|
} catch (final IOException | ReCaptchaException e) {
|
||||||
|
throw new ParsingException("Could not fetch reviews", e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private String getNextPageToken(final JsonArray reviews) throws ParsingException {
|
||||||
|
return reviews.stream()
|
||||||
|
.filter(JsonObject.class::isInstance)
|
||||||
|
.map(JsonObject.class::cast)
|
||||||
|
.map(review -> review.getString("token"))
|
||||||
|
.reduce((a, b) -> b) // keep only the last element
|
||||||
|
.orElseThrow(() -> new ParsingException("Could not get token"));
|
||||||
|
}
|
||||||
|
|
||||||
|
private String getTrackId() throws ParsingException {
|
||||||
|
final JsonObject pageProperties = JsonUtils.toJsonObject(
|
||||||
|
document.selectFirst("meta[name=bc-page-properties]")
|
||||||
|
.attr("content"));
|
||||||
|
return Long.toString(pageProperties.getLong("item_id"));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -1,19 +1,20 @@
|
||||||
package org.schabi.newpipe.extractor.services.bandcamp.extractors;
|
package org.schabi.newpipe.extractor.services.bandcamp.extractors;
|
||||||
|
|
||||||
import org.jsoup.nodes.Element;
|
import static org.schabi.newpipe.extractor.services.bandcamp.extractors.BandcampExtractorHelper.getImageUrl;
|
||||||
|
|
||||||
|
import com.grack.nanojson.JsonObject;
|
||||||
|
|
||||||
import org.schabi.newpipe.extractor.comments.CommentsInfoItemExtractor;
|
import org.schabi.newpipe.extractor.comments.CommentsInfoItemExtractor;
|
||||||
import org.schabi.newpipe.extractor.exceptions.ParsingException;
|
import org.schabi.newpipe.extractor.exceptions.ParsingException;
|
||||||
import org.schabi.newpipe.extractor.stream.Description;
|
import org.schabi.newpipe.extractor.stream.Description;
|
||||||
|
|
||||||
import java.util.Objects;
|
|
||||||
|
|
||||||
public class BandcampCommentsInfoItemExtractor implements CommentsInfoItemExtractor {
|
public class BandcampCommentsInfoItemExtractor implements CommentsInfoItemExtractor {
|
||||||
|
|
||||||
private final Element writing;
|
private final JsonObject review;
|
||||||
private final String url;
|
private final String url;
|
||||||
|
|
||||||
public BandcampCommentsInfoItemExtractor(final Element writing, final String url) {
|
public BandcampCommentsInfoItemExtractor(final JsonObject review, final String url) {
|
||||||
this.writing = writing;
|
this.review = review;
|
||||||
this.url = url;
|
this.url = url;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -29,31 +30,21 @@ public class BandcampCommentsInfoItemExtractor implements CommentsInfoItemExtrac
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String getThumbnailUrl() throws ParsingException {
|
public String getThumbnailUrl() throws ParsingException {
|
||||||
return writing.getElementsByClass("thumb").attr("src");
|
return getUploaderAvatarUrl();
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Description getCommentText() throws ParsingException {
|
public Description getCommentText() throws ParsingException {
|
||||||
final var text = writing.getElementsByClass("text").stream()
|
return new Description(review.getString("why"), Description.PLAIN_TEXT);
|
||||||
.filter(Objects::nonNull)
|
|
||||||
.map(Element::ownText)
|
|
||||||
.findFirst()
|
|
||||||
.orElseThrow(() -> new ParsingException("Could not get comment text"));
|
|
||||||
|
|
||||||
return new Description(text, Description.PLAIN_TEXT);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String getUploaderName() throws ParsingException {
|
public String getUploaderName() throws ParsingException {
|
||||||
return writing.getElementsByClass("name").stream()
|
return review.getString("name");
|
||||||
.filter(Objects::nonNull)
|
|
||||||
.map(Element::text)
|
|
||||||
.findFirst()
|
|
||||||
.orElseThrow(() -> new ParsingException("Could not get uploader name"));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String getUploaderAvatarUrl() {
|
public String getUploaderAvatarUrl() {
|
||||||
return writing.getElementsByClass("thumb").attr("src");
|
return getImageUrl(review.getLong("image_id"), false);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -37,6 +37,7 @@ public class BandcampCommentsExtractorTest {
|
||||||
@Test
|
@Test
|
||||||
public void testGetCommentsAllData() throws IOException, ExtractionException {
|
public void testGetCommentsAllData() throws IOException, ExtractionException {
|
||||||
ListExtractor.InfoItemsPage<CommentsInfoItem> comments = extractor.getInitialPage();
|
ListExtractor.InfoItemsPage<CommentsInfoItem> comments = extractor.getInitialPage();
|
||||||
|
assertTrue(comments.hasNextPage());
|
||||||
|
|
||||||
DefaultTests.defaultTestListOfItems(Bandcamp, comments.getItems(), comments.getErrors());
|
DefaultTests.defaultTestListOfItems(Bandcamp, comments.getItems(), comments.getErrors());
|
||||||
for (CommentsInfoItem c : comments.getItems()) {
|
for (CommentsInfoItem c : comments.getItems()) {
|
||||||
|
|
Loading…
Reference in New Issue