Merge pull request #794 from FireMasterK/comments-count
[YouTube] Add support to extract total comment count
This commit is contained in:
commit
c1040bccac
|
@ -22,6 +22,13 @@ public abstract class CommentsExtractor extends ListExtractor<CommentsInfoItem>
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return the total number of comments, or {@code -1} if the count is not available
|
||||||
|
*/
|
||||||
|
public int getCommentsCount() throws ExtractionException {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
@Nonnull
|
@Nonnull
|
||||||
@Override
|
@Override
|
||||||
public String getName() throws ParsingException {
|
public String getName() throws ParsingException {
|
||||||
|
|
|
@ -48,6 +48,11 @@ public final class CommentsInfo extends ListInfo<CommentsInfoItem> {
|
||||||
ExtractorHelper.getItemsPageOrLogError(commentsInfo, commentsExtractor);
|
ExtractorHelper.getItemsPageOrLogError(commentsInfo, commentsExtractor);
|
||||||
commentsInfo.setCommentsDisabled(commentsExtractor.isCommentsDisabled());
|
commentsInfo.setCommentsDisabled(commentsExtractor.isCommentsDisabled());
|
||||||
commentsInfo.setRelatedItems(initialCommentsPage.getItems());
|
commentsInfo.setRelatedItems(initialCommentsPage.getItems());
|
||||||
|
try {
|
||||||
|
commentsInfo.setCommentsCount(commentsExtractor.getCommentsCount());
|
||||||
|
} catch (final Exception e) {
|
||||||
|
commentsInfo.addError(e);
|
||||||
|
}
|
||||||
commentsInfo.setNextPage(initialCommentsPage.getNextPage());
|
commentsInfo.setNextPage(initialCommentsPage.getNextPage());
|
||||||
|
|
||||||
return commentsInfo;
|
return commentsInfo;
|
||||||
|
@ -76,6 +81,7 @@ public final class CommentsInfo extends ListInfo<CommentsInfoItem> {
|
||||||
|
|
||||||
private transient CommentsExtractor commentsExtractor;
|
private transient CommentsExtractor commentsExtractor;
|
||||||
private boolean commentsDisabled = false;
|
private boolean commentsDisabled = false;
|
||||||
|
private int commentsCount;
|
||||||
|
|
||||||
public CommentsExtractor getCommentsExtractor() {
|
public CommentsExtractor getCommentsExtractor() {
|
||||||
return commentsExtractor;
|
return commentsExtractor;
|
||||||
|
@ -86,7 +92,6 @@ public final class CommentsInfo extends ListInfo<CommentsInfoItem> {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @apiNote Warning: This method is experimental and may get removed in a future release.
|
|
||||||
* @return {@code true} if the comments are disabled otherwise {@code false} (default)
|
* @return {@code true} if the comments are disabled otherwise {@code false} (default)
|
||||||
* @see CommentsExtractor#isCommentsDisabled()
|
* @see CommentsExtractor#isCommentsDisabled()
|
||||||
*/
|
*/
|
||||||
|
@ -95,10 +100,27 @@ public final class CommentsInfo extends ListInfo<CommentsInfoItem> {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @apiNote Warning: This method is experimental and may get removed in a future release.
|
|
||||||
* @param commentsDisabled {@code true} if the comments are disabled otherwise {@code false}
|
* @param commentsDisabled {@code true} if the comments are disabled otherwise {@code false}
|
||||||
*/
|
*/
|
||||||
public void setCommentsDisabled(final boolean commentsDisabled) {
|
public void setCommentsDisabled(final boolean commentsDisabled) {
|
||||||
this.commentsDisabled = commentsDisabled;
|
this.commentsDisabled = commentsDisabled;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the total number of comments.
|
||||||
|
*
|
||||||
|
* @return the total number of comments
|
||||||
|
*/
|
||||||
|
public int getCommentsCount() {
|
||||||
|
return commentsCount;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Sets the total number of comments.
|
||||||
|
*
|
||||||
|
* @param commentsCount the total number of comments
|
||||||
|
*/
|
||||||
|
public void setCommentsCount(final int commentsCount) {
|
||||||
|
this.commentsCount = commentsCount;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,18 +1,8 @@
|
||||||
package org.schabi.newpipe.extractor.services.youtube.extractors;
|
package org.schabi.newpipe.extractor.services.youtube.extractors;
|
||||||
|
|
||||||
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getJsonPostResponse;
|
import com.grack.nanojson.JsonArray;
|
||||||
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.prepareDesktopJsonBuilder;
|
import com.grack.nanojson.JsonObject;
|
||||||
import static org.schabi.newpipe.extractor.utils.Utils.isNullOrEmpty;
|
import com.grack.nanojson.JsonWriter;
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.nio.charset.StandardCharsets;
|
|
||||||
import java.util.Collections;
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.Optional;
|
|
||||||
|
|
||||||
import javax.annotation.Nonnull;
|
|
||||||
import javax.annotation.Nullable;
|
|
||||||
|
|
||||||
import org.schabi.newpipe.extractor.Page;
|
import org.schabi.newpipe.extractor.Page;
|
||||||
import org.schabi.newpipe.extractor.StreamingService;
|
import org.schabi.newpipe.extractor.StreamingService;
|
||||||
import org.schabi.newpipe.extractor.comments.CommentsExtractor;
|
import org.schabi.newpipe.extractor.comments.CommentsExtractor;
|
||||||
|
@ -24,26 +14,31 @@ import org.schabi.newpipe.extractor.exceptions.ParsingException;
|
||||||
import org.schabi.newpipe.extractor.linkhandler.ListLinkHandler;
|
import org.schabi.newpipe.extractor.linkhandler.ListLinkHandler;
|
||||||
import org.schabi.newpipe.extractor.localization.Localization;
|
import org.schabi.newpipe.extractor.localization.Localization;
|
||||||
import org.schabi.newpipe.extractor.utils.JsonUtils;
|
import org.schabi.newpipe.extractor.utils.JsonUtils;
|
||||||
|
import org.schabi.newpipe.extractor.utils.Utils;
|
||||||
|
|
||||||
import com.grack.nanojson.JsonArray;
|
import javax.annotation.Nonnull;
|
||||||
import com.grack.nanojson.JsonObject;
|
import javax.annotation.Nullable;
|
||||||
import com.grack.nanojson.JsonWriter;
|
import java.io.IOException;
|
||||||
|
import java.nio.charset.StandardCharsets;
|
||||||
|
import java.util.Collections;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getJsonPostResponse;
|
||||||
|
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getTextFromObject;
|
||||||
|
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.prepareDesktopJsonBuilder;
|
||||||
|
import static org.schabi.newpipe.extractor.utils.Utils.isNullOrEmpty;
|
||||||
|
|
||||||
public class YoutubeCommentsExtractor extends CommentsExtractor {
|
public class YoutubeCommentsExtractor extends CommentsExtractor {
|
||||||
|
|
||||||
private JsonObject nextResponse;
|
/**
|
||||||
|
* Whether comments are disabled on the video.
|
||||||
|
*/
|
||||||
|
private boolean commentsDisabled;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Caching mechanism and holder of the commentsDisabled value.
|
* The second ajax <b>/next</b> response.
|
||||||
* <br/>
|
|
||||||
* Initial value = empty -> unknown if comments are disabled or not<br/>
|
|
||||||
* Some method calls {@link #findInitialCommentsToken()}
|
|
||||||
* -> value is set<br/>
|
|
||||||
* If the method or another one that is depending on disabled comments
|
|
||||||
* is now called again, the method execution can avoid unnecessary calls
|
|
||||||
*/
|
*/
|
||||||
@SuppressWarnings("OptionalUsedAsFieldOrParameterType")
|
private JsonObject ajaxJson;
|
||||||
private Optional<Boolean> optCommentsDisabled = Optional.empty();
|
|
||||||
|
|
||||||
public YoutubeCommentsExtractor(
|
public YoutubeCommentsExtractor(
|
||||||
final StreamingService service,
|
final StreamingService service,
|
||||||
|
@ -56,32 +51,25 @@ public class YoutubeCommentsExtractor extends CommentsExtractor {
|
||||||
public InfoItemsPage<CommentsInfoItem> getInitialPage()
|
public InfoItemsPage<CommentsInfoItem> getInitialPage()
|
||||||
throws IOException, ExtractionException {
|
throws IOException, ExtractionException {
|
||||||
|
|
||||||
// Check if findInitialCommentsToken was already called and optCommentsDisabled initialized
|
if (commentsDisabled) {
|
||||||
if (optCommentsDisabled.orElse(false)) {
|
|
||||||
return getInfoItemsPageForDisabledComments();
|
return getInfoItemsPageForDisabledComments();
|
||||||
}
|
}
|
||||||
|
|
||||||
// Get the token
|
return extractComments(ajaxJson);
|
||||||
final String commentsToken = findInitialCommentsToken();
|
|
||||||
// Check if the comments have been disabled
|
|
||||||
if (optCommentsDisabled.get()) {
|
|
||||||
return getInfoItemsPageForDisabledComments();
|
|
||||||
}
|
|
||||||
|
|
||||||
return getPage(getNextPage(commentsToken));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Finds the initial comments token and initializes commentsDisabled.
|
* Finds the initial comments token and initializes commentsDisabled.
|
||||||
* <br/>
|
* <br/>
|
||||||
* Also sets {@link #optCommentsDisabled}.
|
* Also sets {@link #commentsDisabled}.
|
||||||
*
|
*
|
||||||
* @return the continuation token or null if none was found
|
* @return the continuation token or null if none was found
|
||||||
*/
|
*/
|
||||||
@Nullable
|
@Nullable
|
||||||
private String findInitialCommentsToken() throws ExtractionException {
|
private String findInitialCommentsToken(final JsonObject nextResponse)
|
||||||
|
throws ExtractionException {
|
||||||
final String token = JsonUtils.getArray(nextResponse,
|
final String token = JsonUtils.getArray(nextResponse,
|
||||||
"contents.twoColumnWatchNextResults.results.results.contents")
|
"contents.twoColumnWatchNextResults.results.results.contents")
|
||||||
.stream()
|
.stream()
|
||||||
// Only use JsonObjects
|
// Only use JsonObjects
|
||||||
.filter(JsonObject.class::isInstance)
|
.filter(JsonObject.class::isInstance)
|
||||||
|
@ -112,7 +100,7 @@ public class YoutubeCommentsExtractor extends CommentsExtractor {
|
||||||
.orElse(null);
|
.orElse(null);
|
||||||
|
|
||||||
// The comments are disabled if we couldn't get a token
|
// The comments are disabled if we couldn't get a token
|
||||||
optCommentsDisabled = Optional.of(token == null);
|
commentsDisabled = token == null;
|
||||||
|
|
||||||
return token;
|
return token;
|
||||||
}
|
}
|
||||||
|
@ -123,9 +111,9 @@ public class YoutubeCommentsExtractor extends CommentsExtractor {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Nullable
|
@Nullable
|
||||||
private Page getNextPage(@Nonnull final JsonObject ajaxJson) throws ExtractionException {
|
private Page getNextPage(@Nonnull final JsonObject jsonObject) throws ExtractionException {
|
||||||
final JsonArray onResponseReceivedEndpoints =
|
final JsonArray onResponseReceivedEndpoints =
|
||||||
ajaxJson.getArray("onResponseReceivedEndpoints");
|
jsonObject.getArray("onResponseReceivedEndpoints");
|
||||||
|
|
||||||
// Prevent ArrayIndexOutOfBoundsException
|
// Prevent ArrayIndexOutOfBoundsException
|
||||||
if (onResponseReceivedEndpoints.isEmpty()) {
|
if (onResponseReceivedEndpoints.isEmpty()) {
|
||||||
|
@ -173,30 +161,39 @@ public class YoutubeCommentsExtractor extends CommentsExtractor {
|
||||||
@Override
|
@Override
|
||||||
public InfoItemsPage<CommentsInfoItem> getPage(final Page page)
|
public InfoItemsPage<CommentsInfoItem> getPage(final Page page)
|
||||||
throws IOException, ExtractionException {
|
throws IOException, ExtractionException {
|
||||||
if (optCommentsDisabled.orElse(false)) {
|
|
||||||
|
if (commentsDisabled) {
|
||||||
return getInfoItemsPageForDisabledComments();
|
return getInfoItemsPageForDisabledComments();
|
||||||
}
|
}
|
||||||
|
|
||||||
if (page == null || isNullOrEmpty(page.getId())) {
|
if (page == null || isNullOrEmpty(page.getId())) {
|
||||||
throw new IllegalArgumentException("Page doesn't have the continuation.");
|
throw new IllegalArgumentException("Page doesn't have the continuation.");
|
||||||
}
|
}
|
||||||
|
|
||||||
final Localization localization = getExtractorLocalization();
|
final Localization localization = getExtractorLocalization();
|
||||||
|
// @formatter:off
|
||||||
final byte[] body = JsonWriter.string(
|
final byte[] body = JsonWriter.string(
|
||||||
prepareDesktopJsonBuilder(localization, getExtractorContentCountry())
|
prepareDesktopJsonBuilder(localization, getExtractorContentCountry())
|
||||||
.value("continuation", page.getId())
|
.value("continuation", page.getId())
|
||||||
.done())
|
.done())
|
||||||
.getBytes(StandardCharsets.UTF_8);
|
.getBytes(StandardCharsets.UTF_8);
|
||||||
|
// @formatter:on
|
||||||
|
|
||||||
final JsonObject ajaxJson = getJsonPostResponse("next", body, localization);
|
final var jsonObject = getJsonPostResponse("next", body, localization);
|
||||||
|
|
||||||
final CommentsInfoItemsCollector collector = new CommentsInfoItemsCollector(
|
return extractComments(jsonObject);
|
||||||
getServiceId());
|
|
||||||
collectCommentsFrom(collector, ajaxJson);
|
|
||||||
return new InfoItemsPage<>(collector, getNextPage(ajaxJson));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private void collectCommentsFrom(final CommentsInfoItemsCollector collector,
|
private InfoItemsPage<CommentsInfoItem> extractComments(final JsonObject jsonObject)
|
||||||
@Nonnull final JsonObject ajaxJson) throws ParsingException {
|
throws ExtractionException {
|
||||||
|
final CommentsInfoItemsCollector collector = new CommentsInfoItemsCollector(
|
||||||
|
getServiceId());
|
||||||
|
collectCommentsFrom(collector);
|
||||||
|
return new InfoItemsPage<>(collector, getNextPage(jsonObject));
|
||||||
|
}
|
||||||
|
|
||||||
|
private void collectCommentsFrom(final CommentsInfoItemsCollector collector)
|
||||||
|
throws ParsingException {
|
||||||
|
|
||||||
final JsonArray onResponseReceivedEndpoints =
|
final JsonArray onResponseReceivedEndpoints =
|
||||||
ajaxJson.getArray("onResponseReceivedEndpoints");
|
ajaxJson.getArray("onResponseReceivedEndpoints");
|
||||||
|
@ -254,24 +251,59 @@ public class YoutubeCommentsExtractor extends CommentsExtractor {
|
||||||
public void onFetchPage(@Nonnull final Downloader downloader)
|
public void onFetchPage(@Nonnull final Downloader downloader)
|
||||||
throws IOException, ExtractionException {
|
throws IOException, ExtractionException {
|
||||||
final Localization localization = getExtractorLocalization();
|
final Localization localization = getExtractorLocalization();
|
||||||
|
// @formatter:off
|
||||||
final byte[] body = JsonWriter.string(
|
final byte[] body = JsonWriter.string(
|
||||||
prepareDesktopJsonBuilder(localization, getExtractorContentCountry())
|
prepareDesktopJsonBuilder(localization, getExtractorContentCountry())
|
||||||
.value("videoId", getId())
|
.value("videoId", getId())
|
||||||
.done())
|
.done())
|
||||||
.getBytes(StandardCharsets.UTF_8);
|
.getBytes(StandardCharsets.UTF_8);
|
||||||
|
// @formatter:on
|
||||||
|
|
||||||
nextResponse = getJsonPostResponse("next", body, localization);
|
final String initialToken =
|
||||||
|
findInitialCommentsToken(getJsonPostResponse("next", body, localization));
|
||||||
|
|
||||||
|
if (initialToken == null) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// @formatter:off
|
||||||
|
final byte[] ajaxBody = JsonWriter.string(
|
||||||
|
prepareDesktopJsonBuilder(localization, getExtractorContentCountry())
|
||||||
|
.value("continuation", initialToken)
|
||||||
|
.done())
|
||||||
|
.getBytes(StandardCharsets.UTF_8);
|
||||||
|
// @formatter:on
|
||||||
|
|
||||||
|
ajaxJson = getJsonPostResponse("next", ajaxBody, localization);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public boolean isCommentsDisabled() throws ExtractionException {
|
public boolean isCommentsDisabled() {
|
||||||
// Check if commentsDisabled has to be initialized
|
return commentsDisabled;
|
||||||
if (!optCommentsDisabled.isPresent()) {
|
}
|
||||||
// Initialize commentsDisabled
|
|
||||||
this.findInitialCommentsToken();
|
@Override
|
||||||
|
public int getCommentsCount() throws ExtractionException {
|
||||||
|
assertPageFetched();
|
||||||
|
|
||||||
|
if (commentsDisabled) {
|
||||||
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
return optCommentsDisabled.get();
|
final JsonObject countText = ajaxJson
|
||||||
|
.getArray("onResponseReceivedEndpoints").getObject(0)
|
||||||
|
.getObject("reloadContinuationItemsCommand")
|
||||||
|
.getArray("continuationItems").getObject(0)
|
||||||
|
.getObject("commentsHeaderRenderer")
|
||||||
|
.getObject("countText");
|
||||||
|
|
||||||
|
try {
|
||||||
|
return Integer.parseInt(
|
||||||
|
Utils.removeNonDigitCharacters(getTextFromObject(countText))
|
||||||
|
);
|
||||||
|
} catch (final Exception e) {
|
||||||
|
throw new ExtractionException("Unable to get comments count", e);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -89,6 +89,7 @@ public class YoutubeCommentsExtractorTest {
|
||||||
@Test
|
@Test
|
||||||
public void testGetCommentsAllData() throws IOException, ExtractionException {
|
public void testGetCommentsAllData() throws IOException, ExtractionException {
|
||||||
InfoItemsPage<CommentsInfoItem> comments = extractor.getInitialPage();
|
InfoItemsPage<CommentsInfoItem> comments = extractor.getInitialPage();
|
||||||
|
assertTrue(extractor.getCommentsCount() > 5); // more than 5 comments
|
||||||
|
|
||||||
DefaultTests.defaultTestListOfItems(YouTube, comments.getItems(), comments.getErrors());
|
DefaultTests.defaultTestListOfItems(YouTube, comments.getItems(), comments.getErrors());
|
||||||
for (CommentsInfoItem c : comments.getItems()) {
|
for (CommentsInfoItem c : comments.getItems()) {
|
||||||
|
@ -344,6 +345,11 @@ public class YoutubeCommentsExtractorTest {
|
||||||
assertNotEquals(UNKNOWN_REPLY_COUNT, firstComment.getReplyCount(), "Could not get the reply count of the first comment");
|
assertNotEquals(UNKNOWN_REPLY_COUNT, firstComment.getReplyCount(), "Could not get the reply count of the first comment");
|
||||||
assertGreater(300, firstComment.getReplyCount());
|
assertGreater(300, firstComment.getReplyCount());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testCommentsCount() throws IOException, ExtractionException {
|
||||||
|
assertTrue(extractor.getCommentsCount() > 18800);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public static class FormattingTest {
|
public static class FormattingTest {
|
||||||
|
|
Loading…
Reference in New Issue