Merge pull request #703 from FireMasterK/comment-replies

Add support for extracting comment replies continuation
This commit is contained in:
Tobi 2021-09-14 23:58:14 +02:00 committed by GitHub
commit a9d214478d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 1275 additions and 30 deletions

View File

@ -1,6 +1,7 @@
package org.schabi.newpipe.extractor.comments;
import org.schabi.newpipe.extractor.InfoItem;
import org.schabi.newpipe.extractor.Page;
import org.schabi.newpipe.extractor.localization.DateWrapper;
import javax.annotation.Nullable;
@ -21,6 +22,8 @@ public class CommentsInfoItem extends InfoItem {
private boolean heartedByUploader;
private boolean pinned;
private int streamPosition;
@Nullable
private Page replies;
public static final int NO_LIKE_COUNT = -1;
public static final int NO_STREAM_POSITION = -1;
@ -142,4 +145,8 @@ public class CommentsInfoItem extends InfoItem {
public int getStreamPosition() {
return streamPosition;
}
/**
 * Stores the page from which the replies to this comment can be fetched.
 *
 * @param replies the replies page, or {@code null} to store no page
 */
public void setReplies(@Nullable final Page replies) {
    this.replies = replies;
}
/**
 * Returns the page from which the replies to this comment can be fetched.
 *
 * @return the page previously stored via {@link #setReplies(Page)},
 *         or {@code null} if no page was stored
 */
@Nullable
public Page getReplies() {
    // The backing field is declared @Nullable, so callers must null-check the result.
    return this.replies;
}
}

View File

@ -1,6 +1,7 @@
package org.schabi.newpipe.extractor.comments;
import org.schabi.newpipe.extractor.InfoItemExtractor;
import org.schabi.newpipe.extractor.Page;
import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.localization.DateWrapper;
import org.schabi.newpipe.extractor.services.youtube.extractors.YoutubeCommentsInfoItemExtractor;
@ -107,4 +108,13 @@ public interface CommentsInfoItemExtractor extends InfoItemExtractor {
default int getStreamPosition() throws ParsingException {
return CommentsInfoItem.NO_STREAM_POSITION;
}
/**
 * Returns the continuation page from which the replies to this comment can be fetched.
 * <p>
 * This default implementation always returns {@code null}.
 *
 * @return the continuation {@link Page} for the replies, or {@code null} if replies are
 *         not supported
 * @throws ParsingException never thrown by this default implementation
 */
@Nullable
default Page getReplies() throws ParsingException {
return null;
}
}

View File

@ -93,6 +93,12 @@ public class CommentsInfoItemsCollector extends InfoItemsCollector<CommentsInfoI
addError(e);
}
try {
resultItem.setReplies(extractor.getReplies());
} catch (Exception e) {
addError(e);
}
return resultItem;
}
@ -106,12 +112,6 @@ public class CommentsInfoItemsCollector extends InfoItemsCollector<CommentsInfoI
}
public List<CommentsInfoItem> getCommentsInfoItemList() {
List<CommentsInfoItem> siiList = new ArrayList<>();
for (InfoItem ii : super.getItems()) {
if (ii instanceof CommentsInfoItem) {
siiList.add((CommentsInfoItem) ii);
}
}
return siiList;
return new ArrayList<>(super.getItems());
}
}

View File

@ -140,10 +140,13 @@ public class YoutubeCommentsExtractor extends CommentsExtractor {
return null;
}
final JsonObject continuationItemRenderer = jsonArray.getObject(jsonArray.size() - 1).getObject("continuationItemRenderer");
final String jsonPath = continuationItemRenderer.has("button") ? "button.buttonRenderer.command.continuationCommand.token" : "continuationEndpoint.continuationCommand.token";
final String continuation;
try {
continuation = JsonUtils.getString(jsonArray.getObject(jsonArray.size() - 1),
"continuationItemRenderer.continuationEndpoint.continuationCommand.token");
continuation = JsonUtils.getString(continuationItemRenderer, jsonPath);
} catch (final Exception e) {
return null;
}
@ -212,10 +215,11 @@ public class YoutubeCommentsExtractor extends CommentsExtractor {
contents.remove(index);
}
final String jsonKey = contents.getObject(0).has("commentThreadRenderer") ? "commentThreadRenderer" : "commentRenderer";
final List<Object> comments;
try {
comments = JsonUtils.getValues(contents,
"commentThreadRenderer.comment.commentRenderer");
comments = JsonUtils.getValues(contents, jsonKey);
} catch (final Exception e) {
throw new ParsingException("Unable to get parse youtube comments", e);
}

View File

@ -3,6 +3,8 @@ package org.schabi.newpipe.extractor.services.youtube.extractors;
import com.grack.nanojson.JsonArray;
import com.grack.nanojson.JsonObject;
import com.grack.nanojson.JsonWriter;
import org.schabi.newpipe.extractor.Page;
import org.schabi.newpipe.extractor.comments.CommentsInfoItemExtractor;
import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.localization.DateWrapper;
@ -18,6 +20,7 @@ import static org.schabi.newpipe.extractor.utils.Utils.EMPTY_STRING;
public class YoutubeCommentsInfoItemExtractor implements CommentsInfoItemExtractor {
private final JsonObject json;
private JsonObject commentRenderer;
private final String url;
private final TimeAgoParser timeAgoParser;
@ -29,6 +32,16 @@ public class YoutubeCommentsInfoItemExtractor implements CommentsInfoItemExtract
this.timeAgoParser = timeAgoParser;
}
/**
 * Returns the JSON object that the comment fields are read from, resolving it on the
 * first call and reusing the cached value afterwards.
 * <p>
 * When {@code json} has a {@code "comment"} key, the object at
 * {@code comment.commentRenderer} is used; otherwise {@code json} itself is used.
 *
 * @return the resolved comment renderer object
 * @throws ParsingException if {@code comment.commentRenderer} cannot be read from
 *                          {@code json}
 */
private JsonObject getCommentRenderer() throws ParsingException {
    if (commentRenderer != null) {
        return commentRenderer;
    }

    // Nothing is cached if the lookup throws, so a later call retries it.
    commentRenderer = json.has("comment")
            ? JsonUtils.getObject(json, "comment.commentRenderer")
            : json;
    return commentRenderer;
}
@Override
public String getUrl() throws ParsingException {
return url;
@ -37,7 +50,7 @@ public class YoutubeCommentsInfoItemExtractor implements CommentsInfoItemExtract
@Override
public String getThumbnailUrl() throws ParsingException {
try {
final JsonArray arr = JsonUtils.getArray(json, "authorThumbnail.thumbnails");
final JsonArray arr = JsonUtils.getArray(getCommentRenderer(), "authorThumbnail.thumbnails");
return JsonUtils.getString(arr.getObject(2), "url");
} catch (final Exception e) {
throw new ParsingException("Could not get thumbnail url", e);
@ -47,7 +60,7 @@ public class YoutubeCommentsInfoItemExtractor implements CommentsInfoItemExtract
@Override
public String getName() throws ParsingException {
try {
return getTextFromObject(JsonUtils.getObject(json, "authorText"));
return getTextFromObject(JsonUtils.getObject(getCommentRenderer(), "authorText"));
} catch (final Exception e) {
return EMPTY_STRING;
}
@ -56,7 +69,7 @@ public class YoutubeCommentsInfoItemExtractor implements CommentsInfoItemExtract
@Override
public String getTextualUploadDate() throws ParsingException {
try {
return getTextFromObject(JsonUtils.getObject(json, "publishedTimeText"));
return getTextFromObject(JsonUtils.getObject(getCommentRenderer(), "publishedTimeText"));
} catch (final Exception e) {
throw new ParsingException("Could not get publishedTimeText", e);
}
@ -94,7 +107,7 @@ public class YoutubeCommentsInfoItemExtractor implements CommentsInfoItemExtract
// Try first to get the exact like count by using the accessibility data
final String likeCount;
try {
likeCount = Utils.removeNonDigitCharacters(JsonUtils.getString(json,
likeCount = Utils.removeNonDigitCharacters(JsonUtils.getString(getCommentRenderer(),
"actionButtons.commentActionButtonsRenderer.likeButton.toggleButtonRenderer.accessibilityData.accessibilityData.label"));
} catch (final Exception e) {
// Use the approximate like count returned into the voteCount object
@ -145,11 +158,11 @@ public class YoutubeCommentsInfoItemExtractor implements CommentsInfoItemExtract
*/
try {
// If a comment has no likes voteCount is not set
if (!json.has("voteCount")) {
if (!getCommentRenderer().has("voteCount")) {
return EMPTY_STRING;
}
final JsonObject voteCountObj = JsonUtils.getObject(json, "voteCount");
final JsonObject voteCountObj = JsonUtils.getObject(getCommentRenderer(), "voteCount");
if (voteCountObj.isEmpty()) {
return EMPTY_STRING;
}
@ -162,7 +175,7 @@ public class YoutubeCommentsInfoItemExtractor implements CommentsInfoItemExtract
@Override
public String getCommentText() throws ParsingException {
try {
final JsonObject contentText = JsonUtils.getObject(json, "contentText");
final JsonObject contentText = JsonUtils.getObject(getCommentRenderer(), "contentText");
if (contentText.isEmpty()) {
// completely empty comments as described in
// https://github.com/TeamNewPipe/NewPipeExtractor/issues/380#issuecomment-668808584
@ -180,7 +193,7 @@ public class YoutubeCommentsInfoItemExtractor implements CommentsInfoItemExtract
@Override
public String getCommentId() throws ParsingException {
try {
return JsonUtils.getString(json, "commentId");
return JsonUtils.getString(getCommentRenderer(), "commentId");
} catch (final Exception e) {
throw new ParsingException("Could not get comment id", e);
}
@ -189,7 +202,7 @@ public class YoutubeCommentsInfoItemExtractor implements CommentsInfoItemExtract
@Override
public String getUploaderAvatarUrl() throws ParsingException {
try {
JsonArray arr = JsonUtils.getArray(json, "authorThumbnail.thumbnails");
JsonArray arr = JsonUtils.getArray(getCommentRenderer(), "authorThumbnail.thumbnails");
return JsonUtils.getString(arr.getObject(2), "url");
} catch (final Exception e) {
throw new ParsingException("Could not get author thumbnail", e);
@ -198,24 +211,24 @@ public class YoutubeCommentsInfoItemExtractor implements CommentsInfoItemExtract
@Override
public boolean isHeartedByUploader() throws ParsingException {
final JsonObject commentActionButtonsRenderer = json.getObject("actionButtons")
final JsonObject commentActionButtonsRenderer = getCommentRenderer().getObject("actionButtons")
.getObject("commentActionButtonsRenderer");
return commentActionButtonsRenderer.has("creatorHeart");
}
@Override
public boolean isPinned() {
return json.has("pinnedCommentBadge");
public boolean isPinned() throws ParsingException {
return getCommentRenderer().has("pinnedCommentBadge");
}
public boolean isUploaderVerified() {
return json.has("authorCommentBadge");
public boolean isUploaderVerified() throws ParsingException {
return getCommentRenderer().has("authorCommentBadge");
}
@Override
public String getUploaderName() throws ParsingException {
try {
return getTextFromObject(JsonUtils.getObject(json, "authorText"));
return getTextFromObject(JsonUtils.getObject(getCommentRenderer(), "authorText"));
} catch (final Exception e) {
return EMPTY_STRING;
}
@ -224,10 +237,20 @@ public class YoutubeCommentsInfoItemExtractor implements CommentsInfoItemExtract
@Override
public String getUploaderUrl() throws ParsingException {
try {
return "https://www.youtube.com/channel/" + JsonUtils.getString(json,
return "https://www.youtube.com/channel/" + JsonUtils.getString(getCommentRenderer(),
"authorEndpoint.browseEndpoint.browseId");
} catch (final Exception e) {
return EMPTY_STRING;
}
}
/**
 * Builds the page used to fetch the replies to this comment.
 * <p>
 * The continuation token is read from the first entry of
 * {@code replies.commentRepliesRenderer.contents} in {@code json}.
 *
 * @return a {@link Page} made of this extractor's url and the continuation token, or
 *         {@code null} if the token could not be read
 */
@Override
public Page getReplies() throws ParsingException {
    try {
        final JsonArray replyContents =
                JsonUtils.getArray(json, "replies.commentRepliesRenderer.contents");
        final JsonObject firstEntry = replyContents.getObject(0);
        final String continuationToken = JsonUtils.getString(firstEntry,
                "continuationItemRenderer.continuationEndpoint.continuationCommand.token");
        return new Page(url, continuationToken);
    } catch (final Exception e) {
        // Any failure means no replies page. This is also the result for a comment
        // that is itself a reply, since YouTube does not support nested replies.
        return null;
    }
}
}

View File

@ -306,4 +306,32 @@ public class YoutubeCommentsExtractorTest {
assertTrue("The first pinned comment has no vote count", !Utils.isBlank(pinnedComment.getTextualLikeCount()));
}
}
/**
 * Checks that the replies page of the first comment of a video can be fetched and parsed.
 */
public static class RepliesTest {
    private static final String url = "https://www.youtube.com/watch?v=--yeOvJGZQk";
    private static YoutubeCommentsExtractor extractor;

    @BeforeClass
    public static void setUp() throws Exception {
        YoutubeParsingHelper.resetClientVersionAndKey();
        YoutubeParsingHelper.setNumberGenerator(new Random(1));
        NewPipe.init(new DownloaderFactory().getDownloader(RESOURCE_PATH + "replies"));

        extractor = (YoutubeCommentsExtractor) YouTube.getCommentsExtractor(url);
        extractor.fetchPage();
    }

    @Test
    public void testGetCommentsFirstReplies() throws IOException, ExtractionException {
        final InfoItemsPage<CommentsInfoItem> comments = extractor.getInitialPage();
        DefaultTests.defaultTestListOfItems(YouTube, comments.getItems(), comments.getErrors());

        // Follow the replies page attached to the first top-level comment.
        final CommentsInfoItem firstComment = comments.getItems().get(0);
        final InfoItemsPage<CommentsInfoItem> replies =
                extractor.getPage(firstComment.getReplies());

        final String firstReplyText = replies.getItems().get(0).getCommentText();
        assertEquals("First reply comment did not match", "Lol", firstReplyText);
    }
}
}