From 0fb73301e38c72d0e0b742deab5e6d9da4cde07c Mon Sep 17 00:00:00 2001 From: TobiGr Date: Wed, 5 Aug 2020 18:25:35 +0200 Subject: [PATCH] [YouTube] Fix crash on empty comment Closes #380 --- .../extractors/YoutubeCommentsExtractor.java | 16 +- .../YoutubeCommentsInfoItemExtractor.java | 10 +- .../youtube/YoutubeCommentsExtractorTest.java | 197 +++++++++++------- 3 files changed, 139 insertions(+), 84 deletions(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeCommentsExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeCommentsExtractor.java index 287e74212..bd2af3c67 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeCommentsExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeCommentsExtractor.java @@ -47,13 +47,13 @@ public class YoutubeCommentsExtractor extends CommentsExtractor { @Override public InfoItemsPage getInitialPage() throws IOException, ExtractionException { - String commentsTokenInside = findValue(responseBody, "commentSectionRenderer", "}"); - String commentsToken = findValue(commentsTokenInside, "continuation\":\"", "\""); + final String commentsTokenInside = findValue(responseBody, "commentSectionRenderer", "}"); + final String commentsToken = findValue(commentsTokenInside, "continuation\":\"", "\""); return getPage(getNextPage(commentsToken)); } private Page getNextPage(JsonObject ajaxJson) throws ParsingException { - JsonArray arr; + final JsonArray arr; try { arr = JsonUtils.getArray(ajaxJson, "response.continuationContents.commentSectionContinuation.continuations"); } catch (Exception e) { @@ -89,14 +89,14 @@ public class YoutubeCommentsExtractor extends CommentsExtractor { throw new IllegalArgumentException("Page doesn't contain an URL"); } - String ajaxResponse = makeAjaxRequest(page.getUrl()); - JsonObject ajaxJson; + final String ajaxResponse = makeAjaxRequest(page.getUrl()); + final JsonObject ajaxJson; try { ajaxJson = JsonParser.array().from(ajaxResponse).getObject(1); } catch (Exception e) { throw new ParsingException("Could not parse json data for comments", e); } - CommentsInfoItemsCollector collector = new CommentsInfoItemsCollector(getServiceId()); + final CommentsInfoItemsCollector collector = new CommentsInfoItemsCollector(getServiceId()); collectCommentsFrom(collector, ajaxJson); return new InfoItemsPage<>(collector, getNextPage(ajaxJson)); } @@ -160,8 +160,8 @@ public class YoutubeCommentsExtractor extends CommentsExtractor { } private String findValue(String doc, String start, String end) { - int beginIndex = doc.indexOf(start) + start.length(); - int endIndex = doc.indexOf(end, beginIndex); + final int beginIndex = doc.indexOf(start) + start.length(); + final int endIndex = doc.indexOf(end, beginIndex); return doc.substring(beginIndex, endIndex); } } diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeCommentsInfoItemExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeCommentsInfoItemExtractor.java index 913022440..d960bc2ec 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeCommentsInfoItemExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeCommentsInfoItemExtractor.java @@ -34,7 +34,7 @@ public class YoutubeCommentsInfoItemExtractor implements CommentsInfoItemExtract @Override public String getThumbnailUrl() throws ParsingException { try { - JsonArray arr = JsonUtils.getArray(json, "authorThumbnail.thumbnails"); + final JsonArray arr = JsonUtils.getArray(json, "authorThumbnail.thumbnails"); return JsonUtils.getString(arr.getObject(2), "url"); } catch (Exception e) { throw new ParsingException("Could not get thumbnail url", e); @@ -82,7 +82,13 @@ public class YoutubeCommentsInfoItemExtractor implements CommentsInfoItemExtract @Override public String getCommentText() throws ParsingException { try { - String commentText = getTextFromObject(JsonUtils.getObject(json, "contentText")); + final JsonObject contentText = JsonUtils.getObject(json, "contentText"); + if (contentText.isEmpty()) { + // completely empty comments as described in + // https://github.com/TeamNewPipe/NewPipeExtractor/issues/380#issuecomment-668808584 + return ""; + } + final String commentText = getTextFromObject(contentText); // youtube adds U+FEFF in some comments. eg. https://www.youtube.com/watch?v=Nj4F63E59io return Utils.removeUTF8BOM(commentText); } catch (Exception e) { diff --git a/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeCommentsExtractorTest.java b/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeCommentsExtractorTest.java index bb2b17bea..b43dce677 100644 --- a/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeCommentsExtractorTest.java +++ b/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeCommentsExtractorTest.java @@ -23,91 +23,140 @@ import static org.junit.Assert.assertTrue; import static org.schabi.newpipe.extractor.ServiceList.YouTube; public class YoutubeCommentsExtractorTest { - private static final String urlYT = "https://www.youtube.com/watch?v=D00Au7k3i6o"; - private static final String urlInvidious = "https://invidio.us/watch?v=D00Au7k3i6o"; - private static YoutubeCommentsExtractor extractorYT; - private static YoutubeCommentsExtractor extractorInvidious; + /** + * Test a "normal" YouTube and Invidious page + */ + public static class Thomas { + private static final String urlYT = "https://www.youtube.com/watch?v=D00Au7k3i6o"; + private static final String urlInvidious = "https://invidio.us/watch?v=D00Au7k3i6o"; + private static YoutubeCommentsExtractor extractorYT; + private static YoutubeCommentsExtractor extractorInvidious; - @BeforeClass - public static void setUp() throws Exception { - NewPipe.init(DownloaderTestImpl.getInstance()); - extractorYT = (YoutubeCommentsExtractor) YouTube - .getCommentsExtractor(urlYT); - extractorYT.fetchPage(); - extractorInvidious = (YoutubeCommentsExtractor) YouTube - .getCommentsExtractor(urlInvidious); - } + private static final String commentContent = "sub 4 sub"; - @Test - public void testGetComments() throws IOException, ExtractionException { - assertTrue(getCommentsHelper(extractorYT)); - assertTrue(getCommentsHelper(extractorInvidious)); - } - - private boolean getCommentsHelper(YoutubeCommentsExtractor extractor) throws IOException, ExtractionException { - InfoItemsPage comments = extractor.getInitialPage(); - boolean result = findInComments(comments, "s1ck m3m3"); - - while (comments.hasNextPage() && !result) { - comments = extractor.getPage(comments.getNextPage()); - result = findInComments(comments, "s1ck m3m3"); + @BeforeClass + public static void setUp() throws Exception { + NewPipe.init(DownloaderTestImpl.getInstance()); + extractorYT = (YoutubeCommentsExtractor) YouTube + .getCommentsExtractor(urlYT); + extractorYT.fetchPage(); + extractorInvidious = (YoutubeCommentsExtractor) YouTube + .getCommentsExtractor(urlInvidious); + extractorInvidious.fetchPage(); } - return result; - } - - @Test - public void testGetCommentsFromCommentsInfo() throws IOException, ExtractionException { - assertTrue(getCommentsFromCommentsInfoHelper(urlYT)); - assertTrue(getCommentsFromCommentsInfoHelper(urlInvidious)); - } - - private boolean getCommentsFromCommentsInfoHelper(String url) throws IOException, ExtractionException { - CommentsInfo commentsInfo = CommentsInfo.getInfo(url); - - assertEquals("Comments", commentsInfo.getName()); - boolean result = findInComments(commentsInfo.getRelatedItems(), "s1ck m3m3"); - - Page nextPage = commentsInfo.getNextPage(); - InfoItemsPage moreItems = new InfoItemsPage<>(null, nextPage, null); - while (moreItems.hasNextPage() && !result) { - moreItems = CommentsInfo.getMoreItems(YouTube, commentsInfo, nextPage); - result = findInComments(moreItems.getItems(), "s1ck m3m3"); - nextPage = moreItems.getNextPage(); + @Test + public void testGetComments() throws IOException, ExtractionException { + assertTrue(getCommentsHelper(extractorYT)); + assertTrue(getCommentsHelper(extractorInvidious)); } - return result; - } - @Test - public void testGetCommentsAllData() throws IOException, ExtractionException { - InfoItemsPage comments = extractorYT.getInitialPage(); + private boolean getCommentsHelper(YoutubeCommentsExtractor extractor) throws IOException, ExtractionException { + InfoItemsPage comments = extractor.getInitialPage(); + boolean result = findInComments(comments, commentContent); - DefaultTests.defaultTestListOfItems(YouTube, comments.getItems(), comments.getErrors()); - for (CommentsInfoItem c : comments.getItems()) { - assertFalse(Utils.isBlank(c.getUploaderUrl())); - assertFalse(Utils.isBlank(c.getUploaderName())); - assertFalse(Utils.isBlank(c.getUploaderAvatarUrl())); - assertFalse(Utils.isBlank(c.getCommentId())); - assertFalse(Utils.isBlank(c.getCommentText())); - assertFalse(Utils.isBlank(c.getName())); - assertFalse(Utils.isBlank(c.getTextualUploadDate())); - assertNotNull(c.getUploadDate()); - assertFalse(Utils.isBlank(c.getThumbnailUrl())); - assertFalse(Utils.isBlank(c.getUrl())); - assertFalse(c.getLikeCount() < 0); + while (comments.hasNextPage() && !result) { + comments = extractor.getPage(comments.getNextPage()); + result = findInComments(comments, commentContent); + } + + return result; } - } - private boolean findInComments(InfoItemsPage comments, String comment) { - return findInComments(comments.getItems(), comment); - } + @Test + public void testGetCommentsFromCommentsInfo() throws IOException, ExtractionException { + assertTrue(getCommentsFromCommentsInfoHelper(urlYT)); + assertTrue(getCommentsFromCommentsInfoHelper(urlInvidious)); + } - private boolean findInComments(List comments, String comment) { - for (CommentsInfoItem c : comments) { - if (c.getCommentText().contains(comment)) { - return true; + private boolean getCommentsFromCommentsInfoHelper(String url) throws IOException, ExtractionException { + final CommentsInfo commentsInfo = CommentsInfo.getInfo(url); + + assertEquals("Comments", commentsInfo.getName()); + boolean result = findInComments(commentsInfo.getRelatedItems(), commentContent); + + Page nextPage = commentsInfo.getNextPage(); + InfoItemsPage moreItems = new InfoItemsPage<>(null, nextPage, null); + while (moreItems.hasNextPage() && !result) { + moreItems = CommentsInfo.getMoreItems(YouTube, commentsInfo, nextPage); + result = findInComments(moreItems.getItems(), commentContent); + nextPage = moreItems.getNextPage(); + } + return result; + } + + @Test + public void testGetCommentsAllData() throws IOException, ExtractionException { + InfoItemsPage comments = extractorYT.getInitialPage(); + + DefaultTests.defaultTestListOfItems(YouTube, comments.getItems(), comments.getErrors()); + for (CommentsInfoItem c : comments.getItems()) { + assertFalse(Utils.isBlank(c.getUploaderUrl())); + assertFalse(Utils.isBlank(c.getUploaderName())); + assertFalse(Utils.isBlank(c.getUploaderAvatarUrl())); + assertFalse(Utils.isBlank(c.getCommentId())); + assertFalse(Utils.isBlank(c.getCommentText())); + assertFalse(Utils.isBlank(c.getName())); + assertFalse(Utils.isBlank(c.getTextualUploadDate())); + assertNotNull(c.getUploadDate()); + assertFalse(Utils.isBlank(c.getThumbnailUrl())); + assertFalse(Utils.isBlank(c.getUrl())); + assertFalse(c.getLikeCount() < 0); } } - return false; + + private boolean findInComments(InfoItemsPage comments, String comment) { + return findInComments(comments.getItems(), comment); + } + + private boolean findInComments(List comments, String comment) { + for (CommentsInfoItem c : comments) { + if (c.getCommentText().contains(comment)) { + return true; + } + } + return false; + } + } + + /** + * Test a video with an empty comment + */ + public static class EmptyComment { + private static YoutubeCommentsExtractor extractor; + private final static String url = "https://www.youtube.com/watch?v=VM_6n762j6M"; + + @BeforeClass + public static void setUp() throws Exception { + NewPipe.init(DownloaderTestImpl.getInstance()); + extractor = (YoutubeCommentsExtractor) YouTube + .getCommentsExtractor(url); + extractor.fetchPage(); + } + + @Test + public void testGetCommentsAllData() throws IOException, ExtractionException { + final InfoItemsPage comments = extractor.getInitialPage(); + + DefaultTests.defaultTestListOfItems(YouTube, comments.getItems(), comments.getErrors()); + for (CommentsInfoItem c : comments.getItems()) { + assertFalse(Utils.isBlank(c.getUploaderUrl())); + assertFalse(Utils.isBlank(c.getUploaderName())); + assertFalse(Utils.isBlank(c.getUploaderAvatarUrl())); + assertFalse(Utils.isBlank(c.getCommentId())); + assertFalse(Utils.isBlank(c.getName())); + assertFalse(Utils.isBlank(c.getTextualUploadDate())); + assertNotNull(c.getUploadDate()); + assertFalse(Utils.isBlank(c.getThumbnailUrl())); + assertFalse(Utils.isBlank(c.getUrl())); + assertFalse(c.getLikeCount() < 0); + if (c.getCommentId().equals("Ugga_h1-EXdHB3gCoAEC")) { // comment without text + assertTrue(Utils.isBlank(c.getCommentText())); + } else { + assertFalse(Utils.isBlank(c.getCommentText())); + } + } + } + } }