From 0fb73301e38c72d0e0b742deab5e6d9da4cde07c Mon Sep 17 00:00:00 2001
From: TobiGr <tobigr@users.noreply.github.com>
Date: Wed, 5 Aug 2020 18:25:35 +0200
Subject: [PATCH] [YouTube] Fix crash on empty comment

Closes #380
---
 .../extractors/YoutubeCommentsExtractor.java  |  16 +-
 .../YoutubeCommentsInfoItemExtractor.java     |  10 +-
 .../youtube/YoutubeCommentsExtractorTest.java | 197 +++++++++++-------
 3 files changed, 139 insertions(+), 84 deletions(-)
diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeCommentsExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeCommentsExtractor.java
index 287e74212..bd2af3c67 100644
--- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeCommentsExtractor.java
+++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeCommentsExtractor.java
@@ -47,13 +47,13 @@ public class YoutubeCommentsExtractor extends CommentsExtractor {
 
     @Override
     public InfoItemsPage<CommentsInfoItem> getInitialPage() throws IOException, ExtractionException {
-        String commentsTokenInside = findValue(responseBody, "commentSectionRenderer", "}");
-        String commentsToken = findValue(commentsTokenInside, "continuation\":\"", "\"");
+        final String commentsTokenInside = findValue(responseBody, "commentSectionRenderer", "}");
+        final String commentsToken = findValue(commentsTokenInside, "continuation\":\"", "\"");
         return getPage(getNextPage(commentsToken));
     }
 
     private Page getNextPage(JsonObject ajaxJson) throws ParsingException {
-        JsonArray arr;
+        final JsonArray arr;
         try {
             arr = JsonUtils.getArray(ajaxJson, "response.continuationContents.commentSectionContinuation.continuations");
         } catch (Exception e) {
@@ -89,14 +89,14 @@ public class YoutubeCommentsExtractor extends CommentsExtractor {
             throw new IllegalArgumentException("Page doesn't contain an URL");
         }
 
-        String ajaxResponse = makeAjaxRequest(page.getUrl());
-        JsonObject ajaxJson;
+        final String ajaxResponse = makeAjaxRequest(page.getUrl());
+        final JsonObject ajaxJson;
         try {
             ajaxJson = JsonParser.array().from(ajaxResponse).getObject(1);
         } catch (Exception e) {
             throw new ParsingException("Could not parse json data for comments", e);
         }
-        CommentsInfoItemsCollector collector = new CommentsInfoItemsCollector(getServiceId());
+        final CommentsInfoItemsCollector collector = new CommentsInfoItemsCollector(getServiceId());
         collectCommentsFrom(collector, ajaxJson);
         return new InfoItemsPage<>(collector, getNextPage(ajaxJson));
     }
@@ -160,8 +160,8 @@ public class YoutubeCommentsExtractor extends CommentsExtractor {
     }
 
     private String findValue(String doc, String start, String end) {
-        int beginIndex = doc.indexOf(start) + start.length();
-        int endIndex = doc.indexOf(end, beginIndex);
+        final int beginIndex = doc.indexOf(start) + start.length();
+        final int endIndex = doc.indexOf(end, beginIndex);
         return doc.substring(beginIndex, endIndex);
     }
 }
diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeCommentsInfoItemExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeCommentsInfoItemExtractor.java
index 913022440..d960bc2ec 100644
--- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeCommentsInfoItemExtractor.java
+++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeCommentsInfoItemExtractor.java
@@ -34,7 +34,7 @@ public class YoutubeCommentsInfoItemExtractor implements CommentsInfoItemExtract
     @Override
     public String getThumbnailUrl() throws ParsingException {
         try {
-            JsonArray arr = JsonUtils.getArray(json, "authorThumbnail.thumbnails");
+            final JsonArray arr = JsonUtils.getArray(json, "authorThumbnail.thumbnails");
             return JsonUtils.getString(arr.getObject(2), "url");
         } catch (Exception e) {
             throw new ParsingException("Could not get thumbnail url", e);
@@ -82,7 +82,13 @@ public class YoutubeCommentsInfoItemExtractor implements CommentsInfoItemExtract
     @Override
     public String getCommentText() throws ParsingException {
         try {
-            String commentText = getTextFromObject(JsonUtils.getObject(json, "contentText"));
+            final JsonObject contentText = JsonUtils.getObject(json, "contentText");
+            if (contentText.isEmpty()) {
+                // completely empty comments as described in
+                // https://github.com/TeamNewPipe/NewPipeExtractor/issues/380#issuecomment-668808584
+                return "";
+            }
+            final String commentText = getTextFromObject(contentText);
             // youtube adds U+FEFF in some comments. eg. https://www.youtube.com/watch?v=Nj4F63E59io<feff>
             return Utils.removeUTF8BOM(commentText);
         } catch (Exception e) {
diff --git a/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeCommentsExtractorTest.java b/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeCommentsExtractorTest.java
index bb2b17bea..b43dce677 100644
--- a/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeCommentsExtractorTest.java
+++ b/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeCommentsExtractorTest.java
@@ -23,91 +23,140 @@ import static org.junit.Assert.assertTrue;
 import static org.schabi.newpipe.extractor.ServiceList.YouTube;
 
 public class YoutubeCommentsExtractorTest {
-    private static final String urlYT = "https://www.youtube.com/watch?v=D00Au7k3i6o";
-    private static final String urlInvidious = "https://invidio.us/watch?v=D00Au7k3i6o";
-    private static YoutubeCommentsExtractor extractorYT;
-    private static YoutubeCommentsExtractor extractorInvidious;
+    /**
+     * Test a "normal" YouTube and Invidious page
+     */
+    public static class Thomas {
+        private static final String urlYT = "https://www.youtube.com/watch?v=D00Au7k3i6o";
+        private static final String urlInvidious = "https://invidio.us/watch?v=D00Au7k3i6o";
+        private static YoutubeCommentsExtractor extractorYT;
+        private static YoutubeCommentsExtractor extractorInvidious;
 
-    @BeforeClass
-    public static void setUp() throws Exception {
-        NewPipe.init(DownloaderTestImpl.getInstance());
-        extractorYT = (YoutubeCommentsExtractor) YouTube
-                .getCommentsExtractor(urlYT);
-        extractorYT.fetchPage();
-        extractorInvidious = (YoutubeCommentsExtractor) YouTube
-                .getCommentsExtractor(urlInvidious);
-    }
+        private static final String commentContent = "sub 4 sub";
 
-    @Test
-    public void testGetComments() throws IOException, ExtractionException {
-        assertTrue(getCommentsHelper(extractorYT));
-        assertTrue(getCommentsHelper(extractorInvidious));
-    }
-
-    private boolean getCommentsHelper(YoutubeCommentsExtractor extractor) throws IOException, ExtractionException {
-        InfoItemsPage<CommentsInfoItem> comments = extractor.getInitialPage();
-        boolean result = findInComments(comments, "s1ck m3m3");
-
-        while (comments.hasNextPage() && !result) {
-            comments = extractor.getPage(comments.getNextPage());
-            result = findInComments(comments, "s1ck m3m3");
+        @BeforeClass
+        public static void setUp() throws Exception {
+            NewPipe.init(DownloaderTestImpl.getInstance());
+            extractorYT = (YoutubeCommentsExtractor) YouTube
+                    .getCommentsExtractor(urlYT);
+            extractorYT.fetchPage();
+            extractorInvidious = (YoutubeCommentsExtractor) YouTube
+                    .getCommentsExtractor(urlInvidious);
+            extractorInvidious.fetchPage();
         }
 
-        return result;
-    }
-
-    @Test
-    public void testGetCommentsFromCommentsInfo() throws IOException, ExtractionException {
-        assertTrue(getCommentsFromCommentsInfoHelper(urlYT));
-        assertTrue(getCommentsFromCommentsInfoHelper(urlInvidious));
-    }
-
-    private boolean getCommentsFromCommentsInfoHelper(String url) throws IOException, ExtractionException {
-        CommentsInfo commentsInfo = CommentsInfo.getInfo(url);
-
-        assertEquals("Comments", commentsInfo.getName());
-        boolean result = findInComments(commentsInfo.getRelatedItems(), "s1ck m3m3");
-
-        Page nextPage = commentsInfo.getNextPage();
-        InfoItemsPage<CommentsInfoItem> moreItems = new InfoItemsPage<>(null, nextPage, null);
-        while (moreItems.hasNextPage() && !result) {
-            moreItems = CommentsInfo.getMoreItems(YouTube, commentsInfo, nextPage);
-            result = findInComments(moreItems.getItems(), "s1ck m3m3");
-            nextPage = moreItems.getNextPage();
+        @Test
+        public void testGetComments() throws IOException, ExtractionException {
+            assertTrue(getCommentsHelper(extractorYT));
+            assertTrue(getCommentsHelper(extractorInvidious));
         }
-        return result;
-    }
 
-    @Test
-    public void testGetCommentsAllData() throws IOException, ExtractionException {
-        InfoItemsPage<CommentsInfoItem> comments = extractorYT.getInitialPage();
+        private boolean getCommentsHelper(YoutubeCommentsExtractor extractor) throws IOException, ExtractionException {
+            InfoItemsPage<CommentsInfoItem> comments = extractor.getInitialPage();
+            boolean result = findInComments(comments, commentContent);
 
-        DefaultTests.defaultTestListOfItems(YouTube, comments.getItems(), comments.getErrors());
-        for (CommentsInfoItem c : comments.getItems()) {
-            assertFalse(Utils.isBlank(c.getUploaderUrl()));
-            assertFalse(Utils.isBlank(c.getUploaderName()));
-            assertFalse(Utils.isBlank(c.getUploaderAvatarUrl()));
-            assertFalse(Utils.isBlank(c.getCommentId()));
-            assertFalse(Utils.isBlank(c.getCommentText()));
-            assertFalse(Utils.isBlank(c.getName()));
-            assertFalse(Utils.isBlank(c.getTextualUploadDate()));
-            assertNotNull(c.getUploadDate());
-            assertFalse(Utils.isBlank(c.getThumbnailUrl()));
-            assertFalse(Utils.isBlank(c.getUrl()));
-            assertFalse(c.getLikeCount() < 0);
+            while (comments.hasNextPage() && !result) {
+                comments = extractor.getPage(comments.getNextPage());
+                result = findInComments(comments, commentContent);
+            }
+
+            return result;
         }
-    }
 
-    private boolean findInComments(InfoItemsPage<CommentsInfoItem> comments, String comment) {
-        return findInComments(comments.getItems(), comment);
-    }
+        @Test
+        public void testGetCommentsFromCommentsInfo() throws IOException, ExtractionException {
+            assertTrue(getCommentsFromCommentsInfoHelper(urlYT));
+            assertTrue(getCommentsFromCommentsInfoHelper(urlInvidious));
+        }
 
-    private boolean findInComments(List<CommentsInfoItem> comments, String comment) {
-        for (CommentsInfoItem c : comments) {
-            if (c.getCommentText().contains(comment)) {
-                return true;
+        private boolean getCommentsFromCommentsInfoHelper(String url) throws IOException, ExtractionException {
+            final CommentsInfo commentsInfo = CommentsInfo.getInfo(url);
+
+            assertEquals("Comments", commentsInfo.getName());
+            boolean result = findInComments(commentsInfo.getRelatedItems(), commentContent);
+
+            Page nextPage = commentsInfo.getNextPage();
+            InfoItemsPage<CommentsInfoItem> moreItems = new InfoItemsPage<>(null, nextPage, null);
+            while (moreItems.hasNextPage() && !result) {
+                moreItems = CommentsInfo.getMoreItems(YouTube, commentsInfo, nextPage);
+                result = findInComments(moreItems.getItems(), commentContent);
+                nextPage = moreItems.getNextPage();
+            }
+            return result;
+        }
+
+        @Test
+        public void testGetCommentsAllData() throws IOException, ExtractionException {
+            InfoItemsPage<CommentsInfoItem> comments = extractorYT.getInitialPage();
+
+            DefaultTests.defaultTestListOfItems(YouTube, comments.getItems(), comments.getErrors());
+            for (CommentsInfoItem c : comments.getItems()) {
+                assertFalse(Utils.isBlank(c.getUploaderUrl()));
+                assertFalse(Utils.isBlank(c.getUploaderName()));
+                assertFalse(Utils.isBlank(c.getUploaderAvatarUrl()));
+                assertFalse(Utils.isBlank(c.getCommentId()));
+                assertFalse(Utils.isBlank(c.getCommentText()));
+                assertFalse(Utils.isBlank(c.getName()));
+                assertFalse(Utils.isBlank(c.getTextualUploadDate()));
+                assertNotNull(c.getUploadDate());
+                assertFalse(Utils.isBlank(c.getThumbnailUrl()));
+                assertFalse(Utils.isBlank(c.getUrl()));
+                assertFalse(c.getLikeCount() < 0);
             }
         }
-        return false;
+
+        private boolean findInComments(InfoItemsPage<CommentsInfoItem> comments, String comment) {
+            return findInComments(comments.getItems(), comment);
+        }
+
+        private boolean findInComments(List<CommentsInfoItem> comments, String comment) {
+            for (CommentsInfoItem c : comments) {
+                if (c.getCommentText().contains(comment)) {
+                    return true;
+                }
+            }
+            return false;
+        }
+    }
+
+    /**
+     * Test a video with an empty comment
+     */
+    public static class EmptyComment {
+        private static YoutubeCommentsExtractor extractor;
+        private final static String url = "https://www.youtube.com/watch?v=VM_6n762j6M";
+
+        @BeforeClass
+        public static void setUp() throws Exception {
+            NewPipe.init(DownloaderTestImpl.getInstance());
+            extractor = (YoutubeCommentsExtractor) YouTube
+                    .getCommentsExtractor(url);
+            extractor.fetchPage();
+        }
+
+        @Test
+        public void testGetCommentsAllData() throws IOException, ExtractionException {
+            final InfoItemsPage<CommentsInfoItem> comments = extractor.getInitialPage();
+
+            DefaultTests.defaultTestListOfItems(YouTube, comments.getItems(), comments.getErrors());
+            for (CommentsInfoItem c : comments.getItems()) {
+                assertFalse(Utils.isBlank(c.getUploaderUrl()));
+                assertFalse(Utils.isBlank(c.getUploaderName()));
+                assertFalse(Utils.isBlank(c.getUploaderAvatarUrl()));
+                assertFalse(Utils.isBlank(c.getCommentId()));
+                assertFalse(Utils.isBlank(c.getName()));
+                assertFalse(Utils.isBlank(c.getTextualUploadDate()));
+                assertNotNull(c.getUploadDate());
+                assertFalse(Utils.isBlank(c.getThumbnailUrl()));
+                assertFalse(Utils.isBlank(c.getUrl()));
+                assertFalse(c.getLikeCount() < 0);
+                if (c.getCommentId().equals("Ugga_h1-EXdHB3gCoAEC")) { // comment without text
+                    assertTrue(Utils.isBlank(c.getCommentText()));
+                } else {
+                    assertFalse(Utils.isBlank(c.getCommentText()));
+                }
+            }
+        }
+
     }
 }