[YouTube] Fix crash on empty comment

Closes #380
This commit is contained in:
TobiGr 2020-08-05 18:25:35 +02:00
parent 5ac80624a4
commit 0fb73301e3
3 changed files with 139 additions and 84 deletions

View File

@ -47,13 +47,13 @@ public class YoutubeCommentsExtractor extends CommentsExtractor {
@Override @Override
public InfoItemsPage<CommentsInfoItem> getInitialPage() throws IOException, ExtractionException { public InfoItemsPage<CommentsInfoItem> getInitialPage() throws IOException, ExtractionException {
String commentsTokenInside = findValue(responseBody, "commentSectionRenderer", "}"); final String commentsTokenInside = findValue(responseBody, "commentSectionRenderer", "}");
String commentsToken = findValue(commentsTokenInside, "continuation\":\"", "\""); final String commentsToken = findValue(commentsTokenInside, "continuation\":\"", "\"");
return getPage(getNextPage(commentsToken)); return getPage(getNextPage(commentsToken));
} }
private Page getNextPage(JsonObject ajaxJson) throws ParsingException { private Page getNextPage(JsonObject ajaxJson) throws ParsingException {
JsonArray arr; final JsonArray arr;
try { try {
arr = JsonUtils.getArray(ajaxJson, "response.continuationContents.commentSectionContinuation.continuations"); arr = JsonUtils.getArray(ajaxJson, "response.continuationContents.commentSectionContinuation.continuations");
} catch (Exception e) { } catch (Exception e) {
@ -89,14 +89,14 @@ public class YoutubeCommentsExtractor extends CommentsExtractor {
throw new IllegalArgumentException("Page doesn't contain an URL"); throw new IllegalArgumentException("Page doesn't contain an URL");
} }
String ajaxResponse = makeAjaxRequest(page.getUrl()); final String ajaxResponse = makeAjaxRequest(page.getUrl());
JsonObject ajaxJson; final JsonObject ajaxJson;
try { try {
ajaxJson = JsonParser.array().from(ajaxResponse).getObject(1); ajaxJson = JsonParser.array().from(ajaxResponse).getObject(1);
} catch (Exception e) { } catch (Exception e) {
throw new ParsingException("Could not parse json data for comments", e); throw new ParsingException("Could not parse json data for comments", e);
} }
CommentsInfoItemsCollector collector = new CommentsInfoItemsCollector(getServiceId()); final CommentsInfoItemsCollector collector = new CommentsInfoItemsCollector(getServiceId());
collectCommentsFrom(collector, ajaxJson); collectCommentsFrom(collector, ajaxJson);
return new InfoItemsPage<>(collector, getNextPage(ajaxJson)); return new InfoItemsPage<>(collector, getNextPage(ajaxJson));
} }
@ -160,8 +160,8 @@ public class YoutubeCommentsExtractor extends CommentsExtractor {
} }
private String findValue(String doc, String start, String end) { private String findValue(String doc, String start, String end) {
int beginIndex = doc.indexOf(start) + start.length(); final int beginIndex = doc.indexOf(start) + start.length();
int endIndex = doc.indexOf(end, beginIndex); final int endIndex = doc.indexOf(end, beginIndex);
return doc.substring(beginIndex, endIndex); return doc.substring(beginIndex, endIndex);
} }
} }

View File

@ -34,7 +34,7 @@ public class YoutubeCommentsInfoItemExtractor implements CommentsInfoItemExtract
@Override @Override
public String getThumbnailUrl() throws ParsingException { public String getThumbnailUrl() throws ParsingException {
try { try {
JsonArray arr = JsonUtils.getArray(json, "authorThumbnail.thumbnails"); final JsonArray arr = JsonUtils.getArray(json, "authorThumbnail.thumbnails");
return JsonUtils.getString(arr.getObject(2), "url"); return JsonUtils.getString(arr.getObject(2), "url");
} catch (Exception e) { } catch (Exception e) {
throw new ParsingException("Could not get thumbnail url", e); throw new ParsingException("Could not get thumbnail url", e);
@ -82,7 +82,13 @@ public class YoutubeCommentsInfoItemExtractor implements CommentsInfoItemExtract
@Override @Override
public String getCommentText() throws ParsingException { public String getCommentText() throws ParsingException {
try { try {
String commentText = getTextFromObject(JsonUtils.getObject(json, "contentText")); final JsonObject contentText = JsonUtils.getObject(json, "contentText");
if (contentText.isEmpty()) {
// completely empty comments as described in
// https://github.com/TeamNewPipe/NewPipeExtractor/issues/380#issuecomment-668808584
return "";
}
final String commentText = getTextFromObject(contentText);
// youtube adds U+FEFF in some comments. eg. https://www.youtube.com/watch?v=Nj4F63E59io<feff> // youtube adds U+FEFF in some comments. eg. https://www.youtube.com/watch?v=Nj4F63E59io<feff>
return Utils.removeUTF8BOM(commentText); return Utils.removeUTF8BOM(commentText);
} catch (Exception e) { } catch (Exception e) {

View File

@ -23,91 +23,140 @@ import static org.junit.Assert.assertTrue;
import static org.schabi.newpipe.extractor.ServiceList.YouTube; import static org.schabi.newpipe.extractor.ServiceList.YouTube;
public class YoutubeCommentsExtractorTest { public class YoutubeCommentsExtractorTest {
private static final String urlYT = "https://www.youtube.com/watch?v=D00Au7k3i6o"; /**
private static final String urlInvidious = "https://invidio.us/watch?v=D00Au7k3i6o"; * Test a "normal" YouTube and Invidious page
private static YoutubeCommentsExtractor extractorYT; */
private static YoutubeCommentsExtractor extractorInvidious; public static class Thomas {
private static final String urlYT = "https://www.youtube.com/watch?v=D00Au7k3i6o";
private static final String urlInvidious = "https://invidio.us/watch?v=D00Au7k3i6o";
private static YoutubeCommentsExtractor extractorYT;
private static YoutubeCommentsExtractor extractorInvidious;
@BeforeClass private static final String commentContent = "sub 4 sub";
public static void setUp() throws Exception {
NewPipe.init(DownloaderTestImpl.getInstance());
extractorYT = (YoutubeCommentsExtractor) YouTube
.getCommentsExtractor(urlYT);
extractorYT.fetchPage();
extractorInvidious = (YoutubeCommentsExtractor) YouTube
.getCommentsExtractor(urlInvidious);
}
@Test @BeforeClass
public void testGetComments() throws IOException, ExtractionException { public static void setUp() throws Exception {
assertTrue(getCommentsHelper(extractorYT)); NewPipe.init(DownloaderTestImpl.getInstance());
assertTrue(getCommentsHelper(extractorInvidious)); extractorYT = (YoutubeCommentsExtractor) YouTube
} .getCommentsExtractor(urlYT);
extractorYT.fetchPage();
private boolean getCommentsHelper(YoutubeCommentsExtractor extractor) throws IOException, ExtractionException { extractorInvidious = (YoutubeCommentsExtractor) YouTube
InfoItemsPage<CommentsInfoItem> comments = extractor.getInitialPage(); .getCommentsExtractor(urlInvidious);
boolean result = findInComments(comments, "s1ck m3m3"); extractorInvidious.fetchPage();
while (comments.hasNextPage() && !result) {
comments = extractor.getPage(comments.getNextPage());
result = findInComments(comments, "s1ck m3m3");
} }
return result; @Test
} public void testGetComments() throws IOException, ExtractionException {
assertTrue(getCommentsHelper(extractorYT));
@Test assertTrue(getCommentsHelper(extractorInvidious));
public void testGetCommentsFromCommentsInfo() throws IOException, ExtractionException {
assertTrue(getCommentsFromCommentsInfoHelper(urlYT));
assertTrue(getCommentsFromCommentsInfoHelper(urlInvidious));
}
private boolean getCommentsFromCommentsInfoHelper(String url) throws IOException, ExtractionException {
CommentsInfo commentsInfo = CommentsInfo.getInfo(url);
assertEquals("Comments", commentsInfo.getName());
boolean result = findInComments(commentsInfo.getRelatedItems(), "s1ck m3m3");
Page nextPage = commentsInfo.getNextPage();
InfoItemsPage<CommentsInfoItem> moreItems = new InfoItemsPage<>(null, nextPage, null);
while (moreItems.hasNextPage() && !result) {
moreItems = CommentsInfo.getMoreItems(YouTube, commentsInfo, nextPage);
result = findInComments(moreItems.getItems(), "s1ck m3m3");
nextPage = moreItems.getNextPage();
} }
return result;
}
@Test private boolean getCommentsHelper(YoutubeCommentsExtractor extractor) throws IOException, ExtractionException {
public void testGetCommentsAllData() throws IOException, ExtractionException { InfoItemsPage<CommentsInfoItem> comments = extractor.getInitialPage();
InfoItemsPage<CommentsInfoItem> comments = extractorYT.getInitialPage(); boolean result = findInComments(comments, commentContent);
DefaultTests.defaultTestListOfItems(YouTube, comments.getItems(), comments.getErrors()); while (comments.hasNextPage() && !result) {
for (CommentsInfoItem c : comments.getItems()) { comments = extractor.getPage(comments.getNextPage());
assertFalse(Utils.isBlank(c.getUploaderUrl())); result = findInComments(comments, commentContent);
assertFalse(Utils.isBlank(c.getUploaderName())); }
assertFalse(Utils.isBlank(c.getUploaderAvatarUrl()));
assertFalse(Utils.isBlank(c.getCommentId())); return result;
assertFalse(Utils.isBlank(c.getCommentText()));
assertFalse(Utils.isBlank(c.getName()));
assertFalse(Utils.isBlank(c.getTextualUploadDate()));
assertNotNull(c.getUploadDate());
assertFalse(Utils.isBlank(c.getThumbnailUrl()));
assertFalse(Utils.isBlank(c.getUrl()));
assertFalse(c.getLikeCount() < 0);
} }
}
private boolean findInComments(InfoItemsPage<CommentsInfoItem> comments, String comment) { @Test
return findInComments(comments.getItems(), comment); public void testGetCommentsFromCommentsInfo() throws IOException, ExtractionException {
} assertTrue(getCommentsFromCommentsInfoHelper(urlYT));
assertTrue(getCommentsFromCommentsInfoHelper(urlInvidious));
}
private boolean findInComments(List<CommentsInfoItem> comments, String comment) { private boolean getCommentsFromCommentsInfoHelper(String url) throws IOException, ExtractionException {
for (CommentsInfoItem c : comments) { final CommentsInfo commentsInfo = CommentsInfo.getInfo(url);
if (c.getCommentText().contains(comment)) {
return true; assertEquals("Comments", commentsInfo.getName());
boolean result = findInComments(commentsInfo.getRelatedItems(), commentContent);
Page nextPage = commentsInfo.getNextPage();
InfoItemsPage<CommentsInfoItem> moreItems = new InfoItemsPage<>(null, nextPage, null);
while (moreItems.hasNextPage() && !result) {
moreItems = CommentsInfo.getMoreItems(YouTube, commentsInfo, nextPage);
result = findInComments(moreItems.getItems(), commentContent);
nextPage = moreItems.getNextPage();
}
return result;
}
@Test
public void testGetCommentsAllData() throws IOException, ExtractionException {
InfoItemsPage<CommentsInfoItem> comments = extractorYT.getInitialPage();
DefaultTests.defaultTestListOfItems(YouTube, comments.getItems(), comments.getErrors());
for (CommentsInfoItem c : comments.getItems()) {
assertFalse(Utils.isBlank(c.getUploaderUrl()));
assertFalse(Utils.isBlank(c.getUploaderName()));
assertFalse(Utils.isBlank(c.getUploaderAvatarUrl()));
assertFalse(Utils.isBlank(c.getCommentId()));
assertFalse(Utils.isBlank(c.getCommentText()));
assertFalse(Utils.isBlank(c.getName()));
assertFalse(Utils.isBlank(c.getTextualUploadDate()));
assertNotNull(c.getUploadDate());
assertFalse(Utils.isBlank(c.getThumbnailUrl()));
assertFalse(Utils.isBlank(c.getUrl()));
assertFalse(c.getLikeCount() < 0);
} }
} }
return false;
/**
 * Page-based convenience overload: looks for {@code comment} among the items
 * of the given page by delegating to the list-based {@code findInComments}.
 *
 * @param comments page whose items are searched
 * @param comment  text fragment to look for
 * @return whatever the list-based overload returns for the page's items
 */
private boolean findInComments(InfoItemsPage<CommentsInfoItem> comments, String comment) {
    final List<CommentsInfoItem> pageItems = comments.getItems();
    return findInComments(pageItems, comment);
}
/**
 * Scans the given comments in order and reports whether at least one of them
 * has a comment text containing {@code comment}.
 *
 * @param comments comments to scan
 * @param comment  text fragment to look for
 * @return {@code true} as soon as one comment text contains the fragment,
 *         {@code false} if none does (including for an empty list)
 */
private boolean findInComments(List<CommentsInfoItem> comments, String comment) {
    boolean found = false;
    for (final CommentsInfoItem item : comments) {
        final String text = item.getCommentText();
        if (text.contains(comment)) {
            // Stop at the first hit; later items are not inspected.
            found = true;
            break;
        }
    }
    return found;
}
}
/**
* Test a video with an empty comment
*/
public static class EmptyComment {
private static YoutubeCommentsExtractor extractor;
private final static String url = "https://www.youtube.com/watch?v=VM_6n762j6M";
/**
 * Initialises NewPipe with the test downloader, creates the comments
 * extractor for {@code url}, stores it in {@code extractor} and fetches
 * its page.
 */
@BeforeClass
public static void setUp() throws Exception {
    NewPipe.init(DownloaderTestImpl.getInstance());

    final YoutubeCommentsExtractor commentsExtractor =
            (YoutubeCommentsExtractor) YouTube.getCommentsExtractor(url);
    // Publish the extractor before fetching, exactly as the field was
    // assigned before fetchPage() previously.
    extractor = commentsExtractor;
    commentsExtractor.fetchPage();
}
/**
 * Runs the default item checks on the initial comments page and verifies the
 * basic fields of every comment on it. The comment text must be non-blank
 * for every comment except the one with id {@code Ugga_h1-EXdHB3gCoAEC},
 * whose text must be blank.
 */
@Test
public void testGetCommentsAllData() throws IOException, ExtractionException {
    final InfoItemsPage<CommentsInfoItem> initialPage = extractor.getInitialPage();
    DefaultTests.defaultTestListOfItems(YouTube, initialPage.getItems(), initialPage.getErrors());

    // NOTE(review): only the initial page is inspected, so the blank-text
    // branch below runs only if the textless comment is on that page.
    for (final CommentsInfoItem item : initialPage.getItems()) {
        assertFalse(Utils.isBlank(item.getUploaderUrl()));
        assertFalse(Utils.isBlank(item.getUploaderName()));
        assertFalse(Utils.isBlank(item.getUploaderAvatarUrl()));
        assertFalse(Utils.isBlank(item.getCommentId()));
        assertFalse(Utils.isBlank(item.getName()));
        assertFalse(Utils.isBlank(item.getTextualUploadDate()));
        assertNotNull(item.getUploadDate());
        assertFalse(Utils.isBlank(item.getThumbnailUrl()));
        assertFalse(Utils.isBlank(item.getUrl()));
        assertFalse(item.getLikeCount() < 0);

        // The known comment without text is the only one allowed to be blank.
        final boolean isTextlessComment = item.getCommentId().equals("Ugga_h1-EXdHB3gCoAEC");
        final boolean textIsBlank = Utils.isBlank(item.getCommentText());
        if (isTextlessComment) {
            assertTrue(textIsBlank);
        } else {
            assertFalse(textIsBlank);
        }
    }
}
} }
} }