diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/comments/CommentsInfo.java b/extractor/src/main/java/org/schabi/newpipe/extractor/comments/CommentsInfo.java index 10ac85d5c..fb0cc14b9 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/comments/CommentsInfo.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/comments/CommentsInfo.java @@ -1,12 +1,16 @@ package org.schabi.newpipe.extractor.comments; import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import org.schabi.newpipe.extractor.ListExtractor.InfoItemsPage; import org.schabi.newpipe.extractor.ListInfo; import org.schabi.newpipe.extractor.NewPipe; import org.schabi.newpipe.extractor.StreamingService; import org.schabi.newpipe.extractor.exceptions.ExtractionException; import org.schabi.newpipe.extractor.linkhandler.ListLinkHandler; +import org.schabi.newpipe.extractor.utils.ExtractorHelper; public class CommentsInfo extends ListInfo{ @@ -19,9 +23,88 @@ public class CommentsInfo extends ListInfo{ return getInfo(NewPipe.getServiceByUrl(url), url); } - private static CommentsInfo getInfo(StreamingService serviceByUrl, String url) { - // TODO Auto-generated method stub - return null; + private static CommentsInfo getInfo(StreamingService serviceByUrl, String url) throws ExtractionException, IOException { + return getInfo(serviceByUrl.getCommentsExtractor(url)); } + private static CommentsInfo getInfo(CommentsExtractor commentsExtractor) throws IOException, ExtractionException { + //for services which do not have a comments extractor + if(null == commentsExtractor) { + return null; + } + + commentsExtractor.fetchPage(); + String name = commentsExtractor.getName(); + int serviceId = commentsExtractor.getServiceId(); + ListLinkHandler listUrlIdHandler = commentsExtractor.getUIHandler(); + CommentsInfo commentsInfo = new CommentsInfo(serviceId, listUrlIdHandler, name); + commentsInfo.setCommentsExtractor(commentsExtractor); + InfoItemsPage initialCommentsPage = ExtractorHelper.getItemsPageOrLogError(commentsInfo, + commentsExtractor); + commentsInfo.setComments(new ArrayList<>()); + commentsInfo.getComments().addAll(initialCommentsPage.getItems()); + commentsInfo.setHasMoreComments(initialCommentsPage.hasNextPage()); + commentsInfo.setNextCommentsPageUrl(initialCommentsPage.getNextPageUrl()); + return commentsInfo; + } + + public static void loadMoreComments(CommentsInfo commentsInfo) { + if (commentsInfo.hasMoreComments()) { + if(null == commentsInfo.getCommentsExtractor()) { + try { + commentsInfo.setCommentsExtractor(NewPipe.getService(commentsInfo.getServiceId()).getCommentsExtractor(commentsInfo.getUrl())); + } catch (ExtractionException e) { + commentsInfo.addError(e); + return; + } + } + try { + InfoItemsPage commentsPage = commentsInfo.getCommentsExtractor() + .getPage(commentsInfo.getNextCommentsPageUrl()); + commentsInfo.getComments().addAll(commentsPage.getItems()); + commentsInfo.setHasMoreComments(commentsPage.hasNextPage()); + commentsInfo.setNextCommentsPageUrl(commentsPage.getNextPageUrl()); + } catch (IOException | ExtractionException e) { + commentsInfo.addError(e); + } + } + } + + private transient CommentsExtractor commentsExtractor; + private List comments; + private boolean hasMoreComments; + private String nextCommentsPageUrl; + + public List getComments() { + return comments; + } + + public void setComments(List comments) { + this.comments = comments; + } + + public boolean hasMoreComments() { + return hasMoreComments; + } + + public void setHasMoreComments(boolean hasMoreComments) { + this.hasMoreComments = hasMoreComments; + } + + public CommentsExtractor getCommentsExtractor() { + return commentsExtractor; + } + + public void setCommentsExtractor(CommentsExtractor commentsExtractor) { + this.commentsExtractor = commentsExtractor; + } + + public String getNextCommentsPageUrl() { + return nextCommentsPageUrl; + } + + public void setNextCommentsPageUrl(String nextCommentsPageUrl) { + this.nextCommentsPageUrl = nextCommentsPageUrl; + } + } diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeCommentsExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeCommentsExtractor.java index 10e1beea7..fb08e9b7a 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeCommentsExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeCommentsExtractor.java @@ -31,13 +31,13 @@ public class YoutubeCommentsExtractor extends CommentsExtractor { private List cookies; private String sessionToken; - private String commentsToken; + private String title; + private InfoItemsPage initPage; private ObjectMapper mapper = new ObjectMapper(); public YoutubeCommentsExtractor(StreamingService service, ListLinkHandler uiHandler) { super(service, uiHandler); - // TODO Auto-generated constructor stub } @Override @@ -45,12 +45,16 @@ public class YoutubeCommentsExtractor extends CommentsExtractor { // initial page does not load any comments but is required to get session token // and cookies super.fetchPage(); - return getPage(getNextPageUrl()); + return initPage; } + // isn't this method redundant. you can just call getnextpage on getInitialPage @Override public String getNextPageUrl() throws IOException, ExtractionException { - return getNextPageUrl(commentsToken); + // initial page does not load any comments but is required to get session token + // and cookies + super.fetchPage(); + return initPage.getNextPageUrl(); } private String getNextPageUrl(JsonNode ajaxJson) throws IOException, ExtractionException { @@ -91,6 +95,9 @@ public class YoutubeCommentsExtractor extends CommentsExtractor { } private void collectCommentsFrom(CommentsInfoItemsCollector collector, JsonNode ajaxJson, String pageUrl) { + + fetchTitle(ajaxJson); + List comments = ajaxJson.findValues("commentRenderer"); comments.stream().forEach(c -> { CommentsInfoItemExtractor extractor = new CommentsInfoItemExtractor() { @@ -192,19 +199,29 @@ public class YoutubeCommentsExtractor extends CommentsExtractor { } + private void fetchTitle(JsonNode ajaxJson) { + if(null == title) { + try { + title = ajaxJson.findValue("commentTargetTitle").get("simpleText").asText(); + } catch (Exception e) { + title = "Youtube Comments"; + } + } + } + @Override public void onFetchPage(Downloader downloader) throws IOException, ExtractionException { DownloadResponse response = downloader.get(getUrl()); String responseBody = response.getResponseBody(); cookies = response.getResponseHeaders().get("Set-Cookie"); sessionToken = findValue(responseBody, "XSRF_TOKEN"); - commentsToken = findValue(responseBody, "COMMENTS_TOKEN"); + String commentsToken = findValue(responseBody, "COMMENTS_TOKEN"); + initPage = getPage(getNextPageUrl(commentsToken)); } @Override public String getName() throws ParsingException { - // TODO Auto-generated method stub - return null; + return title; } private String makeAjaxRequest(String siteUrl) throws IOException, ReCaptchaException { diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeCommentsLinkHandlerFactory.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeCommentsLinkHandlerFactory.java index cfd4645dd..ae31337fd 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeCommentsLinkHandlerFactory.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeCommentsLinkHandlerFactory.java @@ -1,20 +1,11 @@ package org.schabi.newpipe.extractor.services.youtube.linkHandler; -import java.io.IOException; import java.io.UnsupportedEncodingException; -import java.net.URI; -import java.net.URISyntaxException; import java.net.URLDecoder; import java.util.List; -import org.jsoup.Jsoup; -import org.jsoup.nodes.Document; -import org.jsoup.nodes.Element; -import org.schabi.newpipe.extractor.Downloader; -import org.schabi.newpipe.extractor.NewPipe; import org.schabi.newpipe.extractor.exceptions.FoundAdException; import org.schabi.newpipe.extractor.exceptions.ParsingException; -import org.schabi.newpipe.extractor.exceptions.ReCaptchaException; import org.schabi.newpipe.extractor.linkhandler.ListLinkHandlerFactory; import org.schabi.newpipe.extractor.utils.Parser; @@ -27,6 +18,11 @@ public class YoutubeCommentsLinkHandlerFactory extends ListLinkHandlerFactory { return instance; } + @Override + public String getUrl(String id) { + return "https://www.youtube.com/watch?v=" + id; + } + @Override public String getId(String url) throws ParsingException, IllegalArgumentException { if (url.isEmpty()) { @@ -44,8 +40,6 @@ public class YoutubeCommentsLinkHandlerFactory extends ListLinkHandlerFactory { } catch (UnsupportedEncodingException uee) { throw new ParsingException("Could not parse attribution_link", uee); } - } else if (lowercaseUrl.contains("youtube.com/shared?ci=")) { - return getRealIdFromSharedLink(url); } else if (url.contains("vnd.youtube")) { id = Parser.matchGroup1(ID_PATTERN, url); } else if (url.contains("embed")) { @@ -86,56 +80,6 @@ public class YoutubeCommentsLinkHandlerFactory extends ListLinkHandlerFactory { } } - /** - * Get the real url from a shared uri. - *

- * Shared URI's look like this: - *

-     *     * https://www.youtube.com/shared?ci=PJICrTByb3E
-     *     * vnd.youtube://www.youtube.com/shared?ci=PJICrTByb3E&feature=twitter-deep-link
-     * 
- * - * @param url The shared url - * @return the id of the stream - * @throws ParsingException - */ - private String getRealIdFromSharedLink(String url) throws ParsingException { - URI uri; - try { - uri = new URI(url); - } catch (URISyntaxException e) { - throw new ParsingException("Invalid shared link", e); - } - String sharedId = getSharedId(uri); - Downloader downloader = NewPipe.getDownloader(); - String content; - try { - content = downloader.download("https://www.youtube.com/shared?ci=" + sharedId); - } catch (IOException | ReCaptchaException e) { - throw new ParsingException("Unable to resolve shared link", e); - } - final Document document = Jsoup.parse(content); - - final Element element = document.select("link[rel=\"canonical\"]").first(); - final String urlWithRealId = (element != null) - ? element.attr("abs:href") - : document.select("meta[property=\"og:url\"]").first() - .attr("abs:content"); - - String realId = Parser.matchGroup1(ID_PATTERN, urlWithRealId); - if (sharedId.equals(realId)) { - throw new ParsingException("Got same id for as shared info_id: " + sharedId); - } - return realId; - } - - private String getSharedId(URI uri) throws ParsingException { - if (!"/shared".equals(uri.getPath())) { - throw new ParsingException("Not a shared link: " + uri.toString() + " (path != " + uri.getPath() + ")"); - } - return Parser.matchGroup1("ci=" + ID_PATTERN, uri.getQuery()); - } - @Override public boolean onAcceptUrl(final String url) throws FoundAdException { final String lowercaseUrl = url.toLowerCase(); @@ -156,8 +100,8 @@ public class YoutubeCommentsLinkHandlerFactory extends ListLinkHandlerFactory { } } - @Override - public String getUrl(String id, List contentFilter, String sortFilter) throws ParsingException { - return "https://www.youtube.com/watch?v=" + id; - } + @Override + public String getUrl(String id, List contentFilter, String sortFilter) throws ParsingException { + return "https://www.youtube.com/watch?v=" + id; + } } diff --git a/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeCommentsExtractorTest.java b/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeCommentsExtractorTest.java index 41eb90aaa..cc56a21b8 100644 --- a/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeCommentsExtractorTest.java +++ b/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeCommentsExtractorTest.java @@ -11,6 +11,7 @@ import org.junit.Test; import org.schabi.newpipe.Downloader; import org.schabi.newpipe.extractor.ListExtractor.InfoItemsPage; import org.schabi.newpipe.extractor.NewPipe; +import org.schabi.newpipe.extractor.comments.CommentsInfo; import org.schabi.newpipe.extractor.comments.CommentsInfoItem; import org.schabi.newpipe.extractor.exceptions.ExtractionException; import org.schabi.newpipe.extractor.services.youtube.extractors.YoutubeCommentsExtractor; @@ -55,6 +56,21 @@ public class YoutubeCommentsExtractorTest { assertTrue(result); } + + @Test + public void testGetCommentsFromCommentsInfo() throws IOException, ExtractionException { + boolean result = false; + CommentsInfo commentsInfo = CommentsInfo.getInfo("https://www.youtube.com/watch?v=rrgFN3AxGfs"); + assertTrue("what the fuck am i doing with my life.wmv".equals(commentsInfo.getName())); + result = findInComments(commentsInfo.getComments(), "i should really be in the top comment.lol"); + + while (commentsInfo.hasMoreComments() && !result) { + CommentsInfo.loadMoreComments(commentsInfo); + result = findInComments(commentsInfo.getComments(), "i should really be in the top comment.lol"); + } + + assertTrue(result); + } private boolean findInComments(InfoItemsPage comments, String comment) { return findInComments(comments.getItems(), comment);