From 53d39979049e345be791d2c06ef925afb6d7b86c Mon Sep 17 00:00:00 2001 From: Ritvik Saraf <13ritvik@gmail.com> Date: Mon, 20 Aug 2018 04:22:19 +0530 Subject: [PATCH 01/31] added comments extractor --- build.gradle | 4 +- extractor/build.gradle | 1 + .../newpipe/extractor/DownloadResponse.java | 26 ++ .../schabi/newpipe/extractor/Downloader.java | 69 ++--- .../schabi/newpipe/extractor/InfoItem.java | 3 +- .../newpipe/extractor/StreamingService.java | 36 ++- .../extractor/comments/CommentsExtractor.java | 14 + .../extractor/comments/CommentsInfo.java | 27 ++ .../extractor/comments/CommentsInfoItem.java | 76 +++++ .../comments/CommentsInfoItemExtractor.java | 22 ++ .../comments/CommentsInfoItemsCollector.java | 103 +++++++ .../soundcloud/SoundcloudService.java | 13 + .../services/youtube/YoutubeService.java | 45 ++- .../extractors/YoutubeCommentsExtractor.java | 262 ++++++++++++++++++ .../YoutubeCommentsLinkHandlerFactory.java | 183 ++++++++++++ .../java/org/schabi/newpipe/Downloader.java | 236 +++++++++------- .../youtube/YoutubeCommentsExtractorTest.java | 46 +++ 17 files changed, 1007 insertions(+), 159 deletions(-) create mode 100644 extractor/src/main/java/org/schabi/newpipe/extractor/DownloadResponse.java create mode 100644 extractor/src/main/java/org/schabi/newpipe/extractor/comments/CommentsExtractor.java create mode 100644 extractor/src/main/java/org/schabi/newpipe/extractor/comments/CommentsInfo.java create mode 100644 extractor/src/main/java/org/schabi/newpipe/extractor/comments/CommentsInfoItem.java create mode 100644 extractor/src/main/java/org/schabi/newpipe/extractor/comments/CommentsInfoItemExtractor.java create mode 100644 extractor/src/main/java/org/schabi/newpipe/extractor/comments/CommentsInfoItemsCollector.java create mode 100644 extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeCommentsExtractor.java create mode 100644 extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeCommentsLinkHandlerFactory.java create mode 100644 extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeCommentsExtractorTest.java diff --git a/build.gradle b/build.gradle index 9f25fc28e..f8f6c8e90 100644 --- a/build.gradle +++ b/build.gradle @@ -1,7 +1,7 @@ allprojects { apply plugin: 'java-library' - sourceCompatibility = 1.7 - targetCompatibility = 1.7 + sourceCompatibility = 1.8 + targetCompatibility = 1.8 version 'v0.13.0' diff --git a/extractor/build.gradle b/extractor/build.gradle index 1b7fbf001..26430b9fa 100644 --- a/extractor/build.gradle +++ b/extractor/build.gradle @@ -6,6 +6,7 @@ dependencies { implementation 'org.mozilla:rhino:1.7.7.1' implementation 'com.github.spotbugs:spotbugs-annotations:3.1.0' implementation 'org.nibor.autolink:autolink:0.8.0' + implementation 'com.fasterxml.jackson.core:jackson-databind:2.9.5' testImplementation 'junit:junit:4.12' } \ No newline at end of file diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/DownloadResponse.java b/extractor/src/main/java/org/schabi/newpipe/extractor/DownloadResponse.java new file mode 100644 index 000000000..8f91c8914 --- /dev/null +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/DownloadResponse.java @@ -0,0 +1,26 @@ +package org.schabi.newpipe.extractor; + +import java.util.List; +import java.util.Map; + +public class DownloadResponse { + private final String responseBody; + private final Map> responseHeaders; + + + + public DownloadResponse(String responseBody, Map> headers) { + super(); + this.responseBody = responseBody; + this.responseHeaders = headers; + } + + public String getResponseBody() { + return responseBody; + } + + public Map> getResponseHeaders() { + return responseHeaders; + } + +} diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/Downloader.java b/extractor/src/main/java/org/schabi/newpipe/extractor/Downloader.java index fde90a2b5..93d0a7c23 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/Downloader.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/Downloader.java @@ -1,10 +1,11 @@ package org.schabi.newpipe.extractor; -import org.schabi.newpipe.extractor.exceptions.ReCaptchaException; - import java.io.IOException; +import java.util.List; import java.util.Map; +import org.schabi.newpipe.extractor.exceptions.ReCaptchaException; + /* * Created by Christian Schabesberger on 28.01.16. * @@ -27,35 +28,41 @@ import java.util.Map; public interface Downloader { - /** - * Download the text file at the supplied URL as in download(String), - * but set the HTTP header field "Accept-Language" to the supplied string. - * - * @param siteUrl the URL of the text file to return the contents of - * @param language the language (usually a 2-character code) to set as the preferred language - * @return the contents of the specified text file - * @throws IOException - */ - String download(String siteUrl, String language) throws IOException, ReCaptchaException; + /** + * Download the text file at the supplied URL as in download(String), but set + * the HTTP header field "Accept-Language" to the supplied string. + * + * @param siteUrl the URL of the text file to return the contents of + * @param language the language (usually a 2-character code) to set as the + * preferred language + * @return the contents of the specified text file + * @throws IOException + */ + String download(String siteUrl, String language) throws IOException, ReCaptchaException; - /** - * Download the text file at the supplied URL as in download(String), - * but set the HTTP header field "Accept-Language" to the supplied string. - * - * @param siteUrl the URL of the text file to return the contents of - * @param customProperties set request header properties - * @return the contents of the specified text file - * @throws IOException - */ - String download(String siteUrl, Map customProperties) throws IOException, ReCaptchaException; + /** + * Download the text file at the supplied URL as in download(String), but set + * the HTTP header field "Accept-Language" to the supplied string. + * + * @param siteUrl the URL of the text file to return the contents of + * @param customProperties set request header properties + * @return the contents of the specified text file + * @throws IOException + */ + String download(String siteUrl, Map customProperties) throws IOException, ReCaptchaException; - /** - * Download (via HTTP) the text file located at the supplied URL, and return its contents. - * Primarily intended for downloading web pages. - * - * @param siteUrl the URL of the text file to download - * @return the contents of the specified text file - * @throws IOException - */ - String download(String siteUrl) throws IOException, ReCaptchaException; + /** + * Download (via HTTP) the text file located at the supplied URL, and return its + * contents. Primarily intended for downloading web pages. + * + * @param siteUrl the URL of the text file to download + * @return the contents of the specified text file + * @throws IOException + */ + String download(String siteUrl) throws IOException, ReCaptchaException; + + DownloadResponse downloadWithHeaders(String siteUrl, Map> requestHeaders) + throws IOException, ReCaptchaException; + + DownloadResponse downloadWithHeaders(String siteUrl) throws IOException, ReCaptchaException; } diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/InfoItem.java b/extractor/src/main/java/org/schabi/newpipe/extractor/InfoItem.java index 4bed6bfbb..aead6c7f6 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/InfoItem.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/InfoItem.java @@ -68,6 +68,7 @@ public abstract class InfoItem implements Serializable { public enum InfoType { STREAM, PLAYLIST, - CHANNEL + CHANNEL, + COMMENT } } diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/StreamingService.java b/extractor/src/main/java/org/schabi/newpipe/extractor/StreamingService.java index 03e465b6c..841511a8f 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/StreamingService.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/StreamingService.java @@ -1,18 +1,24 @@ package org.schabi.newpipe.extractor; -import org.schabi.newpipe.extractor.channel.ChannelExtractor; -import org.schabi.newpipe.extractor.exceptions.ExtractionException; -import org.schabi.newpipe.extractor.exceptions.ParsingException; -import org.schabi.newpipe.extractor.kiosk.KioskList; -import org.schabi.newpipe.extractor.playlist.PlaylistExtractor; -import org.schabi.newpipe.extractor.search.SearchExtractor; -import org.schabi.newpipe.extractor.linkhandler.*; -import org.schabi.newpipe.extractor.stream.StreamExtractor; -import org.schabi.newpipe.extractor.subscription.SubscriptionExtractor; - import java.util.Collections; import java.util.List; +import org.schabi.newpipe.extractor.channel.ChannelExtractor; +import org.schabi.newpipe.extractor.comments.CommentsExtractor; +import org.schabi.newpipe.extractor.exceptions.ExtractionException; +import org.schabi.newpipe.extractor.exceptions.ParsingException; +import org.schabi.newpipe.extractor.kiosk.KioskList; +import org.schabi.newpipe.extractor.linkhandler.LinkHandler; +import org.schabi.newpipe.extractor.linkhandler.LinkHandlerFactory; +import org.schabi.newpipe.extractor.linkhandler.ListLinkHandler; +import org.schabi.newpipe.extractor.linkhandler.ListLinkHandlerFactory; +import org.schabi.newpipe.extractor.linkhandler.SearchQueryHandler; +import org.schabi.newpipe.extractor.linkhandler.SearchQueryHandlerFactory; +import org.schabi.newpipe.extractor.playlist.PlaylistExtractor; +import org.schabi.newpipe.extractor.search.SearchExtractor; +import org.schabi.newpipe.extractor.stream.StreamExtractor; +import org.schabi.newpipe.extractor.subscription.SubscriptionExtractor; + public abstract class StreamingService { public static class ServiceInfo { private final String name; @@ -71,6 +77,7 @@ public abstract class StreamingService { public abstract ListLinkHandlerFactory getChannelLHFactory(); public abstract ListLinkHandlerFactory getPlaylistLHFactory(); public abstract SearchQueryHandlerFactory getSearchQHFactory(); + public abstract ListLinkHandlerFactory getCommentsLHFactory(); //////////////////////////////////////////// @@ -84,6 +91,7 @@ public abstract class StreamingService { public abstract ChannelExtractor getChannelExtractor(ListLinkHandler urlIdHandler) throws ExtractionException; public abstract PlaylistExtractor getPlaylistExtractor(ListLinkHandler urlIdHandler) throws ExtractionException; public abstract StreamExtractor getStreamExtractor(LinkHandler UIHFactory) throws ExtractionException; + public abstract CommentsExtractor getCommentsExtractor(ListLinkHandler urlIdHandler) throws ExtractionException; public SearchExtractor getSearchExtractor(String query, List contentFilter, String sortFilter, String contentCountry) throws ExtractionException { return getSearchExtractor(getSearchQHFactory().fromQuery(query, contentFilter, sortFilter), contentCountry); @@ -112,10 +120,16 @@ public abstract class StreamingService { public StreamExtractor getStreamExtractor(String url) throws ExtractionException { return getStreamExtractor(getStreamLHFactory().fromUrl(url)); } + + public CommentsExtractor getCommentsExtractor(String url) throws ExtractionException { + return getCommentsExtractor(getCommentsLHFactory().fromUrl(url)); + } - /** + + + /** * figure out where the link is pointing to (a channel, video, playlist, etc.) */ public final LinkType getLinkTypeByUrl(String url) throws ParsingException { diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/comments/CommentsExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/comments/CommentsExtractor.java new file mode 100644 index 000000000..9f43e1af5 --- /dev/null +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/comments/CommentsExtractor.java @@ -0,0 +1,14 @@ +package org.schabi.newpipe.extractor.comments; + +import org.schabi.newpipe.extractor.ListExtractor; +import org.schabi.newpipe.extractor.StreamingService; +import org.schabi.newpipe.extractor.linkhandler.ListLinkHandler; + +public abstract class CommentsExtractor extends ListExtractor { + + public CommentsExtractor(StreamingService service, ListLinkHandler uiHandler) { + super(service, uiHandler); + // TODO Auto-generated constructor stub + } + +} diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/comments/CommentsInfo.java b/extractor/src/main/java/org/schabi/newpipe/extractor/comments/CommentsInfo.java new file mode 100644 index 000000000..10ac85d5c --- /dev/null +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/comments/CommentsInfo.java @@ -0,0 +1,27 @@ +package org.schabi.newpipe.extractor.comments; + +import java.io.IOException; + +import org.schabi.newpipe.extractor.ListInfo; +import org.schabi.newpipe.extractor.NewPipe; +import org.schabi.newpipe.extractor.StreamingService; +import org.schabi.newpipe.extractor.exceptions.ExtractionException; +import org.schabi.newpipe.extractor.linkhandler.ListLinkHandler; + +public class CommentsInfo extends ListInfo{ + + private CommentsInfo(int serviceId, ListLinkHandler listUrlIdHandler, String name) { + super(serviceId, listUrlIdHandler, name); + // TODO Auto-generated constructor stub + } + + public static CommentsInfo getInfo(String url) throws IOException, ExtractionException { + return getInfo(NewPipe.getServiceByUrl(url), url); + } + + private static CommentsInfo getInfo(StreamingService serviceByUrl, String url) { + // TODO Auto-generated method stub + return null; + } + +} diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/comments/CommentsInfoItem.java b/extractor/src/main/java/org/schabi/newpipe/extractor/comments/CommentsInfoItem.java new file mode 100644 index 000000000..87d54a0df --- /dev/null +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/comments/CommentsInfoItem.java @@ -0,0 +1,76 @@ +package org.schabi.newpipe.extractor.comments; + +import org.schabi.newpipe.extractor.InfoItem; + +public class CommentsInfoItem extends InfoItem{ + + private String commentId; + private String commentText; + private String authorName; + private String authorThumbnail; + private String authorEndpoint; + private String publishedTime; + private Integer likeCount; + + public CommentsInfoItem(int serviceId, String url, String name) { + super(InfoType.COMMENT, serviceId, url, name); + // TODO Auto-generated constructor stub + } + + public String getCommentText() { + return commentText; + } + + public void setCommentText(String contentText) { + this.commentText = contentText; + } + + public String getAuthorName() { + return authorName; + } + + public void setAuthorName(String authorName) { + this.authorName = authorName; + } + + public String getAuthorThumbnail() { + return authorThumbnail; + } + + public void setAuthorThumbnail(String authorThumbnail) { + this.authorThumbnail = authorThumbnail; + } + + public String getAuthorEndpoint() { + return authorEndpoint; + } + + public void setAuthorEndpoint(String authorEndpoint) { + this.authorEndpoint = authorEndpoint; + } + + public String getPublishedTime() { + return publishedTime; + } + + public void setPublishedTime(String publishedTime) { + this.publishedTime = publishedTime; + } + + public Integer getLikeCount() { + return likeCount; + } + + public void setLikeCount(Integer likeCount) { + this.likeCount = likeCount; + } + + public String getCommentId() { + return commentId; + } + + public void setCommentId(String commentId) { + this.commentId = commentId; + } + +} diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/comments/CommentsInfoItemExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/comments/CommentsInfoItemExtractor.java new file mode 100644 index 000000000..32757e16d --- /dev/null +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/comments/CommentsInfoItemExtractor.java @@ -0,0 +1,22 @@ +package org.schabi.newpipe.extractor.comments; + +import org.schabi.newpipe.extractor.InfoItemExtractor; +import org.schabi.newpipe.extractor.exceptions.ParsingException; + +public interface CommentsInfoItemExtractor extends InfoItemExtractor { + + String getCommentId() throws ParsingException; + + String getCommentText() throws ParsingException; + + String getAuthorName() throws ParsingException; + + String getAuthorThumbnail() throws ParsingException; + + String getAuthorEndpoint() throws ParsingException; + + String getPublishedTime() throws ParsingException; + + Integer getLikeCount() throws ParsingException; + +} diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/comments/CommentsInfoItemsCollector.java b/extractor/src/main/java/org/schabi/newpipe/extractor/comments/CommentsInfoItemsCollector.java new file mode 100644 index 000000000..344ba8059 --- /dev/null +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/comments/CommentsInfoItemsCollector.java @@ -0,0 +1,103 @@ +package org.schabi.newpipe.extractor.comments; + +import java.util.List; +import java.util.Vector; + +import org.schabi.newpipe.extractor.InfoItem; +import org.schabi.newpipe.extractor.InfoItemsCollector; +import org.schabi.newpipe.extractor.exceptions.ParsingException; + +/* + * Created by Christian Schabesberger on 28.02.16. + * + * Copyright (C) Christian Schabesberger 2016 + * CommentsInfoItemsCollector.java is part of NewPipe. + * + * NewPipe is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * NewPipe is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with NewPipe. If not, see . + */ + +public class CommentsInfoItemsCollector extends InfoItemsCollector { + + public CommentsInfoItemsCollector(int serviceId) { + super(serviceId); + } + + @Override + public CommentsInfoItem extract(CommentsInfoItemExtractor extractor) throws ParsingException { + + // important information + int serviceId = getServiceId(); + String url = extractor.getUrl(); + String name = extractor.getName(); + + CommentsInfoItem resultItem = new CommentsInfoItem(serviceId, url, name); + + // optional information + try { + resultItem.setCommentId(extractor.getCommentId()); + } catch (Exception e) { + addError(e); + } + try { + resultItem.setCommentText(extractor.getCommentText()); + } catch (Exception e) { + addError(e); + } + try { + resultItem.setAuthorName(extractor.getAuthorName()); + } catch (Exception e) { + addError(e); + } + try { + resultItem.setAuthorThumbnail(extractor.getAuthorThumbnail()); + } catch (Exception e) { + addError(e); + } + try { + resultItem.setAuthorEndpoint(extractor.getAuthorEndpoint()); + } catch (Exception e) { + addError(e); + } + try { + resultItem.setPublishedTime(extractor.getPublishedTime()); + } catch (Exception e) { + addError(e); + } + try { + resultItem.setLikeCount(extractor.getLikeCount()); + } catch (Exception e) { + addError(e); + } + return resultItem; + } + + @Override + public void commit(CommentsInfoItemExtractor extractor) { + try { + addItem(extract(extractor)); + } catch (Exception e) { + addError(e); + } + } + + public List getCommentsInfoItemList() { + List siiList = new Vector<>(); + for (InfoItem ii : super.getItems()) { + if (ii instanceof CommentsInfoItem) { + siiList.add((CommentsInfoItem) ii); + } + } + return siiList; + } +} diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/SoundcloudService.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/SoundcloudService.java index c533b951e..1d322f738 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/SoundcloudService.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/SoundcloudService.java @@ -3,6 +3,7 @@ package org.schabi.newpipe.extractor.services.soundcloud; import org.schabi.newpipe.extractor.*; import org.schabi.newpipe.extractor.linkhandler.*; import org.schabi.newpipe.extractor.channel.ChannelExtractor; +import org.schabi.newpipe.extractor.comments.CommentsExtractor; import org.schabi.newpipe.extractor.exceptions.ExtractionException; import org.schabi.newpipe.extractor.kiosk.KioskExtractor; import org.schabi.newpipe.extractor.kiosk.KioskList; @@ -98,4 +99,16 @@ public class SoundcloudService extends StreamingService { public SubscriptionExtractor getSubscriptionExtractor() { return new SoundcloudSubscriptionExtractor(this); } + + @Override + public ListLinkHandlerFactory getCommentsLHFactory() { + // TODO Auto-generated method stub + return null; + } + + @Override + public CommentsExtractor getCommentsExtractor(ListLinkHandler urlIdHandler) throws ExtractionException { + // TODO Auto-generated method stub + return null; + } } diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeService.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeService.java index 51a68f0b1..f6bc03775 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeService.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeService.java @@ -1,21 +1,42 @@ package org.schabi.newpipe.extractor.services.youtube; -import org.schabi.newpipe.extractor.*; -import org.schabi.newpipe.extractor.linkhandler.*; +import static java.util.Arrays.asList; +import static org.schabi.newpipe.extractor.StreamingService.ServiceInfo.MediaCapability.AUDIO; +import static org.schabi.newpipe.extractor.StreamingService.ServiceInfo.MediaCapability.LIVE; +import static org.schabi.newpipe.extractor.StreamingService.ServiceInfo.MediaCapability.VIDEO; + +import org.schabi.newpipe.extractor.StreamingService; +import org.schabi.newpipe.extractor.SuggestionExtractor; import org.schabi.newpipe.extractor.channel.ChannelExtractor; +import org.schabi.newpipe.extractor.comments.CommentsExtractor; import org.schabi.newpipe.extractor.exceptions.ExtractionException; import org.schabi.newpipe.extractor.kiosk.KioskExtractor; import org.schabi.newpipe.extractor.kiosk.KioskList; +import org.schabi.newpipe.extractor.linkhandler.LinkHandler; +import org.schabi.newpipe.extractor.linkhandler.LinkHandlerFactory; +import org.schabi.newpipe.extractor.linkhandler.ListLinkHandler; +import org.schabi.newpipe.extractor.linkhandler.ListLinkHandlerFactory; +import org.schabi.newpipe.extractor.linkhandler.SearchQueryHandler; +import org.schabi.newpipe.extractor.linkhandler.SearchQueryHandlerFactory; import org.schabi.newpipe.extractor.playlist.PlaylistExtractor; import org.schabi.newpipe.extractor.search.SearchExtractor; -import org.schabi.newpipe.extractor.services.youtube.extractors.*; -import org.schabi.newpipe.extractor.services.youtube.linkHandler.*; +import org.schabi.newpipe.extractor.services.youtube.extractors.YoutubeChannelExtractor; +import org.schabi.newpipe.extractor.services.youtube.extractors.YoutubeCommentsExtractor; +import org.schabi.newpipe.extractor.services.youtube.extractors.YoutubePlaylistExtractor; +import org.schabi.newpipe.extractor.services.youtube.extractors.YoutubeSearchExtractor; +import org.schabi.newpipe.extractor.services.youtube.extractors.YoutubeStreamExtractor; +import org.schabi.newpipe.extractor.services.youtube.extractors.YoutubeSubscriptionExtractor; +import org.schabi.newpipe.extractor.services.youtube.extractors.YoutubeSuggestionExtractor; +import org.schabi.newpipe.extractor.services.youtube.extractors.YoutubeTrendingExtractor; +import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeChannelLinkHandlerFactory; +import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeCommentsLinkHandlerFactory; +import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubePlaylistLinkHandlerFactory; +import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeSearchQueryHandlerFactory; +import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeStreamLinkHandlerFactory; +import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeTrendingLinkHandlerFactory; import org.schabi.newpipe.extractor.stream.StreamExtractor; import org.schabi.newpipe.extractor.subscription.SubscriptionExtractor; -import static java.util.Arrays.asList; -import static org.schabi.newpipe.extractor.StreamingService.ServiceInfo.MediaCapability.*; - /* * Created by Christian Schabesberger on 23.08.15. @@ -115,4 +136,14 @@ public class YoutubeService extends StreamingService { return new YoutubeSubscriptionExtractor(this); } + @Override + public ListLinkHandlerFactory getCommentsLHFactory() { + return YoutubeCommentsLinkHandlerFactory.getInstance(); + } + + @Override + public CommentsExtractor getCommentsExtractor(ListLinkHandler urlIdHandler) throws ExtractionException { + return new YoutubeCommentsExtractor(this, urlIdHandler); + } + } diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeCommentsExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeCommentsExtractor.java new file mode 100644 index 000000000..ec1568e3b --- /dev/null +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeCommentsExtractor.java @@ -0,0 +1,262 @@ +package org.schabi.newpipe.extractor.services.youtube.extractors; + +import java.io.BufferedReader; +import java.io.IOException; +import java.io.InputStreamReader; +import java.io.UnsupportedEncodingException; +import java.net.URL; +import java.net.URLEncoder; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import javax.net.ssl.HttpsURLConnection; + +import org.schabi.newpipe.extractor.DownloadResponse; +import org.schabi.newpipe.extractor.Downloader; +import org.schabi.newpipe.extractor.StreamingService; +import org.schabi.newpipe.extractor.comments.CommentsExtractor; +import org.schabi.newpipe.extractor.comments.CommentsInfoItem; +import org.schabi.newpipe.extractor.comments.CommentsInfoItemExtractor; +import org.schabi.newpipe.extractor.comments.CommentsInfoItemsCollector; +import org.schabi.newpipe.extractor.exceptions.ExtractionException; +import org.schabi.newpipe.extractor.exceptions.ParsingException; +import org.schabi.newpipe.extractor.linkhandler.ListLinkHandler; + +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.ObjectMapper; + +public class YoutubeCommentsExtractor extends CommentsExtractor { + + private static final String USER_AGENT = "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:61.0) Gecko/20100101 Firefox/61.0"; + + private List cookies; + private String sessionToken; + private String commentsToken; + + private ObjectMapper mapper = new ObjectMapper(); + + public YoutubeCommentsExtractor(StreamingService service, ListLinkHandler uiHandler) { + super(service, uiHandler); + // TODO Auto-generated constructor stub + } + + @Override + public InfoItemsPage getInitialPage() throws IOException, ExtractionException { + // initial page does not load any comments but is required to get session token + // and cookies + return getPage(getNextPageUrl()); + } + + @Override + public String getNextPageUrl() throws IOException, ExtractionException { + return getNextPageUrl(commentsToken); + } + + private String getNextPageUrl(JsonNode ajaxJson) throws IOException, ExtractionException { + String continuation; + try { + continuation = ajaxJson.findValue("itemSectionContinuation").get("continuations").findValue("continuation") + .asText(); + } catch (Exception e) { + // no more comments + return ""; + } + return getNextPageUrl(continuation); + } + + private String getNextPageUrl(String continuation) throws ParsingException { + Map params = new HashMap<>(); + params.put("action_get_comments", "1"); + params.put("pbj", "1"); + params.put("ctoken", continuation); + params.put("continuation", continuation); + try { + return "https://www.youtube.com/comment_service_ajax?" + getDataString(params); + } catch (UnsupportedEncodingException e) { + throw new ParsingException("Could not get next page url", e); + } + } + + @Override + public InfoItemsPage getPage(String pageUrl) throws IOException, ExtractionException { + if (pageUrl == null || pageUrl.isEmpty()) { + throw new ExtractionException(new IllegalArgumentException("Page url is empty or null")); + } + String ajaxResponse = makeAjaxRequest(pageUrl); + JsonNode ajaxJson = mapper.readTree(ajaxResponse); + CommentsInfoItemsCollector collector = new CommentsInfoItemsCollector(getServiceId()); + collectCommentsFrom(collector, ajaxJson, pageUrl); + return new InfoItemsPage<>(collector, getNextPageUrl(ajaxJson)); + } + + private void collectCommentsFrom(CommentsInfoItemsCollector collector, JsonNode ajaxJson, String pageUrl) { + List comments = ajaxJson.findValues("commentRenderer"); + comments.stream().forEach(c -> { + CommentsInfoItemExtractor extractor = new CommentsInfoItemExtractor() { + + @Override + public String getUrl() throws ParsingException { + return pageUrl; + } + + @Override + public String getThumbnailUrl() throws ParsingException { + try { + return c.get("authorThumbnail").get("thumbnails").get(0).get("url").asText(); + } catch (Exception e) { + throw new ParsingException("Could not get thumbnail url", e); + } + } + + @Override + public String getName() throws ParsingException { + try { + return c.get("authorText").get("simpleText").asText(); + } catch (Exception e) { + throw new ParsingException("Could not get thumbnail url", e); + } + } + + @Override + public String getPublishedTime() throws ParsingException { + try { + return c.get("publishedTimeText").get("runs").get(0).get("text").asText(); + } catch (Exception e) { + throw new ParsingException("Could not get thumbnail url", e); + } + } + + @Override + public Integer getLikeCount() throws ParsingException { + try { + return c.get("likeCount").intValue(); + } catch (Exception e) { + throw new ParsingException("Could not get thumbnail url", e); + } + } + + @Override + public String getCommentText() throws ParsingException { + try { + if (null != c.get("contentText").get("simpleText")) { + return c.get("contentText").get("simpleText").asText(); + } else { + return c.get("contentText").get("runs").get(0).get("text").asText(); + } + } catch (Exception e) { + throw new ParsingException("Could not get thumbnail url", e); + } + } + + @Override + public String getCommentId() throws ParsingException { + try { + return c.get("commentId").asText(); + } catch (Exception e) { + throw new ParsingException("Could not get thumbnail url", e); + } + } + + @Override + public String getAuthorThumbnail() throws ParsingException { + try { + return c.get("authorThumbnail").get("thumbnails").get(0).get("url").asText(); + } catch (Exception e) { + throw new ParsingException("Could not get thumbnail url", e); + } + } + + @Override + public String getAuthorName() throws ParsingException { + try { + return c.get("authorText").get("simpleText").asText(); + } catch (Exception e) { + throw new ParsingException("Could not get thumbnail url", e); + } + } + + @Override + public String getAuthorEndpoint() throws ParsingException { + try { + return "https://youtube.com" + + c.get("authorEndpoint").get("browseEndpoint").get("canonicalBaseUrl").asText(); + } catch (Exception e) { + throw new ParsingException("Could not get thumbnail url", e); + } + } + }; + + collector.commit(extractor); + }); + + } + + @Override + public void onFetchPage(Downloader downloader) throws IOException, ExtractionException { + DownloadResponse response = downloader.downloadWithHeaders(getUrl()); + String responseBody = response.getResponseBody(); + cookies = response.getResponseHeaders().get("Set-Cookie"); + sessionToken = findValue(responseBody, "XSRF_TOKEN"); + commentsToken = findValue(responseBody, "COMMENTS_TOKEN"); + } + + @Override + public String getName() throws ParsingException { + // TODO Auto-generated method stub + return null; + } + + private String makeAjaxRequest(String siteUrl) throws IOException { + + StringBuilder postData = new StringBuilder(); + postData.append(URLEncoder.encode("session_token", "UTF-8")); + postData.append('='); + postData.append(URLEncoder.encode(sessionToken, "UTF-8")); + byte[] postDataBytes = postData.toString().getBytes("UTF-8"); + + URL url = new URL(siteUrl); + HttpsURLConnection con = (HttpsURLConnection) url.openConnection(); + con.setRequestMethod("POST"); + con.setRequestProperty("Content-Type", "application/x-www-form-urlencoded"); + con.setRequestProperty("Content-Length", String.valueOf(postDataBytes.length)); + con.setRequestProperty("Accept", "*/*"); + con.setRequestProperty("User-Agent", USER_AGENT); + con.setRequestProperty("X-YouTube-Client-Version", "2.20180815"); + con.setRequestProperty("X-YouTube-Client-Name", "1"); + // set cookies + cookies.stream().forEach(c -> con.addRequestProperty("Cookie", c)); + con.setDoOutput(true); + con.getOutputStream().write(postDataBytes); + + BufferedReader in = new BufferedReader(new InputStreamReader(con.getInputStream(), "UTF-8")); + StringBuilder sb = new StringBuilder(); + String inputLine; + while ((inputLine = in.readLine()) != null) { + sb.append(inputLine); + } + return sb.toString(); + } + + private String getDataString(Map params) throws UnsupportedEncodingException { + StringBuilder result = new StringBuilder(); + boolean first = true; + for (Map.Entry entry : params.entrySet()) { + if (first) + first = false; + else + result.append("&"); + result.append(URLEncoder.encode(entry.getKey(), "UTF-8")); + result.append("="); + result.append(URLEncoder.encode(entry.getValue(), "UTF-8")); + } + return result.toString(); + } + + private String findValue(String doc, String key) { + int beginIndex = doc.indexOf(key) + key.length() + 4; + int endIndex = doc.indexOf("\"", beginIndex); + return doc.substring(beginIndex, endIndex); + } + +} diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeCommentsLinkHandlerFactory.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeCommentsLinkHandlerFactory.java new file mode 100644 index 000000000..fbdd63f1a --- /dev/null +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeCommentsLinkHandlerFactory.java @@ -0,0 +1,183 @@ +package org.schabi.newpipe.extractor.services.youtube.linkHandler; + +import org.schabi.newpipe.extractor.linkhandler.ListLinkHandlerFactory; +import org.jsoup.Jsoup; +import org.jsoup.nodes.Document; +import org.jsoup.nodes.Element; +import org.schabi.newpipe.extractor.Downloader; +import org.schabi.newpipe.extractor.NewPipe; +import org.schabi.newpipe.extractor.exceptions.FoundAdException; +import org.schabi.newpipe.extractor.exceptions.ParsingException; +import org.schabi.newpipe.extractor.exceptions.ReCaptchaException; +import org.schabi.newpipe.extractor.utils.Parser; + +import java.io.IOException; +import java.io.UnsupportedEncodingException; +import java.net.URI; +import java.net.URISyntaxException; +import java.net.URLDecoder; +import java.util.List; + +/* + * Created by Christian Schabesberger on 25.07.16. + * + * Copyright (C) Christian Schabesberger 2018 + * YoutubeChannelLinkHandlerFactory.java is part of NewPipe. + * + * NewPipe is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * NewPipe is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with NewPipe. If not, see . + */ + +public class YoutubeCommentsLinkHandlerFactory extends ListLinkHandlerFactory { + + private static final YoutubeCommentsLinkHandlerFactory instance = new YoutubeCommentsLinkHandlerFactory(); + private static final String ID_PATTERN = "([\\-a-zA-Z0-9_]{11})"; + + public static YoutubeCommentsLinkHandlerFactory getInstance() { + return instance; + } + + @Override + public String getId(String url) throws ParsingException, IllegalArgumentException { + if (url.isEmpty()) { + throw new IllegalArgumentException("The url parameter should not be empty"); + } + + String id; + String lowercaseUrl = url.toLowerCase(); + if (lowercaseUrl.contains("youtube")) { + if (url.contains("attribution_link")) { + try { + String escapedQuery = Parser.matchGroup1("u=(.[^&|$]*)", url); + String query = URLDecoder.decode(escapedQuery, "UTF-8"); + id = Parser.matchGroup1("v=" + ID_PATTERN, query); + } catch (UnsupportedEncodingException uee) { + throw new ParsingException("Could not parse attribution_link", uee); + } + } else if (lowercaseUrl.contains("youtube.com/shared?ci=")) { + return getRealIdFromSharedLink(url); + } else if (url.contains("vnd.youtube")) { + id = Parser.matchGroup1(ID_PATTERN, url); + } else if (url.contains("embed")) { + id = Parser.matchGroup1("embed/" + ID_PATTERN, url); + } else if (url.contains("googleads")) { + throw new FoundAdException("Error found add: " + url); + } else { + id = Parser.matchGroup1("[?&]v=" + ID_PATTERN, url); + } + } else if (lowercaseUrl.contains("youtu.be")) { + if (url.contains("v=")) { + id = Parser.matchGroup1("v=" + ID_PATTERN, url); + } else { + id = Parser.matchGroup1("[Yy][Oo][Uu][Tt][Uu]\\.[Bb][Ee]/" + ID_PATTERN, url); + } + } else if(lowercaseUrl.contains("hooktube")) { + if(lowercaseUrl.contains("&v=") + || lowercaseUrl.contains("?v=")) { + id = Parser.matchGroup1("[?&]v=" + ID_PATTERN, url); + } else if (url.contains("/embed/")) { + id = Parser.matchGroup1("embed/" + ID_PATTERN, url); + } else if (url.contains("/v/")) { + id = Parser.matchGroup1("v/" + ID_PATTERN, url); + } else if (url.contains("/watch/")) { + id = Parser.matchGroup1("watch/" + ID_PATTERN, url); + } else { + throw new ParsingException("Error no suitable url: " + url); + } + } else { + throw new ParsingException("Error no suitable url: " + url); + } + + + if (!id.isEmpty()) { + return id; + } else { + throw new ParsingException("Error could not parse url: " + url); + } + } + + /** + * Get the real url from a shared uri. + *

+ * Shared URI's look like this: + *

+     *     * https://www.youtube.com/shared?ci=PJICrTByb3E
+     *     * vnd.youtube://www.youtube.com/shared?ci=PJICrTByb3E&feature=twitter-deep-link
+     * 
+ * + * @param url The shared url + * @return the id of the stream + * @throws ParsingException + */ + private String getRealIdFromSharedLink(String url) throws ParsingException { + URI uri; + try { + uri = new URI(url); + } catch (URISyntaxException e) { + throw new ParsingException("Invalid shared link", e); + } + String sharedId = getSharedId(uri); + Downloader downloader = NewPipe.getDownloader(); + String content; + try { + content = downloader.download("https://www.youtube.com/shared?ci=" + sharedId); + } catch (IOException | ReCaptchaException e) { + throw new ParsingException("Unable to resolve shared link", e); + } + final Document document = Jsoup.parse(content); + + final Element element = document.select("link[rel=\"canonical\"]").first(); + final String urlWithRealId = (element != null) + ? element.attr("abs:href") + : document.select("meta[property=\"og:url\"]").first() + .attr("abs:content"); + + String realId = Parser.matchGroup1(ID_PATTERN, urlWithRealId); + if (sharedId.equals(realId)) { + throw new ParsingException("Got same id for as shared info_id: " + sharedId); + } + return realId; + } + + private String getSharedId(URI uri) throws ParsingException { + if (!"/shared".equals(uri.getPath())) { + throw new ParsingException("Not a shared link: " + uri.toString() + " (path != " + uri.getPath() + ")"); + } + return Parser.matchGroup1("ci=" + ID_PATTERN, uri.getQuery()); + } + + @Override + public boolean onAcceptUrl(final String url) throws FoundAdException { + final String lowercaseUrl = url.toLowerCase(); + if (lowercaseUrl.contains("youtube") + || lowercaseUrl.contains("youtu.be") + || lowercaseUrl.contains("hooktube")) { + // bad programming I know + try { + getId(url); + return true; + } catch (FoundAdException fe) { + throw fe; + } catch (ParsingException e) { + return false; + } + } else { + return false; + } + } + + @Override + public String getUrl(String id, List contentFilter, String sortFilter) throws ParsingException { + return "https://www.youtube.com/watch?v=" + id; + } +} diff --git a/extractor/src/test/java/org/schabi/newpipe/Downloader.java b/extractor/src/test/java/org/schabi/newpipe/Downloader.java index e3bb6f68c..fb0e38ccc 100644 --- a/extractor/src/test/java/org/schabi/newpipe/Downloader.java +++ b/extractor/src/test/java/org/schabi/newpipe/Downloader.java @@ -1,16 +1,18 @@ package org.schabi.newpipe; -import org.schabi.newpipe.extractor.exceptions.ReCaptchaException; - -import javax.net.ssl.HttpsURLConnection; import java.io.BufferedReader; import java.io.IOException; import java.io.InputStreamReader; import java.net.URL; import java.net.UnknownHostException; import java.util.HashMap; +import java.util.List; import java.util.Map; +import javax.net.ssl.HttpsURLConnection; + +import org.schabi.newpipe.extractor.DownloadResponse; +import org.schabi.newpipe.extractor.exceptions.ReCaptchaException; /* * Created by Christian Schabesberger on 28.01.16. @@ -34,124 +36,144 @@ import java.util.Map; public class Downloader implements org.schabi.newpipe.extractor.Downloader { - private static final String USER_AGENT = "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:43.0) Gecko/20100101 Firefox/43.0"; - private static String mCookies = ""; + private static final String USER_AGENT = "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:43.0) Gecko/20100101 Firefox/43.0"; + private static String mCookies = ""; - private static Downloader instance = null; + private static Downloader instance = null; - private Downloader() { - } + private Downloader() { + } - public static Downloader getInstance() { - if (instance == null) { - synchronized (Downloader.class) { - if (instance == null) { - instance = new Downloader(); - } - } - } - return instance; - } + public static Downloader getInstance() { + if (instance == null) { + synchronized (Downloader.class) { + if (instance == null) { + instance = new Downloader(); + } + } + } + return instance; + } - public static synchronized void setCookies(String cookies) { - Downloader.mCookies = cookies; - } + public static synchronized void setCookies(String cookies) { + Downloader.mCookies = cookies; + } - public static synchronized String getCookies() { - return Downloader.mCookies; - } + public static synchronized String getCookies() { + return Downloader.mCookies; + } - /** - * Download the text file at the supplied URL as in download(String), - * but set the HTTP header field "Accept-Language" to the supplied string. - * - * @param siteUrl the URL of the text file to return the contents of - * @param language the language (usually a 2-character code) to set as the preferred language - * @return the contents of the specified text file - */ - public String download(String siteUrl, String language) throws IOException, ReCaptchaException { - Map requestProperties = new HashMap<>(); - requestProperties.put("Accept-Language", language); - return download(siteUrl, requestProperties); - } + /** + * Download the text file at the supplied URL as in download(String), but set + * the HTTP header field "Accept-Language" to the supplied string. + * + * @param siteUrl the URL of the text file to return the contents of + * @param language the language (usually a 2-character code) to set as the + * preferred language + * @return the contents of the specified text file + */ + public String download(String siteUrl, String language) throws IOException, ReCaptchaException { + Map requestProperties = new HashMap<>(); + requestProperties.put("Accept-Language", language); + return download(siteUrl, requestProperties); + } + /** + * Download the text file at the supplied URL as in download(String), but set + * the HTTP header field "Accept-Language" to the supplied string. + * + * @param siteUrl the URL of the text file to return the contents of + * @param customProperties set request header properties + * @return the contents of the specified text file + * @throws IOException + */ + public String download(String siteUrl, Map customProperties) + throws IOException, ReCaptchaException { + URL url = new URL(siteUrl); + HttpsURLConnection con = (HttpsURLConnection) url.openConnection(); + for (Map.Entry pair : customProperties.entrySet()) { + con.setRequestProperty(pair.getKey(), pair.getValue()); + } + return dl(con); + } - /** - * Download the text file at the supplied URL as in download(String), - * but set the HTTP header field "Accept-Language" to the supplied string. - * - * @param siteUrl the URL of the text file to return the contents of - * @param customProperties set request header properties - * @return the contents of the specified text file - * @throws IOException - */ - public String download(String siteUrl, Map customProperties) throws IOException, ReCaptchaException { - URL url = new URL(siteUrl); - HttpsURLConnection con = (HttpsURLConnection) url.openConnection(); - for (Map.Entry pair: customProperties.entrySet()) { - con.setRequestProperty(pair.getKey(), pair.getValue()); - } - return dl(con); - } + /** + * Common functionality between download(String url) and download(String url, + * String language) + */ + private static String dl(HttpsURLConnection con) throws IOException, ReCaptchaException { + StringBuilder response = new StringBuilder(); + BufferedReader in = null; - /** - * Common functionality between download(String url) and download(String url, String language) - */ - private static String dl(HttpsURLConnection con) throws IOException, ReCaptchaException { - StringBuilder response = new StringBuilder(); - BufferedReader in = null; + try { + con.setConnectTimeout(30 * 1000);// 30s + con.setReadTimeout(30 * 1000);// 30s + con.setRequestMethod("GET"); + con.setRequestProperty("User-Agent", USER_AGENT); - try { - con.setConnectTimeout(30 * 1000);// 30s - con.setReadTimeout(30 * 1000);// 30s - con.setRequestMethod("GET"); - con.setRequestProperty("User-Agent", USER_AGENT); + if (getCookies().length() > 0) { + con.addRequestProperty("Cookie", getCookies()); + } - if (getCookies().length() > 0) { - con.setRequestProperty("Cookie", getCookies()); - } + in = new BufferedReader(new InputStreamReader(con.getInputStream())); + String inputLine; - in = new BufferedReader( - new InputStreamReader(con.getInputStream())); - String inputLine; + while ((inputLine = in.readLine()) != null) { + response.append(inputLine); + } + } catch (UnknownHostException uhe) {// thrown when there's no internet connection + throw new IOException("unknown host or no network", uhe); + // Toast.makeText(getActivity(), uhe.getMessage(), Toast.LENGTH_LONG).show(); + } catch (Exception e) { + /* + * HTTP 429 == Too Many Request Receive from Youtube.com = ReCaptcha challenge + * request See : https://github.com/rg3/youtube-dl/issues/5138 + */ + if (con.getResponseCode() == 429) { + throw new ReCaptchaException("reCaptcha Challenge requested"); + } - while ((inputLine = in.readLine()) != null) { - response.append(inputLine); - } - } catch (UnknownHostException uhe) {//thrown when there's no internet connection - throw new IOException("unknown host or no network", uhe); - //Toast.makeText(getActivity(), uhe.getMessage(), Toast.LENGTH_LONG).show(); - } catch (Exception e) { - /* - * HTTP 429 == Too Many Request - * Receive from Youtube.com = ReCaptcha challenge request - * See : https://github.com/rg3/youtube-dl/issues/5138 - */ - if (con.getResponseCode() == 429) { - throw new ReCaptchaException("reCaptcha Challenge requested"); - } + throw new IOException(con.getResponseCode() + " " + con.getResponseMessage(), e); + } finally { + if (in != null) { + in.close(); + } + } - throw new IOException(con.getResponseCode() + " " + con.getResponseMessage(), e); - } finally { - if (in != null) { - in.close(); - } - } + return response.toString(); + } - return response.toString(); - } + /** + * Download (via HTTP) the text file located at the supplied URL, and return its + * contents. Primarily intended for downloading web pages. + * + * @param siteUrl the URL of the text file to download + * @return the contents of the specified text file + */ + public String download(String siteUrl) throws IOException, ReCaptchaException { + URL url = new URL(siteUrl); + HttpsURLConnection con = (HttpsURLConnection) url.openConnection(); + // HttpsURLConnection con = NetCipher.getHttpsURLConnection(url); + return dl(con); + } - /** - * Download (via HTTP) the text file located at the supplied URL, and return its contents. - * Primarily intended for downloading web pages. - * - * @param siteUrl the URL of the text file to download - * @return the contents of the specified text file - */ - public String download(String siteUrl) throws IOException, ReCaptchaException { - URL url = new URL(siteUrl); - HttpsURLConnection con = (HttpsURLConnection) url.openConnection(); - //HttpsURLConnection con = NetCipher.getHttpsURLConnection(url); - return dl(con); - } + @Override + public DownloadResponse downloadWithHeaders(String siteUrl, Map> requestHeaders) + throws IOException, ReCaptchaException { + URL url = new URL(siteUrl); + HttpsURLConnection con = (HttpsURLConnection) url.openConnection(); + for (Map.Entry> pair : requestHeaders.entrySet()) { + pair.getValue().stream().forEach(value -> con.addRequestProperty(pair.getKey(), value)); + } + String responseBody = dl(con); + return new DownloadResponse(responseBody, con.getHeaderFields()); + } + + @Override + public DownloadResponse downloadWithHeaders(String siteUrl) throws IOException, ReCaptchaException { + URL url = new URL(siteUrl); + HttpsURLConnection con = (HttpsURLConnection) url.openConnection(); + String responseBody = dl(con); + return new DownloadResponse(responseBody, con.getHeaderFields()); + } } diff --git a/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeCommentsExtractorTest.java b/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeCommentsExtractorTest.java new file mode 100644 index 000000000..468c12ce5 --- /dev/null +++ b/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeCommentsExtractorTest.java @@ -0,0 +1,46 @@ +package org.schabi.newpipe.extractor.services.youtube; + +import static org.junit.Assert.assertTrue; +import static org.schabi.newpipe.extractor.ServiceList.YouTube; + +import java.io.IOException; + +import org.junit.BeforeClass; +import org.junit.Test; +import org.schabi.newpipe.Downloader; +import org.schabi.newpipe.extractor.ListExtractor.InfoItemsPage; +import org.schabi.newpipe.extractor.NewPipe; +import org.schabi.newpipe.extractor.comments.CommentsInfoItem; +import org.schabi.newpipe.extractor.exceptions.ExtractionException; +import org.schabi.newpipe.extractor.services.youtube.extractors.YoutubeCommentsExtractor; + +public class YoutubeCommentsExtractorTest { + + private static YoutubeCommentsExtractor extractor; + + @BeforeClass + public static void setUp() throws Exception { + NewPipe.init(Downloader.getInstance()); + extractor = (YoutubeCommentsExtractor) YouTube + .getCommentsExtractor("https://www.youtube.com/watch?v=rrgFN3AxGfs"); + extractor.fetchPage(); + } + + @Test + public void testGetComments() throws IOException, ExtractionException { + boolean result = false; + InfoItemsPage comments = extractor.getInitialPage(); + result = findInComments(comments, "i should really be in the top comment.lol"); + + while (comments.hasNextPage()) { + comments = extractor.getPage(comments.getNextPageUrl()); + result = findInComments(comments, "i should really be in the top comment.lol"); + } + + assertTrue(result); + } + + private boolean findInComments(InfoItemsPage comments, String comment) { + return comments.getItems().stream().filter(c -> c.getCommentText().contains(comment)).findAny().isPresent(); + } +} From 8b8779b1762fa5896487f5381d188dbbdcf6d979 Mon Sep 17 00:00:00 2001 From: Ritvik Saraf <13ritvik@gmail.com> Date: Mon, 20 Aug 2018 04:58:33 +0530 Subject: [PATCH 02/31] removing catch exception --- .../extractors/YoutubeCommentsExtractor.java | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeCommentsExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeCommentsExtractor.java index ec1568e3b..994f19332 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeCommentsExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeCommentsExtractor.java @@ -9,6 +9,7 @@ import java.net.URLEncoder; import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.Optional; import javax.net.ssl.HttpsURLConnection; @@ -54,15 +55,15 @@ public class YoutubeCommentsExtractor extends CommentsExtractor { } private String getNextPageUrl(JsonNode ajaxJson) throws IOException, ExtractionException { - String continuation; - try { - continuation = ajaxJson.findValue("itemSectionContinuation").get("continuations").findValue("continuation") - .asText(); - } catch (Exception e) { + Optional element = Optional.ofNullable(ajaxJson.findValue("itemSectionContinuation")) + .map(e -> e.get("continuations")).map(e -> e.findValue("continuation")); + + if (element.isPresent()) { + return getNextPageUrl(element.get().asText()); + } else { // no more comments return ""; } - return getNextPageUrl(continuation); } private String getNextPageUrl(String continuation) throws ParsingException { From 95575756eed4f7972729cfe476ab9e41ce87a19b Mon Sep 17 00:00:00 2001 From: Ritvik Saraf <13ritvik@gmail.com> Date: Sun, 2 Sep 2018 05:06:26 +0530 Subject: [PATCH 03/31] added http post method in downloader, formatting --- .../newpipe/extractor/DownloadResponse.java | 30 +- .../schabi/newpipe/extractor/Downloader.java | 69 +-- .../comments/CommentsInfoItemExtractor.java | 22 +- .../comments/CommentsInfoItemsCollector.java | 20 - .../extractors/YoutubeCommentsExtractor.java | 395 +++++++++--------- .../YoutubeCommentsLinkHandlerFactory.java | 40 +- .../java/org/schabi/newpipe/Downloader.java | 286 +++++++------ .../youtube/YoutubeCommentsExtractorTest.java | 44 +- 8 files changed, 442 insertions(+), 464 deletions(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/DownloadResponse.java b/extractor/src/main/java/org/schabi/newpipe/extractor/DownloadResponse.java index 8f91c8914..64fc2ade1 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/DownloadResponse.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/DownloadResponse.java @@ -4,23 +4,21 @@ import java.util.List; import java.util.Map; public class DownloadResponse { - private final String responseBody; - private final Map> responseHeaders; - - + private final String responseBody; + private final Map> responseHeaders; - public DownloadResponse(String responseBody, Map> headers) { - super(); - this.responseBody = responseBody; - this.responseHeaders = headers; - } + public DownloadResponse(String responseBody, Map> headers) { + super(); + this.responseBody = responseBody; + this.responseHeaders = headers; + } - public String getResponseBody() { - return responseBody; - } + public String getResponseBody() { + return responseBody; + } + + public Map> getResponseHeaders() { + return responseHeaders; + } - public Map> getResponseHeaders() { - return responseHeaders; - } - } diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/Downloader.java b/extractor/src/main/java/org/schabi/newpipe/extractor/Downloader.java index 93d0a7c23..f0b6692d4 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/Downloader.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/Downloader.java @@ -28,41 +28,44 @@ import org.schabi.newpipe.extractor.exceptions.ReCaptchaException; public interface Downloader { - /** - * Download the text file at the supplied URL as in download(String), but set - * the HTTP header field "Accept-Language" to the supplied string. - * - * @param siteUrl the URL of the text file to return the contents of - * @param language the language (usually a 2-character code) to set as the - * preferred language - * @return the contents of the specified text file - * @throws IOException - */ - String download(String siteUrl, String language) throws IOException, ReCaptchaException; + /** + * Download the text file at the supplied URL as in download(String), but set + * the HTTP header field "Accept-Language" to the supplied string. + * + * @param siteUrl the URL of the text file to return the contents of + * @param language the language (usually a 2-character code) to set as the + * preferred language + * @return the contents of the specified text file + * @throws IOException + */ + String download(String siteUrl, String language) throws IOException, ReCaptchaException; - /** - * Download the text file at the supplied URL as in download(String), but set - * the HTTP header field "Accept-Language" to the supplied string. - * - * @param siteUrl the URL of the text file to return the contents of - * @param customProperties set request header properties - * @return the contents of the specified text file - * @throws IOException - */ - String download(String siteUrl, Map customProperties) throws IOException, ReCaptchaException; + /** + * Download the text file at the supplied URL as in download(String), but set + * the HTTP header field "Accept-Language" to the supplied string. + * + * @param siteUrl the URL of the text file to return the contents of + * @param customProperties set request header properties + * @return the contents of the specified text file + * @throws IOException + */ + String download(String siteUrl, Map customProperties) throws IOException, ReCaptchaException; - /** - * Download (via HTTP) the text file located at the supplied URL, and return its - * contents. Primarily intended for downloading web pages. - * - * @param siteUrl the URL of the text file to download - * @return the contents of the specified text file - * @throws IOException - */ - String download(String siteUrl) throws IOException, ReCaptchaException; + /** + * Download (via HTTP) the text file located at the supplied URL, and return its + * contents. Primarily intended for downloading web pages. + * + * @param siteUrl the URL of the text file to download + * @return the contents of the specified text file + * @throws IOException + */ + String download(String siteUrl) throws IOException, ReCaptchaException; - DownloadResponse downloadWithHeaders(String siteUrl, Map> requestHeaders) - throws IOException, ReCaptchaException; + DownloadResponse get(String siteUrl, Map> requestHeaders) + throws IOException, ReCaptchaException; - DownloadResponse downloadWithHeaders(String siteUrl) throws IOException, ReCaptchaException; + DownloadResponse get(String siteUrl) throws IOException, ReCaptchaException; + + DownloadResponse post(String siteUrl, String requestBody, Map> requestHeaders) + throws IOException, ReCaptchaException; } diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/comments/CommentsInfoItemExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/comments/CommentsInfoItemExtractor.java index 32757e16d..b9905c2a7 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/comments/CommentsInfoItemExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/comments/CommentsInfoItemExtractor.java @@ -4,19 +4,11 @@ import org.schabi.newpipe.extractor.InfoItemExtractor; import org.schabi.newpipe.extractor.exceptions.ParsingException; public interface CommentsInfoItemExtractor extends InfoItemExtractor { - - String getCommentId() throws ParsingException; - - String getCommentText() throws ParsingException; - - String getAuthorName() throws ParsingException; - - String getAuthorThumbnail() throws ParsingException; - - String getAuthorEndpoint() throws ParsingException; - - String getPublishedTime() throws ParsingException; - - Integer getLikeCount() throws ParsingException; - + String getCommentId() throws ParsingException; + String getCommentText() throws ParsingException; + String getAuthorName() throws ParsingException; + String getAuthorThumbnail() throws ParsingException; + String getAuthorEndpoint() throws ParsingException; + String getPublishedTime() throws ParsingException; + Integer getLikeCount() throws ParsingException; } diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/comments/CommentsInfoItemsCollector.java b/extractor/src/main/java/org/schabi/newpipe/extractor/comments/CommentsInfoItemsCollector.java index 344ba8059..6bc925302 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/comments/CommentsInfoItemsCollector.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/comments/CommentsInfoItemsCollector.java @@ -7,26 +7,6 @@ import org.schabi.newpipe.extractor.InfoItem; import org.schabi.newpipe.extractor.InfoItemsCollector; import org.schabi.newpipe.extractor.exceptions.ParsingException; -/* - * Created by Christian Schabesberger on 28.02.16. - * - * Copyright (C) Christian Schabesberger 2016 - * CommentsInfoItemsCollector.java is part of NewPipe. - * - * NewPipe is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * NewPipe is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with NewPipe. If not, see . - */ - public class CommentsInfoItemsCollector extends InfoItemsCollector { public CommentsInfoItemsCollector(int serviceId) { diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeCommentsExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeCommentsExtractor.java index 994f19332..defd579b3 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeCommentsExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeCommentsExtractor.java @@ -1,20 +1,17 @@ package org.schabi.newpipe.extractor.services.youtube.extractors; -import java.io.BufferedReader; import java.io.IOException; -import java.io.InputStreamReader; import java.io.UnsupportedEncodingException; -import java.net.URL; import java.net.URLEncoder; +import java.util.Arrays; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Optional; -import javax.net.ssl.HttpsURLConnection; - import org.schabi.newpipe.extractor.DownloadResponse; import org.schabi.newpipe.extractor.Downloader; +import org.schabi.newpipe.extractor.NewPipe; import org.schabi.newpipe.extractor.StreamingService; import org.schabi.newpipe.extractor.comments.CommentsExtractor; import org.schabi.newpipe.extractor.comments.CommentsInfoItem; @@ -22,6 +19,7 @@ import org.schabi.newpipe.extractor.comments.CommentsInfoItemExtractor; import org.schabi.newpipe.extractor.comments.CommentsInfoItemsCollector; import org.schabi.newpipe.extractor.exceptions.ExtractionException; import org.schabi.newpipe.extractor.exceptions.ParsingException; +import org.schabi.newpipe.extractor.exceptions.ReCaptchaException; import org.schabi.newpipe.extractor.linkhandler.ListLinkHandler; import com.fasterxml.jackson.databind.JsonNode; @@ -29,235 +27,222 @@ import com.fasterxml.jackson.databind.ObjectMapper; public class YoutubeCommentsExtractor extends CommentsExtractor { - private static final String USER_AGENT = "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:61.0) Gecko/20100101 Firefox/61.0"; + private static final String USER_AGENT = "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:61.0) Gecko/20100101 Firefox/61.0"; - private List cookies; - private String sessionToken; - private String commentsToken; + private List cookies; + private String sessionToken; + private String commentsToken; - private ObjectMapper mapper = new ObjectMapper(); + private ObjectMapper mapper = new ObjectMapper(); - public YoutubeCommentsExtractor(StreamingService service, ListLinkHandler uiHandler) { - super(service, uiHandler); - // TODO Auto-generated constructor stub - } + public YoutubeCommentsExtractor(StreamingService service, ListLinkHandler uiHandler) { + super(service, uiHandler); + // TODO Auto-generated constructor stub + } - @Override - public InfoItemsPage getInitialPage() throws IOException, ExtractionException { - // initial page does not load any comments but is required to get session token - // and cookies - return getPage(getNextPageUrl()); - } + @Override + public InfoItemsPage getInitialPage() throws IOException, ExtractionException { + // initial page does not load any comments but is required to get session token + // and cookies + return getPage(getNextPageUrl()); + } - @Override - public String getNextPageUrl() throws IOException, ExtractionException { - return getNextPageUrl(commentsToken); - } + @Override + public String getNextPageUrl() throws IOException, ExtractionException { + return getNextPageUrl(commentsToken); + } - private String getNextPageUrl(JsonNode ajaxJson) throws IOException, ExtractionException { - Optional element = Optional.ofNullable(ajaxJson.findValue("itemSectionContinuation")) - .map(e -> e.get("continuations")).map(e -> e.findValue("continuation")); + private String getNextPageUrl(JsonNode ajaxJson) throws IOException, ExtractionException { + Optional element = Optional.ofNullable(ajaxJson.findValue("itemSectionContinuation")) + .map(e -> e.get("continuations")).map(e -> e.findValue("continuation")); - if (element.isPresent()) { - return getNextPageUrl(element.get().asText()); - } else { - // no more comments - return ""; - } - } + if (element.isPresent()) { + return getNextPageUrl(element.get().asText()); + } else { + // no more comments + return ""; + } + } - private String getNextPageUrl(String continuation) throws ParsingException { - Map params = new HashMap<>(); - params.put("action_get_comments", "1"); - params.put("pbj", "1"); - params.put("ctoken", continuation); - params.put("continuation", continuation); - try { - return "https://www.youtube.com/comment_service_ajax?" + getDataString(params); - } catch (UnsupportedEncodingException e) { - throw new ParsingException("Could not get next page url", e); - } - } + private String getNextPageUrl(String continuation) throws ParsingException { + Map params = new HashMap<>(); + params.put("action_get_comments", "1"); + params.put("pbj", "1"); + params.put("ctoken", continuation); + params.put("continuation", continuation); + try { + return "https://www.youtube.com/comment_service_ajax?" + getDataString(params); + } catch (UnsupportedEncodingException e) { + throw new ParsingException("Could not get next page url", e); + } + } - @Override - public InfoItemsPage getPage(String pageUrl) throws IOException, ExtractionException { - if (pageUrl == null || pageUrl.isEmpty()) { - throw new ExtractionException(new IllegalArgumentException("Page url is empty or null")); - } - String ajaxResponse = makeAjaxRequest(pageUrl); - JsonNode ajaxJson = mapper.readTree(ajaxResponse); - CommentsInfoItemsCollector collector = new CommentsInfoItemsCollector(getServiceId()); - collectCommentsFrom(collector, ajaxJson, pageUrl); - return new InfoItemsPage<>(collector, getNextPageUrl(ajaxJson)); - } + @Override + public InfoItemsPage getPage(String pageUrl) throws IOException, ExtractionException { + if (pageUrl == null || pageUrl.isEmpty()) { + throw new ExtractionException(new IllegalArgumentException("Page url is empty or null")); + } + String ajaxResponse = makeAjaxRequest(pageUrl); + JsonNode ajaxJson = mapper.readTree(ajaxResponse); + CommentsInfoItemsCollector collector = new CommentsInfoItemsCollector(getServiceId()); + collectCommentsFrom(collector, ajaxJson, pageUrl); + return new InfoItemsPage<>(collector, getNextPageUrl(ajaxJson)); + } - private void collectCommentsFrom(CommentsInfoItemsCollector collector, JsonNode ajaxJson, String pageUrl) { - List comments = ajaxJson.findValues("commentRenderer"); - comments.stream().forEach(c -> { - CommentsInfoItemExtractor extractor = new CommentsInfoItemExtractor() { + private void collectCommentsFrom(CommentsInfoItemsCollector collector, JsonNode ajaxJson, String pageUrl) { + List comments = ajaxJson.findValues("commentRenderer"); + comments.stream().forEach(c -> { + CommentsInfoItemExtractor extractor = new CommentsInfoItemExtractor() { - @Override - public String getUrl() throws ParsingException { - return pageUrl; - } + @Override + public String getUrl() throws ParsingException { + return pageUrl; + } - @Override - public String getThumbnailUrl() throws ParsingException { - try { - return c.get("authorThumbnail").get("thumbnails").get(0).get("url").asText(); - } catch (Exception e) { - throw new ParsingException("Could not get thumbnail url", e); - } - } + @Override + public String getThumbnailUrl() throws ParsingException { + try { + return c.get("authorThumbnail").get("thumbnails").get(0).get("url").asText(); + } catch (Exception e) { + throw new ParsingException("Could not get thumbnail url", e); + } + } - @Override - public String getName() throws ParsingException { - try { - return c.get("authorText").get("simpleText").asText(); - } catch (Exception e) { - throw new ParsingException("Could not get thumbnail url", e); - } - } + @Override + public String getName() throws ParsingException { + try { + return c.get("authorText").get("simpleText").asText(); + } catch (Exception e) { + throw new ParsingException("Could not get thumbnail url", e); + } + } - @Override - public String getPublishedTime() throws ParsingException { - try { - return c.get("publishedTimeText").get("runs").get(0).get("text").asText(); - } catch (Exception e) { - throw new ParsingException("Could not get thumbnail url", e); - } - } + @Override + public String getPublishedTime() throws ParsingException { + try { + return c.get("publishedTimeText").get("runs").get(0).get("text").asText(); + } catch (Exception e) { + throw new ParsingException("Could not get thumbnail url", e); + } + } - @Override - public Integer getLikeCount() throws ParsingException { - try { - return c.get("likeCount").intValue(); - } catch (Exception e) { - throw new ParsingException("Could not get thumbnail url", e); - } - } + @Override + public Integer getLikeCount() throws ParsingException { + try { + return c.get("likeCount").intValue(); + } catch (Exception e) { + throw new ParsingException("Could not get thumbnail url", e); + } + } - @Override - public String getCommentText() throws ParsingException { - try { - if (null != c.get("contentText").get("simpleText")) { - return c.get("contentText").get("simpleText").asText(); - } else { - return c.get("contentText").get("runs").get(0).get("text").asText(); - } - } catch (Exception e) { - throw new ParsingException("Could not get thumbnail url", e); - } - } + @Override + public String getCommentText() throws ParsingException { + try { + if (null != c.get("contentText").get("simpleText")) { + return c.get("contentText").get("simpleText").asText(); + } else { + return c.get("contentText").get("runs").get(0).get("text").asText(); + } + } catch (Exception e) { + throw new ParsingException("Could not get thumbnail url", e); + } + } - @Override - public String getCommentId() throws ParsingException { - try { - return c.get("commentId").asText(); - } catch (Exception e) { - throw new ParsingException("Could not get thumbnail url", e); - } - } + @Override + public String getCommentId() throws ParsingException { + try { + return c.get("commentId").asText(); + } catch (Exception e) { + throw new ParsingException("Could not get thumbnail url", e); + } + } - @Override - public String getAuthorThumbnail() throws ParsingException { - try { - return c.get("authorThumbnail").get("thumbnails").get(0).get("url").asText(); - } catch (Exception e) { - throw new ParsingException("Could not get thumbnail url", e); - } - } + @Override + public String getAuthorThumbnail() throws ParsingException { + try { + return c.get("authorThumbnail").get("thumbnails").get(0).get("url").asText(); + } catch (Exception e) { + throw new ParsingException("Could not get thumbnail url", e); + } + } - @Override - public String getAuthorName() throws ParsingException { - try { - return c.get("authorText").get("simpleText").asText(); - } catch (Exception e) { - throw new ParsingException("Could not get thumbnail url", e); - } - } + @Override + public String getAuthorName() throws ParsingException { + try { + return c.get("authorText").get("simpleText").asText(); + } catch (Exception e) { + throw new ParsingException("Could not get thumbnail url", e); + } + } - @Override - public String getAuthorEndpoint() throws ParsingException { - try { - return "https://youtube.com" - + c.get("authorEndpoint").get("browseEndpoint").get("canonicalBaseUrl").asText(); - } catch (Exception e) { - throw new ParsingException("Could not get thumbnail url", e); - } - } - }; + @Override + public String getAuthorEndpoint() throws ParsingException { + try { + return "https://youtube.com" + + c.get("authorEndpoint").get("browseEndpoint").get("canonicalBaseUrl").asText(); + } catch (Exception e) { + throw new ParsingException("Could not get thumbnail url", e); + } + } + }; - collector.commit(extractor); - }); + collector.commit(extractor); + }); - } + } - @Override - public void onFetchPage(Downloader downloader) throws IOException, ExtractionException { - DownloadResponse response = downloader.downloadWithHeaders(getUrl()); - String responseBody = response.getResponseBody(); - cookies = response.getResponseHeaders().get("Set-Cookie"); - sessionToken = findValue(responseBody, "XSRF_TOKEN"); - commentsToken = findValue(responseBody, "COMMENTS_TOKEN"); - } + @Override + public void onFetchPage(Downloader downloader) throws IOException, ExtractionException { + DownloadResponse response = downloader.get(getUrl()); + String responseBody = response.getResponseBody(); + cookies = response.getResponseHeaders().get("Set-Cookie"); + sessionToken = findValue(responseBody, "XSRF_TOKEN"); + commentsToken = findValue(responseBody, "COMMENTS_TOKEN"); + } - @Override - public String getName() throws ParsingException { - // TODO Auto-generated method stub - return null; - } + @Override + public String getName() throws ParsingException { + // TODO Auto-generated method stub + return null; + } - private String makeAjaxRequest(String siteUrl) throws IOException { + private String makeAjaxRequest(String siteUrl) throws IOException, ReCaptchaException { - StringBuilder postData = new StringBuilder(); - postData.append(URLEncoder.encode("session_token", "UTF-8")); - postData.append('='); - postData.append(URLEncoder.encode(sessionToken, "UTF-8")); - byte[] postDataBytes = postData.toString().getBytes("UTF-8"); + StringBuilder postData = new StringBuilder(); + postData.append(URLEncoder.encode("session_token", "UTF-8")); + postData.append('='); + postData.append(URLEncoder.encode(sessionToken, "UTF-8")); - URL url = new URL(siteUrl); - HttpsURLConnection con = (HttpsURLConnection) url.openConnection(); - con.setRequestMethod("POST"); - con.setRequestProperty("Content-Type", "application/x-www-form-urlencoded"); - con.setRequestProperty("Content-Length", String.valueOf(postDataBytes.length)); - con.setRequestProperty("Accept", "*/*"); - con.setRequestProperty("User-Agent", USER_AGENT); - con.setRequestProperty("X-YouTube-Client-Version", "2.20180815"); - con.setRequestProperty("X-YouTube-Client-Name", "1"); - // set cookies - cookies.stream().forEach(c -> con.addRequestProperty("Cookie", c)); - con.setDoOutput(true); - con.getOutputStream().write(postDataBytes); + Map> requestHeaders = new HashMap<>(); + requestHeaders.put("Content-Type", Arrays.asList("application/x-www-form-urlencoded")); + requestHeaders.put("Accept", Arrays.asList("*/*")); + requestHeaders.put("User-Agent", Arrays.asList(USER_AGENT)); + requestHeaders.put("X-YouTube-Client-Version", Arrays.asList("2.20180815")); + requestHeaders.put("X-YouTube-Client-Name", Arrays.asList("1")); + requestHeaders.put("Cookie", cookies); - BufferedReader in = new BufferedReader(new InputStreamReader(con.getInputStream(), "UTF-8")); - StringBuilder sb = new StringBuilder(); - String inputLine; - while ((inputLine = in.readLine()) != null) { - sb.append(inputLine); - } - return sb.toString(); - } + return NewPipe.getDownloader().post(siteUrl, postData.toString(), requestHeaders).getResponseBody(); + } - private String getDataString(Map params) throws UnsupportedEncodingException { - StringBuilder result = new StringBuilder(); - boolean first = true; - for (Map.Entry entry : params.entrySet()) { - if (first) - first = false; - else - result.append("&"); - result.append(URLEncoder.encode(entry.getKey(), "UTF-8")); - result.append("="); - result.append(URLEncoder.encode(entry.getValue(), "UTF-8")); - } - return result.toString(); - } + private String getDataString(Map params) throws UnsupportedEncodingException { + StringBuilder result = new StringBuilder(); + boolean first = true; + for (Map.Entry entry : params.entrySet()) { + if (first) + first = false; + else + result.append("&"); + result.append(URLEncoder.encode(entry.getKey(), "UTF-8")); + result.append("="); + result.append(URLEncoder.encode(entry.getValue(), "UTF-8")); + } + return result.toString(); + } - private String findValue(String doc, String key) { - int beginIndex = doc.indexOf(key) + key.length() + 4; - int endIndex = doc.indexOf("\"", beginIndex); - return doc.substring(beginIndex, endIndex); - } + private String findValue(String doc, String key) { + int beginIndex = doc.indexOf(key) + key.length() + 4; + int endIndex = doc.indexOf("\"", beginIndex); + return doc.substring(beginIndex, endIndex); + } } diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeCommentsLinkHandlerFactory.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeCommentsLinkHandlerFactory.java index fbdd63f1a..cfd4645dd 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeCommentsLinkHandlerFactory.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeCommentsLinkHandlerFactory.java @@ -1,16 +1,5 @@ package org.schabi.newpipe.extractor.services.youtube.linkHandler; -import org.schabi.newpipe.extractor.linkhandler.ListLinkHandlerFactory; -import org.jsoup.Jsoup; -import org.jsoup.nodes.Document; -import org.jsoup.nodes.Element; -import org.schabi.newpipe.extractor.Downloader; -import org.schabi.newpipe.extractor.NewPipe; -import org.schabi.newpipe.extractor.exceptions.FoundAdException; -import org.schabi.newpipe.extractor.exceptions.ParsingException; -import org.schabi.newpipe.extractor.exceptions.ReCaptchaException; -import org.schabi.newpipe.extractor.utils.Parser; - import java.io.IOException; import java.io.UnsupportedEncodingException; import java.net.URI; @@ -18,25 +7,16 @@ import java.net.URISyntaxException; import java.net.URLDecoder; import java.util.List; -/* - * Created by Christian Schabesberger on 25.07.16. - * - * Copyright (C) Christian Schabesberger 2018 - * YoutubeChannelLinkHandlerFactory.java is part of NewPipe. - * - * NewPipe is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * NewPipe is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with NewPipe. If not, see . - */ +import org.jsoup.Jsoup; +import org.jsoup.nodes.Document; +import org.jsoup.nodes.Element; +import org.schabi.newpipe.extractor.Downloader; +import org.schabi.newpipe.extractor.NewPipe; +import org.schabi.newpipe.extractor.exceptions.FoundAdException; +import org.schabi.newpipe.extractor.exceptions.ParsingException; +import org.schabi.newpipe.extractor.exceptions.ReCaptchaException; +import org.schabi.newpipe.extractor.linkhandler.ListLinkHandlerFactory; +import org.schabi.newpipe.extractor.utils.Parser; public class YoutubeCommentsLinkHandlerFactory extends ListLinkHandlerFactory { diff --git a/extractor/src/test/java/org/schabi/newpipe/Downloader.java b/extractor/src/test/java/org/schabi/newpipe/Downloader.java index fb0e38ccc..3ec65419d 100644 --- a/extractor/src/test/java/org/schabi/newpipe/Downloader.java +++ b/extractor/src/test/java/org/schabi/newpipe/Downloader.java @@ -36,144 +36,184 @@ import org.schabi.newpipe.extractor.exceptions.ReCaptchaException; public class Downloader implements org.schabi.newpipe.extractor.Downloader { - private static final String USER_AGENT = "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:43.0) Gecko/20100101 Firefox/43.0"; - private static String mCookies = ""; + private static final String USER_AGENT = "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:43.0) Gecko/20100101 Firefox/43.0"; + private static String mCookies = ""; - private static Downloader instance = null; + private static Downloader instance = null; - private Downloader() { - } + private Downloader() { + } - public static Downloader getInstance() { - if (instance == null) { - synchronized (Downloader.class) { - if (instance == null) { - instance = new Downloader(); - } - } - } - return instance; - } + public static Downloader getInstance() { + if (instance == null) { + synchronized (Downloader.class) { + if (instance == null) { + instance = new Downloader(); + } + } + } + return instance; + } - public static synchronized void setCookies(String cookies) { - Downloader.mCookies = cookies; - } + public static synchronized void setCookies(String cookies) { + Downloader.mCookies = cookies; + } - public static synchronized String getCookies() { - return Downloader.mCookies; - } + public static synchronized String getCookies() { + return Downloader.mCookies; + } - /** - * Download the text file at the supplied URL as in download(String), but set - * the HTTP header field "Accept-Language" to the supplied string. - * - * @param siteUrl the URL of the text file to return the contents of - * @param language the language (usually a 2-character code) to set as the - * preferred language - * @return the contents of the specified text file - */ - public String download(String siteUrl, String language) throws IOException, ReCaptchaException { - Map requestProperties = new HashMap<>(); - requestProperties.put("Accept-Language", language); - return download(siteUrl, requestProperties); - } + /** + * Download the text file at the supplied URL as in download(String), but set + * the HTTP header field "Accept-Language" to the supplied string. + * + * @param siteUrl the URL of the text file to return the contents of + * @param language the language (usually a 2-character code) to set as the + * preferred language + * @return the contents of the specified text file + */ + public String download(String siteUrl, String language) throws IOException, ReCaptchaException { + Map requestProperties = new HashMap<>(); + requestProperties.put("Accept-Language", language); + return download(siteUrl, requestProperties); + } - /** - * Download the text file at the supplied URL as in download(String), but set - * the HTTP header field "Accept-Language" to the supplied string. - * - * @param siteUrl the URL of the text file to return the contents of - * @param customProperties set request header properties - * @return the contents of the specified text file - * @throws IOException - */ - public String download(String siteUrl, Map customProperties) - throws IOException, ReCaptchaException { - URL url = new URL(siteUrl); - HttpsURLConnection con = (HttpsURLConnection) url.openConnection(); - for (Map.Entry pair : customProperties.entrySet()) { - con.setRequestProperty(pair.getKey(), pair.getValue()); - } - return dl(con); - } + /** + * Download the text file at the supplied URL as in download(String), but set + * the HTTP header field "Accept-Language" to the supplied string. + * + * @param siteUrl the URL of the text file to return the contents of + * @param customProperties set request header properties + * @return the contents of the specified text file + * @throws IOException + */ + public String download(String siteUrl, Map customProperties) + throws IOException, ReCaptchaException { + URL url = new URL(siteUrl); + HttpsURLConnection con = (HttpsURLConnection) url.openConnection(); + for (Map.Entry pair : customProperties.entrySet()) { + con.setRequestProperty(pair.getKey(), pair.getValue()); + } + return dl(con); + } - /** - * Common functionality between download(String url) and download(String url, - * String language) - */ - private static String dl(HttpsURLConnection con) throws IOException, ReCaptchaException { - StringBuilder response = new StringBuilder(); - BufferedReader in = null; + /** + * Common functionality between download(String url) and download(String url, + * String language) + */ + private static String dl(HttpsURLConnection con) throws IOException, ReCaptchaException { + StringBuilder response = new StringBuilder(); + BufferedReader in = null; - try { - con.setConnectTimeout(30 * 1000);// 30s - con.setReadTimeout(30 * 1000);// 30s - con.setRequestMethod("GET"); - con.setRequestProperty("User-Agent", USER_AGENT); + try { - if (getCookies().length() > 0) { - con.addRequestProperty("Cookie", getCookies()); - } + con.setRequestMethod("GET"); + setDefaults(con); - in = new BufferedReader(new InputStreamReader(con.getInputStream())); - String inputLine; + in = new BufferedReader(new InputStreamReader(con.getInputStream())); + String inputLine; - while ((inputLine = in.readLine()) != null) { - response.append(inputLine); - } - } catch (UnknownHostException uhe) {// thrown when there's no internet connection - throw new IOException("unknown host or no network", uhe); - // Toast.makeText(getActivity(), uhe.getMessage(), Toast.LENGTH_LONG).show(); - } catch (Exception e) { - /* - * HTTP 429 == Too Many Request Receive from Youtube.com = ReCaptcha challenge - * request See : https://github.com/rg3/youtube-dl/issues/5138 - */ - if (con.getResponseCode() == 429) { - throw new ReCaptchaException("reCaptcha Challenge requested"); - } + while ((inputLine = in.readLine()) != null) { + response.append(inputLine); + } + } catch (UnknownHostException uhe) {// thrown when there's no internet + // connection + throw new IOException("unknown host or no network", uhe); + // Toast.makeText(getActivity(), uhe.getMessage(), + // Toast.LENGTH_LONG).show(); + } catch (Exception e) { + /* + * HTTP 429 == Too Many Request Receive from Youtube.com = ReCaptcha challenge + * request See : https://github.com/rg3/youtube-dl/issues/5138 + */ + if (con.getResponseCode() == 429) { + throw new ReCaptchaException("reCaptcha Challenge requested"); + } - throw new IOException(con.getResponseCode() + " " + con.getResponseMessage(), e); - } finally { - if (in != null) { - in.close(); - } - } + throw new IOException(con.getResponseCode() + " " + con.getResponseMessage(), e); + } finally { + if (in != null) { + in.close(); + } + } - return response.toString(); - } + return response.toString(); + } - /** - * Download (via HTTP) the text file located at the supplied URL, and return its - * contents. Primarily intended for downloading web pages. - * - * @param siteUrl the URL of the text file to download - * @return the contents of the specified text file - */ - public String download(String siteUrl) throws IOException, ReCaptchaException { - URL url = new URL(siteUrl); - HttpsURLConnection con = (HttpsURLConnection) url.openConnection(); - // HttpsURLConnection con = NetCipher.getHttpsURLConnection(url); - return dl(con); - } + private static void setDefaults(HttpsURLConnection con) { - @Override - public DownloadResponse downloadWithHeaders(String siteUrl, Map> requestHeaders) - throws IOException, ReCaptchaException { - URL url = new URL(siteUrl); - HttpsURLConnection con = (HttpsURLConnection) url.openConnection(); - for (Map.Entry> pair : requestHeaders.entrySet()) { - pair.getValue().stream().forEach(value -> con.addRequestProperty(pair.getKey(), value)); - } - String responseBody = dl(con); - return new DownloadResponse(responseBody, con.getHeaderFields()); - } + con.setConnectTimeout(30 * 1000);// 30s + con.setReadTimeout(30 * 1000);// 30s - @Override - public DownloadResponse downloadWithHeaders(String siteUrl) throws IOException, ReCaptchaException { - URL url = new URL(siteUrl); - HttpsURLConnection con = (HttpsURLConnection) url.openConnection(); - String responseBody = dl(con); - return new DownloadResponse(responseBody, con.getHeaderFields()); - } + // set default user agent + if (null == con.getRequestProperty("User-Agent")) { + con.setRequestProperty("User-Agent", USER_AGENT); + } + + // add default cookies + if (getCookies().length() > 0) { + con.addRequestProperty("Cookie", getCookies()); + } + } + + /** + * Download (via HTTP) the text file located at the supplied URL, and return its + * contents. Primarily intended for downloading web pages. + * + * @param siteUrl the URL of the text file to download + * @return the contents of the specified text file + */ + public String download(String siteUrl) throws IOException, ReCaptchaException { + URL url = new URL(siteUrl); + HttpsURLConnection con = (HttpsURLConnection) url.openConnection(); + // HttpsURLConnection con = NetCipher.getHttpsURLConnection(url); + return dl(con); + } + + @Override + public DownloadResponse get(String siteUrl, Map> requestHeaders) + throws IOException, ReCaptchaException { + URL url = new URL(siteUrl); + HttpsURLConnection con = (HttpsURLConnection) url.openConnection(); + for (Map.Entry> pair : requestHeaders.entrySet()) { + pair.getValue().stream().forEach(value -> con.addRequestProperty(pair.getKey(), value)); + } + String responseBody = dl(con); + return new DownloadResponse(responseBody, con.getHeaderFields()); + } + + @Override + public DownloadResponse get(String siteUrl) throws IOException, ReCaptchaException { + URL url = new URL(siteUrl); + HttpsURLConnection con = (HttpsURLConnection) url.openConnection(); + String responseBody = dl(con); + return new DownloadResponse(responseBody, con.getHeaderFields()); + } + + @Override + public DownloadResponse post(String siteUrl, String requestBody, Map> requestHeaders) + throws IOException, ReCaptchaException { + URL url = new URL(siteUrl); + HttpsURLConnection con = (HttpsURLConnection) url.openConnection(); + con.setRequestMethod("POST"); + for (Map.Entry> pair : requestHeaders.entrySet()) { + pair.getValue().stream().forEach(value -> con.addRequestProperty(pair.getKey(), value)); + } + // set fields to default if not set already + setDefaults(con); + + byte[] postDataBytes = requestBody.toString().getBytes("UTF-8"); + con.setRequestProperty("Content-Length", String.valueOf(postDataBytes.length)); + + con.setDoOutput(true); + con.getOutputStream().write(postDataBytes); + + BufferedReader in = new BufferedReader(new InputStreamReader(con.getInputStream())); + StringBuilder sb = new StringBuilder(); + String inputLine; + while ((inputLine = in.readLine()) != null) { + sb.append(inputLine); + } + return new DownloadResponse(sb.toString(), con.getHeaderFields()); + } } diff --git a/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeCommentsExtractorTest.java b/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeCommentsExtractorTest.java index 468c12ce5..c5789e7ff 100644 --- a/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeCommentsExtractorTest.java +++ b/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeCommentsExtractorTest.java @@ -16,31 +16,31 @@ import org.schabi.newpipe.extractor.services.youtube.extractors.YoutubeCommentsE public class YoutubeCommentsExtractorTest { - private static YoutubeCommentsExtractor extractor; + private static YoutubeCommentsExtractor extractor; - @BeforeClass - public static void setUp() throws Exception { - NewPipe.init(Downloader.getInstance()); - extractor = (YoutubeCommentsExtractor) YouTube - .getCommentsExtractor("https://www.youtube.com/watch?v=rrgFN3AxGfs"); - extractor.fetchPage(); - } + @BeforeClass + public static void setUp() throws Exception { + NewPipe.init(Downloader.getInstance()); + extractor = (YoutubeCommentsExtractor) YouTube + .getCommentsExtractor("https://www.youtube.com/watch?v=rrgFN3AxGfs"); + extractor.fetchPage(); + } - @Test - public void testGetComments() throws IOException, ExtractionException { - boolean result = false; - InfoItemsPage comments = extractor.getInitialPage(); - result = findInComments(comments, "i should really be in the top comment.lol"); + @Test + public void testGetComments() throws IOException, ExtractionException { + boolean result = false; + InfoItemsPage comments = extractor.getInitialPage(); + result = findInComments(comments, "i should really be in the top comment.lol"); - while (comments.hasNextPage()) { - comments = extractor.getPage(comments.getNextPageUrl()); - result = findInComments(comments, "i should really be in the top comment.lol"); - } + while (comments.hasNextPage()) { + comments = extractor.getPage(comments.getNextPageUrl()); + result = findInComments(comments, "i should really be in the top comment.lol"); + } - assertTrue(result); - } + assertTrue(result); + } - private boolean findInComments(InfoItemsPage comments, String comment) { - return comments.getItems().stream().filter(c -> c.getCommentText().contains(comment)).findAny().isPresent(); - } + private boolean findInComments(InfoItemsPage comments, String comment) { + return comments.getItems().stream().filter(c -> c.getCommentText().contains(comment)).findAny().isPresent(); + } } From 06898f47b26549f9d2784fca8dc246df52b66059 Mon Sep 17 00:00:00 2001 From: Ritvik Saraf <13ritvik@gmail.com> Date: Sun, 2 Sep 2018 05:32:21 +0530 Subject: [PATCH 04/31] close resource --- .../src/test/java/org/schabi/newpipe/Downloader.java | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/extractor/src/test/java/org/schabi/newpipe/Downloader.java b/extractor/src/test/java/org/schabi/newpipe/Downloader.java index 3ec65419d..e79a74205 100644 --- a/extractor/src/test/java/org/schabi/newpipe/Downloader.java +++ b/extractor/src/test/java/org/schabi/newpipe/Downloader.java @@ -208,11 +208,12 @@ public class Downloader implements org.schabi.newpipe.extractor.Downloader { con.setDoOutput(true); con.getOutputStream().write(postDataBytes); - BufferedReader in = new BufferedReader(new InputStreamReader(con.getInputStream())); StringBuilder sb = new StringBuilder(); - String inputLine; - while ((inputLine = in.readLine()) != null) { - sb.append(inputLine); + try (BufferedReader in = new BufferedReader(new InputStreamReader(con.getInputStream()))) { + String inputLine; + while ((inputLine = in.readLine()) != null) { + sb.append(inputLine); + } } return new DownloadResponse(sb.toString(), con.getHeaderFields()); } From f62f147ea0489cc63a48516ef1a0aa0b500fda19 Mon Sep 17 00:00:00 2001 From: Ritvik Saraf <13ritvik@gmail.com> Date: Sun, 2 Sep 2018 17:21:00 +0530 Subject: [PATCH 05/31] added comments to stream info --- .../extractors/YoutubeCommentsExtractor.java | 1 + .../newpipe/extractor/stream/StreamInfo.java | 28 +++++++++++++++++++ .../extractor/utils/ExtractorHelper.java | 2 ++ .../java/org/schabi/newpipe/Downloader.java | 6 ++-- .../youtube/YoutubeCommentsExtractorTest.java | 1 - 5 files changed, 33 insertions(+), 5 deletions(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeCommentsExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeCommentsExtractor.java index defd579b3..b203ed4b6 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeCommentsExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeCommentsExtractor.java @@ -44,6 +44,7 @@ public class YoutubeCommentsExtractor extends CommentsExtractor { public InfoItemsPage getInitialPage() throws IOException, ExtractionException { // initial page does not load any comments but is required to get session token // and cookies + super.fetchPage(); return getPage(getNextPageUrl()); } diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/stream/StreamInfo.java b/extractor/src/main/java/org/schabi/newpipe/extractor/stream/StreamInfo.java index b6da4076d..74cb190fa 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/stream/StreamInfo.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/stream/StreamInfo.java @@ -1,6 +1,10 @@ package org.schabi.newpipe.extractor.stream; import org.schabi.newpipe.extractor.*; +import org.schabi.newpipe.extractor.ListExtractor.InfoItemsPage; +import org.schabi.newpipe.extractor.comments.CommentsExtractor; +import org.schabi.newpipe.extractor.comments.CommentsInfo; +import org.schabi.newpipe.extractor.comments.CommentsInfoItem; import org.schabi.newpipe.extractor.exceptions.ContentNotAvailableException; import org.schabi.newpipe.extractor.exceptions.ExtractionException; import org.schabi.newpipe.extractor.utils.DashMpdParser; @@ -247,6 +251,19 @@ public class StreamInfo extends Info { } streamInfo.setRelatedStreams(ExtractorHelper.getRelatedVideosOrLogError(streamInfo, extractor)); + + CommentsExtractor commentsExtractor = null; + try { + commentsExtractor = NewPipe.getService(streamInfo.getServiceId()).getCommentsExtractor(streamInfo.getUrl()); + } catch (ExtractionException e) { + streamInfo.addError(e); + } + + if(null != commentsExtractor) { + InfoItemsPage initialCommentsPage = ExtractorHelper.getItemsPageOrLogError(streamInfo, commentsExtractor); + streamInfo.setComments(initialCommentsPage.getItems()); + } + return streamInfo; } @@ -273,6 +290,7 @@ public class StreamInfo extends Info { private String hlsUrl; private StreamInfoItem nextVideo; private List relatedStreams; + private List comments; private long startPosition = 0; private List subtitles; @@ -470,4 +488,14 @@ public class StreamInfo extends Info { this.subtitles = subtitles; } + public List getComments() { + return comments; + } + + public void setComments(List comments) { + this.comments = comments; + } + + + } diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/utils/ExtractorHelper.java b/extractor/src/main/java/org/schabi/newpipe/extractor/utils/ExtractorHelper.java index bad595933..a25e9051d 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/utils/ExtractorHelper.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/utils/ExtractorHelper.java @@ -5,6 +5,7 @@ import org.schabi.newpipe.extractor.InfoItem; import org.schabi.newpipe.extractor.InfoItemsCollector; import org.schabi.newpipe.extractor.ListExtractor; import org.schabi.newpipe.extractor.ListExtractor.InfoItemsPage; +import org.schabi.newpipe.extractor.comments.CommentsInfo; import org.schabi.newpipe.extractor.stream.StreamExtractor; import org.schabi.newpipe.extractor.stream.StreamInfo; @@ -39,4 +40,5 @@ public class ExtractorHelper { return Collections.emptyList(); } } + } diff --git a/extractor/src/test/java/org/schabi/newpipe/Downloader.java b/extractor/src/test/java/org/schabi/newpipe/Downloader.java index e79a74205..385d896b1 100644 --- a/extractor/src/test/java/org/schabi/newpipe/Downloader.java +++ b/extractor/src/test/java/org/schabi/newpipe/Downloader.java @@ -5,6 +5,7 @@ import java.io.IOException; import java.io.InputStreamReader; import java.net.URL; import java.net.UnknownHostException; +import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -184,10 +185,7 @@ public class Downloader implements org.schabi.newpipe.extractor.Downloader { @Override public DownloadResponse get(String siteUrl) throws IOException, ReCaptchaException { - URL url = new URL(siteUrl); - HttpsURLConnection con = (HttpsURLConnection) url.openConnection(); - String responseBody = dl(con); - return new DownloadResponse(responseBody, con.getHeaderFields()); + return get(siteUrl, Collections.emptyMap()); } @Override diff --git a/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeCommentsExtractorTest.java b/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeCommentsExtractorTest.java index c5789e7ff..56bfceda4 100644 --- a/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeCommentsExtractorTest.java +++ b/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeCommentsExtractorTest.java @@ -23,7 +23,6 @@ public class YoutubeCommentsExtractorTest { NewPipe.init(Downloader.getInstance()); extractor = (YoutubeCommentsExtractor) YouTube .getCommentsExtractor("https://www.youtube.com/watch?v=rrgFN3AxGfs"); - extractor.fetchPage(); } @Test From 9fb0622a2422485da75549755aa8022aa5b91367 Mon Sep 17 00:00:00 2001 From: Ritvik Saraf <13ritvik@gmail.com> Date: Mon, 3 Sep 2018 00:11:40 +0530 Subject: [PATCH 06/31] better quality thumbnails --- .../comments/CommentsInfoItemsCollector.java | 136 +++++++++--------- .../extractors/YoutubeCommentsExtractor.java | 20 +-- .../newpipe/extractor/stream/StreamInfo.java | 15 +- 3 files changed, 90 insertions(+), 81 deletions(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/comments/CommentsInfoItemsCollector.java b/extractor/src/main/java/org/schabi/newpipe/extractor/comments/CommentsInfoItemsCollector.java index 6bc925302..bf3bc3225 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/comments/CommentsInfoItemsCollector.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/comments/CommentsInfoItemsCollector.java @@ -9,75 +9,81 @@ import org.schabi.newpipe.extractor.exceptions.ParsingException; public class CommentsInfoItemsCollector extends InfoItemsCollector { - public CommentsInfoItemsCollector(int serviceId) { - super(serviceId); - } + public CommentsInfoItemsCollector(int serviceId) { + super(serviceId); + } - @Override - public CommentsInfoItem extract(CommentsInfoItemExtractor extractor) throws ParsingException { + @Override + public CommentsInfoItem extract(CommentsInfoItemExtractor extractor) throws ParsingException { - // important information - int serviceId = getServiceId(); - String url = extractor.getUrl(); - String name = extractor.getName(); + // important information + int serviceId = getServiceId(); + String url = extractor.getUrl(); + String name = extractor.getName(); - CommentsInfoItem resultItem = new CommentsInfoItem(serviceId, url, name); + CommentsInfoItem resultItem = new CommentsInfoItem(serviceId, url, name); - // optional information - try { - resultItem.setCommentId(extractor.getCommentId()); - } catch (Exception e) { - addError(e); - } - try { - resultItem.setCommentText(extractor.getCommentText()); - } catch (Exception e) { - addError(e); - } - try { - resultItem.setAuthorName(extractor.getAuthorName()); - } catch (Exception e) { - addError(e); - } - try { - resultItem.setAuthorThumbnail(extractor.getAuthorThumbnail()); - } catch (Exception e) { - addError(e); - } - try { - resultItem.setAuthorEndpoint(extractor.getAuthorEndpoint()); - } catch (Exception e) { - addError(e); - } - try { - resultItem.setPublishedTime(extractor.getPublishedTime()); - } catch (Exception e) { - addError(e); - } - try { - resultItem.setLikeCount(extractor.getLikeCount()); - } catch (Exception e) { - addError(e); - } - return resultItem; - } + // optional information + try { + resultItem.setCommentId(extractor.getCommentId()); + } catch (Exception e) { + addError(e); + } + try { + resultItem.setCommentText(extractor.getCommentText()); + } catch (Exception e) { + addError(e); + } + try { + resultItem.setAuthorName(extractor.getAuthorName()); + } catch (Exception e) { + addError(e); + } + try { + resultItem.setAuthorThumbnail(extractor.getAuthorThumbnail()); + } catch (Exception e) { + addError(e); + } + try { + resultItem.setAuthorEndpoint(extractor.getAuthorEndpoint()); + } catch (Exception e) { + addError(e); + } + try { + resultItem.setPublishedTime(extractor.getPublishedTime()); + } catch (Exception e) { + addError(e); + } + try { + resultItem.setLikeCount(extractor.getLikeCount()); + } catch (Exception e) { + addError(e); + } + try { + resultItem.setThumbnailUrl(extractor.getThumbnailUrl()); + } catch (Exception e) { + addError(e); + } - @Override - public void commit(CommentsInfoItemExtractor extractor) { - try { - addItem(extract(extractor)); - } catch (Exception e) { - addError(e); - } - } + return resultItem; + } - public List getCommentsInfoItemList() { - List siiList = new Vector<>(); - for (InfoItem ii : super.getItems()) { - if (ii instanceof CommentsInfoItem) { - siiList.add((CommentsInfoItem) ii); - } - } - return siiList; - } + @Override + public void commit(CommentsInfoItemExtractor extractor) { + try { + addItem(extract(extractor)); + } catch (Exception e) { + addError(e); + } + } + + public List getCommentsInfoItemList() { + List siiList = new Vector<>(); + for (InfoItem ii : super.getItems()) { + if (ii instanceof CommentsInfoItem) { + siiList.add((CommentsInfoItem) ii); + } + } + return siiList; + } } diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeCommentsExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeCommentsExtractor.java index b203ed4b6..10e1beea7 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeCommentsExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeCommentsExtractor.java @@ -103,7 +103,7 @@ public class YoutubeCommentsExtractor extends CommentsExtractor { @Override public String getThumbnailUrl() throws ParsingException { try { - return c.get("authorThumbnail").get("thumbnails").get(0).get("url").asText(); + return c.get("authorThumbnail").get("thumbnails").get(2).get("url").asText(); } catch (Exception e) { throw new ParsingException("Could not get thumbnail url", e); } @@ -114,7 +114,7 @@ public class YoutubeCommentsExtractor extends CommentsExtractor { try { return c.get("authorText").get("simpleText").asText(); } catch (Exception e) { - throw new ParsingException("Could not get thumbnail url", e); + throw new ParsingException("Could not get author name", e); } } @@ -123,7 +123,7 @@ public class YoutubeCommentsExtractor extends CommentsExtractor { try { return c.get("publishedTimeText").get("runs").get(0).get("text").asText(); } catch (Exception e) { - throw new ParsingException("Could not get thumbnail url", e); + throw new ParsingException("Could not get publishedTimeText", e); } } @@ -132,7 +132,7 @@ public class YoutubeCommentsExtractor extends CommentsExtractor { try { return c.get("likeCount").intValue(); } catch (Exception e) { - throw new ParsingException("Could not get thumbnail url", e); + throw new ParsingException("Could not get like count", e); } } @@ -145,7 +145,7 @@ public class YoutubeCommentsExtractor extends CommentsExtractor { return c.get("contentText").get("runs").get(0).get("text").asText(); } } catch (Exception e) { - throw new ParsingException("Could not get thumbnail url", e); + throw new ParsingException("Could not get comment text", e); } } @@ -154,16 +154,16 @@ public class YoutubeCommentsExtractor extends CommentsExtractor { try { return c.get("commentId").asText(); } catch (Exception e) { - throw new ParsingException("Could not get thumbnail url", e); + throw new ParsingException("Could not get comment id", e); } } @Override public String getAuthorThumbnail() throws ParsingException { try { - return c.get("authorThumbnail").get("thumbnails").get(0).get("url").asText(); + return c.get("authorThumbnail").get("thumbnails").get(2).get("url").asText(); } catch (Exception e) { - throw new ParsingException("Could not get thumbnail url", e); + throw new ParsingException("Could not get author thumbnail", e); } } @@ -172,7 +172,7 @@ public class YoutubeCommentsExtractor extends CommentsExtractor { try { return c.get("authorText").get("simpleText").asText(); } catch (Exception e) { - throw new ParsingException("Could not get thumbnail url", e); + throw new ParsingException("Could not get author name", e); } } @@ -182,7 +182,7 @@ public class YoutubeCommentsExtractor extends CommentsExtractor { return "https://youtube.com" + c.get("authorEndpoint").get("browseEndpoint").get("canonicalBaseUrl").asText(); } catch (Exception e) { - throw new ParsingException("Could not get thumbnail url", e); + throw new ParsingException("Could not get author endpoint", e); } } }; diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/stream/StreamInfo.java b/extractor/src/main/java/org/schabi/newpipe/extractor/stream/StreamInfo.java index 74cb190fa..aa507fcaa 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/stream/StreamInfo.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/stream/StreamInfo.java @@ -1,19 +1,22 @@ package org.schabi.newpipe.extractor.stream; -import org.schabi.newpipe.extractor.*; +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +import org.schabi.newpipe.extractor.Info; +import org.schabi.newpipe.extractor.InfoItem; import org.schabi.newpipe.extractor.ListExtractor.InfoItemsPage; +import org.schabi.newpipe.extractor.NewPipe; +import org.schabi.newpipe.extractor.StreamingService; +import org.schabi.newpipe.extractor.Subtitles; import org.schabi.newpipe.extractor.comments.CommentsExtractor; -import org.schabi.newpipe.extractor.comments.CommentsInfo; import org.schabi.newpipe.extractor.comments.CommentsInfoItem; import org.schabi.newpipe.extractor.exceptions.ContentNotAvailableException; import org.schabi.newpipe.extractor.exceptions.ExtractionException; import org.schabi.newpipe.extractor.utils.DashMpdParser; import org.schabi.newpipe.extractor.utils.ExtractorHelper; -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; - /* * Created by Christian Schabesberger on 26.08.15. * From 823551170df43e7bf113dfebfad291c1676f55a5 Mon Sep 17 00:00:00 2001 From: Ritvik Saraf <13ritvik@gmail.com> Date: Mon, 3 Sep 2018 02:24:03 +0530 Subject: [PATCH 07/31] added loadable comments in stream info --- .../newpipe/extractor/stream/StreamInfo.java | 129 +++++++++++++----- .../youtube/YoutubeCommentsExtractorTest.java | 25 +++- 2 files changed, 116 insertions(+), 38 deletions(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/stream/StreamInfo.java b/extractor/src/main/java/org/schabi/newpipe/extractor/stream/StreamInfo.java index aa507fcaa..a69b55f52 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/stream/StreamInfo.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/stream/StreamInfo.java @@ -48,7 +48,8 @@ public class StreamInfo extends Info { } } - public StreamInfo(int serviceId, String url, String originalUrl, StreamType streamType, String id, String name, int ageLimit) { + public StreamInfo(int serviceId, String url, String originalUrl, StreamType streamType, String id, String name, + int ageLimit) { super(serviceId, id, url, originalUrl, name); this.streamType = streamType; this.ageLimit = ageLimit; @@ -70,9 +71,12 @@ public class StreamInfo extends Info { streamInfo = extractStreams(streamInfo, extractor); streamInfo = extractOptionalData(streamInfo, extractor); } catch (ExtractionException e) { - // Currently YouTube does not distinguish between age restricted videos and videos blocked - // by country. This means that during the initialisation of the extractor, the extractor - // will assume that a video is age restricted while in reality it it blocked by country. + // Currently YouTube does not distinguish between age restricted videos and + // videos blocked + // by country. This means that during the initialisation of the extractor, the + // extractor + // will assume that a video is age restricted while in reality it it blocked by + // country. // // We will now detect whether the video is blocked by country or not. String errorMsg = extractor.getErrorMessage(); @@ -89,7 +93,8 @@ public class StreamInfo extends Info { private static StreamInfo extractImportantData(StreamExtractor extractor) throws ExtractionException { /* ---- important data, without the video can't be displayed goes here: ---- */ - // if one of these is not available an exception is meant to be thrown directly into the frontend. + // if one of these is not available an exception is meant to be thrown directly + // into the frontend. int serviceId = extractor.getServiceId(); String url = extractor.getUrl(); @@ -99,18 +104,16 @@ public class StreamInfo extends Info { String name = extractor.getName(); int ageLimit = extractor.getAgeLimit(); - if ((streamType == StreamType.NONE) - || (url == null || url.isEmpty()) - || (id == null || id.isEmpty()) - || (name == null /* streamInfo.title can be empty of course */) - || (ageLimit == -1)) { + if ((streamType == StreamType.NONE) || (url == null || url.isEmpty()) || (id == null || id.isEmpty()) + || (name == null /* streamInfo.title can be empty of course */) || (ageLimit == -1)) { throw new ExtractionException("Some important stream information was not given."); } return new StreamInfo(serviceId, url, originalUrl, streamType, id, name, ageLimit); } - private static StreamInfo extractStreams(StreamInfo streamInfo, StreamExtractor extractor) throws ExtractionException { + private static StreamInfo extractStreams(StreamInfo streamInfo, StreamExtractor extractor) + throws ExtractionException { /* ---- stream extraction goes here ---- */ // At least one type of stream has to be available, // otherwise an exception will be thrown directly into the frontend. @@ -127,19 +130,19 @@ public class StreamInfo extends Info { streamInfo.addError(new ExtractionException("Couldn't get HLS manifest", e)); } - /* Load and extract audio */ + /* Load and extract audio */ try { streamInfo.setAudioStreams(extractor.getAudioStreams()); } catch (Exception e) { streamInfo.addError(new ExtractionException("Couldn't get audio streams", e)); } - /* Extract video stream url*/ + /* Extract video stream url */ try { streamInfo.setVideoStreams(extractor.getVideoStreams()); } catch (Exception e) { streamInfo.addError(new ExtractionException("Couldn't get video streams", e)); } - /* Extract video only stream url*/ + /* Extract video only stream url */ try { streamInfo.setVideoOnlyStreams(extractor.getVideoOnlyStreams()); } catch (Exception e) { @@ -147,9 +150,12 @@ public class StreamInfo extends Info { } // Lists can be null if a exception was thrown during extraction - if (streamInfo.getVideoStreams() == null) streamInfo.setVideoStreams(new ArrayList()); - if (streamInfo.getVideoOnlyStreams() == null) streamInfo.setVideoOnlyStreams(new ArrayList()); - if (streamInfo.getAudioStreams() == null) streamInfo.setAudioStreams(new ArrayList()); + if (streamInfo.getVideoStreams() == null) + streamInfo.setVideoStreams(new ArrayList()); + if (streamInfo.getVideoOnlyStreams() == null) + streamInfo.setVideoOnlyStreams(new ArrayList()); + if (streamInfo.getAudioStreams() == null) + streamInfo.setAudioStreams(new ArrayList()); Exception dashMpdError = null; if (streamInfo.getDashMpdUrl() != null && !streamInfo.getDashMpdUrl().isEmpty()) { @@ -159,19 +165,23 @@ public class StreamInfo extends Info { streamInfo.getAudioStreams().addAll(result.getAudioStreams()); streamInfo.getVideoStreams().addAll(result.getVideoStreams()); } catch (Exception e) { - // Sometimes we receive 403 (forbidden) error when trying to download the manifest (similar to what happens with youtube-dl), - // just skip the exception (but store it somewhere), as we later check if we have streams anyway. + // Sometimes we receive 403 (forbidden) error when trying to download the + // manifest (similar to what happens with youtube-dl), + // just skip the exception (but store it somewhere), as we later check if we + // have streams anyway. dashMpdError = e; } } - // Either audio or video has to be available, otherwise we didn't get a stream (since videoOnly are optional, they don't count). - if ((streamInfo.videoStreams.isEmpty()) - && (streamInfo.audioStreams.isEmpty())) { + // Either audio or video has to be available, otherwise we didn't get a stream + // (since videoOnly are optional, they don't count). + if ((streamInfo.videoStreams.isEmpty()) && (streamInfo.audioStreams.isEmpty())) { if (dashMpdError != null) { - // If we don't have any video or audio and the dashMpd 'errored', add it to the error list - // (it's optional and it don't get added automatically, but it's good to have some additional error context) + // If we don't have any video or audio and the dashMpd 'errored', add it to the + // error list + // (it's optional and it don't get added automatically, but it's good to have + // some additional error context) streamInfo.addError(dashMpdError); } @@ -182,9 +192,11 @@ public class StreamInfo extends Info { } private static StreamInfo extractOptionalData(StreamInfo streamInfo, StreamExtractor extractor) { - /* ---- optional data goes here: ---- */ - // If one of these fails, the frontend needs to handle that they are not available. - // Exceptions are therefore not thrown into the frontend, but stored into the error List, + /* ---- optional data goes here: ---- */ + // If one of these fails, the frontend needs to handle that they are not + // available. + // Exceptions are therefore not thrown into the frontend, but stored into the + // error List, // so the frontend can afterwards check where errors happened. try { @@ -254,22 +266,41 @@ public class StreamInfo extends Info { } streamInfo.setRelatedStreams(ExtractorHelper.getRelatedVideosOrLogError(streamInfo, extractor)); - + CommentsExtractor commentsExtractor = null; try { commentsExtractor = NewPipe.getService(streamInfo.getServiceId()).getCommentsExtractor(streamInfo.getUrl()); - } catch (ExtractionException e) { + streamInfo.setCommentsExtractor(commentsExtractor); + } catch (Exception e) { streamInfo.addError(e); } - - if(null != commentsExtractor) { - InfoItemsPage initialCommentsPage = ExtractorHelper.getItemsPageOrLogError(streamInfo, commentsExtractor); - streamInfo.setComments(initialCommentsPage.getItems()); + + if (null != commentsExtractor) { + InfoItemsPage initialCommentsPage = ExtractorHelper.getItemsPageOrLogError(streamInfo, + commentsExtractor); + streamInfo.setComments(new ArrayList<>()); + streamInfo.getComments().addAll(initialCommentsPage.getItems()); + streamInfo.setHasMoreComments(initialCommentsPage.hasNextPage()); + streamInfo.setNextCommentsPageUrl(initialCommentsPage.getNextPageUrl()); } - + return streamInfo; } + public static void loadMoreComments(StreamInfo streamInfo) { + if (streamInfo.hasMoreComments() && null != streamInfo.getCommentsExtractor()) { + try { + InfoItemsPage commentsPage = streamInfo.getCommentsExtractor() + .getPage(streamInfo.getNextCommentsPageUrl()); + streamInfo.getComments().addAll(commentsPage.getItems()); + streamInfo.setHasMoreComments(commentsPage.hasNextPage()); + streamInfo.setNextCommentsPageUrl(commentsPage.getNextPageUrl()); + } catch (IOException | ExtractionException e) { + streamInfo.addError(e); + } + } + } + private StreamType streamType; private String thumbnailUrl; private String uploadDate; @@ -293,7 +324,11 @@ public class StreamInfo extends Info { private String hlsUrl; private StreamInfoItem nextVideo; private List relatedStreams; + + private CommentsExtractor commentsExtractor; private List comments; + private boolean hasMoreComments; + private String nextCommentsPageUrl; private long startPosition = 0; private List subtitles; @@ -498,7 +533,29 @@ public class StreamInfo extends Info { public void setComments(List comments) { this.comments = comments; } - - + + public boolean hasMoreComments() { + return hasMoreComments; + } + + public void setHasMoreComments(boolean hasMoreComments) { + this.hasMoreComments = hasMoreComments; + } + + public CommentsExtractor getCommentsExtractor() { + return commentsExtractor; + } + + public void setCommentsExtractor(CommentsExtractor commentsExtractor) { + this.commentsExtractor = commentsExtractor; + } + + public String getNextCommentsPageUrl() { + return nextCommentsPageUrl; + } + + public void setNextCommentsPageUrl(String nextCommentsPageUrl) { + this.nextCommentsPageUrl = nextCommentsPageUrl; + } } diff --git a/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeCommentsExtractorTest.java b/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeCommentsExtractorTest.java index 56bfceda4..41eb90aaa 100644 --- a/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeCommentsExtractorTest.java +++ b/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeCommentsExtractorTest.java @@ -4,6 +4,7 @@ import static org.junit.Assert.assertTrue; import static org.schabi.newpipe.extractor.ServiceList.YouTube; import java.io.IOException; +import java.util.List; import org.junit.BeforeClass; import org.junit.Test; @@ -13,6 +14,7 @@ import org.schabi.newpipe.extractor.NewPipe; import org.schabi.newpipe.extractor.comments.CommentsInfoItem; import org.schabi.newpipe.extractor.exceptions.ExtractionException; import org.schabi.newpipe.extractor.services.youtube.extractors.YoutubeCommentsExtractor; +import org.schabi.newpipe.extractor.stream.StreamInfo; public class YoutubeCommentsExtractorTest { @@ -31,7 +33,7 @@ public class YoutubeCommentsExtractorTest { InfoItemsPage comments = extractor.getInitialPage(); result = findInComments(comments, "i should really be in the top comment.lol"); - while (comments.hasNextPage()) { + while (comments.hasNextPage() && !result) { comments = extractor.getPage(comments.getNextPageUrl()); result = findInComments(comments, "i should really be in the top comment.lol"); } @@ -39,7 +41,26 @@ public class YoutubeCommentsExtractorTest { assertTrue(result); } + @Test + public void testGetCommentsFromStreamInfo() throws IOException, ExtractionException { + boolean result = false; + StreamInfo streamInfo = StreamInfo.getInfo("https://www.youtube.com/watch?v=rrgFN3AxGfs"); + + result = findInComments(streamInfo.getComments(), "i should really be in the top comment.lol"); + + while (streamInfo.hasMoreComments() && !result) { + StreamInfo.loadMoreComments(streamInfo); + result = findInComments(streamInfo.getComments(), "i should really be in the top comment.lol"); + } + + assertTrue(result); + } + private boolean findInComments(InfoItemsPage comments, String comment) { - return comments.getItems().stream().filter(c -> c.getCommentText().contains(comment)).findAny().isPresent(); + return findInComments(comments.getItems(), comment); + } + + private boolean findInComments(List comments, String comment) { + return comments.stream().filter(c -> c.getCommentText().contains(comment)).findAny().isPresent(); } } From bf2d3deb03cec830a3757fcc1b32a7e8a5073bef Mon Sep 17 00:00:00 2001 From: Ritvik Saraf <13ritvik@gmail.com> Date: Mon, 3 Sep 2018 04:16:34 +0530 Subject: [PATCH 08/31] make extractor serializable --- .../src/main/java/org/schabi/newpipe/extractor/Extractor.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/Extractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/Extractor.java index ac2d7ca3e..df8ce0221 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/Extractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/Extractor.java @@ -7,8 +7,9 @@ import org.schabi.newpipe.extractor.linkhandler.LinkHandler; import javax.annotation.Nonnull; import javax.annotation.Nullable; import java.io.IOException; +import java.io.Serializable; -public abstract class Extractor { +public abstract class Extractor implements Serializable{ /** * {@link StreamingService} currently related to this extractor.
* Useful for getting other things from a service (like the url handlers for cleaning/accepting/get id from urls). From 4ca23ab5c36b8c5173fbdcc72309a2d73af49440 Mon Sep 17 00:00:00 2001 From: Ritvik Saraf <13ritvik@gmail.com> Date: Mon, 3 Sep 2018 04:40:35 +0530 Subject: [PATCH 09/31] revering serialzable change --- .../src/main/java/org/schabi/newpipe/extractor/Extractor.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/Extractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/Extractor.java index df8ce0221..ba6265f34 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/Extractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/Extractor.java @@ -9,7 +9,7 @@ import javax.annotation.Nullable; import java.io.IOException; import java.io.Serializable; -public abstract class Extractor implements Serializable{ +public abstract class Extractor{ /** * {@link StreamingService} currently related to this extractor.
* Useful for getting other things from a service (like the url handlers for cleaning/accepting/get id from urls). From ee239985ae8cc2598a98e13a743f541872068ab0 Mon Sep 17 00:00:00 2001 From: Ritvik Saraf <13ritvik@gmail.com> Date: Wed, 19 Sep 2018 04:22:23 +0530 Subject: [PATCH 10/31] added CommentsInfo --- .../extractor/comments/CommentsInfo.java | 89 ++++++++++++++++++- .../extractors/YoutubeCommentsExtractor.java | 31 +++++-- .../YoutubeCommentsLinkHandlerFactory.java | 74 ++------------- .../youtube/YoutubeCommentsExtractorTest.java | 16 ++++ 4 files changed, 135 insertions(+), 75 deletions(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/comments/CommentsInfo.java b/extractor/src/main/java/org/schabi/newpipe/extractor/comments/CommentsInfo.java index 10ac85d5c..fb0cc14b9 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/comments/CommentsInfo.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/comments/CommentsInfo.java @@ -1,12 +1,16 @@ package org.schabi.newpipe.extractor.comments; import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import org.schabi.newpipe.extractor.ListExtractor.InfoItemsPage; import org.schabi.newpipe.extractor.ListInfo; import org.schabi.newpipe.extractor.NewPipe; import org.schabi.newpipe.extractor.StreamingService; import org.schabi.newpipe.extractor.exceptions.ExtractionException; import org.schabi.newpipe.extractor.linkhandler.ListLinkHandler; +import org.schabi.newpipe.extractor.utils.ExtractorHelper; public class CommentsInfo extends ListInfo{ @@ -19,9 +23,88 @@ public class CommentsInfo extends ListInfo{ return getInfo(NewPipe.getServiceByUrl(url), url); } - private static CommentsInfo getInfo(StreamingService serviceByUrl, String url) { - // TODO Auto-generated method stub - return null; + private static CommentsInfo getInfo(StreamingService serviceByUrl, String url) throws ExtractionException, IOException { + return getInfo(serviceByUrl.getCommentsExtractor(url)); } + private static CommentsInfo getInfo(CommentsExtractor commentsExtractor) throws IOException, ExtractionException { + //for services which do not have a comments extractor + if(null == commentsExtractor) { + return null; + } + + commentsExtractor.fetchPage(); + String name = commentsExtractor.getName(); + int serviceId = commentsExtractor.getServiceId(); + ListLinkHandler listUrlIdHandler = commentsExtractor.getUIHandler(); + CommentsInfo commentsInfo = new CommentsInfo(serviceId, listUrlIdHandler, name); + commentsInfo.setCommentsExtractor(commentsExtractor); + InfoItemsPage initialCommentsPage = ExtractorHelper.getItemsPageOrLogError(commentsInfo, + commentsExtractor); + commentsInfo.setComments(new ArrayList<>()); + commentsInfo.getComments().addAll(initialCommentsPage.getItems()); + commentsInfo.setHasMoreComments(initialCommentsPage.hasNextPage()); + commentsInfo.setNextCommentsPageUrl(initialCommentsPage.getNextPageUrl()); + return commentsInfo; + } + + public static void loadMoreComments(CommentsInfo commentsInfo) { + if (commentsInfo.hasMoreComments()) { + if(null == commentsInfo.getCommentsExtractor()) { + try { + commentsInfo.setCommentsExtractor(NewPipe.getService(commentsInfo.getServiceId()).getCommentsExtractor(commentsInfo.getUrl())); + } catch (ExtractionException e) { + commentsInfo.addError(e); + return; + } + } + try { + InfoItemsPage commentsPage = commentsInfo.getCommentsExtractor() + .getPage(commentsInfo.getNextCommentsPageUrl()); + commentsInfo.getComments().addAll(commentsPage.getItems()); + commentsInfo.setHasMoreComments(commentsPage.hasNextPage()); + commentsInfo.setNextCommentsPageUrl(commentsPage.getNextPageUrl()); + } catch (IOException | ExtractionException e) { + commentsInfo.addError(e); + } + } + } + + private transient CommentsExtractor commentsExtractor; + private List comments; + private boolean hasMoreComments; + private String nextCommentsPageUrl; + + public List getComments() { + return comments; + } + + public void setComments(List comments) { + this.comments = comments; + } + + public boolean hasMoreComments() { + return hasMoreComments; + } + + public void setHasMoreComments(boolean hasMoreComments) { + this.hasMoreComments = hasMoreComments; + } + + public CommentsExtractor getCommentsExtractor() { + return commentsExtractor; + } + + public void setCommentsExtractor(CommentsExtractor commentsExtractor) { + this.commentsExtractor = commentsExtractor; + } + + public String getNextCommentsPageUrl() { + return nextCommentsPageUrl; + } + + public void setNextCommentsPageUrl(String nextCommentsPageUrl) { + this.nextCommentsPageUrl = nextCommentsPageUrl; + } + } diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeCommentsExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeCommentsExtractor.java index 10e1beea7..fb08e9b7a 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeCommentsExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeCommentsExtractor.java @@ -31,13 +31,13 @@ public class YoutubeCommentsExtractor extends CommentsExtractor { private List cookies; private String sessionToken; - private String commentsToken; + private String title; + private InfoItemsPage initPage; private ObjectMapper mapper = new ObjectMapper(); public YoutubeCommentsExtractor(StreamingService service, ListLinkHandler uiHandler) { super(service, uiHandler); - // TODO Auto-generated constructor stub } @Override @@ -45,12 +45,16 @@ public class YoutubeCommentsExtractor extends CommentsExtractor { // initial page does not load any comments but is required to get session token // and cookies super.fetchPage(); - return getPage(getNextPageUrl()); + return initPage; } + // isn't this method redundant. you can just call getnextpage on getInitialPage @Override public String getNextPageUrl() throws IOException, ExtractionException { - return getNextPageUrl(commentsToken); + // initial page does not load any comments but is required to get session token + // and cookies + super.fetchPage(); + return initPage.getNextPageUrl(); } private String getNextPageUrl(JsonNode ajaxJson) throws IOException, ExtractionException { @@ -91,6 +95,9 @@ public class YoutubeCommentsExtractor extends CommentsExtractor { } private void collectCommentsFrom(CommentsInfoItemsCollector collector, JsonNode ajaxJson, String pageUrl) { + + fetchTitle(ajaxJson); + List comments = ajaxJson.findValues("commentRenderer"); comments.stream().forEach(c -> { CommentsInfoItemExtractor extractor = new CommentsInfoItemExtractor() { @@ -192,19 +199,29 @@ public class YoutubeCommentsExtractor extends CommentsExtractor { } + private void fetchTitle(JsonNode ajaxJson) { + if(null == title) { + try { + title = ajaxJson.findValue("commentTargetTitle").get("simpleText").asText(); + } catch (Exception e) { + title = "Youtube Comments"; + } + } + } + @Override public void onFetchPage(Downloader downloader) throws IOException, ExtractionException { DownloadResponse response = downloader.get(getUrl()); String responseBody = response.getResponseBody(); cookies = response.getResponseHeaders().get("Set-Cookie"); sessionToken = findValue(responseBody, "XSRF_TOKEN"); - commentsToken = findValue(responseBody, "COMMENTS_TOKEN"); + String commentsToken = findValue(responseBody, "COMMENTS_TOKEN"); + initPage = getPage(getNextPageUrl(commentsToken)); } @Override public String getName() throws ParsingException { - // TODO Auto-generated method stub - return null; + return title; } private String makeAjaxRequest(String siteUrl) throws IOException, ReCaptchaException { diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeCommentsLinkHandlerFactory.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeCommentsLinkHandlerFactory.java index cfd4645dd..ae31337fd 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeCommentsLinkHandlerFactory.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeCommentsLinkHandlerFactory.java @@ -1,20 +1,11 @@ package org.schabi.newpipe.extractor.services.youtube.linkHandler; -import java.io.IOException; import java.io.UnsupportedEncodingException; -import java.net.URI; -import java.net.URISyntaxException; import java.net.URLDecoder; import java.util.List; -import org.jsoup.Jsoup; -import org.jsoup.nodes.Document; -import org.jsoup.nodes.Element; -import org.schabi.newpipe.extractor.Downloader; -import org.schabi.newpipe.extractor.NewPipe; import org.schabi.newpipe.extractor.exceptions.FoundAdException; import org.schabi.newpipe.extractor.exceptions.ParsingException; -import org.schabi.newpipe.extractor.exceptions.ReCaptchaException; import org.schabi.newpipe.extractor.linkhandler.ListLinkHandlerFactory; import org.schabi.newpipe.extractor.utils.Parser; @@ -27,6 +18,11 @@ public class YoutubeCommentsLinkHandlerFactory extends ListLinkHandlerFactory { return instance; } + @Override + public String getUrl(String id) { + return "https://www.youtube.com/watch?v=" + id; + } + @Override public String getId(String url) throws ParsingException, IllegalArgumentException { if (url.isEmpty()) { @@ -44,8 +40,6 @@ public class YoutubeCommentsLinkHandlerFactory extends ListLinkHandlerFactory { } catch (UnsupportedEncodingException uee) { throw new ParsingException("Could not parse attribution_link", uee); } - } else if (lowercaseUrl.contains("youtube.com/shared?ci=")) { - return getRealIdFromSharedLink(url); } else if (url.contains("vnd.youtube")) { id = Parser.matchGroup1(ID_PATTERN, url); } else if (url.contains("embed")) { @@ -86,56 +80,6 @@ public class YoutubeCommentsLinkHandlerFactory extends ListLinkHandlerFactory { } } - /** - * Get the real url from a shared uri. - *

- * Shared URI's look like this: - *

-     *     * https://www.youtube.com/shared?ci=PJICrTByb3E
-     *     * vnd.youtube://www.youtube.com/shared?ci=PJICrTByb3E&feature=twitter-deep-link
-     * 
- * - * @param url The shared url - * @return the id of the stream - * @throws ParsingException - */ - private String getRealIdFromSharedLink(String url) throws ParsingException { - URI uri; - try { - uri = new URI(url); - } catch (URISyntaxException e) { - throw new ParsingException("Invalid shared link", e); - } - String sharedId = getSharedId(uri); - Downloader downloader = NewPipe.getDownloader(); - String content; - try { - content = downloader.download("https://www.youtube.com/shared?ci=" + sharedId); - } catch (IOException | ReCaptchaException e) { - throw new ParsingException("Unable to resolve shared link", e); - } - final Document document = Jsoup.parse(content); - - final Element element = document.select("link[rel=\"canonical\"]").first(); - final String urlWithRealId = (element != null) - ? element.attr("abs:href") - : document.select("meta[property=\"og:url\"]").first() - .attr("abs:content"); - - String realId = Parser.matchGroup1(ID_PATTERN, urlWithRealId); - if (sharedId.equals(realId)) { - throw new ParsingException("Got same id for as shared info_id: " + sharedId); - } - return realId; - } - - private String getSharedId(URI uri) throws ParsingException { - if (!"/shared".equals(uri.getPath())) { - throw new ParsingException("Not a shared link: " + uri.toString() + " (path != " + uri.getPath() + ")"); - } - return Parser.matchGroup1("ci=" + ID_PATTERN, uri.getQuery()); - } - @Override public boolean onAcceptUrl(final String url) throws FoundAdException { final String lowercaseUrl = url.toLowerCase(); @@ -156,8 +100,8 @@ public class YoutubeCommentsLinkHandlerFactory extends ListLinkHandlerFactory { } } - @Override - public String getUrl(String id, List contentFilter, String sortFilter) throws ParsingException { - return "https://www.youtube.com/watch?v=" + id; - } + @Override + public String getUrl(String id, List contentFilter, String sortFilter) throws ParsingException { + return "https://www.youtube.com/watch?v=" + id; + } } diff --git a/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeCommentsExtractorTest.java b/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeCommentsExtractorTest.java index 41eb90aaa..cc56a21b8 100644 --- a/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeCommentsExtractorTest.java +++ b/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeCommentsExtractorTest.java @@ -11,6 +11,7 @@ import org.junit.Test; import org.schabi.newpipe.Downloader; import org.schabi.newpipe.extractor.ListExtractor.InfoItemsPage; import org.schabi.newpipe.extractor.NewPipe; +import org.schabi.newpipe.extractor.comments.CommentsInfo; import org.schabi.newpipe.extractor.comments.CommentsInfoItem; import org.schabi.newpipe.extractor.exceptions.ExtractionException; import org.schabi.newpipe.extractor.services.youtube.extractors.YoutubeCommentsExtractor; @@ -55,6 +56,21 @@ public class YoutubeCommentsExtractorTest { assertTrue(result); } + + @Test + public void testGetCommentsFromCommentsInfo() throws IOException, ExtractionException { + boolean result = false; + CommentsInfo commentsInfo = CommentsInfo.getInfo("https://www.youtube.com/watch?v=rrgFN3AxGfs"); + assertTrue("what the fuck am i doing with my life.wmv".equals(commentsInfo.getName())); + result = findInComments(commentsInfo.getComments(), "i should really be in the top comment.lol"); + + while (commentsInfo.hasMoreComments() && !result) { + CommentsInfo.loadMoreComments(commentsInfo); + result = findInComments(commentsInfo.getComments(), "i should really be in the top comment.lol"); + } + + assertTrue(result); + } private boolean findInComments(InfoItemsPage comments, String comment) { return findInComments(comments.getItems(), comment); From 4794e16dcb48884679cc53d2e548d1df12533fcf Mon Sep 17 00:00:00 2001 From: Ritvik Saraf <13ritvik@gmail.com> Date: Wed, 19 Sep 2018 04:33:57 +0530 Subject: [PATCH 11/31] removed comment details form streamInfo. added commentsInfo instead --- .../newpipe/extractor/stream/StreamInfo.java | 68 ++----------------- .../youtube/YoutubeCommentsExtractorTest.java | 8 +-- 2 files changed, 11 insertions(+), 65 deletions(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/stream/StreamInfo.java b/extractor/src/main/java/org/schabi/newpipe/extractor/stream/StreamInfo.java index a69b55f52..0d452f6fe 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/stream/StreamInfo.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/stream/StreamInfo.java @@ -6,12 +6,10 @@ import java.util.List; import org.schabi.newpipe.extractor.Info; import org.schabi.newpipe.extractor.InfoItem; -import org.schabi.newpipe.extractor.ListExtractor.InfoItemsPage; import org.schabi.newpipe.extractor.NewPipe; import org.schabi.newpipe.extractor.StreamingService; import org.schabi.newpipe.extractor.Subtitles; -import org.schabi.newpipe.extractor.comments.CommentsExtractor; -import org.schabi.newpipe.extractor.comments.CommentsInfoItem; +import org.schabi.newpipe.extractor.comments.CommentsInfo; import org.schabi.newpipe.extractor.exceptions.ContentNotAvailableException; import org.schabi.newpipe.extractor.exceptions.ExtractionException; import org.schabi.newpipe.extractor.utils.DashMpdParser; @@ -267,40 +265,15 @@ public class StreamInfo extends Info { streamInfo.setRelatedStreams(ExtractorHelper.getRelatedVideosOrLogError(streamInfo, extractor)); - CommentsExtractor commentsExtractor = null; try { - commentsExtractor = NewPipe.getService(streamInfo.getServiceId()).getCommentsExtractor(streamInfo.getUrl()); - streamInfo.setCommentsExtractor(commentsExtractor); + streamInfo.setCommentsInfo(CommentsInfo.getInfo(streamInfo.getUrl())); } catch (Exception e) { streamInfo.addError(e); } - if (null != commentsExtractor) { - InfoItemsPage initialCommentsPage = ExtractorHelper.getItemsPageOrLogError(streamInfo, - commentsExtractor); - streamInfo.setComments(new ArrayList<>()); - streamInfo.getComments().addAll(initialCommentsPage.getItems()); - streamInfo.setHasMoreComments(initialCommentsPage.hasNextPage()); - streamInfo.setNextCommentsPageUrl(initialCommentsPage.getNextPageUrl()); - } - return streamInfo; } - public static void loadMoreComments(StreamInfo streamInfo) { - if (streamInfo.hasMoreComments() && null != streamInfo.getCommentsExtractor()) { - try { - InfoItemsPage commentsPage = streamInfo.getCommentsExtractor() - .getPage(streamInfo.getNextCommentsPageUrl()); - streamInfo.getComments().addAll(commentsPage.getItems()); - streamInfo.setHasMoreComments(commentsPage.hasNextPage()); - streamInfo.setNextCommentsPageUrl(commentsPage.getNextPageUrl()); - } catch (IOException | ExtractionException e) { - streamInfo.addError(e); - } - } - } - private StreamType streamType; private String thumbnailUrl; private String uploadDate; @@ -325,10 +298,7 @@ public class StreamInfo extends Info { private StreamInfoItem nextVideo; private List relatedStreams; - private CommentsExtractor commentsExtractor; - private List comments; - private boolean hasMoreComments; - private String nextCommentsPageUrl; + private CommentsInfo commentsInfo; private long startPosition = 0; private List subtitles; @@ -526,36 +496,12 @@ public class StreamInfo extends Info { this.subtitles = subtitles; } - public List getComments() { - return comments; + public CommentsInfo getCommentsInfo() { + return commentsInfo; } - public void setComments(List comments) { - this.comments = comments; - } - - public boolean hasMoreComments() { - return hasMoreComments; - } - - public void setHasMoreComments(boolean hasMoreComments) { - this.hasMoreComments = hasMoreComments; - } - - public CommentsExtractor getCommentsExtractor() { - return commentsExtractor; - } - - public void setCommentsExtractor(CommentsExtractor commentsExtractor) { - this.commentsExtractor = commentsExtractor; - } - - public String getNextCommentsPageUrl() { - return nextCommentsPageUrl; - } - - public void setNextCommentsPageUrl(String nextCommentsPageUrl) { - this.nextCommentsPageUrl = nextCommentsPageUrl; + public void setCommentsInfo(CommentsInfo commentsInfo) { + this.commentsInfo = commentsInfo; } } diff --git a/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeCommentsExtractorTest.java b/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeCommentsExtractorTest.java index cc56a21b8..ee13a15cc 100644 --- a/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeCommentsExtractorTest.java +++ b/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeCommentsExtractorTest.java @@ -47,11 +47,11 @@ public class YoutubeCommentsExtractorTest { boolean result = false; StreamInfo streamInfo = StreamInfo.getInfo("https://www.youtube.com/watch?v=rrgFN3AxGfs"); - result = findInComments(streamInfo.getComments(), "i should really be in the top comment.lol"); + result = findInComments(streamInfo.getCommentsInfo().getComments(), "i should really be in the top comment.lol"); - while (streamInfo.hasMoreComments() && !result) { - StreamInfo.loadMoreComments(streamInfo); - result = findInComments(streamInfo.getComments(), "i should really be in the top comment.lol"); + while (streamInfo.getCommentsInfo().hasMoreComments() && !result) { + CommentsInfo.loadMoreComments(streamInfo.getCommentsInfo()); + result = findInComments(streamInfo.getCommentsInfo().getComments(), "i should really be in the top comment.lol"); } assertTrue(result); From e04787f340d1ed8375cf8012ae7b35eada388322 Mon Sep 17 00:00:00 2001 From: Ritvik Saraf <13ritvik@gmail.com> Date: Wed, 19 Sep 2018 05:32:14 +0530 Subject: [PATCH 12/31] fixed NPE for services where comments is not available --- .../java/org/schabi/newpipe/extractor/StreamingService.java | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/StreamingService.java b/extractor/src/main/java/org/schabi/newpipe/extractor/StreamingService.java index 841511a8f..9486c8b68 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/StreamingService.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/StreamingService.java @@ -122,7 +122,11 @@ public abstract class StreamingService { } public CommentsExtractor getCommentsExtractor(String url) throws ExtractionException { - return getCommentsExtractor(getCommentsLHFactory().fromUrl(url)); + ListLinkHandlerFactory llhf = getCommentsLHFactory(); + if(null == llhf) { + return null; + } + return getCommentsExtractor(llhf.fromUrl(url)); } From 6a0341d59c78932a7427ebfc29fc556852015e00 Mon Sep 17 00:00:00 2001 From: Ritvik Saraf <13ritvik@gmail.com> Date: Sat, 22 Sep 2018 14:55:59 +0530 Subject: [PATCH 13/31] testing --- .../schabi/newpipe/extractor/comments/CommentsInfo.java | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/comments/CommentsInfo.java b/extractor/src/main/java/org/schabi/newpipe/extractor/comments/CommentsInfo.java index fb0cc14b9..59a0e1995 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/comments/CommentsInfo.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/comments/CommentsInfo.java @@ -43,6 +43,10 @@ public class CommentsInfo extends ListInfo{ commentsExtractor); commentsInfo.setComments(new ArrayList<>()); commentsInfo.getComments().addAll(initialCommentsPage.getItems()); + //tmp + commentsInfo.setRelatedItems(initialCommentsPage.getItems()); + commentsInfo.setNextCommentsPageUrl(initialCommentsPage.getNextPageUrl()); + commentsInfo.setHasMoreComments(initialCommentsPage.hasNextPage()); commentsInfo.setNextCommentsPageUrl(initialCommentsPage.getNextPageUrl()); return commentsInfo; @@ -53,7 +57,8 @@ public class CommentsInfo extends ListInfo{ if(null == commentsInfo.getCommentsExtractor()) { try { commentsInfo.setCommentsExtractor(NewPipe.getService(commentsInfo.getServiceId()).getCommentsExtractor(commentsInfo.getUrl())); - } catch (ExtractionException e) { + commentsInfo.getCommentsExtractor().fetchPage(); + } catch (ExtractionException | IOException e) { commentsInfo.addError(e); return; } From 7ed0da049355b8caca0e3185ab773abea9e93422 Mon Sep 17 00:00:00 2001 From: Ritvik Saraf <13ritvik@gmail.com> Date: Sat, 22 Sep 2018 19:09:31 +0530 Subject: [PATCH 14/31] more testing --- .../org/schabi/newpipe/extractor/comments/CommentsInfo.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/comments/CommentsInfo.java b/extractor/src/main/java/org/schabi/newpipe/extractor/comments/CommentsInfo.java index 59a0e1995..62a376f4e 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/comments/CommentsInfo.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/comments/CommentsInfo.java @@ -45,7 +45,7 @@ public class CommentsInfo extends ListInfo{ commentsInfo.getComments().addAll(initialCommentsPage.getItems()); //tmp commentsInfo.setRelatedItems(initialCommentsPage.getItems()); - commentsInfo.setNextCommentsPageUrl(initialCommentsPage.getNextPageUrl()); + commentsInfo.setNextPageUrl(initialCommentsPage.getNextPageUrl()); commentsInfo.setHasMoreComments(initialCommentsPage.hasNextPage()); commentsInfo.setNextCommentsPageUrl(initialCommentsPage.getNextPageUrl()); From 0e864758913179d6bd675a56e9d44e35be5fa06d Mon Sep 17 00:00:00 2001 From: Ritvik Saraf <13ritvik@gmail.com> Date: Sat, 22 Sep 2018 23:36:41 +0530 Subject: [PATCH 15/31] removed commentsInfo from streamInfo --- .../extractor/comments/CommentsInfo.java | 78 +++++-------------- .../newpipe/extractor/stream/StreamInfo.java | 16 ---- .../youtube/YoutubeCommentsExtractorTest.java | 27 ++----- 3 files changed, 26 insertions(+), 95 deletions(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/comments/CommentsInfo.java b/extractor/src/main/java/org/schabi/newpipe/extractor/comments/CommentsInfo.java index 62a376f4e..d247c34b3 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/comments/CommentsInfo.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/comments/CommentsInfo.java @@ -10,6 +10,7 @@ import org.schabi.newpipe.extractor.NewPipe; import org.schabi.newpipe.extractor.StreamingService; import org.schabi.newpipe.extractor.exceptions.ExtractionException; import org.schabi.newpipe.extractor.linkhandler.ListLinkHandler; +import org.schabi.newpipe.extractor.stream.StreamInfoItem; import org.schabi.newpipe.extractor.utils.ExtractorHelper; public class CommentsInfo extends ListInfo{ @@ -23,78 +24,45 @@ public class CommentsInfo extends ListInfo{ return getInfo(NewPipe.getServiceByUrl(url), url); } - private static CommentsInfo getInfo(StreamingService serviceByUrl, String url) throws ExtractionException, IOException { + public static CommentsInfo getInfo(StreamingService serviceByUrl, String url) throws ExtractionException, IOException { return getInfo(serviceByUrl.getCommentsExtractor(url)); } private static CommentsInfo getInfo(CommentsExtractor commentsExtractor) throws IOException, ExtractionException { - //for services which do not have a comments extractor - if(null == commentsExtractor) { + // for services which do not have a comments extractor + if (null == commentsExtractor) { return null; } - + commentsExtractor.fetchPage(); String name = commentsExtractor.getName(); int serviceId = commentsExtractor.getServiceId(); ListLinkHandler listUrlIdHandler = commentsExtractor.getUIHandler(); CommentsInfo commentsInfo = new CommentsInfo(serviceId, listUrlIdHandler, name); commentsInfo.setCommentsExtractor(commentsExtractor); - InfoItemsPage initialCommentsPage = ExtractorHelper.getItemsPageOrLogError(commentsInfo, - commentsExtractor); - commentsInfo.setComments(new ArrayList<>()); - commentsInfo.getComments().addAll(initialCommentsPage.getItems()); - //tmp + InfoItemsPage initialCommentsPage = ExtractorHelper.getItemsPageOrLogError(commentsInfo, + commentsExtractor); commentsInfo.setRelatedItems(initialCommentsPage.getItems()); commentsInfo.setNextPageUrl(initialCommentsPage.getNextPageUrl()); - - commentsInfo.setHasMoreComments(initialCommentsPage.hasNextPage()); - commentsInfo.setNextCommentsPageUrl(initialCommentsPage.getNextPageUrl()); + return commentsInfo; } - public static void loadMoreComments(CommentsInfo commentsInfo) { - if (commentsInfo.hasMoreComments()) { - if(null == commentsInfo.getCommentsExtractor()) { - try { - commentsInfo.setCommentsExtractor(NewPipe.getService(commentsInfo.getServiceId()).getCommentsExtractor(commentsInfo.getUrl())); - commentsInfo.getCommentsExtractor().fetchPage(); - } catch (ExtractionException | IOException e) { - commentsInfo.addError(e); - return; - } - } - try { - InfoItemsPage commentsPage = commentsInfo.getCommentsExtractor() - .getPage(commentsInfo.getNextCommentsPageUrl()); - commentsInfo.getComments().addAll(commentsPage.getItems()); - commentsInfo.setHasMoreComments(commentsPage.hasNextPage()); - commentsInfo.setNextCommentsPageUrl(commentsPage.getNextPageUrl()); - } catch (IOException | ExtractionException e) { - commentsInfo.addError(e); - } + public static InfoItemsPage getMoreItems(CommentsInfo commentsInfo, String pageUrl) + throws ExtractionException, IOException { + return getMoreItems(NewPipe.getService(commentsInfo.getServiceId()), commentsInfo, pageUrl); + } + + public static InfoItemsPage getMoreItems(StreamingService service, CommentsInfo commentsInfo, + String pageUrl) throws IOException, ExtractionException { + if (null == commentsInfo.getCommentsExtractor()) { + commentsInfo.setCommentsExtractor(service.getCommentsExtractor(commentsInfo.getUrl())); + commentsInfo.getCommentsExtractor().fetchPage(); } + return commentsInfo.getCommentsExtractor().getPage(pageUrl); } private transient CommentsExtractor commentsExtractor; - private List comments; - private boolean hasMoreComments; - private String nextCommentsPageUrl; - - public List getComments() { - return comments; - } - - public void setComments(List comments) { - this.comments = comments; - } - - public boolean hasMoreComments() { - return hasMoreComments; - } - - public void setHasMoreComments(boolean hasMoreComments) { - this.hasMoreComments = hasMoreComments; - } public CommentsExtractor getCommentsExtractor() { return commentsExtractor; @@ -104,12 +72,4 @@ public class CommentsInfo extends ListInfo{ this.commentsExtractor = commentsExtractor; } - public String getNextCommentsPageUrl() { - return nextCommentsPageUrl; - } - - public void setNextCommentsPageUrl(String nextCommentsPageUrl) { - this.nextCommentsPageUrl = nextCommentsPageUrl; - } - } diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/stream/StreamInfo.java b/extractor/src/main/java/org/schabi/newpipe/extractor/stream/StreamInfo.java index 0d452f6fe..a71482a50 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/stream/StreamInfo.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/stream/StreamInfo.java @@ -265,12 +265,6 @@ public class StreamInfo extends Info { streamInfo.setRelatedStreams(ExtractorHelper.getRelatedVideosOrLogError(streamInfo, extractor)); - try { - streamInfo.setCommentsInfo(CommentsInfo.getInfo(streamInfo.getUrl())); - } catch (Exception e) { - streamInfo.addError(e); - } - return streamInfo; } @@ -298,8 +292,6 @@ public class StreamInfo extends Info { private StreamInfoItem nextVideo; private List relatedStreams; - private CommentsInfo commentsInfo; - private long startPosition = 0; private List subtitles; @@ -496,12 +488,4 @@ public class StreamInfo extends Info { this.subtitles = subtitles; } - public CommentsInfo getCommentsInfo() { - return commentsInfo; - } - - public void setCommentsInfo(CommentsInfo commentsInfo) { - this.commentsInfo = commentsInfo; - } - } diff --git a/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeCommentsExtractorTest.java b/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeCommentsExtractorTest.java index ee13a15cc..f811e1124 100644 --- a/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeCommentsExtractorTest.java +++ b/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeCommentsExtractorTest.java @@ -6,6 +6,7 @@ import static org.schabi.newpipe.extractor.ServiceList.YouTube; import java.io.IOException; import java.util.List; +import org.jsoup.helper.StringUtil; import org.junit.BeforeClass; import org.junit.Test; import org.schabi.newpipe.Downloader; @@ -15,7 +16,6 @@ import org.schabi.newpipe.extractor.comments.CommentsInfo; import org.schabi.newpipe.extractor.comments.CommentsInfoItem; import org.schabi.newpipe.extractor.exceptions.ExtractionException; import org.schabi.newpipe.extractor.services.youtube.extractors.YoutubeCommentsExtractor; -import org.schabi.newpipe.extractor.stream.StreamInfo; public class YoutubeCommentsExtractorTest { @@ -41,32 +41,19 @@ public class YoutubeCommentsExtractorTest { assertTrue(result); } - - @Test - public void testGetCommentsFromStreamInfo() throws IOException, ExtractionException { - boolean result = false; - StreamInfo streamInfo = StreamInfo.getInfo("https://www.youtube.com/watch?v=rrgFN3AxGfs"); - - result = findInComments(streamInfo.getCommentsInfo().getComments(), "i should really be in the top comment.lol"); - - while (streamInfo.getCommentsInfo().hasMoreComments() && !result) { - CommentsInfo.loadMoreComments(streamInfo.getCommentsInfo()); - result = findInComments(streamInfo.getCommentsInfo().getComments(), "i should really be in the top comment.lol"); - } - - assertTrue(result); - } @Test public void testGetCommentsFromCommentsInfo() throws IOException, ExtractionException { boolean result = false; CommentsInfo commentsInfo = CommentsInfo.getInfo("https://www.youtube.com/watch?v=rrgFN3AxGfs"); assertTrue("what the fuck am i doing with my life.wmv".equals(commentsInfo.getName())); - result = findInComments(commentsInfo.getComments(), "i should really be in the top comment.lol"); + result = findInComments(commentsInfo.getRelatedItems(), "i should really be in the top comment.lol"); - while (commentsInfo.hasMoreComments() && !result) { - CommentsInfo.loadMoreComments(commentsInfo); - result = findInComments(commentsInfo.getComments(), "i should really be in the top comment.lol"); + String nextPage = commentsInfo.getNextPageUrl(); + while (!StringUtil.isBlank(nextPage) && !result) { + InfoItemsPage moreItems = CommentsInfo.getMoreItems(YouTube, commentsInfo, nextPage); + result = findInComments(moreItems.getItems(), "i should really be in the top comment.lol"); + nextPage = moreItems.getNextPageUrl(); } assertTrue(result); From c1199c8fcf786c4fa44f7acae7472b613ada7d89 Mon Sep 17 00:00:00 2001 From: Ritvik Saraf <13ritvik@gmail.com> Date: Sun, 23 Sep 2018 01:12:11 +0530 Subject: [PATCH 16/31] added isCommentsSupported method --- .../org/schabi/newpipe/extractor/StreamingService.java | 3 ++- .../extractor/services/soundcloud/SoundcloudService.java | 7 +++++-- .../newpipe/extractor/services/youtube/YoutubeService.java | 5 +++++ 3 files changed, 12 insertions(+), 3 deletions(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/StreamingService.java b/extractor/src/main/java/org/schabi/newpipe/extractor/StreamingService.java index 9486c8b68..a6a1be789 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/StreamingService.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/StreamingService.java @@ -92,7 +92,8 @@ public abstract class StreamingService { public abstract PlaylistExtractor getPlaylistExtractor(ListLinkHandler urlIdHandler) throws ExtractionException; public abstract StreamExtractor getStreamExtractor(LinkHandler UIHFactory) throws ExtractionException; public abstract CommentsExtractor getCommentsExtractor(ListLinkHandler urlIdHandler) throws ExtractionException; - + public abstract boolean isCommentsSupported(); + public SearchExtractor getSearchExtractor(String query, List contentFilter, String sortFilter, String contentCountry) throws ExtractionException { return getSearchExtractor(getSearchQHFactory().fromQuery(query, contentFilter, sortFilter), contentCountry); } diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/SoundcloudService.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/SoundcloudService.java index 1d322f738..bdd2a4eb5 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/SoundcloudService.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/SoundcloudService.java @@ -102,13 +102,16 @@ public class SoundcloudService extends StreamingService { @Override public ListLinkHandlerFactory getCommentsLHFactory() { - // TODO Auto-generated method stub return null; } @Override public CommentsExtractor getCommentsExtractor(ListLinkHandler urlIdHandler) throws ExtractionException { - // TODO Auto-generated method stub return null; } + + @Override + public boolean isCommentsSupported() { + return false; + } } diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeService.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeService.java index f6bc03775..67e217986 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeService.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeService.java @@ -146,4 +146,9 @@ public class YoutubeService extends StreamingService { return new YoutubeCommentsExtractor(this, urlIdHandler); } + @Override + public boolean isCommentsSupported() { + return true; + } + } From 8e27801183e9cbb713a27fe4b7305b349c9ee523 Mon Sep 17 00:00:00 2001 From: Ritvik Saraf <13ritvik@gmail.com> Date: Wed, 26 Sep 2018 03:20:29 +0530 Subject: [PATCH 17/31] removed jackson and java 8 --- build.gradle | 4 +- extractor/build.gradle | 1 - .../extractors/YoutubeCommentsExtractor.java | 152 +++++------------- .../YoutubeCommentsInfoItemExtractor.java | 78 +++++++++ .../newpipe/extractor/utils/JsonUtils.java | 53 ++++++ .../java/org/schabi/newpipe/Downloader.java | 10 +- .../youtube/YoutubeCommentsExtractorTest.java | 9 +- .../extractor/utils/JsonUtilsTest.java | 47 ++++++ 8 files changed, 231 insertions(+), 123 deletions(-) create mode 100644 extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeCommentsInfoItemExtractor.java create mode 100644 extractor/src/main/java/org/schabi/newpipe/extractor/utils/JsonUtils.java create mode 100644 extractor/src/test/java/org/schabi/newpipe/extractor/utils/JsonUtilsTest.java diff --git a/build.gradle b/build.gradle index f8f6c8e90..9f25fc28e 100644 --- a/build.gradle +++ b/build.gradle @@ -1,7 +1,7 @@ allprojects { apply plugin: 'java-library' - sourceCompatibility = 1.8 - targetCompatibility = 1.8 + sourceCompatibility = 1.7 + targetCompatibility = 1.7 version 'v0.13.0' diff --git a/extractor/build.gradle b/extractor/build.gradle index 26430b9fa..1b7fbf001 100644 --- a/extractor/build.gradle +++ b/extractor/build.gradle @@ -6,7 +6,6 @@ dependencies { implementation 'org.mozilla:rhino:1.7.7.1' implementation 'com.github.spotbugs:spotbugs-annotations:3.1.0' implementation 'org.nibor.autolink:autolink:0.8.0' - implementation 'com.fasterxml.jackson.core:jackson-databind:2.9.5' testImplementation 'junit:junit:4.12' } \ No newline at end of file diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeCommentsExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeCommentsExtractor.java index fb08e9b7a..259e52907 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeCommentsExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeCommentsExtractor.java @@ -7,7 +7,6 @@ import java.util.Arrays; import java.util.HashMap; import java.util.List; import java.util.Map; -import java.util.Optional; import org.schabi.newpipe.extractor.DownloadResponse; import org.schabi.newpipe.extractor.Downloader; @@ -21,9 +20,12 @@ import org.schabi.newpipe.extractor.exceptions.ExtractionException; import org.schabi.newpipe.extractor.exceptions.ParsingException; import org.schabi.newpipe.extractor.exceptions.ReCaptchaException; import org.schabi.newpipe.extractor.linkhandler.ListLinkHandler; +import org.schabi.newpipe.extractor.utils.JsonUtils; -import com.fasterxml.jackson.databind.JsonNode; -import com.fasterxml.jackson.databind.ObjectMapper; +import com.grack.nanojson.JsonArray; +import com.grack.nanojson.JsonObject; +import com.grack.nanojson.JsonParser; +import com.grack.nanojson.JsonParserException; public class YoutubeCommentsExtractor extends CommentsExtractor { @@ -34,8 +36,6 @@ public class YoutubeCommentsExtractor extends CommentsExtractor { private String title; private InfoItemsPage initPage; - private ObjectMapper mapper = new ObjectMapper(); - public YoutubeCommentsExtractor(StreamingService service, ListLinkHandler uiHandler) { super(service, uiHandler); } @@ -57,16 +57,24 @@ public class YoutubeCommentsExtractor extends CommentsExtractor { return initPage.getNextPageUrl(); } - private String getNextPageUrl(JsonNode ajaxJson) throws IOException, ExtractionException { - Optional element = Optional.ofNullable(ajaxJson.findValue("itemSectionContinuation")) - .map(e -> e.get("continuations")).map(e -> e.findValue("continuation")); - - if (element.isPresent()) { - return getNextPageUrl(element.get().asText()); - } else { - // no more comments + private String getNextPageUrl(JsonObject ajaxJson) throws IOException, ParsingException { + + JsonArray arr; + try { + arr = JsonUtils.getValue(ajaxJson, "response.continuationContents.itemSectionContinuation.continuations"); + } catch (ParsingException e) { return ""; } + if(null == arr || arr.isEmpty()) { + return ""; + } + String continuation; + try { + continuation = JsonUtils.getValue(arr.getObject(0), "nextContinuationData.continuation"); + } catch (ParsingException e) { + return ""; + } + return getNextPageUrl(continuation); } private String getNextPageUrl(String continuation) throws ParsingException { @@ -88,121 +96,35 @@ public class YoutubeCommentsExtractor extends CommentsExtractor { throw new ExtractionException(new IllegalArgumentException("Page url is empty or null")); } String ajaxResponse = makeAjaxRequest(pageUrl); - JsonNode ajaxJson = mapper.readTree(ajaxResponse); + JsonObject ajaxJson; + try { + ajaxJson = JsonParser.object().from(ajaxResponse); + } catch (JsonParserException e) { + throw new ParsingException("Could not parse json data for comments", e); + } CommentsInfoItemsCollector collector = new CommentsInfoItemsCollector(getServiceId()); collectCommentsFrom(collector, ajaxJson, pageUrl); return new InfoItemsPage<>(collector, getNextPageUrl(ajaxJson)); } - private void collectCommentsFrom(CommentsInfoItemsCollector collector, JsonNode ajaxJson, String pageUrl) { + private void collectCommentsFrom(CommentsInfoItemsCollector collector, JsonObject ajaxJson, String pageUrl) throws ParsingException { - fetchTitle(ajaxJson); - List comments = ajaxJson.findValues("commentRenderer"); - comments.stream().forEach(c -> { - CommentsInfoItemExtractor extractor = new CommentsInfoItemExtractor() { - - @Override - public String getUrl() throws ParsingException { - return pageUrl; - } - - @Override - public String getThumbnailUrl() throws ParsingException { - try { - return c.get("authorThumbnail").get("thumbnails").get(2).get("url").asText(); - } catch (Exception e) { - throw new ParsingException("Could not get thumbnail url", e); - } - } - - @Override - public String getName() throws ParsingException { - try { - return c.get("authorText").get("simpleText").asText(); - } catch (Exception e) { - throw new ParsingException("Could not get author name", e); - } - } - - @Override - public String getPublishedTime() throws ParsingException { - try { - return c.get("publishedTimeText").get("runs").get(0).get("text").asText(); - } catch (Exception e) { - throw new ParsingException("Could not get publishedTimeText", e); - } - } - - @Override - public Integer getLikeCount() throws ParsingException { - try { - return c.get("likeCount").intValue(); - } catch (Exception e) { - throw new ParsingException("Could not get like count", e); - } - } - - @Override - public String getCommentText() throws ParsingException { - try { - if (null != c.get("contentText").get("simpleText")) { - return c.get("contentText").get("simpleText").asText(); - } else { - return c.get("contentText").get("runs").get(0).get("text").asText(); - } - } catch (Exception e) { - throw new ParsingException("Could not get comment text", e); - } - } - - @Override - public String getCommentId() throws ParsingException { - try { - return c.get("commentId").asText(); - } catch (Exception e) { - throw new ParsingException("Could not get comment id", e); - } - } - - @Override - public String getAuthorThumbnail() throws ParsingException { - try { - return c.get("authorThumbnail").get("thumbnails").get(2).get("url").asText(); - } catch (Exception e) { - throw new ParsingException("Could not get author thumbnail", e); - } - } - - @Override - public String getAuthorName() throws ParsingException { - try { - return c.get("authorText").get("simpleText").asText(); - } catch (Exception e) { - throw new ParsingException("Could not get author name", e); - } - } - - @Override - public String getAuthorEndpoint() throws ParsingException { - try { - return "https://youtube.com" - + c.get("authorEndpoint").get("browseEndpoint").get("canonicalBaseUrl").asText(); - } catch (Exception e) { - throw new ParsingException("Could not get author endpoint", e); - } - } - }; - + JsonArray contents = JsonUtils.getValue(ajaxJson, "response.continuationContents.itemSectionContinuation.contents"); + fetchTitle(contents); + List comments = JsonUtils.getValues(contents, "commentThreadRenderer.comment.commentRenderer"); + + for(JsonObject c: comments) { + CommentsInfoItemExtractor extractor = new YoutubeCommentsInfoItemExtractor(c, pageUrl); collector.commit(extractor); - }); + } } - private void fetchTitle(JsonNode ajaxJson) { + private void fetchTitle(JsonArray contents) { if(null == title) { try { - title = ajaxJson.findValue("commentTargetTitle").get("simpleText").asText(); + title = JsonUtils.getValue(contents.getObject(0), "commentThreadRenderer.commentTargetTitle.simpleText"); } catch (Exception e) { title = "Youtube Comments"; } diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeCommentsInfoItemExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeCommentsInfoItemExtractor.java new file mode 100644 index 000000000..f6507f153 --- /dev/null +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeCommentsInfoItemExtractor.java @@ -0,0 +1,78 @@ +package org.schabi.newpipe.extractor.services.youtube.extractors; + +import org.schabi.newpipe.extractor.comments.CommentsInfoItemExtractor; +import org.schabi.newpipe.extractor.exceptions.ParsingException; +import org.schabi.newpipe.extractor.utils.JsonUtils; + +import com.grack.nanojson.JsonArray; +import com.grack.nanojson.JsonObject; + +public class YoutubeCommentsInfoItemExtractor implements CommentsInfoItemExtractor{ + + private final JsonObject json; + private final String url; + + public YoutubeCommentsInfoItemExtractor(JsonObject json, String url) { + this.json = json; + this.url = url; + } + + @Override + public String getUrl() throws ParsingException { + return url; + } + + @Override + public String getThumbnailUrl() throws ParsingException { + JsonArray arr = JsonUtils.getValue(json, "authorThumbnail.thumbnails"); + return JsonUtils.getValue(arr.getObject(2), "url"); + } + + @Override + public String getName() throws ParsingException { + return JsonUtils.getValue(json, "authorText.simpleText"); + } + + @Override + public String getPublishedTime() throws ParsingException { + JsonArray arr = JsonUtils.getValue(json, "publishedTimeText.runs"); + return JsonUtils.getValue(arr.getObject(0), "text"); + } + + @Override + public Integer getLikeCount() throws ParsingException { + return JsonUtils.getValue(json, "likeCount"); + } + + @Override + public String getCommentText() throws ParsingException { + try { + return JsonUtils.getValue(json, "contentText.simpleText"); + } catch (Exception e) { + JsonArray arr = JsonUtils.getValue(json, "contentText.runs"); + return JsonUtils.getValue(arr.getObject(0), "text"); + } + } + + @Override + public String getCommentId() throws ParsingException { + return JsonUtils.getValue(json, "commentId"); + } + + @Override + public String getAuthorThumbnail() throws ParsingException { + JsonArray arr = JsonUtils.getValue(json, "authorThumbnail.thumbnails"); + return JsonUtils.getValue(arr.getObject(2), "url"); + } + + @Override + public String getAuthorName() throws ParsingException { + return JsonUtils.getValue(json, "authorText.simpleText"); + } + + @Override + public String getAuthorEndpoint() throws ParsingException { + return "https://youtube.com" + JsonUtils.getValue(json, "authorEndpoint.browseEndpoint.canonicalBaseUrl"); + } + +} diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/utils/JsonUtils.java b/extractor/src/main/java/org/schabi/newpipe/extractor/utils/JsonUtils.java new file mode 100644 index 000000000..8aa5cea4b --- /dev/null +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/utils/JsonUtils.java @@ -0,0 +1,53 @@ +package org.schabi.newpipe.extractor.utils; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +import javax.annotation.Nonnull; +import javax.annotation.Nullable; + +import org.schabi.newpipe.extractor.exceptions.ParsingException; + +import com.grack.nanojson.JsonArray; +import com.grack.nanojson.JsonObject; + +public class JsonUtils { + + private JsonUtils() { + } + + @Nonnull + public static T getValue(@Nonnull JsonObject object, @Nonnull String path) throws ParsingException{ + + List keys = Arrays.asList(path.split("\\.")); + object = getObject(object, keys.subList(0, keys.size() - 1)); + if (null == object) throw new ParsingException("Unable to get " + path); + T result = (T) object.get(keys.get(keys.size() - 1)); + if(null == result) throw new ParsingException("Unable to get " + path); + return result; + } + + + @Nonnull + public static List getValues(@Nonnull JsonArray array, @Nonnull String path) throws ParsingException { + + List result = new ArrayList<>(); + for (int i = 0; i < array.size(); i++) { + JsonObject obj = array.getObject(i); + result.add((T)getValue(obj, path)); + } + return result; + } + + @Nullable + private static JsonObject getObject(@Nonnull JsonObject object, @Nonnull List keys) { + JsonObject result = object; + for (String key : keys) { + result = result.getObject(key); + if (null == result) break; + } + return result; + } + +} diff --git a/extractor/src/test/java/org/schabi/newpipe/Downloader.java b/extractor/src/test/java/org/schabi/newpipe/Downloader.java index 385d896b1..4745a5d1a 100644 --- a/extractor/src/test/java/org/schabi/newpipe/Downloader.java +++ b/extractor/src/test/java/org/schabi/newpipe/Downloader.java @@ -177,7 +177,9 @@ public class Downloader implements org.schabi.newpipe.extractor.Downloader { URL url = new URL(siteUrl); HttpsURLConnection con = (HttpsURLConnection) url.openConnection(); for (Map.Entry> pair : requestHeaders.entrySet()) { - pair.getValue().stream().forEach(value -> con.addRequestProperty(pair.getKey(), value)); + for(String value: pair.getValue()) { + con.addRequestProperty(pair.getKey(), value); + } } String responseBody = dl(con); return new DownloadResponse(responseBody, con.getHeaderFields()); @@ -185,7 +187,7 @@ public class Downloader implements org.schabi.newpipe.extractor.Downloader { @Override public DownloadResponse get(String siteUrl) throws IOException, ReCaptchaException { - return get(siteUrl, Collections.emptyMap()); + return get(siteUrl, Collections.EMPTY_MAP); } @Override @@ -195,7 +197,9 @@ public class Downloader implements org.schabi.newpipe.extractor.Downloader { HttpsURLConnection con = (HttpsURLConnection) url.openConnection(); con.setRequestMethod("POST"); for (Map.Entry> pair : requestHeaders.entrySet()) { - pair.getValue().stream().forEach(value -> con.addRequestProperty(pair.getKey(), value)); + for(String value: pair.getValue()) { + con.addRequestProperty(pair.getKey(), value); + } } // set fields to default if not set already setDefaults(con); diff --git a/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeCommentsExtractorTest.java b/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeCommentsExtractorTest.java index f811e1124..afef25145 100644 --- a/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeCommentsExtractorTest.java +++ b/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeCommentsExtractorTest.java @@ -41,7 +41,7 @@ public class YoutubeCommentsExtractorTest { assertTrue(result); } - + @Test public void testGetCommentsFromCommentsInfo() throws IOException, ExtractionException { boolean result = false; @@ -64,6 +64,11 @@ public class YoutubeCommentsExtractorTest { } private boolean findInComments(List comments, String comment) { - return comments.stream().filter(c -> c.getCommentText().contains(comment)).findAny().isPresent(); + for(CommentsInfoItem c: comments) { + if(c.getCommentText().contains(comment)) { + return true; + } + } + return false; } } diff --git a/extractor/src/test/java/org/schabi/newpipe/extractor/utils/JsonUtilsTest.java b/extractor/src/test/java/org/schabi/newpipe/extractor/utils/JsonUtilsTest.java new file mode 100644 index 000000000..b44d3ee9c --- /dev/null +++ b/extractor/src/test/java/org/schabi/newpipe/extractor/utils/JsonUtilsTest.java @@ -0,0 +1,47 @@ +package org.schabi.newpipe.extractor.utils; + + +import static org.junit.Assert.assertTrue; + +import java.util.List; + +import org.junit.Test; +import org.schabi.newpipe.extractor.exceptions.ParsingException; + +import com.grack.nanojson.JsonArray; +import com.grack.nanojson.JsonObject; +import com.grack.nanojson.JsonParser; +import com.grack.nanojson.JsonParserException; + + +public class JsonUtilsTest { + + @Test + public void testGetValueFlat() throws JsonParserException, ParsingException { + JsonObject obj = JsonParser.object().from("{\"name\":\"John\",\"age\":30,\"cars\":{\"car1\":\"Ford\",\"car2\":\"BMW\",\"car3\":\"Fiat\"}}"); + assertTrue("John".equals(JsonUtils.getValue(obj, "name"))); + } + + @Test + public void testGetValueNested() throws JsonParserException, ParsingException { + JsonObject obj = JsonParser.object().from("{\"name\":\"John\",\"age\":30,\"cars\":{\"car1\":\"Ford\",\"car2\":\"BMW\",\"car3\":\"Fiat\"}}"); + assertTrue("BMW".equals(JsonUtils.getValue(obj, "cars.car2"))); + } + + @Test + public void testGetArray() throws JsonParserException, ParsingException { + JsonObject obj = JsonParser.object().from("{\"id\":\"0001\",\"type\":\"donut\",\"name\":\"Cake\",\"ppu\":0.55,\"batters\":{\"batter\":[{\"id\":\"1001\",\"type\":\"Regular\"},{\"id\":\"1002\",\"type\":\"Chocolate\"},{\"id\":\"1003\",\"type\":\"Blueberry\"},{\"id\":\"1004\",\"type\":\"Devil's Food\"}]},\"topping\":[{\"id\":\"5001\",\"type\":\"None\"},{\"id\":\"5002\",\"type\":\"Glazed\"},{\"id\":\"5005\",\"type\":\"Sugar\"},{\"id\":\"5007\",\"type\":\"Powdered Sugar\"},{\"id\":\"5006\",\"type\":\"Chocolate with Sprinkles\"},{\"id\":\"5003\",\"type\":\"Chocolate\"},{\"id\":\"5004\",\"type\":\"Maple\"}]}"); + JsonArray arr = JsonUtils.getValue(obj, "batters.batter"); + assertTrue(!arr.isEmpty()); + } + + @Test + public void testGetValues() throws JsonParserException, ParsingException { + JsonObject obj = JsonParser.object().from("{\"id\":\"0001\",\"type\":\"donut\",\"name\":\"Cake\",\"ppu\":0.55,\"batters\":{\"batter\":[{\"id\":\"1001\",\"type\":\"Regular\"},{\"id\":\"1002\",\"type\":\"Chocolate\"},{\"id\":\"1003\",\"type\":\"Blueberry\"},{\"id\":\"1004\",\"type\":\"Devil's Food\"}]},\"topping\":[{\"id\":\"5001\",\"type\":\"None\"},{\"id\":\"5002\",\"type\":\"Glazed\"},{\"id\":\"5005\",\"type\":\"Sugar\"},{\"id\":\"5007\",\"type\":\"Powdered Sugar\"},{\"id\":\"5006\",\"type\":\"Chocolate with Sprinkles\"},{\"id\":\"5003\",\"type\":\"Chocolate\"},{\"id\":\"5004\",\"type\":\"Maple\"}]}"); + JsonArray arr = JsonUtils.getValue(obj, "topping"); + List types = JsonUtils.getValues(arr, "type"); + assertTrue(types.contains("Chocolate with Sprinkles")); + + } + +} From ce76885553ac0f650329c06869e6902508da24ab Mon Sep 17 00:00:00 2001 From: Ritvik Saraf <13ritvik@gmail.com> Date: Wed, 26 Sep 2018 04:21:58 +0530 Subject: [PATCH 18/31] removed generics --- .../extractors/YoutubeCommentsExtractor.java | 36 ++++++--- .../YoutubeCommentsInfoItemExtractor.java | 73 ++++++++++++++----- .../newpipe/extractor/utils/JsonUtils.java | 10 +-- .../youtube/YoutubeCommentsExtractorTest.java | 18 +++++ .../extractor/utils/JsonUtilsTest.java | 6 +- 5 files changed, 105 insertions(+), 38 deletions(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeCommentsExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeCommentsExtractor.java index 259e52907..8f67bc5bc 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeCommentsExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeCommentsExtractor.java @@ -61,17 +61,17 @@ public class YoutubeCommentsExtractor extends CommentsExtractor { JsonArray arr; try { - arr = JsonUtils.getValue(ajaxJson, "response.continuationContents.itemSectionContinuation.continuations"); - } catch (ParsingException e) { + arr = (JsonArray) JsonUtils.getValue(ajaxJson, "response.continuationContents.itemSectionContinuation.continuations"); + } catch (Exception e) { return ""; } - if(null == arr || arr.isEmpty()) { + if(arr.isEmpty()) { return ""; } String continuation; try { - continuation = JsonUtils.getValue(arr.getObject(0), "nextContinuationData.continuation"); - } catch (ParsingException e) { + continuation = (String) JsonUtils.getValue(arr.getObject(0), "nextContinuationData.continuation"); + } catch (Exception e) { return ""; } return getNextPageUrl(continuation); @@ -109,14 +109,26 @@ public class YoutubeCommentsExtractor extends CommentsExtractor { private void collectCommentsFrom(CommentsInfoItemsCollector collector, JsonObject ajaxJson, String pageUrl) throws ParsingException { - - JsonArray contents = JsonUtils.getValue(ajaxJson, "response.continuationContents.itemSectionContinuation.contents"); + JsonArray contents; + try { + contents = (JsonArray) JsonUtils.getValue(ajaxJson, "response.continuationContents.itemSectionContinuation.contents"); + }catch(Exception e) { + throw new ParsingException("unable to get parse youtube comments", e); + } fetchTitle(contents); - List comments = JsonUtils.getValues(contents, "commentThreadRenderer.comment.commentRenderer"); + List comments; + try { + comments = JsonUtils.getValues(contents, "commentThreadRenderer.comment.commentRenderer"); + }catch(Exception e) { + throw new ParsingException("unable to get parse youtube comments", e); + } - for(JsonObject c: comments) { - CommentsInfoItemExtractor extractor = new YoutubeCommentsInfoItemExtractor(c, pageUrl); - collector.commit(extractor); + + for(Object c: comments) { + if(c instanceof JsonObject) { + CommentsInfoItemExtractor extractor = new YoutubeCommentsInfoItemExtractor((JsonObject) c, pageUrl); + collector.commit(extractor); + } } } @@ -124,7 +136,7 @@ public class YoutubeCommentsExtractor extends CommentsExtractor { private void fetchTitle(JsonArray contents) { if(null == title) { try { - title = JsonUtils.getValue(contents.getObject(0), "commentThreadRenderer.commentTargetTitle.simpleText"); + title = (String) JsonUtils.getValue(contents.getObject(0), "commentThreadRenderer.commentTargetTitle.simpleText"); } catch (Exception e) { title = "Youtube Comments"; } diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeCommentsInfoItemExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeCommentsInfoItemExtractor.java index f6507f153..79d7a93c8 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeCommentsInfoItemExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeCommentsInfoItemExtractor.java @@ -7,11 +7,11 @@ import org.schabi.newpipe.extractor.utils.JsonUtils; import com.grack.nanojson.JsonArray; import com.grack.nanojson.JsonObject; -public class YoutubeCommentsInfoItemExtractor implements CommentsInfoItemExtractor{ - +public class YoutubeCommentsInfoItemExtractor implements CommentsInfoItemExtractor { + private final JsonObject json; private final String url; - + public YoutubeCommentsInfoItemExtractor(JsonObject json, String url) { this.json = json; this.url = url; @@ -24,55 +24,92 @@ public class YoutubeCommentsInfoItemExtractor implements CommentsInfoItemExtract @Override public String getThumbnailUrl() throws ParsingException { - JsonArray arr = JsonUtils.getValue(json, "authorThumbnail.thumbnails"); - return JsonUtils.getValue(arr.getObject(2), "url"); + try { + JsonArray arr = (JsonArray) JsonUtils.getValue(json, "authorThumbnail.thumbnails"); + return (String) JsonUtils.getValue(arr.getObject(2), "url"); + } catch (Exception e) { + throw new ParsingException("Could not get thumbnail url", e); + } } @Override public String getName() throws ParsingException { - return JsonUtils.getValue(json, "authorText.simpleText"); + try { + return (String) JsonUtils.getValue(json, "authorText.simpleText"); + } catch (Exception e) { + throw new ParsingException("Could not get author name", e); + } } @Override public String getPublishedTime() throws ParsingException { - JsonArray arr = JsonUtils.getValue(json, "publishedTimeText.runs"); - return JsonUtils.getValue(arr.getObject(0), "text"); + try { + JsonArray arr = (JsonArray) JsonUtils.getValue(json, "publishedTimeText.runs"); + return (String) JsonUtils.getValue(arr.getObject(0), "text"); + } catch (Exception e) { + throw new ParsingException("Could not get publishedTimeText", e); + } } @Override public Integer getLikeCount() throws ParsingException { - return JsonUtils.getValue(json, "likeCount"); + try { + return (Integer) JsonUtils.getValue(json, "likeCount"); + } catch (Exception e) { + throw new ParsingException("Could not get like count", e); + } } @Override public String getCommentText() throws ParsingException { try { - return JsonUtils.getValue(json, "contentText.simpleText"); - } catch (Exception e) { - JsonArray arr = JsonUtils.getValue(json, "contentText.runs"); - return JsonUtils.getValue(arr.getObject(0), "text"); + return (String) JsonUtils.getValue(json, "contentText.simpleText"); + } catch (Exception e1) { + try { + JsonArray arr = (JsonArray) JsonUtils.getValue(json, "contentText.runs"); + return (String) JsonUtils.getValue(arr.getObject(0), "text"); + } catch (Exception e2) { + throw new ParsingException("Could not get comment text", e2); + } } } @Override public String getCommentId() throws ParsingException { - return JsonUtils.getValue(json, "commentId"); + try { + return (String) JsonUtils.getValue(json, "commentId"); + } catch (Exception e) { + throw new ParsingException("Could not get comment id", e); + } } @Override public String getAuthorThumbnail() throws ParsingException { - JsonArray arr = JsonUtils.getValue(json, "authorThumbnail.thumbnails"); - return JsonUtils.getValue(arr.getObject(2), "url"); + try { + JsonArray arr = (JsonArray) JsonUtils.getValue(json, "authorThumbnail.thumbnails"); + return (String) JsonUtils.getValue(arr.getObject(2), "url"); + } catch (Exception e) { + throw new ParsingException("Could not get author thumbnail", e); + } } @Override public String getAuthorName() throws ParsingException { - return JsonUtils.getValue(json, "authorText.simpleText"); + try { + return (String) JsonUtils.getValue(json, "authorText.simpleText"); + } catch (Exception e) { + throw new ParsingException("Could not get author name", e); + } } @Override public String getAuthorEndpoint() throws ParsingException { - return "https://youtube.com" + JsonUtils.getValue(json, "authorEndpoint.browseEndpoint.canonicalBaseUrl"); + try { + return "https://youtube.com" + + (String) JsonUtils.getValue(json, "authorEndpoint.browseEndpoint.canonicalBaseUrl"); + } catch (Exception e) { + throw new ParsingException("Could not get author endpoint", e); + } } } diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/utils/JsonUtils.java b/extractor/src/main/java/org/schabi/newpipe/extractor/utils/JsonUtils.java index 8aa5cea4b..ddbf47c81 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/utils/JsonUtils.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/utils/JsonUtils.java @@ -18,24 +18,24 @@ public class JsonUtils { } @Nonnull - public static T getValue(@Nonnull JsonObject object, @Nonnull String path) throws ParsingException{ + public static Object getValue(@Nonnull JsonObject object, @Nonnull String path) throws ParsingException{ List keys = Arrays.asList(path.split("\\.")); object = getObject(object, keys.subList(0, keys.size() - 1)); if (null == object) throw new ParsingException("Unable to get " + path); - T result = (T) object.get(keys.get(keys.size() - 1)); + Object result = object.get(keys.get(keys.size() - 1)); if(null == result) throw new ParsingException("Unable to get " + path); return result; } @Nonnull - public static List getValues(@Nonnull JsonArray array, @Nonnull String path) throws ParsingException { + public static List getValues(@Nonnull JsonArray array, @Nonnull String path) throws ParsingException { - List result = new ArrayList<>(); + List result = new ArrayList<>(); for (int i = 0; i < array.size(); i++) { JsonObject obj = array.getObject(i); - result.add((T)getValue(obj, path)); + result.add(getValue(obj, path)); } return result; } diff --git a/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeCommentsExtractorTest.java b/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeCommentsExtractorTest.java index afef25145..6ed3b1d06 100644 --- a/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeCommentsExtractorTest.java +++ b/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeCommentsExtractorTest.java @@ -1,5 +1,6 @@ package org.schabi.newpipe.extractor.services.youtube; +import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertTrue; import static org.schabi.newpipe.extractor.ServiceList.YouTube; @@ -58,6 +59,23 @@ public class YoutubeCommentsExtractorTest { assertTrue(result); } + + @Test + public void testGetCommentsAllData() throws IOException, ExtractionException { + InfoItemsPage comments = extractor.getInitialPage(); + for(CommentsInfoItem c: comments.getItems()) { + assertFalse(StringUtil.isBlank(c.getAuthorEndpoint())); + assertFalse(StringUtil.isBlank(c.getAuthorName())); + assertFalse(StringUtil.isBlank(c.getAuthorThumbnail())); + assertFalse(StringUtil.isBlank(c.getCommentId())); + assertFalse(StringUtil.isBlank(c.getCommentText())); + assertFalse(StringUtil.isBlank(c.getName())); + assertFalse(StringUtil.isBlank(c.getPublishedTime())); + assertFalse(StringUtil.isBlank(c.getThumbnailUrl())); + assertFalse(StringUtil.isBlank(c.getUrl())); + assertFalse(c.getLikeCount() == null); + } + } private boolean findInComments(InfoItemsPage comments, String comment) { return findInComments(comments.getItems(), comment); diff --git a/extractor/src/test/java/org/schabi/newpipe/extractor/utils/JsonUtilsTest.java b/extractor/src/test/java/org/schabi/newpipe/extractor/utils/JsonUtilsTest.java index b44d3ee9c..dc8f2b04b 100644 --- a/extractor/src/test/java/org/schabi/newpipe/extractor/utils/JsonUtilsTest.java +++ b/extractor/src/test/java/org/schabi/newpipe/extractor/utils/JsonUtilsTest.java @@ -31,15 +31,15 @@ public class JsonUtilsTest { @Test public void testGetArray() throws JsonParserException, ParsingException { JsonObject obj = JsonParser.object().from("{\"id\":\"0001\",\"type\":\"donut\",\"name\":\"Cake\",\"ppu\":0.55,\"batters\":{\"batter\":[{\"id\":\"1001\",\"type\":\"Regular\"},{\"id\":\"1002\",\"type\":\"Chocolate\"},{\"id\":\"1003\",\"type\":\"Blueberry\"},{\"id\":\"1004\",\"type\":\"Devil's Food\"}]},\"topping\":[{\"id\":\"5001\",\"type\":\"None\"},{\"id\":\"5002\",\"type\":\"Glazed\"},{\"id\":\"5005\",\"type\":\"Sugar\"},{\"id\":\"5007\",\"type\":\"Powdered Sugar\"},{\"id\":\"5006\",\"type\":\"Chocolate with Sprinkles\"},{\"id\":\"5003\",\"type\":\"Chocolate\"},{\"id\":\"5004\",\"type\":\"Maple\"}]}"); - JsonArray arr = JsonUtils.getValue(obj, "batters.batter"); + JsonArray arr = (JsonArray) JsonUtils.getValue(obj, "batters.batter"); assertTrue(!arr.isEmpty()); } @Test public void testGetValues() throws JsonParserException, ParsingException { JsonObject obj = JsonParser.object().from("{\"id\":\"0001\",\"type\":\"donut\",\"name\":\"Cake\",\"ppu\":0.55,\"batters\":{\"batter\":[{\"id\":\"1001\",\"type\":\"Regular\"},{\"id\":\"1002\",\"type\":\"Chocolate\"},{\"id\":\"1003\",\"type\":\"Blueberry\"},{\"id\":\"1004\",\"type\":\"Devil's Food\"}]},\"topping\":[{\"id\":\"5001\",\"type\":\"None\"},{\"id\":\"5002\",\"type\":\"Glazed\"},{\"id\":\"5005\",\"type\":\"Sugar\"},{\"id\":\"5007\",\"type\":\"Powdered Sugar\"},{\"id\":\"5006\",\"type\":\"Chocolate with Sprinkles\"},{\"id\":\"5003\",\"type\":\"Chocolate\"},{\"id\":\"5004\",\"type\":\"Maple\"}]}"); - JsonArray arr = JsonUtils.getValue(obj, "topping"); - List types = JsonUtils.getValues(arr, "type"); + JsonArray arr = (JsonArray) JsonUtils.getValue(obj, "topping"); + List types = JsonUtils.getValues(arr, "type"); assertTrue(types.contains("Chocolate with Sprinkles")); } From fb1419608ac3df3628a148d718fa2ddcd9748935 Mon Sep 17 00:00:00 2001 From: Ritvik Saraf <13ritvik@gmail.com> Date: Thu, 27 Sep 2018 00:44:55 +0530 Subject: [PATCH 19/31] using getDataString --- .../youtube/extractors/YoutubeCommentsExtractor.java | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeCommentsExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeCommentsExtractor.java index 8f67bc5bc..672207e36 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeCommentsExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeCommentsExtractor.java @@ -123,14 +123,12 @@ public class YoutubeCommentsExtractor extends CommentsExtractor { throw new ParsingException("unable to get parse youtube comments", e); } - for(Object c: comments) { if(c instanceof JsonObject) { CommentsInfoItemExtractor extractor = new YoutubeCommentsInfoItemExtractor((JsonObject) c, pageUrl); collector.commit(extractor); } } - } private void fetchTitle(JsonArray contents) { @@ -160,10 +158,9 @@ public class YoutubeCommentsExtractor extends CommentsExtractor { private String makeAjaxRequest(String siteUrl) throws IOException, ReCaptchaException { - StringBuilder postData = new StringBuilder(); - postData.append(URLEncoder.encode("session_token", "UTF-8")); - postData.append('='); - postData.append(URLEncoder.encode(sessionToken, "UTF-8")); + Map postDataMap = new HashMap<>(); + postDataMap.put("session_token", sessionToken); + String postData = getDataString(postDataMap); Map> requestHeaders = new HashMap<>(); requestHeaders.put("Content-Type", Arrays.asList("application/x-www-form-urlencoded")); @@ -173,7 +170,7 @@ public class YoutubeCommentsExtractor extends CommentsExtractor { requestHeaders.put("X-YouTube-Client-Name", Arrays.asList("1")); requestHeaders.put("Cookie", cookies); - return NewPipe.getDownloader().post(siteUrl, postData.toString(), requestHeaders).getResponseBody(); + return NewPipe.getDownloader().post(siteUrl, postData, requestHeaders).getResponseBody(); } private String getDataString(Map params) throws UnsupportedEncodingException { From 6b620914b64891740140a5ca0d3a03f48c568c42 Mon Sep 17 00:00:00 2001 From: Ritvik Saraf <13ritvik@gmail.com> Date: Thu, 27 Sep 2018 02:04:12 +0530 Subject: [PATCH 20/31] moved cookie logic outside --- .../newpipe/extractor/DownloadRequest.java | 44 +++++++++++++++++++ .../newpipe/extractor/DownloadResponse.java | 13 ++++++ .../schabi/newpipe/extractor/Downloader.java | 4 +- .../extractors/YoutubeCommentsExtractor.java | 8 ++-- .../java/org/schabi/newpipe/Downloader.java | 22 +++++----- 5 files changed, 76 insertions(+), 15 deletions(-) create mode 100644 extractor/src/main/java/org/schabi/newpipe/extractor/DownloadRequest.java diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/DownloadRequest.java b/extractor/src/main/java/org/schabi/newpipe/extractor/DownloadRequest.java new file mode 100644 index 000000000..32c8a67fa --- /dev/null +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/DownloadRequest.java @@ -0,0 +1,44 @@ +package org.schabi.newpipe.extractor; + +import java.util.Collections; +import java.util.List; +import java.util.Map; + +public class DownloadRequest { + + private final String requestBody; + private final Map> requestHeaders; + public static final DownloadRequest emptyRequest = new DownloadRequest(null, null); + + public DownloadRequest(String requestBody, Map> headers) { + super(); + this.requestBody = requestBody; + if(null != headers) { + this.requestHeaders = headers; + }else { + this.requestHeaders = Collections.emptyMap(); + } + } + + public String getRequestBody() { + return requestBody; + } + + public Map> getRequestHeaders() { + return requestHeaders; + } + + public void setRequestCookies(List cookies){ + requestHeaders.put("Cookie", cookies); + } + + public List getRequestCookies(){ + if(null == requestHeaders) return Collections.emptyList(); + List cookies = requestHeaders.get("Cookie"); + if(null == cookies) + return Collections.emptyList(); + else + return cookies; + } + +} diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/DownloadResponse.java b/extractor/src/main/java/org/schabi/newpipe/extractor/DownloadResponse.java index 64fc2ade1..2165002a8 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/DownloadResponse.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/DownloadResponse.java @@ -1,8 +1,11 @@ package org.schabi.newpipe.extractor; +import java.util.Collections; import java.util.List; import java.util.Map; +import javax.annotation.Nonnull; + public class DownloadResponse { private final String responseBody; private final Map> responseHeaders; @@ -20,5 +23,15 @@ public class DownloadResponse { public Map> getResponseHeaders() { return responseHeaders; } + + @Nonnull + public List getResponseCookies(){ + if(null == responseHeaders) return Collections.emptyList(); + List cookies = responseHeaders.get("Set-Cookie"); + if(null == cookies) + return Collections.emptyList(); + else + return cookies; + } } diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/Downloader.java b/extractor/src/main/java/org/schabi/newpipe/extractor/Downloader.java index f0b6692d4..7a9db6aed 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/Downloader.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/Downloader.java @@ -61,11 +61,11 @@ public interface Downloader { */ String download(String siteUrl) throws IOException, ReCaptchaException; - DownloadResponse get(String siteUrl, Map> requestHeaders) + DownloadResponse get(String siteUrl, DownloadRequest request) throws IOException, ReCaptchaException; DownloadResponse get(String siteUrl) throws IOException, ReCaptchaException; - DownloadResponse post(String siteUrl, String requestBody, Map> requestHeaders) + DownloadResponse post(String siteUrl, DownloadRequest request) throws IOException, ReCaptchaException; } diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeCommentsExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeCommentsExtractor.java index 672207e36..9aa2fba3c 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeCommentsExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeCommentsExtractor.java @@ -8,6 +8,7 @@ import java.util.HashMap; import java.util.List; import java.util.Map; +import org.schabi.newpipe.extractor.DownloadRequest; import org.schabi.newpipe.extractor.DownloadResponse; import org.schabi.newpipe.extractor.Downloader; import org.schabi.newpipe.extractor.NewPipe; @@ -145,7 +146,7 @@ public class YoutubeCommentsExtractor extends CommentsExtractor { public void onFetchPage(Downloader downloader) throws IOException, ExtractionException { DownloadResponse response = downloader.get(getUrl()); String responseBody = response.getResponseBody(); - cookies = response.getResponseHeaders().get("Set-Cookie"); + cookies = response.getResponseCookies(); sessionToken = findValue(responseBody, "XSRF_TOKEN"); String commentsToken = findValue(responseBody, "COMMENTS_TOKEN"); initPage = getPage(getNextPageUrl(commentsToken)); @@ -168,9 +169,10 @@ public class YoutubeCommentsExtractor extends CommentsExtractor { requestHeaders.put("User-Agent", Arrays.asList(USER_AGENT)); requestHeaders.put("X-YouTube-Client-Version", Arrays.asList("2.20180815")); requestHeaders.put("X-YouTube-Client-Name", Arrays.asList("1")); - requestHeaders.put("Cookie", cookies); + DownloadRequest request = new DownloadRequest(postData, requestHeaders); + request.setRequestCookies(cookies); - return NewPipe.getDownloader().post(siteUrl, postData, requestHeaders).getResponseBody(); + return NewPipe.getDownloader().post(siteUrl, request).getResponseBody(); } private String getDataString(Map params) throws UnsupportedEncodingException { diff --git a/extractor/src/test/java/org/schabi/newpipe/Downloader.java b/extractor/src/test/java/org/schabi/newpipe/Downloader.java index 4745a5d1a..77940ba92 100644 --- a/extractor/src/test/java/org/schabi/newpipe/Downloader.java +++ b/extractor/src/test/java/org/schabi/newpipe/Downloader.java @@ -12,6 +12,7 @@ import java.util.Map; import javax.net.ssl.HttpsURLConnection; +import org.schabi.newpipe.extractor.DownloadRequest; import org.schabi.newpipe.extractor.DownloadResponse; import org.schabi.newpipe.extractor.exceptions.ReCaptchaException; @@ -172,11 +173,11 @@ public class Downloader implements org.schabi.newpipe.extractor.Downloader { } @Override - public DownloadResponse get(String siteUrl, Map> requestHeaders) + public DownloadResponse get(String siteUrl, DownloadRequest request) throws IOException, ReCaptchaException { URL url = new URL(siteUrl); HttpsURLConnection con = (HttpsURLConnection) url.openConnection(); - for (Map.Entry> pair : requestHeaders.entrySet()) { + for (Map.Entry> pair : request.getRequestHeaders().entrySet()) { for(String value: pair.getValue()) { con.addRequestProperty(pair.getKey(), value); } @@ -187,16 +188,16 @@ public class Downloader implements org.schabi.newpipe.extractor.Downloader { @Override public DownloadResponse get(String siteUrl) throws IOException, ReCaptchaException { - return get(siteUrl, Collections.EMPTY_MAP); + return get(siteUrl, DownloadRequest.emptyRequest); } @Override - public DownloadResponse post(String siteUrl, String requestBody, Map> requestHeaders) + public DownloadResponse post(String siteUrl, DownloadRequest request) throws IOException, ReCaptchaException { URL url = new URL(siteUrl); HttpsURLConnection con = (HttpsURLConnection) url.openConnection(); con.setRequestMethod("POST"); - for (Map.Entry> pair : requestHeaders.entrySet()) { + for (Map.Entry> pair : request.getRequestHeaders().entrySet()) { for(String value: pair.getValue()) { con.addRequestProperty(pair.getKey(), value); } @@ -204,11 +205,12 @@ public class Downloader implements org.schabi.newpipe.extractor.Downloader { // set fields to default if not set already setDefaults(con); - byte[] postDataBytes = requestBody.toString().getBytes("UTF-8"); - con.setRequestProperty("Content-Length", String.valueOf(postDataBytes.length)); - - con.setDoOutput(true); - con.getOutputStream().write(postDataBytes); + if(null != request.getRequestBody()) { + byte[] postDataBytes = request.getRequestBody().getBytes("UTF-8"); + con.setRequestProperty("Content-Length", String.valueOf(postDataBytes.length)); + con.setDoOutput(true); + con.getOutputStream().write(postDataBytes); + } StringBuilder sb = new StringBuilder(); try (BufferedReader in = new BufferedReader(new InputStreamReader(con.getInputStream()))) { From ad8066830df21ada53c6fc3dd2e87dff11125a53 Mon Sep 17 00:00:00 2001 From: Ritvik Saraf <13ritvik@gmail.com> Date: Thu, 27 Sep 2018 22:32:13 +0530 Subject: [PATCH 21/31] getting client version and name from initial page --- .../extractors/YoutubeCommentsExtractor.java | 26 ++++++++++++------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeCommentsExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeCommentsExtractor.java index 9aa2fba3c..e8d53091d 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeCommentsExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeCommentsExtractor.java @@ -34,6 +34,8 @@ public class YoutubeCommentsExtractor extends CommentsExtractor { private List cookies; private String sessionToken; + private String ytClientVersion; + private String ytClientName; private String title; private InfoItemsPage initPage; @@ -144,11 +146,17 @@ public class YoutubeCommentsExtractor extends CommentsExtractor { @Override public void onFetchPage(Downloader downloader) throws IOException, ExtractionException { - DownloadResponse response = downloader.get(getUrl()); + Map> requestHeaders = new HashMap<>(); + requestHeaders.put("User-Agent", Arrays.asList(USER_AGENT)); + DownloadRequest request = new DownloadRequest(null, requestHeaders); + DownloadResponse response = downloader.get(getUrl(), request); String responseBody = response.getResponseBody(); cookies = response.getResponseCookies(); - sessionToken = findValue(responseBody, "XSRF_TOKEN"); - String commentsToken = findValue(responseBody, "COMMENTS_TOKEN"); + sessionToken = findValue(responseBody, "XSRF_TOKEN\":\"", "\""); + ytClientVersion = findValue(responseBody, "INNERTUBE_CONTEXT_CLIENT_VERSION\":\"", "\""); + ytClientName = findValue(responseBody, "INNERTUBE_CONTEXT_CLIENT_NAME\":", ","); + String commentsTokenInside = findValue(responseBody, "itemSectionRenderer", "comment-item-section"); + String commentsToken = findValue(commentsTokenInside, "continuation\":\"", "\""); initPage = getPage(getNextPageUrl(commentsToken)); } @@ -167,8 +175,8 @@ public class YoutubeCommentsExtractor extends CommentsExtractor { requestHeaders.put("Content-Type", Arrays.asList("application/x-www-form-urlencoded")); requestHeaders.put("Accept", Arrays.asList("*/*")); requestHeaders.put("User-Agent", Arrays.asList(USER_AGENT)); - requestHeaders.put("X-YouTube-Client-Version", Arrays.asList("2.20180815")); - requestHeaders.put("X-YouTube-Client-Name", Arrays.asList("1")); + requestHeaders.put("X-YouTube-Client-Version", Arrays.asList(ytClientVersion)); + requestHeaders.put("X-YouTube-Client-Name", Arrays.asList(ytClientName)); DownloadRequest request = new DownloadRequest(postData, requestHeaders); request.setRequestCookies(cookies); @@ -190,10 +198,10 @@ public class YoutubeCommentsExtractor extends CommentsExtractor { return result.toString(); } - private String findValue(String doc, String key) { - int beginIndex = doc.indexOf(key) + key.length() + 4; - int endIndex = doc.indexOf("\"", beginIndex); + private String findValue(String doc, String start, String end) { + int beginIndex = doc.indexOf(start) + start.length(); + int endIndex = doc.indexOf(end, beginIndex); return doc.substring(beginIndex, endIndex); } - + } From c2ed99b1b71c074efb89a5503707f4cb202486f0 Mon Sep 17 00:00:00 2001 From: Ritvik Saraf <13ritvik@gmail.com> Date: Thu, 27 Sep 2018 23:01:27 +0530 Subject: [PATCH 22/31] no comments --- .../services/youtube/extractors/YoutubeCommentsExtractor.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeCommentsExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeCommentsExtractor.java index e8d53091d..68e96c985 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeCommentsExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeCommentsExtractor.java @@ -116,7 +116,8 @@ public class YoutubeCommentsExtractor extends CommentsExtractor { try { contents = (JsonArray) JsonUtils.getValue(ajaxJson, "response.continuationContents.itemSectionContinuation.contents"); }catch(Exception e) { - throw new ParsingException("unable to get parse youtube comments", e); + //no comments + return; } fetchTitle(contents); List comments; From d1ff1c75891653f5933e42cd9f61ae3551a98f55 Mon Sep 17 00:00:00 2001 From: Ritvik Saraf <13ritvik@gmail.com> Date: Fri, 28 Sep 2018 04:54:57 +0530 Subject: [PATCH 23/31] using mobile website since it is faster --- .../extractors/YoutubeCommentsExtractor.java | 59 ++++++++++--------- .../YoutubeCommentsInfoItemExtractor.java | 19 +++--- .../YoutubeCommentsLinkHandlerFactory.java | 4 +- 3 files changed, 41 insertions(+), 41 deletions(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeCommentsExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeCommentsExtractor.java index 68e96c985..66c1d116d 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeCommentsExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeCommentsExtractor.java @@ -8,6 +8,8 @@ import java.util.HashMap; import java.util.List; import java.util.Map; +import javax.annotation.Nonnull; + import org.schabi.newpipe.extractor.DownloadRequest; import org.schabi.newpipe.extractor.DownloadResponse; import org.schabi.newpipe.extractor.Downloader; @@ -26,14 +28,12 @@ import org.schabi.newpipe.extractor.utils.JsonUtils; import com.grack.nanojson.JsonArray; import com.grack.nanojson.JsonObject; import com.grack.nanojson.JsonParser; -import com.grack.nanojson.JsonParserException; + public class YoutubeCommentsExtractor extends CommentsExtractor { - private static final String USER_AGENT = "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:61.0) Gecko/20100101 Firefox/61.0"; + private static final String USER_AGENT = "Mozilla/5.0 (Android 8.1.0; Mobile; rv:62.0) Gecko/62.0 Firefox/62.0"; - private List cookies; - private String sessionToken; private String ytClientVersion; private String ytClientName; private String title; @@ -45,17 +45,14 @@ public class YoutubeCommentsExtractor extends CommentsExtractor { @Override public InfoItemsPage getInitialPage() throws IOException, ExtractionException { - // initial page does not load any comments but is required to get session token - // and cookies + // initial page does not load any comments but is required to get comments token super.fetchPage(); return initPage; } - // isn't this method redundant. you can just call getnextpage on getInitialPage @Override public String getNextPageUrl() throws IOException, ExtractionException { - // initial page does not load any comments but is required to get session token - // and cookies + // initial page does not load any comments but is required to get comments token super.fetchPage(); return initPage.getNextPageUrl(); } @@ -64,7 +61,7 @@ public class YoutubeCommentsExtractor extends CommentsExtractor { JsonArray arr; try { - arr = (JsonArray) JsonUtils.getValue(ajaxJson, "response.continuationContents.itemSectionContinuation.continuations"); + arr = (JsonArray) JsonUtils.getValue(ajaxJson, "response.continuationContents.commentSectionContinuation.continuations"); } catch (Exception e) { return ""; } @@ -85,9 +82,8 @@ public class YoutubeCommentsExtractor extends CommentsExtractor { params.put("action_get_comments", "1"); params.put("pbj", "1"); params.put("ctoken", continuation); - params.put("continuation", continuation); try { - return "https://www.youtube.com/comment_service_ajax?" + getDataString(params); + return "https://m.youtube.com/watch_comment?" + getDataString(params); } catch (UnsupportedEncodingException e) { throw new ParsingException("Could not get next page url", e); } @@ -101,8 +97,8 @@ public class YoutubeCommentsExtractor extends CommentsExtractor { String ajaxResponse = makeAjaxRequest(pageUrl); JsonObject ajaxJson; try { - ajaxJson = JsonParser.object().from(ajaxResponse); - } catch (JsonParserException e) { + ajaxJson = JsonParser.array().from(ajaxResponse).getObject(1); + } catch (Exception e) { throw new ParsingException("Could not parse json data for comments", e); } CommentsInfoItemsCollector collector = new CommentsInfoItemsCollector(getServiceId()); @@ -114,7 +110,7 @@ public class YoutubeCommentsExtractor extends CommentsExtractor { JsonArray contents; try { - contents = (JsonArray) JsonUtils.getValue(ajaxJson, "response.continuationContents.itemSectionContinuation.contents"); + contents = (JsonArray) JsonUtils.getValue(ajaxJson, "response.continuationContents.commentSectionContinuation.items"); }catch(Exception e) { //no comments return; @@ -138,7 +134,7 @@ public class YoutubeCommentsExtractor extends CommentsExtractor { private void fetchTitle(JsonArray contents) { if(null == title) { try { - title = (String) JsonUtils.getValue(contents.getObject(0), "commentThreadRenderer.commentTargetTitle.simpleText"); + title = getYoutubeText((JsonObject) JsonUtils.getValue(contents.getObject(0), "commentThreadRenderer.commentTargetTitle")); } catch (Exception e) { title = "Youtube Comments"; } @@ -152,11 +148,9 @@ public class YoutubeCommentsExtractor extends CommentsExtractor { DownloadRequest request = new DownloadRequest(null, requestHeaders); DownloadResponse response = downloader.get(getUrl(), request); String responseBody = response.getResponseBody(); - cookies = response.getResponseCookies(); - sessionToken = findValue(responseBody, "XSRF_TOKEN\":\"", "\""); ytClientVersion = findValue(responseBody, "INNERTUBE_CONTEXT_CLIENT_VERSION\":\"", "\""); ytClientName = findValue(responseBody, "INNERTUBE_CONTEXT_CLIENT_NAME\":", ","); - String commentsTokenInside = findValue(responseBody, "itemSectionRenderer", "comment-item-section"); + String commentsTokenInside = findValue(responseBody, "commentSectionRenderer", "}"); String commentsToken = findValue(commentsTokenInside, "continuation\":\"", "\""); initPage = getPage(getNextPageUrl(commentsToken)); } @@ -168,20 +162,14 @@ public class YoutubeCommentsExtractor extends CommentsExtractor { private String makeAjaxRequest(String siteUrl) throws IOException, ReCaptchaException { - Map postDataMap = new HashMap<>(); - postDataMap.put("session_token", sessionToken); - String postData = getDataString(postDataMap); - Map> requestHeaders = new HashMap<>(); - requestHeaders.put("Content-Type", Arrays.asList("application/x-www-form-urlencoded")); requestHeaders.put("Accept", Arrays.asList("*/*")); requestHeaders.put("User-Agent", Arrays.asList(USER_AGENT)); requestHeaders.put("X-YouTube-Client-Version", Arrays.asList(ytClientVersion)); requestHeaders.put("X-YouTube-Client-Name", Arrays.asList(ytClientName)); - DownloadRequest request = new DownloadRequest(postData, requestHeaders); - request.setRequestCookies(cookies); + DownloadRequest request = new DownloadRequest(null, requestHeaders); - return NewPipe.getDownloader().post(siteUrl, request).getResponseBody(); + return NewPipe.getDownloader().get(siteUrl, request).getResponseBody(); } private String getDataString(Map params) throws UnsupportedEncodingException { @@ -205,4 +193,21 @@ public class YoutubeCommentsExtractor extends CommentsExtractor { return doc.substring(beginIndex, endIndex); } + public static String getYoutubeText(@Nonnull JsonObject object) throws ParsingException { + try { + return (String) JsonUtils.getValue(object, "simpleText"); + } catch (Exception e1) { + try { + JsonArray arr = (JsonArray) JsonUtils.getValue(object, "runs"); + String result = ""; + for(int i=0; i getValues(@Nonnull JsonArray array, @Nonnull String path) throws ParsingException { From f58c914e7330f24a04cbc1894c3f9d6fecc615ae Mon Sep 17 00:00:00 2001 From: Ritvik Saraf <13ritvik@gmail.com> Date: Fri, 19 Oct 2018 20:17:26 +0530 Subject: [PATCH 25/31] merge fix --- .../java/org/schabi/newpipe/extractor/StreamingService.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/StreamingService.java b/extractor/src/main/java/org/schabi/newpipe/extractor/StreamingService.java index 539492cc8..0921e67a0 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/StreamingService.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/StreamingService.java @@ -122,7 +122,7 @@ public abstract class StreamingService { } public CommentsExtractor getCommentsExtractor(ListLinkHandler urlIdHandler) throws ExtractionException { - return getCommentsExtractor(urlIdHandler, NewPipe.getLocalization()); + return getCommentsExtractor(urlIdHandler, NewPipe.getPreferredLocalization()); } //////////////////////////////////////////// @@ -183,7 +183,7 @@ public abstract class StreamingService { if(null == llhf) { return null; } - return getCommentsExtractor(llhf.fromUrl(url), NewPipe.getLocalization()); + return getCommentsExtractor(llhf.fromUrl(url), NewPipe.getPreferredLocalization()); } public abstract boolean isCommentsSupported(); From 9ad102df3fbd599de26357a716dd740b5a797db5 Mon Sep 17 00:00:00 2001 From: Ritvik Saraf <13ritvik@gmail.com> Date: Fri, 21 Dec 2018 09:35:00 +0530 Subject: [PATCH 26/31] fixed fetching youtube client name --- .../extractors/YoutubeCommentsExtractor.java | 6 ++++- .../newpipe/extractor/utils/Parser.java | 22 +++++++++++++------ 2 files changed, 20 insertions(+), 8 deletions(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeCommentsExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeCommentsExtractor.java index dee0b21c3..94beaf88a 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeCommentsExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeCommentsExtractor.java @@ -7,6 +7,8 @@ import java.util.Arrays; import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.regex.Matcher; +import java.util.regex.Pattern; import javax.annotation.Nonnull; @@ -25,6 +27,7 @@ import org.schabi.newpipe.extractor.exceptions.ReCaptchaException; import org.schabi.newpipe.extractor.linkhandler.ListLinkHandler; import org.schabi.newpipe.extractor.utils.JsonUtils; import org.schabi.newpipe.extractor.utils.Localization; +import org.schabi.newpipe.extractor.utils.Parser; import com.grack.nanojson.JsonArray; import com.grack.nanojson.JsonObject; @@ -34,6 +37,7 @@ import com.grack.nanojson.JsonParser; public class YoutubeCommentsExtractor extends CommentsExtractor { private static final String USER_AGENT = "Mozilla/5.0 (Android 8.1.0; Mobile; rv:62.0) Gecko/62.0 Firefox/62.0"; + private static final Pattern YT_CLIENT_NAME_PATTERN = Pattern.compile("INNERTUBE_CONTEXT_CLIENT_NAME\\\":(.*?)[,}]"); private String ytClientVersion; private String ytClientName; @@ -150,7 +154,7 @@ public class YoutubeCommentsExtractor extends CommentsExtractor { DownloadResponse response = downloader.get(getUrl(), request); String responseBody = response.getResponseBody(); ytClientVersion = findValue(responseBody, "INNERTUBE_CONTEXT_CLIENT_VERSION\":\"", "\""); - ytClientName = findValue(responseBody, "INNERTUBE_CONTEXT_CLIENT_NAME\":", ","); + ytClientName = Parser.matchGroup1(YT_CLIENT_NAME_PATTERN, responseBody); String commentsTokenInside = findValue(responseBody, "commentSectionRenderer", "}"); String commentsToken = findValue(commentsTokenInside, "continuation\":\"", "\""); initPage = getPage(getNextPageUrl(commentsToken)); diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/utils/Parser.java b/extractor/src/main/java/org/schabi/newpipe/extractor/utils/Parser.java index 42f098dce..6cd938975 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/utils/Parser.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/utils/Parser.java @@ -1,10 +1,5 @@ package org.schabi.newpipe.extractor.utils; -import org.nibor.autolink.LinkExtractor; -import org.nibor.autolink.LinkSpan; -import org.nibor.autolink.LinkType; -import org.schabi.newpipe.extractor.exceptions.ParsingException; - import java.io.UnsupportedEncodingException; import java.net.URLDecoder; import java.util.ArrayList; @@ -14,6 +9,11 @@ import java.util.Map; import java.util.regex.Matcher; import java.util.regex.Pattern; +import org.nibor.autolink.LinkExtractor; +import org.nibor.autolink.LinkSpan; +import org.nibor.autolink.LinkType; +import org.schabi.newpipe.extractor.exceptions.ParsingException; + /* * Created by Christian Schabesberger on 02.02.16. * @@ -51,18 +51,26 @@ public class Parser { public static String matchGroup1(String pattern, String input) throws RegexException { return matchGroup(pattern, input, 1); } + + public static String matchGroup1(Pattern pattern, String input) throws RegexException { + return matchGroup(pattern, input, 1); + } public static String matchGroup(String pattern, String input, int group) throws RegexException { Pattern pat = Pattern.compile(pattern); + return matchGroup(pat, input, group); + } + + public static String matchGroup(Pattern pat, String input, int group) throws RegexException { Matcher mat = pat.matcher(input); boolean foundMatch = mat.find(); if (foundMatch) { return mat.group(group); } else { if (input.length() > 1024) { - throw new RegexException("failed to find pattern \"" + pattern); + throw new RegexException("failed to find pattern \"" + pat.pattern()); } else { - throw new RegexException("failed to find pattern \"" + pattern + " inside of " + input + "\""); + throw new RegexException("failed to find pattern \"" + pat.pattern() + " inside of " + input + "\""); } } } From c77050dc70cf1a7d0c7a53de688b9b67a5db79c1 Mon Sep 17 00:00:00 2001 From: Ritvik Saraf <13ritvik@gmail.com> Date: Fri, 21 Dec 2018 11:02:57 +0530 Subject: [PATCH 27/31] handle empty comments --- .../services/youtube/extractors/YoutubeCommentsExtractor.java | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeCommentsExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeCommentsExtractor.java index 94beaf88a..e0bbaa11f 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeCommentsExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeCommentsExtractor.java @@ -7,7 +7,6 @@ import java.util.Arrays; import java.util.HashMap; import java.util.List; import java.util.Map; -import java.util.regex.Matcher; import java.util.regex.Pattern; import javax.annotation.Nonnull; @@ -210,7 +209,7 @@ public class YoutubeCommentsExtractor extends CommentsExtractor { } return result; } catch (Exception e2) { - throw new ParsingException("Could not get text", e2); + return ""; } } } From b05fa45080a1f60b80a30e3ab51c53a9d0e3056f Mon Sep 17 00:00:00 2001 From: Ritvik Saraf <13ritvik@gmail.com> Date: Fri, 28 Dec 2018 13:02:00 +0530 Subject: [PATCH 28/31] resolved review comments --- .../newpipe/extractor/StreamingService.java | 4 +-- .../services/youtube/YoutubeService.java | 26 +++++++++---------- .../extractors/YoutubeCommentsExtractor.java | 1 + 3 files changed, 15 insertions(+), 16 deletions(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/StreamingService.java b/extractor/src/main/java/org/schabi/newpipe/extractor/StreamingService.java index 702c4f360..8818ea362 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/StreamingService.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/StreamingService.java @@ -222,7 +222,7 @@ public abstract class StreamingService { public ChannelExtractor getChannelExtractor(ListLinkHandler linkHandler) throws ExtractionException { return getChannelExtractor(linkHandler, NewPipe.getPreferredLocalization()); } - + public PlaylistExtractor getPlaylistExtractor(ListLinkHandler linkHandler) throws ExtractionException { return getPlaylistExtractor(linkHandler, NewPipe.getPreferredLocalization()); } @@ -300,8 +300,6 @@ public abstract class StreamingService { - /** - * figure out where the link is pointing to (a channel, video, playlist, etc.) /** * Figures out where the link is pointing to (a channel, a video, a playlist, etc.) * @param url the url on which it should be decided of which link type it is diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeService.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeService.java index 0ff98b4f9..e5569775d 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeService.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeService.java @@ -1,10 +1,5 @@ package org.schabi.newpipe.extractor.services.youtube; -import static java.util.Arrays.asList; -import static org.schabi.newpipe.extractor.StreamingService.ServiceInfo.MediaCapability.AUDIO; -import static org.schabi.newpipe.extractor.StreamingService.ServiceInfo.MediaCapability.LIVE; -import static org.schabi.newpipe.extractor.StreamingService.ServiceInfo.MediaCapability.VIDEO; - import org.schabi.newpipe.extractor.StreamingService; import org.schabi.newpipe.extractor.SuggestionExtractor; import org.schabi.newpipe.extractor.channel.ChannelExtractor; @@ -38,6 +33,10 @@ import org.schabi.newpipe.extractor.stream.StreamExtractor; import org.schabi.newpipe.extractor.subscription.SubscriptionExtractor; import org.schabi.newpipe.extractor.utils.Localization; +import static java.util.Arrays.asList; +import static org.schabi.newpipe.extractor.StreamingService.ServiceInfo.MediaCapability.AUDIO; +import static org.schabi.newpipe.extractor.StreamingService.ServiceInfo.MediaCapability.LIVE; +import static org.schabi.newpipe.extractor.StreamingService.ServiceInfo.MediaCapability.VIDEO; /* * Created by Christian Schabesberger on 23.08.15. @@ -140,15 +139,16 @@ public class YoutubeService extends StreamingService { return new YoutubeSubscriptionExtractor(this); } - @Override - public ListLinkHandlerFactory getCommentsLHFactory() { - return YoutubeCommentsLinkHandlerFactory.getInstance(); - } + @Override + public ListLinkHandlerFactory getCommentsLHFactory() { + return YoutubeCommentsLinkHandlerFactory.getInstance(); + } - @Override - public CommentsExtractor getCommentsExtractor(ListLinkHandler urlIdHandler, Localization localization) throws ExtractionException { - return new YoutubeCommentsExtractor(this, urlIdHandler, localization); - } + @Override + public CommentsExtractor getCommentsExtractor(ListLinkHandler urlIdHandler, Localization localization) + throws ExtractionException { + return new YoutubeCommentsExtractor(this, urlIdHandler, localization); + } @Override public boolean isCommentsSupported() { diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeCommentsExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeCommentsExtractor.java index e0bbaa11f..85d150014 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeCommentsExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeCommentsExtractor.java @@ -35,6 +35,7 @@ import com.grack.nanojson.JsonParser; public class YoutubeCommentsExtractor extends CommentsExtractor { + // using the mobile site for comments because it loads faster and uses get requests instead of post private static final String USER_AGENT = "Mozilla/5.0 (Android 8.1.0; Mobile; rv:62.0) Gecko/62.0 Firefox/62.0"; private static final Pattern YT_CLIENT_NAME_PATTERN = Pattern.compile("INNERTUBE_CONTEXT_CLIENT_NAME\\\":(.*?)[,}]"); From a97b6dfa7a72f978452a23b217b6619fb82edfea Mon Sep 17 00:00:00 2001 From: Ritvik Saraf <13ritvik@gmail.com> Date: Sat, 16 Feb 2019 00:41:23 +0530 Subject: [PATCH 29/31] mediaccc merge --- .../services/media_ccc/MediaCCCService.java | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/media_ccc/MediaCCCService.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/media_ccc/MediaCCCService.java index 4dbf8e7f6..4096569c2 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/media_ccc/MediaCCCService.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/media_ccc/MediaCCCService.java @@ -3,6 +3,7 @@ package org.schabi.newpipe.extractor.services.media_ccc; import org.schabi.newpipe.extractor.StreamingService; import org.schabi.newpipe.extractor.SuggestionExtractor; import org.schabi.newpipe.extractor.channel.ChannelExtractor; +import org.schabi.newpipe.extractor.comments.CommentsExtractor; import org.schabi.newpipe.extractor.exceptions.ExtractionException; import org.schabi.newpipe.extractor.kiosk.KioskExtractor; import org.schabi.newpipe.extractor.kiosk.KioskList; @@ -104,4 +105,20 @@ public class MediaCCCService extends StreamingService { public SubscriptionExtractor getSubscriptionExtractor() { return null; } + + @Override + public ListLinkHandlerFactory getCommentsLHFactory() { + return null; + } + + @Override + public CommentsExtractor getCommentsExtractor(ListLinkHandler linkHandler, Localization localization) + throws ExtractionException { + return null; + } + + @Override + public boolean isCommentsSupported() { + return false; + } } From 3dadf63028e3ebbcf46ad555c494826c39db1882 Mon Sep 17 00:00:00 2001 From: Ritvik Saraf <13ritvik@gmail.com> Date: Sat, 16 Feb 2019 00:57:00 +0530 Subject: [PATCH 30/31] refactored comments capability --- .../newpipe/extractor/StreamingService.java | 5 +---- .../services/media_ccc/MediaCCCService.java | 22 ++++++++++--------- .../soundcloud/SoundcloudService.java | 21 +++++++++--------- .../services/youtube/YoutubeService.java | 18 ++++++--------- 4 files changed, 31 insertions(+), 35 deletions(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/StreamingService.java b/extractor/src/main/java/org/schabi/newpipe/extractor/StreamingService.java index 8818ea362..86bd9d1f8 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/StreamingService.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/StreamingService.java @@ -66,7 +66,7 @@ public abstract class StreamingService { } public enum MediaCapability { - AUDIO, VIDEO, LIVE + AUDIO, VIDEO, LIVE, COMMENTS } } @@ -296,9 +296,6 @@ public abstract class StreamingService { return getCommentsExtractor(llhf.fromUrl(url), NewPipe.getPreferredLocalization()); } - public abstract boolean isCommentsSupported(); - - /** * Figures out where the link is pointing to (a channel, a video, a playlist, etc.) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/media_ccc/MediaCCCService.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/media_ccc/MediaCCCService.java index 4096569c2..a23295a97 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/media_ccc/MediaCCCService.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/media_ccc/MediaCCCService.java @@ -1,5 +1,11 @@ package org.schabi.newpipe.extractor.services.media_ccc; +import static java.util.Arrays.asList; +import static org.schabi.newpipe.extractor.StreamingService.ServiceInfo.MediaCapability.AUDIO; +import static org.schabi.newpipe.extractor.StreamingService.ServiceInfo.MediaCapability.VIDEO; + +import java.io.IOException; + import org.schabi.newpipe.extractor.StreamingService; import org.schabi.newpipe.extractor.SuggestionExtractor; import org.schabi.newpipe.extractor.channel.ChannelExtractor; @@ -7,7 +13,12 @@ import org.schabi.newpipe.extractor.comments.CommentsExtractor; import org.schabi.newpipe.extractor.exceptions.ExtractionException; import org.schabi.newpipe.extractor.kiosk.KioskExtractor; import org.schabi.newpipe.extractor.kiosk.KioskList; -import org.schabi.newpipe.extractor.linkhandler.*; +import org.schabi.newpipe.extractor.linkhandler.LinkHandler; +import org.schabi.newpipe.extractor.linkhandler.LinkHandlerFactory; +import org.schabi.newpipe.extractor.linkhandler.ListLinkHandler; +import org.schabi.newpipe.extractor.linkhandler.ListLinkHandlerFactory; +import org.schabi.newpipe.extractor.linkhandler.SearchQueryHandler; +import org.schabi.newpipe.extractor.linkhandler.SearchQueryHandlerFactory; import org.schabi.newpipe.extractor.playlist.PlaylistExtractor; import org.schabi.newpipe.extractor.search.SearchExtractor; import org.schabi.newpipe.extractor.services.media_ccc.extractors.MediaCCCConferenceExtractor; @@ -22,11 +33,6 @@ import org.schabi.newpipe.extractor.stream.StreamExtractor; import org.schabi.newpipe.extractor.subscription.SubscriptionExtractor; import org.schabi.newpipe.extractor.utils.Localization; -import java.io.IOException; - -import static java.util.Arrays.asList; -import static org.schabi.newpipe.extractor.StreamingService.ServiceInfo.MediaCapability.*; - public class MediaCCCService extends StreamingService { public MediaCCCService(int id) { super(id, "MediaCCC", asList(AUDIO, VIDEO)); @@ -117,8 +123,4 @@ public class MediaCCCService extends StreamingService { return null; } - @Override - public boolean isCommentsSupported() { - return false; - } } diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/SoundcloudService.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/SoundcloudService.java index e62fadda1..74d38be57 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/SoundcloudService.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/SoundcloudService.java @@ -1,21 +1,27 @@ package org.schabi.newpipe.extractor.services.soundcloud; -import org.schabi.newpipe.extractor.*; -import org.schabi.newpipe.extractor.linkhandler.*; +import static java.util.Collections.singletonList; +import static org.schabi.newpipe.extractor.StreamingService.ServiceInfo.MediaCapability.AUDIO; + +import org.schabi.newpipe.extractor.StreamingService; +import org.schabi.newpipe.extractor.SuggestionExtractor; import org.schabi.newpipe.extractor.channel.ChannelExtractor; import org.schabi.newpipe.extractor.comments.CommentsExtractor; import org.schabi.newpipe.extractor.exceptions.ExtractionException; import org.schabi.newpipe.extractor.kiosk.KioskExtractor; import org.schabi.newpipe.extractor.kiosk.KioskList; +import org.schabi.newpipe.extractor.linkhandler.LinkHandler; +import org.schabi.newpipe.extractor.linkhandler.LinkHandlerFactory; +import org.schabi.newpipe.extractor.linkhandler.ListLinkHandler; +import org.schabi.newpipe.extractor.linkhandler.ListLinkHandlerFactory; +import org.schabi.newpipe.extractor.linkhandler.SearchQueryHandler; +import org.schabi.newpipe.extractor.linkhandler.SearchQueryHandlerFactory; import org.schabi.newpipe.extractor.playlist.PlaylistExtractor; import org.schabi.newpipe.extractor.search.SearchExtractor; import org.schabi.newpipe.extractor.stream.StreamExtractor; import org.schabi.newpipe.extractor.subscription.SubscriptionExtractor; import org.schabi.newpipe.extractor.utils.Localization; -import static java.util.Collections.singletonList; -import static org.schabi.newpipe.extractor.StreamingService.ServiceInfo.MediaCapability.AUDIO; - public class SoundcloudService extends StreamingService { public SoundcloudService(int id) { @@ -113,9 +119,4 @@ public class SoundcloudService extends StreamingService { return null; } - @Override - public boolean isCommentsSupported() { - return false; - } - } diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeService.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeService.java index e5569775d..3b621b8f0 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeService.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeService.java @@ -1,5 +1,11 @@ package org.schabi.newpipe.extractor.services.youtube; +import static java.util.Arrays.asList; +import static org.schabi.newpipe.extractor.StreamingService.ServiceInfo.MediaCapability.AUDIO; +import static org.schabi.newpipe.extractor.StreamingService.ServiceInfo.MediaCapability.COMMENTS; +import static org.schabi.newpipe.extractor.StreamingService.ServiceInfo.MediaCapability.LIVE; +import static org.schabi.newpipe.extractor.StreamingService.ServiceInfo.MediaCapability.VIDEO; + import org.schabi.newpipe.extractor.StreamingService; import org.schabi.newpipe.extractor.SuggestionExtractor; import org.schabi.newpipe.extractor.channel.ChannelExtractor; @@ -33,11 +39,6 @@ import org.schabi.newpipe.extractor.stream.StreamExtractor; import org.schabi.newpipe.extractor.subscription.SubscriptionExtractor; import org.schabi.newpipe.extractor.utils.Localization; -import static java.util.Arrays.asList; -import static org.schabi.newpipe.extractor.StreamingService.ServiceInfo.MediaCapability.AUDIO; -import static org.schabi.newpipe.extractor.StreamingService.ServiceInfo.MediaCapability.LIVE; -import static org.schabi.newpipe.extractor.StreamingService.ServiceInfo.MediaCapability.VIDEO; - /* * Created by Christian Schabesberger on 23.08.15. * @@ -61,7 +62,7 @@ import static org.schabi.newpipe.extractor.StreamingService.ServiceInfo.MediaCap public class YoutubeService extends StreamingService { public YoutubeService(int id) { - super(id, "YouTube", asList(AUDIO, VIDEO, LIVE)); + super(id, "YouTube", asList(AUDIO, VIDEO, LIVE, COMMENTS)); } @Override @@ -150,9 +151,4 @@ public class YoutubeService extends StreamingService { return new YoutubeCommentsExtractor(this, urlIdHandler, localization); } - @Override - public boolean isCommentsSupported() { - return true; - } - } From 04460bba2c48d504ba163c037ed72669c53c7bc5 Mon Sep 17 00:00:00 2001 From: Christian Schabesberger Date: Tue, 19 Feb 2019 16:10:46 +0100 Subject: [PATCH 31/31] add mediaCCC to supported sites in description. --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index b13f04ff0..715ec2fb8 100644 --- a/README.md +++ b/README.md @@ -19,6 +19,7 @@ The following sites are currently supported: - YouTube - SoundCloud +- MediaCCC ## License