From 53d39979049e345be791d2c06ef925afb6d7b86c Mon Sep 17 00:00:00 2001 From: Ritvik Saraf <13ritvik@gmail.com> Date: Mon, 20 Aug 2018 04:22:19 +0530 Subject: [PATCH] added comments extractor --- build.gradle | 4 +- extractor/build.gradle | 1 + .../newpipe/extractor/DownloadResponse.java | 26 ++ .../schabi/newpipe/extractor/Downloader.java | 69 ++--- .../schabi/newpipe/extractor/InfoItem.java | 3 +- .../newpipe/extractor/StreamingService.java | 36 ++- .../extractor/comments/CommentsExtractor.java | 14 + .../extractor/comments/CommentsInfo.java | 27 ++ .../extractor/comments/CommentsInfoItem.java | 76 +++++ .../comments/CommentsInfoItemExtractor.java | 22 ++ .../comments/CommentsInfoItemsCollector.java | 103 +++++++ .../soundcloud/SoundcloudService.java | 13 + .../services/youtube/YoutubeService.java | 45 ++- .../extractors/YoutubeCommentsExtractor.java | 262 ++++++++++++++++++ .../YoutubeCommentsLinkHandlerFactory.java | 183 ++++++++++++ .../java/org/schabi/newpipe/Downloader.java | 236 +++++++++------- .../youtube/YoutubeCommentsExtractorTest.java | 46 +++ 17 files changed, 1007 insertions(+), 159 deletions(-) create mode 100644 extractor/src/main/java/org/schabi/newpipe/extractor/DownloadResponse.java create mode 100644 extractor/src/main/java/org/schabi/newpipe/extractor/comments/CommentsExtractor.java create mode 100644 extractor/src/main/java/org/schabi/newpipe/extractor/comments/CommentsInfo.java create mode 100644 extractor/src/main/java/org/schabi/newpipe/extractor/comments/CommentsInfoItem.java create mode 100644 extractor/src/main/java/org/schabi/newpipe/extractor/comments/CommentsInfoItemExtractor.java create mode 100644 extractor/src/main/java/org/schabi/newpipe/extractor/comments/CommentsInfoItemsCollector.java create mode 100644 extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeCommentsExtractor.java create mode 100644 
extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeCommentsLinkHandlerFactory.java create mode 100644 extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeCommentsExtractorTest.java diff --git a/build.gradle b/build.gradle index 9f25fc28e..f8f6c8e90 100644 --- a/build.gradle +++ b/build.gradle @@ -1,7 +1,7 @@ allprojects { apply plugin: 'java-library' - sourceCompatibility = 1.7 - targetCompatibility = 1.7 + sourceCompatibility = 1.8 + targetCompatibility = 1.8 version 'v0.13.0' diff --git a/extractor/build.gradle b/extractor/build.gradle index 1b7fbf001..26430b9fa 100644 --- a/extractor/build.gradle +++ b/extractor/build.gradle @@ -6,6 +6,7 @@ dependencies { implementation 'org.mozilla:rhino:1.7.7.1' implementation 'com.github.spotbugs:spotbugs-annotations:3.1.0' implementation 'org.nibor.autolink:autolink:0.8.0' + implementation 'com.fasterxml.jackson.core:jackson-databind:2.9.5' testImplementation 'junit:junit:4.12' } \ No newline at end of file diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/DownloadResponse.java b/extractor/src/main/java/org/schabi/newpipe/extractor/DownloadResponse.java new file mode 100644 index 000000000..8f91c8914 --- /dev/null +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/DownloadResponse.java @@ -0,0 +1,26 @@ +package org.schabi.newpipe.extractor; + +import java.util.List; +import java.util.Map; + +public class DownloadResponse { + private final String responseBody; + private final Map> responseHeaders; + + + + public DownloadResponse(String responseBody, Map> headers) { + super(); + this.responseBody = responseBody; + this.responseHeaders = headers; + } + + public String getResponseBody() { + return responseBody; + } + + public Map> getResponseHeaders() { + return responseHeaders; + } + +} diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/Downloader.java b/extractor/src/main/java/org/schabi/newpipe/extractor/Downloader.java index 
fde90a2b5..93d0a7c23 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/Downloader.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/Downloader.java @@ -1,10 +1,11 @@ package org.schabi.newpipe.extractor; -import org.schabi.newpipe.extractor.exceptions.ReCaptchaException; - import java.io.IOException; +import java.util.List; import java.util.Map; +import org.schabi.newpipe.extractor.exceptions.ReCaptchaException; + /* * Created by Christian Schabesberger on 28.01.16. * @@ -27,35 +28,41 @@ import java.util.Map; public interface Downloader { - /** - * Download the text file at the supplied URL as in download(String), - * but set the HTTP header field "Accept-Language" to the supplied string. - * - * @param siteUrl the URL of the text file to return the contents of - * @param language the language (usually a 2-character code) to set as the preferred language - * @return the contents of the specified text file - * @throws IOException - */ - String download(String siteUrl, String language) throws IOException, ReCaptchaException; + /** + * Download the text file at the supplied URL as in download(String), but set + * the HTTP header field "Accept-Language" to the supplied string. + * + * @param siteUrl the URL of the text file to return the contents of + * @param language the language (usually a 2-character code) to set as the + * preferred language + * @return the contents of the specified text file + * @throws IOException + */ + String download(String siteUrl, String language) throws IOException, ReCaptchaException; - /** - * Download the text file at the supplied URL as in download(String), - * but set the HTTP header field "Accept-Language" to the supplied string. 
- * - * @param siteUrl the URL of the text file to return the contents of - * @param customProperties set request header properties - * @return the contents of the specified text file - * @throws IOException - */ - String download(String siteUrl, Map customProperties) throws IOException, ReCaptchaException; + /** + * Download the text file at the supplied URL as in download(String), but set + * the HTTP header field "Accept-Language" to the supplied string. + * + * @param siteUrl the URL of the text file to return the contents of + * @param customProperties set request header properties + * @return the contents of the specified text file + * @throws IOException + */ + String download(String siteUrl, Map customProperties) throws IOException, ReCaptchaException; - /** - * Download (via HTTP) the text file located at the supplied URL, and return its contents. - * Primarily intended for downloading web pages. - * - * @param siteUrl the URL of the text file to download - * @return the contents of the specified text file - * @throws IOException - */ - String download(String siteUrl) throws IOException, ReCaptchaException; + /** + * Download (via HTTP) the text file located at the supplied URL, and return its + * contents. Primarily intended for downloading web pages. 
+ * + * @param siteUrl the URL of the text file to download + * @return the contents of the specified text file + * @throws IOException + */ + String download(String siteUrl) throws IOException, ReCaptchaException; + + DownloadResponse downloadWithHeaders(String siteUrl, Map> requestHeaders) + throws IOException, ReCaptchaException; + + DownloadResponse downloadWithHeaders(String siteUrl) throws IOException, ReCaptchaException; } diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/InfoItem.java b/extractor/src/main/java/org/schabi/newpipe/extractor/InfoItem.java index 4bed6bfbb..aead6c7f6 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/InfoItem.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/InfoItem.java @@ -68,6 +68,7 @@ public abstract class InfoItem implements Serializable { public enum InfoType { STREAM, PLAYLIST, - CHANNEL + CHANNEL, + COMMENT } } diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/StreamingService.java b/extractor/src/main/java/org/schabi/newpipe/extractor/StreamingService.java index 03e465b6c..841511a8f 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/StreamingService.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/StreamingService.java @@ -1,18 +1,24 @@ package org.schabi.newpipe.extractor; -import org.schabi.newpipe.extractor.channel.ChannelExtractor; -import org.schabi.newpipe.extractor.exceptions.ExtractionException; -import org.schabi.newpipe.extractor.exceptions.ParsingException; -import org.schabi.newpipe.extractor.kiosk.KioskList; -import org.schabi.newpipe.extractor.playlist.PlaylistExtractor; -import org.schabi.newpipe.extractor.search.SearchExtractor; -import org.schabi.newpipe.extractor.linkhandler.*; -import org.schabi.newpipe.extractor.stream.StreamExtractor; -import org.schabi.newpipe.extractor.subscription.SubscriptionExtractor; - import java.util.Collections; import java.util.List; +import 
org.schabi.newpipe.extractor.channel.ChannelExtractor; +import org.schabi.newpipe.extractor.comments.CommentsExtractor; +import org.schabi.newpipe.extractor.exceptions.ExtractionException; +import org.schabi.newpipe.extractor.exceptions.ParsingException; +import org.schabi.newpipe.extractor.kiosk.KioskList; +import org.schabi.newpipe.extractor.linkhandler.LinkHandler; +import org.schabi.newpipe.extractor.linkhandler.LinkHandlerFactory; +import org.schabi.newpipe.extractor.linkhandler.ListLinkHandler; +import org.schabi.newpipe.extractor.linkhandler.ListLinkHandlerFactory; +import org.schabi.newpipe.extractor.linkhandler.SearchQueryHandler; +import org.schabi.newpipe.extractor.linkhandler.SearchQueryHandlerFactory; +import org.schabi.newpipe.extractor.playlist.PlaylistExtractor; +import org.schabi.newpipe.extractor.search.SearchExtractor; +import org.schabi.newpipe.extractor.stream.StreamExtractor; +import org.schabi.newpipe.extractor.subscription.SubscriptionExtractor; + public abstract class StreamingService { public static class ServiceInfo { private final String name; @@ -71,6 +77,7 @@ public abstract class StreamingService { public abstract ListLinkHandlerFactory getChannelLHFactory(); public abstract ListLinkHandlerFactory getPlaylistLHFactory(); public abstract SearchQueryHandlerFactory getSearchQHFactory(); + public abstract ListLinkHandlerFactory getCommentsLHFactory(); //////////////////////////////////////////// @@ -84,6 +91,7 @@ public abstract class StreamingService { public abstract ChannelExtractor getChannelExtractor(ListLinkHandler urlIdHandler) throws ExtractionException; public abstract PlaylistExtractor getPlaylistExtractor(ListLinkHandler urlIdHandler) throws ExtractionException; public abstract StreamExtractor getStreamExtractor(LinkHandler UIHFactory) throws ExtractionException; + public abstract CommentsExtractor getCommentsExtractor(ListLinkHandler urlIdHandler) throws ExtractionException; public SearchExtractor getSearchExtractor(String 
query, List contentFilter, String sortFilter, String contentCountry) throws ExtractionException { return getSearchExtractor(getSearchQHFactory().fromQuery(query, contentFilter, sortFilter), contentCountry); @@ -112,10 +120,16 @@ public abstract class StreamingService { public StreamExtractor getStreamExtractor(String url) throws ExtractionException { return getStreamExtractor(getStreamLHFactory().fromUrl(url)); } + + public CommentsExtractor getCommentsExtractor(String url) throws ExtractionException { + return getCommentsExtractor(getCommentsLHFactory().fromUrl(url)); + } - /** + + + /** * figure out where the link is pointing to (a channel, video, playlist, etc.) */ public final LinkType getLinkTypeByUrl(String url) throws ParsingException { diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/comments/CommentsExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/comments/CommentsExtractor.java new file mode 100644 index 000000000..9f43e1af5 --- /dev/null +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/comments/CommentsExtractor.java @@ -0,0 +1,14 @@ +package org.schabi.newpipe.extractor.comments; + +import org.schabi.newpipe.extractor.ListExtractor; +import org.schabi.newpipe.extractor.StreamingService; +import org.schabi.newpipe.extractor.linkhandler.ListLinkHandler; + +public abstract class CommentsExtractor extends ListExtractor { + + public CommentsExtractor(StreamingService service, ListLinkHandler uiHandler) { + super(service, uiHandler); + // TODO Auto-generated constructor stub + } + +} diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/comments/CommentsInfo.java b/extractor/src/main/java/org/schabi/newpipe/extractor/comments/CommentsInfo.java new file mode 100644 index 000000000..10ac85d5c --- /dev/null +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/comments/CommentsInfo.java @@ -0,0 +1,27 @@ +package org.schabi.newpipe.extractor.comments; + +import java.io.IOException; + +import 
org.schabi.newpipe.extractor.ListInfo; +import org.schabi.newpipe.extractor.NewPipe; +import org.schabi.newpipe.extractor.StreamingService; +import org.schabi.newpipe.extractor.exceptions.ExtractionException; +import org.schabi.newpipe.extractor.linkhandler.ListLinkHandler; + +public class CommentsInfo extends ListInfo{ + + private CommentsInfo(int serviceId, ListLinkHandler listUrlIdHandler, String name) { + super(serviceId, listUrlIdHandler, name); + // TODO Auto-generated constructor stub + } + + public static CommentsInfo getInfo(String url) throws IOException, ExtractionException { + return getInfo(NewPipe.getServiceByUrl(url), url); + } + + private static CommentsInfo getInfo(StreamingService serviceByUrl, String url) { + // TODO Auto-generated method stub + return null; + } + +} diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/comments/CommentsInfoItem.java b/extractor/src/main/java/org/schabi/newpipe/extractor/comments/CommentsInfoItem.java new file mode 100644 index 000000000..87d54a0df --- /dev/null +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/comments/CommentsInfoItem.java @@ -0,0 +1,76 @@ +package org.schabi.newpipe.extractor.comments; + +import org.schabi.newpipe.extractor.InfoItem; + +public class CommentsInfoItem extends InfoItem{ + + private String commentId; + private String commentText; + private String authorName; + private String authorThumbnail; + private String authorEndpoint; + private String publishedTime; + private Integer likeCount; + + public CommentsInfoItem(int serviceId, String url, String name) { + super(InfoType.COMMENT, serviceId, url, name); + // TODO Auto-generated constructor stub + } + + public String getCommentText() { + return commentText; + } + + public void setCommentText(String contentText) { + this.commentText = contentText; + } + + public String getAuthorName() { + return authorName; + } + + public void setAuthorName(String authorName) { + this.authorName = authorName; + } + + public String 
getAuthorThumbnail() { + return authorThumbnail; + } + + public void setAuthorThumbnail(String authorThumbnail) { + this.authorThumbnail = authorThumbnail; + } + + public String getAuthorEndpoint() { + return authorEndpoint; + } + + public void setAuthorEndpoint(String authorEndpoint) { + this.authorEndpoint = authorEndpoint; + } + + public String getPublishedTime() { + return publishedTime; + } + + public void setPublishedTime(String publishedTime) { + this.publishedTime = publishedTime; + } + + public Integer getLikeCount() { + return likeCount; + } + + public void setLikeCount(Integer likeCount) { + this.likeCount = likeCount; + } + + public String getCommentId() { + return commentId; + } + + public void setCommentId(String commentId) { + this.commentId = commentId; + } + +} diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/comments/CommentsInfoItemExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/comments/CommentsInfoItemExtractor.java new file mode 100644 index 000000000..32757e16d --- /dev/null +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/comments/CommentsInfoItemExtractor.java @@ -0,0 +1,22 @@ +package org.schabi.newpipe.extractor.comments; + +import org.schabi.newpipe.extractor.InfoItemExtractor; +import org.schabi.newpipe.extractor.exceptions.ParsingException; + +public interface CommentsInfoItemExtractor extends InfoItemExtractor { + + String getCommentId() throws ParsingException; + + String getCommentText() throws ParsingException; + + String getAuthorName() throws ParsingException; + + String getAuthorThumbnail() throws ParsingException; + + String getAuthorEndpoint() throws ParsingException; + + String getPublishedTime() throws ParsingException; + + Integer getLikeCount() throws ParsingException; + +} diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/comments/CommentsInfoItemsCollector.java b/extractor/src/main/java/org/schabi/newpipe/extractor/comments/CommentsInfoItemsCollector.java 
new file mode 100644 index 000000000..344ba8059 --- /dev/null +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/comments/CommentsInfoItemsCollector.java @@ -0,0 +1,103 @@ +package org.schabi.newpipe.extractor.comments; + +import java.util.List; +import java.util.Vector; + +import org.schabi.newpipe.extractor.InfoItem; +import org.schabi.newpipe.extractor.InfoItemsCollector; +import org.schabi.newpipe.extractor.exceptions.ParsingException; + +/* + * Created by Christian Schabesberger on 28.02.16. + * + * Copyright (C) Christian Schabesberger 2016 + * CommentsInfoItemsCollector.java is part of NewPipe. + * + * NewPipe is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * NewPipe is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with NewPipe. If not, see . 
+ */ + +public class CommentsInfoItemsCollector extends InfoItemsCollector { + + public CommentsInfoItemsCollector(int serviceId) { + super(serviceId); + } + + @Override + public CommentsInfoItem extract(CommentsInfoItemExtractor extractor) throws ParsingException { + + // important information + int serviceId = getServiceId(); + String url = extractor.getUrl(); + String name = extractor.getName(); + + CommentsInfoItem resultItem = new CommentsInfoItem(serviceId, url, name); + + // optional information + try { + resultItem.setCommentId(extractor.getCommentId()); + } catch (Exception e) { + addError(e); + } + try { + resultItem.setCommentText(extractor.getCommentText()); + } catch (Exception e) { + addError(e); + } + try { + resultItem.setAuthorName(extractor.getAuthorName()); + } catch (Exception e) { + addError(e); + } + try { + resultItem.setAuthorThumbnail(extractor.getAuthorThumbnail()); + } catch (Exception e) { + addError(e); + } + try { + resultItem.setAuthorEndpoint(extractor.getAuthorEndpoint()); + } catch (Exception e) { + addError(e); + } + try { + resultItem.setPublishedTime(extractor.getPublishedTime()); + } catch (Exception e) { + addError(e); + } + try { + resultItem.setLikeCount(extractor.getLikeCount()); + } catch (Exception e) { + addError(e); + } + return resultItem; + } + + @Override + public void commit(CommentsInfoItemExtractor extractor) { + try { + addItem(extract(extractor)); + } catch (Exception e) { + addError(e); + } + } + + public List getCommentsInfoItemList() { + List siiList = new Vector<>(); + for (InfoItem ii : super.getItems()) { + if (ii instanceof CommentsInfoItem) { + siiList.add((CommentsInfoItem) ii); + } + } + return siiList; + } +} diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/SoundcloudService.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/SoundcloudService.java index c533b951e..1d322f738 100644 --- 
a/extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/SoundcloudService.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/SoundcloudService.java @@ -3,6 +3,7 @@ package org.schabi.newpipe.extractor.services.soundcloud; import org.schabi.newpipe.extractor.*; import org.schabi.newpipe.extractor.linkhandler.*; import org.schabi.newpipe.extractor.channel.ChannelExtractor; +import org.schabi.newpipe.extractor.comments.CommentsExtractor; import org.schabi.newpipe.extractor.exceptions.ExtractionException; import org.schabi.newpipe.extractor.kiosk.KioskExtractor; import org.schabi.newpipe.extractor.kiosk.KioskList; @@ -98,4 +99,16 @@ public class SoundcloudService extends StreamingService { public SubscriptionExtractor getSubscriptionExtractor() { return new SoundcloudSubscriptionExtractor(this); } + + @Override + public ListLinkHandlerFactory getCommentsLHFactory() { + // TODO Auto-generated method stub + return null; + } + + @Override + public CommentsExtractor getCommentsExtractor(ListLinkHandler urlIdHandler) throws ExtractionException { + // TODO Auto-generated method stub + return null; + } } diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeService.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeService.java index 51a68f0b1..f6bc03775 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeService.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeService.java @@ -1,21 +1,42 @@ package org.schabi.newpipe.extractor.services.youtube; -import org.schabi.newpipe.extractor.*; -import org.schabi.newpipe.extractor.linkhandler.*; +import static java.util.Arrays.asList; +import static org.schabi.newpipe.extractor.StreamingService.ServiceInfo.MediaCapability.AUDIO; +import static org.schabi.newpipe.extractor.StreamingService.ServiceInfo.MediaCapability.LIVE; +import static 
org.schabi.newpipe.extractor.StreamingService.ServiceInfo.MediaCapability.VIDEO; + +import org.schabi.newpipe.extractor.StreamingService; +import org.schabi.newpipe.extractor.SuggestionExtractor; import org.schabi.newpipe.extractor.channel.ChannelExtractor; +import org.schabi.newpipe.extractor.comments.CommentsExtractor; import org.schabi.newpipe.extractor.exceptions.ExtractionException; import org.schabi.newpipe.extractor.kiosk.KioskExtractor; import org.schabi.newpipe.extractor.kiosk.KioskList; +import org.schabi.newpipe.extractor.linkhandler.LinkHandler; +import org.schabi.newpipe.extractor.linkhandler.LinkHandlerFactory; +import org.schabi.newpipe.extractor.linkhandler.ListLinkHandler; +import org.schabi.newpipe.extractor.linkhandler.ListLinkHandlerFactory; +import org.schabi.newpipe.extractor.linkhandler.SearchQueryHandler; +import org.schabi.newpipe.extractor.linkhandler.SearchQueryHandlerFactory; import org.schabi.newpipe.extractor.playlist.PlaylistExtractor; import org.schabi.newpipe.extractor.search.SearchExtractor; -import org.schabi.newpipe.extractor.services.youtube.extractors.*; -import org.schabi.newpipe.extractor.services.youtube.linkHandler.*; +import org.schabi.newpipe.extractor.services.youtube.extractors.YoutubeChannelExtractor; +import org.schabi.newpipe.extractor.services.youtube.extractors.YoutubeCommentsExtractor; +import org.schabi.newpipe.extractor.services.youtube.extractors.YoutubePlaylistExtractor; +import org.schabi.newpipe.extractor.services.youtube.extractors.YoutubeSearchExtractor; +import org.schabi.newpipe.extractor.services.youtube.extractors.YoutubeStreamExtractor; +import org.schabi.newpipe.extractor.services.youtube.extractors.YoutubeSubscriptionExtractor; +import org.schabi.newpipe.extractor.services.youtube.extractors.YoutubeSuggestionExtractor; +import org.schabi.newpipe.extractor.services.youtube.extractors.YoutubeTrendingExtractor; +import 
org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeChannelLinkHandlerFactory; +import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeCommentsLinkHandlerFactory; +import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubePlaylistLinkHandlerFactory; +import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeSearchQueryHandlerFactory; +import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeStreamLinkHandlerFactory; +import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeTrendingLinkHandlerFactory; import org.schabi.newpipe.extractor.stream.StreamExtractor; import org.schabi.newpipe.extractor.subscription.SubscriptionExtractor; -import static java.util.Arrays.asList; -import static org.schabi.newpipe.extractor.StreamingService.ServiceInfo.MediaCapability.*; - /* * Created by Christian Schabesberger on 23.08.15. @@ -115,4 +136,14 @@ public class YoutubeService extends StreamingService { return new YoutubeSubscriptionExtractor(this); } + @Override + public ListLinkHandlerFactory getCommentsLHFactory() { + return YoutubeCommentsLinkHandlerFactory.getInstance(); + } + + @Override + public CommentsExtractor getCommentsExtractor(ListLinkHandler urlIdHandler) throws ExtractionException { + return new YoutubeCommentsExtractor(this, urlIdHandler); + } + } diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeCommentsExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeCommentsExtractor.java new file mode 100644 index 000000000..ec1568e3b --- /dev/null +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeCommentsExtractor.java @@ -0,0 +1,262 @@ +package org.schabi.newpipe.extractor.services.youtube.extractors; + +import java.io.BufferedReader; +import java.io.IOException; +import java.io.InputStreamReader; +import 
java.io.UnsupportedEncodingException; +import java.net.URL; +import java.net.URLEncoder; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import javax.net.ssl.HttpsURLConnection; + +import org.schabi.newpipe.extractor.DownloadResponse; +import org.schabi.newpipe.extractor.Downloader; +import org.schabi.newpipe.extractor.StreamingService; +import org.schabi.newpipe.extractor.comments.CommentsExtractor; +import org.schabi.newpipe.extractor.comments.CommentsInfoItem; +import org.schabi.newpipe.extractor.comments.CommentsInfoItemExtractor; +import org.schabi.newpipe.extractor.comments.CommentsInfoItemsCollector; +import org.schabi.newpipe.extractor.exceptions.ExtractionException; +import org.schabi.newpipe.extractor.exceptions.ParsingException; +import org.schabi.newpipe.extractor.linkhandler.ListLinkHandler; + +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.ObjectMapper; + +public class YoutubeCommentsExtractor extends CommentsExtractor { + + private static final String USER_AGENT = "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:61.0) Gecko/20100101 Firefox/61.0"; + + private List cookies; + private String sessionToken; + private String commentsToken; + + private ObjectMapper mapper = new ObjectMapper(); + + public YoutubeCommentsExtractor(StreamingService service, ListLinkHandler uiHandler) { + super(service, uiHandler); + // TODO Auto-generated constructor stub + } + + @Override + public InfoItemsPage getInitialPage() throws IOException, ExtractionException { + // initial page does not load any comments but is required to get session token + // and cookies + return getPage(getNextPageUrl()); + } + + @Override + public String getNextPageUrl() throws IOException, ExtractionException { + return getNextPageUrl(commentsToken); + } + + private String getNextPageUrl(JsonNode ajaxJson) throws IOException, ExtractionException { + String continuation; + try { + continuation = 
ajaxJson.findValue("itemSectionContinuation").get("continuations").findValue("continuation") + .asText(); + } catch (Exception e) { + // no more comments + return ""; + } + return getNextPageUrl(continuation); + } + + private String getNextPageUrl(String continuation) throws ParsingException { + Map params = new HashMap<>(); + params.put("action_get_comments", "1"); + params.put("pbj", "1"); + params.put("ctoken", continuation); + params.put("continuation", continuation); + try { + return "https://www.youtube.com/comment_service_ajax?" + getDataString(params); + } catch (UnsupportedEncodingException e) { + throw new ParsingException("Could not get next page url", e); + } + } + + @Override + public InfoItemsPage getPage(String pageUrl) throws IOException, ExtractionException { + if (pageUrl == null || pageUrl.isEmpty()) { + throw new ExtractionException(new IllegalArgumentException("Page url is empty or null")); + } + String ajaxResponse = makeAjaxRequest(pageUrl); + JsonNode ajaxJson = mapper.readTree(ajaxResponse); + CommentsInfoItemsCollector collector = new CommentsInfoItemsCollector(getServiceId()); + collectCommentsFrom(collector, ajaxJson, pageUrl); + return new InfoItemsPage<>(collector, getNextPageUrl(ajaxJson)); + } + + private void collectCommentsFrom(CommentsInfoItemsCollector collector, JsonNode ajaxJson, String pageUrl) { + List comments = ajaxJson.findValues("commentRenderer"); + comments.stream().forEach(c -> { + CommentsInfoItemExtractor extractor = new CommentsInfoItemExtractor() { + + @Override + public String getUrl() throws ParsingException { + return pageUrl; + } + + @Override + public String getThumbnailUrl() throws ParsingException { + try { + return c.get("authorThumbnail").get("thumbnails").get(0).get("url").asText(); + } catch (Exception e) { + throw new ParsingException("Could not get thumbnail url", e); + } + } + + @Override + public String getName() throws ParsingException { + try { + return 
c.get("authorText").get("simpleText").asText(); + } catch (Exception e) { + throw new ParsingException("Could not get thumbnail url", e); + } + } + + @Override + public String getPublishedTime() throws ParsingException { + try { + return c.get("publishedTimeText").get("runs").get(0).get("text").asText(); + } catch (Exception e) { + throw new ParsingException("Could not get thumbnail url", e); + } + } + + @Override + public Integer getLikeCount() throws ParsingException { + try { + return c.get("likeCount").intValue(); + } catch (Exception e) { + throw new ParsingException("Could not get thumbnail url", e); + } + } + + @Override + public String getCommentText() throws ParsingException { + try { + if (null != c.get("contentText").get("simpleText")) { + return c.get("contentText").get("simpleText").asText(); + } else { + return c.get("contentText").get("runs").get(0).get("text").asText(); + } + } catch (Exception e) { + throw new ParsingException("Could not get thumbnail url", e); + } + } + + @Override + public String getCommentId() throws ParsingException { + try { + return c.get("commentId").asText(); + } catch (Exception e) { + throw new ParsingException("Could not get thumbnail url", e); + } + } + + @Override + public String getAuthorThumbnail() throws ParsingException { + try { + return c.get("authorThumbnail").get("thumbnails").get(0).get("url").asText(); + } catch (Exception e) { + throw new ParsingException("Could not get thumbnail url", e); + } + } + + @Override + public String getAuthorName() throws ParsingException { + try { + return c.get("authorText").get("simpleText").asText(); + } catch (Exception e) { + throw new ParsingException("Could not get thumbnail url", e); + } + } + + @Override + public String getAuthorEndpoint() throws ParsingException { + try { + return "https://youtube.com" + + c.get("authorEndpoint").get("browseEndpoint").get("canonicalBaseUrl").asText(); + } catch (Exception e) { + throw new ParsingException("Could not get thumbnail url", 
e); + } + } + }; + + collector.commit(extractor); + }); + + } + + @Override + public void onFetchPage(Downloader downloader) throws IOException, ExtractionException { + DownloadResponse response = downloader.downloadWithHeaders(getUrl()); + String responseBody = response.getResponseBody(); + cookies = response.getResponseHeaders().get("Set-Cookie"); + sessionToken = findValue(responseBody, "XSRF_TOKEN"); + commentsToken = findValue(responseBody, "COMMENTS_TOKEN"); + } + + @Override + public String getName() throws ParsingException { + // TODO Auto-generated method stub + return null; + } + + private String makeAjaxRequest(String siteUrl) throws IOException { + + StringBuilder postData = new StringBuilder(); + postData.append(URLEncoder.encode("session_token", "UTF-8")); + postData.append('='); + postData.append(URLEncoder.encode(sessionToken, "UTF-8")); + byte[] postDataBytes = postData.toString().getBytes("UTF-8"); + + URL url = new URL(siteUrl); + HttpsURLConnection con = (HttpsURLConnection) url.openConnection(); + con.setRequestMethod("POST"); + con.setRequestProperty("Content-Type", "application/x-www-form-urlencoded"); + con.setRequestProperty("Content-Length", String.valueOf(postDataBytes.length)); + con.setRequestProperty("Accept", "*/*"); + con.setRequestProperty("User-Agent", USER_AGENT); + con.setRequestProperty("X-YouTube-Client-Version", "2.20180815"); + con.setRequestProperty("X-YouTube-Client-Name", "1"); + // set cookies + cookies.stream().forEach(c -> con.addRequestProperty("Cookie", c)); + con.setDoOutput(true); + con.getOutputStream().write(postDataBytes); + + BufferedReader in = new BufferedReader(new InputStreamReader(con.getInputStream(), "UTF-8")); + StringBuilder sb = new StringBuilder(); + String inputLine; + while ((inputLine = in.readLine()) != null) { + sb.append(inputLine); + } + return sb.toString(); + } + + private String getDataString(Map params) throws UnsupportedEncodingException { + StringBuilder result = new StringBuilder(); + 
boolean first = true; + for (Map.Entry entry : params.entrySet()) { + if (first) + first = false; + else + result.append("&"); + result.append(URLEncoder.encode(entry.getKey(), "UTF-8")); + result.append("="); + result.append(URLEncoder.encode(entry.getValue(), "UTF-8")); + } + return result.toString(); + } + + private String findValue(String doc, String key) { + int beginIndex = doc.indexOf(key) + key.length() + 4; + int endIndex = doc.indexOf("\"", beginIndex); + return doc.substring(beginIndex, endIndex); + } + +} diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeCommentsLinkHandlerFactory.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeCommentsLinkHandlerFactory.java new file mode 100644 index 000000000..fbdd63f1a --- /dev/null +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeCommentsLinkHandlerFactory.java @@ -0,0 +1,183 @@ +package org.schabi.newpipe.extractor.services.youtube.linkHandler; + +import org.schabi.newpipe.extractor.linkhandler.ListLinkHandlerFactory; +import org.jsoup.Jsoup; +import org.jsoup.nodes.Document; +import org.jsoup.nodes.Element; +import org.schabi.newpipe.extractor.Downloader; +import org.schabi.newpipe.extractor.NewPipe; +import org.schabi.newpipe.extractor.exceptions.FoundAdException; +import org.schabi.newpipe.extractor.exceptions.ParsingException; +import org.schabi.newpipe.extractor.exceptions.ReCaptchaException; +import org.schabi.newpipe.extractor.utils.Parser; + +import java.io.IOException; +import java.io.UnsupportedEncodingException; +import java.net.URI; +import java.net.URISyntaxException; +import java.net.URLDecoder; +import java.util.List; + +/* + * Created by Christian Schabesberger on 25.07.16. + * + * Copyright (C) Christian Schabesberger 2018 + * YoutubeChannelLinkHandlerFactory.java is part of NewPipe. 
+ * + * NewPipe is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * NewPipe is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with NewPipe. If not, see . + */ + +public class YoutubeCommentsLinkHandlerFactory extends ListLinkHandlerFactory { + + private static final YoutubeCommentsLinkHandlerFactory instance = new YoutubeCommentsLinkHandlerFactory(); + private static final String ID_PATTERN = "([\\-a-zA-Z0-9_]{11})"; + + public static YoutubeCommentsLinkHandlerFactory getInstance() { + return instance; + } + + @Override + public String getId(String url) throws ParsingException, IllegalArgumentException { + if (url.isEmpty()) { + throw new IllegalArgumentException("The url parameter should not be empty"); + } + + String id; + String lowercaseUrl = url.toLowerCase(); + if (lowercaseUrl.contains("youtube")) { + if (url.contains("attribution_link")) { + try { + String escapedQuery = Parser.matchGroup1("u=(.[^&|$]*)", url); + String query = URLDecoder.decode(escapedQuery, "UTF-8"); + id = Parser.matchGroup1("v=" + ID_PATTERN, query); + } catch (UnsupportedEncodingException uee) { + throw new ParsingException("Could not parse attribution_link", uee); + } + } else if (lowercaseUrl.contains("youtube.com/shared?ci=")) { + return getRealIdFromSharedLink(url); + } else if (url.contains("vnd.youtube")) { + id = Parser.matchGroup1(ID_PATTERN, url); + } else if (url.contains("embed")) { + id = Parser.matchGroup1("embed/" + ID_PATTERN, url); + } else if (url.contains("googleads")) { + throw new FoundAdException("Error found 
add: " + url); + } else { + id = Parser.matchGroup1("[?&]v=" + ID_PATTERN, url); + } + } else if (lowercaseUrl.contains("youtu.be")) { + if (url.contains("v=")) { + id = Parser.matchGroup1("v=" + ID_PATTERN, url); + } else { + id = Parser.matchGroup1("[Yy][Oo][Uu][Tt][Uu]\\.[Bb][Ee]/" + ID_PATTERN, url); + } + } else if(lowercaseUrl.contains("hooktube")) { + if(lowercaseUrl.contains("&v=") + || lowercaseUrl.contains("?v=")) { + id = Parser.matchGroup1("[?&]v=" + ID_PATTERN, url); + } else if (url.contains("/embed/")) { + id = Parser.matchGroup1("embed/" + ID_PATTERN, url); + } else if (url.contains("/v/")) { + id = Parser.matchGroup1("v/" + ID_PATTERN, url); + } else if (url.contains("/watch/")) { + id = Parser.matchGroup1("watch/" + ID_PATTERN, url); + } else { + throw new ParsingException("Error no suitable url: " + url); + } + } else { + throw new ParsingException("Error no suitable url: " + url); + } + + + if (!id.isEmpty()) { + return id; + } else { + throw new ParsingException("Error could not parse url: " + url); + } + } + + /** + * Get the real url from a shared uri. + *

<p> + * Shared URIs look like this: + * <pre>
+     *     * https://www.youtube.com/shared?ci=PJICrTByb3E
+     *     * vnd.youtube://www.youtube.com/shared?ci=PJICrTByb3E&amp;feature=twitter-deep-link
+     * </pre>
+ * + * @param url The shared url + * @return the id of the stream + * @throws ParsingException + */ + private String getRealIdFromSharedLink(String url) throws ParsingException { + URI uri; + try { + uri = new URI(url); + } catch (URISyntaxException e) { + throw new ParsingException("Invalid shared link", e); + } + String sharedId = getSharedId(uri); + Downloader downloader = NewPipe.getDownloader(); + String content; + try { + content = downloader.download("https://www.youtube.com/shared?ci=" + sharedId); + } catch (IOException | ReCaptchaException e) { + throw new ParsingException("Unable to resolve shared link", e); + } + final Document document = Jsoup.parse(content); + + final Element element = document.select("link[rel=\"canonical\"]").first(); + final String urlWithRealId = (element != null) + ? element.attr("abs:href") + : document.select("meta[property=\"og:url\"]").first() + .attr("abs:content"); + + String realId = Parser.matchGroup1(ID_PATTERN, urlWithRealId); + if (sharedId.equals(realId)) { + throw new ParsingException("Got same id for as shared info_id: " + sharedId); + } + return realId; + } + + private String getSharedId(URI uri) throws ParsingException { + if (!"/shared".equals(uri.getPath())) { + throw new ParsingException("Not a shared link: " + uri.toString() + " (path != " + uri.getPath() + ")"); + } + return Parser.matchGroup1("ci=" + ID_PATTERN, uri.getQuery()); + } + + @Override + public boolean onAcceptUrl(final String url) throws FoundAdException { + final String lowercaseUrl = url.toLowerCase(); + if (lowercaseUrl.contains("youtube") + || lowercaseUrl.contains("youtu.be") + || lowercaseUrl.contains("hooktube")) { + // bad programming I know + try { + getId(url); + return true; + } catch (FoundAdException fe) { + throw fe; + } catch (ParsingException e) { + return false; + } + } else { + return false; + } + } + + @Override + public String getUrl(String id, List contentFilter, String sortFilter) throws ParsingException { + return 
"https://www.youtube.com/watch?v=" + id; + } +} diff --git a/extractor/src/test/java/org/schabi/newpipe/Downloader.java b/extractor/src/test/java/org/schabi/newpipe/Downloader.java index e3bb6f68c..fb0e38ccc 100644 --- a/extractor/src/test/java/org/schabi/newpipe/Downloader.java +++ b/extractor/src/test/java/org/schabi/newpipe/Downloader.java @@ -1,16 +1,18 @@ package org.schabi.newpipe; -import org.schabi.newpipe.extractor.exceptions.ReCaptchaException; - -import javax.net.ssl.HttpsURLConnection; import java.io.BufferedReader; import java.io.IOException; import java.io.InputStreamReader; import java.net.URL; import java.net.UnknownHostException; import java.util.HashMap; +import java.util.List; import java.util.Map; +import javax.net.ssl.HttpsURLConnection; + +import org.schabi.newpipe.extractor.DownloadResponse; +import org.schabi.newpipe.extractor.exceptions.ReCaptchaException; /* * Created by Christian Schabesberger on 28.01.16. @@ -34,124 +36,144 @@ import java.util.Map; public class Downloader implements org.schabi.newpipe.extractor.Downloader { - private static final String USER_AGENT = "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:43.0) Gecko/20100101 Firefox/43.0"; - private static String mCookies = ""; + private static final String USER_AGENT = "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:43.0) Gecko/20100101 Firefox/43.0"; + private static String mCookies = ""; - private static Downloader instance = null; + private static Downloader instance = null; - private Downloader() { - } + private Downloader() { + } - public static Downloader getInstance() { - if (instance == null) { - synchronized (Downloader.class) { - if (instance == null) { - instance = new Downloader(); - } - } - } - return instance; - } + public static Downloader getInstance() { + if (instance == null) { + synchronized (Downloader.class) { + if (instance == null) { + instance = new Downloader(); + } + } + } + return instance; + } - public static synchronized void setCookies(String cookies) { - 
Downloader.mCookies = cookies; - } + public static synchronized void setCookies(String cookies) { + Downloader.mCookies = cookies; + } - public static synchronized String getCookies() { - return Downloader.mCookies; - } + public static synchronized String getCookies() { + return Downloader.mCookies; + } - /** - * Download the text file at the supplied URL as in download(String), - * but set the HTTP header field "Accept-Language" to the supplied string. - * - * @param siteUrl the URL of the text file to return the contents of - * @param language the language (usually a 2-character code) to set as the preferred language - * @return the contents of the specified text file - */ - public String download(String siteUrl, String language) throws IOException, ReCaptchaException { - Map requestProperties = new HashMap<>(); - requestProperties.put("Accept-Language", language); - return download(siteUrl, requestProperties); - } + /** + * Download the text file at the supplied URL as in download(String), but set + * the HTTP header field "Accept-Language" to the supplied string. + * + * @param siteUrl the URL of the text file to return the contents of + * @param language the language (usually a 2-character code) to set as the + * preferred language + * @return the contents of the specified text file + */ + public String download(String siteUrl, String language) throws IOException, ReCaptchaException { + Map requestProperties = new HashMap<>(); + requestProperties.put("Accept-Language", language); + return download(siteUrl, requestProperties); + } + /** + * Download the text file at the supplied URL as in download(String), but set + * the HTTP header field "Accept-Language" to the supplied string. 
+ * + * @param siteUrl the URL of the text file to return the contents of + * @param customProperties set request header properties + * @return the contents of the specified text file + * @throws IOException + */ + public String download(String siteUrl, Map customProperties) + throws IOException, ReCaptchaException { + URL url = new URL(siteUrl); + HttpsURLConnection con = (HttpsURLConnection) url.openConnection(); + for (Map.Entry pair : customProperties.entrySet()) { + con.setRequestProperty(pair.getKey(), pair.getValue()); + } + return dl(con); + } - /** - * Download the text file at the supplied URL as in download(String), - * but set the HTTP header field "Accept-Language" to the supplied string. - * - * @param siteUrl the URL of the text file to return the contents of - * @param customProperties set request header properties - * @return the contents of the specified text file - * @throws IOException - */ - public String download(String siteUrl, Map customProperties) throws IOException, ReCaptchaException { - URL url = new URL(siteUrl); - HttpsURLConnection con = (HttpsURLConnection) url.openConnection(); - for (Map.Entry pair: customProperties.entrySet()) { - con.setRequestProperty(pair.getKey(), pair.getValue()); - } - return dl(con); - } + /** + * Common functionality between download(String url) and download(String url, + * String language) + */ + private static String dl(HttpsURLConnection con) throws IOException, ReCaptchaException { + StringBuilder response = new StringBuilder(); + BufferedReader in = null; - /** - * Common functionality between download(String url) and download(String url, String language) - */ - private static String dl(HttpsURLConnection con) throws IOException, ReCaptchaException { - StringBuilder response = new StringBuilder(); - BufferedReader in = null; + try { + con.setConnectTimeout(30 * 1000);// 30s + con.setReadTimeout(30 * 1000);// 30s + con.setRequestMethod("GET"); + con.setRequestProperty("User-Agent", USER_AGENT); - try { 
- con.setConnectTimeout(30 * 1000);// 30s - con.setReadTimeout(30 * 1000);// 30s - con.setRequestMethod("GET"); - con.setRequestProperty("User-Agent", USER_AGENT); + if (getCookies().length() > 0) { + con.addRequestProperty("Cookie", getCookies()); + } - if (getCookies().length() > 0) { - con.setRequestProperty("Cookie", getCookies()); - } + in = new BufferedReader(new InputStreamReader(con.getInputStream())); + String inputLine; - in = new BufferedReader( - new InputStreamReader(con.getInputStream())); - String inputLine; + while ((inputLine = in.readLine()) != null) { + response.append(inputLine); + } + } catch (UnknownHostException uhe) {// thrown when there's no internet connection + throw new IOException("unknown host or no network", uhe); + // Toast.makeText(getActivity(), uhe.getMessage(), Toast.LENGTH_LONG).show(); + } catch (Exception e) { + /* + * HTTP 429 == Too Many Request Receive from Youtube.com = ReCaptcha challenge + * request See : https://github.com/rg3/youtube-dl/issues/5138 + */ + if (con.getResponseCode() == 429) { + throw new ReCaptchaException("reCaptcha Challenge requested"); + } - while ((inputLine = in.readLine()) != null) { - response.append(inputLine); - } - } catch (UnknownHostException uhe) {//thrown when there's no internet connection - throw new IOException("unknown host or no network", uhe); - //Toast.makeText(getActivity(), uhe.getMessage(), Toast.LENGTH_LONG).show(); - } catch (Exception e) { - /* - * HTTP 429 == Too Many Request - * Receive from Youtube.com = ReCaptcha challenge request - * See : https://github.com/rg3/youtube-dl/issues/5138 - */ - if (con.getResponseCode() == 429) { - throw new ReCaptchaException("reCaptcha Challenge requested"); - } + throw new IOException(con.getResponseCode() + " " + con.getResponseMessage(), e); + } finally { + if (in != null) { + in.close(); + } + } - throw new IOException(con.getResponseCode() + " " + con.getResponseMessage(), e); - } finally { - if (in != null) { - in.close(); - } - } + 
return response.toString(); + } - return response.toString(); - } + /** + * Download (via HTTP) the text file located at the supplied URL, and return its + * contents. Primarily intended for downloading web pages. + * + * @param siteUrl the URL of the text file to download + * @return the contents of the specified text file + */ + public String download(String siteUrl) throws IOException, ReCaptchaException { + URL url = new URL(siteUrl); + HttpsURLConnection con = (HttpsURLConnection) url.openConnection(); + // HttpsURLConnection con = NetCipher.getHttpsURLConnection(url); + return dl(con); + } - /** - * Download (via HTTP) the text file located at the supplied URL, and return its contents. - * Primarily intended for downloading web pages. - * - * @param siteUrl the URL of the text file to download - * @return the contents of the specified text file - */ - public String download(String siteUrl) throws IOException, ReCaptchaException { - URL url = new URL(siteUrl); - HttpsURLConnection con = (HttpsURLConnection) url.openConnection(); - //HttpsURLConnection con = NetCipher.getHttpsURLConnection(url); - return dl(con); - } + @Override + public DownloadResponse downloadWithHeaders(String siteUrl, Map> requestHeaders) + throws IOException, ReCaptchaException { + URL url = new URL(siteUrl); + HttpsURLConnection con = (HttpsURLConnection) url.openConnection(); + for (Map.Entry> pair : requestHeaders.entrySet()) { + pair.getValue().stream().forEach(value -> con.addRequestProperty(pair.getKey(), value)); + } + String responseBody = dl(con); + return new DownloadResponse(responseBody, con.getHeaderFields()); + } + + @Override + public DownloadResponse downloadWithHeaders(String siteUrl) throws IOException, ReCaptchaException { + URL url = new URL(siteUrl); + HttpsURLConnection con = (HttpsURLConnection) url.openConnection(); + String responseBody = dl(con); + return new DownloadResponse(responseBody, con.getHeaderFields()); + } } diff --git 
a/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeCommentsExtractorTest.java b/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeCommentsExtractorTest.java new file mode 100644 index 000000000..468c12ce5 --- /dev/null +++ b/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeCommentsExtractorTest.java @@ -0,0 +1,46 @@ +package org.schabi.newpipe.extractor.services.youtube; + +import static org.junit.Assert.assertTrue; +import static org.schabi.newpipe.extractor.ServiceList.YouTube; + +import java.io.IOException; + +import org.junit.BeforeClass; +import org.junit.Test; +import org.schabi.newpipe.Downloader; +import org.schabi.newpipe.extractor.ListExtractor.InfoItemsPage; +import org.schabi.newpipe.extractor.NewPipe; +import org.schabi.newpipe.extractor.comments.CommentsInfoItem; +import org.schabi.newpipe.extractor.exceptions.ExtractionException; +import org.schabi.newpipe.extractor.services.youtube.extractors.YoutubeCommentsExtractor; + +public class YoutubeCommentsExtractorTest { + + private static YoutubeCommentsExtractor extractor; + + @BeforeClass + public static void setUp() throws Exception { + NewPipe.init(Downloader.getInstance()); + extractor = (YoutubeCommentsExtractor) YouTube + .getCommentsExtractor("https://www.youtube.com/watch?v=rrgFN3AxGfs"); + extractor.fetchPage(); + } + + @Test + public void testGetComments() throws IOException, ExtractionException { + boolean result = false; + InfoItemsPage comments = extractor.getInitialPage(); + result = findInComments(comments, "i should really be in the top comment.lol"); + + while (comments.hasNextPage()) { + comments = extractor.getPage(comments.getNextPageUrl()); + result = findInComments(comments, "i should really be in the top comment.lol"); + } + + assertTrue(result); + } + + private boolean findInComments(InfoItemsPage comments, String comment) { + return comments.getItems().stream().filter(c -> 
c.getCommentText().contains(comment)).findAny().isPresent(); + } +}