diff --git a/README.md b/README.md index b13f04ff0..715ec2fb8 100644 --- a/README.md +++ b/README.md @@ -19,6 +19,7 @@ The following sites are currently supported: - YouTube - SoundCloud +- MediaCCC ## License diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/DownloadRequest.java b/extractor/src/main/java/org/schabi/newpipe/extractor/DownloadRequest.java new file mode 100644 index 000000000..32c8a67fa --- /dev/null +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/DownloadRequest.java @@ -0,0 +1,44 @@ +package org.schabi.newpipe.extractor; + +import java.util.Collections; +import java.util.List; +import java.util.Map; + +public class DownloadRequest { + + private final String requestBody; + private final Map> requestHeaders; + public static final DownloadRequest emptyRequest = new DownloadRequest(null, null); + + public DownloadRequest(String requestBody, Map> headers) { + super(); + this.requestBody = requestBody; + if(null != headers) { + this.requestHeaders = headers; + }else { + this.requestHeaders = Collections.emptyMap(); + } + } + + public String getRequestBody() { + return requestBody; + } + + public Map> getRequestHeaders() { + return requestHeaders; + } + + public void setRequestCookies(List cookies){ + requestHeaders.put("Cookie", cookies); + } + + public List getRequestCookies(){ + if(null == requestHeaders) return Collections.emptyList(); + List cookies = requestHeaders.get("Cookie"); + if(null == cookies) + return Collections.emptyList(); + else + return cookies; + } + +} diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/DownloadResponse.java b/extractor/src/main/java/org/schabi/newpipe/extractor/DownloadResponse.java new file mode 100644 index 000000000..2165002a8 --- /dev/null +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/DownloadResponse.java @@ -0,0 +1,37 @@ +package org.schabi.newpipe.extractor; + +import java.util.Collections; +import java.util.List; +import java.util.Map; + +import 
javax.annotation.Nonnull; + +public class DownloadResponse { + private final String responseBody; + private final Map> responseHeaders; + + public DownloadResponse(String responseBody, Map> headers) { + super(); + this.responseBody = responseBody; + this.responseHeaders = headers; + } + + public String getResponseBody() { + return responseBody; + } + + public Map> getResponseHeaders() { + return responseHeaders; + } + + @Nonnull + public List getResponseCookies(){ + if(null == responseHeaders) return Collections.emptyList(); + List cookies = responseHeaders.get("Set-Cookie"); + if(null == cookies) + return Collections.emptyList(); + else + return cookies; + } + +} diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/Downloader.java b/extractor/src/main/java/org/schabi/newpipe/extractor/Downloader.java index d750bc2c5..335e8a93e 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/Downloader.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/Downloader.java @@ -1,11 +1,11 @@ package org.schabi.newpipe.extractor; -import org.schabi.newpipe.extractor.exceptions.ReCaptchaException; -import org.schabi.newpipe.extractor.utils.Localization; - import java.io.IOException; import java.util.Map; +import org.schabi.newpipe.extractor.exceptions.ReCaptchaException; +import org.schabi.newpipe.extractor.utils.Localization; + /* * Created by Christian Schabesberger on 28.01.16. * @@ -29,8 +29,8 @@ import java.util.Map; public interface Downloader { /** - * Download the text file at the supplied URL as in download(String), - * but set the HTTP header field "Accept-Language" to the supplied string. + * Download the text file at the supplied URL as in download(String), but set + * the HTTP header field "Accept-Language" to the supplied string. 
* * @param siteUrl the URL of the text file to return the contents of * @param localization the language and country (usually a 2-character code for each) @@ -40,8 +40,8 @@ public interface Downloader { String download(String siteUrl, Localization localization) throws IOException, ReCaptchaException; /** - * Download the text file at the supplied URL as in download(String), - * but set the HTTP header field "Accept-Language" to the supplied string. + * Download the text file at the supplied URL as in download(String), but set + * the HTTP header field "Accept-Language" to the supplied string. * * @param siteUrl the URL of the text file to return the contents of * @param customProperties set request header properties @@ -51,12 +51,20 @@ public interface Downloader { String download(String siteUrl, Map customProperties) throws IOException, ReCaptchaException; /** - * Download (via HTTP) the text file located at the supplied URL, and return its contents. - * Primarily intended for downloading web pages. + * Download (via HTTP) the text file located at the supplied URL, and return its + * contents. Primarily intended for downloading web pages. 
* * @param siteUrl the URL of the text file to download * @return the contents of the specified text file * @throws IOException */ String download(String siteUrl) throws IOException, ReCaptchaException; + + DownloadResponse get(String siteUrl, DownloadRequest request) + throws IOException, ReCaptchaException; + + DownloadResponse get(String siteUrl) throws IOException, ReCaptchaException; + + DownloadResponse post(String siteUrl, DownloadRequest request) + throws IOException, ReCaptchaException; } diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/Extractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/Extractor.java index ac96a37de..055f7fd0d 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/Extractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/Extractor.java @@ -8,8 +8,9 @@ import org.schabi.newpipe.extractor.utils.Localization; import javax.annotation.Nonnull; import javax.annotation.Nullable; import java.io.IOException; +import java.io.Serializable; -public abstract class Extractor { +public abstract class Extractor{ /** * {@link StreamingService} currently related to this extractor.
* Useful for getting other things from a service (like the url handlers for cleaning/accepting/get id from urls). diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/InfoItem.java b/extractor/src/main/java/org/schabi/newpipe/extractor/InfoItem.java index 4bed6bfbb..aead6c7f6 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/InfoItem.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/InfoItem.java @@ -68,6 +68,7 @@ public abstract class InfoItem implements Serializable { public enum InfoType { STREAM, PLAYLIST, - CHANNEL + CHANNEL, + COMMENT } } diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/StreamingService.java b/extractor/src/main/java/org/schabi/newpipe/extractor/StreamingService.java index b798ec92c..86bd9d1f8 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/StreamingService.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/StreamingService.java @@ -1,19 +1,25 @@ package org.schabi.newpipe.extractor; -import org.schabi.newpipe.extractor.channel.ChannelExtractor; -import org.schabi.newpipe.extractor.exceptions.ExtractionException; -import org.schabi.newpipe.extractor.exceptions.ParsingException; -import org.schabi.newpipe.extractor.kiosk.KioskList; -import org.schabi.newpipe.extractor.playlist.PlaylistExtractor; -import org.schabi.newpipe.extractor.search.SearchExtractor; -import org.schabi.newpipe.extractor.linkhandler.*; -import org.schabi.newpipe.extractor.stream.StreamExtractor; -import org.schabi.newpipe.extractor.subscription.SubscriptionExtractor; -import org.schabi.newpipe.extractor.utils.Localization; - import java.util.Collections; import java.util.List; +import org.schabi.newpipe.extractor.channel.ChannelExtractor; +import org.schabi.newpipe.extractor.comments.CommentsExtractor; +import org.schabi.newpipe.extractor.exceptions.ExtractionException; +import org.schabi.newpipe.extractor.exceptions.ParsingException; +import 
org.schabi.newpipe.extractor.kiosk.KioskList; +import org.schabi.newpipe.extractor.linkhandler.LinkHandler; +import org.schabi.newpipe.extractor.linkhandler.LinkHandlerFactory; +import org.schabi.newpipe.extractor.linkhandler.ListLinkHandler; +import org.schabi.newpipe.extractor.linkhandler.ListLinkHandlerFactory; +import org.schabi.newpipe.extractor.linkhandler.SearchQueryHandler; +import org.schabi.newpipe.extractor.linkhandler.SearchQueryHandlerFactory; +import org.schabi.newpipe.extractor.playlist.PlaylistExtractor; +import org.schabi.newpipe.extractor.search.SearchExtractor; +import org.schabi.newpipe.extractor.stream.StreamExtractor; +import org.schabi.newpipe.extractor.subscription.SubscriptionExtractor; +import org.schabi.newpipe.extractor.utils.Localization; + /* * Copyright (C) Christian Schabesberger 2018 * StreamingService.java is part of NewPipe. @@ -60,7 +66,7 @@ public abstract class StreamingService { } public enum MediaCapability { - AUDIO, VIDEO, LIVE + AUDIO, VIDEO, LIVE, COMMENTS } } @@ -135,6 +141,7 @@ public abstract class StreamingService { * @return an instance of a SearchQueryHandlerFactory */ public abstract SearchQueryHandlerFactory getSearchQHFactory(); + public abstract ListLinkHandlerFactory getCommentsLHFactory(); //////////////////////////////////////////// @@ -198,6 +205,8 @@ public abstract class StreamingService { */ public abstract StreamExtractor getStreamExtractor(LinkHandler linkHandler, Localization localization) throws ExtractionException; + public abstract CommentsExtractor getCommentsExtractor(ListLinkHandler linkHandler, + Localization localization) throws ExtractionException; //////////////////////////////////////////// // Extractor with default localization //////////////////////////////////////////// @@ -213,7 +222,7 @@ public abstract class StreamingService { public ChannelExtractor getChannelExtractor(ListLinkHandler linkHandler) throws ExtractionException { return getChannelExtractor(linkHandler, 
NewPipe.getPreferredLocalization()); } - + public PlaylistExtractor getPlaylistExtractor(ListLinkHandler linkHandler) throws ExtractionException { return getPlaylistExtractor(linkHandler, NewPipe.getPreferredLocalization()); } @@ -221,6 +230,10 @@ public abstract class StreamingService { public StreamExtractor getStreamExtractor(LinkHandler linkHandler) throws ExtractionException { return getStreamExtractor(linkHandler, NewPipe.getPreferredLocalization()); } + + public CommentsExtractor getCommentsExtractor(ListLinkHandler urlIdHandler) throws ExtractionException { + return getCommentsExtractor(urlIdHandler, NewPipe.getPreferredLocalization()); + } //////////////////////////////////////////// // Extractor without link handler @@ -274,6 +287,15 @@ public abstract class StreamingService { public StreamExtractor getStreamExtractor(String url) throws ExtractionException { return getStreamExtractor(getStreamLHFactory().fromUrl(url), NewPipe.getPreferredLocalization()); } + + public CommentsExtractor getCommentsExtractor(String url) throws ExtractionException { + ListLinkHandlerFactory llhf = getCommentsLHFactory(); + if(null == llhf) { + return null; + } + return getCommentsExtractor(llhf.fromUrl(url), NewPipe.getPreferredLocalization()); + } + /** * Figures out where the link is pointing to (a channel, a video, a playlist, etc.) 
diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/comments/CommentsExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/comments/CommentsExtractor.java new file mode 100644 index 000000000..a396637c2 --- /dev/null +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/comments/CommentsExtractor.java @@ -0,0 +1,15 @@ +package org.schabi.newpipe.extractor.comments; + +import org.schabi.newpipe.extractor.ListExtractor; +import org.schabi.newpipe.extractor.StreamingService; +import org.schabi.newpipe.extractor.linkhandler.ListLinkHandler; +import org.schabi.newpipe.extractor.utils.Localization; + +public abstract class CommentsExtractor extends ListExtractor { + + public CommentsExtractor(StreamingService service, ListLinkHandler uiHandler, Localization localization) { + super(service, uiHandler, localization); + // TODO Auto-generated constructor stub + } + +} diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/comments/CommentsInfo.java b/extractor/src/main/java/org/schabi/newpipe/extractor/comments/CommentsInfo.java new file mode 100644 index 000000000..e0aaf0d25 --- /dev/null +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/comments/CommentsInfo.java @@ -0,0 +1,72 @@ +package org.schabi.newpipe.extractor.comments; + +import java.io.IOException; + +import org.schabi.newpipe.extractor.ListExtractor.InfoItemsPage; +import org.schabi.newpipe.extractor.ListInfo; +import org.schabi.newpipe.extractor.NewPipe; +import org.schabi.newpipe.extractor.StreamingService; +import org.schabi.newpipe.extractor.exceptions.ExtractionException; +import org.schabi.newpipe.extractor.linkhandler.ListLinkHandler; +import org.schabi.newpipe.extractor.utils.ExtractorHelper; + +public class CommentsInfo extends ListInfo{ + + private CommentsInfo(int serviceId, ListLinkHandler listUrlIdHandler, String name) { + super(serviceId, listUrlIdHandler, name); + // TODO Auto-generated constructor stub + } + + public static CommentsInfo 
getInfo(String url) throws IOException, ExtractionException { + return getInfo(NewPipe.getServiceByUrl(url), url); + } + + public static CommentsInfo getInfo(StreamingService serviceByUrl, String url) throws ExtractionException, IOException { + return getInfo(serviceByUrl.getCommentsExtractor(url)); + } + + private static CommentsInfo getInfo(CommentsExtractor commentsExtractor) throws IOException, ExtractionException { + // for services which do not have a comments extractor + if (null == commentsExtractor) { + return null; + } + + commentsExtractor.fetchPage(); + String name = commentsExtractor.getName(); + int serviceId = commentsExtractor.getServiceId(); + ListLinkHandler listUrlIdHandler = commentsExtractor.getLinkHandler(); + CommentsInfo commentsInfo = new CommentsInfo(serviceId, listUrlIdHandler, name); + commentsInfo.setCommentsExtractor(commentsExtractor); + InfoItemsPage initialCommentsPage = ExtractorHelper.getItemsPageOrLogError(commentsInfo, + commentsExtractor); + commentsInfo.setRelatedItems(initialCommentsPage.getItems()); + commentsInfo.setNextPageUrl(initialCommentsPage.getNextPageUrl()); + + return commentsInfo; + } + + public static InfoItemsPage getMoreItems(CommentsInfo commentsInfo, String pageUrl) + throws ExtractionException, IOException { + return getMoreItems(NewPipe.getService(commentsInfo.getServiceId()), commentsInfo, pageUrl); + } + + public static InfoItemsPage getMoreItems(StreamingService service, CommentsInfo commentsInfo, + String pageUrl) throws IOException, ExtractionException { + if (null == commentsInfo.getCommentsExtractor()) { + commentsInfo.setCommentsExtractor(service.getCommentsExtractor(commentsInfo.getUrl())); + commentsInfo.getCommentsExtractor().fetchPage(); + } + return commentsInfo.getCommentsExtractor().getPage(pageUrl); + } + + private transient CommentsExtractor commentsExtractor; + + public CommentsExtractor getCommentsExtractor() { + return commentsExtractor; + } + + public void 
setCommentsExtractor(CommentsExtractor commentsExtractor) { + this.commentsExtractor = commentsExtractor; + } + +} diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/comments/CommentsInfoItem.java b/extractor/src/main/java/org/schabi/newpipe/extractor/comments/CommentsInfoItem.java new file mode 100644 index 000000000..87d54a0df --- /dev/null +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/comments/CommentsInfoItem.java @@ -0,0 +1,76 @@ +package org.schabi.newpipe.extractor.comments; + +import org.schabi.newpipe.extractor.InfoItem; + +public class CommentsInfoItem extends InfoItem{ + + private String commentId; + private String commentText; + private String authorName; + private String authorThumbnail; + private String authorEndpoint; + private String publishedTime; + private Integer likeCount; + + public CommentsInfoItem(int serviceId, String url, String name) { + super(InfoType.COMMENT, serviceId, url, name); + // TODO Auto-generated constructor stub + } + + public String getCommentText() { + return commentText; + } + + public void setCommentText(String contentText) { + this.commentText = contentText; + } + + public String getAuthorName() { + return authorName; + } + + public void setAuthorName(String authorName) { + this.authorName = authorName; + } + + public String getAuthorThumbnail() { + return authorThumbnail; + } + + public void setAuthorThumbnail(String authorThumbnail) { + this.authorThumbnail = authorThumbnail; + } + + public String getAuthorEndpoint() { + return authorEndpoint; + } + + public void setAuthorEndpoint(String authorEndpoint) { + this.authorEndpoint = authorEndpoint; + } + + public String getPublishedTime() { + return publishedTime; + } + + public void setPublishedTime(String publishedTime) { + this.publishedTime = publishedTime; + } + + public Integer getLikeCount() { + return likeCount; + } + + public void setLikeCount(Integer likeCount) { + this.likeCount = likeCount; + } + + public String getCommentId() { + 
return commentId; + } + + public void setCommentId(String commentId) { + this.commentId = commentId; + } + +} diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/comments/CommentsInfoItemExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/comments/CommentsInfoItemExtractor.java new file mode 100644 index 000000000..b9905c2a7 --- /dev/null +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/comments/CommentsInfoItemExtractor.java @@ -0,0 +1,14 @@ +package org.schabi.newpipe.extractor.comments; + +import org.schabi.newpipe.extractor.InfoItemExtractor; +import org.schabi.newpipe.extractor.exceptions.ParsingException; + +public interface CommentsInfoItemExtractor extends InfoItemExtractor { + String getCommentId() throws ParsingException; + String getCommentText() throws ParsingException; + String getAuthorName() throws ParsingException; + String getAuthorThumbnail() throws ParsingException; + String getAuthorEndpoint() throws ParsingException; + String getPublishedTime() throws ParsingException; + Integer getLikeCount() throws ParsingException; +} diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/comments/CommentsInfoItemsCollector.java b/extractor/src/main/java/org/schabi/newpipe/extractor/comments/CommentsInfoItemsCollector.java new file mode 100644 index 000000000..bf3bc3225 --- /dev/null +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/comments/CommentsInfoItemsCollector.java @@ -0,0 +1,89 @@ +package org.schabi.newpipe.extractor.comments; + +import java.util.List; +import java.util.Vector; + +import org.schabi.newpipe.extractor.InfoItem; +import org.schabi.newpipe.extractor.InfoItemsCollector; +import org.schabi.newpipe.extractor.exceptions.ParsingException; + +public class CommentsInfoItemsCollector extends InfoItemsCollector { + + public CommentsInfoItemsCollector(int serviceId) { + super(serviceId); + } + + @Override + public CommentsInfoItem extract(CommentsInfoItemExtractor extractor) throws 
ParsingException { + + // important information + int serviceId = getServiceId(); + String url = extractor.getUrl(); + String name = extractor.getName(); + + CommentsInfoItem resultItem = new CommentsInfoItem(serviceId, url, name); + + // optional information + try { + resultItem.setCommentId(extractor.getCommentId()); + } catch (Exception e) { + addError(e); + } + try { + resultItem.setCommentText(extractor.getCommentText()); + } catch (Exception e) { + addError(e); + } + try { + resultItem.setAuthorName(extractor.getAuthorName()); + } catch (Exception e) { + addError(e); + } + try { + resultItem.setAuthorThumbnail(extractor.getAuthorThumbnail()); + } catch (Exception e) { + addError(e); + } + try { + resultItem.setAuthorEndpoint(extractor.getAuthorEndpoint()); + } catch (Exception e) { + addError(e); + } + try { + resultItem.setPublishedTime(extractor.getPublishedTime()); + } catch (Exception e) { + addError(e); + } + try { + resultItem.setLikeCount(extractor.getLikeCount()); + } catch (Exception e) { + addError(e); + } + try { + resultItem.setThumbnailUrl(extractor.getThumbnailUrl()); + } catch (Exception e) { + addError(e); + } + + return resultItem; + } + + @Override + public void commit(CommentsInfoItemExtractor extractor) { + try { + addItem(extract(extractor)); + } catch (Exception e) { + addError(e); + } + } + + public List getCommentsInfoItemList() { + List siiList = new Vector<>(); + for (InfoItem ii : super.getItems()) { + if (ii instanceof CommentsInfoItem) { + siiList.add((CommentsInfoItem) ii); + } + } + return siiList; + } +} diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/media_ccc/MediaCCCService.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/media_ccc/MediaCCCService.java index 4dbf8e7f6..a23295a97 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/media_ccc/MediaCCCService.java +++ 
b/extractor/src/main/java/org/schabi/newpipe/extractor/services/media_ccc/MediaCCCService.java @@ -1,12 +1,24 @@ package org.schabi.newpipe.extractor.services.media_ccc; +import static java.util.Arrays.asList; +import static org.schabi.newpipe.extractor.StreamingService.ServiceInfo.MediaCapability.AUDIO; +import static org.schabi.newpipe.extractor.StreamingService.ServiceInfo.MediaCapability.VIDEO; + +import java.io.IOException; + import org.schabi.newpipe.extractor.StreamingService; import org.schabi.newpipe.extractor.SuggestionExtractor; import org.schabi.newpipe.extractor.channel.ChannelExtractor; +import org.schabi.newpipe.extractor.comments.CommentsExtractor; import org.schabi.newpipe.extractor.exceptions.ExtractionException; import org.schabi.newpipe.extractor.kiosk.KioskExtractor; import org.schabi.newpipe.extractor.kiosk.KioskList; -import org.schabi.newpipe.extractor.linkhandler.*; +import org.schabi.newpipe.extractor.linkhandler.LinkHandler; +import org.schabi.newpipe.extractor.linkhandler.LinkHandlerFactory; +import org.schabi.newpipe.extractor.linkhandler.ListLinkHandler; +import org.schabi.newpipe.extractor.linkhandler.ListLinkHandlerFactory; +import org.schabi.newpipe.extractor.linkhandler.SearchQueryHandler; +import org.schabi.newpipe.extractor.linkhandler.SearchQueryHandlerFactory; import org.schabi.newpipe.extractor.playlist.PlaylistExtractor; import org.schabi.newpipe.extractor.search.SearchExtractor; import org.schabi.newpipe.extractor.services.media_ccc.extractors.MediaCCCConferenceExtractor; @@ -21,11 +33,6 @@ import org.schabi.newpipe.extractor.stream.StreamExtractor; import org.schabi.newpipe.extractor.subscription.SubscriptionExtractor; import org.schabi.newpipe.extractor.utils.Localization; -import java.io.IOException; - -import static java.util.Arrays.asList; -import static org.schabi.newpipe.extractor.StreamingService.ServiceInfo.MediaCapability.*; - public class MediaCCCService extends StreamingService { public MediaCCCService(int id) { 
super(id, "MediaCCC", asList(AUDIO, VIDEO)); @@ -104,4 +111,16 @@ public class MediaCCCService extends StreamingService { public SubscriptionExtractor getSubscriptionExtractor() { return null; } + + @Override + public ListLinkHandlerFactory getCommentsLHFactory() { + return null; + } + + @Override + public CommentsExtractor getCommentsExtractor(ListLinkHandler linkHandler, Localization localization) + throws ExtractionException { + return null; + } + } diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/SoundcloudService.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/SoundcloudService.java index 0202bba56..74d38be57 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/SoundcloudService.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/SoundcloudService.java @@ -1,20 +1,27 @@ package org.schabi.newpipe.extractor.services.soundcloud; -import org.schabi.newpipe.extractor.*; -import org.schabi.newpipe.extractor.linkhandler.*; +import static java.util.Collections.singletonList; +import static org.schabi.newpipe.extractor.StreamingService.ServiceInfo.MediaCapability.AUDIO; + +import org.schabi.newpipe.extractor.StreamingService; +import org.schabi.newpipe.extractor.SuggestionExtractor; import org.schabi.newpipe.extractor.channel.ChannelExtractor; +import org.schabi.newpipe.extractor.comments.CommentsExtractor; import org.schabi.newpipe.extractor.exceptions.ExtractionException; import org.schabi.newpipe.extractor.kiosk.KioskExtractor; import org.schabi.newpipe.extractor.kiosk.KioskList; +import org.schabi.newpipe.extractor.linkhandler.LinkHandler; +import org.schabi.newpipe.extractor.linkhandler.LinkHandlerFactory; +import org.schabi.newpipe.extractor.linkhandler.ListLinkHandler; +import org.schabi.newpipe.extractor.linkhandler.ListLinkHandlerFactory; +import org.schabi.newpipe.extractor.linkhandler.SearchQueryHandler; +import 
org.schabi.newpipe.extractor.linkhandler.SearchQueryHandlerFactory; import org.schabi.newpipe.extractor.playlist.PlaylistExtractor; import org.schabi.newpipe.extractor.search.SearchExtractor; import org.schabi.newpipe.extractor.stream.StreamExtractor; import org.schabi.newpipe.extractor.subscription.SubscriptionExtractor; import org.schabi.newpipe.extractor.utils.Localization; -import static java.util.Collections.singletonList; -import static org.schabi.newpipe.extractor.StreamingService.ServiceInfo.MediaCapability.AUDIO; - public class SoundcloudService extends StreamingService { public SoundcloudService(int id) { @@ -100,4 +107,16 @@ public class SoundcloudService extends StreamingService { public SubscriptionExtractor getSubscriptionExtractor() { return new SoundcloudSubscriptionExtractor(this); } + + @Override + public ListLinkHandlerFactory getCommentsLHFactory() { + return null; + } + + @Override + public CommentsExtractor getCommentsExtractor(ListLinkHandler linkHandler, Localization localization) + throws ExtractionException { + return null; + } + } diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeService.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeService.java index 57cd3f852..3b621b8f0 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeService.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeService.java @@ -1,23 +1,44 @@ package org.schabi.newpipe.extractor.services.youtube; -import org.schabi.newpipe.extractor.*; -import org.schabi.newpipe.extractor.linkhandler.*; +import static java.util.Arrays.asList; +import static org.schabi.newpipe.extractor.StreamingService.ServiceInfo.MediaCapability.AUDIO; +import static org.schabi.newpipe.extractor.StreamingService.ServiceInfo.MediaCapability.COMMENTS; +import static org.schabi.newpipe.extractor.StreamingService.ServiceInfo.MediaCapability.LIVE; +import 
static org.schabi.newpipe.extractor.StreamingService.ServiceInfo.MediaCapability.VIDEO; + +import org.schabi.newpipe.extractor.StreamingService; +import org.schabi.newpipe.extractor.SuggestionExtractor; import org.schabi.newpipe.extractor.channel.ChannelExtractor; +import org.schabi.newpipe.extractor.comments.CommentsExtractor; import org.schabi.newpipe.extractor.exceptions.ExtractionException; import org.schabi.newpipe.extractor.kiosk.KioskExtractor; import org.schabi.newpipe.extractor.kiosk.KioskList; +import org.schabi.newpipe.extractor.linkhandler.LinkHandler; +import org.schabi.newpipe.extractor.linkhandler.LinkHandlerFactory; +import org.schabi.newpipe.extractor.linkhandler.ListLinkHandler; +import org.schabi.newpipe.extractor.linkhandler.ListLinkHandlerFactory; +import org.schabi.newpipe.extractor.linkhandler.SearchQueryHandler; +import org.schabi.newpipe.extractor.linkhandler.SearchQueryHandlerFactory; import org.schabi.newpipe.extractor.playlist.PlaylistExtractor; import org.schabi.newpipe.extractor.search.SearchExtractor; -import org.schabi.newpipe.extractor.services.youtube.extractors.*; -import org.schabi.newpipe.extractor.services.youtube.linkHandler.*; +import org.schabi.newpipe.extractor.services.youtube.extractors.YoutubeChannelExtractor; +import org.schabi.newpipe.extractor.services.youtube.extractors.YoutubeCommentsExtractor; +import org.schabi.newpipe.extractor.services.youtube.extractors.YoutubePlaylistExtractor; +import org.schabi.newpipe.extractor.services.youtube.extractors.YoutubeSearchExtractor; +import org.schabi.newpipe.extractor.services.youtube.extractors.YoutubeStreamExtractor; +import org.schabi.newpipe.extractor.services.youtube.extractors.YoutubeSubscriptionExtractor; +import org.schabi.newpipe.extractor.services.youtube.extractors.YoutubeSuggestionExtractor; +import org.schabi.newpipe.extractor.services.youtube.extractors.YoutubeTrendingExtractor; +import 
org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeChannelLinkHandlerFactory; +import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeCommentsLinkHandlerFactory; +import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubePlaylistLinkHandlerFactory; +import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeSearchQueryHandlerFactory; +import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeStreamLinkHandlerFactory; +import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeTrendingLinkHandlerFactory; import org.schabi.newpipe.extractor.stream.StreamExtractor; import org.schabi.newpipe.extractor.subscription.SubscriptionExtractor; import org.schabi.newpipe.extractor.utils.Localization; -import static java.util.Arrays.asList; -import static org.schabi.newpipe.extractor.StreamingService.ServiceInfo.MediaCapability.*; - - /* * Created by Christian Schabesberger on 23.08.15. * @@ -41,7 +62,7 @@ import static org.schabi.newpipe.extractor.StreamingService.ServiceInfo.MediaCap public class YoutubeService extends StreamingService { public YoutubeService(int id) { - super(id, "YouTube", asList(AUDIO, VIDEO, LIVE)); + super(id, "YouTube", asList(AUDIO, VIDEO, LIVE, COMMENTS)); } @Override @@ -119,4 +140,15 @@ public class YoutubeService extends StreamingService { return new YoutubeSubscriptionExtractor(this); } + @Override + public ListLinkHandlerFactory getCommentsLHFactory() { + return YoutubeCommentsLinkHandlerFactory.getInstance(); + } + + @Override + public CommentsExtractor getCommentsExtractor(ListLinkHandler urlIdHandler, Localization localization) + throws ExtractionException { + return new YoutubeCommentsExtractor(this, urlIdHandler, localization); + } + } diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeCommentsExtractor.java 
b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeCommentsExtractor.java new file mode 100644 index 000000000..85d150014 --- /dev/null +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeCommentsExtractor.java @@ -0,0 +1,218 @@ +package org.schabi.newpipe.extractor.services.youtube.extractors; + +import java.io.IOException; +import java.io.UnsupportedEncodingException; +import java.net.URLEncoder; +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.regex.Pattern; + +import javax.annotation.Nonnull; + +import org.schabi.newpipe.extractor.DownloadRequest; +import org.schabi.newpipe.extractor.DownloadResponse; +import org.schabi.newpipe.extractor.Downloader; +import org.schabi.newpipe.extractor.NewPipe; +import org.schabi.newpipe.extractor.StreamingService; +import org.schabi.newpipe.extractor.comments.CommentsExtractor; +import org.schabi.newpipe.extractor.comments.CommentsInfoItem; +import org.schabi.newpipe.extractor.comments.CommentsInfoItemExtractor; +import org.schabi.newpipe.extractor.comments.CommentsInfoItemsCollector; +import org.schabi.newpipe.extractor.exceptions.ExtractionException; +import org.schabi.newpipe.extractor.exceptions.ParsingException; +import org.schabi.newpipe.extractor.exceptions.ReCaptchaException; +import org.schabi.newpipe.extractor.linkhandler.ListLinkHandler; +import org.schabi.newpipe.extractor.utils.JsonUtils; +import org.schabi.newpipe.extractor.utils.Localization; +import org.schabi.newpipe.extractor.utils.Parser; + +import com.grack.nanojson.JsonArray; +import com.grack.nanojson.JsonObject; +import com.grack.nanojson.JsonParser; + + +public class YoutubeCommentsExtractor extends CommentsExtractor { + + // using the mobile site for comments because it loads faster and uses get requests instead of post + private static final String USER_AGENT = "Mozilla/5.0 (Android 8.1.0; Mobile; 
rv:62.0) Gecko/62.0 Firefox/62.0"; + private static final Pattern YT_CLIENT_NAME_PATTERN = Pattern.compile("INNERTUBE_CONTEXT_CLIENT_NAME\\\":(.*?)[,}]"); + + private String ytClientVersion; + private String ytClientName; + private String title; + private InfoItemsPage initPage; + + public YoutubeCommentsExtractor(StreamingService service, ListLinkHandler uiHandler, Localization localization) { + super(service, uiHandler, localization); + } + + @Override + public InfoItemsPage getInitialPage() throws IOException, ExtractionException { + // initial page does not load any comments but is required to get comments token + super.fetchPage(); + return initPage; + } + + @Override + public String getNextPageUrl() throws IOException, ExtractionException { + // initial page does not load any comments but is required to get comments token + super.fetchPage(); + return initPage.getNextPageUrl(); + } + + private String getNextPageUrl(JsonObject ajaxJson) throws IOException, ParsingException { + + JsonArray arr; + try { + arr = JsonUtils.getArray(ajaxJson, "response.continuationContents.commentSectionContinuation.continuations"); + } catch (Exception e) { + return ""; + } + if(arr.isEmpty()) { + return ""; + } + String continuation; + try { + continuation = JsonUtils.getString(arr.getObject(0), "nextContinuationData.continuation"); + } catch (Exception e) { + return ""; + } + return getNextPageUrl(continuation); + } + + private String getNextPageUrl(String continuation) throws ParsingException { + Map params = new HashMap<>(); + params.put("action_get_comments", "1"); + params.put("pbj", "1"); + params.put("ctoken", continuation); + try { + return "https://m.youtube.com/watch_comment?" 
+ getDataString(params); + } catch (UnsupportedEncodingException e) { + throw new ParsingException("Could not get next page url", e); + } + } + + @Override + public InfoItemsPage getPage(String pageUrl) throws IOException, ExtractionException { + if (pageUrl == null || pageUrl.isEmpty()) { + throw new ExtractionException(new IllegalArgumentException("Page url is empty or null")); + } + String ajaxResponse = makeAjaxRequest(pageUrl); + JsonObject ajaxJson; + try { + ajaxJson = JsonParser.array().from(ajaxResponse).getObject(1); + } catch (Exception e) { + throw new ParsingException("Could not parse json data for comments", e); + } + CommentsInfoItemsCollector collector = new CommentsInfoItemsCollector(getServiceId()); + collectCommentsFrom(collector, ajaxJson, pageUrl); + return new InfoItemsPage<>(collector, getNextPageUrl(ajaxJson)); + } + + private void collectCommentsFrom(CommentsInfoItemsCollector collector, JsonObject ajaxJson, String pageUrl) throws ParsingException { + + JsonArray contents; + try { + contents = JsonUtils.getArray(ajaxJson, "response.continuationContents.commentSectionContinuation.items"); + }catch(Exception e) { + //no comments + return; + } + fetchTitle(contents); + List comments; + try { + comments = JsonUtils.getValues(contents, "commentThreadRenderer.comment.commentRenderer"); + }catch(Exception e) { + throw new ParsingException("unable to get parse youtube comments", e); + } + + for(Object c: comments) { + if(c instanceof JsonObject) { + CommentsInfoItemExtractor extractor = new YoutubeCommentsInfoItemExtractor((JsonObject) c, pageUrl); + collector.commit(extractor); + } + } + } + + private void fetchTitle(JsonArray contents) { + if(null == title) { + try { + title = getYoutubeText(JsonUtils.getObject(contents.getObject(0), "commentThreadRenderer.commentTargetTitle")); + } catch (Exception e) { + title = "Youtube Comments"; + } + } + } + + @Override + public void onFetchPage(Downloader downloader) throws IOException, ExtractionException 
{ + Map> requestHeaders = new HashMap<>(); + requestHeaders.put("User-Agent", Arrays.asList(USER_AGENT)); + DownloadRequest request = new DownloadRequest(null, requestHeaders); + DownloadResponse response = downloader.get(getUrl(), request); + String responseBody = response.getResponseBody(); + ytClientVersion = findValue(responseBody, "INNERTUBE_CONTEXT_CLIENT_VERSION\":\"", "\""); + ytClientName = Parser.matchGroup1(YT_CLIENT_NAME_PATTERN, responseBody); + String commentsTokenInside = findValue(responseBody, "commentSectionRenderer", "}"); + String commentsToken = findValue(commentsTokenInside, "continuation\":\"", "\""); + initPage = getPage(getNextPageUrl(commentsToken)); + } + + @Override + public String getName() throws ParsingException { + return title; + } + + private String makeAjaxRequest(String siteUrl) throws IOException, ReCaptchaException { + + Map> requestHeaders = new HashMap<>(); + requestHeaders.put("Accept", Arrays.asList("*/*")); + requestHeaders.put("User-Agent", Arrays.asList(USER_AGENT)); + requestHeaders.put("X-YouTube-Client-Version", Arrays.asList(ytClientVersion)); + requestHeaders.put("X-YouTube-Client-Name", Arrays.asList(ytClientName)); + DownloadRequest request = new DownloadRequest(null, requestHeaders); + + return NewPipe.getDownloader().get(siteUrl, request).getResponseBody(); + } + + private String getDataString(Map params) throws UnsupportedEncodingException { + StringBuilder result = new StringBuilder(); + boolean first = true; + for (Map.Entry entry : params.entrySet()) { + if (first) + first = false; + else + result.append("&"); + result.append(URLEncoder.encode(entry.getKey(), "UTF-8")); + result.append("="); + result.append(URLEncoder.encode(entry.getValue(), "UTF-8")); + } + return result.toString(); + } + + private String findValue(String doc, String start, String end) { + int beginIndex = doc.indexOf(start) + start.length(); + int endIndex = doc.indexOf(end, beginIndex); + return doc.substring(beginIndex, endIndex); + } + 
+ public static String getYoutubeText(@Nonnull JsonObject object) throws ParsingException { + try { + return JsonUtils.getString(object, "simpleText"); + } catch (Exception e1) { + try { + JsonArray arr = JsonUtils.getArray(object, "runs"); + String result = ""; + for(int i=0; i contentFilter, String sortFilter) throws ParsingException { + return getUrl(id); + } +} diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/stream/StreamInfo.java b/extractor/src/main/java/org/schabi/newpipe/extractor/stream/StreamInfo.java index 8adf4dc58..b07e2270c 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/stream/StreamInfo.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/stream/StreamInfo.java @@ -1,15 +1,18 @@ package org.schabi.newpipe.extractor.stream; -import org.schabi.newpipe.extractor.*; -import org.schabi.newpipe.extractor.exceptions.ContentNotAvailableException; -import org.schabi.newpipe.extractor.exceptions.ExtractionException; -import org.schabi.newpipe.extractor.utils.DashMpdParser; -import org.schabi.newpipe.extractor.utils.ExtractorHelper; - import java.io.IOException; import java.util.ArrayList; import java.util.List; +import org.schabi.newpipe.extractor.Info; +import org.schabi.newpipe.extractor.InfoItem; +import org.schabi.newpipe.extractor.NewPipe; +import org.schabi.newpipe.extractor.StreamingService; +import org.schabi.newpipe.extractor.exceptions.ContentNotAvailableException; +import org.schabi.newpipe.extractor.exceptions.ExtractionException; +import org.schabi.newpipe.extractor.utils.DashMpdParser; +import org.schabi.newpipe.extractor.utils.ExtractorHelper; + /* * Created by Christian Schabesberger on 26.08.15. 
* @@ -41,7 +44,8 @@ public class StreamInfo extends Info { } } - public StreamInfo(int serviceId, String url, String originalUrl, StreamType streamType, String id, String name, int ageLimit) { + public StreamInfo(int serviceId, String url, String originalUrl, StreamType streamType, String id, String name, + int ageLimit) { super(serviceId, id, url, originalUrl, name); this.streamType = streamType; this.ageLimit = ageLimit; @@ -63,9 +67,12 @@ public class StreamInfo extends Info { streamInfo = extractStreams(streamInfo, extractor); streamInfo = extractOptionalData(streamInfo, extractor); } catch (ExtractionException e) { - // Currently YouTube does not distinguish between age restricted videos and videos blocked - // by country. This means that during the initialisation of the extractor, the extractor - // will assume that a video is age restricted while in reality it it blocked by country. + // Currently YouTube does not distinguish between age restricted videos and + // videos blocked + // by country. This means that during the initialisation of the extractor, the + // extractor + // will assume that a video is age restricted while in reality it is blocked by + // country. // // We will now detect whether the video is blocked by country or not. String errorMsg = extractor.getErrorMessage(); @@ -82,7 +89,8 @@ public class StreamInfo extends Info { private static StreamInfo extractImportantData(StreamExtractor extractor) throws ExtractionException { /* ---- important data, without the video can't be displayed goes here: ---- */ - // if one of these is not available an exception is meant to be thrown directly into the frontend. + // if one of these is not available an exception is meant to be thrown directly + // into the frontend. 
int serviceId = extractor.getServiceId(); String url = extractor.getUrl(); @@ -92,18 +100,16 @@ public class StreamInfo extends Info { String name = extractor.getName(); int ageLimit = extractor.getAgeLimit(); - if ((streamType == StreamType.NONE) - || (url == null || url.isEmpty()) - || (id == null || id.isEmpty()) - || (name == null /* streamInfo.title can be empty of course */) - || (ageLimit == -1)) { + if ((streamType == StreamType.NONE) || (url == null || url.isEmpty()) || (id == null || id.isEmpty()) + || (name == null /* streamInfo.title can be empty of course */) || (ageLimit == -1)) { throw new ExtractionException("Some important stream information was not given."); } return new StreamInfo(serviceId, url, originalUrl, streamType, id, name, ageLimit); } - private static StreamInfo extractStreams(StreamInfo streamInfo, StreamExtractor extractor) throws ExtractionException { + private static StreamInfo extractStreams(StreamInfo streamInfo, StreamExtractor extractor) + throws ExtractionException { /* ---- stream extraction goes here ---- */ // At least one type of stream has to be available, // otherwise an exception will be thrown directly into the frontend. 
@@ -120,19 +126,19 @@ public class StreamInfo extends Info { streamInfo.addError(new ExtractionException("Couldn't get HLS manifest", e)); } - /* Load and extract audio */ + /* Load and extract audio */ try { streamInfo.setAudioStreams(extractor.getAudioStreams()); } catch (Exception e) { streamInfo.addError(new ExtractionException("Couldn't get audio streams", e)); } - /* Extract video stream url*/ + /* Extract video stream url */ try { streamInfo.setVideoStreams(extractor.getVideoStreams()); } catch (Exception e) { streamInfo.addError(new ExtractionException("Couldn't get video streams", e)); } - /* Extract video only stream url*/ + /* Extract video only stream url */ try { streamInfo.setVideoOnlyStreams(extractor.getVideoOnlyStreams()); } catch (Exception e) { @@ -140,9 +146,12 @@ public class StreamInfo extends Info { } // Lists can be null if a exception was thrown during extraction - if (streamInfo.getVideoStreams() == null) streamInfo.setVideoStreams(new ArrayList()); - if (streamInfo.getVideoOnlyStreams() == null) streamInfo.setVideoOnlyStreams(new ArrayList()); - if (streamInfo.getAudioStreams() == null) streamInfo.setAudioStreams(new ArrayList()); + if (streamInfo.getVideoStreams() == null) + streamInfo.setVideoStreams(new ArrayList()); + if (streamInfo.getVideoOnlyStreams() == null) + streamInfo.setVideoOnlyStreams(new ArrayList()); + if (streamInfo.getAudioStreams() == null) + streamInfo.setAudioStreams(new ArrayList()); Exception dashMpdError = null; if (streamInfo.getDashMpdUrl() != null && !streamInfo.getDashMpdUrl().isEmpty()) { @@ -155,19 +164,23 @@ public class StreamInfo extends Info { streamInfo.segmentedAudioStreams = result.getSegmentedAudioStreams(); streamInfo.segmentedVideoStreams = result.getSegmentedVideoStreams(); } catch (Exception e) { - // Sometimes we receive 403 (forbidden) error when trying to download the manifest (similar to what happens with youtube-dl), - // just skip the exception (but store it somewhere), as we later check if 
we have streams anyway. + // Sometimes we receive 403 (forbidden) error when trying to download the + // manifest (similar to what happens with youtube-dl), + // just skip the exception (but store it somewhere), as we later check if we + // have streams anyway. dashMpdError = e; } } - // Either audio or video has to be available, otherwise we didn't get a stream (since videoOnly are optional, they don't count). - if ((streamInfo.videoStreams.isEmpty()) - && (streamInfo.audioStreams.isEmpty())) { + // Either audio or video has to be available, otherwise we didn't get a stream + // (since videoOnly are optional, they don't count). + if ((streamInfo.videoStreams.isEmpty()) && (streamInfo.audioStreams.isEmpty())) { if (dashMpdError != null) { - // If we don't have any video or audio and the dashMpd 'errored', add it to the error list - // (it's optional and it don't get added automatically, but it's good to have some additional error context) + // If we don't have any video or audio and the dashMpd 'errored', add it to the + // error list + // (it's optional and it doesn't get added automatically, but it's good to have + // some additional error context) streamInfo.addError(dashMpdError); } @@ -178,9 +191,11 @@ public class StreamInfo extends Info { } private static StreamInfo extractOptionalData(StreamInfo streamInfo, StreamExtractor extractor) { - /* ---- optional data goes here: ---- */ - // If one of these fails, the frontend needs to handle that they are not available. - // Exceptions are therefore not thrown into the frontend, but stored into the error List, + /* ---- optional data goes here: ---- */ + // If one of these fails, the frontend needs to handle that they are not + // available. + // Exceptions are therefore not thrown into the frontend, but stored into the + // error List, // so the frontend can afterwards check where errors happened. 
try { @@ -250,6 +265,7 @@ public class StreamInfo extends Info { } streamInfo.setRelatedStreams(ExtractorHelper.getRelatedVideosOrLogError(streamInfo, extractor)); + return streamInfo; } diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/utils/ExtractorHelper.java b/extractor/src/main/java/org/schabi/newpipe/extractor/utils/ExtractorHelper.java index 0fc6a3120..283006496 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/utils/ExtractorHelper.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/utils/ExtractorHelper.java @@ -5,6 +5,7 @@ import org.schabi.newpipe.extractor.InfoItem; import org.schabi.newpipe.extractor.InfoItemsCollector; import org.schabi.newpipe.extractor.ListExtractor; import org.schabi.newpipe.extractor.ListExtractor.InfoItemsPage; +import org.schabi.newpipe.extractor.comments.CommentsInfo; import org.schabi.newpipe.extractor.stream.StreamExtractor; import org.schabi.newpipe.extractor.stream.StreamInfo; @@ -40,4 +41,5 @@ public class ExtractorHelper { return Collections.emptyList(); } } + } diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/utils/JsonUtils.java b/extractor/src/main/java/org/schabi/newpipe/extractor/utils/JsonUtils.java new file mode 100644 index 000000000..e5d7bb62f --- /dev/null +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/utils/JsonUtils.java @@ -0,0 +1,92 @@ +package org.schabi.newpipe.extractor.utils; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +import javax.annotation.Nonnull; +import javax.annotation.Nullable; + +import org.schabi.newpipe.extractor.exceptions.ParsingException; + +import com.grack.nanojson.JsonArray; +import com.grack.nanojson.JsonObject; + +public class JsonUtils { + + private JsonUtils() { + } + + @Nonnull + public static Object getValue(@Nonnull JsonObject object, @Nonnull String path) throws ParsingException{ + + List keys = Arrays.asList(path.split("\\.")); + object = getObject(object, 
keys.subList(0, keys.size() - 1)); + if (null == object) throw new ParsingException("Unable to get " + path); + Object result = object.get(keys.get(keys.size() - 1)); + if(null == result) throw new ParsingException("Unable to get " + path); + return result; + } + + @Nonnull + public static String getString(@Nonnull JsonObject object, @Nonnull String path) throws ParsingException{ + Object value = getValue(object, path); + if(value instanceof String) { + return (String) value; + }else { + throw new ParsingException("Unable to get " + path); + } + } + + @Nonnull + public static Number getNumber(@Nonnull JsonObject object, @Nonnull String path) throws ParsingException{ + Object value = getValue(object, path); + if(value instanceof Number) { + return (Number) value; + }else { + throw new ParsingException("Unable to get " + path); + } + } + + @Nonnull + public static JsonObject getObject(@Nonnull JsonObject object, @Nonnull String path) throws ParsingException{ + Object value = getValue(object, path); + if(value instanceof JsonObject) { + return (JsonObject) value; + }else { + throw new ParsingException("Unable to get " + path); + } + } + + @Nonnull + public static JsonArray getArray(@Nonnull JsonObject object, @Nonnull String path) throws ParsingException{ + Object value = getValue(object, path); + if(value instanceof JsonArray) { + return (JsonArray) value; + }else { + throw new ParsingException("Unable to get " + path); + } + } + + @Nonnull + public static List getValues(@Nonnull JsonArray array, @Nonnull String path) throws ParsingException { + + List result = new ArrayList<>(); + for (int i = 0; i < array.size(); i++) { + JsonObject obj = array.getObject(i); + result.add(getValue(obj, path)); + } + return result; + } + + @Nullable + private static JsonObject getObject(@Nonnull JsonObject object, @Nonnull List keys) { + JsonObject result = object; + for (String key : keys) { + result = result.getObject(key); + if (null == result) break; + } + return result; + } + +} 
diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/utils/Parser.java b/extractor/src/main/java/org/schabi/newpipe/extractor/utils/Parser.java index 42f098dce..6cd938975 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/utils/Parser.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/utils/Parser.java @@ -1,10 +1,5 @@ package org.schabi.newpipe.extractor.utils; -import org.nibor.autolink.LinkExtractor; -import org.nibor.autolink.LinkSpan; -import org.nibor.autolink.LinkType; -import org.schabi.newpipe.extractor.exceptions.ParsingException; - import java.io.UnsupportedEncodingException; import java.net.URLDecoder; import java.util.ArrayList; @@ -14,6 +9,11 @@ import java.util.Map; import java.util.regex.Matcher; import java.util.regex.Pattern; +import org.nibor.autolink.LinkExtractor; +import org.nibor.autolink.LinkSpan; +import org.nibor.autolink.LinkType; +import org.schabi.newpipe.extractor.exceptions.ParsingException; + /* * Created by Christian Schabesberger on 02.02.16. 
* @@ -51,18 +51,26 @@ public class Parser { public static String matchGroup1(String pattern, String input) throws RegexException { return matchGroup(pattern, input, 1); } + + public static String matchGroup1(Pattern pattern, String input) throws RegexException { + return matchGroup(pattern, input, 1); + } public static String matchGroup(String pattern, String input, int group) throws RegexException { Pattern pat = Pattern.compile(pattern); + return matchGroup(pat, input, group); + } + + public static String matchGroup(Pattern pat, String input, int group) throws RegexException { Matcher mat = pat.matcher(input); boolean foundMatch = mat.find(); if (foundMatch) { return mat.group(group); } else { if (input.length() > 1024) { - throw new RegexException("failed to find pattern \"" + pattern); + throw new RegexException("failed to find pattern \"" + pat.pattern()); } else { - throw new RegexException("failed to find pattern \"" + pattern + " inside of " + input + "\""); + throw new RegexException("failed to find pattern \"" + pat.pattern() + " inside of " + input + "\""); } } } diff --git a/extractor/src/test/java/org/schabi/newpipe/Downloader.java b/extractor/src/test/java/org/schabi/newpipe/Downloader.java index f9f35c21a..c980e1e9a 100644 --- a/extractor/src/test/java/org/schabi/newpipe/Downloader.java +++ b/extractor/src/test/java/org/schabi/newpipe/Downloader.java @@ -1,17 +1,20 @@ package org.schabi.newpipe; -import org.schabi.newpipe.extractor.exceptions.ReCaptchaException; -import org.schabi.newpipe.extractor.utils.Localization; - -import javax.net.ssl.HttpsURLConnection; import java.io.BufferedReader; import java.io.IOException; import java.io.InputStreamReader; import java.net.URL; import java.net.UnknownHostException; import java.util.HashMap; +import java.util.List; import java.util.Map; +import javax.net.ssl.HttpsURLConnection; + +import org.schabi.newpipe.extractor.DownloadRequest; +import org.schabi.newpipe.extractor.DownloadResponse; +import 
org.schabi.newpipe.extractor.exceptions.ReCaptchaException; +import org.schabi.newpipe.extractor.utils.Localization; /* * Created by Christian Schabesberger on 28.01.16. @@ -63,10 +66,11 @@ public class Downloader implements org.schabi.newpipe.extractor.Downloader { } /** - * Download the text file at the supplied URL as in download(String), - * but set the HTTP header field "Accept-Language" to the supplied string. + * Download the text file at the supplied URL as in download(String), but set + * the HTTP header field "Accept-Language" to the supplied string. * * @param siteUrl the URL of the text file to return the contents of + * @param localization the language and country (usually a 2-character code for both values) * @return the contents of the specified text file */ @@ -76,57 +80,53 @@ public class Downloader implements org.schabi.newpipe.extractor.Downloader { return download(siteUrl, requestProperties); } - /** - * Download the text file at the supplied URL as in download(String), - * but set the HTTP header field "Accept-Language" to the supplied string. + * Download the text file at the supplied URL as in download(String), but set + * the HTTP header field "Accept-Language" to the supplied string. 
* * @param siteUrl the URL of the text file to return the contents of * @param customProperties set request header properties * @return the contents of the specified text file * @throws IOException */ - public String download(String siteUrl, Map customProperties) throws IOException, ReCaptchaException { + public String download(String siteUrl, Map customProperties) + throws IOException, ReCaptchaException { URL url = new URL(siteUrl); HttpsURLConnection con = (HttpsURLConnection) url.openConnection(); - for (Map.Entry pair: customProperties.entrySet()) { + for (Map.Entry pair : customProperties.entrySet()) { con.setRequestProperty(pair.getKey(), pair.getValue()); } return dl(con); } /** - * Common functionality between download(String url) and download(String url, String language) + * Common functionality between download(String url) and download(String url, + * String language) */ private static String dl(HttpsURLConnection con) throws IOException, ReCaptchaException { StringBuilder response = new StringBuilder(); BufferedReader in = null; try { - con.setConnectTimeout(30 * 1000);// 30s - con.setReadTimeout(30 * 1000);// 30s + con.setRequestMethod("GET"); - con.setRequestProperty("User-Agent", USER_AGENT); + setDefaults(con); - if (getCookies().length() > 0) { - con.setRequestProperty("Cookie", getCookies()); - } - - in = new BufferedReader( - new InputStreamReader(con.getInputStream())); + in = new BufferedReader(new InputStreamReader(con.getInputStream())); String inputLine; while ((inputLine = in.readLine()) != null) { response.append(inputLine); } - } catch (UnknownHostException uhe) {//thrown when there's no internet connection + } catch (UnknownHostException uhe) {// thrown when there's no internet + // connection throw new IOException("unknown host or no network", uhe); - //Toast.makeText(getActivity(), uhe.getMessage(), Toast.LENGTH_LONG).show(); + // Toast.makeText(getActivity(), uhe.getMessage(), + // Toast.LENGTH_LONG).show(); } catch (Exception e) { /* 
- * HTTP 429 == Too Many Request - * Receive from Youtube.com = ReCaptcha challenge request - * See : https://github.com/rg3/youtube-dl/issues/5138 + * HTTP 429 == Too Many Request Receive from Youtube.com = ReCaptcha challenge + * request See : https://github.com/rg3/youtube-dl/issues/5138 */ if (con.getResponseCode() == 429) { throw new ReCaptchaException("reCaptcha Challenge requested"); @@ -142,9 +142,25 @@ public class Downloader implements org.schabi.newpipe.extractor.Downloader { return response.toString(); } + private static void setDefaults(HttpsURLConnection con) { + + con.setConnectTimeout(30 * 1000);// 30s + con.setReadTimeout(30 * 1000);// 30s + + // set default user agent + if (null == con.getRequestProperty("User-Agent")) { + con.setRequestProperty("User-Agent", USER_AGENT); + } + + // add default cookies + if (getCookies().length() > 0) { + con.addRequestProperty("Cookie", getCookies()); + } + } + /** - * Download (via HTTP) the text file located at the supplied URL, and return its contents. - * Primarily intended for downloading web pages. + * Download (via HTTP) the text file located at the supplied URL, and return its + * contents. Primarily intended for downloading web pages. 
* * @param siteUrl the URL of the text file to download * @return the contents of the specified text file @@ -152,7 +168,57 @@ public class Downloader implements org.schabi.newpipe.extractor.Downloader { public String download(String siteUrl) throws IOException, ReCaptchaException { URL url = new URL(siteUrl); HttpsURLConnection con = (HttpsURLConnection) url.openConnection(); - //HttpsURLConnection con = NetCipher.getHttpsURLConnection(url); + // HttpsURLConnection con = NetCipher.getHttpsURLConnection(url); return dl(con); } + + @Override + public DownloadResponse get(String siteUrl, DownloadRequest request) + throws IOException, ReCaptchaException { + URL url = new URL(siteUrl); + HttpsURLConnection con = (HttpsURLConnection) url.openConnection(); + for (Map.Entry> pair : request.getRequestHeaders().entrySet()) { + for(String value: pair.getValue()) { + con.addRequestProperty(pair.getKey(), value); + } + } + String responseBody = dl(con); + return new DownloadResponse(responseBody, con.getHeaderFields()); + } + + @Override + public DownloadResponse get(String siteUrl) throws IOException, ReCaptchaException { + return get(siteUrl, DownloadRequest.emptyRequest); + } + + @Override + public DownloadResponse post(String siteUrl, DownloadRequest request) + throws IOException, ReCaptchaException { + URL url = new URL(siteUrl); + HttpsURLConnection con = (HttpsURLConnection) url.openConnection(); + con.setRequestMethod("POST"); + for (Map.Entry> pair : request.getRequestHeaders().entrySet()) { + for(String value: pair.getValue()) { + con.addRequestProperty(pair.getKey(), value); + } + } + // set fields to default if not set already + setDefaults(con); + + if(null != request.getRequestBody()) { + byte[] postDataBytes = request.getRequestBody().getBytes("UTF-8"); + con.setRequestProperty("Content-Length", String.valueOf(postDataBytes.length)); + con.setDoOutput(true); + con.getOutputStream().write(postDataBytes); + } + + StringBuilder sb = new StringBuilder(); + try 
(BufferedReader in = new BufferedReader(new InputStreamReader(con.getInputStream()))) { + String inputLine; + while ((inputLine = in.readLine()) != null) { + sb.append(inputLine); + } + } + return new DownloadResponse(sb.toString(), con.getHeaderFields()); + } } diff --git a/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeCommentsExtractorTest.java b/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeCommentsExtractorTest.java new file mode 100644 index 000000000..a4bb6a61e --- /dev/null +++ b/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeCommentsExtractorTest.java @@ -0,0 +1,93 @@ +package org.schabi.newpipe.extractor.services.youtube; + +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; +import static org.schabi.newpipe.extractor.ServiceList.YouTube; + +import java.io.IOException; +import java.util.List; + +import org.jsoup.helper.StringUtil; +import org.junit.BeforeClass; +import org.junit.Test; +import org.schabi.newpipe.Downloader; +import org.schabi.newpipe.extractor.ListExtractor.InfoItemsPage; +import org.schabi.newpipe.extractor.NewPipe; +import org.schabi.newpipe.extractor.comments.CommentsInfo; +import org.schabi.newpipe.extractor.comments.CommentsInfoItem; +import org.schabi.newpipe.extractor.exceptions.ExtractionException; +import org.schabi.newpipe.extractor.services.youtube.extractors.YoutubeCommentsExtractor; +import org.schabi.newpipe.extractor.utils.Localization; + +public class YoutubeCommentsExtractorTest { + + private static YoutubeCommentsExtractor extractor; + + @BeforeClass + public static void setUp() throws Exception { + NewPipe.init(Downloader.getInstance(), new Localization("GB", "en")); + extractor = (YoutubeCommentsExtractor) YouTube + .getCommentsExtractor("https://www.youtube.com/watch?v=rrgFN3AxGfs"); + } + + @Test + public void testGetComments() throws IOException, ExtractionException { + boolean result = 
false; + InfoItemsPage comments = extractor.getInitialPage(); + result = findInComments(comments, "i should really be in the top comment.lol"); + + while (comments.hasNextPage() && !result) { + comments = extractor.getPage(comments.getNextPageUrl()); + result = findInComments(comments, "i should really be in the top comment.lol"); + } + + assertTrue(result); + } + + @Test + public void testGetCommentsFromCommentsInfo() throws IOException, ExtractionException { + boolean result = false; + CommentsInfo commentsInfo = CommentsInfo.getInfo("https://www.youtube.com/watch?v=rrgFN3AxGfs"); + assertTrue("what the fuck am i doing with my life.wmv".equals(commentsInfo.getName())); + result = findInComments(commentsInfo.getRelatedItems(), "i should really be in the top comment.lol"); + + String nextPage = commentsInfo.getNextPageUrl(); + while (!StringUtil.isBlank(nextPage) && !result) { + InfoItemsPage moreItems = CommentsInfo.getMoreItems(YouTube, commentsInfo, nextPage); + result = findInComments(moreItems.getItems(), "i should really be in the top comment.lol"); + nextPage = moreItems.getNextPageUrl(); + } + + assertTrue(result); + } + + @Test + public void testGetCommentsAllData() throws IOException, ExtractionException { + InfoItemsPage comments = extractor.getInitialPage(); + for(CommentsInfoItem c: comments.getItems()) { + assertFalse(StringUtil.isBlank(c.getAuthorEndpoint())); + assertFalse(StringUtil.isBlank(c.getAuthorName())); + assertFalse(StringUtil.isBlank(c.getAuthorThumbnail())); + assertFalse(StringUtil.isBlank(c.getCommentId())); + assertFalse(StringUtil.isBlank(c.getCommentText())); + assertFalse(StringUtil.isBlank(c.getName())); + assertFalse(StringUtil.isBlank(c.getPublishedTime())); + assertFalse(StringUtil.isBlank(c.getThumbnailUrl())); + assertFalse(StringUtil.isBlank(c.getUrl())); + assertFalse(c.getLikeCount() == null); + } + } + + private boolean findInComments(InfoItemsPage comments, String comment) { + return findInComments(comments.getItems(), 
comment); + } + + private boolean findInComments(List comments, String comment) { + for(CommentsInfoItem c: comments) { + if(c.getCommentText().contains(comment)) { + return true; + } + } + return false; + } +} diff --git a/extractor/src/test/java/org/schabi/newpipe/extractor/utils/JsonUtilsTest.java b/extractor/src/test/java/org/schabi/newpipe/extractor/utils/JsonUtilsTest.java new file mode 100644 index 000000000..dc8f2b04b --- /dev/null +++ b/extractor/src/test/java/org/schabi/newpipe/extractor/utils/JsonUtilsTest.java @@ -0,0 +1,47 @@ +package org.schabi.newpipe.extractor.utils; + + +import static org.junit.Assert.assertTrue; + +import java.util.List; + +import org.junit.Test; +import org.schabi.newpipe.extractor.exceptions.ParsingException; + +import com.grack.nanojson.JsonArray; +import com.grack.nanojson.JsonObject; +import com.grack.nanojson.JsonParser; +import com.grack.nanojson.JsonParserException; + + +public class JsonUtilsTest { + + @Test + public void testGetValueFlat() throws JsonParserException, ParsingException { + JsonObject obj = JsonParser.object().from("{\"name\":\"John\",\"age\":30,\"cars\":{\"car1\":\"Ford\",\"car2\":\"BMW\",\"car3\":\"Fiat\"}}"); + assertTrue("John".equals(JsonUtils.getValue(obj, "name"))); + } + + @Test + public void testGetValueNested() throws JsonParserException, ParsingException { + JsonObject obj = JsonParser.object().from("{\"name\":\"John\",\"age\":30,\"cars\":{\"car1\":\"Ford\",\"car2\":\"BMW\",\"car3\":\"Fiat\"}}"); + assertTrue("BMW".equals(JsonUtils.getValue(obj, "cars.car2"))); + } + + @Test + public void testGetArray() throws JsonParserException, ParsingException { + JsonObject obj = JsonParser.object().from("{\"id\":\"0001\",\"type\":\"donut\",\"name\":\"Cake\",\"ppu\":0.55,\"batters\":{\"batter\":[{\"id\":\"1001\",\"type\":\"Regular\"},{\"id\":\"1002\",\"type\":\"Chocolate\"},{\"id\":\"1003\",\"type\":\"Blueberry\"},{\"id\":\"1004\",\"type\":\"Devil's 
Food\"}]},\"topping\":[{\"id\":\"5001\",\"type\":\"None\"},{\"id\":\"5002\",\"type\":\"Glazed\"},{\"id\":\"5005\",\"type\":\"Sugar\"},{\"id\":\"5007\",\"type\":\"Powdered Sugar\"},{\"id\":\"5006\",\"type\":\"Chocolate with Sprinkles\"},{\"id\":\"5003\",\"type\":\"Chocolate\"},{\"id\":\"5004\",\"type\":\"Maple\"}]}"); + JsonArray arr = (JsonArray) JsonUtils.getValue(obj, "batters.batter"); + assertTrue(!arr.isEmpty()); + } + + @Test + public void testGetValues() throws JsonParserException, ParsingException { + JsonObject obj = JsonParser.object().from("{\"id\":\"0001\",\"type\":\"donut\",\"name\":\"Cake\",\"ppu\":0.55,\"batters\":{\"batter\":[{\"id\":\"1001\",\"type\":\"Regular\"},{\"id\":\"1002\",\"type\":\"Chocolate\"},{\"id\":\"1003\",\"type\":\"Blueberry\"},{\"id\":\"1004\",\"type\":\"Devil's Food\"}]},\"topping\":[{\"id\":\"5001\",\"type\":\"None\"},{\"id\":\"5002\",\"type\":\"Glazed\"},{\"id\":\"5005\",\"type\":\"Sugar\"},{\"id\":\"5007\",\"type\":\"Powdered Sugar\"},{\"id\":\"5006\",\"type\":\"Chocolate with Sprinkles\"},{\"id\":\"5003\",\"type\":\"Chocolate\"},{\"id\":\"5004\",\"type\":\"Maple\"}]}"); + JsonArray arr = (JsonArray) JsonUtils.getValue(obj, "topping"); + List types = JsonUtils.getValues(arr, "type"); + assertTrue(types.contains("Chocolate with Sprinkles")); + + } + +}