From 95575756eed4f7972729cfe476ab9e41ce87a19b Mon Sep 17 00:00:00 2001 From: Ritvik Saraf <13ritvik@gmail.com> Date: Sun, 2 Sep 2018 05:06:26 +0530 Subject: [PATCH] added http post method in downloader, formatting --- .../newpipe/extractor/DownloadResponse.java | 30 +- .../schabi/newpipe/extractor/Downloader.java | 69 +-- .../comments/CommentsInfoItemExtractor.java | 22 +- .../comments/CommentsInfoItemsCollector.java | 20 - .../extractors/YoutubeCommentsExtractor.java | 395 +++++++++--------- .../YoutubeCommentsLinkHandlerFactory.java | 40 +- .../java/org/schabi/newpipe/Downloader.java | 286 +++++++------ .../youtube/YoutubeCommentsExtractorTest.java | 44 +- 8 files changed, 442 insertions(+), 464 deletions(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/DownloadResponse.java b/extractor/src/main/java/org/schabi/newpipe/extractor/DownloadResponse.java index 8f91c8914..64fc2ade1 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/DownloadResponse.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/DownloadResponse.java @@ -4,23 +4,21 @@ import java.util.List; import java.util.Map; public class DownloadResponse { - private final String responseBody; - private final Map> responseHeaders; - - + private final String responseBody; + private final Map> responseHeaders; - public DownloadResponse(String responseBody, Map> headers) { - super(); - this.responseBody = responseBody; - this.responseHeaders = headers; - } + public DownloadResponse(String responseBody, Map> headers) { + super(); + this.responseBody = responseBody; + this.responseHeaders = headers; + } - public String getResponseBody() { - return responseBody; - } + public String getResponseBody() { + return responseBody; + } + + public Map> getResponseHeaders() { + return responseHeaders; + } - public Map> getResponseHeaders() { - return responseHeaders; - } - } diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/Downloader.java b/extractor/src/main/java/org/schabi/newpipe/extractor/Downloader.java index 93d0a7c23..f0b6692d4 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/Downloader.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/Downloader.java @@ -28,41 +28,44 @@ import org.schabi.newpipe.extractor.exceptions.ReCaptchaException; public interface Downloader { - /** - * Download the text file at the supplied URL as in download(String), but set - * the HTTP header field "Accept-Language" to the supplied string. - * - * @param siteUrl the URL of the text file to return the contents of - * @param language the language (usually a 2-character code) to set as the - * preferred language - * @return the contents of the specified text file - * @throws IOException - */ - String download(String siteUrl, String language) throws IOException, ReCaptchaException; + /** + * Download the text file at the supplied URL as in download(String), but set + * the HTTP header field "Accept-Language" to the supplied string. + * + * @param siteUrl the URL of the text file to return the contents of + * @param language the language (usually a 2-character code) to set as the + * preferred language + * @return the contents of the specified text file + * @throws IOException + */ + String download(String siteUrl, String language) throws IOException, ReCaptchaException; - /** - * Download the text file at the supplied URL as in download(String), but set - * the HTTP header field "Accept-Language" to the supplied string. 
- * - * @param siteUrl the URL of the text file to return the contents of - * @param customProperties set request header properties - * @return the contents of the specified text file - * @throws IOException - */ - String download(String siteUrl, Map customProperties) throws IOException, ReCaptchaException; + /** + * Download the text file at the supplied URL as in download(String), but set + * the HTTP header field "Accept-Language" to the supplied string. + * + * @param siteUrl the URL of the text file to return the contents of + * @param customProperties set request header properties + * @return the contents of the specified text file + * @throws IOException + */ + String download(String siteUrl, Map customProperties) throws IOException, ReCaptchaException; - /** - * Download (via HTTP) the text file located at the supplied URL, and return its - * contents. Primarily intended for downloading web pages. - * - * @param siteUrl the URL of the text file to download - * @return the contents of the specified text file - * @throws IOException - */ - String download(String siteUrl) throws IOException, ReCaptchaException; + /** + * Download (via HTTP) the text file located at the supplied URL, and return its + * contents. Primarily intended for downloading web pages. + * + * @param siteUrl the URL of the text file to download + * @return the contents of the specified text file + * @throws IOException + */ + String download(String siteUrl) throws IOException, ReCaptchaException; - DownloadResponse downloadWithHeaders(String siteUrl, Map> requestHeaders) - throws IOException, ReCaptchaException; + DownloadResponse get(String siteUrl, Map> requestHeaders) + throws IOException, ReCaptchaException; - DownloadResponse downloadWithHeaders(String siteUrl) throws IOException, ReCaptchaException; + DownloadResponse get(String siteUrl) throws IOException, ReCaptchaException; + + DownloadResponse post(String siteUrl, String requestBody, Map> requestHeaders) + throws IOException, ReCaptchaException; } diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/comments/CommentsInfoItemExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/comments/CommentsInfoItemExtractor.java index 32757e16d..b9905c2a7 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/comments/CommentsInfoItemExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/comments/CommentsInfoItemExtractor.java @@ -4,19 +4,11 @@ import org.schabi.newpipe.extractor.InfoItemExtractor; import org.schabi.newpipe.extractor.exceptions.ParsingException; public interface CommentsInfoItemExtractor extends InfoItemExtractor { - - String getCommentId() throws ParsingException; - - String getCommentText() throws ParsingException; - - String getAuthorName() throws ParsingException; - - String getAuthorThumbnail() throws ParsingException; - - String getAuthorEndpoint() throws ParsingException; - - String getPublishedTime() throws ParsingException; - - Integer getLikeCount() throws ParsingException; - + String getCommentId() throws ParsingException; + String getCommentText() throws ParsingException; + String getAuthorName() throws ParsingException; + String getAuthorThumbnail() throws ParsingException; + String getAuthorEndpoint() throws ParsingException; + String getPublishedTime() throws ParsingException; + Integer getLikeCount() throws ParsingException; } diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/comments/CommentsInfoItemsCollector.java 
b/extractor/src/main/java/org/schabi/newpipe/extractor/comments/CommentsInfoItemsCollector.java index 344ba8059..6bc925302 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/comments/CommentsInfoItemsCollector.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/comments/CommentsInfoItemsCollector.java @@ -7,26 +7,6 @@ import org.schabi.newpipe.extractor.InfoItem; import org.schabi.newpipe.extractor.InfoItemsCollector; import org.schabi.newpipe.extractor.exceptions.ParsingException; -/* - * Created by Christian Schabesberger on 28.02.16. - * - * Copyright (C) Christian Schabesberger 2016 - * CommentsInfoItemsCollector.java is part of NewPipe. - * - * NewPipe is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * NewPipe is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with NewPipe. If not, see . - */ - public class CommentsInfoItemsCollector extends InfoItemsCollector { public CommentsInfoItemsCollector(int serviceId) { diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeCommentsExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeCommentsExtractor.java index 994f19332..defd579b3 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeCommentsExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeCommentsExtractor.java @@ -1,20 +1,17 @@ package org.schabi.newpipe.extractor.services.youtube.extractors; -import java.io.BufferedReader; import java.io.IOException; -import java.io.InputStreamReader; import java.io.UnsupportedEncodingException; -import java.net.URL; import java.net.URLEncoder; +import java.util.Arrays; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Optional; -import javax.net.ssl.HttpsURLConnection; - import org.schabi.newpipe.extractor.DownloadResponse; import org.schabi.newpipe.extractor.Downloader; +import org.schabi.newpipe.extractor.NewPipe; import org.schabi.newpipe.extractor.StreamingService; import org.schabi.newpipe.extractor.comments.CommentsExtractor; import org.schabi.newpipe.extractor.comments.CommentsInfoItem; @@ -22,6 +19,7 @@ import org.schabi.newpipe.extractor.comments.CommentsInfoItemExtractor; import org.schabi.newpipe.extractor.comments.CommentsInfoItemsCollector; import org.schabi.newpipe.extractor.exceptions.ExtractionException; import org.schabi.newpipe.extractor.exceptions.ParsingException; +import org.schabi.newpipe.extractor.exceptions.ReCaptchaException; import org.schabi.newpipe.extractor.linkhandler.ListLinkHandler; import com.fasterxml.jackson.databind.JsonNode; @@ -29,235 +27,222 @@ import com.fasterxml.jackson.databind.ObjectMapper; public class YoutubeCommentsExtractor extends CommentsExtractor { - private static final String USER_AGENT = "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:61.0) Gecko/20100101 Firefox/61.0"; + private static final String USER_AGENT = "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:61.0) Gecko/20100101 
Firefox/61.0"; - private List cookies; - private String sessionToken; - private String commentsToken; + private List cookies; + private String sessionToken; + private String commentsToken; - private ObjectMapper mapper = new ObjectMapper(); + private ObjectMapper mapper = new ObjectMapper(); - public YoutubeCommentsExtractor(StreamingService service, ListLinkHandler uiHandler) { - super(service, uiHandler); - // TODO Auto-generated constructor stub - } + public YoutubeCommentsExtractor(StreamingService service, ListLinkHandler uiHandler) { + super(service, uiHandler); + // TODO Auto-generated constructor stub + } - @Override - public InfoItemsPage getInitialPage() throws IOException, ExtractionException { - // initial page does not load any comments but is required to get session token - // and cookies - return getPage(getNextPageUrl()); - } + @Override + public InfoItemsPage getInitialPage() throws IOException, ExtractionException { + // initial page does not load any comments but is required to get session token + // and cookies + return getPage(getNextPageUrl()); + } - @Override - public String getNextPageUrl() throws IOException, ExtractionException { - return getNextPageUrl(commentsToken); - } + @Override + public String getNextPageUrl() throws IOException, ExtractionException { + return getNextPageUrl(commentsToken); + } - private String getNextPageUrl(JsonNode ajaxJson) throws IOException, ExtractionException { - Optional element = Optional.ofNullable(ajaxJson.findValue("itemSectionContinuation")) - .map(e -> e.get("continuations")).map(e -> e.findValue("continuation")); + private String getNextPageUrl(JsonNode ajaxJson) throws IOException, ExtractionException { + Optional element = Optional.ofNullable(ajaxJson.findValue("itemSectionContinuation")) + .map(e -> e.get("continuations")).map(e -> e.findValue("continuation")); - if (element.isPresent()) { - return getNextPageUrl(element.get().asText()); - } else { - // no more comments - return ""; - } - } + if (element.isPresent()) { + return getNextPageUrl(element.get().asText()); + } else { + // no more comments + return ""; + } + } - private String getNextPageUrl(String continuation) throws ParsingException { - Map params = new HashMap<>(); - params.put("action_get_comments", "1"); - params.put("pbj", "1"); - params.put("ctoken", continuation); - params.put("continuation", continuation); - try { - return "https://www.youtube.com/comment_service_ajax?" + getDataString(params); - } catch (UnsupportedEncodingException e) { - throw new ParsingException("Could not get next page url", e); - } - } + private String getNextPageUrl(String continuation) throws ParsingException { + Map params = new HashMap<>(); + params.put("action_get_comments", "1"); + params.put("pbj", "1"); + params.put("ctoken", continuation); + params.put("continuation", continuation); + try { + return "https://www.youtube.com/comment_service_ajax?" 
+ getDataString(params); + } catch (UnsupportedEncodingException e) { + throw new ParsingException("Could not get next page url", e); + } + } - @Override - public InfoItemsPage getPage(String pageUrl) throws IOException, ExtractionException { - if (pageUrl == null || pageUrl.isEmpty()) { - throw new ExtractionException(new IllegalArgumentException("Page url is empty or null")); - } - String ajaxResponse = makeAjaxRequest(pageUrl); - JsonNode ajaxJson = mapper.readTree(ajaxResponse); - CommentsInfoItemsCollector collector = new CommentsInfoItemsCollector(getServiceId()); - collectCommentsFrom(collector, ajaxJson, pageUrl); - return new InfoItemsPage<>(collector, getNextPageUrl(ajaxJson)); - } + @Override + public InfoItemsPage getPage(String pageUrl) throws IOException, ExtractionException { + if (pageUrl == null || pageUrl.isEmpty()) { + throw new ExtractionException(new IllegalArgumentException("Page url is empty or null")); + } + String ajaxResponse = makeAjaxRequest(pageUrl); + JsonNode ajaxJson = mapper.readTree(ajaxResponse); + CommentsInfoItemsCollector collector = new CommentsInfoItemsCollector(getServiceId()); + collectCommentsFrom(collector, ajaxJson, pageUrl); + return new InfoItemsPage<>(collector, getNextPageUrl(ajaxJson)); + } - private void collectCommentsFrom(CommentsInfoItemsCollector collector, JsonNode ajaxJson, String pageUrl) { - List comments = ajaxJson.findValues("commentRenderer"); - comments.stream().forEach(c -> { - CommentsInfoItemExtractor extractor = new CommentsInfoItemExtractor() { + private void collectCommentsFrom(CommentsInfoItemsCollector collector, JsonNode ajaxJson, String pageUrl) { + List comments = ajaxJson.findValues("commentRenderer"); + comments.stream().forEach(c -> { + CommentsInfoItemExtractor extractor = new CommentsInfoItemExtractor() { - @Override - public String getUrl() throws ParsingException { - return pageUrl; - } + @Override + public String getUrl() throws ParsingException { + return pageUrl; + } - @Override - public String getThumbnailUrl() throws ParsingException { - try { - return c.get("authorThumbnail").get("thumbnails").get(0).get("url").asText(); - } catch (Exception e) { - throw new ParsingException("Could not get thumbnail url", e); - } - } + @Override + public String getThumbnailUrl() throws ParsingException { + try { + return c.get("authorThumbnail").get("thumbnails").get(0).get("url").asText(); + } catch (Exception e) { + throw new ParsingException("Could not get thumbnail url", e); + } + } - @Override - public String getName() throws ParsingException { - try { - return c.get("authorText").get("simpleText").asText(); - } catch (Exception e) { - throw new ParsingException("Could not get thumbnail url", e); - } - } + @Override + public String getName() throws ParsingException { + try { + return c.get("authorText").get("simpleText").asText(); + } catch (Exception e) { + throw new ParsingException("Could not get thumbnail url", e); + } + } - @Override - public String getPublishedTime() throws ParsingException { - try { - return c.get("publishedTimeText").get("runs").get(0).get("text").asText(); - } catch (Exception e) { - throw new ParsingException("Could not get thumbnail url", e); - } - } + @Override + public String getPublishedTime() throws ParsingException { + try { + return c.get("publishedTimeText").get("runs").get(0).get("text").asText(); + } catch (Exception e) { + throw new ParsingException("Could not get thumbnail url", e); + } + } - @Override - public Integer getLikeCount() throws ParsingException { - try { - return 
c.get("likeCount").intValue(); - } catch (Exception e) { - throw new ParsingException("Could not get thumbnail url", e); - } - } + @Override + public Integer getLikeCount() throws ParsingException { + try { + return c.get("likeCount").intValue(); + } catch (Exception e) { + throw new ParsingException("Could not get thumbnail url", e); + } + } - @Override - public String getCommentText() throws ParsingException { - try { - if (null != c.get("contentText").get("simpleText")) { - return c.get("contentText").get("simpleText").asText(); - } else { - return c.get("contentText").get("runs").get(0).get("text").asText(); - } - } catch (Exception e) { - throw new ParsingException("Could not get thumbnail url", e); - } - } + @Override + public String getCommentText() throws ParsingException { + try { + if (null != c.get("contentText").get("simpleText")) { + return c.get("contentText").get("simpleText").asText(); + } else { + return c.get("contentText").get("runs").get(0).get("text").asText(); + } + } catch (Exception e) { + throw new ParsingException("Could not get thumbnail url", e); + } + } - @Override - public String getCommentId() throws ParsingException { - try { - return c.get("commentId").asText(); - } catch (Exception e) { - throw new ParsingException("Could not get thumbnail url", e); - } - } + @Override + public String getCommentId() throws ParsingException { + try { + return c.get("commentId").asText(); + } catch (Exception e) { + throw new ParsingException("Could not get thumbnail url", e); + } + } - @Override - public String getAuthorThumbnail() throws ParsingException { - try { - return c.get("authorThumbnail").get("thumbnails").get(0).get("url").asText(); - } catch (Exception e) { - throw new ParsingException("Could not get thumbnail url", e); - } - } + @Override + public String getAuthorThumbnail() throws ParsingException { + try { + return c.get("authorThumbnail").get("thumbnails").get(0).get("url").asText(); + } catch (Exception e) { + throw new ParsingException("Could not get thumbnail url", e); + } + } - @Override - public String getAuthorName() throws ParsingException { - try { - return c.get("authorText").get("simpleText").asText(); - } catch (Exception e) { - throw new ParsingException("Could not get thumbnail url", e); - } - } + @Override + public String getAuthorName() throws ParsingException { + try { + return c.get("authorText").get("simpleText").asText(); + } catch (Exception e) { + throw new ParsingException("Could not get thumbnail url", e); + } + } - @Override - public String getAuthorEndpoint() throws ParsingException { - try { - return "https://youtube.com" - + c.get("authorEndpoint").get("browseEndpoint").get("canonicalBaseUrl").asText(); - } catch (Exception e) { - throw new ParsingException("Could not get thumbnail url", e); - } - } - }; + @Override + public String getAuthorEndpoint() throws ParsingException { + try { + return "https://youtube.com" + + c.get("authorEndpoint").get("browseEndpoint").get("canonicalBaseUrl").asText(); + } catch (Exception e) { + throw new ParsingException("Could not get thumbnail url", e); + } + } + }; - collector.commit(extractor); - }); + collector.commit(extractor); + }); - } + } - @Override - public void onFetchPage(Downloader downloader) throws IOException, ExtractionException { - DownloadResponse response = downloader.downloadWithHeaders(getUrl()); - String responseBody = response.getResponseBody(); - cookies = response.getResponseHeaders().get("Set-Cookie"); - sessionToken = findValue(responseBody, "XSRF_TOKEN"); - commentsToken 
= findValue(responseBody, "COMMENTS_TOKEN"); - } + @Override + public void onFetchPage(Downloader downloader) throws IOException, ExtractionException { + DownloadResponse response = downloader.get(getUrl()); + String responseBody = response.getResponseBody(); + cookies = response.getResponseHeaders().get("Set-Cookie"); + sessionToken = findValue(responseBody, "XSRF_TOKEN"); + commentsToken = findValue(responseBody, "COMMENTS_TOKEN"); + } - @Override - public String getName() throws ParsingException { - // TODO Auto-generated method stub - return null; - } + @Override + public String getName() throws ParsingException { + // TODO Auto-generated method stub + return null; + } - private String makeAjaxRequest(String siteUrl) throws IOException { + private String makeAjaxRequest(String siteUrl) throws IOException, ReCaptchaException { - StringBuilder postData = new StringBuilder(); - postData.append(URLEncoder.encode("session_token", "UTF-8")); - postData.append('='); - postData.append(URLEncoder.encode(sessionToken, "UTF-8")); - byte[] postDataBytes = postData.toString().getBytes("UTF-8"); + StringBuilder postData = new StringBuilder(); + postData.append(URLEncoder.encode("session_token", "UTF-8")); + postData.append('='); + postData.append(URLEncoder.encode(sessionToken, "UTF-8")); - URL url = new URL(siteUrl); - HttpsURLConnection con = (HttpsURLConnection) url.openConnection(); - con.setRequestMethod("POST"); - con.setRequestProperty("Content-Type", "application/x-www-form-urlencoded"); - con.setRequestProperty("Content-Length", String.valueOf(postDataBytes.length)); - con.setRequestProperty("Accept", "*/*"); - con.setRequestProperty("User-Agent", USER_AGENT); - con.setRequestProperty("X-YouTube-Client-Version", "2.20180815"); - con.setRequestProperty("X-YouTube-Client-Name", "1"); - // set cookies - cookies.stream().forEach(c -> con.addRequestProperty("Cookie", c)); - con.setDoOutput(true); - con.getOutputStream().write(postDataBytes); + Map> requestHeaders = new HashMap<>(); + requestHeaders.put("Content-Type", Arrays.asList("application/x-www-form-urlencoded")); + requestHeaders.put("Accept", Arrays.asList("*/*")); + requestHeaders.put("User-Agent", Arrays.asList(USER_AGENT)); + requestHeaders.put("X-YouTube-Client-Version", Arrays.asList("2.20180815")); + requestHeaders.put("X-YouTube-Client-Name", Arrays.asList("1")); + requestHeaders.put("Cookie", cookies); - BufferedReader in = new BufferedReader(new InputStreamReader(con.getInputStream(), "UTF-8")); - StringBuilder sb = new StringBuilder(); - String inputLine; - while ((inputLine = in.readLine()) != null) { - sb.append(inputLine); - } - return sb.toString(); - } + return NewPipe.getDownloader().post(siteUrl, postData.toString(), requestHeaders).getResponseBody(); + } - private String getDataString(Map params) throws UnsupportedEncodingException { - StringBuilder result = new StringBuilder(); - boolean first = true; - for (Map.Entry entry : params.entrySet()) { - if (first) - first = false; - else - result.append("&"); - result.append(URLEncoder.encode(entry.getKey(), "UTF-8")); - result.append("="); - result.append(URLEncoder.encode(entry.getValue(), "UTF-8")); - } - return result.toString(); - } + private String getDataString(Map params) throws UnsupportedEncodingException { + StringBuilder result = new StringBuilder(); + boolean first = true; + for (Map.Entry entry : params.entrySet()) { + if (first) + first = false; + else + result.append("&"); + result.append(URLEncoder.encode(entry.getKey(), "UTF-8")); + result.append("="); + 
result.append(URLEncoder.encode(entry.getValue(), "UTF-8")); + } + return result.toString(); + } - private String findValue(String doc, String key) { - int beginIndex = doc.indexOf(key) + key.length() + 4; - int endIndex = doc.indexOf("\"", beginIndex); - return doc.substring(beginIndex, endIndex); - } + private String findValue(String doc, String key) { + int beginIndex = doc.indexOf(key) + key.length() + 4; + int endIndex = doc.indexOf("\"", beginIndex); + return doc.substring(beginIndex, endIndex); + } } diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeCommentsLinkHandlerFactory.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeCommentsLinkHandlerFactory.java index fbdd63f1a..cfd4645dd 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeCommentsLinkHandlerFactory.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeCommentsLinkHandlerFactory.java @@ -1,16 +1,5 @@ package org.schabi.newpipe.extractor.services.youtube.linkHandler; -import org.schabi.newpipe.extractor.linkhandler.ListLinkHandlerFactory; -import org.jsoup.Jsoup; -import org.jsoup.nodes.Document; -import org.jsoup.nodes.Element; -import org.schabi.newpipe.extractor.Downloader; -import org.schabi.newpipe.extractor.NewPipe; -import org.schabi.newpipe.extractor.exceptions.FoundAdException; -import org.schabi.newpipe.extractor.exceptions.ParsingException; -import org.schabi.newpipe.extractor.exceptions.ReCaptchaException; -import org.schabi.newpipe.extractor.utils.Parser; - import java.io.IOException; import java.io.UnsupportedEncodingException; import java.net.URI; @@ -18,25 +7,16 @@ import java.net.URISyntaxException; import java.net.URLDecoder; import java.util.List; -/* - * Created by Christian Schabesberger on 25.07.16. - * - * Copyright (C) Christian Schabesberger 2018 - * YoutubeChannelLinkHandlerFactory.java is part of NewPipe. - * - * NewPipe is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * NewPipe is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with NewPipe. If not, see . 
- */ +import org.jsoup.Jsoup; +import org.jsoup.nodes.Document; +import org.jsoup.nodes.Element; +import org.schabi.newpipe.extractor.Downloader; +import org.schabi.newpipe.extractor.NewPipe; +import org.schabi.newpipe.extractor.exceptions.FoundAdException; +import org.schabi.newpipe.extractor.exceptions.ParsingException; +import org.schabi.newpipe.extractor.exceptions.ReCaptchaException; +import org.schabi.newpipe.extractor.linkhandler.ListLinkHandlerFactory; +import org.schabi.newpipe.extractor.utils.Parser; public class YoutubeCommentsLinkHandlerFactory extends ListLinkHandlerFactory { diff --git a/extractor/src/test/java/org/schabi/newpipe/Downloader.java b/extractor/src/test/java/org/schabi/newpipe/Downloader.java index fb0e38ccc..3ec65419d 100644 --- a/extractor/src/test/java/org/schabi/newpipe/Downloader.java +++ b/extractor/src/test/java/org/schabi/newpipe/Downloader.java @@ -36,144 +36,184 @@ import org.schabi.newpipe.extractor.exceptions.ReCaptchaException; public class Downloader implements org.schabi.newpipe.extractor.Downloader { - private static final String USER_AGENT = "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:43.0) Gecko/20100101 Firefox/43.0"; - private static String mCookies = ""; + private static final String USER_AGENT = "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:43.0) Gecko/20100101 Firefox/43.0"; + private static String mCookies = ""; - private static Downloader instance = null; + private static Downloader instance = null; - private Downloader() { - } + private Downloader() { + } - public static Downloader getInstance() { - if (instance == null) { - synchronized (Downloader.class) { - if (instance == null) { - instance = new Downloader(); - } - } - } - return instance; - } + public static Downloader getInstance() { + if (instance == null) { + synchronized (Downloader.class) { + if (instance == null) { + instance = new Downloader(); + } + } + } + return instance; + } - public static synchronized void setCookies(String cookies) { - Downloader.mCookies = cookies; - } + public static synchronized void setCookies(String cookies) { + Downloader.mCookies = cookies; + } - public static synchronized String getCookies() { - return Downloader.mCookies; - } + public static synchronized String getCookies() { + return Downloader.mCookies; + } - /** - * Download the text file at the supplied URL as in download(String), but set - * the HTTP header field "Accept-Language" to the supplied string. - * - * @param siteUrl the URL of the text file to return the contents of - * @param language the language (usually a 2-character code) to set as the - * preferred language - * @return the contents of the specified text file - */ - public String download(String siteUrl, String language) throws IOException, ReCaptchaException { - Map requestProperties = new HashMap<>(); - requestProperties.put("Accept-Language", language); - return download(siteUrl, requestProperties); - } + /** + * Download the text file at the supplied URL as in download(String), but set + * the HTTP header field "Accept-Language" to the supplied string. 
+ * + * @param siteUrl the URL of the text file to return the contents of + * @param language the language (usually a 2-character code) to set as the + * preferred language + * @return the contents of the specified text file + */ + public String download(String siteUrl, String language) throws IOException, ReCaptchaException { + Map requestProperties = new HashMap<>(); + requestProperties.put("Accept-Language", language); + return download(siteUrl, requestProperties); + } - /** - * Download the text file at the supplied URL as in download(String), but set - * the HTTP header field "Accept-Language" to the supplied string. - * - * @param siteUrl the URL of the text file to return the contents of - * @param customProperties set request header properties - * @return the contents of the specified text file - * @throws IOException - */ - public String download(String siteUrl, Map customProperties) - throws IOException, ReCaptchaException { - URL url = new URL(siteUrl); - HttpsURLConnection con = (HttpsURLConnection) url.openConnection(); - for (Map.Entry pair : customProperties.entrySet()) { - con.setRequestProperty(pair.getKey(), pair.getValue()); - } - return dl(con); - } + /** + * Download the text file at the supplied URL as in download(String), but set + * the HTTP header field "Accept-Language" to the supplied string. + * + * @param siteUrl the URL of the text file to return the contents of + * @param customProperties set request header properties + * @return the contents of the specified text file + * @throws IOException + */ + public String download(String siteUrl, Map customProperties) + throws IOException, ReCaptchaException { + URL url = new URL(siteUrl); + HttpsURLConnection con = (HttpsURLConnection) url.openConnection(); + for (Map.Entry pair : customProperties.entrySet()) { + con.setRequestProperty(pair.getKey(), pair.getValue()); + } + return dl(con); + } - /** - * Common functionality between download(String url) and download(String url, - * String language) - */ - private static String dl(HttpsURLConnection con) throws IOException, ReCaptchaException { - StringBuilder response = new StringBuilder(); - BufferedReader in = null; + /** + * Common functionality between download(String url) and download(String url, + * String language) + */ + private static String dl(HttpsURLConnection con) throws IOException, ReCaptchaException { + StringBuilder response = new StringBuilder(); + BufferedReader in = null; - try { - con.setConnectTimeout(30 * 1000);// 30s - con.setReadTimeout(30 * 1000);// 30s - con.setRequestMethod("GET"); - con.setRequestProperty("User-Agent", USER_AGENT); + try { - if (getCookies().length() > 0) { - con.addRequestProperty("Cookie", getCookies()); - } + con.setRequestMethod("GET"); + setDefaults(con); - in = new BufferedReader(new InputStreamReader(con.getInputStream())); - String inputLine; + in = new BufferedReader(new InputStreamReader(con.getInputStream())); + String inputLine; - while ((inputLine = in.readLine()) != null) { - response.append(inputLine); - } - } catch (UnknownHostException uhe) {// thrown when there's no internet connection - throw new IOException("unknown host or no network", uhe); - // Toast.makeText(getActivity(), uhe.getMessage(), Toast.LENGTH_LONG).show(); - } catch (Exception e) { - /* - * HTTP 429 == Too Many Request Receive from Youtube.com = ReCaptcha challenge - * request See : https://github.com/rg3/youtube-dl/issues/5138 - */ - if (con.getResponseCode() == 429) { - throw new ReCaptchaException("reCaptcha Challenge requested"); - } 
+ while ((inputLine = in.readLine()) != null) { + response.append(inputLine); + } + } catch (UnknownHostException uhe) {// thrown when there's no internet + // connection + throw new IOException("unknown host or no network", uhe); + // Toast.makeText(getActivity(), uhe.getMessage(), + // Toast.LENGTH_LONG).show(); + } catch (Exception e) { + /* + * HTTP 429 == Too Many Request Receive from Youtube.com = ReCaptcha challenge + * request See : https://github.com/rg3/youtube-dl/issues/5138 + */ + if (con.getResponseCode() == 429) { + throw new ReCaptchaException("reCaptcha Challenge requested"); + } - throw new IOException(con.getResponseCode() + " " + con.getResponseMessage(), e); - } finally { - if (in != null) { - in.close(); - } - } + throw new IOException(con.getResponseCode() + " " + con.getResponseMessage(), e); + } finally { + if (in != null) { + in.close(); + } + } - return response.toString(); - } + return response.toString(); + } - /** - * Download (via HTTP) the text file located at the supplied URL, and return its - * contents. Primarily intended for downloading web pages. - * - * @param siteUrl the URL of the text file to download - * @return the contents of the specified text file - */ - public String download(String siteUrl) throws IOException, ReCaptchaException { - URL url = new URL(siteUrl); - HttpsURLConnection con = (HttpsURLConnection) url.openConnection(); - // HttpsURLConnection con = NetCipher.getHttpsURLConnection(url); - return dl(con); - } + private static void setDefaults(HttpsURLConnection con) { - @Override - public DownloadResponse downloadWithHeaders(String siteUrl, Map> requestHeaders) - throws IOException, ReCaptchaException { - URL url = new URL(siteUrl); - HttpsURLConnection con = (HttpsURLConnection) url.openConnection(); - for (Map.Entry> pair : requestHeaders.entrySet()) { - pair.getValue().stream().forEach(value -> con.addRequestProperty(pair.getKey(), value)); - } - String responseBody = dl(con); - return new DownloadResponse(responseBody, con.getHeaderFields()); - } + con.setConnectTimeout(30 * 1000);// 30s + con.setReadTimeout(30 * 1000);// 30s - @Override - public DownloadResponse downloadWithHeaders(String siteUrl) throws IOException, ReCaptchaException { - URL url = new URL(siteUrl); - HttpsURLConnection con = (HttpsURLConnection) url.openConnection(); - String responseBody = dl(con); - return new DownloadResponse(responseBody, con.getHeaderFields()); - } + // set default user agent + if (null == con.getRequestProperty("User-Agent")) { + con.setRequestProperty("User-Agent", USER_AGENT); + } + + // add default cookies + if (getCookies().length() > 0) { + con.addRequestProperty("Cookie", getCookies()); + } + } + + /** + * Download (via HTTP) the text file located at the supplied URL, and return its + * contents. Primarily intended for downloading web pages. 
+ * + * @param siteUrl the URL of the text file to download + * @return the contents of the specified text file + */ + public String download(String siteUrl) throws IOException, ReCaptchaException { + URL url = new URL(siteUrl); + HttpsURLConnection con = (HttpsURLConnection) url.openConnection(); + // HttpsURLConnection con = NetCipher.getHttpsURLConnection(url); + return dl(con); + } + + @Override + public DownloadResponse get(String siteUrl, Map> requestHeaders) + throws IOException, ReCaptchaException { + URL url = new URL(siteUrl); + HttpsURLConnection con = (HttpsURLConnection) url.openConnection(); + for (Map.Entry> pair : requestHeaders.entrySet()) { + pair.getValue().stream().forEach(value -> con.addRequestProperty(pair.getKey(), value)); + } + String responseBody = dl(con); + return new DownloadResponse(responseBody, con.getHeaderFields()); + } + + @Override + public DownloadResponse get(String siteUrl) throws IOException, ReCaptchaException { + URL url = new URL(siteUrl); + HttpsURLConnection con = (HttpsURLConnection) url.openConnection(); + String responseBody = dl(con); + return new DownloadResponse(responseBody, con.getHeaderFields()); + } + + @Override + public DownloadResponse post(String siteUrl, String requestBody, Map> requestHeaders) + throws IOException, ReCaptchaException { + URL url = new URL(siteUrl); + HttpsURLConnection con = (HttpsURLConnection) url.openConnection(); + con.setRequestMethod("POST"); + for (Map.Entry> pair : requestHeaders.entrySet()) { + pair.getValue().stream().forEach(value -> con.addRequestProperty(pair.getKey(), value)); + } + // set fields to default if not set already + setDefaults(con); + + byte[] postDataBytes = requestBody.toString().getBytes("UTF-8"); + con.setRequestProperty("Content-Length", String.valueOf(postDataBytes.length)); + + con.setDoOutput(true); + con.getOutputStream().write(postDataBytes); + + BufferedReader in = new BufferedReader(new InputStreamReader(con.getInputStream())); + StringBuilder sb = new StringBuilder(); + String inputLine; + while ((inputLine = in.readLine()) != null) { + sb.append(inputLine); + } + return new DownloadResponse(sb.toString(), con.getHeaderFields()); + } } diff --git a/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeCommentsExtractorTest.java b/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeCommentsExtractorTest.java index 468c12ce5..c5789e7ff 100644 --- a/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeCommentsExtractorTest.java +++ b/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeCommentsExtractorTest.java @@ -16,31 +16,31 @@ import org.schabi.newpipe.extractor.services.youtube.extractors.YoutubeCommentsE public class YoutubeCommentsExtractorTest { - private static YoutubeCommentsExtractor extractor; + private static YoutubeCommentsExtractor extractor; - @BeforeClass - public static void setUp() throws Exception { - NewPipe.init(Downloader.getInstance()); - extractor = (YoutubeCommentsExtractor) YouTube - .getCommentsExtractor("https://www.youtube.com/watch?v=rrgFN3AxGfs"); - extractor.fetchPage(); - } + @BeforeClass + public static void setUp() throws Exception { + NewPipe.init(Downloader.getInstance()); + extractor = (YoutubeCommentsExtractor) YouTube + .getCommentsExtractor("https://www.youtube.com/watch?v=rrgFN3AxGfs"); + extractor.fetchPage(); + } - @Test - public void testGetComments() throws IOException, ExtractionException { - boolean result = false; - InfoItemsPage 
comments = extractor.getInitialPage(); - result = findInComments(comments, "i should really be in the top comment.lol"); + @Test + public void testGetComments() throws IOException, ExtractionException { + boolean result = false; + InfoItemsPage comments = extractor.getInitialPage(); + result = findInComments(comments, "i should really be in the top comment.lol"); - while (comments.hasNextPage()) { - comments = extractor.getPage(comments.getNextPageUrl()); - result = findInComments(comments, "i should really be in the top comment.lol"); - } + while (comments.hasNextPage()) { + comments = extractor.getPage(comments.getNextPageUrl()); + result = findInComments(comments, "i should really be in the top comment.lol"); + } - assertTrue(result); - } + assertTrue(result); + } - private boolean findInComments(InfoItemsPage comments, String comment) { - return comments.getItems().stream().filter(c -> c.getCommentText().contains(comment)).findAny().isPresent(); - } + private boolean findInComments(InfoItemsPage comments, String comment) { + return comments.getItems().stream().filter(c -> c.getCommentText().contains(comment)).findAny().isPresent(); + } }
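
For reference, the reworked contract introduced by this patch (get replacing downloadWithHeaders, plus the new post method) can be exercised end to end once NewPipe.init(...) has been called with an implementation such as the test Downloader above. Below is a minimal caller-side sketch, not part of the patch: the class name, watch URL, session-token value and header set are illustrative placeholders, and it assumes the method signatures exactly as declared in the Downloader interface changes shown here.

    import java.util.Arrays;
    import java.util.HashMap;
    import java.util.List;
    import java.util.Map;

    import org.schabi.newpipe.Downloader;                 // test implementation from this patch
    import org.schabi.newpipe.extractor.DownloadResponse;
    import org.schabi.newpipe.extractor.NewPipe;

    public class PostExample {                            // hypothetical demo class
        public static void main(String[] args) throws Exception {
            // Register a Downloader implementation, as YoutubeCommentsExtractorTest does.
            NewPipe.init(Downloader.getInstance());

            // GET: fetch a watch page and keep the response headers (e.g. Set-Cookie),
            // mirroring YoutubeCommentsExtractor.onFetchPage.
            DownloadResponse page = NewPipe.getDownloader()
                    .get("https://www.youtube.com/watch?v=rrgFN3AxGfs");
            List<String> cookies = page.getResponseHeaders().get("Set-Cookie");

            // POST: send a form-encoded body with explicit request headers,
            // mirroring YoutubeCommentsExtractor.makeAjaxRequest.
            Map<String, List<String>> headers = new HashMap<>();
            headers.put("Content-Type", Arrays.asList("application/x-www-form-urlencoded"));
            if (cookies != null) {
                headers.put("Cookie", cookies);
            }

            DownloadResponse ajax = NewPipe.getDownloader().post(
                    "https://www.youtube.com/comment_service_ajax?action_get_comments=1&pbj=1",
                    "session_token=PLACEHOLDER_TOKEN",    // placeholder; real token comes from findValue(..., "XSRF_TOKEN")
                    headers);
            System.out.println(ajax.getResponseBody());
        }
    }

The design point the sketch illustrates is that request construction (method, body, headers) now lives behind the Downloader interface: YoutubeCommentsExtractor no longer opens an HttpsURLConnection itself, it only assembles the form body and header map and delegates to NewPipe.getDownloader().post(...).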