From 3441946bea9c172a4c96dd6deae450cecab875a4 Mon Sep 17 00:00:00 2001 From: Mauricio Colli Date: Sat, 29 Feb 2020 17:52:12 -0300 Subject: [PATCH 01/11] Make test downloader return a response instead of throwing an exception The test implementation was throwing an exception instead of just returning the response and letting the caller handle it. --- .../test/java/org/schabi/newpipe/DownloaderTestImpl.java | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/extractor/src/test/java/org/schabi/newpipe/DownloaderTestImpl.java b/extractor/src/test/java/org/schabi/newpipe/DownloaderTestImpl.java index e524ac8d4..8d7a34370 100644 --- a/extractor/src/test/java/org/schabi/newpipe/DownloaderTestImpl.java +++ b/extractor/src/test/java/org/schabi/newpipe/DownloaderTestImpl.java @@ -102,16 +102,20 @@ public class DownloaderTestImpl extends Downloader { return new Response(responseCode, responseMessage, responseHeaders, response.toString()); } catch (Exception e) { + final int responseCode = connection.getResponseCode(); + /* * HTTP 429 == Too Many Request * Receive from Youtube.com = ReCaptcha challenge request * See : https://github.com/rg3/youtube-dl/issues/5138 */ - if (connection.getResponseCode() == 429) { + if (responseCode == 429) { throw new ReCaptchaException("reCaptcha Challenge requested", url); + } else if (responseCode != -1) { + return new Response(responseCode, connection.getResponseMessage(), connection.getHeaderFields(), null); } - throw new IOException(connection.getResponseCode() + " " + connection.getResponseMessage(), e); + throw new IOException("Error occurred while fetching the content", e); } finally { if (outputStream != null) outputStream.close(); if (input != null) input.close(); From 5edd774fc44b037608840e1e968aa72a581040a8 Mon Sep 17 00:00:00 2001 From: Mauricio Colli Date: Sat, 29 Feb 2020 17:57:23 -0300 Subject: [PATCH 02/11] Add latest url to the response to make detection of a redirect possible Will be latest one in this 
commit because there's no need to check the history of redirects as of now. --- .../newpipe/extractor/downloader/Response.java | 16 +++++++++++++++- .../org/schabi/newpipe/DownloaderTestImpl.java | 6 ++++-- 2 files changed, 19 insertions(+), 3 deletions(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/downloader/Response.java b/extractor/src/main/java/org/schabi/newpipe/extractor/downloader/Response.java index 59c3911cc..5fa432c28 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/downloader/Response.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/downloader/Response.java @@ -15,12 +15,16 @@ public class Response { private final Map> responseHeaders; private final String responseBody; - public Response(int responseCode, String responseMessage, Map> responseHeaders, @Nullable String responseBody) { + private final String latestUrl; + + public Response(int responseCode, String responseMessage, Map> responseHeaders, + @Nullable String responseBody, @Nullable String latestUrl) { this.responseCode = responseCode; this.responseMessage = responseMessage; this.responseHeaders = responseHeaders != null ? responseHeaders : Collections.>emptyMap(); this.responseBody = responseBody == null ? "" : responseBody; + this.latestUrl = latestUrl; } public int responseCode() { @@ -40,6 +44,16 @@ public class Response { return responseBody; } + /** + * Used for detecting a possible redirection, limited to the latest one. 
+ * + * @return latest url known right before this response object was created + */ + @Nonnull + public String latestUrl() { + return latestUrl; + } + /*////////////////////////////////////////////////////////////////////////// // Utils //////////////////////////////////////////////////////////////////////////*/ diff --git a/extractor/src/test/java/org/schabi/newpipe/DownloaderTestImpl.java b/extractor/src/test/java/org/schabi/newpipe/DownloaderTestImpl.java index 8d7a34370..c93f31b76 100644 --- a/extractor/src/test/java/org/schabi/newpipe/DownloaderTestImpl.java +++ b/extractor/src/test/java/org/schabi/newpipe/DownloaderTestImpl.java @@ -99,8 +99,9 @@ public class DownloaderTestImpl extends Downloader { final int responseCode = connection.getResponseCode(); final String responseMessage = connection.getResponseMessage(); final Map> responseHeaders = connection.getHeaderFields(); + final String latestUrl = connection.getURL().toString(); - return new Response(responseCode, responseMessage, responseHeaders, response.toString()); + return new Response(responseCode, responseMessage, responseHeaders, response.toString(), latestUrl); } catch (Exception e) { final int responseCode = connection.getResponseCode(); @@ -112,7 +113,8 @@ public class DownloaderTestImpl extends Downloader { if (responseCode == 429) { throw new ReCaptchaException("reCaptcha Challenge requested", url); } else if (responseCode != -1) { - return new Response(responseCode, connection.getResponseMessage(), connection.getHeaderFields(), null); + final String latestUrl = connection.getURL().toString(); + return new Response(responseCode, connection.getResponseMessage(), connection.getHeaderFields(), null, latestUrl); } throw new IOException("Error occurred while fetching the content", e); From f66c20de54418b464d529bebf0a49f66a1f27e74 Mon Sep 17 00:00:00 2001 From: Mauricio Colli Date: Sat, 29 Feb 2020 17:59:51 -0300 Subject: [PATCH 03/11] Ignore null-keyed entries when iterating through the response 
headers --- .../java/org/schabi/newpipe/extractor/downloader/Response.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/downloader/Response.java b/extractor/src/main/java/org/schabi/newpipe/extractor/downloader/Response.java index 5fa432c28..b252d4638 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/downloader/Response.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/downloader/Response.java @@ -68,7 +68,8 @@ public class Response { @Nullable public String getHeader(String name) { for (Map.Entry> headerEntry : responseHeaders.entrySet()) { - if (headerEntry.getKey().equalsIgnoreCase(name)) { + final String key = headerEntry.getKey(); + if (key != null && key.equalsIgnoreCase(name)) { if (headerEntry.getValue().size() > 0) { return headerEntry.getValue().get(0); } From da2c562eb0fe5244e6078b8d6d038d0341dddd32 Mon Sep 17 00:00:00 2001 From: Mauricio Colli Date: Sat, 29 Feb 2020 18:01:43 -0300 Subject: [PATCH 04/11] [YouTube] Take into account videos that have their views hidden Also remove catch because it may hide bugs when the page changes. 
--- .../extractors/YoutubeStreamInfoItemExtractor.java | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamInfoItemExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamInfoItemExtractor.java index 2ee89e245..0ed79eecb 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamInfoItemExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamInfoItemExtractor.java @@ -194,11 +194,16 @@ public class YoutubeStreamInfoItemExtractor implements StreamInfoItemExtractor { if (videoInfo.getObject("topStandaloneBadge") != null || isPremium()) { return -1; } - String viewCount = getTextFromObject(videoInfo.getObject("viewCountText")); + + final JsonObject viewCountObject = videoInfo.getObject("viewCountText"); + if (viewCountObject == null) { + // This object is null when a video has its views hidden. 
+ return -1; + } + + final String viewCount = getTextFromObject(viewCountObject); return Long.parseLong(Utils.removeNonDigitCharacters(viewCount)); - } catch (NumberFormatException e) { - return -1; } catch (Exception e) { throw new ParsingException("Could not get view count", e); } From 2a470ac4f740f9232e8fe7a7ec0adab7b90bf659 Mon Sep 17 00:00:00 2001 From: Mauricio Colli Date: Sat, 29 Feb 2020 18:02:00 -0300 Subject: [PATCH 05/11] [YouTube] Handle videos with no views or with "Recommended to you" text --- .../youtube/extractors/YoutubeStreamInfoItemExtractor.java | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamInfoItemExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamInfoItemExtractor.java index 0ed79eecb..b9d61046e 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamInfoItemExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamInfoItemExtractor.java @@ -203,6 +203,12 @@ public class YoutubeStreamInfoItemExtractor implements StreamInfoItemExtractor { final String viewCount = getTextFromObject(viewCountObject); + if (viewCount.toLowerCase().contains("no views")) { + return 0; + } else if (viewCount.toLowerCase().contains("recommended")) { + return -1; + } + return Long.parseLong(Utils.removeNonDigitCharacters(viewCount)); } catch (Exception e) { throw new ParsingException("Could not get view count", e); From e9644e6216c9aaa20a6fefa4a0c94b471c09b1bf Mon Sep 17 00:00:00 2001 From: Mauricio Colli Date: Sat, 29 Feb 2020 18:19:34 -0300 Subject: [PATCH 06/11] [YouTube] Handle video premiere's date and duration --- .../YoutubeStreamInfoItemExtractor.java | 54 ++++++++++++++++--- 1 file changed, 46 insertions(+), 8 deletions(-) diff --git 
a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamInfoItemExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamInfoItemExtractor.java index b9d61046e..be2aaf97e 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamInfoItemExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamInfoItemExtractor.java @@ -2,7 +2,6 @@ package org.schabi.newpipe.extractor.services.youtube.extractors; import com.grack.nanojson.JsonArray; import com.grack.nanojson.JsonObject; - import org.schabi.newpipe.extractor.exceptions.ParsingException; import org.schabi.newpipe.extractor.localization.DateWrapper; import org.schabi.newpipe.extractor.localization.TimeAgoParser; @@ -13,10 +12,11 @@ import org.schabi.newpipe.extractor.stream.StreamType; import org.schabi.newpipe.extractor.utils.Utils; import javax.annotation.Nullable; +import java.text.SimpleDateFormat; +import java.util.Calendar; +import java.util.Date; -import static org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeParsingHelper.fixThumbnailUrl; -import static org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeParsingHelper.getTextFromObject; -import static org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeParsingHelper.getUrlFromNavigationEndpoint; +import static org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeParsingHelper.*; /* * Copyright (C) Christian Schabesberger 2016 @@ -86,7 +86,9 @@ public class YoutubeStreamInfoItemExtractor implements StreamInfoItemExtractor { @Override public long getDuration() throws ParsingException { - if (getStreamType() == StreamType.LIVE_STREAM) return -1; + if (getStreamType() == StreamType.LIVE_STREAM || isPremiere()) { + return -1; + } String duration = null; @@ -165,7 +167,16 @@ public class 
YoutubeStreamInfoItemExtractor implements StreamInfoItemExtractor { @Nullable @Override - public String getTextualUploadDate() { + public String getTextualUploadDate() throws ParsingException { + if (getStreamType().equals(StreamType.LIVE_STREAM)) { + return null; + } + + if (isPremiere()) { + final Date date = getDateFromPremiere().getTime(); + return new SimpleDateFormat("yyyy-MM-dd HH:mm").format(date); + } + try { return getTextFromObject(videoInfo.getObject("publishedTimeText")); } catch (Exception e) { @@ -177,7 +188,15 @@ public class YoutubeStreamInfoItemExtractor implements StreamInfoItemExtractor { @Nullable @Override public DateWrapper getUploadDate() throws ParsingException { - String textualUploadDate = getTextualUploadDate(); + if (getStreamType().equals(StreamType.LIVE_STREAM)) { + return null; + } + + if (isPremiere()) { + return new DateWrapper(getDateFromPremiere()); + } + + final String textualUploadDate = getTextualUploadDate(); if (timeAgoParser != null && textualUploadDate != null && !textualUploadDate.isEmpty()) { try { return timeAgoParser.parse(textualUploadDate); @@ -236,7 +255,26 @@ public class YoutubeStreamInfoItemExtractor implements StreamInfoItemExtractor { return true; } } - } catch (Exception ignored) {} + } catch (Exception ignored) { + } return false; } + + private boolean isPremiere() { + return videoInfo.has("upcomingEventData"); + } + + private Calendar getDateFromPremiere() throws ParsingException { + final JsonObject upcomingEventData = videoInfo.getObject("upcomingEventData"); + final String startTime = upcomingEventData.getString("startTime"); + + try { + final long startTimeTimestamp = Long.parseLong(startTime); + final Calendar calendar = Calendar.getInstance(); + calendar.setTime(new Date(startTimeTimestamp * 1000L)); + return calendar; + } catch (Exception e) { + throw new ParsingException("Could not parse date from premiere: \"" + startTime + "\""); + } + } } From 342bdbb85256a510a7c9b4aeeea148d8a34481bf Mon Sep 17 
00:00:00 2001 From: Mauricio Colli Date: Sat, 29 Feb 2020 18:42:43 -0300 Subject: [PATCH 07/11] [YouTube] Avoid crashing by letting exceptions bubble up --- .../linkHandler/YoutubeParsingHelper.java | 126 ++++++++---------- .../youtube/YoutubeParsingHelperTest.java | 3 +- 2 files changed, 58 insertions(+), 71 deletions(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeParsingHelper.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeParsingHelper.java index 1c603f5b2..5b37e4c9d 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeParsingHelper.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeParsingHelper.java @@ -5,7 +5,6 @@ import com.grack.nanojson.JsonArray; import com.grack.nanojson.JsonObject; import com.grack.nanojson.JsonParser; import com.grack.nanojson.JsonParserException; - import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.schabi.newpipe.extractor.downloader.Response; @@ -22,12 +21,7 @@ import java.net.URL; import java.net.URLDecoder; import java.text.ParseException; import java.text.SimpleDateFormat; -import java.util.Calendar; -import java.util.Collections; -import java.util.Date; -import java.util.HashMap; -import java.util.List; -import java.util.Map; +import java.util.*; import static org.schabi.newpipe.extractor.NewPipe.getDownloader; import static org.schabi.newpipe.extractor.utils.Utils.HTTP; @@ -177,21 +171,16 @@ public class YoutubeParsingHelper { } } - public static boolean isHardcodedClientVersionValid() throws IOException { - try { - final String url = "https://www.youtube.com/results?search_query=test&pbj=1"; + public static boolean isHardcodedClientVersionValid() throws IOException, ExtractionException { + final String url = "https://www.youtube.com/results?search_query=test&pbj=1"; - Map> headers = new HashMap<>(); - 
headers.put("X-YouTube-Client-Name", Collections.singletonList("1")); - headers.put("X-YouTube-Client-Version", - Collections.singletonList(HARDCODED_CLIENT_VERSION)); - final String response = getDownloader().get(url, headers).responseBody(); - if (response.length() > 50) { // ensure to have a valid response - return true; - } - } catch (ReCaptchaException ignored) {} + Map> headers = new HashMap<>(); + headers.put("X-YouTube-Client-Name", Collections.singletonList("1")); + headers.put("X-YouTube-Client-Version", + Collections.singletonList(HARDCODED_CLIENT_VERSION)); + final String response = getDownloader().get(url, headers).responseBody(); - return false; + return response.length() > 50; // ensure to have a valid response } /** @@ -199,7 +188,7 @@ public class YoutubeParsingHelper { * @return * @throws ParsingException */ - public static String getClientVersion() throws ParsingException, IOException { + public static String getClientVersion() throws IOException, ExtractionException { if (clientVersion != null && !clientVersion.isEmpty()) return clientVersion; if (isHardcodedClientVersionValid()) { @@ -207,62 +196,59 @@ public class YoutubeParsingHelper { return clientVersion; } - // Try extracting it from YouTube's website otherwise - try { - final String url = "https://www.youtube.com/results?search_query=test"; - final String html = getDownloader().get(url).responseBody(); - JsonObject initialData = getInitialData(html); - JsonArray serviceTrackingParams = initialData.getObject("responseContext").getArray("serviceTrackingParams"); - String shortClientVersion = null; + final String url = "https://www.youtube.com/results?search_query=test"; + final String html = getDownloader().get(url).responseBody(); + JsonObject initialData = getInitialData(html); + JsonArray serviceTrackingParams = initialData.getObject("responseContext").getArray("serviceTrackingParams"); + String shortClientVersion = null; - // try to get version from initial data first - for (Object 
service : serviceTrackingParams) { - JsonObject s = (JsonObject) service; - if (s.getString("service").equals("CSI")) { - JsonArray params = s.getArray("params"); - for (Object param : params) { - JsonObject p = (JsonObject) param; - String key = p.getString("key"); - if (key != null && key.equals("cver")) { - clientVersion = p.getString("value"); - return clientVersion; - } - } - } else if (s.getString("service").equals("ECATCHER")) { - // fallback to get a shortened client version which does not contain the last two digits - JsonArray params = s.getArray("params"); - for (Object param : params) { - JsonObject p = (JsonObject) param; - String key = p.getString("key"); - if (key != null && key.equals("client.version")) { - shortClientVersion = p.getString("value"); - } - } - } - } - - String contextClientVersion; - String[] patterns = { - "INNERTUBE_CONTEXT_CLIENT_VERSION\":\"([0-9\\.]+?)\"", - "innertube_context_client_version\":\"([0-9\\.]+?)\"", - "client.version=([0-9\\.]+)" - }; - for (String pattern : patterns) { - try { - contextClientVersion = Parser.matchGroup1(pattern, html); - if (contextClientVersion != null && !contextClientVersion.isEmpty()) { - clientVersion = contextClientVersion; + // try to get version from initial data first + for (Object service : serviceTrackingParams) { + JsonObject s = (JsonObject) service; + if (s.getString("service").equals("CSI")) { + JsonArray params = s.getArray("params"); + for (Object param : params) { + JsonObject p = (JsonObject) param; + String key = p.getString("key"); + if (key != null && key.equals("cver")) { + clientVersion = p.getString("value"); return clientVersion; } - } catch (Exception ignored) { + } + } else if (s.getString("service").equals("ECATCHER")) { + // fallback to get a shortened client version which does not contain the last two digits + JsonArray params = s.getArray("params"); + for (Object param : params) { + JsonObject p = (JsonObject) param; + String key = p.getString("key"); + if (key != 
null && key.equals("client.version")) { + shortClientVersion = p.getString("value"); + } } } + } - if (shortClientVersion != null) { - clientVersion = shortClientVersion; - return clientVersion; + String contextClientVersion; + String[] patterns = { + "INNERTUBE_CONTEXT_CLIENT_VERSION\":\"([0-9\\.]+?)\"", + "innertube_context_client_version\":\"([0-9\\.]+?)\"", + "client.version=([0-9\\.]+)" + }; + for (String pattern : patterns) { + try { + contextClientVersion = Parser.matchGroup1(pattern, html); + if (contextClientVersion != null && !contextClientVersion.isEmpty()) { + clientVersion = contextClientVersion; + return clientVersion; + } + } catch (Exception ignored) { } - } catch (Exception ignored) {} + } + + if (shortClientVersion != null) { + clientVersion = shortClientVersion; + return clientVersion; + } throw new ParsingException("Could not get client version"); } diff --git a/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeParsingHelperTest.java b/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeParsingHelperTest.java index f8ff12358..87dbbd750 100644 --- a/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeParsingHelperTest.java +++ b/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeParsingHelperTest.java @@ -4,6 +4,7 @@ import org.junit.BeforeClass; import org.junit.Test; import org.schabi.newpipe.DownloaderTestImpl; import org.schabi.newpipe.extractor.NewPipe; +import org.schabi.newpipe.extractor.exceptions.ExtractionException; import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeParsingHelper; import java.io.IOException; @@ -17,7 +18,7 @@ public class YoutubeParsingHelperTest { } @Test - public void testIsHardcodedClientVersionValid() throws IOException { + public void testIsHardcodedClientVersionValid() throws IOException, ExtractionException { assertTrue("Hardcoded client version is not valid anymore", 
YoutubeParsingHelper.isHardcodedClientVersionValid()); } From 408f0421276da45410af1206e7de761865829f84 Mon Sep 17 00:00:00 2001 From: Mauricio Colli Date: Sat, 29 Feb 2020 18:57:25 -0300 Subject: [PATCH 08/11] [YouTube] Fix bug when url isn't present in the browseEndpoint object --- .../extractors/YoutubeSearchExtractor.java | 4 ++-- .../extractors/YoutubeStreamExtractor.java | 8 +++++-- .../linkHandler/YoutubeParsingHelper.java | 21 +++++++++++++++---- .../youtube/YoutubePlaylistExtractorTest.java | 2 +- .../YoutubeSearchExtractorDefaultTest.java | 2 +- 5 files changed, 27 insertions(+), 10 deletions(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeSearchExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeSearchExtractor.java index 0449a9dbe..6fec11643 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeSearchExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeSearchExtractor.java @@ -63,7 +63,7 @@ public class YoutubeSearchExtractor extends SearchExtractor { } @Override - public String getSearchSuggestion() { + public String getSearchSuggestion() throws ParsingException { JsonObject showingResultsForRenderer = initialData.getObject("contents") .getObject("twoColumnSearchResultsRenderer").getObject("primaryContents") .getObject("sectionListRenderer").getArray("contents").getObject(0) @@ -114,7 +114,7 @@ public class YoutubeSearchExtractor extends SearchExtractor { return new InfoItemsPage<>(collector, getNextPageUrlFrom(itemSectionRenderer.getArray("continuations"))); } - private void collectStreamsFrom(InfoItemsSearchCollector collector, JsonArray videos) throws NothingFoundException { + private void collectStreamsFrom(InfoItemsSearchCollector collector, JsonArray videos) throws NothingFoundException, ParsingException { collector.reset(); final 
TimeAgoParser timeAgoParser = getTimeAgoParser(); diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java index 3a66dd3cb..6aad8cf20 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java @@ -564,8 +564,12 @@ public class YoutubeStreamExtractor extends StreamExtractor { */ @Override public String getErrorMessage() { - return getTextFromObject(initialAjaxJson.getObject(2).getObject("playerResponse").getObject("playabilityStatus") - .getObject("errorScreen").getObject("playerErrorMessageRenderer").getObject("reason")); + try { + return getTextFromObject(initialAjaxJson.getObject(2).getObject("playerResponse").getObject("playabilityStatus") + .getObject("errorScreen").getObject("playerErrorMessageRenderer").getObject("reason")); + } catch (ParsingException e) { + return null; + } } /*////////////////////////////////////////////////////////////////////////// diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeParsingHelper.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeParsingHelper.java index 5b37e4c9d..32f6769fa 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeParsingHelper.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeParsingHelper.java @@ -253,7 +253,7 @@ public class YoutubeParsingHelper { throw new ParsingException("Could not get client version"); } - public static String getUrlFromNavigationEndpoint(JsonObject navigationEndpoint) { + public static String getUrlFromNavigationEndpoint(JsonObject 
navigationEndpoint) throws ParsingException { if (navigationEndpoint.getObject("urlEndpoint") != null) { String internUrl = navigationEndpoint.getObject("urlEndpoint").getString("url"); if (internUrl.startsWith("/redirect?")) { @@ -275,7 +275,20 @@ public class YoutubeParsingHelper { return internUrl; } } else if (navigationEndpoint.getObject("browseEndpoint") != null) { - return "https://www.youtube.com" + navigationEndpoint.getObject("browseEndpoint").getString("canonicalBaseUrl"); + final JsonObject browseEndpoint = navigationEndpoint.getObject("browseEndpoint"); + final String canonicalBaseUrl = browseEndpoint.getString("canonicalBaseUrl"); + final String browseId = browseEndpoint.getString("browseId"); + + // All channel ids are prefixed with UC + if (browseId != null && browseId.startsWith("UC")) { + return "https://www.youtube.com/channel/" + browseId; + } + + if (canonicalBaseUrl != null && !canonicalBaseUrl.isEmpty()) { + return "https://www.youtube.com" + canonicalBaseUrl; + } + + throw new ParsingException("canonicalBaseUrl is null and browseId is not a channel (\"" + browseEndpoint + "\")"); } else if (navigationEndpoint.getObject("watchEndpoint") != null) { StringBuilder url = new StringBuilder(); url.append("https://www.youtube.com/watch?v=").append(navigationEndpoint.getObject("watchEndpoint").getString("videoId")); @@ -288,7 +301,7 @@ public class YoutubeParsingHelper { return null; } - public static String getTextFromObject(JsonObject textObject, boolean html) { + public static String getTextFromObject(JsonObject textObject, boolean html) throws ParsingException { if (textObject.has("simpleText")) return textObject.getString("simpleText"); StringBuilder textBuilder = new StringBuilder(); @@ -314,7 +327,7 @@ public class YoutubeParsingHelper { return text; } - public static String getTextFromObject(JsonObject textObject) { + public static String getTextFromObject(JsonObject textObject) throws ParsingException { return getTextFromObject(textObject, 
false); } diff --git a/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubePlaylistExtractorTest.java b/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubePlaylistExtractorTest.java index 7c9112798..d4de9175e 100644 --- a/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubePlaylistExtractorTest.java +++ b/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubePlaylistExtractorTest.java @@ -99,7 +99,7 @@ public class YoutubePlaylistExtractorTest { @Test public void testUploaderUrl() throws Exception { - assertEquals("https://www.youtube.com/user/andre0y0you", extractor.getUploaderUrl()); + assertEquals("https://www.youtube.com/channel/UCs72iRpTEuwV3y6pdWYLgiw", extractor.getUploaderUrl()); } @Test diff --git a/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/search/YoutubeSearchExtractorDefaultTest.java b/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/search/YoutubeSearchExtractorDefaultTest.java index 65ffe839d..1f905bdec 100644 --- a/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/search/YoutubeSearchExtractorDefaultTest.java +++ b/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/search/YoutubeSearchExtractorDefaultTest.java @@ -124,7 +124,7 @@ public class YoutubeSearchExtractorDefaultTest extends YoutubeSearchExtractorBas } @Test - public void testSuggestionNotNull() { + public void testSuggestionNotNull() throws Exception { //todo write a real test assertNotNull(extractor.getSearchSuggestion()); } From 98e359438a4bdcd252b7fa6bb5fe9a48fde5d0be Mon Sep 17 00:00:00 2001 From: Mauricio Colli Date: Sat, 29 Feb 2020 21:50:31 -0300 Subject: [PATCH 09/11] [YouTube] Detect simple 404s in the standard fetch method --- .../youtube/linkHandler/YoutubeParsingHelper.java | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git 
a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeParsingHelper.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeParsingHelper.java index 32f6769fa..87d37f8e5 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeParsingHelper.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeParsingHelper.java @@ -8,6 +8,7 @@ import com.grack.nanojson.JsonParserException; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.schabi.newpipe.extractor.downloader.Response; +import org.schabi.newpipe.extractor.exceptions.ContentNotAvailableException; import org.schabi.newpipe.extractor.exceptions.ExtractionException; import org.schabi.newpipe.extractor.exceptions.ParsingException; import org.schabi.newpipe.extractor.exceptions.ReCaptchaException; @@ -349,14 +350,20 @@ public class YoutubeParsingHelper { Map> headers = new HashMap<>(); headers.put("X-YouTube-Client-Name", Collections.singletonList("1")); headers.put("X-YouTube-Client-Version", Collections.singletonList(getClientVersion())); - final String response = getDownloader().get(url, headers, localization).responseBody(); + final Response response = getDownloader().get(url, headers, localization); - if (response.length() < 50) { // ensure to have a valid response + if (response.responseCode() == 404) { + throw new ContentNotAvailableException("Not found" + + " (\"" + response.responseCode() + " " + response.responseMessage() + "\")"); + } + + final String responseBody = response.responseBody(); + if (responseBody.length() < 50) { // ensure to have a valid response throw new ParsingException("JSON response is too short"); } try { - return JsonParser.array().from(response); + return JsonParser.array().from(responseBody); } catch (JsonParserException e) { throw new ParsingException("Could not parse JSON", e); } From 
e65333c3cea86b6fa8197044b8969b21df47291e Mon Sep 17 00:00:00 2001 From: Mauricio Colli Date: Sat, 29 Feb 2020 21:52:25 -0300 Subject: [PATCH 10/11] [YouTube] Detect deleted/nonexistent/invalid channels and playlists - Added tests for these cases. --- .../extractors/YoutubeChannelExtractor.java | 2 +- .../extractors/YoutubePlaylistExtractor.java | 3 ++ .../linkHandler/YoutubeParsingHelper.java | 35 +++++++++++++++++++ .../youtube/YoutubeChannelExtractorTest.java | 23 ++++++++++++ .../youtube/YoutubePlaylistExtractorTest.java | 23 ++++++++++++ 5 files changed, 85 insertions(+), 1 deletion(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelExtractor.java index 91450d474..11dd8985f 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelExtractor.java @@ -58,8 +58,8 @@ public class YoutubeChannelExtractor extends ChannelExtractor { final String url = super.getUrl() + "/videos?pbj=1&view=0&flow=grid"; final JsonArray ajaxJson = getJsonResponse(url, getExtractorLocalization()); - initialData = ajaxJson.getObject(1).getObject("response"); + YoutubeParsingHelper.defaultAlertsCheck(initialData); } diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubePlaylistExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubePlaylistExtractor.java index 0f57a21a7..eef85aa9d 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubePlaylistExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubePlaylistExtractor.java @@ -10,6 +10,7 @@ import 
org.schabi.newpipe.extractor.exceptions.ParsingException; import org.schabi.newpipe.extractor.linkhandler.ListLinkHandler; import org.schabi.newpipe.extractor.localization.TimeAgoParser; import org.schabi.newpipe.extractor.playlist.PlaylistExtractor; +import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeParsingHelper; import org.schabi.newpipe.extractor.stream.StreamInfoItem; import org.schabi.newpipe.extractor.stream.StreamInfoItemsCollector; import org.schabi.newpipe.extractor.utils.Utils; @@ -39,6 +40,8 @@ public class YoutubePlaylistExtractor extends PlaylistExtractor { final JsonArray ajaxJson = getJsonResponse(url, getExtractorLocalization()); initialData = ajaxJson.getObject(1).getObject("response"); + YoutubeParsingHelper.defaultAlertsCheck(initialData); + playlistInfo = getPlaylistInfo(); } diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeParsingHelper.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeParsingHelper.java index 87d37f8e5..d2b0a7da6 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeParsingHelper.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeParsingHelper.java @@ -362,10 +362,45 @@ public class YoutubeParsingHelper { throw new ParsingException("JSON response is too short"); } + // Check if the request was redirected to the error page. 
+ final URL latestUrl = new URL(response.latestUrl()); + if (latestUrl.getHost().equalsIgnoreCase("www.youtube.com")) { + final String path = latestUrl.getPath(); + if (path.equalsIgnoreCase("/oops") || path.equalsIgnoreCase("/error")) { + throw new ContentNotAvailableException("Content unavailable"); + } + } + + final String responseContentType = response.getHeader("Content-Type"); + if (responseContentType != null && responseContentType.toLowerCase().contains("text/html")) { + throw new ParsingException("Got HTML document, expected JSON response" + + " (latest url was: \"" + response.latestUrl() + "\")"); + } + try { return JsonParser.array().from(responseBody); } catch (JsonParserException e) { throw new ParsingException("Could not parse JSON", e); } } + + /** + * Shared alert detection function; multiple endpoints return errors with a similar structure. + * 

+ * Will check if the object has an alert of the type "ERROR". + * + * @param initialData the object which will be checked if an alert is present + * @throws ContentNotAvailableException if an alert is detected + */ + public static void defaultAlertsCheck(JsonObject initialData) throws ContentNotAvailableException { + final JsonArray alerts = initialData.getArray("alerts"); + if (alerts != null && !alerts.isEmpty()) { + final JsonObject alertRenderer = alerts.getObject(0).getObject("alertRenderer"); + final String alertText = alertRenderer.getObject("text").getString("simpleText"); + final String alertType = alertRenderer.getString("type"); + if (alertType.equalsIgnoreCase("ERROR")) { + throw new ContentNotAvailableException("Got error: \"" + alertText + "\""); + } + } + } } diff --git a/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeChannelExtractorTest.java b/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeChannelExtractorTest.java index fc4ffff31..4d69a6c89 100644 --- a/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeChannelExtractorTest.java +++ b/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeChannelExtractorTest.java @@ -6,6 +6,7 @@ import org.schabi.newpipe.DownloaderTestImpl; import org.schabi.newpipe.extractor.NewPipe; import org.schabi.newpipe.extractor.ServiceList; import org.schabi.newpipe.extractor.channel.ChannelExtractor; +import org.schabi.newpipe.extractor.exceptions.ContentNotAvailableException; import org.schabi.newpipe.extractor.exceptions.ParsingException; import org.schabi.newpipe.extractor.services.BaseChannelExtractorTest; import org.schabi.newpipe.extractor.services.youtube.extractors.YoutubeChannelExtractor; @@ -19,6 +20,28 @@ import static org.schabi.newpipe.extractor.services.DefaultTests.*; * Test for {@link ChannelExtractor} */ public class YoutubeChannelExtractorTest { + + public static class NotAvailable { + 
@BeforeClass + public static void setUp() { + NewPipe.init(DownloaderTestImpl.getInstance()); + } + + @Test(expected = ContentNotAvailableException.class) + public void deletedFetch() throws Exception { + final ChannelExtractor extractor = + YouTube.getChannelExtractor("https://www.youtube.com/channel/UCAUc4iz6edWerIjlnL8OSSw"); + extractor.fetchPage(); + } + + @Test(expected = ContentNotAvailableException.class) + public void nonExistentFetch() throws Exception { + final ChannelExtractor extractor = + YouTube.getChannelExtractor("https://www.youtube.com/channel/DOESNT-EXIST"); + extractor.fetchPage(); + } + } + public static class Gronkh implements BaseChannelExtractorTest { private static YoutubeChannelExtractor extractor; diff --git a/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubePlaylistExtractorTest.java b/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubePlaylistExtractorTest.java index d4de9175e..67584195c 100644 --- a/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubePlaylistExtractorTest.java +++ b/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubePlaylistExtractorTest.java @@ -7,6 +7,7 @@ import org.schabi.newpipe.DownloaderTestImpl; import org.schabi.newpipe.extractor.ListExtractor; import org.schabi.newpipe.extractor.NewPipe; import org.schabi.newpipe.extractor.ServiceList; +import org.schabi.newpipe.extractor.exceptions.ContentNotAvailableException; import org.schabi.newpipe.extractor.exceptions.ParsingException; import org.schabi.newpipe.extractor.playlist.PlaylistExtractor; import org.schabi.newpipe.extractor.services.BasePlaylistExtractorTest; @@ -23,6 +24,28 @@ import static org.schabi.newpipe.extractor.services.DefaultTests.*; * Test for {@link YoutubePlaylistExtractor} */ public class YoutubePlaylistExtractorTest { + + public static class NotAvailable { + @BeforeClass + public static void setUp() { + 
NewPipe.init(DownloaderTestImpl.getInstance()); + } + + @Test(expected = ContentNotAvailableException.class) + public void nonExistentFetch() throws Exception { + final PlaylistExtractor extractor = + YouTube.getPlaylistExtractor("https://www.youtube.com/playlist?list=PL11111111111111111111111111111111"); + extractor.fetchPage(); + } + + @Test(expected = ContentNotAvailableException.class) + public void invalidId() throws Exception { + final PlaylistExtractor extractor = + YouTube.getPlaylistExtractor("https://www.youtube.com/playlist?list=INVALID_ID"); + extractor.fetchPage(); + } + } + public static class TimelessPopHits implements BasePlaylistExtractorTest { private static YoutubePlaylistExtractor extractor; From 5686a6f562619c50bed53904eaafbc2aa85c8dbe Mon Sep 17 00:00:00 2001 From: Mauricio Colli Date: Sat, 29 Feb 2020 22:00:33 -0300 Subject: [PATCH 11/11] [YouTube] Detect when a stream is deleted or doesn't exist Added a test case as well. --- .../extractors/YoutubeStreamExtractor.java | 9 ++++++++ .../newpipe/extractor/utils/JsonUtils.java | 1 + .../YoutubeStreamExtractorDefaultTest.java | 22 +++++++++++++++++++ 3 files changed, 32 insertions(+) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java index 6aad8cf20..13f881101 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java @@ -11,6 +11,7 @@ import org.schabi.newpipe.extractor.MediaFormat; import org.schabi.newpipe.extractor.NewPipe; import org.schabi.newpipe.extractor.StreamingService; import org.schabi.newpipe.extractor.downloader.Downloader; +import org.schabi.newpipe.extractor.exceptions.ContentNotAvailableException; import 
org.schabi.newpipe.extractor.exceptions.ExtractionException; import org.schabi.newpipe.extractor.exceptions.ParsingException; import org.schabi.newpipe.extractor.exceptions.ReCaptchaException; @@ -32,6 +33,7 @@ import org.schabi.newpipe.extractor.stream.StreamInfoItemsCollector; import org.schabi.newpipe.extractor.stream.StreamType; import org.schabi.newpipe.extractor.stream.SubtitlesStream; import org.schabi.newpipe.extractor.stream.VideoStream; +import org.schabi.newpipe.extractor.utils.JsonUtils; import org.schabi.newpipe.extractor.utils.Parser; import org.schabi.newpipe.extractor.utils.Utils; @@ -619,6 +621,13 @@ public class YoutubeStreamExtractor extends StreamExtractor { playerResponse = getPlayerResponse(); + final JsonObject playabilityStatus = playerResponse.getObject("playabilityStatus", JsonUtils.DEFAULT_EMPTY); + final String status = playabilityStatus.getString("status"); + if (status != null && status.toLowerCase().equals("error")) { + final String reason = playabilityStatus.getString("reason"); + throw new ContentNotAvailableException("Got error: \"" + reason + "\""); + } + if (decryptionCode.isEmpty()) { decryptionCode = loadDecryptionCode(playerUrl); } diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/utils/JsonUtils.java b/extractor/src/main/java/org/schabi/newpipe/extractor/utils/JsonUtils.java index ab916379e..25bb3f6c0 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/utils/JsonUtils.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/utils/JsonUtils.java @@ -11,6 +11,7 @@ import java.util.Arrays; import java.util.List; public class JsonUtils { + public static final JsonObject DEFAULT_EMPTY = new JsonObject(); private JsonUtils() { } diff --git a/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/stream/YoutubeStreamExtractorDefaultTest.java b/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/stream/YoutubeStreamExtractorDefaultTest.java index 
d6cf3815f..0d36fd9a8 100644 --- a/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/stream/YoutubeStreamExtractorDefaultTest.java +++ b/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/stream/YoutubeStreamExtractorDefaultTest.java @@ -7,6 +7,7 @@ import org.schabi.newpipe.DownloaderTestImpl; import org.schabi.newpipe.extractor.ExtractorAsserts; import org.schabi.newpipe.extractor.MediaFormat; import org.schabi.newpipe.extractor.NewPipe; +import org.schabi.newpipe.extractor.exceptions.ContentNotAvailableException; import org.schabi.newpipe.extractor.exceptions.ExtractionException; import org.schabi.newpipe.extractor.exceptions.ParsingException; import org.schabi.newpipe.extractor.services.youtube.extractors.YoutubeStreamExtractor; @@ -56,6 +57,27 @@ import static org.schabi.newpipe.extractor.ServiceList.YouTube; */ public class YoutubeStreamExtractorDefaultTest { + public static class NotAvailable { + @BeforeClass + public static void setUp() { + NewPipe.init(DownloaderTestImpl.getInstance()); + } + + @Test(expected = ContentNotAvailableException.class) + public void nonExistentFetch() throws Exception { + final StreamExtractor extractor = + YouTube.getStreamExtractor("https://www.youtube.com/watch?v=don-t-exist"); + extractor.fetchPage(); + } + + @Test(expected = ParsingException.class) + public void invalidId() throws Exception { + final StreamExtractor extractor = + YouTube.getStreamExtractor("https://www.youtube.com/watch?v=INVALID_ID_INVALID_ID"); + extractor.fetchPage(); + } + } + /** * Test for {@link StreamExtractor} */