From c190a3029bf0a3ae36e584f1e74e83e4ba8137c4 Mon Sep 17 00:00:00 2001 From: Stypox Date: Sat, 27 Jun 2020 22:58:12 +0200 Subject: [PATCH 1/3] Consider protocol as base url when it is a custom one (e.g. vnd.youtube) --- .../services/youtube/YoutubeParsingHelper.java | 6 ------ .../YoutubeCommentsLinkHandlerFactory.java | 12 ------------ .../linkHandler/YoutubeStreamLinkHandlerFactory.java | 12 ------------ .../org/schabi/newpipe/extractor/utils/Utils.java | 11 ++++++++--- .../schabi/newpipe/extractor/utils/UtilsTest.java | 9 +++++++++ 5 files changed, 17 insertions(+), 33 deletions(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeParsingHelper.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeParsingHelper.java index ac7db9855..a8ca2a03a 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeParsingHelper.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeParsingHelper.java @@ -55,12 +55,6 @@ public class YoutubeParsingHelper { private YoutubeParsingHelper() { } - /** - * The official youtube app supports intents in this format, where after the ':' is the videoId. - * Accordingly there are other apps sharing streams in this format. 
- */ - public final static String BASE_YOUTUBE_INTENT_URL = "vnd.youtube"; - private static final String HARDCODED_CLIENT_VERSION = "2.20200214.04.00"; private static String clientVersion; diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeCommentsLinkHandlerFactory.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeCommentsLinkHandlerFactory.java index 15bc31b66..421fc13f3 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeCommentsLinkHandlerFactory.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeCommentsLinkHandlerFactory.java @@ -1,10 +1,7 @@ package org.schabi.newpipe.extractor.services.youtube.linkHandler; -import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.BASE_YOUTUBE_INTENT_URL; - import org.schabi.newpipe.extractor.exceptions.FoundAdException; import org.schabi.newpipe.extractor.exceptions.ParsingException; -import org.schabi.newpipe.extractor.linkhandler.ListLinkHandler; import org.schabi.newpipe.extractor.linkhandler.ListLinkHandlerFactory; import java.util.List; @@ -17,15 +14,6 @@ public class YoutubeCommentsLinkHandlerFactory extends ListLinkHandlerFactory { return instance; } - @Override - public ListLinkHandler fromUrl(String url) throws ParsingException { - if (url.startsWith(BASE_YOUTUBE_INTENT_URL)){ - return super.fromUrl(url, BASE_YOUTUBE_INTENT_URL); - } else { - return super.fromUrl(url); - } - } - @Override public String getUrl(String id) { return "https://m.youtube.com/watch?v=" + id; diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeStreamLinkHandlerFactory.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeStreamLinkHandlerFactory.java index bcc1fac58..efc06da2a 100644 --- 
a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeStreamLinkHandlerFactory.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeStreamLinkHandlerFactory.java @@ -2,7 +2,6 @@ package org.schabi.newpipe.extractor.services.youtube.linkHandler; import org.schabi.newpipe.extractor.exceptions.FoundAdException; import org.schabi.newpipe.extractor.exceptions.ParsingException; -import org.schabi.newpipe.extractor.linkhandler.LinkHandler; import org.schabi.newpipe.extractor.linkhandler.LinkHandlerFactory; import org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper; import org.schabi.newpipe.extractor.utils.Utils; @@ -15,8 +14,6 @@ import java.net.URL; import java.util.regex.Matcher; import java.util.regex.Pattern; -import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.BASE_YOUTUBE_INTENT_URL; - /* * Created by Christian Schabesberger on 02.02.16. * @@ -67,15 +64,6 @@ public class YoutubeStreamLinkHandlerFactory extends LinkHandlerFactory { } } - @Override - public LinkHandler fromUrl(String url) throws ParsingException { - if (url.startsWith(BASE_YOUTUBE_INTENT_URL)) { - return super.fromUrl(url, BASE_YOUTUBE_INTENT_URL); - } else { - return super.fromUrl(url); - } - } - @Override public String getUrl(String id) { return "https://www.youtube.com/watch?v=" + id; diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/utils/Utils.java b/extractor/src/main/java/org/schabi/newpipe/extractor/utils/Utils.java index 288e401c3..c6bd508a2 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/utils/Utils.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/utils/Utils.java @@ -182,13 +182,18 @@ public class Utils { } public static String getBaseUrl(String url) throws ParsingException { - URL uri; try { - uri = stringToURL(url); + final URL uri = stringToURL(url); + return uri.getProtocol() + "://" + uri.getAuthority(); } 
catch (MalformedURLException e) { + final String message = e.getMessage(); + if (message.startsWith("unknown protocol: ")) { + System.out.println(message.substring(18)); + return message.substring(18); // return just the protocol (e.g. vnd.youtube) + } + throw new ParsingException("Malformed url: " + url, e); } - return uri.getProtocol() + "://" + uri.getAuthority(); } public static boolean isNullOrEmpty(final String str) { diff --git a/extractor/src/test/java/org/schabi/newpipe/extractor/utils/UtilsTest.java b/extractor/src/test/java/org/schabi/newpipe/extractor/utils/UtilsTest.java index 5b0dfdb3d..dcccc16da 100644 --- a/extractor/src/test/java/org/schabi/newpipe/extractor/utils/UtilsTest.java +++ b/extractor/src/test/java/org/schabi/newpipe/extractor/utils/UtilsTest.java @@ -21,4 +21,13 @@ public class UtilsTest { public void testJoin() { assertEquals("some,random,stuff", Utils.join(",", Arrays.asList("some", "random", "stuff"))); } + + @Test + public void testGetBaseUrl() throws ParsingException { + assertEquals("https://www.youtube.com", Utils.getBaseUrl("https://www.youtube.com/watch?v=Hu80uDzh8RY")); + assertEquals("vnd.youtube", Utils.getBaseUrl("vnd.youtube://www.youtube.com/watch?v=jZViOEv90dI")); + assertEquals("vnd.youtube", Utils.getBaseUrl("vnd.youtube:jZViOEv90dI")); + assertEquals("vnd.youtube", Utils.getBaseUrl("vnd.youtube://n8X9_MgEdCg")); + assertEquals("https://music.youtube.com", Utils.getBaseUrl("https://music.youtube.com/watch?v=O0EDx9WAelc")); + } } From 3fe55b30ba43ff44241e394d8b021b8c78c16e18 Mon Sep 17 00:00:00 2001 From: Stypox Date: Sat, 27 Jun 2020 15:47:49 +0200 Subject: [PATCH 2/3] Add support for Google search redirect url --- .../newpipe/extractor/StreamingService.java | 10 ++++++---- .../linkhandler/LinkHandlerFactory.java | 18 +++++++++++++++++ .../linkhandler/ListLinkHandlerFactory.java | 3 ++- .../schabi/newpipe/extractor/utils/Utils.java | 20 +++++++++++++++++++ .../schabi/newpipe/extractor/NewPipeTest.java | 5 ++++- 
.../newpipe/extractor/utils/UtilsTest.java | 15 ++++++++++++++ 6 files changed, 65 insertions(+), 6 deletions(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/StreamingService.java b/extractor/src/main/java/org/schabi/newpipe/extractor/StreamingService.java index e21b17f3b..3d09d5094 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/StreamingService.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/StreamingService.java @@ -16,6 +16,7 @@ import org.schabi.newpipe.extractor.search.SearchExtractor; import org.schabi.newpipe.extractor.stream.StreamExtractor; import org.schabi.newpipe.extractor.subscription.SubscriptionExtractor; import org.schabi.newpipe.extractor.suggestion.SuggestionExtractor; +import org.schabi.newpipe.extractor.utils.Utils; import javax.annotation.Nullable; import java.util.Collections; @@ -277,12 +278,13 @@ public abstract class StreamingService { * Figures out where the link is pointing to (a channel, a video, a playlist, etc.) 
* @param url the url on which it should be decided of which link type it is * @return the link type of url - * @throws ParsingException */ public final LinkType getLinkTypeByUrl(String url) throws ParsingException { - LinkHandlerFactory sH = getStreamLHFactory(); - LinkHandlerFactory cH = getChannelLHFactory(); - LinkHandlerFactory pH = getPlaylistLHFactory(); + url = Utils.followGoogleRedirectIfNeeded(url); + + final LinkHandlerFactory sH = getStreamLHFactory(); + final LinkHandlerFactory cH = getChannelLHFactory(); + final LinkHandlerFactory pH = getPlaylistLHFactory(); if (sH != null && sH.acceptUrl(url)) { return LinkType.STREAM; diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/linkhandler/LinkHandlerFactory.java b/extractor/src/main/java/org/schabi/newpipe/extractor/linkhandler/LinkHandlerFactory.java index 6bba7b4e5..7dcfe5f48 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/linkhandler/LinkHandlerFactory.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/linkhandler/LinkHandlerFactory.java @@ -42,12 +42,30 @@ public abstract class LinkHandlerFactory { // Logic /////////////////////////////////// + /** + * Builds a {@link LinkHandler} from a url.
+ * Be sure to call {@link Utils#followGoogleRedirectIfNeeded(String)} on the url if overriding + * this function. + * @param url the url to extract path and id from + * @return a {@link LinkHandler} complete with information + */ public LinkHandler fromUrl(String url) throws ParsingException { if (url == null) throw new IllegalArgumentException("url can not be null"); + url = Utils.followGoogleRedirectIfNeeded(url); final String baseUrl = Utils.getBaseUrl(url); return fromUrl(url, baseUrl); } + /** + * Builds a {@link LinkHandler} from a url and a base url. The url is expected to be already + * polished from google search redirects (otherwise how could {@code baseUrl} have been + * extracted?).
+ * So do not call {@link Utils#followGoogleRedirectIfNeeded(String)} on the url if overriding + * this function, since that should be done in {@link #fromUrl(String)}. + * @param url the url without google search redirects to extract id from + * @param baseUrl the base url + * @return a {@link LinkHandler} complete with information + */ public LinkHandler fromUrl(String url, String baseUrl) throws ParsingException { if (url == null) throw new IllegalArgumentException("url can not be null"); if (!acceptUrl(url)) { diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/linkhandler/ListLinkHandlerFactory.java b/extractor/src/main/java/org/schabi/newpipe/extractor/linkhandler/ListLinkHandlerFactory.java index 9ea478b02..cdbbab4f0 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/linkhandler/ListLinkHandlerFactory.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/linkhandler/ListLinkHandlerFactory.java @@ -32,7 +32,8 @@ public abstract class ListLinkHandlerFactory extends LinkHandlerFactory { @Override public ListLinkHandler fromUrl(String url) throws ParsingException { - String baseUrl = Utils.getBaseUrl(url); + url = Utils.followGoogleRedirectIfNeeded(url); + final String baseUrl = Utils.getBaseUrl(url); return fromUrl(url, baseUrl); } diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/utils/Utils.java b/extractor/src/main/java/org/schabi/newpipe/extractor/utils/Utils.java index c6bd508a2..e8823af86 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/utils/Utils.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/utils/Utils.java @@ -196,6 +196,26 @@ public class Utils { } } + /** + * If the provided url is a Google search redirect, then the actual url is extracted from the + * {@code url=} query value and returned, otherwise the original url is returned. 
+ * @param url the url which can possibly be a Google search redirect + * @return an url with no Google search redirects + */ + public static String followGoogleRedirectIfNeeded(final String url) { + // if the url is a redirect from a Google search, extract the actual url + try { + final URL decoded = Utils.stringToURL(url); + if (decoded.getHost().contains("google") && decoded.getPath().equals("/url")) { + return URLDecoder.decode(Parser.matchGroup1("&url=([^&]+)(?:&|$)", url), "UTF-8"); + } + } catch (final Exception ignored) { + } + + // url is not a google search redirect + return url; + } + public static boolean isNullOrEmpty(final String str) { return str == null || str.isEmpty(); } diff --git a/extractor/src/test/java/org/schabi/newpipe/extractor/NewPipeTest.java b/extractor/src/test/java/org/schabi/newpipe/extractor/NewPipeTest.java index bdad6cb6b..5dbc43174 100644 --- a/extractor/src/test/java/org/schabi/newpipe/extractor/NewPipeTest.java +++ b/extractor/src/test/java/org/schabi/newpipe/extractor/NewPipeTest.java @@ -6,6 +6,7 @@ import java.util.HashSet; import static org.junit.Assert.*; import static org.schabi.newpipe.extractor.NewPipe.getServiceByUrl; +import static org.schabi.newpipe.extractor.ServiceList.SoundCloud; import static org.schabi.newpipe.extractor.ServiceList.YouTube; public class NewPipeTest { @@ -39,8 +40,10 @@ public class NewPipeTest { assertEquals(getServiceByUrl("https://www.youtube.com/watch?v=_r6CgaFNAGg"), YouTube); assertEquals(getServiceByUrl("https://www.youtube.com/channel/UCi2bIyFtz-JdI-ou8kaqsqg"), YouTube); assertEquals(getServiceByUrl("https://www.youtube.com/playlist?list=PLRqwX-V7Uu6ZiZxtDDRCi6uhfTH4FilpH"), YouTube); + assertEquals(getServiceByUrl("https://www.google.it/url?sa=t&rct=j&q=&esrc=s&cd=&cad=rja&uact=8&url=https%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DHu80uDzh8RY&source=video"), YouTube); - assertNotEquals(getServiceByUrl("https://soundcloud.com/pegboardnerds"), YouTube); + 
 assertEquals(getServiceByUrl("https://soundcloud.com/pegboardnerds"), SoundCloud); + assertEquals(getServiceByUrl("https://www.google.com/url?sa=t&url=https%3A%2F%2Fsoundcloud.com%2Fciaoproduction&rct=j&q=&esrc=s&source=web&cd="), SoundCloud); } @Test diff --git a/extractor/src/test/java/org/schabi/newpipe/extractor/utils/UtilsTest.java b/extractor/src/test/java/org/schabi/newpipe/extractor/utils/UtilsTest.java index dcccc16da..e4a65505b 100644 --- a/extractor/src/test/java/org/schabi/newpipe/extractor/utils/UtilsTest.java +++ b/extractor/src/test/java/org/schabi/newpipe/extractor/utils/UtilsTest.java @@ -30,4 +30,19 @@ public class UtilsTest { assertEquals("vnd.youtube", Utils.getBaseUrl("vnd.youtube://n8X9_MgEdCg")); assertEquals("https://music.youtube.com", Utils.getBaseUrl("https://music.youtube.com/watch?v=O0EDx9WAelc")); } + + @Test + public void testFollowGoogleRedirect() { + assertEquals("https://www.youtube.com/watch?v=Hu80uDzh8RY", + Utils.followGoogleRedirectIfNeeded("https://www.google.it/url?sa=t&rct=j&q=&esrc=s&cd=&cad=rja&uact=8&url=https%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DHu80uDzh8RY&source=video")); + assertEquals("https://www.youtube.com/watch?v=0b6cFWG45kA", + Utils.followGoogleRedirectIfNeeded("https://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=video&cd=&cad=rja&uact=8&url=https%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3D0b6cFWG45kA")); + assertEquals("https://soundcloud.com/ciaoproduction", + Utils.followGoogleRedirectIfNeeded("https://www.google.com/url?sa=t&url=https%3A%2F%2Fsoundcloud.com%2Fciaoproduction&rct=j&q=&esrc=s&source=web&cd=")); + + assertEquals("https://www.youtube.com/watch?v=Hu80uDzh8RY&param=xyz", + Utils.followGoogleRedirectIfNeeded("https://www.youtube.com/watch?v=Hu80uDzh8RY&param=xyz")); + assertEquals("https://www.youtube.com/watch?v=Hu80uDzh8RY&url=hello", + Utils.followGoogleRedirectIfNeeded("https://www.youtube.com/watch?v=Hu80uDzh8RY&url=hello")); + } } From 9e53cf0b56652c37e68d3c5a03624f0b6d8fff05 Mon Sep 17 00:00:00 2001
From: Stypox Date: Sun, 28 Jun 2020 22:44:16 +0200 Subject: [PATCH 3/3] Fix parameter reassignment and other style issues Also remove left-behind debug statement --- .../org/schabi/newpipe/extractor/StreamingService.java | 10 +++++----- .../extractor/linkhandler/LinkHandlerFactory.java | 9 ++++----- .../extractor/linkhandler/ListLinkHandlerFactory.java | 8 ++++---- .../java/org/schabi/newpipe/extractor/utils/Utils.java | 8 ++++---- 4 files changed, 17 insertions(+), 18 deletions(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/StreamingService.java b/extractor/src/main/java/org/schabi/newpipe/extractor/StreamingService.java index 3d09d5094..dcde0aff6 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/StreamingService.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/StreamingService.java @@ -279,18 +279,18 @@ public abstract class StreamingService { * @param url the url on which it should be decided of which link type it is * @return the link type of url */ - public final LinkType getLinkTypeByUrl(String url) throws ParsingException { - url = Utils.followGoogleRedirectIfNeeded(url); + public final LinkType getLinkTypeByUrl(final String url) throws ParsingException { + final String polishedUrl = Utils.followGoogleRedirectIfNeeded(url); final LinkHandlerFactory sH = getStreamLHFactory(); final LinkHandlerFactory cH = getChannelLHFactory(); final LinkHandlerFactory pH = getPlaylistLHFactory(); - if (sH != null && sH.acceptUrl(url)) { + if (sH != null && sH.acceptUrl(polishedUrl)) { return LinkType.STREAM; - } else if (cH != null && cH.acceptUrl(url)) { + } else if (cH != null && cH.acceptUrl(polishedUrl)) { return LinkType.CHANNEL; - } else if (pH != null && pH.acceptUrl(url)) { + } else if (pH != null && pH.acceptUrl(polishedUrl)) { return LinkType.PLAYLIST; } else { return LinkType.NONE; diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/linkhandler/LinkHandlerFactory.java 
b/extractor/src/main/java/org/schabi/newpipe/extractor/linkhandler/LinkHandlerFactory.java index 7dcfe5f48..ca428b706 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/linkhandler/LinkHandlerFactory.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/linkhandler/LinkHandlerFactory.java @@ -49,11 +49,10 @@ public abstract class LinkHandlerFactory { * @param url the url to extract path and id from * @return a {@link LinkHandler} complete with information */ - public LinkHandler fromUrl(String url) throws ParsingException { - if (url == null) throw new IllegalArgumentException("url can not be null"); - url = Utils.followGoogleRedirectIfNeeded(url); - final String baseUrl = Utils.getBaseUrl(url); - return fromUrl(url, baseUrl); + public LinkHandler fromUrl(final String url) throws ParsingException { + final String polishedUrl = Utils.followGoogleRedirectIfNeeded(url); + final String baseUrl = Utils.getBaseUrl(polishedUrl); + return fromUrl(polishedUrl, baseUrl); } /** diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/linkhandler/ListLinkHandlerFactory.java b/extractor/src/main/java/org/schabi/newpipe/extractor/linkhandler/ListLinkHandlerFactory.java index cdbbab4f0..4980c3191 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/linkhandler/ListLinkHandlerFactory.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/linkhandler/ListLinkHandlerFactory.java @@ -31,10 +31,10 @@ public abstract class ListLinkHandlerFactory extends LinkHandlerFactory { /////////////////////////////////// @Override - public ListLinkHandler fromUrl(String url) throws ParsingException { - url = Utils.followGoogleRedirectIfNeeded(url); - final String baseUrl = Utils.getBaseUrl(url); - return fromUrl(url, baseUrl); + public ListLinkHandler fromUrl(final String url) throws ParsingException { + final String polishedUrl = Utils.followGoogleRedirectIfNeeded(url); + final String baseUrl = Utils.getBaseUrl(polishedUrl); + return 
fromUrl(polishedUrl, baseUrl); } @Override diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/utils/Utils.java b/extractor/src/main/java/org/schabi/newpipe/extractor/utils/Utils.java index e8823af86..959202700 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/utils/Utils.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/utils/Utils.java @@ -181,15 +181,15 @@ public class Utils { return s; } - public static String getBaseUrl(String url) throws ParsingException { + public static String getBaseUrl(final String url) throws ParsingException { try { final URL uri = stringToURL(url); return uri.getProtocol() + "://" + uri.getAuthority(); - } catch (MalformedURLException e) { + } catch (final MalformedURLException e) { final String message = e.getMessage(); if (message.startsWith("unknown protocol: ")) { - System.out.println(message.substring(18)); - return message.substring(18); // return just the protocol (e.g. vnd.youtube) + // return just the protocol (e.g. vnd.youtube) + return message.substring("unknown protocol: ".length()); } throw new ParsingException("Malformed url: " + url, e);