diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/StreamingService.java b/extractor/src/main/java/org/schabi/newpipe/extractor/StreamingService.java
index e21b17f3b..dcde0aff6 100644
--- a/extractor/src/main/java/org/schabi/newpipe/extractor/StreamingService.java
+++ b/extractor/src/main/java/org/schabi/newpipe/extractor/StreamingService.java
@@ -16,6 +16,7 @@ import org.schabi.newpipe.extractor.search.SearchExtractor;
 import org.schabi.newpipe.extractor.stream.StreamExtractor;
 import org.schabi.newpipe.extractor.subscription.SubscriptionExtractor;
 import org.schabi.newpipe.extractor.suggestion.SuggestionExtractor;
+import org.schabi.newpipe.extractor.utils.Utils;
 
 import javax.annotation.Nullable;
 import java.util.Collections;
@@ -277,18 +278,19 @@ public abstract class StreamingService {
      * Figures out where the link is pointing to (a channel, a video, a playlist, etc.)
      * @param url the url on which it should be decided of which link type it is
      * @return the link type of url
-     * @throws ParsingException
      */
-    public final LinkType getLinkTypeByUrl(String url) throws ParsingException {
-        LinkHandlerFactory sH = getStreamLHFactory();
-        LinkHandlerFactory cH = getChannelLHFactory();
-        LinkHandlerFactory pH = getPlaylistLHFactory();
+    public final LinkType getLinkTypeByUrl(final String url) throws ParsingException {
+        final String polishedUrl = Utils.followGoogleRedirectIfNeeded(url);
 
-        if (sH != null && sH.acceptUrl(url)) {
+        final LinkHandlerFactory sH = getStreamLHFactory();
+        final LinkHandlerFactory cH = getChannelLHFactory();
+        final LinkHandlerFactory pH = getPlaylistLHFactory();
+
+        if (sH != null && sH.acceptUrl(polishedUrl)) {
             return LinkType.STREAM;
-        } else if (cH != null && cH.acceptUrl(url)) {
+        } else if (cH != null && cH.acceptUrl(polishedUrl)) {
             return LinkType.CHANNEL;
-        } else if (pH != null && pH.acceptUrl(url)) {
+        } else if (pH != null && pH.acceptUrl(polishedUrl)) {
             return LinkType.PLAYLIST;
         } else {
             return LinkType.NONE;
diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/linkhandler/LinkHandlerFactory.java b/extractor/src/main/java/org/schabi/newpipe/extractor/linkhandler/LinkHandlerFactory.java
index 6bba7b4e5..ca428b706 100644
--- a/extractor/src/main/java/org/schabi/newpipe/extractor/linkhandler/LinkHandlerFactory.java
+++ b/extractor/src/main/java/org/schabi/newpipe/extractor/linkhandler/LinkHandlerFactory.java
@@ -42,12 +42,30 @@ public abstract class LinkHandlerFactory {
     // Logic
     ///////////////////////////////////
 
-    public LinkHandler fromUrl(String url) throws ParsingException {
-        if (url == null) throw new IllegalArgumentException("url can not be null");
-        final String baseUrl = Utils.getBaseUrl(url);
-        return fromUrl(url, baseUrl);
+    /**
+     * Builds a {@link LinkHandler} from a url.
+     * Be sure to call {@link Utils#followGoogleRedirectIfNeeded(String)} on the url if overriding
+     * this function.
+     * @param url the url to extract path and id from
+     * @return a {@link LinkHandler} complete with information
+     */
+    public LinkHandler fromUrl(final String url) throws ParsingException {
+        if (url == null) throw new IllegalArgumentException("url can not be null");
+        final String polishedUrl = Utils.followGoogleRedirectIfNeeded(url);
+        final String baseUrl = Utils.getBaseUrl(polishedUrl);
+        return fromUrl(polishedUrl, baseUrl);
     }
 
+    /**
+     * Builds a {@link LinkHandler} from a url and a base url. The url is expected to be already
+     * polished from google search redirects (otherwise how could {@code baseUrl} have been
+     * extracted?).
+     * So do not call {@link Utils#followGoogleRedirectIfNeeded(String)} on the url if overriding
+     * this function, since that should be done in {@link #fromUrl(String)}.
+     * @param url the url without google search redirects to extract id from
+     * @param baseUrl the base url
+     * @return a {@link LinkHandler} complete with information
+     */
     public LinkHandler fromUrl(String url, String baseUrl) throws ParsingException {
         if (url == null) throw new IllegalArgumentException("url can not be null");
         if (!acceptUrl(url)) {
diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/linkhandler/ListLinkHandlerFactory.java b/extractor/src/main/java/org/schabi/newpipe/extractor/linkhandler/ListLinkHandlerFactory.java
index 9ea478b02..4980c3191 100644
--- a/extractor/src/main/java/org/schabi/newpipe/extractor/linkhandler/ListLinkHandlerFactory.java
+++ b/extractor/src/main/java/org/schabi/newpipe/extractor/linkhandler/ListLinkHandlerFactory.java
@@ -31,9 +31,10 @@ public abstract class ListLinkHandlerFactory extends LinkHandlerFactory {
     ///////////////////////////////////
 
     @Override
-    public ListLinkHandler fromUrl(String url) throws ParsingException {
-        String baseUrl = Utils.getBaseUrl(url);
-        return fromUrl(url, baseUrl);
+    public ListLinkHandler fromUrl(final String url) throws ParsingException {
+        final String polishedUrl = Utils.followGoogleRedirectIfNeeded(url);
+        final String baseUrl = Utils.getBaseUrl(polishedUrl);
+        return fromUrl(polishedUrl, baseUrl);
     }
 
     @Override
diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeParsingHelper.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeParsingHelper.java
index ac7db9855..a8ca2a03a 100644
--- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeParsingHelper.java
+++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeParsingHelper.java
@@ -55,12 +55,6 @@ public class YoutubeParsingHelper {
     private YoutubeParsingHelper() {
     }
 
-    /**
-     * The official youtube app supports intents in this format, where after the ':' is the videoId.
-     * Accordingly there are other apps sharing streams in this format.
-     */
-    public final static String BASE_YOUTUBE_INTENT_URL = "vnd.youtube";
-
     private static final String HARDCODED_CLIENT_VERSION = "2.20200214.04.00";
     private static String clientVersion;
 
diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeCommentsLinkHandlerFactory.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeCommentsLinkHandlerFactory.java
index 15bc31b66..421fc13f3 100644
--- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeCommentsLinkHandlerFactory.java
+++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeCommentsLinkHandlerFactory.java
@@ -1,10 +1,7 @@
 package org.schabi.newpipe.extractor.services.youtube.linkHandler;
 
-import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.BASE_YOUTUBE_INTENT_URL;
-
 import org.schabi.newpipe.extractor.exceptions.FoundAdException;
 import org.schabi.newpipe.extractor.exceptions.ParsingException;
-import org.schabi.newpipe.extractor.linkhandler.ListLinkHandler;
 import org.schabi.newpipe.extractor.linkhandler.ListLinkHandlerFactory;
 
 import java.util.List;
@@ -17,15 +14,6 @@ public class YoutubeCommentsLinkHandlerFactory extends ListLinkHandlerFactory {
         return instance;
     }
 
-    @Override
-    public ListLinkHandler fromUrl(String url) throws ParsingException {
-        if (url.startsWith(BASE_YOUTUBE_INTENT_URL)){
-            return super.fromUrl(url, BASE_YOUTUBE_INTENT_URL);
-        } else {
-            return super.fromUrl(url);
-        }
-    }
-
     @Override
     public String getUrl(String id) {
         return "https://m.youtube.com/watch?v=" + id;
diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeStreamLinkHandlerFactory.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeStreamLinkHandlerFactory.java
index bcc1fac58..efc06da2a 100644
--- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeStreamLinkHandlerFactory.java
+++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeStreamLinkHandlerFactory.java
@@ -2,7 +2,6 @@ package org.schabi.newpipe.extractor.services.youtube.linkHandler;
 
 import org.schabi.newpipe.extractor.exceptions.FoundAdException;
 import org.schabi.newpipe.extractor.exceptions.ParsingException;
-import org.schabi.newpipe.extractor.linkhandler.LinkHandler;
 import org.schabi.newpipe.extractor.linkhandler.LinkHandlerFactory;
 import org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper;
 import org.schabi.newpipe.extractor.utils.Utils;
@@ -15,8 +14,6 @@ import java.net.URL;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
-import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.BASE_YOUTUBE_INTENT_URL;
-
 /*
  * Created by Christian Schabesberger on 02.02.16.
  *
@@ -67,15 +64,6 @@ public class YoutubeStreamLinkHandlerFactory extends LinkHandlerFactory {
         }
     }
 
-    @Override
-    public LinkHandler fromUrl(String url) throws ParsingException {
-        if (url.startsWith(BASE_YOUTUBE_INTENT_URL)) {
-            return super.fromUrl(url, BASE_YOUTUBE_INTENT_URL);
-        } else {
-            return super.fromUrl(url);
-        }
-    }
-
     @Override
     public String getUrl(String id) {
         return "https://www.youtube.com/watch?v=" + id;
diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/utils/Utils.java b/extractor/src/main/java/org/schabi/newpipe/extractor/utils/Utils.java
index 288e401c3..959202700 100644
--- a/extractor/src/main/java/org/schabi/newpipe/extractor/utils/Utils.java
+++ b/extractor/src/main/java/org/schabi/newpipe/extractor/utils/Utils.java
@@ -181,14 +181,41 @@ public class Utils {
         return s;
     }
 
-    public static String getBaseUrl(String url) throws ParsingException {
-        URL uri;
+    public static String getBaseUrl(final String url) throws ParsingException {
         try {
-            uri = stringToURL(url);
-        } catch (MalformedURLException e) {
+            final URL uri = stringToURL(url);
+            return uri.getProtocol() + "://" + uri.getAuthority();
+        } catch (final MalformedURLException e) {
+            final String message = e.getMessage();
+            if (message != null && message.startsWith("unknown protocol: ")) {
+                // return just the protocol (e.g. vnd.youtube)
+                return message.substring("unknown protocol: ".length());
+            }
+
             throw new ParsingException("Malformed url: " + url, e);
         }
-        return uri.getProtocol() + "://" + uri.getAuthority();
+    }
+
+    /**
+     * If the provided url is a Google search redirect, then the actual url is extracted from the
+     * {@code url=} query value and returned, otherwise the original url is returned.
+     * @param url the url which can possibly be a Google search redirect
+     * @return an url with no Google search redirects
+     */
+    public static String followGoogleRedirectIfNeeded(final String url) {
+        // if the url is a redirect from a Google search, extract the actual url
+        try {
+            final URL decoded = Utils.stringToURL(url);
+            if (decoded.getHost().contains("google") && decoded.getPath().equals("/url")) {
+                return URLDecoder.decode(
+                        Parser.matchGroup1("[?&]url=([^&]+)(?:&|$)", url), "UTF-8");
+            }
+        } catch (final Exception ignored) {
+            // best effort: on any parsing failure fall through and return the url unchanged
+        }
+
+        // url is not a google search redirect
+        return url;
     }
 
     public static boolean isNullOrEmpty(final String str) {
diff --git a/extractor/src/test/java/org/schabi/newpipe/extractor/NewPipeTest.java b/extractor/src/test/java/org/schabi/newpipe/extractor/NewPipeTest.java
index bdad6cb6b..5dbc43174 100644
--- a/extractor/src/test/java/org/schabi/newpipe/extractor/NewPipeTest.java
+++ b/extractor/src/test/java/org/schabi/newpipe/extractor/NewPipeTest.java
@@ -6,6 +6,7 @@ import java.util.HashSet;
 
 import static org.junit.Assert.*;
 import static org.schabi.newpipe.extractor.NewPipe.getServiceByUrl;
+import static org.schabi.newpipe.extractor.ServiceList.SoundCloud;
 import static org.schabi.newpipe.extractor.ServiceList.YouTube;
 
 public class NewPipeTest {
@@ -39,8 +40,10 @@ public class NewPipeTest {
         assertEquals(getServiceByUrl("https://www.youtube.com/watch?v=_r6CgaFNAGg"), YouTube);
         assertEquals(getServiceByUrl("https://www.youtube.com/channel/UCi2bIyFtz-JdI-ou8kaqsqg"), YouTube);
         assertEquals(getServiceByUrl("https://www.youtube.com/playlist?list=PLRqwX-V7Uu6ZiZxtDDRCi6uhfTH4FilpH"), YouTube);
+        assertEquals(getServiceByUrl("https://www.google.it/url?sa=t&rct=j&q=&esrc=s&cd=&cad=rja&uact=8&url=https%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DHu80uDzh8RY&source=video"), YouTube);
 
-        assertNotEquals(getServiceByUrl("https://soundcloud.com/pegboardnerds"), YouTube);
+        assertEquals(getServiceByUrl("https://soundcloud.com/pegboardnerds"), SoundCloud);
+        assertEquals(getServiceByUrl("https://www.google.com/url?sa=t&url=https%3A%2F%2Fsoundcloud.com%2Fciaoproduction&rct=j&q=&esrc=s&source=web&cd="), SoundCloud);
     }
 
     @Test
diff --git a/extractor/src/test/java/org/schabi/newpipe/extractor/utils/UtilsTest.java b/extractor/src/test/java/org/schabi/newpipe/extractor/utils/UtilsTest.java
index 5b0dfdb3d..e4a65505b 100644
--- a/extractor/src/test/java/org/schabi/newpipe/extractor/utils/UtilsTest.java
+++ b/extractor/src/test/java/org/schabi/newpipe/extractor/utils/UtilsTest.java
@@ -21,4 +21,30 @@ public class UtilsTest {
     public void testJoin() {
         assertEquals("some,random,stuff", Utils.join(",", Arrays.asList("some", "random", "stuff")));
     }
+
+    @Test
+    public void testGetBaseUrl() throws ParsingException {
+        assertEquals("https://www.youtube.com", Utils.getBaseUrl("https://www.youtube.com/watch?v=Hu80uDzh8RY"));
+        assertEquals("vnd.youtube", Utils.getBaseUrl("vnd.youtube://www.youtube.com/watch?v=jZViOEv90dI"));
+        assertEquals("vnd.youtube", Utils.getBaseUrl("vnd.youtube:jZViOEv90dI"));
+        assertEquals("vnd.youtube", Utils.getBaseUrl("vnd.youtube://n8X9_MgEdCg"));
+        assertEquals("https://music.youtube.com", Utils.getBaseUrl("https://music.youtube.com/watch?v=O0EDx9WAelc"));
+    }
+
+    @Test
+    public void testFollowGoogleRedirect() {
+        assertEquals("https://www.youtube.com/watch?v=Hu80uDzh8RY",
+                Utils.followGoogleRedirectIfNeeded("https://www.google.it/url?sa=t&rct=j&q=&esrc=s&cd=&cad=rja&uact=8&url=https%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DHu80uDzh8RY&source=video"));
+        assertEquals("https://www.youtube.com/watch?v=0b6cFWG45kA",
+                Utils.followGoogleRedirectIfNeeded("https://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=video&cd=&cad=rja&uact=8&url=https%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3D0b6cFWG45kA"));
+        assertEquals("https://soundcloud.com/ciaoproduction",
+                Utils.followGoogleRedirectIfNeeded("https://www.google.com/url?sa=t&url=https%3A%2F%2Fsoundcloud.com%2Fciaoproduction&rct=j&q=&esrc=s&source=web&cd="));
+        assertEquals("https://www.youtube.com/watch?v=Hu80uDzh8RY",
+                Utils.followGoogleRedirectIfNeeded("https://www.google.com/url?url=https%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DHu80uDzh8RY&sa=t"));
+
+        assertEquals("https://www.youtube.com/watch?v=Hu80uDzh8RY&param=xyz",
+                Utils.followGoogleRedirectIfNeeded("https://www.youtube.com/watch?v=Hu80uDzh8RY&param=xyz"));
+        assertEquals("https://www.youtube.com/watch?v=Hu80uDzh8RY&url=hello",
+                Utils.followGoogleRedirectIfNeeded("https://www.youtube.com/watch?v=Hu80uDzh8RY&url=hello"));
+    }
 }