This commit is contained in:
Isira Seneviratne 2020-11-01 06:27:59 +05:30
commit b2d0c098a3
12 changed files with 109 additions and 56 deletions

View File

@ -16,6 +16,7 @@ import org.schabi.newpipe.extractor.search.SearchExtractor;
import org.schabi.newpipe.extractor.stream.StreamExtractor;
import org.schabi.newpipe.extractor.subscription.SubscriptionExtractor;
import org.schabi.newpipe.extractor.suggestion.SuggestionExtractor;
import org.schabi.newpipe.extractor.utils.Utils;
import javax.annotation.Nullable;
import java.util.Collections;
@ -277,18 +278,19 @@ public abstract class StreamingService {
* Figures out where the link is pointing to (a channel, a video, a playlist, etc.)
* @param url the url on which it should be decided of which link type it is
* @return the link type of url
* @throws ParsingException
*/
public final LinkType getLinkTypeByUrl(String url) throws ParsingException {
LinkHandlerFactory sH = getStreamLHFactory();
LinkHandlerFactory cH = getChannelLHFactory();
LinkHandlerFactory pH = getPlaylistLHFactory();
public final LinkType getLinkTypeByUrl(final String url) throws ParsingException {
final String polishedUrl = Utils.followGoogleRedirectIfNeeded(url);
if (sH != null && sH.acceptUrl(url)) {
final LinkHandlerFactory sH = getStreamLHFactory();
final LinkHandlerFactory cH = getChannelLHFactory();
final LinkHandlerFactory pH = getPlaylistLHFactory();
if (sH != null && sH.acceptUrl(polishedUrl)) {
return LinkType.STREAM;
} else if (cH != null && cH.acceptUrl(url)) {
} else if (cH != null && cH.acceptUrl(polishedUrl)) {
return LinkType.CHANNEL;
} else if (pH != null && pH.acceptUrl(url)) {
} else if (pH != null && pH.acceptUrl(polishedUrl)) {
return LinkType.PLAYLIST;
} else {
return LinkType.NONE;

View File

@ -42,12 +42,29 @@ public abstract class LinkHandlerFactory {
// Logic
///////////////////////////////////
public LinkHandler fromUrl(String url) throws ParsingException {
if (url == null) throw new IllegalArgumentException("url can not be null");
final String baseUrl = Utils.getBaseUrl(url);
return fromUrl(url, baseUrl);
/**
 * Builds a {@link LinkHandler} from a url.<br>
 * Google search redirects are resolved first via
 * {@link Utils#followGoogleRedirectIfNeeded(String)}; the base url is then extracted from the
 * resulting url with {@link Utils#getBaseUrl(String)} and both are passed on to
 * {@link #fromUrl(String, String)}.<br>
 * Be sure to call {@link Utils#followGoogleRedirectIfNeeded(String)} on the url if overriding
 * this function.
 *
 * @param url the url to extract path and id from, must not be {@code null}
 * @return a {@link LinkHandler} complete with information
 * @throws IllegalArgumentException if {@code url} is {@code null}
 * @throws ParsingException if the base url cannot be extracted, or if
 *                          {@link #fromUrl(String, String)} fails
 */
public LinkHandler fromUrl(final String url) throws ParsingException {
    // Fail fast on null with the same exception and message used by fromUrl(String, String),
    // instead of letting a null url travel into the redirect/base-url helpers.
    if (url == null) {
        throw new IllegalArgumentException("url can not be null");
    }
    final String polishedUrl = Utils.followGoogleRedirectIfNeeded(url);
    final String baseUrl = Utils.getBaseUrl(polishedUrl);
    return fromUrl(polishedUrl, baseUrl);
}
/**
* Builds a {@link LinkHandler} from a url and a base url. The url is expected to be already
* polished from google search redirects (otherwise how could {@code baseUrl} have been
* extracted?).<br>
* So do not call {@link Utils#followGoogleRedirectIfNeeded(String)} on the url if overriding
* this function, since that should be done in {@link #fromUrl(String)}.
* @param url the url without google search redirects to extract id from
* @param baseUrl the base url
* @return a {@link LinkHandler} complete with information
*/
public LinkHandler fromUrl(String url, String baseUrl) throws ParsingException {
if (url == null) throw new IllegalArgumentException("url can not be null");
if (!acceptUrl(url)) {

View File

@ -31,9 +31,10 @@ public abstract class ListLinkHandlerFactory extends LinkHandlerFactory {
///////////////////////////////////
@Override
public ListLinkHandler fromUrl(String url) throws ParsingException {
String baseUrl = Utils.getBaseUrl(url);
return fromUrl(url, baseUrl);
public ListLinkHandler fromUrl(final String url) throws ParsingException {
    // Resolve a possible Google search redirect first, so that both the base url and the
    // resulting link handler are derived from the actual target url.
    final String targetUrl = Utils.followGoogleRedirectIfNeeded(url);
    return fromUrl(targetUrl, Utils.getBaseUrl(targetUrl));
}
@Override

View File

@ -25,6 +25,8 @@ import org.schabi.newpipe.extractor.utils.Utils;
import javax.annotation.Nonnull;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLEncoder;
import java.text.ParseException;
import java.text.SimpleDateFormat;
@ -148,12 +150,21 @@ public class SoundcloudParsingHelper {
*
* @return the resolved id
*/
// NOTE(review): this hunk lists the removed and the added variant of several lines one after
// the other (signature, URLEncoder call, "sets" check).
public static String resolveIdWithEmbedPlayer(String url) throws IOException, ReCaptchaException, ParsingException {
public static String resolveIdWithEmbedPlayer(String urlString) throws IOException, ReCaptchaException, ParsingException {
// Remove the trailing slash from URLs due to issues with the SoundCloud API
// NOTE(review): charAt(length - 1) throws StringIndexOutOfBoundsException for an empty string;
// confirm that callers never pass one.
if (urlString.charAt(urlString.length() -1) == '/') urlString = urlString.substring(0, urlString.length()-1);
URL url;
try {
url = Utils.stringToURL(urlString);
} catch (MalformedURLException e){
// NOTE(review): the MalformedURLException is not attached as the cause of the new exception.
throw new IllegalArgumentException("The given URL is not valid");
}
// Request the embed player page for the URL-encoded url and keep the response body
String response = NewPipe.getDownloader().get("https://w.soundcloud.com/player/?url="
+ URLEncoder.encode(url, "UTF-8"), SoundCloud.getLocalization()).responseBody();
+ URLEncoder.encode(url.toString(), "UTF-8"), SoundCloud.getLocalization()).responseBody();
// Handle playlists / sets differently and get the playlist id via the uri field in the JSON
if (url.contains("sets") && !url.endsWith("sets") && !url.endsWith("sets/"))
if (url.getPath().contains("/sets/") && !url.getPath().endsWith("/sets"))
return Parser.matchGroup1("\"uri\":\\s*\"https:\\/\\/api\\.soundcloud\\.com\\/playlists\\/((\\d)*?)\"", response);
// Otherwise extract the id from the response with the generic "id" pattern
return Parser.matchGroup1(",\"id\":(([^}\\n])*?),", response);
}

View File

@ -30,8 +30,6 @@ public class SoundcloudStreamLinkHandlerFactory extends LinkHandlerFactory {
@Override
public String getId(String url) throws ParsingException {
Utils.checkUrl(URL_PATTERN, url);
// Remove the tailing slash from URLs due to issues with the SoundCloud API
if (url.charAt(url.length() -1) == '/') url = url.substring(0, url.length()-1);
try {
return SoundcloudParsingHelper.resolveIdWithEmbedPlayer(url);

View File

@ -55,12 +55,6 @@ public class YoutubeParsingHelper {
// Private, empty constructor: the class cannot be instantiated from outside.
private YoutubeParsingHelper() {
}
/**
* The official YouTube app supports intents in this format, where the part after the ':' is the
* video ID. Accordingly, other apps also share streams in this format.
*/
public final static String BASE_YOUTUBE_INTENT_URL = "vnd.youtube";
private static final String HARDCODED_CLIENT_VERSION = "2.20200214.04.00";
private static String clientVersion;

View File

@ -1,10 +1,7 @@
package org.schabi.newpipe.extractor.services.youtube.linkHandler;
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.BASE_YOUTUBE_INTENT_URL;
import org.schabi.newpipe.extractor.exceptions.FoundAdException;
import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.linkhandler.ListLinkHandler;
import org.schabi.newpipe.extractor.linkhandler.ListLinkHandlerFactory;
import java.util.List;
@ -17,15 +14,6 @@ public class YoutubeCommentsLinkHandlerFactory extends ListLinkHandlerFactory {
return instance;
}
@Override
public ListLinkHandler fromUrl(String url) throws ParsingException {
    // Links starting with BASE_YOUTUBE_INTENT_URL use that prefix itself as the base url;
    // every other url goes through the default single-argument handling.
    final boolean isIntentUrl = url.startsWith(BASE_YOUTUBE_INTENT_URL);
    return isIntentUrl
            ? super.fromUrl(url, BASE_YOUTUBE_INTENT_URL)
            : super.fromUrl(url);
}
@Override
public String getUrl(String id) {
return "https://m.youtube.com/watch?v=" + id;

View File

@ -2,7 +2,6 @@ package org.schabi.newpipe.extractor.services.youtube.linkHandler;
import org.schabi.newpipe.extractor.exceptions.FoundAdException;
import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.linkhandler.LinkHandler;
import org.schabi.newpipe.extractor.linkhandler.LinkHandlerFactory;
import org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper;
import org.schabi.newpipe.extractor.utils.Utils;
@ -15,8 +14,6 @@ import java.net.URL;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.BASE_YOUTUBE_INTENT_URL;
/*
* Created by Christian Schabesberger on 02.02.16.
*
@ -67,15 +64,6 @@ public class YoutubeStreamLinkHandlerFactory extends LinkHandlerFactory {
}
}
@Override
public LinkHandler fromUrl(String url) throws ParsingException {
    // Default case first: anything that is not an intent-style link is handled by the
    // single-argument implementation of the superclass.
    if (!url.startsWith(BASE_YOUTUBE_INTENT_URL)) {
        return super.fromUrl(url);
    }
    // Intent-style link: the intent prefix itself is passed as the base url.
    return super.fromUrl(url, BASE_YOUTUBE_INTENT_URL);
}
@Override
public String getUrl(String id) {
return "https://www.youtube.com/watch?v=" + id;

View File

@ -181,14 +181,39 @@ public class Utils {
return s;
}
/**
 * Extracts the base url of the given url, i.e. {@code protocol + "://" + authority}.
 * If {@code stringToURL} rejects the url with a message starting with
 * {@code "unknown protocol: "}, the remainder of that message (e.g. {@code vnd.youtube}) is
 * returned instead.
 * <p>
 * NOTE(review): this hunk lists the removed and the added variant of several lines one after
 * the other (signature, try/catch layout, final return).
 *
 * @param url the url to take the base url from
 * @return the base url, or just the protocol for urls with an unknown protocol
 * @throws ParsingException if the url is malformed for any other reason
 */
public static String getBaseUrl(String url) throws ParsingException {
URL uri;
public static String getBaseUrl(final String url) throws ParsingException {
try {
uri = stringToURL(url);
} catch (MalformedURLException e) {
final URL uri = stringToURL(url);
return uri.getProtocol() + "://" + uri.getAuthority();
} catch (final MalformedURLException e) {
// NOTE(review): getMessage() may return null, in which case startsWith below throws a
// NullPointerException; the check also depends on the exact exception message text.
final String message = e.getMessage();
if (message.startsWith("unknown protocol: ")) {
// return just the protocol (e.g. vnd.youtube)
return message.substring("unknown protocol: ".length());
}
// any other malformed url is reported as a ParsingException that keeps the original cause
throw new ParsingException("Malformed url: " + url, e);
}
return uri.getProtocol() + "://" + uri.getAuthority();
}
/**
 * If the provided url is a Google search redirect, then the actual url is extracted from the
 * {@code url=} query value and returned, otherwise the original url is returned.
 * <p>
 * A url is treated as a redirect when its host contains {@code google} and its path is exactly
 * {@code /url}. The {@code url=} parameter is recognized at any position in the query,
 * including directly after the {@code ?}.
 * <p>
 * This is a best-effort helper: no exception is propagated. If the url cannot be parsed, has
 * no {@code url=} parameter or cannot be decoded, the original url is returned unchanged.
 *
 * @param url the url which can possibly be a Google search redirect
 * @return a url with no Google search redirects
 */
public static String followGoogleRedirectIfNeeded(final String url) {
    // if the url is a redirect from a Google search, extract the actual url
    try {
        final URL decoded = Utils.stringToURL(url);
        if (decoded.getHost().contains("google") && decoded.getPath().equals("/url")) {
            // "[?&]" rather than "&": the url parameter may be the first one in the query,
            // in which case it is preceded by '?' and not by '&'
            return URLDecoder.decode(Parser.matchGroup1("[?&]url=([^&]+)(?:&|$)", url), "UTF-8");
        }
    } catch (final Exception ignored) {
        // deliberately ignored: on any failure fall through and return the url untouched
    }

    // url is not a google search redirect
    return url;
}
public static boolean isNullOrEmpty(final String str) {

View File

@ -6,6 +6,7 @@ import java.util.HashSet;
import static org.junit.Assert.*;
import static org.schabi.newpipe.extractor.NewPipe.getServiceByUrl;
import static org.schabi.newpipe.extractor.ServiceList.SoundCloud;
import static org.schabi.newpipe.extractor.ServiceList.YouTube;
public class NewPipeTest {
@ -39,8 +40,10 @@ public class NewPipeTest {
assertEquals(getServiceByUrl("https://www.youtube.com/watch?v=_r6CgaFNAGg"), YouTube);
assertEquals(getServiceByUrl("https://www.youtube.com/channel/UCi2bIyFtz-JdI-ou8kaqsqg"), YouTube);
assertEquals(getServiceByUrl("https://www.youtube.com/playlist?list=PLRqwX-V7Uu6ZiZxtDDRCi6uhfTH4FilpH"), YouTube);
assertEquals(getServiceByUrl("https://www.google.it/url?sa=t&rct=j&q=&esrc=s&cd=&cad=rja&uact=8&url=https%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DHu80uDzh8RY&source=video"), YouTube);
assertNotEquals(getServiceByUrl("https://soundcloud.com/pegboardnerds"), YouTube);
assertEquals(getServiceByUrl("https://soundcloud.com/pegboardnerds"), SoundCloud);
assertEquals(getServiceByUrl("https://www.google.com/url?sa=t&url=https%3A%2F%2Fsoundcloud.com%2Fciaoproduction&rct=j&q=&esrc=s&source=web&cd="), SoundCloud);
}
@Test

View File

@ -53,6 +53,7 @@ public class SoundcloudStreamLinkHandlerFactoryTest {
assertEquals("309689103", linkHandler.fromUrl("https://soundcloud.com/liluzivert/15-ysl").getId());
assertEquals("309689082", linkHandler.fromUrl("https://www.soundcloud.com/liluzivert/15-luv-scars-ko").getId());
assertEquals("309689035", linkHandler.fromUrl("http://soundcloud.com/liluzivert/15-boring-shit").getId());
assertEquals("259273264", linkHandler.fromUrl("https://soundcloud.com/liluzivert/ps-qs-produced-by-don-cannon/").getId());
assertEquals("294488599", linkHandler.fromUrl("http://www.soundcloud.com/liluzivert/secure-the-bag-produced-by-glohan-beats").getId());
assertEquals("294488438", linkHandler.fromUrl("HtTpS://sOuNdClOuD.cOm/LiLuZiVeRt/In-O4-pRoDuCeD-bY-dP-bEaTz").getId());
assertEquals("294488147", linkHandler.fromUrl("https://soundcloud.com/liluzivert/fresh-produced-by-zaytoven#t=69").getId());
@ -60,6 +61,7 @@ public class SoundcloudStreamLinkHandlerFactoryTest {
assertEquals("294487684", linkHandler.fromUrl("https://soundcloud.com/liluzivert/blonde-brigitte-produced-manny-fresh#t=1:9").getId());
assertEquals("294487428", linkHandler.fromUrl("https://soundcloud.com/liluzivert/today-produced-by-c-note#t=1m9s").getId());
assertEquals("294487157", linkHandler.fromUrl("https://soundcloud.com/liluzivert/changed-my-phone-produced-by-c-note#t=1m09s").getId());
assertEquals("44556776", linkHandler.fromUrl("https://soundcloud.com/kechuspider-sets-1/last-days").getId());
}

View File

@ -21,4 +21,28 @@ public class UtilsTest {
public void testJoin() {
assertEquals("some,random,stuff", Utils.join(",", Arrays.asList("some", "random", "stuff")));
}
@Test
public void testGetBaseUrl() throws ParsingException {
    // Each row is {expected base url, input url}; rows are checked in order.
    final String[][] cases = {
            {"https://www.youtube.com", "https://www.youtube.com/watch?v=Hu80uDzh8RY"},
            {"vnd.youtube", "vnd.youtube://www.youtube.com/watch?v=jZViOEv90dI"},
            {"vnd.youtube", "vnd.youtube:jZViOEv90dI"},
            {"vnd.youtube", "vnd.youtube://n8X9_MgEdCg"},
            {"https://music.youtube.com", "https://music.youtube.com/watch?v=O0EDx9WAelc"},
    };
    for (final String[] testCase : cases) {
        assertEquals(testCase[0], Utils.getBaseUrl(testCase[1]));
    }
}
@Test
public void testFollowGoogleRedirect() {
    // Each row is {expected result, input url}. The first three rows are Google redirects,
    // the last two are plain urls that have to come back unchanged.
    final String[][] cases = {
            {"https://www.youtube.com/watch?v=Hu80uDzh8RY",
                    "https://www.google.it/url?sa=t&rct=j&q=&esrc=s&cd=&cad=rja&uact=8&url=https%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DHu80uDzh8RY&source=video"},
            {"https://www.youtube.com/watch?v=0b6cFWG45kA",
                    "https://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=video&cd=&cad=rja&uact=8&url=https%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3D0b6cFWG45kA"},
            {"https://soundcloud.com/ciaoproduction",
                    "https://www.google.com/url?sa=t&url=https%3A%2F%2Fsoundcloud.com%2Fciaoproduction&rct=j&q=&esrc=s&source=web&cd="},
            {"https://www.youtube.com/watch?v=Hu80uDzh8RY&param=xyz",
                    "https://www.youtube.com/watch?v=Hu80uDzh8RY&param=xyz"},
            {"https://www.youtube.com/watch?v=Hu80uDzh8RY&url=hello",
                    "https://www.youtube.com/watch?v=Hu80uDzh8RY&url=hello"},
    };
    for (final String[] testCase : cases) {
        assertEquals(testCase[0], Utils.followGoogleRedirectIfNeeded(testCase[1]));
    }
}
}