From 4fc18a6994ffe415f47b20f5e9813b8a2a0a5b6d Mon Sep 17 00:00:00 2001 From: Mauricio Colli Date: Tue, 22 Oct 2019 13:21:24 -0300 Subject: [PATCH] [SoundCloud] Fix extraction of client id - Hardcoded id and check at the first usage. - As a fallback, and considering that the scripts containing the client id were all split up, try searching it in each of them. --- .../soundcloud/SoundcloudParsingHelper.java | 56 ++++++++++++------- .../SoundcloudSearchQueryHandlerFactory.java | 5 +- 2 files changed, 40 insertions(+), 21 deletions(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/SoundcloudParsingHelper.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/SoundcloudParsingHelper.java index bb4638f63..f0d39bf0e 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/SoundcloudParsingHelper.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/SoundcloudParsingHelper.java @@ -7,9 +7,12 @@ import com.grack.nanojson.JsonParserException; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; +import org.jsoup.select.Elements; +import org.schabi.newpipe.extractor.DownloadResponse; import org.schabi.newpipe.extractor.Downloader; import org.schabi.newpipe.extractor.NewPipe; import org.schabi.newpipe.extractor.channel.ChannelInfoItemsCollector; +import org.schabi.newpipe.extractor.exceptions.ExtractionException; import org.schabi.newpipe.extractor.exceptions.ParsingException; import org.schabi.newpipe.extractor.exceptions.ReCaptchaException; import org.schabi.newpipe.extractor.stream.StreamInfoItemsCollector; @@ -21,6 +24,7 @@ import java.io.IOException; import java.net.URLEncoder; import java.text.ParseException; import java.text.SimpleDateFormat; +import java.util.Collections; import java.util.Date; import java.util.HashMap; @@ -32,29 +36,43 @@ public class SoundcloudParsingHelper { private SoundcloudParsingHelper() { } - public static String clientId() throws ReCaptchaException, IOException, RegexException { + public static String clientId() throws ExtractionException, IOException { if (clientId != null && !clientId.isEmpty()) return clientId; Downloader dl = NewPipe.getDownloader(); - String response = dl.download("https://soundcloud.com"); - - Document doc = Jsoup.parse(response); - Element jsElement = doc.select("script[src^=https://a-v2.sndcdn.com/assets/app]").first(); - - final String clientIdPattern = ",client_id:\"(.*?)\""; - - try { - final HashMap headers = new HashMap<>(); - headers.put("Range", "bytes=0-16384"); - String js = dl.download(jsElement.attr("src"), headers); - - return clientId = Parser.matchGroup1(clientIdPattern, js); - } catch (IOException | RegexException ignored) { - // Ignore it and proceed to download the whole js file + clientId = "LHzSAKe8eP9Yy3FgBugfBapRPLncO6Ng"; // Updated on 22/10/19 + final String apiUrl = "https://api.soundcloud.com/connect?client_id=" + clientId; + // Should return 200 to indicate that the client id is valid, a 401 is returned otherwise. + // In that case, the fallback method is used. + if (dl.head(apiUrl).getResponseCode() == 200) { + return clientId; } - String js = dl.download(jsElement.attr("src")); - return clientId = Parser.matchGroup1(clientIdPattern, js); + final DownloadResponse download = dl.get("https://soundcloud.com"); + String response = download.getResponseBody(); + final String clientIdPattern = ",client_id:\"(.*?)\""; + + Document doc = Jsoup.parse(response); + final Elements possibleScripts = doc.select("script[src*=\"sndcdn.com/assets/\"][src$=\".js\"]"); + // The one containing the client id will likely be the last one + Collections.reverse(possibleScripts); + + final HashMap headers = new HashMap<>(); + headers.put("Range", "bytes=0-16384"); + + for (Element element : possibleScripts) { + final String srcUrl = element.attr("src"); + if (srcUrl != null && !srcUrl.isEmpty()) { + try { + return clientId = Parser.matchGroup1(clientIdPattern, dl.download(srcUrl, headers)); + } catch (RegexException ignored) { + // Ignore it and proceed to try searching other script + } + } + } + + // Officially give up + throw new ExtractionException("Couldn't extract client id"); } public static String toDateString(String time) throws ParsingException { @@ -79,7 +97,7 @@ public class SoundcloudParsingHelper { * * See https://developers.soundcloud.com/docs/api/reference#resolve */ - public static JsonObject resolveFor(Downloader downloader, String url) throws IOException, ReCaptchaException, ParsingException { + public static JsonObject resolveFor(Downloader downloader, String url) throws IOException, ExtractionException { String apiUrl = "https://api.soundcloud.com/resolve" + "?url=" + URLEncoder.encode(url, "UTF-8") + "&client_id=" + clientId(); diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/SoundcloudSearchQueryHandlerFactory.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/SoundcloudSearchQueryHandlerFactory.java index 8dbe287eb..900571202 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/SoundcloudSearchQueryHandlerFactory.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/SoundcloudSearchQueryHandlerFactory.java @@ -1,5 +1,6 @@ package org.schabi.newpipe.extractor.services.soundcloud; +import org.schabi.newpipe.extractor.exceptions.ExtractionException; import org.schabi.newpipe.extractor.exceptions.ParsingException; import org.schabi.newpipe.extractor.exceptions.ReCaptchaException; import org.schabi.newpipe.extractor.linkhandler.SearchQueryHandlerFactory; @@ -48,10 +49,10 @@ public class SoundcloudSearchQueryHandlerFactory extends SearchQueryHandlerFacto } catch (UnsupportedEncodingException e) { throw new ParsingException("Could not encode query", e); - } catch (IOException e) { - throw new ParsingException("Could not get client id", e); } catch (ReCaptchaException e) { throw new ParsingException("ReCaptcha required", e); + } catch (IOException | ExtractionException e) { + throw new ParsingException("Could not get client id", e); } }