From 80cf8b3acdc66efaa76f8bd67d1d6bf7a20b98f7 Mon Sep 17 00:00:00 2001
From: XiangRongLin <41164160+XiangRongLin@users.noreply.github.com>
Date: Mon, 12 Jul 2021 20:06:19 +0200
Subject: [PATCH] Extract separate YoutubeThrottlingDecoder

---
 .../youtube/YoutubeThrottlingDecoder.java     | 137 ++++++++++++++++++
 .../extractors/YoutubeStreamExtractor.java    |  75 +++-------
 .../newpipe/extractor/utils/Javascript.java   |  34 +++++
 3 files changed, 195 insertions(+), 51 deletions(-)
 create mode 100644 extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeThrottlingDecoder.java
 create mode 100644 extractor/src/main/java/org/schabi/newpipe/extractor/utils/Javascript.java

diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeThrottlingDecoder.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeThrottlingDecoder.java
new file mode 100644
index 000000000..975df002a
--- /dev/null
+++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeThrottlingDecoder.java
@@ -0,0 +1,137 @@
+package org.schabi.newpipe.extractor.services.youtube;
+
+import org.jsoup.Jsoup;
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+import org.jsoup.select.Elements;
+import org.schabi.newpipe.extractor.NewPipe;
+import org.schabi.newpipe.extractor.exceptions.ParsingException;
+import org.schabi.newpipe.extractor.localization.Localization;
+import org.schabi.newpipe.extractor.utils.Javascript;
+import org.schabi.newpipe.extractor.utils.Parser;
+
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**
+ * Locates the function in YouTube's player JavaScript that transforms the {@code n} query
+ * parameter of stream URLs and applies it to those URLs.
+ *
+ * The constructor downloads the embed page of a video and the player JavaScript referenced
+ * there, so creating an instance performs network requests.
+ */
+public class YoutubeThrottlingDecoder {
+
+    private static final String HTTPS = "https:";
+    private static final Pattern N_PARAM_PATTERN = Pattern.compile("[&?]n=([^&]+)");
+    private static final Pattern FUNCTION_NAME_PATTERN = Pattern.compile(
+            "b=a\\.get\\(\"n\"\\)\\)&&\\(b=(\\w+)\\(b\\),a\\.set\\(\"n\",b\\)");
+
+    private final String functionName;
+    private final String function;
+
+    /**
+     * Downloads the player JavaScript used by the given video and extracts the decode function.
+     *
+     * @param videoId      id of the video whose embed page is used to find the player JavaScript
+     * @param localization localization passed to the downloader
+     * @throws ParsingException if the player JavaScript or the decode function cannot be obtained
+     */
+    public YoutubeThrottlingDecoder(String videoId, Localization localization) throws ParsingException {
+        String playerJsUrl = cleanPlayerJsUrl(extractPlayerJsUrl(videoId, localization));
+        String playerJsCode = downloadPlayerJsCode(localization, playerJsUrl);
+
+        functionName = parseDecodeFunctionName(playerJsCode);
+        function = parseDecodeFunction(playerJsCode, functionName);
+    }
+
+    /** Finds the URL of the player JavaScript in the embed page of the given video. */
+    private String extractPlayerJsUrl(String videoId, Localization localization) throws ParsingException {
+        try {
+            final String embedUrl = "https://www.youtube.com/embed/" + videoId;
+            final String embedPageContent = NewPipe.getDownloader()
+                    .get(embedUrl, localization).responseBody();
+
+            try {
+                final String assetsPattern = "\"assets\":.+?\"js\":\\s*(\"[^\"]+\")";
+                return Parser.matchGroup1(assetsPattern, embedPageContent)
+                        .replace("\\", "").replace("\"", "");
+            } catch (final Parser.RegexException ex) {
+                // Not found in the "assets" JSON: fall back to the script tags of the page.
+                // Elements#attr(key, value) sets the attribute instead of filtering by it,
+                // so the selection is done by the src check below only.
+                final Document doc = Jsoup.parse(embedPageContent);
+                final Elements elems = doc.select("script");
+                for (final Element elem : elems) {
+                    if (elem.attr("src").contains("base.js")) {
+                        return elem.attr("src");
+                    }
+                }
+            }
+
+        } catch (final Exception e) {
+            throw new ParsingException("Embedded info did not provide YouTube player js url", e);
+        }
+        throw new ParsingException("Embedded info did not provide YouTube player js url");
+    }
+
+    /** Turns a protocol-relative or host-relative player JavaScript URL into an absolute one. */
+    private String cleanPlayerJsUrl(String playerJsUrl) {
+        if (playerJsUrl.startsWith("//")) {
+            return HTTPS + playerJsUrl;
+        } else if (playerJsUrl.startsWith("/")) {
+            // sometimes https://www.youtube.com part has to be added manually
+            return HTTPS + "//www.youtube.com" + playerJsUrl;
+        } else {
+            return playerJsUrl;
+        }
+    }
+
+    /** Downloads the player JavaScript source from the given URL. */
+    private String downloadPlayerJsCode(Localization localization, String playerJsUrl) throws ParsingException {
+        try {
+            return NewPipe.getDownloader().get(playerJsUrl, localization).responseBody();
+        } catch (Exception e) {
+            throw new ParsingException("Could not get player js code from url: " + playerJsUrl, e);
+        }
+    }
+
+    /** Finds the name of the function the player applies to the {@code n} parameter. */
+    private String parseDecodeFunctionName(String playerJsCode) throws Parser.RegexException {
+        return Parser.matchGroup1(FUNCTION_NAME_PATTERN, playerJsCode);
+    }
+
+    /** Extracts the source of the named function, rewritten as a function declaration. */
+    private String parseDecodeFunction(String playerJsCode, String functionName) throws Parser.RegexException {
+        Pattern functionPattern = Pattern.compile(functionName + "=function(.*?;)\n", Pattern.DOTALL);
+        return "function " + functionName + Parser.matchGroup1(functionPattern, playerJsCode);
+    }
+
+    /**
+     * Returns the value of the {@code n} query parameter of the given URL.
+     *
+     * @throws Parser.RegexException if the URL has no {@code n} parameter
+     */
+    public String parseNParam(String url) throws Parser.RegexException {
+        return Parser.matchGroup1(N_PARAM_PATTERN, url);
+    }
+
+    /** Runs the extracted decode function on the given {@code n} value and returns its result. */
+    public String decodeNParam(String nParam) {
+        Javascript javascript = new Javascript();
+        return javascript.run(function, functionName, nParam);
+    }
+
+    /**
+     * Replaces the value of the first {@code n} query parameter of the URL with the new value.
+     * The {@code &n=} / {@code ?n=} prefix is kept and the new value is inserted literally;
+     * a URL without an {@code n} parameter is returned unchanged.
+     */
+    public String replaceNParam(String url, String newValue) {
+        Matcher matcher = N_PARAM_PATTERN.matcher(url);
+        if (!matcher.find()) {
+            return url;
+        }
+        return url.substring(0, matcher.start(1)) + newValue + url.substring(matcher.end(1));
+    }
+}
diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java
index 03be11d35..55ba96a41 100644
--- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java
+++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java
@@ -25,6 +25,7 @@ import org.schabi.newpipe.extractor.localization.TimeAgoParser;
 import org.schabi.newpipe.extractor.localization.TimeAgoPatternsManager;
 import org.schabi.newpipe.extractor.services.youtube.ItagItem;
 import org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper;
+import org.schabi.newpipe.extractor.services.youtube.YoutubeThrottlingDecoder;
 import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeChannelLinkHandlerFactory;
 import org.schabi.newpipe.extractor.stream.*;
 import org.schabi.newpipe.extractor.utils.JsonUtils;
@@ -39,7 +40,6 @@ import java.time.LocalDate;
 import java.time.OffsetDateTime;
 import java.time.format.DateTimeFormatter;
 import java.util.*;
-import java.util.regex.Pattern;
 
 import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.*;
 import static org.schabi.newpipe.extractor.utils.Utils.EMPTY_STRING;
@@ -80,13 +80,10 @@ public class YoutubeStreamExtractor extends StreamExtractor {
 
     @Nullable
     private static String cachedDeobfuscationCode = null;
-    @Nullable
-    private String playerJsUrl = null;
-
-    private JsonArray initialAjaxJson;
-    private JsonObject initialData;
     @Nonnull
     private final Map<String, String> videoInfoPage = new HashMap<>();
+    private JsonArray initialAjaxJson;
+    private JsonObject initialData;
     private JsonObject playerResponse;
     private JsonObject videoPrimaryInfoRenderer;
     private JsonObject videoSecondaryInfoRenderer;
@@ -526,32 +523,16 @@ public class YoutubeStreamExtractor extends StreamExtractor {
     public List<VideoStream> getVideoStreams() throws ExtractionException {
         assertPageFetched();
         final List<VideoStream> videoStreams = new ArrayList<>();
+        YoutubeThrottlingDecoder throttlingDecoder = new YoutubeThrottlingDecoder(getId(), getExtractorLocalization());
 
         try {
-            getDeobfuscationCode();
-            final String playerCode = NewPipe.getDownloader()
-                    .get(playerJsUrl, getExtractorLocalization()).responseBody();
-            Pattern pattern = Pattern.compile("b=a\\.get\\(\"n\"\\)\\)&&\\(b=(\\w+)\\(b\\),a\\.set\\(\"n\",b\\)");
-            String functionName = Parser.matchGroup1(pattern, playerCode);
-            Pattern functionPattern = Pattern.compile(functionName + "=function(.*?;)\n", Pattern.DOTALL);
-            String function = "function " + functionName + Parser.matchGroup1(functionPattern, playerCode);
-
-            Context context = Context.enter();
-            context.setOptimizationLevel(-1);
-            ScriptableObject scope = context.initSafeStandardObjects();
-
             for (final Map.Entry<String, ItagItem> entry : getItags(FORMATS, ItagItem.ItagType.VIDEO).entrySet()) {
                 final ItagItem itag = entry.getValue();
                 final String url = entry.getKey();
 
-                Pattern nValuePattern = Pattern.compile("[&?]n=([^&]+)");
-                String nValue = Parser.matchGroup1(nValuePattern, url);
-                context.evaluateString(scope, function, functionName, 1, null);
-                final Function jsFunction = (Function) scope.get(functionName, scope);
-                Object result = jsFunction.call(context, scope, scope, new Object[]{nValue});
-                String newNValue = Objects.toString(result, nValue);
-                String newUrl = nValuePattern.matcher(url).replaceFirst(newNValue);
-                System.out.println("aaaaaa " + nValue + " - " + newNValue);
+                String oldNParam = throttlingDecoder.parseNParam(url);
+                String newNParam = throttlingDecoder.decodeNParam(oldNParam);
+                String newUrl = throttlingDecoder.replaceNParam(url, newNParam);
                 final VideoStream videoStream = new VideoStream(newUrl, false, itag);
                 if (!Stream.containSimilarStream(videoStream, videoStreams)) {
                     videoStreams.add(videoStream);
@@ -820,8 +801,7 @@ public class YoutubeStreamExtractor extends StreamExtractor {
         }
     }
 
-    @Nonnull
-    private String getEmbeddedInfoStsAndStorePlayerJsUrl() {
+    private String extractPlayerJsUrl() throws ParsingException {
         try {
             final String embedUrl = "https://www.youtube.com/embed/" + getId();
             final String embedPageContent = NewPipe.getDownloader()
@@ -829,7 +809,7 @@ public class YoutubeStreamExtractor extends StreamExtractor {
 
             try {
                 final String assetsPattern = "\"assets\":.+?\"js\":\\s*(\"[^\"]+\")";
-                playerJsUrl = Parser.matchGroup1(assetsPattern, embedPageContent)
+                return Parser.matchGroup1(assetsPattern, embedPageContent)
                         .replace("\\", "").replace("\"", "");
             } catch (final Parser.RegexException ex) {
                 // playerJsUrl is still available in the file, just somewhere else TODO
@@ -838,17 +818,25 @@ public class YoutubeStreamExtractor extends StreamExtractor {
                 final Elements elems = doc.select("script").attr("name", "player_ias/base");
                 for (final Element elem : elems) {
                     if (elem.attr("src").contains("base.js")) {
-                        playerJsUrl = elem.attr("src");
-                        break;
+                        return elem.attr("src");
                     }
                 }
             }
 
-            // Get embed sts
-            return Parser.matchGroup1("\"sts\"\\s*:\\s*(\\d+)", embedPageContent);
         } catch (final Exception i) {
-            // if it fails we simply reply with no sts as then it does not seem to be necessary
-            return "";
+            throw new ParsingException("Embedded info did not provide YouTube player js url", i);
+        }
+        throw new ParsingException("Embedded info did not provide YouTube player js url");
+    }
+
+    private String cleanPlayerJsUrl(String playerJsUrl) {
+        if (playerJsUrl.startsWith("//")) {
+            return HTTPS + playerJsUrl;
+        } else if (playerJsUrl.startsWith("/")) {
+            // sometimes https://www.youtube.com part has to be added manually
+            return HTTPS + "//www.youtube.com" + playerJsUrl;
+        } else {
+            return playerJsUrl;
         }
     }
 
@@ -899,22 +887,7 @@ public class YoutubeStreamExtractor extends StreamExtractor {
     @Nonnull
     private String getDeobfuscationCode() throws ParsingException {
         if (cachedDeobfuscationCode == null) {
-            if (playerJsUrl == null) {
-                // the currentPlayerJsUrl was not found in any page fetched so far and there is
-                // nothing cached, so try fetching embedded info
-                getEmbeddedInfoStsAndStorePlayerJsUrl();
-                if (playerJsUrl == null) {
-                    throw new ParsingException(
-                            "Embedded info did not provide YouTube player js url");
-                }
-            }
-
-            if (playerJsUrl.startsWith("//")) {
-                playerJsUrl = HTTPS + playerJsUrl;
-            } else if (playerJsUrl.startsWith("/")) {
-                // sometimes https://www.youtube.com part has to be added manually
-                playerJsUrl = HTTPS + "//www.youtube.com" + playerJsUrl;
-            }
+            String playerJsUrl = cleanPlayerJsUrl(extractPlayerJsUrl());
 
             cachedDeobfuscationCode = loadDeobfuscationCode(playerJsUrl);
         }
diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/utils/Javascript.java b/extractor/src/main/java/org/schabi/newpipe/extractor/utils/Javascript.java
new file mode 100644
index 000000000..c7c81bcb5
--- /dev/null
+++ b/extractor/src/main/java/org/schabi/newpipe/extractor/utils/Javascript.java
@@ -0,0 +1,34 @@
+package org.schabi.newpipe.extractor.utils;
+
+import org.mozilla.javascript.Context;
+import org.mozilla.javascript.Function;
+import org.mozilla.javascript.ScriptableObject;
+
+/** Runs a single JavaScript function with Rhino. */
+public class Javascript {
+
+    /**
+     * Evaluates the given source and calls the function it defines.
+     *
+     * @param function     JavaScript source that defines {@code functionName}
+     * @param functionName name of the function to call
+     * @param parameters   arguments passed to the function
+     * @return the result of the call, converted with {@code toString()}
+     */
+    public String run(String function, String functionName, String... parameters) {
+        // Enter before the try block: Context.exit() must only run after a successful enter().
+        Context context = Context.enter();
+        try {
+            context.setOptimizationLevel(-1);
+            ScriptableObject scope = context.initSafeStandardObjects();
+
+            context.evaluateString(scope, function, functionName, 1, null);
+            Function jsFunction = (Function) scope.get(functionName, scope);
+            Object result = jsFunction.call(context, scope, scope, parameters);
+            return result.toString();
+        } finally {
+            Context.exit();
+        }
+    }
+
+}