From 592f1596e6341e1058378468152ba4221a0ddb69 Mon Sep 17 00:00:00 2001 From: XiangRongLin <41164160+XiangRongLin@users.noreply.github.com> Date: Thu, 11 Jul 2024 11:20:33 +0200 Subject: [PATCH] [Youtube] Adjust throttling function extraction to changes (#1191) * [Youtube] Adjust throttling function extraction to changes --------- Co-authored-by: Stypox --- .../youtube/YoutubeSignatureUtils.java | 29 +++++------- .../YoutubeThrottlingParameterUtils.java | 45 +++++++++++++++---- .../newpipe/extractor/utils/Parser.java | 31 +++++++++++++ 3 files changed, 79 insertions(+), 26 deletions(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeSignatureUtils.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeSignatureUtils.java index 8e0567927..13365f5d6 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeSignatureUtils.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeSignatureUtils.java @@ -1,5 +1,7 @@ package org.schabi.newpipe.extractor.services.youtube; +import static org.schabi.newpipe.extractor.utils.Parser.matchGroup1MultiplePatterns; + import org.schabi.newpipe.extractor.exceptions.ParsingException; import org.schabi.newpipe.extractor.utils.JavaScript; import org.schabi.newpipe.extractor.utils.Parser; @@ -20,13 +22,13 @@ final class YoutubeSignatureUtils { */ static final String DEOBFUSCATION_FUNCTION_NAME = "deobfuscate"; - private static final String[] FUNCTION_REGEXES = { - "\\bm=([a-zA-Z0-9$]{2,})\\(decodeURIComponent\\(h\\.s\\)\\)", - "\\bc&&\\(c=([a-zA-Z0-9$]{2,})\\(decodeURIComponent\\(c\\)\\)", + private static final Pattern[] FUNCTION_REGEXES = { // CHECKSTYLE:OFF - "(?:\\b|[^a-zA-Z0-9$])([a-zA-Z0-9$]{2,})\\s*=\\s*function\\(\\s*a\\s*\\)\\s*\\{\\s*a\\s*=\\s*a\\.split\\(\\s*\"\"\\s*\\)", + Pattern.compile("\\bm=([a-zA-Z0-9$]{2,})\\(decodeURIComponent\\(h\\.s\\)\\)"), + Pattern.compile("\\bc&&\\(c=([a-zA-Z0-9$]{2,})\\(decodeURIComponent\\(c\\)\\)"), + Pattern.compile("(?:\\b|[^a-zA-Z0-9$])([a-zA-Z0-9$]{2,})\\s*=\\s*function\\(\\s*a\\s*\\)\\s*\\{\\s*a\\s*=\\s*a\\.split\\(\\s*\"\"\\s*\\)"), + Pattern.compile("([\\w$]+)\\s*=\\s*function\\((\\w+)\\)\\{\\s*\\2=\\s*\\2\\.split\\(\"\"\\)\\s*;") // CHECKSTYLE:ON - "([\\w$]+)\\s*=\\s*function\\((\\w+)\\)\\{\\s*\\2=\\s*\\2\\.split\\(\"\"\\)\\s*;" }; private static final String STS_REGEX = "signatureTimestamp[=:](\\d+)"; @@ -104,19 +106,12 @@ final class YoutubeSignatureUtils { @Nonnull private static String getDeobfuscationFunctionName(@Nonnull final String javaScriptPlayerCode) throws ParsingException { - Parser.RegexException exception = null; - for (final String regex : FUNCTION_REGEXES) { - try { - return Parser.matchGroup1(regex, javaScriptPlayerCode); - } catch (final Parser.RegexException e) { - if (exception == null) { - exception = e; - } - } + try { + return matchGroup1MultiplePatterns(FUNCTION_REGEXES, javaScriptPlayerCode); + } catch (final Parser.RegexException e) { + throw new ParsingException( + "Could not find deobfuscation function with any of the known patterns", e); } - - throw new ParsingException( - "Could not find deobfuscation function with any of the known patterns", exception); } @Nonnull diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeThrottlingParameterUtils.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeThrottlingParameterUtils.java index c398c6202..5dae1c9bf 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeThrottlingParameterUtils.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeThrottlingParameterUtils.java @@ -1,5 +1,7 @@ package org.schabi.newpipe.extractor.services.youtube; +import static org.schabi.newpipe.extractor.utils.Parser.matchMultiplePatterns; + import org.schabi.newpipe.extractor.exceptions.ParsingException; import org.schabi.newpipe.extractor.utils.JavaScript; import org.schabi.newpipe.extractor.utils.Parser; @@ -18,10 +20,33 @@ final class YoutubeThrottlingParameterUtils { private static final Pattern THROTTLING_PARAM_PATTERN = Pattern.compile("[&?]n=([^&]+)"); - private static final Pattern DEOBFUSCATION_FUNCTION_NAME_PATTERN = Pattern.compile( - // CHECKSTYLE:OFF - "\\.get\\(\"n\"\\)\\)&&\\([a-zA-Z0-9$_]=([a-zA-Z0-9$_]+)(?:\\[(\\d+)])?\\([a-zA-Z0-9$_]\\)"); - // CHECKSTYLE:ON + private static final String SINGLE_CHAR_VARIABLE_REGEX = "[a-zA-Z0-9$_]"; + + private static final String FUNCTION_NAME_REGEX = SINGLE_CHAR_VARIABLE_REGEX + "+"; + + private static final String ARRAY_ACCESS_REGEX = "\\[(\\d+)]"; + + /** + * The first regex matches this, where we want BDa: + *

+ * (b=String.fromCharCode(110),c=a.get(b))&&(c=BDa[0](c) + *

+ * Array access is optional, but needs to be handled, since the actual function is inside the + * array. + */ + // CHECKSTYLE:OFF + private static final Pattern[] DEOBFUSCATION_FUNCTION_NAME_REGEXES = { + Pattern.compile("\\(" + SINGLE_CHAR_VARIABLE_REGEX + "=String\\.fromCharCode\\(110\\)," + + SINGLE_CHAR_VARIABLE_REGEX + "=" + SINGLE_CHAR_VARIABLE_REGEX + "\\.get\\(" + + SINGLE_CHAR_VARIABLE_REGEX + "\\)\\)" + "&&\\(" + SINGLE_CHAR_VARIABLE_REGEX + + "=(" + FUNCTION_NAME_REGEX + ")" + "(?:" + ARRAY_ACCESS_REGEX + ")?\\(" + + SINGLE_CHAR_VARIABLE_REGEX + "\\)"), + Pattern.compile("\\.get\\(\"n\"\\)\\)&&\\(" + SINGLE_CHAR_VARIABLE_REGEX + + "=(" + FUNCTION_NAME_REGEX + ")(?:" + ARRAY_ACCESS_REGEX + ")?\\(" + + SINGLE_CHAR_VARIABLE_REGEX + "\\)"), + }; + // CHECKSTYLE:ON + // Escape the curly end brace to allow compatibility with Android's regex engine // See https://stackoverflow.com/q/45074813 @@ -48,11 +73,13 @@ final class YoutubeThrottlingParameterUtils { @Nonnull static String getDeobfuscationFunctionName(@Nonnull final String javaScriptPlayerCode) throws ParsingException { - final Matcher matcher = DEOBFUSCATION_FUNCTION_NAME_PATTERN.matcher(javaScriptPlayerCode); - if (!matcher.find()) { - throw new ParsingException("Failed to find deobfuscation function name pattern \"" - + DEOBFUSCATION_FUNCTION_NAME_PATTERN - + "\" in the base JavaScript player code"); + final Matcher matcher; + try { + matcher = matchMultiplePatterns(DEOBFUSCATION_FUNCTION_NAME_REGEXES, + javaScriptPlayerCode); + } catch (final Parser.RegexException e) { + throw new ParsingException("Could not find deobfuscation function with any of the " + + "known patterns in the base JavaScript player code", e); } final String functionName = matcher.group(1); diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/utils/Parser.java b/extractor/src/main/java/org/schabi/newpipe/extractor/utils/Parser.java index 6efc74f2d..cde9b3b62 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/utils/Parser.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/utils/Parser.java @@ -78,6 +78,37 @@ public final class Parser { } } + public static String matchGroup1MultiplePatterns(final Pattern[] patterns, final String input) + throws RegexException { + return matchMultiplePatterns(patterns, input).group(1); + } + + public static Matcher matchMultiplePatterns(final Pattern[] patterns, final String input) + throws RegexException { + Parser.RegexException exception = null; + for (final Pattern pattern : patterns) { + final Matcher matcher = pattern.matcher(input); + if (matcher.find()) { + return matcher; + } else if (exception == null) { + // only pass input to exception message when it is not too long + if (input.length() > 1024) { + exception = new RegexException("Failed to find pattern \"" + pattern.pattern() + + "\""); + } else { + exception = new RegexException("Failed to find pattern \"" + pattern.pattern() + + "\" inside of \"" + input + "\""); + } + } + } + + if (exception == null) { + throw new RegexException("Empty patterns array passed to matchMultiplePatterns"); + } else { + throw exception; + } + } + public static boolean isMatch(final String pattern, final String input) { final Pattern pat = Pattern.compile(pattern); final Matcher mat = pat.matcher(input);