From 22f818109ff8bd85cf04e2d27500fe8a215037f6 Mon Sep 17 00:00:00 2001 From: AudricV <74829229+AudricV@users.noreply.github.com> Date: Tue, 23 Jul 2024 19:01:58 +0200 Subject: [PATCH] [YouTube] Fix JavaScript n parameter decoding function name extraction This commit fixes extraction of the function name decoding the n parameter for HTML5 clients' streaming URLs for YouTube base JavaScript player 3400486c. Two new regexes have been added to the existing ones. All regexes and what they extract have been documented. --- .../YoutubeThrottlingParameterUtils.java | 48 +++++++++++++++---- 1 file changed, 39 insertions(+), 9 deletions(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeThrottlingParameterUtils.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeThrottlingParameterUtils.java index 5dae1c9bf..741f7a16b 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeThrottlingParameterUtils.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeThrottlingParameterUtils.java @@ -26,24 +26,54 @@ final class YoutubeThrottlingParameterUtils { private static final String ARRAY_ACCESS_REGEX = "\\[(\\d+)]"; - /** - * The first regex matches this, where we want BDa: - *

- * (b=String.fromCharCode(110),c=a.get(b))&&(c=BDa[0](c) - *

- * Array access is optional, but needs to be handled, since the actual function is inside the - * array. - */ // CHECKSTYLE:OFF private static final Pattern[] DEOBFUSCATION_FUNCTION_NAME_REGEXES = { + + /* + * The first regex matches the following text, where we want rDa and the array index + * accessed: + * + * a.D&&(b="nn"[+a.D],c=a.get(b))&&(c=rDa[0](c),a.set(b,c),rDa.length||rma("") + */ + Pattern.compile(SINGLE_CHAR_VARIABLE_REGEX + "+=\"nn\"\\[\\+" + + SINGLE_CHAR_VARIABLE_REGEX + "+\\." + SINGLE_CHAR_VARIABLE_REGEX + "+]," + + SINGLE_CHAR_VARIABLE_REGEX + "+=" + SINGLE_CHAR_VARIABLE_REGEX + + "+\\.get\\(" + SINGLE_CHAR_VARIABLE_REGEX + "+\\)\\)&&\\(" + + SINGLE_CHAR_VARIABLE_REGEX + "+=(" + SINGLE_CHAR_VARIABLE_REGEX + + "+)\\[(\\d+)]"), + + /* + * The second regex matches the following text, where we want rma: + * + * a.D&&(b="nn"[+a.D],c=a.get(b))&&(c=rDa[0](c),a.set(b,c),rDa.length||rma("") + */ + Pattern.compile(SINGLE_CHAR_VARIABLE_REGEX + "+=\"nn\"\\[\\+" + + SINGLE_CHAR_VARIABLE_REGEX + "+\\." 
+ SINGLE_CHAR_VARIABLE_REGEX + "+]," + + SINGLE_CHAR_VARIABLE_REGEX + "+=" + SINGLE_CHAR_VARIABLE_REGEX + "+\\.get\\(" + + SINGLE_CHAR_VARIABLE_REGEX + "+\\)\\).+\\|\\|(" + SINGLE_CHAR_VARIABLE_REGEX + + "+)\\(\"\"\\)"), + + /* + * The third regex matches the following text, where we want BDa and the array index + * accessed: + * + * (b=String.fromCharCode(110),c=a.get(b))&&(c=BDa[0](c) + */ Pattern.compile("\\(" + SINGLE_CHAR_VARIABLE_REGEX + "=String\\.fromCharCode\\(110\\)," + SINGLE_CHAR_VARIABLE_REGEX + "=" + SINGLE_CHAR_VARIABLE_REGEX + "\\.get\\(" + SINGLE_CHAR_VARIABLE_REGEX + "\\)\\)" + "&&\\(" + SINGLE_CHAR_VARIABLE_REGEX + "=(" + FUNCTION_NAME_REGEX + ")" + "(?:" + ARRAY_ACCESS_REGEX + ")?\\(" + SINGLE_CHAR_VARIABLE_REGEX + "\\)"), + + /* + * The fourth regex matches the following text, where we want Yva and the array index + * accessed: + * + * .get("n"))&&(b=Yva[0](b) + */ Pattern.compile("\\.get\\(\"n\"\\)\\)&&\\(" + SINGLE_CHAR_VARIABLE_REGEX + "=(" + FUNCTION_NAME_REGEX + ")(?:" + ARRAY_ACCESS_REGEX + ")?\\(" - + SINGLE_CHAR_VARIABLE_REGEX + "\\)"), + + SINGLE_CHAR_VARIABLE_REGEX + "\\)") }; // CHECKSTYLE:ON