[YouTube] Improve WEB client version and API key HTML extraction

Common code in the WEB client version HTML extraction has been deduplicated, the Java 8 Stream API is now used, and the initial data fallback is now only used as a last resort.
This means that the client version extraction from regexes is tried before this fallback, as the fallback doesn't contain the full client version.
Using this shortened client version could be a way to fingerprint the extractor, even if that does not seem to be the case currently.
This commit is contained in:
AudricV 2022-08-08 19:36:19 +02:00
parent 6a885ef5ab
commit d7e678aca2
No known key found for this signature in database
GPG Key ID: DA92EC7905614198
1 changed files with 61 additions and 39 deletions

View File

@ -73,6 +73,7 @@ import java.util.Objects;
import java.util.Optional; import java.util.Optional;
import java.util.Random; import java.util.Random;
import java.util.regex.Pattern; import java.util.regex.Pattern;
import java.util.stream.Stream;
import javax.annotation.Nonnull; import javax.annotation.Nonnull;
import javax.annotation.Nullable; import javax.annotation.Nullable;
@ -640,59 +641,79 @@ public final class YoutubeParsingHelper {
if (keyAndVersionExtracted) { if (keyAndVersionExtracted) {
return; return;
} }
// Don't provide a search term in order to have a smaller response // Don't provide a search term in order to have a smaller response
final String url = "https://www.youtube.com/results?search_query=&ucbcb=1"; final String url = "https://www.youtube.com/results?search_query=&ucbcb=1";
final String html = getDownloader().get(url, getCookieHeader()).responseBody(); final String html = getDownloader().get(url, getCookieHeader()).responseBody();
final JsonObject initialData = getInitialData(html); final JsonObject initialData = getInitialData(html);
final JsonArray serviceTrackingParams = initialData.getObject("responseContext") final JsonArray serviceTrackingParams = initialData.getObject("responseContext")
.getArray("serviceTrackingParams"); .getArray("serviceTrackingParams");
String shortClientVersion = null;
// Try to get version from initial data first // Try to get version from initial data first
for (final Object service : serviceTrackingParams) { final Stream<JsonObject> serviceTrackingParamsStream = serviceTrackingParams.stream()
final JsonObject s = (JsonObject) service; .filter(JsonObject.class::isInstance)
if (s.getString("service").equals("CSI")) { .map(JsonObject.class::cast);
final JsonArray params = s.getArray("params");
for (final Object param : params) { clientVersion = getClientVersionFromServiceTrackingParam(
final JsonObject p = (JsonObject) param; serviceTrackingParamsStream, "CSI", "cver");
final String paramKey = p.getString("key");
if (paramKey != null && paramKey.equals("cver")) { if (clientVersion == null) {
clientVersion = p.getString("value"); try {
} clientVersion = getStringResultFromRegexArray(html,
} INNERTUBE_CONTEXT_CLIENT_VERSION_REGEXES, 1);
} else if (s.getString("service").equals("ECATCHER")) { } catch (final Parser.RegexException ignored) {
// Fallback to get a shortened client version which does not contain the last two
// digits
final JsonArray params = s.getArray("params");
for (final Object param : params) {
final JsonObject p = (JsonObject) param;
final String paramKey = p.getString("key");
if (paramKey != null && paramKey.equals("client.version")) {
shortClientVersion = p.getString("value");
}
}
} }
} }
try { // Fallback to get a shortened client version which does not contain the last two
clientVersion = getStringResultFromRegexArray(html, // digits
INNERTUBE_CONTEXT_CLIENT_VERSION_REGEXES, 1); if (isNullOrEmpty(clientVersion)) {
} catch (final Parser.RegexException ignored) { clientVersion = getClientVersionFromServiceTrackingParam(
} serviceTrackingParamsStream, "ECATCHER", "client.version");
if (!isNullOrEmpty(clientVersion) && !isNullOrEmpty(shortClientVersion)) {
clientVersion = shortClientVersion;
} }
try { try {
key = getStringResultFromRegexArray(html, INNERTUBE_API_KEY_REGEXES, 1); key = getStringResultFromRegexArray(html, INNERTUBE_API_KEY_REGEXES, 1);
} catch (final Parser.RegexException e) { } catch (final Parser.RegexException ignored) {
throw new ParsingException("Could not extract YouTube WEB InnerTube client version "
+ "and API key from HTML search results page", e);
} }
if (isNullOrEmpty(key)) {
throw new ParsingException(
// CHECKSTYLE:OFF
"Could not extract YouTube WEB InnerTube API key from HTML search results page");
// CHECKSTYLE:ON
}
if (clientVersion == null) {
throw new ParsingException(
// CHECKSTYLE:OFF
"Could not extract YouTube WEB InnerTube client version from HTML search results page");
// CHECKSTYLE:ON
}
keyAndVersionExtracted = true; keyAndVersionExtracted = true;
} }
/**
 * Look up a client version value inside the {@code params} arrays of the service tracking
 * params whose {@code service} property equals the given service name.
 *
 * <p>
 * The first param whose {@code key} equals {@code clientVersionKey} and whose {@code value}
 * is neither {@code null} nor empty wins.
 * </p>
 *
 * <p>
 * Note that this method runs a terminal operation on the given stream, so the stream is
 * consumed by this call and cannot be traversed again afterwards.
 * </p>
 *
 * @param serviceTrackingParamsStream a stream of the service tracking params objects
 * @param serviceName                 the value of the {@code service} property to select
 * @param clientVersionKey            the value of the {@code key} property of the param which
 *                                    holds the client version
 * @return the first non-empty matching value, or {@code null} if there is none
 */
@Nullable
private static String getClientVersionFromServiceTrackingParam(
        @Nonnull final Stream<JsonObject> serviceTrackingParamsStream,
        @Nonnull final String serviceName,
        @Nonnull final String clientVersionKey) {
    // Keep only the tracking params of the requested service and flatten their params
    final Stream<JsonObject> paramsOfService = serviceTrackingParamsStream
            .filter(trackingParam -> trackingParam.getString("service", "")
                    .equals(serviceName))
            .flatMap(trackingParam -> trackingParam.getArray("params").stream())
            .filter(JsonObject.class::isInstance)
            .map(JsonObject.class::cast);

    // Pick the first usable value stored under the requested key
    final Optional<String> firstUsableValue = paramsOfService
            .filter(entry -> entry.getString("key", "").equals(clientVersionKey))
            .map(entry -> entry.getString("value"))
            .filter(value -> !isNullOrEmpty(value))
            .findFirst();

    return firstUsableValue.orElse(null);
}
/** /**
* Get the client version used by YouTube website on InnerTube requests. * Get the client version used by YouTube website on InnerTube requests.
*/ */
@ -701,8 +722,8 @@ public final class YoutubeParsingHelper {
return clientVersion; return clientVersion;
} }
// Always extract latest client version, by trying first to extract it from the JavaScript // Always extract the latest client version, by trying first to extract it from the
// service worker, then from HTML search results page as a fallback, to prevent // JavaScript service worker, then from HTML search results page as a fallback, to prevent
// fingerprinting based on the client version used // fingerprinting based on the client version used
try { try {
extractClientVersionAndKeyFromSwJs(); extractClientVersionAndKeyFromSwJs();
@ -714,7 +735,7 @@ public final class YoutubeParsingHelper {
return clientVersion; return clientVersion;
} }
// Fallback to the hardcoded one if it's valid // Fallback to the hardcoded one if it is valid
if (areHardcodedClientVersionAndKeyValid()) { if (areHardcodedClientVersionAndKeyValid()) {
clientVersion = HARDCODED_CLIENT_VERSION; clientVersion = HARDCODED_CLIENT_VERSION;
return clientVersion; return clientVersion;
@ -731,7 +752,7 @@ public final class YoutubeParsingHelper {
return key; return key;
} }
// Always extract the key used by the webiste, by trying first to extract it from the // Always extract the key used by the website, by trying first to extract it from the
// JavaScript service worker, then from HTML search results page as a fallback, to prevent // JavaScript service worker, then from HTML search results page as a fallback, to prevent
// fingerprinting based on the key and/or invalid key issues // fingerprinting based on the key and/or invalid key issues
try { try {
@ -751,7 +772,8 @@ public final class YoutubeParsingHelper {
} }
// The ANDROID API key is also valid with the WEB client so return it if we couldn't // The ANDROID API key is also valid with the WEB client so return it if we couldn't
// extract the WEB API key. // extract the WEB API key. This can be used as a way to fingerprint the extractor in this
// case
return ANDROID_YOUTUBE_KEY; return ANDROID_YOUTUBE_KEY;
} }