From b80c3f5d5158685ce59c883d41d425426a632563 Mon Sep 17 00:00:00 2001 From: Stypox Date: Mon, 8 Apr 2024 00:14:28 +0200 Subject: [PATCH] [YouTube] Replace link text with accessibility label --- .../youtube/YoutubeDescriptionHelper.java | 81 ++++++++++++++++--- 1 file changed, 71 insertions(+), 10 deletions(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeDescriptionHelper.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeDescriptionHelper.java index 0eeecdac7..49b94f6e9 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeDescriptionHelper.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeDescriptionHelper.java @@ -12,6 +12,9 @@ import java.util.Collections; import java.util.Comparator; import java.util.List; import java.util.Stack; +import java.util.function.Function; +import java.util.regex.Matcher; +import java.util.regex.Pattern; import javax.annotation.Nonnull; import javax.annotation.Nullable; @@ -29,6 +32,11 @@ public final class YoutubeDescriptionHelper { public static final String ITALIC_OPEN = ""; public static final String ITALIC_CLOSE = ""; + // special link chips (e.g. for YT videos, YT channels or social media accounts): + // (u00a0) u00a0 u00a0 [/•] u00a0 u00a0 u00a0 + private static final Pattern LINK_CONTENT_CLEANER_REGEX + = Pattern.compile("(?s)^\u00a0+[/•]\u00a0+(.*?)\u00a0+$"); + /** * Can be a command run, or a style run. */ @@ -37,17 +45,30 @@ public final class YoutubeDescriptionHelper { @Nonnull final String close; final int pos; final boolean isClose; + @Nullable final Function transformContent; + int openPosInOutput = -1; Run( @Nonnull final String open, @Nonnull final String close, final int pos, final boolean isClose + ) { + this(open, close, pos, isClose, null); + } + + Run( + @Nonnull final String open, + @Nonnull final String close, + final int pos, + final boolean isClose, + @Nullable final Function transformContent ) { this.open = open; this.close = close; this.pos = pos; this.isClose = isClose; + this.transformContent = transformContent; } public boolean sameOpen(@Nonnull final Run other) { @@ -148,12 +169,22 @@ public final class YoutubeDescriptionHelper { // condition, because no run will close before being opened, but let's be sure while (!openRuns.empty()) { final Run popped = openRuns.pop(); - textBuilder.append(popped.close); if (popped.sameOpen(closer)) { + // before closing the current run, if the run has a transformContent + // function, use it to transform the content of the current run, based on + // the openPosInOutput set when the current run was opened + if (popped.transformContent != null && popped.openPosInOutput >= 0) { + textBuilder.replace(popped.openPosInOutput, textBuilder.length(), + popped.transformContent.apply( + textBuilder.substring(popped.openPosInOutput))); + } + // close the run that we really need to close + textBuilder.append(popped.close); break; } // we keep popping from openRuns, closing all of the runs we find, // until we find the run that we really need to close ... + textBuilder.append(popped.close); tempStack.push(popped); } while (!tempStack.empty()) { @@ -168,8 +199,10 @@ public final class YoutubeDescriptionHelper { } else { // this will never be reached if openersIndex >= openers.size() because of the // way minPos is calculated - textBuilder.append(openers.get(openersIndex).open); - openRuns.push(openers.get(openersIndex)); + final Run opener = openers.get(openersIndex); + textBuilder.append(opener.open); + opener.openPosInOutput = textBuilder.length(); // save for transforming later + openRuns.push(opener); ++openersIndex; } } @@ -180,11 +213,7 @@ public final class YoutubeDescriptionHelper { return textBuilder.toString() .replace("\n", "
") .replace(" ", "  ") - // special link chips (e.g. for YT videos, YT channels or social media accounts): - // u00a0 u00a0 [/•] u00a0 u00a0 u00a0 - .replace("\">\u00a0\u00a0/\u00a0", "\">") - .replace("\">\u00a0\u00a0•\u00a0", "\">") - .replace("\u00a0\u00a0", ""); + .replace('\u00a0', ' '); } private static void addAllCommandRuns( @@ -212,12 +241,44 @@ public final class YoutubeDescriptionHelper { } final String open = ""; + final Function transformContent = getTransformContentFun(run); - openers.add(new Run(open, LINK_CLOSE, startIndex, false)); - closers.add(new Run(open, LINK_CLOSE, startIndex + length, true)); + openers.add(new Run(open, LINK_CLOSE, startIndex, false, + transformContent)); + closers.add(new Run(open, LINK_CLOSE, startIndex + length, true, + transformContent)); }); } + private static Function getTransformContentFun(final JsonObject run) { + final String accessibilityLabel = run.getObject("onTapOptions") + .getObject("accessibilityInfo") + .getString("accessibilityLabel", "") + // accessibility labels are e.g. "Instagram Channel Link: instagram_profile_name" + .replaceFirst(" Channel Link", ""); + + final Function transformContent; + if (accessibilityLabel.isEmpty() || accessibilityLabel.startsWith("YouTube: ")) { + // if there is no accessibility label, or the link points to YouTube, cleanup the link + // text, see LINK_CONTENT_CLEANER_REGEX's documentation for more details + transformContent = (content) -> { + final Matcher m = LINK_CONTENT_CLEANER_REGEX.matcher(content); + if (m.find()) { + return m.group(1); + } + return content; + }; + } else { + // if there is an accessibility label, replace the link text with it, because on the + // YouTube website an ambiguous link text is next to an icon explaining which service it + // belongs to, but since we can't add icons, we instead use the accessibility label + // which contains information about the service + transformContent = (content) -> accessibilityLabel; + } + + return transformContent; + } + private static void addAllStyleRuns( @Nonnull final JsonObject attributedDescription, @Nonnull final List openers,