[YouTube] Avoid XSS attacks in description or comments

This commit is contained in:
Stypox 2024-04-08 10:19:26 +02:00
parent 3f7b2653e3
commit 02274d5395
No known key found for this signature in database
GPG Key ID: 4BDF1B40A49FDD23
2 changed files with 20 additions and 15 deletions

View File

@ -35,7 +35,7 @@ public final class YoutubeDescriptionHelper {
// special link chips (e.g. for YT videos, YT channels or social media accounts): // special link chips (e.g. for YT videos, YT channels or social media accounts):
// (u00a0) u00a0 u00a0 [/] u00a0 <link content> u00a0 u00a0 // (u00a0) u00a0 u00a0 [/] u00a0 <link content> u00a0 u00a0
private static final Pattern LINK_CONTENT_CLEANER_REGEX private static final Pattern LINK_CONTENT_CLEANER_REGEX
= Pattern.compile("(?s)^\u00a0+[/•]\u00a0+(.*?)\u00a0+$"); = Pattern.compile("(?s)^ +[/•] +(.*?) +$");
/** /**
* Can be a command run, or a style run. * Can be a command run, or a style run.
@ -74,10 +74,13 @@ public final class YoutubeDescriptionHelper {
/** /**
* Parse a video description in the new "attributed" format, which contains the entire visible * Parse a video description in the new "attributed" format, which contains the entire visible
* plaintext ({@code content}) and an array of {@code commandRuns}. * plaintext ({@code content}) and an array of {@code commandRuns} and {@code styleRuns}.
* Returns the formatted content in HTML format, and escapes the text to make sure there are no
* XSS attacks.
* *
* <p> * <p>
* The {@code commandRuns} include the links and their position in the text. * {@code commandRuns} include the links and their range in the text, while {@code styleRuns}
* include the styling to apply to various ranges in the text.
* </p> * </p>
* *
* @param attributedDescription the JSON object of the attributed description * @param attributedDescription the JSON object of the attributed description
@ -119,25 +122,28 @@ public final class YoutubeDescriptionHelper {
* Applies the formatting specified by the intervals stored in {@code openers} and {@code * Applies the formatting specified by the intervals stored in {@code openers} and {@code
* closers} to {@code content} in order to obtain valid HTML even when intervals overlap. For * closers} to {@code content} in order to obtain valid HTML even when intervals overlap. For
* example &lt;b&gt;b&lt;i&gt;b&i&lt;/b&gt;i&lt;/i&gt; would not be valid HTML, so this function * example &lt;b&gt;b&lt;i&gt;b&i&lt;/b&gt;i&lt;/i&gt; would not be valid HTML, so this function
* instead generates &lt;b&gt;b&lt;i&gt;b&i&lt;/i&gt;&lt;/b&gt;&lt;i&gt;i&lt;/i&gt;. * instead generates &lt;b&gt;b&lt;i&gt;b&i&lt;/i&gt;&lt;/b&gt;&lt;i&gt;i&lt;/i&gt;. Any HTML
* special characters in {@code rawContent} are escaped to make sure there are no XSS attacks.
*
* <p> * <p>
* Every opener in {@code openers} must have a corresponding closer in {@code closers}. Every * Every opener in {@code openers} must have a corresponding closer in {@code closers}. Every
* corresponding (opener, closer) pair must have a length of at least one (i.e. empty intervals * corresponding (opener, closer) pair must have a length of at least one (i.e. empty intervals
* are not allowed). * are not allowed).
* </p> * </p>
* *
* @param openers contains all of the places where a run begins, must have the same size of * @param openers contains all of the places where a run begins, must have the same size of
* closers, must be ordered by {@link Run#pos} * closers, must be ordered by {@link Run#pos}
* @param closers contains all of the places where a run ends, must have the same size of * @param closers contains all of the places where a run ends, must have the same size of
* openers, must be ordered by {@link Run#pos} * openers, must be ordered by {@link Run#pos}
* @param content the content to apply formatting to * @param rawContent the content to apply formatting to, and to escape to avoid XSS
* @return the formatted content in HTML * @return the formatted content in HTML
*/ */
static String runsToHtml( static String runsToHtml(
@Nonnull final List<Run> openers, @Nonnull final List<Run> openers,
@Nonnull final List<Run> closers, @Nonnull final List<Run> closers,
@Nonnull final String content @Nonnull final String rawContent
) { ) {
final String content = rawContent.replace('\u00a0', ' ');
final Stack<Run> openRuns = new Stack<>(); final Stack<Run> openRuns = new Stack<>();
final Stack<Run> tempStack = new Stack<>(); final Stack<Run> tempStack = new Stack<>();
final StringBuilder textBuilder = new StringBuilder(); final StringBuilder textBuilder = new StringBuilder();
@ -154,7 +160,7 @@ public final class YoutubeDescriptionHelper {
: closers.get(closersIndex).pos; : closers.get(closersIndex).pos;
// append piece of text until current index // append piece of text until current index
textBuilder.append(content, currentTextPos, minPos); textBuilder.append(Entities.escape(content.substring(currentTextPos, minPos)));
currentTextPos = minPos; currentTextPos = minPos;
if (closers.get(closersIndex).pos == minPos) { if (closers.get(closersIndex).pos == minPos) {
@ -205,12 +211,11 @@ public final class YoutubeDescriptionHelper {
} }
// append last piece of text // append last piece of text
textBuilder.append(content, currentTextPos, content.length()); textBuilder.append(Entities.escape(content.substring(currentTextPos)));
return textBuilder.toString() return textBuilder.toString()
.replace("\n", "<br>") .replace("\n", "<br>")
.replace(" ", " &nbsp;") .replace(" ", " &nbsp;");
.replace('\u00a0', ' ');
} }
private static void addAllCommandRuns( private static void addAllCommandRuns(

View File

@ -34,7 +34,7 @@ public class YoutubeDescriptionHelperTest {
@Test @Test
public void testNoRuns() { public void testNoRuns() {
assertRunsToHtml( assertRunsToHtml(
"abc *a* _c_ <br> <br> <a href=\"#\">test</a> &nbsp;&amp;", "abc *a* _c_ &lt;br&gt; <br> &lt;a href=\"#\"&gt;test&lt;/a&gt; &nbsp;&amp;amp;",
List.of(), List.of(),
List.of(), List.of(),
"abc *a* _c_ <br>\u00a0\n\u00a0<a href=\"#\">test</a> &amp;" "abc *a* _c_ <br>\u00a0\n\u00a0<a href=\"#\">test</a> &amp;"