Fix parsing description for navigationEndpoint without urlEndpoint

This commit is contained in:
wb9688 2020-02-25 20:54:11 +01:00 committed by TobiGr
parent 26fb44595f
commit 729fd2eaee
1 changed file with 23 additions and 21 deletions

View File

@ -173,7 +173,6 @@ public class YoutubeStreamExtractor extends StreamExtractor {
@Override @Override
public Description getDescription() throws ParsingException { public Description getDescription() throws ParsingException {
assertPageFetched(); assertPageFetched();
// description with more info on links // description with more info on links
try { try {
boolean htmlConversionRequired = false; boolean htmlConversionRequired = false;
@ -184,30 +183,33 @@ public class YoutubeStreamExtractor extends StreamExtractor {
String text = textHolder.getString("text"); String text = textHolder.getString("text");
if (textHolder.getObject("navigationEndpoint") != null) { if (textHolder.getObject("navigationEndpoint") != null) {
// The text is a link. Get the URL it points to and generate a HTML link of it // The text is a link. Get the URL it points to and generate a HTML link of it
String internUrl = textHolder.getObject("navigationEndpoint").getObject("urlEndpoint").getString("url"); if (textHolder.getObject("navigationEndpoint").getObject("urlEndpoint") != null) {
if (internUrl.startsWith("/redirect?")) { String internUrl = textHolder.getObject("navigationEndpoint").getObject("urlEndpoint").getString("url");
// q parameter can be the first parameter if (internUrl.startsWith("/redirect?")) {
internUrl = internUrl.substring(10); // q parameter can be the first parameter
String[] params = internUrl.split("&"); internUrl = internUrl.substring(10);
for (String param : params) { String[] params = internUrl.split("&");
if (param.split("=")[0].equals("q")) { for (String param : params) {
String url = URLDecoder.decode(param.split("=")[1], StandardCharsets.UTF_8.name()); if (param.split("=")[0].equals("q")) {
if (url != null && !url.isEmpty()) { String url = URLDecoder.decode(param.split("=")[1], StandardCharsets.UTF_8.name());
descriptionBuilder.append("<a href=\"").append(url).append("\">").append(text).append("</a>"); if (url != null && !url.isEmpty()) {
htmlConversionRequired = true; descriptionBuilder.append("<a href=\"").append(url).append("\">").append(text).append("</a>");
} else { htmlConversionRequired = true;
descriptionBuilder.append(text); } else {
descriptionBuilder.append(text);
}
break;
} }
break;
} }
} else if (internUrl.startsWith("http")) {
descriptionBuilder.append("<a href=\"").append(internUrl).append("\">").append(text).append("</a>");
htmlConversionRequired = true;
} }
} else if (internUrl.startsWith("http")) { continue;
descriptionBuilder.append("<a href=\"").append(internUrl).append("\">").append(text).append("</a>");
htmlConversionRequired = true;
} else if (text != null) {
descriptionBuilder.append(text);
} }
} else if (text != null) { continue;
}
if (text != null) {
descriptionBuilder.append(text); descriptionBuilder.append(text);
} }
} }