[YouTube] Make failure to extract videoPrimaryInfoRenderer and/or videoSecondaryInfoRenderer non-fatal

Also de-duplicated the common code used to obtain these video info renderers.

This change allows extraction of videos without visual metadata.
This commit is contained in:
AudricV 2022-11-04 18:35:53 +01:00
parent eb07d70a2c
commit aa9a8ca23c
No known key found for this signature in database
GPG Key ID: DA92EC7905614198
1 changed file with 56 additions and 73 deletions

View File

@ -204,45 +204,48 @@ public class YoutubeStreamExtractor extends StreamExtractor {
return null; return null;
} }
if (getTextFromObject(getVideoPrimaryInfoRenderer().getObject("dateText")) final String videoPrimaryInfoRendererDateText =
.startsWith("Premiered")) { getTextFromObject(getVideoPrimaryInfoRenderer().getObject("dateText"));
final String time = getTextFromObject(
getVideoPrimaryInfoRenderer().getObject("dateText")).substring(13);
try { // Premiered 20 hours ago if (videoPrimaryInfoRendererDateText != null) {
final TimeAgoParser timeAgoParser = TimeAgoPatternsManager.getTimeAgoParserFor( if (videoPrimaryInfoRendererDateText.startsWith("Premiered")) {
Localization.fromLocalizationCode("en")); final String time = videoPrimaryInfoRendererDateText.substring(13);
final OffsetDateTime parsedTime = timeAgoParser.parse(time).offsetDateTime();
return DateTimeFormatter.ISO_LOCAL_DATE.format(parsedTime); try { // Premiered 20 hours ago
} catch (final Exception ignored) { final TimeAgoParser timeAgoParser = TimeAgoPatternsManager.getTimeAgoParserFor(
Localization.fromLocalizationCode("en"));
final OffsetDateTime parsedTime = timeAgoParser.parse(time).offsetDateTime();
return DateTimeFormatter.ISO_LOCAL_DATE.format(parsedTime);
} catch (final Exception ignored) {
}
try { // Premiered Feb 21, 2020
final LocalDate localDate = LocalDate.parse(time,
DateTimeFormatter.ofPattern("MMM dd, yyyy", Locale.ENGLISH));
return DateTimeFormatter.ISO_LOCAL_DATE.format(localDate);
} catch (final Exception ignored) {
}
try { // Premiered on 21 Feb 2020
final LocalDate localDate = LocalDate.parse(time,
DateTimeFormatter.ofPattern("dd MMM yyyy", Locale.ENGLISH));
return DateTimeFormatter.ISO_LOCAL_DATE.format(localDate);
} catch (final Exception ignored) {
}
} }
try { // Premiered Feb 21, 2020 try {
final LocalDate localDate = LocalDate.parse(time, // TODO: this parses English formatted dates only, we need a better approach to
DateTimeFormatter.ofPattern("MMM dd, yyyy", Locale.ENGLISH)); // parse the textual date
return DateTimeFormatter.ISO_LOCAL_DATE.format(localDate); final LocalDate localDate = LocalDate.parse(videoPrimaryInfoRendererDateText,
} catch (final Exception ignored) {
}
try { // Premiered on 21 Feb 2020
final LocalDate localDate = LocalDate.parse(time,
DateTimeFormatter.ofPattern("dd MMM yyyy", Locale.ENGLISH)); DateTimeFormatter.ofPattern("dd MMM yyyy", Locale.ENGLISH));
return DateTimeFormatter.ISO_LOCAL_DATE.format(localDate); return DateTimeFormatter.ISO_LOCAL_DATE.format(localDate);
} catch (final Exception ignored) { } catch (final Exception e) {
throw new ParsingException("Could not get upload date", e);
} }
} }
try { throw new ParsingException("Could not get upload date");
// TODO: this parses English formatted dates only, we need a better approach to parse
// the textual date
final LocalDate localDate = LocalDate.parse(getTextFromObject(
getVideoPrimaryInfoRenderer().getObject("dateText")),
DateTimeFormatter.ofPattern("dd MMM yyyy", Locale.ENGLISH));
return DateTimeFormatter.ISO_LOCAL_DATE.format(localDate);
} catch (final Exception e) {
throw new ParsingException("Could not get upload date", e);
}
} }
@Override @Override
@ -565,19 +568,13 @@ public class YoutubeStreamExtractor extends StreamExtractor {
public String getUploaderAvatarUrl() throws ParsingException { public String getUploaderAvatarUrl() throws ParsingException {
assertPageFetched(); assertPageFetched();
String url = null; final String url = getVideoSecondaryInfoRenderer()
.getObject("owner")
try { .getObject("videoOwnerRenderer")
url = getVideoSecondaryInfoRenderer() .getObject("thumbnail")
.getObject("owner") .getArray("thumbnails")
.getObject("videoOwnerRenderer") .getObject(0)
.getObject("thumbnail") .getString("url");
.getArray("thumbnails")
.getObject(0)
.getString("url");
} catch (final ParsingException ignored) {
// Age-restricted videos cause a ParsingException here
}
if (isNullOrEmpty(url)) { if (isNullOrEmpty(url)) {
if (ageLimit == NO_AGE_LIMIT) { if (ageLimit == NO_AGE_LIMIT) {
@ -1212,40 +1209,29 @@ public class YoutubeStreamExtractor extends StreamExtractor {
// Utils // Utils
//////////////////////////////////////////////////////////////////////////*/ //////////////////////////////////////////////////////////////////////////*/
private JsonObject getVideoPrimaryInfoRenderer() throws ParsingException { @Nonnull
private JsonObject getVideoPrimaryInfoRenderer() {
if (videoPrimaryInfoRenderer != null) { if (videoPrimaryInfoRenderer != null) {
return videoPrimaryInfoRenderer; return videoPrimaryInfoRenderer;
} }
final JsonArray contents = nextResponse.getObject("contents") videoPrimaryInfoRenderer = getVideoInfoRenderer("videoPrimaryInfoRenderer");
.getObject("twoColumnWatchNextResults").getObject("results").getObject("results") return videoPrimaryInfoRenderer;
.getArray("contents");
JsonObject theVideoPrimaryInfoRenderer = null;
for (final Object content : contents) {
if (((JsonObject) content).has("videoPrimaryInfoRenderer")) {
theVideoPrimaryInfoRenderer = ((JsonObject) content)
.getObject("videoPrimaryInfoRenderer");
break;
}
}
if (isNullOrEmpty(theVideoPrimaryInfoRenderer)) {
throw new ParsingException("Could not find videoPrimaryInfoRenderer");
}
videoPrimaryInfoRenderer = theVideoPrimaryInfoRenderer;
return theVideoPrimaryInfoRenderer;
} }
@Nonnull @Nonnull
private JsonObject getVideoSecondaryInfoRenderer() throws ParsingException { private JsonObject getVideoSecondaryInfoRenderer() {
if (videoSecondaryInfoRenderer != null) { if (videoSecondaryInfoRenderer != null) {
return videoSecondaryInfoRenderer; return videoSecondaryInfoRenderer;
} }
videoSecondaryInfoRenderer = nextResponse videoSecondaryInfoRenderer = getVideoInfoRenderer("videoSecondaryInfoRenderer");
.getObject("contents") return videoSecondaryInfoRenderer;
}
@Nonnull
private JsonObject getVideoInfoRenderer(@Nonnull final String videoRendererName) {
return nextResponse.getObject("contents")
.getObject("twoColumnWatchNextResults") .getObject("twoColumnWatchNextResults")
.getObject("results") .getObject("results")
.getObject("results") .getObject("results")
@ -1253,13 +1239,10 @@ public class YoutubeStreamExtractor extends StreamExtractor {
.stream() .stream()
.filter(JsonObject.class::isInstance) .filter(JsonObject.class::isInstance)
.map(JsonObject.class::cast) .map(JsonObject.class::cast)
.filter(content -> content.has("videoSecondaryInfoRenderer")) .filter(content -> content.has(videoRendererName))
.map(content -> content.getObject("videoSecondaryInfoRenderer")) .map(content -> content.getObject(videoRendererName))
.findFirst() .findFirst()
.orElseThrow( .orElse(new JsonObject());
() -> new ParsingException("Could not find videoSecondaryInfoRenderer"));
return videoSecondaryInfoRenderer;
} }
@Nonnull @Nonnull