Merge pull request #968 from AudricV/yt-support-no-video-info-renderers-for-streams
[YouTube] Support lack of video info renderers for streams
This commit is contained in:
commit
c953e23414
|
@ -204,45 +204,48 @@ public class YoutubeStreamExtractor extends StreamExtractor {
|
|||
return null;
|
||||
}
|
||||
|
||||
if (getTextFromObject(getVideoPrimaryInfoRenderer().getObject("dateText"))
|
||||
.startsWith("Premiered")) {
|
||||
final String time = getTextFromObject(
|
||||
getVideoPrimaryInfoRenderer().getObject("dateText")).substring(13);
|
||||
final String videoPrimaryInfoRendererDateText =
|
||||
getTextFromObject(getVideoPrimaryInfoRenderer().getObject("dateText"));
|
||||
|
||||
try { // Premiered 20 hours ago
|
||||
final TimeAgoParser timeAgoParser = TimeAgoPatternsManager.getTimeAgoParserFor(
|
||||
Localization.fromLocalizationCode("en"));
|
||||
final OffsetDateTime parsedTime = timeAgoParser.parse(time).offsetDateTime();
|
||||
return DateTimeFormatter.ISO_LOCAL_DATE.format(parsedTime);
|
||||
} catch (final Exception ignored) {
|
||||
if (videoPrimaryInfoRendererDateText != null) {
|
||||
if (videoPrimaryInfoRendererDateText.startsWith("Premiered")) {
|
||||
final String time = videoPrimaryInfoRendererDateText.substring(13);
|
||||
|
||||
try { // Premiered 20 hours ago
|
||||
final TimeAgoParser timeAgoParser = TimeAgoPatternsManager.getTimeAgoParserFor(
|
||||
Localization.fromLocalizationCode("en"));
|
||||
final OffsetDateTime parsedTime = timeAgoParser.parse(time).offsetDateTime();
|
||||
return DateTimeFormatter.ISO_LOCAL_DATE.format(parsedTime);
|
||||
} catch (final Exception ignored) {
|
||||
}
|
||||
|
||||
try { // Premiered Feb 21, 2020
|
||||
final LocalDate localDate = LocalDate.parse(time,
|
||||
DateTimeFormatter.ofPattern("MMM dd, yyyy", Locale.ENGLISH));
|
||||
return DateTimeFormatter.ISO_LOCAL_DATE.format(localDate);
|
||||
} catch (final Exception ignored) {
|
||||
}
|
||||
|
||||
try { // Premiered on 21 Feb 2020
|
||||
final LocalDate localDate = LocalDate.parse(time,
|
||||
DateTimeFormatter.ofPattern("dd MMM yyyy", Locale.ENGLISH));
|
||||
return DateTimeFormatter.ISO_LOCAL_DATE.format(localDate);
|
||||
} catch (final Exception ignored) {
|
||||
}
|
||||
}
|
||||
|
||||
try { // Premiered Feb 21, 2020
|
||||
final LocalDate localDate = LocalDate.parse(time,
|
||||
DateTimeFormatter.ofPattern("MMM dd, yyyy", Locale.ENGLISH));
|
||||
return DateTimeFormatter.ISO_LOCAL_DATE.format(localDate);
|
||||
} catch (final Exception ignored) {
|
||||
}
|
||||
|
||||
try { // Premiered on 21 Feb 2020
|
||||
final LocalDate localDate = LocalDate.parse(time,
|
||||
try {
|
||||
// TODO: this parses English formatted dates only, we need a better approach to
|
||||
// parse the textual date
|
||||
final LocalDate localDate = LocalDate.parse(videoPrimaryInfoRendererDateText,
|
||||
DateTimeFormatter.ofPattern("dd MMM yyyy", Locale.ENGLISH));
|
||||
return DateTimeFormatter.ISO_LOCAL_DATE.format(localDate);
|
||||
} catch (final Exception ignored) {
|
||||
} catch (final Exception e) {
|
||||
throw new ParsingException("Could not get upload date", e);
|
||||
}
|
||||
}
|
||||
|
||||
try {
|
||||
// TODO: this parses English formatted dates only, we need a better approach to parse
|
||||
// the textual date
|
||||
final LocalDate localDate = LocalDate.parse(getTextFromObject(
|
||||
getVideoPrimaryInfoRenderer().getObject("dateText")),
|
||||
DateTimeFormatter.ofPattern("dd MMM yyyy", Locale.ENGLISH));
|
||||
return DateTimeFormatter.ISO_LOCAL_DATE.format(localDate);
|
||||
} catch (final Exception e) {
|
||||
throw new ParsingException("Could not get upload date", e);
|
||||
}
|
||||
|
||||
throw new ParsingException("Could not get upload date");
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -565,19 +568,13 @@ public class YoutubeStreamExtractor extends StreamExtractor {
|
|||
public String getUploaderAvatarUrl() throws ParsingException {
|
||||
assertPageFetched();
|
||||
|
||||
String url = null;
|
||||
|
||||
try {
|
||||
url = getVideoSecondaryInfoRenderer()
|
||||
.getObject("owner")
|
||||
.getObject("videoOwnerRenderer")
|
||||
.getObject("thumbnail")
|
||||
.getArray("thumbnails")
|
||||
.getObject(0)
|
||||
.getString("url");
|
||||
} catch (final ParsingException ignored) {
|
||||
// Age-restricted videos cause a ParsingException here
|
||||
}
|
||||
final String url = getVideoSecondaryInfoRenderer()
|
||||
.getObject("owner")
|
||||
.getObject("videoOwnerRenderer")
|
||||
.getObject("thumbnail")
|
||||
.getArray("thumbnails")
|
||||
.getObject(0)
|
||||
.getString("url");
|
||||
|
||||
if (isNullOrEmpty(url)) {
|
||||
if (ageLimit == NO_AGE_LIMIT) {
|
||||
|
@ -1212,40 +1209,29 @@ public class YoutubeStreamExtractor extends StreamExtractor {
|
|||
// Utils
|
||||
//////////////////////////////////////////////////////////////////////////*/
|
||||
|
||||
private JsonObject getVideoPrimaryInfoRenderer() throws ParsingException {
|
||||
@Nonnull
|
||||
private JsonObject getVideoPrimaryInfoRenderer() {
|
||||
if (videoPrimaryInfoRenderer != null) {
|
||||
return videoPrimaryInfoRenderer;
|
||||
}
|
||||
|
||||
final JsonArray contents = nextResponse.getObject("contents")
|
||||
.getObject("twoColumnWatchNextResults").getObject("results").getObject("results")
|
||||
.getArray("contents");
|
||||
JsonObject theVideoPrimaryInfoRenderer = null;
|
||||
|
||||
for (final Object content : contents) {
|
||||
if (((JsonObject) content).has("videoPrimaryInfoRenderer")) {
|
||||
theVideoPrimaryInfoRenderer = ((JsonObject) content)
|
||||
.getObject("videoPrimaryInfoRenderer");
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (isNullOrEmpty(theVideoPrimaryInfoRenderer)) {
|
||||
throw new ParsingException("Could not find videoPrimaryInfoRenderer");
|
||||
}
|
||||
|
||||
videoPrimaryInfoRenderer = theVideoPrimaryInfoRenderer;
|
||||
return theVideoPrimaryInfoRenderer;
|
||||
videoPrimaryInfoRenderer = getVideoInfoRenderer("videoPrimaryInfoRenderer");
|
||||
return videoPrimaryInfoRenderer;
|
||||
}
|
||||
|
||||
@Nonnull
|
||||
private JsonObject getVideoSecondaryInfoRenderer() throws ParsingException {
|
||||
private JsonObject getVideoSecondaryInfoRenderer() {
|
||||
if (videoSecondaryInfoRenderer != null) {
|
||||
return videoSecondaryInfoRenderer;
|
||||
}
|
||||
|
||||
videoSecondaryInfoRenderer = nextResponse
|
||||
.getObject("contents")
|
||||
videoSecondaryInfoRenderer = getVideoInfoRenderer("videoSecondaryInfoRenderer");
|
||||
return videoSecondaryInfoRenderer;
|
||||
}
|
||||
|
||||
@Nonnull
|
||||
private JsonObject getVideoInfoRenderer(@Nonnull final String videoRendererName) {
|
||||
return nextResponse.getObject("contents")
|
||||
.getObject("twoColumnWatchNextResults")
|
||||
.getObject("results")
|
||||
.getObject("results")
|
||||
|
@ -1253,13 +1239,10 @@ public class YoutubeStreamExtractor extends StreamExtractor {
|
|||
.stream()
|
||||
.filter(JsonObject.class::isInstance)
|
||||
.map(JsonObject.class::cast)
|
||||
.filter(content -> content.has("videoSecondaryInfoRenderer"))
|
||||
.map(content -> content.getObject("videoSecondaryInfoRenderer"))
|
||||
.filter(content -> content.has(videoRendererName))
|
||||
.map(content -> content.getObject(videoRendererName))
|
||||
.findFirst()
|
||||
.orElseThrow(
|
||||
() -> new ParsingException("Could not find videoSecondaryInfoRenderer"));
|
||||
|
||||
return videoSecondaryInfoRenderer;
|
||||
.orElse(new JsonObject());
|
||||
}
|
||||
|
||||
@Nonnull
|
||||
|
|
|
@ -430,6 +430,69 @@ public class YoutubeStreamExtractorDefaultTest {
|
|||
// @formatter:on
|
||||
}
|
||||
|
||||
public static class NoVisualMetadataVideoTest extends DefaultStreamExtractorTest {
|
||||
// Video without visual metadata on YouTube clients (video title, upload date, channel name,
|
||||
// comments, ...)
|
||||
private static final String ID = "An8vtD1FDqs";
|
||||
private static final String URL = BASE_URL + ID;
|
||||
private static StreamExtractor extractor;
|
||||
|
||||
@BeforeAll
|
||||
public static void setUp() throws Exception {
|
||||
YoutubeTestsUtils.ensureStateless();
|
||||
NewPipe.init(DownloaderFactory.getDownloader(RESOURCE_PATH + "noVisualMetadata"));
|
||||
extractor = YouTube.getStreamExtractor(URL);
|
||||
extractor.fetchPage();
|
||||
}
|
||||
|
||||
@Override public StreamType expectedStreamType() { return StreamType.VIDEO_STREAM; }
|
||||
@Override public String expectedUploaderName() { return "Makani"; }
|
||||
@Override public String expectedUploaderUrl() { return "https://www.youtube.com/channel/UC-iMZJ8NppwT2fLwzFWJKOQ"; }
|
||||
@Override public List<String> expectedDescriptionContains() { return Arrays.asList("Makani", "prototype", "rotors"); }
|
||||
@Override public long expectedLength() { return 175; }
|
||||
@Override public long expectedViewCountAtLeast() { return 88_000; }
|
||||
@Nullable @Override public String expectedUploadDate() { return "2017-05-16 00:00:00.000"; }
|
||||
@Nullable @Override public String expectedTextualUploadDate() { return "2017-05-16"; }
|
||||
@Override public long expectedLikeCountAtLeast() { return -1; }
|
||||
@Override public long expectedDislikeCountAtLeast() { return -1; }
|
||||
@Override public StreamExtractor extractor() { return extractor; }
|
||||
@Override public StreamingService expectedService() { return YouTube; }
|
||||
@Override public String expectedName() { return "Makani’s first commercial-scale energy kite"; }
|
||||
@Override public String expectedId() { return "An8vtD1FDqs"; }
|
||||
@Override public String expectedUrlContains() { return BASE_URL + ID; }
|
||||
@Override public String expectedOriginalUrlContains() { return URL; }
|
||||
@Override public String expectedCategory() { return "Science & Technology"; }
|
||||
@Override public String expectedLicence() { return YOUTUBE_LICENCE; }
|
||||
@Override public List<String> expectedTags() {
|
||||
return Arrays.asList("Makani", "Moonshot", "Moonshot Factory", "Prototyping",
|
||||
"california", "california wind", "clean", "clean energy", "climate change",
|
||||
"climate crisis", "energy", "energy kite", "google", "google x", "green",
|
||||
"green energy", "kite", "kite power", "kite power solutions",
|
||||
"kite power systems", "makani power", "power", "renewable", "renewable energy",
|
||||
"renewable energy engineering", "renewable energy projects",
|
||||
"renewable energy sources", "renewables", "solutions", "tech", "technology",
|
||||
"turbine", "wind", "wind energy", "wind power", "wind turbine", "windmill");
|
||||
}
|
||||
|
||||
@Test
|
||||
@Override
|
||||
public void testSubscriberCount() {
|
||||
assertThrows(ParsingException.class, () -> extractor.getUploaderSubscriberCount());
|
||||
}
|
||||
|
||||
@Test
|
||||
@Override
|
||||
public void testLikeCount() {
|
||||
assertThrows(ParsingException.class, () -> extractor.getLikeCount());
|
||||
}
|
||||
|
||||
@Test
|
||||
@Override
|
||||
public void testUploaderAvatarUrl() {
|
||||
assertThrows(ParsingException.class, () -> extractor.getUploaderAvatarUrl());
|
||||
}
|
||||
}
|
||||
|
||||
public static class UnlistedTest {
|
||||
private static YoutubeStreamExtractor extractor;
|
||||
|
||||
|
|
|
@ -0,0 +1,73 @@
|
|||
{
|
||||
"request": {
|
||||
"httpMethod": "GET",
|
||||
"url": "https://www.youtube.com/iframe_api",
|
||||
"headers": {
|
||||
"Accept-Language": [
|
||||
"en-GB, en;q\u003d0.9"
|
||||
]
|
||||
},
|
||||
"localization": {
|
||||
"languageCode": "en",
|
||||
"countryCode": "GB"
|
||||
}
|
||||
},
|
||||
"response": {
|
||||
"responseCode": 200,
|
||||
"responseMessage": "",
|
||||
"responseHeaders": {
|
||||
"alt-svc": [
|
||||
"h3\u003d\":443\"; ma\u003d2592000,h3-29\u003d\":443\"; ma\u003d2592000,h3-Q050\u003d\":443\"; ma\u003d2592000,h3-Q046\u003d\":443\"; ma\u003d2592000,h3-Q043\u003d\":443\"; ma\u003d2592000,quic\u003d\":443\"; ma\u003d2592000; v\u003d\"46,43\""
|
||||
],
|
||||
"cache-control": [
|
||||
"private, max-age\u003d0"
|
||||
],
|
||||
"content-type": [
|
||||
"text/javascript; charset\u003dutf-8"
|
||||
],
|
||||
"cross-origin-opener-policy-report-only": [
|
||||
"same-origin; report-to\u003d\"youtube_main\""
|
||||
],
|
||||
"cross-origin-resource-policy": [
|
||||
"cross-origin"
|
||||
],
|
||||
"date": [
|
||||
"Fri, 04 Nov 2022 18:36:38 GMT"
|
||||
],
|
||||
"expires": [
|
||||
"Fri, 04 Nov 2022 18:36:38 GMT"
|
||||
],
|
||||
"p3p": [
|
||||
"CP\u003d\"This is not a P3P policy! See http://support.google.com/accounts/answer/151657?hl\u003den-GB for more info.\""
|
||||
],
|
||||
"permissions-policy": [
|
||||
"ch-ua-arch\u003d*, ch-ua-bitness\u003d*, ch-ua-full-version\u003d*, ch-ua-full-version-list\u003d*, ch-ua-model\u003d*, ch-ua-wow64\u003d*, ch-ua-platform\u003d*, ch-ua-platform-version\u003d*"
|
||||
],
|
||||
"report-to": [
|
||||
"{\"group\":\"youtube_main\",\"max_age\":2592000,\"endpoints\":[{\"url\":\"https://csp.withgoogle.com/csp/report-to/youtube_main\"}]}"
|
||||
],
|
||||
"server": [
|
||||
"ESF"
|
||||
],
|
||||
"set-cookie": [
|
||||
"YSC\u003dUBx6tMGNmRg; Domain\u003d.youtube.com; Path\u003d/; Secure; HttpOnly; SameSite\u003dnone",
|
||||
"VISITOR_INFO1_LIVE\u003dvY4W1Ai6Us0; Domain\u003d.youtube.com; Expires\u003dWed, 03-May-2023 18:36:38 GMT; Path\u003d/; Secure; HttpOnly; SameSite\u003dnone",
|
||||
"CONSENT\u003dPENDING+815; expires\u003dSun, 03-Nov-2024 18:36:38 GMT; path\u003d/; domain\u003d.youtube.com; Secure"
|
||||
],
|
||||
"strict-transport-security": [
|
||||
"max-age\u003d31536000"
|
||||
],
|
||||
"x-content-type-options": [
|
||||
"nosniff"
|
||||
],
|
||||
"x-frame-options": [
|
||||
"SAMEORIGIN"
|
||||
],
|
||||
"x-xss-protection": [
|
||||
"0"
|
||||
]
|
||||
},
|
||||
"responseBody": "var scriptUrl \u003d \u0027https:\\/\\/www.youtube.com\\/s\\/player\\/03bec62d\\/www-widgetapi.vflset\\/www-widgetapi.js\u0027;try{var ttPolicy\u003dwindow.trustedTypes.createPolicy(\"youtube-widget-api\",{createScriptURL:function(x){return x}});scriptUrl\u003dttPolicy.createScriptURL(scriptUrl)}catch(e){}var YT;if(!window[\"YT\"])YT\u003d{loading:0,loaded:0};var YTConfig;if(!window[\"YTConfig\"])YTConfig\u003d{\"host\":\"https://www.youtube.com\"};\nif(!YT.loading){YT.loading\u003d1;(function(){var l\u003d[];YT.ready\u003dfunction(f){if(YT.loaded)f();else l.push(f)};window.onYTReady\u003dfunction(){YT.loaded\u003d1;for(var i\u003d0;i\u003cl.length;i++)try{l[i]()}catch(e$0){}};YT.setConfig\u003dfunction(c){for(var k in c)if(c.hasOwnProperty(k))YTConfig[k]\u003dc[k]};var a\u003ddocument.createElement(\"script\");a.type\u003d\"text/javascript\";a.id\u003d\"www-widgetapi-script\";a.src\u003dscriptUrl;a.async\u003dtrue;var c\u003ddocument.currentScript;if(c){var n\u003dc.nonce||c.getAttribute(\"nonce\");if(n)a.setAttribute(\"nonce\",n)}var b\u003d\ndocument.getElementsByTagName(\"script\")[0];b.parentNode.insertBefore(a,b)})()};\n",
|
||||
"latestUrl": "https://www.youtube.com/iframe_api"
|
||||
}
|
||||
}
|
File diff suppressed because one or more lines are too long
|
@ -0,0 +1,82 @@
|
|||
{
|
||||
"request": {
|
||||
"httpMethod": "GET",
|
||||
"url": "https://www.youtube.com/sw.js",
|
||||
"headers": {
|
||||
"Origin": [
|
||||
"https://www.youtube.com"
|
||||
],
|
||||
"Referer": [
|
||||
"https://www.youtube.com"
|
||||
],
|
||||
"Accept-Language": [
|
||||
"en-GB, en;q\u003d0.9"
|
||||
]
|
||||
},
|
||||
"localization": {
|
||||
"languageCode": "en",
|
||||
"countryCode": "GB"
|
||||
}
|
||||
},
|
||||
"response": {
|
||||
"responseCode": 200,
|
||||
"responseMessage": "",
|
||||
"responseHeaders": {
|
||||
"access-control-allow-credentials": [
|
||||
"true"
|
||||
],
|
||||
"access-control-allow-origin": [
|
||||
"https://www.youtube.com"
|
||||
],
|
||||
"alt-svc": [
|
||||
"h3\u003d\":443\"; ma\u003d2592000,h3-29\u003d\":443\"; ma\u003d2592000,h3-Q050\u003d\":443\"; ma\u003d2592000,h3-Q046\u003d\":443\"; ma\u003d2592000,h3-Q043\u003d\":443\"; ma\u003d2592000,quic\u003d\":443\"; ma\u003d2592000; v\u003d\"46,43\""
|
||||
],
|
||||
"cache-control": [
|
||||
"private, max-age\u003d0"
|
||||
],
|
||||
"content-type": [
|
||||
"text/javascript; charset\u003dutf-8"
|
||||
],
|
||||
"cross-origin-opener-policy-report-only": [
|
||||
"same-origin; report-to\u003d\"youtube_main\""
|
||||
],
|
||||
"date": [
|
||||
"Fri, 04 Nov 2022 18:36:42 GMT"
|
||||
],
|
||||
"expires": [
|
||||
"Fri, 04 Nov 2022 18:36:42 GMT"
|
||||
],
|
||||
"p3p": [
|
||||
"CP\u003d\"This is not a P3P policy! See http://support.google.com/accounts/answer/151657?hl\u003den-GB for more info.\""
|
||||
],
|
||||
"permissions-policy": [
|
||||
"ch-ua-arch\u003d*, ch-ua-bitness\u003d*, ch-ua-full-version\u003d*, ch-ua-full-version-list\u003d*, ch-ua-model\u003d*, ch-ua-wow64\u003d*, ch-ua-platform\u003d*, ch-ua-platform-version\u003d*"
|
||||
],
|
||||
"report-to": [
|
||||
"{\"group\":\"youtube_main\",\"max_age\":2592000,\"endpoints\":[{\"url\":\"https://csp.withgoogle.com/csp/report-to/youtube_main\"}]}"
|
||||
],
|
||||
"server": [
|
||||
"ESF"
|
||||
],
|
||||
"set-cookie": [
|
||||
"YSC\u003dOWXOpWivHI0; Domain\u003d.youtube.com; Path\u003d/; Secure; HttpOnly; SameSite\u003dnone",
|
||||
"VISITOR_INFO1_LIVE\u003d; Domain\u003d.youtube.com; Expires\u003dSat, 08-Feb-2020 18:36:42 GMT; Path\u003d/; Secure; HttpOnly; SameSite\u003dnone",
|
||||
"CONSENT\u003dPENDING+611; expires\u003dSun, 03-Nov-2024 18:36:42 GMT; path\u003d/; domain\u003d.youtube.com; Secure"
|
||||
],
|
||||
"strict-transport-security": [
|
||||
"max-age\u003d31536000"
|
||||
],
|
||||
"x-content-type-options": [
|
||||
"nosniff"
|
||||
],
|
||||
"x-frame-options": [
|
||||
"SAMEORIGIN"
|
||||
],
|
||||
"x-xss-protection": [
|
||||
"0"
|
||||
]
|
||||
},
|
||||
"responseBody": "\n self.addEventListener(\u0027install\u0027, event \u003d\u003e {\n event.waitUntil(self.skipWaiting());\n });\n self.addEventListener(\u0027activate\u0027, event \u003d\u003e {\n event.waitUntil(\n self.clients.claim().then(() \u003d\u003e self.registration.unregister()));\n });\n ",
|
||||
"latestUrl": "https://www.youtube.com/sw.js"
|
||||
}
|
||||
}
|
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
Loading…
Reference in New Issue