Merge pull request #233 from Stypox/yt-stream-ext

YouTube stream extractor improvements using the playerResponse JSON
This commit is contained in:
Tobias Groza 2020-01-20 23:17:36 +01:00 committed by GitHub
commit b9afc9807c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 143 additions and 83 deletions

View File

@ -106,19 +106,21 @@ public class YoutubeStreamExtractor extends StreamExtractor {
@Override @Override
public String getName() throws ParsingException { public String getName() throws ParsingException {
assertPageFetched(); assertPageFetched();
String name = getStringFromMetaData("title"); try {
if(name == null) { return playerResponse.getObject("videoDetails").getString("title");
// Fallback to HTML method
} catch (Exception e) {
// fallback HTML method
String name = null;
try { try {
name = doc.select("meta[name=title]").attr(CONTENT); name = doc.select("meta[name=title]").attr(CONTENT);
} catch (Exception e) { } catch (Exception ignored) {}
throw new ParsingException("Could not get the title", e);
if (name == null) {
throw new ParsingException("Could not get name", e);
} }
return name;
} }
if(name == null || name.isEmpty()) {
throw new ParsingException("Could not get the title");
}
return name;
} }
@Override @Override
@ -128,9 +130,17 @@ public class YoutubeStreamExtractor extends StreamExtractor {
} }
try { try {
return doc.select("meta[itemprop=datePublished]").attr(CONTENT); return playerResponse.getObject("microformat").getObject("playerMicroformatRenderer").getString("publishDate");
} catch (Exception e) {//todo: add fallback method } catch (Exception e) {
throw new ParsingException("Could not get upload date", e); String uploadDate = null;
try {
uploadDate = doc.select("meta[itemprop=datePublished]").attr(CONTENT);
} catch (Exception ignored) {}
if (uploadDate == null) {
throw new ParsingException("Could not get upload date", e);
}
return uploadDate;
} }
} }
@ -149,24 +159,23 @@ public class YoutubeStreamExtractor extends StreamExtractor {
@Override @Override
public String getThumbnailUrl() throws ParsingException { public String getThumbnailUrl() throws ParsingException {
assertPageFetched(); assertPageFetched();
// Try to get high resolution thumbnail first, if it fails, use low res from the player instead
try { try {
return doc.select("link[itemprop=\"thumbnailUrl\"]").first().attr("abs:href"); JsonArray thumbnails = playerResponse.getObject("videoDetails").getObject("thumbnail").getArray("thumbnails");
} catch (Exception ignored) { // the last thumbnail is the one with the highest resolution
// Try other method... return thumbnails.getObject(thumbnails.size() - 1).getString("url");
}
try {
if (playerArgs != null && playerArgs.isString("thumbnail_url")) return playerArgs.getString("thumbnail_url");
} catch (Exception ignored) {
// Try other method...
}
try {
return videoInfoPage.get("thumbnail_url");
} catch (Exception e) { } catch (Exception e) {
throw new ParsingException("Could not get thumbnail url", e); String url = null;
try {
url = doc.select("link[itemprop=\"thumbnailUrl\"]").first().attr("abs:href");
} catch (Exception ignored) {}
if (url == null) {
throw new ParsingException("Could not get thumbnail url", e);
}
return url;
} }
} }
@Nonnull @Nonnull
@ -174,9 +183,15 @@ public class YoutubeStreamExtractor extends StreamExtractor {
public String getDescription() throws ParsingException { public String getDescription() throws ParsingException {
assertPageFetched(); assertPageFetched();
try { try {
// first try to get html-formatted description
return parseHtmlAndGetFullLinks(doc.select("p[id=\"eow-description\"]").first().html()); return parseHtmlAndGetFullLinks(doc.select("p[id=\"eow-description\"]").first().html());
} catch (Exception e) { } catch (Exception e) {
throw new ParsingException("Could not get the description", e); try {
// fallback to raw non-html description
return playerResponse.getObject("videoDetails").getString("shortDescription");
} catch (Exception ignored) {
throw new ParsingException("Could not get the description", e);
}
} }
} }
@ -269,25 +284,22 @@ public class YoutubeStreamExtractor extends StreamExtractor {
public long getLength() throws ParsingException { public long getLength() throws ParsingException {
assertPageFetched(); assertPageFetched();
// try getting duration from playerargs
try {
String durationMs = playerResponse
.getObject("streamingData")
.getArray("formats")
.getObject(0)
.getString("approxDurationMs");
return Long.parseLong(durationMs)/1000;
} catch (Exception e) {
}
//try getting value from age gated video
try { try {
String duration = playerResponse String duration = playerResponse
.getObject("videoDetails") .getObject("videoDetails")
.getString("lengthSeconds"); .getString("lengthSeconds");
return Long.parseLong(duration); return Long.parseLong(duration);
} catch (Exception e) { } catch (Exception e) {
throw new ParsingException("Every methode to get the duration has failed: ", e); try {
String durationMs = playerResponse
.getObject("streamingData")
.getArray("formats")
.getObject(0)
.getString("approxDurationMs");
return Math.round(Long.parseLong(durationMs) / 1000f);
} catch (Exception ignored) {
throw new ParsingException("Could not get duration", e);
}
} }
} }
@ -307,11 +319,15 @@ public class YoutubeStreamExtractor extends StreamExtractor {
try { try {
if (getStreamType().equals(StreamType.LIVE_STREAM)) { if (getStreamType().equals(StreamType.LIVE_STREAM)) {
return getLiveStreamWatchingCount(); return getLiveStreamWatchingCount();
} else {
return Long.parseLong(playerResponse.getObject("videoDetails").getString("viewCount"));
}
} catch (Exception e) {
try {
return Long.parseLong(doc.select("meta[itemprop=interactionCount]").attr(CONTENT));
} catch (Exception ignored) {
throw new ParsingException("Could not get view count", e);
} }
return Long.parseLong(doc.select("meta[itemprop=interactionCount]").attr(CONTENT));
} catch (Exception e) {//todo: find fallback method
throw new ParsingException("Could not get number of views", e);
} }
} }
@ -373,7 +389,10 @@ public class YoutubeStreamExtractor extends StreamExtractor {
try { try {
likesString = button.select("span.yt-uix-button-content").first().text(); likesString = button.select("span.yt-uix-button-content").first().text();
} catch (NullPointerException e) { } catch (NullPointerException e) {
//if this kicks in our button has no content and therefore likes/dislikes are disabled //if this kicks in our button has no content and therefore ratings must be disabled
if (playerResponse.getObject("videoDetails").getBoolean("allowRatings")) {
throw new ParsingException("Ratings are enabled even though the like button is missing", e);
}
return -1; return -1;
} }
return Integer.parseInt(Utils.removeNonDigitCharacters(likesString)); return Integer.parseInt(Utils.removeNonDigitCharacters(likesString));
@ -393,7 +412,10 @@ public class YoutubeStreamExtractor extends StreamExtractor {
try { try {
dislikesString = button.select("span.yt-uix-button-content").first().text(); dislikesString = button.select("span.yt-uix-button-content").first().text();
} catch (NullPointerException e) { } catch (NullPointerException e) {
//if this kicks in our button has no content and therefore likes/dislikes are disabled //if this kicks in our button has no content and therefore ratings must be disabled
if (playerResponse.getObject("videoDetails").getBoolean("allowRatings")) {
throw new ParsingException("Ratings are enabled even though the dislike button is missing", e);
}
return -1; return -1;
} }
return Integer.parseInt(Utils.removeNonDigitCharacters(dislikesString)); return Integer.parseInt(Utils.removeNonDigitCharacters(dislikesString));
@ -409,60 +431,59 @@ public class YoutubeStreamExtractor extends StreamExtractor {
public String getUploaderUrl() throws ParsingException { public String getUploaderUrl() throws ParsingException {
assertPageFetched(); assertPageFetched();
try { try {
return doc.select("div[class=\"yt-user-info\"]").first().children() return "https://www.youtube.com/channel/" +
.select("a").first().attr("abs:href"); playerResponse.getObject("videoDetails").getString("channelId");
} catch (Exception e) { } catch (Exception e) {
throw new ParsingException("Could not get channel link", e); String uploaderUrl = null;
} try {
} uploaderUrl = doc.select("div[class=\"yt-user-info\"]").first().children()
.select("a").first().attr("abs:href");
} catch (Exception ignored) {}
if (uploaderUrl == null) {
@Nullable throw new ParsingException("Could not get channel link", e);
private String getStringFromMetaData(String field) { }
assertPageFetched(); return uploaderUrl;
String value = null;
if(playerArgs != null) {
// This can not fail
value = playerArgs.getString(field);
} }
if(value == null) {
// This can not fail too
value = videoInfoPage.get(field);
}
return value;
} }
@Nonnull @Nonnull
@Override @Override
public String getUploaderName() throws ParsingException { public String getUploaderName() throws ParsingException {
assertPageFetched(); assertPageFetched();
String name = getStringFromMetaData("author"); try {
return playerResponse.getObject("videoDetails").getString("author");
if(name == null) { } catch (Exception e) {
String name = null;
try { try {
// Fallback to HTML method
name = doc.select("div.yt-user-info").first().text(); name = doc.select("div.yt-user-info").first().text();
} catch (Exception e) { } catch (Exception ignored) {}
throw new ParsingException("Could not get uploader name", e);
if (name == null) {
throw new ParsingException("Could not get uploader name");
} }
return name;
} }
if(name == null || name.isEmpty()) {
throw new ParsingException("Could not get uploader name");
}
return name;
} }
@Nonnull @Nonnull
@Override @Override
public String getUploaderAvatarUrl() throws ParsingException { public String getUploaderAvatarUrl() throws ParsingException {
assertPageFetched(); assertPageFetched();
String uploaderAvatarUrl = null;
try { try {
return doc.select("a[class*=\"yt-user-photo\"]").first() uploaderAvatarUrl = doc.select("a[class*=\"yt-user-photo\"]").first()
.select("img").first() .select("img").first()
.attr("abs:data-thumb"); .attr("abs:data-thumb");
} catch (Exception e) {//todo: add fallback method } catch (Exception e) {//todo: add fallback method
throw new ParsingException("Could not get uploader thumbnail URL.", e); throw new ParsingException("Could not get uploader avatar url", e);
} }
if (uploaderAvatarUrl == null) {
throw new ParsingException("Could not get uploader avatar url");
}
return uploaderAvatarUrl;
} }
@Nonnull @Nonnull
@ -590,12 +611,12 @@ public class YoutubeStreamExtractor extends StreamExtractor {
public StreamType getStreamType() throws ParsingException { public StreamType getStreamType() throws ParsingException {
assertPageFetched(); assertPageFetched();
try { try {
if (playerArgs != null && (playerArgs.has("ps") && playerArgs.get("ps").toString().equals("live") || if (!playerResponse.getObject("streamingData").has(FORMATS) ||
(!playerResponse.getObject("streamingData").has(FORMATS)))) { (playerArgs != null && playerArgs.has("ps") && playerArgs.get("ps").toString().equals("live"))) {
return StreamType.LIVE_STREAM; return StreamType.LIVE_STREAM;
} }
} catch (Exception e) { } catch (Exception e) {
throw new ParsingException("Could not get hls manifest url", e); throw new ParsingException("Could not get stream type", e);
} }
return StreamType.VIDEO_STREAM; return StreamType.VIDEO_STREAM;
} }

View File

@ -75,7 +75,7 @@ public class YoutubeStreamExtractorAgeRestrictedTest {
@Test @Test
public void testGetLength() throws ParsingException { public void testGetLength() throws ParsingException {
assertEquals(1789, extractor.getLength()); assertEquals(1790, extractor.getLength());
} }
@Test @Test

View File

@ -65,7 +65,7 @@ public class YoutubeStreamExtractorControversialTest {
@Test @Test
public void testGetDescription() throws ParsingException { public void testGetDescription() throws ParsingException {
assertNotNull(extractor.getDescription()); assertNotNull(extractor.getDescription());
// assertFalse(extractor.getDescription().isEmpty()); assertFalse(extractor.getDescription().isEmpty());
} }
@Test @Test

View File

@ -101,7 +101,7 @@ public class YoutubeStreamExtractorDefaultTest {
@Test @Test
public void testGetLength() throws ParsingException { public void testGetLength() throws ParsingException {
assertEquals(366, extractor.getLength()); assertEquals(367, extractor.getLength());
} }
@Test @Test
@ -124,7 +124,11 @@ public class YoutubeStreamExtractorDefaultTest {
@Test @Test
public void testGetUploaderUrl() throws ParsingException { public void testGetUploaderUrl() throws ParsingException {
assertEquals("https://www.youtube.com/channel/UCsRM0YB_dabtEPGPTKo-gcw", extractor.getUploaderUrl()); String url = extractor.getUploaderUrl();
if (!url.equals("https://www.youtube.com/channel/UCsRM0YB_dabtEPGPTKo-gcw") &&
!url.equals("https://www.youtube.com/channel/UComP_epzeKzvBX156r6pm1Q")) {
fail("Uploader url is neither the music channel one nor the Vevo one");
}
} }
@Test @Test
@ -183,6 +187,18 @@ public class YoutubeStreamExtractorDefaultTest {
// Video (/view?v=YQHsXMglC9A) set in the setUp() method has no captions => null // Video (/view?v=YQHsXMglC9A) set in the setUp() method has no captions => null
assertTrue(extractor.getSubtitles(MediaFormat.TTML).isEmpty()); assertTrue(extractor.getSubtitles(MediaFormat.TTML).isEmpty());
} }
@Test
public void testGetLikeCount() throws ParsingException {
long likeCount = extractor.getLikeCount();
assertTrue("" + likeCount, likeCount >= 15000000);
}
@Test
public void testGetDislikeCount() throws ParsingException {
long dislikeCount = extractor.getDislikeCount();
assertTrue("" + dislikeCount, dislikeCount >= 818000);
}
} }
public static class DescriptionTestPewdiepie { public static class DescriptionTestPewdiepie {
@ -245,6 +261,29 @@ public class YoutubeStreamExtractorDefaultTest {
} }
} }
public static class RatingsDisabledTest {
private static YoutubeStreamExtractor extractor;
@BeforeClass
public static void setUp() throws Exception {
NewPipe.init(DownloaderTestImpl.getInstance());
extractor = (YoutubeStreamExtractor) YouTube
.getStreamExtractor("https://www.youtube.com/watch?v=HRKu0cvrr_o");
extractor.fetchPage();
}
@Test
public void testGetLikeCount() throws ParsingException {
assertEquals(-1, extractor.getLikeCount());
}
@Test
public void testGetDislikeCount() throws ParsingException {
assertEquals(-1, extractor.getDislikeCount());
}
}
public static class FramesTest { public static class FramesTest {
private static YoutubeStreamExtractor extractor; private static YoutubeStreamExtractor extractor;