Merge pull request #233 from Stypox/yt-stream-ext
Youtube stream extractor improvements by using playerResponse json
This commit is contained in:
commit
b9afc9807c
|
@ -106,19 +106,21 @@ public class YoutubeStreamExtractor extends StreamExtractor {
|
||||||
@Override
|
@Override
|
||||||
public String getName() throws ParsingException {
|
public String getName() throws ParsingException {
|
||||||
assertPageFetched();
|
assertPageFetched();
|
||||||
String name = getStringFromMetaData("title");
|
try {
|
||||||
if(name == null) {
|
return playerResponse.getObject("videoDetails").getString("title");
|
||||||
// Fallback to HTML method
|
|
||||||
|
} catch (Exception e) {
|
||||||
|
// fallback HTML method
|
||||||
|
String name = null;
|
||||||
try {
|
try {
|
||||||
name = doc.select("meta[name=title]").attr(CONTENT);
|
name = doc.select("meta[name=title]").attr(CONTENT);
|
||||||
} catch (Exception e) {
|
} catch (Exception ignored) {}
|
||||||
throw new ParsingException("Could not get the title", e);
|
|
||||||
|
if (name == null) {
|
||||||
|
throw new ParsingException("Could not get name", e);
|
||||||
}
|
}
|
||||||
|
return name;
|
||||||
}
|
}
|
||||||
if(name == null || name.isEmpty()) {
|
|
||||||
throw new ParsingException("Could not get the title");
|
|
||||||
}
|
|
||||||
return name;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -128,9 +130,17 @@ public class YoutubeStreamExtractor extends StreamExtractor {
|
||||||
}
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
return doc.select("meta[itemprop=datePublished]").attr(CONTENT);
|
return playerResponse.getObject("microformat").getObject("playerMicroformatRenderer").getString("publishDate");
|
||||||
} catch (Exception e) {//todo: add fallback method
|
} catch (Exception e) {
|
||||||
throw new ParsingException("Could not get upload date", e);
|
String uploadDate = null;
|
||||||
|
try {
|
||||||
|
uploadDate = doc.select("meta[itemprop=datePublished]").attr(CONTENT);
|
||||||
|
} catch (Exception ignored) {}
|
||||||
|
|
||||||
|
if (uploadDate == null) {
|
||||||
|
throw new ParsingException("Could not get upload date", e);
|
||||||
|
}
|
||||||
|
return uploadDate;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -149,24 +159,23 @@ public class YoutubeStreamExtractor extends StreamExtractor {
|
||||||
@Override
|
@Override
|
||||||
public String getThumbnailUrl() throws ParsingException {
|
public String getThumbnailUrl() throws ParsingException {
|
||||||
assertPageFetched();
|
assertPageFetched();
|
||||||
// Try to get high resolution thumbnail first, if it fails, use low res from the player instead
|
|
||||||
try {
|
try {
|
||||||
return doc.select("link[itemprop=\"thumbnailUrl\"]").first().attr("abs:href");
|
JsonArray thumbnails = playerResponse.getObject("videoDetails").getObject("thumbnail").getArray("thumbnails");
|
||||||
} catch (Exception ignored) {
|
// the last thumbnail is the one with the highest resolution
|
||||||
// Try other method...
|
return thumbnails.getObject(thumbnails.size() - 1).getString("url");
|
||||||
}
|
|
||||||
|
|
||||||
try {
|
|
||||||
if (playerArgs != null && playerArgs.isString("thumbnail_url")) return playerArgs.getString("thumbnail_url");
|
|
||||||
} catch (Exception ignored) {
|
|
||||||
// Try other method...
|
|
||||||
}
|
|
||||||
|
|
||||||
try {
|
|
||||||
return videoInfoPage.get("thumbnail_url");
|
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
throw new ParsingException("Could not get thumbnail url", e);
|
String url = null;
|
||||||
|
try {
|
||||||
|
url = doc.select("link[itemprop=\"thumbnailUrl\"]").first().attr("abs:href");
|
||||||
|
} catch (Exception ignored) {}
|
||||||
|
|
||||||
|
if (url == null) {
|
||||||
|
throw new ParsingException("Could not get thumbnail url", e);
|
||||||
|
}
|
||||||
|
return url;
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Nonnull
|
@Nonnull
|
||||||
|
@ -174,9 +183,15 @@ public class YoutubeStreamExtractor extends StreamExtractor {
|
||||||
public String getDescription() throws ParsingException {
|
public String getDescription() throws ParsingException {
|
||||||
assertPageFetched();
|
assertPageFetched();
|
||||||
try {
|
try {
|
||||||
|
// first try to get html-formatted description
|
||||||
return parseHtmlAndGetFullLinks(doc.select("p[id=\"eow-description\"]").first().html());
|
return parseHtmlAndGetFullLinks(doc.select("p[id=\"eow-description\"]").first().html());
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
throw new ParsingException("Could not get the description", e);
|
try {
|
||||||
|
// fallback to raw non-html description
|
||||||
|
return playerResponse.getObject("videoDetails").getString("shortDescription");
|
||||||
|
} catch (Exception ignored) {
|
||||||
|
throw new ParsingException("Could not get the description", e);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -269,25 +284,22 @@ public class YoutubeStreamExtractor extends StreamExtractor {
|
||||||
public long getLength() throws ParsingException {
|
public long getLength() throws ParsingException {
|
||||||
assertPageFetched();
|
assertPageFetched();
|
||||||
|
|
||||||
// try getting duration from playerargs
|
|
||||||
try {
|
|
||||||
String durationMs = playerResponse
|
|
||||||
.getObject("streamingData")
|
|
||||||
.getArray("formats")
|
|
||||||
.getObject(0)
|
|
||||||
.getString("approxDurationMs");
|
|
||||||
return Long.parseLong(durationMs)/1000;
|
|
||||||
} catch (Exception e) {
|
|
||||||
}
|
|
||||||
|
|
||||||
//try getting value from age gated video
|
|
||||||
try {
|
try {
|
||||||
String duration = playerResponse
|
String duration = playerResponse
|
||||||
.getObject("videoDetails")
|
.getObject("videoDetails")
|
||||||
.getString("lengthSeconds");
|
.getString("lengthSeconds");
|
||||||
return Long.parseLong(duration);
|
return Long.parseLong(duration);
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
throw new ParsingException("Every methode to get the duration has failed: ", e);
|
try {
|
||||||
|
String durationMs = playerResponse
|
||||||
|
.getObject("streamingData")
|
||||||
|
.getArray("formats")
|
||||||
|
.getObject(0)
|
||||||
|
.getString("approxDurationMs");
|
||||||
|
return Math.round(Long.parseLong(durationMs) / 1000f);
|
||||||
|
} catch (Exception ignored) {
|
||||||
|
throw new ParsingException("Could not get duration", e);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -307,11 +319,15 @@ public class YoutubeStreamExtractor extends StreamExtractor {
|
||||||
try {
|
try {
|
||||||
if (getStreamType().equals(StreamType.LIVE_STREAM)) {
|
if (getStreamType().equals(StreamType.LIVE_STREAM)) {
|
||||||
return getLiveStreamWatchingCount();
|
return getLiveStreamWatchingCount();
|
||||||
|
} else {
|
||||||
|
return Long.parseLong(playerResponse.getObject("videoDetails").getString("viewCount"));
|
||||||
|
}
|
||||||
|
} catch (Exception e) {
|
||||||
|
try {
|
||||||
|
return Long.parseLong(doc.select("meta[itemprop=interactionCount]").attr(CONTENT));
|
||||||
|
} catch (Exception ignored) {
|
||||||
|
throw new ParsingException("Could not get view count", e);
|
||||||
}
|
}
|
||||||
|
|
||||||
return Long.parseLong(doc.select("meta[itemprop=interactionCount]").attr(CONTENT));
|
|
||||||
} catch (Exception e) {//todo: find fallback method
|
|
||||||
throw new ParsingException("Could not get number of views", e);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -373,7 +389,10 @@ public class YoutubeStreamExtractor extends StreamExtractor {
|
||||||
try {
|
try {
|
||||||
likesString = button.select("span.yt-uix-button-content").first().text();
|
likesString = button.select("span.yt-uix-button-content").first().text();
|
||||||
} catch (NullPointerException e) {
|
} catch (NullPointerException e) {
|
||||||
//if this kicks in our button has no content and therefore likes/dislikes are disabled
|
//if this kicks in our button has no content and therefore ratings must be disabled
|
||||||
|
if (playerResponse.getObject("videoDetails").getBoolean("allowRatings")) {
|
||||||
|
throw new ParsingException("Ratings are enabled even though the like button is missing", e);
|
||||||
|
}
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
return Integer.parseInt(Utils.removeNonDigitCharacters(likesString));
|
return Integer.parseInt(Utils.removeNonDigitCharacters(likesString));
|
||||||
|
@ -393,7 +412,10 @@ public class YoutubeStreamExtractor extends StreamExtractor {
|
||||||
try {
|
try {
|
||||||
dislikesString = button.select("span.yt-uix-button-content").first().text();
|
dislikesString = button.select("span.yt-uix-button-content").first().text();
|
||||||
} catch (NullPointerException e) {
|
} catch (NullPointerException e) {
|
||||||
//if this kicks in our button has no content and therefore likes/dislikes are disabled
|
//if this kicks in our button has no content and therefore ratings must be disabled
|
||||||
|
if (playerResponse.getObject("videoDetails").getBoolean("allowRatings")) {
|
||||||
|
throw new ParsingException("Ratings are enabled even though the dislike button is missing", e);
|
||||||
|
}
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
return Integer.parseInt(Utils.removeNonDigitCharacters(dislikesString));
|
return Integer.parseInt(Utils.removeNonDigitCharacters(dislikesString));
|
||||||
|
@ -409,60 +431,59 @@ public class YoutubeStreamExtractor extends StreamExtractor {
|
||||||
public String getUploaderUrl() throws ParsingException {
|
public String getUploaderUrl() throws ParsingException {
|
||||||
assertPageFetched();
|
assertPageFetched();
|
||||||
try {
|
try {
|
||||||
return doc.select("div[class=\"yt-user-info\"]").first().children()
|
return "https://www.youtube.com/channel/" +
|
||||||
.select("a").first().attr("abs:href");
|
playerResponse.getObject("videoDetails").getString("channelId");
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
throw new ParsingException("Could not get channel link", e);
|
String uploaderUrl = null;
|
||||||
}
|
try {
|
||||||
}
|
uploaderUrl = doc.select("div[class=\"yt-user-info\"]").first().children()
|
||||||
|
.select("a").first().attr("abs:href");
|
||||||
|
} catch (Exception ignored) {}
|
||||||
|
|
||||||
|
if (uploaderUrl == null) {
|
||||||
@Nullable
|
throw new ParsingException("Could not get channel link", e);
|
||||||
private String getStringFromMetaData(String field) {
|
}
|
||||||
assertPageFetched();
|
return uploaderUrl;
|
||||||
String value = null;
|
|
||||||
if(playerArgs != null) {
|
|
||||||
// This can not fail
|
|
||||||
value = playerArgs.getString(field);
|
|
||||||
}
|
}
|
||||||
if(value == null) {
|
|
||||||
// This can not fail too
|
|
||||||
value = videoInfoPage.get(field);
|
|
||||||
}
|
|
||||||
return value;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Nonnull
|
@Nonnull
|
||||||
@Override
|
@Override
|
||||||
public String getUploaderName() throws ParsingException {
|
public String getUploaderName() throws ParsingException {
|
||||||
assertPageFetched();
|
assertPageFetched();
|
||||||
String name = getStringFromMetaData("author");
|
try {
|
||||||
|
return playerResponse.getObject("videoDetails").getString("author");
|
||||||
if(name == null) {
|
} catch (Exception e) {
|
||||||
|
String name = null;
|
||||||
try {
|
try {
|
||||||
// Fallback to HTML method
|
|
||||||
name = doc.select("div.yt-user-info").first().text();
|
name = doc.select("div.yt-user-info").first().text();
|
||||||
} catch (Exception e) {
|
} catch (Exception ignored) {}
|
||||||
throw new ParsingException("Could not get uploader name", e);
|
|
||||||
|
if (name == null) {
|
||||||
|
throw new ParsingException("Could not get uploader name");
|
||||||
}
|
}
|
||||||
|
return name;
|
||||||
}
|
}
|
||||||
if(name == null || name.isEmpty()) {
|
|
||||||
throw new ParsingException("Could not get uploader name");
|
|
||||||
}
|
|
||||||
return name;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Nonnull
|
@Nonnull
|
||||||
@Override
|
@Override
|
||||||
public String getUploaderAvatarUrl() throws ParsingException {
|
public String getUploaderAvatarUrl() throws ParsingException {
|
||||||
assertPageFetched();
|
assertPageFetched();
|
||||||
|
|
||||||
|
String uploaderAvatarUrl = null;
|
||||||
try {
|
try {
|
||||||
return doc.select("a[class*=\"yt-user-photo\"]").first()
|
uploaderAvatarUrl = doc.select("a[class*=\"yt-user-photo\"]").first()
|
||||||
.select("img").first()
|
.select("img").first()
|
||||||
.attr("abs:data-thumb");
|
.attr("abs:data-thumb");
|
||||||
} catch (Exception e) {//todo: add fallback method
|
} catch (Exception e) {//todo: add fallback method
|
||||||
throw new ParsingException("Could not get uploader thumbnail URL.", e);
|
throw new ParsingException("Could not get uploader avatar url", e);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (uploaderAvatarUrl == null) {
|
||||||
|
throw new ParsingException("Could not get uploader avatar url");
|
||||||
|
}
|
||||||
|
return uploaderAvatarUrl;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Nonnull
|
@Nonnull
|
||||||
|
@ -590,12 +611,12 @@ public class YoutubeStreamExtractor extends StreamExtractor {
|
||||||
public StreamType getStreamType() throws ParsingException {
|
public StreamType getStreamType() throws ParsingException {
|
||||||
assertPageFetched();
|
assertPageFetched();
|
||||||
try {
|
try {
|
||||||
if (playerArgs != null && (playerArgs.has("ps") && playerArgs.get("ps").toString().equals("live") ||
|
if (!playerResponse.getObject("streamingData").has(FORMATS) ||
|
||||||
(!playerResponse.getObject("streamingData").has(FORMATS)))) {
|
(playerArgs != null && playerArgs.has("ps") && playerArgs.get("ps").toString().equals("live"))) {
|
||||||
return StreamType.LIVE_STREAM;
|
return StreamType.LIVE_STREAM;
|
||||||
}
|
}
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
throw new ParsingException("Could not get hls manifest url", e);
|
throw new ParsingException("Could not get stream type", e);
|
||||||
}
|
}
|
||||||
return StreamType.VIDEO_STREAM;
|
return StreamType.VIDEO_STREAM;
|
||||||
}
|
}
|
||||||
|
|
|
@ -75,7 +75,7 @@ public class YoutubeStreamExtractorAgeRestrictedTest {
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testGetLength() throws ParsingException {
|
public void testGetLength() throws ParsingException {
|
||||||
assertEquals(1789, extractor.getLength());
|
assertEquals(1790, extractor.getLength());
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
|
|
@ -65,7 +65,7 @@ public class YoutubeStreamExtractorControversialTest {
|
||||||
@Test
|
@Test
|
||||||
public void testGetDescription() throws ParsingException {
|
public void testGetDescription() throws ParsingException {
|
||||||
assertNotNull(extractor.getDescription());
|
assertNotNull(extractor.getDescription());
|
||||||
// assertFalse(extractor.getDescription().isEmpty());
|
assertFalse(extractor.getDescription().isEmpty());
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
|
|
@ -101,7 +101,7 @@ public class YoutubeStreamExtractorDefaultTest {
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testGetLength() throws ParsingException {
|
public void testGetLength() throws ParsingException {
|
||||||
assertEquals(366, extractor.getLength());
|
assertEquals(367, extractor.getLength());
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
@ -124,7 +124,11 @@ public class YoutubeStreamExtractorDefaultTest {
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testGetUploaderUrl() throws ParsingException {
|
public void testGetUploaderUrl() throws ParsingException {
|
||||||
assertEquals("https://www.youtube.com/channel/UCsRM0YB_dabtEPGPTKo-gcw", extractor.getUploaderUrl());
|
String url = extractor.getUploaderUrl();
|
||||||
|
if (!url.equals("https://www.youtube.com/channel/UCsRM0YB_dabtEPGPTKo-gcw") &&
|
||||||
|
!url.equals("https://www.youtube.com/channel/UComP_epzeKzvBX156r6pm1Q")) {
|
||||||
|
fail("Uploader url is neither the music channel one nor the Vevo one");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
@ -183,6 +187,18 @@ public class YoutubeStreamExtractorDefaultTest {
|
||||||
// Video (/view?v=YQHsXMglC9A) set in the setUp() method has no captions => null
|
// Video (/view?v=YQHsXMglC9A) set in the setUp() method has no captions => null
|
||||||
assertTrue(extractor.getSubtitles(MediaFormat.TTML).isEmpty());
|
assertTrue(extractor.getSubtitles(MediaFormat.TTML).isEmpty());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testGetLikeCount() throws ParsingException {
|
||||||
|
long likeCount = extractor.getLikeCount();
|
||||||
|
assertTrue("" + likeCount, likeCount >= 15000000);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testGetDislikeCount() throws ParsingException {
|
||||||
|
long dislikeCount = extractor.getDislikeCount();
|
||||||
|
assertTrue("" + dislikeCount, dislikeCount >= 818000);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public static class DescriptionTestPewdiepie {
|
public static class DescriptionTestPewdiepie {
|
||||||
|
@ -245,6 +261,29 @@ public class YoutubeStreamExtractorDefaultTest {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static class RatingsDisabledTest {
|
||||||
|
private static YoutubeStreamExtractor extractor;
|
||||||
|
|
||||||
|
@BeforeClass
|
||||||
|
public static void setUp() throws Exception {
|
||||||
|
NewPipe.init(DownloaderTestImpl.getInstance());
|
||||||
|
extractor = (YoutubeStreamExtractor) YouTube
|
||||||
|
.getStreamExtractor("https://www.youtube.com/watch?v=HRKu0cvrr_o");
|
||||||
|
extractor.fetchPage();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testGetLikeCount() throws ParsingException {
|
||||||
|
assertEquals(-1, extractor.getLikeCount());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testGetDislikeCount() throws ParsingException {
|
||||||
|
assertEquals(-1, extractor.getDislikeCount());
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
public static class FramesTest {
|
public static class FramesTest {
|
||||||
private static YoutubeStreamExtractor extractor;
|
private static YoutubeStreamExtractor extractor;
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue