From 147322b5265d5e35f6e300b52bc19e2c655dc89b Mon Sep 17 00:00:00 2001 From: TobiGr Date: Thu, 26 Dec 2019 22:11:12 +0100 Subject: [PATCH 01/30] Deploy JDoc even when tests on master fail --- .travis.yml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index 7e3db2a03..504a51ad0 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,7 +1,8 @@ language: java -script: ./gradlew check -after_success: ./gradlew aggregatedJavadocs +script: + ./gradlew check + ./gradlew aggregatedJavadocs deploy: provider: pages From 01eb7a96586b30c71b63ebbc3eb210d82b9e4ced Mon Sep 17 00:00:00 2001 From: TobiGr Date: Thu, 26 Dec 2019 22:21:52 +0100 Subject: [PATCH 02/30] Fix syntax --- .travis.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index 504a51ad0..f150640f7 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,8 +1,8 @@ language: java script: - ./gradlew check - ./gradlew aggregatedJavadocs + - ./gradlew check + - ./gradlew aggregatedJavadocs deploy: provider: pages From f826c247499ef1a4eedb13e944657fdb57312441 Mon Sep 17 00:00:00 2001 From: TobiGr Date: Thu, 26 Dec 2019 22:46:19 +0100 Subject: [PATCH 03/30] Use UTF-8 encoding for docs and bump version to v0.18.0 --- build.gradle | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/build.gradle b/build.gradle index b0cdf7114..3d8ced48b 100644 --- a/build.gradle +++ b/build.gradle @@ -5,7 +5,7 @@ allprojects { sourceCompatibility = 1.7 targetCompatibility = 1.7 - version 'v0.13.0' + version 'v0.18.0' group 'com.github.TeamNewPipe' repositories { @@ -43,7 +43,8 @@ task aggregatedJavadocs(type: Javadoc, group: 'Documentation') { title = "$project.name $version" // options.memberLevel = JavadocMemberLevel.PRIVATE options.links 'https://docs.oracle.com/javase/7/docs/api/' - + options.encoding 'UTF-8' + subprojects.each { project -> project.tasks.withType(Javadoc).each { javadocTask -> source += javadocTask.source From 305ff217701dbd81ea2926fde16ad5d36f13b00f Mon Sep 17 00:00:00 2001 From: TobiGr Date: Thu, 26 Dec 2019 23:38:40 +0100 Subject: [PATCH 04/30] use openjdk8 for travis build --- .travis.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.travis.yml b/.travis.yml index f150640f7..f5bd81060 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,4 +1,6 @@ language: java +jdk: + - openjdk8 script: - ./gradlew check From 1ed89aad3eff72231bfec6c0d8d1fcf6b8c2c8e8 Mon Sep 17 00:00:00 2001 From: Stypox Date: Mon, 6 Jan 2020 20:45:57 +0100 Subject: [PATCH 05/30] Use more often playerResponse in yt stream extractor This enhances performance and should make the extractor more reliable since it get info from a stable json structure that shouldn't be subject to many changes. Fallback html methods have been kept. In case of error the thrown exception contains the data about the playerResponse failure, that should be clearer than a NPE caused by not-found html tags. --- .../extractors/YoutubeStreamExtractor.java | 179 ++++++++++-------- 1 file changed, 100 insertions(+), 79 deletions(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java index c05004ede..ef1edad33 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java @@ -106,19 +106,21 @@ public class YoutubeStreamExtractor extends StreamExtractor { @Override public String getName() throws ParsingException { assertPageFetched(); - String name = getStringFromMetaData("title"); - if(name == null) { - // Fallback to HTML method + try { + return playerResponse.getObject("videoDetails").getString("title"); + + } catch (Exception e) { + // fallback HTML method + String name = null; try { name = doc.select("meta[name=title]").attr(CONTENT); - } catch (Exception e) { - throw new ParsingException("Could not get the title", e); + } catch (Exception ignored) {} + + if (name == null) { + throw new ParsingException("Could not get name", e); } + return name; } - if(name == null || name.isEmpty()) { - throw new ParsingException("Could not get the title"); - } - return name; } @Override @@ -128,9 +130,17 @@ public class YoutubeStreamExtractor extends StreamExtractor { } try { - return doc.select("meta[itemprop=datePublished]").attr(CONTENT); - } catch (Exception e) {//todo: add fallback method - throw new ParsingException("Could not get upload date", e); + return playerResponse.getObject("microformat").getObject("playerMicroformatRenderer").getString("publishDate"); + } catch (Exception e) { + String uploadDate = null; + try { + uploadDate = doc.select("meta[itemprop=datePublished]").attr(CONTENT); + } catch (Exception ignored) {} + + if (uploadDate == null) { + throw new ParsingException("Could not get upload date", e); + } + return uploadDate; } } @@ -149,24 +159,23 @@ public class YoutubeStreamExtractor extends StreamExtractor { @Override public String getThumbnailUrl() throws ParsingException { assertPageFetched(); - // Try to get high resolution thumbnail first, if it fails, use low res from the player instead try { - return doc.select("link[itemprop=\"thumbnailUrl\"]").first().attr("abs:href"); - } catch (Exception ignored) { - // Try other method... - } + JsonArray thumbnails = playerResponse.getObject("videoDetails").getObject("thumbnail").getArray("thumbnails"); + // the last thumbnail is the one with the highest resolution + return thumbnails.getObject(thumbnails.size()-1).getString("url"); - try { - if (playerArgs != null && playerArgs.isString("thumbnail_url")) return playerArgs.getString("thumbnail_url"); - } catch (Exception ignored) { - // Try other method... - } - - try { - return videoInfoPage.get("thumbnail_url"); } catch (Exception e) { - throw new ParsingException("Could not get thumbnail url", e); + String url = null; + try { + url = doc.select("link[itemprop=\"thumbnailUrl\"]").first().attr("abs:href"); + } catch (Exception ignored) {} + + if (url == null) { + throw new ParsingException("Could not get thumbnail url", e); + } + return url; } + } @Nonnull @@ -174,9 +183,15 @@ public class YoutubeStreamExtractor extends StreamExtractor { public String getDescription() throws ParsingException { assertPageFetched(); try { + // first try to get html-formatted description return parseHtmlAndGetFullLinks(doc.select("p[id=\"eow-description\"]").first().html()); } catch (Exception e) { - throw new ParsingException("Could not get the description", e); + try { + // fallback to raw non-html description + return playerResponse.getObject("videoDetails").getString("shortDescription"); + } catch (Exception ignored) { + throw new ParsingException("Could not get the description", e); + } } } @@ -269,25 +284,22 @@ public class YoutubeStreamExtractor extends StreamExtractor { public long getLength() throws ParsingException { assertPageFetched(); - // try getting duration from playerargs - try { - String durationMs = playerResponse - .getObject("streamingData") - .getArray("formats") - .getObject(0) - .getString("approxDurationMs"); - return Long.parseLong(durationMs)/1000; - } catch (Exception e) { - } - - //try getting value from age gated video try { String duration = playerResponse .getObject("videoDetails") .getString("lengthSeconds"); return Long.parseLong(duration); } catch (Exception e) { - throw new ParsingException("Every methode to get the duration has failed: ", e); + try { + String durationMs = playerResponse + .getObject("streamingData") + .getArray("formats") + .getObject(0) + .getString("approxDurationMs"); + return Math.round(Long.parseLong(durationMs)/1000.0f); + } catch (Exception ignored) { + throw new ParsingException("Could not get duration", e); + } } } @@ -307,11 +319,15 @@ public class YoutubeStreamExtractor extends StreamExtractor { try { if (getStreamType().equals(StreamType.LIVE_STREAM)) { return getLiveStreamWatchingCount(); + } else { + return Long.parseLong(playerResponse.getObject("videoDetails").getString("viewCount")); + } + } catch (Exception e) { + try { + return Long.parseLong(doc.select("meta[itemprop=interactionCount]").attr(CONTENT)); + } catch (Exception ignored) { + throw new ParsingException("Could not get view count", e); } - - return Long.parseLong(doc.select("meta[itemprop=interactionCount]").attr(CONTENT)); - } catch (Exception e) {//todo: find fallback method - throw new ParsingException("Could not get number of views", e); } } @@ -373,7 +389,10 @@ public class YoutubeStreamExtractor extends StreamExtractor { try { likesString = button.select("span.yt-uix-button-content").first().text(); } catch (NullPointerException e) { - //if this kicks in our button has no content and therefore likes/dislikes are disabled + //if this kicks in our button has no content and therefore ratings must be disabled + if (playerResponse.getObject("videoDetails").getBoolean("allowRatings")) { + throw new ParsingException("Ratings are enabled even though the like button is missing", e); + } return -1; } return Integer.parseInt(Utils.removeNonDigitCharacters(likesString)); @@ -393,7 +412,10 @@ public class YoutubeStreamExtractor extends StreamExtractor { try { dislikesString = button.select("span.yt-uix-button-content").first().text(); } catch (NullPointerException e) { - //if this kicks in our button has no content and therefore likes/dislikes are disabled + //if this kicks in our button has no content and therefore ratings must be disabled + if (playerResponse.getObject("videoDetails").getBoolean("allowRatings")) { + throw new ParsingException("Ratings are enabled even though the dislike button is missing", e); + } return -1; } return Integer.parseInt(Utils.removeNonDigitCharacters(dislikesString)); @@ -409,60 +431,59 @@ public class YoutubeStreamExtractor extends StreamExtractor { public String getUploaderUrl() throws ParsingException { assertPageFetched(); try { - return doc.select("div[class=\"yt-user-info\"]").first().children() - .select("a").first().attr("abs:href"); + return "https://www.youtube.com/channel/" + + playerResponse.getObject("videoDetails").getString("channelId"); } catch (Exception e) { - throw new ParsingException("Could not get channel link", e); - } - } + String uploaderUrl = null; + try { + uploaderUrl = doc.select("div[class=\"yt-user-info\"]").first().children() + .select("a").first().attr("abs:href"); + } catch (Exception ignored) {} - - @Nullable - private String getStringFromMetaData(String field) { - assertPageFetched(); - String value = null; - if(playerArgs != null) { - // This can not fail - value = playerArgs.getString(field); + if (uploaderUrl == null) { + throw new ParsingException("Could not get channel link", e); + } + return uploaderUrl; } - if(value == null) { - // This can not fail too - value = videoInfoPage.get(field); - } - return value; } @Nonnull @Override public String getUploaderName() throws ParsingException { assertPageFetched(); - String name = getStringFromMetaData("author"); - - if(name == null) { + try { + return playerResponse.getObject("videoDetails").getString("author"); + } catch (Exception e) { + String name = null; try { - // Fallback to HTML method name = doc.select("div.yt-user-info").first().text(); - } catch (Exception e) { - throw new ParsingException("Could not get uploader name", e); + } catch (Exception ignored) {} + + if (name == null) { + throw new ParsingException("Could not get uploader name"); } + return name; } - if(name == null || name.isEmpty()) { - throw new ParsingException("Could not get uploader name"); - } - return name; } @Nonnull @Override public String getUploaderAvatarUrl() throws ParsingException { assertPageFetched(); + + String uploaderAvatarUrl = null; try { - return doc.select("a[class*=\"yt-user-photo\"]").first() + uploaderAvatarUrl = doc.select("a[class*=\"yt-user-photo\"]").first() .select("img").first() .attr("abs:data-thumb"); } catch (Exception e) {//todo: add fallback method - throw new ParsingException("Could not get uploader thumbnail URL.", e); + throw new ParsingException("Could not get uploader avatar url", e); } + + if (uploaderAvatarUrl == null) { + throw new ParsingException("Could not get uploader avatar url"); + } + return uploaderAvatarUrl; } @Nonnull @@ -590,12 +611,12 @@ public class YoutubeStreamExtractor extends StreamExtractor { public StreamType getStreamType() throws ParsingException { assertPageFetched(); try { - if (playerArgs != null && (playerArgs.has("ps") && playerArgs.get("ps").toString().equals("live") || - (!playerResponse.getObject("streamingData").has(FORMATS)))) { + if (!playerResponse.getObject("streamingData").has(FORMATS) || + (playerArgs != null && playerArgs.has("ps") && playerArgs.get("ps").toString().equals("live"))) { return StreamType.LIVE_STREAM; } } catch (Exception e) { - throw new ParsingException("Could not get hls manifest url", e); + throw new ParsingException("Could not get stream type", e); } return StreamType.VIDEO_STREAM; } From 1689037b0115d440bce5ee5e612ab646cafa496f Mon Sep 17 00:00:00 2001 From: Stypox Date: Mon, 6 Jan 2020 20:49:08 +0100 Subject: [PATCH 06/30] Fix getLength() tests now that the value is rounded (not floor-ed) In yt stream extractor --- .../youtube/stream/YoutubeStreamExtractorAgeRestrictedTest.java | 2 +- .../youtube/stream/YoutubeStreamExtractorDefaultTest.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/stream/YoutubeStreamExtractorAgeRestrictedTest.java b/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/stream/YoutubeStreamExtractorAgeRestrictedTest.java index 992a8cd92..b9da6d173 100644 --- a/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/stream/YoutubeStreamExtractorAgeRestrictedTest.java +++ b/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/stream/YoutubeStreamExtractorAgeRestrictedTest.java @@ -75,7 +75,7 @@ public class YoutubeStreamExtractorAgeRestrictedTest { @Test public void testGetLength() throws ParsingException { - assertEquals(1789, extractor.getLength()); + assertEquals(1790, extractor.getLength()); } @Test diff --git a/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/stream/YoutubeStreamExtractorDefaultTest.java b/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/stream/YoutubeStreamExtractorDefaultTest.java index e715144a9..14b785014 100644 --- a/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/stream/YoutubeStreamExtractorDefaultTest.java +++ b/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/stream/YoutubeStreamExtractorDefaultTest.java @@ -101,7 +101,7 @@ public class YoutubeStreamExtractorDefaultTest { @Test public void testGetLength() throws ParsingException { - assertEquals(366, extractor.getLength()); + assertEquals(367, extractor.getLength()); } @Test From d5ca02f3f69690b1bab065639db139747a4997b4 Mon Sep 17 00:00:00 2001 From: Stypox Date: Mon, 6 Jan 2020 21:41:15 +0100 Subject: [PATCH 07/30] Fix test failing because music channel could be Vevo In yt stream extractor test. The Vevo and the normal music channel are equivalent, so Youtube picks one of them at random, and in playerResponse the channel id is Vevo's one. --- .../youtube/stream/YoutubeStreamExtractorDefaultTest.java | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/stream/YoutubeStreamExtractorDefaultTest.java b/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/stream/YoutubeStreamExtractorDefaultTest.java index 14b785014..8dd35ce77 100644 --- a/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/stream/YoutubeStreamExtractorDefaultTest.java +++ b/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/stream/YoutubeStreamExtractorDefaultTest.java @@ -124,7 +124,11 @@ public class YoutubeStreamExtractorDefaultTest { @Test public void testGetUploaderUrl() throws ParsingException { - assertEquals("https://www.youtube.com/channel/UCsRM0YB_dabtEPGPTKo-gcw", extractor.getUploaderUrl()); + String url = extractor.getUploaderUrl(); + if (!url.equals("https://www.youtube.com/channel/UCsRM0YB_dabtEPGPTKo-gcw") && + !url.equals("https://www.youtube.com/channel/UComP_epzeKzvBX156r6pm1Q")) { + fail("Uploader url is neither the music channel one nor the Vevo one"); + } } @Test From 1bb6cdee225a8544bdc87311523f4abb859efcd6 Mon Sep 17 00:00:00 2001 From: Stypox Date: Mon, 6 Jan 2020 21:43:24 +0100 Subject: [PATCH 08/30] Enable commented-out test for yt stream extractor --- .../youtube/stream/YoutubeStreamExtractorControversialTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/stream/YoutubeStreamExtractorControversialTest.java b/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/stream/YoutubeStreamExtractorControversialTest.java index 7faec8051..5a063a936 100644 --- a/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/stream/YoutubeStreamExtractorControversialTest.java +++ b/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/stream/YoutubeStreamExtractorControversialTest.java @@ -65,7 +65,7 @@ public class YoutubeStreamExtractorControversialTest { @Test public void testGetDescription() throws ParsingException { assertNotNull(extractor.getDescription()); -// assertFalse(extractor.getDescription().isEmpty()); + assertFalse(extractor.getDescription().isEmpty()); } @Test From 7ba04836ebe15268e12c5055d0919d61a8dfe1f4 Mon Sep 17 00:00:00 2001 From: Stypox Date: Mon, 6 Jan 2020 23:57:08 +0100 Subject: [PATCH 09/30] Add tests for ratings in yt stream extractor tests --- .../YoutubeStreamExtractorDefaultTest.java | 35 +++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/stream/YoutubeStreamExtractorDefaultTest.java b/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/stream/YoutubeStreamExtractorDefaultTest.java index 8dd35ce77..5f21df553 100644 --- a/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/stream/YoutubeStreamExtractorDefaultTest.java +++ b/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/stream/YoutubeStreamExtractorDefaultTest.java @@ -187,6 +187,18 @@ public class YoutubeStreamExtractorDefaultTest { // Video (/view?v=YQHsXMglC9A) set in the setUp() method has no captions => null assertTrue(extractor.getSubtitles(MediaFormat.TTML).isEmpty()); } + + @Test + public void testGetLikeCount() throws ParsingException { + long likeCount = extractor.getLikeCount(); + assertTrue("" + likeCount, likeCount >= 15000000); + } + + @Test + public void testGetDislikeCount() throws ParsingException { + long dislikeCount = extractor.getDislikeCount(); + assertTrue("" + dislikeCount, dislikeCount >= 818000); + } } public static class DescriptionTestPewdiepie { @@ -249,6 +261,29 @@ public class YoutubeStreamExtractorDefaultTest { } } + public static class RatingsDisabledTest { + private static YoutubeStreamExtractor extractor; + + @BeforeClass + public static void setUp() throws Exception { + NewPipe.init(DownloaderTestImpl.getInstance()); + extractor = (YoutubeStreamExtractor) YouTube + .getStreamExtractor("https://www.youtube.com/watch?v=HRKu0cvrr_o"); + extractor.fetchPage(); + } + + @Test + public void testGetLikeCount() throws ParsingException { + assertEquals(-1, extractor.getLikeCount()); + } + + @Test + public void testGetDislikeCount() throws ParsingException { + assertEquals(-1, extractor.getDislikeCount()); + } + + } + public static class FramesTest { private static YoutubeStreamExtractor extractor; From 3743c60695c7a4349b5ddec9bedee13b3fb75c26 Mon Sep 17 00:00:00 2001 From: B0pol Date: Wed, 15 Jan 2020 14:27:52 +0100 Subject: [PATCH 10/30] fix: typos in Test --- .../peertube/PeertubeChannelLinkHandlerFactoryTest.java | 2 +- .../peertube/PeertubeCommentsLinkHandlerFactoryTest.java | 2 +- .../peertube/PeertubePlaylistLinkHandlerFactoryTest.java | 2 +- .../services/youtube/YoutubeChannelLinkHandlerFactoryTest.java | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/extractor/src/test/java/org/schabi/newpipe/extractor/services/peertube/PeertubeChannelLinkHandlerFactoryTest.java b/extractor/src/test/java/org/schabi/newpipe/extractor/services/peertube/PeertubeChannelLinkHandlerFactoryTest.java index d90e4321b..633307a28 100644 --- a/extractor/src/test/java/org/schabi/newpipe/extractor/services/peertube/PeertubeChannelLinkHandlerFactoryTest.java +++ b/extractor/src/test/java/org/schabi/newpipe/extractor/services/peertube/PeertubeChannelLinkHandlerFactoryTest.java @@ -24,7 +24,7 @@ public class PeertubeChannelLinkHandlerFactoryTest { } @Test - public void acceptrUrlTest() throws ParsingException { + public void acceptUrlTest() throws ParsingException { assertTrue(linkHandler.acceptUrl("https://peertube.mastodon.host/accounts/kranti@videos.squat.net")); } diff --git a/extractor/src/test/java/org/schabi/newpipe/extractor/services/peertube/PeertubeCommentsLinkHandlerFactoryTest.java b/extractor/src/test/java/org/schabi/newpipe/extractor/services/peertube/PeertubeCommentsLinkHandlerFactoryTest.java index c13150302..fcc27c54d 100644 --- a/extractor/src/test/java/org/schabi/newpipe/extractor/services/peertube/PeertubeCommentsLinkHandlerFactoryTest.java +++ b/extractor/src/test/java/org/schabi/newpipe/extractor/services/peertube/PeertubeCommentsLinkHandlerFactoryTest.java @@ -24,7 +24,7 @@ public class PeertubeCommentsLinkHandlerFactoryTest { } @Test - public void acceptrUrlTest() throws ParsingException { + public void acceptUrlTest() throws ParsingException { assertTrue(linkHandler.acceptUrl("https://peertube.mastodon.host/api/v1/videos/19319/comment-threads?start=0&count=10&sort=-createdAt")); } diff --git a/extractor/src/test/java/org/schabi/newpipe/extractor/services/peertube/PeertubePlaylistLinkHandlerFactoryTest.java b/extractor/src/test/java/org/schabi/newpipe/extractor/services/peertube/PeertubePlaylistLinkHandlerFactoryTest.java index 77e7d2e84..9ad291027 100644 --- a/extractor/src/test/java/org/schabi/newpipe/extractor/services/peertube/PeertubePlaylistLinkHandlerFactoryTest.java +++ b/extractor/src/test/java/org/schabi/newpipe/extractor/services/peertube/PeertubePlaylistLinkHandlerFactoryTest.java @@ -24,7 +24,7 @@ public class PeertubePlaylistLinkHandlerFactoryTest { } @Test - public void acceptrUrlTest() throws ParsingException { + public void acceptUrlTest() throws ParsingException { assertTrue(linkHandler.acceptUrl("https://peertube.mastodon.host/video-channels/b45e84fb-c47f-475b-94f2-718126154d33/videos")); } diff --git a/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeChannelLinkHandlerFactoryTest.java b/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeChannelLinkHandlerFactoryTest.java index 7b4814cf6..4c598a9b6 100644 --- a/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeChannelLinkHandlerFactoryTest.java +++ b/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeChannelLinkHandlerFactoryTest.java @@ -24,7 +24,7 @@ public class YoutubeChannelLinkHandlerFactoryTest { } @Test - public void acceptrUrlTest() throws ParsingException { + public void acceptUrlTest() throws ParsingException { assertTrue(linkHandler.acceptUrl("https://www.youtube.com/user/Gronkh")); assertTrue(linkHandler.acceptUrl("https://www.youtube.com/user/Netzkino/videos")); From a12c0e2a37ff2697aa528cb13d69713bc860997b Mon Sep 17 00:00:00 2001 From: B0pol Date: Fri, 17 Jan 2020 12:09:59 +0100 Subject: [PATCH 11/30] fix:invidious: getID and onAccpetURl for comments --- .../YoutubeCommentsLinkHandlerFactory.java | 222 ++++++++++++------ .../youtube/YoutubeCommentsExtractorTest.java | 4 +- 2 files changed, 157 insertions(+), 69 deletions(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeCommentsLinkHandlerFactory.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeCommentsLinkHandlerFactory.java index 036529abd..8dc8715f2 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeCommentsLinkHandlerFactory.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeCommentsLinkHandlerFactory.java @@ -1,13 +1,15 @@ package org.schabi.newpipe.extractor.services.youtube.linkHandler; -import java.io.UnsupportedEncodingException; -import java.net.URLDecoder; -import java.util.List; - import org.schabi.newpipe.extractor.exceptions.FoundAdException; import org.schabi.newpipe.extractor.exceptions.ParsingException; import org.schabi.newpipe.extractor.linkhandler.ListLinkHandlerFactory; -import org.schabi.newpipe.extractor.utils.Parser; +import org.schabi.newpipe.extractor.utils.Utils; + +import java.net.MalformedURLException; +import java.net.URI; +import java.net.URISyntaxException; +import java.net.URL; +import java.util.List; public class YoutubeCommentsLinkHandlerFactory extends ListLinkHandlerFactory { @@ -18,84 +20,170 @@ public class YoutubeCommentsLinkHandlerFactory extends ListLinkHandlerFactory { return instance; } + private static String assertIsID(String id) throws ParsingException { + if (id == null || !id.matches("[a-zA-Z0-9_-]{11}")) { + throw new ParsingException("The given string is not a Youtube-Video-ID"); + } + + return id; + } + @Override public String getUrl(String id) { return "https://m.youtube.com/watch?v=" + id; } @Override - public String getId(String url) throws ParsingException, IllegalArgumentException { - if (url.isEmpty()) { - throw new IllegalArgumentException("The url parameter should not be empty"); - } + public String getId(String urlString) throws ParsingException, IllegalArgumentException { + try { + URI uri = new URI(urlString); + String scheme = uri.getScheme(); - String id; - String lowercaseUrl = url.toLowerCase(); - if (lowercaseUrl.contains("youtube")) { - if (url.contains("attribution_link")) { - try { - String escapedQuery = Parser.matchGroup1("u=(.[^&|$]*)", url); - String query = URLDecoder.decode(escapedQuery, "UTF-8"); - id = Parser.matchGroup1("v=" + ID_PATTERN, query); - } catch (UnsupportedEncodingException uee) { - throw new ParsingException("Could not parse attribution_link", uee); + if (scheme != null && (scheme.equals("vnd.youtube") || scheme.equals("vnd.youtube.launch"))) { + String schemeSpecificPart = uri.getSchemeSpecificPart(); + if (schemeSpecificPart.startsWith("//")) { + urlString = "https:" + schemeSpecificPart; + } else { + return assertIsID(schemeSpecificPart); } - } else if (url.contains("vnd.youtube")) { - id = Parser.matchGroup1(ID_PATTERN, url); - } else if (url.contains("embed")) { - id = Parser.matchGroup1("embed/" + ID_PATTERN, url); - } else if (url.contains("googleads")) { - throw new FoundAdException("Error found add: " + url); - } else { - id = Parser.matchGroup1("[?&]v=" + ID_PATTERN, url); } - } else if (lowercaseUrl.contains("youtu.be")) { - if (url.contains("v=")) { - id = Parser.matchGroup1("v=" + ID_PATTERN, url); - } else { - id = Parser.matchGroup1("[Yy][Oo][Uu][Tt][Uu]\\.[Bb][Ee]/" + ID_PATTERN, url); - } - } else if(lowercaseUrl.contains("hooktube")) { - if(lowercaseUrl.contains("&v=") - || lowercaseUrl.contains("?v=")) { - id = Parser.matchGroup1("[?&]v=" + ID_PATTERN, url); - } else if (url.contains("/embed/")) { - id = Parser.matchGroup1("embed/" + ID_PATTERN, url); - } else if (url.contains("/v/")) { - id = Parser.matchGroup1("v/" + ID_PATTERN, url); - } else if (url.contains("/watch/")) { - id = Parser.matchGroup1("watch/" + ID_PATTERN, url); - } else { - throw new ParsingException("Error no suitable url: " + url); - } - } else { - throw new ParsingException("Error no suitable url: " + url); + } catch (URISyntaxException ignored) { } - - if (!id.isEmpty()) { - return id; - } else { - throw new ParsingException("Error could not parse url: " + url); + URL url; + try { + url = Utils.stringToURL(urlString); + } catch (MalformedURLException e) { + throw new IllegalArgumentException("The given URL is not valid"); } + + String host = url.getHost(); + String path = url.getPath(); + // remove leading "/" of URL-path if URL-path is given + if (!path.isEmpty()) { + path = path.substring(1); + } + + if (!Utils.isHTTP(url) || !(YoutubeParsingHelper.isYoutubeURL(url) || + YoutubeParsingHelper.isYoutubeServiceURL(url) || YoutubeParsingHelper.isHooktubeURL(url) || + YoutubeParsingHelper.isInvidioURL(url))) { + if (host.equalsIgnoreCase("googleads.g.doubleclick.net")) { + throw new FoundAdException("Error found ad: " + urlString); + } + + throw new ParsingException("The url is not a Youtube-URL"); + } + + if (YoutubePlaylistLinkHandlerFactory.getInstance().acceptUrl(urlString)) { + throw new ParsingException("Error no suitable url: " + urlString); + } + + // using uppercase instead of lowercase, because toLowercase replaces some unicode characters + // with their lowercase ASCII equivalent. Using toLowercase could result in faultily matching unicode urls. + switch (host.toUpperCase()) { + case "WWW.YOUTUBE-NOCOOKIE.COM": { + if (path.startsWith("embed/")) { + String id = path.split("/")[1]; + + return assertIsID(id); + } + + break; + } + + case "YOUTUBE.COM": + case "WWW.YOUTUBE.COM": + case "M.YOUTUBE.COM": + case "MUSIC.YOUTUBE.COM": { + if (path.equals("attribution_link")) { + String uQueryValue = Utils.getQueryValue(url, "u"); + + URL decodedURL; + try { + decodedURL = Utils.stringToURL("http://www.youtube.com" + uQueryValue); + } catch (MalformedURLException e) { + throw new ParsingException("Error no suitable url: " + urlString); + } + + String viewQueryValue = Utils.getQueryValue(decodedURL, "v"); + return assertIsID(viewQueryValue); + } + + if (path.startsWith("embed/")) { + String id = path.split("/")[1]; + + return assertIsID(id); + } + + String viewQueryValue = Utils.getQueryValue(url, "v"); + return assertIsID(viewQueryValue); + } + + case "YOUTU.BE": { + String viewQueryValue = Utils.getQueryValue(url, "v"); + if (viewQueryValue != null) { + return assertIsID(viewQueryValue); + } + + return assertIsID(path); + } + + case "HOOKTUBE.COM": { + if (path.startsWith("v/")) { + String id = path.substring("v/".length()); + + return assertIsID(id); + } + if (path.startsWith("watch/")) { + String id = path.substring("watch/".length()); + + return assertIsID(id); + } + // there is no break-statement here on purpose so the next code-block gets also run for hooktube + } + + case "WWW.INVIDIO.US": + case "DEV.INVIDIO.US": + case "INVIDIO.US": + case "INVIDIOUS.SNOPYTA.ORG": + case "DE.INVIDIOUS.SNOPYTA.ORG": + case "FI.INVIDIOUS.SNOPYTA.ORG": + case "VID.WXZM.SX": + case "INVIDIOUS.KABI.TK": + case "INVIDIOU.SH": + case "WWW.INVIDIOU.SH": + case "NO.INVIDIOU.SH": + case "INVIDIOUS.ENKIRTON.NET": + case "TUBE.POAL.CO": + case "INVIDIOUS.13AD.DE": + case "YT.ELUKERIO.ORG": { // code-block for hooktube.com and Invidious instances + if (path.equals("watch")) { + String viewQueryValue = Utils.getQueryValue(url, "v"); + if (viewQueryValue != null) { + return assertIsID(viewQueryValue); + } + } + if (path.startsWith("embed/")) { + String id = path.substring("embed/".length()); + + return assertIsID(id); + } + + break; + } + } + + throw new ParsingException("Error no suitable url: " + urlString); } @Override public boolean onAcceptUrl(final String url) throws FoundAdException { - final String lowercaseUrl = url.toLowerCase(); - if (lowercaseUrl.contains("youtube") - || lowercaseUrl.contains("youtu.be") - || lowercaseUrl.contains("hooktube")) { - // bad programming I know - try { - getId(url); - return true; - } catch (FoundAdException fe) { - throw fe; - } catch (ParsingException e) { - return false; - } - } else { + try { + getId(url); + return true; + } catch (FoundAdException fe) { + throw fe; + } catch (ParsingException e) { return false; } } diff --git a/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeCommentsExtractorTest.java b/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeCommentsExtractorTest.java index 6bf76b615..79f13b6bf 100644 --- a/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeCommentsExtractorTest.java +++ b/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeCommentsExtractorTest.java @@ -27,7 +27,7 @@ public class YoutubeCommentsExtractorTest { public static void setUp() throws Exception { NewPipe.init(DownloaderTestImpl.getInstance()); extractor = (YoutubeCommentsExtractor) YouTube - .getCommentsExtractor("https://www.youtube.com/watch?v=D00Au7k3i6o"); + .getCommentsExtractor("https://www.invidio.us/watch?v=D00Au7k3i6o"); } @Test @@ -47,7 +47,7 @@ public class YoutubeCommentsExtractorTest { @Test public void testGetCommentsFromCommentsInfo() throws IOException, ExtractionException { boolean result = false; - CommentsInfo commentsInfo = CommentsInfo.getInfo("https://www.youtube.com/watch?v=D00Au7k3i6o"); + CommentsInfo commentsInfo = CommentsInfo.getInfo("https://www.invidio.us/watch?v=D00Au7k3i6o"); assertTrue("what the fuck am i doing with my life".equals(commentsInfo.getName())); result = findInComments(commentsInfo.getRelatedItems(), "s1ck m3m3"); From 02930d08f1c17d1bc95a6326bd86fd8e499a9cc5 Mon Sep 17 00:00:00 2001 From: B0pol Date: Fri, 17 Jan 2020 12:58:50 +0100 Subject: [PATCH 12/30] added comments test for invidious --- .../YoutubeCommentsLinkHandlerFactory.java | 1 - .../youtube/YoutubeCommentsExtractorTest.java | 46 +++++++++++++------ 2 files changed, 33 insertions(+), 14 deletions(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeCommentsLinkHandlerFactory.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeCommentsLinkHandlerFactory.java index 8dc8715f2..71e54568d 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeCommentsLinkHandlerFactory.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeCommentsLinkHandlerFactory.java @@ -14,7 +14,6 @@ import java.util.List; public class YoutubeCommentsLinkHandlerFactory extends ListLinkHandlerFactory { private static final YoutubeCommentsLinkHandlerFactory instance = new YoutubeCommentsLinkHandlerFactory(); - private static final String ID_PATTERN = "([\\-a-zA-Z0-9_]{11})"; public static YoutubeCommentsLinkHandlerFactory getInstance() { return instance; diff --git a/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeCommentsExtractorTest.java b/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeCommentsExtractorTest.java index 79f13b6bf..44a79621c 100644 --- a/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeCommentsExtractorTest.java +++ b/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeCommentsExtractorTest.java @@ -4,7 +4,6 @@ import org.jsoup.helper.StringUtil; import org.junit.BeforeClass; import org.junit.Test; import org.schabi.newpipe.DownloaderTestImpl; -import org.schabi.newpipe.DownloaderTestImpl; import org.schabi.newpipe.extractor.ListExtractor.InfoItemsPage; import org.schabi.newpipe.extractor.NewPipe; import org.schabi.newpipe.extractor.comments.CommentsInfo; @@ -21,17 +20,32 @@ import static org.schabi.newpipe.extractor.ServiceList.YouTube; public class YoutubeCommentsExtractorTest { - private static YoutubeCommentsExtractor extractor; + private static final String urlYT = "https://www.youtube.com/watch?v=D00Au7k3i6o"; + private static final String urlInvidious = "https://invidio.us/watch?v=D00Au7k3i6o"; + private static final String urlInvidioush = "https://invidiou.sh/watch?v=D00Au7k3i6o"; + private static YoutubeCommentsExtractor extractorYT; + private static YoutubeCommentsExtractor extractorInvidious; + private static YoutubeCommentsExtractor extractorInvidioush; @BeforeClass public static void setUp() throws Exception { NewPipe.init(DownloaderTestImpl.getInstance()); - extractor = (YoutubeCommentsExtractor) YouTube - .getCommentsExtractor("https://www.invidio.us/watch?v=D00Au7k3i6o"); + extractorYT = (YoutubeCommentsExtractor) YouTube + .getCommentsExtractor(urlYT); + extractorInvidious = (YoutubeCommentsExtractor) YouTube + .getCommentsExtractor(urlInvidious); + extractorInvidioush = (YoutubeCommentsExtractor) YouTube + .getCommentsExtractor(urlInvidioush); } @Test public void testGetComments() throws IOException, ExtractionException { + assertTrue(getCommentsHelper(extractorYT)); + assertTrue(getCommentsHelper(extractorInvidious)); + assertTrue(getCommentsHelper(extractorInvidioush)); + } + + private boolean getCommentsHelper(YoutubeCommentsExtractor extractor) throws IOException, ExtractionException { boolean result; InfoItemsPage comments = extractor.getInitialPage(); result = findInComments(comments, "s1ck m3m3"); @@ -41,13 +55,20 @@ public class YoutubeCommentsExtractorTest { result = findInComments(comments, "s1ck m3m3"); } - assertTrue(result); + return result; + } @Test public void testGetCommentsFromCommentsInfo() throws IOException, ExtractionException { + assertTrue(getCommentsFromCommentsInfoHelper(urlYT)); + assertTrue(getCommentsFromCommentsInfoHelper(urlInvidious)); + assertTrue(getCommentsFromCommentsInfoHelper(urlInvidioush)); + } + + private boolean getCommentsFromCommentsInfoHelper(String url) throws IOException, ExtractionException { boolean result = false; - CommentsInfo commentsInfo = CommentsInfo.getInfo("https://www.invidio.us/watch?v=D00Au7k3i6o"); + CommentsInfo commentsInfo = CommentsInfo.getInfo(url); assertTrue("what the fuck am i doing with my life".equals(commentsInfo.getName())); result = findInComments(commentsInfo.getRelatedItems(), "s1ck m3m3"); @@ -57,16 +78,15 @@ public class YoutubeCommentsExtractorTest { result = findInComments(moreItems.getItems(), "s1ck m3m3"); nextPage = moreItems.getNextPageUrl(); } - - assertTrue(result); + return result; } - + @Test public void testGetCommentsAllData() throws IOException, ExtractionException { - InfoItemsPage comments = extractor.getInitialPage(); + InfoItemsPage comments = extractorYT.getInitialPage(); DefaultTests.defaultTestListOfItems(YouTube.getServiceId(), comments.getItems(), comments.getErrors()); - for(CommentsInfoItem c: comments.getItems()) { + for (CommentsInfoItem c : comments.getItems()) { assertFalse(StringUtil.isBlank(c.getAuthorEndpoint())); assertFalse(StringUtil.isBlank(c.getAuthorName())); assertFalse(StringUtil.isBlank(c.getAuthorThumbnail())); @@ -86,8 +106,8 @@ public class YoutubeCommentsExtractorTest { } private boolean findInComments(List comments, String comment) { - for(CommentsInfoItem c: comments) { - if(c.getCommentText().contains(comment)) { + for (CommentsInfoItem c : comments) { + if (c.getCommentText().contains(comment)) { return true; } } From 7e8d00981f9a8c508469525ebe55c8e0334e22d5 Mon Sep 17 00:00:00 2001 From: B0pol Date: Fri, 17 Jan 2020 13:44:55 +0100 Subject: [PATCH 13/30] changed getId() to remove duplicate code --- .../YoutubeCommentsLinkHandlerFactory.java | 153 +----------------- 1 file changed, 1 insertion(+), 152 deletions(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeCommentsLinkHandlerFactory.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeCommentsLinkHandlerFactory.java index 71e54568d..421fc13f3 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeCommentsLinkHandlerFactory.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeCommentsLinkHandlerFactory.java @@ -3,12 +3,7 @@ package org.schabi.newpipe.extractor.services.youtube.linkHandler; import org.schabi.newpipe.extractor.exceptions.FoundAdException; import org.schabi.newpipe.extractor.exceptions.ParsingException; import org.schabi.newpipe.extractor.linkhandler.ListLinkHandlerFactory; -import org.schabi.newpipe.extractor.utils.Utils; -import java.net.MalformedURLException; -import java.net.URI; -import java.net.URISyntaxException; -import java.net.URL; import java.util.List; public class YoutubeCommentsLinkHandlerFactory extends ListLinkHandlerFactory { @@ -19,14 +14,6 @@ public class YoutubeCommentsLinkHandlerFactory extends ListLinkHandlerFactory { return instance; } - private static String assertIsID(String id) throws ParsingException { - if (id == null || !id.matches("[a-zA-Z0-9_-]{11}")) { - throw new ParsingException("The given string is not a Youtube-Video-ID"); - } - - return id; - } - @Override public String getUrl(String id) { return "https://m.youtube.com/watch?v=" + id; @@ -34,145 +21,7 @@ public class YoutubeCommentsLinkHandlerFactory extends ListLinkHandlerFactory { @Override public String getId(String urlString) throws ParsingException, IllegalArgumentException { - try { - URI uri = new URI(urlString); - String scheme = uri.getScheme(); - - if (scheme != null && (scheme.equals("vnd.youtube") || scheme.equals("vnd.youtube.launch"))) { - String schemeSpecificPart = uri.getSchemeSpecificPart(); - if (schemeSpecificPart.startsWith("//")) { - urlString = "https:" + schemeSpecificPart; - } else { - return assertIsID(schemeSpecificPart); - } - } - } catch (URISyntaxException ignored) { - } - - URL url; - try { - url = Utils.stringToURL(urlString); - } catch (MalformedURLException e) { - throw new IllegalArgumentException("The given URL is not valid"); - } - - String host = url.getHost(); - String path = url.getPath(); - // remove leading "/" of URL-path if URL-path is given - if (!path.isEmpty()) { - path = path.substring(1); - } - - if (!Utils.isHTTP(url) || !(YoutubeParsingHelper.isYoutubeURL(url) || - YoutubeParsingHelper.isYoutubeServiceURL(url) || YoutubeParsingHelper.isHooktubeURL(url) || - YoutubeParsingHelper.isInvidioURL(url))) { - if (host.equalsIgnoreCase("googleads.g.doubleclick.net")) { - throw new FoundAdException("Error found ad: " + urlString); - } - - throw new ParsingException("The url is not a Youtube-URL"); - } - - if (YoutubePlaylistLinkHandlerFactory.getInstance().acceptUrl(urlString)) { - throw new ParsingException("Error no suitable url: " + urlString); - } - - // using uppercase instead of lowercase, because toLowercase replaces some unicode characters - // with their lowercase ASCII equivalent. Using toLowercase could result in faultily matching unicode urls. - switch (host.toUpperCase()) { - case "WWW.YOUTUBE-NOCOOKIE.COM": { - if (path.startsWith("embed/")) { - String id = path.split("/")[1]; - - return assertIsID(id); - } - - break; - } - - case "YOUTUBE.COM": - case "WWW.YOUTUBE.COM": - case "M.YOUTUBE.COM": - case "MUSIC.YOUTUBE.COM": { - if (path.equals("attribution_link")) { - String uQueryValue = Utils.getQueryValue(url, "u"); - - URL decodedURL; - try { - decodedURL = Utils.stringToURL("http://www.youtube.com" + uQueryValue); - } catch (MalformedURLException e) { - throw new ParsingException("Error no suitable url: " + urlString); - } - - String viewQueryValue = Utils.getQueryValue(decodedURL, "v"); - return assertIsID(viewQueryValue); - } - - if (path.startsWith("embed/")) { - String id = path.split("/")[1]; - - return assertIsID(id); - } - - String viewQueryValue = Utils.getQueryValue(url, "v"); - return assertIsID(viewQueryValue); - } - - case "YOUTU.BE": { - String viewQueryValue = Utils.getQueryValue(url, "v"); - if (viewQueryValue != null) { - return assertIsID(viewQueryValue); - } - - return assertIsID(path); - } - - case "HOOKTUBE.COM": { - if (path.startsWith("v/")) { - String id = path.substring("v/".length()); - - return assertIsID(id); - } - if (path.startsWith("watch/")) { - String id = path.substring("watch/".length()); - - return assertIsID(id); - } - // there is no break-statement here on purpose so the next code-block gets also run for hooktube - } - - case "WWW.INVIDIO.US": - case "DEV.INVIDIO.US": - case "INVIDIO.US": - case "INVIDIOUS.SNOPYTA.ORG": - case "DE.INVIDIOUS.SNOPYTA.ORG": - case "FI.INVIDIOUS.SNOPYTA.ORG": - case "VID.WXZM.SX": - case "INVIDIOUS.KABI.TK": - case "INVIDIOU.SH": - case "WWW.INVIDIOU.SH": - case "NO.INVIDIOU.SH": - case "INVIDIOUS.ENKIRTON.NET": - case "TUBE.POAL.CO": - case "INVIDIOUS.13AD.DE": - case "YT.ELUKERIO.ORG": { // code-block for hooktube.com and Invidious instances - if (path.equals("watch")) { - String viewQueryValue = Utils.getQueryValue(url, "v"); - if (viewQueryValue != null) { - return assertIsID(viewQueryValue); - } - } - if (path.startsWith("embed/")) { - String id = path.substring("embed/".length()); - - return assertIsID(id); - } - - break; - } - } - - throw new ParsingException("Error no suitable url: " + urlString); + return YoutubeStreamLinkHandlerFactory.getInstance().getId(urlString); //we need the same id, avoids duplicate code } @Override From 5e81ed1dff5ab3109e02ac4ac98dcd093c6acd6b Mon Sep 17 00:00:00 2001 From: bopol Date: Fri, 17 Jan 2020 19:27:54 +0100 Subject: [PATCH 14/30] add support to /c/shortened_url channel links --- .../linkHandler/YoutubeChannelLinkHandlerFactory.java | 2 +- .../youtube/YoutubeChannelLinkHandlerFactoryTest.java | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeChannelLinkHandlerFactory.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeChannelLinkHandlerFactory.java index 5a2e687c9..eb34cf065 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeChannelLinkHandlerFactory.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeChannelLinkHandlerFactory.java @@ -51,7 +51,7 @@ public class YoutubeChannelLinkHandlerFactory extends ListLinkHandlerFactory { throw new ParsingException("the URL given is not a Youtube-URL"); } - if (!path.startsWith("/user/") && !path.startsWith("/channel/")) { + if (!path.startsWith("/user/") && !path.startsWith("/channel/") && !path.startsWith("/c/")) { throw new ParsingException("the URL given is neither a channel nor an user"); } diff --git a/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeChannelLinkHandlerFactoryTest.java b/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeChannelLinkHandlerFactoryTest.java index 7b4814cf6..3e70a44c9 100644 --- a/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeChannelLinkHandlerFactoryTest.java +++ b/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeChannelLinkHandlerFactoryTest.java @@ -28,6 +28,8 @@ public class YoutubeChannelLinkHandlerFactoryTest { assertTrue(linkHandler.acceptUrl("https://www.youtube.com/user/Gronkh")); assertTrue(linkHandler.acceptUrl("https://www.youtube.com/user/Netzkino/videos")); + assertTrue(linkHandler.acceptUrl("https://www.youtube.com/c/creatoracademy")); + assertTrue(linkHandler.acceptUrl("https://www.youtube.com/channel/UClq42foiSgl7sSpLupnugGA")); assertTrue(linkHandler.acceptUrl("https://www.youtube.com/channel/UClq42foiSgl7sSpLupnugGA/videos?disable_polymer=1")); @@ -64,5 +66,8 @@ public class YoutubeChannelLinkHandlerFactoryTest { assertEquals("channel/UClq42foiSgl7sSpLupnugGA", linkHandler.fromUrl("https://invidio.us/channel/UClq42foiSgl7sSpLupnugGA").getId()); assertEquals("channel/UClq42foiSgl7sSpLupnugGA", linkHandler.fromUrl("https://invidio.us/channel/UClq42foiSgl7sSpLupnugGA/videos?disable_polymer=1").getId()); + + assertEquals("c/creatoracademy", linkHandler.fromUrl("https://www.youtube.com/c/creatoracademy").getId()); + assertEquals("c/YouTubeCreators", linkHandler.fromUrl("https://www.youtube.com/c/YouTubeCreators").getId()); } } From 221e8dd171ac31ac684774d432079fb4b60e0e79 Mon Sep 17 00:00:00 2001 From: bopol Date: Fri, 17 Jan 2020 21:24:52 +0100 Subject: [PATCH 15/30] changed assertTrue(string.equals(string)) to assertEquals(string, string) --- .../services/youtube/YoutubeCommentsExtractorTest.java | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeCommentsExtractorTest.java b/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeCommentsExtractorTest.java index 44a79621c..3345ae3a4 100644 --- a/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeCommentsExtractorTest.java +++ b/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeCommentsExtractorTest.java @@ -56,7 +56,6 @@ public class YoutubeCommentsExtractorTest { } return result; - } @Test @@ -69,7 +68,7 @@ public class YoutubeCommentsExtractorTest { private boolean getCommentsFromCommentsInfoHelper(String url) throws IOException, ExtractionException { boolean result = false; CommentsInfo commentsInfo = CommentsInfo.getInfo(url); - assertTrue("what the fuck am i doing with my life".equals(commentsInfo.getName())); + assertEquals("what the fuck am i doing with my life", commentsInfo.getName()); result = findInComments(commentsInfo.getRelatedItems(), "s1ck m3m3"); String nextPage = commentsInfo.getNextPageUrl(); From 39de55dcd3ee32b95d476feec3b322759205c0e4 Mon Sep 17 00:00:00 2001 From: TobiGr Date: Mon, 20 Jan 2020 21:24:51 +0100 Subject: [PATCH 16/30] [YouTube] Fix and move streamCountTest --- .../search/YoutubeSearchCountTest.java | 36 ------------------- ...YoutubeSearchExtractorChannelOnlyTest.java | 11 ++++-- 2 files changed, 9 insertions(+), 38 deletions(-) delete mode 100644 extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/search/YoutubeSearchCountTest.java diff --git a/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/search/YoutubeSearchCountTest.java b/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/search/YoutubeSearchCountTest.java deleted file mode 100644 index c9568045a..000000000 --- a/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/search/YoutubeSearchCountTest.java +++ /dev/null @@ -1,36 +0,0 @@ -package org.schabi.newpipe.extractor.services.youtube.search; - -import org.junit.BeforeClass; -import org.junit.Test; -import org.schabi.newpipe.DownloaderTestImpl; -import org.schabi.newpipe.extractor.NewPipe; -import org.schabi.newpipe.extractor.channel.ChannelInfoItem; -import org.schabi.newpipe.extractor.services.youtube.extractors.YoutubeSearchExtractor; -import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeSearchQueryHandlerFactory; - -import static java.util.Collections.singletonList; -import static junit.framework.TestCase.assertTrue; -import static org.schabi.newpipe.extractor.ServiceList.YouTube; - -/** - * Test for {@link YoutubeSearchExtractor} - */ -public class YoutubeSearchCountTest { - public static class YoutubeChannelViewCountTest extends YoutubeSearchExtractorBaseTest { - @BeforeClass - public static void setUpClass() throws Exception { - NewPipe.init(DownloaderTestImpl.getInstance()); - extractor = (YoutubeSearchExtractor) YouTube.getSearchExtractor("pewdiepie", - singletonList(YoutubeSearchQueryHandlerFactory.CHANNELS), null); - extractor.fetchPage(); - itemsPage = extractor.getInitialPage(); - } - - @Test - public void testViewCount() { - ChannelInfoItem ci = (ChannelInfoItem) itemsPage.getItems().get(0); - assertTrue("Count does not fit: " + Long.toString(ci.getSubscriberCount()), - 69043316 < ci.getSubscriberCount() && ci.getSubscriberCount() < 103043316); - } - } -} diff --git a/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/search/YoutubeSearchExtractorChannelOnlyTest.java b/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/search/YoutubeSearchExtractorChannelOnlyTest.java index 12e76a08a..91cbebd9e 100644 --- a/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/search/YoutubeSearchExtractorChannelOnlyTest.java +++ b/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/search/YoutubeSearchExtractorChannelOnlyTest.java @@ -39,7 +39,7 @@ public class YoutubeSearchExtractorChannelOnlyTest extends YoutubeSearchExtracto boolean equals = true; for (int i = 0; i < secondPage.getItems().size() && i < itemsPage.getItems().size(); i++) { - if(!secondPage.getItems().get(i).getUrl().equals( + if (!secondPage.getItems().get(i).getUrl().equals( itemsPage.getItems().get(i).getUrl())) { equals = false; } @@ -58,7 +58,7 @@ public class YoutubeSearchExtractorChannelOnlyTest extends YoutubeSearchExtracto @Test public void testOnlyContainChannels() { for(InfoItem item : itemsPage.getItems()) { - if(!(item instanceof ChannelInfoItem)) { + if (!(item instanceof ChannelInfoItem)) { fail("The following item is no channel item: " + item.toString()); } } @@ -78,4 +78,11 @@ public class YoutubeSearchExtractorChannelOnlyTest extends YoutubeSearchExtracto } } } + + @Test + public void testStreamCount() { + ChannelInfoItem ci = (ChannelInfoItem) itemsPage.getItems().get(0); + assertTrue("Stream count does not fit: " + ci.getStreamCount(), + 4000 < ci.getStreamCount() && ci.getStreamCount() < 5500); + } } From 79431303079643c49f88a40b92584ce61a50c8ab Mon Sep 17 00:00:00 2001 From: TobiGr Date: Mon, 20 Jan 2020 21:25:16 +0100 Subject: [PATCH 17/30] Throw IllegalArgumentException when url is null in LinkHandlerFactory --- .../extractor/linkhandler/LinkHandlerFactory.java | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/linkhandler/LinkHandlerFactory.java b/extractor/src/main/java/org/schabi/newpipe/extractor/linkhandler/LinkHandlerFactory.java index b9e60ce0d..3890c2b28 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/linkhandler/LinkHandlerFactory.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/linkhandler/LinkHandlerFactory.java @@ -34,7 +34,7 @@ public abstract class LinkHandlerFactory { public abstract String getUrl(String id) throws ParsingException; public abstract boolean onAcceptUrl(final String url) throws ParsingException; - public String getUrl(String id, String baseUrl) throws ParsingException{ + public String getUrl(String id, String baseUrl) throws ParsingException { return getUrl(id); } @@ -43,13 +43,14 @@ public abstract class LinkHandlerFactory { /////////////////////////////////// public LinkHandler fromUrl(String url) throws ParsingException { + if (url == null) throw new IllegalArgumentException("url can not be null"); final String baseUrl = Utils.getBaseUrl(url); return fromUrl(url, baseUrl); } public LinkHandler fromUrl(String url, String baseUrl) throws ParsingException { - if(url == null) throw new IllegalArgumentException("url can not be null"); - if(!acceptUrl(url)) { + if (url == null) throw new IllegalArgumentException("url can not be null"); + if (!acceptUrl(url)) { throw new ParsingException("Malformed unacceptable url: " + url); } @@ -58,13 +59,13 @@ public abstract class LinkHandlerFactory { } public LinkHandler fromId(String id) throws ParsingException { - if(id == null) throw new IllegalArgumentException("id can not be null"); + if (id == null) throw new IllegalArgumentException("id can not be null"); final String url = getUrl(id); return new LinkHandler(url, url, id); } public LinkHandler fromId(String id, String baseUrl) throws ParsingException { - if(id == null) throw new IllegalArgumentException("id can not be null"); + if (id == null) throw new IllegalArgumentException("id can not be null"); final String url = getUrl(id, baseUrl); return new LinkHandler(url, url, id); } From 69ccb7e69dd0d40840d1dba671d147c738f10b95 Mon Sep 17 00:00:00 2001 From: TobiGr Date: Mon, 20 Jan 2020 21:25:47 +0100 Subject: [PATCH 18/30] Update subtitle tests comments --- .../stream/YoutubeStreamExtractorControversialTest.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/stream/YoutubeStreamExtractorControversialTest.java b/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/stream/YoutubeStreamExtractorControversialTest.java index 7faec8051..4adcccbd0 100644 --- a/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/stream/YoutubeStreamExtractorControversialTest.java +++ b/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/stream/YoutubeStreamExtractorControversialTest.java @@ -122,13 +122,13 @@ public class YoutubeStreamExtractorControversialTest { @Test public void testGetSubtitlesListDefault() throws IOException, ExtractionException { - // Video (/view?v=YQHsXMglC9A) set in the setUp() method has no captions => null + // Video (/view?v=T4XJQO3qol8) set in the setUp() method has at least auto-generated (English) captions assertFalse(extractor.getSubtitlesDefault().isEmpty()); } @Test public void testGetSubtitlesList() throws IOException, ExtractionException { - // Video (/view?v=YQHsXMglC9A) set in the setUp() method has no captions => null + // Video (/view?v=T4XJQO3qol8) set in the setUp() method has at least auto-generated (English) captions assertFalse(extractor.getSubtitles(MediaFormat.TTML).isEmpty()); } } From 089dbe8afb6ba9beb21aecce423d12601813e611 Mon Sep 17 00:00:00 2001 From: TobiGr Date: Mon, 20 Jan 2020 22:10:18 +0100 Subject: [PATCH 19/30] Fix StreamLinkHandlerFactoryTest by passing baseUrl to vdn.youtube urls --- .../services/youtube/YoutubeStreamLinkHandlerFactoryTest.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeStreamLinkHandlerFactoryTest.java b/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeStreamLinkHandlerFactoryTest.java index d3f463f3f..796238f99 100644 --- a/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeStreamLinkHandlerFactoryTest.java +++ b/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeStreamLinkHandlerFactoryTest.java @@ -77,8 +77,8 @@ public class YoutubeStreamLinkHandlerFactoryTest { assertEquals("jZViOEv90dI", linkHandler.fromUrl("http://www.Youtube.com/embed/jZViOEv90dI").getId()); assertEquals("jZViOEv90dI", linkHandler.fromUrl("http://www.youtube-nocookie.com/embed/jZViOEv90dI").getId()); assertEquals("EhxJLojIE_o", linkHandler.fromUrl("http://www.youtube.com/attribution_link?a=JdfC0C9V6ZI&u=%2Fwatch%3Fv%3DEhxJLojIE_o%26feature%3Dshare").getId()); - assertEquals("jZViOEv90dI", linkHandler.fromUrl("vnd.youtube://www.youtube.com/watch?v=jZViOEv90dI").getId()); - assertEquals("jZViOEv90dI", linkHandler.fromUrl("vnd.youtube:jZViOEv90dI").getId()); + assertEquals("jZViOEv90dI", linkHandler.fromUrl("vnd.youtube://www.youtube.com/watch?v=jZViOEv90dI", "youtube.com").getId()); + assertEquals("jZViOEv90dI", linkHandler.fromUrl("vnd.youtube:jZViOEv90dI", "youtube.com").getId()); assertEquals("O0EDx9WAelc", linkHandler.fromUrl("https://music.youtube.com/watch?v=O0EDx9WAelc").getId()); } From 2308b074f0603ec65c1b2c2a5ac0f3285cb3a0e6 Mon Sep 17 00:00:00 2001 From: TobiGr Date: Mon, 20 Jan 2020 22:12:39 +0100 Subject: [PATCH 20/30] [MediaCCC] Fix testGetTextualUploadDate() --- .../services/media_ccc/MediaCCCStreamExtractorTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/extractor/src/test/java/org/schabi/newpipe/extractor/services/media_ccc/MediaCCCStreamExtractorTest.java b/extractor/src/test/java/org/schabi/newpipe/extractor/services/media_ccc/MediaCCCStreamExtractorTest.java index befd840f4..71b1e7795 100644 --- a/extractor/src/test/java/org/schabi/newpipe/extractor/services/media_ccc/MediaCCCStreamExtractorTest.java +++ b/extractor/src/test/java/org/schabi/newpipe/extractor/services/media_ccc/MediaCCCStreamExtractorTest.java @@ -89,7 +89,7 @@ public class MediaCCCStreamExtractorTest implements BaseExtractorTest { @Test public void testGetTextualUploadDate() throws ParsingException { - Assert.assertEquals("2018-05-11", extractor.getTextualUploadDate()); + Assert.assertEquals("2018-05-11T02:00:00.000+02:00", extractor.getTextualUploadDate()); } @Test From 1e1100ef76c2a9cb917759374bc0bc2dddb53c47 Mon Sep 17 00:00:00 2001 From: Stypox Date: Mon, 20 Jan 2020 22:52:36 +0100 Subject: [PATCH 21/30] Update extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java Co-Authored-By: Tobias Groza --- .../services/youtube/extractors/YoutubeStreamExtractor.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java index ef1edad33..45874906a 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java @@ -162,7 +162,7 @@ public class YoutubeStreamExtractor extends StreamExtractor { try { JsonArray thumbnails = playerResponse.getObject("videoDetails").getObject("thumbnail").getArray("thumbnails"); // the last thumbnail is the one with the highest resolution - return thumbnails.getObject(thumbnails.size()-1).getString("url"); + return thumbnails.getObject(thumbnails.size() - 1).getString("url"); } catch (Exception e) { String url = null; From 1cfdc4547a41a70caa6b368b7267b68d04531e53 Mon Sep 17 00:00:00 2001 From: Stypox Date: Mon, 20 Jan 2020 22:52:48 +0100 Subject: [PATCH 22/30] Update extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java Co-Authored-By: Tobias Groza --- .../services/youtube/extractors/YoutubeStreamExtractor.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java index 45874906a..d0e0ff79a 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java @@ -296,7 +296,7 @@ public class YoutubeStreamExtractor extends StreamExtractor { .getArray("formats") .getObject(0) .getString("approxDurationMs"); - return Math.round(Long.parseLong(durationMs)/1000.0f); + return Math.round(Long.parseLong(durationMs) / 1000f); } catch (Exception ignored) { throw new ParsingException("Could not get duration", e); } From 29a4cc78bf71b1f443b7bbc220f3769f03c914e5 Mon Sep 17 00:00:00 2001 From: TobiGr Date: Thu, 23 Jan 2020 21:20:38 +0100 Subject: [PATCH 23/30] Fix typo in decryption constant --- .../services/youtube/extractors/YoutubeStreamExtractor.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java index c05004ede..8466d5d1a 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java @@ -677,7 +677,7 @@ public class YoutubeStreamExtractor extends StreamExtractor { private static final String VERIFIED_URL_PARAMS = "&has_verified=1&bpctr=9999999999"; - private final static String DECYRYPTION_SIGNATURE_FUNCTION_REGEX = + private final static String DECRYPTION_SIGNATURE_FUNCTION_REGEX = "([\\w$]+)\\s*=\\s*function\\((\\w+)\\)\\{\\s*\\2=\\s*\\2\\.split\\(\"\"\\)\\s*;"; private final static String DECRYPTION_AKAMAIZED_STRING_REGEX = "yt\\.akamaized\\.net/\\)\\s*\\|\\|\\s*.*?\\s*c\\s*&&\\s*d\\.set\\([^,]+\\s*,\\s*(:encodeURIComponent\\s*\\()([a-zA-Z0-9$]+)\\("; @@ -873,7 +873,7 @@ public class YoutubeStreamExtractor extends StreamExtractor { // to se if the function can actually be found with the given regex. // However if this cascading should propably be cleaned up somehow as it looks a bit weird. try { - decryptionFunctionName = Parser.matchGroup1(DECYRYPTION_SIGNATURE_FUNCTION_REGEX, playerCode); + decryptionFunctionName = Parser.matchGroup1(DECRYPTION_SIGNATURE_FUNCTION_REGEX, playerCode); } catch (Parser.RegexException re) { try { decryptionFunctionName = Parser.matchGroup1(DECRYPTION_AKAMAIZED_SHORT_STRING_REGEX, playerCode); From 9b45c61103e169dad440d2207222fd3c929cad41 Mon Sep 17 00:00:00 2001 From: TobiGr Date: Fri, 24 Jan 2020 01:27:54 +0100 Subject: [PATCH 24/30] Typos, comments and formatting --- .../java/org/schabi/newpipe/extractor/Extractor.java | 2 +- .../youtube/extractors/YoutubeStreamExtractor.java | 10 +++++----- .../org/schabi/newpipe/extractor/utils/Parser.java | 1 + 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/Extractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/Extractor.java index e5c03d3f1..b6e90d53c 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/Extractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/Extractor.java @@ -56,7 +56,7 @@ public abstract class Extractor{ } protected void assertPageFetched() { - if(!pageFetched) throw new IllegalStateException("Page is not fetched. Make sure you call fetchPage()"); + if (!pageFetched) throw new IllegalStateException("Page is not fetched. Make sure you call fetchPage()"); } protected boolean isPageFetched() { diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java index 8466d5d1a..db3bc4dcd 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java @@ -773,7 +773,7 @@ public class YoutubeStreamExtractor extends StreamExtractor { private JsonObject getPlayerResponse() throws ParsingException { try { String playerResponseStr; - if(playerArgs != null) { + if (playerArgs != null) { playerResponseStr = playerArgs.getString("player_response"); } else { playerResponseStr = videoInfoPage.get("player_response"); @@ -805,7 +805,7 @@ public class YoutubeStreamExtractor extends StreamExtractor { final String sts = Parser.matchGroup1(stsPattern, embedPageContent); return new EmbeddedInfo(playerUrl, sts); } catch (Exception i) { - // if it failes we simply reply with no sts as then it does not seem to be necessary + // if it fails we simply reply with no sts as then it does not seem to be necessary return new EmbeddedInfo(playerUrl, ""); } @@ -871,7 +871,7 @@ public class YoutubeStreamExtractor extends StreamExtractor { String decryptionFunctionName; // Cascading things in catch is ugly, but its faster than running a match before getting the actual name // to se if the function can actually be found with the given regex. - // However if this cascading should propably be cleaned up somehow as it looks a bit weird. + // However if this cascading should probably be cleaned up somehow as it looks a bit weird. try { decryptionFunctionName = Parser.matchGroup1(DECRYPTION_SIGNATURE_FUNCTION_REGEX, playerCode); } catch (Parser.RegexException re) { @@ -891,7 +891,7 @@ public class YoutubeStreamExtractor extends StreamExtractor { @Nonnull private List getAvailableSubtitlesInfo() throws SubtitlesException { // If the video is age restricted getPlayerConfig will fail - if(isAgeRestricted) return Collections.emptyList(); + if (isAgeRestricted) return Collections.emptyList(); final JsonObject captions; if (!playerResponse.has("captions")) { @@ -908,7 +908,7 @@ public class YoutubeStreamExtractor extends StreamExtractor { // This check is necessary since there may be cases where subtitles metadata do not contain caption track info // e.g. https://www.youtube.com/watch?v=-Vpwatutnko final int captionsSize = captionsArray.size(); - if(captionsSize == 0) return Collections.emptyList(); + if (captionsSize == 0) return Collections.emptyList(); List result = new ArrayList<>(); for (int i = 0; i < captionsSize; i++) { diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/utils/Parser.java b/extractor/src/main/java/org/schabi/newpipe/extractor/utils/Parser.java index 6cd938975..b03e304c5 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/utils/Parser.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/utils/Parser.java @@ -67,6 +67,7 @@ public class Parser { if (foundMatch) { return mat.group(group); } else { + // only pass input to exception message when it is not too long if (input.length() > 1024) { throw new RegexException("failed to find pattern \"" + pat.pattern()); } else { From bce87f3d01a3717c330eda880a6e60990e698727 Mon Sep 17 00:00:00 2001 From: TobiGr Date: Fri, 24 Jan 2020 01:30:54 +0100 Subject: [PATCH 25/30] Improve getDescriptionFuncName by removing catches and adding a loop --- .../extractors/YoutubeStreamExtractor.java | 27 +++++++++---------- 1 file changed, 12 insertions(+), 15 deletions(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java index db3bc4dcd..98df78f74 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java @@ -868,24 +868,21 @@ public class YoutubeStreamExtractor extends StreamExtractor { } private String getDecryptionFuncName(String playerCode) throws DecryptException { - String decryptionFunctionName; - // Cascading things in catch is ugly, but its faster than running a match before getting the actual name - // to se if the function can actually be found with the given regex. - // However if this cascading should probably be cleaned up somehow as it looks a bit weird. - try { - decryptionFunctionName = Parser.matchGroup1(DECRYPTION_SIGNATURE_FUNCTION_REGEX, playerCode); - } catch (Parser.RegexException re) { + String[] decryptionFuncNameRegexes = { + DECRYPTION_SIGNATURE_FUNCTION_REGEX, + DECRYPTION_AKAMAIZED_SHORT_STRING_REGEX, + DECRYPTION_AKAMAIZED_STRING_REGEX + }; + Parser.RegexException exception = null; + for (String regex : decryptionFuncNameRegexes) { try { - decryptionFunctionName = Parser.matchGroup1(DECRYPTION_AKAMAIZED_SHORT_STRING_REGEX, playerCode); - } catch (Parser.RegexException re2) { - try { - decryptionFunctionName = Parser.matchGroup1(DECRYPTION_AKAMAIZED_STRING_REGEX, playerCode); - } catch (Parser.RegexException re3) { - throw new DecryptException("Could not find decrypt function with any of the given patterns.", re); - } + return Parser.matchGroup1(regex, playerCode); + } catch (Parser.RegexException re) { + if (exception == null) + exception = re; } } - return decryptionFunctionName; + throw new DecryptException("Could not find decrypt function with any of the given patterns.", exception); } @Nonnull From 7b72fd2a7d3632f3de0c57d525664c4c26c9af19 Mon Sep 17 00:00:00 2001 From: TobiGr Date: Fri, 24 Jan 2020 01:59:39 +0100 Subject: [PATCH 26/30] [YouTube] Add new decryption function --- .../services/youtube/extractors/YoutubeStreamExtractor.java | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java index 98df78f74..cb2a5f53a 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java @@ -679,6 +679,8 @@ public class YoutubeStreamExtractor extends StreamExtractor { private final static String DECRYPTION_SIGNATURE_FUNCTION_REGEX = "([\\w$]+)\\s*=\\s*function\\((\\w+)\\)\\{\\s*\\2=\\s*\\2\\.split\\(\"\"\\)\\s*;"; + private final static String DECRYPTION_SIGNATURE_FUNCTION_REGEX_2 = + "\\b([\\w$]{2})\\s*=\\s*function\\((\\w+)\\)\\{\\s*\\2=\\s*\\2\\.split\\(\"\"\\)\\s*;"; private final static String DECRYPTION_AKAMAIZED_STRING_REGEX = "yt\\.akamaized\\.net/\\)\\s*\\|\\|\\s*.*?\\s*c\\s*&&\\s*d\\.set\\([^,]+\\s*,\\s*(:encodeURIComponent\\s*\\()([a-zA-Z0-9$]+)\\("; private final static String DECRYPTION_AKAMAIZED_SHORT_STRING_REGEX = @@ -697,7 +699,8 @@ public class YoutubeStreamExtractor extends StreamExtractor { final String playerUrl; // Check if the video is age restricted - if (!doc.select("meta[property=\"og:restrictions:age\"]").isEmpty()) { + Elements e = doc.select("meta[property=\"og:restrictions:age\"]"); + if (!e.isEmpty()) { final EmbeddedInfo info = getEmbeddedInfo(); final String videoInfoUrl = getVideoInfoUrl(getId(), info.sts); final String infoPageResponse = downloader.get(videoInfoUrl, getExtractorLocalization()).responseBody(); @@ -869,6 +872,7 @@ public class YoutubeStreamExtractor extends StreamExtractor { private String getDecryptionFuncName(String playerCode) throws DecryptException { String[] decryptionFuncNameRegexes = { + DECRYPTION_SIGNATURE_FUNCTION_REGEX_2, DECRYPTION_SIGNATURE_FUNCTION_REGEX, DECRYPTION_AKAMAIZED_SHORT_STRING_REGEX, DECRYPTION_AKAMAIZED_STRING_REGEX From 033a9fb8e388e50a93cd5c21de44a832a975048b Mon Sep 17 00:00:00 2001 From: TobiGr Date: Sat, 25 Jan 2020 21:08:17 +0100 Subject: [PATCH 27/30] Revert change which I accidentally committed while fixing the decryption function in 7b72fd2a7d3632f3de0c57d525664c4c26c9af19 --- .../services/youtube/extractors/YoutubeStreamExtractor.java | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java index 9ce9d6aba..db167e30d 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java @@ -720,8 +720,7 @@ public class YoutubeStreamExtractor extends StreamExtractor { final String playerUrl; // Check if the video is age restricted - Elements e = doc.select("meta[property=\"og:restrictions:age\"]"); - if (!e.isEmpty()) { + if (!doc.select("meta[property=\"og:restrictions:age\"]").isEmpty()) { final EmbeddedInfo info = getEmbeddedInfo(); final String videoInfoUrl = getVideoInfoUrl(getId(), info.sts); final String infoPageResponse = downloader.get(videoInfoUrl, getExtractorLocalization()).responseBody(); From 5c0c35064c8e64d0feb93c1afc0492e883a680f9 Mon Sep 17 00:00:00 2001 From: Mauricio Colli Date: Mon, 16 Dec 2019 04:35:42 -0300 Subject: [PATCH 28/30] Use clean url and id when creating a channel info --- .../extractor/channel/ChannelInfo.java | 21 ++++++++----------- 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/channel/ChannelInfo.java b/extractor/src/main/java/org/schabi/newpipe/extractor/channel/ChannelInfo.java index aaa99c7b7..0fc44063b 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/channel/ChannelInfo.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/channel/ChannelInfo.java @@ -5,9 +5,7 @@ import org.schabi.newpipe.extractor.ListInfo; import org.schabi.newpipe.extractor.NewPipe; import org.schabi.newpipe.extractor.StreamingService; import org.schabi.newpipe.extractor.exceptions.ExtractionException; -import org.schabi.newpipe.extractor.exceptions.ParsingException; import org.schabi.newpipe.extractor.linkhandler.ListLinkHandler; -import org.schabi.newpipe.extractor.localization.Localization; import org.schabi.newpipe.extractor.stream.StreamInfoItem; import org.schabi.newpipe.extractor.utils.ExtractorHelper; @@ -35,8 +33,8 @@ import java.io.IOException; public class ChannelInfo extends ListInfo { - public ChannelInfo(int serviceId, ListLinkHandler linkHandler, String name) throws ParsingException { - super(serviceId, linkHandler, name); + public ChannelInfo(int serviceId, String id, String url, String originalUrl, String name, ListLinkHandler listLinkHandler) { + super(serviceId, id, url, originalUrl, name, listLinkHandler.getContentFilters(), listLinkHandler.getSortFilter()); } public static ChannelInfo getInfo(String url) throws IOException, ExtractionException { @@ -57,15 +55,14 @@ public class ChannelInfo extends ListInfo { public static ChannelInfo getInfo(ChannelExtractor extractor) throws IOException, ExtractionException { - ChannelInfo info = new ChannelInfo(extractor.getServiceId(), - extractor.getLinkHandler(), - extractor.getName()); + final int serviceId = extractor.getServiceId(); + final String id = extractor.getId(); + final String url = extractor.getUrl(); + final String originalUrl = extractor.getOriginalUrl(); + final String name = extractor.getName(); + + final ChannelInfo info = new ChannelInfo(serviceId, id, url, originalUrl, name, extractor.getLinkHandler()); - try { - info.setOriginalUrl(extractor.getOriginalUrl()); - } catch (Exception e) { - info.addError(e); - } try { info.setAvatarUrl(extractor.getAvatarUrl()); } catch (Exception e) { From be81f2945cd937467dbdfa1891e585cdd7b1b75e Mon Sep 17 00:00:00 2001 From: Mauricio Colli Date: Mon, 16 Dec 2019 04:35:43 -0300 Subject: [PATCH 29/30] [YouTube] Use correct upload date flag in StreamExtractor --- .../services/youtube/extractors/YoutubeStreamExtractor.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java index db167e30d..cee1a4b9a 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java @@ -22,8 +22,8 @@ import org.schabi.newpipe.extractor.exceptions.ExtractionException; import org.schabi.newpipe.extractor.exceptions.ParsingException; import org.schabi.newpipe.extractor.exceptions.ReCaptchaException; import org.schabi.newpipe.extractor.linkhandler.LinkHandler; -import org.schabi.newpipe.extractor.localization.TimeAgoParser; import org.schabi.newpipe.extractor.localization.DateWrapper; +import org.schabi.newpipe.extractor.localization.TimeAgoParser; import org.schabi.newpipe.extractor.services.youtube.ItagItem; import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeParsingHelper; import org.schabi.newpipe.extractor.stream.*; @@ -152,7 +152,7 @@ public class YoutubeStreamExtractor extends StreamExtractor { return null; } - return new DateWrapper(YoutubeParsingHelper.parseDateFrom(textualUploadDate)); + return new DateWrapper(YoutubeParsingHelper.parseDateFrom(textualUploadDate), true); } @Nonnull From 26234a1c0fa7cf0a03a47a38948f0ee4f0df29a7 Mon Sep 17 00:00:00 2001 From: Mauricio Colli Date: Mon, 16 Dec 2019 04:35:44 -0300 Subject: [PATCH 30/30] Introduce FeedExtractor making fetching from dedicated feeds possible YouTube, for example, has a dedicated feed which was built to be used like this. --- .../newpipe/extractor/StreamingService.java | 22 ++++- .../newpipe/extractor/feed/FeedExtractor.java | 17 ++++ .../newpipe/extractor/feed/FeedInfo.java | 52 ++++++++++ .../services/youtube/YoutubeService.java | 20 ++-- .../extractors/YoutubeChannelExtractor.java | 3 +- .../extractors/YoutubeFeedExtractor.java | 82 ++++++++++++++++ .../YoutubeFeedInfoItemExtractor.java | 94 +++++++++++++++++++ .../linkHandler/YoutubeParsingHelper.java | 13 +++ .../youtube/YoutubeFeedExtractorTest.java | 72 ++++++++++++++ 9 files changed, 361 insertions(+), 14 deletions(-) create mode 100644 extractor/src/main/java/org/schabi/newpipe/extractor/feed/FeedExtractor.java create mode 100644 extractor/src/main/java/org/schabi/newpipe/extractor/feed/FeedInfo.java create mode 100644 extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeFeedExtractor.java create mode 100644 extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeFeedInfoItemExtractor.java create mode 100644 extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeFeedExtractorTest.java diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/StreamingService.java b/extractor/src/main/java/org/schabi/newpipe/extractor/StreamingService.java index ad22642db..683985062 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/StreamingService.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/StreamingService.java @@ -7,6 +7,7 @@ import org.schabi.newpipe.extractor.channel.ChannelExtractor; import org.schabi.newpipe.extractor.comments.CommentsExtractor; import org.schabi.newpipe.extractor.exceptions.ExtractionException; import org.schabi.newpipe.extractor.exceptions.ParsingException; +import org.schabi.newpipe.extractor.feed.FeedExtractor; import org.schabi.newpipe.extractor.kiosk.KioskList; import org.schabi.newpipe.extractor.linkhandler.LinkHandler; import org.schabi.newpipe.extractor.linkhandler.LinkHandlerFactory; @@ -24,6 +25,8 @@ import org.schabi.newpipe.extractor.stream.StreamExtractor; import org.schabi.newpipe.extractor.subscription.SubscriptionExtractor; import org.schabi.newpipe.extractor.suggestion.SuggestionExtractor; +import javax.annotation.Nullable; + /* * Copyright (C) Christian Schabesberger 2018 * StreamingService.java is part of NewPipe. @@ -65,7 +68,7 @@ public abstract class StreamingService { public String getName() { return name; } - + public List getMediaCapabilities() { return mediaCapabilities; } @@ -116,7 +119,7 @@ public abstract class StreamingService { public String toString() { return serviceId + ":" + serviceInfo.getName(); } - + public abstract String getBaseUrl(); /*////////////////////////////////////////////////////////////////////////// @@ -173,6 +176,19 @@ public abstract class StreamingService { */ public abstract SubscriptionExtractor getSubscriptionExtractor(); + /** + * This method decides which strategy will be chosen to fetch the feed. In YouTube, for example, a separate feed + * exists which is lightweight and made specifically to be used like this. + *

+ * In services which there's no other way to retrieve them, null should be returned. + * + * @return a {@link FeedExtractor} instance or null. + */ + @Nullable + public FeedExtractor getFeedExtractor(String url) throws ExtractionException { + return null; + } + /** * Must create a new instance of a KioskList implementation. * @return a new KioskList instance @@ -258,7 +274,7 @@ public abstract class StreamingService { } return getCommentsExtractor(llhf.fromUrl(url)); } - + /*////////////////////////////////////////////////////////////////////////// // Utils //////////////////////////////////////////////////////////////////////////*/ diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/feed/FeedExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/feed/FeedExtractor.java new file mode 100644 index 000000000..df3e8915b --- /dev/null +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/feed/FeedExtractor.java @@ -0,0 +1,17 @@ +package org.schabi.newpipe.extractor.feed; + +import org.schabi.newpipe.extractor.ListExtractor; +import org.schabi.newpipe.extractor.StreamingService; +import org.schabi.newpipe.extractor.linkhandler.ListLinkHandler; +import org.schabi.newpipe.extractor.stream.StreamInfoItem; + +/** + * This class helps to extract items from lightweight feeds that the services may provide. + *

+ * YouTube is an example of a service that has this alternative available. + */ +public abstract class FeedExtractor extends ListExtractor { + public FeedExtractor(StreamingService service, ListLinkHandler listLinkHandler) { + super(service, listLinkHandler); + } +} diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/feed/FeedInfo.java b/extractor/src/main/java/org/schabi/newpipe/extractor/feed/FeedInfo.java new file mode 100644 index 000000000..f361cec7e --- /dev/null +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/feed/FeedInfo.java @@ -0,0 +1,52 @@ +package org.schabi.newpipe.extractor.feed; + +import org.schabi.newpipe.extractor.ListExtractor.InfoItemsPage; +import org.schabi.newpipe.extractor.ListInfo; +import org.schabi.newpipe.extractor.NewPipe; +import org.schabi.newpipe.extractor.StreamingService; +import org.schabi.newpipe.extractor.exceptions.ExtractionException; +import org.schabi.newpipe.extractor.stream.StreamInfoItem; +import org.schabi.newpipe.extractor.utils.ExtractorHelper; + +import java.io.IOException; +import java.util.List; + +public class FeedInfo extends ListInfo { + + public FeedInfo(int serviceId, String id, String url, String originalUrl, String name, List contentFilter, String sortFilter) { + super(serviceId, id, url, originalUrl, name, contentFilter, sortFilter); + } + + public static FeedInfo getInfo(String url) throws IOException, ExtractionException { + return getInfo(NewPipe.getServiceByUrl(url), url); + } + + public static FeedInfo getInfo(StreamingService service, String url) throws IOException, ExtractionException { + final FeedExtractor extractor = service.getFeedExtractor(url); + + if (extractor == null) { + throw new IllegalArgumentException("Service \"" + service.getServiceInfo().getName() + "\" doesn't support FeedExtractor."); + } + + extractor.fetchPage(); + return getInfo(extractor); + } + + public static FeedInfo getInfo(FeedExtractor extractor) throws IOException, ExtractionException { + extractor.fetchPage(); + + final int serviceId = extractor.getServiceId(); + final String id = extractor.getId(); + final String url = extractor.getUrl(); + final String originalUrl = extractor.getOriginalUrl(); + final String name = extractor.getName(); + + final FeedInfo info = new FeedInfo(serviceId, id, url, originalUrl, name, null, null); + + final InfoItemsPage itemsPage = ExtractorHelper.getItemsPageOrLogError(info, extractor); + info.setRelatedItems(itemsPage.getItems()); + info.setNextPageUrl(itemsPage.getNextPageUrl()); + + return info; + } +} diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeService.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeService.java index 78c97cfbe..6137f0293 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeService.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeService.java @@ -12,6 +12,7 @@ import org.schabi.newpipe.extractor.StreamingService; import org.schabi.newpipe.extractor.channel.ChannelExtractor; import org.schabi.newpipe.extractor.comments.CommentsExtractor; import org.schabi.newpipe.extractor.exceptions.ExtractionException; +import org.schabi.newpipe.extractor.feed.FeedExtractor; import org.schabi.newpipe.extractor.kiosk.KioskExtractor; import org.schabi.newpipe.extractor.kiosk.KioskList; import org.schabi.newpipe.extractor.linkhandler.LinkHandler; @@ -24,14 +25,7 @@ import org.schabi.newpipe.extractor.localization.ContentCountry; import org.schabi.newpipe.extractor.localization.Localization; import org.schabi.newpipe.extractor.playlist.PlaylistExtractor; import org.schabi.newpipe.extractor.search.SearchExtractor; -import org.schabi.newpipe.extractor.services.youtube.extractors.YoutubeChannelExtractor; -import org.schabi.newpipe.extractor.services.youtube.extractors.YoutubeCommentsExtractor; -import org.schabi.newpipe.extractor.services.youtube.extractors.YoutubePlaylistExtractor; -import org.schabi.newpipe.extractor.services.youtube.extractors.YoutubeSearchExtractor; -import org.schabi.newpipe.extractor.services.youtube.extractors.YoutubeStreamExtractor; -import org.schabi.newpipe.extractor.services.youtube.extractors.YoutubeSubscriptionExtractor; -import org.schabi.newpipe.extractor.services.youtube.extractors.YoutubeSuggestionExtractor; -import org.schabi.newpipe.extractor.services.youtube.extractors.YoutubeTrendingExtractor; +import org.schabi.newpipe.extractor.services.youtube.extractors.*; import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeChannelLinkHandlerFactory; import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeCommentsLinkHandlerFactory; import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubePlaylistLinkHandlerFactory; @@ -42,6 +36,8 @@ import org.schabi.newpipe.extractor.stream.StreamExtractor; import org.schabi.newpipe.extractor.subscription.SubscriptionExtractor; import org.schabi.newpipe.extractor.suggestion.SuggestionExtractor; +import javax.annotation.Nonnull; + /* * Created by Christian Schabesberger on 23.08.15. * @@ -72,7 +68,7 @@ public class YoutubeService extends StreamingService { public String getBaseUrl() { return "https://youtube.com"; } - + @Override public LinkHandlerFactory getStreamLHFactory() { return YoutubeStreamLinkHandlerFactory.getInstance(); @@ -147,6 +143,12 @@ public class YoutubeService extends StreamingService { return new YoutubeSubscriptionExtractor(this); } + @Nonnull + @Override + public FeedExtractor getFeedExtractor(final String channelUrl) throws ExtractionException { + return new YoutubeFeedExtractor(this, getChannelLHFactory().fromUrl(channelUrl)); + } + @Override public ListLinkHandlerFactory getCommentsLHFactory() { return YoutubeCommentsLinkHandlerFactory.getInstance(); diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelExtractor.java index adc4705e4..d675cb255 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelExtractor.java @@ -46,7 +46,6 @@ import java.io.IOException; @SuppressWarnings("WeakerAccess") public class YoutubeChannelExtractor extends ChannelExtractor { /*package-private*/ static final String CHANNEL_URL_BASE = "https://www.youtube.com/channel/"; - private static final String CHANNEL_FEED_BASE = "https://www.youtube.com/feeds/videos.xml?channel_id="; private static final String CHANNEL_URL_PARAMETERS = "/videos?view=0&flow=list&sort=dd&live_view=10000"; private Document doc; @@ -130,7 +129,7 @@ public class YoutubeChannelExtractor extends ChannelExtractor { @Override public String getFeedUrl() throws ParsingException { try { - return CHANNEL_FEED_BASE + getId(); + return YoutubeParsingHelper.getFeedUrlFrom(getId()); } catch (Exception e) { throw new ParsingException("Could not get feed url", e); } diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeFeedExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeFeedExtractor.java new file mode 100644 index 000000000..6ed91fe4f --- /dev/null +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeFeedExtractor.java @@ -0,0 +1,82 @@ +package org.schabi.newpipe.extractor.services.youtube.extractors; + +import org.jsoup.Jsoup; +import org.jsoup.nodes.Document; +import org.jsoup.nodes.Element; +import org.jsoup.select.Elements; +import org.schabi.newpipe.extractor.ListExtractor; +import org.schabi.newpipe.extractor.StreamingService; +import org.schabi.newpipe.extractor.downloader.Downloader; +import org.schabi.newpipe.extractor.downloader.Response; +import org.schabi.newpipe.extractor.exceptions.ExtractionException; +import org.schabi.newpipe.extractor.feed.FeedExtractor; +import org.schabi.newpipe.extractor.linkhandler.ListLinkHandler; +import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeParsingHelper; +import org.schabi.newpipe.extractor.stream.StreamInfoItem; +import org.schabi.newpipe.extractor.stream.StreamInfoItemsCollector; + +import javax.annotation.Nonnull; +import java.io.IOException; + +public class YoutubeFeedExtractor extends FeedExtractor { + public YoutubeFeedExtractor(StreamingService service, ListLinkHandler linkHandler) { + super(service, linkHandler); + } + + private Document document; + + @Override + public void onFetchPage(@Nonnull Downloader downloader) throws IOException, ExtractionException { + final String channelIdOrUser = getLinkHandler().getId(); + final String feedUrl = YoutubeParsingHelper.getFeedUrlFrom(channelIdOrUser); + + final Response response = downloader.get(feedUrl); + document = Jsoup.parse(response.responseBody()); + } + + @Nonnull + @Override + public ListExtractor.InfoItemsPage getInitialPage() { + final Elements entries = document.select("feed > entry"); + final StreamInfoItemsCollector collector = new StreamInfoItemsCollector(getServiceId()); + + for (Element entryElement : entries) { + collector.commit(new YoutubeFeedInfoItemExtractor(entryElement)); + } + + return new InfoItemsPage<>(collector, null); + } + + @Nonnull + @Override + public String getId() { + return document.getElementsByTag("yt:channelId").first().text(); + } + + @Nonnull + @Override + public String getUrl() { + return document.select("feed > author > uri").first().text(); + } + + @Nonnull + @Override + public String getName() { + return document.select("feed > author > name").first().text(); + } + + @Override + public String getNextPageUrl() { + return null; + } + + @Override + public InfoItemsPage getPage(String pageUrl) { + return null; + } + + @Override + public boolean hasNextPage() { + return false; + } +} diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeFeedInfoItemExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeFeedInfoItemExtractor.java new file mode 100644 index 000000000..aadc80223 --- /dev/null +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeFeedInfoItemExtractor.java @@ -0,0 +1,94 @@ +package org.schabi.newpipe.extractor.services.youtube.extractors; + +import org.jsoup.nodes.Element; +import org.schabi.newpipe.extractor.exceptions.ParsingException; +import org.schabi.newpipe.extractor.localization.DateWrapper; +import org.schabi.newpipe.extractor.stream.StreamInfoItemExtractor; +import org.schabi.newpipe.extractor.stream.StreamType; + +import javax.annotation.Nullable; +import java.text.ParseException; +import java.text.SimpleDateFormat; +import java.util.Calendar; +import java.util.Date; +import java.util.TimeZone; + +public class YoutubeFeedInfoItemExtractor implements StreamInfoItemExtractor { + private final Element entryElement; + + public YoutubeFeedInfoItemExtractor(Element entryElement) { + this.entryElement = entryElement; + } + + @Override + public StreamType getStreamType() { + // It is not possible to determine the stream type using the feed endpoint. + // All entries are considered a video stream. + return StreamType.VIDEO_STREAM; + } + + @Override + public boolean isAd() { + return false; + } + + @Override + public long getDuration() { + // Not available when fetching through the feed endpoint. + return -1; + } + + @Override + public long getViewCount() { + return Long.parseLong(entryElement.getElementsByTag("media:statistics").first().attr("views")); + } + + @Override + public String getUploaderName() { + return entryElement.select("author > name").first().text(); + } + + @Override + public String getUploaderUrl() { + return entryElement.select("author > uri").first().text(); + } + + @Nullable + @Override + public String getTextualUploadDate() { + return entryElement.getElementsByTag("published").first().text(); + } + + @Nullable + @Override + public DateWrapper getUploadDate() throws ParsingException { + final Date date; + try { + final SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss+00:00"); + dateFormat.setTimeZone(TimeZone.getTimeZone("UTC")); + date = dateFormat.parse(getTextualUploadDate()); + } catch (ParseException e) { + throw new ParsingException("Could not parse date (\"" + getTextualUploadDate() + "\")", e); + } + + final Calendar calendar = Calendar.getInstance(); + calendar.setTime(date); + + return new DateWrapper(calendar); + } + + @Override + public String getName() { + return entryElement.getElementsByTag("title").first().text(); + } + + @Override + public String getUrl() { + return entryElement.getElementsByTag("link").first().attr("href"); + } + + @Override + public String getThumbnailUrl() { + return entryElement.getElementsByTag("media:thumbnail").first().attr("url"); + } +} diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeParsingHelper.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeParsingHelper.java index 0e76ddf67..65ec7e3f6 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeParsingHelper.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeParsingHelper.java @@ -38,6 +38,9 @@ public class YoutubeParsingHelper { private YoutubeParsingHelper() { } + private static final String FEED_BASE_CHANNEL_ID = "https://www.youtube.com/feeds/videos.xml?channel_id="; + private static final String FEED_BASE_USER = "https://www.youtube.com/feeds/videos.xml?user="; + private static final String[] RECAPTCHA_DETECTION_SELECTORS = { "form[action*=\"/das_captcha\"]", "input[name*=\"action_recaptcha_verify\"]" @@ -118,6 +121,16 @@ public class YoutubeParsingHelper { + Long.parseLong(seconds); } + public static String getFeedUrlFrom(final String channelIdOrUser) { + if (channelIdOrUser.startsWith("user/")) { + return FEED_BASE_USER + channelIdOrUser.replace("user/", ""); + } else if (channelIdOrUser.startsWith("channel/")) { + return FEED_BASE_CHANNEL_ID + channelIdOrUser.replace("channel/", ""); + } else { + return FEED_BASE_CHANNEL_ID + channelIdOrUser; + } + } + public static Calendar parseDateFrom(String textualUploadDate) throws ParsingException { Date date; try { diff --git a/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeFeedExtractorTest.java b/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeFeedExtractorTest.java new file mode 100644 index 000000000..c7f1b1c1d --- /dev/null +++ b/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeFeedExtractorTest.java @@ -0,0 +1,72 @@ +package org.schabi.newpipe.extractor.services.youtube; + +import org.junit.BeforeClass; +import org.junit.Test; +import org.schabi.newpipe.DownloaderTestImpl; +import org.schabi.newpipe.extractor.NewPipe; +import org.schabi.newpipe.extractor.exceptions.ParsingException; +import org.schabi.newpipe.extractor.services.BaseListExtractorTest; +import org.schabi.newpipe.extractor.services.youtube.extractors.YoutubeFeedExtractor; + +import static org.junit.Assert.*; +import static org.schabi.newpipe.extractor.ServiceList.YouTube; +import static org.schabi.newpipe.extractor.services.DefaultTests.defaultTestRelatedItems; + +public class YoutubeFeedExtractorTest { + public static class Kurzgesagt implements BaseListExtractorTest { + private static YoutubeFeedExtractor extractor; + + @BeforeClass + public static void setUp() throws Exception { + NewPipe.init(DownloaderTestImpl.getInstance()); + extractor = (YoutubeFeedExtractor) YouTube + .getFeedExtractor("https://www.youtube.com/user/Kurzgesagt"); + extractor.fetchPage(); + } + + /*////////////////////////////////////////////////////////////////////////// + // Extractor + //////////////////////////////////////////////////////////////////////////*/ + + @Test + public void testServiceId() { + assertEquals(YouTube.getServiceId(), extractor.getServiceId()); + } + + @Test + public void testName() { + String name = extractor.getName(); + assertTrue(name, name.startsWith("Kurzgesagt")); + } + + @Test + public void testId() { + assertEquals("UCsXVk37bltHxD1rDPwtNM8Q", extractor.getId()); + } + + @Test + public void testUrl() { + assertEquals("https://www.youtube.com/channel/UCsXVk37bltHxD1rDPwtNM8Q", extractor.getUrl()); + } + + @Test + public void testOriginalUrl() throws ParsingException { + assertEquals("https://www.youtube.com/user/Kurzgesagt", extractor.getOriginalUrl()); + } + + /*////////////////////////////////////////////////////////////////////////// + // ListExtractor + //////////////////////////////////////////////////////////////////////////*/ + + @Test + public void testRelatedItems() throws Exception { + defaultTestRelatedItems(extractor, YouTube.getServiceId()); + } + + @Test + public void testMoreRelatedItems() { + assertFalse(extractor.hasNextPage()); + assertNull(extractor.getNextPageUrl()); + } + } +} \ No newline at end of file