From c0a8e0188967db4e8ae84b5f0acbe2fd61b1119c Mon Sep 17 00:00:00 2001 From: wb9688 Date: Mon, 24 Feb 2020 16:45:36 +0100 Subject: [PATCH] Implement pagination in YoutubePlaylistExtractor --- .../extractors/YoutubeChannelExtractor.java | 4 + .../extractors/YoutubePlaylistExtractor.java | 79 +++++++++---------- 2 files changed, 41 insertions(+), 42 deletions(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelExtractor.java index c60e1c8fc..27678657c 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelExtractor.java @@ -231,6 +231,10 @@ public class YoutubeChannelExtractor extends ChannelExtractor { private String getNextPageUrlFrom(JsonArray continuations) { + if (continuations == null) { + return ""; + } + JsonObject nextContinuationData = continuations.getObject(0).getObject("nextContinuationData"); String continuation = nextContinuationData.getString("continuation"); String clickTrackingParams = nextContinuationData.getString("clickTrackingParams"); diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubePlaylistExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubePlaylistExtractor.java index f0fb91a17..8abee5f1d 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubePlaylistExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubePlaylistExtractor.java @@ -5,9 +5,7 @@ import com.grack.nanojson.JsonObject; import com.grack.nanojson.JsonParser; import com.grack.nanojson.JsonParserException; -import org.jsoup.Jsoup; import 
org.jsoup.nodes.Document; -import org.jsoup.nodes.Element; import org.schabi.newpipe.extractor.StreamingService; import org.schabi.newpipe.extractor.downloader.Downloader; import org.schabi.newpipe.extractor.downloader.Response; @@ -22,9 +20,12 @@ import org.schabi.newpipe.extractor.stream.StreamInfoItemsCollector; import org.schabi.newpipe.extractor.utils.Utils; import java.io.IOException; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; import javax.annotation.Nonnull; -import javax.annotation.Nullable; @SuppressWarnings("WeakerAccess") public class YoutubePlaylistExtractor extends PlaylistExtractor { @@ -95,7 +96,11 @@ public class YoutubePlaylistExtractor extends PlaylistExtractor { @Override public String getNextPageUrl() throws ExtractionException { - return getNextPageUrlFrom(doc); + return getNextPageUrlFrom(initialData.getObject("contents").getObject("twoColumnBrowseResultsRenderer") + .getArray("tabs").getObject(0).getObject("tabRenderer").getObject("content") + .getObject("sectionListRenderer").getArray("contents").getObject(0) + .getObject("itemSectionRenderer").getArray("contents").getObject(0) + .getObject("playlistVideoListRenderer").getArray("continuations")); } @Nonnull @@ -174,8 +179,14 @@ public class YoutubePlaylistExtractor extends PlaylistExtractor { @Override public InfoItemsPage getInitialPage() throws ExtractionException { StreamInfoItemsCollector collector = new StreamInfoItemsCollector(getServiceId()); - Element tbody = doc.select("tbody[id=\"pl-load-more-destination\"]").first(); - collectStreamsFrom(collector, tbody); + + JsonArray videos = initialData.getObject("contents").getObject("twoColumnBrowseResultsRenderer") + .getArray("tabs").getObject(0).getObject("tabRenderer").getObject("content") + .getObject("sectionListRenderer").getArray("contents").getObject(0) + .getObject("itemSectionRenderer").getArray("contents").getObject(0) + 
.getObject("playlistVideoListRenderer").getArray("contents"); + + collectStreamsFrom(collector, videos); return new InfoItemsPage<>(collector, getNextPageUrl()); } @@ -186,58 +197,42 @@ public class YoutubePlaylistExtractor extends PlaylistExtractor { } StreamInfoItemsCollector collector = new StreamInfoItemsCollector(getServiceId()); - JsonObject pageJson; + JsonArray ajaxJson; try { - final String responseBody = getDownloader().get(pageUrl, getExtractorLocalization()).responseBody(); - pageJson = JsonParser.object().from(responseBody); + Map<String, List<String>> headers = new HashMap<>(); + headers.put("X-YouTube-Client-Name", Collections.singletonList("1")); + headers.put("X-YouTube-Client-Version", Collections.singletonList("2.20200221.03.00")); // TODO: Automatically get YouTube client version somehow + final String response = getDownloader().get(pageUrl, headers, getExtractorLocalization()).responseBody(); + ajaxJson = JsonParser.array().from(response); } catch (JsonParserException pe) { - throw new ParsingException("Could not parse ajax json", pe); + throw new ParsingException("Could not parse json data for next streams", pe); } - final Document pageHtml = Jsoup.parse("" - + pageJson.getString("content_html") - + "
", pageUrl); + JsonObject sectionListContinuation = ajaxJson.getObject(1).getObject("response") + .getObject("continuationContents").getObject("playlistVideoListContinuation"); - collectStreamsFrom(collector, pageHtml.select("tbody[id=\"pl-load-more-destination\"]").first()); + collectStreamsFrom(collector, sectionListContinuation.getArray("contents")); - return new InfoItemsPage<>(collector, getNextPageUrlFromAjax(pageJson, pageUrl)); + return new InfoItemsPage<>(collector, getNextPageUrlFrom(sectionListContinuation.getArray("continuations"))); } - private String getNextPageUrlFromAjax(final JsonObject pageJson, final String pageUrl) - throws ParsingException { - String nextPageHtml = pageJson.getString("load_more_widget_html"); - if (!nextPageHtml.isEmpty()) { - return getNextPageUrlFrom(Jsoup.parse(nextPageHtml, pageUrl)); - } else { + private String getNextPageUrlFrom(JsonArray continuations) { + if (continuations == null) { return ""; } + + JsonObject nextContinuationData = continuations.getObject(0).getObject("nextContinuationData"); + String continuation = nextContinuationData.getString("continuation"); + String clickTrackingParams = nextContinuationData.getString("clickTrackingParams"); + return "https://www.youtube.com/browse_ajax?ctoken=" + continuation + "&continuation=" + continuation + + "&itct=" + clickTrackingParams; } - private String getNextPageUrlFrom(Document d) throws ParsingException { - try { - Element button = d.select("button[class*=\"yt-uix-load-more\"]").first(); - if (button != null) { - return button.attr("abs:data-uix-load-more-href"); - } else { - // Sometimes playlists are simply so small, they don't have a more streams/videos - return ""; - } - } catch (Exception e) { - throw new ParsingException("could not get next streams' url", e); - } - } - - private void collectStreamsFrom(@Nonnull StreamInfoItemsCollector collector, @Nullable Element element) { + private void collectStreamsFrom(StreamInfoItemsCollector collector, JsonArray 
videos) { collector.reset(); final TimeAgoParser timeAgoParser = getTimeAgoParser(); - JsonArray videos = initialData.getObject("contents").getObject("twoColumnBrowseResultsRenderer") - .getArray("tabs").getObject(0).getObject("tabRenderer").getObject("content") - .getObject("sectionListRenderer").getArray("contents").getObject(0) - .getObject("itemSectionRenderer").getArray("contents").getObject(0) - .getObject("playlistVideoListRenderer").getArray("contents"); - for (Object video : videos) { if (((JsonObject) video).getObject("playlistVideoRenderer") != null) { collector.commit(new YoutubeStreamInfoItemExtractor(((JsonObject) video).getObject("playlistVideoRenderer"), timeAgoParser) {