Implement pagination in YoutubeChannelExtractor

2020-02-24 15:48:23 +01:00 · 2020-02-24 15:48:23 +01:00 · 2dfa2187ff
parent a38ab9b791
commit 2dfa2187ff
1 changed files with 50 additions and 51 deletions
--- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelExtractor.java
+++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelExtractor.java
@ -5,7 +5,6 @@ import com.grack.nanojson.JsonObject;
 import com.grack.nanojson.JsonParser;
 import com.grack.nanojson.JsonParserException;
 import org.jsoup.Jsoup;
 import org.jsoup.nodes.Document;
 import org.jsoup.nodes.Element;
 import org.schabi.newpipe.extractor.StreamingService;
@ -22,6 +21,10 @@ import org.schabi.newpipe.extractor.stream.StreamInfoItemsCollector;
 import org.schabi.newpipe.extractor.utils.Utils;
 import java.io.IOException;
 import java.util.Collections;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 import javax.annotation.Nonnull;
@ -71,7 +74,7 @@ public class YoutubeChannelExtractor extends ChannelExtractor {
    @Override
    public String getNextPageUrl() throws ExtractionException {
-        return getNextPageUrlFrom(doc);
+        return getNextPageUrlFrom(getVideoTab().getObject("content").getObject("sectionListRenderer").getArray("continuations"));
    }
    @Nonnull
@ -189,8 +192,10 @@ public class YoutubeChannelExtractor extends ChannelExtractor {
    @Override
    public InfoItemsPage<StreamInfoItem> getInitialPage() throws ExtractionException {
        StreamInfoItemsCollector collector = new StreamInfoItemsCollector(getServiceId());
-        Element ul = doc.select("ul[id=\"browse-items-primary\"]").first();
+
-        collectStreamsFrom(collector, ul);
+        JsonArray videos = getVideoTab().getObject("content").getObject("sectionListRenderer").getArray("contents");
        collectStreamsFrom(collector, videos);
        return new InfoItemsPage<>(collector, getNextPageUrl());
    }
@ -203,71 +208,44 @@ public class YoutubeChannelExtractor extends ChannelExtractor {
        // Unfortunately, we have to fetch the page even if we are only getting next streams,
        // as they don't deliver enough information on their own (the channel name, for example).
-        fetchPage();
+//        fetchPage();
        StreamInfoItemsCollector collector = new StreamInfoItemsCollector(getServiceId());
-        JsonObject ajaxJson;
+        JsonArray ajaxJson;
        try {
-            final String response = getDownloader().get(pageUrl, getExtractorLocalization()).responseBody();
+            Map<String, List<String>> headers = new HashMap<>();
-            ajaxJson = JsonParser.object().from(response);
+            headers.put("X-YouTube-Client-Name", Collections.singletonList("1"));
            headers.put("X-YouTube-Client-Version", Collections.singletonList("2.20200221.03.00")); // TODO: Automatically get YouTube client version somehow
            final String response = getDownloader().get(pageUrl, headers, getExtractorLocalization()).responseBody();
            ajaxJson = JsonParser.array().from(response);
        } catch (JsonParserException pe) {
            throw new ParsingException("Could not parse json data for next streams", pe);
        }
-        final Document ajaxHtml = Jsoup.parse(ajaxJson.getString("content_html"), pageUrl);
+        JsonObject sectionListContinuation = ajaxJson.getObject(1).getObject("response")
-        collectStreamsFrom(collector, ajaxHtml.select("body").first());
+                .getObject("continuationContents").getObject("sectionListContinuation");
-        return new InfoItemsPage<>(collector, getNextPageUrlFromAjaxPage(ajaxJson, pageUrl));
+        collectStreamsFrom(collector, sectionListContinuation.getArray("contents"));
        return new InfoItemsPage<>(collector, getNextPageUrlFrom(sectionListContinuation.getArray("continuations")));
    }
-    private String getNextPageUrlFromAjaxPage(final JsonObject ajaxJson, final String pageUrl)
+
-            throws ParsingException {
+    private String getNextPageUrlFrom(JsonArray continuations) {
-        String loadMoreHtmlDataRaw = ajaxJson.getString("load_more_widget_html");
+        JsonObject nextContinuationData = continuations.getObject(0).getObject("nextContinuationData");
-        if (!loadMoreHtmlDataRaw.isEmpty()) {
+        String continuation = nextContinuationData.getString("continuation");
-            return getNextPageUrlFrom(Jsoup.parse(loadMoreHtmlDataRaw, pageUrl));
+        String clickTrackingParams = nextContinuationData.getString("clickTrackingParams");
-        } else {
+        return "https://www.youtube.com/browse_ajax?ctoken=" + continuation + "&continuation=" + continuation
-            return "";
+                + "&itct=" + clickTrackingParams;
        }
    }
-    private String getNextPageUrlFrom(Document d) throws ParsingException {
+    private void collectStreamsFrom(StreamInfoItemsCollector collector, JsonArray videos) throws ParsingException {
        try {
            Element button = d.select("button[class*=\"yt-uix-load-more\"]").first();
            if (button != null) {
                return button.attr("abs:data-uix-load-more-href");
            } else {
                // Sometimes channels are simply so small, they don't have a more streams/videos
                return "";
            }
        } catch (Exception e) {
            throw new ParsingException("Could not get next page url", e);
        }
    }
    private void collectStreamsFrom(StreamInfoItemsCollector collector, Element element) throws ParsingException {
        collector.reset();
        final String uploaderName = getName();
        final String uploaderUrl = getUrl();
        final TimeAgoParser timeAgoParser = getTimeAgoParser();
        JsonArray tabs = initialData.getObject("contents").getObject("twoColumnBrowseResultsRenderer")
                .getArray("tabs");
        JsonArray videos = null;
        for (Object tab : tabs) {
            if (((JsonObject) tab).getObject("tabRenderer") != null) {
                if (((JsonObject) tab).getObject("tabRenderer").getString("title").equals("Videos")) {
                    videos = ((JsonObject) tab).getObject("tabRenderer").getObject("content")
                            .getObject("sectionListRenderer").getArray("contents");
                }
            }
        }
        if (videos == null) {
            throw new ParsingException("Could not find Videos tab");
        }
        for (Object video : videos) {
            JsonObject videoInfo = ((JsonObject) video).getObject("itemSectionRenderer")
                    .getArray("contents").getObject(0);
@ -286,4 +264,25 @@ public class YoutubeChannelExtractor extends ChannelExtractor {
            }
        }
    }
    /**
     * Looks up the renderer of the channel's "Videos" tab in {@code initialData}.
     *
     * <p>The tabs are read from
     * {@code contents.twoColumnBrowseResultsRenderer.tabs}; the first entry that has a
     * {@code tabRenderer} whose {@code title} is exactly {@code "Videos"} is returned.</p>
     *
     * <p>NOTE(review): matching on the literal English title presumably relies on the page
     * having been requested in English - confirm against the code that fills
     * {@code initialData}.</p>
     *
     * @return the {@code tabRenderer} object of the "Videos" tab
     * @throws ParsingException if no tab titled "Videos" is present
     */
    private JsonObject getVideoTab() throws ParsingException {
        JsonArray tabs = initialData.getObject("contents").getObject("twoColumnBrowseResultsRenderer")
                .getArray("tabs");

        for (Object tab : tabs) {
            // Skip anything that is not a JSON object instead of failing on the cast.
            if (!(tab instanceof JsonObject)) {
                continue;
            }

            // Entries without a "tabRenderer" key are not regular tabs; ignore them.
            JsonObject tabRenderer = ((JsonObject) tab).getObject("tabRenderer");

            // Constant-first equals keeps a tab without a "title" from causing a
            // NullPointerException; such a tab simply does not match.
            if (tabRenderer != null && "Videos".equals(tabRenderer.getString("title"))) {
                return tabRenderer;
            }
        }

        throw new ParsingException("Could not find Videos tab");
    }
 }