[Youtube] Adjust mix extractor to new user agent

Also extract continuation now
This commit is contained in:
Xiang Rong Lin 2020-03-07 08:42:26 +01:00 committed by XiangRongLin
parent 0ff054acb4
commit ec6b99c082
1 changed files with 43 additions and 97 deletions

View File

@ -1,5 +1,6 @@
package org.schabi.newpipe.extractor.services.youtube.extractors; package org.schabi.newpipe.extractor.services.youtube.extractors;
import static org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeParsingHelper.fixThumbnailUrl;
import static org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeParsingHelper.getJsonResponse; import static org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeParsingHelper.getJsonResponse;
import com.grack.nanojson.JsonArray; import com.grack.nanojson.JsonArray;
@ -29,7 +30,7 @@ import org.schabi.newpipe.extractor.stream.StreamInfoItemsCollector;
public class YoutubeMixPlaylistExtractor extends PlaylistExtractor { public class YoutubeMixPlaylistExtractor extends PlaylistExtractor {
private JsonObject initialData; private JsonObject initialData;
private Document doc; private JsonObject playlistData;
public YoutubeMixPlaylistExtractor(StreamingService service, ListLinkHandler linkHandler) { public YoutubeMixPlaylistExtractor(StreamingService service, ListLinkHandler linkHandler) {
super(service, linkHandler); super(service, linkHandler);
@ -40,10 +41,7 @@ public class YoutubeMixPlaylistExtractor extends PlaylistExtractor {
final String url = getUrl() + "&pbj=1"; final String url = getUrl() + "&pbj=1";
final JsonArray ajaxJson = getJsonResponse(url, getExtractorLocalization()); final JsonArray ajaxJson = getJsonResponse(url, getExtractorLocalization());
initialData = ajaxJson.getObject(3).getObject("response"); initialData = ajaxJson.getObject(3).getObject("response");
JsonObject a = initialData.getObject("contents"); playlistData = initialData.getObject("contents").getObject("twoColumnWatchNextResults").getObject("playlist").getObject("playlist");
JsonObject b = initialData.getObject("contents").getObject("twoColumnWatchNextResults");
JsonObject c = initialData.getObject("contents");
JsonObject playlist = initialData.getObject("contents").getObject("twoColumnWatchNextResults").getObject("playlist").getObject("playlist");
System.out.println(); System.out.println();
} }
@ -51,7 +49,9 @@ public class YoutubeMixPlaylistExtractor extends PlaylistExtractor {
@Override @Override
public String getName() throws ParsingException { public String getName() throws ParsingException {
try { try {
return doc.select("div[class=\"playlist-info\"] h3[class=\"playlist-title\"]").first().text(); final String name = playlistData.getString("title");
if (name!= null) return name;
else return "";
} catch (Exception e) { } catch (Exception e) {
throw new ParsingException("Could not get playlist name", e); throw new ParsingException("Could not get playlist name", e);
} }
@ -60,16 +60,8 @@ public class YoutubeMixPlaylistExtractor extends PlaylistExtractor {
@Override @Override
public String getThumbnailUrl() throws ParsingException { public String getThumbnailUrl() throws ParsingException {
try { try {
Element li = doc.select("ol[class*=\"playlist-videos-list\"] li").first(); final String videoId = playlistData.getArray("contents").getObject(0).getString("videoId");
String videoId = li.attr("data-video-id");
if (videoId != null && !videoId.isEmpty()) {
//higher quality
return getThumbnailUrlFromId(videoId); return getThumbnailUrlFromId(videoId);
} else {
//lower quality
return doc.select("ol[class*=\"playlist-videos-list\"] li").first()
.attr("data-thumbnail-url");
}
} catch (Exception e) { } catch (Exception e) {
throw new ParsingException("Could not get playlist thumbnail", e); throw new ParsingException("Could not get playlist thumbnail", e);
} }
@ -101,112 +93,66 @@ public class YoutubeMixPlaylistExtractor extends PlaylistExtractor {
@Override @Override
public long getStreamCount() { public long getStreamCount() {
// Auto-generated playlist always start with 25 videos and are endless // Auto-generated playlist always start with 25 videos and are endless
// But the html doesn't have a continuation url return 25;
return doc.select("ol[class*=\"playlist-videos-list\"] li").size();
} }
@Nonnull @Nonnull
@Override @Override
public InfoItemsPage<StreamInfoItem> getInitialPage() { public InfoItemsPage<StreamInfoItem> getInitialPage() {
StreamInfoItemsCollector collector = new StreamInfoItemsCollector(getServiceId()); StreamInfoItemsCollector collector = new StreamInfoItemsCollector(getServiceId());
Element ol = doc.select("ol[class*=\"playlist-videos-list\"]").first(); collectStreamsFrom(collector, playlistData.getArray("contents"));
collectStreamsFrom(collector, ol);
return new InfoItemsPage<>(collector, getNextPageUrl()); return new InfoItemsPage<>(collector, getNextPageUrl());
} }
@Override @Override
public String getNextPageUrl() { public String getNextPageUrl() {
return ""; final JsonObject lastStream = ((JsonObject) playlistData.getArray("contents")
.get(playlistData.getArray("contents").size() - 1));
final String lastStreamId = lastStream.getObject("playlistPanelVideoRenderer")
.getString("videoId");
return "https://youtube.com" + lastStream.getObject("playlistPanelVideoRenderer").getObject("navigationEndpoint")
.getObject("commandMetadata").getObject("webCommandMetadata").getString("url") + "&pbj=1";
} }
@Override @Override
public InfoItemsPage<StreamInfoItem> getPage(final String pageUrl) { public InfoItemsPage<StreamInfoItem> getPage(final String pageUrl)
//Continuations are not implemented throws ExtractionException, IOException {
return null; if (pageUrl == null || pageUrl.isEmpty()) {
throw new ExtractionException(new IllegalArgumentException("Page url is empty or null"));
}
StreamInfoItemsCollector collector = new StreamInfoItemsCollector(getServiceId());
final JsonArray ajaxJson = getJsonResponse(pageUrl, getExtractorLocalization());
playlistData =
ajaxJson.getObject(3).getObject("response").getObject("contents")
.getObject("twoColumnWatchNextResults").getObject("playlist")
.getObject("playlist");
final JsonArray streams = playlistData.getArray("contents");
//Because continuation requests are created with the last video of previous request as start
streams.remove(0);
collectStreamsFrom(collector, streams);
return new InfoItemsPage<>(collector, getNextPageUrl());
} }
private void collectStreamsFrom( private void collectStreamsFrom(
@Nonnull StreamInfoItemsCollector collector, @Nonnull StreamInfoItemsCollector collector,
@Nullable Element element) { @Nullable JsonArray streams) {
collector.reset(); collector.reset();
if (element == null) { if (streams == null) {
return; return;
} }
final LinkHandlerFactory streamLinkHandlerFactory = getService().getStreamLHFactory();
final TimeAgoParser timeAgoParser = getTimeAgoParser(); final TimeAgoParser timeAgoParser = getTimeAgoParser();
// for (final Element li : element.children()) { for (Object stream : streams) {
// if (stream instanceof JsonObject) {
// collector.commit(new YoutubeStreamInfoItemExtractor(li, timeAgoParser) { JsonObject streamInfo = ((JsonObject) stream).getObject("playlistPanelVideoRenderer");
// if (streamInfo != null) {
// @Override collector.commit(new YoutubeStreamInfoItemExtractor(streamInfo, timeAgoParser));
// public boolean isAd() { }
// return false; }
// } }
//
// @Override
// public String getUrl() throws ParsingException {
// try {
// return streamLinkHandlerFactory.fromId(li.attr("data-video-id")).getUrl();
// } catch (Exception e) {
// throw new ParsingException("Could not get web page url for the video", e);
// }
// }
//
// @Override
// public String getName() throws ParsingException {
// try {
// return li.attr("data-video-title");
// } catch (Exception e) {
// throw new ParsingException("Could not get name", e);
// }
// }
//
// @Override
// public long getDuration() {
// //Not present in doc
// return 0;
// }
//
// @Override
// public String getUploaderName() throws ParsingException {
// String uploaderName = li.attr("data-video-username");
// if (uploaderName == null || uploaderName.isEmpty()) {
// throw new ParsingException("Could not get uploader name");
// } else {
// return uploaderName;
// }
// }
//
// @Override
// public String getUploaderUrl() {
// //Not present in doc
// return "";
// }
//
// @Override
// public String getTextualUploadDate() {
// //Not present in doc
// return "";
// }
//
// @Override
// public long getViewCount() {
// return -1;
// }
//
// @Override
// public String getThumbnailUrl() throws ParsingException {
// try {
// return getThumbnailUrlFromId(streamLinkHandlerFactory.fromUrl(getUrl()).getId());
// } catch (Exception e) {
// throw new ParsingException("Could not get thumbnail url", e);
// }
// }
// });
// }
} }
private String getThumbnailUrlFromId(String videoId) { private String getThumbnailUrlFromId(String videoId) {