Use the youtubei API for YouTube comments
Migrate YouTube comments to the desktop version by using the `next` endpoint of the InnerTube internal API. With the desktop version, we are able to get the exact like count of YouTube comments by parsing the accessibility data (the existing approximate extraction is used as a fallback). We are now also able to determine whether the uploader of a comment is verified. Co-authored-by: TiA4f8R <74829229+TiA4f8R@users.noreply.github.com>
This commit is contained in:
parent
286d839a3b
commit
f3e4c9d689
|
@ -2,6 +2,7 @@ package org.schabi.newpipe.extractor.comments;
|
||||||
|
|
||||||
import org.schabi.newpipe.extractor.ListExtractor;
|
import org.schabi.newpipe.extractor.ListExtractor;
|
||||||
import org.schabi.newpipe.extractor.StreamingService;
|
import org.schabi.newpipe.extractor.StreamingService;
|
||||||
|
import org.schabi.newpipe.extractor.exceptions.ExtractionException;
|
||||||
import org.schabi.newpipe.extractor.exceptions.ParsingException;
|
import org.schabi.newpipe.extractor.exceptions.ParsingException;
|
||||||
import org.schabi.newpipe.extractor.linkhandler.ListLinkHandler;
|
import org.schabi.newpipe.extractor.linkhandler.ListLinkHandler;
|
||||||
|
|
||||||
|
@ -17,7 +18,7 @@ public abstract class CommentsExtractor extends ListExtractor<CommentsInfoItem>
|
||||||
* @apiNote Warning: This method is experimental and may get removed in a future release.
|
* @apiNote Warning: This method is experimental and may get removed in a future release.
|
||||||
* @return <code>true</code> if the comments are disabled otherwise <code>false</code> (default)
|
* @return <code>true</code> if the comments are disabled otherwise <code>false</code> (default)
|
||||||
*/
|
*/
|
||||||
public boolean isCommentsDisabled() {
|
public boolean isCommentsDisabled() throws ExtractionException {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -1,8 +1,18 @@
|
||||||
package org.schabi.newpipe.extractor.services.youtube.extractors;
|
package org.schabi.newpipe.extractor.services.youtube.extractors;
|
||||||
|
|
||||||
import com.grack.nanojson.JsonArray;
|
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getJsonPostResponse;
|
||||||
import com.grack.nanojson.JsonObject;
|
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.prepareDesktopJsonBuilder;
|
||||||
import com.grack.nanojson.JsonParser;
|
import static org.schabi.newpipe.extractor.utils.Utils.UTF_8;
|
||||||
|
import static org.schabi.newpipe.extractor.utils.Utils.isNullOrEmpty;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.Collections;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Optional;
|
||||||
|
|
||||||
|
import javax.annotation.Nonnull;
|
||||||
|
import javax.annotation.Nullable;
|
||||||
|
|
||||||
import org.schabi.newpipe.extractor.Page;
|
import org.schabi.newpipe.extractor.Page;
|
||||||
import org.schabi.newpipe.extractor.StreamingService;
|
import org.schabi.newpipe.extractor.StreamingService;
|
||||||
import org.schabi.newpipe.extractor.comments.CommentsExtractor;
|
import org.schabi.newpipe.extractor.comments.CommentsExtractor;
|
||||||
|
@ -10,38 +20,19 @@ import org.schabi.newpipe.extractor.comments.CommentsInfoItem;
|
||||||
import org.schabi.newpipe.extractor.comments.CommentsInfoItemExtractor;
|
import org.schabi.newpipe.extractor.comments.CommentsInfoItemExtractor;
|
||||||
import org.schabi.newpipe.extractor.comments.CommentsInfoItemsCollector;
|
import org.schabi.newpipe.extractor.comments.CommentsInfoItemsCollector;
|
||||||
import org.schabi.newpipe.extractor.downloader.Downloader;
|
import org.schabi.newpipe.extractor.downloader.Downloader;
|
||||||
import org.schabi.newpipe.extractor.downloader.Response;
|
|
||||||
import org.schabi.newpipe.extractor.exceptions.ExtractionException;
|
import org.schabi.newpipe.extractor.exceptions.ExtractionException;
|
||||||
import org.schabi.newpipe.extractor.exceptions.ParsingException;
|
import org.schabi.newpipe.extractor.exceptions.ParsingException;
|
||||||
import org.schabi.newpipe.extractor.exceptions.ReCaptchaException;
|
|
||||||
import org.schabi.newpipe.extractor.linkhandler.ListLinkHandler;
|
import org.schabi.newpipe.extractor.linkhandler.ListLinkHandler;
|
||||||
import org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper;
|
import org.schabi.newpipe.extractor.localization.Localization;
|
||||||
import org.schabi.newpipe.extractor.utils.JsonUtils;
|
import org.schabi.newpipe.extractor.utils.JsonUtils;
|
||||||
import org.schabi.newpipe.extractor.utils.Parser;
|
|
||||||
|
|
||||||
import javax.annotation.Nonnull;
|
import com.grack.nanojson.JsonArray;
|
||||||
import java.io.IOException;
|
import com.grack.nanojson.JsonObject;
|
||||||
import java.io.UnsupportedEncodingException;
|
import com.grack.nanojson.JsonWriter;
|
||||||
import java.net.URLEncoder;
|
|
||||||
import java.util.Collections;
|
|
||||||
import java.util.HashMap;
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.Map;
|
|
||||||
import java.util.Optional;
|
|
||||||
import java.util.regex.Pattern;
|
|
||||||
|
|
||||||
import static java.util.Collections.singletonList;
|
|
||||||
import static org.schabi.newpipe.extractor.utils.Utils.UTF_8;
|
|
||||||
import static org.schabi.newpipe.extractor.utils.Utils.isNullOrEmpty;
|
|
||||||
|
|
||||||
public class YoutubeCommentsExtractor extends CommentsExtractor {
|
public class YoutubeCommentsExtractor extends CommentsExtractor {
|
||||||
// using the mobile site for comments because it loads faster and uses get requests instead of post
|
|
||||||
private static final String USER_AGENT = "Mozilla/5.0 (Android 9; Mobile; rv:78.0) Gecko/20100101 Firefox/78.0";
|
|
||||||
private static final Pattern YT_CLIENT_NAME_PATTERN = Pattern.compile("INNERTUBE_CONTEXT_CLIENT_NAME\\\":(.*?)[,}]");
|
|
||||||
|
|
||||||
private String ytClientVersion;
|
private JsonObject nextResponse;
|
||||||
private String ytClientName;
|
|
||||||
private String responseBody;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Caching mechanism and holder of the commentsDisabled value.
|
* Caching mechanism and holder of the commentsDisabled value.
|
||||||
|
@ -52,6 +43,7 @@ public class YoutubeCommentsExtractor extends CommentsExtractor {
|
||||||
* If the method or another one that is depending on disabled comments
|
* If the method or another one that is depending on disabled comments
|
||||||
* is now called again, the method execution can avoid unnecessary calls
|
* is now called again, the method execution can avoid unnecessary calls
|
||||||
*/
|
*/
|
||||||
|
@SuppressWarnings("OptionalUsedAsFieldOrParameterType")
|
||||||
private Optional<Boolean> optCommentsDisabled = Optional.empty();
|
private Optional<Boolean> optCommentsDisabled = Optional.empty();
|
||||||
|
|
||||||
public YoutubeCommentsExtractor(
|
public YoutubeCommentsExtractor(
|
||||||
|
@ -60,6 +52,7 @@ public class YoutubeCommentsExtractor extends CommentsExtractor {
|
||||||
super(service, uiHandler);
|
super(service, uiHandler);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Nonnull
|
||||||
@Override
|
@Override
|
||||||
public InfoItemsPage<CommentsInfoItem> getInitialPage()
|
public InfoItemsPage<CommentsInfoItem> getInitialPage()
|
||||||
throws IOException, ExtractionException {
|
throws IOException, ExtractionException {
|
||||||
|
@ -81,163 +74,177 @@ public class YoutubeCommentsExtractor extends CommentsExtractor {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Finds the initial comments token and initializes commentsDisabled.
|
* Finds the initial comments token and initializes commentsDisabled.
|
||||||
|
*
|
||||||
* @return the continuation token or null if none was found
|
* @return the continuation token or null if none was found
|
||||||
*/
|
*/
|
||||||
private String findInitialCommentsToken() {
|
@Nullable
|
||||||
final String continuationStartPattern = "continuation\":\"";
|
private String findInitialCommentsToken() throws ExtractionException {
|
||||||
|
|
||||||
String commentsTokenInside = findValue(responseBody, "sectionListRenderer", "}");
|
final JsonArray jArray = JsonUtils.getArray(nextResponse,
|
||||||
if (commentsTokenInside == null || !commentsTokenInside.contains(continuationStartPattern)) {
|
"contents.twoColumnWatchNextResults.results.results.contents");
|
||||||
commentsTokenInside = findValue(responseBody, "commentSectionRenderer", "}");
|
|
||||||
|
final Optional<Object> itemSectionRenderer = jArray.stream().filter(o -> {
|
||||||
|
JsonObject jObj = (JsonObject) o;
|
||||||
|
|
||||||
|
if (jObj.has("itemSectionRenderer")) {
|
||||||
|
try {
|
||||||
|
return JsonUtils.getString(jObj, "itemSectionRenderer.targetId")
|
||||||
|
.equals("comments-section");
|
||||||
|
} catch (final ParsingException ignored) {
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}).findFirst();
|
||||||
|
|
||||||
|
final String token;
|
||||||
|
|
||||||
|
if (itemSectionRenderer.isPresent()) {
|
||||||
|
token = JsonUtils.getString(((JsonObject) itemSectionRenderer.get())
|
||||||
|
.getObject("itemSectionRenderer").getArray("contents").getObject(0),
|
||||||
|
"continuationItemRenderer.continuationEndpoint.continuationCommand.token");
|
||||||
|
} else {
|
||||||
|
token = null;
|
||||||
}
|
}
|
||||||
|
|
||||||
// If no continuation token is found the comments are disabled
|
if (token == null) {
|
||||||
if (commentsTokenInside == null || !commentsTokenInside.contains(continuationStartPattern)) {
|
|
||||||
optCommentsDisabled = Optional.of(true);
|
optCommentsDisabled = Optional.of(true);
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
// If a continuation token is found there are >= 0 comments
|
|
||||||
final String commentsToken = findValue(commentsTokenInside, continuationStartPattern, "\"");
|
|
||||||
|
|
||||||
optCommentsDisabled = Optional.of(false);
|
optCommentsDisabled = Optional.of(false);
|
||||||
|
|
||||||
return commentsToken;
|
return token;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Nonnull
|
||||||
private InfoItemsPage<CommentsInfoItem> getInfoItemsPageForDisabledComments() {
|
private InfoItemsPage<CommentsInfoItem> getInfoItemsPageForDisabledComments() {
|
||||||
return new InfoItemsPage<>(Collections.emptyList(), null, Collections.emptyList());
|
return new InfoItemsPage<>(Collections.emptyList(), null, Collections.emptyList());
|
||||||
}
|
}
|
||||||
|
|
||||||
private Page getNextPage(final JsonObject ajaxJson) throws ParsingException {
|
@Nullable
|
||||||
final JsonArray arr;
|
private Page getNextPage(@Nonnull final JsonObject ajaxJson) throws ExtractionException {
|
||||||
|
final JsonArray jsonArray;
|
||||||
|
final JsonArray onResponseReceivedEndpoints = ajaxJson.getArray(
|
||||||
|
"onResponseReceivedEndpoints");
|
||||||
|
final JsonObject endpoint = onResponseReceivedEndpoints.getObject(
|
||||||
|
onResponseReceivedEndpoints.size() - 1);
|
||||||
|
|
||||||
try {
|
try {
|
||||||
arr = JsonUtils.getArray(ajaxJson, "response.continuationContents.commentSectionContinuation.continuations");
|
jsonArray = endpoint.getObject("reloadContinuationItemsCommand", endpoint.getObject(
|
||||||
|
"appendContinuationItemsAction")).getArray("continuationItems");
|
||||||
} catch (final Exception e) {
|
} catch (final Exception e) {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
if (arr.isEmpty()) {
|
if (jsonArray.isEmpty()) {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
final String continuation;
|
final String continuation;
|
||||||
try {
|
try {
|
||||||
continuation = JsonUtils.getString(arr.getObject(0), "nextContinuationData.continuation");
|
continuation = JsonUtils.getString(jsonArray.getObject(jsonArray.size() - 1),
|
||||||
|
"continuationItemRenderer.continuationEndpoint.continuationCommand.token");
|
||||||
} catch (final Exception e) {
|
} catch (final Exception e) {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
return getNextPage(continuation);
|
return getNextPage(continuation);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Nonnull
|
||||||
private Page getNextPage(final String continuation) throws ParsingException {
|
private Page getNextPage(final String continuation) throws ParsingException {
|
||||||
final Map<String, String> params = new HashMap<>();
|
return new Page(getUrl(), continuation); // URL is ignored tho
|
||||||
params.put("action_get_comments", "1");
|
|
||||||
params.put("pbj", "1");
|
|
||||||
params.put("ctoken", continuation);
|
|
||||||
try {
|
|
||||||
return new Page("https://m.youtube.com/watch_comment?" + getDataString(params));
|
|
||||||
} catch (final UnsupportedEncodingException e) {
|
|
||||||
throw new ParsingException("Could not get next page url", e);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public InfoItemsPage<CommentsInfoItem> getPage(final Page page) throws IOException, ExtractionException {
|
public InfoItemsPage<CommentsInfoItem> getPage(final Page page)
|
||||||
|
throws IOException, ExtractionException {
|
||||||
if (optCommentsDisabled.orElse(false)) {
|
if (optCommentsDisabled.orElse(false)) {
|
||||||
return getInfoItemsPageForDisabledComments();
|
return getInfoItemsPageForDisabledComments();
|
||||||
}
|
}
|
||||||
if (page == null || isNullOrEmpty(page.getUrl())) {
|
if (page == null || isNullOrEmpty(page.getId())) {
|
||||||
throw new IllegalArgumentException("Page doesn't contain an URL");
|
throw new IllegalArgumentException("Page doesn't have the continuation.");
|
||||||
}
|
}
|
||||||
|
|
||||||
final String ajaxResponse = makeAjaxRequest(page.getUrl());
|
final Localization localization = getExtractorLocalization();
|
||||||
final JsonObject ajaxJson;
|
final byte[] body = JsonWriter.string(prepareDesktopJsonBuilder(localization,
|
||||||
try {
|
getExtractorContentCountry())
|
||||||
ajaxJson = JsonParser.array().from(ajaxResponse).getObject(1);
|
.value("continuation", page.getId())
|
||||||
} catch (final Exception e) {
|
.done())
|
||||||
throw new ParsingException("Could not parse json data for comments", e);
|
.getBytes(UTF_8);
|
||||||
}
|
|
||||||
final CommentsInfoItemsCollector collector = new CommentsInfoItemsCollector(getServiceId());
|
final JsonObject ajaxJson = getJsonPostResponse("next", body, localization);
|
||||||
|
|
||||||
|
final CommentsInfoItemsCollector collector = new CommentsInfoItemsCollector(
|
||||||
|
getServiceId());
|
||||||
collectCommentsFrom(collector, ajaxJson);
|
collectCommentsFrom(collector, ajaxJson);
|
||||||
return new InfoItemsPage<>(collector, getNextPage(ajaxJson));
|
return new InfoItemsPage<>(collector, getNextPage(ajaxJson));
|
||||||
}
|
}
|
||||||
|
|
||||||
private void collectCommentsFrom(final CommentsInfoItemsCollector collector, final JsonObject ajaxJson) throws ParsingException {
|
private void collectCommentsFrom(final CommentsInfoItemsCollector collector,
|
||||||
final JsonArray contents;
|
@Nonnull final JsonObject ajaxJson) throws ParsingException {
|
||||||
try {
|
|
||||||
contents = JsonUtils.getArray(ajaxJson, "response.continuationContents.commentSectionContinuation.items");
|
final JsonArray onResponseReceivedEndpoints = ajaxJson.getArray(
|
||||||
} catch (final Exception e) {
|
"onResponseReceivedEndpoints");
|
||||||
//no comments
|
final JsonObject commentsEndpoint = onResponseReceivedEndpoints.getObject(
|
||||||
|
onResponseReceivedEndpoints.size() - 1);
|
||||||
|
|
||||||
|
final String path;
|
||||||
|
|
||||||
|
if (commentsEndpoint.has("reloadContinuationItemsCommand")) {
|
||||||
|
path = "reloadContinuationItemsCommand.continuationItems";
|
||||||
|
} else if (commentsEndpoint.has("appendContinuationItemsAction")) {
|
||||||
|
path = "appendContinuationItemsAction.continuationItems";
|
||||||
|
} else {
|
||||||
|
// No comments
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
final JsonArray contents;
|
||||||
|
try {
|
||||||
|
contents = (JsonArray) JsonUtils.getArray(commentsEndpoint, path).clone();
|
||||||
|
} catch (final Exception e) {
|
||||||
|
// No comments
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
final int index = contents.size() - 1;
|
||||||
|
if (contents.getObject(index).has("continuationItemRenderer")) {
|
||||||
|
contents.remove(index);
|
||||||
|
}
|
||||||
|
|
||||||
final List<Object> comments;
|
final List<Object> comments;
|
||||||
try {
|
try {
|
||||||
comments = JsonUtils.getValues(contents, "commentThreadRenderer.comment.commentRenderer");
|
comments = JsonUtils.getValues(contents,
|
||||||
|
"commentThreadRenderer.comment.commentRenderer");
|
||||||
} catch (final Exception e) {
|
} catch (final Exception e) {
|
||||||
throw new ParsingException("unable to get parse youtube comments", e);
|
throw new ParsingException("Unable to get parse youtube comments", e);
|
||||||
}
|
}
|
||||||
|
|
||||||
for (final Object c : comments) {
|
for (final Object c : comments) {
|
||||||
if (c instanceof JsonObject) {
|
if (c instanceof JsonObject) {
|
||||||
final CommentsInfoItemExtractor extractor =
|
final CommentsInfoItemExtractor extractor = new YoutubeCommentsInfoItemExtractor(
|
||||||
new YoutubeCommentsInfoItemExtractor((JsonObject) c, getUrl(), getTimeAgoParser());
|
(JsonObject) c, getUrl(), getTimeAgoParser());
|
||||||
collector.commit(extractor);
|
collector.commit(extractor);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void onFetchPage(@Nonnull final Downloader downloader) throws IOException, ExtractionException {
|
public void onFetchPage(@Nonnull final Downloader downloader)
|
||||||
final Map<String, List<String>> requestHeaders = new HashMap<>();
|
throws IOException, ExtractionException {
|
||||||
requestHeaders.put("User-Agent", singletonList(USER_AGENT));
|
final Localization localization = getExtractorLocalization();
|
||||||
final Response response = downloader.get(getUrl(), requestHeaders, getExtractorLocalization());
|
final byte[] body = JsonWriter.string(prepareDesktopJsonBuilder(localization,
|
||||||
responseBody = YoutubeParsingHelper.unescapeDocument(response.responseBody());
|
getExtractorContentCountry())
|
||||||
ytClientVersion = findValue(responseBody, "INNERTUBE_CONTEXT_CLIENT_VERSION\":\"", "\"");
|
.value("videoId", getId())
|
||||||
ytClientName = Parser.matchGroup1(YT_CLIENT_NAME_PATTERN, responseBody);
|
.done())
|
||||||
|
.getBytes(UTF_8);
|
||||||
|
|
||||||
|
nextResponse = getJsonPostResponse("next", body, localization);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
private String makeAjaxRequest(final String siteUrl) throws IOException, ReCaptchaException {
|
|
||||||
final Map<String, List<String>> requestHeaders = new HashMap<>();
|
|
||||||
requestHeaders.put("Accept", singletonList("*/*"));
|
|
||||||
requestHeaders.put("User-Agent", singletonList(USER_AGENT));
|
|
||||||
requestHeaders.put("X-YouTube-Client-Version", singletonList(ytClientVersion));
|
|
||||||
requestHeaders.put("X-YouTube-Client-Name", singletonList(ytClientName));
|
|
||||||
return getDownloader().get(siteUrl, requestHeaders, getExtractorLocalization()).responseBody();
|
|
||||||
}
|
|
||||||
|
|
||||||
private String getDataString(final Map<String, String> params) throws UnsupportedEncodingException {
|
|
||||||
final StringBuilder result = new StringBuilder();
|
|
||||||
boolean first = true;
|
|
||||||
for (final Map.Entry<String, String> entry : params.entrySet()) {
|
|
||||||
if (first) {
|
|
||||||
first = false;
|
|
||||||
} else {
|
|
||||||
result.append("&");
|
|
||||||
}
|
|
||||||
result.append(URLEncoder.encode(entry.getKey(), UTF_8));
|
|
||||||
result.append("=");
|
|
||||||
result.append(URLEncoder.encode(entry.getValue(), UTF_8));
|
|
||||||
}
|
|
||||||
return result.toString();
|
|
||||||
}
|
|
||||||
|
|
||||||
private String findValue(final String doc, final String start, final String end) {
|
|
||||||
int beginIndex = doc.indexOf(start);
|
|
||||||
// Start string was not found
|
|
||||||
if (beginIndex == -1) {
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
beginIndex = beginIndex + start.length();
|
|
||||||
final int endIndex = doc.indexOf(end, beginIndex);
|
|
||||||
// End string was not found
|
|
||||||
if (endIndex == -1) {
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
return doc.substring(beginIndex, endIndex);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public boolean isCommentsDisabled() {
|
public boolean isCommentsDisabled() throws ExtractionException {
|
||||||
// Check if commentsDisabled has to be initialized
|
// Check if commentsDisabled has to be initialized
|
||||||
if (!optCommentsDisabled.isPresent()) {
|
if (!optCommentsDisabled.isPresent()) {
|
||||||
// Initialize commentsDisabled
|
// Initialize commentsDisabled
|
||||||
|
|
|
@ -21,7 +21,9 @@ public class YoutubeCommentsInfoItemExtractor implements CommentsInfoItemExtract
|
||||||
private final String url;
|
private final String url;
|
||||||
private final TimeAgoParser timeAgoParser;
|
private final TimeAgoParser timeAgoParser;
|
||||||
|
|
||||||
public YoutubeCommentsInfoItemExtractor(JsonObject json, String url, TimeAgoParser timeAgoParser) {
|
public YoutubeCommentsInfoItemExtractor(final JsonObject json,
|
||||||
|
final String url,
|
||||||
|
final TimeAgoParser timeAgoParser) {
|
||||||
this.json = json;
|
this.json = json;
|
||||||
this.url = url;
|
this.url = url;
|
||||||
this.timeAgoParser = timeAgoParser;
|
this.timeAgoParser = timeAgoParser;
|
||||||
|
@ -37,7 +39,7 @@ public class YoutubeCommentsInfoItemExtractor implements CommentsInfoItemExtract
|
||||||
try {
|
try {
|
||||||
final JsonArray arr = JsonUtils.getArray(json, "authorThumbnail.thumbnails");
|
final JsonArray arr = JsonUtils.getArray(json, "authorThumbnail.thumbnails");
|
||||||
return JsonUtils.getString(arr.getObject(2), "url");
|
return JsonUtils.getString(arr.getObject(2), "url");
|
||||||
} catch (Exception e) {
|
} catch (final Exception e) {
|
||||||
throw new ParsingException("Could not get thumbnail url", e);
|
throw new ParsingException("Could not get thumbnail url", e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -46,7 +48,7 @@ public class YoutubeCommentsInfoItemExtractor implements CommentsInfoItemExtract
|
||||||
public String getName() throws ParsingException {
|
public String getName() throws ParsingException {
|
||||||
try {
|
try {
|
||||||
return getTextFromObject(JsonUtils.getObject(json, "authorText"));
|
return getTextFromObject(JsonUtils.getObject(json, "authorText"));
|
||||||
} catch (Exception e) {
|
} catch (final Exception e) {
|
||||||
return EMPTY_STRING;
|
return EMPTY_STRING;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -55,7 +57,7 @@ public class YoutubeCommentsInfoItemExtractor implements CommentsInfoItemExtract
|
||||||
public String getTextualUploadDate() throws ParsingException {
|
public String getTextualUploadDate() throws ParsingException {
|
||||||
try {
|
try {
|
||||||
return getTextFromObject(JsonUtils.getObject(json, "publishedTimeText"));
|
return getTextFromObject(JsonUtils.getObject(json, "publishedTimeText"));
|
||||||
} catch (Exception e) {
|
} catch (final Exception e) {
|
||||||
throw new ParsingException("Could not get publishedTimeText", e);
|
throw new ParsingException("Could not get publishedTimeText", e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -64,7 +66,8 @@ public class YoutubeCommentsInfoItemExtractor implements CommentsInfoItemExtract
|
||||||
@Override
|
@Override
|
||||||
public DateWrapper getUploadDate() throws ParsingException {
|
public DateWrapper getUploadDate() throws ParsingException {
|
||||||
String textualPublishedTime = getTextualUploadDate();
|
String textualPublishedTime = getTextualUploadDate();
|
||||||
if (timeAgoParser != null && textualPublishedTime != null && !textualPublishedTime.isEmpty()) {
|
if (timeAgoParser != null && textualPublishedTime != null
|
||||||
|
&& !textualPublishedTime.isEmpty()) {
|
||||||
return timeAgoParser.parse(textualPublishedTime);
|
return timeAgoParser.parse(textualPublishedTime);
|
||||||
} else {
|
} else {
|
||||||
return null;
|
return null;
|
||||||
|
@ -72,33 +75,51 @@ public class YoutubeCommentsInfoItemExtractor implements CommentsInfoItemExtract
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @implNote The method is parsing internally a localized string.<br>
|
* @implNote The method tries first to get the exact like count by using the accessibility data
|
||||||
|
* returned. But if the parsing of this accessibility data fails, the method parses internally
|
||||||
|
* a localized string.
|
||||||
|
* <br>
|
||||||
* <ul>
|
* <ul>
|
||||||
* <li>
|
* <li>More than 1k likes will result in an inaccurate number</li>
|
||||||
* More than 1k likes will result in an inaccurate number
|
* <li>This will fail for other languages than English. However as long as the Extractor
|
||||||
* </li>
|
* only uses "en-GB" (as seen in {@link
|
||||||
* <li>
|
* org.schabi.newpipe.extractor.services.youtube.YoutubeService#getSupportedLocalizations})
|
||||||
* This will fail for other languages than English.
|
* , everything will work fine.</li>
|
||||||
* However as long as the Extractor only uses "en-GB"
|
|
||||||
* (as seen in {@link org.schabi.newpipe.extractor.services.youtube.YoutubeService#SUPPORTED_LANGUAGES})
|
|
||||||
* everything will work fine.
|
|
||||||
* </li>
|
|
||||||
* </ul>
|
* </ul>
|
||||||
* <br>
|
* <br>
|
||||||
* Consider using {@link #getTextualLikeCount()}
|
* Consider using {@link #getTextualLikeCount()}
|
||||||
*/
|
*/
|
||||||
@Override
|
@Override
|
||||||
public int getLikeCount() throws ParsingException {
|
public int getLikeCount() throws ParsingException {
|
||||||
// This may return a language dependent version, e.g. in German: 3,3 Mio
|
// Try first to get the exact like count by using the accessibility data
|
||||||
final String textualLikeCount = getTextualLikeCount();
|
final String likeCount;
|
||||||
try {
|
try {
|
||||||
if (Utils.isBlank(textualLikeCount)) {
|
likeCount = Utils.removeNonDigitCharacters(JsonUtils.getString(json,
|
||||||
|
"actionButtons.commentActionButtonsRenderer.likeButton.toggleButtonRenderer.accessibilityData.accessibilityData.label"));
|
||||||
|
} catch (final Exception e) {
|
||||||
|
// Use the approximate like count returned into the voteCount object
|
||||||
|
// This may return a language dependent version, e.g. in German: 3,3 Mio
|
||||||
|
final String textualLikeCount = getTextualLikeCount();
|
||||||
|
try {
|
||||||
|
if (Utils.isBlank(textualLikeCount)) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
return (int) Utils.mixedNumberWordToLong(textualLikeCount);
|
||||||
|
} catch (final Exception i) {
|
||||||
|
throw new ParsingException(
|
||||||
|
"Unexpected error while converting textual like count to like count", i);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
|
if (Utils.isBlank(likeCount)) {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
return (int) Utils.mixedNumberWordToLong(textualLikeCount);
|
return Integer.parseInt(likeCount);
|
||||||
} catch (Exception e) {
|
} catch (final Exception e) {
|
||||||
throw new ParsingException("Unexpected error while converting textual like count to like count", e);
|
throw new ParsingException("Unexpected error while parsing like count as Integer", e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -133,8 +154,8 @@ public class YoutubeCommentsInfoItemExtractor implements CommentsInfoItemExtract
|
||||||
return EMPTY_STRING;
|
return EMPTY_STRING;
|
||||||
}
|
}
|
||||||
return getTextFromObject(voteCountObj);
|
return getTextFromObject(voteCountObj);
|
||||||
} catch (Exception e) {
|
} catch (final Exception e) {
|
||||||
throw new ParsingException("Could not get vote count", e);
|
throw new ParsingException("Could not get the vote count", e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -148,9 +169,10 @@ public class YoutubeCommentsInfoItemExtractor implements CommentsInfoItemExtract
|
||||||
return EMPTY_STRING;
|
return EMPTY_STRING;
|
||||||
}
|
}
|
||||||
final String commentText = getTextFromObject(contentText);
|
final String commentText = getTextFromObject(contentText);
|
||||||
// youtube adds U+FEFF in some comments. eg. https://www.youtube.com/watch?v=Nj4F63E59io<feff>
|
// YouTube adds U+FEFF in some comments.
|
||||||
|
// eg. https://www.youtube.com/watch?v=Nj4F63E59io<feff>
|
||||||
return Utils.removeUTF8BOM(commentText);
|
return Utils.removeUTF8BOM(commentText);
|
||||||
} catch (Exception e) {
|
} catch (final Exception e) {
|
||||||
throw new ParsingException("Could not get comment text", e);
|
throw new ParsingException("Could not get comment text", e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -159,7 +181,7 @@ public class YoutubeCommentsInfoItemExtractor implements CommentsInfoItemExtract
|
||||||
public String getCommentId() throws ParsingException {
|
public String getCommentId() throws ParsingException {
|
||||||
try {
|
try {
|
||||||
return JsonUtils.getString(json, "commentId");
|
return JsonUtils.getString(json, "commentId");
|
||||||
} catch (Exception e) {
|
} catch (final Exception e) {
|
||||||
throw new ParsingException("Could not get comment id", e);
|
throw new ParsingException("Could not get comment id", e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -169,14 +191,16 @@ public class YoutubeCommentsInfoItemExtractor implements CommentsInfoItemExtract
|
||||||
try {
|
try {
|
||||||
JsonArray arr = JsonUtils.getArray(json, "authorThumbnail.thumbnails");
|
JsonArray arr = JsonUtils.getArray(json, "authorThumbnail.thumbnails");
|
||||||
return JsonUtils.getString(arr.getObject(2), "url");
|
return JsonUtils.getString(arr.getObject(2), "url");
|
||||||
} catch (Exception e) {
|
} catch (final Exception e) {
|
||||||
throw new ParsingException("Could not get author thumbnail", e);
|
throw new ParsingException("Could not get author thumbnail", e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public boolean isHeartedByUploader() throws ParsingException {
|
public boolean isHeartedByUploader() throws ParsingException {
|
||||||
return json.has("creatorHeart");
|
final JsonObject commentActionButtonsRenderer = json.getObject("actionButtons")
|
||||||
|
.getObject("commentActionButtonsRenderer");
|
||||||
|
return commentActionButtonsRenderer.has("creatorHeart");
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -185,15 +209,14 @@ public class YoutubeCommentsInfoItemExtractor implements CommentsInfoItemExtract
|
||||||
}
|
}
|
||||||
|
|
||||||
public boolean isUploaderVerified() {
|
public boolean isUploaderVerified() {
|
||||||
// impossible to get this information from the mobile layout
|
return json.has("authorCommentBadge");
|
||||||
return false;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String getUploaderName() throws ParsingException {
|
public String getUploaderName() throws ParsingException {
|
||||||
try {
|
try {
|
||||||
return getTextFromObject(JsonUtils.getObject(json, "authorText"));
|
return getTextFromObject(JsonUtils.getObject(json, "authorText"));
|
||||||
} catch (Exception e) {
|
} catch (final Exception e) {
|
||||||
return EMPTY_STRING;
|
return EMPTY_STRING;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -201,10 +224,10 @@ public class YoutubeCommentsInfoItemExtractor implements CommentsInfoItemExtract
|
||||||
@Override
|
@Override
|
||||||
public String getUploaderUrl() throws ParsingException {
|
public String getUploaderUrl() throws ParsingException {
|
||||||
try {
|
try {
|
||||||
return "https://youtube.com/channel/" + JsonUtils.getString(json, "authorEndpoint.browseEndpoint.browseId");
|
return "https://www.youtube.com/channel/" + JsonUtils.getString(json,
|
||||||
} catch (Exception e) {
|
"authorEndpoint.browseEndpoint.browseId");
|
||||||
|
} catch (final Exception e) {
|
||||||
return EMPTY_STRING;
|
return EMPTY_STRING;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -16,7 +16,7 @@ public class YoutubeCommentsLinkHandlerFactory extends ListLinkHandlerFactory {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String getUrl(String id) {
|
public String getUrl(String id) {
|
||||||
return "https://m.youtube.com/watch?v=" + id;
|
return "https://www.youtube.com/watch?v=" + id;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
Loading…
Reference in New Issue