Merge pull request #652 from litetex/fixYTCommentsAndAddDisabledComments
Fix yt comments and add disabled comments functionality
This commit is contained in:
commit
b45bb411e8
|
@ -9,9 +9,16 @@ import javax.annotation.Nonnull;
|
||||||
|
|
||||||
public abstract class CommentsExtractor extends ListExtractor<CommentsInfoItem> {
|
public abstract class CommentsExtractor extends ListExtractor<CommentsInfoItem> {
|
||||||
|
|
||||||
public CommentsExtractor(StreamingService service, ListLinkHandler uiHandler) {
|
public CommentsExtractor(final StreamingService service, final ListLinkHandler uiHandler) {
|
||||||
super(service, uiHandler);
|
super(service, uiHandler);
|
||||||
// TODO Auto-generated constructor stub
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @apiNote Warning: This method is experimental and may get removed in a future release.
|
||||||
|
* @return <code>true</code> if the comments are disabled otherwise <code>false</code> (default)
|
||||||
|
*/
|
||||||
|
public boolean isCommentsDisabled() {
|
||||||
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Nonnull
|
@Nonnull
|
||||||
|
|
|
@ -13,45 +13,56 @@ import java.io.IOException;
|
||||||
|
|
||||||
public class CommentsInfo extends ListInfo<CommentsInfoItem> {
|
public class CommentsInfo extends ListInfo<CommentsInfoItem> {
|
||||||
|
|
||||||
private CommentsInfo(int serviceId, ListLinkHandler listUrlIdHandler, String name) {
|
private CommentsInfo(
|
||||||
|
final int serviceId,
|
||||||
|
final ListLinkHandler listUrlIdHandler,
|
||||||
|
final String name) {
|
||||||
super(serviceId, listUrlIdHandler, name);
|
super(serviceId, listUrlIdHandler, name);
|
||||||
}
|
}
|
||||||
|
|
||||||
public static CommentsInfo getInfo(String url) throws IOException, ExtractionException {
|
public static CommentsInfo getInfo(final String url) throws IOException, ExtractionException {
|
||||||
return getInfo(NewPipe.getServiceByUrl(url), url);
|
return getInfo(NewPipe.getServiceByUrl(url), url);
|
||||||
}
|
}
|
||||||
|
|
||||||
public static CommentsInfo getInfo(StreamingService serviceByUrl, String url) throws ExtractionException, IOException {
|
public static CommentsInfo getInfo(final StreamingService serviceByUrl, final String url)
|
||||||
|
throws ExtractionException, IOException {
|
||||||
return getInfo(serviceByUrl.getCommentsExtractor(url));
|
return getInfo(serviceByUrl.getCommentsExtractor(url));
|
||||||
}
|
}
|
||||||
|
|
||||||
public static CommentsInfo getInfo(CommentsExtractor commentsExtractor) throws IOException, ExtractionException {
|
public static CommentsInfo getInfo(final CommentsExtractor commentsExtractor)
|
||||||
|
throws IOException, ExtractionException {
|
||||||
// for services which do not have a comments extractor
|
// for services which do not have a comments extractor
|
||||||
if (null == commentsExtractor) {
|
if (commentsExtractor == null) {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
commentsExtractor.fetchPage();
|
commentsExtractor.fetchPage();
|
||||||
String name = commentsExtractor.getName();
|
|
||||||
int serviceId = commentsExtractor.getServiceId();
|
final String name = commentsExtractor.getName();
|
||||||
ListLinkHandler listUrlIdHandler = commentsExtractor.getLinkHandler();
|
final int serviceId = commentsExtractor.getServiceId();
|
||||||
CommentsInfo commentsInfo = new CommentsInfo(serviceId, listUrlIdHandler, name);
|
final ListLinkHandler listUrlIdHandler = commentsExtractor.getLinkHandler();
|
||||||
|
|
||||||
|
final CommentsInfo commentsInfo = new CommentsInfo(serviceId, listUrlIdHandler, name);
|
||||||
commentsInfo.setCommentsExtractor(commentsExtractor);
|
commentsInfo.setCommentsExtractor(commentsExtractor);
|
||||||
InfoItemsPage<CommentsInfoItem> initialCommentsPage = ExtractorHelper.getItemsPageOrLogError(commentsInfo,
|
final InfoItemsPage<CommentsInfoItem> initialCommentsPage =
|
||||||
commentsExtractor);
|
ExtractorHelper.getItemsPageOrLogError(commentsInfo, commentsExtractor);
|
||||||
|
commentsInfo.setCommentsDisabled(commentsExtractor.isCommentsDisabled());
|
||||||
commentsInfo.setRelatedItems(initialCommentsPage.getItems());
|
commentsInfo.setRelatedItems(initialCommentsPage.getItems());
|
||||||
commentsInfo.setNextPage(initialCommentsPage.getNextPage());
|
commentsInfo.setNextPage(initialCommentsPage.getNextPage());
|
||||||
|
|
||||||
return commentsInfo;
|
return commentsInfo;
|
||||||
}
|
}
|
||||||
|
|
||||||
public static InfoItemsPage<CommentsInfoItem> getMoreItems(CommentsInfo commentsInfo, Page page)
|
public static InfoItemsPage<CommentsInfoItem> getMoreItems(
|
||||||
throws ExtractionException, IOException {
|
final CommentsInfo commentsInfo,
|
||||||
|
final Page page) throws ExtractionException, IOException {
|
||||||
return getMoreItems(NewPipe.getService(commentsInfo.getServiceId()), commentsInfo, page);
|
return getMoreItems(NewPipe.getService(commentsInfo.getServiceId()), commentsInfo, page);
|
||||||
}
|
}
|
||||||
|
|
||||||
public static InfoItemsPage<CommentsInfoItem> getMoreItems(StreamingService service, CommentsInfo commentsInfo,
|
public static InfoItemsPage<CommentsInfoItem> getMoreItems(
|
||||||
Page page) throws IOException, ExtractionException {
|
final StreamingService service,
|
||||||
|
final CommentsInfo commentsInfo,
|
||||||
|
final Page page) throws IOException, ExtractionException {
|
||||||
if (null == commentsInfo.getCommentsExtractor()) {
|
if (null == commentsInfo.getCommentsExtractor()) {
|
||||||
commentsInfo.setCommentsExtractor(service.getCommentsExtractor(commentsInfo.getUrl()));
|
commentsInfo.setCommentsExtractor(service.getCommentsExtractor(commentsInfo.getUrl()));
|
||||||
commentsInfo.getCommentsExtractor().fetchPage();
|
commentsInfo.getCommentsExtractor().fetchPage();
|
||||||
|
@ -60,13 +71,30 @@ public class CommentsInfo extends ListInfo<CommentsInfoItem> {
|
||||||
}
|
}
|
||||||
|
|
||||||
private transient CommentsExtractor commentsExtractor;
|
private transient CommentsExtractor commentsExtractor;
|
||||||
|
private boolean commentsDisabled = false;
|
||||||
|
|
||||||
public CommentsExtractor getCommentsExtractor() {
|
public CommentsExtractor getCommentsExtractor() {
|
||||||
return commentsExtractor;
|
return commentsExtractor;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void setCommentsExtractor(CommentsExtractor commentsExtractor) {
|
public void setCommentsExtractor(final CommentsExtractor commentsExtractor) {
|
||||||
this.commentsExtractor = commentsExtractor;
|
this.commentsExtractor = commentsExtractor;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @apiNote Warning: This method is experimental and may get removed in a future release.
|
||||||
|
* @return <code>true</code> if the comments are disabled otherwise <code>false</code> (default)
|
||||||
|
* @see CommentsExtractor#isCommentsDisabled()
|
||||||
|
*/
|
||||||
|
public boolean isCommentsDisabled() {
|
||||||
|
return commentsDisabled;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @apiNote Warning: This method is experimental and may get removed in a future release.
|
||||||
|
* @param commentsDisabled <code>true</code> if the comments are disabled otherwise <code>false</code>
|
||||||
|
*/
|
||||||
|
public void setCommentsDisabled(final boolean commentsDisabled) {
|
||||||
|
this.commentsDisabled = commentsDisabled;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -23,9 +23,11 @@ import javax.annotation.Nonnull;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.UnsupportedEncodingException;
|
import java.io.UnsupportedEncodingException;
|
||||||
import java.net.URLEncoder;
|
import java.net.URLEncoder;
|
||||||
|
import java.util.Collections;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
import java.util.Optional;
|
||||||
import java.util.regex.Pattern;
|
import java.util.regex.Pattern;
|
||||||
|
|
||||||
import static java.util.Collections.singletonList;
|
import static java.util.Collections.singletonList;
|
||||||
|
@ -41,53 +43,108 @@ public class YoutubeCommentsExtractor extends CommentsExtractor {
|
||||||
private String ytClientName;
|
private String ytClientName;
|
||||||
private String responseBody;
|
private String responseBody;
|
||||||
|
|
||||||
public YoutubeCommentsExtractor(StreamingService service, ListLinkHandler uiHandler) {
|
/**
|
||||||
|
* Caching mechanism and holder of the commentsDisabled value.
|
||||||
|
* <br/>
|
||||||
|
* Initial value = empty -> unknown if comments are disabled or not<br/>
|
||||||
|
* Some method calls {@link YoutubeCommentsExtractor#findInitialCommentsToken()}
|
||||||
|
* -> value is set<br/>
|
||||||
|
* If the method or another one that is depending on disabled comments
|
||||||
|
* is now called again, the method execution can avoid unnecessary calls
|
||||||
|
*/
|
||||||
|
private Optional<Boolean> optCommentsDisabled = Optional.empty();
|
||||||
|
|
||||||
|
public YoutubeCommentsExtractor(
|
||||||
|
final StreamingService service,
|
||||||
|
final ListLinkHandler uiHandler) {
|
||||||
super(service, uiHandler);
|
super(service, uiHandler);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public InfoItemsPage<CommentsInfoItem> getInitialPage() throws IOException, ExtractionException {
|
public InfoItemsPage<CommentsInfoItem> getInitialPage()
|
||||||
String commentsTokenInside = findValue(responseBody, "sectionListRenderer", "}");
|
throws IOException, ExtractionException {
|
||||||
if (!commentsTokenInside.contains("continuation\":\"")) {
|
|
||||||
commentsTokenInside = findValue(responseBody, "commentSectionRenderer", "}");
|
// Check if findInitialCommentsToken was already called and optCommentsDisabled initialized
|
||||||
|
if (optCommentsDisabled.orElse(false)) {
|
||||||
|
return getInfoItemsPageForDisabledComments();
|
||||||
}
|
}
|
||||||
final String commentsToken = findValue(commentsTokenInside, "continuation\":\"", "\"");
|
|
||||||
|
// Get the token
|
||||||
|
final String commentsToken = findInitialCommentsToken();
|
||||||
|
// Check if the comments have been disabled
|
||||||
|
if (optCommentsDisabled.get()) {
|
||||||
|
return getInfoItemsPageForDisabledComments();
|
||||||
|
}
|
||||||
|
|
||||||
return getPage(getNextPage(commentsToken));
|
return getPage(getNextPage(commentsToken));
|
||||||
}
|
}
|
||||||
|
|
||||||
private Page getNextPage(JsonObject ajaxJson) throws ParsingException {
|
/**
|
||||||
|
* Finds the initial comments token and initializes commentsDisabled.
|
||||||
|
* @return the continuation token or null if none was found
|
||||||
|
*/
|
||||||
|
private String findInitialCommentsToken() {
|
||||||
|
final String continuationStartPattern = "continuation\":\"";
|
||||||
|
|
||||||
|
String commentsTokenInside = findValue(responseBody, "sectionListRenderer", "}");
|
||||||
|
if (commentsTokenInside == null || !commentsTokenInside.contains(continuationStartPattern)) {
|
||||||
|
commentsTokenInside = findValue(responseBody, "commentSectionRenderer", "}");
|
||||||
|
}
|
||||||
|
|
||||||
|
// If no continuation token is found the comments are disabled
|
||||||
|
if (commentsTokenInside == null || !commentsTokenInside.contains(continuationStartPattern)) {
|
||||||
|
optCommentsDisabled = Optional.of(true);
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
// If a continuation token is found there are >= 0 comments
|
||||||
|
final String commentsToken = findValue(commentsTokenInside, continuationStartPattern, "\"");
|
||||||
|
|
||||||
|
optCommentsDisabled = Optional.of(false);
|
||||||
|
|
||||||
|
return commentsToken;
|
||||||
|
}
|
||||||
|
|
||||||
|
private InfoItemsPage<CommentsInfoItem> getInfoItemsPageForDisabledComments() {
|
||||||
|
return new InfoItemsPage<>(Collections.emptyList(), null, Collections.emptyList());
|
||||||
|
}
|
||||||
|
|
||||||
|
private Page getNextPage(final JsonObject ajaxJson) throws ParsingException {
|
||||||
final JsonArray arr;
|
final JsonArray arr;
|
||||||
try {
|
try {
|
||||||
arr = JsonUtils.getArray(ajaxJson, "response.continuationContents.commentSectionContinuation.continuations");
|
arr = JsonUtils.getArray(ajaxJson, "response.continuationContents.commentSectionContinuation.continuations");
|
||||||
} catch (Exception e) {
|
} catch (final Exception e) {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
if (arr.isEmpty()) {
|
if (arr.isEmpty()) {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
String continuation;
|
final String continuation;
|
||||||
try {
|
try {
|
||||||
continuation = JsonUtils.getString(arr.getObject(0), "nextContinuationData.continuation");
|
continuation = JsonUtils.getString(arr.getObject(0), "nextContinuationData.continuation");
|
||||||
} catch (Exception e) {
|
} catch (final Exception e) {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
return getNextPage(continuation);
|
return getNextPage(continuation);
|
||||||
}
|
}
|
||||||
|
|
||||||
private Page getNextPage(String continuation) throws ParsingException {
|
private Page getNextPage(final String continuation) throws ParsingException {
|
||||||
Map<String, String> params = new HashMap<>();
|
final Map<String, String> params = new HashMap<>();
|
||||||
params.put("action_get_comments", "1");
|
params.put("action_get_comments", "1");
|
||||||
params.put("pbj", "1");
|
params.put("pbj", "1");
|
||||||
params.put("ctoken", continuation);
|
params.put("ctoken", continuation);
|
||||||
try {
|
try {
|
||||||
return new Page("https://m.youtube.com/watch_comment?" + getDataString(params));
|
return new Page("https://m.youtube.com/watch_comment?" + getDataString(params));
|
||||||
} catch (UnsupportedEncodingException e) {
|
} catch (final UnsupportedEncodingException e) {
|
||||||
throw new ParsingException("Could not get next page url", e);
|
throw new ParsingException("Could not get next page url", e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public InfoItemsPage<CommentsInfoItem> getPage(final Page page) throws IOException, ExtractionException {
|
public InfoItemsPage<CommentsInfoItem> getPage(final Page page) throws IOException, ExtractionException {
|
||||||
|
if (optCommentsDisabled.orElse(false)) {
|
||||||
|
return getInfoItemsPageForDisabledComments();
|
||||||
|
}
|
||||||
if (page == null || isNullOrEmpty(page.getUrl())) {
|
if (page == null || isNullOrEmpty(page.getUrl())) {
|
||||||
throw new IllegalArgumentException("Page doesn't contain an URL");
|
throw new IllegalArgumentException("Page doesn't contain an URL");
|
||||||
}
|
}
|
||||||
|
@ -96,7 +153,7 @@ public class YoutubeCommentsExtractor extends CommentsExtractor {
|
||||||
final JsonObject ajaxJson;
|
final JsonObject ajaxJson;
|
||||||
try {
|
try {
|
||||||
ajaxJson = JsonParser.array().from(ajaxResponse).getObject(1);
|
ajaxJson = JsonParser.array().from(ajaxResponse).getObject(1);
|
||||||
} catch (Exception e) {
|
} catch (final Exception e) {
|
||||||
throw new ParsingException("Could not parse json data for comments", e);
|
throw new ParsingException("Could not parse json data for comments", e);
|
||||||
}
|
}
|
||||||
final CommentsInfoItemsCollector collector = new CommentsInfoItemsCollector(getServiceId());
|
final CommentsInfoItemsCollector collector = new CommentsInfoItemsCollector(getServiceId());
|
||||||
|
@ -104,31 +161,32 @@ public class YoutubeCommentsExtractor extends CommentsExtractor {
|
||||||
return new InfoItemsPage<>(collector, getNextPage(ajaxJson));
|
return new InfoItemsPage<>(collector, getNextPage(ajaxJson));
|
||||||
}
|
}
|
||||||
|
|
||||||
private void collectCommentsFrom(CommentsInfoItemsCollector collector, JsonObject ajaxJson) throws ParsingException {
|
private void collectCommentsFrom(final CommentsInfoItemsCollector collector, final JsonObject ajaxJson) throws ParsingException {
|
||||||
JsonArray contents;
|
final JsonArray contents;
|
||||||
try {
|
try {
|
||||||
contents = JsonUtils.getArray(ajaxJson, "response.continuationContents.commentSectionContinuation.items");
|
contents = JsonUtils.getArray(ajaxJson, "response.continuationContents.commentSectionContinuation.items");
|
||||||
} catch (Exception e) {
|
} catch (final Exception e) {
|
||||||
//no comments
|
//no comments
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
List<Object> comments;
|
final List<Object> comments;
|
||||||
try {
|
try {
|
||||||
comments = JsonUtils.getValues(contents, "commentThreadRenderer.comment.commentRenderer");
|
comments = JsonUtils.getValues(contents, "commentThreadRenderer.comment.commentRenderer");
|
||||||
} catch (Exception e) {
|
} catch (final Exception e) {
|
||||||
throw new ParsingException("unable to get parse youtube comments", e);
|
throw new ParsingException("unable to get parse youtube comments", e);
|
||||||
}
|
}
|
||||||
|
|
||||||
for (Object c : comments) {
|
for (final Object c : comments) {
|
||||||
if (c instanceof JsonObject) {
|
if (c instanceof JsonObject) {
|
||||||
CommentsInfoItemExtractor extractor = new YoutubeCommentsInfoItemExtractor((JsonObject) c, getUrl(), getTimeAgoParser());
|
final CommentsInfoItemExtractor extractor =
|
||||||
|
new YoutubeCommentsInfoItemExtractor((JsonObject) c, getUrl(), getTimeAgoParser());
|
||||||
collector.commit(extractor);
|
collector.commit(extractor);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void onFetchPage(@Nonnull Downloader downloader) throws IOException, ExtractionException {
|
public void onFetchPage(@Nonnull final Downloader downloader) throws IOException, ExtractionException {
|
||||||
final Map<String, List<String>> requestHeaders = new HashMap<>();
|
final Map<String, List<String>> requestHeaders = new HashMap<>();
|
||||||
requestHeaders.put("User-Agent", singletonList(USER_AGENT));
|
requestHeaders.put("User-Agent", singletonList(USER_AGENT));
|
||||||
final Response response = downloader.get(getUrl(), requestHeaders, getExtractorLocalization());
|
final Response response = downloader.get(getUrl(), requestHeaders, getExtractorLocalization());
|
||||||
|
@ -138,8 +196,8 @@ public class YoutubeCommentsExtractor extends CommentsExtractor {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
private String makeAjaxRequest(String siteUrl) throws IOException, ReCaptchaException {
|
private String makeAjaxRequest(final String siteUrl) throws IOException, ReCaptchaException {
|
||||||
Map<String, List<String>> requestHeaders = new HashMap<>();
|
final Map<String, List<String>> requestHeaders = new HashMap<>();
|
||||||
requestHeaders.put("Accept", singletonList("*/*"));
|
requestHeaders.put("Accept", singletonList("*/*"));
|
||||||
requestHeaders.put("User-Agent", singletonList(USER_AGENT));
|
requestHeaders.put("User-Agent", singletonList(USER_AGENT));
|
||||||
requestHeaders.put("X-YouTube-Client-Version", singletonList(ytClientVersion));
|
requestHeaders.put("X-YouTube-Client-Version", singletonList(ytClientVersion));
|
||||||
|
@ -147,14 +205,15 @@ public class YoutubeCommentsExtractor extends CommentsExtractor {
|
||||||
return getDownloader().get(siteUrl, requestHeaders, getExtractorLocalization()).responseBody();
|
return getDownloader().get(siteUrl, requestHeaders, getExtractorLocalization()).responseBody();
|
||||||
}
|
}
|
||||||
|
|
||||||
private String getDataString(Map<String, String> params) throws UnsupportedEncodingException {
|
private String getDataString(final Map<String, String> params) throws UnsupportedEncodingException {
|
||||||
StringBuilder result = new StringBuilder();
|
final StringBuilder result = new StringBuilder();
|
||||||
boolean first = true;
|
boolean first = true;
|
||||||
for (Map.Entry<String, String> entry : params.entrySet()) {
|
for (final Map.Entry<String, String> entry : params.entrySet()) {
|
||||||
if (first)
|
if (first) {
|
||||||
first = false;
|
first = false;
|
||||||
else
|
} else {
|
||||||
result.append("&");
|
result.append("&");
|
||||||
|
}
|
||||||
result.append(URLEncoder.encode(entry.getKey(), UTF_8));
|
result.append(URLEncoder.encode(entry.getKey(), UTF_8));
|
||||||
result.append("=");
|
result.append("=");
|
||||||
result.append(URLEncoder.encode(entry.getValue(), UTF_8));
|
result.append(URLEncoder.encode(entry.getValue(), UTF_8));
|
||||||
|
@ -163,8 +222,28 @@ public class YoutubeCommentsExtractor extends CommentsExtractor {
|
||||||
}
|
}
|
||||||
|
|
||||||
private String findValue(final String doc, final String start, final String end) {
|
private String findValue(final String doc, final String start, final String end) {
|
||||||
final int beginIndex = doc.indexOf(start) + start.length();
|
int beginIndex = doc.indexOf(start);
|
||||||
|
// Start string was not found
|
||||||
|
if (beginIndex == -1) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
beginIndex = beginIndex + start.length();
|
||||||
final int endIndex = doc.indexOf(end, beginIndex);
|
final int endIndex = doc.indexOf(end, beginIndex);
|
||||||
|
// End string was not found
|
||||||
|
if (endIndex == -1) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
return doc.substring(beginIndex, endIndex);
|
return doc.substring(beginIndex, endIndex);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean isCommentsDisabled() {
|
||||||
|
// Check if commentsDisabled has to be initialized
|
||||||
|
if (!optCommentsDisabled.isPresent()) {
|
||||||
|
// Initialize commentsDisabled
|
||||||
|
this.findInitialCommentsToken();
|
||||||
|
}
|
||||||
|
|
||||||
|
return optCommentsDisabled.get();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue