From 51f9b39953da0d5d495c878e0df7c4fae62c036f Mon Sep 17 00:00:00 2001 From: AudricV <74829229+AudricV@users.noreply.github.com> Date: Mon, 20 Feb 2023 16:28:36 +0100 Subject: [PATCH] [YouTube] Fix partial non-extraction of no views string in stream items As the "No views" string is returned in the case there is no view on a video, a number cannot be parsed in this case, so -1 was returned. This string is now detected in all methods to get the view count of a stream. --- .../YoutubeStreamInfoItemExtractor.java | 50 ++++++++++++++----- 1 file changed, 37 insertions(+), 13 deletions(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamInfoItemExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamInfoItemExtractor.java index ca21ab2f3..c5842323b 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamInfoItemExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamInfoItemExtractor.java @@ -18,6 +18,7 @@ import java.time.Instant; import java.time.OffsetDateTime; import java.time.ZoneOffset; import java.time.format.DateTimeFormatter; +import java.util.regex.Pattern; import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getTextFromObject; import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getThumbnailUrlFromInfoItem; @@ -43,6 +44,11 @@ import static org.schabi.newpipe.extractor.utils.Utils.isNullOrEmpty; */ public class YoutubeStreamInfoItemExtractor implements StreamInfoItemExtractor { + + private static final Pattern ACCESSIBILITY_DATA_VIEW_COUNT_REGEX = + Pattern.compile("([\\d,]+) views$"); + private static final String NO_VIEWS_LOWERCASE = "no views"; + private final JsonObject videoInfo; private final TimeAgoParser timeAgoParser; private StreamType cachedStreamType; @@ -289,7 +295,7 @@ public class YoutubeStreamInfoItemExtractor implements StreamInfoItemExtractor { if (!isNullOrEmpty(viewCount)) { try { // These approaches are language dependent - if (viewCount.toLowerCase().contains("no views")) { + if (viewCount.toLowerCase().contains(NO_VIEWS_LOWERCASE)) { return 0; } else if (viewCount.toLowerCase().contains("recommended")) { return -1; @@ -297,7 +303,7 @@ public class YoutubeStreamInfoItemExtractor implements StreamInfoItemExtractor { return Long.parseLong(Utils.removeNonDigitCharacters(viewCount)); } catch (final Exception ignored) { - // Ignore all exceptions, as we can fallback to accessibility data + // Ignore all exceptions, as we can fall back to accessibility data } } @@ -305,14 +311,20 @@ public class YoutubeStreamInfoItemExtractor implements StreamInfoItemExtractor { // livestream (the view count is returned and not the count of people watching currently // the livestream) if (getStreamType() != StreamType.LIVE_STREAM) { + final String videoInfoTitleAccessibilityData = videoInfo.getObject("title") + .getObject("accessibility") + .getObject("accessibilityData") + .getString("label", ""); + + if (videoInfoTitleAccessibilityData.toLowerCase().endsWith(NO_VIEWS_LOWERCASE)) { + return 0; + } + try { return Long.parseLong(Utils.removeNonDigitCharacters( // This approach is language dependent - Parser.matchGroup1("([\\d,]+) views$", - videoInfo.getObject("title") - .getObject("accessibility") - .getObject("accessibilityData") - .getString("label", "")))); + Parser.matchGroup1(ACCESSIBILITY_DATA_VIEW_COUNT_REGEX, + videoInfoTitleAccessibilityData))); } catch (final Exception ignored) { // Ignore all exceptions, as the view count can be hidden by creators, and so // cannot be found in this case @@ -323,18 +335,30 @@ public class YoutubeStreamInfoItemExtractor implements StreamInfoItemExtractor { try { // Returned in playlists, in the form: view count separator upload date if (videoInfo.has("videoInfo")) { - return Utils.mixedNumberWordToLong(videoInfo.getObject("videoInfo") + final String videoInfoViewCountText = videoInfo.getObject("videoInfo") .getArray("runs") .getObject(0) - .getString("text")); + .getString("text", ""); + if (videoInfoViewCountText.toLowerCase().contains(NO_VIEWS_LOWERCASE)) { + return 0; + } + + return Utils.mixedNumberWordToLong(videoInfoViewCountText); } // Returned everywhere but in playlists, used by the website to show view counts if (videoInfo.has("shortViewCountText")) { - return Utils.mixedNumberWordToLong(videoInfo.getObject("shortViewCountText") - .getArray("runs") - .getObject(0) - .getString("text")); + final String shortVideoViewCountText = + getTextFromObject(videoInfo.getObject("shortViewCountText")); + if (isNullOrEmpty(shortVideoViewCountText)) { + return -1; + } + + if (shortVideoViewCountText.toLowerCase().contains(NO_VIEWS_LOWERCASE)) { + return 0; + } + + return Utils.mixedNumberWordToLong(shortVideoViewCountText); } } catch (final Exception ignored) { // Ignore all exceptions, as the view count can be hidden by creators, and so cannot be