From 180836c180780e470ef83ece140bd20582fb75e2 Mon Sep 17 00:00:00 2001 From: wojcik-online Date: Wed, 2 Oct 2019 02:02:01 -0300 Subject: [PATCH] Base Implementation: Parse the upload date of StreamInfoItems In the format '2 days ago' (in English) on a YouTube channel page. (Parser extensible to other pages.) --- .../newpipe/extractor/StreamingService.java | 10 +- .../soundcloud/SoundcloudParsingHelper.java | 21 ++- .../soundcloud/SoundcloudStreamExtractor.java | 2 +- .../SoundcloudStreamInfoItemExtractor.java | 17 +- .../extractors/YoutubeChannelExtractor.java | 5 +- .../extractors/YoutubePlaylistExtractor.java | 7 +- .../extractors/YoutubeSearchExtractor.java | 3 +- .../extractors/YoutubeStreamExtractor.java | 6 +- .../YoutubeStreamInfoItemExtractor.java | 39 ++++- .../extractors/YoutubeTrendingExtractor.java | 5 +- .../extractor/stream/StreamInfoItem.java | 39 +++-- .../stream/StreamInfoItemExtractor.java | 30 +++- .../stream/StreamInfoItemsCollector.java | 7 +- .../extractor/stream/TimeAgoParser.java | 158 ++++++++++++++++++ .../java/org/schabi/newpipe/Downloader.java | 2 + .../extractor/services/DefaultTests.java | 9 + 16 files changed, 316 insertions(+), 44 deletions(-) create mode 100644 extractor/src/main/java/org/schabi/newpipe/extractor/stream/TimeAgoParser.java diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/StreamingService.java b/extractor/src/main/java/org/schabi/newpipe/extractor/StreamingService.java index 26e84da97..c613d34f2 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/StreamingService.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/StreamingService.java @@ -17,6 +17,7 @@ import org.schabi.newpipe.extractor.linkhandler.SearchQueryHandlerFactory; import org.schabi.newpipe.extractor.playlist.PlaylistExtractor; import org.schabi.newpipe.extractor.search.SearchExtractor; import org.schabi.newpipe.extractor.stream.StreamExtractor; +import org.schabi.newpipe.extractor.stream.TimeAgoParser; import 
org.schabi.newpipe.extractor.subscription.SubscriptionExtractor; import org.schabi.newpipe.extractor.utils.Localization; @@ -222,7 +223,7 @@ public abstract class StreamingService { public ChannelExtractor getChannelExtractor(ListLinkHandler linkHandler) throws ExtractionException { return getChannelExtractor(linkHandler, NewPipe.getPreferredLocalization()); } - + public PlaylistExtractor getPlaylistExtractor(ListLinkHandler linkHandler) throws ExtractionException { return getPlaylistExtractor(linkHandler, NewPipe.getPreferredLocalization()); } @@ -230,7 +231,7 @@ public abstract class StreamingService { public StreamExtractor getStreamExtractor(LinkHandler linkHandler) throws ExtractionException { return getStreamExtractor(linkHandler, NewPipe.getPreferredLocalization()); } - + public CommentsExtractor getCommentsExtractor(ListLinkHandler urlIdHandler) throws ExtractionException { return getCommentsExtractor(urlIdHandler, NewPipe.getPreferredLocalization()); } @@ -287,7 +288,7 @@ public abstract class StreamingService { public StreamExtractor getStreamExtractor(String url) throws ExtractionException { return getStreamExtractor(getStreamLHFactory().fromUrl(url), NewPipe.getPreferredLocalization()); } - + public CommentsExtractor getCommentsExtractor(String url) throws ExtractionException { ListLinkHandlerFactory llhf = getCommentsLHFactory(); if(null == llhf) { @@ -296,6 +297,9 @@ public abstract class StreamingService { return getCommentsExtractor(llhf.fromUrl(url), NewPipe.getPreferredLocalization()); } + public TimeAgoParser getTimeAgoParser() { + return new TimeAgoParser(TimeAgoParser.DEFAULT_AGO_PHRASES); + } /** * Figures out where the link is pointing to (a channel, a video, a playlist, etc.) 
diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/SoundcloudParsingHelper.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/SoundcloudParsingHelper.java index 96b7fcea7..d4a8123c0 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/SoundcloudParsingHelper.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/SoundcloudParsingHelper.java @@ -79,23 +79,22 @@ public class SoundcloudParsingHelper { return dl.head(apiUrl).getResponseCode() == 200; } - public static String toDateString(String time) throws ParsingException { + static Date parseDate(String time) throws ParsingException { try { - Date date; - // Have two date formats, one for the 'api.soundc...' and the other 'api-v2.soundc...'. + return new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'").parse(time); + } catch (ParseException e1) { try { - date = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'").parse(time); - } catch (Exception e) { - date = new SimpleDateFormat("yyyy/MM/dd HH:mm:ss +0000").parse(time); + return new SimpleDateFormat("yyyy/MM/dd HH:mm:ss +0000").parse(time); + } catch (ParseException e2) { + throw new ParsingException(e1.getMessage(), e2); } - - SimpleDateFormat newDateFormat = new SimpleDateFormat("yyyy-MM-dd"); - return newDateFormat.format(date); - } catch (ParseException e) { - throw new ParsingException(e.getMessage(), e); } } + static String toTextualDate(String time) throws ParsingException { + return new SimpleDateFormat("yyyy-MM-dd").format(parseDate(time)); + } + /** * Call the endpoint "/resolve" of the api.

* diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/SoundcloudStreamExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/SoundcloudStreamExtractor.java index f5860d835..b842d85af 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/SoundcloudStreamExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/SoundcloudStreamExtractor.java @@ -51,7 +51,7 @@ public class SoundcloudStreamExtractor extends StreamExtractor { @Nonnull @Override public String getUploadDate() throws ParsingException { - return SoundcloudParsingHelper.toDateString(track.getString("created_at")); + return SoundcloudParsingHelper.toTextualDate(track.getString("created_at")); } @Nonnull diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/SoundcloudStreamInfoItemExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/SoundcloudStreamInfoItemExtractor.java index 09455e193..a81421f01 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/SoundcloudStreamInfoItemExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/SoundcloudStreamInfoItemExtractor.java @@ -5,6 +5,8 @@ import org.schabi.newpipe.extractor.exceptions.ParsingException; import org.schabi.newpipe.extractor.stream.StreamInfoItemExtractor; import org.schabi.newpipe.extractor.stream.StreamType; +import java.util.Calendar; + import static org.schabi.newpipe.extractor.utils.Utils.replaceHttpWithHttps; public class SoundcloudStreamInfoItemExtractor implements StreamInfoItemExtractor { @@ -41,8 +43,19 @@ public class SoundcloudStreamInfoItemExtractor implements StreamInfoItemExtracto } @Override - public String getUploadDate() throws ParsingException { - return SoundcloudParsingHelper.toDateString(itemObject.getString("created_at")); + public String 
getTextualUploadDate() throws ParsingException { + return SoundcloudParsingHelper.toTextualDate(getCreatedAt()); + } + + @Override + public Calendar getUploadDate() throws ParsingException { + Calendar uploadTime = Calendar.getInstance(); + uploadTime.setTime(SoundcloudParsingHelper.parseDate(getCreatedAt())); + return uploadTime; + } + + private String getCreatedAt() { + return itemObject.getString("created_at"); } @Override diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelExtractor.java index 9641d3931..14c5a9ed1 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeChannelExtractor.java @@ -18,6 +18,7 @@ import org.schabi.newpipe.extractor.linkhandler.ListLinkHandler; import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeParsingHelper; import org.schabi.newpipe.extractor.stream.StreamInfoItem; import org.schabi.newpipe.extractor.stream.StreamInfoItemsCollector; +import org.schabi.newpipe.extractor.stream.TimeAgoParser; import org.schabi.newpipe.extractor.utils.DonationLinkHelper; import org.schabi.newpipe.extractor.utils.Localization; import org.schabi.newpipe.extractor.utils.Parser; @@ -53,6 +54,8 @@ public class YoutubeChannelExtractor extends ChannelExtractor { private static final String CHANNEL_FEED_BASE = "https://www.youtube.com/feeds/videos.xml?channel_id="; private static final String CHANNEL_URL_PARAMETERS = "/videos?view=0&flow=list&sort=dd&live_view=10000&gl=US&hl=en"; + private final TimeAgoParser timeAgoParser = getService().getTimeAgoParser(); + private Document doc; public YoutubeChannelExtractor(StreamingService service, ListLinkHandler linkHandler, Localization localization) { @@ -230,7 +233,7 @@ 
public class YoutubeChannelExtractor extends ChannelExtractor { final String uploaderUrl = getUrl(); for (final Element li : element.children()) { if (li.select("div[class=\"feed-item-dismissable\"]").first() != null) { - collector.commit(new YoutubeStreamInfoItemExtractor(li) { + collector.commit(new YoutubeStreamInfoItemExtractor(li, timeAgoParser) { @Override public String getUrl() throws ParsingException { try { diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubePlaylistExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubePlaylistExtractor.java index 4480b38af..0d5668e9f 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubePlaylistExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubePlaylistExtractor.java @@ -18,6 +18,7 @@ import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeParsingH import org.schabi.newpipe.extractor.stream.StreamInfoItem; import org.schabi.newpipe.extractor.stream.StreamInfoItemsCollector; import org.schabi.newpipe.extractor.stream.StreamType; +import org.schabi.newpipe.extractor.stream.TimeAgoParser; import org.schabi.newpipe.extractor.utils.Localization; import org.schabi.newpipe.extractor.utils.Utils; @@ -28,6 +29,8 @@ import java.io.IOException; @SuppressWarnings("WeakerAccess") public class YoutubePlaylistExtractor extends PlaylistExtractor { + private final TimeAgoParser timeAgoParser = getService().getTimeAgoParser(); + private Document doc; public YoutubePlaylistExtractor(StreamingService service, ListLinkHandler linkHandler, Localization localization) { @@ -192,7 +195,7 @@ public class YoutubePlaylistExtractor extends PlaylistExtractor { continue; } - collector.commit(new YoutubeStreamInfoItemExtractor(li) { + collector.commit(new YoutubeStreamInfoItemExtractor(li, timeAgoParser) { public Element uploaderLink; 
@Override @@ -258,7 +261,7 @@ public class YoutubePlaylistExtractor extends PlaylistExtractor { } @Override - public String getUploadDate() throws ParsingException { + public String getTextualUploadDate() throws ParsingException { return ""; } diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeSearchExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeSearchExtractor.java index 0a954607f..c3f234aaf 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeSearchExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeSearchExtractor.java @@ -9,6 +9,7 @@ import org.schabi.newpipe.extractor.InfoItem; import org.schabi.newpipe.extractor.StreamingService; import org.schabi.newpipe.extractor.exceptions.ExtractionException; import org.schabi.newpipe.extractor.exceptions.ParsingException; +import org.schabi.newpipe.extractor.linkhandler.SearchQueryHandler; import org.schabi.newpipe.extractor.search.InfoItemsSearchCollector; import org.schabi.newpipe.extractor.search.SearchExtractor; import org.schabi.newpipe.extractor.linkhandler.SearchQueryHandler; @@ -129,7 +130,7 @@ public class YoutubeSearchExtractor extends SearchExtractor { // video item type } else if ((el = item.select("div[class*=\"yt-lockup-video\"]").first()) != null) { - collector.commit(new YoutubeStreamInfoItemExtractor(el)); + collector.commit(new YoutubeStreamInfoItemExtractor(el, getService().getTimeAgoParser())); } else if ((el = item.select("div[class*=\"yt-lockup-channel\"]").first()) != null) { collector.commit(new YoutubeChannelInfoItemExtractor(el)); } else if ((el = item.select("div[class*=\"yt-lockup-playlist\"]").first()) != null && diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java 
b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java index fa866cd5b..4c33a258c 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java @@ -75,6 +75,8 @@ public class YoutubeStreamExtractor extends StreamExtractor { /*//////////////////////////////////////////////////////////////////////////*/ + private final TimeAgoParser timeAgoParser = getService().getTimeAgoParser(); + private Document doc; @Nullable private JsonObject playerArgs; @@ -932,7 +934,7 @@ public class YoutubeStreamExtractor extends StreamExtractor { * This is encapsulated in a StreamInfoItem object, which is a subset of the fields in a full StreamInfo. */ private StreamInfoItemExtractor extractVideoPreviewInfo(final Element li) { - return new YoutubeStreamInfoItemExtractor(li) { + return new YoutubeStreamInfoItemExtractor(li, timeAgoParser) { @Override public String getUrl() throws ParsingException { @@ -959,7 +961,7 @@ public class YoutubeStreamExtractor extends StreamExtractor { } @Override - public String getUploadDate() throws ParsingException { + public String getTextualUploadDate() throws ParsingException { return ""; } diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamInfoItemExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamInfoItemExtractor.java index 5bfeaa38e..763c25380 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamInfoItemExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamInfoItemExtractor.java @@ -1,12 +1,17 @@ package org.schabi.newpipe.extractor.services.youtube.extractors; import org.jsoup.nodes.Element; 
+import org.jsoup.select.Elements; import org.schabi.newpipe.extractor.exceptions.ParsingException; import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeParsingHelper; import org.schabi.newpipe.extractor.stream.StreamInfoItemExtractor; import org.schabi.newpipe.extractor.stream.StreamType; +import org.schabi.newpipe.extractor.stream.TimeAgoParser; import org.schabi.newpipe.extractor.utils.Utils; +import javax.annotation.Nullable; +import java.util.Calendar; + /* * Copyright (C) Christian Schabesberger 2016 * YoutubeStreamInfoItemExtractor.java is part of NewPipe. @@ -28,9 +33,18 @@ import org.schabi.newpipe.extractor.utils.Utils; public class YoutubeStreamInfoItemExtractor implements StreamInfoItemExtractor { private final Element item; + private final TimeAgoParser timeAgoParser; - public YoutubeStreamInfoItemExtractor(Element item) { + private String cachedUploadDate; + + /** + * Creates an extractor of StreamInfoItems from a YouTube page. + * @param item The page element + * @param timeAgoParser A parser of the textual dates or {@code null}. 
+ */ + public YoutubeStreamInfoItemExtractor(Element item, @Nullable TimeAgoParser timeAgoParser) { this.item = item; + this.timeAgoParser = timeAgoParser; } @Override @@ -126,20 +140,35 @@ public class YoutubeStreamInfoItemExtractor implements StreamInfoItemExtractor { } @Override - public String getUploadDate() throws ParsingException { + public String getTextualUploadDate() throws ParsingException { + if (cachedUploadDate != null) { + return cachedUploadDate; + } + try { Element meta = item.select("div[class=\"yt-lockup-meta\"]").first(); if (meta == null) return ""; - Element li = meta.select("li").first(); - if(li == null) return ""; + final Elements li = meta.select("li"); + if (li.isEmpty()) return ""; - return meta.select("li").first().text(); + return cachedUploadDate = li.first().text(); } catch (Exception e) { throw new ParsingException("Could not get upload date", e); } } + @Override + public Calendar getUploadDate() throws ParsingException { + String textualUploadDate = getTextualUploadDate(); + if (timeAgoParser != null + && textualUploadDate != null && !"".equals(textualUploadDate)) { + return timeAgoParser.parse(textualUploadDate); + } else { + return null; + } + } + @Override public long getViewCount() throws ParsingException { String input; diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeTrendingExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeTrendingExtractor.java index dc7cc7e69..31e743542 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeTrendingExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeTrendingExtractor.java @@ -35,12 +35,15 @@ import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeParsingH import org.schabi.newpipe.extractor.stream.StreamInfoItem; import 
org.schabi.newpipe.extractor.stream.StreamInfoItemsCollector; import org.schabi.newpipe.extractor.utils.Localization; +import org.schabi.newpipe.extractor.stream.TimeAgoParser; import javax.annotation.Nonnull; import java.io.IOException; public class YoutubeTrendingExtractor extends KioskExtractor { + private final TimeAgoParser timeAgoParser = getService().getTimeAgoParser(); + private Document doc; public YoutubeTrendingExtractor(StreamingService service, @@ -93,7 +96,7 @@ public class YoutubeTrendingExtractor extends KioskExtractor { for(Element ul : uls) { for(final Element li : ul.children()) { final Element el = li.select("div[class*=\"yt-lockup-dismissable\"]").first(); - collector.commit(new YoutubeStreamInfoItemExtractor(li) { + collector.commit(new YoutubeStreamInfoItemExtractor(li, timeAgoParser) { @Override public String getUrl() throws ParsingException { try { diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/stream/StreamInfoItem.java b/extractor/src/main/java/org/schabi/newpipe/extractor/stream/StreamInfoItem.java index 375739160..3f786baec 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/stream/StreamInfoItem.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/stream/StreamInfoItem.java @@ -22,6 +22,8 @@ package org.schabi.newpipe.extractor.stream; import org.schabi.newpipe.extractor.InfoItem; +import java.util.Calendar; + /** * Info object for previews of unopened videos, eg search results, related videos */ @@ -29,7 +31,8 @@ public class StreamInfoItem extends InfoItem { private final StreamType streamType; private String uploaderName; - private String uploadDate; + private String textualUploadDate; + private Calendar uploadDate; private long viewCount = -1; private long duration = -1; @@ -52,14 +55,6 @@ public class StreamInfoItem extends InfoItem { this.uploaderName = uploader_name; } - public String getUploadDate() { - return uploadDate; - } - - public void setUploadDate(String upload_date) { - 
this.uploadDate = upload_date; - } - public long getViewCount() { return viewCount; } @@ -84,12 +79,36 @@ public class StreamInfoItem extends InfoItem { this.uploaderUrl = uploaderUrl; } + /** + * @return The original textual upload date as returned by the streaming service. + * @see #getUploadDate() + */ + public String getTextualUploadDate() { + return textualUploadDate; + } + + public void setTextualUploadDate(String upload_date) { + this.textualUploadDate = upload_date; + } + + /** + * @return The (approximated) date and time this item was uploaded or {@code null}. + * @see #getTextualUploadDate() + */ + public Calendar getUploadDate() { + return uploadDate; + } + + public void setUploadDate(Calendar uploadDate) { + this.uploadDate = uploadDate; + } + @Override public String toString() { return "StreamInfoItem{" + "streamType=" + streamType + ", uploaderName='" + uploaderName + '\'' + - ", uploadDate='" + uploadDate + '\'' + + ", textualUploadDate='" + textualUploadDate + '\'' + ", viewCount=" + viewCount + ", duration=" + duration + ", uploaderUrl='" + uploaderUrl + '\'' + diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/stream/StreamInfoItemExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/stream/StreamInfoItemExtractor.java index 3bc3a9e3c..4184131e3 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/stream/StreamInfoItemExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/stream/StreamInfoItemExtractor.java @@ -3,6 +3,8 @@ package org.schabi.newpipe.extractor.stream; import org.schabi.newpipe.extractor.InfoItemExtractor; import org.schabi.newpipe.extractor.exceptions.ParsingException; +import java.util.Calendar; + /* * Created by Christian Schabesberger on 28.02.16. 
* @@ -64,10 +66,30 @@ public interface StreamInfoItemExtractor extends InfoItemExtractor { String getUploaderUrl() throws ParsingException; /** - * Extract the uploader name - * @return the uploader name - * @throws ParsingException thrown if there is an error in the extraction + * Extract the textual upload date of this item. + * The original textual date provided by the service may be used if it is short; + * otherwise the format "yyyy-MM-dd" or a locale-specific version is preferred. + * + * @return The original textual upload date. + * @throws ParsingException if there is an error in the extraction + * @see #getUploadDate() */ - String getUploadDate() throws ParsingException; + String getTextualUploadDate() throws ParsingException; + + /** + * Extracts the upload date and time of this item and parses it. + *

+ * If the service doesn't provide an exact time, an approximation can be returned. + * The approximation should be marked by setting seconds and milliseconds to zero. + *
+ * If the service doesn't provide any date at all, then {@code null} should be returned. + *

+ * + * @return The (approximated) date and time this item was uploaded or {@code null}. + * @throws ParsingException if there is an error in the extraction + * or the extracted date couldn't be parsed. + * @see #getTextualUploadDate() + */ + Calendar getUploadDate() throws ParsingException; } diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/stream/StreamInfoItemsCollector.java b/extractor/src/main/java/org/schabi/newpipe/extractor/stream/StreamInfoItemsCollector.java index b838e7ac8..85dce8f0b 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/stream/StreamInfoItemsCollector.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/stream/StreamInfoItemsCollector.java @@ -61,10 +61,15 @@ public class StreamInfoItemsCollector extends InfoItemsCollector> DEFAULT_AGO_PHRASES = + new EnumMap<>(TimeAgoUnit.class); + + private final Map> agoPhrases; + + private final Calendar consistentNow; + + /** + * Creates a helper to parse upload dates in the format '2 days ago'. + *

+ * Instantiate a new {@link TimeAgoParser} every time you extract a new batch of items. + *

+ * @param agoPhrases A set of phrases how to recognize the time units in a given language. + */ + public TimeAgoParser(Map> agoPhrases) { + this.agoPhrases = agoPhrases; + consistentNow = Calendar.getInstance(); + } + + /** + * Parses a textual date in the format '2 days ago' into a Calendar representation. + * Beginning with days ago, marks the date as approximated by setting minutes, seconds + * and milliseconds to 0. + * @param textualDate The original date as provided by the streaming service + * @return The parsed (approximated) time + * @throws ParsingException if the time unit could not be recognized + */ + public Calendar parse(String textualDate) throws ParsingException { + int timeAgoAmount; + try { + timeAgoAmount = parseTimeAgoAmount(textualDate); + } catch (NumberFormatException e) { + // If there is no valid number in the textual date, + // assume it is 1 (as in 'a second ago'). + timeAgoAmount = 1; + } + + TimeAgoUnit timeAgoUnit = parseTimeAgoUnit(textualDate); + return getCalendar(timeAgoAmount, timeAgoUnit); + } + + private int parseTimeAgoAmount(String textualDate) throws NumberFormatException { + String timeValueStr = textualDate.replaceAll("\\D+", ""); + return Integer.parseInt(timeValueStr); + } + + private TimeAgoUnit parseTimeAgoUnit(String textualDate) throws ParsingException { + for (TimeAgoUnit timeAgoUnit : agoPhrases.keySet()) { + for (String agoPhrase : agoPhrases.get(timeAgoUnit)) { + if (textualDate.toLowerCase().contains(agoPhrase.toLowerCase())){ + return timeAgoUnit; + } + } + } + + throw new ParsingException("Unable to parse the date: " + textualDate); + } + + private Calendar getCalendar(int timeAgoAmount, TimeAgoUnit timeAgoUnit) { + Calendar calendarTime = getNow(); + + switch (timeAgoUnit) { + case SECONDS: + calendarTime.add(Calendar.SECOND, -timeAgoAmount); + break; + + case MINUTES: + calendarTime.add(Calendar.MINUTE, -timeAgoAmount); + break; + + case HOURS: + calendarTime.add(Calendar.HOUR_OF_DAY, -timeAgoAmount); + 
break; + + case DAYS: + calendarTime.add(Calendar.DAY_OF_MONTH, -timeAgoAmount); + markApproximatedTime(calendarTime); + break; + + case WEEKS: + calendarTime.add(Calendar.WEEK_OF_YEAR, -timeAgoAmount); + markApproximatedTime(calendarTime); + break; + + case MONTHS: + calendarTime.add(Calendar.MONTH, -timeAgoAmount); + markApproximatedTime(calendarTime); + break; + + case YEARS: + calendarTime.add(Calendar.YEAR, -timeAgoAmount); + // Prevent `PrettyTime` from showing '12 months ago'. + calendarTime.add(Calendar.DAY_OF_MONTH, -1); + markApproximatedTime(calendarTime); + break; + } + + return calendarTime; + } + + private Calendar getNow() { + return (Calendar) consistentNow.clone(); + } + + /** + * Marks the time as approximated by setting minutes, seconds and milliseconds to 0. + * @param calendarTime Time to be marked as approximated + */ + private void markApproximatedTime(Calendar calendarTime) { + calendarTime.set(Calendar.MINUTE, 0); + calendarTime.set(Calendar.SECOND, 0); + calendarTime.set(Calendar.MILLISECOND, 0); + } + + static { + DEFAULT_AGO_PHRASES.put(TimeAgoUnit.SECONDS, Collections.singleton("sec")); + DEFAULT_AGO_PHRASES.put(TimeAgoUnit.MINUTES, Collections.singleton("min")); + DEFAULT_AGO_PHRASES.put(TimeAgoUnit.HOURS, Collections.singleton("hour")); + DEFAULT_AGO_PHRASES.put(TimeAgoUnit.DAYS, Collections.singleton("day")); + DEFAULT_AGO_PHRASES.put(TimeAgoUnit.WEEKS, Collections.singleton("week")); + DEFAULT_AGO_PHRASES.put(TimeAgoUnit.MONTHS, Collections.singleton("month")); + DEFAULT_AGO_PHRASES.put(TimeAgoUnit.YEARS, Collections.singleton("year")); + } + + public enum TimeAgoUnit { + SECONDS, + MINUTES, + HOURS, + DAYS, + WEEKS, + MONTHS, + YEARS, + } +} diff --git a/extractor/src/test/java/org/schabi/newpipe/Downloader.java b/extractor/src/test/java/org/schabi/newpipe/Downloader.java index 3091c74bb..172b2ca78 100644 --- a/extractor/src/test/java/org/schabi/newpipe/Downloader.java +++ 
b/extractor/src/test/java/org/schabi/newpipe/Downloader.java @@ -41,6 +41,7 @@ import static java.util.Collections.singletonList; public class Downloader implements org.schabi.newpipe.extractor.Downloader { private static final String USER_AGENT = "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:43.0) Gecko/20100101 Firefox/43.0"; + private static final String DEFAULT_HTTP_ACCEPT_LANGUAGE = "en"; private static String mCookies = ""; private static Downloader instance = null; @@ -171,6 +172,7 @@ public class Downloader implements org.schabi.newpipe.extractor.Downloader { URL url = new URL(siteUrl); HttpsURLConnection con = (HttpsURLConnection) url.openConnection(); // HttpsURLConnection con = NetCipher.getHttpsURLConnection(url); + con.setRequestProperty("Accept-Language", DEFAULT_HTTP_ACCEPT_LANGUAGE); return dl(con); } diff --git a/extractor/src/test/java/org/schabi/newpipe/extractor/services/DefaultTests.java b/extractor/src/test/java/org/schabi/newpipe/extractor/services/DefaultTests.java index c2355a19f..3b839fd89 100644 --- a/extractor/src/test/java/org/schabi/newpipe/extractor/services/DefaultTests.java +++ b/extractor/src/test/java/org/schabi/newpipe/extractor/services/DefaultTests.java @@ -4,6 +4,7 @@ import org.schabi.newpipe.extractor.InfoItem; import org.schabi.newpipe.extractor.ListExtractor; import org.schabi.newpipe.extractor.stream.StreamInfoItem; +import java.util.Calendar; import java.util.List; import static org.junit.Assert.*; @@ -27,6 +28,14 @@ public final class DefaultTests { StreamInfoItem streamInfoItem = (StreamInfoItem) item; assertNotEmpty("Uploader name not set: " + item, streamInfoItem.getUploaderName()); assertNotEmpty("Uploader url not set: " + item, streamInfoItem.getUploaderUrl()); + + final String textualUploadDate = streamInfoItem.getTextualUploadDate(); + if (textualUploadDate != null && !textualUploadDate.isEmpty()) { + final Calendar uploadDate = streamInfoItem.getUploadDate(); + assertNotNull("No parsed upload date", uploadDate); + 
assertTrue("Upload date not in the past", uploadDate.before(Calendar.getInstance())); + } + } } }