Base Implementation: Parse the upload date of StreamInfoItems
In the format '2 days ago' (in English) on a YouTube channel page. (Parser extensible to other pages.)
This commit is contained in:
parent
514ed7bdc1
commit
180836c180
|
@ -17,6 +17,7 @@ import org.schabi.newpipe.extractor.linkhandler.SearchQueryHandlerFactory;
|
||||||
import org.schabi.newpipe.extractor.playlist.PlaylistExtractor;
|
import org.schabi.newpipe.extractor.playlist.PlaylistExtractor;
|
||||||
import org.schabi.newpipe.extractor.search.SearchExtractor;
|
import org.schabi.newpipe.extractor.search.SearchExtractor;
|
||||||
import org.schabi.newpipe.extractor.stream.StreamExtractor;
|
import org.schabi.newpipe.extractor.stream.StreamExtractor;
|
||||||
|
import org.schabi.newpipe.extractor.stream.TimeAgoParser;
|
||||||
import org.schabi.newpipe.extractor.subscription.SubscriptionExtractor;
|
import org.schabi.newpipe.extractor.subscription.SubscriptionExtractor;
|
||||||
import org.schabi.newpipe.extractor.utils.Localization;
|
import org.schabi.newpipe.extractor.utils.Localization;
|
||||||
|
|
||||||
|
@ -222,7 +223,7 @@ public abstract class StreamingService {
|
||||||
public ChannelExtractor getChannelExtractor(ListLinkHandler linkHandler) throws ExtractionException {
|
public ChannelExtractor getChannelExtractor(ListLinkHandler linkHandler) throws ExtractionException {
|
||||||
return getChannelExtractor(linkHandler, NewPipe.getPreferredLocalization());
|
return getChannelExtractor(linkHandler, NewPipe.getPreferredLocalization());
|
||||||
}
|
}
|
||||||
|
|
||||||
public PlaylistExtractor getPlaylistExtractor(ListLinkHandler linkHandler) throws ExtractionException {
|
public PlaylistExtractor getPlaylistExtractor(ListLinkHandler linkHandler) throws ExtractionException {
|
||||||
return getPlaylistExtractor(linkHandler, NewPipe.getPreferredLocalization());
|
return getPlaylistExtractor(linkHandler, NewPipe.getPreferredLocalization());
|
||||||
}
|
}
|
||||||
|
@ -230,7 +231,7 @@ public abstract class StreamingService {
|
||||||
public StreamExtractor getStreamExtractor(LinkHandler linkHandler) throws ExtractionException {
|
public StreamExtractor getStreamExtractor(LinkHandler linkHandler) throws ExtractionException {
|
||||||
return getStreamExtractor(linkHandler, NewPipe.getPreferredLocalization());
|
return getStreamExtractor(linkHandler, NewPipe.getPreferredLocalization());
|
||||||
}
|
}
|
||||||
|
|
||||||
public CommentsExtractor getCommentsExtractor(ListLinkHandler urlIdHandler) throws ExtractionException {
|
public CommentsExtractor getCommentsExtractor(ListLinkHandler urlIdHandler) throws ExtractionException {
|
||||||
return getCommentsExtractor(urlIdHandler, NewPipe.getPreferredLocalization());
|
return getCommentsExtractor(urlIdHandler, NewPipe.getPreferredLocalization());
|
||||||
}
|
}
|
||||||
|
@ -287,7 +288,7 @@ public abstract class StreamingService {
|
||||||
public StreamExtractor getStreamExtractor(String url) throws ExtractionException {
|
public StreamExtractor getStreamExtractor(String url) throws ExtractionException {
|
||||||
return getStreamExtractor(getStreamLHFactory().fromUrl(url), NewPipe.getPreferredLocalization());
|
return getStreamExtractor(getStreamLHFactory().fromUrl(url), NewPipe.getPreferredLocalization());
|
||||||
}
|
}
|
||||||
|
|
||||||
public CommentsExtractor getCommentsExtractor(String url) throws ExtractionException {
|
public CommentsExtractor getCommentsExtractor(String url) throws ExtractionException {
|
||||||
ListLinkHandlerFactory llhf = getCommentsLHFactory();
|
ListLinkHandlerFactory llhf = getCommentsLHFactory();
|
||||||
if(null == llhf) {
|
if(null == llhf) {
|
||||||
|
@ -296,6 +297,9 @@ public abstract class StreamingService {
|
||||||
return getCommentsExtractor(llhf.fromUrl(url), NewPipe.getPreferredLocalization());
|
return getCommentsExtractor(llhf.fromUrl(url), NewPipe.getPreferredLocalization());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public TimeAgoParser getTimeAgoParser() {
|
||||||
|
return new TimeAgoParser(TimeAgoParser.DEFAULT_AGO_PHRASES);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Figures out where the link is pointing to (a channel, a video, a playlist, etc.)
|
* Figures out where the link is pointing to (a channel, a video, a playlist, etc.)
|
||||||
|
|
|
@ -79,23 +79,22 @@ public class SoundcloudParsingHelper {
|
||||||
return dl.head(apiUrl).getResponseCode() == 200;
|
return dl.head(apiUrl).getResponseCode() == 200;
|
||||||
}
|
}
|
||||||
|
|
||||||
public static String toDateString(String time) throws ParsingException {
|
static Date parseDate(String time) throws ParsingException {
|
||||||
try {
|
try {
|
||||||
Date date;
|
return new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'").parse(time);
|
||||||
// Have two date formats, one for the 'api.soundc...' and the other 'api-v2.soundc...'.
|
} catch (ParseException e1) {
|
||||||
try {
|
try {
|
||||||
date = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'").parse(time);
|
return new SimpleDateFormat("yyyy/MM/dd HH:mm:ss +0000").parse(time);
|
||||||
} catch (Exception e) {
|
} catch (ParseException e2) {
|
||||||
date = new SimpleDateFormat("yyyy/MM/dd HH:mm:ss +0000").parse(time);
|
throw new ParsingException(e1.getMessage(), e2);
|
||||||
}
|
}
|
||||||
|
|
||||||
SimpleDateFormat newDateFormat = new SimpleDateFormat("yyyy-MM-dd");
|
|
||||||
return newDateFormat.format(date);
|
|
||||||
} catch (ParseException e) {
|
|
||||||
throw new ParsingException(e.getMessage(), e);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static String toTextualDate(String time) throws ParsingException {
|
||||||
|
return new SimpleDateFormat("yyyy-MM-dd").format(parseDate(time));
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Call the endpoint "/resolve" of the api.<p>
|
* Call the endpoint "/resolve" of the api.<p>
|
||||||
*
|
*
|
||||||
|
|
|
@ -51,7 +51,7 @@ public class SoundcloudStreamExtractor extends StreamExtractor {
|
||||||
@Nonnull
|
@Nonnull
|
||||||
@Override
|
@Override
|
||||||
public String getUploadDate() throws ParsingException {
|
public String getUploadDate() throws ParsingException {
|
||||||
return SoundcloudParsingHelper.toDateString(track.getString("created_at"));
|
return SoundcloudParsingHelper.toTextualDate(track.getString("created_at"));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Nonnull
|
@Nonnull
|
||||||
|
|
|
@ -5,6 +5,8 @@ import org.schabi.newpipe.extractor.exceptions.ParsingException;
|
||||||
import org.schabi.newpipe.extractor.stream.StreamInfoItemExtractor;
|
import org.schabi.newpipe.extractor.stream.StreamInfoItemExtractor;
|
||||||
import org.schabi.newpipe.extractor.stream.StreamType;
|
import org.schabi.newpipe.extractor.stream.StreamType;
|
||||||
|
|
||||||
|
import java.util.Calendar;
|
||||||
|
|
||||||
import static org.schabi.newpipe.extractor.utils.Utils.replaceHttpWithHttps;
|
import static org.schabi.newpipe.extractor.utils.Utils.replaceHttpWithHttps;
|
||||||
|
|
||||||
public class SoundcloudStreamInfoItemExtractor implements StreamInfoItemExtractor {
|
public class SoundcloudStreamInfoItemExtractor implements StreamInfoItemExtractor {
|
||||||
|
@ -41,8 +43,19 @@ public class SoundcloudStreamInfoItemExtractor implements StreamInfoItemExtracto
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String getUploadDate() throws ParsingException {
|
public String getTextualUploadDate() throws ParsingException {
|
||||||
return SoundcloudParsingHelper.toDateString(itemObject.getString("created_at"));
|
return SoundcloudParsingHelper.toTextualDate(getCreatedAt());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Calendar getUploadDate() throws ParsingException {
|
||||||
|
Calendar uploadTime = Calendar.getInstance();
|
||||||
|
uploadTime.setTime(SoundcloudParsingHelper.parseDate(getCreatedAt()));
|
||||||
|
return uploadTime;
|
||||||
|
}
|
||||||
|
|
||||||
|
private String getCreatedAt() {
|
||||||
|
return itemObject.getString("created_at");
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -18,6 +18,7 @@ import org.schabi.newpipe.extractor.linkhandler.ListLinkHandler;
|
||||||
import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeParsingHelper;
|
import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeParsingHelper;
|
||||||
import org.schabi.newpipe.extractor.stream.StreamInfoItem;
|
import org.schabi.newpipe.extractor.stream.StreamInfoItem;
|
||||||
import org.schabi.newpipe.extractor.stream.StreamInfoItemsCollector;
|
import org.schabi.newpipe.extractor.stream.StreamInfoItemsCollector;
|
||||||
|
import org.schabi.newpipe.extractor.stream.TimeAgoParser;
|
||||||
import org.schabi.newpipe.extractor.utils.DonationLinkHelper;
|
import org.schabi.newpipe.extractor.utils.DonationLinkHelper;
|
||||||
import org.schabi.newpipe.extractor.utils.Localization;
|
import org.schabi.newpipe.extractor.utils.Localization;
|
||||||
import org.schabi.newpipe.extractor.utils.Parser;
|
import org.schabi.newpipe.extractor.utils.Parser;
|
||||||
|
@ -53,6 +54,8 @@ public class YoutubeChannelExtractor extends ChannelExtractor {
|
||||||
private static final String CHANNEL_FEED_BASE = "https://www.youtube.com/feeds/videos.xml?channel_id=";
|
private static final String CHANNEL_FEED_BASE = "https://www.youtube.com/feeds/videos.xml?channel_id=";
|
||||||
private static final String CHANNEL_URL_PARAMETERS = "/videos?view=0&flow=list&sort=dd&live_view=10000&gl=US&hl=en";
|
private static final String CHANNEL_URL_PARAMETERS = "/videos?view=0&flow=list&sort=dd&live_view=10000&gl=US&hl=en";
|
||||||
|
|
||||||
|
private final TimeAgoParser timeAgoParser = getService().getTimeAgoParser();
|
||||||
|
|
||||||
private Document doc;
|
private Document doc;
|
||||||
|
|
||||||
public YoutubeChannelExtractor(StreamingService service, ListLinkHandler linkHandler, Localization localization) {
|
public YoutubeChannelExtractor(StreamingService service, ListLinkHandler linkHandler, Localization localization) {
|
||||||
|
@ -230,7 +233,7 @@ public class YoutubeChannelExtractor extends ChannelExtractor {
|
||||||
final String uploaderUrl = getUrl();
|
final String uploaderUrl = getUrl();
|
||||||
for (final Element li : element.children()) {
|
for (final Element li : element.children()) {
|
||||||
if (li.select("div[class=\"feed-item-dismissable\"]").first() != null) {
|
if (li.select("div[class=\"feed-item-dismissable\"]").first() != null) {
|
||||||
collector.commit(new YoutubeStreamInfoItemExtractor(li) {
|
collector.commit(new YoutubeStreamInfoItemExtractor(li, timeAgoParser) {
|
||||||
@Override
|
@Override
|
||||||
public String getUrl() throws ParsingException {
|
public String getUrl() throws ParsingException {
|
||||||
try {
|
try {
|
||||||
|
|
|
@ -18,6 +18,7 @@ import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeParsingH
|
||||||
import org.schabi.newpipe.extractor.stream.StreamInfoItem;
|
import org.schabi.newpipe.extractor.stream.StreamInfoItem;
|
||||||
import org.schabi.newpipe.extractor.stream.StreamInfoItemsCollector;
|
import org.schabi.newpipe.extractor.stream.StreamInfoItemsCollector;
|
||||||
import org.schabi.newpipe.extractor.stream.StreamType;
|
import org.schabi.newpipe.extractor.stream.StreamType;
|
||||||
|
import org.schabi.newpipe.extractor.stream.TimeAgoParser;
|
||||||
import org.schabi.newpipe.extractor.utils.Localization;
|
import org.schabi.newpipe.extractor.utils.Localization;
|
||||||
import org.schabi.newpipe.extractor.utils.Utils;
|
import org.schabi.newpipe.extractor.utils.Utils;
|
||||||
|
|
||||||
|
@ -28,6 +29,8 @@ import java.io.IOException;
|
||||||
@SuppressWarnings("WeakerAccess")
|
@SuppressWarnings("WeakerAccess")
|
||||||
public class YoutubePlaylistExtractor extends PlaylistExtractor {
|
public class YoutubePlaylistExtractor extends PlaylistExtractor {
|
||||||
|
|
||||||
|
private final TimeAgoParser timeAgoParser = getService().getTimeAgoParser();
|
||||||
|
|
||||||
private Document doc;
|
private Document doc;
|
||||||
|
|
||||||
public YoutubePlaylistExtractor(StreamingService service, ListLinkHandler linkHandler, Localization localization) {
|
public YoutubePlaylistExtractor(StreamingService service, ListLinkHandler linkHandler, Localization localization) {
|
||||||
|
@ -192,7 +195,7 @@ public class YoutubePlaylistExtractor extends PlaylistExtractor {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
collector.commit(new YoutubeStreamInfoItemExtractor(li) {
|
collector.commit(new YoutubeStreamInfoItemExtractor(li, timeAgoParser) {
|
||||||
public Element uploaderLink;
|
public Element uploaderLink;
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -258,7 +261,7 @@ public class YoutubePlaylistExtractor extends PlaylistExtractor {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String getUploadDate() throws ParsingException {
|
public String getTextualUploadDate() throws ParsingException {
|
||||||
return "";
|
return "";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -9,6 +9,7 @@ import org.schabi.newpipe.extractor.InfoItem;
|
||||||
import org.schabi.newpipe.extractor.StreamingService;
|
import org.schabi.newpipe.extractor.StreamingService;
|
||||||
import org.schabi.newpipe.extractor.exceptions.ExtractionException;
|
import org.schabi.newpipe.extractor.exceptions.ExtractionException;
|
||||||
import org.schabi.newpipe.extractor.exceptions.ParsingException;
|
import org.schabi.newpipe.extractor.exceptions.ParsingException;
|
||||||
|
import org.schabi.newpipe.extractor.linkhandler.SearchQueryHandler;
|
||||||
import org.schabi.newpipe.extractor.search.InfoItemsSearchCollector;
|
import org.schabi.newpipe.extractor.search.InfoItemsSearchCollector;
|
||||||
import org.schabi.newpipe.extractor.search.SearchExtractor;
|
import org.schabi.newpipe.extractor.search.SearchExtractor;
|
||||||
import org.schabi.newpipe.extractor.linkhandler.SearchQueryHandler;
|
import org.schabi.newpipe.extractor.linkhandler.SearchQueryHandler;
|
||||||
|
@ -129,7 +130,7 @@ public class YoutubeSearchExtractor extends SearchExtractor {
|
||||||
|
|
||||||
// video item type
|
// video item type
|
||||||
} else if ((el = item.select("div[class*=\"yt-lockup-video\"]").first()) != null) {
|
} else if ((el = item.select("div[class*=\"yt-lockup-video\"]").first()) != null) {
|
||||||
collector.commit(new YoutubeStreamInfoItemExtractor(el));
|
collector.commit(new YoutubeStreamInfoItemExtractor(el, getService().getTimeAgoParser()));
|
||||||
} else if ((el = item.select("div[class*=\"yt-lockup-channel\"]").first()) != null) {
|
} else if ((el = item.select("div[class*=\"yt-lockup-channel\"]").first()) != null) {
|
||||||
collector.commit(new YoutubeChannelInfoItemExtractor(el));
|
collector.commit(new YoutubeChannelInfoItemExtractor(el));
|
||||||
} else if ((el = item.select("div[class*=\"yt-lockup-playlist\"]").first()) != null &&
|
} else if ((el = item.select("div[class*=\"yt-lockup-playlist\"]").first()) != null &&
|
||||||
|
|
|
@ -75,6 +75,8 @@ public class YoutubeStreamExtractor extends StreamExtractor {
|
||||||
|
|
||||||
/*//////////////////////////////////////////////////////////////////////////*/
|
/*//////////////////////////////////////////////////////////////////////////*/
|
||||||
|
|
||||||
|
private final TimeAgoParser timeAgoParser = getService().getTimeAgoParser();
|
||||||
|
|
||||||
private Document doc;
|
private Document doc;
|
||||||
@Nullable
|
@Nullable
|
||||||
private JsonObject playerArgs;
|
private JsonObject playerArgs;
|
||||||
|
@ -932,7 +934,7 @@ public class YoutubeStreamExtractor extends StreamExtractor {
|
||||||
* This is encapsulated in a StreamInfoItem object, which is a subset of the fields in a full StreamInfo.
|
* This is encapsulated in a StreamInfoItem object, which is a subset of the fields in a full StreamInfo.
|
||||||
*/
|
*/
|
||||||
private StreamInfoItemExtractor extractVideoPreviewInfo(final Element li) {
|
private StreamInfoItemExtractor extractVideoPreviewInfo(final Element li) {
|
||||||
return new YoutubeStreamInfoItemExtractor(li) {
|
return new YoutubeStreamInfoItemExtractor(li, timeAgoParser) {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String getUrl() throws ParsingException {
|
public String getUrl() throws ParsingException {
|
||||||
|
@ -959,7 +961,7 @@ public class YoutubeStreamExtractor extends StreamExtractor {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String getUploadDate() throws ParsingException {
|
public String getTextualUploadDate() throws ParsingException {
|
||||||
return "";
|
return "";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -1,12 +1,17 @@
|
||||||
package org.schabi.newpipe.extractor.services.youtube.extractors;
|
package org.schabi.newpipe.extractor.services.youtube.extractors;
|
||||||
|
|
||||||
import org.jsoup.nodes.Element;
|
import org.jsoup.nodes.Element;
|
||||||
|
import org.jsoup.select.Elements;
|
||||||
import org.schabi.newpipe.extractor.exceptions.ParsingException;
|
import org.schabi.newpipe.extractor.exceptions.ParsingException;
|
||||||
import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeParsingHelper;
|
import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeParsingHelper;
|
||||||
import org.schabi.newpipe.extractor.stream.StreamInfoItemExtractor;
|
import org.schabi.newpipe.extractor.stream.StreamInfoItemExtractor;
|
||||||
import org.schabi.newpipe.extractor.stream.StreamType;
|
import org.schabi.newpipe.extractor.stream.StreamType;
|
||||||
|
import org.schabi.newpipe.extractor.stream.TimeAgoParser;
|
||||||
import org.schabi.newpipe.extractor.utils.Utils;
|
import org.schabi.newpipe.extractor.utils.Utils;
|
||||||
|
|
||||||
|
import javax.annotation.Nullable;
|
||||||
|
import java.util.Calendar;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Copyright (C) Christian Schabesberger 2016 <chris.schabesberger@mailbox.org>
|
* Copyright (C) Christian Schabesberger 2016 <chris.schabesberger@mailbox.org>
|
||||||
* YoutubeStreamInfoItemExtractor.java is part of NewPipe.
|
* YoutubeStreamInfoItemExtractor.java is part of NewPipe.
|
||||||
|
@ -28,9 +33,18 @@ import org.schabi.newpipe.extractor.utils.Utils;
|
||||||
public class YoutubeStreamInfoItemExtractor implements StreamInfoItemExtractor {
|
public class YoutubeStreamInfoItemExtractor implements StreamInfoItemExtractor {
|
||||||
|
|
||||||
private final Element item;
|
private final Element item;
|
||||||
|
private final TimeAgoParser timeAgoParser;
|
||||||
|
|
||||||
public YoutubeStreamInfoItemExtractor(Element item) {
|
private String cachedUploadDate;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Creates an extractor of StreamInfoItems from a YouTube page.
|
||||||
|
* @param item The page element
|
||||||
|
* @param timeAgoParser A parser of the textual dates or {@code null}.
|
||||||
|
*/
|
||||||
|
public YoutubeStreamInfoItemExtractor(Element item, @Nullable TimeAgoParser timeAgoParser) {
|
||||||
this.item = item;
|
this.item = item;
|
||||||
|
this.timeAgoParser = timeAgoParser;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -126,20 +140,35 @@ public class YoutubeStreamInfoItemExtractor implements StreamInfoItemExtractor {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String getUploadDate() throws ParsingException {
|
public String getTextualUploadDate() throws ParsingException {
|
||||||
|
if (cachedUploadDate != null) {
|
||||||
|
return cachedUploadDate;
|
||||||
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
Element meta = item.select("div[class=\"yt-lockup-meta\"]").first();
|
Element meta = item.select("div[class=\"yt-lockup-meta\"]").first();
|
||||||
if (meta == null) return "";
|
if (meta == null) return "";
|
||||||
|
|
||||||
Element li = meta.select("li").first();
|
final Elements li = meta.select("li");
|
||||||
if(li == null) return "";
|
if (li.isEmpty()) return "";
|
||||||
|
|
||||||
return meta.select("li").first().text();
|
return cachedUploadDate = li.first().text();
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
throw new ParsingException("Could not get upload date", e);
|
throw new ParsingException("Could not get upload date", e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Calendar getUploadDate() throws ParsingException {
|
||||||
|
String textualUploadDate = getTextualUploadDate();
|
||||||
|
if (timeAgoParser != null
|
||||||
|
&& textualUploadDate != null && !"".equals(textualUploadDate)) {
|
||||||
|
return timeAgoParser.parse(textualUploadDate);
|
||||||
|
} else {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public long getViewCount() throws ParsingException {
|
public long getViewCount() throws ParsingException {
|
||||||
String input;
|
String input;
|
||||||
|
|
|
@ -35,12 +35,15 @@ import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeParsingH
|
||||||
import org.schabi.newpipe.extractor.stream.StreamInfoItem;
|
import org.schabi.newpipe.extractor.stream.StreamInfoItem;
|
||||||
import org.schabi.newpipe.extractor.stream.StreamInfoItemsCollector;
|
import org.schabi.newpipe.extractor.stream.StreamInfoItemsCollector;
|
||||||
import org.schabi.newpipe.extractor.utils.Localization;
|
import org.schabi.newpipe.extractor.utils.Localization;
|
||||||
|
import org.schabi.newpipe.extractor.stream.TimeAgoParser;
|
||||||
|
|
||||||
import javax.annotation.Nonnull;
|
import javax.annotation.Nonnull;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
|
||||||
public class YoutubeTrendingExtractor extends KioskExtractor<StreamInfoItem> {
|
public class YoutubeTrendingExtractor extends KioskExtractor<StreamInfoItem> {
|
||||||
|
|
||||||
|
private final TimeAgoParser timeAgoParser = getService().getTimeAgoParser();
|
||||||
|
|
||||||
private Document doc;
|
private Document doc;
|
||||||
|
|
||||||
public YoutubeTrendingExtractor(StreamingService service,
|
public YoutubeTrendingExtractor(StreamingService service,
|
||||||
|
@ -93,7 +96,7 @@ public class YoutubeTrendingExtractor extends KioskExtractor<StreamInfoItem> {
|
||||||
for(Element ul : uls) {
|
for(Element ul : uls) {
|
||||||
for(final Element li : ul.children()) {
|
for(final Element li : ul.children()) {
|
||||||
final Element el = li.select("div[class*=\"yt-lockup-dismissable\"]").first();
|
final Element el = li.select("div[class*=\"yt-lockup-dismissable\"]").first();
|
||||||
collector.commit(new YoutubeStreamInfoItemExtractor(li) {
|
collector.commit(new YoutubeStreamInfoItemExtractor(li, timeAgoParser) {
|
||||||
@Override
|
@Override
|
||||||
public String getUrl() throws ParsingException {
|
public String getUrl() throws ParsingException {
|
||||||
try {
|
try {
|
||||||
|
|
|
@ -22,6 +22,8 @@ package org.schabi.newpipe.extractor.stream;
|
||||||
|
|
||||||
import org.schabi.newpipe.extractor.InfoItem;
|
import org.schabi.newpipe.extractor.InfoItem;
|
||||||
|
|
||||||
|
import java.util.Calendar;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Info object for previews of unopened videos, eg search results, related videos
|
* Info object for previews of unopened videos, eg search results, related videos
|
||||||
*/
|
*/
|
||||||
|
@ -29,7 +31,8 @@ public class StreamInfoItem extends InfoItem {
|
||||||
private final StreamType streamType;
|
private final StreamType streamType;
|
||||||
|
|
||||||
private String uploaderName;
|
private String uploaderName;
|
||||||
private String uploadDate;
|
private String textualUploadDate;
|
||||||
|
private Calendar uploadDate;
|
||||||
private long viewCount = -1;
|
private long viewCount = -1;
|
||||||
private long duration = -1;
|
private long duration = -1;
|
||||||
|
|
||||||
|
@ -52,14 +55,6 @@ public class StreamInfoItem extends InfoItem {
|
||||||
this.uploaderName = uploader_name;
|
this.uploaderName = uploader_name;
|
||||||
}
|
}
|
||||||
|
|
||||||
public String getUploadDate() {
|
|
||||||
return uploadDate;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void setUploadDate(String upload_date) {
|
|
||||||
this.uploadDate = upload_date;
|
|
||||||
}
|
|
||||||
|
|
||||||
public long getViewCount() {
|
public long getViewCount() {
|
||||||
return viewCount;
|
return viewCount;
|
||||||
}
|
}
|
||||||
|
@ -84,12 +79,36 @@ public class StreamInfoItem extends InfoItem {
|
||||||
this.uploaderUrl = uploaderUrl;
|
this.uploaderUrl = uploaderUrl;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return The original textual upload date as returned by the streaming service.
|
||||||
|
* @see #getUploadDate()
|
||||||
|
*/
|
||||||
|
public String getTextualUploadDate() {
|
||||||
|
return textualUploadDate;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setTextualUploadDate(String upload_date) {
|
||||||
|
this.textualUploadDate = upload_date;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return The (approximated) date and time this item was uploaded or {@code null}.
|
||||||
|
* @see #getTextualUploadDate()
|
||||||
|
*/
|
||||||
|
public Calendar getUploadDate() {
|
||||||
|
return uploadDate;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setUploadDate(Calendar uploadDate) {
|
||||||
|
this.uploadDate = uploadDate;
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String toString() {
|
public String toString() {
|
||||||
return "StreamInfoItem{" +
|
return "StreamInfoItem{" +
|
||||||
"streamType=" + streamType +
|
"streamType=" + streamType +
|
||||||
", uploaderName='" + uploaderName + '\'' +
|
", uploaderName='" + uploaderName + '\'' +
|
||||||
", uploadDate='" + uploadDate + '\'' +
|
", textualUploadDate='" + textualUploadDate + '\'' +
|
||||||
", viewCount=" + viewCount +
|
", viewCount=" + viewCount +
|
||||||
", duration=" + duration +
|
", duration=" + duration +
|
||||||
", uploaderUrl='" + uploaderUrl + '\'' +
|
", uploaderUrl='" + uploaderUrl + '\'' +
|
||||||
|
|
|
@ -3,6 +3,8 @@ package org.schabi.newpipe.extractor.stream;
|
||||||
import org.schabi.newpipe.extractor.InfoItemExtractor;
|
import org.schabi.newpipe.extractor.InfoItemExtractor;
|
||||||
import org.schabi.newpipe.extractor.exceptions.ParsingException;
|
import org.schabi.newpipe.extractor.exceptions.ParsingException;
|
||||||
|
|
||||||
|
import java.util.Calendar;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Created by Christian Schabesberger on 28.02.16.
|
* Created by Christian Schabesberger on 28.02.16.
|
||||||
*
|
*
|
||||||
|
@ -64,10 +66,30 @@ public interface StreamInfoItemExtractor extends InfoItemExtractor {
|
||||||
String getUploaderUrl() throws ParsingException;
|
String getUploaderUrl() throws ParsingException;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Extract the uploader name
|
* Extract the textual upload date of this item.
|
||||||
* @return the uploader name
|
* The original textual date provided by the service may be used if it is short;
|
||||||
* @throws ParsingException thrown if there is an error in the extraction
|
* otherwise the format "yyyy-MM-dd" or a locale-specific version is preferred.
|
||||||
|
*
|
||||||
|
* @return The original textual upload date.
|
||||||
|
* @throws ParsingException if there is an error in the extraction
|
||||||
|
* @see #getUploadDate()
|
||||||
*/
|
*/
|
||||||
String getUploadDate() throws ParsingException;
|
String getTextualUploadDate() throws ParsingException;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Extracts the upload date and time of this item and parses it.
|
||||||
|
* <p>
|
||||||
|
* If the service doesn't provide an exact time, an approximation can be returned.
|
||||||
|
* The approximation should be marked by setting seconds and milliseconds to zero.
|
||||||
|
* <br>
|
||||||
|
* If the service doesn't provide any date at all, then {@code null} should be returned.
|
||||||
|
* </p>
|
||||||
|
*
|
||||||
|
* @return The (approximated) date and time this item was uploaded or {@code null}.
|
||||||
|
* @throws ParsingException if there is an error in the extraction
|
||||||
|
* or the extracted date couldn't be parsed.
|
||||||
|
* @see #getTextualUploadDate()
|
||||||
|
*/
|
||||||
|
Calendar getUploadDate() throws ParsingException;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -61,10 +61,15 @@ public class StreamInfoItemsCollector extends InfoItemsCollector<StreamInfoItem,
|
||||||
addError(e);
|
addError(e);
|
||||||
}
|
}
|
||||||
try {
|
try {
|
||||||
resultItem.setUploadDate(extractor.getUploadDate());
|
resultItem.setTextualUploadDate(extractor.getTextualUploadDate());
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
addError(e);
|
addError(e);
|
||||||
}
|
}
|
||||||
|
try {
|
||||||
|
resultItem.setUploadDate(extractor.getUploadDate());
|
||||||
|
} catch (ParsingException e) {
|
||||||
|
addError(e);
|
||||||
|
}
|
||||||
try {
|
try {
|
||||||
resultItem.setViewCount(extractor.getViewCount());
|
resultItem.setViewCount(extractor.getViewCount());
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
|
|
|
@ -0,0 +1,158 @@
|
||||||
|
package org.schabi.newpipe.extractor.stream;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Created by wojcik.online on 2018-01-25.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import org.schabi.newpipe.extractor.exceptions.ParsingException;
|
||||||
|
|
||||||
|
import java.util.Calendar;
|
||||||
|
import java.util.Collection;
|
||||||
|
import java.util.Collections;
|
||||||
|
import java.util.EnumMap;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A helper class that is meant to be used by services that need to parse upload dates in the
|
||||||
|
* format '2 days ago' or similar.
|
||||||
|
*/
|
||||||
|
public class TimeAgoParser {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A set of English phrases that are contained in the time units.
|
||||||
|
* (e.g. '7 minutes ago' contains 'min')
|
||||||
|
*/
|
||||||
|
public static Map<TimeAgoUnit, Collection<String>> DEFAULT_AGO_PHRASES =
|
||||||
|
new EnumMap<>(TimeAgoUnit.class);
|
||||||
|
|
||||||
|
private final Map<TimeAgoUnit, Collection<String>> agoPhrases;
|
||||||
|
|
||||||
|
private final Calendar consistentNow;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Creates a helper to parse upload dates in the format '2 days ago'.
|
||||||
|
* <p>
|
||||||
|
* Instantiate a new {@link TimeAgoParser} every time you extract a new batch of items.
|
||||||
|
* </p>
|
||||||
|
* @param agoPhrases A set of phrases used to recognize the time units in a given language.
|
||||||
|
*/
|
||||||
|
public TimeAgoParser(Map<TimeAgoUnit, Collection<String>> agoPhrases) {
|
||||||
|
this.agoPhrases = agoPhrases;
|
||||||
|
consistentNow = Calendar.getInstance();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Parses a textual date in the format '2 days ago' into a Calendar representation.
|
||||||
|
* For time units of days or larger, marks the date as approximated by setting minutes, seconds
|
||||||
|
* and milliseconds to 0.
|
||||||
|
* @param textualDate The original date as provided by the streaming service
|
||||||
|
* @return The parsed (approximated) time
|
||||||
|
* @throws ParsingException if the time unit could not be recognized
|
||||||
|
*/
|
||||||
|
public Calendar parse(String textualDate) throws ParsingException {
|
||||||
|
int timeAgoAmount;
|
||||||
|
try {
|
||||||
|
timeAgoAmount = parseTimeAgoAmount(textualDate);
|
||||||
|
} catch (NumberFormatException e) {
|
||||||
|
// If there is no valid number in the textual date,
|
||||||
|
// assume it is 1 (as in 'a second ago').
|
||||||
|
timeAgoAmount = 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
TimeAgoUnit timeAgoUnit = parseTimeAgoUnit(textualDate);
|
||||||
|
return getCalendar(timeAgoAmount, timeAgoUnit);
|
||||||
|
}
|
||||||
|
|
||||||
|
private int parseTimeAgoAmount(String textualDate) throws NumberFormatException {
|
||||||
|
String timeValueStr = textualDate.replaceAll("\\D+", "");
|
||||||
|
return Integer.parseInt(timeValueStr);
|
||||||
|
}
|
||||||
|
|
||||||
|
private TimeAgoUnit parseTimeAgoUnit(String textualDate) throws ParsingException {
|
||||||
|
for (TimeAgoUnit timeAgoUnit : agoPhrases.keySet()) {
|
||||||
|
for (String agoPhrase : agoPhrases.get(timeAgoUnit)) {
|
||||||
|
if (textualDate.toLowerCase().contains(agoPhrase.toLowerCase())){
|
||||||
|
return timeAgoUnit;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
throw new ParsingException("Unable to parse the date: " + textualDate);
|
||||||
|
}
|
||||||
|
|
||||||
|
private Calendar getCalendar(int timeAgoAmount, TimeAgoUnit timeAgoUnit) {
|
||||||
|
Calendar calendarTime = getNow();
|
||||||
|
|
||||||
|
switch (timeAgoUnit) {
|
||||||
|
case SECONDS:
|
||||||
|
calendarTime.add(Calendar.SECOND, -timeAgoAmount);
|
||||||
|
break;
|
||||||
|
|
||||||
|
case MINUTES:
|
||||||
|
calendarTime.add(Calendar.MINUTE, -timeAgoAmount);
|
||||||
|
break;
|
||||||
|
|
||||||
|
case HOURS:
|
||||||
|
calendarTime.add(Calendar.HOUR_OF_DAY, -timeAgoAmount);
|
||||||
|
break;
|
||||||
|
|
||||||
|
case DAYS:
|
||||||
|
calendarTime.add(Calendar.DAY_OF_MONTH, -timeAgoAmount);
|
||||||
|
markApproximatedTime(calendarTime);
|
||||||
|
break;
|
||||||
|
|
||||||
|
case WEEKS:
|
||||||
|
calendarTime.add(Calendar.WEEK_OF_YEAR, -timeAgoAmount);
|
||||||
|
markApproximatedTime(calendarTime);
|
||||||
|
break;
|
||||||
|
|
||||||
|
case MONTHS:
|
||||||
|
calendarTime.add(Calendar.MONTH, -timeAgoAmount);
|
||||||
|
markApproximatedTime(calendarTime);
|
||||||
|
break;
|
||||||
|
|
||||||
|
case YEARS:
|
||||||
|
calendarTime.add(Calendar.YEAR, -timeAgoAmount);
|
||||||
|
// Prevent `PrettyTime` from showing '12 months ago'.
|
||||||
|
calendarTime.add(Calendar.DAY_OF_MONTH, -1);
|
||||||
|
markApproximatedTime(calendarTime);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
return calendarTime;
|
||||||
|
}
|
||||||
|
|
||||||
|
private Calendar getNow() {
|
||||||
|
return (Calendar) consistentNow.clone();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Marks the time as approximated by setting minutes, seconds and milliseconds to 0.
|
||||||
|
* @param calendarTime Time to be marked as approximated
|
||||||
|
*/
|
||||||
|
private void markApproximatedTime(Calendar calendarTime) {
|
||||||
|
calendarTime.set(Calendar.MINUTE, 0);
|
||||||
|
calendarTime.set(Calendar.SECOND, 0);
|
||||||
|
calendarTime.set(Calendar.MILLISECOND, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
static {
|
||||||
|
DEFAULT_AGO_PHRASES.put(TimeAgoUnit.SECONDS, Collections.singleton("sec"));
|
||||||
|
DEFAULT_AGO_PHRASES.put(TimeAgoUnit.MINUTES, Collections.singleton("min"));
|
||||||
|
DEFAULT_AGO_PHRASES.put(TimeAgoUnit.HOURS, Collections.singleton("hour"));
|
||||||
|
DEFAULT_AGO_PHRASES.put(TimeAgoUnit.DAYS, Collections.singleton("day"));
|
||||||
|
DEFAULT_AGO_PHRASES.put(TimeAgoUnit.WEEKS, Collections.singleton("week"));
|
||||||
|
DEFAULT_AGO_PHRASES.put(TimeAgoUnit.MONTHS, Collections.singleton("month"));
|
||||||
|
DEFAULT_AGO_PHRASES.put(TimeAgoUnit.YEARS, Collections.singleton("year"));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Time units in which a relative upload date can be expressed, listed from finest to coarsest.
 */
public enum TimeAgoUnit {
    SECONDS, MINUTES, HOURS,
    DAYS, WEEKS, MONTHS, YEARS
}
|
||||||
|
}
|
|
@ -41,6 +41,7 @@ import static java.util.Collections.singletonList;
|
||||||
public class Downloader implements org.schabi.newpipe.extractor.Downloader {
|
public class Downloader implements org.schabi.newpipe.extractor.Downloader {
|
||||||
|
|
||||||
private static final String USER_AGENT = "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:43.0) Gecko/20100101 Firefox/43.0";
|
private static final String USER_AGENT = "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:43.0) Gecko/20100101 Firefox/43.0";
|
||||||
|
private static final String DEFAULT_HTTP_ACCEPT_LANGUAGE = "en";
|
||||||
private static String mCookies = "";
|
private static String mCookies = "";
|
||||||
|
|
||||||
private static Downloader instance = null;
|
private static Downloader instance = null;
|
||||||
|
@ -171,6 +172,7 @@ public class Downloader implements org.schabi.newpipe.extractor.Downloader {
|
||||||
URL url = new URL(siteUrl);
|
URL url = new URL(siteUrl);
|
||||||
HttpsURLConnection con = (HttpsURLConnection) url.openConnection();
|
HttpsURLConnection con = (HttpsURLConnection) url.openConnection();
|
||||||
// HttpsURLConnection con = NetCipher.getHttpsURLConnection(url);
|
// HttpsURLConnection con = NetCipher.getHttpsURLConnection(url);
|
||||||
|
con.setRequestProperty("Accept-Language", DEFAULT_HTTP_ACCEPT_LANGUAGE);
|
||||||
return dl(con);
|
return dl(con);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -4,6 +4,7 @@ import org.schabi.newpipe.extractor.InfoItem;
|
||||||
import org.schabi.newpipe.extractor.ListExtractor;
|
import org.schabi.newpipe.extractor.ListExtractor;
|
||||||
import org.schabi.newpipe.extractor.stream.StreamInfoItem;
|
import org.schabi.newpipe.extractor.stream.StreamInfoItem;
|
||||||
|
|
||||||
|
import java.util.Calendar;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
import static org.junit.Assert.*;
|
import static org.junit.Assert.*;
|
||||||
|
@ -27,6 +28,14 @@ public final class DefaultTests {
|
||||||
StreamInfoItem streamInfoItem = (StreamInfoItem) item;
|
StreamInfoItem streamInfoItem = (StreamInfoItem) item;
|
||||||
assertNotEmpty("Uploader name not set: " + item, streamInfoItem.getUploaderName());
|
assertNotEmpty("Uploader name not set: " + item, streamInfoItem.getUploaderName());
|
||||||
assertNotEmpty("Uploader url not set: " + item, streamInfoItem.getUploaderUrl());
|
assertNotEmpty("Uploader url not set: " + item, streamInfoItem.getUploaderUrl());
|
||||||
|
|
||||||
|
final String textualUploadDate = streamInfoItem.getTextualUploadDate();
|
||||||
|
if (textualUploadDate != null && !textualUploadDate.isEmpty()) {
|
||||||
|
final Calendar uploadDate = streamInfoItem.getUploadDate();
|
||||||
|
assertNotNull("No parsed upload date", uploadDate);
|
||||||
|
assertTrue("Upload date not in the past", uploadDate.before(Calendar.getInstance()));
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue