Move stuff from extractVideoPreviewInfo() into YoutubeStreamInfoItemExtractor and partially fix search
This commit is contained in:
parent
af49b3c487
commit
b88188d419
|
@ -4,6 +4,7 @@ import com.grack.nanojson.JsonArray;
|
|||
import com.grack.nanojson.JsonObject;
|
||||
import com.grack.nanojson.JsonParser;
|
||||
import com.grack.nanojson.JsonParserException;
|
||||
|
||||
import org.jsoup.Jsoup;
|
||||
import org.jsoup.nodes.Document;
|
||||
import org.jsoup.nodes.Element;
|
||||
|
@ -23,9 +24,10 @@ import org.schabi.newpipe.extractor.stream.StreamType;
|
|||
import org.schabi.newpipe.extractor.utils.Parser;
|
||||
import org.schabi.newpipe.extractor.utils.Utils;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import javax.annotation.Nonnull;
|
||||
import javax.annotation.Nullable;
|
||||
import java.io.IOException;
|
||||
|
||||
@SuppressWarnings("WeakerAccess")
|
||||
public class YoutubePlaylistExtractor extends PlaylistExtractor {
|
||||
|
@ -318,7 +320,7 @@ public class YoutubePlaylistExtractor extends PlaylistExtractor {
|
|||
}
|
||||
|
||||
@Override
|
||||
public String getTextualUploadDate() throws ParsingException {
|
||||
public String getTextualUploadDate() {
|
||||
return "";
|
||||
}
|
||||
|
||||
|
|
|
@ -1,5 +1,10 @@
|
|||
package org.schabi.newpipe.extractor.services.youtube.extractors;
|
||||
|
||||
import com.grack.nanojson.JsonArray;
|
||||
import com.grack.nanojson.JsonObject;
|
||||
import com.grack.nanojson.JsonParser;
|
||||
import com.grack.nanojson.JsonParserException;
|
||||
|
||||
import org.jsoup.Jsoup;
|
||||
import org.jsoup.nodes.Document;
|
||||
import org.jsoup.nodes.Element;
|
||||
|
@ -16,12 +21,13 @@ import org.schabi.newpipe.extractor.search.SearchExtractor;
|
|||
import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeParsingHelper;
|
||||
import org.schabi.newpipe.extractor.utils.Parser;
|
||||
|
||||
import javax.annotation.Nonnull;
|
||||
import java.io.IOException;
|
||||
import java.io.UnsupportedEncodingException;
|
||||
import java.net.MalformedURLException;
|
||||
import java.net.URL;
|
||||
|
||||
import javax.annotation.Nonnull;
|
||||
|
||||
/*
|
||||
* Created by Christian Schabesberger on 22.07.2018
|
||||
*
|
||||
|
@ -45,6 +51,7 @@ import java.net.URL;
|
|||
public class YoutubeSearchExtractor extends SearchExtractor {
|
||||
|
||||
private Document doc;
|
||||
private JsonObject ytInitialData;
|
||||
|
||||
public YoutubeSearchExtractor(StreamingService service, SearchQueryHandler linkHandler) {
|
||||
super(service, linkHandler);
|
||||
|
@ -55,6 +62,7 @@ public class YoutubeSearchExtractor extends SearchExtractor {
|
|||
final String url = getUrl();
|
||||
final Response response = downloader.get(url, getExtractorLocalization());
|
||||
doc = YoutubeParsingHelper.parseAndCheckPage(url, response);
|
||||
ytInitialData = getInitialData();
|
||||
}
|
||||
|
||||
@Nonnull
|
||||
|
@ -86,6 +94,7 @@ public class YoutubeSearchExtractor extends SearchExtractor {
|
|||
|
||||
@Override
|
||||
public InfoItemsPage<InfoItem> getPage(String pageUrl) throws IOException, ExtractionException {
|
||||
// TODO: Get extracting next pages working
|
||||
final String response = getDownloader().get(pageUrl, getExtractorLocalization()).responseBody();
|
||||
doc = Jsoup.parse(response, pageUrl);
|
||||
|
||||
|
@ -108,37 +117,33 @@ public class YoutubeSearchExtractor extends SearchExtractor {
|
|||
InfoItemsSearchCollector collector = getInfoItemSearchCollector();
|
||||
collector.reset();
|
||||
|
||||
Element list = doc.select("ol[class=\"item-section\"]").first();
|
||||
final TimeAgoParser timeAgoParser = getTimeAgoParser();
|
||||
|
||||
for (Element item : list.children()) {
|
||||
/* First we need to determine which kind of item we are working with.
|
||||
Youtube depicts five different kinds of items on its search result page. These are
|
||||
regular videos, playlists, channels, two types of video suggestions, and a "no video
|
||||
found" item. Since we only want videos, we need to filter out all the others.
|
||||
An example for this can be seen here:
|
||||
https://www.youtube.com/results?search_query=asdf&page=1
|
||||
JsonArray list = ytInitialData.getObject("contents").getObject("twoColumnSearchResultsRenderer")
|
||||
.getObject("primaryContents").getObject("sectionListRenderer").getArray("contents")
|
||||
.getObject(0).getObject("itemSectionRenderer").getArray("contents");
|
||||
|
||||
We already applied a filter to the url, so we don't need to care about channels and
|
||||
playlists now.
|
||||
*/
|
||||
|
||||
Element el;
|
||||
|
||||
if ((el = item.select("div[class*=\"search-message\"]").first()) != null) {
|
||||
throw new NothingFoundException(el.text());
|
||||
|
||||
// video item type
|
||||
} else if ((el = item.select("div[class*=\"yt-lockup-video\"]").first()) != null) {
|
||||
collector.commit(new YoutubeStreamInfoItemExtractor(el, timeAgoParser));
|
||||
} else if ((el = item.select("div[class*=\"yt-lockup-channel\"]").first()) != null) {
|
||||
collector.commit(new YoutubeChannelInfoItemExtractor(el));
|
||||
} else if ((el = item.select("div[class*=\"yt-lockup-playlist\"]").first()) != null &&
|
||||
item.select(".yt-pl-icon-mix").isEmpty()) {
|
||||
collector.commit(new YoutubePlaylistInfoItemExtractor(el));
|
||||
for (Object item : list) {
|
||||
if (((JsonObject) item).getObject("backgroundPromoRenderer") != null) {
|
||||
throw new NothingFoundException(((JsonObject) item).getObject("backgroundPromoRenderer")
|
||||
.getObject("bodyText").getArray("runs").getObject(0).getString("text"));
|
||||
} else if (((JsonObject) item).getObject("videoRenderer") != null) {
|
||||
collector.commit(new YoutubeStreamInfoItemExtractor(((JsonObject) item).getObject("videoRenderer"), timeAgoParser));
|
||||
} else if (((JsonObject) item).getObject("channelRenderer") != null) {
|
||||
// collector.commit(new YoutubeChannelInfoItemExtractor(((JsonObject) item).getObject("channelRenderer")));
|
||||
} else if (((JsonObject) item).getObject("playlistRenderer") != null) {
|
||||
// collector.commit(new YoutubePlaylistInfoItemExtractor(((JsonObject) item).getObject("playlistRenderer")));
|
||||
}
|
||||
}
|
||||
|
||||
return collector;
|
||||
}
|
||||
|
||||
private JsonObject getInitialData() throws ParsingException {
|
||||
try {
|
||||
String initialData = Parser.matchGroup1("window\\[\"ytInitialData\"\\]\\s*=\\s*(\\{.*?\\});", doc.toString());
|
||||
return JsonParser.object().from(initialData);
|
||||
} catch (JsonParserException | Parser.RegexException e) {
|
||||
throw new ParsingException("Could not get ytInitialData", e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -4,10 +4,10 @@ import com.grack.nanojson.JsonArray;
|
|||
import com.grack.nanojson.JsonObject;
|
||||
import com.grack.nanojson.JsonParser;
|
||||
import com.grack.nanojson.JsonParserException;
|
||||
|
||||
import org.jsoup.Jsoup;
|
||||
import org.jsoup.nodes.Document;
|
||||
import org.jsoup.nodes.Element;
|
||||
import org.jsoup.select.Elements;
|
||||
import org.mozilla.javascript.Context;
|
||||
import org.mozilla.javascript.Function;
|
||||
import org.mozilla.javascript.ScriptableObject;
|
||||
|
@ -25,24 +25,38 @@ import org.schabi.newpipe.extractor.linkhandler.LinkHandler;
|
|||
import org.schabi.newpipe.extractor.localization.DateWrapper;
|
||||
import org.schabi.newpipe.extractor.localization.TimeAgoParser;
|
||||
import org.schabi.newpipe.extractor.services.youtube.ItagItem;
|
||||
import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeChannelLinkHandlerFactory;
|
||||
import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeParsingHelper;
|
||||
import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeStreamLinkHandlerFactory;
|
||||
import org.schabi.newpipe.extractor.stream.*;
|
||||
import org.schabi.newpipe.extractor.stream.AudioStream;
|
||||
import org.schabi.newpipe.extractor.stream.Description;
|
||||
import org.schabi.newpipe.extractor.stream.Frameset;
|
||||
import org.schabi.newpipe.extractor.stream.Stream;
|
||||
import org.schabi.newpipe.extractor.stream.StreamExtractor;
|
||||
import org.schabi.newpipe.extractor.stream.StreamInfoItem;
|
||||
import org.schabi.newpipe.extractor.stream.StreamInfoItemsCollector;
|
||||
import org.schabi.newpipe.extractor.stream.StreamType;
|
||||
import org.schabi.newpipe.extractor.stream.SubtitlesStream;
|
||||
import org.schabi.newpipe.extractor.stream.VideoStream;
|
||||
import org.schabi.newpipe.extractor.utils.JsonUtils;
|
||||
import org.schabi.newpipe.extractor.utils.Parser;
|
||||
import org.schabi.newpipe.extractor.utils.Utils;
|
||||
|
||||
import javax.annotation.Nonnull;
|
||||
import javax.annotation.Nullable;
|
||||
import java.io.IOException;
|
||||
import java.io.UnsupportedEncodingException;
|
||||
import java.net.MalformedURLException;
|
||||
import java.net.URL;
|
||||
import java.util.*;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.LinkedHashMap;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.Map;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import javax.annotation.Nonnull;
|
||||
import javax.annotation.Nullable;
|
||||
|
||||
/*
|
||||
* Created by Christian Schabesberger on 06.08.15.
|
||||
*
|
||||
|
@ -663,7 +677,7 @@ public class YoutubeStreamExtractor extends StreamExtractor {
|
|||
final TimeAgoParser timeAgoParser = getTimeAgoParser();
|
||||
StreamInfoItemsCollector collector = new StreamInfoItemsCollector(getServiceId());
|
||||
|
||||
collector.commit(extractVideoPreviewInfo(videoInfo, timeAgoParser));
|
||||
collector.commit(new YoutubeStreamInfoItemExtractor(videoInfo, timeAgoParser));
|
||||
return collector.getItems().get(0);
|
||||
} catch (Exception e) {
|
||||
throw new ParsingException("Could not get next video", e);
|
||||
|
@ -684,7 +698,7 @@ public class YoutubeStreamExtractor extends StreamExtractor {
|
|||
for (Object ul : results) {
|
||||
final JsonObject videoInfo = ((JsonObject) ul).getObject("compactVideoRenderer");
|
||||
|
||||
if (videoInfo != null) collector.commit(extractVideoPreviewInfo(videoInfo, timeAgoParser));
|
||||
if (videoInfo != null) collector.commit(new YoutubeStreamInfoItemExtractor(videoInfo, timeAgoParser));
|
||||
}
|
||||
return collector;
|
||||
} catch (Exception e) {
|
||||
|
@ -1064,123 +1078,6 @@ public class YoutubeStreamExtractor extends StreamExtractor {
|
|||
return urlAndItags;
|
||||
}
|
||||
|
||||
/**
|
||||
* Provides information about links to other videos on the video page, such as related videos.
|
||||
* This is encapsulated in a StreamInfoItem object, which is a subset of the fields in a full StreamInfo.
|
||||
*/
|
||||
private StreamInfoItemExtractor extractVideoPreviewInfo(final JsonObject videoInfo, final TimeAgoParser timeAgoParser) {
|
||||
return new YoutubeStreamInfoItemExtractor(videoInfo, timeAgoParser) {
|
||||
@Override
|
||||
public StreamType getStreamType() {
|
||||
try {
|
||||
if (videoInfo.getArray("badges").getObject(0).getObject("metadataBadgeRenderer").getString("label").equals("LIVE NOW")) {
|
||||
return StreamType.LIVE_STREAM;
|
||||
}
|
||||
} catch (Exception ignored) {}
|
||||
return StreamType.VIDEO_STREAM;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isAd() {
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getUrl() throws ParsingException {
|
||||
try {
|
||||
String videoId = videoInfo.getString("videoId");
|
||||
return YoutubeStreamLinkHandlerFactory.getInstance().getUrl(videoId);
|
||||
} catch (Exception e) {
|
||||
throw new ParsingException("Could not get url", e);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getName() throws ParsingException {
|
||||
String name = null;
|
||||
try {
|
||||
name = videoInfo.getObject("title").getString("simpleText");
|
||||
} catch (Exception ignored) {}
|
||||
if (name != null && !name.isEmpty()) return name;
|
||||
throw new ParsingException("Could not get title");
|
||||
}
|
||||
|
||||
@Override
|
||||
public long getDuration() throws ParsingException {
|
||||
try {
|
||||
if (getStreamType() == StreamType.LIVE_STREAM) return -1;
|
||||
return YoutubeParsingHelper.parseDurationString(videoInfo.getObject("lengthText").getString("simpleText"));
|
||||
} catch (Exception e) {
|
||||
throw new ParsingException("Could not get duration", e);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getUploaderUrl() throws ParsingException {
|
||||
try {
|
||||
String id = videoInfo.getObject("longBylineText").getArray("runs")
|
||||
.getObject(0).getObject("navigationEndpoint")
|
||||
.getObject("browseEndpoint").getString("browseId");
|
||||
if (id == null || id.isEmpty()) {
|
||||
throw new IllegalArgumentException("is empty");
|
||||
}
|
||||
return YoutubeChannelLinkHandlerFactory.getInstance().getUrl(id);
|
||||
} catch (Exception e) {
|
||||
throw new ParsingException("Could not get uploader url");
|
||||
}
|
||||
}
|
||||
|
||||
@Nullable
|
||||
@Override
|
||||
public String getTextualUploadDate() {
|
||||
return null;
|
||||
}
|
||||
|
||||
@Nullable
|
||||
@Override
|
||||
public DateWrapper getUploadDate() {
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public long getViewCount() throws ParsingException {
|
||||
try {
|
||||
String viewCount;
|
||||
if (getStreamType() == StreamType.LIVE_STREAM) {
|
||||
viewCount = videoInfo.getObject("viewCountText")
|
||||
.getArray("runs").getObject(0).getString("text");
|
||||
} else {
|
||||
viewCount = videoInfo.getObject("viewCountText").getString("simpleText");
|
||||
}
|
||||
if (viewCount.equals("Recommended for you")) return -1;
|
||||
return Long.parseLong(Utils.removeNonDigitCharacters(viewCount));
|
||||
} catch (Exception e) {
|
||||
throw new ParsingException("Could not get view count", e);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getUploaderName() throws ParsingException {
|
||||
try {
|
||||
return videoInfo.getObject("longBylineText").getArray("runs")
|
||||
.getObject(0).getString("text");
|
||||
} catch (Exception e) {
|
||||
throw new ParsingException("Could not get uploader name", e);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getThumbnailUrl() throws ParsingException {
|
||||
try {
|
||||
return videoInfo.getObject("thumbnail").getArray("thumbnails")
|
||||
.getObject(0).getString("url");
|
||||
} catch (Exception e) {
|
||||
throw new ParsingException("Could not get thumbnail url", e);
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
@Nonnull
|
||||
@Override
|
||||
public List<Frameset> getFrames() throws ExtractionException {
|
||||
|
|
|
@ -2,19 +2,17 @@ package org.schabi.newpipe.extractor.services.youtube.extractors;
|
|||
|
||||
import com.grack.nanojson.JsonObject;
|
||||
import org.jsoup.nodes.Element;
|
||||
import org.jsoup.select.Elements;
|
||||
import org.schabi.newpipe.extractor.exceptions.ParsingException;
|
||||
import org.schabi.newpipe.extractor.localization.DateWrapper;
|
||||
import org.schabi.newpipe.extractor.localization.TimeAgoParser;
|
||||
import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeChannelLinkHandlerFactory;
|
||||
import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeParsingHelper;
|
||||
import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeStreamLinkHandlerFactory;
|
||||
import org.schabi.newpipe.extractor.stream.StreamInfoItemExtractor;
|
||||
import org.schabi.newpipe.extractor.stream.StreamType;
|
||||
import org.schabi.newpipe.extractor.utils.Utils;
|
||||
|
||||
import javax.annotation.Nullable;
|
||||
import java.text.SimpleDateFormat;
|
||||
import java.util.Calendar;
|
||||
import java.util.Date;
|
||||
|
||||
/*
|
||||
* Copyright (C) Christian Schabesberger 2016 <chris.schabesberger@mailbox.org>
|
||||
|
@ -36,20 +34,10 @@ import java.util.Date;
|
|||
|
||||
public class YoutubeStreamInfoItemExtractor implements StreamInfoItemExtractor {
|
||||
|
||||
private JsonObject videoInfoItem;
|
||||
private Element item;
|
||||
private JsonObject videoInfo;
|
||||
private final TimeAgoParser timeAgoParser;
|
||||
|
||||
private String cachedUploadDate;
|
||||
|
||||
/**
|
||||
* Creates an extractor of StreamInfoItems from a YouTube page.
|
||||
*
|
||||
* @param item The page element
|
||||
* @param timeAgoParser A parser of the textual dates or {@code null}.
|
||||
*/
|
||||
public YoutubeStreamInfoItemExtractor(Element item, @Nullable TimeAgoParser timeAgoParser) {
|
||||
this.item = item;
|
||||
public YoutubeStreamInfoItemExtractor(Element a, @Nullable TimeAgoParser timeAgoParser) {
|
||||
this.timeAgoParser = timeAgoParser;
|
||||
}
|
||||
|
||||
|
@ -60,251 +48,123 @@ public class YoutubeStreamInfoItemExtractor implements StreamInfoItemExtractor {
|
|||
* @param timeAgoParser A parser of the textual dates or {@code null}.
|
||||
*/
|
||||
public YoutubeStreamInfoItemExtractor(JsonObject videoInfoItem, @Nullable TimeAgoParser timeAgoParser) {
|
||||
this.videoInfoItem = videoInfoItem;
|
||||
this.videoInfo = videoInfoItem;
|
||||
this.timeAgoParser = timeAgoParser;
|
||||
}
|
||||
|
||||
@Override
|
||||
public StreamType getStreamType() throws ParsingException {
|
||||
if (isLiveStream(item)) {
|
||||
return StreamType.LIVE_STREAM;
|
||||
} else {
|
||||
return StreamType.VIDEO_STREAM;
|
||||
}
|
||||
public StreamType getStreamType() {
|
||||
try {
|
||||
if (videoInfo.getArray("badges").getObject(0).getObject("metadataBadgeRenderer").getString("label").equals("LIVE NOW")) {
|
||||
return StreamType.LIVE_STREAM;
|
||||
}
|
||||
} catch (Exception ignored) {}
|
||||
return StreamType.VIDEO_STREAM;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isAd() throws ParsingException {
|
||||
return !item.select("span[class*=\"icon-not-available\"]").isEmpty()
|
||||
|| !item.select("span[class*=\"yt-badge-ad\"]").isEmpty()
|
||||
|| isPremiumVideo();
|
||||
}
|
||||
|
||||
private boolean isPremiumVideo() {
|
||||
Element premiumSpan = item.select("span[class=\"standalone-collection-badge-renderer-red-text\"]").first();
|
||||
if (premiumSpan == null) return false;
|
||||
|
||||
// if this span has text it most likely says ("Free Video") so we can play this
|
||||
if (premiumSpan.hasText()) return false;
|
||||
return true;
|
||||
public boolean isAd() {
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getUrl() throws ParsingException {
|
||||
try {
|
||||
Element el = item.select("div[class*=\"yt-lockup-video\"]").first();
|
||||
Element dl = el.select("h3").first().select("a").first();
|
||||
return dl.attr("abs:href");
|
||||
String videoId = videoInfo.getString("videoId");
|
||||
return YoutubeStreamLinkHandlerFactory.getInstance().getUrl(videoId);
|
||||
} catch (Exception e) {
|
||||
throw new ParsingException("Could not get web page url for the video", e);
|
||||
throw new ParsingException("Could not get url", e);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getName() throws ParsingException {
|
||||
String name = null;
|
||||
try {
|
||||
Element el = item.select("div[class*=\"yt-lockup-video\"]").first();
|
||||
Element dl = el.select("h3").first().select("a").first();
|
||||
return dl.text();
|
||||
} catch (Exception e) {
|
||||
throw new ParsingException("Could not get title", e);
|
||||
name = videoInfo.getObject("title").getString("simpleText");
|
||||
} catch (Exception ignored) {}
|
||||
if (name == null) {
|
||||
try {
|
||||
name = videoInfo.getObject("title").getArray("runs").getObject(0).getString("text");
|
||||
} catch (Exception ignored) {}
|
||||
}
|
||||
if (name != null && !name.isEmpty()) return name;
|
||||
throw new ParsingException("Could not get name");
|
||||
}
|
||||
|
||||
@Override
|
||||
public long getDuration() throws ParsingException {
|
||||
try {
|
||||
if (getStreamType() == StreamType.LIVE_STREAM) return -1;
|
||||
|
||||
final Element duration = item.select("span[class*=\"video-time\"]").first();
|
||||
// apparently on youtube, video-time element will not show up if the video has a duration of 00:00
|
||||
// see: https://www.youtube.com/results?sp=EgIQAVAU&q=asdfgf
|
||||
return duration == null ? 0 : YoutubeParsingHelper.parseDurationString(duration.text());
|
||||
return YoutubeParsingHelper.parseDurationString(videoInfo.getObject("lengthText").getString("simpleText"));
|
||||
} catch (Exception e) {
|
||||
throw new ParsingException("Could not get Duration: " + getUrl(), e);
|
||||
throw new ParsingException("Could not get duration", e);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getUploaderName() throws ParsingException {
|
||||
try {
|
||||
return item.select("div[class=\"yt-lockup-byline\"]").first()
|
||||
.select("a").first()
|
||||
.text();
|
||||
return videoInfo.getObject("longBylineText").getArray("runs")
|
||||
.getObject(0).getString("text");
|
||||
} catch (Exception e) {
|
||||
throw new ParsingException("Could not get uploader", e);
|
||||
throw new ParsingException("Could not get uploader name", e);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getUploaderUrl() throws ParsingException {
|
||||
// this url is not always in the form "/channel/..."
|
||||
// sometimes Youtube provides urls in the from "/user/..."
|
||||
try {
|
||||
try {
|
||||
return item.select("div[class=\"yt-lockup-byline\"]").first()
|
||||
.select("a").first()
|
||||
.attr("abs:href");
|
||||
} catch (Exception e){}
|
||||
|
||||
// try this if the first didn't work
|
||||
return item.select("span[class=\"title\"")
|
||||
.text().split(" - ")[0];
|
||||
} catch (Exception e) {
|
||||
System.out.println(item.html());
|
||||
throw new ParsingException("Could not get uploader url", e);
|
||||
}
|
||||
}
|
||||
|
||||
@Nullable
|
||||
@Override
|
||||
public String getTextualUploadDate() throws ParsingException {
|
||||
if (getStreamType().equals(StreamType.LIVE_STREAM)) {
|
||||
return null;
|
||||
}
|
||||
|
||||
if (cachedUploadDate != null) {
|
||||
return cachedUploadDate;
|
||||
}
|
||||
|
||||
try {
|
||||
if (isVideoReminder()) {
|
||||
final Calendar calendar = getDateFromReminder();
|
||||
if (calendar != null) {
|
||||
return cachedUploadDate = new SimpleDateFormat("yyyy-MM-dd HH:mm")
|
||||
.format(calendar.getTime());
|
||||
}
|
||||
String id = videoInfo.getObject("longBylineText").getArray("runs")
|
||||
.getObject(0).getObject("navigationEndpoint")
|
||||
.getObject("browseEndpoint").getString("browseId");
|
||||
if (id == null || id.isEmpty()) {
|
||||
throw new IllegalArgumentException("is empty");
|
||||
}
|
||||
|
||||
|
||||
Element meta = item.select("div[class=\"yt-lockup-meta\"]").first();
|
||||
if (meta == null) return "";
|
||||
|
||||
final Elements li = meta.select("li");
|
||||
if (li.isEmpty()) return "";
|
||||
|
||||
return cachedUploadDate = li.first().text();
|
||||
return YoutubeChannelLinkHandlerFactory.getInstance().getUrl(id);
|
||||
} catch (Exception e) {
|
||||
throw new ParsingException("Could not get upload date", e);
|
||||
throw new ParsingException("Could not get uploader url");
|
||||
}
|
||||
}
|
||||
|
||||
@Nullable
|
||||
@Override
|
||||
public DateWrapper getUploadDate() throws ParsingException {
|
||||
if (getStreamType().equals(StreamType.LIVE_STREAM)) {
|
||||
return null;
|
||||
}
|
||||
public String getTextualUploadDate() {
|
||||
// TODO: Get upload date in case of a videoRenderer (not available in case of a compactVideoRenderer)
|
||||
return null;
|
||||
}
|
||||
|
||||
if (isVideoReminder()) {
|
||||
return new DateWrapper(getDateFromReminder());
|
||||
}
|
||||
|
||||
String textualUploadDate = getTextualUploadDate();
|
||||
if (timeAgoParser != null && textualUploadDate != null && !textualUploadDate.isEmpty()) {
|
||||
return timeAgoParser.parse(textualUploadDate);
|
||||
} else {
|
||||
return null;
|
||||
}
|
||||
@Nullable
|
||||
@Override
|
||||
public DateWrapper getUploadDate() {
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public long getViewCount() throws ParsingException {
|
||||
String input;
|
||||
|
||||
final Element spanViewCount = item.select("span.view-count").first();
|
||||
if (spanViewCount != null) {
|
||||
input = spanViewCount.text();
|
||||
|
||||
} else if (getStreamType().equals(StreamType.LIVE_STREAM)) {
|
||||
Element meta = item.select("ul.yt-lockup-meta-info").first();
|
||||
if (meta == null) return 0;
|
||||
|
||||
final Elements li = meta.select("li");
|
||||
if (li.isEmpty()) return 0;
|
||||
|
||||
input = li.first().text();
|
||||
} else {
|
||||
try {
|
||||
Element meta = item.select("div.yt-lockup-meta").first();
|
||||
if (meta == null) return -1;
|
||||
|
||||
// This case can happen if google releases a special video
|
||||
if (meta.select("li").size() < 2) return -1;
|
||||
|
||||
input = meta.select("li").get(1).text();
|
||||
} catch (IndexOutOfBoundsException e) {
|
||||
throw new ParsingException("Could not parse yt-lockup-meta although available: " + getUrl(), e);
|
||||
}
|
||||
}
|
||||
|
||||
if (input == null) {
|
||||
throw new ParsingException("Input is null");
|
||||
}
|
||||
|
||||
try {
|
||||
|
||||
return Long.parseLong(Utils.removeNonDigitCharacters(input));
|
||||
} catch (NumberFormatException e) {
|
||||
// if this happens the video probably has no views
|
||||
if (!input.isEmpty()) {
|
||||
return 0;
|
||||
String viewCount;
|
||||
if (getStreamType() == StreamType.LIVE_STREAM) {
|
||||
viewCount = videoInfo.getObject("viewCountText")
|
||||
.getArray("runs").getObject(0).getString("text");
|
||||
} else {
|
||||
viewCount = videoInfo.getObject("viewCountText").getString("simpleText");
|
||||
}
|
||||
|
||||
throw new ParsingException("Could not handle input: " + input, e);
|
||||
if (viewCount.equals("Recommended for you")) return -1;
|
||||
return Long.parseLong(Utils.removeNonDigitCharacters(viewCount));
|
||||
} catch (Exception e) {
|
||||
throw new ParsingException("Could not get view count", e);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getThumbnailUrl() throws ParsingException {
|
||||
try {
|
||||
String url;
|
||||
Element te = item.select("div[class=\"yt-thumb video-thumb\"]").first()
|
||||
.select("img").first();
|
||||
url = te.attr("abs:src");
|
||||
// Sometimes youtube sends links to gif files which somehow seem to not exist
|
||||
// anymore. Items with such gif also offer a secondary image source. So we are going
|
||||
// to use that if we've caught such an item.
|
||||
if (url.contains(".gif")) {
|
||||
url = te.attr("abs:data-thumb");
|
||||
}
|
||||
return url;
|
||||
// TODO: Don't simply get the first item, but look at all thumbnails and their resolution
|
||||
return videoInfo.getObject("thumbnail").getArray("thumbnails")
|
||||
.getObject(0).getString("url");
|
||||
} catch (Exception e) {
|
||||
throw new ParsingException("Could not get thumbnail url", e);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
private boolean isVideoReminder() {
|
||||
return !item.select("span.yt-uix-livereminder").isEmpty();
|
||||
}
|
||||
|
||||
private Calendar getDateFromReminder() throws ParsingException {
|
||||
final Element timeFuture = item.select("span.yt-badge.localized-date").first();
|
||||
|
||||
if (timeFuture == null) {
|
||||
throw new ParsingException("Span timeFuture is null");
|
||||
}
|
||||
|
||||
final String timestamp = timeFuture.attr("data-timestamp");
|
||||
if (!timestamp.isEmpty()) {
|
||||
try {
|
||||
final Calendar calendar = Calendar.getInstance();
|
||||
calendar.setTime(new Date(Long.parseLong(timestamp) * 1000L));
|
||||
return calendar;
|
||||
} catch (Exception e) {
|
||||
throw new ParsingException("Could not parse = \"" + timestamp + "\"");
|
||||
}
|
||||
}
|
||||
|
||||
throw new ParsingException("Could not parse date from reminder element: \"" + timeFuture + "\"");
|
||||
}
|
||||
|
||||
/**
|
||||
* Generic method that checks if the element contains any clues that it's a livestream item
|
||||
*/
|
||||
protected static boolean isLiveStream(Element item) {
|
||||
return !item.select("span[class*=\"yt-badge-live\"]").isEmpty()
|
||||
|| !item.select("span[class*=\"video-time-overlay-live\"]").isEmpty();
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue