Fix bugs and improve InfoItemExtractors

- Improve livestream detection
This commit is contained in:
Mauricio Colli 2017-08-10 14:50:59 -03:00
parent 5bf2e95d7b
commit c4f521fbb4
15 changed files with 253 additions and 490 deletions

View File

@ -48,7 +48,7 @@ public class SoundcloudPlaylistExtractor extends PlaylistExtractor {
@Override
public String getPlaylistName() {
return playlist.getString("title");
return playlist.optString("title");
}
@Override

View File

@ -1,6 +1,5 @@
package org.schabi.newpipe.extractor.services.soundcloud;
import org.json.JSONArray;
import org.json.JSONObject;
import org.schabi.newpipe.extractor.Downloader;
import org.schabi.newpipe.extractor.MediaFormat;
@ -27,8 +26,9 @@ public class SoundcloudStreamExtractor extends StreamExtractor {
public void fetchPage() throws IOException, ExtractionException {
track = SoundcloudParsingHelper.resolveFor(getOriginalUrl());
if (!track.getString("policy").equals("ALLOW") && !track.getString("policy").equals("MONETIZE")) {
throw new ContentNotAvailableException("Content not available: policy " + track.getString("policy"));
String policy = track.getString("policy");
if (!policy.equals("ALLOW") && !policy.equals("MONETIZE")) {
throw new ContentNotAvailableException("Content not available: policy " + policy);
}
}
@ -48,12 +48,12 @@ public class SoundcloudStreamExtractor extends StreamExtractor {
@Override
public String getTitle() {
return track.getString("title");
return track.optString("title");
}
@Override
public String getDescription() {
return track.getString("description");
return track.optString("description");
}
@Override
@ -62,8 +62,23 @@ public class SoundcloudStreamExtractor extends StreamExtractor {
}
@Override
public int getLength() {
return track.getInt("duration") / 1000;
public String getUploaderUrl() {
return track.getJSONObject("user").getString("permalink_url");
}
/**
 * Reads the uploader's avatar url from the "user" object of the resolved track.
 * Uses the lenient {@code optString} accessor for the "avatar_url" field itself;
 * the "user" object is still fetched with the strict {@code getJSONObject}.
 */
@Override
public String getUploaderAvatarUrl() {
    final JSONObject uploader = track.getJSONObject("user");
    return uploader.optString("avatar_url");
}
/**
 * Reads the track artwork url ("artwork_url") from the resolved track using the
 * lenient {@code optString} accessor.
 */
@Override
public String getThumbnailUrl() {
    final String artworkUrl = track.optString("artwork_url");
    return artworkUrl;
}
/**
 * Returns the track's "duration" field divided by 1000.
 * NOTE(review): presumably the API reports milliseconds and this converts to
 * seconds - confirm against the SoundCloud API.
 */
@Override
public long getLength() {
    final long duration = track.getLong("duration");
    return duration / 1000L;
}
@Override
@ -76,16 +91,6 @@ public class SoundcloudStreamExtractor extends StreamExtractor {
return SoundcloudParsingHelper.toDateString(track.getString("created_at"));
}
@Override
public String getThumbnailUrl() {
return track.optString("artwork_url");
}
@Override
public String getUploaderAvatarUrl() {
return track.getJSONObject("user").getString("avatar_url");
}
@Override
public String getDashMpdUrl() {
return null;
@ -171,44 +176,31 @@ public class SoundcloudStreamExtractor extends StreamExtractor {
}
@Override
public int getLikeCount() {
return track.getInt("likes_count");
public long getLikeCount() {
return track.getLong("likes_count");
}
@Override
public int getDislikeCount() {
public long getDislikeCount() {
return 0;
}
@Override
public StreamInfoItemExtractor getNextVideo() throws IOException, ExtractionException {
public StreamInfoItem getNextVideo() throws IOException, ExtractionException {
return null;
}
@Override
public StreamInfoItemCollector getRelatedVideos() throws IOException, ExtractionException {
StreamInfoItemCollector collector = new StreamInfoItemCollector(getServiceId());
Downloader dl = NewPipe.getDownloader();
String apiUrl = "https://api-v2.soundcloud.com/tracks/" + getId() + "/related"
+ "?client_id=" + SoundcloudParsingHelper.clientId();
String response = dl.download(apiUrl);
JSONObject responseObject = new JSONObject(response);
JSONArray responseCollection = responseObject.getJSONArray("collection");
for (int i = 0; i < responseCollection.length(); i++) {
JSONObject relatedVideo = responseCollection.getJSONObject(i);
collector.commit(new SoundcloudStreamInfoItemExtractor(relatedVideo));
}
SoundcloudParsingHelper.getStreamsFromApi(collector, apiUrl);
return collector;
}
@Override
public String getUploaderUrl() {
return track.getJSONObject("user").getString("permalink_url");
}
@Override
public StreamType getStreamType() {
return StreamType.AUDIO_STREAM;

View File

@ -53,7 +53,7 @@ public class SoundcloudUserExtractor extends UserExtractor {
@Override
public String getAvatarUrl() {
return user.getString("avatar_url");
return user.optString("avatar_url");
}
@Override
@ -67,7 +67,7 @@ public class SoundcloudUserExtractor extends UserExtractor {
@Override
public long getSubscriberCount() {
return user.getLong("followers_count");
return user.optLong("followers_count", 0L);
}
@Override
@ -102,6 +102,6 @@ public class SoundcloudUserExtractor extends UserExtractor {
@Override
public String getDescription() throws ParsingException {
return user.getString("description");
return user.optString("description");
}
}

View File

@ -27,7 +27,7 @@ public class SoundcloudUserInfoItemExtractor implements UserInfoItemExtractor {
@Override
public long getSubscriberCount() {
return searchResult.getLong("followers_count");
return searchResult.optLong("followers_count", 0L);
}
@Override

View File

@ -14,7 +14,6 @@ import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.exceptions.ReCaptchaException;
import org.schabi.newpipe.extractor.playlist.PlaylistExtractor;
import org.schabi.newpipe.extractor.stream.StreamInfoItemCollector;
import org.schabi.newpipe.extractor.stream.StreamInfoItemExtractor;
import org.schabi.newpipe.extractor.stream.StreamType;
import org.schabi.newpipe.extractor.utils.Parser;
import org.schabi.newpipe.extractor.utils.Utils;
@ -199,10 +198,10 @@ public class YoutubePlaylistExtractor extends PlaylistExtractor {
final UrlIdHandler streamUrlIdHandler = getService().getStreamUrlIdHandler();
for (final Element li : element.children()) {
collector.commit(new StreamInfoItemExtractor() {
collector.commit(new YoutubeStreamInfoItemExtractor(li) {
@Override
public StreamType getStreamType() throws ParsingException {
return StreamType.VIDEO_STREAM;
public boolean isAd() throws ParsingException {
return false;
}
@Override
@ -226,15 +225,18 @@ public class YoutubePlaylistExtractor extends PlaylistExtractor {
@Override
public int getDuration() throws ParsingException {
try {
return YoutubeParsingHelper.parseDurationString(
li.select("div[class=\"timestamp\"] span").first().text().trim());
} catch (Exception e) {
if (isLiveStream(li)) {
// -1 for no duration
if (getStreamType() == StreamType.LIVE_STREAM) return -1;
Element first = li.select("div[class=\"timestamp\"] span").first();
if (first == null) {
// Video unavailable (private, deleted, etc.), this is a thing that happens specifically with playlists,
// because in other cases, those videos don't even show up
return -1;
} else {
throw new ParsingException("Could not get Duration: " + getTitle(), e);
}
return YoutubeParsingHelper.parseDurationString(first.text());
} catch (Exception e) {
throw new ParsingException("Could not get Duration: " + getTitle(), e);
}
}
@ -261,24 +263,6 @@ public class YoutubePlaylistExtractor extends PlaylistExtractor {
throw new ParsingException("Could not get thumbnail url", e);
}
}
@Override
public boolean isAd() throws ParsingException {
return false;
}
/**
 * Decides whether the given list item looks like a live stream.
 *
 * @param item the element to inspect
 * @return true if the item carries a "yt-badge-live" badge, or if it has
 *         neither that badge nor a "video-time" span
 */
private boolean isLiveStream(Element item) {
    // An explicit live badge settles it.
    if (item.select("span[class*=\"yt-badge-live\"]").first() != null) {
        return true;
    }
    // Sometimes live streams don't have badges but still are live streams:
    // if the video time is not available we most likely have an offline live stream.
    return item.select("span[class*=\"video-time\"]").first() == null;
}
});
}
}

View File

@ -1,6 +1,5 @@
package org.schabi.newpipe.extractor.services.youtube;
import org.json.JSONException;
import org.json.JSONObject;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
@ -15,18 +14,13 @@ import org.schabi.newpipe.extractor.exceptions.ContentNotAvailableException;
import org.schabi.newpipe.extractor.exceptions.ExtractionException;
import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.exceptions.ReCaptchaException;
import org.schabi.newpipe.extractor.stream.AudioStream;
import org.schabi.newpipe.extractor.stream.Stream;
import org.schabi.newpipe.extractor.stream.StreamExtractor;
import org.schabi.newpipe.extractor.stream.StreamInfoItemCollector;
import org.schabi.newpipe.extractor.stream.StreamInfoItemExtractor;
import org.schabi.newpipe.extractor.stream.StreamType;
import org.schabi.newpipe.extractor.stream.VideoStream;
import org.schabi.newpipe.extractor.stream.*;
import org.schabi.newpipe.extractor.utils.Parser;
import org.schabi.newpipe.extractor.utils.Utils;
import java.io.IOException;
import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
@ -80,6 +74,10 @@ public class YoutubeStreamExtractor extends StreamExtractor {
/*//////////////////////////////////////////////////////////////////////////*/
private Document doc;
private JSONObject playerArgs;
private Map<String, String> videoInfoPage;
private boolean isAgeRestricted;
public YoutubeStreamExtractor(StreamingService service, String url) throws IOException, ExtractionException {
super(service, url);
@ -106,13 +104,12 @@ public class YoutubeStreamExtractor extends StreamExtractor {
}
//json player args method
return playerArgs.getString("title");
} catch (JSONException je) {//html <meta> method
je.printStackTrace();
} catch (Exception je) {
System.err.println("failed to load title from JSON args; trying to extract it from HTML");
try { // fall through to fall-back
try { // fall-back to html
return doc.select("meta[name=title]").attr(CONTENT);
} catch (Exception e) {
throw new ParsingException("failed permanently to load title.", e);
throw new ParsingException("Could not get the title", e);
}
}
}
@ -122,49 +119,61 @@ public class YoutubeStreamExtractor extends StreamExtractor {
try {
return doc.select("p[id=\"eow-description\"]").first().html();
} catch (Exception e) {//todo: add fallback method <-- there is no ... as long as i know
throw new ParsingException("failed to load description.", e);
throw new ParsingException("Could not get the description", e);
}
}
@Override
public String getUploaderName() throws ParsingException {
try {
if (playerArgs == null) {
return videoInfoPage.get("author");
}
//json player args method
return playerArgs.getString("author");
} catch (JSONException je) {
je.printStackTrace();
System.err.println(
"failed to load uploader name from JSON args; trying to extract it from HTML");
} catch (Exception ignored) {
// Try other method...
}
try {//fall through to fallback HTML method
try {
return videoInfoPage.get("author");
} catch (Exception ignored) {
// Try other method...
}
try {
// Fallback to HTML method
return doc.select("div.yt-user-info").first().text();
} catch (Exception e) {
throw new ParsingException("failed permanently to load uploader name.", e);
throw new ParsingException("Could not get uploader name", e);
}
}
@Override
public int getLength() throws ParsingException {
public long getLength() throws ParsingException {
try {
if (playerArgs == null) {
return Integer.valueOf(videoInfoPage.get("length_seconds"));
}
return playerArgs.getInt("length_seconds");
} catch (JSONException e) {//todo: find fallback method
throw new ParsingException("failed to load video duration from JSON args", e);
return playerArgs.getLong("length_seconds");
} catch (Exception ignored) {
// Try other method...
}
try {
return Long.parseLong(videoInfoPage.get("length_seconds"));
} catch (Exception ignored) {
// Try other method...
}
try {
// Fallback to HTML method
return Long.parseLong(doc.select("div[class~=\"ytp-progress-bar\"][role=\"slider\"]")
.first().attr("aria-valuemax"));
} catch (Exception e) {
throw new ParsingException("Could not get video length", e);
}
}
@Override
public long getViewCount() throws ParsingException {
try {
String viewCountString = doc.select("meta[itemprop=interactionCount]").attr(CONTENT);
return Long.parseLong(viewCountString);
return Long.parseLong(doc.select("meta[itemprop=interactionCount]").attr(CONTENT));
} catch (Exception e) {//todo: find fallback method
throw new ParsingException("failed to get number of views", e);
throw new ParsingException("Could not get number of views", e);
}
}
@ -173,28 +182,29 @@ public class YoutubeStreamExtractor extends StreamExtractor {
try {
return doc.select("meta[itemprop=datePublished]").attr(CONTENT);
} catch (Exception e) {//todo: add fallback method
throw new ParsingException("failed to get upload date.", e);
throw new ParsingException("Could not get upload date", e);
}
}
@Override
public String getThumbnailUrl() throws ParsingException {
//first attempt getting a small image version
//in the html extracting part we try to get a thumbnail with a higher resolution
// Try to get high resolution thumbnail if it fails use low res from the player instead
// Try to get high resolution thumbnail first, if it fails, use low res from the player instead
try {
return doc.select("link[itemprop=\"thumbnailUrl\"]").first().attr("abs:href");
} catch (Exception e) {
System.err.println("Could not find high res Thumbnail. Using low res instead");
} catch (Exception ignored) {
// Try other method...
}
try { //fall through to fallback
try {
return playerArgs.getString("thumbnail_url");
} catch (JSONException je) {
throw new ParsingException(
"failed to extract thumbnail URL from JSON args; trying to extract it from HTML", je);
} catch (NullPointerException ne) {
// Get from the video info page instead
} catch (Exception ignored) {
// Try other method...
}
try {
return videoInfoPage.get("thumbnail_url");
} catch (Exception e) {
throw new ParsingException("Could not get thumbnail url", e);
}
}
@ -205,7 +215,7 @@ public class YoutubeStreamExtractor extends StreamExtractor {
.select("img").first()
.attr("abs:data-thumb");
} catch (Exception e) {//todo: add fallback method
throw new ParsingException("failed to get uploader thumbnail URL.", e);
throw new ParsingException("Could not get uploader thumbnail URL.", e);
}
}
@ -215,11 +225,12 @@ public class YoutubeStreamExtractor extends StreamExtractor {
String dashManifestUrl;
if (videoInfoPage != null && videoInfoPage.containsKey("dashmpd")) {
dashManifestUrl = videoInfoPage.get("dashmpd");
} else if (playerArgs.has("dashmpd")) {
dashManifestUrl = playerArgs.getString("dashmpd");
} else if (playerArgs.get("dashmpd") != null) {
dashManifestUrl = playerArgs.optString("dashmpd");
} else {
return "";
}
if (!dashManifestUrl.contains("/signature/")) {
String encryptedSig = Parser.matchGroup1("/s/([a-fA-F0-9\\.]+)", dashManifestUrl);
String decryptedSig;
@ -227,10 +238,10 @@ public class YoutubeStreamExtractor extends StreamExtractor {
decryptedSig = decryptSignature(encryptedSig, decryptionCode);
dashManifestUrl = dashManifestUrl.replace("/s/" + encryptedSig, "/signature/" + decryptedSig);
}
return dashManifestUrl;
} catch (Exception e) {
throw new ParsingException(
"Could not get \"dashmpd\" maybe VideoInfoPage is broken.", e);
throw new ParsingException("Could not get dash manifest url", e);
}
}
@ -238,158 +249,56 @@ public class YoutubeStreamExtractor extends StreamExtractor {
public List<AudioStream> getAudioStreams() throws IOException, ExtractionException {
List<AudioStream> audioStreams = new ArrayList<>();
try {
String encodedUrlMap;
// playerArgs could be null if the video is age restricted
if (playerArgs == null) {
if (videoInfoPage.containsKey("adaptive_fmts")) {
encodedUrlMap = videoInfoPage.get("adaptive_fmts");
} else {
return null;
}
} else {
if (playerArgs.has("adaptive_fmts")) {
encodedUrlMap = playerArgs.getString("adaptive_fmts");
} else {
return null;
}
}
for (String url_data_str : encodedUrlMap.split(",")) {
// This loop iterates through multiple streams, therefor tags
// is related to one and the same stream at a time.
Map<String, String> tags = Parser.compatParseMap(
org.jsoup.parser.Parser.unescapeEntities(url_data_str, true));
for (Map.Entry<String, ItagItem> entry : getItags(ADAPTIVE_FMTS, ItagItem.ItagType.AUDIO).entrySet()) {
ItagItem itag = entry.getValue();
int itag = Integer.parseInt(tags.get("itag"));
if (ItagItem.isSupported(itag)) {
ItagItem itagItem = ItagItem.getItag(itag);
if (itagItem.itagType == ItagItem.ItagType.AUDIO) {
String streamUrl = tags.get("url");
// if video has a signature: decrypt it and add it to the url
if (tags.get("s") != null) {
streamUrl = streamUrl + "&signature="
+ decryptSignature(tags.get("s"), decryptionCode);
}
AudioStream audioStream = new AudioStream(streamUrl, itagItem.mediaFormatId, itagItem.avgBitrate);
if (!Stream.containSimilarStream(audioStream, audioStreams)) {
audioStreams.add(audioStream);
}
}
AudioStream audioStream = new AudioStream(entry.getKey(), itag.mediaFormatId, itag.avgBitrate);
if (!Stream.containSimilarStream(audioStream, audioStreams)) {
audioStreams.add(audioStream);
}
}
} catch (Exception e) {
throw new ParsingException("Could not get audiostreams", e);
throw new ParsingException("Could not get audio streams", e);
}
return audioStreams;
}
@Override
public List<VideoStream> getVideoStreams() throws IOException, ExtractionException {
List<VideoStream> videoStreams = new ArrayList<>();
try {
String encodedUrlMap;
// playerArgs could be null if the video is age restricted
if (playerArgs == null) {
encodedUrlMap = videoInfoPage.get(URL_ENCODED_FMT_STREAM_MAP);
} else {
encodedUrlMap = playerArgs.getString(URL_ENCODED_FMT_STREAM_MAP);
}
for (String url_data_str : encodedUrlMap.split(",")) {
try {
// This loop iterates through multiple streams, therefor tags
// is related to one and the same stream at a time.
Map<String, String> tags = Parser.compatParseMap(
org.jsoup.parser.Parser.unescapeEntities(url_data_str, true));
for (Map.Entry<String, ItagItem> entry : getItags(URL_ENCODED_FMT_STREAM_MAP, ItagItem.ItagType.VIDEO).entrySet()) {
ItagItem itag = entry.getValue();
int itag = Integer.parseInt(tags.get("itag"));
if (ItagItem.isSupported(itag)) {
ItagItem itagItem = ItagItem.getItag(itag);
if (itagItem.itagType == ItagItem.ItagType.VIDEO) {
String streamUrl = tags.get("url");
// if video has a signature: decrypt it and add it to the url
if (tags.get("s") != null) {
streamUrl = streamUrl + "&signature="
+ decryptSignature(tags.get("s"), decryptionCode);
}
VideoStream videoStream = new VideoStream(streamUrl, itagItem.mediaFormatId, itagItem.resolutionString);
if (!Stream.containSimilarStream(videoStream, videoStreams)) {
videoStreams.add(videoStream);
}
}
}
} catch (Exception e) {
//todo: dont log throw an error
System.err.println("Could not get Video stream.");
e.printStackTrace();
VideoStream videoStream = new VideoStream(entry.getKey(), itag.mediaFormatId, itag.resolutionString);
if (!Stream.containSimilarStream(videoStream, videoStreams)) {
videoStreams.add(videoStream);
}
}
} catch (Exception e) {
throw new ParsingException("Failed to get video streams", e);
throw new ParsingException("Could not get video streams", e);
}
if (videoStreams.isEmpty()) {
throw new ParsingException("Failed to get any video stream");
}
return videoStreams;
}
@Override
public List<VideoStream> getVideoOnlyStreams() throws IOException, ExtractionException {
List<VideoStream> videoOnlyStreams = new ArrayList<>();
try {
String encodedUrlMap;
// playerArgs could be null if the video is age restricted
if (playerArgs == null) {
if (videoInfoPage.containsKey("adaptive_fmts")) {
encodedUrlMap = videoInfoPage.get("adaptive_fmts");
} else {
return null;
}
} else {
if (playerArgs.has("adaptive_fmts")) {
encodedUrlMap = playerArgs.getString("adaptive_fmts");
} else {
return null;
}
}
for (String url_data_str : encodedUrlMap.split(",")) {
// This loop iterates through multiple streams, therefor tags
// is related to one and the same stream at a time.
Map<String, String> tags = Parser.compatParseMap(
org.jsoup.parser.Parser.unescapeEntities(url_data_str, true));
for (Map.Entry<String, ItagItem> entry : getItags(ADAPTIVE_FMTS, ItagItem.ItagType.VIDEO_ONLY).entrySet()) {
ItagItem itag = entry.getValue();
int itag = Integer.parseInt(tags.get("itag"));
if (ItagItem.isSupported(itag)) {
ItagItem itagItem = ItagItem.getItag(itag);
if (itagItem.itagType == ItagItem.ItagType.VIDEO_ONLY) {
String streamUrl = tags.get("url");
// if video has a signature: decrypt it and add it to the url
if (tags.get("s") != null) {
streamUrl = streamUrl + "&signature="
+ decryptSignature(tags.get("s"), decryptionCode);
}
VideoStream videoStream = new VideoStream(streamUrl, itagItem.mediaFormatId, itagItem.resolutionString, true);
if (!Stream.containSimilarStream(videoStream, videoOnlyStreams)) {
videoOnlyStreams.add(videoStream);
}
}
VideoStream videoStream = new VideoStream(entry.getKey(), itag.mediaFormatId, itag.resolutionString, true);
if (!Stream.containSimilarStream(videoStream, videoOnlyStreams)) {
videoOnlyStreams.add(videoStream);
}
}
} catch (Exception e) {
throw new ParsingException("Failed to get video only streams", e);
throw new ParsingException("Could not get video only streams", e);
}
if (videoOnlyStreams.isEmpty()) {
throw new ParsingException("Failed to get any video only stream");
}
return videoOnlyStreams;
}
@ -460,10 +369,9 @@ public class YoutubeStreamExtractor extends StreamExtractor {
}
@Override
public int getLikeCount() throws ParsingException {
public long getLikeCount() throws ParsingException {
String likesString = "";
try {
Element button = doc.select("button.like-button-renderer-like-button").first();
try {
likesString = button.select("span.yt-uix-button-content").first().text();
@ -473,15 +381,14 @@ public class YoutubeStreamExtractor extends StreamExtractor {
}
return Integer.parseInt(Utils.removeNonDigitCharacters(likesString));
} catch (NumberFormatException nfe) {
throw new ParsingException(
"failed to parse likesString \"" + likesString + "\" as integers", nfe);
throw new ParsingException("Could not parse \"" + likesString + "\" as an Integer", nfe);
} catch (Exception e) {
throw new ParsingException("Could not get like count", e);
}
}
@Override
public int getDislikeCount() throws ParsingException {
public long getDislikeCount() throws ParsingException {
String dislikesString = "";
try {
Element button = doc.select("button.like-button-renderer-dislike-button").first();
@ -493,18 +400,20 @@ public class YoutubeStreamExtractor extends StreamExtractor {
}
return Integer.parseInt(Utils.removeNonDigitCharacters(dislikesString));
} catch (NumberFormatException nfe) {
throw new ParsingException(
"failed to parse dislikesString \"" + dislikesString + "\" as integers", nfe);
throw new ParsingException("Could not parse \"" + dislikesString + "\" as an Integer", nfe);
} catch (Exception e) {
throw new ParsingException("Could not get dislike count", e);
}
}
@Override
public StreamInfoItemExtractor getNextVideo() throws IOException, ExtractionException {
public StreamInfoItem getNextVideo() throws IOException, ExtractionException {
try {
return extractVideoPreviewInfo(doc.select("div[class=\"watch-sidebar-section\"]").first()
.select("li").first());
StreamInfoItemCollector collector = new StreamInfoItemCollector(getServiceId());
collector.commit(extractVideoPreviewInfo(doc.select("div[class=\"watch-sidebar-section\"]")
.first().select("li").first()));
return ((StreamInfoItem) collector.getItemList().get(0));
} catch (Exception e) {
throw new ParsingException("Could not get next video", e);
}
@ -571,57 +480,37 @@ public class YoutubeStreamExtractor extends StreamExtractor {
}
/*//////////////////////////////////////////////////////////////////////////
// Utils
// Fetch page
//////////////////////////////////////////////////////////////////////////*/
private JSONObject playerArgs;
private boolean isAgeRestricted;
private Map<String, String> videoInfoPage;
private static final String URL_ENCODED_FMT_STREAM_MAP = "url_encoded_fmt_stream_map";
private static final String ADAPTIVE_FMTS = "adaptive_fmts";
private static final String HTTPS = "https:";
private static final String CONTENT = "content";
/**
* Sometimes if the html page of youtube is already downloaded, youtube web page will internally
* download the /get_video_info page. Since a certain date dashmpd url is only available over
* this /get_video_info page, so we always need to download this one too.
* <p>
* %%video_id%% will be replaced by the actual video id
* $$el_type$$ will be replaced by the actual el_type (see the declarations below)
*/
private static final String GET_VIDEO_INFO_URL =
"https://www.youtube.com/get_video_info?video_id=%%video_id%%$$el_type$$&ps=default&eurl=&gl=US&hl=en";
// eltype is necessary for the url above
private static final String EL_INFO = "el=info";
// static values
private static final String DECRYPTION_FUNC_NAME = "decrypt";
private static final String GET_VIDEO_INFO_URL = "https://www.youtube.com/get_video_info?video_id=" + "%s" +
"&el=info&ps=default&eurl=&gl=US&hl=en";
// cached values
private static volatile String decryptionCode = "";
@Override
public void fetchPage() throws IOException, ExtractionException {
Downloader downloader = NewPipe.getDownloader();
Downloader dl = NewPipe.getDownloader();
String pageContent = downloader.download(getCleanUrl());
String pageContent = dl.download(getCleanUrl());
doc = Jsoup.parse(pageContent, getCleanUrl());
String infoPageResponse = dl.download(String.format(GET_VIDEO_INFO_URL, getId()));
videoInfoPage = Parser.compatParseMap(infoPageResponse);
JSONObject ytPlayerConfig;
String playerUrl;
String videoInfoUrl = GET_VIDEO_INFO_URL.replace("%%video_id%%", getId()).replace("$$el_type$$", "&" + EL_INFO);
String videoInfoPageString = downloader.download(videoInfoUrl);
videoInfoPage = Parser.compatParseMap(videoInfoPageString);
// Check if the video is age restricted
if (pageContent.contains("<meta property=\"og:restrictions:age")) {
playerUrl = getPlayerUrlFromRestrictedVideo();
isAgeRestricted = true;
} else {
ytPlayerConfig = getPlayerConfig(pageContent);
JSONObject ytPlayerConfig = getPlayerConfig(pageContent);
playerArgs = getPlayerArgs(ytPlayerConfig);
playerUrl = getPlayerUrl(ytPlayerConfig);
isAgeRestricted = false;
@ -647,7 +536,7 @@ public class YoutubeStreamExtractor extends StreamExtractor {
default:
throw new ContentNotAvailableException("Content not available", e);
}
} catch (JSONException e) {
} catch (Exception e) {
throw new ParsingException("Could not parse yt player config", e);
}
}
@ -665,7 +554,7 @@ public class YoutubeStreamExtractor extends StreamExtractor {
|| (playerArgs.get(URL_ENCODED_FMT_STREAM_MAP).toString().isEmpty())) {
isLiveStream = true;
}
} catch (JSONException e) {
} catch (Exception e) {
throw new ParsingException("Could not parse yt player config", e);
}
if (isLiveStream) {
@ -689,7 +578,7 @@ public class YoutubeStreamExtractor extends StreamExtractor {
playerUrl = HTTPS + playerUrl;
}
return playerUrl;
} catch (JSONException e) {
} catch (Exception e) {
throw new ParsingException(
"Could not load decryption code for the Youtube service.", e);
}
@ -782,23 +671,55 @@ public class YoutubeStreamExtractor extends StreamExtractor {
return result == null ? "" : result.toString();
}
/*//////////////////////////////////////////////////////////////////////////
// Utils
//////////////////////////////////////////////////////////////////////////*/
/**
 * Collects the stream urls of one itag type from an encoded url map.
 * <p>
 * The encoded map is looked up under {@code encodedUrlMapKey}, first in the video info page
 * and, if it is not there, in the player args. It is split on "," and every entry is parsed
 * on its own: entries whose itag is supported and whose type equals {@code itagTypeWanted}
 * are added to the result, with the decrypted signature appended to the url when the entry
 * carries an "s" tag.
 *
 * @param encodedUrlMapKey key under which the encoded url map is stored
 * @param itagTypeWanted   only itags of this type are returned
 * @return stream url to itag item, in the order the entries appear in the encoded map;
 *         empty if no encoded map is available under the key
 * @throws ParsingException declared by the signature; a {@code DecryptException} raised while
 *                          decrypting a signature is rethrown unchanged
 */
private Map<String, ItagItem> getItags(String encodedUrlMapKey, ItagItem.ItagType itagTypeWanted) throws ParsingException {
    Map<String, ItagItem> urlAndItags = new LinkedHashMap<>();

    String encodedUrlMap = "";
    if (videoInfoPage != null && videoInfoPage.containsKey(encodedUrlMapKey)) {
        encodedUrlMap = videoInfoPage.get(encodedUrlMapKey);
    } else if (playerArgs != null && playerArgs.has(encodedUrlMapKey)) {
        // has() instead of get() != null: JSONObject.get throws for a missing key
        // rather than returning null, so the old check could never be false.
        encodedUrlMap = playerArgs.optString(encodedUrlMapKey);
    }

    // Nothing to parse (also guards against a key that is mapped to null).
    if (encodedUrlMap == null || encodedUrlMap.isEmpty()) {
        return urlAndItags;
    }

    for (String url_data_str : encodedUrlMap.split(",")) {
        try {
            // This loop iterates through multiple streams, therefore tags
            // is related to one and the same stream at a time.
            Map<String, String> tags = Parser.compatParseMap(
                    org.jsoup.parser.Parser.unescapeEntities(url_data_str, true));

            int itag = Integer.parseInt(tags.get("itag"));

            if (ItagItem.isSupported(itag)) {
                ItagItem itagItem = ItagItem.getItag(itag);
                if (itagItem.itagType == itagTypeWanted) {
                    String streamUrl = tags.get("url");
                    // if video has a signature: decrypt it and add it to the url
                    if (tags.get("s") != null) {
                        streamUrl = streamUrl + "&signature=" + decryptSignature(tags.get("s"), decryptionCode);
                    }
                    urlAndItags.put(streamUrl, itagItem);
                }
            }
        } catch (DecryptException e) {
            // A failing decryption affects every stream, so it is not skipped silently.
            throw e;
        } catch (Exception ignored) {
            // Best effort: a single malformed entry must not prevent the other streams
            // from being returned, so it is skipped.
        }
    }

    return urlAndItags;
}
/**
* Provides information about links to other videos on the video page, such as related videos.
* This is encapsulated in a StreamInfoItem object,
* which is a subset of the fields in a full StreamInfo.
* This is encapsulated in a StreamInfoItem object, which is a subset of the fields in a full StreamInfo.
*/
private StreamInfoItemExtractor extractVideoPreviewInfo(final Element li) {
return new StreamInfoItemExtractor() {
@Override
public StreamType getStreamType() throws ParsingException {
return StreamType.VIDEO_STREAM;
}
@Override
public boolean isAd() throws ParsingException {
return !li.select("span[class*=\"icon-not-available\"]").isEmpty() ||
!li.select("span[class*=\"yt-badge-ad\"]").isEmpty();
}
return new YoutubeStreamInfoItemExtractor(li) {
@Override
public String getWebPageUrl() throws ParsingException {
@ -813,21 +734,6 @@ public class YoutubeStreamExtractor extends StreamExtractor {
//https://www.youtube.com/watch?v=Uqg0aEhLFAg
}
@Override
public int getDuration() throws ParsingException {
try {
return YoutubeParsingHelper.parseDurationString(
li.select("span[class*=\"video-time\"]").first().text());
} catch (Exception e) {
if (isLiveStream(li)) {
// -1 for no duration
return -1;
} else {
throw new ParsingException("Could not get Duration: " + getTitle(), e);
}
}
}
@Override
public String getUploaderName() throws ParsingException {
return li.select("span.g-hovercard").first().text();
@ -835,12 +741,14 @@ public class YoutubeStreamExtractor extends StreamExtractor {
@Override
public String getUploadDate() throws ParsingException {
return null;
return "";
}
@Override
public long getViewCount() throws ParsingException {
try {
if (getStreamType() == StreamType.LIVE_STREAM) return -1;
return Long.parseLong(Utils.removeNonDigitCharacters(
li.select("span.view-count").first().text()));
} catch (Exception e) {
@ -864,19 +772,6 @@ public class YoutubeStreamExtractor extends StreamExtractor {
}
return thumbnailUrl;
}
/**
 * Decides whether the given list item looks like a live stream.
 *
 * @param item the element to inspect
 * @return true if the item carries a "yt-badge-live" badge, or if it has
 *         neither that badge nor a "video-time" span
 */
private boolean isLiveStream(Element item) {
    // An explicit live badge settles it.
    if (item.select("span[class*=\"yt-badge-live\"]").first() != null) {
        return true;
    }
    // Sometimes live streams don't have badges but still are live streams:
    // if the video time is not available we most likely have an offline live stream.
    return item.select("span[class*=\"video-time\"]").first() == null;
}
};
}
}

View File

@ -1,7 +1,6 @@
package org.schabi.newpipe.extractor.services.youtube;
import org.jsoup.nodes.Element;
import org.schabi.newpipe.extractor.exceptions.FoundAdException;
import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.stream.StreamInfoItemExtractor;
import org.schabi.newpipe.extractor.stream.StreamType;
@ -29,10 +28,25 @@ public class YoutubeStreamInfoItemExtractor implements StreamInfoItemExtractor {
private final Element item;
public YoutubeStreamInfoItemExtractor(Element item) throws FoundAdException {
public YoutubeStreamInfoItemExtractor(Element item) {
this.item = item;
}
/**
 * Classifies the wrapped item: {@code LIVE_STREAM} when {@code isLiveStream(item)}
 * reports true, {@code VIDEO_STREAM} otherwise.
 */
@Override
public StreamType getStreamType() throws ParsingException {
    return isLiveStream(item) ? StreamType.LIVE_STREAM : StreamType.VIDEO_STREAM;
}
/**
 * Reports whether the wrapped item is flagged as an ad, i.e. whether it contains a span
 * whose class includes "icon-not-available" or "yt-badge-ad".
 */
@Override
public boolean isAd() throws ParsingException {
    // Checked in the same order as before; the second lookup only runs if the first finds nothing.
    if (!item.select("span[class*=\"icon-not-available\"]").isEmpty()) {
        return true;
    }
    return !item.select("span[class*=\"yt-badge-ad\"]").isEmpty();
}
@Override
public String getWebPageUrl() throws ParsingException {
try {
@ -58,15 +72,11 @@ public class YoutubeStreamInfoItemExtractor implements StreamInfoItemExtractor {
/**
 * Extracts the duration by passing the text of the first span whose class contains
 * "video-time" to {@code YoutubeParsingHelper.parseDurationString}.
 * Live streams yield -1 (no duration).
 * NOTE(review): this listing appears to carry both the pre-change and post-change lines
 * of the method — confirm which set applies before relying on it.
 *
 * @throws ParsingException if the duration cannot be read; the message includes the title
 */
@Override
public int getDuration() throws ParsingException {
try {
return YoutubeParsingHelper.parseDurationString(
item.select("span[class*=\"video-time\"]").first().text());
if (getStreamType() == StreamType.LIVE_STREAM) return -1;
return YoutubeParsingHelper.parseDurationString(item.select("span[class*=\"video-time\"]").first().text());
} catch (Exception e) {
if (isLiveStream(item)) {
// -1 signals that no duration is available
return -1;
} else {
throw new ParsingException("Could not get Duration: " + getTitle(), e);
}
throw new ParsingException("Could not get Duration: " + getTitle(), e);
}
}
@ -84,12 +94,10 @@ public class YoutubeStreamInfoItemExtractor implements StreamInfoItemExtractor {
@Override
public String getUploadDate() throws ParsingException {
try {
Element div = item.select("div[class=\"yt-lockup-meta\"]").first();
if (div == null) {
return null;
} else {
return div.select("li").first().text();
}
Element meta = item.select("div[class=\"yt-lockup-meta\"]").first();
if (meta == null) return "";
return meta.select("li").first().text();
} catch (Exception e) {
throw new ParsingException("Could not get upload date", e);
}
@ -97,35 +105,29 @@ public class YoutubeStreamInfoItemExtractor implements StreamInfoItemExtractor {
/**
 * Extracts the view count from the second {@code li} inside the item's
 * {@code div[class="yt-lockup-meta"]}: non-digit characters are removed and the rest is
 * parsed as a long.
 * Returns -1 when the item is a live stream or the meta div is missing, and 0 when the
 * text is non-empty but holds no parsable number.
 * NOTE(review): this listing appears to carry both the pre-change and post-change lines
 * of the method — confirm which set applies before relying on it.
 *
 * @throws ParsingException if the meta div is present but has no second entry, or if the
 *                          text is empty and therefore cannot be parsed
 */
@Override
public long getViewCount() throws ParsingException {
String output;
String input;
try {
Element div = item.select("div[class=\"yt-lockup-meta\"]").first();
if (div == null) {
return -1;
} else {
input = div.select("li").get(1).text();
}
// TODO: Return the actual live stream's watcher count
// -1 signals that no view count is available
if (getStreamType() == StreamType.LIVE_STREAM) return -1;
Element meta = item.select("div[class=\"yt-lockup-meta\"]").first();
if (meta == null) return -1;
input = meta.select("li").get(1).text();
} catch (IndexOutOfBoundsException e) {
if (isLiveStream(item)) {
// -1 signals that no view count is available
return -1;
} else {
throw new ParsingException("Could not parse yt-lockup-meta although available: " + getTitle(), e);
}
throw new ParsingException("Could not parse yt-lockup-meta although available: " + getTitle(), e);
}
output = Utils.removeNonDigitCharacters(input);
try {
return Long.parseLong(output);
return Long.parseLong(Utils.removeNonDigitCharacters(input));
} catch (NumberFormatException e) {
// If this happens, the video probably has no views.
if (!input.isEmpty()) {
if (!input.isEmpty()){
return 0;
} else {
throw new ParsingException("Could not handle input: " + input, e);
}
throw new ParsingException("Could not handle input: " + input, e);
}
}
@ -148,31 +150,11 @@ public class YoutubeStreamInfoItemExtractor implements StreamInfoItemExtractor {
}
}
/**
 * Reports {@code LIVE_STREAM} when {@code isLiveStream(item)} holds and
 * {@code VIDEO_STREAM} otherwise.
 */
@Override
public StreamType getStreamType() {
    return isLiveStream(item) ? StreamType.LIVE_STREAM : StreamType.VIDEO_STREAM;
}
/**
 * Tells whether this item is an ad: true when it contains a span whose class includes
 * "icon-not-available" or one whose class includes "yt-badge-ad".
 */
@Override
public boolean isAd() throws ParsingException {
    boolean noUnavailableIcon = item.select("span[class*=\"icon-not-available\"]").isEmpty();
    // The ad-badge lookup only runs when the first marker is absent, as before.
    return !(noUnavailableIcon && item.select("span[class*=\"yt-badge-ad\"]").isEmpty());
}
// NOTE(review): the instance variant below has no closing brace of its own in this
// listing; it looks like the pre-change body followed by its static replacement — confirm.
private boolean isLiveStream(Element item) {
Element bla = item.select("span[class*=\"yt-badge-live\"]").first();
if (bla == null) {
// Sometimes live streams don't have badges but still are live streams.
// If the video time is not available, we most likely have an offline live stream.
if (item.select("span[class*=\"video-time\"]").first() == null) {
return true;
}
}
return bla != null;
/**
* Checks whether the element shows any sign of being a live stream item: a span whose
* class contains "yt-badge-live" or one whose class contains "video-time-overlay-live".
*
* @param item the element to inspect
* @return true if either marker span is present
*/
protected static boolean isLiveStream(Element item) {
return !item.select("span[class*=\"yt-badge-live\"]").isEmpty()
|| !item.select("span[class*=\"video-time-overlay-live\"]").isEmpty();
}
}

View File

@ -13,8 +13,6 @@ import org.schabi.newpipe.extractor.exceptions.ExtractionException;
import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.exceptions.ReCaptchaException;
import org.schabi.newpipe.extractor.stream.StreamInfoItemCollector;
import org.schabi.newpipe.extractor.stream.StreamInfoItemExtractor;
import org.schabi.newpipe.extractor.stream.StreamType;
import org.schabi.newpipe.extractor.user.UserExtractor;
import org.schabi.newpipe.extractor.utils.Parser;
import org.schabi.newpipe.extractor.utils.Utils;
@ -197,18 +195,7 @@ public class YoutubeUserExtractor extends UserExtractor {
for (final Element li : element.children()) {
if (li.select("div[class=\"feed-item-dismissable\"]").first() != null) {
collector.commit(new StreamInfoItemExtractor() {
/** Always reports {@code StreamType.VIDEO_STREAM}; no live stream check is made here. */
@Override
public StreamType getStreamType() throws ParsingException {
return StreamType.VIDEO_STREAM;
}
/**
 * Tells whether the list entry is an ad: true when it contains a span whose class includes
 * "icon-not-available" or one whose class includes "yt-badge-ad".
 */
@Override
public boolean isAd() throws ParsingException {
    boolean noUnavailableIcon = li.select("span[class*=\"icon-not-available\"]").isEmpty();
    // The ad-badge lookup only runs when the first marker is absent, as before.
    return !(noUnavailableIcon && li.select("span[class*=\"yt-badge-ad\"]").isEmpty());
}
collector.commit(new YoutubeStreamInfoItemExtractor(li) {
@Override
public String getWebPageUrl() throws ParsingException {
try {
@ -231,68 +218,11 @@ public class YoutubeUserExtractor extends UserExtractor {
}
}
/**
 * Parses the duration from the text of the first span whose class contains "video-time".
 * When that fails and the entry is a live stream, -1 is returned instead.
 *
 * @throws ParsingException if parsing fails for an entry that is not a live stream
 */
@Override
public int getDuration() throws ParsingException {
    try {
        String durationText = li.select("span[class*=\"video-time\"]").first().text();
        return YoutubeParsingHelper.parseDurationString(durationText);
    } catch (Exception e) {
        if (!isLiveStream(li)) {
            throw new ParsingException("Could not get Duration: " + getTitle(), e);
        }
        // -1 signals that no duration is available
        return -1;
    }
}
/** Returns whatever {@code getUserName()} yields; nothing is read from the list entry. */
@Override
public String getUploaderName() throws ParsingException {
return getUserName();
}
/**
 * Returns the text of the first {@code li} inside the entry's
 * {@code div[class="yt-lockup-meta"]}, or an empty string when there is no such {@code li}.
 *
 * @throws ParsingException if anything goes wrong while reading the entry
 */
@Override
public String getUploadDate() throws ParsingException {
    try {
        Element firstEntry = li.select("div[class=\"yt-lockup-meta\"]").first()
                .select("li").first();
        // A missing entry means we have a YouTube Red video.
        return firstEntry == null ? "" : firstEntry.text();
    } catch (Exception e) {
        throw new ParsingException("Could not get upload date", e);
    }
}
/**
 * Reads the view count from the second {@code li} inside the entry's
 * {@code div[class="yt-lockup-meta"]}.
 * Returns -1 when there is no second {@code li}, and 0 when the text is non-empty but
 * holds no parsable number.
 *
 * @throws ParsingException if the text is empty and therefore cannot be parsed
 */
@Override
public long getViewCount() throws ParsingException {
    String rawText;
    try {
        Element meta = li.select("div[class=\"yt-lockup-meta\"]").first();
        rawText = meta.select("li").get(1).text();
    } catch (IndexOutOfBoundsException e) {
        return -1;
    }

    String digitsOnly = Utils.removeNonDigitCharacters(rawText);
    try {
        return Long.parseLong(digitsOnly);
    } catch (NumberFormatException e) {
        if (rawText.isEmpty()) {
            throw new ParsingException("Could not handle input: " + rawText, e);
        }
        // Non-empty text without a parsable number: the video probably has no views.
        return 0;
    }
}
@Override
public String getThumbnailUrl() throws ParsingException {
try {
@ -311,19 +241,6 @@ public class YoutubeUserExtractor extends UserExtractor {
throw new ParsingException("Could not get thumbnail url", e);
}
}
/**
 * Decides whether a list item represents a live stream.
 * An item with a span whose class contains "yt-badge-live" counts as live. An item
 * without that badge still counts as live when it has no span whose class contains
 * "video-time" (most likely an offline live stream).
 *
 * @param item the element to inspect
 * @return true if the item is treated as a live stream
 */
private boolean isLiveStream(Element item) {
    boolean hasLiveBadge = item.select("span[class*=\"yt-badge-live\"]").first() != null;
    if (hasLiveBadge) {
        return true;
    }
    // Live streams sometimes lack the badge, so fall back to the missing duration label.
    return item.select("span[class*=\"video-time\"]").first() == null;
}
});
}
}

View File

@ -50,7 +50,7 @@ public abstract class StreamExtractor extends Extractor {
public abstract String getDescription() throws ParsingException;
public abstract String getUploaderName() throws ParsingException;
public abstract String getUploaderUrl() throws ParsingException;
public abstract int getLength() throws ParsingException;
public abstract long getLength() throws ParsingException;
public abstract long getViewCount() throws ParsingException;
public abstract String getUploadDate() throws ParsingException;
public abstract String getThumbnailUrl() throws ParsingException;
@ -60,9 +60,9 @@ public abstract class StreamExtractor extends Extractor {
public abstract List<VideoStream> getVideoOnlyStreams() throws IOException, ExtractionException;
public abstract String getDashMpdUrl() throws ParsingException;
public abstract int getAgeLimit() throws ParsingException;
public abstract int getLikeCount() throws ParsingException;
public abstract int getDislikeCount() throws ParsingException;
public abstract StreamInfoItemExtractor getNextVideo() throws IOException, ExtractionException;
public abstract long getLikeCount() throws ParsingException;
public abstract long getDislikeCount() throws ParsingException;
public abstract StreamInfoItem getNextVideo() throws IOException, ExtractionException;
public abstract StreamInfoItemCollector getRelatedVideos() throws IOException, ExtractionException;
public abstract StreamType getStreamType() throws ParsingException;

View File

@ -235,18 +235,11 @@ public class StreamInfo extends Info {
streamInfo.addException(e);
}
try {
StreamInfoItemCollector c = new StreamInfoItemCollector(extractor.getServiceId());
StreamInfoItemExtractor nextVideo = extractor.getNextVideo();
c.commit(nextVideo);
if (c.getItemList().size() != 0) {
streamInfo.next_video = (StreamInfoItem) c.getItemList().get(0);
}
streamInfo.errors.addAll(c.getErrors());
streamInfo.next_video = extractor.getNextVideo();
} catch (Exception e) {
streamInfo.addException(e);
}
try {
// get related videos
StreamInfoItemCollector c = extractor.getRelatedVideos();
streamInfo.related_streams = c.getItemList();
streamInfo.errors.addAll(c.getErrors());
@ -266,12 +259,12 @@ public class StreamInfo extends Info {
public StreamType stream_type;
public String thumbnail_url;
public String upload_date;
public int duration = -1;
public long duration = -1;
public int age_limit = -1;
public long view_count = -1;
public int like_count = -1;
public int dislike_count = -1;
public long like_count = -1;
public long dislike_count = -1;
public String uploader_name;
public String uploader_url;

View File

@ -37,7 +37,7 @@ public class SoundcloudSearchEngineStreamTest {
}
@Test
public void testStreamItemType() {
public void testResultsItemType() {
for (InfoItem infoItem : result.resultList) {
assertEquals(InfoItem.InfoType.STREAM, infoItem.info_type);
}

View File

@ -37,7 +37,7 @@ public class SoundcloudSearchEngineUserTest {
}
@Test
public void testUserItemType() {
public void testResultsItemType() {
for (InfoItem infoItem : result.resultList) {
assertEquals(InfoItem.InfoType.USER, infoItem.info_type);
}

View File

@ -63,7 +63,7 @@ public class SoundcloudUserExtractorTest {
// Checks the subscriber count against a lower bound only, not an exact value.
// NOTE(review): two thresholds appear here (1224324 and 1000000) — presumably the
// pre-change and post-change assertions; confirm which one is current.
@Test
public void testGetSubscriberCount() throws Exception {
assertTrue("wrong subscriber count", extractor.getSubscriberCount() >= 1224324);
assertTrue("wrong subscriber count", extractor.getSubscriberCount() >= 1000000);
}
@Test

View File

@ -58,7 +58,7 @@ public class YoutubeSearchEngineStreamTest {
}
@Test
public void testStreamItemType() {
public void testResultsItemType() {
for (InfoItem infoItem : result.resultList) {
assertEquals(InfoItem.InfoType.STREAM, infoItem.info_type);
}

View File

@ -58,7 +58,7 @@ public class YoutubeSearchEngineUserTest {
}
@Test
public void testUserItemType() {
public void testResultsItemType() {
for (InfoItem infoItem : result.resultList) {
assertEquals(InfoItem.InfoType.USER, infoItem.info_type);
}