got rid of getVideoInfo() in youtube crawler

This commit is contained in:
Christian Schabesberger 2016-02-02 14:06:09 +01:00
parent fb942912db
commit bad576c23d
4 changed files with 80 additions and 58 deletions

View File

@ -115,6 +115,7 @@ public class VideoItemDetailFragment extends Fragment {
VideoInfo videoInfo = videoExtractor.getVideoInfo(); VideoInfo videoInfo = videoExtractor.getVideoInfo();
h.post(new VideoResultReturnedRunnable(videoInfo)); h.post(new VideoResultReturnedRunnable(videoInfo));
h.post(new SetThumbnailRunnable( h.post(new SetThumbnailRunnable(
//todo: make bitmaps not bypass tor
BitmapFactory.decodeStream( BitmapFactory.decodeStream(
new URL(videoInfo.thumbnail_url) new URL(videoInfo.thumbnail_url)
.openConnection() .openConnection()

View File

@ -157,6 +157,7 @@ public class VideoItemListFragment extends ListFragment {
if(!downloadedList.get(i)) { if(!downloadedList.get(i)) {
Bitmap thumbnail; Bitmap thumbnail;
try { try {
//todo: make bitmaps not bypass tor
thumbnail = BitmapFactory.decodeStream( thumbnail = BitmapFactory.decodeStream(
new URL(thumbnailUrlList.get(i)).openConnection().getInputStream()); new URL(thumbnailUrlList.get(i)).openConnection().getInputStream());
h.post(new SetThumbnailRunnable(i, thumbnail, requestId)); h.post(new SetThumbnailRunnable(i, thumbnail, requestId));

View File

@ -20,6 +20,9 @@ package org.schabi.newpipe.crawler;
* along with NewPipe. If not, see <http://www.gnu.org/licenses/>. * along with NewPipe. If not, see <http://www.gnu.org/licenses/>.
*/ */
import java.util.List;
import java.util.Vector;
/**Scrapes information from a video streaming service (eg, YouTube).*/ /**Scrapes information from a video streaming service (eg, YouTube).*/
@ -134,6 +137,25 @@ public abstract class VideoExtractor {
videoInfo.dashMpdUrl = getDashMpdUrl(); videoInfo.dashMpdUrl = getDashMpdUrl();
} }
if(videoInfo.average_rating.isEmpty()) {
videoInfo.average_rating = getAverageRating();
}
if(videoInfo.like_count == -1) {
videoInfo.like_count = getLikeCount();
}
if(videoInfo.dislike_count == -1) {
videoInfo.dislike_count = getDislikeCount();
}
if(videoInfo.nextVideo == null) {
videoInfo.nextVideo = getNextVideo();
}
if(videoInfo.relatedVideos == null) {
videoInfo.relatedVideos = getRelatedVideos();
}
//Bitmap thumbnail = null; //Bitmap thumbnail = null;
//Bitmap uploader_thumbnail = null; //Bitmap uploader_thumbnail = null;
@ -158,4 +180,9 @@ public abstract class VideoExtractor {
public abstract VideoInfo.VideoStream[] getVideoStreams() throws ParsingException; public abstract VideoInfo.VideoStream[] getVideoStreams() throws ParsingException;
public abstract String getDashMpdUrl() throws ParsingException; public abstract String getDashMpdUrl() throws ParsingException;
public abstract int getAgeLimit() throws ParsingException; public abstract int getAgeLimit() throws ParsingException;
/**
 * Supplies the average rating as a string.
 * getVideoInfo() calls this while videoInfo.average_rating is still empty.
 * @throws ParsingException when the implementation cannot obtain the value
 */
public abstract String getAverageRating() throws ParsingException;
/**
 * Supplies the like count.
 * getVideoInfo() calls this while videoInfo.like_count still holds -1.
 * @throws ParsingException when the implementation cannot obtain the value
 */
public abstract int getLikeCount() throws ParsingException;
/**
 * Supplies the dislike count.
 * getVideoInfo() calls this while videoInfo.dislike_count still holds -1.
 * @throws ParsingException when the implementation cannot obtain the value
 */
public abstract int getDislikeCount() throws ParsingException;
/**
 * Supplies the preview info of the next video.
 * getVideoInfo() calls this while videoInfo.nextVideo is still null.
 * @throws ParsingException when the implementation cannot obtain the value
 */
public abstract VideoPreviewInfo getNextVideo() throws ParsingException;
/**
 * Supplies the preview infos of the related videos.
 * getVideoInfo() calls this while videoInfo.relatedVideos is still null.
 * @throws ParsingException when the implementation cannot obtain the value
 */
public abstract Vector<VideoPreviewInfo> getRelatedVideos() throws ParsingException;
} }

View File

@ -25,6 +25,7 @@ import java.io.IOException;
import java.io.StringReader; import java.io.StringReader;
import java.net.URLDecoder; import java.net.URLDecoder;
import java.util.HashMap; import java.util.HashMap;
import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.Vector; import java.util.Vector;
import java.util.regex.Matcher; import java.util.regex.Matcher;
@ -389,65 +390,58 @@ public class YoutubeVideoExtractor extends VideoExtractor {
return 0; return 0;
} }
/**
 * Reads the "avg_rating" entry of playerArgs and returns it unchanged.
 * @return the string stored under "avg_rating"
 * @throws ParsingException wrapping the JSONException raised when the entry cannot be read
 */
@Override
public String getAverageRating() throws ParsingException {
    final String averageRating;
    try {
        averageRating = playerArgs.getString("avg_rating");
    } catch (JSONException jsonError) {
        // Keep the JSON failure as the cause so the caller sees why parsing broke.
        throw new ParsingException("Could not get Average rating", jsonError);
    }
    return averageRating;
}
@Override @Override
public VideoInfo getVideoInfo() throws CrawlingException { public int getLikeCount() throws ParsingException {
videoInfo = super.getVideoInfo();
//todo: replace this with a call to getVideoId, if possible
//videoInfo.id = matchGroup1("v=([0-9a-zA-Z_-]{11})", pageUrl);
videoInfo.id = getVideoId(pageUrl);
if (videoInfo.audioStreams == null
|| videoInfo.audioStreams.length == 0) {
Log.e(TAG, "uninitialised audio streams!");
}
if (videoInfo.videoStreams == null
|| videoInfo.videoStreams.length == 0) {
Log.e(TAG, "uninitialised video streams!");
}
videoInfo.age_limit = 0;
//average rating
try {
videoInfo.average_rating = playerArgs.getString("avg_rating");
} catch (JSONException e) {
e.printStackTrace();
}
//---------------------------------------
// extracting information from html page
//---------------------------------------
String likesString = ""; String likesString = "";
String dislikesString = "";
try { try {
// likes
likesString = doc.select("button.like-button-renderer-like-button").first() likesString = doc.select("button.like-button-renderer-like-button").first()
.select("span.yt-uix-button-content").first().text(); .select("span.yt-uix-button-content").first().text();
videoInfo.like_count = Integer.parseInt(likesString.replaceAll("[^\\d]", "")); return Integer.parseInt(likesString.replaceAll("[^\\d]", ""));
// dislikes
dislikesString = doc.select("button.like-button-renderer-dislike-button").first()
.select("span.yt-uix-button-content").first().text();
videoInfo.dislike_count = Integer.parseInt(dislikesString.replaceAll("[^\\d]", ""));
} catch (NumberFormatException nfe) { } catch (NumberFormatException nfe) {
Log.e(TAG, "failed to parse likesString \"" + likesString + "\" and dislikesString \"" + throw new ParsingException(
dislikesString + "\" as integers"); "failed to parse likesString \"" + likesString + "\" as integers", nfe);
} catch (Exception e) { } catch (Exception e) {
// if it fails we know that the video does not offer dislikes. throw new ParsingException("Could not get like count", e);
e.printStackTrace(); }
videoInfo.like_count = 0;
videoInfo.dislike_count = 0;
} }
// next video @Override
videoInfo.nextVideo = extractVideoPreviewInfo(doc.select("div[class=\"watch-sidebar-section\"]").first() public int getDislikeCount() throws ParsingException {
.select("li").first()); String dislikesString = "";
try {
dislikesString = doc.select("button.like-button-renderer-dislike-button").first()
.select("span.yt-uix-button-content").first().text();
return Integer.parseInt(dislikesString.replaceAll("[^\\d]", ""));
} catch(NumberFormatException nfe) {
throw new ParsingException(
"failed to parse dislikesString \"" + dislikesString + "\" as integers", nfe);
} catch(Exception e) {
throw new ParsingException("Could not get dislike count", e);
}
}
// related videos @Override
// Builds the preview info for the next video from the first "li" element found
// inside the first "watch-sidebar-section" div of the parsed page (doc).
// Any failure on the way, including a missing element, is rethrown as a
// ParsingException that carries the original exception as its cause.
public VideoPreviewInfo getNextVideo() throws ParsingException {
    final VideoPreviewInfo nextVideo;
    try {
        nextVideo = extractVideoPreviewInfo(
                doc.select("div[class=\"watch-sidebar-section\"]").first()
                        .select("li").first());
    } catch (Exception failure) {
        throw new ParsingException("Could not get next video", failure);
    }
    return nextVideo;
}
@Override
public Vector<VideoPreviewInfo> getRelatedVideos() throws ParsingException {
try {
Vector<VideoPreviewInfo> relatedVideos = new Vector<>(); Vector<VideoPreviewInfo> relatedVideos = new Vector<>();
for (Element li : doc.select("ul[id=\"watch-related\"]").first().children()) { for (Element li : doc.select("ul[id=\"watch-related\"]").first().children()) {
// first check if we have a playlist. If so leave them out // first check if we have a playlist. If so leave them out
@ -455,11 +449,10 @@ public class YoutubeVideoExtractor extends VideoExtractor {
relatedVideos.add(extractVideoPreviewInfo(li)); relatedVideos.add(extractVideoPreviewInfo(li));
} }
} }
//todo: replace conversion return relatedVideos;
videoInfo.relatedVideos = relatedVideos; } catch(Exception e) {
//videoInfo.relatedVideos = relatedVideos.toArray(new VideoPreviewInfo[relatedVideos.size()]); throw new ParsingException("Could not get related videos", e);
}
return videoInfo;
} }
private VideoInfo.AudioStream[] parseDashManifest(String dashManifest, String decryptoinCode) throws RegexException, DecryptException { private VideoInfo.AudioStream[] parseDashManifest(String dashManifest, String decryptoinCode) throws RegexException, DecryptException {