Merge branch 'dev' into patch-1

This commit is contained in:
Tobias Groza 2019-09-09 19:05:00 +02:00 committed by GitHub
commit c7e9ad57c4
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
15 changed files with 127 additions and 38 deletions

View File

@ -21,7 +21,14 @@ package org.schabi.newpipe.extractor.exceptions;
*/ */
public class ReCaptchaException extends ExtractionException { public class ReCaptchaException extends ExtractionException {
public ReCaptchaException(String message) { private String url;
public ReCaptchaException(String message, String url) {
super(message); super(message);
this.url = url;
}
public String getUrl() {
return url;
} }
} }

View File

@ -47,6 +47,7 @@ import java.util.ArrayList;
@SuppressWarnings("WeakerAccess") @SuppressWarnings("WeakerAccess")
public class YoutubeChannelExtractor extends ChannelExtractor { public class YoutubeChannelExtractor extends ChannelExtractor {
/*package-private*/ static final String CHANNEL_URL_BASE = "https://www.youtube.com/channel/";
private static final String CHANNEL_FEED_BASE = "https://www.youtube.com/feeds/videos.xml?channel_id="; private static final String CHANNEL_FEED_BASE = "https://www.youtube.com/feeds/videos.xml?channel_id=";
private static final String CHANNEL_URL_PARAMETERS = "/videos?view=0&flow=list&sort=dd&live_view=10000"; private static final String CHANNEL_URL_PARAMETERS = "/videos?view=0&flow=list&sort=dd&live_view=10000";
@ -72,7 +73,7 @@ public class YoutubeChannelExtractor extends ChannelExtractor {
@Override @Override
public String getUrl() throws ParsingException { public String getUrl() throws ParsingException {
try { try {
return "https://www.youtube.com/channel/" + getId(); return CHANNEL_URL_BASE + getId();
} catch (ParsingException e) { } catch (ParsingException e) {
return super.getUrl(); return super.getUrl();
} }

View File

@ -5,6 +5,9 @@ import org.schabi.newpipe.extractor.channel.ChannelInfoItemExtractor;
import org.schabi.newpipe.extractor.exceptions.ParsingException; import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.utils.Utils; import org.schabi.newpipe.extractor.utils.Utils;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/* /*
* Created by Christian Schabesberger on 12.02.17. * Created by Christian Schabesberger on 12.02.17.
* *
@ -53,8 +56,20 @@ public class YoutubeChannelInfoItemExtractor implements ChannelInfoItemExtractor
@Override @Override
public String getUrl() throws ParsingException { public String getUrl() throws ParsingException {
return el.select("a[class*=\"yt-uix-tile-link\"]").first() String buttonTrackingUrl = el.select("button[class*=\"yt-uix-button\"]").first()
.attr("abs:href"); .attr("abs:data-href");
Pattern channelIdPattern = Pattern.compile("(?:.*?)\\%252Fchannel\\%252F([A-Za-z0-9\\-\\_]+)(?:.*)");
Matcher match = channelIdPattern.matcher(buttonTrackingUrl);
if (match.matches()) {
return YoutubeChannelExtractor.CHANNEL_URL_BASE + match.group(1);
} else {
// fallback in case YouTube changes its markup; it should never run, and the tests will fail if it does
// it yields a URL of the form "/user/NAME", which is inconsistent with the stream and channel extractors
return el.select("a[class*=\"yt-uix-tile-link\"]").first()
.attr("abs:href");
}
} }
@Override @Override

View File

@ -50,7 +50,7 @@ public class YoutubePlaylistExtractor extends PlaylistExtractor {
try { try {
return doc.select("div[id=pl-header] h1[class=pl-header-title]").first().text(); return doc.select("div[id=pl-header] h1[class=pl-header-title]").first().text();
} catch (Exception e) { } catch (Exception e) {
throw new ParsingException("Could not get playlist name"); throw new ParsingException("Could not get playlist name", e);
} }
} }
@ -59,7 +59,7 @@ public class YoutubePlaylistExtractor extends PlaylistExtractor {
try { try {
return doc.select("div[id=pl-header] div[class=pl-header-thumb] img").first().attr("abs:src"); return doc.select("div[id=pl-header] div[class=pl-header-thumb] img").first().attr("abs:src");
} catch (Exception e) { } catch (Exception e) {
throw new ParsingException("Could not get playlist thumbnail"); throw new ParsingException("Could not get playlist thumbnail", e);
} }
} }
@ -72,9 +72,11 @@ public class YoutubePlaylistExtractor extends PlaylistExtractor {
@Override @Override
public String getUploaderUrl() throws ParsingException { public String getUploaderUrl() throws ParsingException {
try { try {
return doc.select("ul[class=\"pl-header-details\"] li").first().select("a").first().attr("abs:href"); return YoutubeChannelExtractor.CHANNEL_URL_BASE +
doc.select("button[class*=\"yt-uix-subscription-button\"]")
.first().attr("data-channel-external-id");
} catch (Exception e) { } catch (Exception e) {
throw new ParsingException("Could not get playlist uploader name"); throw new ParsingException("Could not get playlist uploader url", e);
} }
} }
@ -83,7 +85,7 @@ public class YoutubePlaylistExtractor extends PlaylistExtractor {
try { try {
return doc.select("span[class=\"qualified-channel-title-text\"]").first().select("a").first().text(); return doc.select("span[class=\"qualified-channel-title-text\"]").first().select("a").first().text();
} catch (Exception e) { } catch (Exception e) {
throw new ParsingException("Could not get playlist uploader name"); throw new ParsingException("Could not get playlist uploader name", e);
} }
} }
@ -92,7 +94,7 @@ public class YoutubePlaylistExtractor extends PlaylistExtractor {
try { try {
return doc.select("div[id=gh-banner] img[class=channel-header-profile-image]").first().attr("abs:src"); return doc.select("div[id=gh-banner] img[class=channel-header-profile-image]").first().attr("abs:src");
} catch (Exception e) { } catch (Exception e) {
throw new ParsingException("Could not get playlist uploader avatar"); throw new ParsingException("Could not get playlist uploader avatar", e);
} }
} }
@ -248,6 +250,8 @@ public class YoutubePlaylistExtractor extends PlaylistExtractor {
@Override @Override
public String getUploaderUrl() throws ParsingException { public String getUploaderUrl() throws ParsingException {
// this url is not always in the form "/channel/..."
// sometimes Youtube provides urls in the form "/user/..."
return getUploaderLink().attr("abs:href"); return getUploaderLink().attr("abs:href");
} }

View File

@ -49,10 +49,11 @@ public class YoutubePlaylistInfoItemExtractor implements PlaylistInfoItemExtract
@Override @Override
public String getUrl() throws ParsingException { public String getUrl() throws ParsingException {
try { try {
final Element div = el.select("div[class=\"yt-lockup-meta\"]").first(); final Element a = el.select("div[class=\"yt-lockup-meta\"]")
.select("ul[class=\"yt-lockup-meta-info\"]")
.select("li").select("a").first();
if(div != null) { if(a != null) {
final Element a = div.select("a").first();
return a.attr("abs:href"); return a.attr("abs:href");
} }

View File

@ -30,6 +30,8 @@ import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException; import java.net.MalformedURLException;
import java.net.URL; import java.net.URL;
import java.util.*; import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/* /*
* Created by Christian Schabesberger on 06.08.15. * Created by Christian Schabesberger on 06.08.15.
@ -162,14 +164,54 @@ public class YoutubeStreamExtractor extends StreamExtractor {
} }
} }
// onclick="yt.www.watch.player.seekTo(0*3600+00*60+00);return false;"
// :00 is NOT recognized as a timestamp in description or comments.
// 0:00 is recognized in both description and comments.
// https://www.youtube.com/watch?v=4cccfDXu1vA
private final static Pattern DESCRIPTION_TIMESTAMP_ONCLICK_REGEX = Pattern.compile(
"seekTo\\("
+ "(?:(\\d+)\\*3600\\+)?" // hours?
+ "(\\d+)\\*60\\+" // minutes
+ "(\\d+)" // seconds
+ "\\)");
@SafeVarargs
private static <T> T coalesce(T... args) {
for (T arg : args) {
if (arg != null) return arg;
}
throw new IllegalArgumentException("all arguments to coalesce() were null");
}
private String parseHtmlAndGetFullLinks(String descriptionHtml) private String parseHtmlAndGetFullLinks(String descriptionHtml)
throws MalformedURLException, UnsupportedEncodingException, ParsingException { throws MalformedURLException, UnsupportedEncodingException, ParsingException {
final Document description = Jsoup.parse(descriptionHtml, getUrl()); final Document description = Jsoup.parse(descriptionHtml, getUrl());
for(Element a : description.select("a")) { for(Element a : description.select("a")) {
final String rawUrl = a.attr("abs:href"); final String rawUrl = a.attr("abs:href");
final URL redirectLink = new URL(rawUrl); final URL redirectLink = new URL(rawUrl);
final String queryString = redirectLink.getQuery();
if(queryString != null) { final Matcher onClickTimestamp;
final String queryString;
if ((onClickTimestamp = DESCRIPTION_TIMESTAMP_ONCLICK_REGEX.matcher(a.attr("onclick")))
.find()) {
a.removeAttr("onclick");
String hours = coalesce(onClickTimestamp.group(1), "0");
String minutes = onClickTimestamp.group(2);
String seconds = onClickTimestamp.group(3);
int timestamp = 0;
timestamp += Integer.parseInt(hours) * 3600;
timestamp += Integer.parseInt(minutes) * 60;
timestamp += Integer.parseInt(seconds);
String setTimestamp = "&t=" + timestamp;
// Even after clicking https://youtu.be/...?t=6,
// getUrl() is https://www.youtube.com/watch?v=..., never youtu.be, never &t=.
a.attr("href", getUrl() + setTimestamp);
} else if((queryString = redirectLink.getQuery()) != null) {
// if the query string is null we are not dealing with a redirect link, // if the query string is null we are not dealing with a redirect link,
// so we don't need to override it. // so we don't need to override it.
final String link = final String link =
@ -714,8 +756,6 @@ public class YoutubeStreamExtractor extends StreamExtractor {
} catch (IOException e) { } catch (IOException e) {
throw new ParsingException( throw new ParsingException(
"Could load decryption code form restricted video for the Youtube service.", e); "Could load decryption code form restricted video for the Youtube service.", e);
} catch (ReCaptchaException e) {
throw new ReCaptchaException("reCaptcha Challenge requested");
} }
} }

View File

@ -61,7 +61,7 @@ public class YoutubeStreamInfoItemExtractor implements StreamInfoItemExtractor {
@Override @Override
public String getUrl() throws ParsingException { public String getUrl() throws ParsingException {
try { try {
Element el = item.select("div[class*=\"yt-lockup-video\"").first(); Element el = item.select("div[class*=\"yt-lockup-video\"]").first();
Element dl = el.select("h3").first().select("a").first(); Element dl = el.select("h3").first().select("a").first();
return dl.attr("abs:href"); return dl.attr("abs:href");
} catch (Exception e) { } catch (Exception e) {
@ -72,7 +72,7 @@ public class YoutubeStreamInfoItemExtractor implements StreamInfoItemExtractor {
@Override @Override
public String getName() throws ParsingException { public String getName() throws ParsingException {
try { try {
Element el = item.select("div[class*=\"yt-lockup-video\"").first(); Element el = item.select("div[class*=\"yt-lockup-video\"]").first();
Element dl = el.select("h3").first().select("a").first(); Element dl = el.select("h3").first().select("a").first();
return dl.text(); return dl.text();
} catch (Exception e) { } catch (Exception e) {
@ -107,6 +107,8 @@ public class YoutubeStreamInfoItemExtractor implements StreamInfoItemExtractor {
@Override @Override
public String getUploaderUrl() throws ParsingException { public String getUploaderUrl() throws ParsingException {
// this url is not always in the form "/channel/..."
// sometimes Youtube provides urls in the form "/user/..."
try { try {
try { try {
return item.select("div[class=\"yt-lockup-byline\"]").first() return item.select("div[class=\"yt-lockup-byline\"]").first()
@ -119,7 +121,7 @@ public class YoutubeStreamInfoItemExtractor implements StreamInfoItemExtractor {
.text().split(" - ")[0]; .text().split(" - ")[0];
} catch (Exception e) { } catch (Exception e) {
System.out.println(item.html()); System.out.println(item.html());
throw new ParsingException("Could not get uploader", e); throw new ParsingException("Could not get uploader url", e);
} }
} }

View File

@ -126,6 +126,8 @@ public class YoutubeTrendingExtractor extends KioskExtractor<StreamInfoItem> {
} }
private Element getUploaderLink() { private Element getUploaderLink() {
// this url is not always in the form "/channel/..."
// sometimes Youtube provides urls in the form "/user/..."
Element uploaderEl = el.select("div[class*=\"yt-lockup-byline \"]").first(); Element uploaderEl = el.select("div[class*=\"yt-lockup-byline \"]").first();
return uploaderEl.select("a").first(); return uploaderEl.select("a").first();
} }

View File

@ -123,8 +123,6 @@ public class DashMpdParser {
dashDoc = downloader.download(streamInfo.getDashMpdUrl()); dashDoc = downloader.download(streamInfo.getDashMpdUrl());
} catch (IOException ioe) { } catch (IOException ioe) {
throw new DashMpdParsingException("Could not get dash mpd: " + streamInfo.getDashMpdUrl(), ioe); throw new DashMpdParsingException("Could not get dash mpd: " + streamInfo.getDashMpdUrl(), ioe);
} catch (ReCaptchaException e) {
throw new ReCaptchaException("reCaptcha Challenge needed");
} }
try { try {

View File

@ -129,7 +129,7 @@ public class Downloader implements org.schabi.newpipe.extractor.Downloader {
* request See : https://github.com/rg3/youtube-dl/issues/5138 * request See : https://github.com/rg3/youtube-dl/issues/5138
*/ */
if (con.getResponseCode() == 429) { if (con.getResponseCode() == 429) {
throw new ReCaptchaException("reCaptcha Challenge requested"); throw new ReCaptchaException("reCaptcha Challenge requested", con.getURL().toString());
} }
throw new IOException(con.getResponseCode() + " " + con.getResponseMessage(), e); throw new IOException(con.getResponseCode() + " " + con.getResponseMessage(), e);

View File

@ -1,5 +1,6 @@
package org.schabi.newpipe.extractor.services.soundcloud; package org.schabi.newpipe.extractor.services.soundcloud;
import org.hamcrest.CoreMatchers;
import org.junit.BeforeClass; import org.junit.BeforeClass;
import org.junit.Ignore; import org.junit.Ignore;
import org.junit.Test; import org.junit.Test;
@ -119,14 +120,14 @@ public class SoundcloudPlaylistExtractorTest {
} }
} }
public static class RandomHouseDanceMusic implements BasePlaylistExtractorTest { public static class RandomHouseMusic implements BasePlaylistExtractorTest {
private static SoundcloudPlaylistExtractor extractor; private static SoundcloudPlaylistExtractor extractor;
@BeforeClass @BeforeClass
public static void setUp() throws Exception { public static void setUp() throws Exception {
NewPipe.init(Downloader.getInstance(), new Localization("GB", "en")); NewPipe.init(Downloader.getInstance(), new Localization("GB", "en"));
extractor = (SoundcloudPlaylistExtractor) SoundCloud extractor = (SoundcloudPlaylistExtractor) SoundCloud
.getPlaylistExtractor("https://soundcloud.com/hunter-leader/sets/house-electro-dance-music-2"); .getPlaylistExtractor("https://soundcloud.com/micky96/sets/house");
extractor.fetchPage(); extractor.fetchPage();
} }
@ -141,22 +142,22 @@ public class SoundcloudPlaylistExtractorTest {
@Test @Test
public void testName() { public void testName() {
assertEquals("House, Electro , Dance Music 2", extractor.getName()); assertEquals("House", extractor.getName());
} }
@Test @Test
public void testId() { public void testId() {
assertEquals("310980722", extractor.getId()); assertEquals("123062856", extractor.getId());
} }
@Test @Test
public void testUrl() throws Exception { public void testUrl() throws Exception {
assertEquals("https://soundcloud.com/hunter-leader/sets/house-electro-dance-music-2", extractor.getUrl()); assertEquals("https://soundcloud.com/micky96/sets/house", extractor.getUrl());
} }
@Test @Test
public void testOriginalUrl() throws Exception { public void testOriginalUrl() throws Exception {
assertEquals("https://soundcloud.com/hunter-leader/sets/house-electro-dance-music-2", extractor.getOriginalUrl()); assertEquals("https://soundcloud.com/micky96/sets/house", extractor.getOriginalUrl());
} }
/*////////////////////////////////////////////////////////////////////////// /*//////////////////////////////////////////////////////////////////////////
@ -182,7 +183,7 @@ public class SoundcloudPlaylistExtractorTest {
assertIsSecureUrl(extractor.getThumbnailUrl()); assertIsSecureUrl(extractor.getThumbnailUrl());
} }
@Ignore @Ignore("not implemented")
@Test @Test
public void testBannerUrl() { public void testBannerUrl() {
assertIsSecureUrl(extractor.getBannerUrl()); assertIsSecureUrl(extractor.getBannerUrl());
@ -192,12 +193,12 @@ public class SoundcloudPlaylistExtractorTest {
public void testUploaderUrl() { public void testUploaderUrl() {
final String uploaderUrl = extractor.getUploaderUrl(); final String uploaderUrl = extractor.getUploaderUrl();
assertIsSecureUrl(uploaderUrl); assertIsSecureUrl(uploaderUrl);
assertTrue(uploaderUrl, uploaderUrl.contains("hunter-leader")); assertThat(uploaderUrl, CoreMatchers.containsString("micky96"));
} }
@Test @Test
public void testUploaderName() { public void testUploaderName() {
assertEquals("Gosu", extractor.getUploaderName()); assertEquals("_mickyyy", extractor.getUploaderName());
} }
@Test @Test
@ -266,6 +267,7 @@ public class SoundcloudPlaylistExtractorTest {
// ListExtractor // ListExtractor
//////////////////////////////////////////////////////////////////////////*/ //////////////////////////////////////////////////////////////////////////*/
@Ignore
@Test @Test
public void testRelatedItems() throws Exception { public void testRelatedItems() throws Exception {
defaultTestRelatedItems(extractor, SoundCloud.getServiceId()); defaultTestRelatedItems(extractor, SoundCloud.getServiceId());
@ -287,6 +289,7 @@ public class SoundcloudPlaylistExtractorTest {
// PlaylistExtractor // PlaylistExtractor
//////////////////////////////////////////////////////////////////////////*/ //////////////////////////////////////////////////////////////////////////*/
@Ignore
@Test @Test
public void testThumbnailUrl() { public void testThumbnailUrl() {
assertIsSecureUrl(extractor.getThumbnailUrl()); assertIsSecureUrl(extractor.getThumbnailUrl());

View File

@ -100,7 +100,7 @@ public class YoutubePlaylistExtractorTest {
@Test @Test
public void testUploaderUrl() throws Exception { public void testUploaderUrl() throws Exception {
assertTrue(extractor.getUploaderUrl().contains("youtube.com")); assertEquals("https://www.youtube.com/channel/UCs72iRpTEuwV3y6pdWYLgiw", extractor.getUploaderUrl());
} }
@Test @Test
@ -185,8 +185,8 @@ public class YoutubePlaylistExtractorTest {
public void testMoreRelatedItems() throws Exception { public void testMoreRelatedItems() throws Exception {
ListExtractor.InfoItemsPage<StreamInfoItem> currentPage ListExtractor.InfoItemsPage<StreamInfoItem> currentPage
= defaultTestMoreItems(extractor, ServiceList.YouTube.getServiceId()); = defaultTestMoreItems(extractor, ServiceList.YouTube.getServiceId());
// Test for 2 more levels
// test for 2 more levels
for (int i = 0; i < 2; i++) { for (int i = 0; i < 2; i++) {
currentPage = extractor.getPage(currentPage.getNextPageUrl()); currentPage = extractor.getPage(currentPage.getNextPageUrl());
defaultTestListOfItems(YouTube.getServiceId(), currentPage.getItems(), currentPage.getErrors()); defaultTestListOfItems(YouTube.getServiceId(), currentPage.getItems(), currentPage.getErrors());
@ -214,7 +214,7 @@ public class YoutubePlaylistExtractorTest {
@Test @Test
public void testUploaderUrl() throws Exception { public void testUploaderUrl() throws Exception {
assertTrue(extractor.getUploaderUrl().contains("youtube.com")); assertEquals("https://www.youtube.com/channel/UCHSPWoY1J5fbDVbcnyeqwdw", extractor.getUploaderUrl());
} }
@Test @Test

View File

@ -81,7 +81,7 @@ public class YoutubeStreamExtractorDefaultTest {
} }
@Test @Test
public void testGetFullLinksInDescriptlion() throws ParsingException { public void testGetFullLinksInDescription() throws ParsingException {
assertTrue(extractor.getDescription().contains("http://adele.com")); assertTrue(extractor.getDescription().contains("http://adele.com"));
assertFalse(extractor.getDescription().contains("http://smarturl.it/SubscribeAdele?IQi...")); assertFalse(extractor.getDescription().contains("http://smarturl.it/SubscribeAdele?IQi..."));
} }
@ -111,7 +111,7 @@ public class YoutubeStreamExtractorDefaultTest {
@Test @Test
public void testGetUploaderUrl() throws ParsingException { public void testGetUploaderUrl() throws ParsingException {
assertTrue(extractor.getUploaderUrl().length() > 0); assertEquals("https://www.youtube.com/channel/UCsRM0YB_dabtEPGPTKo-gcw", extractor.getUploaderUrl());
} }
@Test @Test

View File

@ -1,5 +1,6 @@
package org.schabi.newpipe.extractor.services.youtube.search; package org.schabi.newpipe.extractor.services.youtube.search;
import org.hamcrest.CoreMatchers;
import org.junit.BeforeClass; import org.junit.BeforeClass;
import org.junit.Ignore; import org.junit.Ignore;
import org.junit.Test; import org.junit.Test;
@ -63,4 +64,19 @@ public class YoutubeSearchExtractorChannelOnlyTest extends YoutubeSearchExtracto
} }
} }
} }
@Test
public void testChannelUrl() {
for(InfoItem item : itemsPage.getItems()) {
if (item instanceof ChannelInfoItem) {
ChannelInfoItem channel = (ChannelInfoItem) item;
if (channel.getSubscriberCount() > 5e7) { // the real PewDiePie
assertEquals("https://www.youtube.com/channel/UC-lHJZR3Gqxm24_Vd_AJ5Yw", item.getUrl());
} else {
assertThat(item.getUrl(), CoreMatchers.startsWith("https://www.youtube.com/channel/"));
}
}
}
}
} }

View File

@ -73,7 +73,7 @@ public class YoutubeSearchExtractorDefaultTest extends YoutubeSearchExtractorBas
assertTrue((firstInfoItem instanceof ChannelInfoItem) assertTrue((firstInfoItem instanceof ChannelInfoItem)
|| (secondInfoItem instanceof ChannelInfoItem)); || (secondInfoItem instanceof ChannelInfoItem));
assertEquals("name", "PewDiePie", channelItem.getName()); assertEquals("name", "PewDiePie", channelItem.getName());
assertEquals("url","https://www.youtube.com/user/PewDiePie", channelItem.getUrl()); assertEquals("url", "https://www.youtube.com/channel/UC-lHJZR3Gqxm24_Vd_AJ5Yw", channelItem.getUrl());
} }
@Test @Test