Merge branch 'dev'

This commit is contained in:
TobiGr 2020-01-19 10:13:14 +01:00
commit c1e1ac1f57
14 changed files with 131 additions and 96 deletions

View File

@ -31,6 +31,7 @@ The following sites are currently supported:
- YouTube
- SoundCloud
- MediaCCC
- PeerTube (no P2P)
## License

View File

@ -48,6 +48,7 @@ public class MediaCCCSearchExtractor extends SearchExtractor {
@Override
public InfoItemsPage<InfoItem> getInitialPage() throws IOException, ExtractionException {
InfoItemsSearchCollector searchItems = getInfoItemSearchCollector();
searchItems.reset();
if(getLinkHandler().getContentFilters().contains(CONFERENCES)
|| getLinkHandler().getContentFilters().contains(ALL)

View File

@ -31,7 +31,7 @@ import static org.schabi.newpipe.extractor.ServiceList.SoundCloud;
import static org.schabi.newpipe.extractor.utils.Utils.replaceHttpWithHttps;
public class SoundcloudParsingHelper {
private static final String HARDCODED_CLIENT_ID = "bkcJLoXNaiFlsLaKBQXOxO5FhW0NJVnu"; // Updated on 29/11/19
private static final String HARDCODED_CLIENT_ID = "r5ELVSy3RkcjX7ilaL7n2v1Z8irA9SL8"; // Updated on 31/12/19
private static String clientId;
private SoundcloudParsingHelper() {

View File

@ -76,6 +76,7 @@ public class SoundcloudSearchExtractor extends SearchExtractor {
private InfoItemsCollector<InfoItem, InfoItemExtractor> collectItems(JsonArray searchCollection) {
final InfoItemsSearchCollector collector = getInfoItemSearchCollector();
collector.reset();
for (Object result : searchCollection) {
if (!(result instanceof JsonObject)) continue;

View File

@ -106,6 +106,7 @@ public class YoutubeSearchExtractor extends SearchExtractor {
private InfoItemsSearchCollector collectItems(Document doc) throws NothingFoundException {
InfoItemsSearchCollector collector = getInfoItemSearchCollector();
collector.reset();
Element list = doc.select("ol[class=\"item-section\"]").first();
final TimeAgoParser timeAgoParser = getTimeAgoParser();

View File

@ -662,7 +662,7 @@ public class YoutubeStreamExtractor extends StreamExtractor {
}
}
return errorReason != null ? errorReason.toString() : null;
return errorReason != null ? errorReason.toString() : "";
}
/*//////////////////////////////////////////////////////////////////////////
@ -697,7 +697,7 @@ public class YoutubeStreamExtractor extends StreamExtractor {
final String playerUrl;
// Check if the video is age restricted
if (!doc.select("meta[property=\"og:restrictions:age\"").isEmpty()) {
if (!doc.select("meta[property=\"og:restrictions:age\"]").isEmpty()) {
final EmbeddedInfo info = getEmbeddedInfo();
final String videoInfoUrl = getVideoInfoUrl(getId(), info.sts);
final String infoPageResponse = downloader.get(videoInfoUrl, getExtractorLocalization()).responseBody();

View File

@ -51,7 +51,7 @@ public class YoutubeChannelLinkHandlerFactory extends ListLinkHandlerFactory {
throw new ParsingException("the URL given is not a Youtube-URL");
}
if (!path.startsWith("/user/") && !path.startsWith("/channel/")) {
if (!path.startsWith("/user/") && !path.startsWith("/channel/") && !path.startsWith("/c/")) {
throw new ParsingException("the URL given is neither a channel nor an user");
}

View File

@ -1,18 +1,14 @@
package org.schabi.newpipe.extractor.services.youtube.linkHandler;
import java.io.UnsupportedEncodingException;
import java.net.URLDecoder;
import java.util.List;
import org.schabi.newpipe.extractor.exceptions.FoundAdException;
import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.linkhandler.ListLinkHandlerFactory;
import org.schabi.newpipe.extractor.utils.Parser;
import java.util.List;
public class YoutubeCommentsLinkHandlerFactory extends ListLinkHandlerFactory {
private static final YoutubeCommentsLinkHandlerFactory instance = new YoutubeCommentsLinkHandlerFactory();
private static final String ID_PATTERN = "([\\-a-zA-Z0-9_]{11})";
public static YoutubeCommentsLinkHandlerFactory getInstance() {
return instance;
@ -24,69 +20,12 @@ public class YoutubeCommentsLinkHandlerFactory extends ListLinkHandlerFactory {
}
@Override
public String getId(String url) throws ParsingException, IllegalArgumentException {
if (url.isEmpty()) {
throw new IllegalArgumentException("The url parameter should not be empty");
}
String id;
String lowercaseUrl = url.toLowerCase();
if (lowercaseUrl.contains("youtube")) {
if (url.contains("attribution_link")) {
try {
String escapedQuery = Parser.matchGroup1("u=(.[^&|$]*)", url);
String query = URLDecoder.decode(escapedQuery, "UTF-8");
id = Parser.matchGroup1("v=" + ID_PATTERN, query);
} catch (UnsupportedEncodingException uee) {
throw new ParsingException("Could not parse attribution_link", uee);
}
} else if (url.contains("vnd.youtube")) {
id = Parser.matchGroup1(ID_PATTERN, url);
} else if (url.contains("embed")) {
id = Parser.matchGroup1("embed/" + ID_PATTERN, url);
} else if (url.contains("googleads")) {
throw new FoundAdException("Error found add: " + url);
} else {
id = Parser.matchGroup1("[?&]v=" + ID_PATTERN, url);
}
} else if (lowercaseUrl.contains("youtu.be")) {
if (url.contains("v=")) {
id = Parser.matchGroup1("v=" + ID_PATTERN, url);
} else {
id = Parser.matchGroup1("[Yy][Oo][Uu][Tt][Uu]\\.[Bb][Ee]/" + ID_PATTERN, url);
}
} else if(lowercaseUrl.contains("hooktube")) {
if(lowercaseUrl.contains("&v=")
|| lowercaseUrl.contains("?v=")) {
id = Parser.matchGroup1("[?&]v=" + ID_PATTERN, url);
} else if (url.contains("/embed/")) {
id = Parser.matchGroup1("embed/" + ID_PATTERN, url);
} else if (url.contains("/v/")) {
id = Parser.matchGroup1("v/" + ID_PATTERN, url);
} else if (url.contains("/watch/")) {
id = Parser.matchGroup1("watch/" + ID_PATTERN, url);
} else {
throw new ParsingException("Error no suitable url: " + url);
}
} else {
throw new ParsingException("Error no suitable url: " + url);
}
if (!id.isEmpty()) {
return id;
} else {
throw new ParsingException("Error could not parse url: " + url);
}
public String getId(String urlString) throws ParsingException, IllegalArgumentException {
return YoutubeStreamLinkHandlerFactory.getInstance().getId(urlString); //we need the same id, avoids duplicate code
}
@Override
public boolean onAcceptUrl(final String url) throws FoundAdException {
final String lowercaseUrl = url.toLowerCase();
if (lowercaseUrl.contains("youtube")
|| lowercaseUrl.contains("youtu.be")
|| lowercaseUrl.contains("hooktube")) {
// bad programming I know
try {
getId(url);
return true;
@ -95,9 +34,6 @@ public class YoutubeCommentsLinkHandlerFactory extends ListLinkHandlerFactory {
} catch (ParsingException e) {
return false;
}
} else {
return false;
}
}
@Override

View File

@ -24,7 +24,7 @@ public class PeertubeChannelLinkHandlerFactoryTest {
}
@Test
public void acceptrUrlTest() throws ParsingException {
public void acceptUrlTest() throws ParsingException {
assertTrue(linkHandler.acceptUrl("https://peertube.mastodon.host/accounts/kranti@videos.squat.net"));
}

View File

@ -24,7 +24,7 @@ public class PeertubeCommentsLinkHandlerFactoryTest {
}
@Test
public void acceptrUrlTest() throws ParsingException {
public void acceptUrlTest() throws ParsingException {
assertTrue(linkHandler.acceptUrl("https://peertube.mastodon.host/api/v1/videos/19319/comment-threads?start=0&count=10&sort=-createdAt"));
}

View File

@ -24,7 +24,7 @@ public class PeertubePlaylistLinkHandlerFactoryTest {
}
@Test
public void acceptrUrlTest() throws ParsingException {
public void acceptUrlTest() throws ParsingException {
assertTrue(linkHandler.acceptUrl("https://peertube.mastodon.host/video-channels/b45e84fb-c47f-475b-94f2-718126154d33/videos"));
}

View File

@ -24,10 +24,12 @@ public class YoutubeChannelLinkHandlerFactoryTest {
}
@Test
public void acceptrUrlTest() throws ParsingException {
public void acceptUrlTest() throws ParsingException {
assertTrue(linkHandler.acceptUrl("https://www.youtube.com/user/Gronkh"));
assertTrue(linkHandler.acceptUrl("https://www.youtube.com/user/Netzkino/videos"));
assertTrue(linkHandler.acceptUrl("https://www.youtube.com/c/creatoracademy"));
assertTrue(linkHandler.acceptUrl("https://www.youtube.com/channel/UClq42foiSgl7sSpLupnugGA"));
assertTrue(linkHandler.acceptUrl("https://www.youtube.com/channel/UClq42foiSgl7sSpLupnugGA/videos?disable_polymer=1"));
@ -64,5 +66,8 @@ public class YoutubeChannelLinkHandlerFactoryTest {
assertEquals("channel/UClq42foiSgl7sSpLupnugGA", linkHandler.fromUrl("https://invidio.us/channel/UClq42foiSgl7sSpLupnugGA").getId());
assertEquals("channel/UClq42foiSgl7sSpLupnugGA", linkHandler.fromUrl("https://invidio.us/channel/UClq42foiSgl7sSpLupnugGA/videos?disable_polymer=1").getId());
assertEquals("c/creatoracademy", linkHandler.fromUrl("https://www.youtube.com/c/creatoracademy").getId());
assertEquals("c/YouTubeCreators", linkHandler.fromUrl("https://www.youtube.com/c/YouTubeCreators").getId());
}
}

View File

@ -4,7 +4,6 @@ import org.jsoup.helper.StringUtil;
import org.junit.BeforeClass;
import org.junit.Test;
import org.schabi.newpipe.DownloaderTestImpl;
import org.schabi.newpipe.DownloaderTestImpl;
import org.schabi.newpipe.extractor.ListExtractor.InfoItemsPage;
import org.schabi.newpipe.extractor.NewPipe;
import org.schabi.newpipe.extractor.comments.CommentsInfo;
@ -21,17 +20,32 @@ import static org.schabi.newpipe.extractor.ServiceList.YouTube;
public class YoutubeCommentsExtractorTest {
private static YoutubeCommentsExtractor extractor;
private static final String urlYT = "https://www.youtube.com/watch?v=D00Au7k3i6o";
private static final String urlInvidious = "https://invidio.us/watch?v=D00Au7k3i6o";
private static final String urlInvidioush = "https://invidiou.sh/watch?v=D00Au7k3i6o";
private static YoutubeCommentsExtractor extractorYT;
private static YoutubeCommentsExtractor extractorInvidious;
private static YoutubeCommentsExtractor extractorInvidioush;
@BeforeClass
public static void setUp() throws Exception {
NewPipe.init(DownloaderTestImpl.getInstance());
extractor = (YoutubeCommentsExtractor) YouTube
.getCommentsExtractor("https://www.youtube.com/watch?v=D00Au7k3i6o");
extractorYT = (YoutubeCommentsExtractor) YouTube
.getCommentsExtractor(urlYT);
extractorInvidious = (YoutubeCommentsExtractor) YouTube
.getCommentsExtractor(urlInvidious);
extractorInvidioush = (YoutubeCommentsExtractor) YouTube
.getCommentsExtractor(urlInvidioush);
}
@Test
public void testGetComments() throws IOException, ExtractionException {
assertTrue(getCommentsHelper(extractorYT));
assertTrue(getCommentsHelper(extractorInvidious));
assertTrue(getCommentsHelper(extractorInvidioush));
}
private boolean getCommentsHelper(YoutubeCommentsExtractor extractor) throws IOException, ExtractionException {
boolean result;
InfoItemsPage<CommentsInfoItem> comments = extractor.getInitialPage();
result = findInComments(comments, "s1ck m3m3");
@ -41,14 +55,20 @@ public class YoutubeCommentsExtractorTest {
result = findInComments(comments, "s1ck m3m3");
}
assertTrue(result);
return result;
}
@Test
public void testGetCommentsFromCommentsInfo() throws IOException, ExtractionException {
assertTrue(getCommentsFromCommentsInfoHelper(urlYT));
assertTrue(getCommentsFromCommentsInfoHelper(urlInvidious));
assertTrue(getCommentsFromCommentsInfoHelper(urlInvidioush));
}
private boolean getCommentsFromCommentsInfoHelper(String url) throws IOException, ExtractionException {
boolean result = false;
CommentsInfo commentsInfo = CommentsInfo.getInfo("https://www.youtube.com/watch?v=D00Au7k3i6o");
assertTrue("what the fuck am i doing with my life".equals(commentsInfo.getName()));
CommentsInfo commentsInfo = CommentsInfo.getInfo(url);
assertEquals("what the fuck am i doing with my life", commentsInfo.getName());
result = findInComments(commentsInfo.getRelatedItems(), "s1ck m3m3");
String nextPage = commentsInfo.getNextPageUrl();
@ -57,16 +77,15 @@ public class YoutubeCommentsExtractorTest {
result = findInComments(moreItems.getItems(), "s1ck m3m3");
nextPage = moreItems.getNextPageUrl();
}
assertTrue(result);
return result;
}
@Test
public void testGetCommentsAllData() throws IOException, ExtractionException {
InfoItemsPage<CommentsInfoItem> comments = extractor.getInitialPage();
InfoItemsPage<CommentsInfoItem> comments = extractorYT.getInitialPage();
DefaultTests.defaultTestListOfItems(YouTube.getServiceId(), comments.getItems(), comments.getErrors());
for(CommentsInfoItem c: comments.getItems()) {
for (CommentsInfoItem c : comments.getItems()) {
assertFalse(StringUtil.isBlank(c.getAuthorEndpoint()));
assertFalse(StringUtil.isBlank(c.getAuthorName()));
assertFalse(StringUtil.isBlank(c.getAuthorThumbnail()));
@ -86,8 +105,8 @@ public class YoutubeCommentsExtractorTest {
}
private boolean findInComments(List<CommentsInfoItem> comments, String comment) {
for(CommentsInfoItem c: comments) {
if(c.getCommentText().contains(comment)) {
for (CommentsInfoItem c : comments) {
if (c.getCommentText().contains(comment)) {
return true;
}
}

View File

@ -0,0 +1,71 @@
package org.schabi.newpipe.extractor.services.youtube.search;
import static java.util.Collections.singletonList;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;
import static org.schabi.newpipe.extractor.ServiceList.YouTube;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import org.junit.BeforeClass;
import org.junit.Test;
import org.schabi.newpipe.DownloaderTestImpl;
import org.schabi.newpipe.extractor.InfoItem;
import org.schabi.newpipe.extractor.ListExtractor;
import org.schabi.newpipe.extractor.NewPipe;
import org.schabi.newpipe.extractor.services.youtube.extractors.YoutubeSearchExtractor;
import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeSearchQueryHandlerFactory;
/**
 * Checks paging of YouTube search results: the first two result pages of one
 * search must each be free of duplicate item urls and must not share any url.
 *
 * <p>Both pages are fetched once in {@link #setUpClass()}; the individual tests
 * only inspect the state captured there.</p>
 */
public class YoutubeSearchPagingTest {

    /** Initial result page of the search. */
    private static ListExtractor.InfoItemsPage<InfoItem> firstPage;
    /** Page obtained by following the next-page url of {@link #firstPage}. */
    private static ListExtractor.InfoItemsPage<InfoItem> secondPage;

    /** Distinct item urls found on the first page. */
    private static Set<String> firstPageUrls;
    /** Distinct item urls found on the second page. */
    private static Set<String> secondPageUrls;

    /** Number of items on the first page, duplicates included. */
    private static int firstPageSize;
    /** Number of items on the second page, duplicates included. */
    private static int secondPageSize;

    /**
     * Runs the search for "cirque du soleil" (with
     * {@code YoutubeSearchQueryHandlerFactory.VIDEOS} as the single filter entry),
     * validates the first page and then loads the second one.
     *
     * @throws Exception if initialisation, fetching or extraction fails
     */
    @BeforeClass
    public static void setUpClass() throws Exception {
        NewPipe.init(DownloaderTestImpl.getInstance());

        final YoutubeSearchExtractor searchExtractor = (YoutubeSearchExtractor) YouTube.getSearchExtractor(
                "cirque du soleil",
                singletonList(YoutubeSearchQueryHandlerFactory.VIDEOS),
                null);
        searchExtractor.fetchPage();

        // First page: must hold more than 15 items, none of them twice.
        firstPage = searchExtractor.getInitialPage();
        firstPageUrls = extractUrls(firstPage.getItems());
        assertTrue("failed to load search result page one: too few items", firstPage.getItems().size() > 15);
        firstPageSize = firstPage.getItems().size();
        assertEquals("duplicated items in search result on page one", firstPageSize, firstPageUrls.size());

        // A second page has to be advertised before it can be requested.
        assertTrue("search result has no second page", firstPage.hasNextPage());
        assertNotNull("next page url is null", firstPage.getNextPageUrl());

        secondPage = searchExtractor.getPage(firstPage.getNextPageUrl());
        secondPageUrls = extractUrls(secondPage.getItems());
        secondPageSize = secondPage.getItems().size();
    }

    /**
     * Collects the url of every given item into a set, which collapses duplicates.
     *
     * @param items the items whose urls are gathered
     * @return a new set containing the distinct urls of {@code items}
     */
    private static Set<String> extractUrls(List<InfoItem> items) {
        final Set<String> urls = new HashSet<>();
        for (int index = 0; index < items.size(); index++) {
            urls.add(items.get(index).getUrl());
        }
        return urls;
    }

    /** The second page must contain as many distinct urls as it has items. */
    @Test
    public void secondPageUniqueVideos() {
        assertEquals("Second search result page has duplicated items", secondPageSize, secondPageUrls.size());
    }

    /** No url may show up on both the first and the second page. */
    @Test
    public void noRepeatingVideosInPages() {
        // Work on a copy so the captured second-page urls stay untouched.
        final Set<String> sharedUrls = new HashSet<>(secondPageUrls);
        sharedUrls.retainAll(firstPageUrls);
        assertEquals("Found the same item on first AND second search page", 0, sharedUrls.size());
    }
}