Bandcamp search: multiple pages

This commit is contained in:
Fynn Godau 2019-12-22 13:51:17 +01:00
parent ce2a88e56f
commit 9b16baffb7
4 changed files with 47 additions and 7 deletions

View File

@ -89,19 +89,45 @@ public class BandcampSearchExtractor extends SearchExtractor {
} }
// Count pages
Elements pageLists = d.getElementsByClass("pagelist");
if (pageLists.size() == 0)
return new InfoItemsPage<>(collector, null);
Elements pages = pageLists.first().getElementsByTag("li");
// Find current page
int currentPage = -1;
for (int i = 0; i < pages.size(); i++) {
Element page = pages.get(i);
if (page.getElementsByTag("span").size() > 0) {
currentPage = i + 1;
break;
}
}
// Search results appear to be capped at six pages; the URL rewrite below relies on the page number being a single digit
assert pages.size() < 10;
String nextUrl = null;
if (currentPage < pages.size()) {
nextUrl = pageUrl.substring(0, pageUrl.length() - 1) + (currentPage + 1);
}
return new InfoItemsPage<>(collector, nextUrl);
return new InfoItemsPage<>(getInfoItemSearchCollector(), null);
} }
@Nonnull @Nonnull
@Override @Override
public InfoItemsPage<InfoItem> getInitialPage() throws IOException, ExtractionException { public InfoItemsPage<InfoItem> getInitialPage() throws IOException, ExtractionException {
return getPage(getUrl());//new InfoItemsPage<>(getInfoItemSearchCollector(), null); return getPage(getUrl());
} }
@Override @Override
public String getNextPageUrl() throws IOException, ExtractionException { public String getNextPageUrl() throws ExtractionException {
return null; String url = getUrl();
return url.substring(0, url.length() - 1).concat("2");
} }
@Override @Override

View File

@ -21,7 +21,8 @@ public class BandcampSearchQueryHandlerFactory extends SearchQueryHandlerFactory
try { try {
return SEARCH_URL + return SEARCH_URL +
URLEncoder.encode(query, CHARSET_UTF_8); URLEncoder.encode(query, CHARSET_UTF_8)
+ "&page=1";
} catch (UnsupportedEncodingException e) { } catch (UnsupportedEncodingException e) {
throw new ParsingException("query \"" + query + "\" could not be encoded", e); throw new ParsingException("query \"" + query + "\" could not be encoded", e);

View File

@ -89,4 +89,17 @@ public class BandcampSearchExtractorTest {
assertEquals(24, ((PlaylistInfoItem) minecraft).getStreamCount()); assertEquals(24, ((PlaylistInfoItem) minecraft).getStreamCount());
} }
/**
 * Checks that a search spanning several result pages reports the URL of the
 * following page, both for the initial page and for the page after it.
 */
@Test
public void testMultiplePages() throws ExtractionException, IOException {
    // The query "e" is practically guaranteed to yield the maximum number of pages
    final SearchExtractor searchExtractor = bandcamp.getSearchExtractor("e");

    // The first page must point at page 2
    assertEquals(
            "https://bandcamp.com/search?q=e&page=2",
            searchExtractor.getInitialPage().getNextPageUrl());

    // Loading the page the extractor names as next must in turn point at page 3
    final String secondPageUrl = searchExtractor.getNextPageUrl();
    assertEquals(
            "https://bandcamp.com/search?q=e&page=3",
            searchExtractor.getPage(secondPageUrl).getNextPageUrl());
}
} }

View File

@ -27,8 +27,8 @@ public class BandcampSearchQueryHandlerFactoryTest {
@Test @Test
public void testEncoding() throws ParsingException { public void testEncoding() throws ParsingException {
// Note: this isn't exactly as bandcamp does it (it wouldn't encode '!'), but both work // Note: this isn't exactly as bandcamp does it (it wouldn't encode '!'), but both work
assertEquals("https://bandcamp.com/search?q=hello%21%22%C2%A7%24%25%26%2F%28%29%3D", searchQuery.getUrl("hello!\"§$%&/()=")); assertEquals("https://bandcamp.com/search?q=hello%21%22%C2%A7%24%25%26%2F%28%29%3D&page=1", searchQuery.getUrl("hello!\"§$%&/()="));
// Note: bandcamp uses %20 instead of '+', but both work // Note: bandcamp uses %20 instead of '+', but both work
assertEquals("https://bandcamp.com/search?q=search+query+with+spaces", searchQuery.getUrl("search query with spaces")); assertEquals("https://bandcamp.com/search?q=search+query+with+spaces&page=1", searchQuery.getUrl("search query with spaces"));
} }
} }