Bandcamp search: multiple pages
This commit is contained in:
parent
ce2a88e56f
commit
9b16baffb7
|
@ -89,19 +89,45 @@ public class BandcampSearchExtractor extends SearchExtractor {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Count pages
|
||||||
|
Elements pageLists = d.getElementsByClass("pagelist");
|
||||||
|
if (pageLists.size() == 0)
|
||||||
|
return new InfoItemsPage<>(collector, null);
|
||||||
|
|
||||||
|
Elements pages = pageLists.first().getElementsByTag("li");
|
||||||
|
|
||||||
|
// Find current page
|
||||||
|
int currentPage = -1;
|
||||||
|
for (int i = 0; i < pages.size(); i++) {
|
||||||
|
Element page = pages.get(i);
|
||||||
|
if (page.getElementsByTag("span").size() > 0) {
|
||||||
|
currentPage = i + 1;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Search results appear to be capped at six pages
|
||||||
|
assert pages.size() < 10;
|
||||||
|
|
||||||
|
String nextUrl = null;
|
||||||
|
if (currentPage < pages.size()) {
|
||||||
|
nextUrl = pageUrl.substring(0, pageUrl.length() - 1) + (currentPage + 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
return new InfoItemsPage<>(collector, nextUrl);
|
||||||
|
|
||||||
return new InfoItemsPage<>(getInfoItemSearchCollector(), null);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Nonnull
|
@Nonnull
|
||||||
@Override
|
@Override
|
||||||
public InfoItemsPage<InfoItem> getInitialPage() throws IOException, ExtractionException {
|
public InfoItemsPage<InfoItem> getInitialPage() throws IOException, ExtractionException {
|
||||||
return getPage(getUrl());//new InfoItemsPage<>(getInfoItemSearchCollector(), null);
|
return getPage(getUrl());
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String getNextPageUrl() throws IOException, ExtractionException {
|
public String getNextPageUrl() throws ExtractionException {
|
||||||
return null;
|
String url = getUrl();
|
||||||
|
return url.substring(0, url.length() - 1).concat("2");
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -21,7 +21,8 @@ public class BandcampSearchQueryHandlerFactory extends SearchQueryHandlerFactory
|
||||||
try {
|
try {
|
||||||
|
|
||||||
return SEARCH_URL +
|
return SEARCH_URL +
|
||||||
URLEncoder.encode(query, CHARSET_UTF_8);
|
URLEncoder.encode(query, CHARSET_UTF_8)
|
||||||
|
+ "&page=1";
|
||||||
|
|
||||||
} catch (UnsupportedEncodingException e) {
|
} catch (UnsupportedEncodingException e) {
|
||||||
throw new ParsingException("query \"" + query + "\" could not be encoded", e);
|
throw new ParsingException("query \"" + query + "\" could not be encoded", e);
|
||||||
|
|
|
@ -89,4 +89,17 @@ public class BandcampSearchExtractorTest {
|
||||||
assertEquals(24, ((PlaylistInfoItem) minecraft).getStreamCount());
|
assertEquals(24, ((PlaylistInfoItem) minecraft).getStreamCount());
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Tests searches with multiple pages
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
public void testMultiplePages() throws ExtractionException, IOException {
|
||||||
|
// A query practically guaranteed to have the maximum number of pages
|
||||||
|
SearchExtractor extractor = bandcamp.getSearchExtractor("e");
|
||||||
|
|
||||||
|
assertEquals("https://bandcamp.com/search?q=e&page=2", extractor.getInitialPage().getNextPageUrl());
|
||||||
|
|
||||||
|
assertEquals("https://bandcamp.com/search?q=e&page=3", extractor.getPage(extractor.getNextPageUrl()).getNextPageUrl());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -27,8 +27,8 @@ public class BandcampSearchQueryHandlerFactoryTest {
|
||||||
@Test
|
@Test
|
||||||
public void testEncoding() throws ParsingException {
|
public void testEncoding() throws ParsingException {
|
||||||
// Note: this isn't exactly as bandcamp does it (it wouldn't encode '!'), but both work
|
// Note: this isn't exactly as bandcamp does it (it wouldn't encode '!'), but both work
|
||||||
assertEquals("https://bandcamp.com/search?q=hello%21%22%C2%A7%24%25%26%2F%28%29%3D", searchQuery.getUrl("hello!\"§$%&/()="));
|
assertEquals("https://bandcamp.com/search?q=hello%21%22%C2%A7%24%25%26%2F%28%29%3D&page=1", searchQuery.getUrl("hello!\"§$%&/()="));
|
||||||
// Note: bandcamp uses %20 instead of '+', but both work
|
// Note: bandcamp uses %20 instead of '+', but both work
|
||||||
assertEquals("https://bandcamp.com/search?q=search+query+with+spaces", searchQuery.getUrl("search query with spaces"));
|
assertEquals("https://bandcamp.com/search?q=search+query+with+spaces&page=1", searchQuery.getUrl("search query with spaces"));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue