diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/bandcamp/extractors/BandcampExtractorHelper.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/bandcamp/extractors/BandcampExtractorHelper.java index aff866053..e7bbac999 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/bandcamp/extractors/BandcampExtractorHelper.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/bandcamp/extractors/BandcampExtractorHelper.java @@ -6,25 +6,81 @@ import com.grack.nanojson.JsonObject; import com.grack.nanojson.JsonParser; import com.grack.nanojson.JsonParserException; import com.grack.nanojson.JsonWriter; + import org.jsoup.Jsoup; import org.jsoup.nodes.Element; +import org.schabi.newpipe.extractor.Image; +import org.schabi.newpipe.extractor.Image.ResolutionLevel; import org.schabi.newpipe.extractor.NewPipe; import org.schabi.newpipe.extractor.exceptions.ParsingException; import org.schabi.newpipe.extractor.exceptions.ReCaptchaException; import org.schabi.newpipe.extractor.localization.DateWrapper; -import org.schabi.newpipe.extractor.utils.Utils; +import org.schabi.newpipe.extractor.utils.ImageSuffix; -import javax.annotation.Nullable; import java.io.IOException; import java.nio.charset.StandardCharsets; import java.time.DateTimeException; import java.time.ZonedDateTime; import java.time.format.DateTimeFormatter; import java.util.Collections; +import java.util.List; import java.util.Locale; +import java.util.stream.Collectors; + +import javax.annotation.Nonnull; +import javax.annotation.Nullable; + +import static org.schabi.newpipe.extractor.Image.HEIGHT_UNKNOWN; +import static org.schabi.newpipe.extractor.Image.WIDTH_UNKNOWN; +import static org.schabi.newpipe.extractor.utils.Utils.isNullOrEmpty; +import static org.schabi.newpipe.extractor.utils.Utils.replaceHttpWithHttps; public final class BandcampExtractorHelper { + /** + * List of image IDs which preserve aspect ratio with their theoretical dimension known. + * + *

+ * Bandcamp images are not always squares, so images which preserve aspect ratio are only used. + *

+ * + *

+ * One of the direct consequences of this specificity is that only one dimension of images is + * known at time, depending of the image ID. + *

+ * + *

+ * Note also that dimensions are only theoretical because if the image size is less than the + * dimensions of the image ID, it will be not upscaled but kept to its original size. + *

+ * + *

+ * IDs come from the + * GitHub Gist "Bandcamp File Format Parameters" by f2k1de + *

+ */ + private static final List IMAGE_URL_SUFFIXES_AND_RESOLUTIONS = List.of( + // ID | HEIGHT | WIDTH + new ImageSuffix("10.jpg", HEIGHT_UNKNOWN, 1200, ResolutionLevel.HIGH), + new ImageSuffix("101.jpg", 90, WIDTH_UNKNOWN, ResolutionLevel.LOW), + new ImageSuffix("170.jpg", 422, WIDTH_UNKNOWN, ResolutionLevel.MEDIUM), + // 180 returns the same image aspect ratio and size as 171 + new ImageSuffix("171.jpg", 646, WIDTH_UNKNOWN, ResolutionLevel.MEDIUM), + new ImageSuffix("20.jpg", HEIGHT_UNKNOWN, 1024, ResolutionLevel.HIGH), + // 203 returns the same image aspect ratio and size as 200 + new ImageSuffix("200.jpg", 420, WIDTH_UNKNOWN, ResolutionLevel.MEDIUM), + new ImageSuffix("201.jpg", 280, WIDTH_UNKNOWN, ResolutionLevel.MEDIUM), + new ImageSuffix("202.jpg", 140, WIDTH_UNKNOWN, ResolutionLevel.LOW), + new ImageSuffix("204.jpg", 360, WIDTH_UNKNOWN, ResolutionLevel.MEDIUM), + new ImageSuffix("205.jpg", 240, WIDTH_UNKNOWN, ResolutionLevel.MEDIUM), + new ImageSuffix("206.jpg", 180, WIDTH_UNKNOWN, ResolutionLevel.MEDIUM), + new ImageSuffix("207.jpg", 120, WIDTH_UNKNOWN, ResolutionLevel.LOW), + new ImageSuffix("43.jpg", 100, WIDTH_UNKNOWN, ResolutionLevel.LOW), + new ImageSuffix("44.jpg", 200, WIDTH_UNKNOWN, ResolutionLevel.MEDIUM)); + + private static final String IMAGE_URL_APPENDIX_AND_EXTENSION_REGEX = "_\\d+\\.\\w+"; + private static final String IMAGES_DOMAIN_AND_PATH = "https://f4.bcbits.com/img/"; + public static final String BASE_URL = "https://bandcamp.com"; public static final String BASE_API_URL = BASE_URL + "/api"; @@ -44,7 +100,7 @@ public final class BandcampExtractorHelper { + "&tralbum_id=" + itemId + "&tralbum_type=" + itemType.charAt(0)) .responseBody(); - return Utils.replaceHttpWithHttps(JsonParser.object().from(jsonString) + return replaceHttpWithHttps(JsonParser.object().from(jsonString) .getString("bandcamp_url")); } catch (final JsonParserException | ReCaptchaException | IOException e) { @@ -76,17 +132,26 @@ public final class BandcampExtractorHelper { } /** - * Generate image url from image ID. - *

- * The appendix "_10" was chosen because it provides images sized 1200x1200. Other integer - * values are possible as well (e.g. 0 is a very large resolution, possibly the original). + * Generate an image url from an image ID. * - * @param id The image ID - * @param album True if this is the cover of an album or track - * @return URL of image with this ID sized 1200x1200 + *

+ * The image ID {@code 10} was chosen because it provides images wide up to 1200px (when + * the original image width is more than or equal this resolution). + *

+ * + *

+ * Other integer values are possible as well (e.g. 0 is a very large resolution, possibly the + * original); see {@link #IMAGE_URL_SUFFIXES_AND_RESOLUTIONS} for more details about image + * resolution IDs. + *

+ * + * @param id the image ID + * @param isAlbum whether the image is the cover of an album or a track + * @return a URL of the image with this ID with a width up to 1200px */ - public static String getImageUrl(final long id, final boolean album) { - return "https://f4.bcbits.com/img/" + (album ? 'a' : "") + id + "_10.jpg"; + @Nonnull + public static String getImageUrl(final long id, final boolean isAlbum) { + return IMAGES_DOMAIN_AND_PATH + (isAlbum ? 'a' : "") + id + "_10.jpg"; } /** @@ -136,13 +201,94 @@ public final class BandcampExtractorHelper { } } - @Nullable - public static String getThumbnailUrlFromSearchResult(final Element searchResult) { - return searchResult.getElementsByClass("art").stream() + /** + * Get a list of images from a search result {@link Element}. + * + *

+ * This method will call {@link #getImagesFromImageUrl(String)} using the first non null and + * non empty image URL found from the {@code src} attribute of {@code img} HTML elements, or an + * empty string if no valid image URL was found. + *

+ * + * @param searchResult a search result {@link Element} + * @return an unmodifiable list of {@link Image}s, which is never null but can be empty, in the + * case where no valid image URL was found + */ + @Nonnull + public static List getImagesFromSearchResult(@Nonnull final Element searchResult) { + return getImagesFromImageUrl(searchResult.getElementsByClass("art") + .stream() .flatMap(element -> element.getElementsByTag("img").stream()) .map(element -> element.attr("src")) - .filter(string -> !string.isEmpty()) + .filter(imageUrl -> !isNullOrEmpty(imageUrl)) .findFirst() - .orElse(null); + .orElse("")); + } + + /** + * Get all images which have resolutions preserving aspect ratio from an image URL. + * + *

+ * This method will remove the image ID and its extension from the end of the URL and then call + * {@link #getImagesFromImageBaseUrl(String)}. + *

+ * + * @param imageUrl the full URL of an image provided by Bandcamp, such as in its HTML code + * @return an unmodifiable list of {@link Image}s, which is never null but can be empty, in the + * case where the image URL has been not extracted (and so is null or empty) + */ + @Nonnull + public static List getImagesFromImageUrl(@Nullable final String imageUrl) { + if (isNullOrEmpty(imageUrl)) { + return List.of(); + } + + return getImagesFromImageBaseUrl( + imageUrl.replaceFirst(IMAGE_URL_APPENDIX_AND_EXTENSION_REGEX, "_")); + } + + /** + * Get all images which have resolutions preserving aspect ratio from an image ID. + * + *

+ * This method will call {@link #getImagesFromImageBaseUrl(String)}. + *

+ * + * @param id the id of an image provided by Bandcamp + * @param isAlbum whether the image is the cover of an album + * @return an unmodifiable list of {@link Image}s, which is never null but can be empty, in the + * case where the image ID has been not extracted (and so equal to 0) + */ + @Nonnull + public static List getImagesFromImageId(final long id, final boolean isAlbum) { + if (id == 0) { + return List.of(); + } + + return getImagesFromImageBaseUrl(IMAGES_DOMAIN_AND_PATH + (isAlbum ? 'a' : "") + id + "_"); + } + + /** + * Get all images resolutions preserving aspect ratio from a base image URL. + * + *

+ * Base image URLs are images containing the image path, a {@code a} letter if it comes from an + * album, its ID and an underscore. + *

+ * + *

+ * Images resolutions returned are the ones of {@link #IMAGE_URL_SUFFIXES_AND_RESOLUTIONS}. + *

+ * + * @param baseUrl the base URL of the image + * @return an unmodifiable and non-empty list of {@link Image}s + */ + @Nonnull + private static List getImagesFromImageBaseUrl(@Nonnull final String baseUrl) { + return IMAGE_URL_SUFFIXES_AND_RESOLUTIONS.stream() + .map(imageSuffix -> new Image(baseUrl + imageSuffix.getSuffix(), + imageSuffix.getHeight(), imageSuffix.getWidth(), + imageSuffix.getResolutionLevel())) + .collect(Collectors.toUnmodifiableList()); } }