From 81b5e7cf3d22015d58ef4817ec687048473dcebe Mon Sep 17 00:00:00 2001 From: Fynn Godau Date: Thu, 8 Oct 2020 17:56:03 +0200 Subject: [PATCH] Fix extractor --- .../extractors/BandcampExtractorHelper.java | 37 ++++--------------- .../extractors/BandcampPlaylistExtractor.java | 4 +- .../extractors/BandcampStreamExtractor.java | 10 ++--- .../BandcampChannelLinkHandlerFactory.java | 2 +- 4 files changed, 14 insertions(+), 39 deletions(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/bandcamp/extractors/BandcampExtractorHelper.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/bandcamp/extractors/BandcampExtractorHelper.java index 707eaf062..a239c440a 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/bandcamp/extractors/BandcampExtractorHelper.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/bandcamp/extractors/BandcampExtractorHelper.java @@ -6,6 +6,8 @@ import com.grack.nanojson.JsonObject; import com.grack.nanojson.JsonParser; import com.grack.nanojson.JsonParserException; import com.grack.nanojson.JsonWriter; +import org.jsoup.Jsoup; +import org.jsoup.nodes.Document; import org.schabi.newpipe.extractor.NewPipe; import org.schabi.newpipe.extractor.exceptions.ParsingException; import org.schabi.newpipe.extractor.exceptions.ReCaptchaException; @@ -20,7 +22,7 @@ import java.util.*; public class BandcampExtractorHelper { /** - *

Get JSON behind var $variable = out of web page

+ *

Get an attribute of a web page as JSON * *

Originally a part of bandcampDirect.

* @@ -29,35 +31,10 @@ public class BandcampExtractorHelper { * @param variable Name of the variable * @return The JsonObject stored in the variable with this name */ - public static JsonObject getJSONFromJavaScriptVariables(String html, String variable) throws JsonParserException, ArrayIndexOutOfBoundsException, ParsingException { - - String[] part = html.split("var " + variable + " = "); - - String firstHalfGone = part[1]; - - firstHalfGone = firstHalfGone.replaceAll("\" \\+ \"", ""); - - int position = -1; - int level = 0; - for (char character : firstHalfGone.toCharArray()) { - position++; - - switch (character) { - case '{': - level++; - continue; - case '}': - level--; - if (level == 0) { - return JsonParser.object().from(firstHalfGone.substring(0, position + 1) - .replaceAll(" {4}//.+", "") // Remove "for the curious" in JSON - .replaceAll("// xxx: note - don't internationalize this variable", "") // Remove this comment - ); - } - } - } - - throw new ParsingException("Unexpected HTML: JSON never ends"); + public static JsonObject getJsonData(String html, String variable) throws JsonParserException, ArrayIndexOutOfBoundsException, ParsingException { + Document document = Jsoup.parse(html); + String json = document.getElementsByAttribute(variable).attr(variable); + return JsonParser.object().from(json); } /** diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/bandcamp/extractors/BandcampPlaylistExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/bandcamp/extractors/BandcampPlaylistExtractor.java index f06d51d38..b7e812695 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/bandcamp/extractors/BandcampPlaylistExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/bandcamp/extractors/BandcampPlaylistExtractor.java @@ -21,7 +21,7 @@ import javax.annotation.Nonnull; import java.io.IOException; import static org.schabi.newpipe.extractor.services.bandcamp.extractors.BandcampExtractorHelper.getImageUrl; -import static org.schabi.newpipe.extractor.services.bandcamp.extractors.BandcampExtractorHelper.getJSONFromJavaScriptVariables; +import static org.schabi.newpipe.extractor.services.bandcamp.extractors.BandcampExtractorHelper.getJsonData; import static org.schabi.newpipe.extractor.services.bandcamp.extractors.BandcampStreamExtractor.getAlbumInfoJson; public class BandcampPlaylistExtractor extends PlaylistExtractor { @@ -50,7 +50,7 @@ public class BandcampPlaylistExtractor extends PlaylistExtractor { trackInfo = albumJson.getArray("trackinfo"); try { - name = getJSONFromJavaScriptVariables(html, "EmbedData").getString("album_title"); + name = getJsonData(html, "data-embed").getString("album_title"); } catch (JsonParserException e) { throw new ParsingException("Faulty JSON; page likely does not contain album data", e); } catch (ArrayIndexOutOfBoundsException e) { diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/bandcamp/extractors/BandcampStreamExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/bandcamp/extractors/BandcampStreamExtractor.java index a0da730fc..ae172ed15 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/bandcamp/extractors/BandcampStreamExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/bandcamp/extractors/BandcampStreamExtractor.java @@ -20,11 +20,7 @@ import org.schabi.newpipe.extractor.stream.*; import javax.annotation.Nonnull; import javax.annotation.Nullable; import java.io.IOException; -import java.text.ParseException; -import java.text.SimpleDateFormat; import java.util.ArrayList; -import java.util.Calendar; -import java.util.Date; import java.util.List; import java.util.Locale; @@ -63,7 +59,7 @@ public class BandcampStreamExtractor extends StreamExtractor { */ public static JsonObject getAlbumInfoJson(String html) throws ParsingException { try { - return BandcampExtractorHelper.getJSONFromJavaScriptVariables(html, "TralbumData"); + return BandcampExtractorHelper.getJsonData(html, "data-tralbum"); } catch (JsonParserException e) { throw new ParsingException("Faulty JSON; page likely does not contain album data", e); } catch (ArrayIndexOutOfBoundsException e) { @@ -264,7 +260,9 @@ public class BandcampStreamExtractor extends StreamExtractor { @Override public String getCategory() { // Get first tag from html, which is the artist's Genre - return document.getElementsByAttributeValue("itemprop", "keywords").first().text(); + return document + .getElementsByClass("tralbum-tags").first() + .getElementsByClass("tag").first().text(); } @Nonnull diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/bandcamp/linkHandler/BandcampChannelLinkHandlerFactory.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/bandcamp/linkHandler/BandcampChannelLinkHandlerFactory.java index 370057dde..825add10b 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/bandcamp/linkHandler/BandcampChannelLinkHandlerFactory.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/bandcamp/linkHandler/BandcampChannelLinkHandlerFactory.java @@ -25,7 +25,7 @@ public class BandcampChannelLinkHandlerFactory extends ListLinkHandlerFactory { String response = NewPipe.getDownloader().get(url).responseBody(); // This variable contains band data! - JsonObject bandData = BandcampExtractorHelper.getJSONFromJavaScriptVariables(response, "BandData"); + JsonObject bandData = BandcampExtractorHelper.getJsonData(response, "data-band"); return String.valueOf(bandData.getLong("id"));