Fix extractor
This commit is contained in:
parent
cfe88a74c1
commit
81b5e7cf3d
|
@ -6,6 +6,8 @@ import com.grack.nanojson.JsonObject;
|
|||
import com.grack.nanojson.JsonParser;
|
||||
import com.grack.nanojson.JsonParserException;
|
||||
import com.grack.nanojson.JsonWriter;
|
||||
import org.jsoup.Jsoup;
|
||||
import org.jsoup.nodes.Document;
|
||||
import org.schabi.newpipe.extractor.NewPipe;
|
||||
import org.schabi.newpipe.extractor.exceptions.ParsingException;
|
||||
import org.schabi.newpipe.extractor.exceptions.ReCaptchaException;
|
||||
|
@ -20,7 +22,7 @@ import java.util.*;
|
|||
public class BandcampExtractorHelper {
|
||||
|
||||
/**
|
||||
* <p>Get JSON behind <code>var $variable = </code> out of web page</p>
|
||||
* <p>Get an attribute of a web page as JSON</p>
|
||||
*
|
||||
* <p>Originally a part of bandcampDirect.</p>
|
||||
*
|
||||
|
@ -29,35 +31,10 @@ public class BandcampExtractorHelper {
|
|||
* @param variable Name of the HTML attribute that holds the JSON (for example <code>data-tralbum</code>)
|
||||
* @return The JsonObject stored in the attribute with this name
|
||||
*/
|
||||
public static JsonObject getJSONFromJavaScriptVariables(String html, String variable) throws JsonParserException, ArrayIndexOutOfBoundsException, ParsingException {
|
||||
|
||||
String[] part = html.split("var " + variable + " = ");
|
||||
|
||||
String firstHalfGone = part[1];
|
||||
|
||||
firstHalfGone = firstHalfGone.replaceAll("\" \\+ \"", "");
|
||||
|
||||
int position = -1;
|
||||
int level = 0;
|
||||
for (char character : firstHalfGone.toCharArray()) {
|
||||
position++;
|
||||
|
||||
switch (character) {
|
||||
case '{':
|
||||
level++;
|
||||
continue;
|
||||
case '}':
|
||||
level--;
|
||||
if (level == 0) {
|
||||
return JsonParser.object().from(firstHalfGone.substring(0, position + 1)
|
||||
.replaceAll(" {4}//.+", "") // Remove "for the curious" in JSON
|
||||
.replaceAll("// xxx: note - don't internationalize this variable", "") // Remove this comment
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
throw new ParsingException("Unexpected HTML: JSON never ends");
|
||||
public static JsonObject getJsonData(String html, String variable) throws JsonParserException, ArrayIndexOutOfBoundsException, ParsingException {
|
||||
// NOTE(review): nothing in this body indexes an array or throws ParsingException directly;
// ArrayIndexOutOfBoundsException and ParsingException in the throws clause look like leftovers
// from the earlier split-based implementation. Callers still catch
// ArrayIndexOutOfBoundsException, so confirm before trimming the clause.
// Parse the full page so the attribute can be looked up through the DOM.
Document document = Jsoup.parse(html);
|
||||
// Select the elements that carry the attribute named by 'variable' and read that attribute's value.
// NOTE(review): presumably this is an empty string when no element has the attribute, which
// would then surface as a JsonParserException from the parse below - confirm against the
// Jsoup Elements.attr documentation.
String json = document.getElementsByAttribute(variable).attr(variable);
|
||||
// Interpret the attribute value as a JSON object and hand it back to the caller.
return JsonParser.object().from(json);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -21,7 +21,7 @@ import javax.annotation.Nonnull;
|
|||
import java.io.IOException;
|
||||
|
||||
import static org.schabi.newpipe.extractor.services.bandcamp.extractors.BandcampExtractorHelper.getImageUrl;
|
||||
import static org.schabi.newpipe.extractor.services.bandcamp.extractors.BandcampExtractorHelper.getJSONFromJavaScriptVariables;
|
||||
import static org.schabi.newpipe.extractor.services.bandcamp.extractors.BandcampExtractorHelper.getJsonData;
|
||||
import static org.schabi.newpipe.extractor.services.bandcamp.extractors.BandcampStreamExtractor.getAlbumInfoJson;
|
||||
|
||||
public class BandcampPlaylistExtractor extends PlaylistExtractor {
|
||||
|
@ -50,7 +50,7 @@ public class BandcampPlaylistExtractor extends PlaylistExtractor {
|
|||
trackInfo = albumJson.getArray("trackinfo");
|
||||
|
||||
try {
|
||||
name = getJSONFromJavaScriptVariables(html, "EmbedData").getString("album_title");
|
||||
name = getJsonData(html, "data-embed").getString("album_title");
|
||||
} catch (JsonParserException e) {
|
||||
throw new ParsingException("Faulty JSON; page likely does not contain album data", e);
|
||||
} catch (ArrayIndexOutOfBoundsException e) {
|
||||
|
|
|
@ -20,11 +20,7 @@ import org.schabi.newpipe.extractor.stream.*;
|
|||
import javax.annotation.Nonnull;
|
||||
import javax.annotation.Nullable;
|
||||
import java.io.IOException;
|
||||
import java.text.ParseException;
|
||||
import java.text.SimpleDateFormat;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Calendar;
|
||||
import java.util.Date;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
|
||||
|
@ -63,7 +59,7 @@ public class BandcampStreamExtractor extends StreamExtractor {
|
|||
*/
|
||||
public static JsonObject getAlbumInfoJson(String html) throws ParsingException {
|
||||
try {
|
||||
return BandcampExtractorHelper.getJSONFromJavaScriptVariables(html, "TralbumData");
|
||||
return BandcampExtractorHelper.getJsonData(html, "data-tralbum");
|
||||
} catch (JsonParserException e) {
|
||||
throw new ParsingException("Faulty JSON; page likely does not contain album data", e);
|
||||
} catch (ArrayIndexOutOfBoundsException e) {
|
||||
|
@ -264,7 +260,9 @@ public class BandcampStreamExtractor extends StreamExtractor {
|
|||
@Override
|
||||
public String getCategory() {
|
||||
// Get first tag from html, which is the artist's Genre
|
||||
return document.getElementsByAttributeValue("itemprop", "keywords").first().text();
|
||||
return document
|
||||
.getElementsByClass("tralbum-tags").first()
|
||||
.getElementsByClass("tag").first().text();
|
||||
}
|
||||
|
||||
@Nonnull
|
||||
|
|
|
@ -25,7 +25,7 @@ public class BandcampChannelLinkHandlerFactory extends ListLinkHandlerFactory {
|
|||
String response = NewPipe.getDownloader().get(url).responseBody();
|
||||
|
||||
// This variable contains band data!
|
||||
JsonObject bandData = BandcampExtractorHelper.getJSONFromJavaScriptVariables(response, "BandData");
|
||||
JsonObject bandData = BandcampExtractorHelper.getJsonData(response, "data-band");
|
||||
|
||||
return String.valueOf(bandData.getLong("id"));
|
||||
|
||||
|
|
Loading…
Reference in New Issue