Fix extractor
This commit is contained in:
parent
cfe88a74c1
commit
81b5e7cf3d
|
@ -6,6 +6,8 @@ import com.grack.nanojson.JsonObject;
|
||||||
import com.grack.nanojson.JsonParser;
|
import com.grack.nanojson.JsonParser;
|
||||||
import com.grack.nanojson.JsonParserException;
|
import com.grack.nanojson.JsonParserException;
|
||||||
import com.grack.nanojson.JsonWriter;
|
import com.grack.nanojson.JsonWriter;
|
||||||
|
import org.jsoup.Jsoup;
|
||||||
|
import org.jsoup.nodes.Document;
|
||||||
import org.schabi.newpipe.extractor.NewPipe;
|
import org.schabi.newpipe.extractor.NewPipe;
|
||||||
import org.schabi.newpipe.extractor.exceptions.ParsingException;
|
import org.schabi.newpipe.extractor.exceptions.ParsingException;
|
||||||
import org.schabi.newpipe.extractor.exceptions.ReCaptchaException;
|
import org.schabi.newpipe.extractor.exceptions.ReCaptchaException;
|
||||||
|
@ -20,7 +22,7 @@ import java.util.*;
|
||||||
public class BandcampExtractorHelper {
|
public class BandcampExtractorHelper {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* <p>Get JSON behind <code>var $variable = </code> out of web page</p>
|
* <p>Get an attribute of a web page as JSON</p>
|
||||||
*
|
*
|
||||||
* <p>Originally a part of bandcampDirect.</p>
|
* <p>Originally a part of bandcampDirect.</p>
|
||||||
*
|
*
|
||||||
|
@ -29,35 +31,10 @@ public class BandcampExtractorHelper {
|
||||||
* @param variable Name of the variable
|
* @param variable Name of the HTML attribute whose value holds the JSON (e.g. <code>data-tralbum</code>)
|
||||||
* @return The JsonObject stored in the variable with this name
|
* @return The JsonObject parsed from the value of the attribute with this name
|
||||||
*/
|
*/
|
||||||
public static JsonObject getJSONFromJavaScriptVariables(String html, String variable) throws JsonParserException, ArrayIndexOutOfBoundsException, ParsingException {
|
public static JsonObject getJsonData(String html, String variable) throws JsonParserException, ArrayIndexOutOfBoundsException, ParsingException {
|
||||||
|
Document document = Jsoup.parse(html);
|
||||||
String[] part = html.split("var " + variable + " = ");
|
String json = document.getElementsByAttribute(variable).attr(variable);
|
||||||
|
return JsonParser.object().from(json);
|
||||||
String firstHalfGone = part[1];
|
|
||||||
|
|
||||||
firstHalfGone = firstHalfGone.replaceAll("\" \\+ \"", "");
|
|
||||||
|
|
||||||
int position = -1;
|
|
||||||
int level = 0;
|
|
||||||
for (char character : firstHalfGone.toCharArray()) {
|
|
||||||
position++;
|
|
||||||
|
|
||||||
switch (character) {
|
|
||||||
case '{':
|
|
||||||
level++;
|
|
||||||
continue;
|
|
||||||
case '}':
|
|
||||||
level--;
|
|
||||||
if (level == 0) {
|
|
||||||
return JsonParser.object().from(firstHalfGone.substring(0, position + 1)
|
|
||||||
.replaceAll(" {4}//.+", "") // Remove "for the curious" in JSON
|
|
||||||
.replaceAll("// xxx: note - don't internationalize this variable", "") // Remove this comment
|
|
||||||
);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
throw new ParsingException("Unexpected HTML: JSON never ends");
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
@ -21,7 +21,7 @@ import javax.annotation.Nonnull;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
|
||||||
import static org.schabi.newpipe.extractor.services.bandcamp.extractors.BandcampExtractorHelper.getImageUrl;
|
import static org.schabi.newpipe.extractor.services.bandcamp.extractors.BandcampExtractorHelper.getImageUrl;
|
||||||
import static org.schabi.newpipe.extractor.services.bandcamp.extractors.BandcampExtractorHelper.getJSONFromJavaScriptVariables;
|
import static org.schabi.newpipe.extractor.services.bandcamp.extractors.BandcampExtractorHelper.getJsonData;
|
||||||
import static org.schabi.newpipe.extractor.services.bandcamp.extractors.BandcampStreamExtractor.getAlbumInfoJson;
|
import static org.schabi.newpipe.extractor.services.bandcamp.extractors.BandcampStreamExtractor.getAlbumInfoJson;
|
||||||
|
|
||||||
public class BandcampPlaylistExtractor extends PlaylistExtractor {
|
public class BandcampPlaylistExtractor extends PlaylistExtractor {
|
||||||
|
@ -50,7 +50,7 @@ public class BandcampPlaylistExtractor extends PlaylistExtractor {
|
||||||
trackInfo = albumJson.getArray("trackinfo");
|
trackInfo = albumJson.getArray("trackinfo");
|
||||||
|
|
||||||
try {
|
try {
|
||||||
name = getJSONFromJavaScriptVariables(html, "EmbedData").getString("album_title");
|
name = getJsonData(html, "data-embed").getString("album_title");
|
||||||
} catch (JsonParserException e) {
|
} catch (JsonParserException e) {
|
||||||
throw new ParsingException("Faulty JSON; page likely does not contain album data", e);
|
throw new ParsingException("Faulty JSON; page likely does not contain album data", e);
|
||||||
} catch (ArrayIndexOutOfBoundsException e) {
|
} catch (ArrayIndexOutOfBoundsException e) {
|
||||||
|
|
|
@ -20,11 +20,7 @@ import org.schabi.newpipe.extractor.stream.*;
|
||||||
import javax.annotation.Nonnull;
|
import javax.annotation.Nonnull;
|
||||||
import javax.annotation.Nullable;
|
import javax.annotation.Nullable;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.text.ParseException;
|
|
||||||
import java.text.SimpleDateFormat;
|
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Calendar;
|
|
||||||
import java.util.Date;
|
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Locale;
|
import java.util.Locale;
|
||||||
|
|
||||||
|
@ -63,7 +59,7 @@ public class BandcampStreamExtractor extends StreamExtractor {
|
||||||
*/
|
*/
|
||||||
public static JsonObject getAlbumInfoJson(String html) throws ParsingException {
|
public static JsonObject getAlbumInfoJson(String html) throws ParsingException {
|
||||||
try {
|
try {
|
||||||
return BandcampExtractorHelper.getJSONFromJavaScriptVariables(html, "TralbumData");
|
return BandcampExtractorHelper.getJsonData(html, "data-tralbum");
|
||||||
} catch (JsonParserException e) {
|
} catch (JsonParserException e) {
|
||||||
throw new ParsingException("Faulty JSON; page likely does not contain album data", e);
|
throw new ParsingException("Faulty JSON; page likely does not contain album data", e);
|
||||||
} catch (ArrayIndexOutOfBoundsException e) {
|
} catch (ArrayIndexOutOfBoundsException e) {
|
||||||
|
@ -264,7 +260,9 @@ public class BandcampStreamExtractor extends StreamExtractor {
|
||||||
@Override
|
@Override
|
||||||
public String getCategory() {
|
public String getCategory() {
|
||||||
// Get first tag from html, which is the artist's Genre
|
// Get first tag from html, which is the artist's Genre
|
||||||
return document.getElementsByAttributeValue("itemprop", "keywords").first().text();
|
return document
|
||||||
|
.getElementsByClass("tralbum-tags").first()
|
||||||
|
.getElementsByClass("tag").first().text();
|
||||||
}
|
}
|
||||||
|
|
||||||
@Nonnull
|
@Nonnull
|
||||||
|
|
|
@ -25,7 +25,7 @@ public class BandcampChannelLinkHandlerFactory extends ListLinkHandlerFactory {
|
||||||
String response = NewPipe.getDownloader().get(url).responseBody();
|
String response = NewPipe.getDownloader().get(url).responseBody();
|
||||||
|
|
||||||
// This variable contains band data!
|
// This variable contains band data!
|
||||||
JsonObject bandData = BandcampExtractorHelper.getJSONFromJavaScriptVariables(response, "BandData");
|
JsonObject bandData = BandcampExtractorHelper.getJsonData(response, "data-band");
|
||||||
|
|
||||||
return String.valueOf(bandData.getLong("id"));
|
return String.valueOf(bandData.getLong("id"));
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue