Almost got it working

This commit is contained in:
Christian Schabesberger 2016-09-10 18:28:48 +02:00
parent 53059bcb91
commit 4f8ca9ef16
1 changed files with 7 additions and 21 deletions

View File

@ -84,19 +84,19 @@ public class YoutubeChannelExtractor extends ChannelExtractor {
nextPageUrl = getNextPageUrl(doc); nextPageUrl = getNextPageUrl(doc);
isAjaxPage = false; isAjaxPage = false;
} else { } else {
Map<String, String> userProperties = new HashMap<>(); String ajaxDataRaw = downloader.download(nextPageUrl);
userProperties.put("Referer", userUrl);
String ajaxDataRaw = downloader.download(nextPageUrl, userProperties);
JSONObject ajaxData; JSONObject ajaxData;
String htmlDataRaw;
try { try {
ajaxData = new JSONObject(ajaxDataRaw); ajaxData = new JSONObject(ajaxDataRaw);
htmlDataRaw = ajaxData.getString("content_html"); String htmlDataRaw = ajaxData.getString("content_html");
doc = Jsoup.parse(htmlDataRaw, nextPageUrl);
String nextPageHtmlDataRaw = ajaxData.getString("load_more_widget_html");
Document nextPageData = Jsoup.parse(nextPageHtmlDataRaw, nextPageUrl);
nextPageUrl = getNextPageUrl( nextPageData);
} catch (JSONException e) { } catch (JSONException e) {
throw new ParsingException("Could not parse json data for next page", e); throw new ParsingException("Could not parse json data for next page", e);
} }
doc = Jsoup.parse(htmlDataRaw, nextPageUrl);
nextPageUrl = getNextPageUrl(ajaxData);
isAjaxPage = true; isAjaxPage = true;
} }
} }
@ -324,18 +324,4 @@ public class YoutubeChannelExtractor extends ChannelExtractor {
throw new ParsingException("could not load next page url", e); throw new ParsingException("could not load next page url", e);
} }
} }
/**
 * Extracts the next page url from an ajax response.
 *
 * Reads the "load_more_widget_html" string from the given json object. If that
 * string is empty, an empty string is returned. Otherwise the string is parsed
 * as html and the resulting document is handed to getNextPageUrl(Document).
 *
 * @param ajaxData json object holding the ajax response
 * @return an empty string if the widget html is empty, otherwise whatever
 *         getNextPageUrl(Document) returns for the parsed widget
 * @throws ParsingException if "load_more_widget_html" can not be read from ajaxData
 *                          (the Document overload is declared elsewhere and
 *                          presumably can throw it as well)
 */
private String getNextPageUrl(JSONObject ajaxData) throws ParsingException {
    final String widgetHtml;
    try {
        widgetHtml = ajaxData.getString("load_more_widget_html");
    } catch (JSONException e) {
        throw new ParsingException("Could not get load_more_widget from ajax response", e);
    }

    // nothing to parse, so there is no url to look up
    if (widgetHtml.isEmpty()) {
        return "";
    }

    return getNextPageUrl(Jsoup.parse(widgetHtml));
}
} }