Merge pull request #1056 from AudricV/yt-improve-search-suggestions-extraction
[YouTube] Switch to new search suggestion domain and improve error handling
This commit is contained in:
commit
a9ca5c49e4
|
@ -1,6 +1,27 @@
|
||||||
|
/*
|
||||||
|
* Created by Christian Schabesberger on 28.09.16.
|
||||||
|
*
|
||||||
|
* Copyright (C) Christian Schabesberger 2015 <chris.schabesberger@mailbox.org>
|
||||||
|
* YoutubeSuggestionExtractor.java is part of NewPipe Extractor.
|
||||||
|
*
|
||||||
|
* NewPipe Extractor is free software: you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU General Public License as published by
|
||||||
|
* the Free Software Foundation, either version 3 of the License, or
|
||||||
|
* (at your option) any later version.
|
||||||
|
*
|
||||||
|
* NewPipe Extractor is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License
|
||||||
|
* along with NewPipe Extractor. If not, see <https://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
|
||||||
package org.schabi.newpipe.extractor.services.youtube.extractors;
|
package org.schabi.newpipe.extractor.services.youtube.extractors;
|
||||||
|
|
||||||
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getCookieHeader;
|
import static org.schabi.newpipe.extractor.utils.Utils.isBlank;
|
||||||
|
import static org.schabi.newpipe.extractor.utils.Utils.isNullOrEmpty;
|
||||||
|
|
||||||
import com.grack.nanojson.JsonArray;
|
import com.grack.nanojson.JsonArray;
|
||||||
import com.grack.nanojson.JsonParser;
|
import com.grack.nanojson.JsonParser;
|
||||||
|
@ -8,35 +29,18 @@ import com.grack.nanojson.JsonParserException;
|
||||||
|
|
||||||
import org.schabi.newpipe.extractor.NewPipe;
|
import org.schabi.newpipe.extractor.NewPipe;
|
||||||
import org.schabi.newpipe.extractor.StreamingService;
|
import org.schabi.newpipe.extractor.StreamingService;
|
||||||
import org.schabi.newpipe.extractor.downloader.Downloader;
|
import org.schabi.newpipe.extractor.downloader.Response;
|
||||||
import org.schabi.newpipe.extractor.exceptions.ExtractionException;
|
import org.schabi.newpipe.extractor.exceptions.ExtractionException;
|
||||||
import org.schabi.newpipe.extractor.exceptions.ParsingException;
|
import org.schabi.newpipe.extractor.exceptions.ParsingException;
|
||||||
import org.schabi.newpipe.extractor.suggestion.SuggestionExtractor;
|
import org.schabi.newpipe.extractor.suggestion.SuggestionExtractor;
|
||||||
import org.schabi.newpipe.extractor.utils.Utils;
|
import org.schabi.newpipe.extractor.utils.Utils;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.ArrayList;
|
import java.util.Collections;
|
||||||
|
import java.util.HashMap;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
/*
|
import java.util.stream.Collectors;
|
||||||
* Created by Christian Schabesberger on 28.09.16.
|
|
||||||
*
|
|
||||||
* Copyright (C) Christian Schabesberger 2015 <chris.schabesberger@mailbox.org>
|
|
||||||
* YoutubeSuggestionExtractor.java is part of NewPipe.
|
|
||||||
*
|
|
||||||
* NewPipe is free software: you can redistribute it and/or modify
|
|
||||||
* it under the terms of the GNU General Public License as published by
|
|
||||||
* the Free Software Foundation, either version 3 of the License, or
|
|
||||||
* (at your option) any later version.
|
|
||||||
*
|
|
||||||
* NewPipe is distributed in the hope that it will be useful,
|
|
||||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
||||||
* GNU General Public License for more details.
|
|
||||||
*
|
|
||||||
* You should have received a copy of the GNU General Public License
|
|
||||||
* along with NewPipe. If not, see <http://www.gnu.org/licenses/>.
|
|
||||||
*/
|
|
||||||
|
|
||||||
public class YoutubeSuggestionExtractor extends SuggestionExtractor {
|
public class YoutubeSuggestionExtractor extends SuggestionExtractor {
|
||||||
|
|
||||||
|
@ -46,35 +50,45 @@ public class YoutubeSuggestionExtractor extends SuggestionExtractor {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public List<String> suggestionList(final String query) throws IOException, ExtractionException {
|
public List<String> suggestionList(final String query) throws IOException, ExtractionException {
|
||||||
final Downloader dl = NewPipe.getDownloader();
|
final String url = "https://suggestqueries-clients6.youtube.com/complete/search"
|
||||||
final List<String> suggestions = new ArrayList<>();
|
+ "?client=" + "youtube"
|
||||||
|
|
||||||
final String url = "https://suggestqueries.google.com/complete/search"
|
|
||||||
+ "?client=" + "youtube" //"firefox" for JSON, 'toolbar' for xml
|
|
||||||
+ "&jsonp=" + "JP"
|
|
||||||
+ "&ds=" + "yt"
|
+ "&ds=" + "yt"
|
||||||
+ "&gl=" + Utils.encodeUrlUtf8(getExtractorContentCountry().getCountryCode())
|
+ "&gl=" + Utils.encodeUrlUtf8(getExtractorContentCountry().getCountryCode())
|
||||||
+ "&q=" + Utils.encodeUrlUtf8(query);
|
+ "&q=" + Utils.encodeUrlUtf8(query)
|
||||||
|
+ "&xhr=t";
|
||||||
|
|
||||||
|
final Map<String, List<String>> headers = new HashMap<>();
|
||||||
|
headers.put("Origin", Collections.singletonList("https://www.youtube.com"));
|
||||||
|
headers.put("Referer", Collections.singletonList("https://www.youtube.com"));
|
||||||
|
|
||||||
|
final Response response = NewPipe.getDownloader()
|
||||||
|
.get(url, headers, getExtractorLocalization());
|
||||||
|
|
||||||
|
final String contentTypeHeader = response.getHeader("Content-Type");
|
||||||
|
if (isNullOrEmpty(contentTypeHeader) || !contentTypeHeader.contains("application/json")) {
|
||||||
|
throw new ExtractionException("Invalid response type (got \"" + contentTypeHeader
|
||||||
|
+ "\", expected a JSON response) (response code "
|
||||||
|
+ response.responseCode() + ")");
|
||||||
|
}
|
||||||
|
|
||||||
|
final String responseBody = response.responseBody();
|
||||||
|
|
||||||
|
if (responseBody.isEmpty()) {
|
||||||
|
throw new ExtractionException("Empty response received");
|
||||||
|
}
|
||||||
|
|
||||||
String response = dl.get(url, getCookieHeader(), getExtractorLocalization()).responseBody();
|
|
||||||
// trim JSONP part "JP(...)"
|
|
||||||
response = response.substring(3, response.length() - 1);
|
|
||||||
try {
|
try {
|
||||||
final JsonArray collection = JsonParser.array().from(response).getArray(1);
|
final JsonArray suggestions = JsonParser.array()
|
||||||
for (final Object suggestion : collection) {
|
.from(responseBody)
|
||||||
if (!(suggestion instanceof JsonArray)) {
|
.getArray(1); // 0: search query, 1: search suggestions, 2: tracking data?
|
||||||
continue;
|
return suggestions.stream()
|
||||||
}
|
.filter(JsonArray.class::isInstance)
|
||||||
final String suggestionStr = ((JsonArray) suggestion).getString(0);
|
.map(JsonArray.class::cast)
|
||||||
if (suggestionStr == null) {
|
.map(suggestion -> suggestion.getString(0)) // 0 is the search suggestion
|
||||||
continue;
|
.filter(suggestion -> !isBlank(suggestion)) // Filter blank suggestions
|
||||||
}
|
.collect(Collectors.toUnmodifiableList());
|
||||||
suggestions.add(suggestionStr);
|
|
||||||
}
|
|
||||||
|
|
||||||
return suggestions;
|
|
||||||
} catch (final JsonParserException e) {
|
} catch (final JsonParserException e) {
|
||||||
throw new ParsingException("Could not parse json response", e);
|
throw new ParsingException("Could not parse JSON response", e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,25 +1,25 @@
|
||||||
package org.schabi.newpipe.extractor.services.youtube;
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Created by Christian Schabesberger on 18.11.16.
|
* Created by Christian Schabesberger on 18.11.16.
|
||||||
*
|
*
|
||||||
* Copyright (C) Christian Schabesberger 2016 <chris.schabesberger@mailbox.org>
|
* Copyright (C) Christian Schabesberger 2016 <chris.schabesberger@mailbox.org>
|
||||||
* YoutubeSuggestionExtractorTest.java is part of NewPipe.
|
* YoutubeSuggestionExtractorTest.java is part of NewPipe Extractor.
|
||||||
*
|
*
|
||||||
* NewPipe is free software: you can redistribute it and/or modify
|
* NewPipe Extractor is free software: you can redistribute it and/or modify
|
||||||
* it under the terms of the GNU General Public License as published by
|
* it under the terms of the GNU General Public License as published by
|
||||||
* the Free Software Foundation, either version 3 of the License, or
|
* the Free Software Foundation, either version 3 of the License, or
|
||||||
* (at your option) any later version.
|
* (at your option) any later version.
|
||||||
*
|
*
|
||||||
* NewPipe is distributed in the hope that it will be useful,
|
* NewPipe Extractor is distributed in the hope that it will be useful,
|
||||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
* GNU General Public License for more details.
|
* GNU General Public License for more details.
|
||||||
*
|
*
|
||||||
* You should have received a copy of the GNU General Public License
|
* You should have received a copy of the GNU General Public License
|
||||||
* along with NewPipe. If not, see <http://www.gnu.org/licenses/>.
|
* along with NewPipe Extractor. If not, see <https://www.gnu.org/licenses/>.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
package org.schabi.newpipe.extractor.services.youtube;
|
||||||
|
|
||||||
import static org.junit.jupiter.api.Assertions.assertFalse;
|
import static org.junit.jupiter.api.Assertions.assertFalse;
|
||||||
import static org.schabi.newpipe.extractor.ServiceList.YouTube;
|
import static org.schabi.newpipe.extractor.ServiceList.YouTube;
|
||||||
|
|
||||||
|
@ -29,14 +29,15 @@ import org.schabi.newpipe.downloader.DownloaderFactory;
|
||||||
import org.schabi.newpipe.extractor.NewPipe;
|
import org.schabi.newpipe.extractor.NewPipe;
|
||||||
import org.schabi.newpipe.extractor.exceptions.ExtractionException;
|
import org.schabi.newpipe.extractor.exceptions.ExtractionException;
|
||||||
import org.schabi.newpipe.extractor.localization.Localization;
|
import org.schabi.newpipe.extractor.localization.Localization;
|
||||||
|
import org.schabi.newpipe.extractor.services.youtube.extractors.YoutubeSuggestionExtractor;
|
||||||
import org.schabi.newpipe.extractor.suggestion.SuggestionExtractor;
|
import org.schabi.newpipe.extractor.suggestion.SuggestionExtractor;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Test for {@link SuggestionExtractor}
|
* Test for {@link YoutubeSuggestionExtractor}
|
||||||
*/
|
*/
|
||||||
public class YoutubeSuggestionExtractorTest {
|
class YoutubeSuggestionExtractorTest {
|
||||||
|
|
||||||
private static final String RESOURCE_PATH = DownloaderFactory.RESOURCE_PATH + "services/youtube/extractor/suggestions/";
|
private static final String RESOURCE_PATH = DownloaderFactory.RESOURCE_PATH + "services/youtube/extractor/suggestions/";
|
||||||
|
|
||||||
|
@ -45,12 +46,12 @@ public class YoutubeSuggestionExtractorTest {
|
||||||
@BeforeAll
|
@BeforeAll
|
||||||
public static void setUp() throws Exception {
|
public static void setUp() throws Exception {
|
||||||
YoutubeTestsUtils.ensureStateless();
|
YoutubeTestsUtils.ensureStateless();
|
||||||
NewPipe.init(DownloaderFactory.getDownloader(RESOURCE_PATH + ""), new Localization("de", "DE"));
|
NewPipe.init(DownloaderFactory.getDownloader(RESOURCE_PATH), new Localization("de", "DE"));
|
||||||
suggestionExtractor = YouTube.getSuggestionExtractor();
|
suggestionExtractor = YouTube.getSuggestionExtractor();
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testIfSuggestions() throws IOException, ExtractionException {
|
void testIfSuggestions() throws IOException, ExtractionException {
|
||||||
assertFalse(suggestionExtractor.suggestionList("hello").isEmpty());
|
assertFalse(suggestionExtractor.suggestionList("hello").isEmpty());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,10 +1,13 @@
|
||||||
{
|
{
|
||||||
"request": {
|
"request": {
|
||||||
"httpMethod": "GET",
|
"httpMethod": "GET",
|
||||||
"url": "https://suggestqueries.google.com/complete/search?client\u003dyoutube\u0026jsonp\u003dJP\u0026ds\u003dyt\u0026gl\u003dDE\u0026q\u003dhello",
|
"url": "https://suggestqueries-clients6.youtube.com/complete/search?client\u003dyoutube\u0026ds\u003dyt\u0026gl\u003dDE\u0026q\u003dhello\u0026xhr\u003dt",
|
||||||
"headers": {
|
"headers": {
|
||||||
"Cookie": [
|
"Origin": [
|
||||||
"CONSENT\u003dPENDING+385"
|
"https://www.youtube.com"
|
||||||
|
],
|
||||||
|
"Referer": [
|
||||||
|
"https://www.youtube.com"
|
||||||
],
|
],
|
||||||
"Accept-Language": [
|
"Accept-Language": [
|
||||||
"en-GB, en;q\u003d0.9"
|
"en-GB, en;q\u003d0.9"
|
||||||
|
@ -19,8 +22,20 @@
|
||||||
"responseCode": 200,
|
"responseCode": 200,
|
||||||
"responseMessage": "",
|
"responseMessage": "",
|
||||||
"responseHeaders": {
|
"responseHeaders": {
|
||||||
|
"access-control-allow-credentials": [
|
||||||
|
"true"
|
||||||
|
],
|
||||||
|
"access-control-allow-headers": [
|
||||||
|
"Authorization, X-Goog-Visitor-Id, X-Goog-PageId"
|
||||||
|
],
|
||||||
|
"access-control-allow-origin": [
|
||||||
|
"https://www.youtube.com"
|
||||||
|
],
|
||||||
|
"access-control-max-age": [
|
||||||
|
"86400"
|
||||||
|
],
|
||||||
"alt-svc": [
|
"alt-svc": [
|
||||||
"h3\u003d\":443\"; ma\u003d2592000,h3-29\u003d\":443\"; ma\u003d2592000,h3-Q050\u003d\":443\"; ma\u003d2592000,h3-Q046\u003d\":443\"; ma\u003d2592000,h3-Q043\u003d\":443\"; ma\u003d2592000,quic\u003d\":443\"; ma\u003d2592000; v\u003d\"46,43\""
|
"h3\u003d\":443\"; ma\u003d2592000,h3-29\u003d\":443\"; ma\u003d2592000"
|
||||||
],
|
],
|
||||||
"cache-control": [
|
"cache-control": [
|
||||||
"private, max-age\u003d3600"
|
"private, max-age\u003d3600"
|
||||||
|
@ -28,18 +43,39 @@
|
||||||
"content-disposition": [
|
"content-disposition": [
|
||||||
"attachment; filename\u003d\"f.txt\""
|
"attachment; filename\u003d\"f.txt\""
|
||||||
],
|
],
|
||||||
|
"content-security-policy": [
|
||||||
|
"object-src \u0027none\u0027;base-uri \u0027self\u0027;script-src \u0027nonce-BWS-e4XX2c-fmiIRfypT7Q\u0027 \u0027strict-dynamic\u0027 \u0027report-sample\u0027 \u0027unsafe-eval\u0027 \u0027unsafe-inline\u0027 https: http:;report-uri https://csp.withgoogle.com/csp/gws/other"
|
||||||
|
],
|
||||||
"content-type": [
|
"content-type": [
|
||||||
"text/javascript; charset\u003dUTF-8"
|
"application/json; charset\u003dUTF-8"
|
||||||
|
],
|
||||||
|
"cross-origin-opener-policy": [
|
||||||
|
"same-origin-allow-popups; report-to\u003d\"gws\""
|
||||||
],
|
],
|
||||||
"date": [
|
"date": [
|
||||||
"Tue, 22 Nov 2022 10:40:53 GMT"
|
"Sun, 30 Apr 2023 17:40:12 GMT"
|
||||||
],
|
],
|
||||||
"expires": [
|
"expires": [
|
||||||
"Tue, 22 Nov 2022 10:40:53 GMT"
|
"Sun, 30 Apr 2023 17:40:12 GMT"
|
||||||
|
],
|
||||||
|
"p3p": [
|
||||||
|
"CP\u003d\"This is not a P3P policy! See g.co/p3phelp for more info.\""
|
||||||
|
],
|
||||||
|
"report-to": [
|
||||||
|
"{\"group\":\"gws\",\"max_age\":2592000,\"endpoints\":[{\"url\":\"https://csp.withgoogle.com/csp/report-to/gws/other\"}]}"
|
||||||
],
|
],
|
||||||
"server": [
|
"server": [
|
||||||
"gws"
|
"gws"
|
||||||
],
|
],
|
||||||
|
"set-cookie": [
|
||||||
|
"CONSENT\u003dPENDING+465; expires\u003dTue, 29-Apr-2025 17:40:12 GMT; path\u003d/; domain\u003d.youtube.com; Secure"
|
||||||
|
],
|
||||||
|
"vary": [
|
||||||
|
"Sec-Fetch-Site"
|
||||||
|
],
|
||||||
|
"x-content-type-options": [
|
||||||
|
"nosniff"
|
||||||
|
],
|
||||||
"x-frame-options": [
|
"x-frame-options": [
|
||||||
"SAMEORIGIN"
|
"SAMEORIGIN"
|
||||||
],
|
],
|
||||||
|
@ -47,7 +83,7 @@
|
||||||
"0"
|
"0"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
"responseBody": "JP([\"hello\",[[\"hello\",0,[512,433]],[\"hello adele\",0,[512,433,131]],[\"hello kitty\",0,[512,433]],[\"hello neighbor\",0,[512,433,131]],[\"hello hello\",0,[512,433]],[\"hello darkness my old friend\",0,[512,433,131]],[\"hello song\",0,[512,433,131]],[\"hello treasure\",0,[512,433]],[\"hello lionel richie\",0,[512,433]],[\"hello darkness my old friend lyrics\",0,[433,131]],[\"hello karaoke\",0,[433,131]],[\"hello kitty song\",0,[512,433,131]],[\"hello there\",0,[512]],[\"hello everybody my name is markiplier\",0,[433,131]]],{\"k\":1,\"q\":\"hL1qy05h57rukpOvO6x7ykUZN_0\"}])",
|
"responseBody": "[\"hello\",[[\"hello kitty\",0,[512,433,131]],[\"hello\",0,[512,433]],[\"hello neighbor\",0,[512,433,131]],[\"hello adele\",0,[512,433]],[\"hello kitty and friends\",0,[512,433,131]],[\"hello movie\",0,[650,433,131]],[\"hello darkness my old friend\",0,[512,433]],[\"hello hello\",0,[512,433]],[\"hello brother songs\",0,[650,433,131]],[\"hello neighbor 2\",0,[512,433]],[\"hello mama\",0,[650,433,131]],[\"hello lionel richie\",0,[512,433,131]],[\"hello neighbor song\",0,[512,433,131]],[\"hello blueface\",0,[3]]],{\"k\":1,\"q\":\"NYmTwY0mojkxWaSAZEeA30TjXp4\"}]",
|
||||||
"latestUrl": "https://suggestqueries.google.com/complete/search?client\u003dyoutube\u0026jsonp\u003dJP\u0026ds\u003dyt\u0026gl\u003dDE\u0026q\u003dhello"
|
"latestUrl": "https://suggestqueries-clients6.youtube.com/complete/search?client\u003dyoutube\u0026ds\u003dyt\u0026gl\u003dDE\u0026q\u003dhello\u0026xhr\u003dt"
|
||||||
}
|
}
|
||||||
}
|
}
|
Loading…
Reference in New Issue