diff --git a/htroot/yacysearchitem.html b/htroot/yacysearchitem.html index ba2b82447..51b8b2303 100644 --- a/htroot/yacysearchitem.html +++ b/htroot/yacysearchitem.html @@ -30,7 +30,7 @@ #(showMetadata)#:: | Metadata#(/showMetadata)# #(showParser)#:: | Parser#(/showParser)# #(showCitation)#:: | Citations#(/showCitation)# - #(showPictures)#:: | Pictures#(/showPictures)# + #(showPictures)#:: | Pictures#(/showPictures)# #(showCache)#:: | Cache#(/showCache)# #(showProxy)#:: | Augmented Browsing#(/showProxy)# #(showHostBrowser)#:: | Browse index#(/showHostBrowser)# diff --git a/htroot/yacysearchtrailer.java b/htroot/yacysearchtrailer.java index a2f8d74a3..b70d53a4c 100644 --- a/htroot/yacysearchtrailer.java +++ b/htroot/yacysearchtrailer.java @@ -105,13 +105,14 @@ public class yacysearchtrailer { prop.put("nav-namespace", 1); navigatorIterator = theSearch.namespaceNavigator.keys(false); int i = 0, pos = 0, neg = 0; - String nav; + String nav, rawNav; while (i < QueryParams.FACETS_STANDARD_MAXCOUNT && navigatorIterator.hasNext()) { name = navigatorIterator.next(); count = theSearch.namespaceNavigator.get(name); if (count == 0) break; - /* We use ':' character here, it will be percent encoded later, in QueryParams.navurl() function*/ - nav = "inurl:" + name; + nav = "inurl%3A" + name; + /* Avoid double percent encoding in QueryParams.navurl */ + rawNav = "inurl:" + name; if (!theSearch.query.modifier.toString().contains("inurl:"+name)) { pos++; prop.put("nav-namespace_element_" + i + "_on", 1); @@ -121,9 +122,11 @@ public class yacysearchtrailer { prop.put("nav-namespace_element_" + i + "_on", 0); prop.put(fileType, "nav-namespace_element_" + i + "_modifier", "-" + nav); nav=""; + rawNav = ""; } prop.put(fileType, "nav-namespace_element_" + i + "_name", name); - prop.put(fileType, "nav-namespace_element_" + i + "_url", QueryParams.navurl(fileType, 0, theSearch.query, nav, false).toString()); + /* URL is already percent encoded : no need to re-encode specifically for the 
file type */ + prop.put("nav-namespace_element_" + i + "_url", QueryParams.navurl(fileType, 0, theSearch.query, rawNav, false).toString()); prop.put(fileType, "nav-namespace_element_" + i + "_id", "namespace_" + i); prop.put("nav-namespace_element_" + i + "_count", count); prop.put("nav-namespace_element_" + i + "_nl", 1); @@ -143,12 +146,14 @@ public class yacysearchtrailer { prop.put("nav-domains", 1); navigatorIterator = hostNavigator.keys(false); int i = 0, pos = 0, neg = 0; - String nav; + String nav, rawNav; while (i < QueryParams.FACETS_STANDARD_MAXCOUNT && navigatorIterator.hasNext()) { name = navigatorIterator.next(); count = hostNavigator.get(name); if (count == 0) break; nav = "site%3A" + name; + /* Avoid double percent encoding in QueryParams.navurl */ + rawNav = "site:" + name; if (theSearch.query.modifier.sitehost == null || !theSearch.query.modifier.sitehost.contains(name)) { pos++; prop.put("nav-domains_element_" + i + "_on", 1); @@ -158,9 +163,11 @@ public class yacysearchtrailer { prop.put("nav-domains_element_" + i + "_on", 0); prop.put(fileType, "nav-domains_element_" + i + "_modifier", "-" + nav); nav=""; + rawNav = ""; } prop.put(fileType, "nav-domains_element_" + i + "_name", name); - prop.put(fileType, "nav-domains_element_" + i + "_url", QueryParams.navurl(fileType, 0, theSearch.query, nav, false).toString()); + /* URL is already percent encoded : no need to re-encode specifically for the file type */ + prop.put("nav-domains_element_" + i + "_url", QueryParams.navurl(fileType, 0, theSearch.query, rawNav, false).toString()); prop.put(fileType, "nav-domains_element_" + i + "_id", "domains_" + i); prop.put("nav-domains_element_" + i + "_count", count); prop.put("nav-domains_element_" + i + "_nl", 1); @@ -181,12 +188,14 @@ public class yacysearchtrailer { prop.put("nav-languages", 1); navigatorIterator = languageNavigator.keys(false); int i = 0, pos = 0, neg = 0; - String nav; + String nav, rawNav; while (i < 
QueryParams.FACETS_STANDARD_MAXCOUNT && navigatorIterator.hasNext()) { name = navigatorIterator.next(); count = languageNavigator.get(name); if (count == 0) break; nav = "%2Flanguage%2F" + name; + /* Avoid double percent encoding in QueryParams.navurl */ + rawNav = "/language/" + name; if (theSearch.query.modifier.language == null || !theSearch.query.modifier.language.contains(name)) { pos++; prop.put("nav-languages_element_" + i + "_on", 1); @@ -196,10 +205,12 @@ public class yacysearchtrailer { prop.put("nav-languages_element_" + i + "_on", 0); prop.put(fileType, "nav-languages_element_" + i + "_modifier", "-" + nav); nav=""; + rawNav = ""; } String longname = ISO639.country(name); prop.put(fileType, "nav-languages_element_" + i + "_name", longname == null ? name : longname); - prop.put(fileType, "nav-languages_element_" + i + "_url", QueryParams.navurl(fileType, 0, theSearch.query, nav, false).toString()); + /* URL is already percent encoded : no need to re-encode specifically for the file type */ + prop.put("nav-languages_element_" + i + "_url", QueryParams.navurl(fileType, 0, theSearch.query, rawNav, false).toString()); prop.put(fileType, "nav-languages_element_" + i + "_id", "languages_" + i); prop.put("nav-languages_element_" + i + "_count", count); prop.put("nav-languages_element_" + i + "_nl", 1); @@ -219,12 +230,14 @@ public class yacysearchtrailer { prop.put("nav-authors", 1); navigatorIterator = theSearch.authorNavigator.keys(false); int i = 0, pos = 0, neg = 0; - String nav; + String nav, rawNav; while (i < QueryParams.FACETS_STANDARD_MAXCOUNT && navigatorIterator.hasNext()) { name = navigatorIterator.next().trim(); count = theSearch.authorNavigator.get(name); if (count == 0) break; nav = (name.indexOf(' ', 0) < 0) ? "author%3A" + name : "author%3A%28" + name.replace(" ", "+") + "%29"; + /* Avoid double percent encoding in QueryParams.navurl */ + rawNav = (name.indexOf(' ', 0) < 0) ? 
"author:" + name : "author:(" + name.replace(" ", "+") + ")"; if (theSearch.query.modifier.author == null || !theSearch.query.modifier.author.contains(name)) { pos++; prop.put("nav-authors_element_" + i + "_on", 1); @@ -234,9 +247,11 @@ public class yacysearchtrailer { prop.put("nav-authors_element_" + i + "_on", 0); prop.put(fileType, "nav-authors_element_" + i + "_modifier", "-" + nav); nav=""; + rawNav = ""; } prop.put(fileType, "nav-authors_element_" + i + "_name", name); - prop.put(fileType, "nav-authors_element_" + i + "_url", QueryParams.navurl(fileType, 0, theSearch.query, nav, false).toString()); + /* URL is already percent encoded : no need to re-encode specifically for the file type */ + prop.put("nav-authors_element_" + i + "_url", QueryParams.navurl(fileType, 0, theSearch.query, rawNav, false).toString()); prop.put(fileType, "nav-authors_element_" + i + "_id", "authors_" + i); prop.put("nav-authors_element_" + i + "_count", count); prop.put("nav-authors_element_" + i + "_nl", 1); @@ -258,12 +273,14 @@ public class yacysearchtrailer { prop.put("nav-collections", 1); navigatorIterator = theSearch.collectionNavigator.keys(false); int i = 0, pos = 0, neg = 0; - String nav; + String nav, rawNav; while (i < QueryParams.FACETS_STANDARD_MAXCOUNT && navigatorIterator.hasNext()) { name = navigatorIterator.next().trim(); count = theSearch.collectionNavigator.get(name); if (count == 0) break; nav = (name.indexOf(' ', 0) < 0) ? "collection%3A" + name : "collection%3A%28" + name.replace(" ", "+") + "%29"; + /* Avoid double percent encoding in QueryParams.navurl */ + rawNav = (name.indexOf(' ', 0) < 0) ? 
"collection:" + name : "collection:(" + name.replace(" ", "+") + ")"; if (theSearch.query.modifier.collection == null || !theSearch.query.modifier.collection.contains(name)) { pos++; prop.put("nav-collections_element_" + i + "_on", 1); @@ -273,9 +290,11 @@ public class yacysearchtrailer { prop.put("nav-collections_element_" + i + "_on", 0); prop.put(fileType, "nav-collections_element_" + i + "_modifier", "-" + nav); nav=""; + rawNav = ""; } prop.put(fileType, "nav-collections_element_" + i + "_name", name); - prop.put(fileType, "nav-collections_element_" + i + "_url", QueryParams.navurl(fileType, 0, theSearch.query, nav, true).toString()); + /* URL is already percent encoded : no need to re-encode specifically for the file type */ + prop.put("nav-collections_element_" + i + "_url", QueryParams.navurl(fileType, 0, theSearch.query, rawNav, true).toString()); prop.put(fileType, "nav-collections_element_" + i + "_id", "collections_" + i); prop.put("nav-collections_element_" + i + "_count", count); prop.put("nav-collections_element_" + i + "_nl", 1); @@ -342,7 +361,7 @@ public class yacysearchtrailer { //theSearch.protocolNavigator.inc("http(s)", httpCount + httpsCount); navigatorIterator = theSearch.protocolNavigator.keys(false); int i = 0, pos = 0, neg = 0; - String nav; + String nav, rawNav; boolean visible = false; String oldQuery = theSearch.query.getQueryGoal().query_original; // prepare hack to make radio-button like navigation String oldProtocolModifier = theSearch.query.modifier.protocol; @@ -355,6 +374,8 @@ public class yacysearchtrailer { if (count == 0) break; visible = visible || "ftp,smb".indexOf(name) >= 0; nav = "%2F" + name; + /* Avoid double percent encoding in QueryParams.navurl */ + rawNav = "/" + name; if (oldProtocolModifier == null || !oldProtocolModifier.equals(name)) { pos++; prop.put("nav-protocols_element_" + i + "_on", 0); @@ -364,10 +385,12 @@ public class yacysearchtrailer { prop.put("nav-protocols_element_" + i + "_on", 1); 
prop.put(fileType, "nav-protocols_element_" + i + "_modifier", "-" + nav); nav=""; + rawNav = ""; } prop.put(fileType, "nav-protocols_element_" + i + "_name", name); - String url = QueryParams.navurl(fileType, 0, theSearch.query, nav, false).toString(); - prop.put(fileType, "nav-protocols_element_" + i + "_on_url", url); + /* URL is already percent encoded : no need to re-encode specifically for the file type */ + String url = QueryParams.navurl(fileType, 0, theSearch.query, rawNav, false).toString(); + prop.put("nav-protocols_element_" + i + "_on_url", url); prop.put("nav-protocols_element_" + i + "_count", count); prop.put("nav-protocols_element_" + i + "_nl", 1); i++; @@ -442,12 +465,14 @@ public class yacysearchtrailer { prop.put("nav-filetypes", 1); navigatorIterator = theSearch.filetypeNavigator.keys(false); int i = 0, pos = 0, neg = 0; - String nav; + String nav, rawNav; while (i < QueryParams.FACETS_STANDARD_MAXCOUNT && navigatorIterator.hasNext()) { name = navigatorIterator.next().trim(); count = theSearch.filetypeNavigator.get(name); if (count == 0) break; nav = "filetype%3A" + name; + /* Avoid double percent encoding in QueryParams.navurl */ + rawNav = "filetype:" + name; if (theSearch.query.modifier.filetype == null || !theSearch.query.modifier.filetype.contains(name) ) { pos++; prop.put("nav-filetypes_element_" + i + "_on", 1); @@ -457,9 +482,11 @@ public class yacysearchtrailer { prop.put("nav-filetypes_element_" + i + "_on", 0); prop.put(fileType, "nav-filetypes_element_" + i + "_modifier", "-" + nav); nav=""; + rawNav = ""; } prop.put(fileType, "nav-filetypes_element_" + i + "_name", name); - prop.put(fileType, "nav-filetypes_element_" + i + "_url", QueryParams.navurl(fileType, 0, theSearch.query, nav, false).toString()); + /* URL is already percent encoded : no need to re-encode specifically for the file type */ + prop.put("nav-filetypes_element_" + i + "_url", QueryParams.navurl(fileType, 0, theSearch.query, rawNav, false).toString()); 
prop.put(fileType, "nav-filetypes_element_" + i + "_id", "filetypes_" + i); prop.put("nav-filetypes_element_" + i + "_count", count); prop.put("nav-filetypes_element_" + i + "_nl", 1); @@ -484,12 +511,14 @@ public class yacysearchtrailer { prop.put(fileType, "nav-vocabulary_" + navvoccount + "_navname", navname); navigatorIterator = ve.getValue().keys(false); int i = 0; - String nav; + String nav, rawNav; while (i < 20 && navigatorIterator.hasNext()) { name = navigatorIterator.next(); count = ve.getValue().get(name); if (count == 0) break; nav = "%2Fvocabulary%2F" + navname + "%2F" + MultiProtocolURL.escape(Tagging.encodePrintname(name)).toString(); + /* Avoid double percent encoding in QueryParams.navurl */ + rawNav = "/vocabulary/" + navname + "/" + MultiProtocolURL.escape(Tagging.encodePrintname(name)).toString(); if (!theSearch.query.modifier.toString().contains("/vocabulary/" + navname + "/" + name.replace(' ', '_'))) { prop.put("nav-vocabulary_" + navvoccount + "_element_" + i + "_on", 1); prop.put(fileType, "nav-vocabulary_" + navvoccount + "_element_" + i + "_modifier", nav); @@ -497,9 +526,11 @@ public class yacysearchtrailer { prop.put("nav-vocabulary_" + navvoccount + "_element_" + i + "_on", 0); prop.put(fileType, "nav-vocabulary_" + navvoccount + "_element_" + i + "_modifier", "-" + nav); nav=""; + rawNav = ""; } prop.put(fileType, "nav-vocabulary_" + navvoccount + "_element_" + i + "_name", name); - prop.put(fileType, "nav-vocabulary_" + navvoccount + "_element_" + i + "_url", QueryParams.navurl(fileType, 0, theSearch.query, nav, false).toString()); + /* URL is already percent encoded : no need to re-encode specifically for the file type */ + prop.put("nav-vocabulary_" + navvoccount + "_element_" + i + "_url", QueryParams.navurl(fileType, 0, theSearch.query, rawNav, false).toString()); prop.put(fileType, "nav-vocabulary_" + navvoccount + "_element_" + i + "_id", "vocabulary_" + navname + "_" + i); prop.put("nav-vocabulary_" + navvoccount + "_element_" 
+ i + "_count", count); prop.put("nav-vocabulary_" + navvoccount + "_element_" + i + "_nl", 1); diff --git a/test/java/net/yacy/server/serverObjectsTest.java b/test/java/net/yacy/server/serverObjectsTest.java new file mode 100644 index 000000000..12ec903fe --- /dev/null +++ b/test/java/net/yacy/server/serverObjectsTest.java @@ -0,0 +1,72 @@ +// (C) 2016 by luccioman; http://github.com/luccioman +// +// This is a part of YaCy, a peer-to-peer based web search engine +// +// LICENSE +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +package net.yacy.server; + +import java.io.UnsupportedEncodingException; +import java.net.URLEncoder; +import java.nio.charset.StandardCharsets; + +import org.junit.Test; + +/** + * Unit tests for serverObjects + */ +public class serverObjectsTest { + + /** + * This method allow manual check of the output obtained with different file + * types encoding reserved characters. 
+     *
+     * @throws UnsupportedEncodingException if URLEncoder rejects the UTF-8 charset name
+     */
+    @Test
+    public void testPut() throws UnsupportedEncodingException {
+        String raw = "query with reserved chars : \"#<>?`{}/:;=@[\\]^|\t&"; // sample packed with URL/markup reserved characters
+        String urlEncoded = URLEncoder.encode(raw, StandardCharsets.UTF_8.name()); // same sample, percent-encoded beforehand
+
+        serverObjects prop = new serverObjects();
+
+        prop.put("raw", raw);
+        System.out.println("no file type raw : " + prop.get("raw"));
+
+        prop.put("urlEncoded", urlEncoded);
+        System.out.println("no file type urlEncoded : " + prop.get("urlEncoded") + "\n");
+
+        prop.putHTML("html raw", raw);
+        System.out.println("html raw : " + prop.get("html raw"));
+
+        prop.putHTML("html urlEncoded", urlEncoded);
+        System.out.println("html urlEncoded : " + prop.get("html urlEncoded") + "\n");
+
+        prop.putXML("xml raw", raw);
+        System.out.println("xml raw : " + prop.get("xml raw"));
+
+        prop.putXML("xml urlEncoded", urlEncoded); // was putHTML: the XML case must go through putXML to match its label
+        System.out.println("xml urlEncoded : " + prop.get("xml urlEncoded") + "\n");
+
+        prop.putJSON("json raw", raw);
+        System.out.println("json raw : " + prop.get("json raw"));
+
+        prop.putJSON("json urlEncoded", urlEncoded);
+        System.out.println("json urlEncoded : " + prop.get("json urlEncoded") + "\n");
+    }
+
+}