From a4494d6e01da6e494dbb5462d1a8b47bdb100f40 Mon Sep 17 00:00:00 2001 From: luccioman Date: Mon, 4 Dec 2017 18:23:26 +0100 Subject: [PATCH] Improved support for internationalized domain names on "site:" modifier Allow typing internationalized domain names, including non-ASCII characters, directly in the search field. Search is done using the ASCII Compatible Encoding (ACE) representation. --- source/net/yacy/search/query/QueryModifier.java | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/source/net/yacy/search/query/QueryModifier.java b/source/net/yacy/search/query/QueryModifier.java index 59d603f7a..0a9c44bd3 100644 --- a/source/net/yacy/search/query/QueryModifier.java +++ b/source/net/yacy/search/query/QueryModifier.java @@ -29,6 +29,8 @@ import org.apache.solr.common.params.CommonParams; import org.apache.solr.common.params.MultiMapSolrParams; import net.yacy.cora.document.id.DigestURL; +import net.yacy.cora.document.id.MultiProtocolURL; +import net.yacy.cora.document.id.Punycode.PunycodeException; import net.yacy.cora.util.CommonPattern; import net.yacy.cora.util.ConcurrentLog; import net.yacy.document.DateDetection; @@ -250,6 +252,14 @@ public class QueryModifier { while ( sitehost.endsWith(".") ) { this.sitehost = this.sitehost.substring(0, this.sitehost.length() - 1); } + + try { + /* Internationalized domain names support: convert to the same ASCII Compatible Encoding (ACE) representation that is used in normalized URLs */ + this.sitehost = MultiProtocolURL.toPunycode(this.sitehost); + } catch (final PunycodeException e1) { + ConcurrentLog.logException(e1); + } + /* Domain name in an URL is case insensitive : convert now modifier to lower case for further processing over normalized URLs */ this.sitehost = this.sitehost.toLowerCase(Locale.ROOT);