diff --git a/defaults/yacy.init b/defaults/yacy.init
index dcb81f7de..d4172ce96 100644
--- a/defaults/yacy.init
+++ b/defaults/yacy.init
@@ -762,6 +762,9 @@ search.navigation=hosts,authors,namespace,topics,filetype,protocol
all search results are valid without verification
search.verify = iffresh
+search.excludehosts=
+search.excludehosth=
+
# in case that a link verification fails then the corresponding index reference can be
# deleted to clean up the index. If this property is set then failed index verification in
# the cases of nocache, iffresh and ifexist causes an index deletion
diff --git a/htroot/ConfigPortal.html b/htroot/ConfigPortal.html
index 369a19766..ba4b86885 100644
--- a/htroot/ConfigPortal.html
+++ b/htroot/ConfigPortal.html
@@ -109,12 +109,18 @@
+
Exclude Hosts
+
List of hosts that shall be excluded from search results by default but can be included using the site:<host> operator:
+
+ #[search.excludehosth]#
+
+
'About' Column (shown in a column alongside with the search result page)
(Headline)
(Content)
-
+
diff --git a/htroot/ConfigPortal.java b/htroot/ConfigPortal.java
index 7e6379833..fe61eb69d 100644
--- a/htroot/ConfigPortal.java
+++ b/htroot/ConfigPortal.java
@@ -26,6 +26,7 @@
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
import net.yacy.cora.protocol.RequestHeader;
+import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.search.Switchboard;
import net.yacy.search.SwitchboardConstants;
import de.anomic.data.WorkTables;
@@ -93,6 +94,10 @@ public class ConfigPortal {
sb.setConfig("about.headline", post.get("about.headline", ""));
sb.setConfig("about.body", post.get("about.body", ""));
+ String excludehosts = post.get("search.excludehosts", "");
+ sb.setConfig("search.excludehosts", excludehosts);
+ sb.setConfig("search.excludehosth", DigestURI.hosthashes(excludehosts));
+
// construct navigation String
String nav = "";
if (post.getBoolean("search.navigation.hosts", false)) nav += "hosts,";
@@ -126,8 +131,10 @@ public class ConfigPortal {
sb.setConfig("search.result.show.pictures", false);
sb.setConfig(SwitchboardConstants.SEARCH_VERIFY, "iffresh");
sb.setConfig(SwitchboardConstants.SEARCH_VERIFY_DELETE, "true");
- prop.put("about.headline", "");
- prop.put("about.body", "");
+ sb.setConfig("about.headline", "");
+ sb.setConfig("about.body", "");
+ sb.setConfig("search.excludehosts", "");
+ sb.setConfig("search.excludehosth", "");
}
}
@@ -167,6 +174,9 @@ public class ConfigPortal {
prop.put("about.headline", sb.getConfig("about.headline", ""));
prop.put("about.body", sb.getConfig("about.body", ""));
+ prop.put("search.excludehosts", sb.getConfig("search.excludehosts", ""));
+ prop.put("search.excludehosth", sb.getConfig("search.excludehosth", ""));
+
final String browserPopUpPage = sb.getConfig(SwitchboardConstants.BROWSER_POP_UP_PAGE, "ConfigBasic.html");
prop.put("popupFront", 0);
prop.put("popupSearch", 0);
diff --git a/htroot/yacy/search.java b/htroot/yacy/search.java
index a5bfc0aee..e7db51239 100644
--- a/htroot/yacy/search.java
+++ b/htroot/yacy/search.java
@@ -244,6 +244,7 @@ public final class search {
null,
false,
sitehash,
+ null,
authorhash,
DigestURI.TLD_any_zone_filter,
client,
@@ -305,6 +306,7 @@ public final class search {
constraint,
false,
sitehash,
+ null,
authorhash,
DigestURI.TLD_any_zone_filter,
client,
diff --git a/htroot/yacysearch.java b/htroot/yacysearch.java
index 7ae89fa42..41cd4e58b 100644
--- a/htroot/yacysearch.java
+++ b/htroot/yacysearch.java
@@ -618,6 +618,7 @@ public class yacysearch {
constraint,
true,
sitehash,
+ DigestURI.hosthashess(sb.getConfig("search.excludehosth", "")),
authorhash,
DigestURI.TLD_any_zone_filter,
client,
diff --git a/source/net/yacy/kelondro/data/meta/DigestURI.java b/source/net/yacy/kelondro/data/meta/DigestURI.java
index a4713d551..f80501e49 100644
--- a/source/net/yacy/kelondro/data/meta/DigestURI.java
+++ b/source/net/yacy/kelondro/data/meta/DigestURI.java
@@ -30,6 +30,8 @@ package net.yacy.kelondro.data.meta;
import java.io.File;
import java.io.Serializable;
import java.net.MalformedURLException;
+import java.util.HashSet;
+import java.util.Set;
import net.yacy.cora.document.ASCII;
import net.yacy.cora.document.MultiProtocolURI;
@@ -71,6 +73,37 @@ public class DigestURI extends MultiProtocolURI implements Serializable {
}
return (url == null) ? null : ASCII.String(url.hash(), 6, 6);
}
+
+    /**
+     * Convert a comma-separated list of host names into a string of concatenated host hashes.
+     * Every entry is trimmed; empty entries and entries for which {@code hosthash}
+     * does not return a 6-character hash are skipped.
+     * @param hostlist comma-separated host names; may be null or empty
+     * @return the 6-character host hashes, concatenated without any separator;
+     *         an empty string if hostlist is null or contains no usable host
+     */
+    public static String hosthashes(final String hostlist) {
+        // a missing value must yield an empty hash list, not a NullPointerException
+        if (hostlist == null || hostlist.length() == 0) return "";
+        final StringBuilder sb = new StringBuilder(hostlist.length());
+        for (final String entry: hostlist.split(",")) {
+            final String host = entry.trim();
+            if (host.length() == 0) continue;
+            final String hash = hosthash(host);
+            // append only well-formed hashes: hosthashess reads the result back
+            // in fixed steps of 6 characters, so one odd-sized hash would shift all others
+            if (hash == null || hash.length() != 6) continue;
+            sb.append(hash);
+        }
+        return sb.toString();
+    }
+
+    /**
+     * Split a string of concatenated host hashes into a set of single host hashes.
+     * The input is read in consecutive chunks of 6 characters, the format that
+     * {@code hosthashes(String)} produces.
+     * @param hosthashes concatenated 6-character host hashes; may be null or empty
+     * @return the set of 6-character hashes (empty if the input is shorter than one hash),
+     *         or null if the input is null or empty
+     */
+    public static Set<String> hosthashess(String hosthashes) {
+        if (hosthashes == null || hosthashes.length() == 0) return null;
+        final Set<String> hashes = new HashSet<String>();
+        // Stop before an incomplete trailing chunk: the value is read from the configuration
+        // and may have been edited by hand, so a length that is not a multiple of 6 must not
+        // raise a StringIndexOutOfBoundsException in substring.
+        for (int i = 0; i + 6 <= hosthashes.length(); i += 6) {
+            hashes.add(hosthashes.substring(i, i + 6));
+        }
+        return hashes;
+    }
+
/**
* DigestURI from File
diff --git a/source/net/yacy/search/query/QueryParams.java b/source/net/yacy/search/query/QueryParams.java
index ed9203a9a..57483398f 100644
--- a/source/net/yacy/search/query/QueryParams.java
+++ b/source/net/yacy/search/query/QueryParams.java
@@ -29,8 +29,10 @@ package net.yacy.search.query;
import java.io.UnsupportedEncodingException;
import java.net.URLEncoder;
import java.util.HashMap;
+import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
+import java.util.Set;
import java.util.SortedSet;
import java.util.TreeSet;
import java.util.regex.Matcher;
@@ -124,6 +126,7 @@ public final class QueryParams {
private final Segment indexSegment;
public final String host; // this is the client host that starts the query, not a site operator
public final String sitehash; // this is a domain hash, 6 bytes long or null
+ public final Set siteexcludes; // set of domain hashes that are excluded if not included by sitehash
public final String authorhash;
public final String tenant;
public final Modifier modifier;
@@ -182,6 +185,7 @@ public final class QueryParams {
this.snippetCacheStrategy = null;
this.host = null;
this.sitehash = null;
+ this.siteexcludes = null;
this.authorhash = null;
this.remotepeer = null;
this.time = Long.valueOf(System.currentTimeMillis());
@@ -208,6 +212,7 @@ public final class QueryParams {
final Searchdom domType, final int domMaxTargets,
final Bitfield constraint, final boolean allofconstraint,
final String site,
+ final Set siteexcludes,
final String authorhash,
final int domainzone,
final String host,
@@ -250,6 +255,7 @@ public final class QueryParams {
this.constraint = constraint;
this.allofconstraint = allofconstraint;
this.sitehash = site; assert site == null || site.length() == 6;
+ this.siteexcludes = siteexcludes != null && siteexcludes.size() == 0 ? null: siteexcludes;
this.authorhash = authorhash; assert authorhash == null || !authorhash.isEmpty();
this.snippetCacheStrategy = snippetCacheStrategy;
this.host = host;
@@ -491,6 +497,8 @@ public final class QueryParams {
context.append(asterisk);
context.append(this.sitehash);
context.append(asterisk);
+ context.append(this.siteexcludes);
+ context.append(asterisk);
context.append(this.authorhash);
context.append(asterisk);
context.append(this.targetlang);
diff --git a/source/net/yacy/search/query/RWIProcess.java b/source/net/yacy/search/query/RWIProcess.java
index e221959b9..06a6d856b 100644
--- a/source/net/yacy/search/query/RWIProcess.java
+++ b/source/net/yacy/search/query/RWIProcess.java
@@ -311,6 +311,9 @@ public final class RWIProcess extends Thread
// check site constraints
final String hosthash = iEntry.hosthash();
if ( this.query.sitehash == null ) {
+ if (this.query.siteexcludes != null && this.query.siteexcludes.contains(hosthash)) {
+ continue pollloop;
+ }
// no site constraint there; maybe collect host navigation information
if ( nav_hosts && this.query.urlMask_isCatchall ) {
this.hostNavigator.inc(hosthash);