+
#(searchoptions)#
-
diff --git a/htroot/yacy/search.java b/htroot/yacy/search.java
index 770a147bc..850e72849 100644
--- a/htroot/yacy/search.java
+++ b/htroot/yacy/search.java
@@ -219,7 +219,6 @@ public final class search {
indexSegment,
rankingProfile
);
- theQuery.domType = QueryParams.SEARCHDOM_LOCAL;
yacyCore.log.logInfo("INIT HASH SEARCH (abstracts only): " + QueryParams.anonymizedQueryHashes(theQuery.queryHashes) + " - " + theQuery.displayResults() + " links");
final long timer = System.currentTimeMillis();
@@ -273,7 +272,6 @@ public final class search {
sb.indexSegments.segment(Segments.Process.PUBLIC),
rankingProfile
);
- theQuery.domType = QueryParams.SEARCHDOM_LOCAL;
yacyCore.log.logInfo("INIT HASH SEARCH (query-" + abstracts + "): " + QueryParams.anonymizedQueryHashes(theQuery.queryHashes) + " - " + theQuery.displayResults() + " links");
RSSFeed.channels(RSSFeed.REMOTESEARCH).addMessage(new RSSMessage("Remote Search Request from " + ((remoteSeed == null) ? "unknown" : remoteSeed.getName()), QueryParams.anonymizedQueryHashes(theQuery.queryHashes), ""));
diff --git a/htroot/yacysearch.java b/htroot/yacysearch.java
index daa1ba5d5..be4c1da61 100644
--- a/htroot/yacysearch.java
+++ b/htroot/yacysearch.java
@@ -541,8 +541,8 @@ public class yacysearch {
"&resource=" + ((theQuery.isLocal()) ? "local" : "global") +
"&verify=" + ((theQuery.onlineSnippetFetch) ? "true" : "false") +
"&nav=" + theQuery.navigators +
- "&urlmaskfilter=" + originalUrlMask +
- "&prefermaskfilter=" + theQuery.prefer +
+ "&urlmaskfilter=" + originalUrlMask.toString() +
+ "&prefermaskfilter=" + theQuery.prefer.toString() +
"&cat=href&constraint=" + ((theQuery.constraint == null) ? "" : theQuery.constraint.exportB64()) +
"&contentdom=" + theQuery.contentdom() +
"&former=" + theQuery.queryString(true) +
diff --git a/source/de/anomic/search/QueryParams.java b/source/de/anomic/search/QueryParams.java
index 3f9abf680..116dfdde6 100644
--- a/source/de/anomic/search/QueryParams.java
+++ b/source/de/anomic/search/QueryParams.java
@@ -59,29 +59,33 @@ public final class QueryParams {
public static final Bitfield empty_constraint = new Bitfield(4, "AAAAAA");
public static final Bitfield catchall_constraint = new Bitfield(4, "______");
+ public static final Pattern catchall_pattern = Pattern.compile(".*");
+ public static final Pattern matchnothing_pattern = Pattern.compile("");
- public String queryString;
+ public final String queryString;
public TreeSet fullqueryHashes, queryHashes, excludeHashes;
- public int itemsPerPage, offset;
- public Pattern urlMask, prefer;
- public ContentDomain contentdom;
- public String targetlang;
- public String navigators;
- public int domType;
- public int zonecode;
- public int domMaxTargets;
- public int maxDistance;
- public Bitfield constraint;
- public boolean allofconstraint;
- public boolean onlineSnippetFetch;
- public RankingProfile ranking;
+ public final int itemsPerPage;
+ public int offset;
+ public final Pattern urlMask, prefer;
+ public final boolean urlMask_isCatchall, prefer_isMatchnothing;
+ public final ContentDomain contentdom;
+ public final String targetlang;
+ public final String navigators;
+ public final int domType;
+ public final int zonecode;
+ public final int domMaxTargets;
+ public final int maxDistance;
+ public final Bitfield constraint;
+ public final boolean allofconstraint;
+ public final boolean onlineSnippetFetch;
+ public final RankingProfile ranking;
private final Segment indexSegment;
- public String host; // this is the client host that starts the query, not a site operator
- public String sitehash; // this is a domain hash, 6 bytes long or null
- public String authorhash;
- public String tenant;
+ public final String host; // this is the client host that starts the query, not a site operator
+ public final String sitehash; // this is a domain hash, 6 bytes long or null
+ public final String authorhash;
+ public final String tenant;
public yacySeed remotepeer;
- public Long handle;
+ public final Long handle;
// values that are set after a search:
public int resultcount; // number of found results
public long searchtime, urlretrievaltime, snippetcomputationtime; // time to perform the search, to get all the urls, and to compute the snippets
@@ -107,11 +111,13 @@ public final class QueryParams {
this.ranking = ranking;
this.tenant = null;
this.maxDistance = Integer.MAX_VALUE;
- this.prefer = Pattern.compile("");
+ this.urlMask = catchall_pattern;
+ this.urlMask_isCatchall = true;
+ this.prefer = matchnothing_pattern;
+ this.prefer_isMatchnothing = true;
this.contentdom = ContentDomain.ALL;
this.itemsPerPage = itemsPerPage;
this.offset = 0;
- this.urlMask = Pattern.compile(".*");
this.targetlang = "en";
this.domType = SEARCHDOM_LOCAL;
this.zonecode = DigestURI.TLD_any_zone_filter;
@@ -155,11 +161,13 @@ public final class QueryParams {
this.tenant = (tenant != null && tenant.length() == 0) ? null : tenant;
this.ranking = ranking;
this.maxDistance = maxDistance;
- this.prefer = Pattern.compile(prefer);
this.contentdom = contentdom;
this.itemsPerPage = Math.min((specialRights) ? 1000 : 50, itemsPerPage);
this.offset = Math.min((specialRights) ? 10000 : 100, offset);
this.urlMask = Pattern.compile(urlMask);
+ this.urlMask_isCatchall = this.urlMask.toString().equals(catchall_pattern.toString());
+ this.prefer = Pattern.compile(prefer);
+ this.prefer_isMatchnothing = this.prefer.toString().equals(matchnothing_pattern.toString());;
assert language != null;
this.targetlang = language;
this.navigators = navigators;
diff --git a/source/de/anomic/search/RankingProcess.java b/source/de/anomic/search/RankingProcess.java
index 5da2243e3..acff75be1 100644
--- a/source/de/anomic/search/RankingProcess.java
+++ b/source/de/anomic/search/RankingProcess.java
@@ -220,7 +220,7 @@ public final class RankingProcess extends Thread {
//this.domZones[DigestURI.domDomain(iEntry.metadataHash())]++;
// get statistics for host navigator
- if (nav_hosts) {
+ if (nav_hosts && query.urlMask_isCatchall) {
domhash = iEntry.urlHash.substring(6);
this.hostNavigator.inc(domhash, iEntry.urlHash);
}
@@ -374,6 +374,7 @@ public final class RankingProcess extends Thread {
// returns from the current RWI list the best URL entry and removes this entry from the list
long timeLimit = System.currentTimeMillis() + timeout;
int p = -1;
+ String urlhash;
while (System.currentTimeMillis() < timeLimit) {
final SortStack.stackElement obrwi = takeRWI(skipDoubleDom);
if (obrwi == null) {
@@ -381,7 +382,8 @@ public final class RankingProcess extends Thread {
try {Thread.sleep(50);} catch (final InterruptedException e1) {}
continue;
}
- final URIMetadataRow page = this.query.getSegment().urlMetadata().load(obrwi.element.metadataHash(), obrwi.element, obrwi.weight.longValue());
+ urlhash = obrwi.element.metadataHash();
+ final URIMetadataRow page = this.query.getSegment().urlMetadata().load(urlhash, obrwi.element, obrwi.weight.longValue());
if (page == null) {
misses.add(obrwi.element.metadataHash());
continue;
@@ -395,12 +397,18 @@ public final class RankingProcess extends Thread {
continue; // rare case where the url is corrupted
}
- // check url mask
- if (!metadata.matches(query.urlMask)) {
- continue;
+ if (!query.urlMask_isCatchall) {
+ // check url mask
+ if (!metadata.matches(query.urlMask)) {
+ continue;
+ }
+
+ // in case that we do not have a catchall filter for urls
+ // we must also construct the domain navigator here
+ this.hostNavigator.inc(urlhash.substring(6), urlhash);
}
- // check for more errors
+ // check for more errors
if (metadata.url() == null) {
continue; // rare case where the url is corrupted
}
@@ -539,14 +547,10 @@ public final class RankingProcess extends Thread {
int rc = Math.min(count, hsa.length);
ArrayList result = new ArrayList();
for (int i = 0; i < rc; i++) result.add(hsa[i]);
+ if (result.size() < 2) result.clear(); // navigators with one entry are not useful
return result;
}
- public List getHostNavigators(int count) {
- if (!this.query.navigators.equals("all") && this.query.navigators.indexOf("hosts") < 0) return new ArrayList(0);
-
- return this.hostNavigator.entries(10);
- }
public List getHostNavigator(int count) {
List result = new ArrayList();
if (!this.query.navigators.equals("all") && this.query.navigators.indexOf("hosts") < 0) return result;
@@ -569,6 +573,7 @@ public final class RankingProcess extends Thread {
for (Navigator.Item entry: result) if (entry.name.equals(hostname)) continue loop; // check if one entry already exists
result.add(new Navigator.Item(hostname, item.count));
}
+ if (result.size() < 2) result.clear(); // navigators with one entry are not useful
return result;
}
@@ -589,8 +594,9 @@ public final class RankingProcess extends Thread {
// create a list of words that had been computed by statistics over all
// words that appeared in the url or the description of all urls
if (!this.query.navigators.equals("all") && this.query.navigators.indexOf("topics") < 0) return new ArrayList(0);
-
- return this.ref.entries(10);
+ List result = this.ref.entries(10);
+ if (result.size() < 2) result.clear(); // navigators with one entry are not useful
+ return result;
}
public void addTopic(final String[] words) {
@@ -623,8 +629,9 @@ public final class RankingProcess extends Thread {
// create a list of words that had been computed by statistics over all
// words that appeared in the url or the description of all urls
if (!this.query.navigators.equals("all") && this.query.navigators.indexOf("authors") < 0) return new ArrayList(0);
-
- return this.authorNavigator.entries(count);
+ List result = this.authorNavigator.entries(count);
+ if (result.size() < 2) result.clear(); // navigators with one entry are not useful
+ return result;
}
public static void loadYBR(final File rankingPath, final int count) {
diff --git a/source/net/yacy/document/Document.java b/source/net/yacy/document/Document.java
index 6e69f38b3..025cdab51 100644
--- a/source/net/yacy/document/Document.java
+++ b/source/net/yacy/document/Document.java
@@ -55,7 +55,7 @@ import net.yacy.kelondro.util.FileUtils;
public class Document {
- private final DigestURI source; // the source url
+ private final DigestURI source; // the source url
private final String mimeType; // mimeType as taken from http header
private final String charset; // the charset of the document
private final List keywords; // most resources provide a keyword field