diff --git a/defaults/yacy.init b/defaults/yacy.init
index 86ac2d0a9..138a65991 100644
--- a/defaults/yacy.init
+++ b/defaults/yacy.init
@@ -941,3 +941,8 @@ segment.process.default_tmp = default
# this is only shown, if the about.body is filled
about.headline =
about.body =
+
+# search heuristics
+heuristic.site = false
+heuristic.scroogle = false
+
diff --git a/htroot/ConfigHeuristics_p.html b/htroot/ConfigHeuristics_p.html
new file mode 100644
index 000000000..3a42dc786
--- /dev/null
+++ b/htroot/ConfigHeuristics_p.html
@@ -0,0 +1,72 @@
+
+
+
+
#(heuristic)#::
-
::
-
+
::
+
#(/heuristic)#
#(authorized)#::
diff --git a/htroot/yacysearchitem.java b/htroot/yacysearchitem.java
index a7c606499..035f8d47b 100644
--- a/htroot/yacysearchitem.java
+++ b/htroot/yacysearchitem.java
@@ -117,17 +117,6 @@ public class yacysearchitem {
prop.putHTML("content_authorized_recommend_deletelink", "/yacysearch.html?query=" + theQuery.queryString.replace(' ', '+') + "&Enter=Search&count=" + theQuery.displayResults() + "&offset=" + (theQuery.neededResults() - theQuery.displayResults()) + "&order=" + crypt.simpleEncode(theQuery.ranking.toExternalString()) + "&resource=local&time=3&deleteref=" + new String(result.hash()) + "&urlmaskfilter=.*");
prop.putHTML("content_authorized_recommend_recommendlink", "/yacysearch.html?query=" + theQuery.queryString.replace(' ', '+') + "&Enter=Search&count=" + theQuery.displayResults() + "&offset=" + (theQuery.neededResults() - theQuery.displayResults()) + "&order=" + crypt.simpleEncode(theQuery.ranking.toExternalString()) + "&resource=local&time=3&recommendref=" + new String(result.hash()) + "&urlmaskfilter=.*");
prop.put("content_authorized_urlhash", new String(result.hash()));
- SearchEvent.HeuristicResult heuristic = theSearch.getHeuristic(result.hash());
- if (heuristic == null) {
- prop.put("content_heuristic", 0);
- } else {
- if (heuristic.redundant) {
- prop.put("content_heuristic", 1);
- } else {
- prop.put("content_heuristic", 2);
- }
- prop.put("content_heuristic_name", heuristic.heuristicName);
- }
String resulthashString = new String(result.hash());
prop.putHTML("content_title", result.title());
prop.putXML("content_title-xml", result.title());
@@ -160,6 +149,17 @@ public class yacysearchitem {
prop.put("content_description", desc);
prop.putXML("content_description-xml", desc);
prop.putJSON("content_description-json", desc);
+ SearchEvent.HeuristicResult heuristic = theSearch.getHeuristic(result.hash());
+ if (heuristic == null) {
+ prop.put("content_heuristic", 0);
+ } else {
+ if (heuristic.redundant) {
+ prop.put("content_heuristic", 1);
+ } else {
+ prop.put("content_heuristic", 2);
+ }
+ prop.put("content_heuristic_name", heuristic.heuristicName);
+ }
EventTracker.update("SEARCH", new ProfilingGraph.searchEvent(theQuery.id(true), SearchEvent.FINALIZATION + "-" + item, 0, 0), false, 30000, ProfilingGraph.maxTime);
return prop;
diff --git a/source/de/anomic/search/ReferenceOrder.java b/source/de/anomic/search/ReferenceOrder.java
index 16b791144..ef9ea2f25 100644
--- a/source/de/anomic/search/ReferenceOrder.java
+++ b/source/de/anomic/search/ReferenceOrder.java
@@ -117,7 +117,6 @@ public class ReferenceOrder {
private final BlockingQueue
decodedEntries;
public NormalizeWorker(final BlockingQueue out, Semaphore termination) {
- // normalize ranking: find minimum and maximum of separate ranking criteria
this.out = out;
this.termination = termination;
this.decodedEntries = new LinkedBlockingQueue();
@@ -131,38 +130,8 @@ public class ReferenceOrder {
}
public void run() {
-
- Map doms0 = new HashMap();
- Integer int1 = 1;
-
- WordReferenceVars iEntry;
- String dom;
- Integer count;
try {
- // calculate min and max for normalization
- while ((iEntry = decodedEntries.take()) != WordReferenceVars.poison) {
- out.put(iEntry);
- // find min/max
- if (min == null) min = iEntry.clone(); else min.min(iEntry);
- if (max == null) max = iEntry.clone(); else max.max(iEntry);
- // update domcount
- dom = new String(iEntry.metadataHash()).substring(6);
- count = doms0.get(dom);
- if (count == null) {
- doms0.put(dom, int1);
- } else {
- doms0.put(dom, Integer.valueOf(count.intValue() + 1));
- }
- }
-
- // update domain score
- Map.Entry entry;
- final Iterator> di = doms0.entrySet().iterator();
- while (di.hasNext()) {
- entry = di.next();
- doms.addScore(entry.getKey(), (entry.getValue()).intValue());
- }
- if (!doms.isEmpty()) maxdomcount = doms.getMaxScore();
+ addNormalizer(decodedEntries, out);
} catch (InterruptedException e) {
Log.logException(e);
} catch (Exception e) {
@@ -177,6 +146,57 @@ public class ReferenceOrder {
}
}
+    /**
+     * normalize ranking: find minimum and maximum of separate ranking criteria and count the entries per domain
+     * @param decodedEntries source queue; it is consumed until WordReferenceVars.poison is taken
+     * @param out target queue; every consumed entry (but not the poison marker) is put here
+     * @throws InterruptedException if the thread is interrupted while blocking on one of the queues
+     */
+    public void addNormalizer(BlockingQueue<WordReferenceVars> decodedEntries, final BlockingQueue<WordReferenceVars> out) throws InterruptedException {
+        WordReferenceVars iEntry;
+        Map<String, Integer> doms0 = new HashMap<String, Integer>();
+        String dom;
+        Integer count;
+        final Integer int1 = 1;
+        while ((iEntry = decodedEntries.take()) != WordReferenceVars.poison) {
+            out.put(iEntry);
+            // find min/max
+            if (min == null) min = iEntry.clone(); else min.min(iEntry);
+            if (max == null) max = iEntry.clone(); else max.max(iEntry);
+            // update domcount; the domain key is the metadata hash string from index 6 onward
+            dom = new String(iEntry.metadataHash()).substring(6);
+            count = doms0.get(dom);
+            if (count == null) {
+                doms0.put(dom, int1);
+            } else {
+                doms0.put(dom, Integer.valueOf(count.intValue() + 1));
+            }
+        }
+
+        // update domain score: add the locally collected counts to the shared doms score
+        Map.Entry<String, Integer> entry;
+        final Iterator<Map.Entry<String, Integer>> di = doms0.entrySet().iterator();
+        while (di.hasNext()) {
+            entry = di.next();
+            doms.addScore(entry.getKey(), (entry.getValue()).intValue());
+        }
+        if (!doms.isEmpty()) this.maxdomcount = doms.getMaxScore();
+    }
+
+    /** single-entry variant: puts iEntry into out, then updates min/max, the domain score and maxdomcount */
+    public void addNormalizer(WordReferenceVars iEntry, final BlockingQueue<WordReferenceVars> out) throws InterruptedException {
+        out.put(iEntry);
+
+        // find min/max
+        if (min == null) min = iEntry.clone(); else min.min(iEntry);
+        if (max == null) max = iEntry.clone(); else max.max(iEntry);
+
+        // update domcount; the domain key is the metadata hash string from index 6 onward
+        String dom = new String(iEntry.metadataHash()).substring(6);
+        doms.addScore(dom, 1);
+        if (!doms.isEmpty()) this.maxdomcount = doms.getMaxScore();
+    }
+
public int authority(final byte[] urlHash) {
return (doms.getScore(new String(urlHash, 6, 6)) << 8) / (1 + this.maxdomcount);
}
diff --git a/source/de/anomic/search/SearchEvent.java b/source/de/anomic/search/SearchEvent.java
index d58b3f26e..7816611ae 100644
--- a/source/de/anomic/search/SearchEvent.java
+++ b/source/de/anomic/search/SearchEvent.java
@@ -319,7 +319,7 @@ public final class SearchEvent {
return this.rankedCache.getAuthorNavigator(maxentries);
}
- public void addHeuristicResult(byte[] urlhash, String heuristicName, boolean redundant) {
+ public void addHeuristic(byte[] urlhash, String heuristicName, boolean redundant) {
synchronized (this.heuristics) {
this.heuristics.put(urlhash, new HeuristicResult(urlhash, heuristicName, redundant));
}
diff --git a/source/de/anomic/search/Switchboard.java b/source/de/anomic/search/Switchboard.java
index 26c4b15ee..9383c0b1e 100644
--- a/source/de/anomic/search/Switchboard.java
+++ b/source/de/anomic/search/Switchboard.java
@@ -1930,7 +1930,7 @@ public final class Switchboard extends serverSwitch {
public void addToIndex(final DigestURI url, final SearchEvent searchEvent, final String heuristicName) throws IOException, ParserException {
final Segments.Process process = Segments.Process.LOCALCRAWLING;
if (indexSegments.segment(process).urlMetadata.exists(url.hash())) {
- searchEvent.addHeuristicResult(url.hash(), heuristicName, true);
+ searchEvent.addHeuristic(url.hash(), heuristicName, true);
return; // don't do double-work
}
final Request request = loader.request(url, true, true);
@@ -1939,9 +1939,9 @@ public final class Switchboard extends serverSwitch {
log.logInfo("Heuristic: cannot load " + url.toNormalform(false, false) + ": " + acceptedError);
return;
}
+ searchEvent.addHeuristic(url.hash(), heuristicName, false);
new Thread() {public void run() {
try {
- searchEvent.addHeuristicResult(url.hash(), heuristicName, false);
Response response = loader.load(request, CacheStrategy.IFFRESH, Long.MAX_VALUE);
if (response == null) throw new IOException("response == null");
if (response.getContent() == null) throw new IOException("content == null");