diff --git a/build.properties b/build.properties
index f1d02eeae..a237a15f4 100644
--- a/build.properties
+++ b/build.properties
@@ -3,7 +3,7 @@ javacSource=1.4
javacTarget=1.4
# Release Configuration
-releaseVersion=0.553
+releaseVersion=0.554
releaseFile=yacy_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz
proReleaseFile=yacy_pro_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz
releaseFileParentDir=yacy
diff --git a/htroot/yacy/search.java b/htroot/yacy/search.java
index d9b91c952..d03365985 100644
--- a/htroot/yacy/search.java
+++ b/htroot/yacy/search.java
@@ -128,7 +128,7 @@ public final class search {
int indexabstractContainercount = 0;
int joincount = 0;
plasmaSearchQuery theQuery = null;
- serverProfiling localProcess = null;
+ serverProfiling localProfiling = null;
ArrayList accu = null;
long urlRetrievalAllTime = 0, snippetComputationAllTime = 0;
if ((query.length() == 0) && (abstractSet != null)) {
@@ -138,12 +138,12 @@ public final class search {
yacyCore.log.logInfo("INIT HASH SEARCH (abstracts only): " + plasmaSearchQuery.anonymizedQueryHashes(theQuery.queryHashes) + " - " + theQuery.displayResults() + " links");
// prepare a search profile
- localProcess = new serverProfiling(theQuery.maximumTime, theQuery.displayResults());
+ localProfiling = new serverProfiling();
//theSearch = new plasmaSearchEvent(squery, rankingProfile, localTiming, remoteTiming, true, sb.wordIndex, null);
- localProcess.startTimer();
+ localProfiling.startTimer();
Map[] containers = sb.wordIndex.localSearchContainers(theQuery, plasmaSearchQuery.hashes2Set(urls));
- localProcess.yield(plasmaSearchEvent.COLLECTION, containers[0].size());
+ localProfiling.yield(plasmaSearchEvent.COLLECTION, containers[0].size());
if (containers != null) {
Iterator ci = containers[0].entrySet().iterator();
Map.Entry entry;
@@ -170,8 +170,8 @@ public final class search {
// prepare a search profile
plasmaSearchRankingProfile rankingProfile = (profile.length() == 0) ? new plasmaSearchRankingProfile(plasmaSearchQuery.contentdomParser(contentdom)) : new plasmaSearchRankingProfile("", profile);
- localProcess = new serverProfiling(theQuery.maximumTime, theQuery.displayResults());
- plasmaSearchEvent theSearch = plasmaSearchEvent.getEvent(theQuery, rankingProfile, localProcess, sb.wordIndex, null, true, abstractSet);
+ localProfiling = new serverProfiling();
+ plasmaSearchEvent theSearch = plasmaSearchEvent.getEvent(theQuery, rankingProfile, localProfiling, sb.wordIndex, null, true, abstractSet);
urlRetrievalAllTime = theSearch.getURLRetrievalTime();
snippetComputationAllTime = theSearch.getSnippetComputationTime();
@@ -231,7 +231,7 @@ public final class search {
if (partitions > 0) sb.requestedQueries = sb.requestedQueries + 1d / partitions; // increase query counter
// prepare reference hints
- localProcess.startTimer();
+ localProfiling.startTimer();
Set ws = theSearch.references(10);
StringBuffer refstr = new StringBuffer();
Iterator j = ws.iterator();
@@ -239,7 +239,7 @@ public final class search {
refstr.append(",").append((String) j.next());
}
prop.put("references", (refstr.length() > 0) ? refstr.substring(1) : refstr.toString());
- localProcess.yield("reference collection", ws.size());
+ localProfiling.yield("reference collection", ws.size());
}
prop.put("indexabstract", indexabstract.toString());
@@ -253,7 +253,7 @@ public final class search {
} else {
// result is a List of urlEntry elements
- localProcess.startTimer();
+ localProfiling.startTimer();
StringBuffer links = new StringBuffer();
String resource = null;
plasmaSearchEvent.ResultEntry entry;
@@ -266,7 +266,7 @@ public final class search {
}
prop.put("links", links.toString());
prop.put("linkcount", accu.size());
- localProcess.yield("result list preparation", accu.size());
+ localProfiling.yield("result list preparation", accu.size());
}
// add information about forward peers
diff --git a/htroot/yacysearch.java b/htroot/yacysearch.java
index 0c18a19b7..e55c0ecfa 100644
--- a/htroot/yacysearch.java
+++ b/htroot/yacysearch.java
@@ -268,7 +268,7 @@ public class yacysearch {
20,
constraint,
true);
- serverProfiling localTiming = new serverProfiling(4 * theQuery.maximumTime / 10, theQuery.displayResults());
+ serverProfiling localProfiling = new serverProfiling();
String client = (String) header.get("CLIENTIP"); // the search client who initiated the search
@@ -287,7 +287,7 @@ public class yacysearch {
theQuery.setOffset(0); // in case that this is a new search, always start without a offset
offset = 0;
}
- plasmaSearchEvent theSearch = plasmaSearchEvent.getEvent(theQuery, sb.getRanking(), localTiming, sb.wordIndex, (sb.isRobinsonMode()) ? sb.clusterhashes : null, false, null);
+ plasmaSearchEvent theSearch = plasmaSearchEvent.getEvent(theQuery, sb.getRanking(), localProfiling, sb.wordIndex, (sb.isRobinsonMode()) ? sb.clusterhashes : null, false, null);
// generate result object
serverLog.logFine("LOCAL_SEARCH", "SEARCH TIME AFTER ORDERING OF SEARCH RESULTS: " + ((System.currentTimeMillis() - timestamp) / 1000) + " seconds");
diff --git a/htroot/yacysearchitem.html b/htroot/yacysearchitem.html
index 0ba3e074a..accde7bee 100644
--- a/htroot/yacysearchitem.html
+++ b/htroot/yacysearchitem.html
@@ -1,7 +1,7 @@
#(content)#::
#(authorized)#::
diff --git a/source/de/anomic/plasma/plasmaCrawlLURL.java b/source/de/anomic/plasma/plasmaCrawlLURL.java
index cbc0f8cd1..110459a8c 100644
--- a/source/de/anomic/plasma/plasmaCrawlLURL.java
+++ b/source/de/anomic/plasma/plasmaCrawlLURL.java
@@ -62,6 +62,7 @@ import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
+import de.anomic.data.htmlTools;
import de.anomic.http.httpc;
import de.anomic.http.httpc.response;
import de.anomic.index.indexRWIEntry;
@@ -628,14 +629,14 @@ public final class plasmaCrawlLURL {
pw.println(url);
}
if (format == 1) {
- pw.println("
" + comp.title() + "");
+ pw.println("
" + htmlTools.encodeUnicode2html(comp.title(), true, true) + "");
}
if (format == 2) {
pw.println("
- ");
- pw.println("" + comp.title() + "");
+ pw.println("" + htmlTools.encodeUnicode2html(comp.title(), true, true) + "");
pw.println("" + yacyURL.escape(url) + "");
- if (comp.author().length() > 0) pw.println("" + comp.author() + "");
- if (comp.tags().length() > 0) pw.println("" + comp.tags() + "");
+ if (comp.author().length() > 0) pw.println("" + htmlTools.encodeUnicode2html(comp.author(), true, true) + "");
+ if (comp.tags().length() > 0) pw.println("" + htmlTools.encodeUnicode2html(comp.tags(), true, true) + "");
pw.println("" + entry.moddate().toString() + "");
pw.println("" + entry.hash() + "");
pw.println("
");
diff --git a/source/de/anomic/plasma/plasmaSearchEvent.java b/source/de/anomic/plasma/plasmaSearchEvent.java
index 417bf9df3..737bedafb 100644
--- a/source/de/anomic/plasma/plasmaSearchEvent.java
+++ b/source/de/anomic/plasma/plasmaSearchEvent.java
@@ -57,7 +57,7 @@ public final class plasmaSearchEvent {
public static final String URLFETCH = "urlfetch";
public static final String NORMALIZING = "normalizing";
- public static int workerThreadCount = 3;
+ public static int workerThreadCount = 8;
public static String lastEventID = "";
private static HashMap lastEvents = new HashMap(); // a cache for objects from this class: re-use old search requests
public static final long eventLifetime = 600000; // the time an event will stay in the cache, 10 Minutes
@@ -200,7 +200,7 @@ public final class plasmaSearchEvent {
// start worker threads to fetch urls and snippets
this.workerThreads = new resultWorker[workerThreadCount];
for (int i = 0; i < workerThreadCount; i++) {
- this.workerThreads[i] = new resultWorker(i, process.getTargetTime() * 3);
+ this.workerThreads[i] = new resultWorker(i, query.maximumTime * 3);
this.workerThreads[i].start();
}
} else {
@@ -480,7 +480,7 @@ public final class plasmaSearchEvent {
// start worker threads to fetch urls and snippets
event.workerThreads = new resultWorker[workerThreadCount];
for (int i = 0; i < workerThreadCount; i++) {
- event.workerThreads[i] = event.deployWorker(i, 3 * event.process.getTargetTime());
+ event.workerThreads[i] = event.deployWorker(i, 3 * query.maximumTime);
}
}
@@ -514,9 +514,15 @@ public final class plasmaSearchEvent {
// start fetching urls and snippets
indexURLEntry page;
- while ((resultList.size() < query.neededResults() + query.displayResults()) &&
- (System.currentTimeMillis() < this.timeout) &&
- ((page = rankedCache.bestURL(true)) != null)) {
+ while (System.currentTimeMillis() < this.timeout) {
+
+ // get next entry
+ page = rankedCache.bestURL(true);
+ if (page == null) {
+ // no further entry is available right now: wait 100 ms, then poll again
+ try {Thread.sleep(100);} catch (InterruptedException e1) {}
+ continue;
+ }
if (anyResultWith(page.hash())) continue;
if (anyFailureWith(page.hash())) continue;
@@ -527,6 +533,7 @@ public final class plasmaSearchEvent {
if (resultEntry == null) continue; // the entry had some problems, cannot be used
urlRetrievalAllTime += resultEntry.dbRetrievalTime;
snippetComputationAllTime += resultEntry.snippetComputationTime;
+ //System.out.println("+++DEBUG-resultWorker+++ fetched " + resultEntry.urlstring());
// place the result to the result vector
synchronized (resultList) {
@@ -537,8 +544,9 @@ public final class plasmaSearchEvent {
synchronized (rankedCache) {
rankedCache.addReferences(resultEntry);
}
-
- System.out.println("DEBUG SNIPPET_LOADING: thread " + id + " got " + resultEntry.url());
+ //System.out.println("DEBUG SNIPPET_LOADING: thread " + id + " got " + resultEntry.url());
+
+ if (resultList.size() >= query.neededResults() + query.displayResults()) break; // we have enough
}
serverLog.logInfo("SEARCH", "resultWorker thread " + id + " terminated");
}
@@ -565,15 +573,18 @@ public final class plasmaSearchEvent {
long sleeptime = this.eventTime + (this.query.maximumTime / this.query.displayResults() * ((item % this.query.displayResults()) + 1)) - System.currentTimeMillis();
if ((anyWorkerAlive()) && (sleeptime > 0)) {
try {Thread.sleep(sleeptime);} catch (InterruptedException e) {}
+ //System.out.println("+++DEBUG-oneResult+++ (1) sleeping " + sleeptime);
}
// if there are less than 10 more results available, sleep some extra time to get a chance that the "common sense" ranking algorithm can work
if ((this.resultList.size() <= item + 10) && (anyWorkerAlive())) {
try {Thread.sleep(300);} catch (InterruptedException e) {}
+ //System.out.println("+++DEBUG-oneResult+++ (2) sleeping " + 300);
}
// then sleep until any result is available (that should not happen)
while ((this.resultList.size() <= item) && (anyWorkerAlive())) {
try {Thread.sleep(100);} catch (InterruptedException e) {}
+ //System.out.println("+++DEBUG-oneResult+++ (3) sleeping " + 100);
}
// finally, if there is something, return the result
@@ -602,6 +613,7 @@ public final class plasmaSearchEvent {
long timeout = System.currentTimeMillis() + waitingtime;
while ((this.resultList.size() < query.neededResults()) && (anyWorkerAlive()) && (System.currentTimeMillis() < timeout)) {
try {Thread.sleep(200);} catch (InterruptedException e) {}
+ //System.out.println("+++DEBUG-completeResults+++ sleeping " + 200);
}
return this.resultList;
}
diff --git a/source/de/anomic/server/serverProfiling.java b/source/de/anomic/server/serverProfiling.java
index 89872caeb..0fa0e791c 100644
--- a/source/de/anomic/server/serverProfiling.java
+++ b/source/de/anomic/server/serverProfiling.java
@@ -31,25 +31,14 @@ import java.util.Iterator;
public class serverProfiling implements Cloneable {
- private static final long minimumTargetTime = 100;
- private long targetTime;
- private int targetCount;
private ArrayList yield;
private long timer;
- private serverProfiling() {
- targetTime = minimumTargetTime;
- targetCount = 10;
+ public serverProfiling() {
yield = new ArrayList();
timer = 0;
}
- public serverProfiling(long time, int count) {
- this();
- this.targetTime = time;
- this.targetCount = count;
- }
-
public static class Entry {
public String process;
public int count;
@@ -62,14 +51,6 @@ public class serverProfiling implements Cloneable {
}
}
- public int getTargetCount() {
- return this.targetCount;
- }
-
- public long getTargetTime() {
- return this.targetTime;
- }
-
public void startTimer() {
this.timer = System.currentTimeMillis();
}