diff --git a/build.properties b/build.properties index f1d02eeae..a237a15f4 100644 --- a/build.properties +++ b/build.properties @@ -3,7 +3,7 @@ javacSource=1.4 javacTarget=1.4 # Release Configuration -releaseVersion=0.553 +releaseVersion=0.554 releaseFile=yacy_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz proReleaseFile=yacy_pro_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz releaseFileParentDir=yacy diff --git a/htroot/yacy/search.java b/htroot/yacy/search.java index d9b91c952..d03365985 100644 --- a/htroot/yacy/search.java +++ b/htroot/yacy/search.java @@ -128,7 +128,7 @@ public final class search { int indexabstractContainercount = 0; int joincount = 0; plasmaSearchQuery theQuery = null; - serverProfiling localProcess = null; + serverProfiling localProfiling = null; ArrayList accu = null; long urlRetrievalAllTime = 0, snippetComputationAllTime = 0; if ((query.length() == 0) && (abstractSet != null)) { @@ -138,12 +138,12 @@ public final class search { yacyCore.log.logInfo("INIT HASH SEARCH (abstracts only): " + plasmaSearchQuery.anonymizedQueryHashes(theQuery.queryHashes) + " - " + theQuery.displayResults() + " links"); // prepare a search profile - localProcess = new serverProfiling(theQuery.maximumTime, theQuery.displayResults()); + localProfiling = new serverProfiling(); //theSearch = new plasmaSearchEvent(squery, rankingProfile, localTiming, remoteTiming, true, sb.wordIndex, null); - localProcess.startTimer(); + localProfiling.startTimer(); Map[] containers = sb.wordIndex.localSearchContainers(theQuery, plasmaSearchQuery.hashes2Set(urls)); - localProcess.yield(plasmaSearchEvent.COLLECTION, containers[0].size()); + localProfiling.yield(plasmaSearchEvent.COLLECTION, containers[0].size()); if (containers != null) { Iterator ci = containers[0].entrySet().iterator(); Map.Entry entry; @@ -170,8 +170,8 @@ public final class search { // prepare a search profile plasmaSearchRankingProfile rankingProfile = (profile.length() == 0) ? new plasmaSearchRankingProfile(plasmaSearchQuery.contentdomParser(contentdom)) : new plasmaSearchRankingProfile("", profile); - localProcess = new serverProfiling(theQuery.maximumTime, theQuery.displayResults()); - plasmaSearchEvent theSearch = plasmaSearchEvent.getEvent(theQuery, rankingProfile, localProcess, sb.wordIndex, null, true, abstractSet); + localProfiling = new serverProfiling(); + plasmaSearchEvent theSearch = plasmaSearchEvent.getEvent(theQuery, rankingProfile, localProfiling, sb.wordIndex, null, true, abstractSet); urlRetrievalAllTime = theSearch.getURLRetrievalTime(); snippetComputationAllTime = theSearch.getSnippetComputationTime(); @@ -231,7 +231,7 @@ public final class search { if (partitions > 0) sb.requestedQueries = sb.requestedQueries + 1d / partitions; // increase query counter // prepare reference hints - localProcess.startTimer(); + localProfiling.startTimer(); Set ws = theSearch.references(10); StringBuffer refstr = new StringBuffer(); Iterator j = ws.iterator(); @@ -239,7 +239,7 @@ public final class search { refstr.append(",").append((String) j.next()); } prop.put("references", (refstr.length() > 0) ? refstr.substring(1) : refstr.toString()); - localProcess.yield("reference collection", ws.size()); + localProfiling.yield("reference collection", ws.size()); } prop.put("indexabstract", indexabstract.toString()); @@ -253,7 +253,7 @@ public final class search { } else { // result is a List of urlEntry elements - localProcess.startTimer(); + localProfiling.startTimer(); StringBuffer links = new StringBuffer(); String resource = null; plasmaSearchEvent.ResultEntry entry; @@ -266,7 +266,7 @@ public final class search { } prop.put("links", links.toString()); prop.put("linkcount", accu.size()); - localProcess.yield("result list preparation", accu.size()); + localProfiling.yield("result list preparation", accu.size()); } // add information about forward peers diff --git a/htroot/yacysearch.java b/htroot/yacysearch.java index 0c18a19b7..e55c0ecfa 100644 --- a/htroot/yacysearch.java +++ b/htroot/yacysearch.java @@ -268,7 +268,7 @@ public class yacysearch { 20, constraint, true); - serverProfiling localTiming = new serverProfiling(4 * theQuery.maximumTime / 10, theQuery.displayResults()); + serverProfiling localProfiling = new serverProfiling(); String client = (String) header.get("CLIENTIP"); // the search client who initiated the search @@ -287,7 +287,7 @@ public class yacysearch { theQuery.setOffset(0); // in case that this is a new search, always start without a offset offset = 0; } - plasmaSearchEvent theSearch = plasmaSearchEvent.getEvent(theQuery, sb.getRanking(), localTiming, sb.wordIndex, (sb.isRobinsonMode()) ? sb.clusterhashes : null, false, null); + plasmaSearchEvent theSearch = plasmaSearchEvent.getEvent(theQuery, sb.getRanking(), localProfiling, sb.wordIndex, (sb.isRobinsonMode()) ? sb.clusterhashes : null, false, null); // generate result object serverLog.logFine("LOCAL_SEARCH", "SEARCH TIME AFTER ORDERING OF SEARCH RESULTS: " + ((System.currentTimeMillis() - timestamp) / 1000) + " seconds"); diff --git a/htroot/yacysearchitem.html b/htroot/yacysearchitem.html index 0ba3e074a..accde7bee 100644 --- a/htroot/yacysearchitem.html +++ b/htroot/yacysearchitem.html @@ -1,7 +1,7 @@ #(content)#::

- favicon of #[url]# + #[description]#

#(authorized)#::
diff --git a/source/de/anomic/plasma/plasmaCrawlLURL.java b/source/de/anomic/plasma/plasmaCrawlLURL.java index cbc0f8cd1..110459a8c 100644 --- a/source/de/anomic/plasma/plasmaCrawlLURL.java +++ b/source/de/anomic/plasma/plasmaCrawlLURL.java @@ -62,6 +62,7 @@ import java.util.HashSet; import java.util.Iterator; import java.util.LinkedList; +import de.anomic.data.htmlTools; import de.anomic.http.httpc; import de.anomic.http.httpc.response; import de.anomic.index.indexRWIEntry; @@ -628,14 +629,14 @@ public final class plasmaCrawlLURL { pw.println(url); } if (format == 1) { - pw.println("" + comp.title() + "
"); + pw.println("" + htmlTools.encodeUnicode2html(comp.title(), true, true) + "
"); } if (format == 2) { pw.println(""); - pw.println("" + comp.title() + ""); + pw.println("" + htmlTools.encodeUnicode2html(comp.title(), true, true) + ""); pw.println("" + yacyURL.escape(url) + ""); - if (comp.author().length() > 0) pw.println("" + comp.author() + ""); - if (comp.tags().length() > 0) pw.println("" + comp.tags() + ""); + if (comp.author().length() > 0) pw.println("" + htmlTools.encodeUnicode2html(comp.author(), true, true) + ""); + if (comp.tags().length() > 0) pw.println("" + htmlTools.encodeUnicode2html(comp.tags(), true, true) + ""); pw.println("" + entry.moddate().toString() + ""); pw.println("" + entry.hash() + ""); pw.println(""); diff --git a/source/de/anomic/plasma/plasmaSearchEvent.java b/source/de/anomic/plasma/plasmaSearchEvent.java index 417bf9df3..737bedafb 100644 --- a/source/de/anomic/plasma/plasmaSearchEvent.java +++ b/source/de/anomic/plasma/plasmaSearchEvent.java @@ -57,7 +57,7 @@ public final class plasmaSearchEvent { public static final String URLFETCH = "urlfetch"; public static final String NORMALIZING = "normalizing"; - public static int workerThreadCount = 3; + public static int workerThreadCount = 8; public static String lastEventID = ""; private static HashMap lastEvents = new HashMap(); // a cache for objects from this class: re-use old search requests public static final long eventLifetime = 600000; // the time an event will stay in the cache, 10 Minutes @@ -200,7 +200,7 @@ public final class plasmaSearchEvent { // start worker threads to fetch urls and snippets this.workerThreads = new resultWorker[workerThreadCount]; for (int i = 0; i < workerThreadCount; i++) { - this.workerThreads[i] = new resultWorker(i, process.getTargetTime() * 3); + this.workerThreads[i] = new resultWorker(i, query.maximumTime * 3); this.workerThreads[i].start(); } } else { @@ -480,7 +480,7 @@ public final class plasmaSearchEvent { // start worker threads to fetch urls and snippets event.workerThreads = new resultWorker[workerThreadCount]; for (int i = 0; i < workerThreadCount; i++) { - event.workerThreads[i] = event.deployWorker(i, 3 * event.process.getTargetTime()); + event.workerThreads[i] = event.deployWorker(i, 3 * query.maximumTime); } } @@ -514,9 +514,15 @@ public final class plasmaSearchEvent { // start fetching urls and snippets indexURLEntry page; - while ((resultList.size() < query.neededResults() + query.displayResults()) && - (System.currentTimeMillis() < this.timeout) && - ((page = rankedCache.bestURL(true)) != null)) { + while (System.currentTimeMillis() < this.timeout) { + + // get next entry + page = rankedCache.bestURL(true); + if (page == null) { + // if we did not get another entry, sleep some time and try again + try {Thread.sleep(100);} catch (InterruptedException e1) {} + continue; + } if (anyResultWith(page.hash())) continue; if (anyFailureWith(page.hash())) continue; @@ -527,6 +533,7 @@ public final class plasmaSearchEvent { if (resultEntry == null) continue; // the entry had some problems, cannot be used urlRetrievalAllTime += resultEntry.dbRetrievalTime; snippetComputationAllTime += resultEntry.snippetComputationTime; + //System.out.println("+++DEBUG-resultWorker+++ fetched " + resultEntry.urlstring()); // place the result to the result vector synchronized (resultList) { @@ -537,8 +544,9 @@ public final class plasmaSearchEvent { synchronized (rankedCache) { rankedCache.addReferences(resultEntry); } - - System.out.println("DEBUG SNIPPET_LOADING: thread " + id + " got " + resultEntry.url()); + //System.out.println("DEBUG SNIPPET_LOADING: thread " + id + " got " + resultEntry.url()); + + if (resultList.size() >= query.neededResults() + query.displayResults()) break; // we have enough } serverLog.logInfo("SEARCH", "resultWorker thread " + id + " terminated"); } @@ -565,15 +573,18 @@ public final class plasmaSearchEvent { long sleeptime = this.eventTime + (this.query.maximumTime / this.query.displayResults() * ((item % this.query.displayResults()) + 1)) - System.currentTimeMillis(); if ((anyWorkerAlive()) && (sleeptime > 0)) { try {Thread.sleep(sleeptime);} catch (InterruptedException e) {} + //System.out.println("+++DEBUG-oneResult+++ (1) sleeping " + sleeptime); } // if there are less than 10 more results available, sleep some extra time to get a chance that the "common sense" ranking algorithm can work if ((this.resultList.size() <= item + 10) && (anyWorkerAlive())) { try {Thread.sleep(300);} catch (InterruptedException e) {} + //System.out.println("+++DEBUG-oneResult+++ (2) sleeping " + 300); } // then sleep until any result is available (that should not happen) while ((this.resultList.size() <= item) && (anyWorkerAlive())) { try {Thread.sleep(100);} catch (InterruptedException e) {} + //System.out.println("+++DEBUG-oneResult+++ (3) sleeping " + 100); } // finally, if there is something, return the result @@ -602,6 +613,7 @@ public final class plasmaSearchEvent { long timeout = System.currentTimeMillis() + waitingtime; while ((this.resultList.size() < query.neededResults()) && (anyWorkerAlive()) && (System.currentTimeMillis() < timeout)) { try {Thread.sleep(200);} catch (InterruptedException e) {} + //System.out.println("+++DEBUG-completeResults+++ sleeping " + 200); } return this.resultList; } diff --git a/source/de/anomic/server/serverProfiling.java b/source/de/anomic/server/serverProfiling.java index 89872caeb..0fa0e791c 100644 --- a/source/de/anomic/server/serverProfiling.java +++ b/source/de/anomic/server/serverProfiling.java @@ -31,25 +31,14 @@ import java.util.Iterator; public class serverProfiling implements Cloneable { - private static final long minimumTargetTime = 100; - private long targetTime; - private int targetCount; private ArrayList yield; private long timer; - private serverProfiling() { - targetTime = minimumTargetTime; - targetCount = 10; + public serverProfiling() { yield = new ArrayList(); timer = 0; } - public serverProfiling(long time, int count) { - this(); - this.targetTime = time; - this.targetCount = count; - } - public static class Entry { public String process; public int count; @@ -62,14 +51,6 @@ public class serverProfiling implements Cloneable { } } - public int getTargetCount() { - return this.targetCount; - } - - public long getTargetTime() { - return this.targetTime; - } - public void startTimer() { this.timer = System.currentTimeMillis(); }