diff --git a/source/de/anomic/plasma/plasmaSearchEvent.java b/source/de/anomic/plasma/plasmaSearchEvent.java index baf32b78e..684ffdff5 100644 --- a/source/de/anomic/plasma/plasmaSearchEvent.java +++ b/source/de/anomic/plasma/plasmaSearchEvent.java @@ -118,6 +118,7 @@ public final class plasmaSearchEvent extends Thread implements Runnable { plasmaSearchResult result = order(); result.globalContributions = globalContributions; result.localContributions = rcLocal.size(); + flushResults(); // flush results in a separate thread this.start(); // start to flush results @@ -256,35 +257,18 @@ public final class plasmaSearchEvent extends Thread implements Runnable { public void run() { flushThreads.add(this); // this will care that the search event object is referenced from somewhere while it is still alive - flushResults(); - flushThreads.remove(this); - } - - public void flushResults() { + // put all new results into wordIndex // this must be called after search results had been computed - // it is wise to call this within a separate thread because this method waits untill all - if (searchThreads == null) return; + // it is wise to call this within a separate thread because + // this method waits until all threads are finished - // wait until all threads are finished int remaining; - int count = 0; - String wordHash; + int allcount = 0; long starttime = System.currentTimeMillis(); - while ((remaining = yacySearch.remainingWaiting(searchThreads)) > 0) { - // flush the rcGlobal as much as is there so far - if (rcGlobal.size() > 0) synchronized (rcGlobal) { - Iterator hashi = query.queryHashes.iterator(); - while (hashi.hasNext()) { - wordHash = (String) hashi.next(); - rcGlobal.setWordHash(wordHash); - wordIndex.addEntries(rcGlobal, true); - log.logFine("FLUSHED " + wordHash + ": " + rcGlobal.size() + " url entries"); - } - // the rcGlobal was flushed, empty it - count += rcGlobal.size(); - rcGlobal.clear(); - } + while ((searchThreads != null) && ((remaining = yacySearch.remainingWaiting(searchThreads)) > 0)) { + allcount += flushResults(); + // wait a little bit before trying again try {Thread.sleep(3000);} catch (InterruptedException e) {} if (System.currentTimeMillis() - starttime > 90000) { @@ -295,10 +279,34 @@ public final class plasmaSearchEvent extends Thread implements Runnable { log.logFine("FINISHED FLUSH RESULTS PROCESS for query " + query.hashes(",")); } - serverLog.logFine("PLASMA", "FINISHED FLUSHING " + count + " GLOBAL SEARCH RESULTS FOR SEARCH " + query.queryWords); - + serverLog.logFine("PLASMA", "FINISHED FLUSHING " + allcount + " GLOBAL SEARCH RESULTS FOR SEARCH " + query.queryWords); + // finally delete the temporary index rcGlobal = null; + + flushThreads.remove(this); + } + + public int flushResults() { + // flush the rcGlobal as much as is there so far + // this must be called sometime after search results had been computed + int count = 0; + if ((rcGlobal != null) && (rcGlobal.size() > 0)) { + synchronized (rcGlobal) { + String wordHash; + Iterator hashi = query.queryHashes.iterator(); + while (hashi.hasNext()) { + wordHash = (String) hashi.next(); + rcGlobal.setWordHash(wordHash); + wordIndex.addEntries(rcGlobal, true); + log.logFine("FLUSHED " + wordHash + ": " + rcGlobal.size() + " url entries"); + } + // the rcGlobal was flushed, empty it + count += rcGlobal.size(); + rcGlobal.clear(); + } + } + return count; } } diff --git a/source/de/anomic/plasma/plasmaWordIndexCache.java b/source/de/anomic/plasma/plasmaWordIndexCache.java index f15a587ea..240c4ec89 100644 --- a/source/de/anomic/plasma/plasmaWordIndexCache.java +++ b/source/de/anomic/plasma/plasmaWordIndexCache.java @@ -408,16 +408,23 @@ public final class plasmaWordIndexCache implements plasmaWordIndexInterface { public plasmaWordIndexEntryContainer getContainer(String wordHash, boolean deleteIfEmpty, long maxTime) { long start = System.currentTimeMillis(); - plasmaWordIndexEntryContainer container = (plasmaWordIndexEntryContainer) cache.get(wordHash); - if (container == null) { - container = new plasmaWordIndexEntryContainer(wordHash); - } - container.add(assortmentCluster.getFromAll(wordHash, (maxTime < 1) ? -1 : 8 * maxTime / 10)); - if (maxTime > 0) { - maxTime -= System.currentTimeMillis() - start; - if (maxTime < 0) maxTime = 0; + + plasmaWordIndexEntryContainer container; + synchronized (cache) { + // get from cache + container = (plasmaWordIndexEntryContainer) cache.get(wordHash); + if (container == null) container = new plasmaWordIndexEntryContainer(wordHash); + + // get from assortments + container.add(assortmentCluster.getFromAll(wordHash, (maxTime < 0) ? -1 : maxTime / 2)); + + // get from backend + if (maxTime > 0) { + maxTime = maxTime - (System.currentTimeMillis() - start); + if (maxTime < 0) maxTime = 100; + } + container.add(backend.getContainer(wordHash, deleteIfEmpty, (maxTime < 0) ? -1 : maxTime)); } - container.add(backend.getContainer(wordHash, deleteIfEmpty, maxTime)); return container; } diff --git a/source/de/anomic/plasma/plasmaWordIndexDistribution.java b/source/de/anomic/plasma/plasmaWordIndexDistribution.java index fbff12358..96980c0a7 100644 --- a/source/de/anomic/plasma/plasmaWordIndexDistribution.java +++ b/source/de/anomic/plasma/plasmaWordIndexDistribution.java @@ -323,7 +323,7 @@ public final class plasmaWordIndexDistribution { Iterator urlIter; plasmaWordIndexEntry indexEntry; plasmaCrawlLURL.Entry lurl; - int notBoundCounter = 0; + final HashMap knownURLs = new HashMap(); while ( (count > 0) && @@ -335,6 +335,7 @@ public final class plasmaWordIndexDistribution { ) { // make an on-the-fly entity and insert values indexContainer = this.wordIndex.getContainer(nexthash, true, 10000); + int notBoundCounter = 0; try { urlIter = indexContainer.entries(); // iterate over indexes to fetch url entries and store them in the urlCache diff --git a/source/de/anomic/plasma/plasmaWordIndexEntryContainer.java b/source/de/anomic/plasma/plasmaWordIndexEntryContainer.java index 2737d5664..28a05e7a1 100644 --- a/source/de/anomic/plasma/plasmaWordIndexEntryContainer.java +++ b/source/de/anomic/plasma/plasmaWordIndexEntryContainer.java @@ -114,6 +114,7 @@ public final class plasmaWordIndexEntryContainer implements Comparable { public int add(plasmaWordIndexEntryContainer c) { // returns the number of new elements + if (c == null) return 0; Iterator i = c.entries(); int x = 0; while (i.hasNext()) {