Fixed a bug that caused search result preparation to behave incorrectly

(a second search on the same topic resulted in fewer links)

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@1502 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 19 years ago
parent 31c8476b5d
commit 3834675084

@ -118,6 +118,7 @@ public final class plasmaSearchEvent extends Thread implements Runnable {
plasmaSearchResult result = order(); plasmaSearchResult result = order();
result.globalContributions = globalContributions; result.globalContributions = globalContributions;
result.localContributions = rcLocal.size(); result.localContributions = rcLocal.size();
flushResults();
// flush results in a separate thread // flush results in a separate thread
this.start(); // start to flush results this.start(); // start to flush results
@ -256,35 +257,18 @@ public final class plasmaSearchEvent extends Thread implements Runnable {
public void run() { public void run() {
flushThreads.add(this); // this will care that the search event object is referenced from somewhere while it is still alive flushThreads.add(this); // this will care that the search event object is referenced from somewhere while it is still alive
flushResults();
flushThreads.remove(this);
}
public void flushResults() {
// put all new results into wordIndex // put all new results into wordIndex
// this must be called after search results had been computed // this must be called after search results had been computed
// it is wise to call this within a separate thread because this method waits untill all // it is wise to call this within a separate thread because
if (searchThreads == null) return; // this method waits until all threads are finished
// wait until all threads are finished
int remaining; int remaining;
int count = 0; int allcount = 0;
String wordHash;
long starttime = System.currentTimeMillis(); long starttime = System.currentTimeMillis();
while ((remaining = yacySearch.remainingWaiting(searchThreads)) > 0) { while ((searchThreads != null) && ((remaining = yacySearch.remainingWaiting(searchThreads)) > 0)) {
// flush the rcGlobal as much as is there so far allcount += flushResults();
if (rcGlobal.size() > 0) synchronized (rcGlobal) {
Iterator hashi = query.queryHashes.iterator();
while (hashi.hasNext()) {
wordHash = (String) hashi.next();
rcGlobal.setWordHash(wordHash);
wordIndex.addEntries(rcGlobal, true);
log.logFine("FLUSHED " + wordHash + ": " + rcGlobal.size() + " url entries");
}
// the rcGlobal was flushed, empty it
count += rcGlobal.size();
rcGlobal.clear();
}
// wait a little bit before trying again // wait a little bit before trying again
try {Thread.sleep(3000);} catch (InterruptedException e) {} try {Thread.sleep(3000);} catch (InterruptedException e) {}
if (System.currentTimeMillis() - starttime > 90000) { if (System.currentTimeMillis() - starttime > 90000) {
@ -295,10 +279,34 @@ public final class plasmaSearchEvent extends Thread implements Runnable {
log.logFine("FINISHED FLUSH RESULTS PROCESS for query " + query.hashes(",")); log.logFine("FINISHED FLUSH RESULTS PROCESS for query " + query.hashes(","));
} }
serverLog.logFine("PLASMA", "FINISHED FLUSHING " + count + " GLOBAL SEARCH RESULTS FOR SEARCH " + query.queryWords); serverLog.logFine("PLASMA", "FINISHED FLUSHING " + allcount + " GLOBAL SEARCH RESULTS FOR SEARCH " + query.queryWords);
// finally delete the temporary index // finally delete the temporary index
rcGlobal = null; rcGlobal = null;
flushThreads.remove(this);
}
public int flushResults() {
// flush the rcGlobal as much as is there so far
// this must be called sometime after search results had been computed
int count = 0;
if ((rcGlobal != null) && (rcGlobal.size() > 0)) {
synchronized (rcGlobal) {
String wordHash;
Iterator hashi = query.queryHashes.iterator();
while (hashi.hasNext()) {
wordHash = (String) hashi.next();
rcGlobal.setWordHash(wordHash);
wordIndex.addEntries(rcGlobal, true);
log.logFine("FLUSHED " + wordHash + ": " + rcGlobal.size() + " url entries");
}
// the rcGlobal was flushed, empty it
count += rcGlobal.size();
rcGlobal.clear();
}
}
return count;
} }
} }

@ -408,16 +408,23 @@ public final class plasmaWordIndexCache implements plasmaWordIndexInterface {
public plasmaWordIndexEntryContainer getContainer(String wordHash, boolean deleteIfEmpty, long maxTime) { public plasmaWordIndexEntryContainer getContainer(String wordHash, boolean deleteIfEmpty, long maxTime) {
long start = System.currentTimeMillis(); long start = System.currentTimeMillis();
plasmaWordIndexEntryContainer container = (plasmaWordIndexEntryContainer) cache.get(wordHash);
if (container == null) { plasmaWordIndexEntryContainer container;
container = new plasmaWordIndexEntryContainer(wordHash); synchronized (cache) {
} // get from cache
container.add(assortmentCluster.getFromAll(wordHash, (maxTime < 1) ? -1 : 8 * maxTime / 10)); container = (plasmaWordIndexEntryContainer) cache.get(wordHash);
if (maxTime > 0) { if (container == null) container = new plasmaWordIndexEntryContainer(wordHash);
maxTime -= System.currentTimeMillis() - start;
if (maxTime < 0) maxTime = 0; // get from assortments
container.add(assortmentCluster.getFromAll(wordHash, (maxTime < 0) ? -1 : maxTime / 2));
// get from backend
if (maxTime > 0) {
maxTime = maxTime - (System.currentTimeMillis() - start);
if (maxTime < 0) maxTime = 100;
}
container.add(backend.getContainer(wordHash, deleteIfEmpty, (maxTime < 0) ? -1 : maxTime));
} }
container.add(backend.getContainer(wordHash, deleteIfEmpty, maxTime));
return container; return container;
} }

@ -323,7 +323,7 @@ public final class plasmaWordIndexDistribution {
Iterator urlIter; Iterator urlIter;
plasmaWordIndexEntry indexEntry; plasmaWordIndexEntry indexEntry;
plasmaCrawlLURL.Entry lurl; plasmaCrawlLURL.Entry lurl;
int notBoundCounter = 0;
final HashMap knownURLs = new HashMap(); final HashMap knownURLs = new HashMap();
while ( while (
(count > 0) && (count > 0) &&
@ -335,6 +335,7 @@ public final class plasmaWordIndexDistribution {
) { ) {
// make an on-the-fly entity and insert values // make an on-the-fly entity and insert values
indexContainer = this.wordIndex.getContainer(nexthash, true, 10000); indexContainer = this.wordIndex.getContainer(nexthash, true, 10000);
int notBoundCounter = 0;
try { try {
urlIter = indexContainer.entries(); urlIter = indexContainer.entries();
// iterate over indexes to fetch url entries and store them in the urlCache // iterate over indexes to fetch url entries and store them in the urlCache

@ -114,6 +114,7 @@ public final class plasmaWordIndexEntryContainer implements Comparable {
public int add(plasmaWordIndexEntryContainer c) { public int add(plasmaWordIndexEntryContainer c) {
// returns the number of new elements // returns the number of new elements
if (c == null) return 0;
Iterator i = c.entries(); Iterator i = c.entries();
int x = 0; int x = 0;
while (i.hasNext()) { while (i.hasNext()) {

Loading…
Cancel
Save