# Patch description (free-form text placed ahead of the first "diff --git" header).
#
# source/de/anomic/plasma/plasmaWordIndex.java
#   Adds an import of de.anomic.kelondro.kelondroRowSet.
#   Adds a "check doubles" pass immediately before "return container;":
#     1. Remembers container.size() in beforeDouble.
#     2. Calls container.removeDoubles() and walks the returned list; each element is
#        treated as a kelondroRowSet of entries that were doubles of each other.
#     3. Wraps every row of a set in an indexRWIRowEntry and keeps the one with the
#        largest lastModified(); on a tie the earlier row is kept, because the
#        comparison is a strict ">".
#     4. Puts that single survivor back with container.addUnique(elm.toKelondroEntry()).
#     5. If the container ended up smaller than beforeDouble, prints a
#        "DEBUG DOUBLECHECK" line with the removed count and container.getWordHash()
#        to System.out.
#   NOTE(review): elm is still null after the inner loop when set.size() == 0, so
#     elm.toKelondroEntry() would throw a NullPointerException. Confirm that
#     removeDoubles() never returns an empty set, or guard the call.
#   NOTE(review): the diagnostic goes to System.out although the file imports
#     de.anomic.server.logging.serverLog. Presumably temporary debug output; confirm.
#   NOTE(review): "ArrayList d" followed by "set = d.get(i)" has no cast, so the
#     declaration presumably carried a type argument that was lost when this text was
#     extracted. Verify against the repository before applying.
#
# source/de/anomic/yacy/yacyClient.java
#   Moves the "insert the containers to the index" loop, which calls
#   wordIndex.addEntries(container[m], true) for m in [0, words), from just before the
#   "read index abstract" section to just before the "generate statistics" section.
#   The "// generate statistics" comment line is removed and re-added with the same
#   visible text; presumably a whitespace-only change. TODO confirm.
#   NOTE(review): the context line "Iterator> i = result.entrySet().iterator();" is
#     missing its type arguments and is split across two physical lines. This looks like
#     the same extraction damage.
#
# NOTE(review): the patch text below is whitespace-collapsed (many diff lines share one
#   physical line) and is reproduced unchanged. It will not apply in this form.
#
diff --git a/source/de/anomic/plasma/plasmaWordIndex.java b/source/de/anomic/plasma/plasmaWordIndex.java index 1904e93e1..4bbd7cd46 100644 --- a/source/de/anomic/plasma/plasmaWordIndex.java +++ b/source/de/anomic/plasma/plasmaWordIndex.java @@ -51,6 +51,7 @@ import de.anomic.kelondro.kelondroCloneableIterator; import de.anomic.kelondro.kelondroMergeIterator; import de.anomic.kelondro.kelondroOrder; import de.anomic.kelondro.kelondroRotateIterator; +import de.anomic.kelondro.kelondroRowSet; import de.anomic.plasma.urlPattern.plasmaURLPattern; import de.anomic.server.serverMemory; import de.anomic.server.logging.serverLog; @@ -361,6 +362,27 @@ public final class plasmaWordIndex implements indexRI { container.addAllUnique(collections.getContainer(wordHash, urlselection)); } } + + // check doubles + int beforeDouble = container.size(); + ArrayList d = container.removeDoubles(); + kelondroRowSet set; + for (int i = 0; i < d.size(); i++) { + // for each element in the double-set, take that one that is the most recent one + set = d.get(i); + indexRWIRowEntry e, elm = null; + long lm = 0; + for (int j = 0; j < set.size(); j++) { + e = new indexRWIRowEntry(set.get(j)); + if ((elm == null) || (e.lastModified() > lm)) { + elm = e; + lm = e.lastModified(); + } + } + container.addUnique(elm.toKelondroEntry()); + } + if (container.size() < beforeDouble) System.out.println("*** DEBUG DOUBLECHECK - removed " + (beforeDouble - container.size()) + " index entries from word container " + container.getWordHash()); + return container; } diff --git a/source/de/anomic/yacy/yacyClient.java b/source/de/anomic/yacy/yacyClient.java index a27fcb4a7..1e021df74 100644 --- a/source/de/anomic/yacy/yacyClient.java +++ b/source/de/anomic/yacy/yacyClient.java @@ -572,11 +572,6 @@ public final class yacyClient { } } - // insert the containers to the index - for (int m = 0; m < words; m++) { - wordIndex.addEntries(container[m], true); - } - // read index abstract if (abstractCache != null) { Iterator> 
i = result.entrySet().iterator(); @@ -600,7 +595,12 @@ public final class yacyClient { } } - // generate statistics + // insert the containers to the index + for (int m = 0; m < words; m++) { + wordIndex.addEntries(container[m], true); + } + + // generate statistics long searchtime; try { searchtime = Integer.parseInt((String) result.get("searchtime"));