diff --git a/htroot/yacy/transferRWI.java b/htroot/yacy/transferRWI.java index d34177486..7575fc545 100644 --- a/htroot/yacy/transferRWI.java +++ b/htroot/yacy/transferRWI.java @@ -53,7 +53,6 @@ import java.util.LinkedList; import de.anomic.http.httpHeader; import de.anomic.plasma.plasmaSwitchboard; import de.anomic.plasma.plasmaWordIndexEntry; -import de.anomic.plasma.plasmaWordIndexEntryContainer; import de.anomic.server.serverCore; import de.anomic.server.serverObjects; import de.anomic.server.serverSwitch; @@ -126,7 +125,8 @@ public final class transferRWI { wordHash = estring.substring(0, p); wordhashes[received] = wordHash; entry = new plasmaWordIndexEntry(estring.substring(p)); - sb.wordIndex.addEntries(plasmaWordIndexEntryContainer.instantContainer(wordHash, System.currentTimeMillis(), entry), System.currentTimeMillis(), true); + sb.wordIndex.addEntry(wordHash, entry, System.currentTimeMillis(), true); + //sb.wordIndex.addEntries(plasmaWordIndexEntryContainer.instantContainer(wordHash, System.currentTimeMillis(), entry), System.currentTimeMillis(), true); serverCore.checkInterruption(); urlHash = entry.getUrlHash(); diff --git a/source/de/anomic/plasma/plasmaWordIndex.java b/source/de/anomic/plasma/plasmaWordIndex.java index 95464270d..681031682 100644 --- a/source/de/anomic/plasma/plasmaWordIndex.java +++ b/source/de/anomic/plasma/plasmaWordIndex.java @@ -114,10 +114,8 @@ public final class plasmaWordIndex { ramCache.setMaxWords(maxWordsLow, maxWordsHigh); } - public int addEntries(plasmaWordIndexEntryContainer entries, long updateTime, boolean highPriority) { - int added = ramCache.addEntries(entries, updateTime, highPriority); - - // force flush + public void flushControl(boolean highPriority) { + // check for forced flush if (highPriority) { if (ramCache.size() > ramCache.getMaxWordsHigh()) { while (ramCache.size() + 500 > ramCache.getMaxWordsHigh()) { @@ -134,13 +132,28 @@ public final class plasmaWordIndex { } } } + } + + public boolean addEntry(String wordHash, plasmaWordIndexEntry entry, long updateTime, boolean highPriority) { + if (ramCache.addEntry(wordHash, entry, updateTime)) { + flushControl(highPriority); + return true; + } + return false; + } + + public int addEntries(plasmaWordIndexEntryContainer entries, long updateTime, boolean highPriority) { + int added = ramCache.addEntries(entries, updateTime, highPriority); + + // force flush + flushControl(highPriority); return added; } public synchronized void flushCacheSome() { - int flushCount = ramCache.size() / 500; + int flushCount = ramCache.size() / 1000; if (flushCount > 50) flushCount = 50; - if (flushCount < 5) flushCount = 5; + if (flushCount < 3) flushCount = 3; flushCache(flushCount); } @@ -230,7 +243,8 @@ public final class plasmaWordIndex { language, doctype, true); - addEntries(plasmaWordIndexEntryContainer.instantContainer(wordHash, System.currentTimeMillis(), ientry), System.currentTimeMillis(), false); + addEntry(wordHash, ientry, System.currentTimeMillis(), false); + //addEntries(plasmaWordIndexEntryContainer.instantContainer(wordHash, System.currentTimeMillis(), ientry), System.currentTimeMillis(), false); } // System.out.println("DEBUG: plasmaSearch.addPageIndex: added " + // condenser.getWords().size() + " words, flushed " + c + " entries"); diff --git a/source/de/anomic/plasma/plasmaWordIndexCache.java b/source/de/anomic/plasma/plasmaWordIndexCache.java index 373eed6d4..4adb29094 100644 --- a/source/de/anomic/plasma/plasmaWordIndexCache.java +++ b/source/de/anomic/plasma/plasmaWordIndexCache.java @@ -326,9 +326,10 @@ public final class plasmaWordIndexCache implements plasmaWordIndexInterface { // put new words into cache String wordHash = container.wordHash(); + plasmaWordIndexEntryContainer entries = null; synchronized (cache) { // put container into cache - plasmaWordIndexEntryContainer entries = (plasmaWordIndexEntryContainer) cache.get(wordHash); // null pointer exception? wordhash != null! must be cache==null + entries = (plasmaWordIndexEntryContainer) cache.get(wordHash); // null pointer exception? wordhash != null! must be cache==null if (entries == null) entries = new plasmaWordIndexEntryContainer(wordHash); added = entries.add(container); if (added > 0) { @@ -336,24 +337,28 @@ public final class plasmaWordIndexCache implements plasmaWordIndexInterface { hashScore.addScore(wordHash, added); hashDate.setScore(wordHash, intTime(updateTime)); } - entries = null; } + entries = null; return added; } - private void addEntry(String wordHash, plasmaWordIndexEntry newEntry, long updateTime) { + public boolean addEntry(String wordHash, plasmaWordIndexEntry newEntry, long updateTime) { + plasmaWordIndexEntryContainer container = null; + plasmaWordIndexEntry[] entries = null; synchronized (cache) { - plasmaWordIndexEntryContainer container = (plasmaWordIndexEntryContainer) cache.get(wordHash); + container = (plasmaWordIndexEntryContainer) cache.get(wordHash); if (container == null) container = new plasmaWordIndexEntryContainer(wordHash); - plasmaWordIndexEntry[] entries = new plasmaWordIndexEntry[] { newEntry }; + entries = new plasmaWordIndexEntry[] { newEntry }; if (container.add(entries, updateTime) > 0) { cache.put(wordHash, container); hashScore.incScore(wordHash); hashDate.setScore(wordHash, intTime(updateTime)); + return true; } - entries = null; - container = null; } + container = null; + entries = null; + return false; } public void close(int waitingSeconds) { diff --git a/source/de/anomic/plasma/plasmaWordIndexEntryContainer.java b/source/de/anomic/plasma/plasmaWordIndexEntryContainer.java index dffcfd37b..a594d294c 100644 --- a/source/de/anomic/plasma/plasmaWordIndexEntryContainer.java +++ b/source/de/anomic/plasma/plasmaWordIndexEntryContainer.java @@ -60,11 +60,12 @@ import de.anomic.kelondro.kelondroBase64Order; import de.anomic.kelondro.kelondroNaturalOrder; import de.anomic.kelondro.kelondroOrder; -public final class plasmaWordIndexEntryContainer implements Comparable { +public final class plasmaWordIndexEntryContainer { private String wordHash; private final TreeMap container; // urlHash/plasmaWordIndexEntry - Mapping private long updateTime; + private kelondroOrder ordering; public plasmaWordIndexEntryContainer(String wordHash) { this(wordHash, new kelondroNaturalOrder(true)); @@ -73,6 +74,7 @@ public final class plasmaWordIndexEntryContainer implements Comparable { public plasmaWordIndexEntryContainer(String wordHash, kelondroOrder ordering) { this.wordHash = wordHash; this.updateTime = 0; + this.ordering = ordering; container = new TreeMap(ordering); // a urlhash/plasmaWordIndexEntry - relation } @@ -158,22 +160,10 @@ public final class plasmaWordIndexEntryContainer implements Comparable { return container.values().iterator(); } - public static plasmaWordIndexEntryContainer instantContainer(String wordHash, long creationTime, plasmaWordIndexEntry entry) { - plasmaWordIndexEntryContainer c = new plasmaWordIndexEntryContainer(wordHash); - c.add(entry); - c.updateTime = creationTime; - return c; - } - public String toString() { return "C[" + wordHash + "] has " + container.size() + " entries"; } - public int compareTo(Object obj) { - plasmaWordIndexEntryContainer other = (plasmaWordIndexEntryContainer) obj; - return this.wordHash.compareTo(other.wordHash); - } - public int hashCode() { return (int) kelondroBase64Order.enhancedCoder.decodeLong(this.wordHash.substring(0, 4)); } @@ -272,6 +262,7 @@ public final class plasmaWordIndexEntryContainer implements Comparable { private static plasmaWordIndexEntryContainer joinConstructiveByEnumeration(plasmaWordIndexEntryContainer i1, plasmaWordIndexEntryContainer i2, long time, int maxDistance) { System.out.println("DEBUG: JOIN METHOD BY ENUMERATION"); plasmaWordIndexEntryContainer conj = new plasmaWordIndexEntryContainer(null); // start with empty search result + if (!(i1.ordering.signature().equals(i2.ordering.signature()))) return conj; // ordering must be equal Iterator e1 = i1.entries(); Iterator e2 = i2.entries(); int c; @@ -283,7 +274,7 @@ public final class plasmaWordIndexEntryContainer implements Comparable { long stamp = System.currentTimeMillis(); while ((System.currentTimeMillis() - stamp) < time) { - c = ie1.getUrlHash().compareTo(ie2.getUrlHash()); + c = i1.ordering.compare(ie1.getUrlHash(), ie2.getUrlHash()); //System.out.println("** '" + ie1.getUrlHash() + "'.compareTo('" + ie2.getUrlHash() + "')="+c); if (c < 0) { if (e1.hasNext()) ie1 = (plasmaWordIndexEntry) e1.next(); else break;