From cb295fbbdc20cd87d92d5101bbf3ad1d8fbbfc65 Mon Sep 17 00:00:00 2001 From: orbiter Date: Fri, 26 May 2006 23:55:30 +0000 Subject: [PATCH] refactoring git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@2147 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- ...Prototype.java => indexAbstractEntry.java} | 68 +++++++++--------- source/de/anomic/index/indexAbstractRI.java | 4 +- source/de/anomic/index/indexRI.java | 5 +- source/de/anomic/plasma/plasmaWordIndex.java | 33 ++++----- .../plasma/plasmaWordIndexAssortment.java | 4 +- .../plasmaWordIndexAssortmentCluster.java | 70 +++++++++++++------ .../anomic/plasma/plasmaWordIndexCache.java | 12 ++-- .../plasma/plasmaWordIndexClassicDB.java | 12 ++-- .../plasma/plasmaWordIndexEntryInstance.java | 4 +- source/yacy.java | 2 +- 10 files changed, 119 insertions(+), 95 deletions(-) rename source/de/anomic/index/{indexEntryPrototype.java => indexAbstractEntry.java} (60%) diff --git a/source/de/anomic/index/indexEntryPrototype.java b/source/de/anomic/index/indexAbstractEntry.java similarity index 60% rename from source/de/anomic/index/indexEntryPrototype.java rename to source/de/anomic/index/indexAbstractEntry.java index 1c0d98687..134faa2c4 100644 --- a/source/de/anomic/index/indexEntryPrototype.java +++ b/source/de/anomic/index/indexAbstractEntry.java @@ -1,4 +1,4 @@ -// indexEntryPrototype.java +// indexbstractEntry.java // (C) 2006 by Michael Peter Christen; mc@anomic.de, Frankfurt a. M., Germany // first published 20.05.2006 on http://www.anomic.de // @@ -29,7 +29,7 @@ package de.anomic.index; //import de.anomic.plasma.plasmaURL; import de.anomic.plasma.plasmaWordIndex; -public abstract class indexEntryPrototype implements indexEntry { +public abstract class indexAbstractEntry implements indexEntry { // the associated hash protected String urlHash; @@ -57,40 +57,40 @@ public abstract class indexEntryPrototype implements indexEntry { public abstract String toPropertyForm(); public void combineDistance(indexEntry oe) { - this.worddistance = this.worddistance + ((indexEntryPrototype) oe).worddistance + Math.abs(this.posintext - ((indexEntryPrototype) oe).posintext); - this.posintext = Math.min(this.posintext, ((indexEntryPrototype) oe).posintext); - if (this.posofphrase != ((indexEntryPrototype) oe).posofphrase) this.posinphrase = 0; // (unknown) - this.posofphrase = Math.min(this.posofphrase, ((indexEntryPrototype) oe).posofphrase); - this.wordcount = (this.wordcount + ((indexEntryPrototype) oe).wordcount) / 2; + this.worddistance = this.worddistance + ((indexAbstractEntry) oe).worddistance + Math.abs(this.posintext - ((indexAbstractEntry) oe).posintext); + this.posintext = Math.min(this.posintext, ((indexAbstractEntry) oe).posintext); + if (this.posofphrase != ((indexAbstractEntry) oe).posofphrase) this.posinphrase = 0; // (unknown) + this.posofphrase = Math.min(this.posofphrase, ((indexAbstractEntry) oe).posofphrase); + this.wordcount = (this.wordcount + ((indexAbstractEntry) oe).wordcount) / 2; } public void min(indexEntry other) { - if (this.hitcount > ((indexEntryPrototype) other).hitcount) this.hitcount = ((indexEntryPrototype) other).hitcount; - if (this.wordcount > ((indexEntryPrototype) other).wordcount) this.wordcount = ((indexEntryPrototype) other).wordcount; - if (this.phrasecount > ((indexEntryPrototype) other).phrasecount) this.phrasecount = ((indexEntryPrototype) other).phrasecount; - if (this.posintext > ((indexEntryPrototype) other).posintext) this.posintext = ((indexEntryPrototype) other).posintext; - if (this.posinphrase > ((indexEntryPrototype) other).posinphrase) this.posinphrase = ((indexEntryPrototype) other).posinphrase; - if (this.posofphrase > ((indexEntryPrototype) other).posofphrase) this.posofphrase = ((indexEntryPrototype) other).posofphrase; - if (this.worddistance > ((indexEntryPrototype) other).worddistance) this.worddistance = ((indexEntryPrototype) other).worddistance; - if (this.lastModified > ((indexEntryPrototype) other).lastModified) this.lastModified = ((indexEntryPrototype) other).lastModified; - if (this.quality > ((indexEntryPrototype) other).quality) this.quality = ((indexEntryPrototype) other).quality; + if (this.hitcount > ((indexAbstractEntry) other).hitcount) this.hitcount = ((indexAbstractEntry) other).hitcount; + if (this.wordcount > ((indexAbstractEntry) other).wordcount) this.wordcount = ((indexAbstractEntry) other).wordcount; + if (this.phrasecount > ((indexAbstractEntry) other).phrasecount) this.phrasecount = ((indexAbstractEntry) other).phrasecount; + if (this.posintext > ((indexAbstractEntry) other).posintext) this.posintext = ((indexAbstractEntry) other).posintext; + if (this.posinphrase > ((indexAbstractEntry) other).posinphrase) this.posinphrase = ((indexAbstractEntry) other).posinphrase; + if (this.posofphrase > ((indexAbstractEntry) other).posofphrase) this.posofphrase = ((indexAbstractEntry) other).posofphrase; + if (this.worddistance > ((indexAbstractEntry) other).worddistance) this.worddistance = ((indexAbstractEntry) other).worddistance; + if (this.lastModified > ((indexAbstractEntry) other).lastModified) this.lastModified = ((indexAbstractEntry) other).lastModified; + if (this.quality > ((indexAbstractEntry) other).quality) this.quality = ((indexAbstractEntry) other).quality; } public void max(indexEntry other) { - if (this.hitcount < ((indexEntryPrototype) other).hitcount) this.hitcount = ((indexEntryPrototype) other).hitcount; - if (this.wordcount < ((indexEntryPrototype) other).wordcount) this.wordcount = ((indexEntryPrototype) other).wordcount; - if (this.phrasecount < ((indexEntryPrototype) other).phrasecount) this.phrasecount = ((indexEntryPrototype) other).phrasecount; - if (this.posintext < ((indexEntryPrototype) other).posintext) this.posintext = ((indexEntryPrototype) other).posintext; - if (this.posinphrase < ((indexEntryPrototype) other).posinphrase) this.posinphrase = ((indexEntryPrototype) other).posinphrase; - if (this.posofphrase < ((indexEntryPrototype) other).posofphrase) this.posofphrase = ((indexEntryPrototype) other).posofphrase; - if (this.worddistance < ((indexEntryPrototype) other).worddistance) this.worddistance = ((indexEntryPrototype) other).worddistance; - if (this.lastModified < ((indexEntryPrototype) other).lastModified) this.lastModified = ((indexEntryPrototype) other).lastModified; - if (this.quality < ((indexEntryPrototype) other).quality) this.quality = ((indexEntryPrototype) other).quality; + if (this.hitcount < ((indexAbstractEntry) other).hitcount) this.hitcount = ((indexAbstractEntry) other).hitcount; + if (this.wordcount < ((indexAbstractEntry) other).wordcount) this.wordcount = ((indexAbstractEntry) other).wordcount; + if (this.phrasecount < ((indexAbstractEntry) other).phrasecount) this.phrasecount = ((indexAbstractEntry) other).phrasecount; + if (this.posintext < ((indexAbstractEntry) other).posintext) this.posintext = ((indexAbstractEntry) other).posintext; + if (this.posinphrase < ((indexAbstractEntry) other).posinphrase) this.posinphrase = ((indexAbstractEntry) other).posinphrase; + if (this.posofphrase < ((indexAbstractEntry) other).posofphrase) this.posofphrase = ((indexAbstractEntry) other).posofphrase; + if (this.worddistance < ((indexAbstractEntry) other).worddistance) this.worddistance = ((indexAbstractEntry) other).worddistance; + if (this.lastModified < ((indexAbstractEntry) other).lastModified) this.lastModified = ((indexAbstractEntry) other).lastModified; + if (this.quality < ((indexAbstractEntry) other).quality) this.quality = ((indexAbstractEntry) other).quality; } public void normalize(indexEntry mi, indexEntry ma) { - indexEntryPrototype min = (indexEntryPrototype) mi; - indexEntryPrototype max = (indexEntryPrototype) ma; + indexAbstractEntry min = (indexAbstractEntry) mi; + indexAbstractEntry max = (indexAbstractEntry) ma; this.hitcount = (this.hitcount == 0) ? 0 : 1 + 255 * (this.hitcount - min.hitcount ) / (1 + max.hitcount - min.hitcount); this.wordcount = (this.wordcount == 0) ? 0 : 1 + 255 * (this.wordcount - min.wordcount ) / (1 + max.wordcount - min.wordcount); this.phrasecount = (this.phrasecount == 0) ? 0 : 1 + 255 * (this.phrasecount - min.phrasecount ) / (1 + max.phrasecount - min.phrasecount); @@ -103,7 +103,7 @@ public abstract class indexEntryPrototype implements indexEntry { } public indexEntry generateNormalized(indexEntry min, indexEntry max) { - indexEntry e = (indexEntryPrototype) this.clone(); + indexEntry e = (indexAbstractEntry) this.clone(); e.normalize(min, max); return e; } @@ -125,18 +125,18 @@ public abstract class indexEntryPrototype implements indexEntry { public boolean isNewer(indexEntry other) { if (other == null) return true; - if (this.lastModified > ((indexEntryPrototype) other).lastModified) return true; - if (this.lastModified == ((indexEntryPrototype) other).getLastModified()) { - if (this.quality > ((indexEntryPrototype) other).quality) return true; + if (this.lastModified > ((indexAbstractEntry) other).lastModified) return true; + if (this.lastModified == ((indexAbstractEntry) other).getLastModified()) { + if (this.quality > ((indexAbstractEntry) other).quality) return true; } return false; } public boolean isOlder(indexEntry other) { if (other == null) return false; - if (this.lastModified < ((indexEntryPrototype) other).getLastModified()) return true; - if (this.lastModified == ((indexEntryPrototype) other).getLastModified()) { - if (this.quality < ((indexEntryPrototype) other).quality) return true; + if (this.lastModified < ((indexAbstractEntry) other).getLastModified()) return true; + if (this.lastModified == ((indexAbstractEntry) other).getLastModified()) { + if (this.quality < ((indexAbstractEntry) other).quality) return true; } return false; } diff --git a/source/de/anomic/index/indexAbstractRI.java b/source/de/anomic/index/indexAbstractRI.java index 65fdf679e..d58f5a42e 100644 --- a/source/de/anomic/index/indexAbstractRI.java +++ b/source/de/anomic/index/indexAbstractRI.java @@ -30,10 +30,10 @@ import de.anomic.plasma.plasmaWordIndexEntryContainer; public abstract class indexAbstractRI implements indexRI { - public boolean addEntry(String wordHash, indexEntry newEntry, long updateTime, boolean dhtCase) { + public plasmaWordIndexEntryContainer addEntry(String wordHash, indexEntry newEntry, long updateTime, boolean dhtCase) { plasmaWordIndexEntryContainer container = new plasmaWordIndexEntryContainer(wordHash); container.add(newEntry); - return addEntries(container, updateTime, dhtCase) > 0; + return addEntries(container, updateTime, dhtCase); } public long getUpdateTime(String wordHash) { diff --git a/source/de/anomic/index/indexRI.java b/source/de/anomic/index/indexRI.java index 05442dc82..ebc661f0c 100644 --- a/source/de/anomic/index/indexRI.java +++ b/source/de/anomic/index/indexRI.java @@ -45,7 +45,6 @@ package de.anomic.index; import java.util.Iterator; import de.anomic.plasma.plasmaWordIndexEntryContainer; -import de.anomic.plasma.plasmaWordIndexEntryInstance; public interface indexRI { @@ -58,8 +57,8 @@ public interface indexRI { public plasmaWordIndexEntryContainer deleteContainer(String wordHash); public int removeEntries(String wordHash, String[] referenceHashes, boolean deleteComplete); - public boolean addEntry(String wordHash, indexEntry entry, long updateTime, boolean dhtCase); - public int addEntries(plasmaWordIndexEntryContainer newEntries, long creationTime, boolean dhtCase); + public plasmaWordIndexEntryContainer addEntry(String wordHash, indexEntry entry, long updateTime, boolean dhtCase); + public plasmaWordIndexEntryContainer addEntries(plasmaWordIndexEntryContainer newEntries, long creationTime, boolean dhtCase); public void close(int waitingSeconds); diff --git a/source/de/anomic/plasma/plasmaWordIndex.java b/source/de/anomic/plasma/plasmaWordIndex.java index 0e9bf7d60..643dd456a 100644 --- a/source/de/anomic/plasma/plasmaWordIndex.java +++ b/source/de/anomic/plasma/plasmaWordIndex.java @@ -158,16 +158,17 @@ public final class plasmaWordIndex extends indexAbstractRI implements indexRI { } } - public boolean addEntry(String wordHash, indexEntry entry, long updateTime, boolean dhtCase) { - if (ramCache.addEntry(wordHash, entry, updateTime, dhtCase)) { + public plasmaWordIndexEntryContainer addEntry(String wordHash, indexEntry entry, long updateTime, boolean dhtCase) { + plasmaWordIndexEntryContainer c; + if ((c = ramCache.addEntry(wordHash, entry, updateTime, dhtCase)) == null) { if (!dhtCase) flushControl(); - return true; + return null; } - return false; + return c; } - public int addEntries(plasmaWordIndexEntryContainer entries, long updateTime, boolean dhtCase) { - int added = ramCache.addEntries(entries, updateTime, dhtCase); + public plasmaWordIndexEntryContainer addEntries(plasmaWordIndexEntryContainer entries, long updateTime, boolean dhtCase) { + plasmaWordIndexEntryContainer added = ramCache.addEntries(entries, updateTime, dhtCase); // force flush if (!dhtCase) flushControl(); @@ -193,7 +194,7 @@ public final class plasmaWordIndex extends indexAbstractRI implements indexRI { private synchronized void flushCache(String wordHash) { plasmaWordIndexEntryContainer c = ramCache.deleteContainer(wordHash); if (c != null) { - plasmaWordIndexEntryContainer feedback = assortmentCluster.storeTry(wordHash, c); + plasmaWordIndexEntryContainer feedback = assortmentCluster.addEntries(c, c.updated(), false); if (feedback != null) { backend.addEntries(feedback, System.currentTimeMillis(), true); } @@ -290,7 +291,7 @@ public final class plasmaWordIndex extends indexAbstractRI implements indexRI { container.add(ramCache.getContainer(wordHash, true, (maxTime < 0) ? -1 : maxTime / 2), (maxTime < 0) ? -1 : maxTime / 2); // get from assortments - container.add(assortmentCluster.getFromAll(wordHash, (maxTime < 0) ? -1 : maxTime / 2), (maxTime < 0) ? -1 : maxTime / 2); + container.add(assortmentCluster.getContainer(wordHash, true, (maxTime < 0) ? -1 : maxTime / 2), (maxTime < 0) ? -1 : maxTime / 2); // get from backend if (maxTime > 0) { @@ -331,7 +332,7 @@ public final class plasmaWordIndex extends indexAbstractRI implements indexRI { } public int size() { - return java.lang.Math.max(assortmentCluster.sizeTotal(), + return java.lang.Math.max(assortmentCluster.size(), java.lang.Math.max(backend.size(), ramCache.size())); } @@ -351,13 +352,13 @@ public final class plasmaWordIndex extends indexAbstractRI implements indexRI { public synchronized void close(int waitingBoundSeconds) { ramCache.close(waitingBoundSeconds); - assortmentCluster.close(); + assortmentCluster.close(-1); backend.close(10); } public synchronized plasmaWordIndexEntryContainer deleteContainer(String wordHash) { plasmaWordIndexEntryContainer c = ramCache.deleteContainer(wordHash); - c.add(assortmentCluster.removeFromAll(wordHash, -1), -1); + c.add(assortmentCluster.deleteContainer(wordHash, -1), -1); c.add(backend.deleteContainer(wordHash), -1); return c; } @@ -368,7 +369,7 @@ public final class plasmaWordIndex extends indexAbstractRI implements indexRI { synchronized (this) { removed = ramCache.removeEntries(wordHash, urlHashes, deleteComplete); if (removed == urlHashes.length) return removed; - plasmaWordIndexEntryContainer container = assortmentCluster.removeFromAll(wordHash, -1); + plasmaWordIndexEntryContainer container = assortmentCluster.deleteContainer(wordHash, -1); if (container != null) { removed += container.removeEntries(wordHash, urlHashes, deleteComplete); if (container.size() != 0) { @@ -439,7 +440,7 @@ public final class plasmaWordIndex extends indexAbstractRI implements indexRI { if (resourceLevel == plasmaWordIndex.RL_ASSORTMENTS) { return new kelondroMergeIterator( ramCache.wordHashes(startWordHash, false), - assortmentCluster.hashConjunction(startWordHash, true, false), + assortmentCluster.wordHashes(startWordHash, true, false), kelondroNaturalOrder.naturalOrder, true); } @@ -447,7 +448,7 @@ public final class plasmaWordIndex extends indexAbstractRI implements indexRI { return new kelondroMergeIterator( new kelondroMergeIterator( ramCache.wordHashes(startWordHash, false), - assortmentCluster.hashConjunction(startWordHash, true, false), + assortmentCluster.wordHashes(startWordHash, true, false), kelondroNaturalOrder.naturalOrder, true), backend.wordHashes(startWordHash, true, false), @@ -505,7 +506,7 @@ public final class plasmaWordIndex extends indexAbstractRI implements indexRI { } else { // take out all words from the assortment to see if it fits // together with the extracted assortment - plasmaWordIndexEntryContainer container = assortmentCluster.removeFromAll(wordhash, -1); + plasmaWordIndexEntryContainer container = assortmentCluster.deleteContainer(wordhash, -1); if (size + container.size() > assortmentCluster.clusterCapacity) { // this will also be too big to integrate, add to entity entity.addEntries(container); @@ -525,7 +526,7 @@ public final class plasmaWordIndex extends indexAbstractRI implements indexRI { entity.deleteComplete(); entity.close(); entity = null; // integrate the container into the assortments; this will work - assortmentCluster.storeTry(wordhash, container); + assortmentCluster.addEntries(container, container.updated(), false); return new Integer(size); } catch (kelondroException e) { // database corrupted, we simply give up the database and delete it diff --git a/source/de/anomic/plasma/plasmaWordIndexAssortment.java b/source/de/anomic/plasma/plasmaWordIndexAssortment.java index 6201051af..aaf8fa013 100644 --- a/source/de/anomic/plasma/plasmaWordIndexAssortment.java +++ b/source/de/anomic/plasma/plasmaWordIndexAssortment.java @@ -125,13 +125,13 @@ public final class plasmaWordIndexAssortment { if (log != null) log.logConfig("Created new Assortment Database, width " + assortmentLength + ", " + bufferkb + "kb buffer"); } - public void store(String wordHash, plasmaWordIndexEntryContainer newContainer) { + public void store(plasmaWordIndexEntryContainer newContainer) { // stores a word index to assortment database // this throws an exception if the word hash already existed //log.logDebug("storeAssortment: wordHash=" + wordHash + ", urlHash=" + entry.getUrlHash() + ", time=" + creationTime); if (newContainer.size() != assortmentLength) throw new RuntimeException("plasmaWordIndexAssortment.store: wrong container size"); byte[][] row = new byte[this.bufferStructureLength][]; - row[0] = wordHash.getBytes(); + row[0] = newContainer.wordHash().getBytes(); row[1] = kelondroRecords.long2bytes(1, 4); row[2] = kelondroRecords.long2bytes(newContainer.updated(), 8); Iterator entries = newContainer.entries(); diff --git a/source/de/anomic/plasma/plasmaWordIndexAssortmentCluster.java b/source/de/anomic/plasma/plasmaWordIndexAssortmentCluster.java index 19c04811e..3e91621c7 100644 --- a/source/de/anomic/plasma/plasmaWordIndexAssortmentCluster.java +++ b/source/de/anomic/plasma/plasmaWordIndexAssortmentCluster.java @@ -51,13 +51,15 @@ import java.io.IOException; import java.util.HashSet; import java.util.Iterator; +import de.anomic.index.indexRI; +import de.anomic.index.indexAbstractRI; import de.anomic.kelondro.kelondroNaturalOrder; import de.anomic.kelondro.kelondroObjectCache; import de.anomic.kelondro.kelondroRecords; import de.anomic.kelondro.kelondroMergeIterator; import de.anomic.server.logging.serverLog; -public final class plasmaWordIndexAssortmentCluster { +public final class plasmaWordIndexAssortmentCluster extends indexAbstractRI implements indexRI { // class variables private int clusterCount; // number of cluster files @@ -95,37 +97,37 @@ public final class plasmaWordIndexAssortmentCluster { } } - private plasmaWordIndexEntryContainer storeSingular(String wordHash, plasmaWordIndexEntryContainer newContainer) { + private plasmaWordIndexEntryContainer storeSingular(plasmaWordIndexEntryContainer newContainer) { // this tries to store the record. If the record does not fit, or a same hash already // exists and would not fit together with the new record, then the record is deleted from // the assortmen(s) and returned together with the newRecord. // if storage was successful, NULL is returned. if (newContainer.size() > clusterCount) return newContainer; // it will not fit plasmaWordIndexEntryContainer buffer; - while ((buffer = assortments[newContainer.size() - 1].remove(wordHash)) != null) { + while ((buffer = assortments[newContainer.size() - 1].remove(newContainer.wordHash())) != null) { if (newContainer.add(buffer, -1) == 0) return newContainer; // security check; othervise this loop does not terminate if (newContainer.size() > clusterCount) return newContainer; // it will not fit } // the assortment (newContainer.size() - 1) should now be empty. put it in there - assortments[newContainer.size() - 1].store(wordHash, newContainer); + assortments[newContainer.size() - 1].store(newContainer); // return null to show that we have stored the new Record successfully return null; } - private void storeForced(String wordHash, plasmaWordIndexEntryContainer newContainer) { + private void storeForced(plasmaWordIndexEntryContainer newContainer) { // this stores the record and overwrites an existing record. // this is safe if we can be shure that the record does not exist before. if ((newContainer == null) || (newContainer.size() == 0) || (newContainer.size() > clusterCount)) return; // it will not fit - assortments[newContainer.size() - 1].store(wordHash, newContainer); + assortments[newContainer.size() - 1].store(newContainer); } - private void storeStretched(String wordHash, plasmaWordIndexEntryContainer newContainer) { + private void storeStretched(plasmaWordIndexEntryContainer newContainer) { // this stores the record and stretches the storage over // all the assortments that are necessary to fit in the record // IMPORTANT: it must be ensured that the wordHash does not exist in the cluster before // i.e. by calling removeFromAll if (newContainer.size() <= clusterCount) { - storeForced(wordHash, newContainer); + storeForced(newContainer); return; } @@ -144,20 +146,20 @@ public final class plasmaWordIndexAssortmentCluster { plasmaWordIndexEntryContainer c; Iterator i = newContainer.entries(); for (int j = clusterStart; j >= 1; j--) { - c = new plasmaWordIndexEntryContainer(wordHash); + c = new plasmaWordIndexEntryContainer(newContainer.wordHash()); for (int k = 0; k < j; k++) { if (i.hasNext()) { c.add((plasmaWordIndexEntryInstance) i.next(), newContainer.updated()); } else { - storeForced(wordHash, c); + storeForced(c); return; } } - storeForced(wordHash, c); + storeForced(c); } } - public plasmaWordIndexEntryContainer storeTry(String wordHash, plasmaWordIndexEntryContainer newContainer) { + public plasmaWordIndexEntryContainer addEntries(plasmaWordIndexEntryContainer newContainer, long creationTime, boolean dhtCase) { // this is called by the index ram cache flush process // it returnes NULL if the storage was successful // it returnes a new container if the given container cannot be stored @@ -174,7 +176,7 @@ public final class plasmaWordIndexAssortmentCluster { int selectedAssortment = testsize - 1; while (selectedAssortment >= 0) { if (selectedAssortment + 1 <= need) { - spaces[selectedAssortment] = (assortments[selectedAssortment].get(wordHash) == null) ? (selectedAssortment + 1) : 0; + spaces[selectedAssortment] = (assortments[selectedAssortment].get(newContainer.wordHash()) == null) ? (selectedAssortment + 1) : 0; need -= spaces[selectedAssortment]; assert (need >= 0); if (need == 0) break; @@ -187,27 +189,31 @@ public final class plasmaWordIndexAssortmentCluster { Iterator i = newContainer.entries(); for (int j = testsize - 1; j >= 0; j--) { if (spaces[j] == 0) continue; - c = new plasmaWordIndexEntryContainer(wordHash); + c = new plasmaWordIndexEntryContainer(newContainer.wordHash()); for (int k = 0; k <= j; k++) { assert (i.hasNext()); c.add((plasmaWordIndexEntryInstance) i.next(), newContainer.updated()); } - storeForced(wordHash, c); + storeForced(c); } return null; } - if (newContainer.size() <= clusterCount) newContainer = storeSingular(wordHash, newContainer); + if (newContainer.size() <= clusterCount) newContainer = storeSingular(newContainer); if (newContainer == null) return null; // clean up the whole thing and try to insert the container then - newContainer.add(removeFromAll(wordHash, -1), -1); + newContainer.add(deleteContainer(newContainer.wordHash(), -1), -1); if (newContainer.size() > clusterCapacity) return newContainer; - storeStretched(wordHash, newContainer); + storeStretched(newContainer); return null; } - public plasmaWordIndexEntryContainer removeFromAll(String wordHash, long maxTime) { + public plasmaWordIndexEntryContainer deleteContainer(String wordHash) { + return deleteContainer(wordHash, -1); + } + + public plasmaWordIndexEntryContainer deleteContainer(String wordHash, long maxTime) { // removes all records from all the assortments and return them plasmaWordIndexEntryContainer buffer, record = new plasmaWordIndexEntryContainer(wordHash); long limitTime = (maxTime < 0) ? Long.MAX_VALUE : System.currentTimeMillis() + maxTime; @@ -221,7 +227,17 @@ public final class plasmaWordIndexAssortmentCluster { return record; } - public plasmaWordIndexEntryContainer getFromAll(String wordHash, long maxTime) { + public int removeEntries(String wordHash, String[] referenceHashes, boolean deleteComplete) { + plasmaWordIndexEntryContainer c = deleteContainer(wordHash, -1); + int b = c.size(); + c.removeEntries(wordHash, referenceHashes, false); + if (c.size() != 0) { + addEntries(c, c.updated(), false); + } + return b - c.size(); + } + + public plasmaWordIndexEntryContainer getContainer(String wordHash, boolean deleteIfEmpty, long maxTime) { // collect all records from all the assortments and return them plasmaWordIndexEntryContainer buffer, record = new plasmaWordIndexEntryContainer(wordHash); long limitTime = (maxTime < 0) ? Long.MAX_VALUE : System.currentTimeMillis() + maxTime; @@ -244,14 +260,22 @@ public final class plasmaWordIndexAssortmentCluster { return size; } - public Iterator hashConjunction(String startWordHash, boolean up, boolean rot) throws IOException { + public Iterator wordHashes(String startWordHash, boolean rot) { + try { + return wordHashes(startWordHash, true, rot); + } catch (IOException e) { + return new HashSet().iterator(); + } + } + + public Iterator wordHashes(String startWordHash, boolean up, boolean rot) throws IOException { HashSet iterators = new HashSet(); //if (rot) System.out.println("WARNING: kelondroMergeIterator does not work correctly when individual iterators rotate on their own!"); for (int i = 0; i < clusterCount; i++) iterators.add(assortments[i].hashes(startWordHash, up, rot)); return kelondroMergeIterator.cascade(iterators, kelondroNaturalOrder.naturalOrder, up); } - public int sizeTotal() { + public int size() { int total = 0; for (int i = 0; i < clusterCount; i++) total += assortments[i].size(); return total; @@ -290,7 +314,7 @@ public final class plasmaWordIndexAssortmentCluster { return kelondroObjectCache.combinedStatus(a, a.length); } - public void close() { + public void close(int waitingSeconds) { for (int i = 0; i < clusterCount; i++) assortments[i].close(); } diff --git a/source/de/anomic/plasma/plasmaWordIndexCache.java b/source/de/anomic/plasma/plasmaWordIndexCache.java index f0d9de837..8a1284446 100644 --- a/source/de/anomic/plasma/plasmaWordIndexCache.java +++ b/source/de/anomic/plasma/plasmaWordIndexCache.java @@ -416,7 +416,7 @@ public final class plasmaWordIndexCache extends indexAbstractRI implements index return delCount; } - public int addEntries(plasmaWordIndexEntryContainer container, long updateTime, boolean dhtCase) { + public plasmaWordIndexEntryContainer addEntries(plasmaWordIndexEntryContainer container, long updateTime, boolean dhtCase) { // this puts the entries into the cache, not into the assortment directly int added = 0; @@ -440,10 +440,10 @@ public final class plasmaWordIndexCache extends indexAbstractRI implements index } entries = null; } - return added; + return null; } - public boolean addEntry(String wordHash, indexEntry newEntry, long updateTime, boolean dhtCase) { + public plasmaWordIndexEntryContainer addEntry(String wordHash, indexEntry newEntry, long updateTime, boolean dhtCase) { if (dhtCase) synchronized (kCache) { // put container into kCache plasmaWordIndexEntryContainer container = new plasmaWordIndexEntryContainer(wordHash); @@ -451,7 +451,7 @@ public final class plasmaWordIndexCache extends indexAbstractRI implements index kCache.put(new Long(updateTime + kCacheInc), container); kCacheInc++; if (kCacheInc > 10000) kCacheInc = 0; - return true; + return null; } else synchronized (wCache) { plasmaWordIndexEntryContainer container = (plasmaWordIndexEntryContainer) wCache.get(wordHash); if (container == null) container = new plasmaWordIndexEntryContainer(wordHash); @@ -460,11 +460,11 @@ public final class plasmaWordIndexCache extends indexAbstractRI implements index wCache.put(wordHash, container); hashScore.incScore(wordHash); hashDate.setScore(wordHash, intTime(updateTime)); - return true; + return null; } container = null; entries = null; - return false; + return null; } } diff --git a/source/de/anomic/plasma/plasmaWordIndexClassicDB.java b/source/de/anomic/plasma/plasmaWordIndexClassicDB.java index 9f67debf1..9b89b2b33 100644 --- a/source/de/anomic/plasma/plasmaWordIndexClassicDB.java +++ b/source/de/anomic/plasma/plasmaWordIndexClassicDB.java @@ -73,7 +73,7 @@ public class plasmaWordIndexClassicDB extends indexAbstractRI implements indexRI } public Iterator wordHashes(String startHash, boolean rot) { - return wordHashes(startHash, rot); + return wordHashes(startHash, true, rot); } public Iterator wordHashes(String startHash, boolean up, boolean rot) { @@ -240,23 +240,23 @@ public class plasmaWordIndexClassicDB extends indexAbstractRI implements indexRI } } - public int addEntries(plasmaWordIndexEntryContainer container, long creationTime, boolean highPriority) { + public plasmaWordIndexEntryContainer addEntries(plasmaWordIndexEntryContainer container, long creationTime, boolean highPriority) { //System.out.println("* adding " + newEntries.size() + " cached word index entries for word " + wordHash); // debug // fetch the index cache - if ((container == null) || (container.size() == 0)) return 0; + if ((container == null) || (container.size() == 0)) return null; // open file plasmaWordIndexEntity pi = null; try { pi = new plasmaWordIndexEntity(databaseRoot, container.wordHash(), false); - int count = pi.addEntries(container); + pi.addEntries(container); // close and return pi.close(); pi = null; - return count; + return null; } catch (IOException e) { log.logSevere("plasmaWordIndexClassic.addEntries: " + e.getMessage()); - return 0; + return container; } finally { if (pi != null) try{pi.close();}catch (Exception e){} } diff --git a/source/de/anomic/plasma/plasmaWordIndexEntryInstance.java b/source/de/anomic/plasma/plasmaWordIndexEntryInstance.java index 6dc52bc96..ddf5bbdc4 100644 --- a/source/de/anomic/plasma/plasmaWordIndexEntryInstance.java +++ b/source/de/anomic/plasma/plasmaWordIndexEntryInstance.java @@ -52,11 +52,11 @@ import java.util.Properties; import de.anomic.index.indexEntry; import de.anomic.index.indexEntryAttribute; -import de.anomic.index.indexEntryPrototype; +import de.anomic.index.indexAbstractEntry; import de.anomic.index.indexURL; import de.anomic.kelondro.kelondroBase64Order; -public final class plasmaWordIndexEntryInstance extends indexEntryPrototype implements Cloneable, indexEntry { +public final class plasmaWordIndexEntryInstance extends indexAbstractEntry implements Cloneable, indexEntry { // an wordEntry can be filled in either of two ways: // by the discrete values of the entry diff --git a/source/yacy.java b/source/yacy.java index 6b5af1e67..cf89188a4 100644 --- a/source/yacy.java +++ b/source/yacy.java @@ -1312,7 +1312,7 @@ public final class yacy { WordHashIterator = WordIndex.wordHashes(wordChunkStartHash, plasmaWordIndex.RL_WORDFILES, false); } else if (resource.equals("assortments")) { plasmaWordIndexAssortmentCluster assortmentCluster = new plasmaWordIndexAssortmentCluster(new File(homeDBroot, "ACLUSTER"), 64, 16*1024*1024, log); - WordHashIterator = assortmentCluster.hashConjunction(wordChunkStartHash, true, false); + WordHashIterator = assortmentCluster.wordHashes(wordChunkStartHash, true, false); } else if (resource.startsWith("assortment")) { int a = Integer.parseInt(resource.substring(10)); plasmaWordIndexAssortment assortment = new plasmaWordIndexAssortment(new File(homeDBroot, "ACLUSTER"), a, 8*1024*1024, null);