From 9a85f5abc392966fbde73535ebefe89c2ef38ffb Mon Sep 17 00:00:00 2001
From: orbiter
Date: Wed, 6 Dec 2006 12:51:46 +0000
Subject: [PATCH] cleanup

- removed the 'deleteComplete' flag; it was used mainly for the old WORDS
  file indexes
- moved the removeReferences() methods from plasmaSwitchboard to
  plasmaWordIndex

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@3051 6c8d7289-2bf4-0310-a012-ef5d649a1542
---
 htroot/DetailedSearch.java                    |  2 +-
 htroot/IndexControl_p.java                    |  8 +-
 htroot/htdocsdefault/dir.java                 |  2 +-
 htroot/xml/snippet.java                       |  2 +-
 htroot/yacysearch.java                        |  2 +-
 source/de/anomic/index/indexCachedRI.java     | 42 ++++-----
 source/de/anomic/index/indexCollectionRI.java | 14 +--
 source/de/anomic/index/indexContainer.java    |  4 +-
 source/de/anomic/index/indexRAMRI.java        | 16 ++--
 source/de/anomic/index/indexRI.java           |  6 +-
 .../kelondro/kelondroCollectionIndex.java     |  4 +-
 source/de/anomic/plasma/plasmaDHTChunk.java   | 13 ++-
 source/de/anomic/plasma/plasmaDHTFlush.java   |  2 +-
 .../anomic/plasma/plasmaRankingCRProcess.java |  2 +-
 .../de/anomic/plasma/plasmaSearchEvent.java   |  2 +-
 .../de/anomic/plasma/plasmaSwitchboard.java   | 46 +---------
 source/de/anomic/plasma/plasmaWordIndex.java  | 90 +++++++++++--------
 .../de/anomic/plasma/plasmaWordIndexFile.java |  6 +-
 .../plasma/plasmaWordIndexFileCluster.java    | 14 +--
 source/de/anomic/yacy/yacyPeerActions.java    |  2 +-
 source/yacy.java                              |  2 +-
 21 files changed, 125 insertions(+), 156 deletions(-)
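
Editor's note (not part of the original commit): the sketch below summarizes
the two API changes this patch makes, as seen from a caller. It is a minimal
illustration under the simplified signatures shown in the diff; SketchIndex
and its word2hash() are hypothetical stand-ins for indexRI / plasmaWordIndex
and plasmaCondenser.word2hash().

    import java.util.Iterator;
    import java.util.Set;

    class SketchIndex {
        boolean removeEntry(String wordHash, String urlHash) {
            // before this patch: removeEntry(wordHash, urlHash, true /*deleteComplete*/);
            // empty containers are now always deleted, so the flag is gone
            return true; // stand-in for the real cache/collection removal
        }

        // before this patch this helper lived on plasmaSwitchboard as
        // removeReferences(urlhash, words); it now sits on the word index,
        // with the url hash passed as the second argument
        int removeReferences(Set words, String urlhash) {
            int count = 0;
            Iterator iter = words.iterator();
            while (iter.hasNext()) {
                if (removeEntry(word2hash((String) iter.next()), urlhash)) count++;
            }
            return count;
        }

        static String word2hash(String word) {
            return Integer.toHexString(word.hashCode()); // stand-in hash
        }
    }
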
diff --git a/htroot/DetailedSearch.java b/htroot/DetailedSearch.java
index a69917c78..c9d524e54 100644
--- a/htroot/DetailedSearch.java
+++ b/htroot/DetailedSearch.java
@@ -117,7 +117,7 @@ public class DetailedSearch {
             return prop;
         }
         final String delHash = post.get("deleteref", "");
-        sb.removeReferences(delHash, query);
+        sb.wordIndex.removeReferences(query, delHash);
     }
 
     // prepare search order
diff --git a/htroot/IndexControl_p.java b/htroot/IndexControl_p.java
index 62758aafb..b8191d340 100644
--- a/htroot/IndexControl_p.java
+++ b/htroot/IndexControl_p.java
@@ -155,7 +155,7 @@ public class IndexControl_p {
         if (delurl || delurlref) {
             // generate an urlx array
             indexContainer index = null;
-            index = switchboard.wordIndex.getContainer(keyhash, null, true, -1);
+            index = switchboard.wordIndex.getContainer(keyhash, null, -1);
             Iterator en = index.entries();
             int i = 0;
             urlx = new String[index.size()];
@@ -194,7 +194,7 @@
             }
             Set urlHashes = new HashSet();
             for (int i = 0; i < urlx.length; i++) urlHashes.add(urlx[i]);
-            switchboard.wordIndex.removeEntries(keyhash, urlHashes, true);
+            switchboard.wordIndex.removeEntries(keyhash, urlHashes);
             // this shall lead to a presentation of the list; so handle that the remaining program
             // thinks that it was called for a list presentation
             post.remove("keyhashdelete");
@@ -272,7 +272,7 @@
         indexContainer index;
         String result;
         long starttime = System.currentTimeMillis();
-        index = switchboard.wordIndex.getContainer(keyhash, null, true, -1);
+        index = switchboard.wordIndex.getContainer(keyhash, null, -1);
         // built urlCache
         Iterator urlIter = index.entries();
         HashMap knownURLs = new HashMap();
@@ -451,7 +451,7 @@
         serverObjects prop = new serverObjects();
         indexContainer index = null;
         try {
-            index = switchboard.wordIndex.getContainer(keyhash, null, true, -1);
+            index = switchboard.wordIndex.getContainer(keyhash, null, -1);
 
             prop.put("genUrlList_keyHash", keyhash);
 
diff --git a/htroot/htdocsdefault/dir.java b/htroot/htdocsdefault/dir.java
index 5b6bfdab1..5fe394a42 100644
--- a/htroot/htdocsdefault/dir.java
+++ b/htroot/htdocsdefault/dir.java
@@ -399,7 +399,7 @@ public class dir {
             Map.Entry entry;
             while (words.hasNext()) {
                 entry = (Map.Entry) words.next();
-                switchboard.wordIndex.removeEntry(plasmaCondenser.word2hash((String) entry.getKey()), urlhash, true);
+                switchboard.wordIndex.removeEntry(plasmaCondenser.word2hash((String) entry.getKey()), urlhash);
             }
             switchboard.wordIndex.loadedURL.remove(urlhash);
         } catch (Exception e) {
diff --git a/htroot/xml/snippet.java b/htroot/xml/snippet.java
index 50449c702..075137495 100644
--- a/htroot/xml/snippet.java
+++ b/htroot/xml/snippet.java
@@ -53,7 +53,7 @@ public class snippet {
         } else {
             String error = snippet.getError();
             if ((remove) && (error.equals("no matching snippet found"))) {
-                switchboard.removeReferences(plasmaURL.urlHash(url), query);
+                switchboard.wordIndex.removeReferences(query, plasmaURL.urlHash(url));
             }
             prop.put("text", error);
         }
diff --git a/htroot/yacysearch.java b/htroot/yacysearch.java
index 33b30ff4a..327689068 100644
--- a/htroot/yacysearch.java
+++ b/htroot/yacysearch.java
@@ -184,7 +184,7 @@ public class yacysearch {
 
             // delete the index entry locally
             final String delHash = post.get("deleteref", ""); // urlhash
-            sb.removeReferences(delHash, query);
+            sb.wordIndex.removeReferences(query, delHash);
 
             // make new news message with negative voting
             HashMap map = new HashMap();
diff --git a/source/de/anomic/index/indexCachedRI.java b/source/de/anomic/index/indexCachedRI.java
index b412be81c..3ecfd5bc7 100644
--- a/source/de/anomic/index/indexCachedRI.java
+++ b/source/de/anomic/index/indexCachedRI.java
@@ -80,7 +80,7 @@ public class indexCachedRI implements indexRI {
     }
 
     public long getUpdateTime(String wordHash) {
-        indexContainer entries = getContainer(wordHash, null, false, -1);
+        indexContainer entries = getContainer(wordHash, null, -1);
         if (entries == null) return 0;
         return entries.updated();
     }
@@ -139,25 +139,25 @@ public class indexCachedRI implements indexRI {
         busyCacheFlush = false;
     }
 
-    public indexContainer getContainer(String wordHash, Set urlselection, boolean deleteIfEmpty, long maxTime) {
+    public indexContainer getContainer(String wordHash, Set urlselection, long maxTime) {
         // get from cache
-        indexContainer container = riExtern.getContainer(wordHash, urlselection, true, maxTime);
+        indexContainer container = riExtern.getContainer(wordHash, urlselection, maxTime);
         if (container == null) {
-            container = riIntern.getContainer(wordHash, urlselection, true, maxTime);
+            container = riIntern.getContainer(wordHash, urlselection, maxTime);
         } else {
-            container.add(riIntern.getContainer(wordHash, urlselection, true, maxTime), maxTime);
+            container.add(riIntern.getContainer(wordHash, urlselection, maxTime), maxTime);
         }
 
         // get from collection index
         if (container == null) {
-            container = backend.getContainer(wordHash, urlselection, true, (maxTime < 0) ? -1 : maxTime);
+            container = backend.getContainer(wordHash, urlselection, (maxTime < 0) ? -1 : maxTime);
         } else {
-            container.add(backend.getContainer(wordHash, urlselection, true, (maxTime < 0) ? -1 : maxTime), maxTime);
+            container.add(backend.getContainer(wordHash, urlselection, (maxTime < 0) ? -1 : maxTime), maxTime);
         }
         return container;
     }
 
-    public Map getContainers(Set wordHashes, Set urlselection, boolean deleteIfEmpty, boolean interruptIfEmpty, long maxTime) {
+    public Map getContainers(Set wordHashes, Set urlselection, boolean interruptIfEmpty, long maxTime) {
         // return map of wordhash:indexContainer
 
         // retrieve entities that belong to the hashes
@@ -177,7 +177,7 @@ public class indexCachedRI implements indexRI {
             singleHash = (String) i.next();
 
             // retrieve index
-            singleContainer = getContainer(singleHash, urlselection, deleteIfEmpty, (maxTime < 0) ? -1 : remaining / (wordHashes.size() - containers.size()));
+            singleContainer = getContainer(singleHash, urlselection, (maxTime < 0) ? -1 : remaining / (wordHashes.size() - containers.size()));
 
             // check result
             if (((singleContainer == null) || (singleContainer.size() == 0)) && (interruptIfEmpty)) return new HashMap();
@@ -213,27 +213,27 @@ public class indexCachedRI implements indexRI {
         return c;
     }
 
-    public boolean removeEntry(String wordHash, String urlHash, boolean deleteComplete) {
+    public boolean removeEntry(String wordHash, String urlHash) {
         boolean removed = false;
-        removed = removed | (riIntern.removeEntry(wordHash, urlHash, deleteComplete));
-        removed = removed | (riExtern.removeEntry(wordHash, urlHash, deleteComplete));
-        removed = removed | (backend.removeEntry(wordHash, urlHash, deleteComplete));
+        removed = removed | (riIntern.removeEntry(wordHash, urlHash));
+        removed = removed | (riExtern.removeEntry(wordHash, urlHash));
+        removed = removed | (backend.removeEntry(wordHash, urlHash));
         return removed;
     }
 
-    public int removeEntries(String wordHash, Set urlHashes, boolean deleteComplete) {
+    public int removeEntries(String wordHash, Set urlHashes) {
         int removed = 0;
-        removed += riIntern.removeEntries(wordHash, urlHashes, deleteComplete);
-        removed += riExtern.removeEntries(wordHash, urlHashes, deleteComplete);
-        removed += backend.removeEntries(wordHash, urlHashes, deleteComplete);
+        removed += riIntern.removeEntries(wordHash, urlHashes);
+        removed += riExtern.removeEntries(wordHash, urlHashes);
+        removed += backend.removeEntries(wordHash, urlHashes);
         return removed;
     }
 
-    public String removeEntriesExpl(String wordHash, Set urlHashes, boolean deleteComplete) {
+    public String removeEntriesExpl(String wordHash, Set urlHashes) {
         String removed = "";
-        removed += riIntern.removeEntries(wordHash, urlHashes, deleteComplete) + ", ";
-        removed += riExtern.removeEntries(wordHash, urlHashes, deleteComplete) + ", ";
-        removed += backend.removeEntries(wordHash, urlHashes, deleteComplete) + ", ";
+        removed += riIntern.removeEntries(wordHash, urlHashes) + ", ";
+        removed += riExtern.removeEntries(wordHash, urlHashes) + ", ";
+        removed += backend.removeEntries(wordHash, urlHashes) + ", ";
         return removed;
     }
diff --git a/source/de/anomic/index/indexCollectionRI.java b/source/de/anomic/index/indexCollectionRI.java
index 116139a59..ad140f00b 100644
--- a/source/de/anomic/index/indexCollectionRI.java
+++ b/source/de/anomic/index/indexCollectionRI.java
@@ -61,7 +61,7 @@ public class indexCollectionRI implements indexRI {
     }
 
     public long getUpdateTime(String wordHash) {
-        indexContainer entries = getContainer(wordHash, null, false, -1);
+        indexContainer entries = getContainer(wordHash, null, -1);
         if (entries == null) return 0;
         return entries.updated();
     }
@@ -113,9 +113,9 @@ public class indexCollectionRI implements indexRI {
 
     }
 
-    public synchronized indexContainer getContainer(String wordHash, Set urlselection, boolean deleteIfEmpty, long maxtime) {
+    public synchronized indexContainer getContainer(String wordHash, Set urlselection, long maxtime) {
         try {
-            kelondroRowSet collection = collectionIndex.get(wordHash.getBytes(), deleteIfEmpty);
+            kelondroRowSet collection = collectionIndex.get(wordHash.getBytes());
             if (collection != null) collection.select(urlselection);
             if ((collection == null) || (collection.size() == 0)) return null;
             return new indexContainer(wordHash, collection);
@@ -134,15 +134,15 @@ public class indexCollectionRI implements indexRI {
         }
     }
 
-    public synchronized boolean removeEntry(String wordHash, String urlHash, boolean deleteComplete) {
+    public synchronized boolean removeEntry(String wordHash, String urlHash) {
         HashSet hs = new HashSet();
         hs.add(urlHash.getBytes());
-        return removeEntries(wordHash, hs, deleteComplete) == 1;
+        return removeEntries(wordHash, hs) == 1;
     }
 
-    public synchronized int removeEntries(String wordHash, Set urlHashes, boolean deleteComplete) {
+    public synchronized int removeEntries(String wordHash, Set urlHashes) {
         try {
-            return collectionIndex.remove(wordHash.getBytes(), urlHashes, deleteComplete);
+            return collectionIndex.remove(wordHash.getBytes(), urlHashes);
         } catch (kelondroOutOfLimitsException e) {
             e.printStackTrace();
             return 0;
diff --git a/source/de/anomic/index/indexContainer.java b/source/de/anomic/index/indexContainer.java
index 499ba36b0..172fadf41 100644
--- a/source/de/anomic/index/indexContainer.java
+++ b/source/de/anomic/index/indexContainer.java
@@ -145,12 +145,12 @@ public class indexContainer extends kelondroRowSet {
         return new indexRWIEntryNew(entry);
     }
 
-    public boolean removeEntry(String wordHash, String urlHash, boolean deleteComplete) {
+    public boolean removeEntry(String wordHash, String urlHash) {
         if (!wordHash.equals(this.wordHash)) return false;
         return remove(urlHash) != null;
     }
 
-    public int removeEntries(String wordHash, Set urlHashes, boolean deleteComplete) {
+    public int removeEntries(String wordHash, Set urlHashes) {
         if (!wordHash.equals(this.wordHash)) return 0;
         int count = 0;
         Iterator i = urlHashes.iterator();
diff --git a/source/de/anomic/index/indexRAMRI.java b/source/de/anomic/index/indexRAMRI.java
index 70c7c323e..3b21fe910 100644
--- a/source/de/anomic/index/indexRAMRI.java
+++ b/source/de/anomic/index/indexRAMRI.java
@@ -98,7 +98,7 @@ public final class indexRAMRI implements indexRI {
 
     public synchronized long getUpdateTime(String wordHash) {
-        indexContainer entries = getContainer(wordHash, null, false, -1);
+        indexContainer entries = getContainer(wordHash, null, -1);
         if (entries == null) return 0;
         return entries.updated();
     }
@@ -334,7 +334,7 @@ public final class indexRAMRI implements indexRI {
         return (((long) intTime) * (long) 1000) + initTime;
     }
 
-    public synchronized indexContainer getContainer(String wordHash, Set urlselection, boolean deleteIfEmpty, long maxtime_dummy) {
+    public synchronized indexContainer getContainer(String wordHash, Set urlselection, long maxtime_dummy) {
 
         // retrieve container
         indexContainer container = (indexContainer) cache.get(wordHash);
@@ -359,11 +359,11 @@ public final class indexRAMRI implements indexRI {
         return container;
     }
 
-    public synchronized boolean removeEntry(String wordHash, String urlHash, boolean deleteComplete) {
+    public synchronized boolean removeEntry(String wordHash, String urlHash) {
         indexContainer c = (indexContainer) cache.get(wordHash);
-        if ((c != null) && (c.removeEntry(wordHash, urlHash, deleteComplete))) {
+        if ((c != null) && (c.removeEntry(wordHash, urlHash))) {
             // removal successful
-            if ((c.size() == 0) && (deleteComplete)) {
+            if (c.size() == 0) {
                 deleteContainer(wordHash);
             } else {
                 cache.put(wordHash, c);
@@ -375,13 +375,13 @@ public final class indexRAMRI implements indexRI {
         return false;
     }
 
-    public synchronized int removeEntries(String wordHash, Set urlHashes, boolean deleteComplete) {
+    public synchronized int removeEntries(String wordHash, Set urlHashes) {
         if (urlHashes.size() == 0) return 0;
         indexContainer c = (indexContainer) cache.get(wordHash);
         int count;
-        if ((c != null) && ((count = c.removeEntries(wordHash, urlHashes, deleteComplete)) > 0)) {
+        if ((c != null) && ((count = c.removeEntries(wordHash, urlHashes)) > 0)) {
             // removal successful
-            if ((c.size() == 0) && (deleteComplete)) {
+            if (c.size() == 0) {
                 deleteContainer(wordHash);
             } else {
                 cache.put(wordHash, c);
diff --git a/source/de/anomic/index/indexRI.java b/source/de/anomic/index/indexRI.java
index 4313dbe9f..733f852ba 100644
--- a/source/de/anomic/index/indexRI.java
+++ b/source/de/anomic/index/indexRI.java
@@ -39,11 +39,11 @@ public interface indexRI {
     public long getUpdateTime(String wordHash);
     public int indexSize(String wordHash);
 
-    public indexContainer getContainer(String wordHash, Set urlselection, boolean deleteIfEmpty, long maxtime);
+    public indexContainer getContainer(String wordHash, Set urlselection, long maxtime);
     public indexContainer deleteContainer(String wordHash);
 
-    public boolean removeEntry(String wordHash, String urlHash, boolean deleteComplete);
-    public int removeEntries(String wordHash, Set urlHashes, boolean deleteComplete);
+    public boolean removeEntry(String wordHash, String urlHash);
+    public int removeEntries(String wordHash, Set urlHashes);
 
     public void addEntry(String wordHash, indexRWIEntry entry, long updateTime, boolean dhtCase);
     public void addEntries(indexContainer newEntries, long creationTime, boolean dhtCase);
diff --git a/source/de/anomic/kelondro/kelondroCollectionIndex.java b/source/de/anomic/kelondro/kelondroCollectionIndex.java
index 3b7671021..e0de0dcf8 100644
--- a/source/de/anomic/kelondro/kelondroCollectionIndex.java
+++ b/source/de/anomic/kelondro/kelondroCollectionIndex.java
@@ -262,7 +262,7 @@ public class kelondroCollectionIndex {
         putmergeremove(key, collection, true, null);
     }
 
-    public synchronized int remove(byte[] key, Set removekeys, boolean deletecomplete) throws IOException, kelondroOutOfLimitsException {
+    public synchronized int remove(byte[] key, Set removekeys) throws IOException, kelondroOutOfLimitsException {
         return putmergeremove(key, null, false, removekeys);
     }
 
@@ -406,7 +406,7 @@ public class kelondroCollectionIndex {
         return (int) indexrow.getColLong(idx_col_chunkcount);
     }
 
-    public synchronized kelondroRowSet get(byte[] key, boolean deleteIfEmpty) throws IOException {
+    public synchronized kelondroRowSet get(byte[] key) throws IOException {
         // find an entry, if one exists
         kelondroRow.Entry indexrow = index.get(key);
         if (indexrow == null) return null;
diff --git a/source/de/anomic/plasma/plasmaDHTChunk.java b/source/de/anomic/plasma/plasmaDHTChunk.java
index 5b9741ce2..7cba88ef9 100644
--- a/source/de/anomic/plasma/plasmaDHTChunk.java
+++ b/source/de/anomic/plasma/plasmaDHTChunk.java
@@ -71,7 +71,6 @@ public class plasmaDHTChunk {
 
     private plasmaWordIndex wordIndex;
     private serverLog log;
-    private plasmaCrawlLURL lurls;
 
     private int status = chunkStatus_UNDEFINED;
     private String startPointHash;
@@ -124,11 +123,10 @@ public class plasmaDHTChunk {
         return this.status;
     }
 
-    public plasmaDHTChunk(serverLog log, plasmaWordIndex wordIndex, plasmaCrawlLURL lurls, int minCount, int maxCount, int maxtime) {
+    public plasmaDHTChunk(serverLog log, plasmaWordIndex wordIndex, int minCount, int maxCount, int maxtime) {
         try {
             this.log = log;
             this.wordIndex = wordIndex;
-            this.lurls = lurls;
             this.startPointHash = selectTransferStart();
             log.logFine("Selected hash " + this.startPointHash + " as start point for index distribution, distance = " + yacyDHTAction.dhtDistance(yacyCore.seedDB.mySeed.hash, this.startPointHash));
             selectTransferContainers(this.startPointHash, minCount, maxCount, maxtime);
@@ -144,11 +142,10 @@ public class plasmaDHTChunk {
         }
     }
 
-    public plasmaDHTChunk(serverLog log, plasmaWordIndex wordIndex, plasmaCrawlLURL lurls, int minCount, int maxCount, int maxtime, String startHash) {
+    public plasmaDHTChunk(serverLog log, plasmaWordIndex wordIndex, int minCount, int maxCount, int maxtime, String startHash) {
         try {
             this.log = log;
             this.wordIndex = wordIndex;
-            this.lurls = lurls;
             log.logFine("Demanded hash " + startHash + " as start point for index distribution, distance = " + yacyDHTAction.dhtDistance(yacyCore.seedDB.mySeed.hash, this.startPointHash));
             selectTransferContainers(startHash, minCount, maxCount, maxtime);
 
@@ -233,12 +230,12 @@ public class plasmaDHTChunk {
                 urlIter.remove();
                 continue;
             }
-            lurl = lurls.load(iEntry.urlHash(), iEntry);
+            lurl = wordIndex.loadedURL.load(iEntry.urlHash(), iEntry);
             if ((lurl == null) || (lurl.comp().url() == null)) {
                 //yacyCore.log.logFine("DEBUG selectTransferContainersResource: not-bound url hash '" + iEntry.urlHash() + "' for word hash " + container.getWordHash());
                 notBoundCounter++;
                 urlIter.remove();
-                wordIndex.removeEntry(container.getWordHash(), iEntry.urlHash(), true);
+                wordIndex.removeEntry(container.getWordHash(), iEntry.urlHash());
             } else {
                 urlCache.put(iEntry.urlHash(), lurl);
                 //yacyCore.log.logFine("DEBUG selectTransferContainersResource: added url hash '" + iEntry.urlHash() + "' to urlCache for word hash " + container.getWordHash());
@@ -302,7 +299,7 @@ public class plasmaDHTChunk {
                 urlHashes.add(iEntry.urlHash());
             }
             String wordHash = indexContainers[i].getWordHash();
-            count = wordIndex.removeEntriesExpl(this.indexContainers[i].getWordHash(), urlHashes, true);
+            count = wordIndex.removeEntriesExpl(this.indexContainers[i].getWordHash(), urlHashes);
             if (log.isFine()) log.logFine("Deleted partial index (" + c + " URLs) for word " + wordHash + "; " + this.wordIndex.indexSize(wordHash) + " entries left");
             this.indexContainers[i] = null;
diff --git a/source/de/anomic/plasma/plasmaDHTFlush.java b/source/de/anomic/plasma/plasmaDHTFlush.java
index f8ad29a23..5eacb025f 100644
--- a/source/de/anomic/plasma/plasmaDHTFlush.java
+++ b/source/de/anomic/plasma/plasmaDHTFlush.java
@@ -169,7 +169,7 @@ public class plasmaDHTFlush extends Thread {
 
                 // selecting 500 words to transfer
                 this.status = "Running: Selecting chunk " + iteration;
-                newDHTChunk = new plasmaDHTChunk(this.log, this.wordIndex, this.sb.wordIndex.loadedURL, this.chunkSize/3*2, this.chunkSize, -1, this.startPointHash);
+                newDHTChunk = new plasmaDHTChunk(this.log, this.wordIndex, this.chunkSize/3*2, this.chunkSize, -1, this.startPointHash);
 
                 /* If we havn't selected a word chunk this could be because of
                  * a) no words are left in the index
diff --git a/source/de/anomic/plasma/plasmaRankingCRProcess.java b/source/de/anomic/plasma/plasmaRankingCRProcess.java
index 6797033ea..9d1b5e9f0 100644
--- a/source/de/anomic/plasma/plasmaRankingCRProcess.java
+++ b/source/de/anomic/plasma/plasmaRankingCRProcess.java
@@ -421,7 +421,7 @@ public class plasmaRankingCRProcess {
             if (anchor.length() == 6) anchorDom = anchor; else anchorDom = anchor.substring(6);
 
             // update domain-specific entry
-            rci_entry = rci.get(anchorDom.getBytes(), false);
+            rci_entry = rci.get(anchorDom.getBytes());
             if (rci_entry == null) rci_entry = new kelondroRowSet(RCI_coli, 0);
             rci_entry.add(refereeDom.getBytes());
 
diff --git a/source/de/anomic/plasma/plasmaSearchEvent.java b/source/de/anomic/plasma/plasmaSearchEvent.java
index eea50b4ef..19b3ff0e7 100644
--- a/source/de/anomic/plasma/plasmaSearchEvent.java
+++ b/source/de/anomic/plasma/plasmaSearchEvent.java
@@ -401,7 +401,7 @@ public final class plasmaSearchEvent extends Thread implements Runnable {
                 log.logFine("filtered out " + page.comp().url().toString());
                 // filter out bad results
                 Iterator wi = query.queryHashes.iterator();
-                while (wi.hasNext()) wordIndex.removeEntry((String) wi.next(), page.hash(), true);
+                while (wi.hasNext()) wordIndex.removeEntry((String) wi.next(), page.hash());
             } else if (query.contentdom != plasmaSearchQuery.CONTENTDOM_TEXT) {
                 if ((query.contentdom == plasmaSearchQuery.CONTENTDOM_AUDIO) && (page.laudio() > 0)) acc.addResult(page, preranking);
                 else if ((query.contentdom == plasmaSearchQuery.CONTENTDOM_VIDEO) && (page.lvideo() > 0)) acc.addResult(page, preranking);
diff --git a/source/de/anomic/plasma/plasmaSwitchboard.java b/source/de/anomic/plasma/plasmaSwitchboard.java
index 6964213fc..8deab46c9 100644
--- a/source/de/anomic/plasma/plasmaSwitchboard.java
+++ b/source/de/anomic/plasma/plasmaSwitchboard.java
@@ -119,7 +119,6 @@ import java.util.HashMap;
 import java.util.Hashtable;
 import java.util.Iterator;
 import java.util.Map;
-import java.util.Set;
 import java.util.TreeSet;
 
 import de.anomic.data.blogBoard;
@@ -1003,10 +1002,6 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
         //return processStack.size() + cacheLoader.size() + noticeURL.stackSize();
     }
 
-    public int cacheSizeMin() {
-        return wordIndex.size();
-    }
-
     public void enQueue(Object job) {
         if (!(job instanceof plasmaSwitchboardQueue.Entry)) {
             System.out.println("internal error at plasmaSwitchboard.enQueue: wrong job type");
@@ -1063,7 +1058,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
             ) {
                 // generate new chunk
                 int minChunkSize = (int) getConfigLong("indexDistribution.minChunkSize", 30);
-                dhtTransferChunk = new plasmaDHTChunk(this.log, wordIndex, wordIndex.loadedURL, minChunkSize, dhtTransferIndexCount, 5000);
+                dhtTransferChunk = new plasmaDHTChunk(this.log, wordIndex, minChunkSize, dhtTransferIndexCount, 5000);
                 doneSomething = true;
             }
 
@@ -2115,7 +2110,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
                 filename = comp.url().getFile();
                 if ((seed == null) || ((address = seed.getAddress()) == null)) {
                     // seed is not known from here
-                    removeReferences(urlentry.hash(), plasmaCondenser.getWords(("yacyshare " + filename.replace('?', ' ') + " " + comp.descr()).getBytes(), "UTF-8"));
+                    wordIndex.removeReferences(plasmaCondenser.getWords(("yacyshare " + filename.replace('?', ' ') + " " + comp.descr()).getBytes(), "UTF-8"), urlentry.hash());
                     wordIndex.loadedURL.remove(urlentry.hash()); // clean up
                     continue; // next result
                 }
@@ -2264,7 +2259,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
 
             // delete all word references
             int count = 0;
-            if (witer != null) count = removeReferences(urlhash, witer);
+            if (witer != null) count = wordIndex.removeReferences(witer, urlhash);
 
             // finally delete the url entry itself
             wordIndex.loadedURL.remove(urlhash);
@@ -2275,39 +2270,6 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
             if (resourceContent != null) try { resourceContent.close(); } catch (Exception e) {/* ignore this */}
         }
     }
-
-    public int removeReferences(URL url, Set words) {
-        return removeReferences(plasmaURL.urlHash(url), words);
-    }
-
-    public int removeReferences(final String urlhash, final Set words) {
-        // sequentially delete all word references
-        // returns number of deletions
-        Iterator iter = words.iterator();
-        String word;
-        int count = 0;
-        while (iter.hasNext()) {
-            word = (String) iter.next();
-            // delete the URL reference in this word index
-            if (wordIndex.removeEntry(plasmaCondenser.word2hash(word), urlhash, true)) count++;
-        }
-        return count;
-    }
-
-    public int removeReferences(final String urlhash, final Iterator wordStatPropIterator) {
-        // sequentially delete all word references
-        // returns number of deletions
-        Map.Entry entry;
-        String word;
-        int count = 0;
-        while (wordStatPropIterator.hasNext()) {
-            entry = (Map.Entry) wordStatPropIterator.next();
-            word = (String) entry.getKey();
-            // delete the URL reference in this word index
-            if (wordIndex.removeEntry(plasmaCondenser.word2hash(word), urlhash, true)) count++;
-        }
-        return count;
-    }
 
     public int adminAuthenticated(httpHeader header) {
 
@@ -2402,7 +2364,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
             return "no DHT distribution: not enough words - wordIndex.size() = " + wordIndex.size();
         }
         if ((getConfig("allowDistributeIndexWhileCrawling","false").equalsIgnoreCase("false")) &&
-            ((noticeURL.stackSize() > 0) || (sbQueue.size() > 3))) {
+            ((noticeURL.stackSize() > 0) /*|| (sbQueue.size() > 3)*/)) {
             return "no DHT distribution: crawl in progress: noticeURL.stackSize() = " + noticeURL.stackSize() + ", sbQueue.size() = " + sbQueue.size();
         }
         return null;
diff --git a/source/de/anomic/plasma/plasmaWordIndex.java b/source/de/anomic/plasma/plasmaWordIndex.java
index f49aef5e5..97e15ea35 100644
--- a/source/de/anomic/plasma/plasmaWordIndex.java
+++ b/source/de/anomic/plasma/plasmaWordIndex.java
@@ -139,7 +139,7 @@ public final class plasmaWordIndex implements indexRI {
     }
 
     public long getUpdateTime(String wordHash) {
-        indexContainer entries = getContainer(wordHash, null, false, -1);
+        indexContainer entries = getContainer(wordHash, null, -1);
         if (entries == null) return 0;
         return entries.updated();
     }
@@ -163,21 +163,6 @@ public final class plasmaWordIndex implements indexRI {
         }
     }
 
-    /*
-    private indexContainer convertOld2New(indexContainer entries) {
-        // convert old entries to new entries
-        indexContainer newentries = new indexContainer(entries.getWordHash(), indexRWIEntryNew.urlEntryRow);
-        Iterator i = entries.entries();
-        indexRWIEntryOld old;
-        while (i.hasNext()) {
-            old = (indexRWIEntryOld) i.next();
-            if (old.urlHash() != null) {
-                newentries.add(new indexRWIEntryNew(old));
-            }
-        }
-        return newentries;
-    }
-    */
 
     public void addEntries(indexContainer entries, long updateTime, boolean dhtInCase) {
         assert (entries.row().objectsize() == indexRWIEntryNew.urlEntryRow.objectsize());
@@ -306,21 +291,21 @@ public final class plasmaWordIndex implements indexRI {
         return condenser.RESULT_SIMI_WORDS;
     }
 
-    public indexContainer getContainer(String wordHash, Set urlselection, boolean deleteIfEmpty, long maxTime) {
+    public indexContainer getContainer(String wordHash, Set urlselection, long maxTime) {
         // get from cache
-        indexContainer container = dhtOutCache.getContainer(wordHash, urlselection, true, -1);
+        indexContainer container = dhtOutCache.getContainer(wordHash, urlselection, -1);
         if (container == null) {
-            container = dhtInCache.getContainer(wordHash, urlselection, true, -1);
+            container = dhtInCache.getContainer(wordHash, urlselection, -1);
         } else {
-            container.add(dhtInCache.getContainer(wordHash, urlselection, true, -1), -1);
+            container.add(dhtInCache.getContainer(wordHash, urlselection, -1), -1);
         }
 
         // get from collection index
         if (container == null) {
-            container = collections.getContainer(wordHash, urlselection, true, (maxTime < 0) ? -1 : maxTime);
+            container = collections.getContainer(wordHash, urlselection, (maxTime < 0) ? -1 : maxTime);
         } else {
-            container.add(collections.getContainer(wordHash, urlselection, true, (maxTime < 0) ? -1 : maxTime), -1);
+            container.add(collections.getContainer(wordHash, urlselection, (maxTime < 0) ? -1 : maxTime), -1);
         }
 
         return container;
     }
@@ -345,7 +330,7 @@ public final class plasmaWordIndex implements indexRI {
             singleHash = (String) i.next();
 
             // retrieve index
-            singleContainer = getContainer(singleHash, urlselection, deleteIfEmpty, (maxTime < 0) ? -1 : remaining / (wordHashes.size() - containers.size()));
+            singleContainer = getContainer(singleHash, urlselection, (maxTime < 0) ? -1 : remaining / (wordHashes.size() - containers.size()));
 
             // check result
             if (((singleContainer == null) || (singleContainer.size() == 0)) && (interruptIfEmpty)) return new HashMap();
@@ -383,39 +368,66 @@ public final class plasmaWordIndex implements indexRI {
         return c;
     }
 
-    public boolean removeEntry(String wordHash, String urlHash, boolean deleteComplete) {
+    public boolean removeEntry(String wordHash, String urlHash) {
         boolean removed = false;
-        removed = removed | (dhtInCache.removeEntry(wordHash, urlHash, deleteComplete));
-        removed = removed | (dhtOutCache.removeEntry(wordHash, urlHash, deleteComplete));
-        removed = removed | (collections.removeEntry(wordHash, urlHash, deleteComplete));
+        removed = removed | (dhtInCache.removeEntry(wordHash, urlHash));
+        removed = removed | (dhtOutCache.removeEntry(wordHash, urlHash));
+        removed = removed | (collections.removeEntry(wordHash, urlHash));
         return removed;
     }
 
-    public int removeEntries(String wordHash, Set urlHashes, boolean deleteComplete) {
+    public int removeEntries(String wordHash, Set urlHashes) {
         int removed = 0;
-        removed += dhtInCache.removeEntries(wordHash, urlHashes, deleteComplete);
-        removed += dhtOutCache.removeEntries(wordHash, urlHashes, deleteComplete);
-        removed += collections.removeEntries(wordHash, urlHashes, deleteComplete);
+        removed += dhtInCache.removeEntries(wordHash, urlHashes);
+        removed += dhtOutCache.removeEntries(wordHash, urlHashes);
+        removed += collections.removeEntries(wordHash, urlHashes);
        return removed;
     }
 
-    public String removeEntriesExpl(String wordHash, Set urlHashes, boolean deleteComplete) {
+    public String removeEntriesExpl(String wordHash, Set urlHashes) {
         String removed = "";
-        removed += dhtInCache.removeEntries(wordHash, urlHashes, deleteComplete) + ", ";
-        removed += dhtOutCache.removeEntries(wordHash, urlHashes, deleteComplete) + ", ";
-        removed += collections.removeEntries(wordHash, urlHashes, deleteComplete);
+        removed += dhtInCache.removeEntries(wordHash, urlHashes) + ", ";
+        removed += dhtOutCache.removeEntries(wordHash, urlHashes) + ", ";
+        removed += collections.removeEntries(wordHash, urlHashes);
         return removed;
     }
 
-    public static final int RL_RAMCACHE = 0;
-    public static final int RL_COLLECTIONS = 1;
+    public int removeReferences(Set words, String urlhash) {
+        // sequentially delete all word references
+        // returns number of deletions
+        Iterator iter = words.iterator();
+        String word;
+        int count = 0;
+        while (iter.hasNext()) {
+            word = (String) iter.next();
+            // delete the URL reference in this word index
+            if (removeEntry(plasmaCondenser.word2hash(word), urlhash)) count++;
+        }
+        return count;
+    }
+
+    public int removeReferences(Iterator wordStatPropIterator, String urlhash) {
+        // sequentially delete all word references
+        // returns number of deletions
+        Map.Entry entry;
+        String word;
+        int count = 0;
+        while (wordStatPropIterator.hasNext()) {
+            entry = (Map.Entry) wordStatPropIterator.next();
+            word = (String) entry.getKey();
+            // delete the URL reference in this word index
+            if (removeEntry(plasmaCondenser.word2hash(word), urlhash)) count++;
+        }
+        return count;
+    }
 
     public int tryRemoveURLs(String urlHash) {
         // this tries to delete an index from the cache that has this
         // urlHash assigned. This can only work if the entry is really fresh
         // and can be found in the RAM cache
         // this returns the number of deletion that had been possible
-        return dhtInCache.tryRemoveURLs(urlHash) | dhtOutCache.tryRemoveURLs(urlHash);
+        int d = dhtInCache.tryRemoveURLs(urlHash);
+        if (d > 0) return d; else return dhtOutCache.tryRemoveURLs(urlHash);
     }
 
     public TreeSet indexContainerSet(String startHash, boolean ram, boolean rot, int count) {
@@ -540,7 +552,7 @@ public final class plasmaWordIndex implements indexRI {
             }
         }
         if (urlHashs.size() > 0) {
-            int removed = removeEntries(container.getWordHash(), urlHashs, true);
+            int removed = removeEntries(container.getWordHash(), urlHashs);
             serverLog.logFine("INDEXCLEANER", container.getWordHash() + ": " + removed + " of " + container.size() + " URL-entries deleted");
             lastWordHash = container.getWordHash();
             lastDeletionCounter = urlHashs.size();
diff --git a/source/de/anomic/plasma/plasmaWordIndexFile.java b/source/de/anomic/plasma/plasmaWordIndexFile.java
index 2c3c30836..813452bf9 100644
--- a/source/de/anomic/plasma/plasmaWordIndexFile.java
+++ b/source/de/anomic/plasma/plasmaWordIndexFile.java
@@ -64,12 +64,10 @@ public final class plasmaWordIndexFile {
     private final String theWordHash;
     private kelondroTree theIndex;
     private File theLocation;
-    private boolean delete;
 
-    public plasmaWordIndexFile(File databaseRoot, String wordHash, boolean deleteIfEmpty) {
+    public plasmaWordIndexFile(File databaseRoot, String wordHash) {
         theWordHash = wordHash;
         theIndex = indexFile(databaseRoot, wordHash);
-        delete = deleteIfEmpty;
     }
 
     public static boolean removePlasmaIndex(File databaseRoot, String wordHash) {
@@ -112,7 +110,7 @@ public final class plasmaWordIndexFile {
     public int size() {
         if (theIndex == null) return 0;
         int size = theIndex.size();
-        if ((size == 0) && (delete)) {
+        if (size == 0) {
             deleteComplete();
             return 0;
         } else {
diff --git a/source/de/anomic/plasma/plasmaWordIndexFileCluster.java b/source/de/anomic/plasma/plasmaWordIndexFileCluster.java
index c49d6e035..c2b7aca0a 100644
--- a/source/de/anomic/plasma/plasmaWordIndexFileCluster.java
+++ b/source/de/anomic/plasma/plasmaWordIndexFileCluster.java
@@ -95,7 +95,7 @@
         }
 
         public Object next() {
-            return getContainer((String) wordIterator.next(), null, true, 100);
+            return getContainer((String) wordIterator.next(), null, 100);
         }
 
         public void remove() {
@@ -224,11 +224,11 @@ public class plasmaWordIndexFileCluster implements indexRI {
         return plasmaWordIndexFile.wordHash2path(databaseRoot, wordHash).exists();
     }
 
-    public synchronized indexContainer getContainer(String wordHash, Set urlselection, boolean deleteIfEmpty, long maxTime) {
+    public synchronized indexContainer getContainer(String wordHash, Set urlselection, long maxTime) {
         long start = System.currentTimeMillis();
         if ((maxTime < 0) || (maxTime > 60000)) maxTime=60000; // maximum is one minute
         if (exists(wordHash)) {
-            plasmaWordIndexFile entity = this.getEntity(wordHash, deleteIfEmpty, (maxTime < 0) ? -1 : maxTime * 9 / 10);
+            plasmaWordIndexFile entity = this.getEntity(wordHash, (maxTime < 0) ? -1 : maxTime * 9 / 10);
             indexContainer container = new indexContainer(wordHash, indexRWIEntryNew.urlEntryRow);
             indexRWIEntryNew entry;
             Iterator i = entity.elements(true);
@@ -242,8 +242,8 @@ public class plasmaWordIndexFileCluster implements indexRI {
         }
     }
 
-    public plasmaWordIndexFile getEntity(String wordHash, boolean deleteIfEmpty, long maxTime) {
-        return new plasmaWordIndexFile(databaseRoot, wordHash, deleteIfEmpty);
+    public plasmaWordIndexFile getEntity(String wordHash, long maxTime) {
+        return new plasmaWordIndexFile(databaseRoot, wordHash);
     }
 
     public long getUpdateTime(String wordHash) {
@@ -256,11 +256,11 @@ public class plasmaWordIndexFileCluster implements indexRI {
         return null;
     }
 
-    public boolean removeEntry(String wordHash, String urlHash, boolean deleteComplete) {
+    public boolean removeEntry(String wordHash, String urlHash) {
         throw new UnsupportedOperationException("word files are not supported in YaCy 0.491 and above");
     }
 
-    public int removeEntries(String wordHash, Set urlHashes, boolean deleteComplete) {
+    public int removeEntries(String wordHash, Set urlHashes) {
         throw new UnsupportedOperationException("word files are not supported in YaCy 0.491 and above");
     }
 
diff --git a/source/de/anomic/yacy/yacyPeerActions.java b/source/de/anomic/yacy/yacyPeerActions.java
index 7644ffba8..b6f42eae2 100644
--- a/source/de/anomic/yacy/yacyPeerActions.java
+++ b/source/de/anomic/yacy/yacyPeerActions.java
@@ -136,7 +136,7 @@ public class yacyPeerActions {
         seedDB.mySeed.put(yacySeed.UPTIME, Long.toString(uptime/60)); // the number of minutes that the peer is up in minutes/day (moving average MA30)
         seedDB.mySeed.put(yacySeed.LCOUNT, Integer.toString(sb.wordIndex.loadedURL.size())); // the number of links that the peer has stored (LURL's)
         seedDB.mySeed.put(yacySeed.NCOUNT, Integer.toString(sb.noticeURL.stackSize())); // the number of links that the peer has noticed, but not loaded (NURL's)
-        seedDB.mySeed.put(yacySeed.ICOUNT, Integer.toString(sb.cacheSizeMin())); // the minimum number of words that the peer has indexed (as it says)
+        seedDB.mySeed.put(yacySeed.ICOUNT, Integer.toString(sb.wordIndex.size())); // the minimum number of words that the peer has indexed (as it says)
         seedDB.mySeed.put(yacySeed.SCOUNT, Integer.toString(seedDB.sizeConnected())); // the number of seeds that the peer has stored
         seedDB.mySeed.put(yacySeed.CCOUNT, Double.toString(((int) ((seedDB.sizeConnected() + seedDB.sizeDisconnected() + seedDB.sizePotential()) * 60.0 / (uptime + 1.01)) * 100) / 100.0)); // the number of clients that the peer connects (as connects/hour)
         seedDB.mySeed.put(yacySeed.VERSION, sb.getConfig("version", ""));
diff --git a/source/yacy.java b/source/yacy.java
index fa718369b..48bc0eb6a 100644
--- a/source/yacy.java
+++ b/source/yacy.java
@@ -697,7 +697,7 @@ public final class yacy {
             }
             plasmaWordIndexFile entity = null;
             try {
-                entity = new plasmaWordIndexFile(oldDatabaseRoot, wordhash, true);
+                entity = new plasmaWordIndexFile(oldDatabaseRoot, wordhash);
                 int size = entity.size();
 
                 indexContainer container = new indexContainer(wordhash, indexRWIEntryNew.urlEntryRow);