- removed the 'deleteComplete' flag, which was used especially for the WORDS indexes
- moved methods from plasmaSwitchboard to plasmaWordIndex

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@3051 6c8d7289-2bf4-0310-a012-ef5d649a1542
orbiter 19 years ago
parent fbe1ee402b
commit 9a85f5abc3
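The practical effect of both changes, as a minimal self-contained sketch. All names and bodies here are simplified, hypothetical stand-ins for the YaCy classes touched in the diff below, not the real implementations:

import java.util.Iterator;
import java.util.Set;

// Hypothetical stand-in for plasmaWordIndex after this commit; the real
// class fans removals out to dhtInCache, dhtOutCache and the collections
// index, as the plasmaWordIndex hunks below show.
class WordIndexSketch {

    // note the removeEntry signature without the former deleteComplete flag
    boolean removeEntry(String wordHash, String urlHash) {
        // stand-in body; the real code removes the URL reference from all
        // backing indexes and reports whether anything was deleted
        return true;
    }

    // moved here from the plasmaSwitchboard facade, with the word set now
    // coming first and the URL hash second
    int removeReferences(Set<String> words, String urlhash) {
        int count = 0;
        Iterator<String> iter = words.iterator();
        while (iter.hasNext()) {
            if (removeEntry(word2hash(iter.next()), urlhash)) count++;
        }
        return count;
    }

    // placeholder for plasmaCondenser.word2hash
    static String word2hash(String word) {
        return Integer.toHexString(word.hashCode());
    }
}

Call sites change accordingly, e.g. from sb.removeReferences(delHash, query) to sb.wordIndex.removeReferences(query, delHash), as the first hunks below show.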

@@ -117,7 +117,7 @@ public class DetailedSearch {
 return prop;
 }
 final String delHash = post.get("deleteref", "");
-sb.removeReferences(delHash, query);
+sb.wordIndex.removeReferences(query, delHash);
 }
 // prepare search order

@@ -155,7 +155,7 @@ public class IndexControl_p {
 if (delurl || delurlref) {
 // generate an urlx array
 indexContainer index = null;
-index = switchboard.wordIndex.getContainer(keyhash, null, true, -1);
+index = switchboard.wordIndex.getContainer(keyhash, null, -1);
 Iterator en = index.entries();
 int i = 0;
 urlx = new String[index.size()];
@@ -194,7 +194,7 @@ public class IndexControl_p {
 }
 Set urlHashes = new HashSet();
 for (int i = 0; i < urlx.length; i++) urlHashes.add(urlx[i]);
-switchboard.wordIndex.removeEntries(keyhash, urlHashes, true);
+switchboard.wordIndex.removeEntries(keyhash, urlHashes);
 // this shall lead to a presentation of the list; so handle that the remaining program
 // thinks that it was called for a list presentation
 post.remove("keyhashdelete");
@@ -272,7 +272,7 @@ public class IndexControl_p {
 indexContainer index;
 String result;
 long starttime = System.currentTimeMillis();
-index = switchboard.wordIndex.getContainer(keyhash, null, true, -1);
+index = switchboard.wordIndex.getContainer(keyhash, null, -1);
 // built urlCache
 Iterator urlIter = index.entries();
 HashMap knownURLs = new HashMap();
@@ -451,7 +451,7 @@ public class IndexControl_p {
 serverObjects prop = new serverObjects();
 indexContainer index = null;
 try {
-index = switchboard.wordIndex.getContainer(keyhash, null, true, -1);
+index = switchboard.wordIndex.getContainer(keyhash, null, -1);
 prop.put("genUrlList_keyHash", keyhash);

@@ -399,7 +399,7 @@ public class dir {
 Map.Entry entry;
 while (words.hasNext()) {
 entry = (Map.Entry) words.next();
-switchboard.wordIndex.removeEntry(plasmaCondenser.word2hash((String) entry.getKey()), urlhash, true);
+switchboard.wordIndex.removeEntry(plasmaCondenser.word2hash((String) entry.getKey()), urlhash);
 }
 switchboard.wordIndex.loadedURL.remove(urlhash);
 } catch (Exception e) {

@@ -53,7 +53,7 @@ public class snippet {
 } else {
 String error = snippet.getError();
 if ((remove) && (error.equals("no matching snippet found"))) {
-switchboard.removeReferences(plasmaURL.urlHash(url), query);
+switchboard.wordIndex.removeReferences(query, plasmaURL.urlHash(url));
 }
 prop.put("text", error);
 }

@@ -184,7 +184,7 @@ public class yacysearch {
 // delete the index entry locally
 final String delHash = post.get("deleteref", ""); // urlhash
-sb.removeReferences(delHash, query);
+sb.wordIndex.removeReferences(query, delHash);
 // make new news message with negative voting
 HashMap map = new HashMap();

@@ -80,7 +80,7 @@ public class indexCachedRI implements indexRI {
 }
 public long getUpdateTime(String wordHash) {
-indexContainer entries = getContainer(wordHash, null, false, -1);
+indexContainer entries = getContainer(wordHash, null, -1);
 if (entries == null) return 0;
 return entries.updated();
 }
@@ -139,25 +139,25 @@ public class indexCachedRI implements indexRI {
 busyCacheFlush = false;
 }
-public indexContainer getContainer(String wordHash, Set urlselection, boolean deleteIfEmpty, long maxTime) {
+public indexContainer getContainer(String wordHash, Set urlselection, long maxTime) {
 // get from cache
-indexContainer container = riExtern.getContainer(wordHash, urlselection, true, maxTime);
+indexContainer container = riExtern.getContainer(wordHash, urlselection, maxTime);
 if (container == null) {
-container = riIntern.getContainer(wordHash, urlselection, true, maxTime);
+container = riIntern.getContainer(wordHash, urlselection, maxTime);
 } else {
-container.add(riIntern.getContainer(wordHash, urlselection, true, maxTime), maxTime);
+container.add(riIntern.getContainer(wordHash, urlselection, maxTime), maxTime);
 }
 // get from collection index
 if (container == null) {
-container = backend.getContainer(wordHash, urlselection, true, (maxTime < 0) ? -1 : maxTime);
+container = backend.getContainer(wordHash, urlselection, (maxTime < 0) ? -1 : maxTime);
 } else {
-container.add(backend.getContainer(wordHash, urlselection, true, (maxTime < 0) ? -1 : maxTime), maxTime);
+container.add(backend.getContainer(wordHash, urlselection, (maxTime < 0) ? -1 : maxTime), maxTime);
 }
 return container;
 }
-public Map getContainers(Set wordHashes, Set urlselection, boolean deleteIfEmpty, boolean interruptIfEmpty, long maxTime) {
+public Map getContainers(Set wordHashes, Set urlselection, boolean interruptIfEmpty, long maxTime) {
 // return map of wordhash:indexContainer
 // retrieve entities that belong to the hashes
@@ -177,7 +177,7 @@ public class indexCachedRI implements indexRI {
 singleHash = (String) i.next();
 // retrieve index
-singleContainer = getContainer(singleHash, urlselection, deleteIfEmpty, (maxTime < 0) ? -1 : remaining / (wordHashes.size() - containers.size()));
+singleContainer = getContainer(singleHash, urlselection, (maxTime < 0) ? -1 : remaining / (wordHashes.size() - containers.size()));
 // check result
 if (((singleContainer == null) || (singleContainer.size() == 0)) && (interruptIfEmpty)) return new HashMap();
@@ -213,27 +213,27 @@ public class indexCachedRI implements indexRI {
 return c;
 }
-public boolean removeEntry(String wordHash, String urlHash, boolean deleteComplete) {
+public boolean removeEntry(String wordHash, String urlHash) {
 boolean removed = false;
-removed = removed | (riIntern.removeEntry(wordHash, urlHash, deleteComplete));
-removed = removed | (riExtern.removeEntry(wordHash, urlHash, deleteComplete));
-removed = removed | (backend.removeEntry(wordHash, urlHash, deleteComplete));
+removed = removed | (riIntern.removeEntry(wordHash, urlHash));
+removed = removed | (riExtern.removeEntry(wordHash, urlHash));
+removed = removed | (backend.removeEntry(wordHash, urlHash));
 return removed;
 }
-public int removeEntries(String wordHash, Set urlHashes, boolean deleteComplete) {
+public int removeEntries(String wordHash, Set urlHashes) {
 int removed = 0;
-removed += riIntern.removeEntries(wordHash, urlHashes, deleteComplete);
-removed += riExtern.removeEntries(wordHash, urlHashes, deleteComplete);
-removed += backend.removeEntries(wordHash, urlHashes, deleteComplete);
+removed += riIntern.removeEntries(wordHash, urlHashes);
+removed += riExtern.removeEntries(wordHash, urlHashes);
+removed += backend.removeEntries(wordHash, urlHashes);
 return removed;
 }
-public String removeEntriesExpl(String wordHash, Set urlHashes, boolean deleteComplete) {
+public String removeEntriesExpl(String wordHash, Set urlHashes) {
 String removed = "";
-removed += riIntern.removeEntries(wordHash, urlHashes, deleteComplete) + ", ";
-removed += riExtern.removeEntries(wordHash, urlHashes, deleteComplete) + ", ";
-removed += backend.removeEntries(wordHash, urlHashes, deleteComplete) + ", ";
+removed += riIntern.removeEntries(wordHash, urlHashes) + ", ";
+removed += riExtern.removeEntries(wordHash, urlHashes) + ", ";
+removed += backend.removeEntries(wordHash, urlHashes) + ", ";
 return removed;
 }

@@ -61,7 +61,7 @@ public class indexCollectionRI implements indexRI {
 }
 public long getUpdateTime(String wordHash) {
-indexContainer entries = getContainer(wordHash, null, false, -1);
+indexContainer entries = getContainer(wordHash, null, -1);
 if (entries == null) return 0;
 return entries.updated();
 }
@@ -113,9 +113,9 @@ public class indexCollectionRI implements indexRI {
 }
-public synchronized indexContainer getContainer(String wordHash, Set urlselection, boolean deleteIfEmpty, long maxtime) {
+public synchronized indexContainer getContainer(String wordHash, Set urlselection, long maxtime) {
 try {
-kelondroRowSet collection = collectionIndex.get(wordHash.getBytes(), deleteIfEmpty);
+kelondroRowSet collection = collectionIndex.get(wordHash.getBytes());
 if (collection != null) collection.select(urlselection);
 if ((collection == null) || (collection.size() == 0)) return null;
 return new indexContainer(wordHash, collection);
@@ -134,15 +134,15 @@ public class indexCollectionRI implements indexRI {
 }
 }
-public synchronized boolean removeEntry(String wordHash, String urlHash, boolean deleteComplete) {
+public synchronized boolean removeEntry(String wordHash, String urlHash) {
 HashSet hs = new HashSet();
 hs.add(urlHash.getBytes());
-return removeEntries(wordHash, hs, deleteComplete) == 1;
+return removeEntries(wordHash, hs) == 1;
 }
-public synchronized int removeEntries(String wordHash, Set urlHashes, boolean deleteComplete) {
+public synchronized int removeEntries(String wordHash, Set urlHashes) {
 try {
-return collectionIndex.remove(wordHash.getBytes(), urlHashes, deleteComplete);
+return collectionIndex.remove(wordHash.getBytes(), urlHashes);
 } catch (kelondroOutOfLimitsException e) {
 e.printStackTrace();
 return 0;

@@ -145,12 +145,12 @@ public class indexContainer extends kelondroRowSet {
 return new indexRWIEntryNew(entry);
 }
-public boolean removeEntry(String wordHash, String urlHash, boolean deleteComplete) {
+public boolean removeEntry(String wordHash, String urlHash) {
 if (!wordHash.equals(this.wordHash)) return false;
 return remove(urlHash) != null;
 }
-public int removeEntries(String wordHash, Set urlHashes, boolean deleteComplete) {
+public int removeEntries(String wordHash, Set urlHashes) {
 if (!wordHash.equals(this.wordHash)) return 0;
 int count = 0;
 Iterator i = urlHashes.iterator();

@@ -98,7 +98,7 @@ public final class indexRAMRI implements indexRI {
 public synchronized long getUpdateTime(String wordHash) {
-indexContainer entries = getContainer(wordHash, null, false, -1);
+indexContainer entries = getContainer(wordHash, null, -1);
 if (entries == null) return 0;
 return entries.updated();
 }
@@ -334,7 +334,7 @@ public final class indexRAMRI implements indexRI {
 return (((long) intTime) * (long) 1000) + initTime;
 }
-public synchronized indexContainer getContainer(String wordHash, Set urlselection, boolean deleteIfEmpty, long maxtime_dummy) {
+public synchronized indexContainer getContainer(String wordHash, Set urlselection, long maxtime_dummy) {
 // retrieve container
 indexContainer container = (indexContainer) cache.get(wordHash);
@@ -359,11 +359,11 @@ public final class indexRAMRI implements indexRI {
 return container;
 }
-public synchronized boolean removeEntry(String wordHash, String urlHash, boolean deleteComplete) {
+public synchronized boolean removeEntry(String wordHash, String urlHash) {
 indexContainer c = (indexContainer) cache.get(wordHash);
-if ((c != null) && (c.removeEntry(wordHash, urlHash, deleteComplete))) {
+if ((c != null) && (c.removeEntry(wordHash, urlHash))) {
 // removal successful
-if ((c.size() == 0) && (deleteComplete)) {
+if (c.size() == 0) {
 deleteContainer(wordHash);
 } else {
 cache.put(wordHash, c);
@@ -375,13 +375,13 @@ public final class indexRAMRI implements indexRI {
 return false;
 }
-public synchronized int removeEntries(String wordHash, Set urlHashes, boolean deleteComplete) {
+public synchronized int removeEntries(String wordHash, Set urlHashes) {
 if (urlHashes.size() == 0) return 0;
 indexContainer c = (indexContainer) cache.get(wordHash);
 int count;
-if ((c != null) && ((count = c.removeEntries(wordHash, urlHashes, deleteComplete)) > 0)) {
+if ((c != null) && ((count = c.removeEntries(wordHash, urlHashes)) > 0)) {
 // removal successful
-if ((c.size() == 0) && (deleteComplete)) {
+if (c.size() == 0) {
 deleteContainer(wordHash);
 } else {
 cache.put(wordHash, c);
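The behavioral consequence visible in these indexRAMRI hunks: deletion of an emptied container used to be gated on the caller passing deleteComplete=true; now an empty container is always dropped from the RAM cache. A reduced, hypothetical model of the new control flow (Container and the cache Map are stand-ins, and cache.remove stands in for deleteContainer):

import java.util.HashMap;
import java.util.Map;
import java.util.Set;

class RamCacheSketch {
    static class Container {
        final Set<String> urlHashes;
        Container(Set<String> urlHashes) { this.urlHashes = urlHashes; }
        int removeEntries(Set<String> toRemove) {
            int before = urlHashes.size();
            urlHashes.removeAll(toRemove);
            return before - urlHashes.size();
        }
        int size() { return urlHashes.size(); }
    }

    final Map<String, Container> cache = new HashMap<String, Container>();

    int removeEntries(String wordHash, Set<String> urlHashes) {
        if (urlHashes.isEmpty()) return 0;
        Container c = cache.get(wordHash);
        int count;
        if ((c != null) && ((count = c.removeEntries(urlHashes)) > 0)) {
            if (c.size() == 0) {
                cache.remove(wordHash); // now unconditional; was gated on deleteComplete
            } else {
                cache.put(wordHash, c);
            }
            return count;
        }
        return 0;
    }
}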

@@ -39,11 +39,11 @@ public interface indexRI {
 public long getUpdateTime(String wordHash);
 public int indexSize(String wordHash);
-public indexContainer getContainer(String wordHash, Set urlselection, boolean deleteIfEmpty, long maxtime);
+public indexContainer getContainer(String wordHash, Set urlselection, long maxtime);
 public indexContainer deleteContainer(String wordHash);
-public boolean removeEntry(String wordHash, String urlHash, boolean deleteComplete);
-public int removeEntries(String wordHash, Set urlHashes, boolean deleteComplete);
+public boolean removeEntry(String wordHash, String urlHash);
+public int removeEntries(String wordHash, Set urlHashes);
 public void addEntry(String wordHash, indexRWIEntry entry, long updateTime, boolean dhtCase);
 public void addEntries(indexContainer newEntries, long creationTime, boolean dhtCase);

@@ -262,7 +262,7 @@ public class kelondroCollectionIndex {
 putmergeremove(key, collection, true, null);
 }
-public synchronized int remove(byte[] key, Set removekeys, boolean deletecomplete) throws IOException, kelondroOutOfLimitsException {
+public synchronized int remove(byte[] key, Set removekeys) throws IOException, kelondroOutOfLimitsException {
 return putmergeremove(key, null, false, removekeys);
 }
@@ -406,7 +406,7 @@ public class kelondroCollectionIndex {
 return (int) indexrow.getColLong(idx_col_chunkcount);
 }
-public synchronized kelondroRowSet get(byte[] key, boolean deleteIfEmpty) throws IOException {
+public synchronized kelondroRowSet get(byte[] key) throws IOException {
 // find an entry, if one exists
 kelondroRow.Entry indexrow = index.get(key);
 if (indexrow == null) return null;

@@ -71,7 +71,6 @@ public class plasmaDHTChunk {
 private plasmaWordIndex wordIndex;
 private serverLog log;
-private plasmaCrawlLURL lurls;
 private int status = chunkStatus_UNDEFINED;
 private String startPointHash;
@@ -124,11 +123,10 @@ public class plasmaDHTChunk {
 return this.status;
 }
-public plasmaDHTChunk(serverLog log, plasmaWordIndex wordIndex, plasmaCrawlLURL lurls, int minCount, int maxCount, int maxtime) {
+public plasmaDHTChunk(serverLog log, plasmaWordIndex wordIndex, int minCount, int maxCount, int maxtime) {
 try {
 this.log = log;
 this.wordIndex = wordIndex;
-this.lurls = lurls;
 this.startPointHash = selectTransferStart();
 log.logFine("Selected hash " + this.startPointHash + " as start point for index distribution, distance = " + yacyDHTAction.dhtDistance(yacyCore.seedDB.mySeed.hash, this.startPointHash));
 selectTransferContainers(this.startPointHash, minCount, maxCount, maxtime);
@@ -144,11 +142,10 @@ public class plasmaDHTChunk {
 }
 }
-public plasmaDHTChunk(serverLog log, plasmaWordIndex wordIndex, plasmaCrawlLURL lurls, int minCount, int maxCount, int maxtime, String startHash) {
+public plasmaDHTChunk(serverLog log, plasmaWordIndex wordIndex, int minCount, int maxCount, int maxtime, String startHash) {
 try {
 this.log = log;
 this.wordIndex = wordIndex;
-this.lurls = lurls;
 log.logFine("Demanded hash " + startHash + " as start point for index distribution, distance = " + yacyDHTAction.dhtDistance(yacyCore.seedDB.mySeed.hash, this.startPointHash));
 selectTransferContainers(startHash, minCount, maxCount, maxtime);
@@ -233,12 +230,12 @@ public class plasmaDHTChunk {
 urlIter.remove();
 continue;
 }
-lurl = lurls.load(iEntry.urlHash(), iEntry);
+lurl = wordIndex.loadedURL.load(iEntry.urlHash(), iEntry);
 if ((lurl == null) || (lurl.comp().url() == null)) {
 //yacyCore.log.logFine("DEBUG selectTransferContainersResource: not-bound url hash '" + iEntry.urlHash() + "' for word hash " + container.getWordHash());
 notBoundCounter++;
 urlIter.remove();
-wordIndex.removeEntry(container.getWordHash(), iEntry.urlHash(), true);
+wordIndex.removeEntry(container.getWordHash(), iEntry.urlHash());
 } else {
 urlCache.put(iEntry.urlHash(), lurl);
 //yacyCore.log.logFine("DEBUG selectTransferContainersResource: added url hash '" + iEntry.urlHash() + "' to urlCache for word hash " + container.getWordHash());
@@ -302,7 +299,7 @@ public class plasmaDHTChunk {
 urlHashes.add(iEntry.urlHash());
 }
 String wordHash = indexContainers[i].getWordHash();
-count = wordIndex.removeEntriesExpl(this.indexContainers[i].getWordHash(), urlHashes, true);
+count = wordIndex.removeEntriesExpl(this.indexContainers[i].getWordHash(), urlHashes);
 if (log.isFine())
 log.logFine("Deleted partial index (" + c + " URLs) for word " + wordHash + "; " + this.wordIndex.indexSize(wordHash) + " entries left");
 this.indexContainers[i] = null;

@@ -169,7 +169,7 @@ public class plasmaDHTFlush extends Thread {
 // selecting 500 words to transfer
 this.status = "Running: Selecting chunk " + iteration;
-newDHTChunk = new plasmaDHTChunk(this.log, this.wordIndex, this.sb.wordIndex.loadedURL, this.chunkSize/3*2, this.chunkSize, -1, this.startPointHash);
+newDHTChunk = new plasmaDHTChunk(this.log, this.wordIndex, this.chunkSize/3*2, this.chunkSize, -1, this.startPointHash);
 /* If we havn't selected a word chunk this could be because of
 * a) no words are left in the index

@@ -421,7 +421,7 @@ public class plasmaRankingCRProcess {
 if (anchor.length() == 6) anchorDom = anchor; else anchorDom = anchor.substring(6);
 // update domain-specific entry
-rci_entry = rci.get(anchorDom.getBytes(), false);
+rci_entry = rci.get(anchorDom.getBytes());
 if (rci_entry == null) rci_entry = new kelondroRowSet(RCI_coli, 0);
 rci_entry.add(refereeDom.getBytes());

@@ -401,7 +401,7 @@ public final class plasmaSearchEvent extends Thread implements Runnable {
 log.logFine("filtered out " + page.comp().url().toString());
 // filter out bad results
 Iterator wi = query.queryHashes.iterator();
-while (wi.hasNext()) wordIndex.removeEntry((String) wi.next(), page.hash(), true);
+while (wi.hasNext()) wordIndex.removeEntry((String) wi.next(), page.hash());
 } else if (query.contentdom != plasmaSearchQuery.CONTENTDOM_TEXT) {
 if ((query.contentdom == plasmaSearchQuery.CONTENTDOM_AUDIO) && (page.laudio() > 0)) acc.addResult(page, preranking);
 else if ((query.contentdom == plasmaSearchQuery.CONTENTDOM_VIDEO) && (page.lvideo() > 0)) acc.addResult(page, preranking);

@@ -119,7 +119,6 @@ import java.util.HashMap;
 import java.util.Hashtable;
 import java.util.Iterator;
 import java.util.Map;
-import java.util.Set;
 import java.util.TreeSet;
 import de.anomic.data.blogBoard;
@@ -1003,10 +1002,6 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
 //return processStack.size() + cacheLoader.size() + noticeURL.stackSize();
 }
-public int cacheSizeMin() {
-return wordIndex.size();
-}
 public void enQueue(Object job) {
 if (!(job instanceof plasmaSwitchboardQueue.Entry)) {
 System.out.println("internal error at plasmaSwitchboard.enQueue: wrong job type");
@@ -1063,7 +1058,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
 ) {
 // generate new chunk
 int minChunkSize = (int) getConfigLong("indexDistribution.minChunkSize", 30);
-dhtTransferChunk = new plasmaDHTChunk(this.log, wordIndex, wordIndex.loadedURL, minChunkSize, dhtTransferIndexCount, 5000);
+dhtTransferChunk = new plasmaDHTChunk(this.log, wordIndex, minChunkSize, dhtTransferIndexCount, 5000);
 doneSomething = true;
 }
@@ -2115,7 +2110,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
 filename = comp.url().getFile();
 if ((seed == null) || ((address = seed.getAddress()) == null)) {
 // seed is not known from here
-removeReferences(urlentry.hash(), plasmaCondenser.getWords(("yacyshare " + filename.replace('?', ' ') + " " + comp.descr()).getBytes(), "UTF-8"));
+wordIndex.removeReferences(plasmaCondenser.getWords(("yacyshare " + filename.replace('?', ' ') + " " + comp.descr()).getBytes(), "UTF-8"), urlentry.hash());
 wordIndex.loadedURL.remove(urlentry.hash()); // clean up
 continue; // next result
 }
@@ -2264,7 +2259,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
 // delete all word references
 int count = 0;
-if (witer != null) count = removeReferences(urlhash, witer);
+if (witer != null) count = wordIndex.removeReferences(witer, urlhash);
 // finally delete the url entry itself
 wordIndex.loadedURL.remove(urlhash);
@@ -2275,39 +2270,6 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
 if (resourceContent != null) try { resourceContent.close(); } catch (Exception e) {/* ignore this */}
 }
 }
-public int removeReferences(URL url, Set words) {
-return removeReferences(plasmaURL.urlHash(url), words);
-}
-public int removeReferences(final String urlhash, final Set words) {
-// sequentially delete all word references
-// returns number of deletions
-Iterator iter = words.iterator();
-String word;
-int count = 0;
-while (iter.hasNext()) {
-word = (String) iter.next();
-// delete the URL reference in this word index
-if (wordIndex.removeEntry(plasmaCondenser.word2hash(word), urlhash, true)) count++;
-}
-return count;
-}
-public int removeReferences(final String urlhash, final Iterator wordStatPropIterator) {
-// sequentially delete all word references
-// returns number of deletions
-Map.Entry entry;
-String word;
-int count = 0;
-while (wordStatPropIterator.hasNext()) {
-entry = (Map.Entry) wordStatPropIterator.next();
-word = (String) entry.getKey();
-// delete the URL reference in this word index
-if (wordIndex.removeEntry(plasmaCondenser.word2hash(word), urlhash, true)) count++;
-}
-return count;
-}
 public int adminAuthenticated(httpHeader header) {
@@ -2402,7 +2364,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
 return "no DHT distribution: not enough words - wordIndex.size() = " + wordIndex.size();
 }
 if ((getConfig("allowDistributeIndexWhileCrawling","false").equalsIgnoreCase("false")) &&
-((noticeURL.stackSize() > 0) || (sbQueue.size() > 3))) {
+((noticeURL.stackSize() > 0) /*|| (sbQueue.size() > 3)*/)) {
 return "no DHT distribution: crawl in progress: noticeURL.stackSize() = " + noticeURL.stackSize() + ", sbQueue.size() = " + sbQueue.size();
 }
 return null;

@@ -139,7 +139,7 @@ public final class plasmaWordIndex implements indexRI {
 }
 public long getUpdateTime(String wordHash) {
-indexContainer entries = getContainer(wordHash, null, false, -1);
+indexContainer entries = getContainer(wordHash, null, -1);
 if (entries == null) return 0;
 return entries.updated();
 }
@@ -163,21 +163,6 @@ public final class plasmaWordIndex implements indexRI {
 }
 }
-/*
-private indexContainer convertOld2New(indexContainer entries) {
-// convert old entries to new entries
-indexContainer newentries = new indexContainer(entries.getWordHash(), indexRWIEntryNew.urlEntryRow);
-Iterator i = entries.entries();
-indexRWIEntryOld old;
-while (i.hasNext()) {
-old = (indexRWIEntryOld) i.next();
-if (old.urlHash() != null) {
-newentries.add(new indexRWIEntryNew(old));
-}
-}
-return newentries;
-}
-*/
 public void addEntries(indexContainer entries, long updateTime, boolean dhtInCase) {
 assert (entries.row().objectsize() == indexRWIEntryNew.urlEntryRow.objectsize());
@@ -306,21 +291,21 @@ public final class plasmaWordIndex implements indexRI {
 return condenser.RESULT_SIMI_WORDS;
 }
-public indexContainer getContainer(String wordHash, Set urlselection, boolean deleteIfEmpty, long maxTime) {
+public indexContainer getContainer(String wordHash, Set urlselection, long maxTime) {
 // get from cache
-indexContainer container = dhtOutCache.getContainer(wordHash, urlselection, true, -1);
+indexContainer container = dhtOutCache.getContainer(wordHash, urlselection, -1);
 if (container == null) {
-container = dhtInCache.getContainer(wordHash, urlselection, true, -1);
+container = dhtInCache.getContainer(wordHash, urlselection, -1);
 } else {
-container.add(dhtInCache.getContainer(wordHash, urlselection, true, -1), -1);
+container.add(dhtInCache.getContainer(wordHash, urlselection, -1), -1);
 }
 // get from collection index
 if (container == null) {
-container = collections.getContainer(wordHash, urlselection, true, (maxTime < 0) ? -1 : maxTime);
+container = collections.getContainer(wordHash, urlselection, (maxTime < 0) ? -1 : maxTime);
 } else {
-container.add(collections.getContainer(wordHash, urlselection, true, (maxTime < 0) ? -1 : maxTime), -1);
+container.add(collections.getContainer(wordHash, urlselection, (maxTime < 0) ? -1 : maxTime), -1);
 }
 return container;
 }
@@ -345,7 +330,7 @@ public final class plasmaWordIndex implements indexRI {
 singleHash = (String) i.next();
 // retrieve index
-singleContainer = getContainer(singleHash, urlselection, deleteIfEmpty, (maxTime < 0) ? -1 : remaining / (wordHashes.size() - containers.size()));
+singleContainer = getContainer(singleHash, urlselection, (maxTime < 0) ? -1 : remaining / (wordHashes.size() - containers.size()));
 // check result
 if (((singleContainer == null) || (singleContainer.size() == 0)) && (interruptIfEmpty)) return new HashMap();
@@ -383,39 +368,66 @@ public final class plasmaWordIndex implements indexRI {
 return c;
 }
-public boolean removeEntry(String wordHash, String urlHash, boolean deleteComplete) {
+public boolean removeEntry(String wordHash, String urlHash) {
 boolean removed = false;
-removed = removed | (dhtInCache.removeEntry(wordHash, urlHash, deleteComplete));
-removed = removed | (dhtOutCache.removeEntry(wordHash, urlHash, deleteComplete));
-removed = removed | (collections.removeEntry(wordHash, urlHash, deleteComplete));
+removed = removed | (dhtInCache.removeEntry(wordHash, urlHash));
+removed = removed | (dhtOutCache.removeEntry(wordHash, urlHash));
+removed = removed | (collections.removeEntry(wordHash, urlHash));
 return removed;
 }
-public int removeEntries(String wordHash, Set urlHashes, boolean deleteComplete) {
+public int removeEntries(String wordHash, Set urlHashes) {
 int removed = 0;
-removed += dhtInCache.removeEntries(wordHash, urlHashes, deleteComplete);
-removed += dhtOutCache.removeEntries(wordHash, urlHashes, deleteComplete);
-removed += collections.removeEntries(wordHash, urlHashes, deleteComplete);
+removed += dhtInCache.removeEntries(wordHash, urlHashes);
+removed += dhtOutCache.removeEntries(wordHash, urlHashes);
+removed += collections.removeEntries(wordHash, urlHashes);
 return removed;
 }
-public String removeEntriesExpl(String wordHash, Set urlHashes, boolean deleteComplete) {
+public String removeEntriesExpl(String wordHash, Set urlHashes) {
 String removed = "";
-removed += dhtInCache.removeEntries(wordHash, urlHashes, deleteComplete) + ", ";
-removed += dhtOutCache.removeEntries(wordHash, urlHashes, deleteComplete) + ", ";
-removed += collections.removeEntries(wordHash, urlHashes, deleteComplete);
+removed += dhtInCache.removeEntries(wordHash, urlHashes) + ", ";
+removed += dhtOutCache.removeEntries(wordHash, urlHashes) + ", ";
+removed += collections.removeEntries(wordHash, urlHashes);
 return removed;
 }
 public static final int RL_RAMCACHE = 0;
 public static final int RL_COLLECTIONS = 1;
+public int removeReferences(Set words, String urlhash) {
+// sequentially delete all word references
+// returns number of deletions
+Iterator iter = words.iterator();
+String word;
+int count = 0;
+while (iter.hasNext()) {
+word = (String) iter.next();
+// delete the URL reference in this word index
+if (removeEntry(plasmaCondenser.word2hash(word), urlhash)) count++;
+}
+return count;
+}
+public int removeReferences(Iterator wordStatPropIterator, String urlhash) {
+// sequentially delete all word references
+// returns number of deletions
+Map.Entry entry;
+String word;
+int count = 0;
+while (wordStatPropIterator.hasNext()) {
+entry = (Map.Entry) wordStatPropIterator.next();
+word = (String) entry.getKey();
+// delete the URL reference in this word index
+if (removeEntry(plasmaCondenser.word2hash(word), urlhash)) count++;
+}
+return count;
+}
 public int tryRemoveURLs(String urlHash) {
 // this tries to delete an index from the cache that has this
 // urlHash assigned. This can only work if the entry is really fresh
 // and can be found in the RAM cache
 // this returns the number of deletion that had been possible
-return dhtInCache.tryRemoveURLs(urlHash) | dhtOutCache.tryRemoveURLs(urlHash);
+int d = dhtInCache.tryRemoveURLs(urlHash);
+if (d > 0) return d; else return dhtOutCache.tryRemoveURLs(urlHash);
 }
 public TreeSet indexContainerSet(String startHash, boolean ram, boolean rot, int count) {
@@ -540,7 +552,7 @@ public final class plasmaWordIndex implements indexRI {
 }
 }
 if (urlHashs.size() > 0) {
-int removed = removeEntries(container.getWordHash(), urlHashs, true);
+int removed = removeEntries(container.getWordHash(), urlHashs);
 serverLog.logFine("INDEXCLEANER", container.getWordHash() + ": " + removed + " of " + container.size() + " URL-entries deleted");
 lastWordHash = container.getWordHash();
 lastDeletionCounter = urlHashs.size();

@@ -64,12 +64,10 @@ public final class plasmaWordIndexFile {
 private final String theWordHash;
 private kelondroTree theIndex;
 private File theLocation;
-private boolean delete;
-public plasmaWordIndexFile(File databaseRoot, String wordHash, boolean deleteIfEmpty) {
+public plasmaWordIndexFile(File databaseRoot, String wordHash) {
 theWordHash = wordHash;
 theIndex = indexFile(databaseRoot, wordHash);
-delete = deleteIfEmpty;
 }
 public static boolean removePlasmaIndex(File databaseRoot, String wordHash) {
@@ -112,7 +110,7 @@ public final class plasmaWordIndexFile {
 public int size() {
 if (theIndex == null) return 0;
 int size = theIndex.size();
-if ((size == 0) && (delete)) {
+if (size == 0) {
 deleteComplete();
 return 0;
 } else {

@@ -95,7 +95,7 @@ public class plasmaWordIndexFileCluster implements indexRI {
 }
 public Object next() {
-return getContainer((String) wordIterator.next(), null, true, 100);
+return getContainer((String) wordIterator.next(), null, 100);
 }
 public void remove() {
@@ -224,11 +224,11 @@ public class plasmaWordIndexFileCluster implements indexRI {
 return plasmaWordIndexFile.wordHash2path(databaseRoot, wordHash).exists();
 }
-public synchronized indexContainer getContainer(String wordHash, Set urlselection, boolean deleteIfEmpty, long maxTime) {
+public synchronized indexContainer getContainer(String wordHash, Set urlselection, long maxTime) {
 long start = System.currentTimeMillis();
 if ((maxTime < 0) || (maxTime > 60000)) maxTime=60000; // maximum is one minute
 if (exists(wordHash)) {
-plasmaWordIndexFile entity = this.getEntity(wordHash, deleteIfEmpty, (maxTime < 0) ? -1 : maxTime * 9 / 10);
+plasmaWordIndexFile entity = this.getEntity(wordHash, (maxTime < 0) ? -1 : maxTime * 9 / 10);
 indexContainer container = new indexContainer(wordHash, indexRWIEntryNew.urlEntryRow);
 indexRWIEntryNew entry;
 Iterator i = entity.elements(true);
@@ -242,8 +242,8 @@ public class plasmaWordIndexFileCluster implements indexRI {
 }
 }
-public plasmaWordIndexFile getEntity(String wordHash, boolean deleteIfEmpty, long maxTime) {
-return new plasmaWordIndexFile(databaseRoot, wordHash, deleteIfEmpty);
+public plasmaWordIndexFile getEntity(String wordHash, long maxTime) {
+return new plasmaWordIndexFile(databaseRoot, wordHash);
 }
 public long getUpdateTime(String wordHash) {
@@ -256,11 +256,11 @@ public class plasmaWordIndexFileCluster implements indexRI {
 return null;
 }
-public boolean removeEntry(String wordHash, String urlHash, boolean deleteComplete) {
+public boolean removeEntry(String wordHash, String urlHash) {
 throw new UnsupportedOperationException("word files are not supported in YaCy 0.491 and above");
 }
-public int removeEntries(String wordHash, Set urlHashes, boolean deleteComplete) {
+public int removeEntries(String wordHash, Set urlHashes) {
 throw new UnsupportedOperationException("word files are not supported in YaCy 0.491 and above");
 }

@@ -136,7 +136,7 @@ public class yacyPeerActions {
 seedDB.mySeed.put(yacySeed.UPTIME, Long.toString(uptime/60)); // the number of minutes that the peer is up in minutes/day (moving average MA30)
 seedDB.mySeed.put(yacySeed.LCOUNT, Integer.toString(sb.wordIndex.loadedURL.size())); // the number of links that the peer has stored (LURL's)
 seedDB.mySeed.put(yacySeed.NCOUNT, Integer.toString(sb.noticeURL.stackSize())); // the number of links that the peer has noticed, but not loaded (NURL's)
-seedDB.mySeed.put(yacySeed.ICOUNT, Integer.toString(sb.cacheSizeMin())); // the minimum number of words that the peer has indexed (as it says)
+seedDB.mySeed.put(yacySeed.ICOUNT, Integer.toString(sb.wordIndex.size())); // the minimum number of words that the peer has indexed (as it says)
 seedDB.mySeed.put(yacySeed.SCOUNT, Integer.toString(seedDB.sizeConnected())); // the number of seeds that the peer has stored
 seedDB.mySeed.put(yacySeed.CCOUNT, Double.toString(((int) ((seedDB.sizeConnected() + seedDB.sizeDisconnected() + seedDB.sizePotential()) * 60.0 / (uptime + 1.01)) * 100) / 100.0)); // the number of clients that the peer connects (as connects/hour)
 seedDB.mySeed.put(yacySeed.VERSION, sb.getConfig("version", ""));

@@ -697,7 +697,7 @@ public final class yacy {
 }
 plasmaWordIndexFile entity = null;
 try {
-entity = new plasmaWordIndexFile(oldDatabaseRoot, wordhash, true);
+entity = new plasmaWordIndexFile(oldDatabaseRoot, wordhash);
 int size = entity.size();
 indexContainer container = new indexContainer(wordhash, indexRWIEntryNew.urlEntryRow);
