Bugfix for an index remove bug that appeared after a search where snippet-loading triggered word removal.

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@2869 6c8d7289-2bf4-0310-a012-ef5d649a1542
orbiter 18 years ago
parent 0e79f2fd7e
commit 78b7f6f7fd
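The core of the fix is in indexRAMCacheRI.removeEntry / removeEntries (see the hunks below): the old code deleted the word container from the cache first and only re-added it when the removal failed, so a successful URL removal (as triggered by snippet loading) silently dropped the whole word index from the RAM cache. The patched code reads the container in place, removes the entry, and deletes the container only when it has become empty. What follows is a minimal stand-alone sketch of that difference, using a plain HashMap and Set in place of YaCy's cache and indexContainer; the class and method names here are illustrative, not YaCy API.

import java.util.*;

public class RemoveEntrySketch {
    private final Map<String, Set<String>> cache = new HashMap<String, Set<String>>();

    // Old behaviour (simplified): the container is removed from the cache first and
    // only re-inserted when the entry removal fails, so a successful removal loses
    // the whole word container.
    public synchronized boolean removeEntryOld(String wordHash, String urlHash) {
        Set<String> c = cache.remove(wordHash);      // container is gone from the cache now
        if (c != null) {
            if (c.remove(urlHash)) return true;      // bug: container never put back
            cache.put(wordHash, c);                  // only restored when removal failed
        }
        return false;
    }

    // New behaviour (simplified): mutate the container in place and drop it only
    // when it is empty, mirroring the patched indexRAMCacheRI.removeEntry.
    public synchronized boolean removeEntryNew(String wordHash, String urlHash) {
        Set<String> c = cache.get(wordHash);
        if (c != null && c.remove(urlHash)) {
            if (c.isEmpty()) cache.remove(wordHash); else cache.put(wordHash, c);
            return true;
        }
        return false;
    }

    public static void main(String[] args) {
        RemoveEntrySketch s = new RemoveEntrySketch();
        s.cache.put("word", new HashSet<String>(Arrays.asList("url1", "url2")));
        s.removeEntryOld("word", "url1");
        System.out.println(s.cache.containsKey("word")); // false: the whole word entry was lost
        s.cache.put("word", new HashSet<String>(Arrays.asList("url1", "url2")));
        s.removeEntryNew("word", "url1");
        System.out.println(s.cache.get("word"));         // [url2]: only url1 was removed
    }
}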

@ -114,13 +114,13 @@ public final class transferRWI {
granted = false; // don't accept more words if there are too many words to flush
result = "busy";
pause = 60000;
} else if ((checkLimit && sb.wordIndex.dhtOutCacheSize() > sb.getConfigLong("wordCacheMaxCount", 20000)) || ((sb.wordIndex.busyCacheFlush) && (!shortCacheFlush))) {
} /* else if ((checkLimit && sb.wordIndex.dhtOutCacheSize() > sb.getConfigLong("wordCacheMaxCount", 20000)) || ((sb.wordIndex.busyCacheFlush) && (!shortCacheFlush))) {
// we are too busy flushing the ramCache to receive indexes
sb.getLog().logInfo("Rejecting RWIs from peer " + otherPeerName + ". We are too busy (wordcachesize=" + sb.wordIndex.dhtOutCacheSize() + ").");
granted = false; // don't accept more words if there are too many words to flush
result = "busy";
pause = 300000;
} else {
} */ else {
// we want and can receive indexes
// log value status (currently added to find outOfMemory error
sb.getLog().logFine("Processing " + indexes.length + " bytes / " + wordc + " words / " + entryc + " entries from " + otherPeerName);

@ -1,44 +0,0 @@
// indexAsbtractRI.java
// (C) 2006 by Michael Peter Christen; mc@anomic.de, Frankfurt a. M., Germany
// first published 26.05.2006 on http://www.anomic.de
//
// This is a part of YaCy, a peer-to-peer based web search engine
//
// $LastChangedDate: 2006-04-02 22:40:07 +0200 (So, 02 Apr 2006) $
// $LastChangedRevision: 1986 $
// $LastChangedBy: orbiter $
//
// LICENSE
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
package de.anomic.index;
public abstract class indexAbstractRI implements indexRI {
public indexContainer addEntry(String wordHash, indexEntry newEntry, long updateTime, boolean dhtCase) {
indexContainer container = new indexContainer(wordHash);
container.add(newEntry);
return addEntries(container, updateTime, dhtCase);
}
public long getUpdateTime(String wordHash) {
indexContainer entries = getContainer(wordHash, null, false, -1);
if (entries == null) return 0;
return entries.updated();
}
}

@ -40,7 +40,7 @@ import de.anomic.kelondro.kelondroRowCollection;
import de.anomic.kelondro.kelondroRowSet;
import de.anomic.server.logging.serverLog;
public class indexCollectionRI extends indexAbstractRI implements indexRI {
public class indexCollectionRI implements indexRI {
kelondroCollectionIndex collectionIndex;
@ -61,7 +61,13 @@ public class indexCollectionRI extends indexAbstractRI implements indexRI {
}
}
public int size() {
public long getUpdateTime(String wordHash) {
indexContainer entries = getContainer(wordHash, null, false, -1);
if (entries == null) return 0;
return entries.updated();
}
public synchronized int size() {
try {
return collectionIndex.size();
} catch (IOException e) {
@ -70,7 +76,7 @@ public class indexCollectionRI extends indexAbstractRI implements indexRI {
}
}
public int indexSize(String wordHash) {
public synchronized int indexSize(String wordHash) {
try {
return collectionIndex.indexSize(wordHash.getBytes());
} catch (IOException e) {
@ -78,7 +84,7 @@ public class indexCollectionRI extends indexAbstractRI implements indexRI {
}
}
public Iterator wordContainers(String startWordHash, boolean rot) {
public synchronized Iterator wordContainers(String startWordHash, boolean rot) {
return new wordContainersIterator(startWordHash, rot);
}
@ -108,7 +114,7 @@ public class indexCollectionRI extends indexAbstractRI implements indexRI {
}
public indexContainer getContainer(String wordHash, Set urlselection, boolean deleteIfEmpty, long maxtime) {
public synchronized indexContainer getContainer(String wordHash, Set urlselection, boolean deleteIfEmpty, long maxtime) {
try {
kelondroRowSet collection = collectionIndex.get(wordHash.getBytes(), deleteIfEmpty);
if (collection != null) collection.select(urlselection);
@ -119,7 +125,7 @@ public class indexCollectionRI extends indexAbstractRI implements indexRI {
}
}
public indexContainer deleteContainer(String wordHash) {
public synchronized indexContainer deleteContainer(String wordHash) {
try {
kelondroRowSet collection = collectionIndex.delete(wordHash.getBytes());
if (collection == null) return null;
@ -129,13 +135,13 @@ public class indexCollectionRI extends indexAbstractRI implements indexRI {
}
}
public boolean removeEntry(String wordHash, String urlHash, boolean deleteComplete) {
public synchronized boolean removeEntry(String wordHash, String urlHash, boolean deleteComplete) {
HashSet hs = new HashSet();
hs.add(urlHash.getBytes());
return removeEntries(wordHash, hs, deleteComplete) == 1;
}
public int removeEntries(String wordHash, Set urlHashes, boolean deleteComplete) {
public synchronized int removeEntries(String wordHash, Set urlHashes, boolean deleteComplete) {
try {
return collectionIndex.remove(wordHash.getBytes(), urlHashes, deleteComplete);
} catch (kelondroOutOfLimitsException e) {
@ -147,7 +153,13 @@ public class indexCollectionRI extends indexAbstractRI implements indexRI {
}
}
public indexContainer addEntries(indexContainer newEntries, long creationTime, boolean dhtCase) {
public synchronized indexContainer addEntry(String wordHash, indexEntry newEntry, long updateTime, boolean dhtCase) {
indexContainer container = new indexContainer(wordHash);
container.add(newEntry);
return addEntries(container, updateTime, dhtCase);
}
public synchronized indexContainer addEntries(indexContainer newEntries, long creationTime, boolean dhtCase) {
String wordHash = newEntries.getWordHash();
try {
collectionIndex.merge(wordHash.getBytes(), (kelondroRowCollection) newEntries);
@ -160,7 +172,7 @@ public class indexCollectionRI extends indexAbstractRI implements indexRI {
}
}
public void close(int waitingSeconds) {
public synchronized void close(int waitingSeconds) {
try {
collectionIndex.close();
} catch (IOException e) {

@ -42,7 +42,7 @@ import de.anomic.plasma.plasmaWordIndexAssortment;
import de.anomic.server.logging.serverLog;
import de.anomic.yacy.yacySeedDB;
public final class indexRAMCacheRI extends indexAbstractRI implements indexRI {
public final class indexRAMCacheRI implements indexRI {
// environment constants
public static final long wCacheMaxAge = 1000 * 60 * 30; // milliseconds; 30 minutes
@ -87,6 +87,12 @@ public final class indexRAMCacheRI extends indexAbstractRI implements indexRI {
}
}
public synchronized long getUpdateTime(String wordHash) {
indexContainer entries = getContainer(wordHash, null, false, -1);
if (entries == null) return 0;
return entries.updated();
}
private void dump(int waitingSeconds) throws IOException {
log.logConfig("creating dump for index cache '" + indexArrayFileName + "', " + cache.size() + " words (and much more urls)");
File indexDumpFile = new File(databaseRoot, indexArrayFileName);
@ -217,18 +223,18 @@ public final class indexRAMCacheRI extends indexAbstractRI implements indexRI {
return this.cacheMaxCount;
}
public int size() {
public synchronized int size() {
return cache.size();
}
public int indexSize(String wordHash) {
public synchronized int indexSize(String wordHash) {
int size = 0;
indexContainer cacheIndex = (indexContainer) cache.get(wordHash);
if (cacheIndex != null) size += cacheIndex.size();
return size;
}
public Iterator wordContainers(String startWordHash, boolean rot) {
public synchronized Iterator wordContainers(String startWordHash, boolean rot) {
// we return an iterator object that creates top-level-clones of the indexContainers
// in the cache, so that manipulations of the iterated objects do not change
// objects in the cache.
@ -276,14 +282,13 @@ public final class indexRAMCacheRI extends indexAbstractRI implements indexRI {
}
public String bestFlushWordHash() {
public synchronized String bestFlushWordHash() {
// select appropriate hash
// we have 2 different methods to find a good hash:
// - the oldest entry in the cache
// - the entry with maximum count
if (cache.size() == 0) return null;
try {
synchronized (cache) {
String hash = null;
int count = hashScore.getMaxScore();
if ((count >= cacheReferenceLimit) &&
@ -307,7 +312,6 @@ public final class indexRAMCacheRI extends indexAbstractRI implements indexRI {
hash = (String) hashDate.getMinObject(); // flush oldest entries
}
return hash;
}
} catch (Exception e) {
log.logSevere("flushFromMem: " + e.getMessage(), e);
}
@ -322,7 +326,7 @@ public final class indexRAMCacheRI extends indexAbstractRI implements indexRI {
return (((long) intTime) * (long) 1000) + initTime;
}
public indexContainer getContainer(String wordHash, Set urlselection, boolean deleteIfEmpty, long maxtime_dummy) {
public synchronized indexContainer getContainer(String wordHash, Set urlselection, boolean deleteIfEmpty, long maxtime_dummy) {
// retrieve container
indexContainer container = (indexContainer) cache.get(wordHash);
@ -339,46 +343,53 @@ public final class indexRAMCacheRI extends indexAbstractRI implements indexRI {
return container;
}
public indexContainer deleteContainer(String wordHash) {
public synchronized indexContainer deleteContainer(String wordHash) {
// returns the index that had been deleted
synchronized (cache) {
indexContainer container = (indexContainer) cache.remove(wordHash);
hashScore.deleteScore(wordHash);
hashDate.deleteScore(wordHash);
return container;
}
indexContainer container = (indexContainer) cache.remove(wordHash);
hashScore.deleteScore(wordHash);
hashDate.deleteScore(wordHash);
return container;
}
public boolean removeEntry(String wordHash, String urlHash, boolean deleteComplete) {
synchronized (cache) {
indexContainer c = (indexContainer) deleteContainer(wordHash);
if (c != null) {
if (c.removeEntry(wordHash, urlHash, deleteComplete)) return true;
this.addEntries(c, System.currentTimeMillis(), false);
public synchronized boolean removeEntry(String wordHash, String urlHash, boolean deleteComplete) {
indexContainer c = (indexContainer) cache.get(wordHash);
if ((c != null) && (c.removeEntry(wordHash, urlHash, deleteComplete))) {
// removal successful
if ((c.size() == 0) && (deleteComplete)) {
deleteContainer(wordHash);
} else {
cache.put(wordHash, c);
hashScore.decScore(wordHash);
hashDate.setScore(wordHash, intTime(System.currentTimeMillis()));
}
return true;
}
return false;
}
public int removeEntries(String wordHash, Set urlHashes, boolean deleteComplete) {
public synchronized int removeEntries(String wordHash, Set urlHashes, boolean deleteComplete) {
if (urlHashes.size() == 0) return 0;
int count = 0;
synchronized (cache) {
indexContainer c = (indexContainer) deleteContainer(wordHash);
if (c != null) {
count = c.removeEntries(wordHash, urlHashes, deleteComplete);
if (c.size() != 0) this.addEntries(c, System.currentTimeMillis(), false);
indexContainer c = (indexContainer) cache.get(wordHash);
int count;
if ((c != null) && ((count = c.removeEntries(wordHash, urlHashes, deleteComplete)) > 0)) {
// removal successful
if ((c.size() == 0) && (deleteComplete)) {
deleteContainer(wordHash);
} else {
cache.put(wordHash, c);
hashScore.setScore(wordHash, c.size());
hashDate.setScore(wordHash, intTime(System.currentTimeMillis()));
}
return count;
}
return count;
return 0;
}
public int tryRemoveURLs(String urlHash) {
public synchronized int tryRemoveURLs(String urlHash) {
// this tries to delete an index from the cache that has this
// urlHash assigned. This can only work if the entry is really fresh
// Such entries must be searched in the latest entries
int delCount = 0;
synchronized (cache) {
Iterator i = cache.entrySet().iterator();
Map.Entry entry;
String wordhash;
@ -398,16 +409,14 @@ public final class indexRAMCacheRI extends indexAbstractRI implements indexRI {
delCount++;
}
}
}
return delCount;
}
public indexContainer addEntries(indexContainer container, long updateTime, boolean dhtCase) {
public synchronized indexContainer addEntries(indexContainer container, long updateTime, boolean dhtCase) {
// this puts the entries into the cache, not into the assortment directly
int added = 0;
// put new words into cache
synchronized (cache) {
// put container into wCache
String wordHash = container.getWordHash();
indexContainer entries = (indexContainer) cache.get(wordHash); // null pointer exception? wordhash != null! must be cache==null
@ -419,12 +428,10 @@ public final class indexRAMCacheRI extends indexAbstractRI implements indexRI {
hashDate.setScore(wordHash, intTime(updateTime));
}
entries = null;
}
return null;
}
public indexContainer addEntry(String wordHash, indexEntry newEntry, long updateTime, boolean dhtCase) {
synchronized (cache) {
public synchronized indexContainer addEntry(String wordHash, indexEntry newEntry, long updateTime, boolean dhtCase) {
indexContainer container = (indexContainer) cache.get(wordHash);
if (container == null) container = new indexContainer(wordHash);
indexEntry[] entries = new indexEntry[] { newEntry };
@ -437,10 +444,9 @@ public final class indexRAMCacheRI extends indexAbstractRI implements indexRI {
container = null;
entries = null;
return null;
}
}
public void close(int waitingSeconds) {
public synchronized void close(int waitingSeconds) {
// dump cache
try {
dump(waitingSeconds);

@ -37,7 +37,6 @@ import java.util.Set;
import java.util.TreeSet;
import de.anomic.htmlFilter.htmlFilterContentScraper;
import de.anomic.index.indexAbstractRI;
import de.anomic.index.indexCollectionRI;
import de.anomic.index.indexContainer;
import de.anomic.index.indexContainerOrder;
@ -56,7 +55,7 @@ import de.anomic.plasma.urlPattern.plasmaURLPattern;
import de.anomic.server.logging.serverLog;
import de.anomic.yacy.yacyDHTAction;
public final class plasmaWordIndex extends indexAbstractRI implements indexRI {
public final class plasmaWordIndex implements indexRI {
private static final String indexAssortmentClusterPath = "ACLUSTER";
private static final int assortmentCount = 64;
@ -185,7 +184,13 @@ public final class plasmaWordIndex extends indexAbstractRI implements indexRI {
}
}
}
public long getUpdateTime(String wordHash) {
indexContainer entries = getContainer(wordHash, null, false, -1);
if (entries == null) return 0;
return entries.updated();
}
public indexContainer addEntry(String wordHash, indexEntry entry, long updateTime, boolean dhtInCase) {
// set dhtInCase depending on wordHash
if ((!dhtInCase) && (yacyDHTAction.shallBeOwnWord(wordHash))) dhtInCase = true;

@ -52,7 +52,6 @@ import java.util.HashSet;
import java.util.Iterator;
import java.util.Set;
import de.anomic.index.indexAbstractRI;
import de.anomic.index.indexContainer;
import de.anomic.index.indexContainerOrder;
import de.anomic.index.indexEntry;
@ -63,7 +62,7 @@ import de.anomic.kelondro.kelondroNaturalOrder;
import de.anomic.kelondro.kelondroRecords;
import de.anomic.server.logging.serverLog;
public final class plasmaWordIndexAssortmentCluster extends indexAbstractRI implements indexRI {
public final class plasmaWordIndexAssortmentCluster implements indexRI {
// class variables
private int clusterCount; // number of cluster files
@ -174,6 +173,18 @@ public final class plasmaWordIndexAssortmentCluster extends indexAbstractRI impl
}
}
public indexContainer addEntry(String wordHash, indexEntry newEntry, long updateTime, boolean dhtCase) {
indexContainer container = new indexContainer(wordHash);
container.add(newEntry);
return addEntries(container, updateTime, dhtCase);
}
public long getUpdateTime(String wordHash) {
indexContainer entries = getContainer(wordHash, null, false, -1);
if (entries == null) return 0;
return entries.updated();
}
public indexContainer addEntries(indexContainer newContainer, long creationTime, boolean dhtCase) {
// this is called by the index ram cache flush process
// it returnes NULL if the storage was successful

@ -50,7 +50,6 @@ import java.util.Iterator;
import java.util.Set;
import java.util.TreeSet;
import de.anomic.index.indexAbstractRI;
import de.anomic.index.indexContainer;
import de.anomic.index.indexEntry;
import de.anomic.index.indexRI;
@ -58,7 +57,7 @@ import de.anomic.kelondro.kelondroNaturalOrder;
import de.anomic.server.logging.serverLog;
import de.anomic.yacy.yacySeedDB;
public class plasmaWordIndexFileCluster extends indexAbstractRI implements indexRI {
public class plasmaWordIndexFileCluster implements indexRI {
// class variables
private final File databaseRoot;
@ -107,7 +106,6 @@ public class plasmaWordIndexFileCluster extends indexAbstractRI implements index
}
public Iterator wordHashes(String startHash, boolean rot) {
// outdated method: to be replaced by wordContainers
return wordHashes(startHash, true, rot);
@ -301,6 +299,12 @@ public class plasmaWordIndexFileCluster extends indexAbstractRI implements index
} else return 0;
}
public indexContainer addEntry(String wordHash, indexEntry newEntry, long updateTime, boolean dhtCase) {
indexContainer container = new indexContainer(wordHash);
container.add(newEntry);
return addEntries(container, updateTime, dhtCase);
}
public indexContainer addEntries(indexContainer container, long creationTime, boolean highPriority) {
//System.out.println("* adding " + newEntries.size() + " cached word index entries for word " + wordHash); // debug
// fetch the index cache
