bugfix for index remove bug,

appeared after a search where snippet loading triggered word removal

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@2869 6c8d7289-2bf4-0310-a012-ef5d649a1542
orbiter 19 years ago
parent 0e79f2fd7e
commit 78b7f6f7fd
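
The core of the change, as far as the patch shows: indexRAMCacheRI.removeEntry()/removeEntries() used to pull the whole word container out of the cache (deleteContainer) and re-insert it only when the per-URL removal failed, so a successful snippet-triggered removal could drop all remaining URL references for that word from the RAM cache. The rewritten methods edit the container in place and delete it only once it is empty; at the same time the inner synchronized (cache) blocks become synchronized methods, and the indexAbstractRI base class is removed, with its addEntry()/getUpdateTime() helpers copied into each indexRI implementation. A condensed before/after sketch of the removal path (names as in indexRAMCacheRI; trimmed for illustration, not the complete class):

    // OLD (buggy): the container is removed from the cache up front; if the
    // per-URL removal succeeds, the method returns without putting it back,
    // so the word's remaining references are lost from the cache.
    public boolean removeEntry(String wordHash, String urlHash, boolean deleteComplete) {
        synchronized (cache) {
            indexContainer c = (indexContainer) deleteContainer(wordHash);
            if (c != null) {
                if (c.removeEntry(wordHash, urlHash, deleteComplete)) return true; // container never re-added
                this.addEntries(c, System.currentTimeMillis(), false);
            }
        }
        return false;
    }

    // NEW (fixed): the container stays in the cache and is edited in place;
    // it is only dropped once it has become empty.
    public synchronized boolean removeEntry(String wordHash, String urlHash, boolean deleteComplete) {
        indexContainer c = (indexContainer) cache.get(wordHash);
        if ((c != null) && (c.removeEntry(wordHash, urlHash, deleteComplete))) {
            if ((c.size() == 0) && (deleteComplete)) {
                deleteContainer(wordHash);
            } else {
                cache.put(wordHash, c);
                hashScore.decScore(wordHash);
                hashDate.setScore(wordHash, intTime(System.currentTimeMillis()));
            }
            return true;
        }
        return false;
    }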

@@ -114,13 +114,13 @@ public final class transferRWI {
             granted = false; // don't accept more words if there are too many words to flush
             result = "busy";
             pause = 60000;
-        } else if ((checkLimit && sb.wordIndex.dhtOutCacheSize() > sb.getConfigLong("wordCacheMaxCount", 20000)) || ((sb.wordIndex.busyCacheFlush) && (!shortCacheFlush))) {
+        } /* else if ((checkLimit && sb.wordIndex.dhtOutCacheSize() > sb.getConfigLong("wordCacheMaxCount", 20000)) || ((sb.wordIndex.busyCacheFlush) && (!shortCacheFlush))) {
             // we are too busy flushing the ramCache to receive indexes
             sb.getLog().logInfo("Rejecting RWIs from peer " + otherPeerName + ". We are too busy (wordcachesize=" + sb.wordIndex.dhtOutCacheSize() + ").");
             granted = false; // don't accept more words if there are too many words to flush
             result = "busy";
             pause = 300000;
-        } else {
+        } */ else {
             // we want and can receive indexes
             // log value status (currently added to find outOfMemory error
             sb.getLog().logFine("Processing " + indexes.length + " bytes / " + wordc + " words / " + entryc + " entries from " + otherPeerName);

@@ -1,44 +0,0 @@
-// indexAsbtractRI.java
-// (C) 2006 by Michael Peter Christen; mc@anomic.de, Frankfurt a. M., Germany
-// first published 26.05.2006 on http://www.anomic.de
-//
-// This is a part of YaCy, a peer-to-peer based web search engine
-//
-// $LastChangedDate: 2006-04-02 22:40:07 +0200 (So, 02 Apr 2006) $
-// $LastChangedRevision: 1986 $
-// $LastChangedBy: orbiter $
-//
-// LICENSE
-//
-// This program is free software; you can redistribute it and/or modify
-// it under the terms of the GNU General Public License as published by
-// the Free Software Foundation; either version 2 of the License, or
-// (at your option) any later version.
-//
-// This program is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-// GNU General Public License for more details.
-//
-// You should have received a copy of the GNU General Public License
-// along with this program; if not, write to the Free Software
-// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-
-package de.anomic.index;
-
-public abstract class indexAbstractRI implements indexRI {
-
-    public indexContainer addEntry(String wordHash, indexEntry newEntry, long updateTime, boolean dhtCase) {
-        indexContainer container = new indexContainer(wordHash);
-        container.add(newEntry);
-        return addEntries(container, updateTime, dhtCase);
-    }
-
-    public long getUpdateTime(String wordHash) {
-        indexContainer entries = getContainer(wordHash, null, false, -1);
-        if (entries == null) return 0;
-        return entries.updated();
-    }
-
-}

@ -40,7 +40,7 @@ import de.anomic.kelondro.kelondroRowCollection;
import de.anomic.kelondro.kelondroRowSet; import de.anomic.kelondro.kelondroRowSet;
import de.anomic.server.logging.serverLog; import de.anomic.server.logging.serverLog;
public class indexCollectionRI extends indexAbstractRI implements indexRI { public class indexCollectionRI implements indexRI {
kelondroCollectionIndex collectionIndex; kelondroCollectionIndex collectionIndex;
@ -61,7 +61,13 @@ public class indexCollectionRI extends indexAbstractRI implements indexRI {
} }
} }
public int size() { public long getUpdateTime(String wordHash) {
indexContainer entries = getContainer(wordHash, null, false, -1);
if (entries == null) return 0;
return entries.updated();
}
public synchronized int size() {
try { try {
return collectionIndex.size(); return collectionIndex.size();
} catch (IOException e) { } catch (IOException e) {
@ -70,7 +76,7 @@ public class indexCollectionRI extends indexAbstractRI implements indexRI {
} }
} }
public int indexSize(String wordHash) { public synchronized int indexSize(String wordHash) {
try { try {
return collectionIndex.indexSize(wordHash.getBytes()); return collectionIndex.indexSize(wordHash.getBytes());
} catch (IOException e) { } catch (IOException e) {
@ -78,7 +84,7 @@ public class indexCollectionRI extends indexAbstractRI implements indexRI {
} }
} }
public Iterator wordContainers(String startWordHash, boolean rot) { public synchronized Iterator wordContainers(String startWordHash, boolean rot) {
return new wordContainersIterator(startWordHash, rot); return new wordContainersIterator(startWordHash, rot);
} }
@ -108,7 +114,7 @@ public class indexCollectionRI extends indexAbstractRI implements indexRI {
} }
public indexContainer getContainer(String wordHash, Set urlselection, boolean deleteIfEmpty, long maxtime) { public synchronized indexContainer getContainer(String wordHash, Set urlselection, boolean deleteIfEmpty, long maxtime) {
try { try {
kelondroRowSet collection = collectionIndex.get(wordHash.getBytes(), deleteIfEmpty); kelondroRowSet collection = collectionIndex.get(wordHash.getBytes(), deleteIfEmpty);
if (collection != null) collection.select(urlselection); if (collection != null) collection.select(urlselection);
@ -119,7 +125,7 @@ public class indexCollectionRI extends indexAbstractRI implements indexRI {
} }
} }
public indexContainer deleteContainer(String wordHash) { public synchronized indexContainer deleteContainer(String wordHash) {
try { try {
kelondroRowSet collection = collectionIndex.delete(wordHash.getBytes()); kelondroRowSet collection = collectionIndex.delete(wordHash.getBytes());
if (collection == null) return null; if (collection == null) return null;
@ -129,13 +135,13 @@ public class indexCollectionRI extends indexAbstractRI implements indexRI {
} }
} }
public boolean removeEntry(String wordHash, String urlHash, boolean deleteComplete) { public synchronized boolean removeEntry(String wordHash, String urlHash, boolean deleteComplete) {
HashSet hs = new HashSet(); HashSet hs = new HashSet();
hs.add(urlHash.getBytes()); hs.add(urlHash.getBytes());
return removeEntries(wordHash, hs, deleteComplete) == 1; return removeEntries(wordHash, hs, deleteComplete) == 1;
} }
public int removeEntries(String wordHash, Set urlHashes, boolean deleteComplete) { public synchronized int removeEntries(String wordHash, Set urlHashes, boolean deleteComplete) {
try { try {
return collectionIndex.remove(wordHash.getBytes(), urlHashes, deleteComplete); return collectionIndex.remove(wordHash.getBytes(), urlHashes, deleteComplete);
} catch (kelondroOutOfLimitsException e) { } catch (kelondroOutOfLimitsException e) {
@ -147,7 +153,13 @@ public class indexCollectionRI extends indexAbstractRI implements indexRI {
} }
} }
public indexContainer addEntries(indexContainer newEntries, long creationTime, boolean dhtCase) { public synchronized indexContainer addEntry(String wordHash, indexEntry newEntry, long updateTime, boolean dhtCase) {
indexContainer container = new indexContainer(wordHash);
container.add(newEntry);
return addEntries(container, updateTime, dhtCase);
}
public synchronized indexContainer addEntries(indexContainer newEntries, long creationTime, boolean dhtCase) {
String wordHash = newEntries.getWordHash(); String wordHash = newEntries.getWordHash();
try { try {
collectionIndex.merge(wordHash.getBytes(), (kelondroRowCollection) newEntries); collectionIndex.merge(wordHash.getBytes(), (kelondroRowCollection) newEntries);
@ -160,7 +172,7 @@ public class indexCollectionRI extends indexAbstractRI implements indexRI {
} }
} }
public void close(int waitingSeconds) { public synchronized void close(int waitingSeconds) {
try { try {
collectionIndex.close(); collectionIndex.close();
} catch (IOException e) { } catch (IOException e) {

@@ -42,7 +42,7 @@ import de.anomic.plasma.plasmaWordIndexAssortment;
 import de.anomic.server.logging.serverLog;
 import de.anomic.yacy.yacySeedDB;
 
-public final class indexRAMCacheRI extends indexAbstractRI implements indexRI {
+public final class indexRAMCacheRI implements indexRI {
 
     // environment constants
     public static final long wCacheMaxAge = 1000 * 60 * 30; // milliseconds; 30 minutes
@@ -87,6 +87,12 @@ public final class indexRAMCacheRI extends indexAbstractRI implements indexRI {
         }
     }
 
+    public synchronized long getUpdateTime(String wordHash) {
+        indexContainer entries = getContainer(wordHash, null, false, -1);
+        if (entries == null) return 0;
+        return entries.updated();
+    }
+
     private void dump(int waitingSeconds) throws IOException {
         log.logConfig("creating dump for index cache '" + indexArrayFileName + "', " + cache.size() + " words (and much more urls)");
         File indexDumpFile = new File(databaseRoot, indexArrayFileName);
@@ -217,18 +223,18 @@ public final class indexRAMCacheRI extends indexAbstractRI implements indexRI {
         return this.cacheMaxCount;
     }
 
-    public int size() {
+    public synchronized int size() {
         return cache.size();
     }
 
-    public int indexSize(String wordHash) {
+    public synchronized int indexSize(String wordHash) {
         int size = 0;
         indexContainer cacheIndex = (indexContainer) cache.get(wordHash);
         if (cacheIndex != null) size += cacheIndex.size();
         return size;
     }
 
-    public Iterator wordContainers(String startWordHash, boolean rot) {
+    public synchronized Iterator wordContainers(String startWordHash, boolean rot) {
         // we return an iterator object that creates top-level-clones of the indexContainers
         // in the cache, so that manipulations of the iterated objects do not change
         // objects in the cache.
@@ -276,14 +282,13 @@ public final class indexRAMCacheRI extends indexAbstractRI implements indexRI {
     }
 
-    public String bestFlushWordHash() {
+    public synchronized String bestFlushWordHash() {
         // select appropriate hash
         // we have 2 different methods to find a good hash:
         // - the oldest entry in the cache
         // - the entry with maximum count
         if (cache.size() == 0) return null;
 
         try {
-            synchronized (cache) {
             String hash = null;
             int count = hashScore.getMaxScore();
             if ((count >= cacheReferenceLimit) &&
@@ -307,7 +312,6 @@ public final class indexRAMCacheRI extends indexAbstractRI implements indexRI {
                 hash = (String) hashDate.getMinObject(); // flush oldest entries
             }
             return hash;
-            }
         } catch (Exception e) {
             log.logSevere("flushFromMem: " + e.getMessage(), e);
         }
@@ -322,7 +326,7 @@ public final class indexRAMCacheRI extends indexAbstractRI implements indexRI {
         return (((long) intTime) * (long) 1000) + initTime;
     }
 
-    public indexContainer getContainer(String wordHash, Set urlselection, boolean deleteIfEmpty, long maxtime_dummy) {
+    public synchronized indexContainer getContainer(String wordHash, Set urlselection, boolean deleteIfEmpty, long maxtime_dummy) {
         // retrieve container
         indexContainer container = (indexContainer) cache.get(wordHash);
@@ -339,46 +343,53 @@ public final class indexRAMCacheRI extends indexAbstractRI implements indexRI {
         return container;
     }
 
-    public indexContainer deleteContainer(String wordHash) {
+    public synchronized indexContainer deleteContainer(String wordHash) {
         // returns the index that had been deleted
-        synchronized (cache) {
-            indexContainer container = (indexContainer) cache.remove(wordHash);
-            hashScore.deleteScore(wordHash);
-            hashDate.deleteScore(wordHash);
-            return container;
-        }
+        indexContainer container = (indexContainer) cache.remove(wordHash);
+        hashScore.deleteScore(wordHash);
+        hashDate.deleteScore(wordHash);
+        return container;
     }
 
-    public boolean removeEntry(String wordHash, String urlHash, boolean deleteComplete) {
-        synchronized (cache) {
-            indexContainer c = (indexContainer) deleteContainer(wordHash);
-            if (c != null) {
-                if (c.removeEntry(wordHash, urlHash, deleteComplete)) return true;
-                this.addEntries(c, System.currentTimeMillis(), false);
-            }
-        }
+    public synchronized boolean removeEntry(String wordHash, String urlHash, boolean deleteComplete) {
+        indexContainer c = (indexContainer) cache.get(wordHash);
+        if ((c != null) && (c.removeEntry(wordHash, urlHash, deleteComplete))) {
+            // removal successful
+            if ((c.size() == 0) && (deleteComplete)) {
+                deleteContainer(wordHash);
+            } else {
+                cache.put(wordHash, c);
+                hashScore.decScore(wordHash);
+                hashDate.setScore(wordHash, intTime(System.currentTimeMillis()));
+            }
+            return true;
+        }
         return false;
     }
 
-    public int removeEntries(String wordHash, Set urlHashes, boolean deleteComplete) {
+    public synchronized int removeEntries(String wordHash, Set urlHashes, boolean deleteComplete) {
         if (urlHashes.size() == 0) return 0;
-        int count = 0;
-        synchronized (cache) {
-            indexContainer c = (indexContainer) deleteContainer(wordHash);
-            if (c != null) {
-                count = c.removeEntries(wordHash, urlHashes, deleteComplete);
-                if (c.size() != 0) this.addEntries(c, System.currentTimeMillis(), false);
-            }
-        }
-        return count;
+        indexContainer c = (indexContainer) cache.get(wordHash);
+        int count;
+        if ((c != null) && ((count = c.removeEntries(wordHash, urlHashes, deleteComplete)) > 0)) {
+            // removal successful
+            if ((c.size() == 0) && (deleteComplete)) {
+                deleteContainer(wordHash);
+            } else {
+                cache.put(wordHash, c);
+                hashScore.setScore(wordHash, c.size());
+                hashDate.setScore(wordHash, intTime(System.currentTimeMillis()));
+            }
+            return count;
+        }
+        return 0;
     }
 
-    public int tryRemoveURLs(String urlHash) {
+    public synchronized int tryRemoveURLs(String urlHash) {
         // this tries to delete an index from the cache that has this
         // urlHash assigned. This can only work if the entry is really fresh
         // Such entries must be searched in the latest entries
         int delCount = 0;
-        synchronized (cache) {
         Iterator i = cache.entrySet().iterator();
         Map.Entry entry;
         String wordhash;
@@ -398,16 +409,14 @@ public final class indexRAMCacheRI extends indexAbstractRI implements indexRI {
                 delCount++;
             }
         }
-        }
         return delCount;
     }
 
-    public indexContainer addEntries(indexContainer container, long updateTime, boolean dhtCase) {
+    public synchronized indexContainer addEntries(indexContainer container, long updateTime, boolean dhtCase) {
        // this puts the entries into the cache, not into the assortment directly
        int added = 0;
 
        // put new words into cache
-        synchronized (cache) {
        // put container into wCache
        String wordHash = container.getWordHash();
        indexContainer entries = (indexContainer) cache.get(wordHash); // null pointer exception? wordhash != null! must be cache==null
@@ -419,12 +428,10 @@ public final class indexRAMCacheRI extends indexAbstractRI implements indexRI {
            hashDate.setScore(wordHash, intTime(updateTime));
        }
        entries = null;
-        }
        return null;
    }
 
-    public indexContainer addEntry(String wordHash, indexEntry newEntry, long updateTime, boolean dhtCase) {
-        synchronized (cache) {
+    public synchronized indexContainer addEntry(String wordHash, indexEntry newEntry, long updateTime, boolean dhtCase) {
        indexContainer container = (indexContainer) cache.get(wordHash);
        if (container == null) container = new indexContainer(wordHash);
        indexEntry[] entries = new indexEntry[] { newEntry };
@@ -437,10 +444,9 @@ public final class indexRAMCacheRI extends indexAbstractRI implements indexRI {
        container = null;
        entries = null;
        return null;
-        }
    }
 
-    public void close(int waitingSeconds) {
+    public synchronized void close(int waitingSeconds) {
        // dump cache
        try {
            dump(waitingSeconds);

@@ -37,7 +37,6 @@ import java.util.Set;
 import java.util.TreeSet;
 
 import de.anomic.htmlFilter.htmlFilterContentScraper;
-import de.anomic.index.indexAbstractRI;
 import de.anomic.index.indexCollectionRI;
 import de.anomic.index.indexContainer;
 import de.anomic.index.indexContainerOrder;
@@ -56,7 +55,7 @@ import de.anomic.plasma.urlPattern.plasmaURLPattern;
 import de.anomic.server.logging.serverLog;
 import de.anomic.yacy.yacyDHTAction;
 
-public final class plasmaWordIndex extends indexAbstractRI implements indexRI {
+public final class plasmaWordIndex implements indexRI {
 
     private static final String indexAssortmentClusterPath = "ACLUSTER";
     private static final int assortmentCount = 64;
@@ -185,7 +184,13 @@ public final class plasmaWordIndex extends indexAbstractRI implements indexRI {
             }
         }
     }
 
+    public long getUpdateTime(String wordHash) {
+        indexContainer entries = getContainer(wordHash, null, false, -1);
+        if (entries == null) return 0;
+        return entries.updated();
+    }
+
     public indexContainer addEntry(String wordHash, indexEntry entry, long updateTime, boolean dhtInCase) {
         // set dhtInCase depending on wordHash
         if ((!dhtInCase) && (yacyDHTAction.shallBeOwnWord(wordHash))) dhtInCase = true;

@@ -52,7 +52,6 @@ import java.util.HashSet;
 import java.util.Iterator;
 import java.util.Set;
 
-import de.anomic.index.indexAbstractRI;
 import de.anomic.index.indexContainer;
 import de.anomic.index.indexContainerOrder;
 import de.anomic.index.indexEntry;
@@ -63,7 +62,7 @@ import de.anomic.kelondro.kelondroNaturalOrder;
 import de.anomic.kelondro.kelondroRecords;
 import de.anomic.server.logging.serverLog;
 
-public final class plasmaWordIndexAssortmentCluster extends indexAbstractRI implements indexRI {
+public final class plasmaWordIndexAssortmentCluster implements indexRI {
 
     // class variables
     private int clusterCount; // number of cluster files
@@ -174,6 +173,18 @@ public final class plasmaWordIndexAssortmentCluster extends indexAbstractRI implements indexRI {
         }
     }
 
+    public indexContainer addEntry(String wordHash, indexEntry newEntry, long updateTime, boolean dhtCase) {
+        indexContainer container = new indexContainer(wordHash);
+        container.add(newEntry);
+        return addEntries(container, updateTime, dhtCase);
+    }
+
+    public long getUpdateTime(String wordHash) {
+        indexContainer entries = getContainer(wordHash, null, false, -1);
+        if (entries == null) return 0;
+        return entries.updated();
+    }
+
     public indexContainer addEntries(indexContainer newContainer, long creationTime, boolean dhtCase) {
         // this is called by the index ram cache flush process
         // it returnes NULL if the storage was successful

@@ -50,7 +50,6 @@ import java.util.Iterator;
 import java.util.Set;
 import java.util.TreeSet;
 
-import de.anomic.index.indexAbstractRI;
 import de.anomic.index.indexContainer;
 import de.anomic.index.indexEntry;
 import de.anomic.index.indexRI;
@@ -58,7 +57,7 @@ import de.anomic.kelondro.kelondroNaturalOrder;
 import de.anomic.server.logging.serverLog;
 import de.anomic.yacy.yacySeedDB;
 
-public class plasmaWordIndexFileCluster extends indexAbstractRI implements indexRI {
+public class plasmaWordIndexFileCluster implements indexRI {
 
     // class variables
     private final File databaseRoot;
@@ -107,7 +106,6 @@ public class plasmaWordIndexFileCluster extends indexAbstractRI implements indexRI {
     }
 
     public Iterator wordHashes(String startHash, boolean rot) {
         // outdated method: to be replaced by wordContainers
         return wordHashes(startHash, true, rot);
@@ -301,6 +299,12 @@ public class plasmaWordIndexFileCluster extends indexAbstractRI implements indexRI {
         } else return 0;
     }
 
+    public indexContainer addEntry(String wordHash, indexEntry newEntry, long updateTime, boolean dhtCase) {
+        indexContainer container = new indexContainer(wordHash);
+        container.add(newEntry);
+        return addEntries(container, updateTime, dhtCase);
+    }
+
     public indexContainer addEntries(indexContainer container, long creationTime, boolean highPriority) {
         //System.out.println("* adding " + newEntries.size() + " cached word index entries for word " + wordHash); // debug
         // fetch the index cache
