Added an exists-check based only on a RAM index lookup:

- faster double-check during crawling
- less IO

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@3179 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
(no author) 19 years ago
parent 37e53b4a6a
commit c67d22116e

Binary file not shown.

@ -492,6 +492,10 @@ final class dbTable implements kelondroIndex {
return this.rowdef;
}
// Existence check: true when get(key) yields a non-null entry.
// This performs the same lookup as get(key), so prefer get(key) directly
// whenever the entry itself is needed afterwards.
public boolean has(byte[] key) throws IOException {
    final kelondroRow.Entry entry = get(key);
    return entry != null;
}
public kelondroRow.Entry get(byte[] key) throws IOException {
try {
String sqlQuery = new String

@ -143,6 +143,13 @@ public class indexCachedRI implements indexRI {
busyCacheFlush = false;
}
// Returns true as soon as one of the underlying indexes reports a container
// for wordHash. The lookup order is riExtern, riIntern, backend; later
// indexes are not consulted once an earlier one answers true.
public boolean hasContainer(String wordHash) {
    return riExtern.hasContainer(wordHash)
        || riIntern.hasContainer(wordHash)
        || backend.hasContainer(wordHash);
}
public indexContainer getContainer(String wordHash, Set urlselection, long maxTime) {
// get from cache
indexContainer container = riExtern.getContainer(wordHash, urlselection, maxTime);

@ -118,7 +118,15 @@ public class indexCollectionRI implements indexRI {
}
}
// Asks the collection index whether an entry exists for wordHash.
// An IOException during the lookup is not propagated; it is reported
// to the caller as "not present" (false).
public synchronized boolean hasContainer(String wordHash) {
    boolean found;
    try {
        found = collectionIndex.has(wordHash.getBytes());
    } catch (IOException e) {
        // best-effort check: an unreadable index counts as a miss
        found = false;
    }
    return found;
}
public synchronized indexContainer getContainer(String wordHash, Set urlselection, long maxtime) {
try {
kelondroRowSet collection = collectionIndex.get(wordHash.getBytes());

@ -332,6 +332,10 @@ public final class indexRAMRI implements indexRI {
return (((long) intTime) * (long) 1000) + initTime;
}
// True when the cache map holds an entry under the key wordHash.
public synchronized boolean hasContainer(String wordHash) {
    final boolean cached = cache.containsKey(wordHash);
    return cached;
}
public synchronized indexContainer getContainer(String wordHash, Set urlselection, long maxtime_dummy) {
// retrieve container

@ -40,6 +40,7 @@ public interface indexRI {
public long getUpdateTime(String wordHash);
public int indexSize(String wordHash);
public boolean hasContainer(String wordHash); // use this only when getContainer will NOT be called after a true result
public indexContainer getContainer(String wordHash, Set urlselection, long maxtime);
public indexContainer deleteContainer(String wordHash);

@ -278,6 +278,10 @@ public class kelondroCache implements kelondroIndex {
index.close();
}
// Existence check implemented on top of get(key): true when get returns
// a non-null entry. This method is not synchronized itself; the get(key)
// it calls is.
public boolean has(byte[] key) throws IOException {
    final Entry hit = get(key);
    return hit != null;
}
public synchronized Entry get(byte[] key) throws IOException {
// first look into the miss cache
if (readMissCache != null) {

@ -420,6 +420,10 @@ public class kelondroCollectionIndex {
return (int) indexrow.getColLong(idx_col_chunkcount);
}
// Delegates the existence check for key to the underlying index.
public synchronized boolean has(byte[] key) throws IOException {
    final boolean present = index.has(key);
    return present;
}
public synchronized kelondroRowSet get(byte[] key) throws IOException {
// find an entry, if one exists
kelondroRow.Entry indexrow = index.get(key);

@ -169,6 +169,16 @@ public class kelondroFlexSplitTable implements kelondroIndex {
return this.rowdef;
}
// Probes every table held in 'tables' and returns true at the first one
// that reports the key; returns false when no table has it.
// NOTE(review): unlike get(key) this method is not synchronized - confirm
// that 'tables' cannot be modified while it is being iterated here.
public boolean has(byte[] key) throws IOException {
    for (Iterator it = tables.values().iterator(); it.hasNext();) {
        final kelondroIndex part = (kelondroIndex) it.next();
        if (part.has(key)) {
            return true;
        }
    }
    return false;
}
public synchronized kelondroRow.Entry get(byte[] key) throws IOException {
Object[] keeper = keeperOf(key);
if (keeper == null) return null;

@ -59,6 +59,7 @@ public interface kelondroIndex {
public int size() throws IOException;
public kelondroProfile profile();
public kelondroRow row() throws IOException;
public boolean has(byte[] key) throws IOException; // use this only when get will NOT be called after has returns true
public kelondroRow.Entry get(byte[] key) throws IOException;
public kelondroRow.Entry put(kelondroRow.Entry row) throws IOException;
public kelondroRow.Entry put(kelondroRow.Entry row, Date entryDate) throws IOException;

@ -24,6 +24,7 @@
package de.anomic.kelondro;
import java.io.IOException;
import java.util.Date;
import java.util.Iterator;
import java.util.Random;
@ -63,6 +64,10 @@ public class kelondroRowSet extends kelondroRowCollection implements kelondroInd
this.profile = new kelondroProfile();
}
// Existence check: true when get(key) finds a row for the key.
// The IOException in the signature is kept as declared, although
// the get(key) used here declares no checked exception.
public boolean has(byte[] key) throws IOException {
    final kelondroRow.Entry row = get(key);
    return row != null;
}
public kelondroRow.Entry get(byte[] key) {
return get(key, 0, key.length);
}

@ -105,6 +105,10 @@ public class kelondroSplittedTree implements kelondroIndex {
return (int) order.partition(key, ff);
}
// Not supported by this class: every call fails with an
// UnsupportedOperationException. Use get(key) instead.
public boolean has(byte[] key) throws IOException {
    final String reason = "has should not be used with kelondroSplittedTree.";
    throw new UnsupportedOperationException(reason);
}
public kelondroRow.Entry get(byte[] key) throws IOException {
return ktfs[partition(key)].get(key);
}

@ -168,6 +168,10 @@ public class kelondroTree extends kelondroRecords implements kelondroIndex {
else n.commit(CP_HIGH);
}
// Not supported by this class: every call fails with an
// UnsupportedOperationException. Use get(key) instead.
public boolean has(byte[] key) throws IOException {
    final String reason = "has should not be used with kelondroTree.";
    throw new UnsupportedOperationException(reason);
}
// Returns the value to which this map maps the specified key.
public kelondroRow.Entry get(byte[] key) throws IOException {
kelondroRow.Entry result;

@ -250,7 +250,7 @@ public class plasmaCrawlEURL {
public boolean exists(String urlHash) {
try {
return (urlIndexFile.get(urlHash.getBytes()) != null);
return urlIndexFile.has(urlHash.getBytes());
} catch (IOException e) {
return false;
}

@ -360,7 +360,7 @@ public final class plasmaCrawlLURL {
public synchronized boolean exists(String urlHash) {
try {
return (urlIndexFile.get(urlHash.getBytes()) != null);
return urlIndexFile.has(urlHash.getBytes());
} catch (IOException e) {
return false;
}

@ -293,6 +293,13 @@ public final class plasmaWordIndex implements indexRI {
return wordCount;
}
// Returns true as soon as one of the index parts reports a container for
// wordHash. The lookup order is dhtOutCache, dhtInCache, collections;
// later parts are not consulted once an earlier one answers true.
public boolean hasContainer(String wordHash) {
    return dhtOutCache.hasContainer(wordHash)
        || dhtInCache.hasContainer(wordHash)
        || collections.hasContainer(wordHash);
}
public indexContainer getContainer(String wordHash, Set urlselection, long maxTime) {
// get from cache

@ -224,6 +224,10 @@ public class plasmaWordIndexFileCluster implements indexRI {
return plasmaWordIndexFile.wordHash2path(databaseRoot, wordHash).exists();
}
// Existence check implemented by actually loading the container:
// true when getContainer returns non-null for wordHash.
// The call passes an empty TreeSet as url selection and -1 as maxTime;
// getContainer replaces a negative maxTime by its one-minute maximum.
public synchronized boolean hasContainer(String wordHash) {
    final Set noUrlSelection = new TreeSet();
    final indexContainer loaded = getContainer(wordHash, noUrlSelection, -1);
    return loaded != null;
}
public synchronized indexContainer getContainer(String wordHash, Set urlselection, long maxTime) {
long start = System.currentTimeMillis();
if ((maxTime < 0) || (maxTime > 60000)) maxTime=60000; // maximum is one minute

Loading…
Cancel
Save