Some code cleanup and possible speed enhancements in several core methods

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@4935 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 17 years ago
parent 6a9cc29cdd
commit b928ae492a

@ -230,12 +230,7 @@ public class Balancer {
} }
public synchronized boolean has(String urlhash) { public synchronized boolean has(String urlhash) {
try {
return urlFileIndex.has(urlhash.getBytes()); return urlFileIndex.has(urlhash.getBytes());
} catch (IOException e) {
e.printStackTrace();
return false;
}
} }
public boolean notEmpty() { public boolean notEmpty() {
@ -345,7 +340,7 @@ public class Balancer {
urlFileIndex.put(entry.toRow()); urlFileIndex.put(entry.toRow());
// check size of domainStacks and flush // check size of domainStacks and flush
if ((domainStacks.size() > 20) || (sizeDomainStacks() > 1000)) { if ((domainStacks.size() > 100) || (sizeDomainStacks() > 1000)) {
flushOnceDomStacks(1, urlRAMStack.size() < 100); // when the ram stack is small, flush it there flushOnceDomStacks(1, urlRAMStack.size() < 100); // when the ram stack is small, flush it there
} }
} }

@ -238,6 +238,11 @@ public final class CrawlStacker extends Thread {
int currentdepth, int currentdepth,
CrawlProfile.entry profile) { CrawlProfile.entry profile) {
if (profile == null) return; if (profile == null) return;
// check first before we create a big object
if (this.urlEntryCache.has(nexturl.hash().getBytes())) return;
// now create the big object before we enter the synchronized block
CrawlEntry newEntry = new CrawlEntry( CrawlEntry newEntry = new CrawlEntry(
initiatorHash, initiatorHash,
nexturl, nexturl,
@ -249,15 +254,15 @@ public final class CrawlStacker extends Thread {
0, 0,
0 0
); );
if (newEntry == null) return; if (newEntry == null) return;
kelondroRow.Entry newEntryRow = newEntry.toRow();
synchronized(this.urlEntryHashCache) { synchronized(this.urlEntryHashCache) {
kelondroRow.Entry oldValue; kelondroRow.Entry oldValue;
boolean hostknown = true; boolean hostknown = true;
if (prequeue) hostknown = prefetchHost(nexturl.getHost()); if (prequeue) hostknown = prefetchHost(nexturl.getHost());
try { try {
oldValue = this.urlEntryCache.put(newEntry.toRow()); oldValue = this.urlEntryCache.put(newEntryRow);
} catch (IOException e) { } catch (IOException e) {
oldValue = null; oldValue = null;
} }
@ -346,7 +351,7 @@ public final class CrawlStacker extends Thread {
synchronized (this.urlEntryHashCache) { synchronized (this.urlEntryHashCache) {
urlHash = this.urlEntryHashCache.removeFirst(); urlHash = this.urlEntryHashCache.removeFirst();
if (urlHash == null) throw new IOException("urlHash is null"); if (urlHash == null) throw new IOException("urlHash is null");
entry = this.urlEntryCache.remove(urlHash.getBytes(), false); entry = this.urlEntryCache.remove(urlHash.getBytes(), true);
} }
if ((urlHash == null) || (entry == null)) return null; if ((urlHash == null) || (entry == null)) return null;

@ -90,15 +90,6 @@ public final class ResultURLs {
assert executorHash != null; assert executorHash != null;
if (e == null) { return; } if (e == null) { return; }
try { try {
// switch (stackType) {
// case 0: break;
// case 1: externResultStack.add(e.hash() + initiatorHash + executorHash); break;
// case 2: searchResultStack.add(e.hash() + initiatorHash + executorHash); break;
// case 3: transfResultStack.add(e.hash() + initiatorHash + executorHash); break;
// case 4: proxyResultStack.add(e.hash() + initiatorHash + executorHash); break;
// case 5: lcrawlResultStack.add(e.hash() + initiatorHash + executorHash); break;
// case 6: gcrawlResultStack.add(e.hash() + initiatorHash + executorHash); break;
// }
final List<String> resultStack = getStack(stackType); final List<String> resultStack = getStack(stackType);
if(resultStack != null) { if(resultStack != null) {
resultStack.add(e.hash() + initiatorHash + executorHash); resultStack.add(e.hash() + initiatorHash + executorHash);
@ -121,54 +112,18 @@ public final class ResultURLs {
} else { } else {
return -1; return -1;
} }
// switch (stack) {
// case 1: return externResultStack.size();
// case 2: return searchResultStack.size();
// case 3: return transfResultStack.size();
// case 4: return proxyResultStack.size();
// case 5: return lcrawlResultStack.size();
// case 6: return gcrawlResultStack.size();
// }
// return -1;
} }
public synchronized String getUrlHash(int stack, int pos) { public synchronized String getUrlHash(int stack, int pos) {
return getHashNo(stack, pos, 0); return getHashNo(stack, pos, 0);
// switch (stack) {
// case 1: return (externResultStack.get(pos)).substring(0, yacySeedDB.commonHashLength);
// case 2: return (searchResultStack.get(pos)).substring(0, yacySeedDB.commonHashLength);
// case 3: return (transfResultStack.get(pos)).substring(0, yacySeedDB.commonHashLength);
// case 4: return (proxyResultStack.get(pos)).substring(0, yacySeedDB.commonHashLength);
// case 5: return (lcrawlResultStack.get(pos)).substring(0, yacySeedDB.commonHashLength);
// case 6: return (gcrawlResultStack.get(pos)).substring(0, yacySeedDB.commonHashLength);
// }
// return null;
} }
public synchronized String getInitiatorHash(int stack, int pos) { public synchronized String getInitiatorHash(int stack, int pos) {
return getHashNo(stack, pos, 1); return getHashNo(stack, pos, 1);
// switch (stack) {
// case 1: return (externResultStack.get(pos)).substring(yacySeedDB.commonHashLength, yacySeedDB.commonHashLength * 2);
// case 2: return (searchResultStack.get(pos)).substring(yacySeedDB.commonHashLength, yacySeedDB.commonHashLength * 2);
// case 3: return (transfResultStack.get(pos)).substring(yacySeedDB.commonHashLength, yacySeedDB.commonHashLength * 2);
// case 4: return (proxyResultStack.get(pos)).substring(yacySeedDB.commonHashLength, yacySeedDB.commonHashLength * 2);
// case 5: return (lcrawlResultStack.get(pos)).substring(yacySeedDB.commonHashLength, yacySeedDB.commonHashLength * 2);
// case 6: return (gcrawlResultStack.get(pos)).substring(yacySeedDB.commonHashLength, yacySeedDB.commonHashLength * 2);
// }
// return null;
} }
public synchronized String getExecutorHash(final int stack, int pos) { public synchronized String getExecutorHash(final int stack, int pos) {
return getHashNo(stack, pos, 2); return getHashNo(stack, pos, 2);
// switch (stack) {
// case 1: return (externResultStack.get(pos)).substring(yacySeedDB.commonHashLength * 2, yacySeedDB.commonHashLength * 3);
// case 2: return (searchResultStack.get(pos)).substring(yacySeedDB.commonHashLength * 2, yacySeedDB.commonHashLength * 3);
// case 3: return (transfResultStack.get(pos)).substring(yacySeedDB.commonHashLength * 2, yacySeedDB.commonHashLength * 3);
// case 4: return (proxyResultStack.get(pos)).substring(yacySeedDB.commonHashLength * 2, yacySeedDB.commonHashLength * 3);
// case 5: return (lcrawlResultStack.get(pos)).substring(yacySeedDB.commonHashLength * 2, yacySeedDB.commonHashLength * 3);
// case 6: return (gcrawlResultStack.get(pos)).substring(yacySeedDB.commonHashLength * 2, yacySeedDB.commonHashLength * 3);
// }
// return null;
} }
/** /**
@ -221,6 +176,7 @@ public final class ResultURLs {
final List<String> resultStack = getStack(stack); final List<String> resultStack = getStack(stack);
if(resultStack != null) { if(resultStack != null) {
assert pos < resultStack.size() : "pos = " + pos + ", resultStack.size() = " + resultStack.size();
if(pos < resultStack.size()) { if(pos < resultStack.size()) {
return resultStack.get(pos); return resultStack.get(pos);
} else { } else {

@ -141,11 +141,7 @@ public class ZURL {
} }
public boolean exists(String urlHash) { public boolean exists(String urlHash) {
try {
return urlIndex.has(urlHash.getBytes()); return urlIndex.has(urlHash.getBytes());
} catch (IOException e) {
return false;
}
} }
public void clearStack() { public void clearStack() {

@ -28,6 +28,7 @@ package de.anomic.index;
import java.io.File; import java.io.File;
import java.io.IOException; import java.io.IOException;
import java.util.ArrayList;
import java.util.Set; import java.util.Set;
import de.anomic.kelondro.kelondroCloneableIterator; import de.anomic.kelondro.kelondroCloneableIterator;
@ -159,7 +160,7 @@ public final class indexRAMRI implements indexRI, indexRIReader {
return null; return null;
} }
public synchronized String bestFlushWordHash() { private String bestFlushWordHash() {
// select appropriate hash // select appropriate hash
// we have 2 different methods to find a good hash: // we have 2 different methods to find a good hash:
// - the oldest entry in the cache // - the oldest entry in the cache
@ -189,7 +190,8 @@ public final class indexRAMRI implements indexRI, indexRIReader {
hash = hashDate.getMinObject(); // flush oldest entries hash = hashDate.getMinObject(); // flush oldest entries
} }
if (hash == null) { if (hash == null) {
heap.wordContainers(null, false).next(); indexContainer ic = heap.wordContainers(null, false).next();
if (ic != null) hash = ic.getWordHash();
} }
return hash; return hash;
} catch (Exception e) { } catch (Exception e) {
@ -198,6 +200,23 @@ public final class indexRAMRI implements indexRI, indexRIReader {
return null; return null;
} }
/**
 * Collects up to {@code count} containers for flushing.
 *
 * On each round a word hash is chosen with {@code bestFlushWordHash()}, the
 * matching container is taken via {@code heap.delete(hash)}, and the hash is
 * dropped from both {@code hashScore} and {@code hashDate}. Collection stops
 * early as soon as no hash is offered or the heap yields no container for it.
 * The whole operation runs while holding this object's monitor.
 *
 * @param count maximum number of containers to collect; values of zero or
 *              less produce an empty list
 * @return the collected containers in selection order, never {@code null}
 */
public synchronized ArrayList<indexContainer> bestFlushContainers(int count) {
    final ArrayList<indexContainer> flushed = new ArrayList<indexContainer>();
    for (int remaining = count; remaining > 0; remaining--) {
        final String wordHash = bestFlushWordHash();
        if (wordHash == null) break; // nothing left to select

        final indexContainer removed = heap.delete(wordHash);
        assert (removed != null);
        // with assertions disabled, a missing container ends the collection quietly
        if (removed == null) break;

        // forget the hash in both selection structures before handing the container out
        hashScore.deleteScore(wordHash);
        hashDate.deleteScore(wordHash);
        flushed.add(removed);
    }
    return flushed;
}
private int intTime(long longTime) { private int intTime(long longTime) {
return (int) Math.max(0, ((longTime - initTime) / 1000)); return (int) Math.max(0, ((longTime - initTime) / 1000));
} }

@ -151,11 +151,7 @@ public final class indexRepositoryReference {
public synchronized boolean exists(String urlHash) { public synchronized boolean exists(String urlHash) {
if (urlIndexFile == null) return false; // case may happen during shutdown if (urlIndexFile == null) return false; // case may happen during shutdown
try {
return urlIndexFile.has(urlHash.getBytes()); return urlIndexFile.has(urlHash.getBytes());
} catch (IOException e) {
return false;
}
} }
public kelondroCloneableIterator<indexURLReference> entries(boolean up, String firstHash) throws IOException { public kelondroCloneableIterator<indexURLReference> entries(boolean up, String firstHash) throws IOException {

@ -49,6 +49,11 @@ public class kelondroBytesIntMap {
return index.row(); return index.row();
} }
/**
 * Checks whether the wrapped index contains the given key.
 *
 * The lookup is delegated to {@code index.has(key)} while holding this
 * object's monitor.
 *
 * @param key the key to look up; expected to be non-null (enforced only by
 *            an assertion)
 * @return whatever {@code index.has(key)} reports for the key
 */
public synchronized boolean has(byte[] key) {
    assert (key != null);
    final boolean present = index.has(key);
    return present;
}
public synchronized int geti(byte[] key) throws IOException { public synchronized int geti(byte[] key) throws IOException {
assert (key != null); assert (key != null);
kelondroRow.Entry indexentry = index.get(key); kelondroRow.Entry indexentry = index.get(key);

@ -191,7 +191,7 @@ public class kelondroCache implements kelondroIndex {
readMissCache = null; readMissCache = null;
} }
public boolean has(byte[] key) throws IOException { public boolean has(byte[] key) {
// first look into the miss cache // first look into the miss cache
if (readMissCache != null) { if (readMissCache != null) {
if (readMissCache.get(key) != null) { if (readMissCache.get(key) != null) {

@ -345,10 +345,15 @@ public class kelondroEcoTable implements kelondroIndex {
return rowdef.newEntry(b); return rowdef.newEntry(b);
} }
public synchronized boolean has(byte[] key) throws IOException { public synchronized boolean has(byte[] key) {
try {
assert file.size() == index.size() + fail : "file.size() = " + file.size() + ", index.size() = " + index.size(); assert file.size() == index.size() + fail : "file.size() = " + file.size() + ", index.size() = " + index.size();
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
assert ((table == null) || (table.size() == index.size())); assert ((table == null) || (table.size() == index.size()));
return index.geti(key) >= 0; return index.has(key);
} }
public synchronized kelondroCloneableIterator<byte[]> keys(boolean up, byte[] firstKey) throws IOException { public synchronized kelondroCloneableIterator<byte[]> keys(boolean up, byte[] firstKey) throws IOException {

@ -147,12 +147,12 @@ public class kelondroFlexTable extends kelondroFlexWidthArray implements kelondr
return RAMIndex; return RAMIndex;
} }
public synchronized boolean has(byte[] key) throws IOException { public synchronized boolean has(byte[] key) {
// it is not recommended to implement or use a has predicate unless // it is not recommended to implement or use a has predicate unless
// it can be ensured that it causes no IO // it can be ensured that it causes no IO
if ((kelondroAbstractRecords.debugmode) && (RAMIndex != true)) serverLog.logWarning("kelondroFlexTable", "RAM index warning in file " + super.tablename); if ((kelondroAbstractRecords.debugmode) && (RAMIndex != true)) serverLog.logWarning("kelondroFlexTable", "RAM index warning in file " + super.tablename);
assert this.size() == index.size() : "content.size() = " + this.size() + ", index.size() = " + index.size(); assert this.size() == index.size() : "content.size() = " + this.size() + ", index.size() = " + index.size();
return index.geti(key) >= 0; return index.has(key);
} }
private kelondroBytesIntMap initializeRamIndex(int initialSpace) { private kelondroBytesIntMap initializeRamIndex(int initialSpace) {

@ -61,7 +61,7 @@ public interface kelondroIndex {
public int size(); public int size();
public kelondroProfile profile(); public kelondroProfile profile();
public kelondroRow row(); public kelondroRow row();
public boolean has(byte[] key) throws IOException; // use this only if there is no get in case that has returns true public boolean has(byte[] key); // use this only if there is no get in case that has returns true
public kelondroRow.Entry get(byte[] key) throws IOException; public kelondroRow.Entry get(byte[] key) throws IOException;
public kelondroRow.Entry put(kelondroRow.Entry row) throws IOException; public kelondroRow.Entry put(kelondroRow.Entry row) throws IOException;
public kelondroRow.Entry put(kelondroRow.Entry row, Date entryDate) throws IOException; public kelondroRow.Entry put(kelondroRow.Entry row, Date entryDate) throws IOException;

@ -109,7 +109,10 @@ public class kelondroRowSet extends kelondroRowCollection implements kelondroInd
} }
public synchronized boolean has(byte[] key) { public synchronized boolean has(byte[] key) {
return (get(key) != null); long handle = profile.startRead();
int index = find(key, 0, key.length);
profile.stopRead(handle);
return index >= 0;
} }
public synchronized kelondroRow.Entry get(byte[] key) { public synchronized kelondroRow.Entry get(byte[] key) {

@ -135,8 +135,12 @@ public class kelondroSQLTable implements kelondroIndex {
return this.rowdef; return this.rowdef;
} }
public boolean has(byte[] key) throws IOException { public boolean has(byte[] key) {
try {
return (get(key) != null); return (get(key) != null);
} catch (IOException e) {
return false;
}
} }
public ArrayList<kelondroRowCollection> removeDoubles() { public ArrayList<kelondroRowCollection> removeDoubles() {

@ -139,7 +139,7 @@ public class kelondroSplitTable implements kelondroIndex {
if (f.isDirectory()) { if (f.isDirectory()) {
// this is a kelondroFlex table // this is a kelondroFlex table
serverLog.logInfo("kelondroSplitTable", "opening partial flex table " + path); serverLog.logInfo("kelondroSplitTable", "opening partial flex table " + path);
table = new kelondroCache(new kelondroFlexTable(path, maxf, rowdef, 0, resetOnFail)); table = new kelondroFlexTable(path, maxf, rowdef, 0, resetOnFail);
} else { } else {
serverLog.logInfo("kelondroSplitTable", "opening partial eco table " + f); serverLog.logInfo("kelondroSplitTable", "opening partial eco table " + f);
table = new kelondroEcoTable(f, rowdef, kelondroEcoTable.tailCacheUsageAuto, EcoFSBufferSize, 0); table = new kelondroEcoTable(f, rowdef, kelondroEcoTable.tailCacheUsageAuto, EcoFSBufferSize, 0);
@ -209,7 +209,7 @@ public class kelondroSplitTable implements kelondroIndex {
return this.rowdef; return this.rowdef;
} }
public boolean has(byte[] key) throws IOException { public boolean has(byte[] key) {
return keeperOf(key) != null; return keeperOf(key) != null;
} }
@ -276,19 +276,13 @@ public class kelondroSplitTable implements kelondroIndex {
try { try {
cs.submit(new Callable<kelondroIndex>() { cs.submit(new Callable<kelondroIndex>() {
public kelondroIndex call() { public kelondroIndex call() {
try {
if (table.has(key)) return table; else return dummyIndex; if (table.has(key)) return table; else return dummyIndex;
} catch (IOException e) {
return dummyIndex;
}
} }
}); });
} catch (RejectedExecutionException e) { } catch (RejectedExecutionException e) {
// the executor is either shutting down or the blocking queue is full // the executor is either shutting down or the blocking queue is full
// execute the search direct here without concurrency // execute the search direct here without concurrency
try {
if (table.has(key)) return table; if (table.has(key)) return table;
} catch (IOException ee) {}
rejected++; rejected++;
} }
} }

@ -170,7 +170,7 @@ public class kelondroTree extends kelondroCachedRecords implements kelondroIndex
n.commit(); n.commit();
} }
public boolean has(byte[] key) throws IOException { public boolean has(byte[] key) {
throw new UnsupportedOperationException("has should not be used with kelondroTree."); throw new UnsupportedOperationException("has should not be used with kelondroTree.");
} }

@ -358,7 +358,7 @@ public final class plasmaCondenser {
k = it.next(); k = it.next();
wsp = words.get(k); wsp = words.get(k);
wsp.check(idx); wsp.check(idx);
words.put(k, wsp); words.put(k, wsp); // is that necessary?
} }
} }
sentence = new StringBuffer(100); sentence = new StringBuffer(100);

@ -502,19 +502,8 @@ public final class plasmaWordIndex implements indexRI {
} }
} }
count = count - containerList.size(); count = count - containerList.size();
for (int i = 0; i < count; i++) { // possible position of outOfMemoryError ? containerList.addAll(ram.bestFlushContainers(count));
synchronized (ram) {
if (ram.size() == 0) break;
if (serverMemory.available() < collections.minMem()) break; // protect memory during flush
// select one word to flush
wordHash = ram.bestFlushWordHash();
// move one container from ram to flush list
if (wordHash == null) c = null; else c = ram.deleteContainer(wordHash);
}
if (c != null) containerList.add(c);
}
// flush the containers // flush the containers
for (indexContainer container : containerList) collections.addEntries(container); for (indexContainer container : containerList) collections.addEntries(container);
//System.out.println("DEBUG-Finished flush of " + count + " entries from RAM to DB in " + (System.currentTimeMillis() - start) + " milliseconds"); //System.out.println("DEBUG-Finished flush of " + count + " entries from RAM to DB in " + (System.currentTimeMillis() - start) + " milliseconds");

Loading…
Cancel
Save